Update package corpus to stream data to the parse function.
This commit is contained in:
@@ -3,6 +3,7 @@ package corpus
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"git.fjla.uk/owlboard/go-types/pkg/database"
|
||||
"git.fjla.uk/owlboard/timetable-mgr/log"
|
||||
@@ -10,50 +11,55 @@ import (
|
||||
)
|
||||
|
||||
// Accepts CORPUS data as a JSON stream (previously a byte array) and formats it ready for database insertion
|
||||
func parseCorpusData(jsonData *[]byte) (*[]database.CorpusEntry, error) {
|
||||
func parseCorpusData(stream io.ReadCloser) (*[]database.CorpusEntry, error) {
|
||||
defer stream.Close()
|
||||
|
||||
log.Msg.Debug("Starting CORPUS Data parsing")
|
||||
|
||||
// Initialise data structure
|
||||
var dataMap map[string]interface{}
|
||||
|
||||
// Unmarshal the raw JSON into the generic map
|
||||
err := json.Unmarshal(*jsonData, &dataMap)
|
||||
if err != nil {
|
||||
log.Msg.Error("Unable to unmarshal CORPUS data", zap.Error(err))
|
||||
}
|
||||
|
||||
corpusDataArrayInterface, ok := dataMap["TIPLOCDATA"]
|
||||
if !ok {
|
||||
err := errors.New("corpus Data not in expected format")
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
corpusDataArray, ok := corpusDataArrayInterface.([]interface{})
|
||||
if !ok {
|
||||
err := errors.New("corpus data missing the data array")
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var corpusEntries []database.CorpusEntry
|
||||
for _, item := range corpusDataArray {
|
||||
jsonItem, err := json.Marshal(item)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
decoder := json.NewDecoder(stream)
|
||||
|
||||
var corpusEntry database.CorpusEntry
|
||||
err = json.Unmarshal(jsonItem, &corpusEntry)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
corpusEntries = append(corpusEntries, corpusEntry)
|
||||
// Consume the first token of the JSON stream, expected to be the root object's opening '{' (the token itself is not checked)
|
||||
if _, err := decoder.Token(); err != nil {
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Search for the "TIPLOCDATA" key
|
||||
for decoder.More() {
|
||||
// Decode the next JSON token
|
||||
if tok, err := decoder.Token(); err != nil {
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
} else if tok == "TIPLOCDATA" {
|
||||
// Found the "TIPLOCDATA" key, expect the associated array
|
||||
if !decoder.More() {
|
||||
err := errors.New("missing array after TIPLOCDATA key")
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Consume the token that opens the "TIPLOCDATA" value, expected to be '[' (the token itself is not checked)
|
||||
if _, err := decoder.Token(); err != nil {
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Iterate over the JSON array
|
||||
for decoder.More() {
|
||||
var corpusEntry database.CorpusEntry
|
||||
if err := decoder.Decode(&corpusEntry); err != nil {
|
||||
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
corpusEntries = append(corpusEntries, corpusEntry)
|
||||
}
|
||||
break // Exit loop after processing "TIPLOCDATA" array
|
||||
}
|
||||
}
|
||||
|
||||
log.Msg.Debug("CORPUS parsing complete")
|
||||
|
||||
return &corpusEntries, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
|
||||
// Runs all stages of the CORPUS Update process
|
||||
func RunCorpusUpdate(cfg *helpers.Configuration) error {
|
||||
resp, err := nrod.NrodDownload(url, cfg)
|
||||
resp, err := nrod.NrodStream(url, cfg)
|
||||
if err != nil {
|
||||
log.Msg.Error("Failed to fetch CORPUS data", zap.Error(err))
|
||||
return err
|
||||
|
||||
Reference in New Issue
Block a user