Streaming data into the CIF Parse function reduces memory usage to 4.1GB - also looking to stream the parsed data directly into the database if feasible.
This commit is contained in:
54
cif/parse.go
54
cif/parse.go
@@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"git.fjla.uk/owlboard/go-types/pkg/upstreamApi"
|
||||
"git.fjla.uk/owlboard/timetable-mgr/log"
|
||||
@@ -63,3 +64,56 @@ func parseCifData(data *[]byte) (*parsedData, error) {
|
||||
log.Msg.Debug("CIF Parsing completed")
|
||||
return &parsed, nil
|
||||
}
|
||||
|
||||
func parseCifDataStream(dataStream io.ReadCloser) (*parsedData, error) {
|
||||
log.Msg.Debug("STREAM-Starting CIF Datastream parsing")
|
||||
if dataStream == nil {
|
||||
return nil, errors.New("unable to parse nil pointer")
|
||||
}
|
||||
|
||||
// Initialise data structures
|
||||
var parsed parsedData
|
||||
parsed.assoc = make([]upstreamApi.JsonAssociationV1, 0)
|
||||
parsed.sched = make([]upstreamApi.JsonScheduleV1, 0)
|
||||
|
||||
// Create JSON Decoder
|
||||
decoder := json.NewDecoder(dataStream)
|
||||
|
||||
// Iterate over JSON Objects using stream decoder
|
||||
for decoder.More() {
|
||||
var obj map[string]json.RawMessage
|
||||
if err := decoder.Decode(&obj); err != nil {
|
||||
log.Msg.Error("Error decoding JSON String")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Handle parsed data
|
||||
for key, value := range obj {
|
||||
switch key {
|
||||
case "JsonTimetableV1":
|
||||
var timetable upstreamApi.JsonTimetableV1
|
||||
if err := json.Unmarshal(value, &timetable); err != nil {
|
||||
log.Msg.Error("Error decoding JSONTimetableV1 object", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
parsed.header = timetable
|
||||
case "JsonAssociationV1":
|
||||
var association upstreamApi.JsonAssociationV1
|
||||
if err := json.Unmarshal(value, &association); err != nil {
|
||||
log.Msg.Error("Error decoding JSONAssociationV1 object", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
parsed.assoc = append(parsed.assoc, association)
|
||||
case "JsonScheduleV1":
|
||||
var schedule upstreamApi.JsonScheduleV1
|
||||
if err := json.Unmarshal(value, &schedule); err != nil {
|
||||
log.Msg.Error("Error decoding JSONScheduleV1 object", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
parsed.sched = append(parsed.sched, schedule)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Msg.Debug("CIF Parsing completed")
|
||||
return &parsed, nil
|
||||
}
|
||||
|
||||
@@ -20,25 +20,25 @@ func runCifFullDownload(cfg *helpers.Configuration) error {
|
||||
if err != nil {
|
||||
log.Msg.Error("Error getting download URL", zap.Error(err))
|
||||
}
|
||||
data, err := nrod.NrodDownload(url, cfg)
|
||||
dataStream, err := nrod.NrodStream(url, cfg)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error downloading CIF data", zap.Error(err))
|
||||
}
|
||||
|
||||
// If debug mode is on, call debugWriteDownload
|
||||
if helpers.Runtime == "debug" {
|
||||
debugWriteDownload(data)
|
||||
}
|
||||
// if helpers.Runtime == "debug" {
|
||||
// debugWriteDownload(dataStream)
|
||||
// }
|
||||
|
||||
// Parse CIF file
|
||||
parsed, err := parseCifData(data)
|
||||
parsed, err := parseCifDataStream(dataStream)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error parsing CIF data", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
// Make `data` a nil pointer as it is no longer required
|
||||
data = nil
|
||||
dataStream = nil
|
||||
|
||||
// Drop timetable collection
|
||||
dbAccess.DropCollection(dbAccess.TimetableCollection) // I should edit this to prevent removal of VSTP entries in the database.
|
||||
|
||||
Reference in New Issue
Block a user