Improve CIF Parsing to achieve 60% memory reduction

This commit is contained in:
Fred Boniface 2024-04-09 21:26:56 +01:00
parent a2c52f7b8b
commit 94e4cd964d
3 changed files with 18 additions and 20 deletions

View File

@ -11,60 +11,55 @@ import (
) )
// Unmarshals data into the correct types for processing // Unmarshals data into the correct types for processing
// This function suffers from extremely high memory usage
func parseCifData(data *[]byte) (*parsedData, error) { func parseCifData(data *[]byte) (*parsedData, error) {
log.Msg.Debug("Starting CIF Data parsing")
if data == nil { if data == nil {
err := errors.New("unable to parse nil pointer") return nil, errors.New("unable to parse nil pointer")
return nil, err
} }
// Split the data into lines
lines := bytes.Split(*data, []byte("\n"))
// Initialise variable for the parsed data // Initialise data structures
var parsed parsedData var parsed parsedData
parsed.assoc = make([]upstreamApi.JsonAssociationV1, 0) parsed.assoc = make([]upstreamApi.JsonAssociationV1, 0)
parsed.sched = make([]upstreamApi.JsonScheduleV1, 0) parsed.sched = make([]upstreamApi.JsonScheduleV1, 0)
for _, line := range lines { // Create JSON Decoder
decoder := json.NewDecoder(bytes.NewReader(*data))
// Skip empty lines to avoid logging errors when there is no error // Iterate over JSON Objects using stream decoder
if len(bytes.TrimSpace(line)) == 0 { for decoder.More() {
continue
}
// Map each line for processing
var obj map[string]json.RawMessage var obj map[string]json.RawMessage
if err := json.Unmarshal(line, &obj); err != nil { if err := decoder.Decode(&obj); err != nil {
log.Msg.Error("Error decoding line", zap.String("line", string(line)), zap.Error(err)) log.Msg.Error("Error decoding JSON String")
continue return nil, err
} }
// Loop through the mapped data and unmarshal to the correct type // Handle parsed data
for key, value := range obj { for key, value := range obj {
switch key { switch key {
case "JsonTimetableV1": case "JsonTimetableV1":
var timetable upstreamApi.JsonTimetableV1 var timetable upstreamApi.JsonTimetableV1
if err := json.Unmarshal(value, &timetable); err != nil { if err := json.Unmarshal(value, &timetable); err != nil {
log.Msg.Error("Unable to parse JSON Timetable", zap.Error(err), zap.String("line", string(value))) log.Msg.Error("Error decoding JSONTimetableV1 object", zap.Error(err))
continue continue
} }
parsed.header = timetable parsed.header = timetable
case "JsonAssociationV1": case "JsonAssociationV1":
var association upstreamApi.JsonAssociationV1 var association upstreamApi.JsonAssociationV1
if err := json.Unmarshal(value, &association); err != nil { if err := json.Unmarshal(value, &association); err != nil {
log.Msg.Error("Error decoding JSON Association", zap.Error(err)) log.Msg.Error("Error decoding JSONAssociationV1 object", zap.Error(err))
continue continue
} }
parsed.assoc = append(parsed.assoc, association) parsed.assoc = append(parsed.assoc, association)
case "JsonScheduleV1": case "JsonScheduleV1":
var schedule upstreamApi.JsonScheduleV1 var schedule upstreamApi.JsonScheduleV1
if err := json.Unmarshal(value, &schedule); err != nil { if err := json.Unmarshal(value, &schedule); err != nil {
log.Msg.Error("Error decoding JSON Schedule", zap.Error(err)) log.Msg.Error("Error decoding JSONScheduleV1 object", zap.Error(err))
continue continue
} }
parsed.sched = append(parsed.sched, schedule) parsed.sched = append(parsed.sched, schedule)
} }
} }
} }
log.Msg.Debug("CIF Parsing completed")
return &parsed, nil return &parsed, nil
} }

View File

@ -12,6 +12,7 @@ import (
// Processes parsed CIF data and applies the data to the database // Processes parsed CIF data and applies the data to the database
func processParsedCif(data *parsedData) error { func processParsedCif(data *parsedData) error {
log.Msg.Debug("Starting CIF Processing")
createTasks := make([]*upstreamApi.JsonScheduleV1, 0) createTasks := make([]*upstreamApi.JsonScheduleV1, 0)
deleteTasks := make([]*upstreamApi.JsonScheduleV1, 0) deleteTasks := make([]*upstreamApi.JsonScheduleV1, 0)

View File

@ -98,6 +98,7 @@ func runCifUpdateDownload(cfg *helpers.Configuration, metadata *dbAccess.CifMeta
debugWriteFile(&parsed.header, &parsed.sched) debugWriteFile(&parsed.header, &parsed.sched)
} }
log.Msg.Debug("Starting metadata checks")
// Check CIF Sequence // Check CIF Sequence
// Skip if LastSequence is >= to this sequence // Skip if LastSequence is >= to this sequence
if metadata.LastSequence >= parsed.header.Metadata.Sequence { if metadata.LastSequence >= parsed.header.Metadata.Sequence {
@ -111,6 +112,7 @@ func runCifUpdateDownload(cfg *helpers.Configuration, metadata *dbAccess.CifMeta
log.Msg.Error("CIF sequence not as expected", zap.Error(err), zap.Int64("LastSequence", metadata.LastSequence), zap.Int64("New Sequence", parsed.header.Metadata.Sequence)) log.Msg.Error("CIF sequence not as expected", zap.Error(err), zap.Int64("LastSequence", metadata.LastSequence), zap.Int64("New Sequence", parsed.header.Metadata.Sequence))
return err return err
} }
log.Msg.Debug("Metadata checks complete")
// Do further sequence checks - parsed.header.Metadata.Sequence MUST = metadata.LastSequence + 1 // Do further sequence checks - parsed.header.Metadata.Sequence MUST = metadata.LastSequence + 1