timetable-mgr/corpus/parse.go

88 lines
2.3 KiB
Go
Raw Normal View History

2024-03-26 15:40:15 +00:00
package corpus
import (
"encoding/json"
"errors"
"io"
2024-03-26 15:40:15 +00:00
"git.fjla.uk/owlboard/go-types/pkg/database"
"git.fjla.uk/owlboard/timetable-mgr/log"
"go.uber.org/zap"
)
// Accepts CORPUS data as a byte array and formats it ready for database insertion
func parseCorpusData(stream io.ReadCloser) (*[]database.CorpusEntry, error) {
defer stream.Close()
log.Msg.Debug("Starting CORPUS Data parsing")
2024-03-26 15:40:15 +00:00
var corpusEntries []database.CorpusEntry
decoder := json.NewDecoder(stream)
2024-03-26 15:40:15 +00:00
// Expect an object at the root of the JSON stream
if _, err := decoder.Token(); err != nil {
2024-03-26 15:40:15 +00:00
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
// Search for the "TIPLOCDATA" key
for decoder.More() {
// Decode the next JSON token
if tok, err := decoder.Token(); err != nil {
2024-03-26 15:40:15 +00:00
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
} else if tok == "TIPLOCDATA" {
// Found the "TIPLOCDATA" key, expect the associated array
if !decoder.More() {
err := errors.New("missing array after TIPLOCDATA key")
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
2024-03-26 15:40:15 +00:00
// Start reading the array associated with the "TIPLOCDATA" key
if _, err := decoder.Token(); err != nil {
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
2024-03-26 15:40:15 +00:00
// Iterate over the JSON array
for decoder.More() {
var corpusEntry database.CorpusEntry
if err := decoder.Decode(&corpusEntry); err != nil {
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
corpusEntries = append(corpusEntries, corpusEntry)
}
break // Exit loop after processing "TIPLOCDATA" array
}
2024-03-26 15:40:15 +00:00
}
log.Msg.Debug("CORPUS parsing complete")
return &corpusEntries, nil
2024-03-26 15:40:15 +00:00
}
// Removes empty fields from CORPUS entries
func pruneCorpusEntries(corpusEntries []database.CorpusEntry) *[]database.CorpusEntry {
2024-03-26 15:40:15 +00:00
for i := range corpusEntries {
if corpusEntries[i].CRS == " " {
corpusEntries[i].CRS = ""
}
if corpusEntries[i].TIPLOC == " " {
corpusEntries[i].TIPLOC = ""
}
if corpusEntries[i].NLCDESC16 == " " {
corpusEntries[i].NLCDESC16 = ""
}
if corpusEntries[i].STANOX == " " {
corpusEntries[i].STANOX = ""
}
if corpusEntries[i].UIC == " " {
corpusEntries[i].UIC = ""
}
}
return &corpusEntries
2024-03-26 15:40:15 +00:00
}