package corpus

import (
	"encoding/json"
	"errors"
	"io"

	"git.fjla.uk/owlboard/go-types/pkg/database"
	"git.fjla.uk/owlboard/timetable-mgr/log"
	"go.uber.org/zap"
)

// parseCorpusData reads a CORPUS JSON document from stream and returns the
// entries found in the array stored under the top-level "TIPLOCDATA" key,
// ready for database insertion.
//
// The stream is closed before the function returns. The document root must be
// a JSON object and the "TIPLOCDATA" value must be an array; values belonging
// to any other top-level key are skipped. If the key is absent the returned
// slice is empty and the error is nil. Every error is logged and then
// returned, in which case the slice pointer is nil.
func parseCorpusData(stream io.ReadCloser) (*[]database.CorpusEntry, error) {
	defer stream.Close()

	log.Msg.Debug("Starting CORPUS Data parsing")

	// fail logs err with the common message and produces the error return.
	fail := func(err error) (*[]database.CorpusEntry, error) {
		log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
		return nil, err
	}

	var corpusEntries []database.CorpusEntry
	decoder := json.NewDecoder(stream)

	// Expect an object at the root of the JSON stream.
	root, err := decoder.Token()
	if err != nil {
		return fail(err)
	}
	if root != json.Delim('{') {
		return fail(errors.New("expected JSON object at root of CORPUS data"))
	}

	// Search the top-level keys for "TIPLOCDATA".
	for decoder.More() {
		key, err := decoder.Token()
		if err != nil {
			return fail(err)
		}

		if key != "TIPLOCDATA" {
			// Consume this key's whole value so the next token is the next
			// top-level key. Without this, a nested array or object would end
			// the search early and a string value could be mistaken for a key.
			var skipped json.RawMessage
			if err := decoder.Decode(&skipped); err != nil {
				return fail(err)
			}
			continue
		}

		// Found the "TIPLOCDATA" key, expect the associated array.
		if !decoder.More() {
			return fail(errors.New("missing array after TIPLOCDATA key"))
		}
		opening, err := decoder.Token()
		if err != nil {
			return fail(err)
		}
		if opening != json.Delim('[') {
			return fail(errors.New("missing array after TIPLOCDATA key"))
		}

		// Decode the array one element at a time.
		for decoder.More() {
			var corpusEntry database.CorpusEntry
			if err := decoder.Decode(&corpusEntry); err != nil {
				return fail(err)
			}
			corpusEntries = append(corpusEntries, corpusEntry)
		}

		// More also reports false when the input ends early, so read the
		// closing bracket to tell a complete array from a truncated stream.
		if _, err := decoder.Token(); err != nil {
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return fail(err)
		}

		break // Exit loop after processing "TIPLOCDATA" array
	}

	log.Msg.Debug("CORPUS parsing complete")

	return &corpusEntries, nil
}

// pruneCorpusEntries blanks placeholder values in CORPUS entries: any of the
// CRS, TIPLOC, NLCDESC16, STANOX or UIC fields that holds exactly one space is
// set to the empty string.
//
// The elements of corpusEntries are modified in place; the returned pointer
// refers to a slice sharing the caller's backing array.
func pruneCorpusEntries(corpusEntries []database.CorpusEntry) *[]database.CorpusEntry {
	for i := range corpusEntries {
		// Take the element's address so the assignments update the slice itself.
		entry := &corpusEntries[i]

		if entry.CRS == " " {
			entry.CRS = ""
		}
		if entry.TIPLOC == " " {
			entry.TIPLOC = ""
		}
		if entry.NLCDESC16 == " " {
			entry.NLCDESC16 = ""
		}
		if entry.STANOX == " " {
			entry.STANOX = ""
		}
		if entry.UIC == " " {
			entry.UIC = ""
		}
	}

	return &corpusEntries
}