2024-03-26 15:40:15 +00:00
|
|
|
package corpus
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"errors"
|
2024-04-11 20:50:36 +01:00
|
|
|
"io"
|
2024-03-26 15:40:15 +00:00
|
|
|
|
|
|
|
"git.fjla.uk/owlboard/go-types/pkg/database"
|
|
|
|
"git.fjla.uk/owlboard/timetable-mgr/log"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
)
|
|
|
|
|
2024-03-29 14:01:57 +00:00
|
|
|
// Accepts CORPUS data as a byte array and formats it ready for database insertion
|
2024-04-11 20:50:36 +01:00
|
|
|
func parseCorpusData(stream io.ReadCloser) (*[]database.CorpusEntry, error) {
|
|
|
|
defer stream.Close()
|
2024-04-10 20:56:13 +01:00
|
|
|
|
2024-04-11 20:50:36 +01:00
|
|
|
log.Msg.Debug("Starting CORPUS Data parsing")
|
2024-03-26 15:40:15 +00:00
|
|
|
|
2024-04-11 20:50:36 +01:00
|
|
|
var corpusEntries []database.CorpusEntry
|
|
|
|
decoder := json.NewDecoder(stream)
|
2024-03-26 15:40:15 +00:00
|
|
|
|
2024-04-11 20:50:36 +01:00
|
|
|
// Expect an object at the root of the JSON stream
|
|
|
|
if _, err := decoder.Token(); err != nil {
|
2024-03-26 15:40:15 +00:00
|
|
|
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-04-11 20:50:36 +01:00
|
|
|
// Search for the "TIPLOCDATA" key
|
|
|
|
for decoder.More() {
|
|
|
|
// Decode the next JSON token
|
|
|
|
if tok, err := decoder.Token(); err != nil {
|
2024-03-26 15:40:15 +00:00
|
|
|
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
|
|
|
return nil, err
|
2024-04-11 20:50:36 +01:00
|
|
|
} else if tok == "TIPLOCDATA" {
|
|
|
|
// Found the "TIPLOCDATA" key, expect the associated array
|
|
|
|
if !decoder.More() {
|
|
|
|
err := errors.New("missing array after TIPLOCDATA key")
|
|
|
|
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-03-26 15:40:15 +00:00
|
|
|
|
2024-04-11 20:50:36 +01:00
|
|
|
// Start reading the array associated with the "TIPLOCDATA" key
|
|
|
|
if _, err := decoder.Token(); err != nil {
|
|
|
|
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-03-26 15:40:15 +00:00
|
|
|
|
2024-04-11 20:50:36 +01:00
|
|
|
// Iterate over the JSON array
|
|
|
|
for decoder.More() {
|
|
|
|
var corpusEntry database.CorpusEntry
|
|
|
|
if err := decoder.Decode(&corpusEntry); err != nil {
|
|
|
|
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
corpusEntries = append(corpusEntries, corpusEntry)
|
|
|
|
}
|
|
|
|
break // Exit loop after processing "TIPLOCDATA" array
|
|
|
|
}
|
2024-03-26 15:40:15 +00:00
|
|
|
}
|
|
|
|
|
2024-04-11 20:50:36 +01:00
|
|
|
log.Msg.Debug("CORPUS parsing complete")
|
|
|
|
|
2024-04-10 20:56:13 +01:00
|
|
|
return &corpusEntries, nil
|
2024-03-26 15:40:15 +00:00
|
|
|
}
|
|
|
|
|
2024-03-29 14:01:57 +00:00
|
|
|
// Removes empty fields from CORPUS entries
|
2024-04-10 20:56:13 +01:00
|
|
|
func pruneCorpusEntries(corpusEntries []database.CorpusEntry) *[]database.CorpusEntry {
|
2024-03-26 15:40:15 +00:00
|
|
|
for i := range corpusEntries {
|
|
|
|
if corpusEntries[i].CRS == " " {
|
|
|
|
corpusEntries[i].CRS = ""
|
|
|
|
}
|
|
|
|
if corpusEntries[i].TIPLOC == " " {
|
|
|
|
corpusEntries[i].TIPLOC = ""
|
|
|
|
}
|
|
|
|
if corpusEntries[i].NLCDESC16 == " " {
|
|
|
|
corpusEntries[i].NLCDESC16 = ""
|
|
|
|
}
|
|
|
|
if corpusEntries[i].STANOX == " " {
|
|
|
|
corpusEntries[i].STANOX = ""
|
|
|
|
}
|
|
|
|
if corpusEntries[i].UIC == " " {
|
|
|
|
corpusEntries[i].UIC = ""
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-10 20:56:13 +01:00
|
|
|
return &corpusEntries
|
2024-03-26 15:40:15 +00:00
|
|
|
}
|