timetable-mgr/src/corpus/fetch.go

134 lines
3.3 KiB
Go
Raw Normal View History

2024-03-26 15:40:15 +00:00
package corpus
import (
"compress/gzip"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"time"
"git.fjla.uk/owlboard/go-types/pkg/database"
"git.fjla.uk/owlboard/timetable-mgr/helpers"
"git.fjla.uk/owlboard/timetable-mgr/log"
"go.uber.org/zap"
)
// url is the endpoint that fetchCorpus sends its authenticated GET request to.
// The type=CORPUS query parameter is part of the fixed request address.
const url string = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
// fetchCorpus performs a GET request against the package-level url constant,
// sending an Authorization header built from helpers.BasicAuth with the
// NrodUser and NrodPass fields of cfg.
//
// On success (HTTP 200) the response is returned with its body unread; the
// caller owns resp.Body and must close it. On any failure the error is logged
// and returned, the returned response is nil, and no body is left open.
func fetchCorpus(cfg *helpers.Configuration) (*http.Response, error) {
	log.Msg.Info("Fetching CORPUS Data")

	// NOTE(review): http.Client.Timeout also bounds the time spent reading the
	// response body, so the caller must finish reading within this window —
	// confirm 10s is sufficient for the full download.
	client := http.Client{
		Timeout: time.Second * 10,
	}

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		log.Msg.Error("Failed to create CORPUS Request", zap.Error(err))
		return nil, err
	}

	req.Header.Add("Authorization", "Basic "+helpers.BasicAuth(cfg.NrodUser, cfg.NrodPass))

	resp, err := client.Do(req)
	if err != nil {
		log.Msg.Error("Error requesting Corpus", zap.Error(err))
		return nil, err
	}

	if resp.StatusCode != http.StatusOK {
		// The response is discarded on this path, so the body must be closed
		// here; otherwise the underlying connection is leaked.
		resp.Body.Close()

		log.Msg.Error("Unexpected status code", zap.Int("status_code", resp.StatusCode))
		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	return resp, nil
}
// extractCorpusResponse gunzips the body of resp and returns the decompressed
// payload as a string.
//
// An error is returned if a gzip reader cannot be created over the body (the
// underlying cause is wrapped) or if reading the decompressed stream fails.
// Only the gzip reader is closed here; resp.Body itself is left for the
// caller to close.
func extractCorpusResponse(resp *http.Response) (string, error) {
	log.Msg.Info("Decompressing CORPUS Data")

	gzReader, err := gzip.NewReader(resp.Body)
	if err != nil {
		// Keep the underlying cause: gzip.NewReader can fail on a read error
		// as well as on an invalid gzip header.
		log.Msg.Error("Corpus response is not gzipped", zap.Error(err))
		return "", fmt.Errorf("response not gzipped: %w", err)
	}
	defer gzReader.Close()

	log.Msg.Info("Reading CORPUS Data")
	decompressedData, err := io.ReadAll(gzReader)
	if err != nil {
		log.Msg.Error("Failed to read decompressed data", zap.Error(err))
		return "", err
	}

	return string(decompressedData), nil
}
// parseCorpusData decodes jsonData into a slice of database.CorpusEntry.
//
// jsonData must be a JSON object whose "TIPLOCDATA" key holds an array. Each
// array element is re-marshalled and decoded into a database.CorpusEntry.
//
// An error is returned when jsonData is not valid JSON for an object, when
// the "TIPLOCDATA" key is absent, when its value is not an array, or when any
// element cannot be converted. An empty array yields a nil slice and no error.
func parseCorpusData(jsonData string) ([]database.CorpusEntry, error) {
	log.Msg.Info("Unmarshalling CORPUS Data")

	var dataMap map[string]interface{}
	if err := json.Unmarshal([]byte(jsonData), &dataMap); err != nil {
		// Stop here: continuing with an undecoded map would hide the real
		// cause behind a misleading "not in expected format" error.
		log.Msg.Error("Unable to unmarshal CORPUS data", zap.Error(err))
		return nil, err
	}

	corpusDataArrayInterface, ok := dataMap["TIPLOCDATA"]
	if !ok {
		err := errors.New("corpus Data not in expected format")
		log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
		return nil, err
	}

	corpusDataArray, ok := corpusDataArrayInterface.([]interface{})
	if !ok {
		err := errors.New("corpus data missing the data array")
		log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
		return nil, err
	}

	// Pre-size the result to avoid repeated growth, but keep the nil result
	// for an empty array.
	var corpusEntries []database.CorpusEntry
	if n := len(corpusDataArray); n > 0 {
		corpusEntries = make([]database.CorpusEntry, 0, n)
	}

	for _, item := range corpusDataArray {
		// Round-trip each generic element through JSON to populate the
		// typed struct.
		jsonItem, err := json.Marshal(item)
		if err != nil {
			log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
			return nil, err
		}

		var corpusEntry database.CorpusEntry
		if err := json.Unmarshal(jsonItem, &corpusEntry); err != nil {
			log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
			return nil, err
		}

		corpusEntries = append(corpusEntries, corpusEntry)
	}

	return corpusEntries, nil
}
// pruneCorpusEntries blanks out placeholder values in corpusEntries: any of
// the CRS, TIPLOC, NLCDESC16, STANOX or UIC fields that holds exactly one
// space is reset to the empty string.
//
// The entries are modified in place and the same slice is returned.
func pruneCorpusEntries(corpusEntries []database.CorpusEntry) []database.CorpusEntry {
	// blank is the single-space placeholder that is treated as "no value".
	const blank = " "

	for idx := 0; idx < len(corpusEntries); idx++ {
		entry := &corpusEntries[idx]

		if entry.CRS == blank {
			entry.CRS = ""
		}
		if entry.TIPLOC == blank {
			entry.TIPLOC = ""
		}
		if entry.NLCDESC16 == blank {
			entry.NLCDESC16 = ""
		}
		if entry.STANOX == blank {
			entry.STANOX = ""
		}
		if entry.UIC == blank {
			entry.UIC = ""
		}
	}

	return corpusEntries
}