From f4a21ebe8651976accb5dd2d3b5c53e8b848235f Mon Sep 17 00:00:00 2001 From: Fred Boniface Date: Tue, 26 Mar 2024 15:40:15 +0000 Subject: [PATCH] Integrate corpus fetch/parse logic --- src/background/ticker.go | 2 +- src/corpus/fetch.go | 133 +++++++++++++++++++++++++++++++++++++++ src/corpus/update.go | 33 ++++++++++ src/go.mod | 2 +- src/go.sum | 4 ++ src/helpers/basicAuth.go | 8 +++ src/main.go | 7 ++- 7 files changed, 184 insertions(+), 5 deletions(-) create mode 100644 src/corpus/fetch.go create mode 100644 src/corpus/update.go create mode 100644 src/helpers/basicAuth.go diff --git a/src/background/ticker.go b/src/background/ticker.go index e69f728..d84b108 100644 --- a/src/background/ticker.go +++ b/src/background/ticker.go @@ -8,7 +8,7 @@ import ( "git.fjla.uk/owlboard/timetable-mgr/log" ) -const frequency = 5 * time.Millisecond // Figure out a sensible frequency! +const frequency = 20000 * time.Millisecond // Figure out a sensible frequency! func InitTicker(cfg *helpers.Configuration, stop <-chan struct{}) { go runTicker(cfg, stop) diff --git a/src/corpus/fetch.go b/src/corpus/fetch.go new file mode 100644 index 0000000..540b562 --- /dev/null +++ b/src/corpus/fetch.go @@ -0,0 +1,133 @@ +package corpus + +import ( + "compress/gzip" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "time" + + "git.fjla.uk/owlboard/go-types/pkg/database" + "git.fjla.uk/owlboard/timetable-mgr/helpers" + "git.fjla.uk/owlboard/timetable-mgr/log" + "go.uber.org/zap" +) + +const url string = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS" + +func fetchCorpus(cfg *helpers.Configuration) (*http.Response, error) { + log.Msg.Info("Fetching CORPUS Data") + client := http.Client{ + Timeout: time.Second * 10, + } + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + log.Msg.Error("Failed to create CORPUS Request", zap.Error(err)) + return nil, err + } + req.Header.Add("Authorization", "Basic "+helpers.BasicAuth(cfg.NrodUser, cfg.NrodPass)) + + resp, err := client.Do(req) + if err != nil { + log.Msg.Error("Error requesting Corpus", zap.Error(err)) + return nil, err + } + + if resp.StatusCode != http.StatusOK { + err := fmt.Errorf("unexpected status code: %d", resp.StatusCode) + log.Msg.Error("Unexpected status code", zap.Int("status_code", resp.StatusCode)) + return nil, err + } + + return resp, nil +} + +func extractCorpusResponse(resp *http.Response) (string, error) { + log.Msg.Info("Decompressing CORPUS Data") + gzReader, err := gzip.NewReader(resp.Body) + if err != nil { + log.Msg.Error("Corpus response is not gzipped") + return "", errors.New("response not gzipped") + } + + defer gzReader.Close() + + log.Msg.Info("Reading CORPUS Data") + decompressedData, err := io.ReadAll(gzReader) + if err != nil { + log.Msg.Error("Failed to read decompressed data", zap.Error(err)) + return "", err + } + + responseBody := string(decompressedData) + return responseBody, nil +} + +func parseCorpusData(jsonData string) ([]database.CorpusEntry, error) { + log.Msg.Info("Unmarshalling CORPUS Data") + + var dataMap map[string]interface{} + err := json.Unmarshal([]byte(jsonData), &dataMap) + if err != nil { + log.Msg.Error("Unable to unmarshal CORPUS data", zap.Error(err)) + } + + corpusDataArrayInterface, ok := dataMap["TIPLOCDATA"] + if !ok { + err := errors.New("corpus Data not in expected format") + log.Msg.Error("Error parsing CORPUS Data", zap.Error(err)) + return nil, err + } + + corpusDataArray, ok := corpusDataArrayInterface.([]interface{}) + if !ok { + err := errors.New("corpus data missing the data array") + log.Msg.Error("Error parsing CORPUS Data", zap.Error(err)) + return nil, err + } + + var corpusEntries []database.CorpusEntry + for _, item := range corpusDataArray { + jsonItem, err := json.Marshal(item) + if err != nil { + log.Msg.Error("Error parsing CORPUS Data", zap.Error(err)) + return nil, err + } + + var corpusEntry database.CorpusEntry + err = json.Unmarshal(jsonItem, &corpusEntry) + if err != nil { + log.Msg.Error("Error parsing CORPUS Data", zap.Error(err)) + return nil, err + } + + corpusEntries = append(corpusEntries, corpusEntry) + } + + return corpusEntries, nil +} + +func pruneCorpusEntries(corpusEntries []database.CorpusEntry) []database.CorpusEntry { + for i := range corpusEntries { + if corpusEntries[i].CRS == " " { + corpusEntries[i].CRS = "" + } + if corpusEntries[i].TIPLOC == " " { + corpusEntries[i].TIPLOC = "" + } + if corpusEntries[i].NLCDESC16 == " " { + corpusEntries[i].NLCDESC16 = "" + } + if corpusEntries[i].STANOX == " " { + corpusEntries[i].STANOX = "" + } + if corpusEntries[i].UIC == " " { + corpusEntries[i].UIC = "" + } + } + + return corpusEntries +} diff --git a/src/corpus/update.go b/src/corpus/update.go new file mode 100644 index 0000000..d29034c --- /dev/null +++ b/src/corpus/update.go @@ -0,0 +1,33 @@ +package corpus + +import ( + "fmt" + + "git.fjla.uk/owlboard/timetable-mgr/helpers" + "git.fjla.uk/owlboard/timetable-mgr/log" + "go.uber.org/zap" +) + +func RunCorpusUpdate(cfg *helpers.Configuration) { + resp, err := fetchCorpus(cfg) + if err != nil { + log.Msg.Error("Failed to update Corpus data", zap.Error(err)) + } + + datastring, err := extractCorpusResponse(resp) + if err != nil { + log.Msg.Error("Error extracting Corpus data", zap.Error(err)) + } + + corpusData, err := parseCorpusData(datastring) + if err != nil { + log.Msg.Error("Error parsing Corpus data", zap.Error(err)) + } + + prunedCorpusData := pruneCorpusEntries(corpusData) + + for _, entry := range prunedCorpusData { + fmt.Printf("DESC: %s, NLC: %d, CRS: '%s'\n", + entry.NLCDESC, entry.NLC, entry.CRS) + } +} diff --git a/src/go.mod b/src/go.mod index 2b95b76..2cf663d 100644 --- a/src/go.mod +++ b/src/go.mod @@ -3,7 +3,7 @@ module git.fjla.uk/owlboard/timetable-mgr go 1.21 require ( - git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83 + git.fjla.uk/owlboard/go-types v0.0.0-20240326151548-5491671b8bb4 github.com/go-stomp/stomp/v3 v3.0.5 go.mongodb.org/mongo-driver v1.12.0 go.uber.org/zap v1.24.0 diff --git a/src/go.sum b/src/go.sum index 1a6e4f9..e796a80 100644 --- a/src/go.sum +++ b/src/go.sum @@ -1,5 +1,9 @@ git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83 h1:q+I66M4YVRnKwdyYqcwou7TTniM1uwUSh3Bpa8SDLuM= git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83/go.mod h1:kG+BX9UF+yJaAVnln/QSKlTdrtKRRReezMeSk1ZLMzY= +git.fjla.uk/owlboard/go-types v0.0.0-20240326142657-0fc57cb903e6 h1:p3m0gpFqsyBRMm1eW2p/awTVR2LXE1QEE6XbIhCSpws= +git.fjla.uk/owlboard/go-types v0.0.0-20240326142657-0fc57cb903e6/go.mod h1:kG+BX9UF+yJaAVnln/QSKlTdrtKRRReezMeSk1ZLMzY= +git.fjla.uk/owlboard/go-types v0.0.0-20240326151548-5491671b8bb4 h1:FqDR2uRWVpZjwyEUHrsHSBXcnRgUmoRlF/lGojL7KcE= +git.fjla.uk/owlboard/go-types v0.0.0-20240326151548-5491671b8bb4/go.mod h1:kG+BX9UF+yJaAVnln/QSKlTdrtKRRReezMeSk1ZLMzY= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/src/helpers/basicAuth.go b/src/helpers/basicAuth.go new file mode 100644 index 0000000..28c7111 --- /dev/null +++ b/src/helpers/basicAuth.go @@ -0,0 +1,8 @@ +package helpers + +import "encoding/base64" + +func BasicAuth(username, password string) string { + authString := username + ":" + password + return base64.StdEncoding.EncodeToString([]byte(authString)) +} diff --git a/src/main.go b/src/main.go index 29ba5f6..53c778d 100644 --- a/src/main.go +++ b/src/main.go @@ -8,6 +8,7 @@ import ( "time" "git.fjla.uk/owlboard/timetable-mgr/background" + "git.fjla.uk/owlboard/timetable-mgr/corpus" "git.fjla.uk/owlboard/timetable-mgr/dbAccess" "git.fjla.uk/owlboard/timetable-mgr/helpers" "git.fjla.uk/owlboard/timetable-mgr/log" @@ -35,12 +36,12 @@ func main() { // Handle signals from the OS go handleSignals(cfg, stop) - // Defer cleanup task - //defer cleanup(cfg, stop) - // Start CIF Task Ticker background.InitTicker(cfg, stop) + // Test Corpus Fetching + corpus.RunCorpusUpdate(cfg) + if cfg.VstpOn { messaging.StompInit(cfg) vstp.Subscribe()