Integrate corpus fetch/parse logic

This commit is contained in:
Fred Boniface 2024-03-26 15:40:15 +00:00
parent cf633eeb8f
commit f4a21ebe86
7 changed files with 184 additions and 5 deletions

View File

@ -8,7 +8,7 @@ import (
"git.fjla.uk/owlboard/timetable-mgr/log" "git.fjla.uk/owlboard/timetable-mgr/log"
) )
const frequency = 5 * time.Millisecond // Figure out a sensible frequency! const frequency = 20000 * time.Millisecond // Figure out a sensible frequency!
func InitTicker(cfg *helpers.Configuration, stop <-chan struct{}) { func InitTicker(cfg *helpers.Configuration, stop <-chan struct{}) {
go runTicker(cfg, stop) go runTicker(cfg, stop)

133
src/corpus/fetch.go Normal file
View File

@ -0,0 +1,133 @@
package corpus
import (
"compress/gzip"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"time"
"git.fjla.uk/owlboard/go-types/pkg/database"
"git.fjla.uk/owlboard/timetable-mgr/helpers"
"git.fjla.uk/owlboard/timetable-mgr/log"
"go.uber.org/zap"
)
const url string = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
func fetchCorpus(cfg *helpers.Configuration) (*http.Response, error) {
log.Msg.Info("Fetching CORPUS Data")
client := http.Client{
Timeout: time.Second * 10,
}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
log.Msg.Error("Failed to create CORPUS Request", zap.Error(err))
return nil, err
}
req.Header.Add("Authorization", "Basic "+helpers.BasicAuth(cfg.NrodUser, cfg.NrodPass))
resp, err := client.Do(req)
if err != nil {
log.Msg.Error("Error requesting Corpus", zap.Error(err))
return nil, err
}
if resp.StatusCode != http.StatusOK {
err := fmt.Errorf("unexpected status code: %d", resp.StatusCode)
log.Msg.Error("Unexpected status code", zap.Int("status_code", resp.StatusCode))
return nil, err
}
return resp, nil
}
func extractCorpusResponse(resp *http.Response) (string, error) {
log.Msg.Info("Decompressing CORPUS Data")
gzReader, err := gzip.NewReader(resp.Body)
if err != nil {
log.Msg.Error("Corpus response is not gzipped")
return "", errors.New("response not gzipped")
}
defer gzReader.Close()
log.Msg.Info("Reading CORPUS Data")
decompressedData, err := io.ReadAll(gzReader)
if err != nil {
log.Msg.Error("Failed to read decompressed data", zap.Error(err))
return "", err
}
responseBody := string(decompressedData)
return responseBody, nil
}
func parseCorpusData(jsonData string) ([]database.CorpusEntry, error) {
log.Msg.Info("Unmarshalling CORPUS Data")
var dataMap map[string]interface{}
err := json.Unmarshal([]byte(jsonData), &dataMap)
if err != nil {
log.Msg.Error("Unable to unmarshal CORPUS data", zap.Error(err))
}
corpusDataArrayInterface, ok := dataMap["TIPLOCDATA"]
if !ok {
err := errors.New("corpus Data not in expected format")
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
corpusDataArray, ok := corpusDataArrayInterface.([]interface{})
if !ok {
err := errors.New("corpus data missing the data array")
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
var corpusEntries []database.CorpusEntry
for _, item := range corpusDataArray {
jsonItem, err := json.Marshal(item)
if err != nil {
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
var corpusEntry database.CorpusEntry
err = json.Unmarshal(jsonItem, &corpusEntry)
if err != nil {
log.Msg.Error("Error parsing CORPUS Data", zap.Error(err))
return nil, err
}
corpusEntries = append(corpusEntries, corpusEntry)
}
return corpusEntries, nil
}
func pruneCorpusEntries(corpusEntries []database.CorpusEntry) []database.CorpusEntry {
for i := range corpusEntries {
if corpusEntries[i].CRS == " " {
corpusEntries[i].CRS = ""
}
if corpusEntries[i].TIPLOC == " " {
corpusEntries[i].TIPLOC = ""
}
if corpusEntries[i].NLCDESC16 == " " {
corpusEntries[i].NLCDESC16 = ""
}
if corpusEntries[i].STANOX == " " {
corpusEntries[i].STANOX = ""
}
if corpusEntries[i].UIC == " " {
corpusEntries[i].UIC = ""
}
}
return corpusEntries
}

33
src/corpus/update.go Normal file
View File

@ -0,0 +1,33 @@
package corpus
import (
"fmt"
"git.fjla.uk/owlboard/timetable-mgr/helpers"
"git.fjla.uk/owlboard/timetable-mgr/log"
"go.uber.org/zap"
)
func RunCorpusUpdate(cfg *helpers.Configuration) {
resp, err := fetchCorpus(cfg)
if err != nil {
log.Msg.Error("Failed to update Corpus data", zap.Error(err))
}
datastring, err := extractCorpusResponse(resp)
if err != nil {
log.Msg.Error("Error extracting Corpus data", zap.Error(err))
}
corpusData, err := parseCorpusData(datastring)
if err != nil {
log.Msg.Error("Error parsing Corpus data", zap.Error(err))
}
prunedCorpusData := pruneCorpusEntries(corpusData)
for _, entry := range prunedCorpusData {
fmt.Printf("DESC: %s, NLC: %d, CRS: '%s'\n",
entry.NLCDESC, entry.NLC, entry.CRS)
}
}

View File

@ -3,7 +3,7 @@ module git.fjla.uk/owlboard/timetable-mgr
go 1.21 go 1.21
require ( require (
git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83 git.fjla.uk/owlboard/go-types v0.0.0-20240326151548-5491671b8bb4
github.com/go-stomp/stomp/v3 v3.0.5 github.com/go-stomp/stomp/v3 v3.0.5
go.mongodb.org/mongo-driver v1.12.0 go.mongodb.org/mongo-driver v1.12.0
go.uber.org/zap v1.24.0 go.uber.org/zap v1.24.0

View File

@ -1,5 +1,9 @@
git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83 h1:q+I66M4YVRnKwdyYqcwou7TTniM1uwUSh3Bpa8SDLuM= git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83 h1:q+I66M4YVRnKwdyYqcwou7TTniM1uwUSh3Bpa8SDLuM=
git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83/go.mod h1:kG+BX9UF+yJaAVnln/QSKlTdrtKRRReezMeSk1ZLMzY= git.fjla.uk/owlboard/go-types v0.0.0-20230727192011-171bd3eafd83/go.mod h1:kG+BX9UF+yJaAVnln/QSKlTdrtKRRReezMeSk1ZLMzY=
git.fjla.uk/owlboard/go-types v0.0.0-20240326142657-0fc57cb903e6 h1:p3m0gpFqsyBRMm1eW2p/awTVR2LXE1QEE6XbIhCSpws=
git.fjla.uk/owlboard/go-types v0.0.0-20240326142657-0fc57cb903e6/go.mod h1:kG+BX9UF+yJaAVnln/QSKlTdrtKRRReezMeSk1ZLMzY=
git.fjla.uk/owlboard/go-types v0.0.0-20240326151548-5491671b8bb4 h1:FqDR2uRWVpZjwyEUHrsHSBXcnRgUmoRlF/lGojL7KcE=
git.fjla.uk/owlboard/go-types v0.0.0-20240326151548-5491671b8bb4/go.mod h1:kG+BX9UF+yJaAVnln/QSKlTdrtKRRReezMeSk1ZLMzY=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

8
src/helpers/basicAuth.go Normal file
View File

@ -0,0 +1,8 @@
package helpers
import "encoding/base64"
func BasicAuth(username, password string) string {
authString := username + ":" + password
return base64.StdEncoding.EncodeToString([]byte(authString))
}

View File

@ -8,6 +8,7 @@ import (
"time" "time"
"git.fjla.uk/owlboard/timetable-mgr/background" "git.fjla.uk/owlboard/timetable-mgr/background"
"git.fjla.uk/owlboard/timetable-mgr/corpus"
"git.fjla.uk/owlboard/timetable-mgr/dbAccess" "git.fjla.uk/owlboard/timetable-mgr/dbAccess"
"git.fjla.uk/owlboard/timetable-mgr/helpers" "git.fjla.uk/owlboard/timetable-mgr/helpers"
"git.fjla.uk/owlboard/timetable-mgr/log" "git.fjla.uk/owlboard/timetable-mgr/log"
@ -35,12 +36,12 @@ func main() {
// Handle signals from the OS // Handle signals from the OS
go handleSignals(cfg, stop) go handleSignals(cfg, stop)
// Defer cleanup task
//defer cleanup(cfg, stop)
// Start CIF Task Ticker // Start CIF Task Ticker
background.InitTicker(cfg, stop) background.InitTicker(cfg, stop)
// Test Corpus Fetching
corpus.RunCorpusUpdate(cfg)
if cfg.VstpOn { if cfg.VstpOn {
messaging.StompInit(cfg) messaging.StompInit(cfg)
vstp.Subscribe() vstp.Subscribe()