Compare commits
3 Commits
7acae49812
...
3883031e04
Author | SHA1 | Date |
---|---|---|
Fred Boniface | 3883031e04 | |
Fred Boniface | 39405e6d6a | |
Fred Boniface | f903219276 | |
55
cif/parse.go
55
cif/parse.go
|
@ -4,6 +4,7 @@ import (
|
|||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"git.fjla.uk/owlboard/go-types/pkg/upstreamApi"
|
||||
"git.fjla.uk/owlboard/timetable-mgr/log"
|
||||
|
@ -11,6 +12,7 @@ import (
|
|||
)
|
||||
|
||||
// Unmarshalls data into the correct types for processing
|
||||
// - Currently not used
|
||||
func parseCifData(data *[]byte) (*parsedData, error) {
|
||||
log.Msg.Debug("Starting CIF Data parsing")
|
||||
if data == nil {
|
||||
|
@ -63,3 +65,56 @@ func parseCifData(data *[]byte) (*parsedData, error) {
|
|||
log.Msg.Debug("CIF Parsing completed")
|
||||
return &parsed, nil
|
||||
}
|
||||
|
||||
func parseCifDataStream(dataStream io.ReadCloser) (*parsedData, error) {
|
||||
log.Msg.Debug("STREAM-Starting CIF Datastream parsing")
|
||||
if dataStream == nil {
|
||||
return nil, errors.New("unable to parse nil pointer")
|
||||
}
|
||||
|
||||
// Initialise data structures
|
||||
var parsed parsedData
|
||||
parsed.assoc = make([]upstreamApi.JsonAssociationV1, 0)
|
||||
parsed.sched = make([]upstreamApi.JsonScheduleV1, 0)
|
||||
|
||||
// Create JSON Decoder
|
||||
decoder := json.NewDecoder(dataStream)
|
||||
|
||||
// Iterate over JSON Objects using stream decoder
|
||||
for decoder.More() {
|
||||
var obj map[string]json.RawMessage
|
||||
if err := decoder.Decode(&obj); err != nil {
|
||||
log.Msg.Error("Error decoding JSON String")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Handle parsed data
|
||||
for key, value := range obj {
|
||||
switch key {
|
||||
case "JsonTimetableV1":
|
||||
var timetable upstreamApi.JsonTimetableV1
|
||||
if err := json.Unmarshal(value, &timetable); err != nil {
|
||||
log.Msg.Error("Error decoding JSONTimetableV1 object", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
parsed.header = timetable
|
||||
case "JsonAssociationV1":
|
||||
var association upstreamApi.JsonAssociationV1
|
||||
if err := json.Unmarshal(value, &association); err != nil {
|
||||
log.Msg.Error("Error decoding JSONAssociationV1 object", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
parsed.assoc = append(parsed.assoc, association)
|
||||
case "JsonScheduleV1":
|
||||
var schedule upstreamApi.JsonScheduleV1
|
||||
if err := json.Unmarshal(value, &schedule); err != nil {
|
||||
log.Msg.Error("Error decoding JSONScheduleV1 object", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
parsed.sched = append(parsed.sched, schedule)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Msg.Debug("CIF Parsing completed")
|
||||
return &parsed, nil
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ package cif
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"git.fjla.uk/owlboard/timetable-mgr/dbAccess"
|
||||
|
@ -20,25 +21,28 @@ func runCifFullDownload(cfg *helpers.Configuration) error {
|
|||
if err != nil {
|
||||
log.Msg.Error("Error getting download URL", zap.Error(err))
|
||||
}
|
||||
data, err := nrod.NrodDownload(url, cfg)
|
||||
dataStream, err := nrod.NrodStream(url, cfg)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error downloading CIF data", zap.Error(err))
|
||||
}
|
||||
|
||||
// DOES NOT WORK WITH NEW DOWNLOAD STREAMING
|
||||
// If debug mode is on, call debugWriteDownload
|
||||
if helpers.Runtime == "debug" {
|
||||
debugWriteDownload(data)
|
||||
}
|
||||
// if helpers.Runtime == "debug" {
|
||||
// debugWriteDownload(dataStream)
|
||||
// }
|
||||
|
||||
// Parse CIF file
|
||||
parsed, err := parseCifData(data)
|
||||
parsed, err := parseCifDataStream(dataStream)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error parsing CIF data", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
// Look to stream data onwards to the parsing function
|
||||
|
||||
// Make `data` a nil pointer as it is no longer required
|
||||
data = nil
|
||||
dataStream = nil
|
||||
|
||||
// Drop timetable collection
|
||||
dbAccess.DropCollection(dbAccess.TimetableCollection) // I should edit this to prevent removal of VSTP entries in the database.
|
||||
|
@ -73,13 +77,14 @@ func runCifUpdateDownload(cfg *helpers.Configuration, metadata *dbAccess.CifMeta
|
|||
return err
|
||||
}
|
||||
|
||||
// DOES NOT WORK WITH NEW NROD STREAMER
|
||||
// If debug mode is on, call debugWriteDownload
|
||||
if helpers.Runtime == "debug" {
|
||||
debugWriteDownload(data)
|
||||
}
|
||||
// if helpers.Runtime == "debug" {
|
||||
// debugWriteDownload(data)
|
||||
// }
|
||||
|
||||
// Parse CIF file
|
||||
parsed, err := parseCifData(data)
|
||||
parsed, err := parseCifDataStream(data)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error parsing CIF data", zap.Error(err))
|
||||
return err
|
||||
|
@ -109,7 +114,8 @@ func runCifUpdateDownload(cfg *helpers.Configuration, metadata *dbAccess.CifMeta
|
|||
log.Msg.Debug("CIF Data has passed checks and should now be processed <<<<<<")
|
||||
// Process CIF file
|
||||
|
||||
metadata = generateMetadata(&parsed.header)
|
||||
// Temporarily disable METADATA GENERATION AND WRITING
|
||||
// metadata = generateMetadata(&parsed.header)
|
||||
}
|
||||
|
||||
ok := dbAccess.PutCifMetadata(metadata)
|
||||
|
@ -121,7 +127,7 @@ func runCifUpdateDownload(cfg *helpers.Configuration, metadata *dbAccess.CifMeta
|
|||
}
|
||||
|
||||
// Wraps nrod.NrodDownload() into a function which can handle downloading data for a given day
|
||||
func fetchUpdate(t time.Time, cfg *helpers.Configuration) (*[]byte, error) {
|
||||
func fetchUpdate(t time.Time, cfg *helpers.Configuration) (io.ReadCloser, error) {
|
||||
url, err := getUpdateUrl("daily")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -130,9 +136,9 @@ func fetchUpdate(t time.Time, cfg *helpers.Configuration) (*[]byte, error) {
|
|||
// Append day string to URL
|
||||
url = url + getDayString(t)
|
||||
|
||||
downloadedData, err := nrod.NrodDownload(url, cfg)
|
||||
dataStream, err := nrod.NrodStream(url, cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return downloadedData, nil
|
||||
return dataStream, nil
|
||||
}
|
||||
|
|
|
@ -36,6 +36,3 @@ func CheckCorpus(cfg *helpers.Configuration) {
|
|||
log.Msg.Info("CORPUS Data is less than two weeks old, not updating")
|
||||
}
|
||||
}
|
||||
|
||||
// Check if corpus data needs updating.
|
||||
// Run update if needed.
|
||||
|
|
|
@ -10,10 +10,13 @@ import (
|
|||
)
|
||||
|
||||
// Accepts CORPUS data as a byte array and formats it ready for database insertion
|
||||
func parseCorpusData(jsonData *[]byte) ([]database.CorpusEntry, error) {
|
||||
log.Msg.Info("Unmarshalling CORPUS Data")
|
||||
func parseCorpusData(jsonData *[]byte) (*[]database.CorpusEntry, error) {
|
||||
log.Msg.Debug("Starting CORPUS Data parsing")
|
||||
|
||||
// Initialise data structure
|
||||
var dataMap map[string]interface{}
|
||||
|
||||
// Create JSON
|
||||
err := json.Unmarshal(*jsonData, &dataMap)
|
||||
if err != nil {
|
||||
log.Msg.Error("Unable to unmarshal CORPUS data", zap.Error(err))
|
||||
|
@ -51,11 +54,11 @@ func parseCorpusData(jsonData *[]byte) ([]database.CorpusEntry, error) {
|
|||
corpusEntries = append(corpusEntries, corpusEntry)
|
||||
}
|
||||
|
||||
return corpusEntries, nil
|
||||
return &corpusEntries, nil
|
||||
}
|
||||
|
||||
// Removes empty fields from CORPUS entries
|
||||
func pruneCorpusEntries(corpusEntries []database.CorpusEntry) []database.CorpusEntry {
|
||||
func pruneCorpusEntries(corpusEntries []database.CorpusEntry) *[]database.CorpusEntry {
|
||||
for i := range corpusEntries {
|
||||
if corpusEntries[i].CRS == " " {
|
||||
corpusEntries[i].CRS = ""
|
||||
|
@ -74,5 +77,5 @@ func pruneCorpusEntries(corpusEntries []database.CorpusEntry) []database.CorpusE
|
|||
}
|
||||
}
|
||||
|
||||
return corpusEntries
|
||||
return &corpusEntries
|
||||
}
|
||||
|
|
|
@ -3,10 +3,10 @@ package corpus
|
|||
import "git.fjla.uk/owlboard/go-types/pkg/database"
|
||||
|
||||
// Removes non-station entities from the CORPUS Data, ready for insertion to the database (stations collection)
|
||||
func createStationEntries(corpusData []database.CorpusEntry) []database.StationEntry {
|
||||
func createStationEntries(corpusData *[]database.CorpusEntry) *[]database.StationEntry {
|
||||
var stationEntries []database.StationEntry
|
||||
|
||||
for _, entry := range corpusData {
|
||||
for _, entry := range *corpusData {
|
||||
if entry.CRS != "" {
|
||||
stationEntry := database.StationEntry{
|
||||
CRS: entry.CRS,
|
||||
|
@ -19,5 +19,5 @@ func createStationEntries(corpusData []database.CorpusEntry) []database.StationE
|
|||
}
|
||||
}
|
||||
|
||||
return stationEntries
|
||||
return &stationEntries
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ func RunCorpusUpdate(cfg *helpers.Configuration) error {
|
|||
return err
|
||||
}
|
||||
|
||||
corpusData := pruneCorpusEntries(unsortedCorpusData)
|
||||
corpusData := pruneCorpusEntries(*unsortedCorpusData)
|
||||
stationData := createStationEntries(corpusData)
|
||||
|
||||
if err := dbAccess.DropCollection(dbAccess.CorpusCollection); err != nil {
|
||||
|
|
|
@ -60,9 +60,10 @@ func SetUpdateTime(collectionName string) error {
|
|||
}
|
||||
update := bson.M{
|
||||
"$set": bson.M{
|
||||
"updated": updateTime,
|
||||
"target": collectionName,
|
||||
"type": "collection",
|
||||
"updated": updateTime,
|
||||
"updated_time": time.Now().In(time.UTC),
|
||||
"target": collectionName,
|
||||
"type": "collection",
|
||||
},
|
||||
}
|
||||
_, err := collection.UpdateOne(context.Background(), filter, update, options)
|
||||
|
@ -78,13 +79,13 @@ func SetUpdateTime(collectionName string) error {
|
|||
// Currently uses the old name of mq-client
|
||||
func PushVersionToDb() {
|
||||
version := database.Version{
|
||||
Target: "mq-client",
|
||||
Component: "mq-client",
|
||||
Target: "timetable-mgr",
|
||||
Component: "timetable-mgr",
|
||||
Version: helpers.Version,
|
||||
}
|
||||
versionSelector := database.VersionSelector{
|
||||
Target: "mq-client",
|
||||
Component: "mq-client",
|
||||
Target: "timetable-mgr",
|
||||
Component: "timetable-mgr",
|
||||
}
|
||||
opts := options.Update().SetUpsert(true)
|
||||
coll := MongoClient.Database("owlboard").Collection("versions")
|
||||
|
|
|
@ -7,12 +7,12 @@ import (
|
|||
)
|
||||
|
||||
// Puts an array of Corpus Documents into the database
|
||||
func PutManyCorpus(corpusData []database.CorpusEntry) error {
|
||||
func PutManyCorpus(corpusData *[]database.CorpusEntry) error {
|
||||
collection := MongoClient.Database(databaseName).Collection(CorpusCollection)
|
||||
|
||||
documents := convertCorpusToInterfaceSlice(corpusData)
|
||||
|
||||
_, err := collection.InsertMany(context.Background(), documents)
|
||||
_, err := collection.InsertMany(context.Background(), *documents)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -22,12 +22,12 @@ func PutManyCorpus(corpusData []database.CorpusEntry) error {
|
|||
}
|
||||
|
||||
// Puts an array of Stations documents into the database
|
||||
func PutManyStations(stationsData []database.StationEntry) error {
|
||||
func PutManyStations(stationsData *[]database.StationEntry) error {
|
||||
collection := MongoClient.Database(databaseName).Collection(StationsCollection)
|
||||
|
||||
documents := convertStationsToInterfaceSlice(stationsData)
|
||||
|
||||
_, err := collection.InsertMany(context.Background(), documents)
|
||||
_, err := collection.InsertMany(context.Background(), *documents)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -37,19 +37,19 @@ func PutManyStations(stationsData []database.StationEntry) error {
|
|||
}
|
||||
|
||||
// Converts []database.CorpusEntry types into interface slices required to put them into the database
|
||||
func convertCorpusToInterfaceSlice(corpusData []database.CorpusEntry) []interface{} {
|
||||
interfaceSlice := make([]interface{}, len(corpusData))
|
||||
for i, doc := range corpusData {
|
||||
func convertCorpusToInterfaceSlice(corpusData *[]database.CorpusEntry) *[]interface{} {
|
||||
interfaceSlice := make([]interface{}, len(*corpusData))
|
||||
for i, doc := range *corpusData {
|
||||
interfaceSlice[i] = doc
|
||||
}
|
||||
return interfaceSlice
|
||||
return &interfaceSlice
|
||||
}
|
||||
|
||||
// Converts []database.StationEntry types into interface slices required to put them into the database
|
||||
func convertStationsToInterfaceSlice(stationsData []database.StationEntry) []interface{} {
|
||||
interfaceSlice := make([]interface{}, len(stationsData))
|
||||
for i, doc := range stationsData {
|
||||
func convertStationsToInterfaceSlice(stationsData *[]database.StationEntry) *[]interface{} {
|
||||
interfaceSlice := make([]interface{}, len(*stationsData))
|
||||
for i, doc := range *stationsData {
|
||||
interfaceSlice[i] = doc
|
||||
}
|
||||
return interfaceSlice
|
||||
return &interfaceSlice
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ func NrodDownload(url string, cfg *helpers.Configuration) (*[]byte, error) {
|
|||
}
|
||||
|
||||
// Yes, I know `readedData` is not proper English. But readData reads more like a verb action.
|
||||
readedData, err := nrodExtract(*resp)
|
||||
readedData, err := nrodExtract(resp)
|
||||
if err != nil {
|
||||
log.Msg.Error("Unable to read response data")
|
||||
return nil, err
|
||||
|
@ -49,7 +49,7 @@ func NrodDownload(url string, cfg *helpers.Configuration) (*[]byte, error) {
|
|||
}
|
||||
|
||||
// Extracts GZIP Data from an HTTP Response and returns the decompresses data as a byte array
|
||||
func nrodExtract(resp http.Response) (*[]byte, error) {
|
||||
func nrodExtract(resp *http.Response) (*[]byte, error) {
|
||||
log.Msg.Debug("Extracting HTTP Response Data")
|
||||
gzReader, err := gzip.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
package nrod
|
||||
|
||||
import (
	"bufio"
	"compress/gzip"
	"fmt"
	"io"
	"net/http"
	"time"

	"git.fjla.uk/owlboard/timetable-mgr/helpers"
	"git.fjla.uk/owlboard/timetable-mgr/log"
	"go.uber.org/zap"
)
|
||||
|
||||
// Downloads NROD Data and extracts if GZIP, returns a io.Reader
|
||||
func NrodStream(url string, cfg *helpers.Configuration) (io.ReadCloser, error) {
|
||||
log.Msg.Debug("Fetching NROD data stream", zap.String("Request URL", url))
|
||||
|
||||
client := http.Client{
|
||||
Timeout: time.Second * 300,
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error creating HTTP Request", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Add("Authorization", "Basic "+helpers.BasicAuth(cfg.NrodUser, cfg.NrodPass))
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
log.Msg.Error("Error carrying out HTTP Request", zap.Error(err), zap.Int("STATUS", resp.StatusCode))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
err := fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
log.Msg.Error("Non-successful status code", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Run the data through the extractor function and return io.ReadCloser, error from
|
||||
// that function directly
|
||||
return NrodStreamExtract(resp)
|
||||
}
|
||||
|
||||
func NrodStreamExtract(resp *http.Response) (io.ReadCloser, error) {
|
||||
log.Msg.Debug("Extracting NROD Download")
|
||||
|
||||
log.Msg.Debug("Content Type", zap.String("Content-Encoding", resp.Header.Get("Content-Encoding")))
|
||||
|
||||
gzReader, err := gzip.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
log.Msg.Warn("Unable to create GZIP Reader, data probably not gzipped")
|
||||
return resp.Body, err
|
||||
}
|
||||
|
||||
return gzReader, nil
|
||||
}
|
Loading…
Reference in New Issue