timetable-mgr/cif/update.go

160 lines
4.4 KiB
Go
Raw Normal View History

package cif
import (
2024-04-10 20:46:20 +01:00
"io"
2024-03-30 01:09:12 +00:00
"time"
2024-03-28 22:47:08 +00:00
"git.fjla.uk/owlboard/timetable-mgr/dbAccess"
"git.fjla.uk/owlboard/timetable-mgr/helpers"
"git.fjla.uk/owlboard/timetable-mgr/log"
"git.fjla.uk/owlboard/timetable-mgr/nrod"
"go.uber.org/zap"
)
2024-04-04 22:39:09 +01:00
// Replaces all existing CIF Data with a new download
func runCifFullDownload(cfg *helpers.Configuration) error {
preTime := time.Now()
2024-04-14 19:03:13 +01:00
log.Info("Downloading all CIF Data")
2024-04-04 22:39:09 +01:00
// Download CIF Data file
url, err := getUpdateUrl("full")
if err != nil {
2024-04-14 19:03:13 +01:00
log.Error("Error getting download URL", zap.Error(err))
}
dataStream, err := nrod.NrodStream(url, cfg)
if err != nil {
2024-04-14 19:03:13 +01:00
log.Error("Error downloading CIF data", zap.Error(err))
return err
}
2024-04-04 22:39:09 +01:00
// Parse CIF file
parsed, err := parseCifDataStream(dataStream)
2024-04-04 22:39:09 +01:00
if err != nil {
2024-04-14 19:03:13 +01:00
log.Error("Error parsing CIF data", zap.Error(err))
2024-04-04 22:39:09 +01:00
return err
2024-03-30 01:09:12 +00:00
}
// Try to remove all non-vstp entries, else give up and delete collection
count, err := dbAccess.PurgeNonVstp()
if err != nil {
log.Warn("Error purging non-vstp schedules, dropping collection", zap.Error(err))
dbAccess.DropCollection(dbAccess.TimetableCollection) // I should edit this to prevent removal of VSTP entries in the database.
} else {
log.Info("Removed non-vstp services", zap.Int64("deletion count", count))
}
2024-03-30 01:09:12 +00:00
2024-04-04 22:39:09 +01:00
// Process CIF file
err = ProcessParsedCif(parsed)
2024-04-04 22:39:09 +01:00
if err != nil {
2024-04-14 19:03:13 +01:00
log.Error("Error processing CIF data", zap.Error(err))
2024-03-30 01:09:12 +00:00
}
newMeta := generateMetadata(&parsed.Header)
2024-04-11 20:59:02 +01:00
ok := dbAccess.PutCifMetadata(newMeta, fullUpdateType)
if !ok {
2024-04-14 19:03:13 +01:00
log.Warn("CIF Data updated, but metadata write failed")
}
2024-03-30 01:09:12 +00:00
2024-04-14 21:21:35 +01:00
// Set parsed to nil to encourage garbage collection
parsed = nil
2024-04-14 21:21:35 +01:00
2024-04-23 00:27:33 +01:00
// Create Indexes
err = dbAccess.CreateTimetableIndexes()
if err != nil {
log.Warn("Error creating timetable indexes, degraded performance", zap.Error(err))
}
2024-04-14 21:21:35 +01:00
postTime := time.Now()
updateDuration := postTime.Sub(preTime)
log.Info("Execution time", zap.Duration("duration", updateDuration))
2024-04-04 22:39:09 +01:00
return nil
}
2024-03-30 01:09:12 +00:00
2024-04-04 22:39:09 +01:00
// Runs a CIF Update for up to five days
func runCifUpdateDownload(cfg *helpers.Configuration, metadata *dbAccess.CifMetadata, days []time.Time) error {
2024-04-16 09:19:12 +01:00
startTime := time.Now()
2024-04-14 19:03:13 +01:00
log.Info("Downloading CIF Updates")
2024-03-30 01:09:12 +00:00
2024-04-04 22:39:09 +01:00
// Loop over dates
for _, time := range days {
2024-04-14 19:03:13 +01:00
log.Info("Downloading CIF File", zap.Time("CIF Data from", time))
2024-04-04 22:39:09 +01:00
// Download CIF data file
data, err := fetchUpdate(time, cfg)
2024-03-30 01:09:12 +00:00
if err != nil {
2024-04-14 19:03:13 +01:00
log.Error("Error fetching CIF update", zap.Error(err))
2024-04-04 22:39:09 +01:00
return err
2024-04-03 22:25:27 +01:00
}
2024-04-04 22:39:09 +01:00
// Parse CIF file
2024-04-10 20:46:20 +01:00
parsed, err := parseCifDataStream(data)
2024-04-03 22:25:27 +01:00
if err != nil {
2024-04-14 19:03:13 +01:00
log.Error("Error parsing CIF data", zap.Error(err))
2024-04-04 22:39:09 +01:00
return err
2024-04-03 22:25:27 +01:00
}
2024-04-12 20:43:03 +01:00
// Check CIF Metadata
2024-04-14 19:03:13 +01:00
log.Debug("Starting metadata checks")
2024-04-11 20:59:02 +01:00
reason, update := checkMetadata(metadata, &parsed.Header)
2024-04-12 20:43:03 +01:00
if !update {
2024-04-14 19:03:13 +01:00
log.Warn("Update file not processed", zap.String("reason", reason))
continue
}
2024-04-14 19:03:13 +01:00
log.Info("CIF Data is suitable for processing", zap.String("reason", reason))
2024-04-04 22:39:09 +01:00
// Process CIF file
err = ProcessParsedCif(parsed)
2024-04-12 20:43:03 +01:00
if err != nil {
2024-04-14 19:03:13 +01:00
log.Error("Error processing CIF data", zap.Error(err))
2024-04-12 20:43:03 +01:00
}
2024-04-06 22:31:38 +01:00
metadata = generateMetadata(&parsed.Header)
parsed = nil
2024-04-03 22:25:27 +01:00
}
2024-04-04 22:39:09 +01:00
2024-04-23 00:27:33 +01:00
// Write metadata to database
2024-04-11 20:59:02 +01:00
ok := dbAccess.PutCifMetadata(metadata, dailyUpdateType)
if !ok {
2024-04-14 19:03:13 +01:00
log.Warn("CIF Data updated, but metadata write failed.")
}
2024-04-06 22:28:26 +01:00
2024-04-23 00:27:33 +01:00
// Clear out of date schedules
cutoff := time.Now().Add(-time.Hour * 24 * 7)
log.Debug("Attempting to remove outdated services", zap.Time("scheduleEnd before", cutoff))
count, err := dbAccess.RemoveOutdatedServices(cutoff)
if err != nil {
log.Warn("Out of date services not removed", zap.Error(err))
} else {
log.Info("Out of date services removed", zap.Int64("removal count", count))
}
2024-04-16 09:19:12 +01:00
endTime := time.Now()
duration := endTime.Sub(startTime)
2024-04-16 09:19:12 +01:00
log.Info("CIF Update process ended", zap.Duration("duration", duration))
2024-04-04 22:39:09 +01:00
return nil
}
2024-03-30 01:09:12 +00:00
// Wraps nrod.NrodDownload() into a function which can handle downloading data for a given day.
// Note that the previous days update is the latest and is downloaded.
2024-04-10 20:46:20 +01:00
func fetchUpdate(t time.Time, cfg *helpers.Configuration) (io.ReadCloser, error) {
2024-03-30 01:09:12 +00:00
url, err := getUpdateUrl("daily")
if err != nil {
return nil, err
}
// Calcuates the day yesterday which is the file that needs downloading
updateDay := t.Add(-24 * time.Hour)
2024-04-04 22:39:09 +01:00
// Append day string to URL
url = url + getDayString(updateDay)
2024-03-30 01:09:12 +00:00
log.Debug("Fetching CIF Data", zap.Time("Update_File_Produced", updateDay))
2024-04-10 20:46:20 +01:00
dataStream, err := nrod.NrodStream(url, cfg)
2024-03-30 01:09:12 +00:00
if err != nil {
return nil, err
}
2024-04-10 20:46:20 +01:00
return dataStream, nil
2024-03-30 01:09:12 +00:00
}