This repository has been archived on 2024-11-02. You can view files and clone it, but cannot push or open issues or pull requests.
db-manager/src/timetable.py

124 lines
4.8 KiB
Python
Raw Normal View History

2023-05-26 11:23:50 +01:00
# db-manager - Builds and manages an OwlBoard database instance - To be run on a
# cron schedule
# Copyright (C) 2023 Frederick Boniface
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
#Imports
import os
import requests
import logger as log
import zlib
import json
import mongo
import time
# This module downloads the schedule data for a single TOC
TOC_Code = "EF" # Business code for GWR
# Full snapshot of the TOC's schedule data.
fullDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_FULL_DAILY&day=toc-full"
# The update URL carries a three-letter lowercase weekday suffix. `day` was
# previously undefined, which raised NameError as soon as this module was
# imported. It is derived here from the local weekday (tm_wday: Monday == 0).
# The value is fixed at import time, which is fine for a script started
# fresh by cron on each run.
# NOTE(review): confirm whether the feed expects the current day's
# abbreviation or the previous day's.
day = ("mon", "tue", "wed", "thu", "fri", "sat", "sun")[time.localtime().tm_wday]
updateDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{day}"
# Credentials sent as HTTP basic auth when downloading the schedule files.
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
# Determine state of current Timetable Database
def isUpdateRequired():
    """Decide whether the "timetable" collection needs refreshing.

    Returns:
        "full" when the collection is empty or its recorded update time is
        more than 172800 seconds (48 hours) old, "update" when it is more
        than 86400 seconds (24 hours) old, otherwise False.
    """
    timetableLength = mongo.getLength("timetable")
    log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents")
    # NOTE(review): presumably epoch seconds, as it is compared against
    # time.time() below - confirm against mongo.metaUpdateTime.
    timetableUpdateDate = mongo.metaUpdateTime("timetable")
    # Read the clock once so both thresholds are judged against the same instant.
    now = int(time.time())
    # An empty collection short-circuits before the update time is used.
    if not timetableLength or now > timetableUpdateDate + 172800:
        log.out("timetable.isUpdateRequired: timetable collection requires rebuild")
        return "full"
    if now > timetableUpdateDate + 86400:
        log.out("timetable.isUpdateRequired: timetable collection requires update")
        return "update"
    return False
def getTimetable(full = False):
    """Download and decompress one schedule file.

    Args:
        full: when True fetch the full extract (fullDataUrl), otherwise
            fetch the daily update (updateDataUrl).

    Returns:
        The decompressed response body as bytes.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        zlib.error: if the response body is not valid gzip data.
    """
    # Use the module-level URL constants; the previous snake_case names and
    # the mismatched local name were never defined anywhere.
    download_url = fullDataUrl if full else updateDataUrl
    response = requests.get(download_url, auth=(CORPUS_USER, CORPUS_PASS))
    # Count the call regardless of outcome - the request has been made.
    mongo.incrementCounter("schedule_api")
    # Surface HTTP failures directly instead of as an opaque zlib.error
    # when an error page is fed to the decompressor.
    response.raise_for_status()
    # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
    return zlib.decompress(response.content, 16+zlib.MAX_WBITS)
2023-05-26 11:23:50 +01:00
2023-05-28 00:22:44 +01:00
def loopTimetable(data):
    """Parse newline-delimited JSON schedule data into documents.

    Each non-blank line of ``data`` is decoded as one JSON object and handled
    according to its top-level key. Only 'JsonScheduleV1' records produce
    output; they are converted with insertSchedule().

    Args:
        data: the decompressed file contents (bytes or str), one JSON
            object per line.

    Returns:
        A list of the documents built from the schedule records, in file
        order. Empty when the input holds no schedule records.
    """
    listify = data.splitlines()
    documents = []
    for item in listify:
        # A blank line is not valid JSON; skip it rather than crash.
        if not item.strip():
            continue
        dic = json.loads(item)
        if ('JsonTimetableV1' in dic):
            timestamp = dic['JsonTimetableV1']['timestamp']
            sequence = dic['JsonTimetableV1']['Metadata']['sequence']
            # Keys must be the literal names, not the values themselves.
            detail = {'timestamp': timestamp, 'sequence': sequence}
            # TODO: Do something with this data here
            # Check if timestamp and sequence are correct, if not trigger a full download
        elif ('TiplocV1' in dic):
            print("Disregarding TIPLOC Data")
            # Not used as TIPLOCs etc. are sourced from CORPUS
        elif ('JsonAssociationV1' in dic):
            print("JsonAssociationData")
            # Associates trains with each other - not planning to use yet.
        elif ('JsonScheduleV1' in dic):
            print("JsonScheduleData")
            document = insertSchedule(dic)
            documents.append(document)
    return documents
2023-05-26 11:23:50 +01:00
def runUpdate():
    """Fetch whichever timetable file isUpdateRequired() asks for.

    Returns:
        "done" when no update is needed. When a full or update file is
        fetched the function currently falls through and returns None.
    """
    required = isUpdateRequired()
    if (required == "full"):
        log.out("timetable.runUpdate: Fetching full timetable data")
        data = getTimetable(full = True)
    elif (required == "update"):
        log.out("timetable.runUpdate: Fetching todays timetable update")
        data = getTimetable()
    else:
        log.out("timetable.runUpdate: timetable update is not needed")
        return "done"
    # TODO: `data` is downloaded but not yet parsed or stored.
    ## Check what happens if there is no update
2023-05-28 00:22:44 +01:00
def insertSchedule(sch_record):
    """Build a timetable document from one 'JsonScheduleV1' record.

    Despite the name, nothing is written to the database here; the document
    is only constructed and returned.

    Args:
        sch_record: a decoded JSON object with a 'JsonScheduleV1' key.

    Returns:
        A dict with the keys stpIndicator, trainUid, headcode, powerType,
        planSpeed and scheduleEndDate.

    Raises:
        KeyError: if any of the fields read below is missing.
        NOTE(review): records without a 'schedule_segment' (possibly
        deletions) would fail here - confirm against the feed.
    """
    schedule = sch_record['JsonScheduleV1']
    scheduleId = schedule['CIF_train_uid']
    # TODO: transaction_type is read but not yet acted on.
    transactionType = schedule['transaction_type']
    # Look the nested segment up once rather than for every field.
    segment = schedule['schedule_segment']
    document = {
        'stpIndicator': schedule['CIF_stp_indicator'],
        'trainUid': scheduleId,
        'headcode': segment['signalling_id'],
        'powerType': segment['CIF_power_type'],
        'planSpeed': segment['CIF_speed'],
        'scheduleEndDate': schedule['schedule_end_date']
    }
    return document
# Proposed Document Schema:
# {
# stp_indicator: "O",
# train_uid: "C07284",
# atoc_code: "GW",
# schedule_days_runs: [0,1,2,3,4,5,6], # Sunday-Saturday
# schedule_end_date: "2023-06-02",
# headcode: "5G30",
# power_type: "DMU",
# speed: "090",
# catering_code: null,
# service_branding: "",
# passenger_stops: [
# {
# 'tiploc': "TIPLOC",
# 'pb_arr': "PublicArrival",
# 'pb_dep': "PublicDeparture"
# }
# ]
# }
### CURRENT STATE: loopTimetable and insertSchedule build the data into
### a suitable format to send to Mongo; passengerStops are not yet
### parsed, and logic is still needed around the transaction_type