This repository has been archived on 2024-11-02. You can view files and clone it, but cannot push or open issues or pull requests.
db-manager/src/timetable.py

164 lines
6.2 KiB
Python
Raw Normal View History

2023-05-26 11:23:50 +01:00
# db-manager - Builds and manages an OwlBoard database instance - To be run on a
# cron schedule
# Copyright (C) 2023 Frederick Boniface
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
#Imports
import os
import requests
import logger as log
import zlib
import json
import mongo
import time
2023-05-30 23:03:16 +01:00
from datetime import datetime
2023-05-26 11:23:50 +01:00
# This module downloads the schedule data for a single TOC
# Business code of the operator whose schedule is downloaded (GWR).
TOC_Code = "EF"
# URL of the full daily schedule extract for that operator.
fullDataUrl = (
    "https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate"
    f"?type=CIF_{TOC_Code}_TOC_FULL_DAILY&day=toc-full"
)
# Template for the daily update extract - left disabled because `day` is not defined here.
#updateDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{day}"
# Credentials sent as HTTP auth with the download request; None when the variable is unset.
CORPUS_USER = os.environ.get('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.environ.get('OWL_LDB_CORPUSPASS')
# Determine state of current Timetable Database
def isUpdateRequired():
    """Decide whether the timetable collection needs refreshing.

    Returns:
        "full"   - the collection is empty, or its recorded update time is
                   more than 172800 seconds (48 hours) behind the current time.
        "update" - the recorded update time is more than 86400 seconds
                   (24 hours) behind the current time.
        False    - no refresh is needed.
    """
    doc_count = mongo.getLength("timetable")
    log.out(f"timetable.isUpdateRequired: timetable collection contains {doc_count} documents")
    last_update = mongo.metaUpdateTime("timetable")

    rebuild_after = 172800  # seconds
    update_after = 86400  # seconds

    # The emptiness test comes first so the age comparison is skipped for an
    # empty collection.
    is_empty = not doc_count
    if is_empty or int(time.time()) > last_update + rebuild_after:
        log.out("timetable.isUpdateRequired: timetable collection requires rebuild")
        return "full"

    if int(time.time()) > last_update + update_after:
        log.out("timetable.isUpdateRequired: timetable collection requires update")
        return "update"

    return False
def getTimetable(full = False, day = None):
    """Download and decompress a schedule extract for the configured TOC.

    Args:
        full: When true, download the full daily extract (``fullDataUrl``);
            otherwise download a daily update extract.
        day: Optional day token substituted into ``toc-update-{day}`` for the
            update extract. Ignored when ``full`` is true. When omitted, the
            three-letter lowercase name of the previous weekday is used.

    Returns:
        The decompressed (gzip) response body as bytes.

    Raises:
        requests.HTTPError: the server answered with an error status.
        zlib.error: the response body is not valid gzip data.
    """
    if full:
        downloadUrl = fullDataUrl
    else:
        # The module-level updateDataUrl is commented out, so the update URL
        # is built here rather than raising NameError.
        if day is None:
            # NOTE(review): assumes update files are named by three-letter
            # lowercase weekday and that the most recent one carries the
            # previous day's name - confirm against the Network Rail
            # SCHEDULE feed documentation.
            dayTokens = ("mon", "tue", "wed", "thu", "fri", "sat", "sun")
            day = dayTokens[(datetime.now().weekday() - 1) % 7]
        downloadUrl = (
            "https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate"
            f"?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{day}"
        )
    response = requests.get(downloadUrl, auth=(CORPUS_USER, CORPUS_PASS))
    # The request has been made at this point, so count it even if it failed.
    mongo.incrementCounter("schedule_api")
    # Fail with a clear HTTP error instead of trying to gunzip an error page.
    response.raise_for_status()
    # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
    return zlib.decompress(response.content, 16+zlib.MAX_WBITS)
2023-05-26 11:23:50 +01:00
2023-05-28 00:22:44 +01:00
def loopTimetable(data):
    """Turn a newline-delimited JSON schedule extract into schedule documents.

    Each non-blank line of ``data`` is parsed as one JSON object and handled
    according to its top-level key. Only ``JsonScheduleV1`` records produce
    output; they are converted with ``insertSchedule``.

    Args:
        data: The decompressed extract (bytes or str), one JSON object per line.

    Returns:
        A list with one document per ``JsonScheduleV1`` record, in input order.

    Raises:
        json.JSONDecodeError: a non-blank line is not valid JSON.
    """
    documents = []
    for item in data.splitlines():
        # Blank lines carry no record and would make json.loads() raise.
        if not item.strip():
            continue
        dic = json.loads(item)
        if 'JsonTimetableV1' in dic:
            header = dic['JsonTimetableV1']
            # Keyed by field name (the values themselves were previously used as keys).
            detail = {
                'timestamp': header['timestamp'],
                'sequence': header['Metadata']['sequence'],
            }
            # TODO: Do something with this data here
            # Check if timestamp and sequence are correct, if not trigger a full download
        elif 'TiplocV1' in dic:
            print("Disregarding TIPLOC Data")
            # Not used as TIPLOCs etc. are sourced from CORPUS
        elif 'JsonAssociationV1' in dic:
            print("JsonAssociationData")
            # Associates trains with each other - not planning to use yet.
        elif 'JsonScheduleV1' in dic:
            documents.append(insertSchedule(dic))
    return documents
2023-05-26 11:23:50 +01:00
def runUpdate():
    """Refresh the timetable collection if it is out of date.

    Asks ``isUpdateRequired`` which kind of refresh is needed, downloads the
    matching extract, converts it with ``loopTimetable`` and passes the
    result to ``mongo.putTimetable``.

    Returns:
        "done" when no refresh is needed; otherwise None.
    """
    mode = isUpdateRequired()

    # Nothing to do unless a full rebuild or a daily update was requested.
    if mode != "full" and mode != "update":
        log.out("timetable.runUpdate: timetable update is not needed")
        return "done"

    if mode == "full":
        log.out("timetable.runUpdate: Fetching full timetable data")
        raw = getTimetable(full = True)
    else:
        log.out("timetable.runUpdate: Fetching todays timetable update")
        raw = getTimetable()

    mongo.putTimetable(loopTimetable(raw))
2023-05-26 11:23:50 +01:00
## Check what happens if there is no update
2023-05-28 00:22:44 +01:00
def insertSchedule(sch_record):
    """Build the document for one ``JsonScheduleV1`` record.

    Args:
        sch_record: A parsed extract line whose ``'JsonScheduleV1'`` key holds
            the schedule.

    Returns:
        A dict with keys stpIndicator, trainUid, headcode, powerType,
        planSpeed, scheduleStartDate, scheduleEndDate and daysRun, plus
        ``stops`` when the schedule segment contains ``schedule_location``.

    Raises:
        KeyError: a required field is missing from the record.
    """
    schedule = sch_record['JsonScheduleV1']
    scheduleId = schedule['CIF_train_uid']
    # Read but not acted on yet - logic around the transaction type is still to be written.
    transactionType = schedule['transaction_type']

    # The start date is looked up on the schedule itself, where the end date
    # is also read from; looking on the outer record never matched.
    if 'schedule_start_date' in schedule:
        scheduleStart = _helpParseDate(schedule['schedule_start_date'], "start")
    else:
        # No start date supplied: fall back to midnight today (local time).
        scheduleStart = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    segment = schedule['schedule_segment']
    document = {
        'stpIndicator': schedule['CIF_stp_indicator'],
        'trainUid': scheduleId,
        'headcode': segment['signalling_id'],
        'powerType': segment['CIF_power_type'],
        'planSpeed': segment['CIF_speed'],
        'scheduleStartDate': scheduleStart,
        'scheduleEndDate': _helpParseDate(schedule['schedule_end_date'], "end"),
        'daysRun': _helpParseDays(schedule['schedule_days_runs']),
    }
    if 'schedule_location' in segment:
        document['stops'] = _helpParseStops(segment['schedule_location'])
    return document
2023-05-30 23:03:16 +01:00
def _helpParseStops(schedule_segment):
return
def _helpParseDays(string):
# Incoming string contains seven numbers, each number from 0-6 representing days Mon-Sun
daysList = ["m", "t", "w", "th", "f", "s", "su"]
selectedDays = [daysList[i] for i, value in enumerate(string) if value == "1"]
return selectedDays
def _helpParseDate(string, time):
# Incoming string contains date in format %Y-%m-%d, if the time signified end of schedule,
# append 23:59:59 to the time, else append 00:00:00 to the string.
if time == "end":
string += " 235959"
else:
string += " 000000"
return datetime.strptime(string, "%Y-%m-%d %H%M%S")
2023-05-28 00:22:44 +01:00
# Proposed Document Schema:
# {
# stp_indicator: "O",
# train_uid: "C07284"
# atoc_code: "GW"
2023-05-30 23:03:16 +01:00
# schedule_days_runs: []
2023-05-28 00:22:44 +01:00
# schedule_end_date: "2023-06-02"
# headcode: "5G30"
# power_type: "DMU"
# speed: "090"
# catering_code: null
# service_branding: ""
# passenger_stops: [
# {
# 'tiploc': "TIPLOC",
# 'pb_arr': "PublicArrival",
# 'pb_dep': "PublicDepartr"
# }
# ]
### CURRENT STATE: loopTimetable and insertSchedule build the data into
### a suitable format to send to Mongo; there still needs to be logic around
### the transaction_type. Parsing helper functions are implemented to keep the code tidy.
### Stops still need parsing.
# Function Usage Map =>
# runUpdate() =>
# isUpdateRequired()
# loopTimetable() =>
# insertSchedule()
# Will then need to insert into database