2023-05-26 11:23:50 +01:00
# db-manager - Builds and manages an OwlBoard database instance - To be run on a
# cron schedule
# Copyright (C) 2023 Frederick Boniface
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
2023-05-31 22:09:09 +01:00
# Set to True to force a full rebuild of the timetable collection on the next
# run, regardless of the age of the stored data (checked in isUpdateRequired).
REBUILD : bool = False
2023-05-26 11:23:50 +01:00
#Imports
import os
import requests
import logger as log
2023-06-03 21:05:35 +01:00
import helpers
2023-05-26 11:23:50 +01:00
import zlib
import json
import mongo
import time
2023-05-31 19:04:33 +01:00
from datetime import datetime , timedelta
2023-05-26 11:23:50 +01:00
# This module downloads the schedule data for a single TOC
2023-05-31 19:04:33 +01:00
# Reference times, captured once when the module is imported.
now = datetime.now()
yesterday = now - timedelta(days=1)

# Lower-cased abbreviated weekday names as produced by strftime("%a").
yesterdayDay = yesterday.strftime("%a").lower()
todayDay = now.strftime("%a").lower()

# Updates are only attempted from 08:00 onwards (see isUpdateRequired).
isAfter0800 = now.hour >= 8

# Local file that the decompressed download is written to.
filePath = "cif_data"

TOC_Code = "EF"  # Business code for GWR
_cifEndpoint = "https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate"
fullDataUrl = f"{_cifEndpoint}?type=CIF_{TOC_Code}_TOC_FULL_DAILY&day=toc-full"
updateDataUrl = f"{_cifEndpoint}?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{yesterdayDay}"

# Credentials passed as HTTP auth when downloading (see getTimetable).
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')

log.out("timetable.py: Timetable module loaded", "DBUG")
2023-05-26 11:23:50 +01:00
# Determine state of current Timetable Database
def isUpdateRequired():
    """Decide whether the timetable collection needs refreshing.

    Logs the current document count and the age of the stored data, then
    compares that age against the thresholds defined in ``helpers``.

    Returns:
        "full" when the data is at least ``helpers.two_day_in_seconds`` old
        (and it is 08:00 or later) or ``REBUILD`` is set, "update" when it is
        at least ``helpers.twenty_hours`` old (and it is 08:00 or later),
        otherwise ``False``.
    """
    doc_count = mongo.getLength("timetable")
    log.out(f"timetable.isUpdateRequired: timetable collection contains {doc_count} documents", "DBUG")

    last_updated = mongo.metaCheckTime("timetable")
    age_seconds = helpers.getAgeInSeconds(last_updated)
    age_text = str(timedelta(seconds=age_seconds))
    log.out(f"timetable.isUpdateRequired: Timetable data age: {age_text}", "INFO")

    needs_rebuild = (age_seconds >= helpers.two_day_in_seconds and isAfter0800) or REBUILD
    if needs_rebuild:
        log.out("timetable.isUpdateRequired: timetable collection requires rebuild", "INFO")
        return "full"

    needs_update = age_seconds >= helpers.twenty_hours and isAfter0800
    if needs_update:
        log.out("timetable.isUpdateRequired: timetable collection requires update", "INFO")
        return "update"

    return False
2023-05-31 19:04:33 +01:00
def getTimetable(full: bool = False):
    """Download one CIF schedule extract, decompress it and cache it on disk.

    Args:
        full: When True fetch the full daily extract (``fullDataUrl``),
            otherwise fetch the daily update extract (``updateDataUrl``).

    Returns:
        bytes: The decompressed file contents, which are also written to
        ``filePath``.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
        zlib.error: If the response body is not valid gzip data.
    """
    downloadUrl: str = fullDataUrl if full else updateDataUrl
    # Without a timeout a stalled connection would hang the cron job forever.
    # The tuple is (connect timeout, read timeout) in seconds.
    response = requests.get(
        downloadUrl,
        auth=(CORPUS_USER, CORPUS_PASS),
        timeout=(10, 120),
    )
    mongo.incrementCounter("schedule_api")
    log.out(f"timetable.getTimetable: Fetch (Full: {full}) response: {response.status_code}", "DBUG")
    # Fail with a clear HTTP error instead of handing an error page to zlib.
    response.raise_for_status()
    # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
    decompressed = zlib.decompress(response.content, 16 + zlib.MAX_WBITS)
    with open(filePath, "wb") as f:
        f.write(decompressed)
    return decompressed
2023-05-26 11:23:50 +01:00
2023-05-28 00:22:44 +01:00
def loopTimetable(data):
    """Parse a newline-delimited JSON schedule extract into documents.

    Every non-blank line of ``data`` is decoded as one JSON record. Only
    ``JsonScheduleV1`` records are converted (via ``insertSchedule``); all
    other record types are skipped.

    Args:
        data: The decompressed extract, as ``str`` or ``bytes``.

    Returns:
        list: One document per schedule record, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    documents: list = []
    for line in data.splitlines():
        if not line.strip():
            # Tolerate blank lines rather than failing inside json.loads
            continue
        record = json.loads(line)
        if 'JsonScheduleV1' in record:
            documents.append(insertSchedule(record))
        # Record types that are deliberately ignored:
        #  - JsonTimetableV1: file header.
        #    TODO: check record['JsonTimetableV1']['timestamp'] and
        #    record['JsonTimetableV1']['Metadata']['sequence'] are correct,
        #    and if not trigger a full download.
        #  - TiplocV1: not used as TIPLOCs etc. are sourced from CORPUS.
        #  - JsonAssociationV1: associates trains with each other - not
        #    planning to use yet.
    return documents
2023-05-26 11:23:50 +01:00
def runUpdate():
    """Run one timetable maintenance pass.

    Asks ``isUpdateRequired`` what is needed, downloads the matching extract,
    parses it, writes it to the database and finally removes schedules that
    have expired. The metadata update time is only refreshed when the
    database write reports success.

    Returns:
        "done" when no update is needed; otherwise ``None``.
    """
    mode = isUpdateRequired()
    if mode not in ("full", "update"):
        log.out("timetable.runUpdate: timetable update is not needed", "INFO")
        return "done"

    if mode == "full":
        log.out("timetable.runUpdate: Fetching full timetable data", "INFO")
        data = getTimetable(full=True)
    else:
        log.out("timetable.runUpdate: Fetching todays timetable update", "INFO")
        data = getTimetable()

    schedules = loopTimetable(data)
    if _insertToDb(schedules, mode):
        mongo.metaUpdateTime("timetable")
    # NOTE(review): cleanup is run whether or not the insert succeeded -
    # confirm this matches the intended nesting.
    _removeOutdatedServices()
    ## Check what happens if there is no update
2023-05-28 00:22:44 +01:00
def insertSchedule(sch_record):
    """Convert one ``JsonScheduleV1`` record into a timetable document.

    Despite the name, nothing is written to the database here; the caller
    collects the returned documents and stores them later.

    Args:
        sch_record: Decoded JSON record containing a ``JsonScheduleV1`` key.

    Returns:
        dict: The document, with keys ``transactionType``, ``stpIndicator``,
        ``trainUid``, ``headcode``, ``powerType``, ``planSpeed``,
        ``scheduleStartDate``, ``scheduleEndDate``, ``daysRun`` and ``stops``.

    Raises:
        KeyError: If ``JsonScheduleV1`` or ``CIF_train_uid`` is missing.
    """
    schedule = sch_record['JsonScheduleV1']
    # Fetch the segment once; treat a missing or null segment as empty.
    segment = schedule.get('schedule_segment') or {}

    if 'schedule_start_date' in schedule:
        scheduleStart = _helpParseDate(schedule['schedule_start_date'])
    else:
        # No start date supplied: fall back to midnight today.
        scheduleStart = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    if 'schedule_end_date' in schedule:
        scheduleEnd = _helpParseDate(schedule['schedule_end_date'], "end")
    else:
        # NOTE(review): a string sentinel, not None, is stored when there is
        # no end date - confirm consumers expect the literal "null".
        scheduleEnd = "null"

    if 'schedule_location' in segment:
        stops = _helpParseStops(segment['schedule_location'])
    else:
        stops = []

    return {
        'transactionType': schedule.get('transaction_type'),
        'stpIndicator': schedule.get('CIF_stp_indicator'),
        'trainUid': schedule['CIF_train_uid'],
        'headcode': segment.get('signalling_id'),
        'powerType': segment.get('CIF_power_type'),
        'planSpeed': segment.get('CIF_speed'),
        'scheduleStartDate': scheduleStart,
        'scheduleEndDate': scheduleEnd,
        'daysRun': _helpParseDays(schedule.get('schedule_days_runs', '0000000')),
        'stops': stops,
    }
2023-05-31 19:04:33 +01:00
def _insertToDb(data: list, type: str):
    """Write parsed schedule documents to the ``timetable`` collection.

    Args:
        data: Documents produced by ``loopTimetable``.
        type: "full" replaces the whole collection and recreates the
            ``headcode`` index; "update" applies each document's
            ``transactionType`` ("Create" inserts, "Delete" removes).

    Returns:
        bool: True when processing completed, False when an error occurred
        or a full rebuild was requested with no documents.
    """
    if type == "full" and not data:
        # Never drop the existing collection when there is nothing to
        # replace it with - an empty full extract means a bad download.
        log.out("timetable._insertToDb: No schedules to insert, keeping existing timetable data", "ERR")
        return False

    pre_count = mongo.getLength("timetable")
    try:
        if type == "full":
            mongo.dropCollection("timetable")
            mongo.putTimetable(data)
            mongo.createSingleIndex("timetable", "headcode")
        elif type == "update":
            # Applied one at a time so creates and deletes keep file order.
            for item in data:
                action = item['transactionType']
                if action == "Create":
                    mongo.putTimetable([item])
                elif action == "Delete":
                    # The STP indicator is part of the filter so that
                    # deleting an STP schedule does not remove the LTP one.
                    # TODO: also consider the end date here.
                    mongo.deleteTimetableData({
                        'trainUid': item['trainUid'],
                        'scheduleStartDate': item['scheduleStartDate'],
                        'stpIndicator': item['stpIndicator'],
                    })
        post_count = mongo.getLength("timetable")
        log.out(f"timetable._insertToDb: Document count difference after processing: {pre_count - post_count}", "DBUG")
        return True  # If successful
    except Exception as e:
        log.out("timetable._insertToDb: Error inserting timetable data", "ERR")
        log.out(f"timetable._insertToDb: {e}", "ERR")
        return False  # If error inserting timetable
2023-05-31 19:04:33 +01:00
2023-05-30 23:03:16 +01:00
def _helpParseStops ( schedule_segment ) :
2023-05-31 19:04:33 +01:00
stops = [ ]
for i in schedule_segment :
timing_point = { }
public_departure = i . get ( " public_departure " )
wtt_departure = i . get ( " departure " )
public_arrival = i . get ( " public_arrival " )
wtt_arrival = i . get ( " arrival " )
tiploc_code = i . get ( " tiploc_code " )
isPublic = False
if public_departure and len ( public_departure ) == 4 and public_departure . isdigit ( ) :
isPublic = True
timing_point [ ' publicDeparture ' ] = public_departure
if public_arrival and len ( public_arrival ) == 4 and public_arrival . isdigit ( ) :
isPublic = True
timing_point [ ' publicArrival ' ] = public_arrival
if wtt_departure :
timing_point [ ' wttDeparture ' ] = wtt_departure
if wtt_arrival :
timing_point [ ' wttArrival ' ] = wtt_arrival
timing_point [ ' isPublic ' ] = isPublic
timing_point [ ' tiploc ' ] = tiploc_code
stops . append ( timing_point )
return stops
2023-05-30 23:03:16 +01:00
def _helpParseDays ( string ) :
# Incoming string contains seven numbers, each number from 0-6 representing days Mon-Sun
daysList = [ " m " , " t " , " w " , " th " , " f " , " s " , " su " ]
selectedDays = [ daysList [ i ] for i , value in enumerate ( string ) if value == " 1 " ]
return selectedDays
2023-05-31 22:09:09 +01:00
def _helpParseDate ( string : str , time : str = " false " ) :
2023-05-30 23:03:16 +01:00
# Incoming string contains date in format %Y-%m-%d, if the time signified end of schedule,
# append 23:59:59 to the time, else append 00:00:00 to the string.
if time == " end " :
string + = " 235959 "
else :
string + = " 000000 "
return datetime . strptime ( string , " % Y- % m- %d % H % M % S " )
2023-05-31 19:04:33 +01:00
def _removeOutdatedServices():
    """Delete schedules whose end date is already in the past.

    Removes every ``timetable`` document whose ``scheduleEndDate`` is earlier
    than the module-level ``now`` (captured when the module was imported),
    and logs how many documents were removed.
    """
    log.out("timetable._removeOutdatedServices: Removing out of date schedules", "INFO")
    count_before = mongo.getLength("timetable")
    expired_filter = {"scheduleEndDate": {"$lt": now}}
    mongo.deleteMany("timetable", expired_filter)
    removed = count_before - mongo.getLength("timetable")
    log.out(f"timetable._removeOutdatedServices: Removed {removed} out of date services", "DBUG")