#   db-manager - Builds and manages an OwlBoard database instance - To be run on a
#   cron schedule
#   Copyright (C) 2023  Frederick Boniface

#   This program is free software: you can redistribute it and/or modify it under the
#   terms of the GNU General Public License as published by the Free Software
#   Foundation, either version 3 of the License, or (at your option) any later version.

#   This program is distributed in the hope that it will be useful, but WITHOUT ANY
#   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
#   PARTICULAR PURPOSE.  See the GNU General Public License for more details.

#   You should have received a copy of the GNU General Public License along with this
#   program.  If not, see
#   https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE

#Imports
import os
import requests
import logger as log
import zlib
import json
import mongo
import time
from datetime import datetime

# This module downloads a single TOC's schedule data
TOC_Code = "EF" # Business code for GWR
fullDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_FULL_DAILY&day=toc-full"

# Day suffixes used to build the daily update URL, indexed by datetime.weekday()
# (0 = Monday).  A fixed table is used instead of strftime("%a") so the result does
# not depend on the process locale.
_UPDATE_DAYS = ("mon", "tue", "wed", "thu", "fri", "sat", "sun")

CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')


def _updateDataUrl(day = None):
    """Return the download URL of the daily update file for `TOC_Code`.

    `day` is the three-letter, lower-case day suffix (e.g. "mon"); when omitted
    the current local weekday is used.
    """
    # NOTE(review): assumes the update file wanted is the one named after the day
    # this job runs on - confirm against the Network Rail feed documentation.
    if day is None:
        day = _UPDATE_DAYS[datetime.now().weekday()]
    return f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{day}"


# Determine state of current Timetable Database
def isUpdateRequired():
    """Decide whether the timetable collection needs refreshing.

    Returns "full" when the collection is empty or its last update is more than
    172800 seconds (48 hours) old, "update" when it is more than 86400 seconds
    (24 hours) old, and False otherwise.
    """
    timetableLength = mongo.getLength("timetable")
    log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents")
    # Presumably an epoch timestamp in seconds, as it is compared with time.time().
    timetableUpdateDate = mongo.metaUpdateTime("timetable")
    now = int(time.time())
    if (not timetableLength or now > timetableUpdateDate + 172800):
        log.out("timetable.isUpdateRequired: timetable collection requires rebuild")
        return "full"
    if (now > (timetableUpdateDate + 86400)):
        log.out("timetable.isUpdateRequired: timetable collection requires update")
        return "update"
    return False


def getTimetable(full = False):
    """Download and decompress a schedule file.

    Fetches the full daily extract when `full` is true, otherwise today's update
    extract, and returns the gunzipped payload as bytes.  Raises
    `requests.HTTPError` if the server answers with an error status.
    """
    downloadUrl = fullDataUrl if full else _updateDataUrl()
    response = requests.get(downloadUrl, auth=(CORPUS_USER, CORPUS_PASS))
    # Count the call before validating it so failed requests are still recorded.
    mongo.incrementCounter("schedule_api")
    # Fail with a clear HTTP error rather than an opaque zlib error on an error page.
    response.raise_for_status()
    # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
    return zlib.decompress(response.content, 16 + zlib.MAX_WBITS)


def loopTimetable(data):
    """Parse a newline-delimited JSON schedule payload.

    Each non-blank line of `data` is decoded as JSON.  Only records containing
    'JsonScheduleV1' are converted (via `insertSchedule`); the resulting list of
    documents is returned.
    """
    documents = []
    for item in data.splitlines():
        if not item.strip():
            # Skip blank lines, which json.loads would reject.
            continue
        dic = json.loads(item)
        if ('JsonTimetableV1' in dic):
            timestamp = dic['JsonTimetableV1']['timestamp']
            sequence = dic['JsonTimetableV1']['Metadata']['sequence']
            detail = {'timestamp': timestamp, 'sequence': sequence}
            # TODO: Do something with this data here
            # Check if timestamp and sequence are correct, if not trigger a full download
        elif ('TiplocV1' in dic):
            print("Disregarding TIPLOC Data")
            # Not used as TIPLOCs etc. are sourced from CORPUS
        elif ('JsonAssociationV1' in dic):
            print("JsonAssociationData")
            # Associates trains with each other - not planning to use yet.
        elif ('JsonScheduleV1' in dic):
            documents.append(insertSchedule(dic))
    return documents


def runUpdate():
    """Fetch, parse and store timetable data if `isUpdateRequired` says so.

    Returns "done" when no update is needed; otherwise returns None after the
    parsed documents have been handed to `mongo.putTimetable`.
    """
    required = isUpdateRequired()
    if (required == "full"):
        log.out("timetable.runUpdate: Fetching full timetable data")
        data = getTimetable(full = True)
    elif (required == "update"):
        log.out("timetable.runUpdate: Fetching todays timetable update")
        data = getTimetable()
    else:
        log.out("timetable.runUpdate: timetable update is not needed")
        return "done"
    parsed = loopTimetable(data)
    mongo.putTimetable(parsed)
    ## Check what happens if there is no update


def insertSchedule(sch_record):
    """Build a database document from one 'JsonScheduleV1' record.

    `sch_record` is the decoded JSON object wrapping the schedule.  The start
    date falls back to midnight today when the schedule carries no
    'schedule_start_date'.  A 'stops' key is added only when the schedule segment
    contains 'schedule_location'.
    """
    schedule = sch_record['JsonScheduleV1']
    segment = schedule['schedule_segment']
    scheduleId = schedule['CIF_train_uid']
    # Not used yet; logic around the transaction type is still to be written.
    transactionType = schedule['transaction_type']
    # The start date is looked up on the schedule itself, like every other field.
    if ('schedule_start_date' in schedule):
        scheduleStart = _helpParseDate(schedule['schedule_start_date'])
    else:
        now = datetime.now()
        scheduleStart = now.replace(hour=0,minute=0,second=0,microsecond=0)
    document = {
        'stpIndicator': schedule['CIF_stp_indicator'],
        'trainUid': scheduleId,
        'headcode': segment['signalling_id'],
        'powerType': segment['CIF_power_type'],
        'planSpeed': segment['CIF_speed'],
        'scheduleStartDate': scheduleStart,
        'scheduleEndDate': _helpParseDate(schedule['schedule_end_date'], "end"),
        'daysRun': _helpParseDays(schedule['schedule_days_runs'])
    }
    if ('schedule_location' in segment):
        document['stops'] = _helpParseStops(segment['schedule_location'])
    return document


def _helpParseStops(schedule_segment):
    """Placeholder for stop parsing; not implemented yet and returns None."""
    return


def _helpParseDays(string):
    """Translate a run-days flag string into a list of day codes.

    Each character position maps to a day, Monday first; a position holding "1"
    selects that day.  E.g. "1000011" -> ["m", "s", "su"].
    """
    daysList = ["m", "t", "w", "th", "f", "s", "su"]
    selectedDays = [daysList[i] for i, value in enumerate(string) if value == "1"]
    return selectedDays


def _helpParseDate(string, time = "start"):
    """Parse a %Y-%m-%d date string into a datetime.

    When `time` is "end" the result is 23:59:59 on that date; for any other
    value (the default) it is 00:00:00.
    """
    # NOTE: the parameter name shadows the `time` module inside this function only.
    suffix = " 235959" if time == "end" else " 000000"
    return datetime.strptime(string + suffix, "%Y-%m-%d %H%M%S")


# Proposed Document Schema:
# {
#   stp_indicator: "O",
#   train_uid: "C07284"
#   atoc_code: "GW"
#   schedule_days_runs: []
#   schedule_end_date: "2023-06-02"
#   headcode: "5G30"
#   power_type: "DMU"
#   speed: "090"
#   catering_code: null
#   service_branding: ""
#   passenger_stops: [
#       {
#           'tiploc': "TIPLOC",
#           'pb_arr': "PublicArrival",
#           'pb_dep': "PublicDeparture"
#       }
#   ]
# }

### CURRENT STATE: loopTimetable and insertSchedule build the data into
### a suitable format to send to Mongo, there needs to be logic around
### the transaction_type.  Parsing helper functions implemented to keep code tidy
### Stops need parsing

# Function Usage Map =>
# runUpdate() =>
#   isUpdateRequired()
#   loopTimetable() =>
#       insertSchedule()
#   Will then need to insert into database