Add timetable data

This commit is contained in:
Fred Boniface 2023-05-31 19:04:33 +01:00
parent 467651c45f
commit 3d111e2563
6 changed files with 149 additions and 56 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
run.sh run.sh
env
# ---> Python # ---> Python
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files

View File

@ -1,4 +1,8 @@
from datetime import datetime from datetime import datetime
def out(msg, level = "OTHR"): def out(msg :str, level :str = "OTHR"):
print(datetime.now().strftime("%m/%d/%Y, %H:%M:%S") + ": " + level + ": " + msg) logline :str = f'{datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}: {level}: {msg}'
print(logline)
tmpfile = "dbman-log"
with open(tmpfile, 'a') as logfile:
logfile.write(f'{logline}\n')

57
src/mailer.py Normal file
View File

@ -0,0 +1,57 @@
# db-manager - Builds and manages an OwlBoard database instance - To be run on a
# cron schedule
# Copyright (C) 2023 Frederick Boniface
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
import smtplib, ssl, os
def submitLogs():
text :str = fetchLogs()
sendMail(text)
def fetchLogs():
with open("dbman-log", "r") as tmpfile:
return tmpfile.read()
def deleteLogs():
if os.path.exists("dbman-log"):
os.remove("dbman-log")
print("Tidied log file")
else:
print("No logfile to tidy")
def sendMail(messageBody :str):
smtpHost = os.getenv("OWL_EML_HOST")
smtpPort = os.getenv("OWL_EML_PORT")
smtpUser = os.getenv("OWL_EML_USER")
smtpPass = os.getenv("OWL_EML_PASS")
smtpFrom = os.getenv("OWL_EML_FROM")
context = ssl.create_default_context()
message = f"""Subject: OwlBoard-dbman-logs
{messageBody}"""
try:
server = smtplib.SMTP(smtpHost,smtpPort)
server.ehlo()
server.starttls(context=context) # Secure the connection
server.ehlo()
server.login(smtpUser, smtpPass)
server.sendmail(smtpFrom, "server-notification-receipt@fjla.uk", message)
except Exception as e:
# Print any error messages to stdout
print(e)
finally:
server.quit()
deleteLogs()

View File

@ -14,7 +14,7 @@
# program. If not, see # program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE # https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
version = "2023.5.6" version = "2023.5.7"
print(f"main.py: Initialising db-manager v{version}") print(f"main.py: Initialising db-manager v{version}")
#Third Party Imports #Third Party Imports
@ -22,7 +22,7 @@ import os
import time import time
#Local Imports #Local Imports
import corpus, mongo, pis import corpus, mongo, pis, mailer, timetable
import logger as log import logger as log
log.out("main.py: db-manager Initialised", "INFO") log.out("main.py: db-manager Initialised", "INFO")
@ -71,6 +71,9 @@ if pisAge > 43200: # Temporarily set to 15 minutes
else: else:
log.out('main.py: Not updating PIS data until is it 1036800s old', "INFO") log.out('main.py: Not updating PIS data until is it 1036800s old', "INFO")
## Run Timetable Update
timetable.runUpdate()
log.out('main.py: Requesting TTL Index Creation', "INFO") log.out('main.py: Requesting TTL Index Creation', "INFO")
mongo.createTtlIndex("users", "atime", 2629800) mongo.createTtlIndex("users", "atime", 2629800)
mongo.createTtlIndex("registrations", "time", 1800) mongo.createTtlIndex("registrations", "time", 1800)
@ -80,3 +83,5 @@ mongo.putVersion(version)
# END # END
log.out(f"main.py: db-manager v{version} Complete", "INFO") log.out(f"main.py: db-manager v{version} Complete", "INFO")
log.out(f"main.py: Mailing logs")
mailer.submitLogs()

View File

@ -11,7 +11,7 @@ db_user = urllib.parse.quote_plus(os.getenv('OWL_DB_USER', "owl"))
db_pass = urllib.parse.quote_plus(os.getenv('OWL_DB_PASS', "twittwoo")) db_pass = urllib.parse.quote_plus(os.getenv('OWL_DB_PASS', "twittwoo"))
db_name = os.getenv('OWL_DB_NAME', "owlboard") db_name = os.getenv('OWL_DB_NAME', "owlboard")
log.out(f"mongo.py: Connecting to database at {db_host}:{db_port}", "INFO") log.out(f"mongo.py: Connecting to database at {db_host}:{db_port}", "DBUG")
client = MongoClient(f"mongodb://{db_user}:{db_pass}@{db_host}:{db_port}") client = MongoClient(f"mongodb://{db_user}:{db_pass}@{db_host}:{db_port}")
db = client[db_name] db = client[db_name]
@ -23,12 +23,12 @@ def metaCheckTime(target):
if 'updated' in res: if 'updated' in res:
log.out(f'mongo.metaUpdateTime: {target} last updated at {res["updated"]}', "INFO") log.out(f'mongo.metaUpdateTime: {target} last updated at {res["updated"]}', "INFO")
return res["updated"] return res["updated"]
log.out(f'mongo.metaUpdatetime: {target} does not exist', "INFO") log.out(f'mongo.metaUpdatetime: {target} does not exist', "EROR")
return 0 return 0
def metaUpdateTime(target): def metaUpdateTime(target):
col = db["meta"] col = db["meta"]
log.out(f'mongo.metaUpdateTime: Updating updated time for {target}', "INFO") log.out(f'mongo.metaUpdateTime: Updating updated time for {target}', "DBUG")
res = col.update_one({"target": target, "type":"collection"}, {"$set":{"updated": int(time.time()),"target":target, "type":"collection"}}, upsert=True) res = col.update_one({"target": target, "type":"collection"}, {"$set":{"updated": int(time.time()),"target":target, "type":"collection"}}, upsert=True)
incrementCounter("meta") incrementCounter("meta")
@ -137,3 +137,12 @@ def putTimetable(data):
collection = "timetable" collection = "timetable"
col = db[collection] col = db[collection]
res = col.insert_many(data) res = col.insert_many(data)
def dropCollection(collection):
col = db[collection]
res = col.drop()
def deleteTimetableData(query):
collection = "timetable"
col = db[collection]
res = col.delete_one(query)

View File

@ -22,37 +22,42 @@ import zlib
import json import json
import mongo import mongo
import time import time
from datetime import datetime from datetime import datetime, timedelta
# This module downloads a single TOCs Schedule data # This module downloads a single TOCs Schedule data
now = datetime.now()
yesterday = now - timedelta(days=1)
yesterdayDay = yesterday.strftime("%a").lower()
TOC_Code = "EF" # Business code for GWR TOC_Code = "EF" # Business code for GWR
fullDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_FULL_DAILY&day=toc-full" fullDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_FULL_DAILY&day=toc-full"
#updateDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{day}" updateDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{yesterdayDay}"
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER') CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS') CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
# Determine state of current Timetable Database # Determine state of current Timetable Database
def isUpdateRequired(): def isUpdateRequired():
timetableLength = mongo.getLength("timetable") timetableLength = mongo.getLength("timetable")
log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents") log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents", "DBUG")
timetableUpdateDate = mongo.metaUpdateTime("timetable") timetableUpdateDate = mongo.metaCheckTime("timetable")
log.out(f"timetable.isUpdateRequired: Timetable last updated at {timetableUpdateDate}", "INFO")
if (not timetableLength or int(time.time()) > timetableUpdateDate + 172800): if (not timetableLength or int(time.time()) > timetableUpdateDate + 172800):
log.out(f"timetable.isUpdateRequired: timetable collection requires rebuild") log.out(f"timetable.isUpdateRequired: timetable collection requires rebuild", "INFO")
return "full" return "full"
if (int(time.time()) > (timetableUpdateDate + 86400)): if (int(time.time()) > (timetableUpdateDate + 86400)):
log.out(f"timetable.isUpdateRequired: timetable collection requires update") log.out(f"timetable.isUpdateRequired: timetable collection requires update", "INFO")
return "update" return "update"
return False return False
def getTimetable(full = False): def getTimetable(full :bool = False):
downloadUrl = fullDataUrl if full else updateDataUrl downloadUrl :str = fullDataUrl if full else updateDataUrl
response = requests.get(downloadUrl, auth=(CORPUS_USER, CORPUS_PASS)) response = requests.get(downloadUrl, auth=(CORPUS_USER, CORPUS_PASS))
mongo.incrementCounter("schedule_api") mongo.incrementCounter("schedule_api")
log.out(f"timetable.getTimetable: Fetch (Full:{full}) response: {response.status_code}", "DBUG")
return zlib.decompress(response.content, 16+zlib.MAX_WBITS) return zlib.decompress(response.content, 16+zlib.MAX_WBITS)
def loopTimetable(data): def loopTimetable(data):
listify = data.splitlines() listify = data.splitlines()
documents = [] documents :list = []
for item in listify: for item in listify:
dic = json.loads(item) dic = json.loads(item)
if ('JsonTimetableV1' in dic): if ('JsonTimetableV1' in dic):
@ -75,16 +80,19 @@ def loopTimetable(data):
def runUpdate(): def runUpdate():
required = isUpdateRequired() required = isUpdateRequired()
if (required == "full"): if (required == "full"):
log.out("timetable.runUpdate: Fetching full timetable data") log.out("timetable.runUpdate: Fetching full timetable data", "INFO")
data = getTimetable(full = True) data = getTimetable(full = True)
elif (required == "update"): elif (required == "update"):
log.out("timetable.runUpdate: Fetching todays timetable update") log.out("timetable.runUpdate: Fetching todays timetable update", "INFO")
data = getTimetable() data = getTimetable()
else: else:
log.out("timetable.runUpdate: timetable update is not needed") log.out("timetable.runUpdate: timetable update is not needed", "INFO")
return "done" return "done"
parsed = loopTimetable(data) parsed = loopTimetable(data)
mongo.putTimetable(parsed) status = _insertToDb(parsed, required)
if (status):
mongo.metaUpdateTime("timetable")
## Check what happens if there is no update ## Check what happens if there is no update
def insertSchedule(sch_record): def insertSchedule(sch_record):
@ -97,6 +105,7 @@ def insertSchedule(sch_record):
now = datetime.now() now = datetime.now()
scheduleStart = now.replace(hour=0,minute=0,second=0,microsecond=0) scheduleStart = now.replace(hour=0,minute=0,second=0,microsecond=0)
document = { document = {
'transactionType': schedule['transaction_type'],
'stpIndicator': schedule['CIF_stp_indicator'], 'stpIndicator': schedule['CIF_stp_indicator'],
'trainUid': scheduleId, 'trainUid': scheduleId,
'headcode': schedule['schedule_segment']['signalling_id'], 'headcode': schedule['schedule_segment']['signalling_id'],
@ -106,14 +115,51 @@ def insertSchedule(sch_record):
'scheduleEndDate': _helpParseDate(schedule['schedule_end_date'], "end"), 'scheduleEndDate': _helpParseDate(schedule['schedule_end_date'], "end"),
'daysRun': _helpParseDays(schedule['schedule_days_runs']) 'daysRun': _helpParseDays(schedule['schedule_days_runs'])
} }
passengerStops = []
if ('schedule_location' in schedule['schedule_segment']): if ('schedule_location' in schedule['schedule_segment']):
stops = _helpParseStops(schedule['schedule_segment']['schedule_location']) stops = _helpParseStops(schedule['schedule_segment']['schedule_location'])
else:
stops = []
document['stops'] = stops document['stops'] = stops
return document return document
def _insertToDb(data :list, type :str):
if type == "full":
mongo.dropCollection("timetable")
mongo.putTimetable(data)
mongo.createSingleIndex("timetable", "headcode")
elif type == "update":
for item in data:
if item['transactionType'] == "Create":
singleList = [item]
mongo.putTimetable(singleList)
elif item['transactionType'] == "Delete":
mongo.deleteTimetableData({'trainUid': item.trainUid})
return True #If Successful else False
def _helpParseStops(schedule_segment): def _helpParseStops(schedule_segment):
return stops = []
for i in schedule_segment:
timing_point = {}
public_departure = i.get("public_departure")
wtt_departure = i.get("departure")
public_arrival = i.get("public_arrival")
wtt_arrival = i.get("arrival")
tiploc_code = i.get("tiploc_code")
isPublic = False
if public_departure and len(public_departure) == 4 and public_departure.isdigit():
isPublic = True
timing_point['publicDeparture'] = public_departure
if public_arrival and len(public_arrival) == 4 and public_arrival.isdigit():
isPublic = True
timing_point['publicArrival'] = public_arrival
if wtt_departure:
timing_point['wttDeparture'] = wtt_departure
if wtt_arrival:
timing_point['wttArrival'] = wtt_arrival
timing_point['isPublic'] = isPublic
timing_point['tiploc'] = tiploc_code
stops.append(timing_point)
return stops
def _helpParseDays(string): def _helpParseDays(string):
# Incoming string contains seven numbers, each number from 0-6 representing days Mon-Sun # Incoming string contains seven numbers, each number from 0-6 representing days Mon-Sun
@ -130,35 +176,6 @@ def _helpParseDate(string, time):
string += " 000000" string += " 000000"
return datetime.strptime(string, "%Y-%m-%d %H%M%S") return datetime.strptime(string, "%Y-%m-%d %H%M%S")
# Proposed Document Schema: def _removeOutdatedServices():
# { #Remove services with an end date before today.
# stp_indicator: "O", return
# train_uid: "C07284"
# atoc_code: "GW"
# schedule_days_runs: []
# schedule_end_date: "2023-06-02"
# headcode: "5G30"
# power_type: "DMU"
# speed: "090"
# catering_code: null
# service_branding: ""
# passenger_stops: [
# {
# 'tiploc': "TIPLOC",
# 'pb_arr': "PublicArrival",
# 'pb_dep': "PublicDepartr"
# }
# ]
### CURRENT STATE: loopTimetable and insertSchedule builds the data into
### a suitable format to send to Mongo, there needs to be logic around
### the transaction_type. Parsinghelper funtions implemented to keep code tidy
### Stops need parsing
# Function Usage Map =>
# runUpdate() =>
# isUpdateRequired()
# loopTimetable() =>
# insertSchedule()
# Will then need to insert into database