PIS Update based on hash rather than time

This commit is contained in:
Fred Boniface 2023-05-31 22:09:09 +01:00
parent 71dbf5df52
commit 7122233ac3
8 changed files with 93 additions and 48 deletions

View File

@ -4,6 +4,7 @@ Dockerfile
.dockerignore
.gitignore
LICENSE
dbman-log
# ---> Python
# Byte-compiled / optimized / DLL files

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
run.sh
env
dbman-log
# ---> Python
# Byte-compiled / optimized / DLL files

View File

@ -9,10 +9,11 @@ import mongo
CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
#Fetch Configuration
log.out("corpus.py: Fetching CORPUS Configuration", "INFO")
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
log.out("corpus.py: CORPUS Module Loaded", "DBUG")
def fetch():
log.out("corpus.fetch: Fetching CORPUS Data from Network Rail", "INFO")
response = requests.get(CORPUS_URL, auth=(CORPUS_USER, CORPUS_PASS))

View File

@ -15,6 +15,9 @@
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
import smtplib, ssl, os
import logger as log
log.out("mailer.py: Mailer module loaded", "DBUG")
def submitLogs():
text :str = fetchLogs()

View File

@ -14,21 +14,21 @@
# program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
version = "2023.5.7"
version = "2023.5.9"
print(f"main.py: Initialising db-manager v{version}")
#Third Party Imports
import os
import time
# Import logger
import logger as log
log.out(f"main.py: db-manager {version} Initialised", "INFO")
#Local Imports
import corpus, mongo, pis, mailer, timetable
import logger as log
log.out("main.py: db-manager Initialised", "INFO")
#Ensure count document exists in meta, wrap in while look to prevent crashing if the DB is not ready:
# Ensure count document exists in meta, wrap in while look to prevent crashing if the DB is not ready:
dbReady = False
while dbReady is False:
try:
@ -59,17 +59,8 @@ if corpusAge > 1036800:
else:
log.out('main.py: Not updating stations data until it is 1036800s old.', "INFO")
#Check & Update pis data:
# If older than 2 days then update
pisAge = int(time.time()) - mongo.metaCheckTime("pis")
log.out(f'main.py: PIS Data is {pisAge}s old', "INFO")
if pisAge > 43200: # Temporarily set to 15 minutes
log.out('main.py: Updating PIS data', "INFO")
pisData = pis.load()
pisParsed = pis.parse(pisData)
mongo.putBulkPis(pisParsed)
else:
log.out('main.py: Not updating PIS data until is it 1036800s old', "INFO")
## Run PIS Update
pis.runUpdate()
## Run Timetable Update
timetable.runUpdate()

View File

@ -4,17 +4,18 @@ import time
import urllib.parse
import logger as log
log.out("mongo.py: Fetching configuration", "INFO")
db_host = os.getenv('OWL_DB_HOST', 'localhost')
db_port = os.getenv('OWL_DB_PORT', 27017)
db_user = urllib.parse.quote_plus(os.getenv('OWL_DB_USER', "owl"))
db_pass = urllib.parse.quote_plus(os.getenv('OWL_DB_PASS', "twittwoo"))
db_name = os.getenv('OWL_DB_NAME', "owlboard")
log.out(f"mongo.py: Connecting to database at {db_host}:{db_port}", "DBUG")
log.out(f"mongo.py: Database URI: {db_host}:{db_port}", "DBUG")
client = MongoClient(f"mongodb://{db_user}:{db_pass}@{db_host}:{db_port}")
db = client[db_name]
log.out("mongo.py: Mongo module loaded", "DBUG")
def metaCheckTime(target):
col = db["meta"]
res = col.find_one({"target": target, "type": "collection"})
@ -104,6 +105,13 @@ def putBulkPis(data):
metaUpdateTime(collection)
return
def putMany(collection :str, data :list):
log.out(f"mongo.putMany: Inserting many documents to: {collection}")
col = db[collection]
incrementCounter(collection)
col.insert_many(data)
metaUpdateTime(collection)
def incrementCounter(target):
collection = "meta"
@ -116,7 +124,6 @@ def metaCounters():
collection = "meta"
col = db[collection]
res = col.find_one({"target": "counters","type": "count"})
log.out(f'mongo.metaCounters: Query returned `{res}`', "DEBG")
if type(res) is dict:
if 'since' in res:
log.out('mongo.metaCounters: counters already exists, skipping', "INFO")
@ -139,10 +146,39 @@ def putTimetable(data):
res = col.insert_many(data)
def dropCollection(collection):
log.out(f"mongo.dropCollection: Dropping collection '{collection}'")
col = db[collection]
res = col.drop()
def deleteTimetableData(query):
collection = "timetable"
col = db[collection]
res = col.delete_one(query)
res = col.delete_one(query)
def getMetaHash(target):
collection = "meta"
col = db[collection]
res = col.find_one({"target": target, "type": "collection"})
incrementCounter("meta")
if type(res) is dict:
if 'updated' in res:
try:
log.out(f'mongo.metaGetHash: {target} hash is {res["hash"]}', 'INFO')
return res["hash"]
except:
return None
return None
def putMetaHash(target :str, hash :str):
collection = "meta"
col = db[collection]
filter = {
"target": target,
"type": "collection"
}
update = {
"target": target,
"type": "collection",
"hash": hash
}
res = col.update_one(filter, {"$set": update}, upsert=True)

View File

@ -1,9 +1,34 @@
import yaml
import yaml, hashlib
import logger as log
import mongo
print("PIS Module imported")
print("pis.py: PIS Module Loaded", "DBUG")
file_location :str = "/app/data/pis/gwr.yaml"
def runUpdate():
if (not requiresUpdate()):
log.out('pis.runUpdate: PIS Codes do not need updating', 'INFO')
return
log.out(f"pis.runUpdate: Update required", "INFO")
pis_data = load()
pis_parsed = parse(pis_data)
mongo.dropCollection("pis")
mongo.putMany("pis", pis_parsed)
def requiresUpdate():
currentHash = mongo.getMetaHash("pis")
with open(file_location, "r") as f:
text = f.read()
newHash = hashlib.md5(text.encode()).hexdigest()
if currentHash is None or newHash != currentHash:
log.out(f"pis.requiresUpdate: currentHash: {currentHash}, newHash: {newHash}", "INFO")
mongo.putMetaHash("pis", newHash)
return True
mongo.putMetaHash("pis", newHash)
return False
def load(): # Programatically add a `toc` field to each entry.
with open("/app/data/pis/gwr.yaml", "r") as data:
with open(file_location, "r") as data:
try:
pis = yaml.safe_load(data)
print(pis)
@ -12,9 +37,6 @@ def load(): # Programatically add a `toc` field to each entry.
print(exc)
return exc
## Do some magic here so that if any pis["pis"]["stops"][0] field contains 'reverse' then get the stops for the code stored in pis["pis"]["stops"][1]
## reverse the stops and store that.
def parse(codeList):
StartLen = len(codeList)
print(f"pis.parse: codeList starting length: {StartLen}")
@ -26,18 +48,4 @@ def parse(codeList):
print(f"Identical stopping pattern found: {ii['code']}")
codeList.remove(ii)
print(f"pis.parse: Removed {StartLen - len(codeList)} duplicates")
return codeList
def devLoad(): # Programatically add a `toc` field to each entry.
with open("/home/fred.boniface/git/owlboard/db-manager/data/pis/gwr.yaml", "r") as data:
try:
pis = yaml.safe_load(data)
print(pis)
return pis["pis"]
except yaml.YAMLError as exc:
print(exc)
return exc
def dev():
data = devLoad()
parse(data)
return codeList

View File

@ -14,6 +14,8 @@
# program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
REBUILD :bool = False ## Set to true to rebuild database
#Imports
import os
import requests
@ -27,20 +29,22 @@ from datetime import datetime, timedelta
# This module downloads a single TOCs Schedule data
now = datetime.now()
yesterday = now - timedelta(days=1)
yesterdayDay = yesterday.strftime("%a").lower()
yesterdayDay = yesterday.strftime("%a").lower()
TOC_Code = "EF" # Business code for GWR
fullDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_FULL_DAILY&day=toc-full"
updateDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_{TOC_Code}_TOC_UPDATE_DAILY&day=toc-update-{yesterdayDay}"
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
log.out("timetable.py: Timetable module loaded", "DBUG")
# Determine state of current Timetable Database
def isUpdateRequired():
timetableLength = mongo.getLength("timetable")
log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents", "DBUG")
timetableUpdateDate = mongo.metaCheckTime("timetable")
log.out(f"timetable.isUpdateRequired: Timetable last updated at {timetableUpdateDate}", "INFO")
if (not timetableLength or int(time.time()) > timetableUpdateDate + 172800):
if (not timetableLength or int(time.time()) > timetableUpdateDate + 172800 or REBUILD):
log.out(f"timetable.isUpdateRequired: timetable collection requires rebuild", "INFO")
return "full"
if (int(time.time()) > (timetableUpdateDate + 86400)):
@ -99,8 +103,8 @@ def insertSchedule(sch_record):
schedule = sch_record['JsonScheduleV1']
scheduleId = schedule['CIF_train_uid']
transactionType = schedule['transaction_type']
if ('schedule_start_date' in sch_record):
scheduleStart = _helpParseDate(sch_record['schedule_start_date'])
if ('schedule_start_date' in schedule):
scheduleStart = _helpParseDate(schedule['schedule_start_date'])
else:
now = datetime.now()
scheduleStart = now.replace(hour=0,minute=0,second=0,microsecond=0)
@ -167,7 +171,7 @@ def _helpParseDays(string):
selectedDays = [daysList[i] for i, value in enumerate(string) if value == "1"]
return selectedDays
def _helpParseDate(string, time):
def _helpParseDate(string :str, time :str = "false"):
# Incoming string contains date in format %Y-%m-%d, if the time signified end of schedule,
# append 23:59:59 to the time, else append 00:00:00 to the string.
if time == "end":