Improve modularity of code and readability of logs

This commit is contained in:
Fred Boniface 2023-06-06 12:59:44 +01:00
parent 8ef0585916
commit 88fe86d56a
6 changed files with 49 additions and 91 deletions

View File

@ -4,7 +4,8 @@ import requests
import logger as log
import zlib
import json
import mongo
import datetime
import mongo, helpers
CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
@ -14,6 +15,33 @@ CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
log.out("corpus.py: CORPUS Module Loaded", "DBUG")
def runUpdate():
required = isUpdateRequired()
if required:
corpus_data = fetch()
prepared_corpus = removeEmpty(corpus_data)
prepared_stations = onlyStations(prepared_corpus)
corpus_indexes = ["3ALPHA", "NLC"]
mongo.dropCollection("corpus")
mongo.putMany("corpus", prepared_corpus, corpus_indexes)
stations_indexes = ["3ALPHA", "STANOX", "TIPLOC"]
mongo.dropCollection("stations")
mongo.putMany("stations", prepared_stations, stations_indexes)
return
return
def isUpdateRequired():
update_time = mongo.metaCheckTime("corpus")
age = helpers.getAgeInSeconds(update_time)
readable_age = str(datetime.timedelta(seconds=age))
log.out(f"corpus.isUpdateRequired: CORPUS data is {readable_age} seconds old.", "INFO")
if age > helpers.two_weeks_in_seconds:
log.out("corpus.isUpdateRequired: CORPUS data required update", "INFO")
return True
log.out("corpus.isUpdateRequired: CORPUS data does not need updating", "INFO")
return False
def fetch():
log.out("corpus.fetch: Fetching CORPUS Data from Network Rail", "INFO")
response = requests.get(CORPUS_URL, auth=(CORPUS_USER, CORPUS_PASS))
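
runUpdate() relies on removeEmpty() and onlyStations(), whose bodies sit outside this hunk. A rough sketch of the idea, assuming the fetched CORPUS data is a list of plain dicts and that stations are the entries carrying a 3ALPHA code (hypothetical, not the actual implementation):

def removeEmpty(corpus_data):
    # Sketch: drop keys whose value is an empty string so the Mongo documents stay compact.
    return [{k: v for k, v in record.items() if v != ""} for record in corpus_data]

def onlyStations(corpus_data):
    # Sketch: treat any CORPUS entry that has a 3ALPHA code as a station.
    return [record for record in corpus_data if record.get("3ALPHA")]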

View File

@ -1,5 +1,8 @@
import time
one_day_in_seconds = 86400
two_weeks_in_seconds = 1209600
def getAgeInSeconds(updateTimeInSeconds :int):
now = int(time.time())
ageInSeconds :int = now - updateTimeInSeconds
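
The hunk ends before getAgeInSeconds() returns; a minimal complete sketch, assuming the function simply hands back the computed difference:

import time

def getAgeInSeconds(updateTimeInSeconds: int) -> int:
    # Age = current epoch time minus the stored update timestamp.
    now = int(time.time())
    return now - updateTimeInSeconds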

View File

@ -14,7 +14,7 @@
# program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
version = "2023.6.6"
version = "2023.6.8"
print(f"main.py: Initialising db-manager v{version}")
#Third Party Imports
@ -23,7 +23,7 @@ import time
# Import logger
import logger as log
log.out(f"main.py: db-manager {version} Initialised", "INFO")
log.out(f"main.py: db-manager {version} Initialised on host {os.uname()[1]}", "INFO")
#Local Imports
import corpus, mongo, pis, mailer, timetable
@ -38,27 +38,8 @@ while dbReady is False:
except:
dbReady = False
#Check & Update corpus/stations:
# If older than 12 days then update
corpusAge = int(time.time()) - mongo.metaCheckTime("corpus")
log.out(f'main.py: Corpus is {corpusAge}s old', "INFO")
if corpusAge > 1036800:
log.out('main.py: Updating CORPUS data', "INFO")
corpusData = corpus.removeEmpty(corpus.fetch())
mongo.putBulkCorpus(corpusData)
else:
log.out('main.py: Not updating CORPUS data until it is 1036800s old.', "INFO")
stationsAge = int(time.time()) - mongo.metaCheckTime("stations")
log.out(f'main.py: Stations is {stationsAge}s old', "INFO")
# While the source of stations data is CORPUS, this check is based on corpusAge; when/if the source changes, it should be switched to stationsAge.
# If stationsAge were used now, the stations update could run when the CORPUS update did not, and corpusData would not exist.
if corpusAge > 1036800:
log.out('main.py: Updating stations data', "INFO")
stationData = corpus.onlyStations(corpusData)
mongo.putBulkStations(stationData)
else:
log.out('main.py: Not updating stations data until it is 1036800s old.', "INFO")
## Run CORPUS Update
corpus.runUpdate()
## Run PIS Update
pis.runUpdate()
@ -66,6 +47,7 @@ pis.runUpdate()
## Run Timetable Update
timetable.runUpdate()
## Create general indexes
log.out('main.py: Requesting TTL Index Creation', "INFO")
mongo.createTtlIndex("users", "atime", 2629800)
mongo.createTtlIndex("registrations", "time", 1800)
@ -73,7 +55,6 @@ mongo.createTtlIndex("registrations", "time", 1800)
# Push version number to database for reporting
mongo.putVersion(version)
# END
log.out(f"main.py: db-manager v{version} Complete", "INFO")
log.out(f"main.py: Mailing logs")
mailer.submitLogs()
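
For context, 2629800 seconds is roughly one average month (30.4375 days) and 1800 seconds is 30 minutes. As the mongo.createTtlIndex hunk below shows, each call boils down to pymongo's expireAfterSeconds option, e.g. (sketch, assuming 'atime' is stored as a BSON date):

# MongoDB removes a document once its 'atime' date is ~1 month in the past.
db["users"].create_index("atime", expireAfterSeconds=2629800)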

View File

@ -1,6 +1,6 @@
import os
from pymongo import MongoClient
import time
import time, datetime
import urllib.parse
import logger as log
@ -22,7 +22,8 @@ def metaCheckTime(target):
incrementCounter("meta")
if type(res) is dict:
if 'updated' in res:
log.out(f'mongo.metaUpdateTime: {target} last updated at {res["updated"]}', "INFO")
readable_datetime = datetime.datetime.fromtimestamp(res["updated"])
log.out(f'mongo.metaCheckTime: {target} last updated at {readable_datetime}', "INFO")
return res["updated"]
log.out(f'mongo.metaCheckTime: {target} does not exist', "EROR")
return 0
@ -50,67 +51,14 @@ def createTtlIndex(collection, field, time):
col.create_index(field, expireAfterSeconds = time)
log.out(f'mongo.createTtlIndex: Created TTL Index of {field} in {collection} to expire after {time} seconds', "INFO")
def putBulkCorpus(data):
collection = "corpus"
startCount = getLength(collection)
col = db[collection]
incrementCounter(collection)
if startCount > 0:
log.out(f'mongo.putBulkCorpus: Dropping {startCount} CORPUS documents', "INFO")
col.drop()
col.insert_many(data)
endCount = getLength(collection)
log.out(f'mongo.putBulkCorpus: {endCount} documents inserted', "INFO")
log.out(f'mongo.putBulkCorpus: {endCount - startCount} new documents', "INFO")
log.out('mongo.putBulkCorpus: Building collection indexes',"INFO")
createSingleIndex(collection, "NLC")
createSingleIndex(collection, "3ALPHA")
log.out('mongo.putBulkCorpus: Updating meta time',"INFO")
metaUpdateTime(collection)
return
def putBulkStations(data):
collection = "stations"
startCount = getLength(collection)
col = db[collection]
incrementCounter("stations")
if startCount > 0:
log.out(f'mongo.putBulkStations: Dropping {startCount} station documents', "INFO")
col.drop()
col.insert_many(data)
endCount = getLength(collection)
log.out(f'mongo.putBulkStations: {endCount} documents inserted', "INFO")
log.out(f'mongo.putBulkStations: {endCount - startCount} new documents', "INFO")
log.out('mongo.putBulkStations: Building collection indexes',"INFO")
createSingleIndex(collection, "3ALPHA")
createSingleIndex(collection, "STANOX")
createSingleIndex(collection, "TIPLOC")
log.out('mongo.putBulkStations: Updating meta time',"INFO")
metaUpdateTime(collection)
return
def putBulkPis(data):
collection = "pis"
startCount = getLength(collection)
col = db[collection]
incrementCounter(collection)
if startCount > 0:
log.out(f'mongo.putBulkPis: Dropping {startCount} pis documents', "INFO")
col.drop()
col.insert_many(data)
endCount = getLength(collection)
log.out(f'mongo.putBulkPis: {endCount} documents inserted', "INFO")
log.out(f'mongo.putBulkPis: {endCount-startCount} new documents', "INFO")
log.out('mongo.putBulkPis: Updating meta time', "INFO")
metaUpdateTime(collection)
return
def putMany(collection :str, data :list):
def putMany(collection :str, data :list, indexed_fields :list = []):
log.out(f"mongo.putMany: Inserting many documents to: {collection}")
col = db[collection]
incrementCounter(collection)
col.insert_many(data)
metaUpdateTime(collection)
for item in indexed_fields:
createSingleIndex(collection, item)
def incrementCounter(target):
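
With the new indexed_fields parameter, the three removed putBulk* helpers collapse into generic calls; the corpus and pis hunks in this commit use it like so:

mongo.dropCollection("corpus")
mongo.putMany("corpus", prepared_corpus, ["3ALPHA", "NLC"])

mongo.dropCollection("pis")
mongo.putMany("pis", pis_parsed, ["stops", "tiplocs"])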

View File

@ -15,10 +15,9 @@ def runUpdate():
log.out(f"pis.runUpdate: Update required", "INFO")
pis_data = load()
pis_parsed = parse(pis_data)
pis_indexes = ["stops", "tiplocs"]
mongo.dropCollection("pis")
mongo.putMany("pis", pis_parsed)
mongo.createSingleIndex("pis", "stops")
mongo.createSingleIndex("pis", "tiplocs")
mongo.putMany("pis", pis_parsed, pis_indexes)
def requiresUpdate():
if REBUILD:
@ -42,7 +41,7 @@ def load():
pis = yaml.safe_load(data)
return pis["pis"]
except yaml.YAMLError as exc:
print(exc)
log.out(f"pis.load: Error loading YAML: {exc}", "EROR")
return exc
def parse(codeList):
@ -51,7 +50,6 @@ def parse(codeList):
log.out(f"pis.parse: Removing duplicate codes & adding TIPLOCs")
for i in codeList:
stops = i['stops']
print(stops)
code = i['code']
for ii in codeList:
if stops == ii['stops'] and code != ii['code']:
@ -63,10 +61,9 @@ def parse(codeList):
# show 0 results.
tiplocs = []
for iii in stops:
print(iii)
tiplocs.append(getTiploc(iii))
i['tiplocs'] = tiplocs
print(f"pis.parse: Removed {StartLen - len(codeList)} duplicates")
log.out(f"pis.parse: Removed {StartLen - len(codeList)} duplicates", "INFO")
return codeList
def getTiploc(crs :str):
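
getTiploc() itself sits outside the hunk; given the stations collection indexed on 3ALPHA above, the lookup it performs amounts to something like this (a hypothetical sketch, not the actual implementation):

def crs_to_tiploc(db, crs: str):
    # Hypothetical: a single indexed find on the stations collection built by
    # corpus.runUpdate(), mapping a 3ALPHA (CRS) code to its TIPLOC.
    station = db["stations"].find_one({"3ALPHA": crs.upper()})
    return station["TIPLOC"] if station else None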

View File

@ -49,8 +49,9 @@ def isUpdateRequired():
timetableLength = mongo.getLength("timetable")
log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents", "DBUG")
timetableUpdateTime = mongo.metaCheckTime("timetable")
log.out(f"timetable.isUpdateRequired: Timetable last updated at {timetableUpdateTime}", "INFO")
timetableDataAge = helpers.getAgeInSeconds(timetableUpdateTime)
readable_age = str(timedelta(seconds=timetableDataAge))
log.out(f"timetable.isUpdateRequired: Timetable data age: {readable_age}", "INFO")
if (timetableDataAge >= twoDayinSecs and isAfter0800) or REBUILD:
log.out(f"timetable.isUpdateRequired: timetable collection requires rebuild", "INFO")
return "full"