Improve modularity of code and readability of logs
This commit is contained in:
parent
8ef0585916
commit
88fe86d56a
@ -4,7 +4,8 @@ import requests
|
||||
import logger as log
|
||||
import zlib
|
||||
import json
|
||||
import mongo
|
||||
import datetime
|
||||
import mongo, helpers
|
||||
|
||||
CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
|
||||
|
||||
@ -14,6 +15,33 @@ CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
|
||||
|
||||
log.out("corpus.py: CORPUS Module Loaded", "DBUG")
|
||||
|
||||
def runUpdate():
    """Rebuild the "corpus" and "stations" collections when an update is due.

    Returns None without touching the database when isUpdateRequired()
    reports False. Otherwise the fetched CORPUS data is passed through
    removeEmpty(), the result of onlyStations() is derived from it, and each
    collection is dropped and re-inserted with its indexed fields.
    """
    if not isUpdateRequired():
        return

    cleaned_corpus = removeEmpty(fetch())
    station_docs = onlyStations(cleaned_corpus)

    # Both datasets are fully prepared before anything is dropped; the corpus
    # collection is replaced first, then stations.
    replacements = (
        ("corpus", cleaned_corpus, ["3ALPHA", "NLC"]),
        ("stations", station_docs, ["3ALPHA", "STANOX", "TIPLOC"]),
    )
    for name, documents, indexes in replacements:
        mongo.dropCollection(name)
        mongo.putMany(name, documents, indexes)
|
||||
|
||||
def isUpdateRequired():
    """Report whether the stored CORPUS data is old enough to be refreshed.

    Reads the last update time recorded for "corpus" through
    mongo.metaCheckTime, converts it to an age with helpers.getAgeInSeconds
    and compares that age to helpers.two_weeks_in_seconds.

    Returns:
        bool: True when the age exceeds helpers.two_weeks_in_seconds,
        otherwise False.
    """
    update_time = mongo.metaCheckTime("corpus")
    age = helpers.getAgeInSeconds(update_time)
    # str(timedelta) already renders as "[D days, ]H:MM:SS", so the log
    # message must not label the value as a number of seconds.
    readable_age = str(datetime.timedelta(seconds=age))
    log.out(f"corpus.isUpdateRequired: CORPUS data is {readable_age} old.", "INFO")
    if age > helpers.two_weeks_in_seconds:
        log.out("corpus.isUpdateRequired: CORPUS data required update", "INFO")
        return True
    log.out("corpus.isUpdateRequired: CORPUS data does not need updating", "INFO")
    return False
|
||||
|
||||
|
||||
def fetch():
|
||||
log.out("corpus.fetch: Fetching CORPUS Data from Network Rail", "INFO")
|
||||
response = requests.get(CORPUS_URL, auth=(CORPUS_USER, CORPUS_PASS))
|
||||
|
@ -1,5 +1,8 @@
|
||||
import time
|
||||
|
||||
# Durations in seconds, spelled out from their parts so the arithmetic can be
# checked at a glance.
one_day_in_seconds = 24 * 60 * 60  # 86400
two_weeks_in_seconds = 14 * one_day_in_seconds  # 1209600
|
||||
|
||||
def getAgeInSeconds(updateTimeInSeconds :int):
|
||||
now = int(time.time())
|
||||
ageInSeconds :int = now - updateTimeInSeconds
|
||||
|
29
src/main.py
29
src/main.py
@ -14,7 +14,7 @@
|
||||
# program. If not, see
|
||||
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
|
||||
|
||||
version = "2023.6.6"
|
||||
version = "2023.6.8"
|
||||
print(f"main.py: Initialising db-manager v{version}")
|
||||
|
||||
#Third Party Imports
|
||||
@ -23,7 +23,7 @@ import time
|
||||
|
||||
# Import logger
|
||||
import logger as log
|
||||
log.out(f"main.py: db-manager {version} Initialised", "INFO")
|
||||
log.out(f"main.py: db-manager {version} Initialised on host {os.uname()[1]}", "INFO")
|
||||
|
||||
#Local Imports
|
||||
import corpus, mongo, pis, mailer, timetable
|
||||
@ -38,27 +38,8 @@ while dbReady is False:
|
||||
except:
|
||||
dbReady = False
|
||||
|
||||
#Check & Update corpus/stations:
|
||||
# If older than 12 days then update
|
||||
corpusAge = int(time.time()) - mongo.metaCheckTime("corpus")
|
||||
log.out(f'main.py: Corpus is {corpusAge}s old', "INFO")
|
||||
if corpusAge > 1036800:
|
||||
log.out('main.py: Updating CORPUS data', "INFO")
|
||||
corpusData = corpus.removeEmpty(corpus.fetch())
|
||||
mongo.putBulkCorpus(corpusData)
|
||||
else:
|
||||
log.out('main.py: Not updating CORPUS data until it is 1036800s old.', "INFO")
|
||||
|
||||
stationsAge = int(time.time()) - mongo.metaCheckTime("stations")
|
||||
log.out(f'main.py: Stations is {stationsAge}s old', "INFO")
|
||||
# While the source of stations data is CORPUS, this statement is based on corpusAge, when/if changing the source, it should be changed to use stationsAge
|
||||
# if stationsAge is used now, there could be a situation where stationsAge tries to update but fails as corpusData doesn't exist.
|
||||
if corpusAge > 1036800:
|
||||
log.out('main.py: Updating stations data', "INFO")
|
||||
stationData = corpus.onlyStations(corpusData)
|
||||
mongo.putBulkStations(stationData)
|
||||
else:
|
||||
log.out('main.py: Not updating stations data until it is 1036800s old.', "INFO")
|
||||
## Run CORPUS Update
|
||||
status = corpus.runUpdate()
|
||||
|
||||
## Run PIS Update
|
||||
pis.runUpdate()
|
||||
@ -66,6 +47,7 @@ pis.runUpdate()
|
||||
## Run Timetable Update
|
||||
timetable.runUpdate()
|
||||
|
||||
## Create general indexes
|
||||
log.out('main.py: Requesting TTL Index Creation', "INFO")
|
||||
mongo.createTtlIndex("users", "atime", 2629800)
|
||||
mongo.createTtlIndex("registrations", "time", 1800)
|
||||
@ -73,7 +55,6 @@ mongo.createTtlIndex("registrations", "time", 1800)
|
||||
# Push version number to database for reporting
|
||||
mongo.putVersion(version)
|
||||
|
||||
# END
|
||||
log.out(f"main.py: db-manager v{version} Complete", "INFO")
|
||||
log.out(f"main.py: Mailing logs")
|
||||
mailer.submitLogs()
|
64
src/mongo.py
64
src/mongo.py
@ -1,6 +1,6 @@
|
||||
import os
|
||||
from pymongo import MongoClient
|
||||
import time
|
||||
import time, datetime
|
||||
import urllib.parse
|
||||
import logger as log
|
||||
|
||||
@ -22,7 +22,8 @@ def metaCheckTime(target):
|
||||
incrementCounter("meta")
|
||||
if type(res) is dict:
|
||||
if 'updated' in res:
|
||||
log.out(f'mongo.metaUpdateTime: {target} last updated at {res["updated"]}', "INFO")
|
||||
readable_datetime = datetime.datetime.fromtimestamp(res["updated"])
|
||||
log.out(f'mongo.metaUpdateTime: {target} last updated at {readable_datetime}', "INFO")
|
||||
return res["updated"]
|
||||
log.out(f'mongo.metaUpdatetime: {target} does not exist', "EROR")
|
||||
return 0
|
||||
@ -50,67 +51,14 @@ def createTtlIndex(collection, field, time):
|
||||
col.create_index(field, expireAfterSeconds = time)
|
||||
log.out(f'mongo.createTtlIndex: Created TTL Index of {field} in {collection} to expire after {time} seconds', "INFO")
|
||||
|
||||
def putBulkCorpus(data):
|
||||
collection = "corpus"
|
||||
startCount = getLength(collection)
|
||||
col = db[collection]
|
||||
incrementCounter(collection)
|
||||
if startCount > 0:
|
||||
log.out(f'mongo.putBulkCorpus: Dropping {startCount} CORPUS documents', "INFO")
|
||||
col.drop()
|
||||
col.insert_many(data)
|
||||
endCount = getLength(collection)
|
||||
log.out(f'mongo.putBulkCorpus: {endCount} documents inserted', "INFO")
|
||||
log.out(f'mongo.putBulkCorpus: {endCount - startCount} new documents', "INFO")
|
||||
log.out('mongo.putBulkCorpus: Building collection indexes',"INFO")
|
||||
createSingleIndex(collection, "NLC")
|
||||
createSingleIndex(collection, "3ALPHA")
|
||||
log.out('mongo.putBulkCorpus: Updating meta time',"INFO")
|
||||
metaUpdateTime(collection)
|
||||
return
|
||||
|
||||
def putBulkStations(data):
|
||||
collection = "stations"
|
||||
startCount = getLength(collection)
|
||||
col = db[collection]
|
||||
incrementCounter("stations")
|
||||
if startCount > 0:
|
||||
log.out(f'mongo.putBulkStations: Dropping {startCount} station documents', "INFO")
|
||||
col.drop()
|
||||
col.insert_many(data)
|
||||
endCount = getLength(collection)
|
||||
log.out(f'mongo.putBulkStations: {endCount} documents inserted', "INFO")
|
||||
log.out(f'mongo.putBulkStations: {endCount - startCount} new documents', "INFO")
|
||||
log.out('mongo.putBulkStations: Building collection indexes',"INFO")
|
||||
createSingleIndex(collection, "3ALPHA")
|
||||
createSingleIndex(collection, "STANOX")
|
||||
createSingleIndex(collection, "TIPLOC")
|
||||
log.out('mongo.putBulkStations: Updating meta time',"INFO")
|
||||
metaUpdateTime(collection)
|
||||
return
|
||||
|
||||
def putBulkPis(data):
|
||||
collection = "pis"
|
||||
startCount = getLength(collection)
|
||||
col = db[collection]
|
||||
incrementCounter(collection)
|
||||
if startCount > 0:
|
||||
log.out(f'mongo.putBulkPid: Dropping {startCount} pis documents', "INFO")
|
||||
col.drop()
|
||||
col.insert_many(data)
|
||||
endCount = getLength(collection)
|
||||
log.out(f'mongo.putBulkPis: {endCount} documents inserted', "INFO")
|
||||
log.out(f'mongo.putBulkPis: {endCount-startCount} new documents', "INFO")
|
||||
log.out('mongo.putBulkPis: Updating meta time', "INFO")
|
||||
metaUpdateTime(collection)
|
||||
return
|
||||
|
||||
def putMany(collection :str, data :list):
|
||||
def putMany(collection :str, data :list, indexed_fields :list = None):
    """Insert documents into a collection, update its meta time and index it.

    Args:
        collection: Name of the collection looked up in `db`.
        data: Documents handed to the collection's insert_many.
        indexed_fields: Field names passed one at a time to
            createSingleIndex. None (the default) creates no indexes.
    """
    # A None sentinel replaces the literal [] default so that no list object
    # is shared between calls.
    if indexed_fields is None:
        indexed_fields = []
    log.out(f"mongo.putMany: Inserting many documents to: {collection}")
    col = db[collection]
    incrementCounter(collection)
    col.insert_many(data)
    metaUpdateTime(collection)
    # Indexes are requested after the insert and the meta-time update.
    for item in indexed_fields:
        createSingleIndex(collection, item)
|
||||
|
||||
|
||||
def incrementCounter(target):
|
||||
|
11
src/pis.py
11
src/pis.py
@ -15,10 +15,9 @@ def runUpdate():
|
||||
log.out(f"pis.runUpdate: Update required", "INFO")
|
||||
pis_data = load()
|
||||
pis_parsed = parse(pis_data)
|
||||
pis_indexes = ["stops", "tiplocs"]
|
||||
mongo.dropCollection("pis")
|
||||
mongo.putMany("pis", pis_parsed)
|
||||
mongo.createSingleIndex("pis", "stops")
|
||||
mongo.createSingleIndex("pis", "tiplocs")
|
||||
mongo.putMany("pis", pis_parsed, pis_indexes)
|
||||
|
||||
def requiresUpdate():
|
||||
if REBUILD:
|
||||
@ -42,7 +41,7 @@ def load():
|
||||
pis = yaml.safe_load(data)
|
||||
return pis["pis"]
|
||||
except yaml.YAMLError as exc:
|
||||
print(exc)
|
||||
log.out(f"pis.load: Error loading YAML: {exc}", "EROR")
|
||||
return exc
|
||||
|
||||
def parse(codeList):
|
||||
@ -51,7 +50,6 @@ def parse(codeList):
|
||||
log.out(f"pis.parse: Removing duplicate codes & adding TIPLOCs")
|
||||
for i in codeList:
|
||||
stops = i['stops']
|
||||
print(stops)
|
||||
code = i['code']
|
||||
for ii in codeList:
|
||||
if stops == ii['stops'] and code != ii['code']:
|
||||
@ -63,10 +61,9 @@ def parse(codeList):
|
||||
# show 0 results.
|
||||
tiplocs = []
|
||||
for iii in stops:
|
||||
print(iii)
|
||||
tiplocs.append(getTiploc(iii))
|
||||
i['tiplocs'] = tiplocs
|
||||
print(f"pis.parse: Removed {StartLen - len(codeList)} duplicates")
|
||||
log.out(f"pis.parse: Removed {StartLen - len(codeList)} duplicates", "INFO")
|
||||
return codeList
|
||||
|
||||
def getTiploc(crs :str):
|
||||
|
@ -49,8 +49,9 @@ def isUpdateRequired():
|
||||
timetableLength = mongo.getLength("timetable")
|
||||
log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents", "DBUG")
|
||||
timetableUpdateTime = mongo.metaCheckTime("timetable")
|
||||
log.out(f"timetable.isUpdateRequired: Timetable last updated at {timetableUpdateTime}", "INFO")
|
||||
timetableDataAge = helpers.getAgeInSeconds(timetableUpdateTime)
|
||||
readable_age = str(timedelta(seconds=timetableDataAge))
|
||||
log.out(f"timetable.isUpdateRequired: Timetable data age: {readable_age}", "INFO")
|
||||
if (timetableDataAge >= twoDayinSecs and isAfter0800) or REBUILD:
|
||||
log.out(f"timetable.isUpdateRequired: timetable collection requires rebuild", "INFO")
|
||||
return "full"
|
||||
|
Reference in New Issue
Block a user