Improve modularity of code and readability of logs
This commit is contained in:
parent
8ef0585916
commit
88fe86d56a
@ -4,7 +4,8 @@ import requests
|
|||||||
import logger as log
|
import logger as log
|
||||||
import zlib
|
import zlib
|
||||||
import json
|
import json
|
||||||
import mongo
|
import datetime
|
||||||
|
import mongo, helpers
|
||||||
|
|
||||||
CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
|
CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
|
||||||
|
|
||||||
@ -14,6 +15,33 @@ CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
|
|||||||
|
|
||||||
log.out("corpus.py: CORPUS Module Loaded", "DBUG")
|
log.out("corpus.py: CORPUS Module Loaded", "DBUG")
|
||||||
|
|
||||||
|
def runUpdate():
    """Rebuild the "corpus" and "stations" collections when CORPUS is stale.

    Asks isUpdateRequired() whether a refresh is due. If so, the CORPUS data
    is fetched once, empty entries are stripped, and the station subset is
    derived from the cleaned data; each collection is then dropped and
    re-populated via mongo.putMany together with its indexed fields.

    Returns:
        bool: True when both collections were rebuilt, False when no update
        was required. (Callers such as main.py store this as a status.)
    """
    if not isUpdateRequired():
        return False

    corpus_data = fetch()
    prepared_corpus = removeEmpty(corpus_data)
    # Stations are derived from the already-cleaned CORPUS data, so a single
    # fetch feeds both collections.
    prepared_stations = onlyStations(prepared_corpus)

    corpus_indexes = ["3ALPHA", "NLC"]
    mongo.dropCollection("corpus")
    mongo.putMany("corpus", prepared_corpus, corpus_indexes)

    stations_indexes = ["3ALPHA", "STANOX", "TIPLOC"]
    mongo.dropCollection("stations")
    mongo.putMany("stations", prepared_stations, stations_indexes)

    return True
|
||||||
|
|
||||||
|
def isUpdateRequired():
    """Report whether the stored CORPUS data is old enough to need a refresh.

    The last update time recorded for "corpus" is read with
    mongo.metaCheckTime, converted to an age with helpers.getAgeInSeconds and
    compared against helpers.two_weeks_in_seconds.

    Returns:
        bool: True when the age exceeds two weeks, otherwise False.
    """
    update_time = mongo.metaCheckTime("corpus")
    age = helpers.getAgeInSeconds(update_time)

    # str(timedelta) renders as "[D days, ]H:MM:SS", so the message must not
    # label the value as a number of seconds.
    readable_age = str(datetime.timedelta(seconds=age))
    log.out(f"corpus.isUpdateRequired: CORPUS data is {readable_age} old.", "INFO")

    if age > helpers.two_weeks_in_seconds:
        log.out("corpus.isUpdateRequired: CORPUS data requires update", "INFO")
        return True

    log.out("corpus.isUpdateRequired: CORPUS data does not need updating", "INFO")
    return False
|
||||||
|
|
||||||
|
|
||||||
def fetch():
|
def fetch():
|
||||||
log.out("corpus.fetch: Fetching CORPUS Data from Network Rail", "INFO")
|
log.out("corpus.fetch: Fetching CORPUS Data from Network Rail", "INFO")
|
||||||
response = requests.get(CORPUS_URL, auth=(CORPUS_USER, CORPUS_PASS))
|
response = requests.get(CORPUS_URL, auth=(CORPUS_USER, CORPUS_PASS))
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
|
# Common durations expressed in seconds. They are spelled out as products so
# that a digit transposition (e.g. 84600 instead of 86400) cannot creep in.
one_day_in_seconds = 24 * 60 * 60  # 86400
two_weeks_in_seconds = 14 * one_day_in_seconds  # 1209600
|
||||||
|
|
||||||
def getAgeInSeconds(updateTimeInSeconds :int):
|
def getAgeInSeconds(updateTimeInSeconds :int):
|
||||||
now = int(time.time())
|
now = int(time.time())
|
||||||
ageInSeconds :int = now - updateTimeInSeconds
|
ageInSeconds :int = now - updateTimeInSeconds
|
||||||
|
29
src/main.py
29
src/main.py
@ -14,7 +14,7 @@
|
|||||||
# program. If not, see
|
# program. If not, see
|
||||||
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
|
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
|
||||||
|
|
||||||
version = "2023.6.6"
|
version = "2023.6.8"
|
||||||
print(f"main.py: Initialising db-manager v{version}")
|
print(f"main.py: Initialising db-manager v{version}")
|
||||||
|
|
||||||
#Third Party Imports
|
#Third Party Imports
|
||||||
@ -23,7 +23,7 @@ import time
|
|||||||
|
|
||||||
# Import logger
|
# Import logger
|
||||||
import logger as log
|
import logger as log
|
||||||
log.out(f"main.py: db-manager {version} Initialised", "INFO")
|
log.out(f"main.py: db-manager {version} Initialised on host {os.uname()[1]}", "INFO")
|
||||||
|
|
||||||
#Local Imports
|
#Local Imports
|
||||||
import corpus, mongo, pis, mailer, timetable
|
import corpus, mongo, pis, mailer, timetable
|
||||||
@ -38,27 +38,8 @@ while dbReady is False:
|
|||||||
except:
|
except:
|
||||||
dbReady = False
|
dbReady = False
|
||||||
|
|
||||||
#Check & Update corpus/stations:
|
## Run CORPUS Update
|
||||||
# If older than 12 days then update
|
status = corpus.runUpdate()
|
||||||
corpusAge = int(time.time()) - mongo.metaCheckTime("corpus")
|
|
||||||
log.out(f'main.py: Corpus is {corpusAge}s old', "INFO")
|
|
||||||
if corpusAge > 1036800:
|
|
||||||
log.out('main.py: Updating CORPUS data', "INFO")
|
|
||||||
corpusData = corpus.removeEmpty(corpus.fetch())
|
|
||||||
mongo.putBulkCorpus(corpusData)
|
|
||||||
else:
|
|
||||||
log.out('main.py: Not updating CORPUS data until it is 1036800s old.', "INFO")
|
|
||||||
|
|
||||||
stationsAge = int(time.time()) - mongo.metaCheckTime("stations")
|
|
||||||
log.out(f'main.py: Stations is {stationsAge}s old', "INFO")
|
|
||||||
# While the source of stations data is CORPUS, this statement is based on corpusAge, when/if changing the source, it should be changed to use stationsAge
|
|
||||||
# if stationsAge is used now, there could be a situation where stationsAge tries to update but fails as corpusData doesn't exist.
|
|
||||||
if corpusAge > 1036800:
|
|
||||||
log.out('main.py: Updating stations data', "INFO")
|
|
||||||
stationData = corpus.onlyStations(corpusData)
|
|
||||||
mongo.putBulkStations(stationData)
|
|
||||||
else:
|
|
||||||
log.out('main.py: Not updating stations data until it is 1036800s old.', "INFO")
|
|
||||||
|
|
||||||
## Run PIS Update
|
## Run PIS Update
|
||||||
pis.runUpdate()
|
pis.runUpdate()
|
||||||
@ -66,6 +47,7 @@ pis.runUpdate()
|
|||||||
## Run Timetable Update
|
## Run Timetable Update
|
||||||
timetable.runUpdate()
|
timetable.runUpdate()
|
||||||
|
|
||||||
|
## Create general indexes
|
||||||
log.out('main.py: Requesting TTL Index Creation', "INFO")
|
log.out('main.py: Requesting TTL Index Creation', "INFO")
|
||||||
mongo.createTtlIndex("users", "atime", 2629800)
|
mongo.createTtlIndex("users", "atime", 2629800)
|
||||||
mongo.createTtlIndex("registrations", "time", 1800)
|
mongo.createTtlIndex("registrations", "time", 1800)
|
||||||
@ -73,7 +55,6 @@ mongo.createTtlIndex("registrations", "time", 1800)
|
|||||||
# Push version number to database for reporting
|
# Push version number to database for reporting
|
||||||
mongo.putVersion(version)
|
mongo.putVersion(version)
|
||||||
|
|
||||||
# END
|
|
||||||
log.out(f"main.py: db-manager v{version} Complete", "INFO")
|
log.out(f"main.py: db-manager v{version} Complete", "INFO")
|
||||||
log.out(f"main.py: Mailing logs")
|
log.out(f"main.py: Mailing logs")
|
||||||
mailer.submitLogs()
|
mailer.submitLogs()
|
64
src/mongo.py
64
src/mongo.py
@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
from pymongo import MongoClient
|
from pymongo import MongoClient
|
||||||
import time
|
import time, datetime
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import logger as log
|
import logger as log
|
||||||
|
|
||||||
@ -22,7 +22,8 @@ def metaCheckTime(target):
|
|||||||
incrementCounter("meta")
|
incrementCounter("meta")
|
||||||
if type(res) is dict:
|
if type(res) is dict:
|
||||||
if 'updated' in res:
|
if 'updated' in res:
|
||||||
log.out(f'mongo.metaUpdateTime: {target} last updated at {res["updated"]}', "INFO")
|
readable_datetime = datetime.datetime.fromtimestamp(res["updated"])
|
||||||
|
log.out(f'mongo.metaUpdateTime: {target} last updated at {readable_datetime}', "INFO")
|
||||||
return res["updated"]
|
return res["updated"]
|
||||||
log.out(f'mongo.metaUpdatetime: {target} does not exist', "EROR")
|
log.out(f'mongo.metaUpdatetime: {target} does not exist', "EROR")
|
||||||
return 0
|
return 0
|
||||||
@ -50,67 +51,14 @@ def createTtlIndex(collection, field, time):
|
|||||||
col.create_index(field, expireAfterSeconds = time)
|
col.create_index(field, expireAfterSeconds = time)
|
||||||
log.out(f'mongo.createTtlIndex: Created TTL Index of {field} in {collection} to expire after {time} seconds', "INFO")
|
log.out(f'mongo.createTtlIndex: Created TTL Index of {field} in {collection} to expire after {time} seconds', "INFO")
|
||||||
|
|
||||||
def putBulkCorpus(data):
|
def putMany(collection :str, data :list, indexed_fields :list = []):
|
||||||
collection = "corpus"
|
|
||||||
startCount = getLength(collection)
|
|
||||||
col = db[collection]
|
|
||||||
incrementCounter(collection)
|
|
||||||
if startCount > 0:
|
|
||||||
log.out(f'mongo.putBulkCorpus: Dropping {startCount} CORPUS documents', "INFO")
|
|
||||||
col.drop()
|
|
||||||
col.insert_many(data)
|
|
||||||
endCount = getLength(collection)
|
|
||||||
log.out(f'mongo.putBulkCorpus: {endCount} documents inserted', "INFO")
|
|
||||||
log.out(f'mongo.putBulkCorpus: {endCount - startCount} new documents', "INFO")
|
|
||||||
log.out('mongo.putBulkCorpus: Building collection indexes',"INFO")
|
|
||||||
createSingleIndex(collection, "NLC")
|
|
||||||
createSingleIndex(collection, "3ALPHA")
|
|
||||||
log.out('mongo.putBulkCorpus: Updating meta time',"INFO")
|
|
||||||
metaUpdateTime(collection)
|
|
||||||
return
|
|
||||||
|
|
||||||
def putBulkStations(data):
|
|
||||||
collection = "stations"
|
|
||||||
startCount = getLength(collection)
|
|
||||||
col = db[collection]
|
|
||||||
incrementCounter("stations")
|
|
||||||
if startCount > 0:
|
|
||||||
log.out(f'mongo.putBulkStations: Dropping {startCount} station documents', "INFO")
|
|
||||||
col.drop()
|
|
||||||
col.insert_many(data)
|
|
||||||
endCount = getLength(collection)
|
|
||||||
log.out(f'mongo.putBulkStations: {endCount} documents inserted', "INFO")
|
|
||||||
log.out(f'mongo.putBulkStations: {endCount - startCount} new documents', "INFO")
|
|
||||||
log.out('mongo.putBulkStations: Building collection indexes',"INFO")
|
|
||||||
createSingleIndex(collection, "3ALPHA")
|
|
||||||
createSingleIndex(collection, "STANOX")
|
|
||||||
createSingleIndex(collection, "TIPLOC")
|
|
||||||
log.out('mongo.putBulkStations: Updating meta time',"INFO")
|
|
||||||
metaUpdateTime(collection)
|
|
||||||
return
|
|
||||||
|
|
||||||
def putBulkPis(data):
|
|
||||||
collection = "pis"
|
|
||||||
startCount = getLength(collection)
|
|
||||||
col = db[collection]
|
|
||||||
incrementCounter(collection)
|
|
||||||
if startCount > 0:
|
|
||||||
log.out(f'mongo.putBulkPid: Dropping {startCount} pis documents', "INFO")
|
|
||||||
col.drop()
|
|
||||||
col.insert_many(data)
|
|
||||||
endCount = getLength(collection)
|
|
||||||
log.out(f'mongo.putBulkPis: {endCount} documents inserted', "INFO")
|
|
||||||
log.out(f'mongo.putBulkPis: {endCount-startCount} new documents', "INFO")
|
|
||||||
log.out('mongo.putBulkPis: Updating meta time', "INFO")
|
|
||||||
metaUpdateTime(collection)
|
|
||||||
return
|
|
||||||
|
|
||||||
def putMany(collection: str, data: list, indexed_fields: list = None):
    """Insert many documents into a collection and build its indexes.

    Args:
        collection: Name of the target collection in ``db``.
        data: Documents handed to ``insert_many`` in a single call.
        indexed_fields: Optional field names; a single-field index is created
            for each one after the insert. Omit (or pass None) for no indexes.

    Side effects visible here: the collection's counter is incremented, the
    documents are inserted, the collection's meta update time is refreshed and
    the requested indexes are created, in that order.
    """
    log.out(f"mongo.putMany: Inserting many documents to: {collection}")
    col = db[collection]
    incrementCounter(collection)
    col.insert_many(data)
    metaUpdateTime(collection)

    # The default is None rather than [] because a list default would be one
    # shared object reused across every call of this function.
    for item in indexed_fields or []:
        createSingleIndex(collection, item)
|
||||||
|
|
||||||
|
|
||||||
def incrementCounter(target):
|
def incrementCounter(target):
|
||||||
|
11
src/pis.py
11
src/pis.py
@ -15,10 +15,9 @@ def runUpdate():
|
|||||||
log.out(f"pis.runUpdate: Update required", "INFO")
|
log.out(f"pis.runUpdate: Update required", "INFO")
|
||||||
pis_data = load()
|
pis_data = load()
|
||||||
pis_parsed = parse(pis_data)
|
pis_parsed = parse(pis_data)
|
||||||
|
pis_indexes = ["stops", "tiplocs"]
|
||||||
mongo.dropCollection("pis")
|
mongo.dropCollection("pis")
|
||||||
mongo.putMany("pis", pis_parsed)
|
mongo.putMany("pis", pis_parsed, pis_indexes)
|
||||||
mongo.createSingleIndex("pis", "stops")
|
|
||||||
mongo.createSingleIndex("pis", "tiplocs")
|
|
||||||
|
|
||||||
def requiresUpdate():
|
def requiresUpdate():
|
||||||
if REBUILD:
|
if REBUILD:
|
||||||
@ -42,7 +41,7 @@ def load():
|
|||||||
pis = yaml.safe_load(data)
|
pis = yaml.safe_load(data)
|
||||||
return pis["pis"]
|
return pis["pis"]
|
||||||
except yaml.YAMLError as exc:
|
except yaml.YAMLError as exc:
|
||||||
print(exc)
|
log.out(f"pis.load: Error loading YAML: {exc}", "EROR")
|
||||||
return exc
|
return exc
|
||||||
|
|
||||||
def parse(codeList):
|
def parse(codeList):
|
||||||
@ -51,7 +50,6 @@ def parse(codeList):
|
|||||||
log.out(f"pis.parse: Removing duplicate codes & adding TIPLOCs")
|
log.out(f"pis.parse: Removing duplicate codes & adding TIPLOCs")
|
||||||
for i in codeList:
|
for i in codeList:
|
||||||
stops = i['stops']
|
stops = i['stops']
|
||||||
print(stops)
|
|
||||||
code = i['code']
|
code = i['code']
|
||||||
for ii in codeList:
|
for ii in codeList:
|
||||||
if stops == ii['stops'] and code != ii['code']:
|
if stops == ii['stops'] and code != ii['code']:
|
||||||
@ -63,10 +61,9 @@ def parse(codeList):
|
|||||||
# show 0 results.
|
# show 0 results.
|
||||||
tiplocs = []
|
tiplocs = []
|
||||||
for iii in stops:
|
for iii in stops:
|
||||||
print(iii)
|
|
||||||
tiplocs.append(getTiploc(iii))
|
tiplocs.append(getTiploc(iii))
|
||||||
i['tiplocs'] = tiplocs
|
i['tiplocs'] = tiplocs
|
||||||
print(f"pis.parse: Removed {StartLen - len(codeList)} duplicates")
|
log.out(f"pis.parse: Removed {StartLen - len(codeList)} duplicates", "INFO")
|
||||||
return codeList
|
return codeList
|
||||||
|
|
||||||
def getTiploc(crs :str):
|
def getTiploc(crs :str):
|
||||||
|
@ -49,8 +49,9 @@ def isUpdateRequired():
|
|||||||
timetableLength = mongo.getLength("timetable")
|
timetableLength = mongo.getLength("timetable")
|
||||||
log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents", "DBUG")
|
log.out(f"timetable.isUpdateRequired: timetable collection contains {timetableLength} documents", "DBUG")
|
||||||
timetableUpdateTime = mongo.metaCheckTime("timetable")
|
timetableUpdateTime = mongo.metaCheckTime("timetable")
|
||||||
log.out(f"timetable.isUpdateRequired: Timetable last updated at {timetableUpdateTime}", "INFO")
|
|
||||||
timetableDataAge = helpers.getAgeInSeconds(timetableUpdateTime)
|
timetableDataAge = helpers.getAgeInSeconds(timetableUpdateTime)
|
||||||
|
readable_age = str(timedelta(seconds=timetableDataAge))
|
||||||
|
log.out(f"timetable.isUpdateRequired: Timetable data age: {readable_age}", "INFO")
|
||||||
if (timetableDataAge >= twoDayinSecs and isAfter0800) or REBUILD:
|
if (timetableDataAge >= twoDayinSecs and isAfter0800) or REBUILD:
|
||||||
log.out(f"timetable.isUpdateRequired: timetable collection requires rebuild", "INFO")
|
log.out(f"timetable.isUpdateRequired: timetable collection requires rebuild", "INFO")
|
||||||
return "full"
|
return "full"
|
||||||
|
Reference in New Issue
Block a user