Recover lost branch into main

This commit is contained in:
Fred Boniface 2023-05-08 19:55:09 +01:00
parent e2ecc4a9fe
commit 326b9a4f7a
6 changed files with 1143 additions and 7 deletions

View File

@ -2,4 +2,5 @@ FROM python:3.11-alpine
COPY ./requirements.txt /app/requirements.txt COPY ./requirements.txt /app/requirements.txt
RUN [ "pip", "install", "-r", "/app/requirements.txt" ] RUN [ "pip", "install", "-r", "/app/requirements.txt" ]
COPY ./src /app/src COPY ./src /app/src
COPY ./data /app/data
CMD [ "python", "/app/src/main.py" ] CMD [ "python", "/app/src/main.py" ]

1046
data/pis/gwr.yaml Normal file

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,8 @@ certifi==2022.12.7
charset-normalizer==3.0.1 charset-normalizer==3.0.1
dnspython==2.3.0 dnspython==2.3.0
idna==3.4 idna==3.4
pyaml==21.10.1
pymongo==4.3.3 pymongo==4.3.3
PyYAML==6.0
requests==2.28.2 requests==2.28.2
urllib3==1.26.14 urllib3==1.26.14

View File

@ -14,7 +14,7 @@
# program. If not, see # program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE # https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
version = "1.1.1" version = "2.1.2"
print(f"main.py: Initialising db-manager v{version}") print(f"main.py: Initialising db-manager v{version}")
#Third Party Imports #Third Party Imports
@ -22,14 +22,20 @@ import os
import time import time
#Local Imports #Local Imports
import corpus, mongo import corpus, mongo, pis
import logger as log import logger as log
log.out("main.py: db-manager Initialised", "INFO") log.out("main.py: db-manager Initialised", "INFO")
#Ensure count document exists in meta: #Ensure count document exists in meta, wrap in while loop to prevent crashing if the DB is not ready:
# Wait for the database: keep retrying the meta-counter setup until it
# succeeds, so a MongoDB instance that is still starting does not crash
# db-manager.
dbReady = False
while not dbReady:
    try:
        mongo.metaCounters()
        dbReady = True
    except Exception as exc:
        # `Exception` rather than a bare `except:` so that Ctrl-C and
        # SystemExit can still stop the process while it is waiting.
        log.out(f'main.py: Database not ready, retrying in 5s: {exc}', "INFO")
        # Pause between attempts instead of spinning in a tight loop.
        time.sleep(5)
#Check & Update corpus/stations: #Check & Update corpus/stations:
# If older than 12 days then update # If older than 12 days then update
@ -47,11 +53,27 @@ log.out(f'main.py: Stations is {stationsAge}s old', "INFO")
# While the source of stations data is CORPUS, this statement is based on corpusAge, when/if changing the source, it should be changed to use stationsAge # While the source of stations data is CORPUS, this statement is based on corpusAge, when/if changing the source, it should be changed to use stationsAge
# if stationsAge is used now, there could be a situation where stationsAge tries to update but fails as corpusData doesn't exist. # if stationsAge is used now, there could be a situation where stationsAge tries to update but fails as corpusData doesn't exist.
if corpusAge > 1036800: if corpusAge > 1036800:
log.out('main.py: Updating stations data until it is 1036800s old.', "INFO") log.out('main.py: Updating stations data', "INFO")
stationData = corpus.onlyStations(corpusData) stationData = corpus.onlyStations(corpusData)
mongo.putBulkStations(stationData) mongo.putBulkStations(stationData)
else: else:
log.out('main.py: Not updating stations data', "INFO") log.out('main.py: Not updating stations data until it is 1036800s old.', "INFO")
#Check & Update pis data:
# If older than 2 days then update
PIS_MAX_AGE = 172800  # 2 days, in seconds
pisAge = int(time.time()) - mongo.metaCheckTime("pis")
log.out(f'main.py: PIS Data is {pisAge}s old', "INFO")
if pisAge > PIS_MAX_AGE:
    log.out('main.py: Updating PIS data', "INFO")
    pisData = pis.load()
    pisParsed = pis.parse(pisData)
    mongo.putBulkPis(pisParsed)
else:
    # Report the threshold that is actually applied above.
    log.out(f'main.py: Not updating PIS data until it is {PIS_MAX_AGE}s old', "INFO")

# Request TTL indexes; the third argument is passed on as expireAfterSeconds.
log.out('main.py: Requesting TTL Index Creation', "INFO")
mongo.createTtlIndex("users", "atime", 2629800)  # 2629800 s = 30.4375 days
mongo.createTtlIndex("registrations", "time", 1800)  # 1800 s = 30 minutes
# Push version number to database for reporting # Push version number to database for reporting
mongo.putVersion(version) mongo.putVersion(version)

View File

@ -44,6 +44,11 @@ def createSingleIndex(collection, field):
log.out(f'mongo.createSingleIndex: Created index of {field} in {collection}', "INFO") log.out(f'mongo.createSingleIndex: Created index of {field} in {collection}', "INFO")
return return
def createTtlIndex(collection, field, time):
    """Create a TTL index on `field` in `collection`.

    `time` is handed to `create_index` as `expireAfterSeconds`. The
    creation is then reported through the module logger at INFO level.
    Returns None.
    """
    db[collection].create_index(field, expireAfterSeconds=time)
    message = f'mongo.createTtlIndex: Created TTL Index of {field} in {collection} to expire after {time} seconds'
    log.out(message, "INFO")
def putBulkCorpus(data): def putBulkCorpus(data):
collection = "corpus" collection = "corpus"
startCount = getLength(collection) startCount = getLength(collection)
@ -83,6 +88,23 @@ def putBulkStations(data):
metaUpdateTime(collection) metaUpdateTime(collection)
return return
def putBulkPis(data):
    """Replace the contents of the `pis` collection with `data`.

    Increments the `pis` counter, drops the existing documents (if any),
    bulk-inserts `data`, logs the resulting counts and finally updates the
    collection's meta time.

    Args:
        data: the documents to insert; must not be empty.

    Raises:
        ValueError: if `data` is empty. This is raised before anything is
            touched, so the existing documents are kept.
    """
    if not data:
        # insert_many() rejects an empty list; failing here, before the
        # drop, avoids wiping the collection and then having nothing to
        # put back.
        raise ValueError("mongo.putBulkPis: no documents supplied, pis collection left untouched")
    collection = "pis"
    startCount = getLength(collection)
    col = db[collection]
    incrementCounter(collection)
    if startCount > 0:
        log.out(f'mongo.putBulkPis: Dropping {startCount} pis documents', "INFO")
        col.drop()
    col.insert_many(data)
    endCount = getLength(collection)
    log.out(f'mongo.putBulkPis: {endCount} documents inserted', "INFO")
    # Difference between the count after the insert and the count before the drop.
    log.out(f'mongo.putBulkPis: {endCount-startCount} new documents', "INFO")
    log.out('mongo.putBulkPis: Updating meta time', "INFO")
    metaUpdateTime(collection)
    return
def incrementCounter(target): def incrementCounter(target):
collection = "meta" collection = "meta"
col = db[collection] col = db[collection]

43
src/pis.py Normal file
View File

@ -0,0 +1,43 @@
import yaml
# Executed at import time: writes a marker line to stdout when this module is loaded.
print("PIS Module imported")
def load():
    """Read /app/data/pis/gwr.yaml and return the value of its top-level `pis` key.

    TODO: programmatically add a `toc` field to each entry (not done here).

    Raises:
        yaml.YAMLError: if the file cannot be parsed as YAML. The error is
            printed and then re-raised, so callers never receive an
            exception object in place of the data.
    """
    with open("/app/data/pis/gwr.yaml", "r", encoding="utf-8") as data:
        try:
            pis = yaml.safe_load(data)
        except yaml.YAMLError as exc:
            print(exc)
            raise
    # NOTE(review): this dumps the whole parsed document to stdout - confirm
    # it is still wanted outside of debugging.
    print(pis)
    return pis["pis"]
## TODO: if any pis["pis"]["stops"][0] field contains 'reverse', look up the stops for the code stored in pis["pis"]["stops"][1],
## reverse that list of stops and store it for the entry.
def parse(codeList):
    """Remove entries whose stopping pattern duplicates an earlier entry.

    An entry is dropped when an entry already kept has equal `stops` but a
    different `code`; the first entry with a given pattern wins. Entries
    that share both `stops` and `code` are all kept.

    Args:
        codeList: list of dicts, each with a `stops` and a `code` key.

    Returns:
        The same list object, updated in place, containing only the kept
        entries in their original order.
    """
    StartLen = len(codeList)
    print(f"pis.parse: codeList starting length: {StartLen}")
    # Build the result separately: removing items from a list while it is
    # being iterated makes the iterator skip entries, which lets duplicates
    # slip through.
    unique = []
    for entry in codeList:
        is_duplicate = any(
            kept['stops'] == entry['stops'] and kept['code'] != entry['code']
            for kept in unique
        )
        if is_duplicate:
            print(f"Identical stopping pattern found: {entry['code']}")
            continue
        unique.append(entry)
    # Slice-assign so callers holding a reference to the original list see
    # the de-duplicated contents as well.
    codeList[:] = unique
    print(f"pis.parse: Removed {StartLen - len(codeList)} duplicates")
    return codeList
def devLoad():
    """Development variant of the loader: read gwr.yaml from a local checkout.

    Returns the value of the file's top-level `pis` key.

    TODO: programmatically add a `toc` field to each entry (not done here).

    Raises:
        yaml.YAMLError: if the file cannot be parsed as YAML. The error is
            printed and then re-raised, so callers never receive an
            exception object in place of the data.
    """
    with open("/home/fred.boniface/git/owlboard/db-manager/data/pis/gwr.yaml", "r", encoding="utf-8") as data:
        try:
            pis = yaml.safe_load(data)
        except yaml.YAMLError as exc:
            print(exc)
            raise
    print(pis)
    return pis["pis"]
def dev():
    """Development entry point: feed the output of devLoad() through parse().

    The parsed result is discarded; nothing is returned.
    """
    parse(devLoad())