Put data into the DB in an unordered manner

This commit is contained in:
Fred Boniface 2023-07-12 21:03:41 +01:00
parent 591cbb2e46
commit ec6e425a0d
3 changed files with 20 additions and 16 deletions

View File

@ -14,7 +14,7 @@
# program. If not, see # program. If not, see
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE # https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
version = "2023.7.1" version = "2023.7.4"
print(f"main.py: Initialising db-manager v{version}") print(f"main.py: Initialising db-manager v{version}")
#Third Party Imports #Third Party Imports

View File

@ -55,7 +55,7 @@ def putMany(collection :str, data :list, indexed_fields :list = []):
log.out(f"mongo.putMany: Inserting many documents to: {collection}") log.out(f"mongo.putMany: Inserting many documents to: {collection}")
col = db[collection] col = db[collection]
incrementCounter(collection) incrementCounter(collection)
col.insert_many(data) col.insert_many(data, ordered= False)
metaUpdateTime(collection) metaUpdateTime(collection)
for item in indexed_fields: for item in indexed_fields:
createSingleIndex(collection, item) createSingleIndex(collection, item)
@ -92,7 +92,7 @@ def putTimetable(data):
log.out(f"mongo.putTimetable: Adding {data_length} documents to the database") log.out(f"mongo.putTimetable: Adding {data_length} documents to the database")
collection = "timetable" collection = "timetable"
col = db[collection] col = db[collection]
res = col.insert_many(data) res = col.insert_many(data,ordered = False)
def dropCollection(collection): def dropCollection(collection):
log.out(f"mongo.dropCollection: Dropping collection '{collection}'") log.out(f"mongo.dropCollection: Dropping collection '{collection}'")

View File

@ -34,7 +34,7 @@ todayDay = now.strftime("%a").lower()
isAfter0800 = (int(now.strftime("%H")) >= 8) isAfter0800 = (int(now.strftime("%H")) >= 8)
filePath = "cif_data" filePath = "cif_data"
fullDataUrl = "https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_ALL_FULL_DAILY&day=toc-full" fullDataUrl = "https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_ALL_FULL_DAILY&day=toc-full"
updateDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_ALL_FULL_DAILY&day=toc-update-{yesterdayDay}" updateDataUrl = f"https://publicdatafeeds.networkrail.co.uk/ntrod/CifFileAuthenticate?type=CIF_ALL_UPDATE_DAILY&day=toc-update-{yesterdayDay}"
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER') CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS') CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
@ -58,6 +58,7 @@ def isUpdateRequired():
def getTimetable(full :bool = False): def getTimetable(full :bool = False):
downloadUrl :str = fullDataUrl if full else updateDataUrl downloadUrl :str = fullDataUrl if full else updateDataUrl
log.out(f"timetable.getTimetable: Attempting to download timetable data from {downloadUrl}")
response = requests.get(downloadUrl, auth=(CORPUS_USER, CORPUS_PASS)) response = requests.get(downloadUrl, auth=(CORPUS_USER, CORPUS_PASS))
mongo.incrementCounter("schedule_api") mongo.incrementCounter("schedule_api")
log.out(f"timetable.getTimetable: Fetch (Full:{full}) response: {response.status_code}", "DBUG") log.out(f"timetable.getTimetable: Fetch (Full:{full}) response: {response.status_code}", "DBUG")
@ -90,15 +91,18 @@ def loopTimetable(data):
def runUpdate(): def runUpdate():
required = isUpdateRequired() required = isUpdateRequired()
if (required == "full"): try:
log.out("timetable.runUpdate: Fetching full timetable data", "INFO") if (required == "full"):
data = getTimetable(full = True) log.out("timetable.runUpdate: Fetching full timetable data", "INFO")
elif (required == "update"): data = getTimetable(full = True)
log.out("timetable.runUpdate: Fetching todays timetable update", "INFO") elif (required == "update"):
data = getTimetable() log.out("timetable.runUpdate: Fetching todays timetable update", "INFO")
else: data = getTimetable()
log.out("timetable.runUpdate: timetable update is not needed", "INFO") else:
return "done" log.out("timetable.runUpdate: timetable update is not needed", "INFO")
return "done"
except Exception as e:
log.out(f"timetable.runUpdate: Error checking for update timetable: {e}")
parsed = loopTimetable(data) parsed = loopTimetable(data)
status = _insertToDb(parsed, required) status = _insertToDb(parsed, required)
if (status): if (status):
@ -139,21 +143,21 @@ def insertSchedule(sch_record):
def _insertToDb(data :list, type :str): def _insertToDb(data :list, type :str):
pre_count = mongo.getLength("timetable") pre_count = mongo.getLength("timetable")
create_transactions = []
try: try:
if type == "full": if type == "full":
mongo.dropCollection("timetable") mongo.dropCollection("timetable")
mongo.putTimetable(data) mongo.putTimetable(data)
mongo.createSingleIndex("timetable", "headcode") mongo.createSingleIndex("timetable", "headcode")
elif type == "update": elif type == "update":
create_transactions = []
for item in data: for item in data:
if item['transactionType'] == "Create": if item['transactionType'] == "Create":
create_transactions.append([item]) create_transactions.append(item)
elif item['transactionType'] == "Delete": elif item['transactionType'] == "Delete":
mongo.deleteTimetableData({'trainUid': item['trainUid'], 'scheduleStartDate': item['scheduleStartDate'], 'stpIndicator': item['stpIndicator']}) mongo.deleteTimetableData({'trainUid': item['trainUid'], 'scheduleStartDate': item['scheduleStartDate'], 'stpIndicator': item['stpIndicator']})
mongo.putTimetable(create_transactions) mongo.putTimetable(create_transactions)
post_count = mongo.getLength("timetable") post_count = mongo.getLength("timetable")
log.out(f"timetable._insertToDb: Document count difference after processing: {pre_count - post_count}", "DBUG") log.out(f"timetable._insertToDb: Document count difference after processing: {post_count - pre_count}", "DBUG")
return True #If Successfuls return True #If Successfuls
except Exception as e: except Exception as e:
log.out("timetable._insertToDb: Error inserting timetable data", "ERR") log.out("timetable._insertToDb: Error inserting timetable data", "ERR")