# Patch summary
#   src/corpus.py - onlyStations copies each item that has a `3ALPHA` key and drops
#                   the NLC, NLCDESC16 and UIC keys from the copy before collecting it.
#   src/main.py   - calls mongo.metaCounters().
#   src/mongo.py  - metaCheckTime returns 0 when no `updated` value is found;
#                   adds metaCounters, which creates the counters document in `meta`.
#
# Review changes made to the added lines of this patch:
#   - metaCheckTime / metaCounters: guard against find_one returning None.
#   - metaCounters: update_one now uses a `$set` update with upsert=True.
#   - metaCheckTime: log tags now name metaCheckTime.
#   - onlyStations: comment corrected to name the `3ALPHA` key that the code tests.
#
# NOTE(review): line breaks and indentation were reconstructed from a flattened copy
# of this patch (4-space indentation assumed, blank context lines placed by best
# guess) - confirm the context lines against the target files before applying.
# NOTE(review): the `index` lines are kept as submitted; the post-image blob ids for
# corpus.py and mongo.py do not correspond to the modified additions.
diff --git a/src/corpus.py b/src/corpus.py
index 6e564b4..91f5d3c 100644
--- a/src/corpus.py
+++ b/src/corpus.py
@@ -40,13 +40,19 @@ def removeEmpty(data):
 def onlyStations(data):
     # DATA: List of Dictionaries
-    # Iterates through the list and checks each item (which is a dictionary) for the STANOX key
-    # If the 3ALPHA exists the dictionary will be appended to the `workingList` which is returned once iteration is complete
+    # Iterates through the list and checks each item (which is a dictionary) for the `3ALPHA` key
+    # If `3ALPHA` exists the item is copied into `workingDict`, the NLC, NLCDESC16 and UIC keys are dropped
+    # and the copy is appended to `workingList`, which is finally returned.
     preLength = len(data)
     log.out(f"corpus.onlyStations: Removing items without `3ALPHA` from {preLength} documents", "INFO")
     workingList = []
     for item in data:
+        workingDict = {}
         if '3ALPHA' in item:
-            workingList.append(item)
+            workingDict.update(item)
+            workingDict.pop("NLC", False)
+            workingDict.pop("NLCDESC16", False)
+            workingDict.pop("UIC", False)
+            workingList.append(workingDict)
     postLength = len(workingList)
     log.out(f"corpus.onlyStations: Removed {preLength - postLength} documents", "INFO")
     log.out(f"Yes, I am aware there are not {postLength} stations but the data includes NI, TfL, some bus, tram and closed stations too","OTHR")
diff --git a/src/main.py b/src/main.py
index 81cc2c1..204e531 100644
--- a/src/main.py
+++ b/src/main.py
@@ -50,7 +50,7 @@ else:
     log.out('main.py: Not updating stations data', "INFO")
 
 #Ensure count document exists in meta:
-
+mongo.metaCounters()
 
 # END
 log.out(f"main.py: db-manager v{version} Complete", "INFO")
\ No newline at end of file
diff --git a/src/mongo.py b/src/mongo.py
index 563dd34..44f527d 100644
--- a/src/mongo.py
+++ b/src/mongo.py
@@ -18,8 +18,13 @@ db = client[db_name]
 def metaCheckTime(target):
     col = db["meta"]
     res = col.find_one({"target": target, "type": "collection"})
-    log.out(f'mongo.metaUpdateTime: {target} last updated at {res["updated"]}', "INFO")
-    return res["updated"]
+    # find_one returns None when nothing matches, so check that before testing for the key
+    if res is not None and 'updated' in res:
+        log.out(f'mongo.metaCheckTime: {target} last updated at {res["updated"]}', "INFO")
+        return res["updated"]
+    else:
+        log.out(f'mongo.metaCheckTime: {target} does not exist', "INFO")
+        return 0
 
 def metaUpdateTime(target):
     col = db["meta"]
@@ -72,4 +77,20 @@ def putBulkStations(data):
     createSingleIndex(collection, "TIPLOC")
     log.out('mongo.putBulkStations: Updating meta time',"INFO")
     metaUpdateTime(collection)
+    return
+
+def metaCounters():
+    # Ensures the `counters` document exists in the `meta` collection,
+    # creating it with a `since` value of int(time.time()) when it is missing
+    collection = "meta"
+    col = db[collection]
+    query = {"target": "counters","type": "count"}
+    res = col.find_one(query)
+    # find_one returns None when nothing matches, which also means the document must be created
+    if res is None or 'since' not in res:
+        log.out('mongo.metaCounters: counters does not exist, creating', "INFO")
+        # update_one only accepts update operators; upsert=True inserts the document when it is absent
+        col.update_one(query, {"$set": {"since": int(time.time())}}, upsert=True)
+    else:
+        log.out('mongo.metaCounters: counters already exists, skipping', "INFO")
     return
\ No newline at end of file