Add indexes

This commit is contained in:
Fred Boniface 2023-02-12 20:53:59 +00:00
parent 4c10cb6667
commit e2a00aa5e3
2 changed files with 29 additions and 17 deletions

View File

@ -8,7 +8,7 @@ import json
# Endpoint the CORPUS supporting file is downloaded from.
CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"

# Fetch configuration: the CORPUS credentials are read from the environment.
# os.getenv returns None when a variable is unset.
log.out("corpus.py: Fetching CORPUS Configuration", "INFO")
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
@ -27,7 +27,7 @@ def removeEmpty(data):
# Appends the working dictionary to the 'workingList' before moving on to the next item in the list
# Finally returns the 'workingList' which is the CORPUS data without any " " values.
corpusLength = len(data)
log.out(f"corpus.removeEmpty: Removing empty strings from {corpusLength} CORPUS datapoints", "INFO")
log.out(f"corpus.removeEmpty: Removing empty strings from {corpusLength} CORPUS documents", "INFO")
workingList = []
for item in data:
workingDict = {}
@ -42,12 +42,12 @@ def onlyStations(data):
# Iterates through the list and checks each item (which is a dictionary) for the 3ALPHA key
# If the 3ALPHA exists the dictionary will be appended to the `workingList` which is returned once iteration is complete
preLength = len(data)
log.out(f"corpus.onlyStations: Removing items without STANOX from {preLength} datapoints", "INFO")
log.out(f"corpus.onlyStations: Removing items without `3ALPHA` from {preLength} documents", "INFO")
workingList = []
for item in data:
if '3ALPHA' in item:
workingList.append(item)
postLength = len(workingList)
log.out(f"corpus.onlyStations: Removed {preLength - postLength} datapoints", "INFO")
log.out(f"corpus.onlyStations: Removed {preLength - postLength} documents", "INFO")
log.out(f"Yes, I am aware there are not {postLength} stations but the data includes NI, TfL, some bus, tram and closed stations too","OTHR")
return workingList

View File

@ -11,7 +11,7 @@ db_user = urllib.parse.quote_plus(os.getenv('OWL_DB_USER', "owl"))
# The password is percent-encoded with quote_plus so that special characters
# are safe to embed in the connection URI built below.
db_pass = urllib.parse.quote_plus(os.getenv('OWL_DB_PASS', "twittwoo"))
db_name = os.getenv('OWL_DB_NAME', "owlboard")

# Only host and port are logged; the credentials are kept out of the log line.
log.out(f"mongo.py: Connecting to database at {db_host}:{db_port}", "INFO")
client = MongoClient(f"mongodb://{db_user}:{db_pass}@{db_host}:{db_port}")
db = client[db_name]
@ -30,34 +30,46 @@ def getLength(collection):
col = db[collection]
return col.count_documents({})
def createSingleIndex(collection, field):
    """Create an index on `field` in the named collection and log it.

    collection: name of the collection, looked up on the module-level `db`.
    field: index specification, passed unchanged to `create_index`.

    Returns None.
    """
    db[collection].create_index(field)
    log.out(f'mongo.createSingleIndex: Created index of {field} in {collection}', "INFO")
def putBulkCorpus(data):
    """Replace the contents of the `corpus` collection with `data`.

    Existing documents are dropped, `data` is bulk-inserted, the `NLC` and
    `3ALPHA` indexes are built, and the collection's meta update time is
    refreshed through `metaUpdateTime`.

    data: the documents to insert; handed directly to `insert_many`.

    Returns None.
    """
    collection = "corpus"
    col = db[collection]
    startCount = getLength(collection)
    if startCount > 0:
        log.out(f'mongo.putBulkCorpus: Dropping {startCount} CORPUS documents', "INFO")
        col.drop()
    # NOTE(review): pymongo's insert_many rejects an empty document list, and
    # by this point the old data has already been dropped - confirm callers
    # never pass empty data.
    col.insert_many(data)
    endCount = getLength(collection)
    log.out(f'mongo.putBulkCorpus: {endCount} documents inserted', "INFO")
    # Net change compared with the document count taken before the drop.
    log.out(f'mongo.putBulkCorpus: {endCount - startCount} new documents', "INFO")
    # Dropping a collection discards its indexes, so they are built again
    # once the fresh documents are in place.
    log.out('mongo.putBulkCorpus: Building collection indexes',"INFO")
    createSingleIndex(collection, "NLC")
    createSingleIndex(collection, "3ALPHA")
    log.out('mongo.putBulkCorpus: Updating meta time',"INFO")
    metaUpdateTime(collection)
def putBulkStations(data):
    """Replace the contents of the `stations` collection with `data`.

    Existing documents are dropped, `data` is bulk-inserted, the `3ALPHA`,
    `STANOX` and `TIPLOC` indexes are built, and the collection's meta update
    time is refreshed through `metaUpdateTime`.

    data: the documents to insert; handed directly to `insert_many`.

    Returns None.
    """
    collection = "stations"
    col = db[collection]
    startCount = getLength(collection)
    if startCount > 0:
        log.out(f'mongo.putBulkStations: Dropping {startCount} station documents', "INFO")
        col.drop()
    # NOTE(review): pymongo's insert_many rejects an empty document list, and
    # by this point the old data has already been dropped - confirm callers
    # never pass empty data.
    col.insert_many(data)
    endCount = getLength(collection)
    log.out(f'mongo.putBulkStations: {endCount} documents inserted', "INFO")
    # Net change compared with the document count taken before the drop.
    log.out(f'mongo.putBulkStations: {endCount - startCount} new documents', "INFO")
    # Dropping a collection discards its indexes, so they are built again
    # once the fresh documents are in place.
    log.out('mongo.putBulkStations: Building collection indexes',"INFO")
    createSingleIndex(collection, "3ALPHA")
    createSingleIndex(collection, "STANOX")
    createSingleIndex(collection, "TIPLOC")
    log.out('mongo.putBulkStations: Updating meta time',"INFO")
    metaUpdateTime(collection)