Add indexes
This commit is contained in:
parent
4c10cb6667
commit
e2a00aa5e3
@ -8,7 +8,7 @@ import json
|
||||
CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"
|
||||
|
||||
#Fetch Configuration
|
||||
log.out("corpus: Fetching CORPUS Configuration", "INFO")
|
||||
log.out("corpus.py: Fetching CORPUS Configuration", "INFO")
|
||||
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
|
||||
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')
|
||||
|
||||
@ -27,7 +27,7 @@ def removeEmpty(data):
|
||||
# Appends the working dictionary to the 'workingList' before moving on to the next item in the list
|
||||
# Finally returns the 'workingList' which is the CORPUS data without any " " values.
|
||||
corpusLength = len(data)
|
||||
log.out(f"corpus.removeEmpty: Removing empty strings from {corpusLength} CORPUS datapoints", "INFO")
|
||||
log.out(f"corpus.removeEmpty: Removing empty strings from {corpusLength} CORPUS documents", "INFO")
|
||||
workingList = []
|
||||
for item in data:
|
||||
workingDict = {}
|
||||
@ -42,12 +42,12 @@ def onlyStations(data):
|
||||
# Iterates through the list and checks each item (which is a dictionary) for the 3ALPHA key
|
||||
# If the 3ALPHA exists the dictionary will be appended to the `workingList` which is returned once iteration is complete
|
||||
preLength = len(data)
|
||||
log.out(f"corpus.onlyStations: Removing items without STANOX from {preLength} datapoints", "INFO")
|
||||
log.out(f"corpus.onlyStations: Removing items without `3ALPHA` from {preLength} documents", "INFO")
|
||||
workingList = []
|
||||
for item in data:
|
||||
if '3ALPHA' in item:
|
||||
workingList.append(item)
|
||||
postLength = len(workingList)
|
||||
log.out(f"corpus.onlyStations: Removed {preLength - postLength} datapoints", "INFO")
|
||||
log.out(f"corpus.onlyStations: Removed {preLength - postLength} documents", "INFO")
|
||||
log.out(f"Yes, I am aware there are not {postLength} stations but the data includes NI, TfL, some bus, tram and closed stations too","OTHR")
|
||||
return workingList
|
38
src/mongo.py
38
src/mongo.py
@ -11,7 +11,7 @@ db_user = urllib.parse.quote_plus(os.getenv('OWL_DB_USER', "owl"))
|
||||
db_pass = urllib.parse.quote_plus(os.getenv('OWL_DB_PASS', "twittwoo"))
|
||||
db_name = os.getenv('OWL_DB_NAME', "owlboard")
|
||||
|
||||
log.out(f"mongo.py: Connecting to database at {db_host}:{db_port}", "INFOm")
|
||||
log.out(f"mongo.py: Connecting to database at {db_host}:{db_port}", "INFO")
|
||||
client = MongoClient(f"mongodb://{db_user}:{db_pass}@{db_host}:{db_port}")
|
||||
db = client[db_name]
|
||||
|
||||
@ -30,34 +30,46 @@ def getLength(collection):
|
||||
col = db[collection]
|
||||
return col.count_documents({})
|
||||
|
||||
def createSingleIndex(collection, field):
    """Create a single-field index on `field` in the named collection.

    Args:
        collection: Name of the collection, looked up on the module-level `db`.
        field: Field name passed to `create_index`.
    """
    db[collection].create_index(field)
    log.out(f'mongo.createSingleIndex: Created index of {field} in {collection}', "INFO")
|
||||
|
||||
|
||||
def putBulkCorpus(data):
    """Replace the contents of the "corpus" collection with `data` and index it.

    Existing documents (if any) are dropped, `data` is inserted, single-field
    indexes are created on "NLC" and "3ALPHA", and `metaUpdateTime` is called
    for the collection.

    Args:
        data: Documents to insert; passed unchanged to `insert_many`.

    Raises:
        TypeError: If `data` is empty or None. `insert_many` rejects empty
            input with the same exception type; raising before the drop
            means the existing collection is kept when there is nothing to
            replace it with.
    """
    collection = "corpus"
    # Validate before dropping so an empty payload cannot wipe existing data.
    if not data:
        raise TypeError(f"mongo.putBulkCorpus: refusing to replace '{collection}' with empty data")
    startCount = getLength(collection)
    col = db[collection]
    if startCount > 0:
        log.out(f'mongo.putBulkCorpus: Dropping {startCount} CORPUS documents', "INFO")
        col.drop()
    col.insert_many(data)
    endCount = getLength(collection)
    log.out(f'mongo.putBulkCorpus: {endCount} documents inserted', "INFO")
    # Net change relative to the collection size before the drop.
    log.out(f'mongo.putBulkCorpus: {endCount - startCount} new documents', "INFO")
    log.out('mongo.putBulkCorpus: Building collection indexes',"INFO")
    # Indexes are rebuilt on every load because drop() removes them too.
    createSingleIndex(collection, "NLC")
    createSingleIndex(collection, "3ALPHA")
    log.out('mongo.putBulkCorpus: Updating meta time',"INFO")
    metaUpdateTime(collection)
|
||||
|
||||
def putBulkStations(data):
    """Replace the contents of the "stations" collection with `data` and index it.

    Existing documents (if any) are dropped, `data` is inserted, single-field
    indexes are created on "3ALPHA", "STANOX" and "TIPLOC", and
    `metaUpdateTime` is called for the collection.

    Args:
        data: Documents to insert; passed unchanged to `insert_many`.

    Raises:
        TypeError: If `data` is empty or None. `insert_many` rejects empty
            input with the same exception type; raising before the drop
            means the existing collection is kept when there is nothing to
            replace it with.
    """
    collection = "stations"
    # Validate before dropping so an empty payload cannot wipe existing data.
    if not data:
        raise TypeError(f"mongo.putBulkStations: refusing to replace '{collection}' with empty data")
    startCount = getLength(collection)
    col = db[collection]
    if startCount > 0:
        log.out(f'mongo.putBulkStations: Dropping {startCount} station documents', "INFO")
        col.drop()
    col.insert_many(data)
    endCount = getLength(collection)
    log.out(f'mongo.putBulkStations: {endCount} documents inserted', "INFO")
    # Net change relative to the collection size before the drop.
    log.out(f'mongo.putBulkStations: {endCount - startCount} new documents', "INFO")
    log.out('mongo.putBulkStations: Building collection indexes',"INFO")
    # Indexes are rebuilt on every load because drop() removes them too.
    createSingleIndex(collection, "3ALPHA")
    createSingleIndex(collection, "STANOX")
    createSingleIndex(collection, "TIPLOC")
    log.out('mongo.putBulkStations: Updating meta time',"INFO")
    metaUpdateTime(collection)
|
Reference in New Issue
Block a user