Now updates corpus and stations
This commit is contained in:
parent
72445785a5
commit
b9bb1b1afa
@ -40,12 +40,12 @@ def removeEmpty(data):
|
||||
def onlyStations(data):
|
||||
# DATA: List of Dictionaries
|
||||
# Iterates through the list and checks each item (which is a dictionary) for the STANOX key
|
||||
# If the STANOX exists the dictionary will be appended to the `workingList` which is returned once iteration is complete
|
||||
# If the 3ALPHA exists the dictionary will be appended to the `workingList` which is returned once iteration is complete
|
||||
preLength = len(data)
|
||||
log.out(f"corpus.onlyStations: Removing items without STANOX from {preLength} datapoints", "INFO")
|
||||
workingList = []
|
||||
for item in data:
|
||||
if 'STANOX' in item:
|
||||
if '3ALPHA' in item:
|
||||
workingList.append(item)
|
||||
postLength = len(workingList)
|
||||
log.out(f"corpus.onlyStations: Removed {preLength - postLength} datapoints", "INFO")
|
||||
|
37
src/main.py
37
src/main.py
@ -14,19 +14,40 @@
|
||||
# program. If not, see
|
||||
# https://git.fjla.uk/OwlBoard/db-manager/src/branch/main/LICENSE
|
||||
|
||||
print("main.py: Initialising db-manager")
|
||||
version = "0.1.0"
|
||||
print(f"main.py: Initialising db-manager v{version}")
|
||||
|
||||
#Standard Library Imports
|
||||
import os
|
||||
import time
|
||||
|
||||
#Local Imports
|
||||
import corpus
|
||||
#import mongo
|
||||
import corpus, mongo
|
||||
import logger as log
|
||||
|
||||
#This is only a test run:
|
||||
rawCorpus = corpus.fetch()
|
||||
cleanCorpus = corpus.removeEmpty(rawCorpus)
|
||||
stationsOnly = corpus.onlyStations(cleanCorpus)
|
||||
print("DONE")
|
||||
log.out("main.py: db-manager Initialised", "INFO")
|
||||
|
||||
#Check & Update corpus/stations:
# Data older than 12 days (12 * 24 * 60 * 60 = 1036800 seconds) is refreshed
maxAgeSeconds = 1036800

corpusAge = int(time.time()) - mongo.metaCheckTime("corpus")
log.out(f'main.py: Corpus is {corpusAge}s old', "INFO")
# Evaluated once: the same decision drives both the CORPUS and stations updates
corpusIsStale = corpusAge > maxAgeSeconds
if not corpusIsStale:
    log.out('main.py: Not updating CORPUS data', "INFO")
else:
    log.out('main.py: Updating CORPUS data', "INFO")
    corpusData = corpus.removeEmpty(corpus.fetch())
    mongo.putBulkCorpus(corpusData)

stationsAge = int(time.time()) - mongo.metaCheckTime("stations")
log.out(f'main.py: Stations is {stationsAge}s old', "INFO")
# Stations data is derived from CORPUS, so this decision follows corpusAge
# rather than stationsAge: corpusData only exists when the CORPUS update above
# has run. If the source of stations data changes, switch this check to
# stationsAge.
if not corpusIsStale:
    log.out('main.py: Not updating stations data', "INFO")
else:
    log.out('main.py: Updating stations data', "INFO")
    stationData = corpus.onlyStations(corpusData)
    mongo.putBulkStations(stationData)

# END
log.out(f"main.py: db-manager v{version} Complete", "INFO")
|
59
src/mongo.py
59
src/mongo.py
@ -1,9 +1,62 @@
|
||||
import os
|
||||
from pymongo import MongoClient
|
||||
import time
|
||||
import urllib.parse
|
||||
import logger as log
|
||||
|
||||
log.out("mongo.py: Fetching configuration", "INFO")
|
||||
db_url = os.getenv('OWL_DB_HOST') + ":" + os.getenv('OWL_DB_PORT')
|
||||
db_user = urllib.parse.quote_plus(os.getenv('OWL_DB_USER'))
|
||||
db_pass = urllib.parse.quote_plus(os.getenv('OWL_DB_PASS'))
|
||||
db_host = os.getenv('OWL_DB_HOST', 'localhost')
|
||||
db_port = os.getenv('OWL_DB_PORT', 27017)
|
||||
db_user = urllib.parse.quote_plus(os.getenv('OWL_DB_USER', "owl"))
|
||||
db_pass = urllib.parse.quote_plus(os.getenv('OWL_DB_PASS', "twittwoo"))
|
||||
db_name = os.getenv('OWL_DB_NAME', "owlboard")
|
||||
|
||||
client = MongoClient(f"mongodb://{db_user}:{db_pass}@{db_host}:{db_port}")
|
||||
db = client[db_name]
|
||||
|
||||
def metaCheckTime(target):
    """Return the recorded last-update time for `target`.

    Looks up the document in the "meta" collection whose "target" field
    equals `target` and returns its "updated" value (metaUpdateTime stores
    this as whole epoch seconds).

    Returns 0 when no such document exists yet, or when it has no "updated"
    field. A caller computing an age from the result then sees the target
    as never updated, instead of crashing on the first run against an empty
    database.
    """
    record = db["meta"].find_one({"target": target})
    # find_one yields None when nothing matches; the meta document is only
    # created by metaUpdateTime after the first successful update.
    if record is None or "updated" not in record:
        log.out(f'mongo.metaCheckTime: No update time recorded for {target}', "INFO")
        return 0
    updated = record["updated"]
    log.out(f'mongo.metaCheckTime: {target} last updated at {updated}', "INFO")
    return updated
|
||||
|
||||
def metaUpdateTime(target):
    """Record the current time as the last-update time for `target`.

    Sets "updated" to the current epoch time (whole seconds) on the "meta"
    document whose "target" field equals `target`; the upsert creates that
    document when it does not exist yet.
    """
    metaCollection = db["meta"]
    log.out(f'mongo.metaUpdateTime: Updating updated time for {target}', "INFO")
    selector = {"target": target}
    change = {"$set": {"updated": int(time.time())}}
    metaCollection.update_one(selector, change, upsert=True)
|
||||
|
||||
def getLength(collection):
    """Return the number of documents in the collection named `collection`."""
    # An empty filter matches every document in the collection.
    return db[collection].count_documents({})
|
||||
|
||||
def putBulkCorpus(data):
    """Replace the contents of the "corpus" collection with `data`.

    Existing documents are dropped, `data` is bulk-inserted, the resulting
    document counts are logged and the "corpus" meta time is refreshed.

    data: non-empty list of dictionaries to insert.

    Raises ValueError when `data` is empty. The check runs before anything
    is dropped, so a failed or empty fetch cannot wipe the existing
    collection.
    """
    if not data:
        # insert_many rejects an empty document list; failing here, before
        # the drop, keeps the currently stored CORPUS data intact.
        raise ValueError("mongo.putBulkCorpus: no documents supplied, CORPUS left unchanged")
    startCount = getLength("corpus")
    col = db["corpus"]
    if startCount > 0:
        log.out(f'mongo.putBulkCorpus: Dropping {startCount} CORPUS documents', "INFO")
        col.drop()
    col.insert_many(data)
    endCount = getLength("corpus")
    log.out(f'mongo.putBulkCorpus: {endCount} documents inserted', "INFO")
    # Net change compared with the collection size before the replacement
    log.out(f'mongo.putBulkCorpus: {endCount - startCount} new documents', "INFO")
    #log.out('mongo.putBulkCorpus: Building collection indexes',"INFO")
    # ADD INDEXES HERE
    log.out('mongo.putBulkCorpus: Updating meta time',"INFO")
    metaUpdateTime("corpus")
|
||||
|
||||
def putBulkStations(data):
    """Replace the contents of the "stations" collection with `data`.

    Existing documents are dropped, `data` is bulk-inserted, the resulting
    document counts are logged and the "stations" meta time is refreshed.

    data: non-empty list of dictionaries to insert.

    Raises ValueError when `data` is empty. The check runs before anything
    is dropped, so an empty station list cannot wipe the existing
    collection.
    """
    if not data:
        # insert_many rejects an empty document list; failing here, before
        # the drop, keeps the currently stored station data intact.
        raise ValueError("mongo.putBulkStations: no documents supplied, stations left unchanged")
    startCount = getLength("stations")
    col = db["stations"]
    if startCount > 0:
        log.out(f'mongo.putBulkStations: Dropping {startCount} station documents', "INFO")
        col.drop()
    col.insert_many(data)
    endCount = getLength("stations")
    log.out(f'mongo.putBulkStations: {endCount} documents inserted', "INFO")
    # Net change compared with the collection size before the replacement
    log.out(f'mongo.putBulkStations: {endCount - startCount} new documents', "INFO")
    #log.out('mongo.putBulkStations: Building collection indexes',"INFO")
    # ADD INDEXES HERE
    log.out('mongo.putBulkStations: Updating meta time',"INFO")
    metaUpdateTime("stations")
|
Reference in New Issue
Block a user