"""Download Network Rail CORPUS reference data and filter it.

Importing this module logs one message and reads the CORPUS credentials
from the environment (``OWL_LDB_CORPUSUSER`` / ``OWL_LDB_CORPUSPASS``).
"""

# Imports
import json
import os
import zlib

import requests

import logger as log

CORPUS_URL = "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS"

# Default seconds to wait for the server to connect / send data before
# giving up, so a stalled connection cannot hang the caller indefinitely.
CORPUS_TIMEOUT = 60

# Fetch Configuration
log.out("corpus: Fetching CORPUS Configuration", "INFO")
CORPUS_USER = os.getenv('OWL_LDB_CORPUSUSER')
CORPUS_PASS = os.getenv('OWL_LDB_CORPUSPASS')


def fetch(timeout=CORPUS_TIMEOUT):
    """Download, decompress and parse the CORPUS file.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            (a number or a ``(connect, read)`` tuple).

    Returns:
        The value stored under the ``'TIPLOCDATA'`` key of the parsed JSON
        document (removeEmpty/onlyStations treat it as a list of dicts).

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a 4xx/5xx response (e.g. rejected credentials).
        zlib.error: If the response body is not valid gzip data.
        KeyError: If the parsed document has no ``'TIPLOCDATA'`` key.
    """
    log.out("corpus.fetch: Fetching CORPUS Data from Network Rail", "INFO")
    response = requests.get(
        CORPUS_URL,
        auth=(CORPUS_USER, CORPUS_PASS),
        timeout=timeout,
    )
    # Fail here with a clear HTTP error instead of letting an error page
    # reach the decompressor and blow up as an opaque zlib.error.
    response.raise_for_status()

    log.out("corpus.fetch: Decompressing & parsing CORPUS data", "INFO")
    # wbits = 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    raw = zlib.decompress(response.content, 16 + zlib.MAX_WBITS)
    parsed = json.loads(raw.decode())
    return parsed['TIPLOCDATA']


def removeEmpty(data):
    """Return a copy of *data* with every ``" "`` value dropped.

    Args:
        data: List of dictionaries (CORPUS datapoints).

    Returns:
        A new list holding one new dictionary per input item, containing
        only the key/value pairs whose value is not exactly ``" "`` (a
        single space). The input list and its dictionaries are not modified.
    """
    log.out(
        f"corpus.removeEmpty: Removing empty strings from {len(data)} CORPUS datapoints",
        "INFO",
    )
    return [
        {key: value for key, value in item.items() if value != " "}
        for item in data
    ]


def onlyStations(data):
    """Return only the items of *data* that contain a ``'3ALPHA'`` key.

    The test is key presence only, so a ``'3ALPHA'`` key holding ``" "``
    still counts; presumably this is meant to run on the output of
    removeEmpty, where such blank entries are already gone.

    Args:
        data: List of dictionaries (CORPUS datapoints).

    Returns:
        A new list with the dictionaries (same objects, original order)
        that have a ``'3ALPHA'`` key.
    """
    pre_length = len(data)
    # The filter is on 3ALPHA, so the message names that key rather than STANOX.
    log.out(
        f"corpus.onlyStations: Removing items without 3ALPHA from {pre_length} datapoints",
        "INFO",
    )
    stations = [item for item in data if '3ALPHA' in item]
    post_length = len(stations)
    log.out(
        f"corpus.onlyStations: Removed {pre_length - post_length} datapoints",
        "INFO",
    )
    log.out(f"Yes, I am aware there are not {post_length} stations but the data includes NI, TfL, some bus, tram and closed stations too","OTHR")
    return stations