Add validation steps
This commit is contained in:
parent
7d56f09c87
commit
be31e6cfe1
2990
output.txt
Normal file
2990
output.txt
Normal file
File diff suppressed because it is too large
Load Diff
52
src/main.py
52
src/main.py
@ -3,7 +3,10 @@ import sys
|
||||
import glob
|
||||
import datetime
|
||||
import json
|
||||
|
||||
import parse_pdf
|
||||
import train_detail
|
||||
import validate
|
||||
|
||||
# List all PDF files in the given directory
|
||||
def list_pdf_files(directory):
|
||||
@ -72,11 +75,52 @@ def main():
|
||||
for pdf_file in pdf_files:
|
||||
schedule_cards.append(get_schedule_card_data(pdf_file))
|
||||
|
||||
# print(schedule_cards)
|
||||
out = open("output.txt", "w")
|
||||
out.write(json.dumps(schedule_cards, indent=4, default=str))
|
||||
out.close()
|
||||
# Iterate over schedule cards and fetch train data
|
||||
trains = []
|
||||
existing_codes = []
|
||||
for schedule_card in schedule_cards:
|
||||
date = schedule_card['schedule_date']
|
||||
for schedule in schedule_card['schedule_data']:
|
||||
|
||||
## Skip over schedules which have no PIS Code
|
||||
if schedule['pis_code'] is None:
|
||||
continue
|
||||
|
||||
## Check if PIS code already processed, if so skip it. Else add it to 'existing_codes'
|
||||
if schedule['pis_code'] in existing_codes:
|
||||
continue
|
||||
|
||||
existing_codes.append(schedule['pis_code'])
|
||||
|
||||
full_schedule = train_detail.find_gw_trains_by_headcode(schedule['headcode'], date)
|
||||
train = {
|
||||
'schedule_date': date,
|
||||
'schedule_headcode': schedule['headcode'],
|
||||
'schedule_location': schedule['location'],
|
||||
'schedule_time1': schedule['time0'],
|
||||
'schedule_time2': schedule['time1'],
|
||||
'schedule_pis_code': schedule['pis_code'],
|
||||
'timetable_entry': full_schedule,
|
||||
}
|
||||
|
||||
## Filter out values missing schedule details or with matching PIS Codes
|
||||
if not full_schedule:
|
||||
continue # Skip if timetable entry was not found
|
||||
|
||||
if len(train['timetable_entry']) == 1 and train['schedule_pis_code'] == train['timetable_entry'][0].get('pis', {}).get('code'):
|
||||
continue # Skip if only one possible timetable entry exists, and PIS code matches OwlBoards suggestion
|
||||
|
||||
trains.append(train)
|
||||
|
||||
organised_for_processing = train_detail.organise_trains(trains)
|
||||
|
||||
auto_matched = validate.filter_timetable_entries(organised_for_processing)
|
||||
|
||||
|
||||
# print(trains)
|
||||
out = open("output.txt", "w")
|
||||
out.write(json.dumps(auto_matched, indent=4, default=str))
|
||||
out.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
65
src/train_detail.py
Normal file
65
src/train_detail.py
Normal file
@ -0,0 +1,65 @@
|
||||
from pyOwlBoard import client
|
||||
|
||||
# Shared OwlBoard API client used by the lookup functions in this module.
# NOTE(review): the second argument ('x') looks like a placeholder credential - confirm before running.
ob_client = client.OwlBoardClient('https://owlboard.info', 'x')
|
||||
|
||||
def find_gw_trains_by_headcode(headcode, date):
    """Look up the 'GW' and 'HX' services running under a headcode on a date.

    Args:
        headcode: Headcode passed to the OwlBoard headcode search.
        date: Date passed to both OwlBoard lookups.

    Returns:
        A list with one item per matching service, each being whatever
        ``ob_client.get_trains_by_trainUid`` returns for that service.
        The list is empty when the search yields no 'GW' or 'HX' services.
    """
    train_list = ob_client.get_trains_by_headcode(headcode, date)

    # Filter for 'GW' & 'HX' services only. Building a new list avoids the
    # repeated list.remove() scans and leaves the client's list untouched.
    gw_trains = [
        train for train in train_list
        if train['operator'] in ('GW', 'HX')
    ]

    # Get details for any services remaining in list
    return [
        ob_client.get_trains_by_trainUid(train['trainUid'], date)
        for train in gw_trains
    ]
|
||||
|
||||
# Organize list of trains removing unimportant information
|
||||
def organise_trains(trains):
    """Reshape train records into the layout used for validation.

    Args:
        trains: Iterable of dicts carrying the ``schedule_*`` keys and a
            ``timetable_entry`` list.

    Returns:
        A new list of dicts with ``diagram_*`` / ``train_headcode`` keys and
        the timetable entries reduced by ``rationalise_timetable_entry``.
        ``schedule_location`` is not carried over.
    """
    return [
        {
            'diagram_date': record['schedule_date'],
            'train_headcode': record['schedule_headcode'],
            'diagram_time1': record['schedule_time1'],
            'diagram_time2': record['schedule_time2'],
            'diagram_pis_code': record['schedule_pis_code'],
            'timetable_entries': rationalise_timetable_entry(record['timetable_entry']),
        }
        for record in trains
    ]
|
||||
|
||||
# Convert timetable entry into a list of CRS Codes for PIS identification
|
||||
def rationalise_timetable_entry(timetable_entries):
    """Reduce timetable entries to the fields needed for PIS identification.

    Args:
        timetable_entries: Iterable of timetable entry dicts, each with the
            schedule keys copied below and a ``stops`` list.

    Returns:
        A list of dicts, one per entry. ``trainStartTime`` is the first
        stop's ``wttDeparture``, falling back to ``publicDeparture``; it is
        None when neither is set or the entry has no stops. ``stops`` is the
        result of ``create_crs_list_from_stops`` for the entry's stops.
    """
    rationalised_entries = []
    for timetable_entry in timetable_entries:
        stops = timetable_entry['stops']
        # An entry without stops has no origin to read a start time from;
        # fall back to an empty mapping rather than raising IndexError.
        origin = stops[0] if stops else {}

        rationalised_entries.append({
            'stpIndicator': timetable_entry['stpIndicator'],
            'operator': timetable_entry['operator'],
            'trainUid': timetable_entry['trainUid'],
            'scheduleStart': timetable_entry['scheduleStart'],
            'scheduleEnd': timetable_entry['scheduleEnd'],
            'daysRun': timetable_entry['daysRun'],
            'trainStartTime': origin.get('wttDeparture') or origin.get('publicDeparture'),
            'stops': create_crs_list_from_stops(stops),
        })
    return rationalised_entries
|
||||
|
||||
# Work through list of stops, and create a list of CRS codes for public stops
|
||||
def create_crs_list_from_stops(stops):
    """Build the list of location codes for the public stops of a service.

    Args:
        stops: Iterable of stop dicts with ``isPublic`` and ``tiploc`` keys.

    Returns:
        For each stop whose ``isPublic`` is truthy, in order, the ``3ALPHA``
        value of the first reference record that
        ``ob_client.get_loc_ref_codes_by_tiploc`` returns for its TIPLOC.
    """
    return [
        ob_client.get_loc_ref_codes_by_tiploc(call['tiploc'])[0]['3ALPHA']
        for call in stops
        if call['isPublic']
    ]
|
36
src/validate.py
Normal file
36
src/validate.py
Normal file
@ -0,0 +1,36 @@
|
||||
## Validates and filters input based on whether diagram time matches schedule start time
|
||||
def filter_timetable_entries(diagram_entries):
    """Narrow each diagram entry to the timetable entry matching its time.

    For every entry, the two diagram times (with '.' characters removed) are
    compared with the ``trainStartTime`` of each timetable entry. When
    exactly one timetable entry matches, ``timetable_entries`` is replaced by
    a list holding only that entry and ``verification`` is set. Entries with
    no match, or more than one, are left unchanged.

    Args:
        diagram_entries: List of dicts with optional ``diagram_time1``,
            ``diagram_time2`` and ``timetable_entries`` keys.

    Returns:
        The same ``diagram_entries`` list, with its entries updated in place.
    """
    for entry in diagram_entries:
        # Collect the usable diagram times. Blank or missing times are
        # dropped so they can never "match" a blank train start time.
        diagram_times = []
        for key in ('diagram_time1', 'diagram_time2'):
            diagram_time = (entry.get(key) or '').replace('.', '')
            if diagram_time:
                diagram_times.append(diagram_time)

        timetable_entries = entry.get('timetable_entries', [])

        # Check for match
        matching_entries = [
            t for t in timetable_entries
            if t['trainStartTime'] in diagram_times
        ]

        # Only a unique match is trusted; ambiguous matches need manual review
        if len(matching_entries) == 1:
            entry['timetable_entries'] = matching_entries
            entry['verification'] = "AUTO: Diagram/Schedule Time Match"

    return diagram_entries
|
||||
|
||||
## Checks OwlBoard API for existing PIS codes and whether they match
|
||||
def check_and_validate_against_owlboard(train_entries):
    """Check entries against PIS codes held by OwlBoard (not yet implemented).

    Planned behaviour, for each item of ``train_entries`` (the output of
    ``filter_timetable_entries``, currently written to output.txt):

    * code exists in the OwlBoard API and matches: remove it from the list;
    * code exists but does not match: open an issue;
    * code does not exist: do nothing.

    The remaining list is then returned.

    Args:
        train_entries: List of entries to check.

    Raises:
        NotImplementedError: Always, until the checks above are written.
    """
    # A function needs a real body for the module to import at all; fail
    # loudly at call time rather than silently returning unchecked data.
    raise NotImplementedError(
        "check_and_validate_against_owlboard is not implemented yet"
    )
|
Loading…
Reference in New Issue
Block a user