Add validation steps
This commit is contained in:
parent
7d56f09c87
commit
be31e6cfe1
2990
output.txt
Normal file
2990
output.txt
Normal file
File diff suppressed because it is too large
Load Diff
52
src/main.py
52
src/main.py
@ -3,7 +3,10 @@ import sys
|
|||||||
import glob
|
import glob
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import parse_pdf
|
import parse_pdf
|
||||||
|
import train_detail
|
||||||
|
import validate
|
||||||
|
|
||||||
# List all PDF files in the given directory
|
# List all PDF files in the given directory
|
||||||
def list_pdf_files(directory):
|
def list_pdf_files(directory):
|
||||||
@ -72,11 +75,52 @@ def main():
|
|||||||
for pdf_file in pdf_files:
|
for pdf_file in pdf_files:
|
||||||
schedule_cards.append(get_schedule_card_data(pdf_file))
|
schedule_cards.append(get_schedule_card_data(pdf_file))
|
||||||
|
|
||||||
# print(schedule_cards)
|
# Iterate over schedule cards and fetch train data
|
||||||
out = open("output.txt", "w")
|
trains = []
|
||||||
out.write(json.dumps(schedule_cards, indent=4, default=str))
|
existing_codes = []
|
||||||
out.close()
|
for schedule_card in schedule_cards:
|
||||||
|
date = schedule_card['schedule_date']
|
||||||
|
for schedule in schedule_card['schedule_data']:
|
||||||
|
|
||||||
|
## Skip over schedules which have no PIS Code
|
||||||
|
if schedule['pis_code'] is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
## Check if PIS code already processed, if so skip it. Else add it to 'existing_codes'
|
||||||
|
if schedule['pis_code'] in existing_codes:
|
||||||
|
continue
|
||||||
|
|
||||||
|
existing_codes.append(schedule['pis_code'])
|
||||||
|
|
||||||
|
full_schedule = train_detail.find_gw_trains_by_headcode(schedule['headcode'], date)
|
||||||
|
train = {
|
||||||
|
'schedule_date': date,
|
||||||
|
'schedule_headcode': schedule['headcode'],
|
||||||
|
'schedule_location': schedule['location'],
|
||||||
|
'schedule_time1': schedule['time0'],
|
||||||
|
'schedule_time2': schedule['time1'],
|
||||||
|
'schedule_pis_code': schedule['pis_code'],
|
||||||
|
'timetable_entry': full_schedule,
|
||||||
|
}
|
||||||
|
|
||||||
|
## Filter out values missing schedule details or with matching PIS Codes
|
||||||
|
if not full_schedule:
|
||||||
|
continue # Skip if timetable entry was not found
|
||||||
|
|
||||||
|
if len(train['timetable_entry']) == 1 and train['schedule_pis_code'] == train['timetable_entry'][0].get('pis', {}).get('code'):
|
||||||
|
continue # Skip if only one possible timetable entry exists, and PIS code matches OwlBoards suggestion
|
||||||
|
|
||||||
|
trains.append(train)
|
||||||
|
|
||||||
|
organised_for_processing = train_detail.organise_trains(trains)
|
||||||
|
|
||||||
|
auto_matched = validate.filter_timetable_entries(organised_for_processing)
|
||||||
|
|
||||||
|
|
||||||
|
# print(trains)
|
||||||
|
out = open("output.txt", "w")
|
||||||
|
out.write(json.dumps(auto_matched, indent=4, default=str))
|
||||||
|
out.close()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
65
src/train_detail.py
Normal file
65
src/train_detail.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
from pyOwlBoard import client
|
||||||
|
|
||||||
|
# Module-wide OwlBoard API client; the lookups in this file all go through it.
# NOTE(review): the second argument 'x' looks like a placeholder credential - confirm.
ob_client = client.OwlBoardClient('https://owlboard.info', 'x')
|
||||||
|
|
||||||
|
def find_gw_trains_by_headcode(headcode, date):
    """Fetch full details for every GW/HX service matching a headcode.

    Args:
        headcode: Headcode passed to ``ob_client.get_trains_by_headcode``.
        date: Date passed through unchanged to both OwlBoard lookups.

    Returns:
        A list holding one ``ob_client.get_trains_by_trainUid`` result per
        matching service whose ``operator`` is ``'GW'`` or ``'HX'``. The list
        is empty when no such service is found.
    """
    wanted_operators = ('GW', 'HX')
    candidates = ob_client.get_trains_by_headcode(headcode, date)

    # Build the result in a single pass rather than calling list.remove()
    # inside a loop: that approach was quadratic and edited the list handed
    # back by the client in place.
    return [
        ob_client.get_trains_by_trainUid(candidate['trainUid'], date)
        for candidate in candidates
        if candidate['operator'] in wanted_operators
    ]
|
||||||
|
|
||||||
|
# Organize list of trains removing unimportant information
|
||||||
|
def organise_trains(trains):
    """Reduce each train record to the fields kept for later processing.

    Args:
        trains: Iterable of dicts carrying ``schedule_date``,
            ``schedule_headcode``, ``schedule_time1``, ``schedule_time2``,
            ``schedule_pis_code`` and ``timetable_entry``.

    Returns:
        A new list with one dict per input record. The ``schedule_*`` values
        are re-keyed as ``diagram_*`` / ``train_headcode`` and
        ``timetable_entries`` holds the result of
        ``rationalise_timetable_entry`` for that record.
    """
    return [
        {
            'diagram_date': record['schedule_date'],
            'train_headcode': record['schedule_headcode'],
            'diagram_time1': record['schedule_time1'],
            'diagram_time2': record['schedule_time2'],
            'diagram_pis_code': record['schedule_pis_code'],
            'timetable_entries': rationalise_timetable_entry(
                record['timetable_entry']
            ),
        }
        for record in trains
    ]
|
||||||
|
|
||||||
|
# Convert timetable entry into a list of CRS Codes for PIS identification
|
||||||
|
def rationalise_timetable_entry(timetable_entries):
    """Summarise timetable entries for PIS identification.

    Args:
        timetable_entries: Iterable of timetable entry dicts. Each must have
            a non-empty ``stops`` list plus the keys copied below.

    Returns:
        A new list with one summary dict per entry: the copied scheduling
        fields, ``trainStartTime`` (the first stop's ``wttDeparture``, or its
        ``publicDeparture`` when that is missing or falsy) and ``stops`` as
        returned by ``create_crs_list_from_stops``.
    """
    # Fields carried across unchanged, in output order.
    copied_keys = (
        'stpIndicator',
        'operator',
        'trainUid',
        'scheduleStart',
        'scheduleEnd',
        'daysRun',
    )

    summaries = []
    for full_entry in timetable_entries:
        summary = {key: full_entry[key] for key in copied_keys}

        origin = full_entry['stops'][0]
        summary['trainStartTime'] = (
            origin.get('wttDeparture') or origin.get('publicDeparture')
        )
        summary['stops'] = create_crs_list_from_stops(full_entry['stops'])

        summaries.append(summary)
    return summaries
|
||||||
|
|
||||||
|
# Work through list of stops, and create a list of CRS codes for public stops
|
||||||
|
def create_crs_list_from_stops(stops):
    """Return the ``3ALPHA`` code of every public stop, in stop order.

    Args:
        stops: Iterable of stop dicts with ``isPublic`` and ``tiploc`` keys.

    Returns:
        A list with one entry per stop whose ``isPublic`` is truthy: the
        ``3ALPHA`` value of the first result of
        ``ob_client.get_loc_ref_codes_by_tiploc`` for that stop's tiploc.
        Non-public stops are skipped without a lookup.
    """
    return [
        ob_client.get_loc_ref_codes_by_tiploc(call['tiploc'])[0]['3ALPHA']
        for call in stops
        if call['isPublic']
    ]
|
36
src/validate.py
Normal file
36
src/validate.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
## Validates and filters input based on whether diagram time matches schedule start time
|
||||||
|
def filter_timetable_entries(diagram_entries):
    """Auto-verify diagram entries whose time matches exactly one schedule.

    For each entry, dots are stripped from ``diagram_time1`` and
    ``diagram_time2`` and the results are compared with the
    ``trainStartTime`` of every item in ``timetable_entries``. When exactly
    one item matches, ``timetable_entries`` is narrowed to that item and
    ``verification`` is set. Entries with no match, or with several, are
    left untouched.

    Args:
        diagram_entries: List of dicts with optional ``diagram_time1``,
            ``diagram_time2`` and ``timetable_entries`` keys. The dicts are
            updated in place.

    Returns:
        The same ``diagram_entries`` list that was passed in. Every entry is
        returned, not only the auto-verified ones.
    """
    for entry in diagram_entries:
        # Missing or blank diagram times are dropped rather than normalised
        # to '': otherwise they would "match" a timetable entry whose own
        # start time is blank and yield a bogus automatic verification.
        diagram_times = tuple(
            cleaned
            for cleaned in (
                (entry.get(key) or '').replace('.', '')
                for key in ('diagram_time1', 'diagram_time2')
            )
            if cleaned
        )
        timetable_entries = entry.get('timetable_entries') or []

        # A timetable entry with no 'trainStartTime' simply never matches.
        matches = [
            candidate
            for candidate in timetable_entries
            if candidate.get('trainStartTime') in diagram_times
        ]

        # Only a unique match is trusted; ambiguity is left for later review.
        if len(matches) == 1:
            entry['timetable_entries'] = matches
            entry['verification'] = "AUTO: Diagram/Schedule Time Match"

    return diagram_entries
|
||||||
|
|
||||||
|
## Checks OwlBoard API for existing PIS codes and whether they match
|
||||||
|
def check_and_validate_against_owlboard(train_entries):
    """Check diagram PIS codes against OwlBoard (not implemented yet).

    Planned behaviour, looping over ``train_entries`` (the output of
    ``filter_timetable_entries``, currently written to output.txt):

    * the code exists in the OwlBoard API but does not match: open an issue;
    * the code does not exist in the OwlBoard API: do nothing;
    * the code exists and matches: remove the entry from the input list.

    The resulting list is then returned.

    Args:
        train_entries: List of entries to be checked.

    Raises:
        NotImplementedError: Always, until the steps above are implemented.
    """
    # A function whose body holds only comments is a syntax error and stops
    # the whole module from importing, so fail explicitly at call time.
    raise NotImplementedError(
        "check_and_validate_against_owlboard is not implemented yet"
    )
|
Loading…
Reference in New Issue
Block a user