Compare commits

..

2 Commits

Author SHA1 Message Date
c7fd19f477 Fix syntax err 2024-12-06 11:45:16 +00:00
42c0ae34ab Prevent issue creation if PIS Code not populated 2024-12-01 12:35:24 +00:00
4 changed files with 15 additions and 29 deletions

View File

@ -1,13 +1,7 @@
certifi==2024.8.30 certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.4.0 charset-normalizer==3.4.0
cryptography==44.0.0
idna==3.10 idna==3.10
pdfminer.six==20231228
pdfplumber==0.11.4
pillow==11.0.0
pycparser==2.22
pyOwlBoard==0.0.2 pyOwlBoard==0.0.2
pypdfium2==4.30.0 PyPDF2==3.0.1
requests==2.32.3 requests==2.32.3
urllib3==2.2.3 urllib3==2.2.3

View File

@ -1,4 +1,4 @@
import pdfplumber import PyPDF2
import re import re
## re Patterns ## re Patterns
@ -11,18 +11,19 @@ train_pis_line_pattern = r"\(PIS Code\s*:\s*(?P<pis_code>\d+|____)\s*\)" # Extra
# Extract Schedule Cards to list of strings # Extract Schedule Cards to list of strings
def extract_pdf_text(file_path): def extract_pdf_text(file_path):
tables = [] with open(file_path, 'rb') as file:
try: pdf_reader = PyPDF2.PdfReader(file)
with pdfplumber.open(file_path) as file:
for page_number, page in enumerate(file.pages, start=1): page_texts = []
page_tables = page.extract_tables() for page in pdf_reader.pages:
if page_tables: text = page.extract_text()
tables.extend(page_tables) if text:
except Exception as e: page_texts.append(text)
print(f"Error processing PDF: {e}")
return tables
# Does everything - NEEDS REWRITE TO HANDLE EXTRACTED TABLES FROM ABOVE FN full_text = " ".join(page_texts)
return full_text
# Does everything - should be split into smaller, single-purpose functions
def parse_pdf_file(filename): def parse_pdf_file(filename):
pdf_text = extract_pdf_text(filename) pdf_text = extract_pdf_text(filename)
schedule_cards = re.findall(schedule_pattern, pdf_text, re.DOTALL) schedule_cards = re.findall(schedule_pattern, pdf_text, re.DOTALL)

View File

@ -1,9 +0,0 @@
import pdfplumber
with pdfplumber.open("./../1031 THO SC CNDR.pdf") as pdf:
for page_number, page in enumerate(pdf.pages, start=1):
tables = page.extract_tables()
for table in tables:
print(f"Table on page {page_number}: ")
for row in table:
print(row[0])

View File

@ -55,7 +55,7 @@ def check_and_validate_against_owlboard(train_entries):
output.append(train_entry) output.append(train_entry)
# Else if only one possible entry (and previous statements false), open issue # Else if only one possible entry (and previous statements false), open issue
elif len(train_entry['timetable_entries']) == 1: elif len(train_entry['timetable_entries']) == 1 and train_entry['diagram_pis_code'] != "____":
issue_title = f"PIS Error | Code: {train_entry['diagram_pis_code']}" issue_title = f"PIS Error | Code: {train_entry['diagram_pis_code']}"
issue_content = f""" issue_content = f"""
PIS Code {train_entry['diagram_pis_code']}. PIS Code {train_entry['diagram_pis_code']}.