Fix parsing of schedule card text

2024-10-26 11:37:07 +01:00 · 2024-10-26 11:37:07 +01:00 · 11ec1329a5
commit 11ec1329a5
parent 49d47b479b
3 changed files with 19 additions and 4 deletions
--- a/pyvenv.cfg
+++ b/pyvenv.cfg
@ -0,0 +1,5 @@
 home = /usr/bin
 include-system-site-packages = false
 version = 3.12.7
 executable = /usr/bin/python3.12
 command = /usr/bin/python -m venv /home/fred.boniface/git/owlboard/dgp2
--- a/src/main.py
+++ b/src/main.py
@ -1,6 +1,8 @@
 import os
 import sys
 import glob
 import datetime
 import json
 import parse_pdf
 # List all PDF files in the given directory
@ -12,7 +14,7 @@ def list_pdf_files(directory):
 def get_schedule_card_data(filepath):
    filename = os.path.basename(filepath)
    date_str = filename[:4]
-    date_object = datetime.now()
+    date_object = datetime.datetime.now()
    if len(date_str) < 4:
        raise ValueError("Filename must have at least four characters")
        return None
@ -24,8 +26,8 @@ def get_schedule_card_data(filepath):
        month = int(month_str)
        day = int(day_str)
-        year = datetime.now().year
+        year = datetime.datetime.now().year
-        date_object = datetime(year=year, month=month, day=day)
+        date_object = datetime.datetime(year=year, month=month, day=day)
    except ValueError as e:
        print(f"Error parsing date: {e}")
        return None
@ -66,6 +68,14 @@ def main():
    else:
        print(f"Found {len(pdf_files)} PDF files")
    schedule_cards = []
    for pdf_file in pdf_files:
        schedule_cards.append(get_schedule_card_data(pdf_file))
   # print(schedule_cards)
    out = open("output.txt", "w")
    out.write(json.dumps(schedule_cards, indent=4, default=str))
    out.close()
    ## For each file in list, run get_schedule_card_data(filepath)
    ## if returned value is not None, append to a list.  Once
--- a/src/parse_pdf.py
+++ b/src/parse_pdf.py
@ -32,7 +32,7 @@ def parse_pdf_file(filename):
    parsed_schedule_cards = []
    for schedule_card in schedule_cards:
-        train_sections = re.split(f"(?={train_start_pattern})", schedule_cards[0])
+        train_sections = re.split(f"(?={train_start_pattern})", schedule_card)
        train_sections = [section.strip() for section in train_sections if section.strip()]
        parsed_schedule_cards.append(train_sections)