import os
import sys
import glob
import datetime
import json

import parse_pdf


def list_pdf_files(directory):
    """Return the paths of all ``*.pdf`` files directly inside *directory*.

    The search is not recursive and the order is whatever ``glob.glob``
    yields. An empty list is returned when nothing matches.
    """
    return glob.glob(os.path.join(directory, '*.pdf'))


def get_schedule_card_data(filepath):
    """Build the schedule-card record for a single PDF.

    The first four characters of the file name are read as ``MMDD``; the
    year is taken from the current local date.

    Args:
        filepath: Path to the PDF file.

    Returns:
        A dict with two keys: ``"schedule_date"`` (a ``datetime.datetime``)
        and ``"schedule_data"`` (whatever ``parse_pdf.parse_pdf_file``
        returns for *filepath*). ``None`` is returned, after printing an
        error, when the file name prefix is not a valid month/day.

    Raises:
        ValueError: If the file name is shorter than four characters.
    """
    filename = os.path.basename(filepath)
    date_str = filename[:4]
    if len(date_str) < 4:
        raise ValueError("Filename must have at least four characters")

    year = datetime.datetime.now().year
    try:
        month = int(date_str[:2])
        day = int(date_str[2:])
        # datetime() itself rejects out-of-range months/days with ValueError.
        date_object = datetime.datetime(year=year, month=month, day=day)
    except ValueError as e:
        print(f"Error parsing date: {e}")
        return None

    # The PDF is only parsed once the date has been validated.
    return {
        "schedule_date": date_object,
        "schedule_data": parse_pdf.parse_pdf_file(filepath),
    }


# Planned follow-up steps (not implemented by main() yet):
# - Loop through data and remove duplicate codes.
# - Check for existing codes via OwlBoard API.
# - Validate existing codes and submit issue if not correct.
# - Use the train list to search for stopping pattern of any absent codes.
# - Format file and commit to git
def main():
    """Parse every PDF in a directory and dump the results to ``output.txt``.

    The directory is taken from the first command-line argument, falling
    back to the current working directory. Problems (invalid directory, no
    PDF files) are reported on stdout and end the run without writing
    anything.
    """
    directory = sys.argv[1] if len(sys.argv) > 1 else os.getcwd()

    if not os.path.isdir(directory):
        print(f"Error: '{directory}' is not a valid directory")
        return

    pdf_files = list_pdf_files(directory)
    if not pdf_files:
        print(f"Error: '{directory}' contains no PDF files")
        return
    print(f"Found {len(pdf_files)} PDF files")

    schedule_cards = []
    for pdf_file in pdf_files:
        card = get_schedule_card_data(pdf_file)
        # A None result means the date could not be parsed (already
        # reported); skip it so the output holds only real records rather
        # than bare nulls.
        if card is not None:
            schedule_cards.append(card)

    # default=str stringifies values json cannot encode natively, such as
    # the datetime stored under "schedule_date".
    serialised = json.dumps(schedule_cards, indent=4, default=str)

    # The context manager closes the file even if the write fails.
    with open("output.txt", "w") as out:
        out.write(serialised)


if __name__ == "__main__":
    main()