"""Command-line entry point that gathers schedule-card PDFs for processing.

Usage: ``python main.py [directory]``. When no directory is given, the
current working directory is searched. PDF contents are read through the
project's ``parse_pdf`` module.
"""

import glob
import os
import sys
from datetime import datetime


def list_pdf_files(directory):
    """Return the paths of all ``*.pdf`` files directly inside *directory*.

    Args:
        directory: Directory to search (not searched recursively).

    Returns:
        A sorted list of matching paths; empty when nothing matches.
    """
    # Escape the directory so glob metacharacters in its name ("[", "*", "?")
    # are matched literally instead of being interpreted as a pattern.
    pattern = os.path.join(glob.escape(directory), "*.pdf")
    # glob order depends on the filesystem; sort for a stable processing order.
    return sorted(glob.glob(pattern))


def get_schedule_card_data(filepath):
    """Build the schedule-card record for a single PDF file.

    The first four characters of the file name are read as ``MMDD``; the year
    is taken from the current date.

    Args:
        filepath: Path to the PDF file.

    Returns:
        A dict with the keys ``"schedule_date"`` (a ``datetime``) and
        ``"schedule_data"`` (the value returned by
        ``parse_pdf.parse_pdf_file(filepath)``), or ``None`` when the
        four-character prefix is not a valid month/day combination.

    Raises:
        ValueError: If the file name has fewer than four characters.
    """
    filename = os.path.basename(filepath)
    date_str = filename[:4]
    if len(date_str) < 4:
        raise ValueError("Filename must have at least four characters")

    try:
        # int() and datetime() both raise ValueError on bad input, so a
        # non-numeric prefix and an impossible date are handled identically.
        schedule_date = datetime(
            year=datetime.now().year,
            month=int(date_str[:2]),
            day=int(date_str[2:]),
        )
    except ValueError as e:
        print(f"Error parsing date: {e}")
        return None

    # Imported here rather than at module level so that the filename checks
    # above and the other helpers do not need the PDF parser to be importable.
    import parse_pdf

    return {
        "schedule_date": schedule_date,
        "schedule_data": parse_pdf.parse_pdf_file(filepath),
    }


# TODO: Loop through data and remove duplicate codes.

# TODO: Check for existing codes via OwlBoard API.
# TODO: Validate existing codes and submit issue if not correct.

# TODO: Use the train list to search for stopping pattern of any absent codes.

# TODO: Format file and commit to git


def main():
    """Locate the PDF files to process.

    The directory comes from the first command-line argument and defaults to
    the current working directory.

    Returns:
        ``0`` when at least one PDF file was found, ``1`` when the directory
        is invalid or contains no PDF files.
    """
    directory = sys.argv[1] if len(sys.argv) > 1 else os.getcwd()

    if not os.path.isdir(directory):
        print(f"Error: '{directory}' is not a valid directory", file=sys.stderr)
        return 1

    pdf_files = list_pdf_files(directory)
    if not pdf_files:
        print(f"Error: '{directory}' contains no PDF files", file=sys.stderr)
        return 1
    print(f"Found {len(pdf_files)} PDF files")

    # TODO: For each file in the list, run get_schedule_card_data(filepath);
    # if the returned value is not None, append it to a list. Once complete,
    # pass the list to a validation function, then into a Git handling
    # function.
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so failures are visible to the calling shell.
    sys.exit(main())