From fdb6f73f268ac7dca7b4ca7bf820d613b6f9d1e7 Mon Sep 17 00:00:00 2001 From: Fred Boniface Date: Tue, 16 Apr 2024 10:49:54 +0100 Subject: [PATCH] Update to handle MMDD file name format, rather than the manual renaming of files to YYYYMMDD format. --- README.md | 2 +- src/parse_docx.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a72942c..12eb1e7 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This is an experimental project and is not yet used as part of the OwlBoard stac ## Language -It is so-far undecided what language will be used. Documents for parsing are likely to be a few hundred lines long so searching may become processor intensive meaning Go may be a good candidate, however Python offers an array of libraries which coule be helpful. +It is so-far undecided what language will be used. Documents for parsing are likely to be a few hundred lines long so searching may become processor intensive meaning Go may be a good candidate, however Python offers an array of libraries which could be helpful. ## File formats diff --git a/src/parse_docx.py b/src/parse_docx.py index f77c75b..8085b74 100644 --- a/src/parse_docx.py +++ b/src/parse_docx.py @@ -26,7 +26,7 @@ def extract_tables(file_path): if pis_and_headcode: pis_and_headcode['source_file'] = file_path current_year = datetime.now().year - date_string_with_year = f"{current_year}{datetime.strptime(file_path.split[' '][0])}" + date_string_with_year = f"{current_year}{file_path.split()[0]}" pis_and_headcode['date'] = datetime.strptime(date_string_with_year, "%Y%m%d") pis_info.append(pis_and_headcode) @@ -67,4 +67,4 @@ def solo_run(): print(extract_tables("./file.docx")) if __name__ == "__main__": - solo_run() \ No newline at end of file + solo_run()