Update the date parsing to handle the MMDD file name format (the current year is prepended automatically), rather than relying on manual renaming of files to YYYYMMDD format.

This commit is contained in:
Fred Boniface 2024-04-16 10:49:54 +01:00
parent 9439a4e251
commit fdb6f73f26
2 changed files with 3 additions and 3 deletions

View File

@ -4,7 +4,7 @@ This is an experimental project and is not yet used as part of the OwlBoard stac
## Language
It is so-far undecided what language will be used. Documents for parsing are likely to be a few hundred lines long so searching may become processor intensive meaning Go may be a good candidate, however Python offers an array of libraries which coule be helpful.
It is so far undecided what language will be used. Documents for parsing are likely to be a few hundred lines long, so searching may become processor intensive, meaning Go may be a good candidate; however, Python offers an array of libraries which could be helpful.
## File formats

View File

@ -26,7 +26,7 @@ def extract_tables(file_path):
if pis_and_headcode:
pis_and_headcode['source_file'] = file_path
current_year = datetime.now().year
date_string_with_year = f"{current_year}{datetime.strptime(file_path.split[' '][0])}"
date_string_with_year = f"{current_year}{file_path.split()[0]}"
pis_and_headcode['date'] = datetime.strptime(date_string_with_year, "%Y%m%d")
pis_info.append(pis_and_headcode)
@ -67,4 +67,4 @@ def solo_run():
print(extract_tables("./file.docx"))
if __name__ == "__main__":
solo_run()
solo_run()