34 lines
990 B
Python
34 lines
990 B
Python
### This uses the 'python-docx-2023' module
|
|
from docx import Document
|
|
|
|
def extract_table(file_path, table_index=4):
    """Extract one table from a .docx file as a list of row dictionaries.

    The first row of the selected table is treated as the header row; each
    following row becomes a dict mapping header cell text to that row's
    cell text. The result is printed and also returned so that callers can
    process it further.

    Args:
        file_path: Passed unchanged to ``Document`` to open the file.
        table_index: Position of the wanted table in ``document.tables``.
            Defaults to 4, the index this function previously hard-coded.

    Returns:
        A list containing one dict per non-header row. The list is empty
        when the table has no rows after the header (or no rows at all).

    Raises:
        IndexError: If the document has no table at ``table_index``.
    """
    document = Document(file_path)
    tables = document.tables

    # Only the requested table is touched. The earlier debug prints of
    # tables 1-5 could raise IndexError for a table that was never needed.
    try:
        table = tables[table_index]
    except IndexError:
        raise IndexError(
            f"document has {len(tables)} table(s); "
            f"no table at index {table_index}"
        ) from None

    rows = iter(table.rows)
    header_row = next(rows, None)

    if header_row is None:
        data = []
    else:
        keys = tuple(cell.text for cell in header_row.cells)
        # zip() stops at the shorter side, so a row with more cells than
        # the header silently drops the surplus cells (same as before).
        data = [
            dict(zip(keys, (cell.text for cell in row.cells)))
            for row in rows
        ]

    print(data)
    return data
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the sample document in the working directory.
    extract_table(file_path="./file.docx")
|
|
|
|
### This can parse each table. What needs to happen next
### is to parse all tables, then check each one for a PIS code.
### If a PIS code exists, find the associated headcode; an API
### request can then be made to OwlBoard to try to find a
### service with a valid stopping pattern, and the PIS codes
### can then be generated for review.