Add CORPUS data and temporary python script

This commit is contained in:
Fred Boniface 2022-12-05 13:26:08 +00:00
parent 4057b50d08
commit 40900bc502
3 changed files with 29 additions and 0 deletions

1
tmp/CORPUSExtract.json Normal file

File diff suppressed because one or more lines are too long

27
tmp/CORPUStidy.py Executable file
View File

@@ -0,0 +1,27 @@
#! /usr/bin/python3
"""Tidy a CORPUS extract into a smaller JSON file.

Reads ``./CORPUSExtract.json``, takes the list stored under its
``TIPLOCDATA`` key, keeps only the records that carry both a ``3ALPHA`` and
a ``STANOX`` value, removes the ``UIC``, ``NLCDESC16`` and ``NLC`` fields
from every kept record, and writes the result to ``CorpusClean.json`` as
``{"data": [...]}``.
"""
import json

INPUT_PATH = "./CORPUSExtract.json"
OUTPUT_PATH = "CorpusClean.json"

# This script treats a field holding exactly one space as "no value".
BLANK = ' '
# A record is kept only when every one of these fields is non-blank.
REQUIRED_FIELDS = ("3ALPHA", "STANOX")
# Fields stripped from every record that is kept.
DROPPED_FIELDS = ("UIC", "NLCDESC16", "NLC")


def tidy_records(records):
    """Return the filtered, slimmed-down copy of *records*.

    Args:
        records: iterable of dicts, one per CORPUS location entry.

    Returns:
        A new list, in the original order, containing a new dict for each
        record whose ``REQUIRED_FIELDS`` are all non-blank, with the
        ``DROPPED_FIELDS`` left out. The input records are not modified.

    A record that lacks a required field altogether is treated like one
    whose field is blank and is skipped; a record that lacks a field that
    would be dropped anyway is kept. Neither case raises ``KeyError``.
    """
    clean = []
    for record in records:
        if any(record.get(field, BLANK) == BLANK for field in REQUIRED_FIELDS):
            continue
        clean.append(
            {key: value for key, value in record.items()
             if key not in DROPPED_FIELDS}
        )
    return clean


def main():
    """Run the extract -> tidy -> save pipeline on the fixed file paths."""
    print("Opening CORPUSExtract.json")
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(INPUT_PATH, "r", encoding="utf-8") as input_file:
        corpus = json.load(input_file)

    print("Processing data")
    clean_list = tidy_records(corpus['TIPLOCDATA'])

    print("Saving data")
    with open(OUTPUT_PATH, "w", encoding="utf-8") as output_file:
        json.dump({"data": clean_list}, output_file)

    # Kept from the original script: echoes every cleaned record to stdout.
    print(clean_list)
    print("Processed.")


if __name__ == "__main__":
    main()

1
tmp/CorpusClean.json Normal file

File diff suppressed because one or more lines are too long