27 lines
599 B
Python
27 lines
599 B
Python
|
#! /usr/bin/python3
|
||
|
|
||
|
import json
|
||
|
|
||
|
# Keys removed from every record that survives the filter.
_DROPPED_FIELDS = ("UIC", "NLCDESC16", "NLC")

# Placeholder value the filter treats as "no code present" in the
# "3ALPHA" and "STANOX" fields (a single space, as compared in the
# original script).
_BLANK = ' '


def clean_records(records):
    """Return copies of the records that have both a 3ALPHA and a STANOX.

    A record is kept only when neither its ``"3ALPHA"`` nor its ``"STANOX"``
    value equals the single-space placeholder.  Each kept record is copied
    without the ``"UIC"``, ``"NLCDESC16"`` and ``"NLC"`` keys; a record that
    lacks one of those keys is still kept (nothing to remove).  The input
    records are not modified.

    Args:
        records: Iterable of dicts, each providing "3ALPHA" and "STANOX".

    Returns:
        A new list of new dicts, in the same order as the input.

    Raises:
        KeyError: If an examined record has no "3ALPHA" or "STANOX" key.
    """
    return [
        {
            key: value
            for key, value in record.items()
            if key not in _DROPPED_FIELDS
        }
        for record in records
        if record["3ALPHA"] != _BLANK and record["STANOX"] != _BLANK
    ]


def main():
    """Filter ./CORPUSExtract.json into CorpusClean.json.

    Reads the ``"TIPLOCDATA"`` list from ``./CORPUSExtract.json``, filters it
    with :func:`clean_records`, writes the result to ``CorpusClean.json`` as
    ``{"data": [...]}`` and echoes the cleaned list to stdout.  Both paths
    are resolved against the current working directory.
    """
    print("Opening CORPUSExtract.json")
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open("./CORPUSExtract.json", "r", encoding="utf-8") as input_file:
        corpus = json.load(input_file)
    records = corpus['TIPLOCDATA']

    print("Processing data")
    clean_list = clean_records(records)

    print("Saving data")
    with open("CorpusClean.json", "w", encoding="utf-8") as output_file:
        json.dump({"data": clean_list}, output_file)

    print(clean_list)
    print("Processed.")


if __name__ == "__main__":
    main()
|