Add CORPUS data and temporary Python script
This commit is contained in:
parent
4057b50d08
commit
40900bc502
1
tmp/CORPUSExtract.json
Normal file
1
tmp/CORPUSExtract.json
Normal file
File diff suppressed because one or more lines are too long
27
tmp/CORPUStidy.py
Executable file
27
tmp/CORPUStidy.py
Executable file
@ -0,0 +1,27 @@
|
||||
#! /usr/bin/python3
"""Tidy the CORPUS extract.

Reads ``CORPUSExtract.json`` from the current working directory, keeps only
the ``TIPLOCDATA`` records whose ``3ALPHA`` and ``STANOX`` fields are both
populated, strips the ``UIC``, ``NLCDESC16`` and ``NLC`` fields from every
kept record and writes the result to ``CorpusClean.json`` in the form
``{"data": [...]}``.
"""

import json
from pathlib import Path

INPUT_PATH = "./CORPUSExtract.json"
OUTPUT_PATH = "CorpusClean.json"

# A field holding exactly one space is what this script treats as "empty".
BLANK = " "
# A record is kept only when every one of these fields is populated.
REQUIRED_FIELDS = ("3ALPHA", "STANOX")
# Fields removed from every record that is kept.
DISCARDED_FIELDS = frozenset({"UIC", "NLCDESC16", "NLC"})


def _tidy_records(records):
    """Return stripped copies of the records whose required fields are set.

    New dictionaries are built instead of popping keys from the input, so the
    parsed source data is left untouched and a record that happens to lack
    one of the discarded fields no longer raises ``KeyError``. A record
    lacking a *required* field still raises ``KeyError``, because that means
    the input is malformed.
    """
    return [
        {
            key: value
            for key, value in record.items()
            if key not in DISCARDED_FIELDS
        }
        for record in records
        if all(record[field] != BLANK for field in REQUIRED_FIELDS)
    ]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Run the tidy-up and return the list of cleaned records.

    Args:
        input_path: JSON file containing a top-level ``TIPLOCDATA`` list.
        output_path: Destination file for the ``{"data": [...]}`` document.

    Returns:
        The cleaned records, in their original order.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
        KeyError: If ``TIPLOCDATA`` or a required record field is missing.
    """
    print(f"Opening {Path(input_path).name}")
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(input_path, "r", encoding="utf-8") as input_file:
        records = json.load(input_file)["TIPLOCDATA"]

    print("Processing data")
    clean_records = _tidy_records(records)

    print("Saving data")
    with open(output_path, "w", encoding="utf-8") as output_file:
        json.dump({"data": clean_records}, output_file)

    # Kept from the original script: echo the cleaned records to stdout.
    print(clean_records)
    print("Processed.")
    return clean_records


if __name__ == "__main__":
    main()
|
1
tmp/CorpusClean.json
Normal file
1
tmp/CorpusClean.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user