Add UR5 control test, datasheet JSON output
This commit is contained in:
@@ -7,6 +7,7 @@ import camelot
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import io
|
||||
import json
|
||||
|
||||
def parse(filename, output_dir):
|
||||
|
||||
@@ -43,9 +44,10 @@ def parse(filename, output_dir):
|
||||
|
||||
page.extract_text(visitor_text=visitor_body)
|
||||
text_body = "".join(parts).strip('\n')
|
||||
if len(text_body) == 0:
|
||||
text_body = str(n)
|
||||
#print(text_body)
|
||||
|
||||
|
||||
|
||||
|
||||
table_list[text_body] = table.df
|
||||
#table.to_html("table" + str(n) + ".html")
|
||||
@@ -55,7 +57,7 @@ def parse(filename, output_dir):
|
||||
n=n+1
|
||||
#camelot.plot(tables[0], kind='grid').savefig("test.png")
|
||||
|
||||
tables.export(output_dir + '/techdata.json', f='json')
|
||||
#tables.export(output_dir + '/techdata.json', f='json')
|
||||
|
||||
# print(table_list)
|
||||
# Extract Basic details - part name & description, image, etc
|
||||
@@ -81,7 +83,95 @@ def parse(filename, output_dir):
|
||||
with open(output_dir + "/brand.png", "wb") as fp:
|
||||
fp.write(image_file_object.data)
|
||||
count += 1
|
||||
return table_list
|
||||
|
||||
# Table parsing and reordering
|
||||
tables = dict()
|
||||
previous_table = ""
|
||||
for table_name in table_list.keys():
|
||||
# determine shape: horizontal or vertical
|
||||
table = table_list[table_name]
|
||||
rows = table.shape[0]
|
||||
cols = table.shape[1]
|
||||
vertical = None
|
||||
if rows > 2 and cols == 2:
|
||||
vertical = True
|
||||
elif cols == 1:
|
||||
vertical = False
|
||||
elif rows == 1:
|
||||
vertical = True
|
||||
elif cols == 2: # and rows <= 2
|
||||
# inconsistent: the shape alone is ambiguous here, so decide from the text of the first cell
|
||||
if table.iloc[0, 0].find(":") == len(table.iloc[0, 0]) - 1: # check if last character is ":" indicating a vertical table
|
||||
vertical = True
|
||||
else:
|
||||
vertical = False
|
||||
|
||||
elif cols > 2: # and rows <= 2
|
||||
vertical = False
|
||||
elif rows > 2 and cols > 2: # big table
|
||||
vertical = False
|
||||
else: # 1 column, <= 2 rows
|
||||
vertical = False
|
||||
|
||||
# missing name check
|
||||
for table_name_2 in table_list.keys():
|
||||
if table_name_2.find(table.iloc[-1, 0]) >= 0:
|
||||
# Name taken from table directly above - this table does not have a name
|
||||
table_list["Specs " + str(len(tables))] = table_list.pop(table_name_2, None) # rename table to an arbitrary alternate name
|
||||
break
|
||||
|
||||
if vertical:
|
||||
out = dict()
|
||||
for row in table.itertuples(index=False, name=None):
|
||||
out[row[0].replace("\n", " ").replace(":", "")] = row[1]
|
||||
|
||||
else: # horizontal
|
||||
out = dict()
|
||||
for col in table.columns:
|
||||
col_data = tuple(table[col])
|
||||
out[col_data[0].replace("\n", " ")] = col_data[1:]
|
||||
|
||||
tables[table_name] = out
|
||||
|
||||
|
||||
|
||||
# multi-page table check
|
||||
if table_name.isdigit() and len(tables) > 1:
|
||||
print(table_name)
|
||||
print(previous_table)
|
||||
|
||||
|
||||
|
||||
|
||||
main_key = previous_table
|
||||
cont_key = table_name
|
||||
print(tables)
|
||||
if vertical == False:
|
||||
main_keys = list(tables[main_key].keys())
|
||||
for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
|
||||
if i < len(main_keys):
|
||||
print(tables[main_key][main_keys[i]])
|
||||
tables[main_key][main_keys[i]] = (tables[main_key][main_keys[i]] + (cont_key,) + cont_values)
|
||||
|
||||
del tables[table_name]
|
||||
|
||||
else:
|
||||
for key in tables[cont_key].keys():
|
||||
tables[main_key][key] = tables[cont_key][key]
|
||||
del tables[table_name]
|
||||
|
||||
previous_table = table_name
|
||||
|
||||
|
||||
print(tables)
|
||||
with open(output_dir + "/tables.json", 'w') as json_file:
|
||||
json.dump(tables, json_file)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return tables
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user