Merge branch 'main' into dthomas_meilisearch

This commit is contained in:
2024-03-01 19:25:30 -06:00
5 changed files with 110 additions and 66 deletions

View File

@@ -12,7 +12,7 @@ from util import fprint
import uuid
from util import run_cmd
def parse(filename, output_dir, partnum):
def parse(filename, output_dir, partnum, dstype):
# Extract table data
@@ -24,6 +24,7 @@ def parse(filename, output_dir, partnum):
page = reader.pages[0]
table_list = {}
for table in tables:
table.df.infer_objects(copy=False)
table.df.replace('', np.nan, inplace=True)
table.df.dropna(inplace=True, how="all")
table.df.dropna(inplace=True, axis="columns", how="all")
@@ -139,29 +140,30 @@ def parse(filename, output_dir, partnum):
# multi-page table check
if table_name.isdigit() and len(tables) > 1:
fprint(table_name)
fprint(previous_table)
main_key = previous_table
cont_key = table_name
fprint(tables)
if vertical == False:
main_keys = list(tables[main_key].keys())
for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
if i < len(main_keys):
fprint(tables[main_key][main_keys[i]])
tables[main_key][main_keys[i]] = (tables[main_key][main_keys[i]] + (cont_key,) + cont_values)
del tables[table_name]
else:
for key in tables[cont_key].keys():
tables[main_key][key] = tables[cont_key][key]
del tables[table_name]
if dstype == "Belden":
if table_name.isdigit() and len(tables) > 1:
fprint(table_name)
fprint(previous_table)
main_key = previous_table
cont_key = table_name
fprint(tables)
if vertical == False:
main_keys = list(tables[main_key].keys())
for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
if i < len(main_keys):
fprint(tables[main_key][main_keys[i]])
tables[main_key][main_keys[i]] = (tables[main_key][main_keys[i]] + (cont_key,) + cont_values)
del tables[table_name]
else:
for key in tables[cont_key].keys():
tables[main_key][key] = tables[cont_key][key]
del tables[table_name]
previous_table = table_name