28 lines
1.1 KiB
Python
Executable File
28 lines
1.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Parse Belden catalog techdata datasheets
|
|
|
|
import camelot
|
|
import numpy as np
|
|
|
|
def parse(filename):
    """Extract tables from a PDF datasheet and dump every non-empty one.

    Runs camelot's lattice parser over all pages of ``filename``, strips rows
    and columns that contain only empty strings, then writes each remaining
    table to ``table<N>.html`` in the current working directory and prints it.
    ``N`` counts only the tables that were actually written. A contour plot of
    the first extracted table is saved to ``test.png`` as a debugging aid.

    Args:
        filename: Path of the PDF file handed to ``camelot.read_pdf``.
    """
    tables = camelot.read_pdf(
        filename,
        pages="1-end",
        flavor='lattice',
        backend="poppler",
        split_text=False,
        line_scale=100,
        process_background=True,
        resolution=600,
        # Was misspelled "interations", so the setting never reached the
        # lattice parser under its real name.
        iterations=1,
        layout_kwargs={'detect_vertical': False, 'char_margin': 0.5},
        shift_text=['r', 't'],
    )
    print("Total tables extracted:", tables.n)

    n = 0
    for table in tables:
        # Turn empty cells into NaN so dropna() can remove fully blank rows
        # and columns, then restore the empty strings for output.
        # The frame is modified in place because table.to_html() below is
        # called on the same table object.
        table.df.replace('', np.nan, inplace=True)
        table.df.dropna(inplace=True, how="all")
        table.df.dropna(inplace=True, axis="columns", how="all")
        table.df.replace(np.nan, '', inplace=True)

        if table.df.empty:
            continue

        table.to_html(f"table{n}.html")
        print(f"\nTable {n}")
        print(table.df)
        # Optional per-table debug plot:
        # camelot.plot(table, kind='grid').savefig("test" + str(n) + ".png")
        n += 1

    # Indexing tables[0] fails when nothing was extracted, so only plot when
    # at least one table exists.
    if tables.n > 0:
        camelot.plot(tables[0], kind='contour').savefig("test.png")
    # Optional bulk export:
    # tables.export('foo.csv', f='csv')
|
|
|
|
if __name__ == "__main__":
    import sys

    # Parse the PDF named on the command line; fall back to the sample file
    # "test.pdf" when no argument is given.
    parse(sys.argv[1] if len(sys.argv) > 1 else "test.pdf")