Prototype for datasheet data extraction

This commit is contained in:
2024-01-02 16:03:49 -06:00
parent 40d833901e
commit ec1d1be2a5
7 changed files with 68 additions and 1 deletions
+1
View File
@@ -0,0 +1 @@
venv
+19 -1
View File
@@ -1,3 +1,21 @@
# jukebox-software
This repository is for all of the software used in the Jukebox project.
This repository is for all of the software used in the Jukebox project.
To set up and run:
Run `install-deps.sh` to prepare the Python venv and install the required packages:
chmod +x ./install-deps.sh && ./install-deps.sh
Then you need to activate the venv in your current shell. Run
source venv/bin/activate
or
source venv/bin/activate.fish
if you use fish shell.
After that, you can run any of the Python files.
+9
View File
@@ -0,0 +1,9 @@
#!/bin/sh
# Prepare the project's Python virtual environment and install dependencies.
# venv-setup.sh, venv/ and requirements.txt are all resolved relative to the
# current directory, so run this from the directory that contains them.

# Create the venv only when no venv/ directory exists yet.
if ! [ -d "venv" ]; then
    ./venv-setup.sh
fi

# `source` is a bash/zsh extension; under a strict POSIX /bin/sh (e.g. dash)
# it is "not found", so use the portable `.` builtin instead.
. ./venv/bin/activate
pip3 install -r requirements.txt
+28
View File
@@ -0,0 +1,28 @@
#!/usr/bin/env python3
# Parse Belden catalog techdata datasheets
import camelot
import numpy as np
def parse(filename):
    """Extract the tables from a datasheet PDF and dump each non-empty one.

    Runs Camelot's lattice parser over every page of *filename*, strips rows
    and columns that contain only empty strings, then writes each table that
    still has content to ``table<N>.html`` in the working directory and
    prints it. A contour debug plot of the first detected table is saved to
    ``test.png``.

    Args:
        filename: Path of the PDF file to read.
    """
    tables = camelot.read_pdf(
        filename,
        pages="1-end",
        flavor="lattice",
        backend="poppler",
        split_text=False,
        line_scale=100,
        process_background=True,
        resolution=600,
        # Previously misspelled as "interations", so the setting presumably
        # never reached the lattice parser; Camelot's keyword is "iterations".
        iterations=1,
        layout_kwargs={"detect_vertical": False, "char_margin": 0.5},
        shift_text=["r", "t"],
    )
    print("Total tables extracted:", tables.n)

    n = 0  # running index of the non-empty tables written so far
    for table in tables:
        # Turn empty cells into NaN so dropna() can discard rows/columns that
        # are blank throughout, then restore '' for the cells that remain.
        table.df.replace('', np.nan, inplace=True)
        table.df.dropna(inplace=True, how="all")
        table.df.dropna(inplace=True, axis="columns", how="all")
        table.df.replace(np.nan, '', inplace=True)

        if not table.df.empty:
            table.to_html(f"table{n}.html")
            print(f"\nTable {n}")
            print(table.df)
            # Per-table grid debug plot (disabled):
            # camelot.plot(table, kind='grid').savefig("test" + str(n) + ".png")
            n += 1

    # tables[0] raises IndexError when nothing was detected, so only draw the
    # debug plot when there is at least one table.
    if tables.n > 0:
        camelot.plot(tables[0], kind='contour').savefig("test.png")
    # CSV export of all tables (disabled):
    # tables.export('foo.csv', f='csv')
if __name__ == "__main__":
    # Script entry point: run the extractor on the sample PDF expected in the
    # current working directory.
    parse("test.pdf")
+5
View File
@@ -0,0 +1,5 @@
camelot-py[base]
opencv-python
pypdf2==2.12.1
matplotlib
#PyQt5
BIN
View File
Binary file not shown.
Executable
+6
View File
@@ -0,0 +1,6 @@
#!/bin/sh
# Create the project's virtual environment in ./venv and upgrade its pip.

# Call python3 explicitly: the rest of the project uses pip3 / python3, and a
# bare `python` command is not installed on many distributions.
python3 -m venv ./venv

# `source` is a bash/zsh extension; under a strict POSIX /bin/sh (e.g. dash)
# it is "not found", so use the portable `.` builtin instead.
. ./venv/bin/activate
pip install --upgrade pip