Prototype for datasheet data extraction

This commit is contained in:
Cole Deck 2024-01-02 16:03:49 -06:00
parent 40d833901e
commit ec1d1be2a5
7 changed files with 68 additions and 1 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
venv

View File

@ -1,3 +1,21 @@
# jukebox-software
This repository is for all of the software used in the Jukebox project.
To set up and run:
Run `install-deps.sh` to prepare the Python venv and install the required packages:
chmod +x ./install-deps.sh && ./install-deps.sh
Then you need to activate (source) the venv. Run
source venv/bin/activate
or
source venv/bin/activate.fish
if you use fish shell.
After that, you are ready to run any of the Python files.

9
install-deps.sh Executable file
View File

@ -0,0 +1,9 @@
#!/bin/sh
# Prepare the project's Python virtual environment and install dependencies.
# Must be run from the repository root: every path below is relative.

# Create the venv on first use only; an existing ./venv directory is reused.
if ! [ -d "venv" ]; then
    # Stop here if the venv could not be created, so the pip call below
    # never runs against a missing or half-built environment.
    ./venv-setup.sh || exit 1
fi

# Use the POSIX "." builtin instead of "source": this script runs under
# /bin/sh, and "source" is a bash extension that strict POSIX shells
# (e.g. dash) do not provide.
. ./venv/bin/activate

# Install the packages listed in requirements.txt into the activated venv.
pip3 install -r requirements.txt

28
read-datasheet.py Executable file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env python3
# Parse Belden catalog techdata datasheets
import camelot
import numpy as np
def parse(filename):
    """Extract the tables of a PDF datasheet and dump each non-empty one.

    The PDF is read with camelot's lattice flavor over all pages
    (``pages="1-end"``). For every detected table, rows and columns that
    contain only empty strings are dropped. Each table that still has
    content is written to ``table<N>.html`` in the current working
    directory and printed to stdout, where ``N`` counts only the
    non-empty tables. Finally a contour debug plot of the first detected
    table is saved to ``test.png``; this step is skipped when no table
    was detected.

    Args:
        filename: Path of the PDF file to parse.
    """
    tables = camelot.read_pdf(
        filename,
        pages="1-end",
        flavor='lattice',
        backend="poppler",
        split_text=False,
        line_scale=100,
        process_background=True,
        resolution=600,
        # camelot's keyword is "iterations"; this was previously misspelled
        # "interations", so the setting presumably never took effect.
        iterations=1,
        layout_kwargs={'detect_vertical': False, 'char_margin': 0.5},
        shift_text=['r', 't'],
    )
    print("Total tables extracted:", tables.n)

    n = 0  # index used for output file names; counts non-empty tables only
    for table in tables:
        # Turn empty cells into NaN so dropna() can remove rows/columns
        # that are entirely blank, then restore '' for the cells that remain.
        table.df.replace('', np.nan, inplace=True)
        table.df.dropna(inplace=True, how="all")
        table.df.dropna(inplace=True, axis="columns", how="all")
        table.df.replace(np.nan, '', inplace=True)

        if not table.df.empty:
            table.to_html("table" + str(n) + ".html")
            print("\nTable " + str(n))
            print(table.df)
            n = n + 1

    # Debug aid: contour plot of the first detected table. Guarded because
    # tables[0] raises IndexError when the PDF yielded no tables at all.
    if tables.n > 0:
        camelot.plot(tables[0], kind='contour').savefig("test.png")
if __name__ == "__main__":
    # When executed as a script, parse the sample datasheet "test.pdf",
    # resolved relative to the current working directory.
    sample_pdf = "test.pdf"
    parse(sample_pdf)

5
requirements.txt Normal file
View File

@ -0,0 +1,5 @@
camelot-py[base]
opencv-python
pypdf2==2.12.1
matplotlib
#PyQt5

BIN
test.pdf Normal file

Binary file not shown.

6
venv-setup.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/sh
# Create the project's Python virtual environment in ./venv and upgrade pip
# inside it. Must be run from the repository root.

# Call python3 explicitly, matching the python3/pip3 usage elsewhere in the
# project; a bare "python" command is not present on every system.
python3 -m venv ./venv || exit 1

# Use the POSIX "." builtin instead of "source": this script runs under
# /bin/sh, and "source" is a bash extension that strict POSIX shells
# (e.g. dash) do not provide.
. ./venv/bin/activate

# Upgrade the venv's own pip before any packages are installed with it.
pip install --upgrade pip