Prototype for datasheet data extraction
This commit is contained in:
parent
40d833901e
commit
ec1d1be2a5
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
venv
|
20
README.md
20
README.md
@ -1,3 +1,21 @@
|
|||||||
# jukebox-software
|
# jukebox-software
|
||||||
|
|
||||||
This repository is for all of the software used in the Jukebox project.
|
This repository is for all of the software used in the Jukebox project.
|
||||||
|
|
||||||
|
To set up and run:
|
||||||
|
|
||||||
|
Run `install-deps.sh` to prepare the Python venv and install packages:
|
||||||
|
|
||||||
|
chmod +x ./install-deps.sh && ./install-deps.sh
|
||||||
|
|
||||||
|
Then you need to activate the venv by sourcing it. Run
|
||||||
|
|
||||||
|
source venv/bin/activate
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
source venv/bin/activate.fish
|
||||||
|
|
||||||
|
if you use fish shell.
|
||||||
|
|
||||||
|
After that, you are ready to run any Python file in this repository.
|
9
install-deps.sh
Executable file
9
install-deps.sh
Executable file
@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/sh
# Prepare the Python virtual environment and install the project's packages.

# Create the venv only when it does not exist yet.
if ! [ -d "venv" ]; then
    ./venv-setup.sh
fi

# Use POSIX `.` rather than `source`: this script runs under /bin/sh, and
# strict POSIX shells such as dash do not provide a `source` builtin.
. ./venv/bin/activate

pip3 install -r requirements.txt
|
28
read-datasheet.py
Executable file
28
read-datasheet.py
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# Parse Belden catalog techdata datasheets
|
||||||
|
|
||||||
|
import camelot
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def parse(filename):
    """Extract the tables from a datasheet PDF and dump each non-empty one.

    Every page of *filename* is read with camelot's lattice parser. Rows and
    columns that are entirely blank are removed from each table; tables that
    still hold data are written to ``table<N>.html`` (relative path) and
    printed. A contour plot of the first detected table is saved to
    ``test.png`` as a debugging aid.

    Args:
        filename: Path of the PDF to parse.
    """
    tables = camelot.read_pdf(
        filename,
        pages="1-end",
        flavor='lattice',
        backend="poppler",
        split_text=False,
        line_scale=100,
        process_background=True,
        resolution=600,
        # Previously misspelled as "interations"; camelot's lattice option
        # is spelled "iterations".
        iterations=1,
        layout_kwargs={'detect_vertical': False, 'char_margin': 0.5},
        shift_text=['r', 't'],
    )
    print("Total tables extracted:", tables.n)
    n = 0

    for table in tables:
        # Turn blank cells into NaN so dropna can discard all-blank rows and
        # columns, then turn the remaining NaNs back into empty strings.
        table.df.replace('', np.nan, inplace=True)
        table.df.dropna(inplace=True, how="all")
        table.df.dropna(inplace=True, axis="columns", how="all")
        table.df.replace(np.nan, '', inplace=True)
        if not table.df.empty:
            table.to_html("table" + str(n) + ".html")
            print("\nTable " + str(n))
            print(table.df)
            # Debugging aid, disabled:
            #camelot.plot(table, kind='grid').savefig("test" + str(n) + ".png")
            # NOTE(review): the counter is advanced only for tables that were
            # written, so output files are numbered consecutively - confirm
            # this matches the intended nesting.
            n = n + 1

    # Indexing tables[0] fails when nothing was detected, so only plot when
    # at least one table exists.
    if tables.n > 0:
        camelot.plot(tables[0], kind='contour').savefig("test.png")
    # Debugging aid, disabled:
    #tables.export('foo.csv', f='csv')
|
||||||
|
|
||||||
|
# Script entry point: parse the hard-coded sample file "test.pdf"
# (a relative path, so it is looked up in the current working directory).
if __name__ == "__main__":
    parse("test.pdf")
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
camelot-py[base]
|
||||||
|
opencv-python
|
||||||
|
pypdf2==2.12.1
|
||||||
|
matplotlib
|
||||||
|
#PyQt5
|
6
venv-setup.sh
Executable file
6
venv-setup.sh
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/sh
# Create the project's virtual environment in ./venv and upgrade its pip.

# Call python3 explicitly: the rest of the project uses python3/pip3, and a
# bare `python` command is not installed on every system.
python3 -m venv ./venv

# Use POSIX `.` rather than `source`: this script runs under /bin/sh, and
# strict POSIX shells such as dash do not provide a `source` builtin.
. ./venv/bin/activate

pip install --upgrade pip
|
Loading…
x
Reference in New Issue
Block a user