Prototype for datasheet data extraction
This commit is contained in:
parent
40d833901e
commit
ec1d1be2a5
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
venv
|
20
README.md
20
README.md
@ -1,3 +1,21 @@
|
||||
# jukebox-software
|
||||
|
||||
This repository is for all of the software used in the Jukebox project.
|
||||
This repository is for all of the software used in the Jukebox project.
|
||||
|
||||
To set up and run:
|
||||
|
||||
Run `install-deps.sh` to prepare the Python venv and install packages:
|
||||
|
||||
chmod +x ./install-deps.sh && ./install-deps.sh
|
||||
|
||||
Then, you need to source the venv. Run
|
||||
|
||||
source venv/bin/activate
|
||||
|
||||
or
|
||||
|
||||
source venv/bin/activate.fish
|
||||
|
||||
if you use fish shell.
|
||||
|
||||
Then you are good to go to execute any python file.
|
9
install-deps.sh
Executable file
9
install-deps.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/bin/sh
# Prepare the Python virtual environment (creating it on first run) and
# install the packages listed in requirements.txt into it.

# Create the venv only when the ./venv directory does not exist yet; stop
# here if creation fails, since there would be nothing to activate below.
if ! [ -d "venv" ]; then
    ./venv-setup.sh || exit 1
fi

# Use `.` rather than `source`: this script runs under /bin/sh, and `source`
# is a bash extension that strict POSIX shells (e.g. dash) do not provide.
. ./venv/bin/activate

pip3 install -r requirements.txt
|
28
read-datasheet.py
Executable file
28
read-datasheet.py
Executable file
@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Parse Belden catalog techdata datasheets
|
||||
|
||||
import camelot
|
||||
import numpy as np
|
||||
|
||||
def parse(filename):
    """Extract the tables of a PDF datasheet and dump the non-empty ones.

    Every table found by camelot's lattice parser has its completely empty
    rows and columns removed. Tables that still hold data are written to
    ``table<N>.html`` in the current directory and printed to stdout.
    Finally, a contour plot of the first extracted table is saved as
    ``test.png``.

    Args:
        filename: Path of the PDF file to read.
    """
    tables = camelot.read_pdf(
        filename,
        pages="1-end",
        flavor='lattice',
        backend="poppler",
        split_text=False,
        line_scale=100,
        process_background=True,
        resolution=600,
        # The lattice option is spelled "iterations"; the previous spelling
        # "interations" is not a documented camelot keyword.
        iterations=1,
        layout_kwargs={'detect_vertical': False, 'char_margin': 0.5},
        shift_text=['r', 't'],
    )
    print("Total tables extracted:", tables.n)

    # NOTE(review): N counts every extracted table, including ones skipped
    # as empty - confirm this matches the intended numbering.
    for n, table in enumerate(tables):
        # Turn blank cells into NaN so dropna can discard rows/columns that
        # contain nothing, then restore the blanks for display/export.
        table.df.replace('', np.nan, inplace=True)
        table.df.dropna(inplace=True, how="all")
        table.df.dropna(inplace=True, axis="columns", how="all")
        table.df.replace(np.nan, '', inplace=True)

        if not table.df.empty:
            table.to_html("table" + str(n) + ".html")
            print("\nTable " + str(n))
            print(table.df)
        #camelot.plot(table, kind='grid').savefig("test" + str(n) + ".png")

    # tables[0] would raise IndexError when nothing was extracted.
    if tables.n > 0:
        camelot.plot(tables[0], kind='contour').savefig("test.png")
    #tables.export('foo.csv', f='csv')
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Parse the PDF named by the first command-line argument; with no
    # argument, fall back to "test.pdf" in the current directory.
    parse(sys.argv[1] if len(sys.argv) > 1 else "test.pdf")
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@ -0,0 +1,5 @@
|
||||
camelot-py[base]
|
||||
opencv-python
|
||||
pypdf2==2.12.1
|
||||
matplotlib
|
||||
#PyQt5
|
6
venv-setup.sh
Executable file
6
venv-setup.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/sh
# Create the Python virtual environment in ./venv and upgrade its pip.

# python3 matches the interpreter named by the project's scripts; a bare
# `python` command is not installed on every system.
python3 -m venv ./venv

# Use `.` rather than `source`: this script runs under /bin/sh, and `source`
# is a bash extension that strict POSIX shells (e.g. dash) do not provide.
. ./venv/bin/activate

pip install --upgrade pip
|
Loading…
x
Reference in New Issue
Block a user