def parse(filename):
    """Extract the ruled tables from a PDF datasheet and dump them to HTML.

    The PDF is read with camelot's lattice parser using the poppler backend.
    Rows and columns that are entirely empty are removed from each extracted
    table. Every table that still has content is written to
    ``table<N>.html`` in the current working directory and echoed to stdout,
    where ``N`` counts the written tables starting at 0. When at least one
    table was extracted, a contour plot of the first one is saved to
    ``test.png``.

    Args:
        filename: Path of the PDF file to parse.
    """
    tables = camelot.read_pdf(
        filename,
        pages="1-end",
        flavor='lattice',
        backend="poppler",
        split_text=False,
        line_scale=100,
        process_background=True,
        resolution=600,
        # Previously misspelled as "interations"; camelot does not know that
        # name, so the setting never reached the lattice parser.
        iterations=1,
        layout_kwargs={'detect_vertical': False, 'char_margin': 0.5},
        shift_text=['r', 't'],
    )
    print("Total tables extracted:", tables.n)

    written = 0
    for table in tables:
        # Work on the table's own DataFrame in place so that to_html() below
        # exports the cleaned data.
        frame = table.df
        # Mark blank cells as NaN so dropna() can detect empty rows/columns,
        # then turn the surviving NaNs back into blank strings.
        frame.replace('', np.nan, inplace=True)
        frame.dropna(inplace=True, how="all")
        frame.dropna(inplace=True, axis="columns", how="all")
        frame.replace(np.nan, '', inplace=True)

        if frame.empty:
            continue

        table.to_html(f"table{written}.html")
        print(f"\nTable {written}")
        print(frame)
        # Debug aid: camelot.plot(table, kind='grid') renders the detected grid.
        # NOTE(review): the counter is assumed to advance only for tables that
        # were written, keeping the output file numbers contiguous - confirm.
        written += 1

    # Indexing tables[0] on an empty result would raise IndexError, so the
    # diagnostic plot is only produced when something was extracted.
    if tables.n:
        camelot.plot(tables[0], kind='contour').savefig("test.png")