Add basic label generator app, add return values to parsing
This commit is contained in:
parent
5502a5069d
commit
992040e812
4
.gitignore
vendored
4
.gitignore
vendored
@ -16,3 +16,7 @@ output.mp4
|
||||
output.log
|
||||
# images
|
||||
*.png
|
||||
# Built app
|
||||
build
|
||||
# Generated label images
|
||||
labels
|
||||
|
75
get_specs.py
75
get_specs.py
@ -53,15 +53,15 @@ def query_search(partnum, source):
|
||||
a = json.loads(a)
|
||||
idx = -1
|
||||
name = ""
|
||||
for partid in range(len(a["results"])):
|
||||
for partid in range(len(a["results"])-1, -1, -1):
|
||||
name = a["results"][partid]["title"]
|
||||
if name != partnum:
|
||||
if name.find(partnum) >= 0:
|
||||
idx = partid
|
||||
break
|
||||
#break
|
||||
elif partnum.find(name) >= 0:
|
||||
idx = partid
|
||||
break
|
||||
#break
|
||||
|
||||
else:
|
||||
idx = partid
|
||||
@ -113,7 +113,7 @@ def query_search(partnum, source):
|
||||
r = requests.get(url=alphaurl)
|
||||
data = r.json()
|
||||
output = dict()
|
||||
print(data["Results"])
|
||||
#print(data["Results"])
|
||||
|
||||
try:
|
||||
if data["Count"] > 0:
|
||||
@ -136,9 +136,10 @@ def query_search(partnum, source):
|
||||
dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
|
||||
dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
|
||||
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
|
||||
output["partnum"] = partnum.replace("-", "").replace("/", "_")
|
||||
#"test".index()
|
||||
print(output)
|
||||
output["partnum"] = partnum.replace("/", "_") #.replace("-", "").replace("/", "_")
|
||||
#
|
||||
# "test".index()
|
||||
#print(output)
|
||||
return output
|
||||
|
||||
|
||||
@ -156,9 +157,10 @@ def touch(path):
|
||||
|
||||
|
||||
|
||||
def get_multi(partnums, delay=0.25):
|
||||
def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
|
||||
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
|
||||
|
||||
failed = list()
|
||||
actualpartnums = list()
|
||||
def _try_download_datasheet(partnum, output_dir, dstype): # Guess datasheet URL
|
||||
global bartext
|
||||
|
||||
@ -258,8 +260,9 @@ def get_multi(partnums, delay=0.25):
|
||||
fprint("Parsing Datasheet contents of " + partnum)
|
||||
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||
|
||||
read_datasheet.parse(path, output_dir, partnum, dstype)
|
||||
out = read_datasheet.parse(path, output_dir, partnum, dstype)
|
||||
bar(skipped=False)
|
||||
return out
|
||||
else:
|
||||
fprint("Datasheet already parsed for " + partnum)
|
||||
bar.text = "Datasheet already parsed for " + partnum + ".pdf"
|
||||
@ -271,21 +274,23 @@ def get_multi(partnums, delay=0.25):
|
||||
bar(skipped=False)
|
||||
fprint("Parsing Datasheet contents of " + partnum)
|
||||
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||
read_datasheet.parse(path, output_dir, partnum, dstype)
|
||||
out = read_datasheet.parse(path, output_dir, partnum, dstype)
|
||||
bar(skipped=False)
|
||||
return out
|
||||
|
||||
def run_search(partnum):
|
||||
oldpartnum = partnum
|
||||
if dstype == "Alphawire":
|
||||
# For Alphawire, sanitize the part number only for the final result check, because their API is very weird
|
||||
# For the actual search, it must be un-sanitized
|
||||
partnum = partnum.replace("-", "").replace("/","_")
|
||||
output_dir = "cables/" + partnum
|
||||
partnum = partnum.replace("/","_")
|
||||
output_dir = dir + partnum
|
||||
path = output_dir + "/datasheet.pdf"
|
||||
bartext = "Downloading files for part " + partnum
|
||||
bar.text = bartext
|
||||
partnum = oldpartnum
|
||||
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
|
||||
partnum = oldpartnum.replace("_","/")
|
||||
returnval = [partnum, dstype, False, False]
|
||||
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1) or not cache:
|
||||
# Use query
|
||||
search_result = query_search(partnum, dstype)
|
||||
# Try to use belden.com search
|
||||
@ -293,41 +298,49 @@ def get_multi(partnums, delay=0.25):
|
||||
# Download high resolution part image if available and needed
|
||||
#oldpartnum = partnum
|
||||
partnum = search_result["partnum"]
|
||||
output_dir = "cables/" + partnum
|
||||
returnval = [partnum, dstype, False, False]
|
||||
output_dir = dir + partnum
|
||||
path = output_dir + "/datasheet.pdf"
|
||||
bartext = "Downloading files for part " + partnum
|
||||
bar.text = bartext
|
||||
|
||||
if not os.path.exists(output_dir + "/found_part_hires"):
|
||||
if not os.path.exists(output_dir + "/found_part_hires") or not cache:
|
||||
if _download_image(search_result["image"], output_dir):
|
||||
fprint("Downloaded hi-res part image for " + partnum)
|
||||
returnval = [partnum, dstype, True, False]
|
||||
touch(output_dir + "/found_part_hires")
|
||||
else:
|
||||
fprint("Using cached hi-res part image for " + partnum)
|
||||
|
||||
# Download datasheet from provided URL if needed
|
||||
if os.path.exists(path) and os.path.getsize(path) > 1:
|
||||
__use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||
if os.path.exists(path) and os.path.getsize(path) > 1 and cache:
|
||||
out = __use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||
returnval = [partnum, dstype, True, out]
|
||||
|
||||
elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
|
||||
__downloaded_datasheet(partnum, path, output_dir, dstype)
|
||||
out = __downloaded_datasheet(partnum, path, output_dir, dstype)
|
||||
returnval = [partnum, dstype, True, out]
|
||||
|
||||
elif os.path.exists(path) and os.path.getsize(path) > 1:
|
||||
__use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||
elif os.path.exists(path) and os.path.getsize(path) > 1 and cache:
|
||||
out = __use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||
returnval = [partnum, dstype, True, out]
|
||||
|
||||
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
|
||||
elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
|
||||
__downloaded_datasheet(partnum, path, output_dir, dstype)
|
||||
out = __downloaded_datasheet(partnum, path, output_dir, dstype)
|
||||
returnval = [partnum, dstype, False, out]
|
||||
|
||||
# Failed to download with search or guess :(
|
||||
else:
|
||||
return False
|
||||
return True
|
||||
actualpartnums.append(returnval)
|
||||
return returnval
|
||||
|
||||
# We already have a hi-res image and the datasheet - perfect!
|
||||
else:
|
||||
fprint("Using cached hi-res part image for " + partnum)
|
||||
__use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||
out = __use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||
returnval = [partnum, dstype, False, out]
|
||||
return True
|
||||
|
||||
for fullpartnum in partnums:
|
||||
@ -358,7 +371,7 @@ def get_multi(partnums, delay=0.25):
|
||||
if not success:
|
||||
fprint("Failed to download datasheet for part " + partnum)
|
||||
bar.text = "Failed to download datasheet for part " + partnum
|
||||
failed.append(partnum)
|
||||
failed.append((partnum, dstype))
|
||||
bar(skipped=True)
|
||||
bar(skipped=True)
|
||||
time.sleep(delay)
|
||||
@ -366,10 +379,10 @@ def get_multi(partnums, delay=0.25):
|
||||
if len(failed) > 0:
|
||||
fprint("Failed to download:")
|
||||
for partnum in failed:
|
||||
fprint(partnum)
|
||||
return False # Go to manual review upload page
|
||||
fprint(partnum[1] + " " + partnum[0])
|
||||
return False, actualpartnums # Go to manual review upload page
|
||||
else:
|
||||
return True # All cables downloaded; we are good to go
|
||||
return True, actualpartnums # All cables downloaded; we are good to go
|
||||
|
||||
|
||||
|
||||
@ -440,8 +453,8 @@ if __name__ == "__main__":
|
||||
"BLC6D1100007"
|
||||
|
||||
]
|
||||
#print(query_search("FIT-221-1/4", "Alphawire"))
|
||||
get_multi(partnums, 0.25)
|
||||
print(query_search("74002", "Belden"))
|
||||
#get_multi(partnums, 0.25)
|
||||
#query_search("10GXS13", "Belden")
|
||||
|
||||
|
||||
|
BIN
gs10030w64.exe
Normal file
BIN
gs10030w64.exe
Normal file
Binary file not shown.
100
label_generator.py
Executable file
100
label_generator.py
Executable file
@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
|
||||
from get_specs import get_multi
|
||||
import sys
|
||||
import uuid
|
||||
import os
|
||||
import signal
|
||||
from PIL import Image
|
||||
from label_image import generate_code
|
||||
|
||||
|
||||
def input_cable():
    """Prompt for one part number, look it up, and save a label image for it.

    The part number is searched under both the "BL" and "AW" prefixes via
    ``get_multi`` using a fresh scratch directory below ``temp/`` with caching
    disabled. Each result entry is read positionally: ``[0]`` part number,
    ``[1]`` brand name, ``[2]`` truthy when a high-quality image was found,
    ``[3]`` truthy when specs were decoded.

    Returns:
        None when no results were found or after the label has been saved;
        False when the chosen result has no decoded specs.
    """
    for line in (
        "",
        "Use the full part number. Spaces, special characters are allowed. Do not specify the brand.",
        "",
        "Please enter a part number and press enter:",
    ):
        print(line)
    inputnum = input("").strip()
    if len(inputnum) < 2:
        # Input too short to be a part number: clean up and terminate.
        killall_signal(0, 0)
    print("Input part number:", inputnum)
    print("Searching databases for cables...")

    # Search both AW and BL sites
    candidates = ["BL" + inputnum, "AW" + inputnum]
    scratch_dir = "temp/" + str(uuid.uuid4()) + "/"
    _status, results = get_multi(candidates, delay=0.1, dir=scratch_dir, cache=False)
    print("")

    if len(results) == 0:
        print("No results found for part number", inputnum + ". Please try again with a different part number.")
        return
    if len(results) > 1:
        for entry in results:
            print(entry[1], entry[0])
        print("Multiple brands with the same part number! Please type \"b\" for the Belden part number or \"a\" for the Alphawire cable")
        inputbrand = input()
        if inputbrand == "b":
            results = [results[0]]
        elif inputbrand == "a":
            results = [results[1]]
        # Any other answer falls through and the first entry is used below.

    chosen = results[0]
    partnum, brand, has_image, specs = chosen[0], chosen[1], chosen[2], chosen[3]

    print("")
    image_note = "high-quality image" if has_image else "no/low quality image"
    specs_note = "full specs" if specs else "no specs"
    print("Cable result found -", brand, partnum, "with " + image_note + " and " + specs_note)
    print("")

    if not specs:
        print("Unable to decode cable specs. Please try again with a different part number.")
        return False

    print("")
    print("*** Cable details confirmed. Creating label...")
    print("")
    # Brand name -> label prefix; unknown brands get no prefix.
    prefix = {"Belden": "BL", "Alphawire": "AW"}.get(brand, "")
    label = generate_code(prefix + partnum)
    os.makedirs("labels", exist_ok=True)
    label.save("labels/" + prefix + partnum + ".png")
|
||||
|
||||
def delete_folder(path):
    """Recursively delete the directory ``path`` and everything inside it.

    Does nothing when ``path`` is not an existing directory, or when ``path``
    itself is a symbolic link. Symbolic links found inside the tree are
    removed as links and never followed, so data outside ``path`` is left
    untouched.

    Args:
        path: Directory to remove.

    Raises:
        OSError: If an entry cannot be removed (e.g. permission denied).
    """
    # Never descend through a link: only real directories are emptied.
    if os.path.islink(path) or not os.path.isdir(path):
        return

    # Snapshot the listing first so entries are not removed mid-iteration.
    with os.scandir(path) as listing:
        entries = list(listing)

    for entry in entries:
        if entry.is_dir(follow_symlinks=False):
            # A real sub-directory: empty and remove it recursively.
            delete_folder(entry.path)
        else:
            # Regular file or symlink (including links to directories).
            os.remove(entry.path)

    # After removing all contents, remove the directory itself
    os.rmdir(path)
|
||||
|
||||
def killall_signal(a,b):
    """Remove the ``temp`` folder, then kill the current process.

    The two positional parameters are accepted and ignored, which lets this
    function be registered as a ``signal.signal`` handler.
    """
    delete_folder("temp")
    own_pid = os.getpid()
    os.kill(own_pid, 9)  # dirty kill of self
|
||||
|
||||
if __name__ == "__main__":
    # Clean up the temp folder and exit on Ctrl+C or a termination request.
    for handled_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(handled_signal, killall_signal)

    print("Welcome to the Jukebox cable utility. This tool will allow you to verify Belden & Alphawire cable part numbers and create labels for samples in the Jukebox.")
    print("This tool requires internet access to download cable specifications and verify part numbers.")

    # Prompt forever; each round starts with an empty temp folder.
    while True:
        delete_folder("temp")
        input_cable()
|
||||
|
238
label_image.py
Executable file
238
label_image.py
Executable file
@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
|
||||
from util import fprint
|
||||
|
||||
from PIL import Image
|
||||
from PIL import ImageDraw
|
||||
#import cv2
|
||||
import numpy as np
|
||||
#import math
|
||||
|
||||
|
||||
|
||||
# Copied from http://en.wikipedia.org/wiki/Code_128
# Columns: Value  Weights  128A  128B  128C
# Raw string: the row for value 60 contains literal backslashes, which would
# otherwise be parsed as (invalid) escape sequences.
CODE128_CHART = r"""
0 212222 space space 00
1 222122 ! ! 01
2 222221 " " 02
3 121223 # # 03
4 121322 $ $ 04
5 131222 % % 05
6 122213 & & 06
7 122312 ' ' 07
8 132212 ( ( 08
9 221213 ) ) 09
10 221312 * * 10
11 231212 + + 11
12 112232 , , 12
13 122132 - - 13
14 122231 . . 14
15 113222 / / 15
16 123122 0 0 16
17 123221 1 1 17
18 223211 2 2 18
19 221132 3 3 19
20 221231 4 4 20
21 213212 5 5 21
22 223112 6 6 22
23 312131 7 7 23
24 311222 8 8 24
25 321122 9 9 25
26 321221 : : 26
27 312212 ; ; 27
28 322112 < < 28
29 322211 = = 29
30 212123 > > 30
31 212321 ? ? 31
32 232121 @ @ 32
33 111323 A A 33
34 131123 B B 34
35 131321 C C 35
36 112313 D D 36
37 132113 E E 37
38 132311 F F 38
39 211313 G G 39
40 231113 H H 40
41 231311 I I 41
42 112133 J J 42
43 112331 K K 43
44 132131 L L 44
45 113123 M M 45
46 113321 N N 46
47 133121 O O 47
48 313121 P P 48
49 211331 Q Q 49
50 231131 R R 50
51 213113 S S 51
52 213311 T T 52
53 213131 U U 53
54 311123 V V 54
55 311321 W W 55
56 331121 X X 56
57 312113 Y Y 57
58 312311 Z Z 58
59 332111 [ [ 59
60 314111 \ \ 60
61 221411 ] ] 61
62 431111 ^ ^ 62
63 111224 _ _ 63
64 111422 NUL ` 64
65 121124 SOH a 65
66 121421 STX b 66
67 141122 ETX c 67
68 141221 EOT d 68
69 112214 ENQ e 69
70 112412 ACK f 70
71 122114 BEL g 71
72 122411 BS h 72
73 142112 HT i 73
74 142211 LF j 74
75 241211 VT k 75
76 221114 FF l 76
77 413111 CR m 77
78 241112 SO n 78
79 134111 SI o 79
80 111242 DLE p 80
81 121142 DC1 q 81
82 121241 DC2 r 82
83 114212 DC3 s 83
84 124112 DC4 t 84
85 124211 NAK u 85
86 411212 SYN v 86
87 421112 ETB w 87
88 421211 CAN x 88
89 212141 EM y 89
90 214121 SUB z 90
91 412121 ESC { 91
92 111143 FS | 92
93 111341 GS } 93
94 131141 RS ~ 94
95 114113 US DEL 95
96 114311 FNC3 FNC3 96
97 411113 FNC2 FNC2 97
98 411311 ShiftB ShiftA 98
99 113141 CodeC CodeC 99
100 114131 CodeB FNC4 CodeB
101 311141 FNC4 CodeA CodeA
102 411131 FNC1 FNC1 FNC1
103 211412 StartA StartA StartA
104 211214 StartB StartB StartB
105 211232 StartC StartC StartC
106 2331112 Stop Stop Stop
""".split()

# The flattened chart holds five tokens per row; slice out each column.
VALUES = [int(value) for value in CODE128_CHART[0::5]]
# Symbol value -> string of bar/space module widths.
WEIGHTS = dict(zip(VALUES, CODE128_CHART[1::5]))
# Character (or control/function name) -> symbol value, per code set.
CODE128A = dict(zip(CODE128_CHART[2::5], VALUES))
CODE128B = dict(zip(CODE128_CHART[3::5], VALUES))
CODE128C = dict(zip(CODE128_CHART[4::5], VALUES))

# The chart spells the space character as the word "space" (it is split on
# whitespace); re-key it to the real character in sets A and B.
for charset in (CODE128A, CODE128B):
    charset[' '] = charset.pop('space')
|
||||
|
||||
|
||||
|
||||
def generate_code(data, show=False, check=False):
    """Build the label image for ``data`` and return it.

    Args:
        data: Value passed to ``code128_image`` to be rendered.
        show: When truthy, open the image with its ``show()`` method.
        check: When truthy, decode the image with pyzbar (imported lazily so
            it is only needed for this option) and print the decoded text.

    Returns:
        The image object produced by ``code128_image``.
    """
    label = code128_image(data)

    if show:
        label.show()

    if check:
        from pyzbar.pyzbar import decode
        from pyzbar.pyzbar import ZBarSymbol
        symbols_found = decode(label, symbols=[ZBarSymbol.CODE128])
        print(symbols_found[0].data.decode('ascii'))

    return label
|
||||
|
||||
def code128_format(data):
    """
    Encode ``str(data)`` as a list of Code 128 symbol values.

    The list starts with a StartC or StartB value, switches between code
    sets C (digit pairs) and B (single characters) as the text requires, and
    ends with the modulo-103 checksum followed by the Stop value.
    """
    text = str(data)
    length = len(text)

    # Start Code: begin in set C when the text opens with digits.
    starts_numeric = text[:2].isdigit()
    charset = CODE128C if starts_numeric else CODE128B
    codes = [charset['StartC' if starts_numeric else 'StartB']]

    # Data
    pos = 0
    while pos < length:
        remaining = length - pos
        if charset is CODE128C:
            pair = text[pos:pos + 2]
            if pair.isdigit() and remaining > 1:
                # Set C packs two digits into one symbol.
                codes.append(int(pair))
                pos += 2
            else:
                # No full digit pair left here: fall back to set B.
                codes.append(charset['CodeB'])
                charset = CODE128B
            continue

        if text[pos:pos + 4].isdigit() and remaining >= 4:
            # A run of at least four digits: switch to set C.
            codes.append(charset['CodeC'])
            charset = CODE128C
            continue

        # Set B encodes one character per symbol.
        codes.append(charset[text[pos]])
        pos += 1

    # Checksum: the start symbol and the first data symbol both weigh 1.
    checksum = sum(max(position, 1) * code for position, code in enumerate(codes))
    codes.append(checksum % 103)

    # Stop Code
    codes.append(charset['Stop'])
    return codes
|
||||
|
||||
def code128_image(data, height=100, thickness=3, quiet_zone=False):
    """Draw a Code 128 barcode inside a blue circle on a square white image.

    Args:
        data: Text to encode, or a sequence of symbol values that already
            ends with the Stop value (then used as-is).
        height: Accepted but not used by the drawing code.
        thickness: Multiplier applied to every bar/space module width.
        quiet_zone: When truthy, widen the layout by 20 * thickness and shift
            the first bar right by 10 * thickness.

    Returns:
        The rendered RGB image.
    """
    if data[-1] != CODE128B['Stop']:
        data = code128_format(data)

    # One entry per bar or space, alternating, starting with a bar.
    barcode_widths = [
        int(module) * thickness
        for code in data
        for module in WEIGHTS[code]
    ]
    width = sum(barcode_widths)
    x = 0

    if quiet_zone:
        width += 20 * thickness
        x = 10 * thickness

    # White RGB canvas whose side is ten times the layout width.
    side = int(width * 10)
    img = Image.new('RGB', (side, side), 'white')
    draw = ImageDraw.Draw(img)

    left_edge = int(width * 3)
    bar_top = width * 6.25
    bar_bottom = width * 7
    for index, module_width in enumerate(barcode_widths):
        scaled = module_width * 4
        if index % 2 == 0:
            # Even positions are bars; odd positions are the gaps between them.
            draw.rectangle(((x + left_edge, bar_top), (x + left_edge + scaled - 1, bar_bottom)), fill='black')
        x += scaled

    # Full 0-360 degree blue arc, i.e. a circle, inset from the canvas edge.
    inset = int(width / 1.4)
    near = width + inset
    far = width * 9 - inset
    draw.arc(((near, near), (far, far)), 0, 360, fill='blue', width=int(width / 8))
    return img
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: build one label without showing or verifying it.
    sample_label = generate_code("BL10Gj34qXS13", False, False)
    print(sample_label)
|
@ -1,9 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Parse Belden catalog techdata datasheets
|
||||
# Parse Belden (100%) & Alphawire (75%) catalog techdata datasheets
|
||||
|
||||
import pandas as pd
|
||||
pd.set_option('future.no_silent_downcasting', True)
|
||||
from PyPDF2 import PdfReader
|
||||
import camelot
|
||||
import numpy as np
|
||||
@ -13,12 +12,25 @@ import json
|
||||
from util import fprint
|
||||
import uuid
|
||||
from util import run_cmd
|
||||
from util import win32
|
||||
import os
|
||||
import glob
|
||||
import sys
|
||||
|
||||
def touch(path):
    """Create ``path`` if it does not exist and set its timestamps to now.

    Existing file contents are preserved because the file is opened in
    append mode.
    """
    with open(path, mode='a'):
        os.utime(path, times=None)
|
||||
|
||||
def find_data_file(filename):
    """Return the path of a data file shipped alongside the application.

    When ``sys.frozen`` is set (the application is frozen), the file is
    resolved next to ``sys.executable``; otherwise it is resolved next to
    this source file.
    """
    is_frozen = getattr(sys, "frozen", False)
    # Change the non-frozen anchor to match where you store your data files.
    anchor = sys.executable if is_frozen else __file__
    return os.path.join(os.path.dirname(anchor), filename)
|
||||
|
||||
def extract_table_name(table_start, searchpage, reader, dstype, fallbackname):
|
||||
if dstype == "Belden":
|
||||
ymin = table_start
|
||||
@ -41,12 +53,25 @@ def extract_table_name(table_start, searchpage, reader, dstype, fallbackname):
|
||||
#fprint(text_body)
|
||||
|
||||
def parse(filename, output_dir, partnum, dstype):
|
||||
|
||||
tables = []
|
||||
# Extract table data
|
||||
if dstype == "Belden":
|
||||
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="poppler", split_text=False, line_scale=100, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': False, 'char_margin': 0.5}, shift_text=['r', 't'])
|
||||
elif dstype == "Alphawire":
|
||||
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="poppler", split_text=False, line_scale=50, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': True, 'char_margin': 0.5}, shift_text=['l', 't'])
|
||||
try:
|
||||
if dstype == "Belden":
|
||||
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="ghostscript", split_text=False, line_scale=100, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': False, 'char_margin': 0.5}, shift_text=['r', 't'])
|
||||
elif dstype == "Alphawire":
|
||||
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="ghostscript", split_text=False, line_scale=50, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': True, 'char_margin': 0.5}, shift_text=['l', 't'])
|
||||
except OSError as e:
|
||||
print(e)
|
||||
if win32:
|
||||
print("Ghostscript is not installed! Launching installer...")
|
||||
#subprocess.run([r".\\gs10030w64.exe"])
|
||||
os.system(r'''Powershell -Command "& { Start-Process \"''' + find_data_file("gs10030w64.exe") + r'''\" -Verb RunAs } " ''')
|
||||
# Will return once file launched...
|
||||
print("Once the install is completed, try again.")
|
||||
return False
|
||||
else:
|
||||
print("Ghostscript is not installed. You can install it with e.g. apt install ghostscript for Debian-based systems.")
|
||||
return False
|
||||
#fprint("Total tables extracted:", tables.n)
|
||||
n = 0
|
||||
#pagenum = 0
|
||||
@ -54,13 +79,14 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
page = reader.pages[0]
|
||||
table_list = {}
|
||||
table_list_raw = {}
|
||||
|
||||
pd.set_option('future.no_silent_downcasting', True)
|
||||
for table in tables:
|
||||
#with pd.options.context("future.no_silent_downcasting", True):
|
||||
table.df.infer_objects(copy=False)
|
||||
table.df.replace('', np.nan, inplace=True)
|
||||
table.df = table.df.replace('', np.nan).infer_objects(copy=False)
|
||||
table.df.dropna(inplace=True, how="all")
|
||||
table.df.dropna(inplace=True, axis="columns", how="all")
|
||||
table.df.replace(np.nan, '', inplace=True)
|
||||
table.df = table.df.replace(np.nan, '').infer_objects(copy=False)
|
||||
|
||||
if not table.df.empty:
|
||||
#fprint("\nTable " + str(n))
|
||||
@ -281,7 +307,12 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
|
||||
#print(output_table)
|
||||
|
||||
run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable!
|
||||
#run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable!
|
||||
pattern = os.path.join(output_dir, '*.json')
|
||||
json_files = glob.glob(pattern)
|
||||
for file_path in json_files:
|
||||
os.remove(file_path)
|
||||
#print(f"Deleted {file_path}")
|
||||
with open(output_dir + "/search_" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
|
||||
json.dump(output_table["searchspecs"], json_file)
|
||||
with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file:
|
||||
@ -289,7 +320,7 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
|
||||
#print(json.dumps(output_table, indent=2))
|
||||
touch(output_dir + "/parsed") # mark as parsed
|
||||
return output_table
|
||||
return True
|
||||
|
||||
|
||||
def flatten(tables):
|
||||
@ -338,4 +369,4 @@ def flatten(tables):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parse("cables/3050/datasheet.pdf", "cables/3050", "3050", "Alphawire")
|
||||
print(parse("cables/3050/datasheet.pdf", "cables/3050", "3050", "Alphawire"))
|
@ -1,5 +1,5 @@
|
||||
# Runtime
|
||||
camelot-py[base]
|
||||
camelot-py
|
||||
opencv-python
|
||||
pypdf2==2.12.1
|
||||
alive-progress
|
||||
@ -15,6 +15,13 @@ websockets
|
||||
numpy
|
||||
scipy
|
||||
ipywidgets
|
||||
pandas
|
||||
pyarrow
|
||||
ghostscript
|
||||
pyzbar
|
||||
|
||||
|
||||
# Development
|
||||
matplotlib
|
||||
#cx_Freeze # uncomment if building label generator app
|
||||
# requires windows 10 SDK, visual C++, etc
|
31
setup-label-generator.py
Normal file
31
setup-label-generator.py
Normal file
@ -0,0 +1,31 @@
|
||||
import sys
|
||||
from cx_Freeze import setup, Executable
|
||||
|
||||
debug = True
|
||||
debug = not debug
|
||||
# Dependencies are automatically detected, but it might need fine tuning.
|
||||
# "packages": ["os"] is used as example only
|
||||
|
||||
import opcode
|
||||
import os
|
||||
import distutils
|
||||
#distutils_path = os.path.join(os.path.dirname(opcode.__file__), 'distutils')
|
||||
# Options handed to cx_Freeze's build_exe command.
build_exe_options = {
    "include_msvcr": True,
    "packages": ["camelot", "setuptools"],
    "optimize": 0,
    "silent": True,
    "include_files": ["gs10030w64.exe"],
    "excludes": ["scipy", "torch"],
}

# base="Win32GUI" should be used only for Windows GUI app; this tool is
# interactive on the terminal, so it is always built as a console program.
base = "console"

# Only the non-Linux/macOS target name carries the .exe suffix.
if sys.platform in ("linux", "linux2", "darwin"):
    name = "jukebox-labelgen"
else:
    name = "jukebox-labelgen.exe"

label_generator_exe = Executable(
    "label_generator.py",
    base=base,
    uac_admin=False,
    target_name=name,
)

setup(
    name="IP Pigeon",
    version="0.2.4",
    description="IP Pigeon client application",
    options={"build_exe": build_exe_options},
    executables=[label_generator_exe],
)
|
Loading…
x
Reference in New Issue
Block a user