Add basic label generator app, add return values to parsing

This commit is contained in:
Cole Deck 2024-03-15 20:31:37 -05:00
parent 5502a5069d
commit 992040e812
9 changed files with 470 additions and 46 deletions

4
.gitignore vendored
View File

@ -16,3 +16,7 @@ output.mp4
output.log
# images
*.png
# Built app
build
# Generated label images
labels

View File

@ -53,15 +53,15 @@ def query_search(partnum, source):
a = json.loads(a)
idx = -1
name = ""
for partid in range(len(a["results"])):
for partid in range(len(a["results"])-1, -1, -1):
name = a["results"][partid]["title"]
if name != partnum:
if name.find(partnum) >= 0:
idx = partid
break
#break
elif partnum.find(name) >= 0:
idx = partid
break
#break
else:
idx = partid
@ -113,7 +113,7 @@ def query_search(partnum, source):
r = requests.get(url=alphaurl)
data = r.json()
output = dict()
print(data["Results"])
#print(data["Results"])
try:
if data["Count"] > 0:
@ -136,9 +136,10 @@ def query_search(partnum, source):
dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
output["partnum"] = partnum.replace("-", "").replace("/", "_")
#"test".index()
print(output)
output["partnum"] = partnum.replace("/", "_") #.replace("-", "").replace("/", "_")
#
# "test".index()
#print(output)
return output
@ -156,9 +157,10 @@ def touch(path):
def get_multi(partnums, delay=0.25):
def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
failed = list()
actualpartnums = list()
def _try_download_datasheet(partnum, output_dir, dstype): # Guess datasheet URL
global bartext
@ -258,8 +260,9 @@ def get_multi(partnums, delay=0.25):
fprint("Parsing Datasheet contents of " + partnum)
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir, partnum, dstype)
out = read_datasheet.parse(path, output_dir, partnum, dstype)
bar(skipped=False)
return out
else:
fprint("Datasheet already parsed for " + partnum)
bar.text = "Datasheet already parsed for " + partnum + ".pdf"
@ -271,21 +274,23 @@ def get_multi(partnums, delay=0.25):
bar(skipped=False)
fprint("Parsing Datasheet contents of " + partnum)
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir, partnum, dstype)
out = read_datasheet.parse(path, output_dir, partnum, dstype)
bar(skipped=False)
return out
def run_search(partnum):
oldpartnum = partnum
if dstype == "Alphawire":
# For alphawire, sanitize the part number for only the final result check, because their API is very weird
# For the actual search, it must be un-sanitized
partnum = partnum.replace("-", "").replace("/","_")
output_dir = "cables/" + partnum
partnum = partnum.replace("/","_")
output_dir = dir + partnum
path = output_dir + "/datasheet.pdf"
bartext = "Downloading files for part " + partnum
bar.text = bartext
partnum = oldpartnum
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
partnum = oldpartnum.replace("_","/")
returnval = [partnum, dstype, False, False]
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1) or not cache:
# Use query
search_result = query_search(partnum, dstype)
# Try to use belden.com search
@ -293,41 +298,49 @@ def get_multi(partnums, delay=0.25):
# Download high resolution part image if available and needed
#oldpartnum = partnum
partnum = search_result["partnum"]
output_dir = "cables/" + partnum
returnval = [partnum, dstype, False, False]
output_dir = dir + partnum
path = output_dir + "/datasheet.pdf"
bartext = "Downloading files for part " + partnum
bar.text = bartext
if not os.path.exists(output_dir + "/found_part_hires"):
if not os.path.exists(output_dir + "/found_part_hires") or not cache:
if _download_image(search_result["image"], output_dir):
fprint("Downloaded hi-res part image for " + partnum)
returnval = [partnum, dstype, True, False]
touch(output_dir + "/found_part_hires")
else:
fprint("Using cached hi-res part image for " + partnum)
# Download datasheet from provided URL if needed
if os.path.exists(path) and os.path.getsize(path) > 1:
__use_cached_datasheet(partnum, path, output_dir, dstype)
if os.path.exists(path) and os.path.getsize(path) > 1 and cache:
out = __use_cached_datasheet(partnum, path, output_dir, dstype)
returnval = [partnum, dstype, True, out]
elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
__downloaded_datasheet(partnum, path, output_dir, dstype)
out = __downloaded_datasheet(partnum, path, output_dir, dstype)
returnval = [partnum, dstype, True, out]
elif os.path.exists(path) and os.path.getsize(path) > 1:
__use_cached_datasheet(partnum, path, output_dir, dstype)
elif os.path.exists(path) and os.path.getsize(path) > 1 and cache:
out = __use_cached_datasheet(partnum, path, output_dir, dstype)
returnval = [partnum, dstype, True, out]
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
__downloaded_datasheet(partnum, path, output_dir, dstype)
out = __downloaded_datasheet(partnum, path, output_dir, dstype)
returnval = [partnum, dstype, False, out]
# Failed to download with search or guess :(
else:
return False
return True
actualpartnums.append(returnval)
return returnval
# We already have a hi-res image and the datasheet - perfect!
else:
fprint("Using cached hi-res part image for " + partnum)
__use_cached_datasheet(partnum, path, output_dir, dstype)
out = __use_cached_datasheet(partnum, path, output_dir, dstype)
returnval = [partnum, dstype, False, out]
return True
for fullpartnum in partnums:
@ -358,7 +371,7 @@ def get_multi(partnums, delay=0.25):
if not success:
fprint("Failed to download datasheet for part " + partnum)
bar.text = "Failed to download datasheet for part " + partnum
failed.append(partnum)
failed.append((partnum, dstype))
bar(skipped=True)
bar(skipped=True)
time.sleep(delay)
@ -366,10 +379,10 @@ def get_multi(partnums, delay=0.25):
if len(failed) > 0:
fprint("Failed to download:")
for partnum in failed:
fprint(partnum)
return False # Go to manual review upload page
fprint(partnum[1] + " " + partnum[0])
return False, actualpartnums # Go to manual review upload page
else:
return True # All cables downloaded; we are good to go
return True, actualpartnums # All cables downloaded; we are good to go
@ -440,8 +453,8 @@ if __name__ == "__main__":
"BLC6D1100007"
]
#print(query_search("FIT-221-1/4", "Alphawire"))
get_multi(partnums, 0.25)
print(query_search("74002", "Belden"))
#get_multi(partnums, 0.25)
#query_search("10GXS13", "Belden")

BIN
gs10030w64.exe Normal file

Binary file not shown.

100
label_generator.py Executable file
View File

@ -0,0 +1,100 @@
#!/usr/bin/env python3
from get_specs import get_multi
import sys
import uuid
import os
import signal
from PIL import Image
from label_image import generate_code
def input_cable():
    """Prompt for a part number, look it up, and write a barcode label image.

    The part number is searched under both the Belden ("BL") and Alphawire
    ("AW") prefixes via ``get_multi``, using a fresh per-run directory under
    ``temp/`` with caching disabled. If both brands return a result, the user
    is asked to choose one; the question is repeated until "a" or "b" is
    entered. When specs were decoded for the chosen cable, a Code 128 label is
    generated and saved as a PNG under ``labels/``.

    An input shorter than two characters calls ``killall_signal``, which
    cleans up ``temp`` and terminates the process.

    Returns:
        True when a label image was written, False when no cable was found or
        its specs could not be decoded.
    """
    print("")
    print("Use the full part number. Spaces, special characters are allowed. Do not specify the brand.")
    print("")
    print("Please enter a part number and press enter:")
    inputnum = input("").strip()
    if len(inputnum) < 2:
        killall_signal(0, 0)
    print("Input part number:", inputnum)
    print("Searching databases for cables...")
    # Search both AW and BL sites
    status, output = get_multi(["BL"+inputnum, "AW"+inputnum], delay=0.1, dir="temp/" + str(uuid.uuid4()) + "/", cache=False)
    print("")

    if len(output) == 0:
        print("No results found for part number", inputnum + ". Please try again with a different part number.")
        return False

    if len(output) > 1:
        for i in output:
            print(i[1], i[0])
        # Results come back in query order: Belden first, Alphawire second.
        brand_index = {"b": 0, "a": 1}
        inputbrand = None
        # Keep asking: an unrecognised answer must not silently select a brand.
        while inputbrand not in brand_index:
            print("Multiple brands with the same part number! Please type \"b\" for the Belden part number or \"a\" for the Alphawire cable")
            inputbrand = input().strip().lower()
        output = [output[brand_index[inputbrand]]]

    # Each result is [part number, brand, hi-res image found, parsed specs].
    output = output[0]
    print("")
    if output[2] and output[3]:
        print("Cable result found -",output[1], output[0], "with high-quality image and full specs")
    elif output[2]:
        print("Cable result found -",output[1], output[0], "with high-quality image and no specs")
    elif output[3]:
        print("Cable result found -",output[1], output[0], "with no/low quality image and full specs")
    else:
        print("Cable result found -",output[1], output[0], "with no/low quality image and no specs")
    print("")

    if not output[3]:
        print("Unable to decode cable specs. Please try again with a different part number.")
        return False

    print("")
    print("*** Cable details confirmed. Creating label...")
    print("")
    imgstr = ""
    if output[1] == "Belden":
        imgstr = "BL"
    elif output[1] == "Alphawire":
        imgstr = "AW"
    img = generate_code(imgstr + output[0])
    os.makedirs("labels", exist_ok=True)
    # The barcode keeps the real part number; only the file name is sanitised,
    # because a path separator in it would point into a missing sub-directory.
    safe_name = (imgstr + output[0]).replace("/", "_").replace("\\", "_")
    img.save("labels/" + safe_name + ".png")
    return True
def delete_folder(path):
    """Recursively delete the directory at ``path``.

    Does nothing when ``path`` is missing, is not a directory, or is itself a
    symbolic link. Symbolic links found inside the tree are removed as links;
    the directories they point to are never entered or emptied.

    Args:
        path: Directory to remove together with everything below it.
    """
    # Local import so this function does not depend on the module's import block.
    import shutil

    if os.path.islink(path) or not os.path.isdir(path):
        return
    shutil.rmtree(path)
def killall_signal(a,b):
    """Signal-handler style exit: remove the ``temp`` folder, then kill this process.

    ``a`` and ``b`` are accepted so the function matches the two-argument
    handler signature used by ``signal.signal``; neither is read.
    """
    delete_folder("temp")
    own_pid = os.getpid()
    os.kill(own_pid, 9) # dirty kill of self
if __name__ == "__main__":
    # Route both interrupt and termination requests through the cleanup handler.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, killall_signal)
    print("Welcome to the Jukebox cable utility. This tool will allow you to verify Belden & Alphawire cable part numbers and create labels for samples in the Jukebox.")
    print("This tool requires internet access to download cable specifications and verify part numbers.")
    #print("Use Ctrl+C to exit.")
    # Serve lookups forever, clearing leftover downloads before each one.
    while True:
        delete_folder("temp")
        input_cable()

238
label_image.py Executable file
View File

@ -0,0 +1,238 @@
#!/usr/bin/env python3
from util import fprint
from PIL import Image
from PIL import ImageDraw
#import cv2
import numpy as np
#import math
# Copied from http://en.wikipedia.org/wiki/Code_128
# Columns: Value  Weights  128A  128B  128C
# Raw string: row 60 contains literal backslashes, which would otherwise be
# read as (invalid) escape sequences and trigger a warning on modern Python.
CODE128_CHART = r"""
0 212222 space space 00
1 222122 ! ! 01
2 222221 " " 02
3 121223 # # 03
4 121322 $ $ 04
5 131222 % % 05
6 122213 & & 06
7 122312 ' ' 07
8 132212 ( ( 08
9 221213 ) ) 09
10 221312 * * 10
11 231212 + + 11
12 112232 , , 12
13 122132 - - 13
14 122231 . . 14
15 113222 / / 15
16 123122 0 0 16
17 123221 1 1 17
18 223211 2 2 18
19 221132 3 3 19
20 221231 4 4 20
21 213212 5 5 21
22 223112 6 6 22
23 312131 7 7 23
24 311222 8 8 24
25 321122 9 9 25
26 321221 : : 26
27 312212 ; ; 27
28 322112 < < 28
29 322211 = = 29
30 212123 > > 30
31 212321 ? ? 31
32 232121 @ @ 32
33 111323 A A 33
34 131123 B B 34
35 131321 C C 35
36 112313 D D 36
37 132113 E E 37
38 132311 F F 38
39 211313 G G 39
40 231113 H H 40
41 231311 I I 41
42 112133 J J 42
43 112331 K K 43
44 132131 L L 44
45 113123 M M 45
46 113321 N N 46
47 133121 O O 47
48 313121 P P 48
49 211331 Q Q 49
50 231131 R R 50
51 213113 S S 51
52 213311 T T 52
53 213131 U U 53
54 311123 V V 54
55 311321 W W 55
56 331121 X X 56
57 312113 Y Y 57
58 312311 Z Z 58
59 332111 [ [ 59
60 314111 \ \ 60
61 221411 ] ] 61
62 431111 ^ ^ 62
63 111224 _ _ 63
64 111422 NUL ` 64
65 121124 SOH a 65
66 121421 STX b 66
67 141122 ETX c 67
68 141221 EOT d 68
69 112214 ENQ e 69
70 112412 ACK f 70
71 122114 BEL g 71
72 122411 BS h 72
73 142112 HT i 73
74 142211 LF j 74
75 241211 VT k 75
76 221114 FF l 76
77 413111 CR m 77
78 241112 SO n 78
79 134111 SI o 79
80 111242 DLE p 80
81 121142 DC1 q 81
82 121241 DC2 r 82
83 114212 DC3 s 83
84 124112 DC4 t 84
85 124211 NAK u 85
86 411212 SYN v 86
87 421112 ETB w 87
88 421211 CAN x 88
89 212141 EM y 89
90 214121 SUB z 90
91 412121 ESC { 91
92 111143 FS | 92
93 111341 GS } 93
94 131141 RS ~ 94
95 114113 US DEL 95
96 114311 FNC3 FNC3 96
97 411113 FNC2 FNC2 97
98 411311 ShiftB ShiftA 98
99 113141 CodeC CodeC 99
100 114131 CodeB FNC4 CodeB
101 311141 FNC4 CodeA CodeA
102 411131 FNC1 FNC1 FNC1
103 211412 StartA StartA StartA
104 211214 StartB StartB StartB
105 211232 StartC StartC StartC
106 2331112 Stop Stop Stop
""".split()

# The chart is a flat token list with five tokens per row, so each column is
# recovered by slicing with a stride of 5.
VALUES = [int(value) for value in CODE128_CHART[0::5]]
# Symbol value -> string of bar/space module widths.
WEIGHTS = dict(zip(VALUES, CODE128_CHART[1::5]))
# Character (or control name) -> symbol value, one table per code set.
CODE128A = dict(zip(CODE128_CHART[2::5], VALUES))
CODE128B = dict(zip(CODE128_CHART[3::5], VALUES))
CODE128C = dict(zip(CODE128_CHART[4::5], VALUES))

# The chart spells the blank out as "space" so that split() keeps it as a
# token; re-key it to a real space character for lookups.
for charset in (CODE128A, CODE128B):
    charset[' '] = charset.pop('space')
def generate_code(data, show=False, check=False):
    """Render ``data`` as a barcode label image and return it.

    Args:
        data: Value handed to ``code128_image``.
        show: When true, open the image with its ``show()`` method.
        check: When true, decode the image with pyzbar (Code 128 only) and
            print the first decoded payload as ASCII text.

    Returns:
        The image object produced by ``code128_image``.
    """
    label = code128_image(data)
    if show:
        label.show()
    if check:
        # Imported here so pyzbar is only needed when verification is requested.
        from pyzbar.pyzbar import decode
        from pyzbar.pyzbar import ZBarSymbol
        decoded = decode(label, symbols=[ZBarSymbol.CODE128])
        payload = decoded[0].data
        print(payload.decode('ascii'))
    return label
def code128_format(data):
    """
    Generate a compact Code 128 symbol sequence from ASCII text.

    Runs of ASCII digits are packed two per symbol using code set C: the
    sequence starts in C when the text begins with two digits, and switches
    from B to C when at least four digits follow. Everything else is encoded
    one character per symbol using code set B.

    Args:
        data: Value to encode; it is converted with ``str()`` first.

    Returns:
        List of integer symbol values: start code, data symbols, the
        modulo-103 checksum, and the stop code.

    Raises:
        KeyError: if a character has no entry in code set B.
    """
    text = str(data)
    pos = 0
    length = len(text)

    def digits_at(start, count):
        # str.isdigit() alone also accepts non-ASCII digits (and superscripts),
        # which code set C cannot represent; require a full run of ASCII digits.
        chunk = text[start:start + count]
        return len(chunk) == count and chunk.isascii() and chunk.isdigit()

    # Start Code: only begin in code set C when a complete digit pair exists,
    # otherwise a lone leading digit would cost an extra switch symbol.
    if digits_at(0, 2):
        charset = CODE128C
        codes = [charset['StartC']]
    else:
        charset = CODE128B
        codes = [charset['StartB']]

    # Data
    while pos < length:
        if charset is CODE128C:
            if digits_at(pos, 2):
                # Encode Code C two characters at a time
                codes.append(int(text[pos:pos+2]))
                pos += 2
            else:
                # Switch to Code B
                codes.append(charset['CodeB'])
                charset = CODE128B
        elif digits_at(pos, 4):
            # Switch to Code C
            codes.append(charset['CodeC'])
            charset = CODE128C
        else:
            # Encode Code B one character at a time
            codes.append(charset[text[pos]])
            pos += 1

    # Checksum: the start code and the first data symbol both carry weight 1,
    # every later symbol is weighted by its position.
    checksum = sum(max(weight, 1) * code for weight, code in enumerate(codes))
    codes.append(checksum % 103)

    # Stop Code
    codes.append(charset['Stop'])
    return codes
def code128_image(data, height=100, thickness=3, quiet_zone=False):
    """Draw a Code 128 barcode inside a blue ring on a square white image.

    Args:
        data: Text to encode, or an already-encoded list of symbol values
            ending with the stop code. Empty input is encoded as an empty
            barcode instead of raising IndexError.
        height: Accepted for interface compatibility; the drawing code below
            does not read it.
        thickness: Multiplier applied to every bar/space module width.
        quiet_zone: When true, widen the layout by 20 * thickness and start
            the bars 10 * thickness further right.

    Returns:
        A PIL ``Image`` in RGB mode whose sides are ten times the computed
        barcode width.
    """
    if not data:
        # data[-1] would fail on empty input; encode an empty payload instead.
        data = code128_format("")
    elif not data[-1] == CODE128B['Stop']:
        data = code128_format(data)

    # One entry per bar or space, alternating and starting with a bar.
    barcode_widths = [int(weight) * thickness for code in data for weight in WEIGHTS[code]]
    width = sum(barcode_widths)
    x = 0

    if quiet_zone:
        width += 20 * thickness
        x = 10 * thickness

    # White RGB canvas; every position below is expressed relative to `width`.
    img = Image.new('RGB', (int(width * 10), int(width * 10)), 'white')
    draw = ImageDraw.Draw(img)
    draw_bar = True
    for bwidth in barcode_widths:
        # Bars are drawn at four times their module width.
        bwidth *= 4
        if draw_bar:
            draw.rectangle(((x + int(width * 3), width*6.25), (x + int(width * 3) + bwidth - 1, width*7)), fill='black')
        draw_bar = not draw_bar
        x += bwidth

    # Full 0-360 degree arc, i.e. a blue ring, inset from the canvas edges.
    #draw.arc(((width - width/5, width - width/5), (width*9 + width/5, width*9 + width/5)),0,360,fill='blue', width = int(width/8))
    draw.arc(((width+int(width / 1.4), width+int(width / 1.4)), (width*9-int(width / 1.4), width*9-int(width / 1.4))),0,360,fill='blue', width = int(width/8))
    return img
if __name__ == "__main__":
    # Manual smoke test: build one label without displaying or verifying it.
    #print(generate_code("BL10GXS13"))
    #print(generate_code("BL10GXgd35j35S13"))
    #print(generate_code("BL10GX54hS13"))
    sample_label = generate_code("BL10Gj34qXS13", False, False)
    print(sample_label)
    #print(generate_code("BL104w5545dp7bfwp43643534/4563G-XS13"))
    #adjust_image(cv2.imread('test_skew.jpg'))

View File

@ -1,9 +1,8 @@
#!/usr/bin/env python3
# Parse Belden catalog techdata datasheets
# Parse Belden (100%) & Alphawire (75%) catalog techdata datasheets
import pandas as pd
pd.set_option('future.no_silent_downcasting', True)
from PyPDF2 import PdfReader
import camelot
import numpy as np
@ -13,12 +12,25 @@ import json
from util import fprint
import uuid
from util import run_cmd
from util import win32
import os
import glob
import sys
def touch(path):
    """Create ``path`` if it is missing and set its access/modified times to now.

    The file is opened in append mode, so existing content is left untouched.
    """
    with open(path, mode='a') as _marker:
        os.utime(path, times=None)
def find_data_file(filename):
    """Return the path of ``filename`` next to the running program.

    When ``sys.frozen`` is set (a frozen/bundled build), the file is looked
    up beside the executable; otherwise beside this source file. Change the
    second branch to match wherever data files are stored during development.
    """
    is_frozen = getattr(sys, "frozen", False)
    anchor = sys.executable if is_frozen else __file__
    return os.path.join(os.path.dirname(anchor), filename)
def extract_table_name(table_start, searchpage, reader, dstype, fallbackname):
if dstype == "Belden":
ymin = table_start
@ -41,12 +53,25 @@ def extract_table_name(table_start, searchpage, reader, dstype, fallbackname):
#fprint(text_body)
def parse(filename, output_dir, partnum, dstype):
tables = []
# Extract table data
if dstype == "Belden":
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="poppler", split_text=False, line_scale=100, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': False, 'char_margin': 0.5}, shift_text=['r', 't'])
elif dstype == "Alphawire":
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="poppler", split_text=False, line_scale=50, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': True, 'char_margin': 0.5}, shift_text=['l', 't'])
try:
if dstype == "Belden":
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="ghostscript", split_text=False, line_scale=100, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': False, 'char_margin': 0.5}, shift_text=['r', 't'])
elif dstype == "Alphawire":
tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="ghostscript", split_text=False, line_scale=50, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': True, 'char_margin': 0.5}, shift_text=['l', 't'])
except OSError as e:
print(e)
if win32:
print("Ghostscript is not installed! Launching installer...")
#subprocess.run([r".\\gs10030w64.exe"])
os.system(r'''Powershell -Command "& { Start-Process \"''' + find_data_file("gs10030w64.exe") + r'''\" -Verb RunAs } " ''')
# Will return once file launched...
print("Once the install is completed, try again.")
return False
else:
print("Ghostscript is not installed. You can install it with e.g. apt install ghostscript for Debian-based systems.")
return False
#fprint("Total tables extracted:", tables.n)
n = 0
#pagenum = 0
@ -54,13 +79,14 @@ def parse(filename, output_dir, partnum, dstype):
page = reader.pages[0]
table_list = {}
table_list_raw = {}
pd.set_option('future.no_silent_downcasting', True)
for table in tables:
#with pd.options.context("future.no_silent_downcasting", True):
table.df.infer_objects(copy=False)
table.df.replace('', np.nan, inplace=True)
table.df = table.df.replace('', np.nan).infer_objects(copy=False)
table.df.dropna(inplace=True, how="all")
table.df.dropna(inplace=True, axis="columns", how="all")
table.df.replace(np.nan, '', inplace=True)
table.df = table.df.replace(np.nan, '').infer_objects(copy=False)
if not table.df.empty:
#fprint("\nTable " + str(n))
@ -281,7 +307,12 @@ def parse(filename, output_dir, partnum, dstype):
#print(output_table)
run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable!
#run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable!
pattern = os.path.join(output_dir, '*.json')
json_files = glob.glob(pattern)
for file_path in json_files:
os.remove(file_path)
#print(f"Deleted {file_path}")
with open(output_dir + "/search_" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
json.dump(output_table["searchspecs"], json_file)
with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file:
@ -289,7 +320,7 @@ def parse(filename, output_dir, partnum, dstype):
#print(json.dumps(output_table, indent=2))
touch(output_dir + "/parsed") # mark as parsed
return output_table
return True
def flatten(tables):
@ -338,4 +369,4 @@ def flatten(tables):
if __name__ == "__main__":
parse("cables/3050/datasheet.pdf", "cables/3050", "3050", "Alphawire")
print(parse("cables/3050/datasheet.pdf", "cables/3050", "3050", "Alphawire"))

View File

@ -1,5 +1,5 @@
# Runtime
camelot-py[base]
camelot-py
opencv-python
pypdf2==2.12.1
alive-progress
@ -15,6 +15,13 @@ websockets
numpy
scipy
ipywidgets
pandas
pyarrow
ghostscript
pyzbar
# Development
matplotlib
#cx_Freeze # uncomment if building label generator app
# requires windows 10 SDK, visual C++, etc

31
setup-label-generator.py Normal file
View File

@ -0,0 +1,31 @@
import sys
from cx_Freeze import setup, Executable
# Debug switch; only referenced by the commented-out GUI-base selection below.
debug = False

# cx_Freeze detects dependencies on its own, but the package list below may
# need fine tuning.
import opcode
import os
import distutils
#distutils_path = os.path.join(os.path.dirname(opcode.__file__), 'distutils')

build_exe_options = {
    "include_msvcr": True,
    "packages": ["camelot", "setuptools"],
    "optimize": 0,
    "silent": True,
    # Ship the Ghostscript installer alongside the frozen executable.
    "include_files": ["gs10030w64.exe"],
    "excludes": ["scipy", "torch"],
}

# base="Win32GUI" should be used only for Windows GUI app
base = "console"
#if sys.platform == "win32" and not debug:
#    base = "Win32GUI"

# Only non-Unix-like targets get the ".exe" suffix.
unix_like = sys.platform in ("linux", "linux2", "darwin")
name = "jukebox-labelgen" if unix_like else "jukebox-labelgen.exe"

# NOTE(review): name/description mention "IP Pigeon" - confirm this metadata is
# intended for the label generator build.
setup(
    name="IP Pigeon",
    version="0.2.4",
    description="IP Pigeon client application",
    options={"build_exe": build_exe_options},
    executables=[
        Executable("label_generator.py", base=base, uac_admin=False, target_name=name),
    ],
)

View File

@ -70,7 +70,7 @@ def fprint(msg, settings = None, sendqueue = None):
except Exception as e:
try:
print('[????:' + frm.function + ']:', str(msg))
print('[util:fprint]: ' + str(e))
#print('[util:fprint]: ' + str(e))
except:
print('[????]:', str(msg))