diff --git a/.gitignore b/.gitignore index 9ffdf12..40318df 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ output.mp4 output.log # images *.png +# Built app +build +# Generated label images +labels diff --git a/get_specs.py b/get_specs.py index 7ee96fb..b6c4067 100755 --- a/get_specs.py +++ b/get_specs.py @@ -53,15 +53,15 @@ def query_search(partnum, source): a = json.loads(a) idx = -1 name = "" - for partid in range(len(a["results"])): + for partid in range(len(a["results"])-1, -1, -1): name = a["results"][partid]["title"] if name != partnum: if name.find(partnum) >= 0: idx = partid - break + #break elif partnum.find(name) >= 0: idx = partid - break + #break else: idx = partid @@ -113,7 +113,7 @@ def query_search(partnum, source): r = requests.get(url=alphaurl) data = r.json() output = dict() - print(data["Results"]) + #print(data["Results"]) try: if data["Count"] > 0: @@ -136,9 +136,10 @@ def query_search(partnum, source): dsidx = result["Html"].index(" 1): + partnum = oldpartnum.replace("_","/") + returnval = [partnum, dstype, False, False] + if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1) or not cache: # Use query search_result = query_search(partnum, dstype) # Try to use belden.com search @@ -293,41 +298,49 @@ def get_multi(partnums, delay=0.25): # Download high resolution part image if available and needed #oldpartnum = partnum partnum = search_result["partnum"] - output_dir = "cables/" + partnum + returnval = [partnum, dstype, False, False] + output_dir = dir + partnum path = output_dir + "/datasheet.pdf" bartext = "Downloading files for part " + partnum bar.text = bartext - if not os.path.exists(output_dir + "/found_part_hires"): + if not os.path.exists(output_dir + "/found_part_hires") or not cache: if _download_image(search_result["image"], output_dir): fprint("Downloaded hi-res part image for " + partnum) + returnval = [partnum, dstype, True, False] touch(output_dir + "/found_part_hires") else: fprint("Using cached hi-res part image for " + partnum) # Download datasheet from provided URL if needed - if os.path.exists(path) and os.path.getsize(path) > 1: - __use_cached_datasheet(partnum, path, output_dir, dstype) + if os.path.exists(path) and os.path.getsize(path) > 1 and cache: + out = __use_cached_datasheet(partnum, path, output_dir, dstype) + returnval = [partnum, dstype, True, out] elif _download_datasheet(search_result["datasheet"], output_dir) is not False: - __downloaded_datasheet(partnum, path, output_dir, dstype) + out = __downloaded_datasheet(partnum, path, output_dir, dstype) + returnval = [partnum, dstype, True, out] - elif os.path.exists(path) and os.path.getsize(path) > 1: - __use_cached_datasheet(partnum, path, output_dir, dstype) + elif os.path.exists(path) and os.path.getsize(path) > 1 and cache: + out = __use_cached_datasheet(partnum, path, output_dir, dstype) + returnval = [partnum, dstype, True, out] # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download elif _try_download_datasheet(partnum, output_dir, dstype) is not False: - __downloaded_datasheet(partnum, path, output_dir, dstype) + out = __downloaded_datasheet(partnum, path, output_dir, dstype) + returnval = [partnum, dstype, False, out] # Failed to download with search or guess :( else: return False - return True + actualpartnums.append(returnval) + return returnval # We already have a hi-res image and the datasheet - perfect! else: fprint("Using cached hi-res part image for " + partnum) - __use_cached_datasheet(partnum, path, output_dir, dstype) + out = __use_cached_datasheet(partnum, path, output_dir, dstype) + returnval = [partnum, dstype, False, out] return True for fullpartnum in partnums: @@ -358,7 +371,7 @@ def get_multi(partnums, delay=0.25): if not success: fprint("Failed to download datasheet for part " + partnum) bar.text = "Failed to download datasheet for part " + partnum - failed.append(partnum) + failed.append((partnum, dstype)) bar(skipped=True) bar(skipped=True) time.sleep(delay) @@ -366,10 +379,10 @@ def get_multi(partnums, delay=0.25): if len(failed) > 0: fprint("Failed to download:") for partnum in failed: - fprint(partnum) - return False # Go to manual review upload page + fprint(partnum[1] + " " + partnum[0]) + return False, actualpartnums # Go to manual review upload page else: - return True # All cables downloaded; we are good to go + return True, actualpartnums # All cables downloaded; we are good to go @@ -440,8 +453,8 @@ if __name__ == "__main__": "BLC6D1100007" ] - #print(query_search("FIT-221-1/4", "Alphawire")) - get_multi(partnums, 0.25) + print(query_search("74002", "Belden")) + #get_multi(partnums, 0.25) #query_search("10GXS13", "Belden") diff --git a/gs10030w64.exe b/gs10030w64.exe new file mode 100644 index 0000000..578cd06 Binary files /dev/null and b/gs10030w64.exe differ diff --git a/label_generator.py b/label_generator.py new file mode 100755 index 0000000..f0fbaa9 --- /dev/null +++ b/label_generator.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + + + +from get_specs import get_multi +import sys +import uuid +import os +import signal +from PIL import Image +from label_image import generate_code + + +def input_cable(): + print("") + print("Use the full part number. Spaces, special characters are allowed. Do not specify the brand.") + print("") + print("Please enter a part number and press enter:") + inputnum = input("").strip() + if len(inputnum) < 2: + killall_signal(0, 0) + print("Input part number:", inputnum) + print("Searching databases for cables...") + # Search both AW and BL sites + status, output = get_multi(["BL"+inputnum, "AW"+inputnum], delay=0.1, dir="temp/" + str(uuid.uuid4()) + "/", cache=False) + print("") + if len(output) > 1: + for i in output: + print(i[1], i[0]) + print("Multiple brands with the same part number! Please type \"b\" for the Belden part number or \"a\" for the Alphawire cable") + inputbrand = input() + if inputbrand == "b": + output = [output[0]] + elif inputbrand == "a": + output = [output[1]] + elif len(output) == 0: + print("No results found for part number", inputnum + ". Please try again with a different part number.") + return + + output = output[0] + print("") + if output[2] and output[3]: + print("Cable result found -",output[1], output[0], "with high-quality image and full specs") + elif output[2]: + print("Cable result found -",output[1], output[0], "with high-quality image and no specs") + elif output[3]: + print("Cable result found -",output[1], output[0], "with no/low quality image and full specs") + else: + print("Cable result found -",output[1], output[0], "with no/low quality image and no specs") + print("") + if not output[3]: + print("Unable to decode cable specs. Please try again with a different part number.") + return False + else: + print("") + print("*** Cable details confirmed. Creating label...") + print("") + img = None + imgstr = "" + if output[1] == "Belden": + imgstr = "BL" + elif output[1] == "Alphawire": + imgstr = "AW" + img = generate_code(imgstr + output[0]) + os.makedirs("labels", exist_ok=True) + img.save("labels/" + imgstr + output[0] + ".png") + +def delete_folder(path): + # Check if the path is a directory + if not os.path.isdir(path): + return + + # List all files and directories in the path + for filename in os.listdir(path): + file_path = os.path.join(path, filename) + # If it's a directory, recursively call this function + if os.path.isdir(file_path): + delete_folder(file_path) + else: + # If it's a file, remove it + os.remove(file_path) + + # After removing all contents, remove the directory itself + os.rmdir(path) + +def killall_signal(a,b): + delete_folder("temp") + os.kill(os.getpid(), 9) # dirty kill of self + +if __name__ == "__main__": + + signal.signal(signal.SIGINT, killall_signal) + signal.signal(signal.SIGTERM, killall_signal) + print("Welcome to the Jukebox cable utility. This tool will allow you to verify Belden & Alphawire cable part numbers and create labels for samples in the Jukebox.") + print("This tool requires internet access to download cable specifications and verify part numbers.") + #print("Use Ctrl+C to exit.") + while True: + delete_folder("temp") + input_cable() + diff --git a/label_image.py b/label_image.py new file mode 100755 index 0000000..17de9ba --- /dev/null +++ b/label_image.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +from util import fprint + +from PIL import Image +from PIL import ImageDraw +#import cv2 +import numpy as np +#import math + + + +# Copied from http://en.wikipedia.org/wiki/Code_128 +# Value Weights 128A 128B 128C +CODE128_CHART = """ +0 212222 space space 00 +1 222122 ! ! 01 +2 222221 " " 02 +3 121223 # # 03 +4 121322 $ $ 04 +5 131222 % % 05 +6 122213 & & 06 +7 122312 ' ' 07 +8 132212 ( ( 08 +9 221213 ) ) 09 +10 221312 * * 10 +11 231212 + + 11 +12 112232 , , 12 +13 122132 - - 13 +14 122231 . . 14 +15 113222 / / 15 +16 123122 0 0 16 +17 123221 1 1 17 +18 223211 2 2 18 +19 221132 3 3 19 +20 221231 4 4 20 +21 213212 5 5 21 +22 223112 6 6 22 +23 312131 7 7 23 +24 311222 8 8 24 +25 321122 9 9 25 +26 321221 : : 26 +27 312212 ; ; 27 +28 322112 < < 28 +29 322211 = = 29 +30 212123 > > 30 +31 212321 ? ? 31 +32 232121 @ @ 32 +33 111323 A A 33 +34 131123 B B 34 +35 131321 C C 35 +36 112313 D D 36 +37 132113 E E 37 +38 132311 F F 38 +39 211313 G G 39 +40 231113 H H 40 +41 231311 I I 41 +42 112133 J J 42 +43 112331 K K 43 +44 132131 L L 44 +45 113123 M M 45 +46 113321 N N 46 +47 133121 O O 47 +48 313121 P P 48 +49 211331 Q Q 49 +50 231131 R R 50 +51 213113 S S 51 +52 213311 T T 52 +53 213131 U U 53 +54 311123 V V 54 +55 311321 W W 55 +56 331121 X X 56 +57 312113 Y Y 57 +58 312311 Z Z 58 +59 332111 [ [ 59 +60 314111 \ \ 60 +61 221411 ] ] 61 +62 431111 ^ ^ 62 +63 111224 _ _ 63 +64 111422 NUL ` 64 +65 121124 SOH a 65 +66 121421 STX b 66 +67 141122 ETX c 67 +68 141221 EOT d 68 +69 112214 ENQ e 69 +70 112412 ACK f 70 +71 122114 BEL g 71 +72 122411 BS h 72 +73 142112 HT i 73 +74 142211 LF j 74 +75 241211 VT k 75 +76 221114 FF l 76 +77 413111 CR m 77 +78 241112 SO n 78 +79 134111 SI o 79 +80 111242 DLE p 80 +81 121142 DC1 q 81 +82 121241 DC2 r 82 +83 114212 DC3 s 83 +84 124112 DC4 t 84 +85 124211 NAK u 85 +86 411212 SYN v 86 +87 421112 ETB w 87 +88 421211 CAN x 88 +89 212141 EM y 89 +90 214121 SUB z 90 +91 412121 ESC { 91 +92 111143 FS | 92 +93 111341 GS } 93 +94 131141 RS ~ 94 +95 114113 US DEL 95 +96 114311 FNC3 FNC3 96 +97 411113 FNC2 FNC2 97 +98 411311 ShiftB ShiftA 98 +99 113141 CodeC CodeC 99 +100 114131 CodeB FNC4 CodeB +101 311141 FNC4 CodeA CodeA +102 411131 FNC1 FNC1 FNC1 +103 211412 StartA StartA StartA +104 211214 StartB StartB StartB +105 211232 StartC StartC StartC +106 2331112 Stop Stop Stop +""".split() + +VALUES = [int(value) for value in CODE128_CHART[0::5]] +WEIGHTS = dict(zip(VALUES, CODE128_CHART[1::5])) +CODE128A = dict(zip(CODE128_CHART[2::5], VALUES)) +CODE128B = dict(zip(CODE128_CHART[3::5], VALUES)) +CODE128C = dict(zip(CODE128_CHART[4::5], VALUES)) + +for charset in (CODE128A, CODE128B): + charset[' '] = charset.pop('space') + + + +def generate_code(data, show=False, check=False): + + img = code128_image(data) + if show: + img.show() + #img.show() + #print(data) + + if(check): + from pyzbar.pyzbar import decode + from pyzbar.pyzbar import ZBarSymbol + print(decode(img, symbols=[ZBarSymbol.CODE128])[0].data.decode('ascii')) + + #if(decode(img, symbols=[ZBarSymbol.CODE128])[0].data.decode('ascii') == data): + # return True + #else: + # return False + return img + +def code128_format(data): + """ + Generate an optimal barcode from ASCII text + """ + text = str(data) + pos = 0 + length = len(text) + + # Start Code + if text[:2].isdigit(): + charset = CODE128C + codes = [charset['StartC']] + else: + charset = CODE128B + codes = [charset['StartB']] + + # Data + while pos < length: + if charset is CODE128C: + if text[pos:pos+2].isdigit() and length - pos > 1: + # Encode Code C two characters at a time + codes.append(int(text[pos:pos+2])) + pos += 2 + else: + # Switch to Code B + codes.append(charset['CodeB']) + charset = CODE128B + elif text[pos:pos+4].isdigit() and length - pos >= 4: + # Switch to Code C + codes.append(charset['CodeC']) + charset = CODE128C + else: + # Encode Code B one character at a time + codes.append(charset[text[pos]]) + pos += 1 + + # Checksum + checksum = 0 + for weight, code in enumerate(codes): + checksum += max(weight, 1) * code + codes.append(checksum % 103) + + # Stop Code + codes.append(charset['Stop']) + return codes + +def code128_image(data, height=100, thickness=3, quiet_zone=False): + if not data[-1] == CODE128B['Stop']: + data = code128_format(data) + + + barcode_widths = [] + for code in data: + for weight in WEIGHTS[code]: + barcode_widths.append(int(weight) * thickness) + width = sum(barcode_widths) + x = 0 + + + if quiet_zone: + width += 20 * thickness + x = 10 * thickness + + # Monochrome Image + img = Image.new('RGB', (int(width * 10), int(width * 10)), 'white') + draw = ImageDraw.Draw(img) + draw_bar = True + for bwidth in barcode_widths: + bwidth *= 4 + if draw_bar: + draw.rectangle(((x + int(width * 3), width*6.25), (x + int(width * 3) + bwidth - 1, width*7)), fill='black') + draw_bar = not draw_bar + x += bwidth + + #draw.arc(((width - width/5, width - width/5), (width*9 + width/5, width*9 + width/5)),0,360,fill='blue', width = int(width/8)) + draw.arc(((width+int(width / 1.4), width+int(width / 1.4)), (width*9-int(width / 1.4), width*9-int(width / 1.4))),0,360,fill='blue', width = int(width/8)) + return img + +if __name__ == "__main__": + #print(generate_code("BL10GXS13")) + #print(generate_code("BL10GXgd35j35S13")) + #print(generate_code("BL10GX54hS13")) + print(generate_code("BL10Gj34qXS13", False, False)) + #print(generate_code("BL104w5545dp7bfwp43643534/4563G-XS13")) + #adjust_image(cv2.imread('test_skew.jpg')) \ No newline at end of file diff --git a/read_datasheet.py b/read_datasheet.py index b5fe474..a0dbc37 100755 --- a/read_datasheet.py +++ b/read_datasheet.py @@ -1,9 +1,8 @@ #!/usr/bin/env python3 -# Parse Belden catalog techdata datasheets +# Parse Belden (100%) & Alphawire (75%) catalog techdata datasheets import pandas as pd -pd.set_option('future.no_silent_downcasting', True) from PyPDF2 import PdfReader import camelot import numpy as np @@ -13,12 +12,25 @@ import json from util import fprint import uuid from util import run_cmd +from util import win32 import os +import glob +import sys def touch(path): with open(path, 'a'): os.utime(path, None) +def find_data_file(filename): + if getattr(sys, "frozen", False): + # The application is frozen + datadir = os.path.dirname(sys.executable) + else: + # The application is not frozen + # Change this bit to match where you store your data files: + datadir = os.path.dirname(__file__) + return os.path.join(datadir, filename) + def extract_table_name(table_start, searchpage, reader, dstype, fallbackname): if dstype == "Belden": ymin = table_start @@ -41,12 +53,25 @@ def extract_table_name(table_start, searchpage, reader, dstype, fallbackname): #fprint(text_body) def parse(filename, output_dir, partnum, dstype): - + tables = [] # Extract table data - if dstype == "Belden": - tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="poppler", split_text=False, line_scale=100, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': False, 'char_margin': 0.5}, shift_text=['r', 't']) - elif dstype == "Alphawire": - tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="poppler", split_text=False, line_scale=50, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': True, 'char_margin': 0.5}, shift_text=['l', 't']) + try: + if dstype == "Belden": + tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="ghostscript", split_text=False, line_scale=100, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': False, 'char_margin': 0.5}, shift_text=['r', 't']) + elif dstype == "Alphawire": + tables = camelot.read_pdf(filename, pages="1-end", flavor='lattice', backend="ghostscript", split_text=False, line_scale=50, process_background=True, resolution=600, interations=1, layout_kwargs={'detect_vertical': True, 'char_margin': 0.5}, shift_text=['l', 't']) + except OSError as e: + print(e) + if win32: + print("Ghostscript is not installed! Launching installer...") + #subprocess.run([r".\\gs10030w64.exe"]) + os.system(r'''Powershell -Command "& { Start-Process \"''' + find_data_file("gs10030w64.exe") + r'''\" -Verb RunAs } " ''') + # Will return once file launched... + print("Once the install is completed, try again.") + return False + else: + print("Ghostscript is not installed. You can install it with e.g. apt install ghostscript for Debian-based systems.") + return False #fprint("Total tables extracted:", tables.n) n = 0 #pagenum = 0 @@ -54,13 +79,14 @@ def parse(filename, output_dir, partnum, dstype): page = reader.pages[0] table_list = {} table_list_raw = {} - + pd.set_option('future.no_silent_downcasting', True) for table in tables: + #with pd.options.context("future.no_silent_downcasting", True): table.df.infer_objects(copy=False) - table.df.replace('', np.nan, inplace=True) + table.df = table.df.replace('', np.nan).infer_objects(copy=False) table.df.dropna(inplace=True, how="all") table.df.dropna(inplace=True, axis="columns", how="all") - table.df.replace(np.nan, '', inplace=True) + table.df = table.df.replace(np.nan, '').infer_objects(copy=False) if not table.df.empty: #fprint("\nTable " + str(n)) @@ -281,7 +307,12 @@ def parse(filename, output_dir, partnum, dstype): #print(output_table) - run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable! + #run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable! + pattern = os.path.join(output_dir, '*.json') + json_files = glob.glob(pattern) + for file_path in json_files: + os.remove(file_path) + #print(f"Deleted {file_path}") with open(output_dir + "/search_" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file: json.dump(output_table["searchspecs"], json_file) with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file: @@ -289,7 +320,7 @@ def parse(filename, output_dir, partnum, dstype): #print(json.dumps(output_table, indent=2)) touch(output_dir + "/parsed") # mark as parsed - return output_table + return True def flatten(tables): @@ -338,4 +369,4 @@ def flatten(tables): if __name__ == "__main__": - parse("cables/3050/datasheet.pdf", "cables/3050", "3050", "Alphawire") \ No newline at end of file + print(parse("cables/3050/datasheet.pdf", "cables/3050", "3050", "Alphawire")) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index bbe8914..55c1942 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # Runtime -camelot-py[base] +camelot-py opencv-python pypdf2==2.12.1 alive-progress @@ -15,6 +15,13 @@ websockets numpy scipy ipywidgets +pandas +pyarrow +ghostscript +pyzbar + # Development matplotlib +#cx_Freeze # uncomment if building label generator app +# requires windows 10 SDK, visual C++, etc \ No newline at end of file diff --git a/setup-label-generator.py b/setup-label-generator.py new file mode 100644 index 0000000..0376975 --- /dev/null +++ b/setup-label-generator.py @@ -0,0 +1,31 @@ +import sys +from cx_Freeze import setup, Executable + +debug = True +debug = not debug +# Dependencies are automatically detected, but it might need fine tuning. +# "packages": ["os"] is used as example only + +import opcode +import os +import distutils +#distutils_path = os.path.join(os.path.dirname(opcode.__file__), 'distutils') +build_exe_options = {"include_msvcr": True, "packages": ["camelot", "setuptools"], "optimize": 0, "silent": True, "include_files": ["gs10030w64.exe"], "excludes": ["scipy", "torch"]} + +# base="Win32GUI" should be used only for Windows GUI app +base = "console" +#if sys.platform == "win32" and not debug: +# base = "Win32GUI" + +if sys.platform == "linux" or sys.platform == "linux2" or sys.platform == "darwin": + name = "jukebox-labelgen" +else: + name = "jukebox-labelgen.exe" + +setup( + name="IP Pigeon", + version="0.2.4", + description="IP Pigeon client application", + options={"build_exe": build_exe_options}, + executables=[Executable("label_generator.py", base=base, uac_admin=False, target_name=name)], +) diff --git a/util.py b/util.py index b957305..f199986 100755 --- a/util.py +++ b/util.py @@ -70,7 +70,7 @@ def fprint(msg, settings = None, sendqueue = None): except Exception as e: try: print('[????:' + frm.function + ']:', str(msg)) - print('[util:fprint]: ' + str(e)) + #print('[util:fprint]: ' + str(e)) except: print('[????]:', str(msg))