diff --git a/get_specs.py b/get_specs.py index 7c16010..7ee96fb 100755 --- a/get_specs.py +++ b/get_specs.py @@ -27,6 +27,7 @@ def check_internet(url='https://belden.com', timeout=5): def query_search(partnum, source): + fprint("Searching for " + partnum) if source == "Belden": token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time())) with requests.get(token_url) as r: @@ -69,7 +70,7 @@ def query_search(partnum, source): if idx < 0: fprint("Could not find part in API: " + partnum) return False - fprint("Search result found: result " + str(idx) + ", for ID " + name) + #fprint("Search result found: result " + str(idx) + ", for ID " + name) #urlname = a["results"][0]["raw"]["catalogitemurlname"] img = a["results"][idx]["raw"]["catalogitemimageurl"] img = img[0:img.index("?")] @@ -92,7 +93,7 @@ def query_search(partnum, source): #print(out) return out except: - print("falied to search with API. Falling back to datasheet lookup.") + print("Failed to search with API. Falling back to datasheet lookup.") return False @@ -118,7 +119,7 @@ def query_search(partnum, source): if data["Count"] > 0: #print(data["Results"][0]["Url"]) for result in data["Results"]: - #print(result["Url"].split("/")[-1], partnum.replace("-", "").replace("/", "_")) + #print(result["Url"]) if result["Url"].split("/")[-1] == partnum.replace("-", "").replace("/", "_"): #print(partnum) #print(result["Html"]) @@ -137,12 +138,15 @@ def query_search(partnum, source): output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2] output["partnum"] = partnum.replace("-", "").replace("/", "_") #"test".index() - #print(output) + print(output) return output except: + print("Failed to search with API. Falling back to datasheet lookup.") return False + + print("Failed to search with API. 
Falling back to datasheet lookup.") return False @@ -155,11 +159,15 @@ def touch(path): def get_multi(partnums, delay=0.25): with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar: - def _try_download_datasheet(partnum, output_dir): # Guess datasheet URL + def _try_download_datasheet(partnum, output_dir, dstype): # Guess datasheet URL global bartext - sanitized_name = partnum.replace(" ", "") - url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf" + if dstype == "Belden": + sanitized_name = partnum.replace(" ", "") + url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf" + elif dstype == "Alphawire": + # Alphawire Datasheet URLs do not use a sanitized part number (but product pages do) + url = "https://www.alphawire.com/disteAPI/SpecPDF/DownloadProductSpecPdf?productPartNumber=" + partnum #fprint(url) try: with requests.get(url, stream=True) as r: @@ -267,17 +275,23 @@ def get_multi(partnums, delay=0.25): bar(skipped=False) def run_search(partnum): + oldpartnum = partnum + if dstype == "Alphawire": + # For alphawire, sanitize the part number for only the final result check, because their API is very weird + # For the actual search, it must be un-sanitized + partnum = partnum.replace("-", "").replace("/","_") output_dir = "cables/" + partnum path = output_dir + "/datasheet.pdf" bartext = "Downloading files for part " + partnum bar.text = bartext - # + partnum = oldpartnum if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1): # Use query search_result = query_search(partnum, dstype) # Try to use belden.com search if search_result is not False: # Download high resolution part image if available and needed + #oldpartnum = partnum partnum = search_result["partnum"] output_dir = "cables/" + partnum path = output_dir + "/datasheet.pdf" @@ -302,7 +316,7 @@ def get_multi(partnums, delay=0.25):
__use_cached_datasheet(partnum, path, output_dir, dstype) # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download - elif _try_download_datasheet(partnum, output_dir) is not False: + elif _try_download_datasheet(partnum, output_dir, dstype) is not False: __downloaded_datasheet(partnum, path, output_dir, dstype) # Failed to download with search or guess :( diff --git a/read_datasheet.py b/read_datasheet.py index acb33bd..76c6820 100755 --- a/read_datasheet.py +++ b/read_datasheet.py @@ -112,14 +112,14 @@ def parse(filename, output_dir, partnum, dstype): tables = dict() torename = dict() previous_table = "" - print(table_list.keys()) + #print(table_list.keys()) for table_name in table_list.keys(): # determine shape: horizontal or vertical table = table_list[table_name] rows = table.shape[0] cols = table.shape[1] vertical = None - print(rows, cols, table_name) + #print(rows, cols, table_name) if rows > 2 and cols == 2: vertical = True elif cols == 1 and rows > 1: @@ -145,7 +145,7 @@ def parse(filename, output_dir, partnum, dstype): vertical = False else: # 1 column, <= 2 rows vertical = False - print(vertical) + #print(vertical) # missing name check for table_name_2 in table_list.keys(): if dstype == "Alphawire" and table_name_2.find("\n") >= 0: @@ -182,11 +182,11 @@ def parse(filename, output_dir, partnum, dstype): if prevtbl.cells[-1][0].lb[1] < 50 and thistbl.cells[0][0].lt[1] > 600: # wraparound - print("WRAP") - print("PREV TABLE", prevtbl.df) - print("THIS TABLE", thistbl.df) - print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1]) - print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1]) + #print("WRAP") + #print("PREV TABLE", prevtbl.df) + #print("THIS TABLE", thistbl.df) + #print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1]) + #print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1]) main_key = previous_table cont_key = table_name #print(vertical) @@ -200,21 +200,21 @@ def parse(filename, output_dir, 
partnum, dstype): del tables[table_name] else: - print(tables[cont_key].keys()) + #print(tables[cont_key].keys()) for key in tables[cont_key].keys(): - print(main_key, key, cont_key, key) + #print(main_key, key, cont_key, key) tables[main_key][key] = tables[cont_key][key] del tables[table_name] elif thistbl.cells[0][0].lt[1] > 600: # name on previous page (grrrr) - print("NAMEABOVE") - print("PREV TABLE", prevtbl.df) - print("THIS TABLE", thistbl.df) - print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1]) - print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1]) + #print("NAMEABOVE") + #print("PREV TABLE", prevtbl.df) + #print("THIS TABLE", thistbl.df) + #print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1]) + #print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1]) name = extract_table_name(50, prevtbl.page,reader,dstype,table_name).strip("\n").strip() - print("FOUND NAME:", name) + #print("FOUND NAME:", name) torename[table_name] = name @@ -287,7 +287,7 @@ def parse(filename, output_dir, partnum, dstype): with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file: json.dump(output_table["fullspecs"], json_file) - print(json.dumps(output_table, indent=2)) + #print(json.dumps(output_table, indent=2)) touch(output_dir + "/parsed") # mark as parsed return output_table