Add Alphawire datasheet fallback

Cole Deck 2024-03-14 22:06:13 -05:00
parent 39723ec442
commit fc2af34450
2 changed files with 40 additions and 26 deletions

File 1 of 2

@@ -27,6 +27,7 @@ def check_internet(url='https://belden.com', timeout=5):
def query_search(partnum, source):
fprint("Searching for " + partnum)
if source == "Belden":
token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
with requests.get(token_url) as r:
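For reference, a minimal sketch of the token fetch this hunk performs, assuming the requests library; the endpoint and cache-busting timestamp come from the diff, while the "token" field in the response is an assumption:

import time
import requests

def get_coveo_token():
    # Cache-busting timestamp on the query string, as in the diff above
    token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
    with requests.get(token_url, timeout=5) as r:
        r.raise_for_status()
        return r.json()["token"]  # assumed response field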
@@ -69,7 +70,7 @@ def query_search(partnum, source):
if idx < 0:
fprint("Could not find part in API: " + partnum)
return False
fprint("Search result found: result " + str(idx) + ", for ID " + name)
#fprint("Search result found: result " + str(idx) + ", for ID " + name)
#urlname = a["results"][0]["raw"]["catalogitemurlname"]
img = a["results"][idx]["raw"]["catalogitemimageurl"]
img = img[0:img.index("?")]
@@ -92,7 +93,7 @@ def query_search(partnum, source):
#print(out)
return out
except:
print("falied to search with API. Falling back to datasheet lookup.")
print("Failed to search with API. Falling back to datasheet lookup.")
return False
@@ -118,7 +119,7 @@ def query_search(partnum, source):
if data["Count"] > 0:
#print(data["Results"][0]["Url"])
for result in data["Results"]:
#print(result["Url"].split("/")[-1], partnum.replace("-", "").replace("/", "_"))
#print(result["Url"])
if result["Url"].split("/")[-1] == partnum.replace("-", "").replace("/", "_"):
#print(partnum)
#print(result["Html"])
@@ -137,12 +138,15 @@ def query_search(partnum, source):
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
output["partnum"] = partnum.replace("-", "").replace("/", "_")
#"test".index()
- #print(output)
+ print(output)
return output
+ except:
+ print("Failed to search with API. Falling back to datasheet lookup.")
+ return False
print("Failed to search with API. Falling back to datasheet lookup.")
return False
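For reference, a minimal sketch of the result-matching step used in the hunks above: Alphawire product-page URLs end in a sanitized part number, so the raw part number is sanitized only for the comparison. The helper names are illustrative:

def sanitize(partnum):
    # Alphawire product pages drop "-" and replace "/" with "_"
    return partnum.replace("-", "").replace("/", "_")

def find_matching_result(results, partnum):
    # results is the "Results" list from the Alphawire search API response
    for result in results:
        if result["Url"].split("/")[-1] == sanitize(partnum):
            return result
    return None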
@@ -155,11 +159,15 @@ def touch(path):
def get_multi(partnums, delay=0.25):
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
- def _try_download_datasheet(partnum, output_dir): # Guess datasheet URL
+ def _try_download_datasheet(partnum, output_dir, dstype): # Guess datasheet URL
global bartext
- sanitized_name = partnum.replace(" ", "")
- url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf"
+ if dstype == "Belden":
+ sanitized_name = partnum.replace(" ", "")
+ url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf"
+ elif dstype == "Alphawire":
+ # Alphawire datasheet URLs do not use a sanitized part number (but product pages do)
+ url = "https://www.alphawire.com/disteAPI/SpecPDF/DownloadProductSpecPdf?productPartNumber=" + partnum
#fprint(url)
try:
with requests.get(url, stream=True) as r:
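For reference, a sketch of the per-vendor URL guess introduced above; both URL patterns are taken verbatim from the diff, the helper itself is illustrative:

def guess_datasheet_url(partnum, dstype):
    if dstype == "Belden":
        # Belden techdata URLs strip spaces from the part number
        return ("https://catalog.belden.com/techdata/EN/"
                + partnum.replace(" ", "") + "_techdata.pdf")
    elif dstype == "Alphawire":
        # Alphawire takes the raw, un-sanitized part number as a query parameter
        return ("https://www.alphawire.com/disteAPI/SpecPDF/DownloadProductSpecPdf"
                "?productPartNumber=" + partnum)
    return None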
@@ -267,17 +275,23 @@ def get_multi(partnums, delay=0.25):
bar(skipped=False)
def run_search(partnum):
oldpartnum = partnum
+ if dstype == "Alphawire":
+ # For Alphawire, sanitize the part number only for the final result check, because their API is very weird
+ # For the actual search, it must be un-sanitized
+ partnum = partnum.replace("-", "").replace("/","_")
output_dir = "cables/" + partnum
path = output_dir + "/datasheet.pdf"
bartext = "Downloading files for part " + partnum
bar.text = bartext
+ #
+ partnum = oldpartnum
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
# Use query
search_result = query_search(partnum, dstype)
# Try to use belden.com search
if search_result is not False:
# Download high resolution part image if available and needed
#oldpartnum = partnum
partnum = search_result["partnum"]
output_dir = "cables/" + partnum
path = output_dir + "/datasheet.pdf"
@@ -302,7 +316,7 @@ def get_multi(partnums, delay=0.25):
__use_cached_datasheet(partnum, path, output_dir, dstype)
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
- elif _try_download_datasheet(partnum, output_dir) is not False:
+ elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
__downloaded_datasheet(partnum, path, output_dir, dstype)
# Failed to download with search or guess :(
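Taken together, the control flow this file implements is: try the vendor search API first, and only fall back to the guessed datasheet URL when the search fails. A condensed sketch, reusing query_search from the code above and guess_datasheet_url from the earlier sketch (the wrapper name is illustrative):

def fetch_datasheet_url(partnum, dstype):
    result = query_search(partnum, dstype)  # vendor search API
    if result is not False:
        return result["datasheet"]          # direct URL from the search result
    return guess_datasheet_url(partnum, dstype)  # fall back to a guessed URL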

File 2 of 2

@@ -112,14 +112,14 @@ def parse(filename, output_dir, partnum, dstype):
tables = dict()
torename = dict()
previous_table = ""
- print(table_list.keys())
+ #print(table_list.keys())
for table_name in table_list.keys():
# determine shape: horizontal or vertical
table = table_list[table_name]
rows = table.shape[0]
cols = table.shape[1]
vertical = None
- print(rows, cols, table_name)
+ #print(rows, cols, table_name)
if rows > 2 and cols == 2:
vertical = True
elif cols == 1 and rows > 1:
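For reference, the shape heuristic above in isolation: a table with exactly two columns and more than two rows is read as vertical (key/value pairs down the page). The remaining branches are cut off between these hunks, so the default here is an assumption:

def is_vertical(rows, cols):
    # >2 rows in exactly 2 columns reads as a key/value spec table
    if rows > 2 and cols == 2:
        return True
    # wide or very short tables read as horizontal (assumed default;
    # the elided branches in the diff refine this further)
    return False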
@@ -145,7 +145,7 @@ def parse(filename, output_dir, partnum, dstype):
vertical = False
else: # 1 column, <= 2 rows
vertical = False
- print(vertical)
+ #print(vertical)
# missing name check
for table_name_2 in table_list.keys():
if dstype == "Alphawire" and table_name_2.find("\n") >= 0:
@@ -182,11 +182,11 @@ def parse(filename, output_dir, partnum, dstype):
if prevtbl.cells[-1][0].lb[1] < 50 and thistbl.cells[0][0].lt[1] > 600:
# wraparound
print("WRAP")
print("PREV TABLE", prevtbl.df)
print("THIS TABLE", thistbl.df)
print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
#print("WRAP")
#print("PREV TABLE", prevtbl.df)
#print("THIS TABLE", thistbl.df)
#print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
#print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
main_key = previous_table
cont_key = table_name
#print(vertical)
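For reference, a sketch of the wraparound test above. Camelot-style cells carry PDF coordinates with the origin at the bottom-left of the page, so a previous table whose last cell's bottom edge sits below y=50 ran off the page bottom, and a current table whose first cell's top edge sits above y=600 starts near the top of the next page; the thresholds are taken from the diff:

def is_wraparound(prevtbl, thistbl):
    prev_bottom = prevtbl.cells[-1][0].lb[1]  # y of the last cell's bottom edge
    this_top = thistbl.cells[0][0].lt[1]      # y of the first cell's top edge
    return prev_bottom < 50 and this_top > 600

When the test fires, the continuation's rows are folded into the named table, as in the hunk that follows:

def merge_continuation(tables, main_key, cont_key):
    for key in tables[cont_key].keys():
        tables[main_key][key] = tables[cont_key][key]
    del tables[cont_key]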
@@ -200,21 +200,21 @@ def parse(filename, output_dir, partnum, dstype):
del tables[table_name]
else:
- print(tables[cont_key].keys())
+ #print(tables[cont_key].keys())
for key in tables[cont_key].keys():
- print(main_key, key, cont_key, key)
+ #print(main_key, key, cont_key, key)
tables[main_key][key] = tables[cont_key][key]
del tables[table_name]
elif thistbl.cells[0][0].lt[1] > 600:
# name on previous page (grrrr)
print("NAMEABOVE")
print("PREV TABLE", prevtbl.df)
print("THIS TABLE", thistbl.df)
print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
#print("NAMEABOVE")
#print("PREV TABLE", prevtbl.df)
#print("THIS TABLE", thistbl.df)
#print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
#print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
name = extract_table_name(50, prevtbl.page,reader,dstype,table_name).strip("\n").strip()
print("FOUND NAME:", name)
#print("FOUND NAME:", name)
torename[table_name] = name
@@ -287,7 +287,7 @@ def parse(filename, output_dir, partnum, dstype):
with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file:
json.dump(output_table["fullspecs"], json_file)
- print(json.dumps(output_table, indent=2))
+ #print(json.dumps(output_table, indent=2))
touch(output_dir + "/parsed") # mark as parsed
return output_table
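For reference, the on-disk layout the two files produce for each part, assembled from the paths in the code above (the tree itself is a sketch):

cables/<partnum>/
    datasheet.pdf            # downloaded via search, or fetched from the guessed URL
    specs_<partnum>.json     # the "fullspecs" table data dumped by parse()
    found_part_hires         # marker: high-resolution product image fetched
    parsed                   # marker: datasheet parsed successfully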