Add Alphawire datasheet fallback
This commit is contained in:
parent
39723ec442
commit
fc2af34450
28
get_specs.py
28
get_specs.py
@ -27,6 +27,7 @@ def check_internet(url='https://belden.com', timeout=5):
|
|||||||
|
|
||||||
|
|
||||||
def query_search(partnum, source):
|
def query_search(partnum, source):
|
||||||
|
fprint("Searching for " + partnum)
|
||||||
if source == "Belden":
|
if source == "Belden":
|
||||||
token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
|
token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
|
||||||
with requests.get(token_url) as r:
|
with requests.get(token_url) as r:
|
||||||
@ -69,7 +70,7 @@ def query_search(partnum, source):
|
|||||||
if idx < 0:
|
if idx < 0:
|
||||||
fprint("Could not find part in API: " + partnum)
|
fprint("Could not find part in API: " + partnum)
|
||||||
return False
|
return False
|
||||||
fprint("Search result found: result " + str(idx) + ", for ID " + name)
|
#fprint("Search result found: result " + str(idx) + ", for ID " + name)
|
||||||
#urlname = a["results"][0]["raw"]["catalogitemurlname"]
|
#urlname = a["results"][0]["raw"]["catalogitemurlname"]
|
||||||
img = a["results"][idx]["raw"]["catalogitemimageurl"]
|
img = a["results"][idx]["raw"]["catalogitemimageurl"]
|
||||||
img = img[0:img.index("?")]
|
img = img[0:img.index("?")]
|
||||||
@ -92,7 +93,7 @@ def query_search(partnum, source):
|
|||||||
#print(out)
|
#print(out)
|
||||||
return out
|
return out
|
||||||
except:
|
except:
|
||||||
print("falied to search with API. Falling back to datasheet lookup.")
|
print("Failed to search with API. Falling back to datasheet lookup.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
@ -118,7 +119,7 @@ def query_search(partnum, source):
|
|||||||
if data["Count"] > 0:
|
if data["Count"] > 0:
|
||||||
#print(data["Results"][0]["Url"])
|
#print(data["Results"][0]["Url"])
|
||||||
for result in data["Results"]:
|
for result in data["Results"]:
|
||||||
#print(result["Url"].split("/")[-1], partnum.replace("-", "").replace("/", "_"))
|
#print(result["Url"])
|
||||||
if result["Url"].split("/")[-1] == partnum.replace("-", "").replace("/", "_"):
|
if result["Url"].split("/")[-1] == partnum.replace("-", "").replace("/", "_"):
|
||||||
#print(partnum)
|
#print(partnum)
|
||||||
#print(result["Html"])
|
#print(result["Html"])
|
||||||
@ -137,12 +138,15 @@ def query_search(partnum, source):
|
|||||||
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
|
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
|
||||||
output["partnum"] = partnum.replace("-", "").replace("/", "_")
|
output["partnum"] = partnum.replace("-", "").replace("/", "_")
|
||||||
#"test".index()
|
#"test".index()
|
||||||
#print(output)
|
print(output)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
print("Failed to search with API. Falling back to datasheet lookup.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
print("Failed to search with API. Falling back to datasheet lookup.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
@ -155,11 +159,15 @@ def touch(path):
|
|||||||
def get_multi(partnums, delay=0.25):
|
def get_multi(partnums, delay=0.25):
|
||||||
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
|
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
|
||||||
|
|
||||||
def _try_download_datasheet(partnum, output_dir): # Guess datasheet URL
|
def _try_download_datasheet(partnum, output_dir, dstype): # Guess datasheet URL
|
||||||
global bartext
|
global bartext
|
||||||
|
|
||||||
|
if dstype == "Belden":
|
||||||
sanitized_name = partnum.replace(" ", "")
|
sanitized_name = partnum.replace(" ", "")
|
||||||
url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf"
|
url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf"
|
||||||
|
elif dstype == "Alphawire":
|
||||||
|
# Alphawire Datasheet URLs do not use a sanitized part number (but product pages do)
|
||||||
|
url = "https://www.alphawire.com/disteAPI/SpecPDF/DownloadProductSpecPdf?productPartNumber=" + partnum
|
||||||
#fprint(url)
|
#fprint(url)
|
||||||
try:
|
try:
|
||||||
with requests.get(url, stream=True) as r:
|
with requests.get(url, stream=True) as r:
|
||||||
@ -267,17 +275,23 @@ def get_multi(partnums, delay=0.25):
|
|||||||
bar(skipped=False)
|
bar(skipped=False)
|
||||||
|
|
||||||
def run_search(partnum):
|
def run_search(partnum):
|
||||||
|
oldpartnum = partnum
|
||||||
|
if dstype == "Alphawire":
|
||||||
|
# For Alphawire, sanitize the part number for only the final result check, because their API is very weird
|
||||||
|
# For the actual search, it must be un-sanitized
|
||||||
|
partnum = partnum.replace("-", "").replace("/","_")
|
||||||
output_dir = "cables/" + partnum
|
output_dir = "cables/" + partnum
|
||||||
path = output_dir + "/datasheet.pdf"
|
path = output_dir + "/datasheet.pdf"
|
||||||
bartext = "Downloading files for part " + partnum
|
bartext = "Downloading files for part " + partnum
|
||||||
bar.text = bartext
|
bar.text = bartext
|
||||||
#
|
partnum = oldpartnum
|
||||||
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
|
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
|
||||||
# Use query
|
# Use query
|
||||||
search_result = query_search(partnum, dstype)
|
search_result = query_search(partnum, dstype)
|
||||||
# Try to use belden.com search
|
# Try to use belden.com search
|
||||||
if search_result is not False:
|
if search_result is not False:
|
||||||
# Download high resolution part image if available and needed
|
# Download high resolution part image if available and needed
|
||||||
|
#oldpartnum = partnum
|
||||||
partnum = search_result["partnum"]
|
partnum = search_result["partnum"]
|
||||||
output_dir = "cables/" + partnum
|
output_dir = "cables/" + partnum
|
||||||
path = output_dir + "/datasheet.pdf"
|
path = output_dir + "/datasheet.pdf"
|
||||||
@ -302,7 +316,7 @@ def get_multi(partnums, delay=0.25):
|
|||||||
__use_cached_datasheet(partnum, path, output_dir, dstype)
|
__use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||||
|
|
||||||
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
|
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
|
||||||
elif _try_download_datasheet(partnum, output_dir) is not False:
|
elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
|
||||||
__downloaded_datasheet(partnum, path, output_dir, dstype)
|
__downloaded_datasheet(partnum, path, output_dir, dstype)
|
||||||
|
|
||||||
# Failed to download with search or guess :(
|
# Failed to download with search or guess :(
|
||||||
|
@ -112,14 +112,14 @@ def parse(filename, output_dir, partnum, dstype):
|
|||||||
tables = dict()
|
tables = dict()
|
||||||
torename = dict()
|
torename = dict()
|
||||||
previous_table = ""
|
previous_table = ""
|
||||||
print(table_list.keys())
|
#print(table_list.keys())
|
||||||
for table_name in table_list.keys():
|
for table_name in table_list.keys():
|
||||||
# determine shape: horizontal or vertical
|
# determine shape: horizontal or vertical
|
||||||
table = table_list[table_name]
|
table = table_list[table_name]
|
||||||
rows = table.shape[0]
|
rows = table.shape[0]
|
||||||
cols = table.shape[1]
|
cols = table.shape[1]
|
||||||
vertical = None
|
vertical = None
|
||||||
print(rows, cols, table_name)
|
#print(rows, cols, table_name)
|
||||||
if rows > 2 and cols == 2:
|
if rows > 2 and cols == 2:
|
||||||
vertical = True
|
vertical = True
|
||||||
elif cols == 1 and rows > 1:
|
elif cols == 1 and rows > 1:
|
||||||
@ -145,7 +145,7 @@ def parse(filename, output_dir, partnum, dstype):
|
|||||||
vertical = False
|
vertical = False
|
||||||
else: # 1 column, <= 2 rows
|
else: # 1 column, <= 2 rows
|
||||||
vertical = False
|
vertical = False
|
||||||
print(vertical)
|
#print(vertical)
|
||||||
# missing name check
|
# missing name check
|
||||||
for table_name_2 in table_list.keys():
|
for table_name_2 in table_list.keys():
|
||||||
if dstype == "Alphawire" and table_name_2.find("\n") >= 0:
|
if dstype == "Alphawire" and table_name_2.find("\n") >= 0:
|
||||||
@ -182,11 +182,11 @@ def parse(filename, output_dir, partnum, dstype):
|
|||||||
|
|
||||||
if prevtbl.cells[-1][0].lb[1] < 50 and thistbl.cells[0][0].lt[1] > 600:
|
if prevtbl.cells[-1][0].lb[1] < 50 and thistbl.cells[0][0].lt[1] > 600:
|
||||||
# wraparound
|
# wraparound
|
||||||
print("WRAP")
|
#print("WRAP")
|
||||||
print("PREV TABLE", prevtbl.df)
|
#print("PREV TABLE", prevtbl.df)
|
||||||
print("THIS TABLE", thistbl.df)
|
#print("THIS TABLE", thistbl.df)
|
||||||
print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
|
#print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
|
||||||
print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
|
#print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
|
||||||
main_key = previous_table
|
main_key = previous_table
|
||||||
cont_key = table_name
|
cont_key = table_name
|
||||||
#print(vertical)
|
#print(vertical)
|
||||||
@ -200,21 +200,21 @@ def parse(filename, output_dir, partnum, dstype):
|
|||||||
del tables[table_name]
|
del tables[table_name]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print(tables[cont_key].keys())
|
#print(tables[cont_key].keys())
|
||||||
for key in tables[cont_key].keys():
|
for key in tables[cont_key].keys():
|
||||||
print(main_key, key, cont_key, key)
|
#print(main_key, key, cont_key, key)
|
||||||
tables[main_key][key] = tables[cont_key][key]
|
tables[main_key][key] = tables[cont_key][key]
|
||||||
del tables[table_name]
|
del tables[table_name]
|
||||||
|
|
||||||
elif thistbl.cells[0][0].lt[1] > 600:
|
elif thistbl.cells[0][0].lt[1] > 600:
|
||||||
# name on previous page (grrrr)
|
# name on previous page (grrrr)
|
||||||
print("NAMEABOVE")
|
#print("NAMEABOVE")
|
||||||
print("PREV TABLE", prevtbl.df)
|
#print("PREV TABLE", prevtbl.df)
|
||||||
print("THIS TABLE", thistbl.df)
|
#print("THIS TABLE", thistbl.df)
|
||||||
print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
|
#print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
|
||||||
print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
|
#print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
|
||||||
name = extract_table_name(50, prevtbl.page,reader,dstype,table_name).strip("\n").strip()
|
name = extract_table_name(50, prevtbl.page,reader,dstype,table_name).strip("\n").strip()
|
||||||
print("FOUND NAME:", name)
|
#print("FOUND NAME:", name)
|
||||||
torename[table_name] = name
|
torename[table_name] = name
|
||||||
|
|
||||||
|
|
||||||
@ -287,7 +287,7 @@ def parse(filename, output_dir, partnum, dstype):
|
|||||||
with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file:
|
with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file:
|
||||||
json.dump(output_table["fullspecs"], json_file)
|
json.dump(output_table["fullspecs"], json_file)
|
||||||
|
|
||||||
print(json.dumps(output_table, indent=2))
|
#print(json.dumps(output_table, indent=2))
|
||||||
touch(output_dir + "/parsed") # mark as parsed
|
touch(output_dir + "/parsed") # mark as parsed
|
||||||
return output_table
|
return output_table
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user