jukebox-software/get_specs.py

505 lines
26 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import sys
import read_datasheet
from alive_progress import alive_bar
import requests
import time
import json
import subprocess
from util import fprint
# Progress text shared with the download helpers nested in get_multi, which
# rebind it through ``global bartext`` and append one "." per received chunk.
bartext = ""
# Module-level failure list. get_multi builds its own local ``failed`` list, so
# nothing visible in this file appends to this one.
failed = []
def check_internet(url='https://belden.com', timeout=5):
    """Report whether *url* can be reached with an HTTP GET.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the request completes (the HTTP status is not inspected),
        False if the connection fails or the request times out.
    """
    try:
        # Only reachability matters, so the response is closed unread.
        with requests.get(url, timeout=timeout):
            return True
    except (requests.ConnectionError, requests.Timeout):
        # A read timeout is a requests.Timeout but not a
        # requests.ConnectionError, so both must be listed for the
        # ``timeout`` argument to produce False rather than a traceback.
        return False
def query_search(partnum, source):
    """Look up a part in a manufacturer's search backend.

    Args:
        partnum: Part number to search for (without any "BL"/"AW" prefix).
        source: Which backend to query: "Belden" (Coveo REST search),
            "Belden_shell" (the legacy ``./query-search.sh`` script) or
            "Alphawire".

    Returns:
        A dict describing the part, or False when nothing usable was found.
        For "Belden" the dict has the keys ``url``, ``datasheet``, ``brand``,
        ``short_description``, ``description``, ``application``, ``category``,
        ``image`` and ``partnum``. For "Alphawire" it has ``image`` (empty
        string when no image URL could be extracted), ``datasheet`` and
        ``partnum``. For "Belden_shell" it is whatever JSON the script prints.
        An unrecognised ``source`` also yields False.
    """
    fprint("Searching for " + partnum)
    if source == "Belden":
        token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
        try:
            with requests.get(token_url) as r:
                out = json.loads(r.content)
            token = out["token"]
            search_url = "https://www.belden.com/coveo/rest/search"
            # Ridiculous search parameters extracted from website. Do not touch
            # NOTE(review): the literal is reproduced exactly as received,
            # including its physical line breaks, which are part of the payload.
            search_data = r"""{ "q": "{QUERY}", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"{QUERY}\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }"""
            search_data = search_data.replace(r"{QUERY}", partnum)
            headers = {
                'Authorization': f'Bearer {token}',
                'Content-Type': 'application/json'
            }
            with requests.post(search_url, headers=headers, data=search_data) as r:
                a = json.loads(r.text)
            results = a["results"]

            # Walk the results from last to first so that, among partial
            # matches, the one closest to the top of the list is kept; an
            # exact title match wins immediately.
            idx = -1
            for partid in range(len(results) - 1, -1, -1):
                name = results[partid]["title"]
                if name == partnum:
                    idx = partid
                    break
                if partnum in name or name in partnum:
                    idx = partid
            if idx < 0:
                fprint("Could not find part in API: " + partnum)
                return False

            raw = results[idx]["raw"]
            # Drop the query string from the image URL; tolerate URLs that
            # have none instead of failing the whole search.
            img = raw["catalogitemimageurl"].split("?", 1)[0]
            out = dict()
            out["url"] = "https://www.belden.com/products/" + raw["clickableuri"]
            out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + raw["catalogitemdatasheetid"] + "_techdata.pdf"
            out["brand"] = raw["catalogitembrand"]
            out["short_description"] = raw["catalogitemshortdesc"]
            out["description"] = raw["catalogitemlongdesc"]
            out["application"] = raw["catalogitemapplication"]
            out["category"] = raw["catalogitemfilterproductcategory"]
            out["image"] = "https://www.belden.com" + img
            out["partnum"] = results[idx]["title"]
            return out
        except Exception:
            # Deliberate best-effort: any network, JSON or schema problem
            # means "no search result". KeyboardInterrupt and SystemExit are
            # no longer swallowed here.
            print("Failed to search with API. Falling back to datasheet lookup.")
            return False

    # Original bash script
    # superseded by above
    if source == "Belden_shell":
        command = ["./query-search.sh", partnum]
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode != 0:  # error
            fprint("No results found in search database for " + partnum + ". No hi-res part image available.", result.stderr)
            return False
        return json.loads(result.stdout)
    elif source == "Alphawire":
        alphaurl = "https://www.alphawire.com//sxa/search/results/?l=en&s={4A774076-6068-460C-9CC6-A2D8E85E407F}&itemid={BF82F58C-EFD9-4D8B-AE3E-097DD12CF7DA}&sig=&autoFireSearch=true&productpartnumber=*" + partnum + "*&v={B22CD56D-AB95-4048-8AA1-5BBDF2F2D17F}&p=10&e=0&o=ProductPartNumber%2CAscending"
        output = dict()
        try:
            # The request and JSON decode are inside the try so that a network
            # failure falls back like every other search failure.
            r = requests.get(url=alphaurl)
            data = r.json()
            if data["Count"] > 0:
                # Product page URLs end in the part number with "-" removed
                # and "/" replaced by "_".
                wanted = partnum.replace("-", "").replace("/", "_")
                for result in data["Results"]:
                    if result["Url"].split("/")[-1] != wanted:
                        continue
                    html = result["Html"]
                    try:
                        imgidx = html.index("<img src=") + 10
                        imgidx2 = html.index("?", imgidx)
                        image = html[imgidx:imgidx2]
                    except ValueError:
                        image = ""
                    if not image.startswith("http"):
                        # Anything that is not an absolute URL is treated as
                        # "no image" so callers always get a usable value.
                        image = ""
                        print("No cable image found.")
                    output["image"] = image

                    dsidx = html.index("<a href=\"/disteAPI/") + 9
                    dsidx2 = html.index(partnum, dsidx) + len(partnum)
                    output["datasheet"] = "https://www.alphawire.com" + html[dsidx:dsidx2]
                    output["partnum"] = partnum.replace("/", "_")
                    return output
        except Exception:
            print("Failed to search with API. Falling back to datasheet lookup.")
            return False

    # Reached when Alphawire returned no matching product, or when ``source``
    # is not one of the supported backends.
    print("Failed to search with API. Falling back to datasheet lookup.")
    return False
def touch(path):
    """Create *path* if it does not exist and set its timestamps to now."""
    # Append mode creates a missing file without truncating an existing one.
    with open(path, mode='a') as _marker:
        os.utime(path, times=None)
def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
    """Fetch, cache and parse datasheets and images for a list of parts.

    Args:
        partnums: Part numbers to process. A leading "BL" selects Belden and a
            leading "AW" selects Alphawire; anything else is treated as a
            Belden part. Entries that are ``False`` are passed through to the
            result list unchanged.
        delay: Seconds to sleep after each lookup that went to the network.
        dir: Output directory prefix. It is concatenated directly with the
            part number, so it should end with a path separator.
        webport: Port number used to build the ``weburl`` string handed to
            ``read_datasheet.parse``.
        cache: When true, reuse files already present under ``dir``.
        bar: Unused; kept for interface compatibility.

    Returns:
        A tuple ``(ok, actualpartnums)``. ``ok`` is False if at least one part
        could not be downloaded. ``actualpartnums`` holds one
        ``[partnum, dstype, flag, parsed]`` list per part that was processed
        (plus any ``False`` placeholders from the input); parts that failed
        add no entry.
    """
    failed = list()
    actualpartnums = list()

    def _discard_partial(path):
        """Delete a partially written download, ignoring a missing file."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def _have_datasheet(path):
        """Return True if a non-trivial datasheet file already exists."""
        return os.path.exists(path) and os.path.getsize(path) > 1

    def _load_saved_search(output_dir):
        """Return the stored search result for a part, or {} if none exists."""
        saved = output_dir + "/search-result.json"
        if not os.path.exists(saved):
            return {}
        with open(saved, 'r', encoding='utf-8') as file:
            return json.load(file)

    def _fetch_to_file(url, output_dir, filename, require_pdf):
        """Stream *url* into output_dir/filename; return the path or False."""
        global bartext
        if not url:
            # query_search reports "no image" as an empty string.
            return False
        target = output_dir + "/" + filename
        started = False
        try:
            with requests.get(url, stream=True) as r:
                if require_pdf and r.headers.get("Content-Type") != "application/pdf":
                    return False
                if not r.ok:
                    # Any HTTP error (not only 404) means nothing to save.
                    return False
                os.makedirs(output_dir, exist_ok=True)
                bartext = ""
                started = True
                with open(target, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=65536):
                        bartext = bartext + "."
                        f.write(chunk)
            return target
        except KeyboardInterrupt:
            fprint("Quitting!")
            # Only remove the file this call started writing, so an earlier
            # complete download is not lost.
            if started:
                _discard_partial(target)
            sys.exit()
        except requests.RequestException as err:
            # One unreachable file should not abort the whole batch.
            fprint("Download failed for " + url + ": " + str(err))
            if started:
                _discard_partial(target)
            return False

    def _download_datasheet(url, output_dir):  # Download datasheet with known URL
        return _fetch_to_file(url, output_dir, "datasheet.pdf", True)

    def _try_download_datasheet(partnum, output_dir, dstype):  # Guess datasheet URL
        if dstype == "Belden":
            sanitized_name = partnum.replace(" ", "")
            url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf"
        elif dstype == "Alphawire":
            # Alphawire Datasheet URLs do not use a sanitized part number (but product pages do)
            url = "https://www.alphawire.com/disteAPI/SpecPDF/DownloadProductSpecPdf?productPartNumber=" + partnum
        else:
            # No URL pattern is known for other manufacturers.
            return False
        return _download_datasheet(url, output_dir)

    def _download_image(url, output_dir):  # Download image with known URL
        # The saved file keeps the extension found at the end of the URL.
        return _fetch_to_file(url, output_dir, "part-hires." + url.split(".")[-1], False)

    def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, extra):
        """Parse an already downloaded datasheet unless it was parsed before."""
        fprint("Using cached datasheet for " + partnum)
        if not os.path.exists(output_dir + "/parsed"):
            fprint("Parsing Datasheet contents of " + partnum)
            return read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)
        fprint("Datasheet already parsed for " + partnum)
        return None

    def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, extra):
        """Parse a datasheet that has just been downloaded."""
        fprint("Downloaded " + path)
        fprint("Parsing Datasheet contents of " + partnum)
        return read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)

    def run_search(partnum, dstype):
        """Process one part; return a truthy value on success, else False."""
        partnum = partnum.replace("%20", " ")  # undo URL encoding
        oldpartnum = partnum
        if dstype == "Alphawire":
            # For alphawire, sanitize the part number for only the final result check, because their API is very weird
            # For the actual search, it must be un-sanitized
            partnum = partnum.replace("/", "_")
        output_dir = dir + partnum
        path = output_dir + "/datasheet.pdf"
        weburl = ":" + str(webport) + "/" + partnum + "/"
        partnum = oldpartnum.replace("_", "/")

        # We already have a hi-res image and the datasheet - perfect!
        if cache and os.path.exists(output_dir + "/found_part_hires") and _have_datasheet(path):
            fprint("Using cached hi-res part image for " + partnum)
            search_result = _load_saved_search(output_dir)
            out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
            actualpartnums.append([partnum, dstype, False, out])
            return True

        returnval = [partnum, dstype, False, False]
        search_result = query_search(partnum, dstype)
        if search_result is not False:
            # The search reports the canonical part number; store files under it.
            partnum = search_result["partnum"]
            returnval = [partnum, dstype, False, False]
            output_dir = dir + partnum
            path = output_dir + "/datasheet.pdf"
            os.makedirs(output_dir, exist_ok=True)
            with open(output_dir + "/search-result.json", 'w') as json_file:
                fprint("Saving search result of " + partnum)
                json.dump(search_result, json_file)

            # Download high resolution part image if available and needed
            if not os.path.exists(output_dir + "/found_part_hires") or not cache:
                if _download_image(search_result.get("image", ""), output_dir):
                    fprint("Downloaded hi-res part image for " + partnum)
                    returnval = [partnum, dstype, True, False]
                    # NOTE(review): indentation was lost in the source this was
                    # reconstructed from. The marker updates are assumed to
                    # belong to the successful-download branch; confirm.
                    if os.path.exists(output_dir + "/parsed"):
                        os.remove(output_dir + "/parsed")
                    touch(output_dir + "/found_part_hires")
            else:
                fprint("Using cached hi-res part image for " + partnum)

            # Download datasheet from provided URL if needed
            if _have_datasheet(path) and cache:
                out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                returnval = [partnum, dstype, True, out]
            elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
                out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                returnval = [partnum, dstype, True, out]
        elif _have_datasheet(path) and cache:
            search_result = _load_saved_search(output_dir)
            out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
            returnval = [partnum, dstype, True, out]
        # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
        elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
            search_result = _load_saved_search(output_dir)
            out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
            returnval = [partnum, dstype, False, out]
        # Failed to download with search or guess :(
        else:
            return False

        time.sleep(delay)
        actualpartnums.append(returnval)
        return returnval

    for fullpartnum in partnums:
        if fullpartnum is False:
            actualpartnums.append(False)
            continue
        if fullpartnum.startswith("BL"):  # catalog.belden.com entry
            partnum = fullpartnum[2:]
            dstype = "Belden"
        elif fullpartnum.startswith("AW"):
            partnum = fullpartnum[2:]
            dstype = "Alphawire"
        else:
            dstype = "Belden"  # guess
            partnum = fullpartnum

        if run_search(partnum, dstype):
            continue

        # The full name failed: retry with each space-separated piece.
        success = False
        if len(partnum.split(" ")) > 1:
            for name in partnum.split(" "):
                fprint("Retrying with alternate name: " + name)
                if run_search(name, dstype):
                    success = True
                    break
                time.sleep(delay)
            if not success:
                # NOTE(review): strip() only trims the ends; if the intent was
                # to remove interior spaces this should be replace(" ", "").
                namestripped = partnum.strip(" ")
                fprint("Retrying with alternate name: " + namestripped)
                if run_search(namestripped, dstype):
                    success = True
                time.sleep(delay)
        if not success:
            fprint("Failed to download datasheet for part " + partnum)
            failed.append((partnum, dstype))

    if len(failed) > 0:
        fprint("Failed to download:")
        for failed_partnum, failed_dstype in failed:
            fprint(failed_dstype + " " + failed_partnum)
        return False, actualpartnums  # Go to manual review upload page
    return True, actualpartnums  # All cables downloaded; we are good to go
if __name__ == "__main__":
    # Manual test harness: generate a few labels, then download and parse the
    # datasheets for the cables listed below.
    partnums = [
        # Actual cables in Jukebox
        "BL3092A",
        "AW86104CY",
        "AW3050",
        "AW6714",
        "AW1172C",
        "AWFIT-221-1/4",
        "BLTF-1LF-006-RS5N",
        "BLTF-SD9-006-RI5N",
        "BLTT-SLG-024-HTNN",
        "BLFISX012W0",
        "BLFI4X012W0",
        "BLSPE101 006Q",
        "BLSPE102 006Q",
        "BL7922A 010Q",
        "BL7958A 008Q",
        "BLIOP6U 010Q",
        "BL10GXW13 D15Q",
        "BL10GXW53 D15Q",
        "BL29501F 010Q",
        "BL29512 010Q",
        "BL3106A 010Q",
        "BL9841 060Q",
        "BL3105A 010Q",
        "BL3092A 010Q",
        "BL8760 060Q",
        "BL6300UE 008Q",
        "BL6300FE 009Q",
        "BLRA500P 006Q",
    ]
    # Some ones I picked, including some invalid ones
    a = [
        "BL10GXS12",
        "BLRST%205L-RKT%205L-949",
        "BL10GXS13",
        "BL10GXW12",
        "BL10GXW13",
        "BL2412",
        "BL2413",
        "BLOSP6AU",
        "BLFI4D024P9",
        "BLFISD012R9",
        "BLFDSD012A9",
        "BLFSSL024NG",
        "BLFISX006W0",  # datasheet only
        "BLFISX00103",  # invalid
        "BLC6D1100007"  # invalid
    ]
    #print(query_search("TT-SLG-024-HTNN", "Belden"))
    from label_generator import gen_label
    gen_label("BLTF-SD9-006-RI5")
    gen_label("BLRA500P")
    gen_label("AWFIT-221-1_4")
    gen_label("BLRST 5L-RKT 5L-949")
    # get_multi requires an output directory prefix and a web port; calling it
    # with only (partnums, delay) raises TypeError.
    # NOTE(review): "cables/" and 8000 are placeholder values for this manual
    # run - confirm against the directory and port the application uses.
    get_multi(partnums, 0.25, "cables/", 8000)
    #query_search("10GXS13", "Belden")