Pull description from Belden API for more consistent values

This commit is contained in:
Cole Deck 2024-05-08 16:44:22 -05:00
parent a905858e3b
commit 21cb2beb67
5 changed files with 64 additions and 21 deletions

View File

@@ -7,6 +7,7 @@ def run_server(port, directory):
Run a simple HTTP server serving files from the specified directory. Run a simple HTTP server serving files from the specified directory.
""" """
# Change the working directory to the specified directory # Change the working directory to the specified directory
os.makedirs(directory, exist_ok=True)
os.chdir(directory) os.chdir(directory)
# Create the HTTP server # Create the HTTP server

View File

@@ -81,15 +81,18 @@ def query_search(partnum, source):
brand = a["results"][idx]["raw"]["catalogitembrand"] brand = a["results"][idx]["raw"]["catalogitembrand"]
desc = a["results"][idx]["raw"]["catalogitemlongdesc"] desc = a["results"][idx]["raw"]["catalogitemlongdesc"]
shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"] shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"]
a = json.dumps(a["results"][idx], indent=2) app = a["results"][idx]["raw"]["catalogitemapplication"]
#a = json.dumps(a["results"][idx], indent=2)
#print(a, urlname, img, uri, dsurl) #print(a, urlname, img, uri, dsurl)
out = dict() out = dict()
out["url"] = "https://www.belden.com/products/" + uri out["url"] = "https://www.belden.com/products/" + uri
out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf" out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf"
out["brand"] = brand out["brand"] = brand
out["name"] = shortdesc out["short_description"] = shortdesc
out["description"] = desc out["description"] = desc
out["application"] = app
out["image"] = "https://www.belden.com" + img out["image"] = "https://www.belden.com" + img
out["partnum"] = name out["partnum"] = name
#print(out) #print(out)
@@ -254,7 +257,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
os.remove(partnum + "/datasheet.pdf") os.remove(partnum + "/datasheet.pdf")
sys.exit() sys.exit()
def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl): def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, extra):
fprint("Using cached datasheet for " + partnum) fprint("Using cached datasheet for " + partnum)
# bar.text = "Using cached datasheet for " + partnum # bar.text = "Using cached datasheet for " + partnum
# bar(skipped=True) # bar(skipped=True)
@@ -263,7 +266,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
fprint("Parsing Datasheet contents of " + partnum) fprint("Parsing Datasheet contents of " + partnum)
# bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl) out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)
# bar(skipped=False) # bar(skipped=False)
return out return out
else: else:
@@ -271,13 +274,13 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
# bar.text = "Datasheet already parsed for " + partnum + ".pdf" # bar.text = "Datasheet already parsed for " + partnum + ".pdf"
# bar(skipped=True) # bar(skipped=True)
def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl): def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, extra):
fprint("Downloaded " + path) fprint("Downloaded " + path)
# bar.text = "Downloaded " + path # bar.text = "Downloaded " + path
# bar(skipped=False) # bar(skipped=False)
fprint("Parsing Datasheet contents of " + partnum) fprint("Parsing Datasheet contents of " + partnum)
# bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl) out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)
# bar(skipped=False) # bar(skipped=False)
return out return out
@@ -302,11 +305,15 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
if search_result is not False: if search_result is not False:
# Download high resolution part image if available and needed # Download high resolution part image if available and needed
#oldpartnum = partnum #oldpartnum = partnum
partnum = search_result["partnum"] partnum = search_result["partnum"]
returnval = [partnum, dstype, False, False] returnval = [partnum, dstype, False, False]
output_dir = dir + partnum output_dir = dir + partnum
path = output_dir + "/datasheet.pdf" path = output_dir + "/datasheet.pdf"
bartext = "Downloading files for part " + partnum bartext = "Downloading files for part " + partnum
os.makedirs(output_dir, exist_ok=True)
with open(output_dir + "/search-result.json", 'w') as json_file:
json.dump(search_result, json_file)
# bar.text = bartext # bar.text = bartext
if not os.path.exists(output_dir + "/found_part_hires") or not cache: if not os.path.exists(output_dir + "/found_part_hires") or not cache:
@@ -321,20 +328,28 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
# Download datasheet from provided URL if needed # Download datasheet from provided URL if needed
if os.path.exists(path) and os.path.getsize(path) > 1 and cache: if os.path.exists(path) and os.path.getsize(path) > 1 and cache:
out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl) out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
returnval = [partnum, dstype, True, out] returnval = [partnum, dstype, True, out]
elif _download_datasheet(search_result["datasheet"], output_dir) is not False: elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl) out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
returnval = [partnum, dstype, True, out] returnval = [partnum, dstype, True, out]
elif os.path.exists(path) and os.path.getsize(path) > 1 and cache: elif os.path.exists(path) and os.path.getsize(path) > 1 and cache:
out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl) search_result = {}
if os.path.exists(output_dir + "/search-result.json"):
with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
search_result = json.load(file)
out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
returnval = [partnum, dstype, True, out] returnval = [partnum, dstype, True, out]
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
elif _try_download_datasheet(partnum, output_dir, dstype) is not False: elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl) search_result = {}
if os.path.exists(output_dir + "/search-result.json"):
with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
search_result = json.load(file)
out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
returnval = [partnum, dstype, False, out] returnval = [partnum, dstype, False, out]
# Failed to download with search or guess :( # Failed to download with search or guess :(
@@ -346,7 +361,11 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
# We already have a hi-res image and the datasheet - perfect! # We already have a hi-res image and the datasheet - perfect!
else: else:
fprint("Using cached hi-res part image for " + partnum) fprint("Using cached hi-res part image for " + partnum)
out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl) search_result = {}
if os.path.exists(output_dir + "/search-result.json"):
with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
search_result = json.load(file)
out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
returnval = [partnum, dstype, False, out] returnval = [partnum, dstype, False, out]
actualpartnums.append(returnval) actualpartnums.append(returnval)
return True return True

@@ -1 +1 @@
Subproject commit 4cea1b08329761fa5485686e0ff0871730f73820 Subproject commit e55c7b5b5af29ff11b037fdfc40321d422859f5c

View File

@@ -89,7 +89,7 @@ def rotate_and_crop_image(path, image_name, force_rotate=False):
# Save or display the image # Save or display the image
img_cropped.save(path + "/" + "thumbnail-" + image_name) # Save the cropped image img_cropped.save(path + "/" + "thumbnail-" + image_name) # Save the cropped image
def parse(filename, output_dir, partnum, dstype, weburl): def parse(filename, output_dir, partnum, dstype, weburl, extra):
tables = [] tables = []
# Extract table data # Extract table data
try: try:
@@ -362,14 +362,31 @@ def parse(filename, output_dir, partnum, dstype, weburl):
id = str(uuid.uuid4()) id = str(uuid.uuid4())
output_table["id"] = id output_table["id"] = id
#output_table["position"] = id #output_table["position"] = id
output_table["brand"] = dstype if "brand" in extra:
output_table["brand"] = extra["brand"]
else:
output_table["brand"] = dstype
if img is not None: if img is not None:
output_table["image"] = img output_table["image"] = img
output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, "image": img, **tables} output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], "image": img, **tables}
output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, "image": img, **flatten(tables)} output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], "image": img, **flatten(tables)}
else: else:
output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, **tables} output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], **tables}
output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, **flatten(tables)} output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], **flatten(tables)}
if "short_description" in extra:
output_table["short_description"] = extra["short_description"]
output_table["fullspecs"]["short_description"] = extra["short_description"]
output_table["searchspecs"]["short_description"] = extra["short_description"]
if "description" in extra:
output_table["description"] = extra["description"]
output_table["fullspecs"]["description"] = extra["description"]
output_table["searchspecs"]["description"] = extra["description"]
if "application" in extra:
output_table["application"] = extra["application"]
output_table["fullspecs"]["application"] = extra["application"]
output_table["searchspecs"]["application"] = extra["application"]
output_table["searchspecs"]["id"] = id output_table["searchspecs"]["id"] = id

12
run.py
View File

@@ -168,8 +168,6 @@ def check_server():
cabledata = jbs.get_position(str(idx)) cabledata = jbs.get_position(str(idx))
fs = cabledata["fullspecs"] fs = cabledata["fullspecs"]
tmp1 = {"part_number": cable_list[idx], "position": idx, "name": cable_list[idx], "brand": cabledata["brand"] } tmp1 = {"part_number": cable_list[idx], "position": idx, "name": cable_list[idx], "brand": cabledata["brand"] }
if "image" in cabledata:
tmp1["image"] = cabledata["image"]
if "Product Overview" in fs and "Product Category" in fs["Product Overview"]: if "Product Overview" in fs and "Product Category" in fs["Product Overview"]:
tmp1["short_description"] = fs["Product Overview"]["Product Category"] tmp1["short_description"] = fs["Product Overview"]["Product Category"]
if "Product Overview" in fs and "Suitable Applications" in fs["Product Overview"]: if "Product Overview" in fs and "Suitable Applications" in fs["Product Overview"]:
@@ -194,7 +192,15 @@ def check_server():
tmp1["description"] = key tmp1["description"] = key
else: else:
tmp1["description"] = fs["Product Overview"]["Suitable Applications:"] tmp1["description"] = fs["Product Overview"]["Suitable Applications:"]
if "image" in cabledata:
tmp1["image"] = cabledata["image"]
if "description" in cabledata:
tmp1["description"] = cabledata["description"]
if "short_description" in cabledata:
tmp1["short_description"] = cabledata["short_description"]
if "application" in cabledata:
tmp1["application"] = cabledata["application"]
tmp.append(tmp1) tmp.append(tmp1)
out = {"map": tmp} out = {"map": tmp}
fprint(out) fprint(out)