diff --git a/fileserver.py b/fileserver.py index 9e222b4..9a29944 100644 --- a/fileserver.py +++ b/fileserver.py @@ -7,6 +7,7 @@ def run_server(port, directory): Run a simple HTTP server serving files from the specified directory. """ # Change the working directory to the specified directory + os.makedirs(directory, exist_ok=True) os.chdir(directory) # Create the HTTP server diff --git a/get_specs.py b/get_specs.py index 834a6c7..b1c86f3 100755 --- a/get_specs.py +++ b/get_specs.py @@ -81,15 +81,18 @@ def query_search(partnum, source): brand = a["results"][idx]["raw"]["catalogitembrand"] desc = a["results"][idx]["raw"]["catalogitemlongdesc"] shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"] - a = json.dumps(a["results"][idx], indent=2) + app = a["results"][idx]["raw"]["catalogitemapplication"] + + #a = json.dumps(a["results"][idx], indent=2) #print(a, urlname, img, uri, dsurl) out = dict() out["url"] = "https://www.belden.com/products/" + uri out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf" out["brand"] = brand - out["name"] = shortdesc + out["short_description"] = shortdesc out["description"] = desc + out["application"] = app out["image"] = "https://www.belden.com" + img out["partnum"] = name #print(out) @@ -254,7 +257,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None): os.remove(partnum + "/datasheet.pdf") sys.exit() - def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl): + def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, extra): fprint("Using cached datasheet for " + partnum) # bar.text = "Using cached datasheet for " + partnum # bar(skipped=True) @@ -263,7 +266,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None): fprint("Parsing Datasheet contents of " + partnum) # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." - out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl) + out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra) # bar(skipped=False) return out else: @@ -271,13 +274,13 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None): # bar.text = "Datasheet already parsed for " + partnum + ".pdf" # bar(skipped=True) - def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl): + def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, extra): fprint("Downloaded " + path) # bar.text = "Downloaded " + path # bar(skipped=False) fprint("Parsing Datasheet contents of " + partnum) # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." - out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl) + out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra) # bar(skipped=False) return out @@ -302,11 +305,15 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None): if search_result is not False: # Download high resolution part image if available and needed #oldpartnum = partnum + partnum = search_result["partnum"] returnval = [partnum, dstype, False, False] output_dir = dir + partnum path = output_dir + "/datasheet.pdf" bartext = "Downloading files for part " + partnum + os.makedirs(output_dir, exist_ok=True) + with open(output_dir + "/search-result.json", 'w') as json_file: + json.dump(search_result, json_file) # bar.text = bartext if not os.path.exists(output_dir + "/found_part_hires") or not cache: @@ -321,20 +328,28 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None): # Download datasheet from provided URL if needed if os.path.exists(path) and os.path.getsize(path) > 1 and cache: - out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl) + out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result) returnval = [partnum, dstype, True, out] elif _download_datasheet(search_result["datasheet"], output_dir) is not False: - out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl) + out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result) returnval = [partnum, dstype, True, out] elif os.path.exists(path) and os.path.getsize(path) > 1 and cache: - out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl) + search_result = {} + if os.path.exists(output_dir + "/search-result.json"): + with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file: + search_result = json.load(file) + out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result) returnval = [partnum, dstype, True, out] # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download elif _try_download_datasheet(partnum, output_dir, dstype) is not False: - out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl) + search_result = {} + if os.path.exists(output_dir + "/search-result.json"): + with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file: + search_result = json.load(file) + out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result) returnval = [partnum, dstype, False, out] # Failed to download with search or guess :( @@ -346,7 +361,11 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None): # We already have a hi-res image and the datasheet - perfect! else: fprint("Using cached hi-res part image for " + partnum) - out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl) + search_result = {} + if os.path.exists(output_dir + "/search-result.json"): + with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file: + search_result = json.load(file) + out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result) returnval = [partnum, dstype, False, out] actualpartnums.append(returnval) return True diff --git a/jukebox-web b/jukebox-web index 4cea1b0..e55c7b5 160000 --- a/jukebox-web +++ b/jukebox-web @@ -1 +1 @@ -Subproject commit 4cea1b08329761fa5485686e0ff0871730f73820 +Subproject commit e55c7b5b5af29ff11b037fdfc40321d422859f5c diff --git a/read_datasheet.py b/read_datasheet.py index 53d3d30..79dc601 100755 --- a/read_datasheet.py +++ b/read_datasheet.py @@ -89,7 +89,7 @@ def rotate_and_crop_image(path, image_name, force_rotate=False): # Save or display the image img_cropped.save(path + "/" + "thumbnail-" + image_name) # Save the cropped image -def parse(filename, output_dir, partnum, dstype, weburl): +def parse(filename, output_dir, partnum, dstype, weburl, extra): tables = [] # Extract table data try: @@ -362,14 +362,31 @@ def parse(filename, output_dir, partnum, dstype, weburl): id = str(uuid.uuid4()) output_table["id"] = id #output_table["position"] = id - output_table["brand"] = dstype + if "brand" in extra: + output_table["brand"] = extra["brand"] + else: + output_table["brand"] = dstype + if img is not None: output_table["image"] = img - output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, "image": img, **tables} - output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, "image": img, **flatten(tables)} + output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], "image": img, **tables} + output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], "image": img, **flatten(tables)} else: - output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, **tables} - output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, **flatten(tables)} + output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], **tables} + output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], **flatten(tables)} + + if "short_description" in extra: + output_table["short_description"] = extra["short_description"] + output_table["fullspecs"]["short_description"] = extra["short_description"] + output_table["searchspecs"]["short_description"] = extra["short_description"] + if "description" in extra: + output_table["description"] = extra["description"] + output_table["fullspecs"]["description"] = extra["description"] + output_table["searchspecs"]["description"] = extra["description"] + if "application" in extra: + output_table["application"] = extra["application"] + output_table["fullspecs"]["application"] = extra["application"] + output_table["searchspecs"]["application"] = extra["application"] output_table["searchspecs"]["id"] = id diff --git a/run.py b/run.py index 1dfcd3f..8831799 100755 --- a/run.py +++ b/run.py @@ -168,8 +168,6 @@ def check_server(): cabledata = jbs.get_position(str(idx)) fs = cabledata["fullspecs"] tmp1 = {"part_number": cable_list[idx], "position": idx, "name": cable_list[idx], "brand": cabledata["brand"] } - if "image" in cabledata: - tmp1["image"] = cabledata["image"] if "Product Overview" in fs and "Product Category" in fs["Product Overview"]: tmp1["short_description"] = fs["Product Overview"]["Product Category"] if "Product Overview" in fs and "Suitable Applications" in fs["Product Overview"]: @@ -194,7 +192,15 @@ def check_server(): tmp1["description"] = key else: tmp1["description"] = fs["Product Overview"]["Suitable Applications:"] - + if "image" in cabledata: + tmp1["image"] = cabledata["image"] + if "description" in cabledata: + tmp1["description"] = cabledata["description"] + if "short_description" in cabledata: + tmp1["short_description"] = cabledata["short_description"] + if "application" in cabledata: + tmp1["application"] = cabledata["application"] + tmp.append(tmp1) out = {"map": tmp} fprint(out)