Pull description from Belden API for more consistent values

2024-05-08 16:44:22 -05:00
parent a905858e3b
commit 21cb2beb67
5 changed files with 64 additions and 21 deletions
--- a/fileserver.py
+++ b/fileserver.py
@@ -7,6 +7,7 @@ def run_server(port, directory):
    Run a simple HTTP server serving files from the specified directory.
    """
    # Change the working directory to the specified directory
+    os.makedirs(directory, exist_ok=True)
    os.chdir(directory)

    # Create the HTTP server
--- a/get_specs.py
+++ b/get_specs.py
@@ -81,15 +81,18 @@ def query_search(partnum, source):
                brand = a["results"][idx]["raw"]["catalogitembrand"]
                desc = a["results"][idx]["raw"]["catalogitemlongdesc"]
                shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"]
-                a = json.dumps(a["results"][idx], indent=2)
+                app = a["results"][idx]["raw"]["catalogitemapplication"]
+                
+                #a = json.dumps(a["results"][idx], indent=2)
                #print(a, urlname, img, uri, dsurl)

                out = dict()
                out["url"] = "https://www.belden.com/products/" + uri
                out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf"
                out["brand"] = brand
-                out["name"] = shortdesc
+                out["short_description"] = shortdesc
                out["description"] = desc
+                out["application"] = app
                out["image"] = "https://www.belden.com" + img
                out["partnum"] = name
                #print(out)
@@ -254,7 +257,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                os.remove(partnum + "/datasheet.pdf")
                sys.exit()

-        def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl):
+        def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, extra):
            fprint("Using cached datasheet for " + partnum)
            # bar.text = "Using cached datasheet for " + partnum
            # bar(skipped=True)
@@ -263,7 +266,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                fprint("Parsing Datasheet contents of " + partnum)
                # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
            
-                out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl)
+                out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)
                # bar(skipped=False) 
                return out
            else:
@@ -271,13 +274,13 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                # bar.text = "Datasheet already parsed for " + partnum + ".pdf"
                # bar(skipped=True)

-        def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl):
+        def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, extra):
            fprint("Downloaded " + path)
            # bar.text = "Downloaded " + path
            # bar(skipped=False)
            fprint("Parsing Datasheet contents of " + partnum)
            # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-            out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl)
+            out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)
            # bar(skipped=False)
            return out

@@ -302,11 +305,15 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                if search_result is not False:
                    # Download high resolution part image if available and needed
                    #oldpartnum = partnum
+                    
                    partnum = search_result["partnum"]
                    returnval = [partnum, dstype, False, False]
                    output_dir = dir + partnum
                    path = output_dir + "/datasheet.pdf"
                    bartext = "Downloading files for part " + partnum
+                    os.makedirs(output_dir, exist_ok=True)
+                    with open(output_dir + "/search-result.json", 'w') as json_file:
+                        json.dump(search_result, json_file)
                    # bar.text = bartext

                    if not os.path.exists(output_dir + "/found_part_hires") or not cache:
@@ -321,20 +328,28 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):

                    # Download datasheet from provided URL if needed
                    if os.path.exists(path) and os.path.getsize(path) > 1 and cache:
-                        out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl)
+                        out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                        returnval = [partnum, dstype, True, out]

                    elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
-                        out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl)
+                        out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                        returnval = [partnum, dstype, True, out]
                
                elif os.path.exists(path) and os.path.getsize(path) > 1 and cache:
-                    out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl)
+                    search_result = {}
+                    if os.path.exists(output_dir + "/search-result.json"):
+                        with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
+                            search_result = json.load(file)
+                    out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                    returnval = [partnum, dstype, True, out]
                
                # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
                elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
-                    out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl)
+                    search_result = {}
+                    if os.path.exists(output_dir + "/search-result.json"):
+                        with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
+                            search_result = json.load(file)
+                    out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                    returnval = [partnum, dstype, False, out]

                # Failed to download with search or guess :(
@@ -346,7 +361,11 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
            # We already have a hi-res image and the datasheet - perfect!
            else:
                fprint("Using cached hi-res part image for " + partnum)
-                out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl)
+                search_result = {}
+                if os.path.exists(output_dir + "/search-result.json"):
+                    with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
+                        search_result = json.load(file)
+                out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                returnval = [partnum, dstype, False, out]
                actualpartnums.append(returnval)
                return True
--- a/2
+++ b/2
--- a/read_datasheet.py
+++ b/read_datasheet.py
@@ -89,7 +89,7 @@ def rotate_and_crop_image(path, image_name, force_rotate=False):
        # Save or display the image
        img_cropped.save(path + "/" + "thumbnail-" + image_name)  # Save the cropped image

-def parse(filename, output_dir, partnum, dstype, weburl):
+def parse(filename, output_dir, partnum, dstype, weburl, extra):
    tables = []
    # Extract table data
    try:
@@ -362,14 +362,31 @@ def parse(filename, output_dir, partnum, dstype, weburl):
    id = str(uuid.uuid4())
    output_table["id"] = id
    #output_table["position"] = id
+    if "brand" in extra:
+        output_table["brand"] = extra["brand"]
+    else:
        output_table["brand"] = dstype
+
    if img is not None:
        output_table["image"] = img
-        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, "image": img, **tables}
-        output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, "image": img, **flatten(tables)}
+        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], "image": img, **tables}
+        output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], "image": img, **flatten(tables)}
    else:
-        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, **tables}
-        output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, **flatten(tables)}
+        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], **tables}
+        output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], **flatten(tables)}
+
+    if "short_description" in extra:
+        output_table["short_description"] = extra["short_description"]
+        output_table["fullspecs"]["short_description"] = extra["short_description"]
+        output_table["searchspecs"]["short_description"] = extra["short_description"]
+    if "description" in extra:
+        output_table["description"] = extra["description"]
+        output_table["fullspecs"]["description"] = extra["description"]
+        output_table["searchspecs"]["description"] = extra["description"]
+    if "application" in extra:
+        output_table["application"] = extra["application"]
+        output_table["fullspecs"]["application"] = extra["application"]
+        output_table["searchspecs"]["application"] = extra["application"]
    
    output_table["searchspecs"]["id"] = id
    
--- a/run.py
+++ b/run.py
@@ -168,8 +168,6 @@ def check_server():
                                    cabledata = jbs.get_position(str(idx))
                                    fs = cabledata["fullspecs"]
                                    tmp1 = {"part_number": cable_list[idx], "position": idx, "name": cable_list[idx], "brand": cabledata["brand"] }
-                                    if "image" in cabledata:
-                                        tmp1["image"] = cabledata["image"]
                                    if "Product Overview" in fs and "Product Category" in fs["Product Overview"]:
                                        tmp1["short_description"] = fs["Product Overview"]["Product Category"]
                                    if "Product Overview" in fs and "Suitable Applications" in fs["Product Overview"]:
@@ -194,6 +192,14 @@ def check_server():
                                                    tmp1["description"] = key
                                        else:
                                            tmp1["description"] = fs["Product Overview"]["Suitable Applications:"]
+                                    if "image" in cabledata:
+                                        tmp1["image"] = cabledata["image"]
+                                    if "description" in cabledata:
+                                        tmp1["description"] = cabledata["description"]
+                                    if "short_description" in cabledata:
+                                        tmp1["short_description"] = cabledata["short_description"]
+                                    if "application" in cabledata:
+                                        tmp1["application"] = cabledata["application"]
                                        
                                    tmp.append(tmp1)
                            out = {"map": tmp}