From 21cb2beb674f77a465efa703158583272c9d5c65 Mon Sep 17 00:00:00 2001
From: Cole Deck <cdeck@hawk.iit.edu>
Date: Wed, 8 May 2024 16:44:22 -0500
Subject: [PATCH] Pull description from Belden API for more consistent values

---
 fileserver.py     |  1 +
 get_specs.py      | 41 ++++++++++++++++++++++++++++++-----------
 jukebox-web       |  2 +-
 read_datasheet.py | 29 +++++++++++++++++++++++------
 run.py            | 12 +++++++++---
 5 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/fileserver.py b/fileserver.py
index 9e222b4..9a29944 100644
--- a/fileserver.py
+++ b/fileserver.py
@@ -7,6 +7,7 @@ def run_server(port, directory):
     Run a simple HTTP server serving files from the specified directory.
     """
     # Change the working directory to the specified directory
+    os.makedirs(directory, exist_ok=True)
     os.chdir(directory)
 
     # Create the HTTP server
diff --git a/get_specs.py b/get_specs.py
index 834a6c7..b1c86f3 100755
--- a/get_specs.py
+++ b/get_specs.py
@@ -81,15 +81,18 @@ def query_search(partnum, source):
                 brand = a["results"][idx]["raw"]["catalogitembrand"]
                 desc = a["results"][idx]["raw"]["catalogitemlongdesc"]
                 shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"]
-                a = json.dumps(a["results"][idx], indent=2)
+                app = a["results"][idx]["raw"]["catalogitemapplication"]
+                
+                #a = json.dumps(a["results"][idx], indent=2)
                 #print(a, urlname, img, uri, dsurl)
 
                 out = dict()
                 out["url"] = "https://www.belden.com/products/" + uri
                 out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf"
                 out["brand"] = brand
-                out["name"] = shortdesc
+                out["short_description"] = shortdesc
                 out["description"] = desc
+                out["application"] = app
                 out["image"] = "https://www.belden.com" + img
                 out["partnum"] = name
                 #print(out)
@@ -254,7 +257,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                 os.remove(partnum + "/datasheet.pdf")
                 sys.exit()
 
-        def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl):
+        def __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, extra):
             fprint("Using cached datasheet for " + partnum)
             # bar.text = "Using cached datasheet for " + partnum
             # bar(skipped=True)
@@ -263,7 +266,7 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                 fprint("Parsing Datasheet contents of " + partnum)
                 # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
             
-                out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl)
+                out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)
                 # bar(skipped=False) 
                 return out
             else:
@@ -271,13 +274,13 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                 # bar.text = "Datasheet already parsed for " + partnum + ".pdf"
                 # bar(skipped=True)
 
-        def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl):
+        def __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, extra):
             fprint("Downloaded " + path)
             # bar.text = "Downloaded " + path
             # bar(skipped=False)
             fprint("Parsing Datasheet contents of " + partnum)
             # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-            out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl)
+            out = read_datasheet.parse(path, output_dir, partnum, dstype, weburl, extra)
             # bar(skipped=False)
             return out
 
@@ -302,11 +305,15 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
                 if search_result is not False:
                     # Download high resolution part image if available and needed
                     #oldpartnum = partnum
                     partnum = search_result["partnum"]
                     returnval = [partnum, dstype, False, False]
                     output_dir = dir + partnum
                     path = output_dir + "/datasheet.pdf"
                     bartext = "Downloading files for part " + partnum
+                    os.makedirs(output_dir, exist_ok=True)
+                    # Cache the search result as UTF-8, matching how the cached branches read it back.
+                    with open(output_dir + "/search-result.json", 'w', encoding='utf-8') as json_file:
+                        json.dump(search_result, json_file)
                     # bar.text = bartext
 
                     if not os.path.exists(output_dir + "/found_part_hires") or not cache:
@@ -321,20 +328,28 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
 
                     # Download datasheet from provided URL if needed
                     if os.path.exists(path) and os.path.getsize(path) > 1 and cache:
-                        out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl)
+                        out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                         returnval = [partnum, dstype, True, out]
 
                     elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
-                        out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl)
+                        out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                         returnval = [partnum, dstype, True, out]
                 
                 elif os.path.exists(path) and os.path.getsize(path) > 1 and cache:
-                    out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl)
+                    search_result = {}
+                    if os.path.exists(output_dir + "/search-result.json"):
+                        with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
+                            search_result = json.load(file)
+                    out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                     returnval = [partnum, dstype, True, out]
                 
                 # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
                 elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
-                    out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl)
+                    search_result = {}
+                    if os.path.exists(output_dir + "/search-result.json"):
+                        with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
+                            search_result = json.load(file)
+                    out = __downloaded_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                     returnval = [partnum, dstype, False, out]
 
                 # Failed to download with search or guess :(
@@ -346,7 +361,11 @@ def get_multi(partnums, delay, dir, webport, cache=True, bar=None):
             # We already have a hi-res image and the datasheet - perfect!
             else:
                 fprint("Using cached hi-res part image for " + partnum)
-                out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl)
+                search_result = {}
+                if os.path.exists(output_dir + "/search-result.json"):
+                    with open(output_dir + "/search-result.json", 'r', encoding='utf-8') as file:
+                        search_result = json.load(file)
+                out = __use_cached_datasheet(partnum, path, output_dir, dstype, weburl, search_result)
                 returnval = [partnum, dstype, False, out]
                 actualpartnums.append(returnval)
                 return True
diff --git a/jukebox-web b/jukebox-web
index 4cea1b0..e55c7b5 160000
--- a/jukebox-web
+++ b/jukebox-web
@@ -1 +1 @@
-Subproject commit 4cea1b08329761fa5485686e0ff0871730f73820
+Subproject commit e55c7b5b5af29ff11b037fdfc40321d422859f5c
diff --git a/read_datasheet.py b/read_datasheet.py
index 53d3d30..79dc601 100755
--- a/read_datasheet.py
+++ b/read_datasheet.py
@@ -89,7 +89,7 @@ def rotate_and_crop_image(path, image_name, force_rotate=False):
         # Save or display the image
         img_cropped.save(path + "/" + "thumbnail-" + image_name)  # Save the cropped image
 
-def parse(filename, output_dir, partnum, dstype, weburl):
+def parse(filename, output_dir, partnum, dstype, weburl, extra=None):
     tables = []
     # Extract table data
     try:
@@ -362,14 +362,31 @@ def parse(filename, output_dir, partnum, dstype, weburl):
     id = str(uuid.uuid4())
     output_table["id"] = id
     #output_table["position"] = id
-    output_table["brand"] = dstype
+    # extra: optional search-result dict; callers without one may omit it.
+    extra = extra or {}
+    # Prefer the brand from the search result, falling back to dstype.
+    output_table["brand"] = extra.get("brand", dstype)
+
     if img is not None:
         output_table["image"] = img
-        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, "image": img, **tables}
-        output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, "image": img, **flatten(tables)}
+        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], "image": img, **tables}
+        output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], "image": img, **flatten(tables)}
     else:
-        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": dstype, **tables}
-        output_table["searchspecs"] = {"partnum": partnum, "brand": dstype, **flatten(tables)}
+        output_table["fullspecs"] = {"partnum": partnum, "id": id, "brand": output_table["brand"], **tables}
+        output_table["searchspecs"] = {"partnum": partnum, "brand": output_table["brand"], **flatten(tables)}
+
+    if "short_description" in extra:
+        output_table["short_description"] = extra["short_description"]
+        output_table["fullspecs"]["short_description"] = extra["short_description"]
+        output_table["searchspecs"]["short_description"] = extra["short_description"]
+    if "description" in extra:
+        output_table["description"] = extra["description"]
+        output_table["fullspecs"]["description"] = extra["description"]
+        output_table["searchspecs"]["description"] = extra["description"]
+    if "application" in extra:
+        output_table["application"] = extra["application"]
+        output_table["fullspecs"]["application"] = extra["application"]
+        output_table["searchspecs"]["application"] = extra["application"]
     
     output_table["searchspecs"]["id"] = id
     
diff --git a/run.py b/run.py
index 1dfcd3f..8831799 100755
--- a/run.py
+++ b/run.py
@@ -168,8 +168,6 @@ def check_server():
                                     cabledata = jbs.get_position(str(idx))
                                     fs = cabledata["fullspecs"]
                                     tmp1 = {"part_number": cable_list[idx], "position": idx, "name": cable_list[idx], "brand": cabledata["brand"] }
-                                    if "image" in cabledata:
-                                        tmp1["image"] = cabledata["image"]
                                     if "Product Overview" in fs and "Product Category" in fs["Product Overview"]:
                                         tmp1["short_description"] = fs["Product Overview"]["Product Category"]
                                     if "Product Overview" in fs and "Suitable Applications" in fs["Product Overview"]:
@@ -194,7 +192,15 @@ def check_server():
                                                     tmp1["description"] = key
                                         else:
                                             tmp1["description"] = fs["Product Overview"]["Suitable Applications:"]
-                                    
+                                    if "image" in cabledata:
+                                        tmp1["image"] = cabledata["image"]
+                                    if "description" in cabledata:
+                                        tmp1["description"] = cabledata["description"]
+                                    if "short_description" in cabledata:
+                                        tmp1["short_description"] = cabledata["short_description"]
+                                    if "application" in cabledata:
+                                        tmp1["application"] = cabledata["application"]
+                                        
                                     tmp.append(tmp1)
                             out = {"map": tmp}
                             fprint(out)