Add Alphawire datasheet fallback

2024-03-14 22:06:13 -05:00
parent 39723ec442
commit fc2af34450
2 changed files with 40 additions and 26 deletions
--- a/get_specs.py
+++ b/get_specs.py
@ -27,6 +27,7 @@ def check_internet(url='https://belden.com', timeout=5):
 def query_search(partnum, source):
    fprint("Searching for " + partnum)
    if source == "Belden":
        token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
        with requests.get(token_url) as r:
@ -69,7 +70,7 @@ def query_search(partnum, source):
                if idx < 0:
                    fprint("Could not find part in API: " + partnum)
                    return False
-                fprint("Search result found: result " + str(idx) + ", for ID " + name)
+                #fprint("Search result found: result " + str(idx) + ", for ID " + name)
                #urlname = a["results"][0]["raw"]["catalogitemurlname"]
                img = a["results"][idx]["raw"]["catalogitemimageurl"]
                img = img[0:img.index("?")]
@ -92,7 +93,7 @@ def query_search(partnum, source):
                #print(out)
                return out
        except:
-            print("falied to search with API. Falling back to datasheet lookup.")
+            print("Failed to search with API. Falling back to datasheet lookup.")
            return False
@ -118,7 +119,7 @@ def query_search(partnum, source):
            if data["Count"] > 0:
                #print(data["Results"][0]["Url"])
                for result in data["Results"]:
-                    #print(result["Url"].split("/")[-1], partnum.replace("-", "").replace("/", "_"))
+                    #print(result["Url"])
                    if result["Url"].split("/")[-1] == partnum.replace("-", "").replace("/", "_"):
                        #print(partnum)
                        #print(result["Html"])
@ -137,12 +138,15 @@ def query_search(partnum, source):
                        output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
                        output["partnum"] = partnum.replace("-", "").replace("/", "_")
                        #"test".index()
-                        #print(output)
+                        print(output)
                        return output
        except:
            print("Failed to search with API. Falling back to datasheet lookup.")
            return False
        print("Failed to search with API. Falling back to datasheet lookup.")
        return False
@ -155,11 +159,15 @@ def touch(path):
 def get_multi(partnums, delay=0.25):
    with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
-        def _try_download_datasheet(partnum, output_dir): # Guess datasheet URL
+        def _try_download_datasheet(partnum, output_dir, dstype): # Guess datasheet URL
            global bartext
            if dstype == "Belden":
                sanitized_name = partnum.replace(" ", "")
                url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf"
            elif dstype == "Alphawire":
                # Alphawire Datasheet URLs do not use a sanitized part number (but product pages do)
                url = "https://www.alphawire.com/disteAPI/SpecPDF/DownloadProductSpecPdf?productPartNumber=" + partnum
            #fprint(url)
            try:
                with requests.get(url, stream=True) as r:
@ -267,17 +275,23 @@ def get_multi(partnums, delay=0.25):
            bar(skipped=False)
        def run_search(partnum):
            oldpartnum = partnum
            if dstype == "Alphawire":
                # For Alphawire, sanitize the part number for only the final result check, because their API is very weird
                # For the actual search, it must be un-sanitized
                partnum = partnum.replace("-", "").replace("/","_")
            output_dir = "cables/" + partnum
            path = output_dir + "/datasheet.pdf"
            bartext = "Downloading files for part " + partnum
            bar.text = bartext
-            #
+            partnum = oldpartnum
            if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
                # Use query
                search_result = query_search(partnum, dstype)
                # Try to use belden.com search
                if search_result is not False:
                    # Download high resolution part image if available and needed
                    #oldpartnum = partnum
                    partnum = search_result["partnum"]
                    output_dir = "cables/" + partnum
                    path = output_dir + "/datasheet.pdf"
@ -302,7 +316,7 @@ def get_multi(partnums, delay=0.25):
                    __use_cached_datasheet(partnum, path, output_dir, dstype)
                # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
-                elif _try_download_datasheet(partnum, output_dir) is not False:
+                elif _try_download_datasheet(partnum, output_dir, dstype) is not False:
                    __downloaded_datasheet(partnum, path, output_dir, dstype)
                # Failed to download with search or guess :(
--- a/read_datasheet.py
+++ b/read_datasheet.py
@ -112,14 +112,14 @@ def parse(filename, output_dir, partnum, dstype):
    tables = dict()
    torename = dict()
    previous_table = ""
-    print(table_list.keys())
+    #print(table_list.keys())
    for table_name in table_list.keys():
        # determine shape: horizontal or vertical
        table = table_list[table_name]
        rows = table.shape[0]
        cols = table.shape[1]
        vertical = None
-        print(rows, cols, table_name)
+        #print(rows, cols, table_name)
        if rows > 2 and cols == 2:
            vertical = True
        elif cols == 1 and rows > 1:
@ -145,7 +145,7 @@ def parse(filename, output_dir, partnum, dstype):
            vertical = False
        else: # 1 column, <= 2 rows
            vertical = False
-        print(vertical)
+        #print(vertical)
        # missing name check
        for table_name_2 in table_list.keys(): 
            if dstype == "Alphawire" and table_name_2.find("\n") >= 0:
@ -182,11 +182,11 @@ def parse(filename, output_dir, partnum, dstype):
            if prevtbl.cells[-1][0].lb[1] < 50 and thistbl.cells[0][0].lt[1] > 600:
                # wraparound
-                print("WRAP")
+                #print("WRAP")
-                print("PREV TABLE", prevtbl.df)
+                #print("PREV TABLE", prevtbl.df)
-                print("THIS TABLE", thistbl.df)
+                #print("THIS TABLE", thistbl.df)
-                print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
+                #print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
-                print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
+                #print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
                main_key = previous_table
                cont_key = table_name
                #print(vertical)
@ -200,21 +200,21 @@ def parse(filename, output_dir, partnum, dstype):
                    del tables[table_name]
                else:
-                    print(tables[cont_key].keys())
+                    #print(tables[cont_key].keys())
                    for key in tables[cont_key].keys():
-                        print(main_key, key, cont_key, key)
+                        #print(main_key, key, cont_key, key)
                        tables[main_key][key] = tables[cont_key][key]
                    del tables[table_name]
            elif thistbl.cells[0][0].lt[1] > 600:
                # name on previous page (grrrr)
-                print("NAMEABOVE")
+                #print("NAMEABOVE")
-                print("PREV TABLE", prevtbl.df)
+                #print("PREV TABLE", prevtbl.df)
-                print("THIS TABLE", thistbl.df)
+                #print("THIS TABLE", thistbl.df)
-                print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
+                #print("PREV TABLE CORNER", prevtbl.cells[-1][0].lb[1])
-                print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
+                #print("THIS TABLE CORNER", thistbl.cells[0][0].lt[1])
                name = extract_table_name(50, prevtbl.page,reader,dstype,table_name).strip("\n").strip()
-                print("FOUND NAME:", name)
+                #print("FOUND NAME:", name)
                torename[table_name] = name
@ -287,7 +287,7 @@ def parse(filename, output_dir, partnum, dstype):
    with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file:
        json.dump(output_table["fullspecs"], json_file)
-    print(json.dumps(output_table, indent=2))
+    #print(json.dumps(output_table, indent=2))
    touch(output_dir + "/parsed") # mark as parsed
    return output_table