Improve search & parsing algorithm, easier to source venv files
This commit is contained in:
		
							
								
								
									
										291
									
								
								get_specs.py
									
									
									
									
									
								
							
							
						
						
									
										291
									
								
								get_specs.py
									
									
									
									
									
								
							@@ -5,7 +5,7 @@ import sys
 | 
			
		||||
import read_datasheet
 | 
			
		||||
from alive_progress import alive_bar
 | 
			
		||||
import requests
 | 
			
		||||
#import time
 | 
			
		||||
import time
 | 
			
		||||
import json
 | 
			
		||||
import subprocess
 | 
			
		||||
from util import fprint
 | 
			
		||||
@@ -27,29 +27,78 @@ def check_internet(url='https://belden.com', timeout=5):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def query_search(partnum, source):
 | 
			
		||||
    """token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
 | 
			
		||||
    with requests.get(token_url) as r:
 | 
			
		||||
        out = json.loads(r.content)
 | 
			
		||||
    token = out["token"]
 | 
			
		||||
    search_url = "https://www.belden.com/coveo/rest/search"
 | 
			
		||||
    search_data ='{ "q": "' + str(partnum) + '", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en" }'
 | 
			
		||||
    #"aq": "", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))", "firstResult": "0", "categoryFacets": "[{\\"field\\":\\"@catalogitemcategories\\",\\"path\\":[],\\"injectionDepth\\":1000,\\"maximumNumberOfValues\\":6,\\"delimitingCharacter\\":\\"|\\"}]", "facetOptions": "{}", "groupBy": "" }'
 | 
			
		||||
    #fprint(search_data)
 | 
			
		||||
    fprint(json.loads(search_data))
 | 
			
		||||
    #search_data = '{ "q": "' + str(partnum) + '" }'
 | 
			
		||||
    fprint(search_data)
 | 
			
		||||
    headers = headers = {
 | 
			
		||||
        'Authorization': f'Bearer {token}',
 | 
			
		||||
        'Content-Type': 'application/json'
 | 
			
		||||
    }
 | 
			
		||||
    with requests.post(search_url, headers=headers, data=search_data) as r:
 | 
			
		||||
        fprint(r.text)"""
 | 
			
		||||
    
 | 
			
		||||
    # TODO: Reimplement in python
 | 
			
		||||
    # Bash script uses some crazy json formatting that I could not figure out
 | 
			
		||||
    # Despite the fact that I wrote it
 | 
			
		||||
    # So I'll just leave it, because it works.
 | 
			
		||||
    if source == "Belden":
 | 
			
		||||
        token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
 | 
			
		||||
        with requests.get(token_url) as r:
 | 
			
		||||
            out = json.loads(r.content)
 | 
			
		||||
        token = out["token"]
 | 
			
		||||
        search_url = "https://www.belden.com/coveo/rest/search"
 | 
			
		||||
 | 
			
		||||
        # Ridiculous search parameters extracted from website. Do not touch
 | 
			
		||||
        search_data = r"""{ "q": "{QUERY}", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": "    [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"{QUERY}\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }"""
 | 
			
		||||
        search_data = search_data.replace(r"{QUERY}", partnum)
 | 
			
		||||
        #"aq": "", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))", "firstResult": "0", "categoryFacets": "[{\\"field\\":\\"@catalogitemcategories\\",\\"path\\":[],\\"injectionDepth\\":1000,\\"maximumNumberOfValues\\":6,\\"delimitingCharacter\\":\\"|\\"}]", "facetOptions": "{}", "groupBy": "" }'
 | 
			
		||||
        #fprint(search_data)
 | 
			
		||||
        #fprint(json.loads(search_data))
 | 
			
		||||
        #search_data = '{ "q": "' + str(partnum) + '" }'
 | 
			
		||||
        #fprint(search_data)
 | 
			
		||||
        headers = headers = {
 | 
			
		||||
            'Authorization': f'Bearer {token}',
 | 
			
		||||
            'Content-Type': 'application/json'
 | 
			
		||||
        }
 | 
			
		||||
        try:
 | 
			
		||||
            with requests.post(search_url, headers=headers, data=search_data) as r:
 | 
			
		||||
                a = r.text
 | 
			
		||||
                a = json.loads(a)
 | 
			
		||||
                idx = -1
 | 
			
		||||
                name = ""
 | 
			
		||||
                for partid in range(len(a["results"])):
 | 
			
		||||
                    name = a["results"][partid]["title"]
 | 
			
		||||
                    if name != partnum:
 | 
			
		||||
                        if name.find(partnum) >= 0:
 | 
			
		||||
                            idx = partid
 | 
			
		||||
                            break
 | 
			
		||||
                        elif partnum.find(name) >= 0:
 | 
			
		||||
                            idx = partid
 | 
			
		||||
                            break
 | 
			
		||||
                        
 | 
			
		||||
                    else:
 | 
			
		||||
                        idx = partid
 | 
			
		||||
                        break
 | 
			
		||||
                        
 | 
			
		||||
                if idx < 0:
 | 
			
		||||
                    fprint("Could not find part in API: " + partnum)
 | 
			
		||||
                    return False
 | 
			
		||||
                fprint("Search result found: result " + str(idx) + ", for ID " + name)
 | 
			
		||||
                #urlname = a["results"][0]["raw"]["catalogitemurlname"]
 | 
			
		||||
                img = a["results"][idx]["raw"]["catalogitemimageurl"]
 | 
			
		||||
                img = img[0:img.index("?")]
 | 
			
		||||
                uri = a["results"][idx]["raw"]["clickableuri"]
 | 
			
		||||
                dsid = a["results"][idx]["raw"]["catalogitemdatasheetid"]
 | 
			
		||||
                brand = a["results"][idx]["raw"]["catalogitembrand"]
 | 
			
		||||
                desc = a["results"][idx]["raw"]["catalogitemlongdesc"]
 | 
			
		||||
                shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"]
 | 
			
		||||
                a = json.dumps(a["results"][idx], indent=2)
 | 
			
		||||
                #print(a, urlname, img, uri, dsurl)
 | 
			
		||||
 | 
			
		||||
                out = dict()
 | 
			
		||||
                out["url"] = "https://www.belden.com/products/" + uri
 | 
			
		||||
                out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf"
 | 
			
		||||
                out["brand"] = brand
 | 
			
		||||
                out["name"] = shortdesc
 | 
			
		||||
                out["description"] = desc
 | 
			
		||||
                out["image"] = "https://www.belden.com" + img
 | 
			
		||||
                out["partnum"] = name
 | 
			
		||||
                #print(out)
 | 
			
		||||
                return out
 | 
			
		||||
        except:
 | 
			
		||||
            print("falied to search with API. Falling back to datasheet lookup.")
 | 
			
		||||
            return False
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    # Original bash script
 | 
			
		||||
    # superseded by above
 | 
			
		||||
    if source == "Belden_shell":
 | 
			
		||||
        command = ["./query-search.sh", partnum]
 | 
			
		||||
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
 | 
			
		||||
        if result.returncode != 0: # error
 | 
			
		||||
@@ -66,27 +115,28 @@ def query_search(partnum, source):
 | 
			
		||||
        #print(data)
 | 
			
		||||
        try:
 | 
			
		||||
            if data["Count"] > 0:
 | 
			
		||||
                print(data["Results"][0]["Url"])
 | 
			
		||||
                result = data["Results"][0]
 | 
			
		||||
                if result["Url"].split("/")[-1] == partnum:
 | 
			
		||||
                    #print(partnum)
 | 
			
		||||
                    print(result["Html"])
 | 
			
		||||
                    try:
 | 
			
		||||
                        imgidx = result["Html"].index("<img src=") + 10
 | 
			
		||||
                        imgidx2 = result["Html"].index("?", imgidx)
 | 
			
		||||
                        output["image"] = result["Html"][imgidx:imgidx2]
 | 
			
		||||
                        if output["image"].index("http") != 0:
 | 
			
		||||
                            output["image"] = ""
 | 
			
		||||
                #print(data["Results"][0]["Url"])
 | 
			
		||||
                for result in data["Results"]:
 | 
			
		||||
                    if result["Url"].split("/")[-1] == partnum:
 | 
			
		||||
                        #print(partnum)
 | 
			
		||||
                        #print(result["Html"])
 | 
			
		||||
                        try:
 | 
			
		||||
                            imgidx = result["Html"].index("<img src=") + 10
 | 
			
		||||
                            imgidx2 = result["Html"].index("?", imgidx)
 | 
			
		||||
                            output["image"] = result["Html"][imgidx:imgidx2]
 | 
			
		||||
                            if output["image"].index("http") != 0:
 | 
			
		||||
                                output["image"] = ""
 | 
			
		||||
                                print("No cable image found.")
 | 
			
		||||
                        except:
 | 
			
		||||
                            print("No cable image found.")
 | 
			
		||||
                    except:
 | 
			
		||||
                        print("No cable image found.")
 | 
			
		||||
 | 
			
		||||
                    dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
 | 
			
		||||
                    dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
 | 
			
		||||
                    output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
 | 
			
		||||
                    #"test".index()
 | 
			
		||||
                    print(output)
 | 
			
		||||
                    return output
 | 
			
		||||
                        dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
 | 
			
		||||
                        dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
 | 
			
		||||
                        output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
 | 
			
		||||
                        output["partnum"] = partnum
 | 
			
		||||
                        #"test".index()
 | 
			
		||||
                        #print(output)
 | 
			
		||||
                        return output
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        except:
 | 
			
		||||
@@ -100,7 +150,7 @@ def touch(path):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_multi(partnums):
 | 
			
		||||
def get_multi(partnums, delay=0.25):
 | 
			
		||||
    with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
 | 
			
		||||
 | 
			
		||||
        def _try_download_datasheet(partnum, output_dir): # Guess datasheet URL
 | 
			
		||||
@@ -190,10 +240,17 @@ def get_multi(partnums):
 | 
			
		||||
            fprint("Using cached datasheet for " + partnum)
 | 
			
		||||
            bar.text = "Using cached datasheet for " + partnum
 | 
			
		||||
            bar(skipped=True)
 | 
			
		||||
            fprint("Parsing Datasheet contents of " + partnum)
 | 
			
		||||
            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
 | 
			
		||||
            read_datasheet.parse(path, output_dir, partnum, dstype)
 | 
			
		||||
            bar(skipped=False)
 | 
			
		||||
            if not os.path.exists(output_dir + "/parsed"):
 | 
			
		||||
                
 | 
			
		||||
                fprint("Parsing Datasheet contents of " + partnum)
 | 
			
		||||
                bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
 | 
			
		||||
            
 | 
			
		||||
                read_datasheet.parse(path, output_dir, partnum, dstype)
 | 
			
		||||
                bar(skipped=False) 
 | 
			
		||||
            else:
 | 
			
		||||
                fprint("Datasheet already parsed for " + partnum)
 | 
			
		||||
                bar.text = "Datasheet already parsed for " + partnum + ".pdf"
 | 
			
		||||
                bar(skipped=True)
 | 
			
		||||
 | 
			
		||||
        def __downloaded_datasheet(partnum, path, output_dir, dstype):
 | 
			
		||||
            fprint("Downloaded " + path)
 | 
			
		||||
@@ -204,13 +261,7 @@ def get_multi(partnums):
 | 
			
		||||
            read_datasheet.parse(path, output_dir, partnum, dstype)
 | 
			
		||||
            bar(skipped=False)
 | 
			
		||||
 | 
			
		||||
        for fullpartnum in partnums:
 | 
			
		||||
            if fullpartnum[0:2] == "BL": # catalog.belden.com entry\
 | 
			
		||||
                partnum = fullpartnum[2:]
 | 
			
		||||
                dstype = "Belden"
 | 
			
		||||
            elif fullpartnum[0:2] == "AW":
 | 
			
		||||
                partnum = fullpartnum[2:]
 | 
			
		||||
                dstype = "Alphawire"
 | 
			
		||||
        def run_search(partnum):
 | 
			
		||||
            output_dir = "cables/" + partnum
 | 
			
		||||
            path = output_dir + "/datasheet.pdf"
 | 
			
		||||
            bartext = "Downloading files for part " + partnum
 | 
			
		||||
@@ -218,10 +269,16 @@ def get_multi(partnums):
 | 
			
		||||
            #
 | 
			
		||||
            if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
 | 
			
		||||
                # Use query
 | 
			
		||||
                search_result = query_search(partnum.replace(" ", ""), dstype)
 | 
			
		||||
                search_result = query_search(partnum, dstype)
 | 
			
		||||
                # Try to use belden.com search
 | 
			
		||||
                if search_result is not False:
 | 
			
		||||
                    # Download high resolution part image if available and needed
 | 
			
		||||
                    partnum = search_result["partnum"]
 | 
			
		||||
                    output_dir = "cables/" + partnum
 | 
			
		||||
                    path = output_dir + "/datasheet.pdf"
 | 
			
		||||
                    bartext = "Downloading files for part " + partnum
 | 
			
		||||
                    bar.text = bartext
 | 
			
		||||
 | 
			
		||||
                    if not os.path.exists(output_dir + "/found_part_hires"):
 | 
			
		||||
                        if _download_image(search_result["image"], output_dir):
 | 
			
		||||
                            fprint("Downloaded hi-res part image for " + partnum)
 | 
			
		||||
@@ -245,17 +302,48 @@ def get_multi(partnums):
 | 
			
		||||
 | 
			
		||||
                # Failed to download with search or guess :(
 | 
			
		||||
                else: 
 | 
			
		||||
                    fprint("Failed to download datasheet for part " + partnum)
 | 
			
		||||
                    bar.text = "Failed to download datasheet for part " + partnum
 | 
			
		||||
                    failed.append(partnum)
 | 
			
		||||
                    bar(skipped=True)
 | 
			
		||||
                    bar(skipped=True)
 | 
			
		||||
                    return False
 | 
			
		||||
                return True
 | 
			
		||||
 | 
			
		||||
            # We already have a hi-res image and the datasheet - perfect!
 | 
			
		||||
            else:
 | 
			
		||||
                fprint("Using cached hi-res part image for " + partnum)
 | 
			
		||||
                __use_cached_datasheet(partnum, path, output_dir, dstype)
 | 
			
		||||
                return True
 | 
			
		||||
    
 | 
			
		||||
        for fullpartnum in partnums:
 | 
			
		||||
            if fullpartnum[0:2] == "BL": # catalog.belden.com entry
 | 
			
		||||
                partnum = fullpartnum[2:]
 | 
			
		||||
                dstype = "Belden"
 | 
			
		||||
            elif fullpartnum[0:2] == "AW":
 | 
			
		||||
                partnum = fullpartnum[2:]
 | 
			
		||||
                dstype = "Alphawire"
 | 
			
		||||
            else:
 | 
			
		||||
                dstype = "Belden" # guess
 | 
			
		||||
                partnum = fullpartnum
 | 
			
		||||
            if not run_search(partnum):
 | 
			
		||||
                success = False
 | 
			
		||||
                if len(partnum.split(" ")) > 1:
 | 
			
		||||
                    for name in partnum.split(" "):
 | 
			
		||||
                        fprint("Retrying with alternate name: " + name)
 | 
			
		||||
                        if(run_search(name)):
 | 
			
		||||
                            success = True
 | 
			
		||||
                            break
 | 
			
		||||
                        time.sleep(delay)
 | 
			
		||||
                    if not success:
 | 
			
		||||
                        namestripped = partnum.strip(" ")
 | 
			
		||||
                        fprint("Retrying with alternate name: " + namestripped)
 | 
			
		||||
                        if(run_search(namestripped)):
 | 
			
		||||
                            success = True
 | 
			
		||||
                            time.sleep(delay)
 | 
			
		||||
                if not success:
 | 
			
		||||
                    fprint("Failed to download datasheet for part " + partnum)
 | 
			
		||||
                    bar.text = "Failed to download datasheet for part " + partnum
 | 
			
		||||
                    failed.append(partnum)
 | 
			
		||||
                    bar(skipped=True)
 | 
			
		||||
                    bar(skipped=True)
 | 
			
		||||
            time.sleep(delay)
 | 
			
		||||
            
 | 
			
		||||
    if len(failed) > 0:
 | 
			
		||||
        fprint("Failed to download:")
 | 
			
		||||
        for partnum in failed:
 | 
			
		||||
@@ -268,22 +356,73 @@ def get_multi(partnums):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    partnums = ["BL7958A", "BL10GXS12", "BLRST 5L-RKT 5L-949", 
 | 
			
		||||
"BL10GXS13",
 | 
			
		||||
"BL10GXW12",
 | 
			
		||||
"BL10GXW13",
 | 
			
		||||
"BL2412",
 | 
			
		||||
"BL2413",
 | 
			
		||||
"BLOSP6AU",
 | 
			
		||||
"BLFI4D024P9",
 | 
			
		||||
"BLFISD012R9",
 | 
			
		||||
"BLFDSD012A9",
 | 
			
		||||
"BLFSSL024NG",
 | 
			
		||||
"BLFISX006W0",
 | 
			
		||||
"BLFISX00103",
 | 
			
		||||
"BLC6D1100007"
 | 
			
		||||
    # partnums = ["BLFISX012W0", "BL7958A", "BL10GXS12", "BLRST 5L-RKT 5L-949", 
 | 
			
		||||
    # "BL10GXS13",
 | 
			
		||||
    # "BL10GXW12",
 | 
			
		||||
    # "BL10GXW13",
 | 
			
		||||
    # "BL2412",
 | 
			
		||||
    # "BL2413",
 | 
			
		||||
    # "BLOSP6AU",
 | 
			
		||||
    # "BLFI4D024P9",
 | 
			
		||||
    # "BLFISD012R9",
 | 
			
		||||
    # "BLFDSD012A9",
 | 
			
		||||
    # "BLFSSL024NG",
 | 
			
		||||
    # "BLFISX006W0",
 | 
			
		||||
    # "BLFISX00103",
 | 
			
		||||
    # "BLC6D1100007"
 | 
			
		||||
    # ]
 | 
			
		||||
    partnums = [
 | 
			
		||||
    # Actual cables in Jukebox
 | 
			
		||||
    "AW86104CY",
 | 
			
		||||
    "AW3050",
 | 
			
		||||
    "AW6714",
 | 
			
		||||
    "AW1172C",
 | 
			
		||||
    "AW2211/4",
 | 
			
		||||
 | 
			
		||||
    "BLTF-1LF-006-RS5N",
 | 
			
		||||
    "BLTF-SD9-006-RI5N",
 | 
			
		||||
    "BLTT-SLG-024-HTNN",
 | 
			
		||||
    "BLFISX012W0",
 | 
			
		||||
    "BLFI4X012W0",
 | 
			
		||||
    "BLSPE101 006Q",
 | 
			
		||||
    "BLSPE102 006Q",
 | 
			
		||||
    "BL7922A 010Q",
 | 
			
		||||
    "BL7958A 008Q",
 | 
			
		||||
    "BLIOP6U 010Q",
 | 
			
		||||
    "BL10GXW13 D15Q",
 | 
			
		||||
    "BL10GXW53 D15Q",
 | 
			
		||||
    "BL29501F 010Q",
 | 
			
		||||
    "BL29512 010Q",
 | 
			
		||||
    "BL3106A 010Q",
 | 
			
		||||
    "BL9841 060Q",
 | 
			
		||||
    "BL3105A 010Q",
 | 
			
		||||
    "BL3092A 010Q",
 | 
			
		||||
    "BL8760 060Q",
 | 
			
		||||
    "BL6300UE 008Q",
 | 
			
		||||
    "BL6300FE 009Q",
 | 
			
		||||
    "BLRA500P 006Q",
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Some ones I picked, including some invalid ones
 | 
			
		||||
    "BL10GXS12", 
 | 
			
		||||
    "BLRST 5L-RKT 5L-949", 
 | 
			
		||||
    "BL10GXS13",
 | 
			
		||||
    "BL10GXW12",
 | 
			
		||||
    "BL10GXW13",
 | 
			
		||||
    "BL2412",
 | 
			
		||||
    "BL2413",
 | 
			
		||||
    "BLOSP6AU",
 | 
			
		||||
    "BLFI4D024P9",
 | 
			
		||||
    "BLFISD012R9",
 | 
			
		||||
    "BLFDSD012A9",
 | 
			
		||||
    "BLFSSL024NG",
 | 
			
		||||
    "BLFISX006W0",
 | 
			
		||||
    "BLFISX00103",
 | 
			
		||||
    "BLC6D1100007"
 | 
			
		||||
 | 
			
		||||
    ]
 | 
			
		||||
    get_multi(partnums)
 | 
			
		||||
    #query_search("3248", "Alphawire")
 | 
			
		||||
    #query_search("86104CY", "Alphawire")
 | 
			
		||||
    get_multi(partnums, 0.25)
 | 
			
		||||
    #query_search("10GXS13", "Belden")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -2,6 +2,8 @@
 | 
			
		||||
 | 
			
		||||
# Parse Belden catalog techdata datasheets 
 | 
			
		||||
 | 
			
		||||
import pandas as pd
 | 
			
		||||
pd.set_option('future.no_silent_downcasting', True)
 | 
			
		||||
from PyPDF2 import PdfReader
 | 
			
		||||
import camelot
 | 
			
		||||
import numpy as np
 | 
			
		||||
@@ -11,6 +13,11 @@ import json
 | 
			
		||||
from util import fprint
 | 
			
		||||
import uuid
 | 
			
		||||
from util import run_cmd
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
def touch(path):
 | 
			
		||||
    with open(path, 'a'):
 | 
			
		||||
        os.utime(path, None)
 | 
			
		||||
 | 
			
		||||
def parse(filename, output_dir, partnum, dstype):
 | 
			
		||||
 | 
			
		||||
@@ -23,6 +30,7 @@ def parse(filename, output_dir, partnum, dstype):
 | 
			
		||||
    reader = PdfReader(filename)
 | 
			
		||||
    page = reader.pages[0]
 | 
			
		||||
    table_list = {}
 | 
			
		||||
    
 | 
			
		||||
    for table in tables:
 | 
			
		||||
        table.df.infer_objects(copy=False)
 | 
			
		||||
        table.df.replace('', np.nan, inplace=True)
 | 
			
		||||
@@ -90,6 +98,7 @@ def parse(filename, output_dir, partnum, dstype):
 | 
			
		||||
 | 
			
		||||
    # Table parsing and reordering
 | 
			
		||||
    tables = dict()
 | 
			
		||||
    torename = dict()
 | 
			
		||||
    previous_table = ""
 | 
			
		||||
    for table_name in table_list.keys():
 | 
			
		||||
        # determine shape: horizontal or vertical
 | 
			
		||||
@@ -121,7 +130,8 @@ def parse(filename, output_dir, partnum, dstype):
 | 
			
		||||
        for table_name_2 in table_list.keys(): 
 | 
			
		||||
            if table_name_2.find(table.iloc[-1, 0]) >= 0:
 | 
			
		||||
                # Name taken from table directly above - this table does not have a name
 | 
			
		||||
                table_list["Specs " + str(len(tables))] = table_list.pop(table_name_2, None) # rename table to arbitrary altername name
 | 
			
		||||
                torename[table_name_2] = "Specs " + str(len(tables))
 | 
			
		||||
                #table_list["Specs " + str(len(tables))] = table_list[table_name_2] # rename table to arbitrary alternate name
 | 
			
		||||
                break
 | 
			
		||||
 | 
			
		||||
        if vertical:
 | 
			
		||||
@@ -142,21 +152,21 @@ def parse(filename, output_dir, partnum, dstype):
 | 
			
		||||
        # multi-page table check
 | 
			
		||||
        if dstype == "Belden":
 | 
			
		||||
            if table_name.isdigit() and len(tables) > 1:
 | 
			
		||||
                fprint(table_name)
 | 
			
		||||
                fprint(previous_table)
 | 
			
		||||
                #fprint(table_name)
 | 
			
		||||
                #fprint(previous_table)
 | 
			
		||||
                
 | 
			
		||||
                
 | 
			
		||||
                
 | 
			
		||||
                
 | 
			
		||||
                main_key = previous_table
 | 
			
		||||
                cont_key = table_name
 | 
			
		||||
                fprint(tables)
 | 
			
		||||
                #fprint(tables)
 | 
			
		||||
                if vertical == False:
 | 
			
		||||
                    main_keys = list(tables[main_key].keys())
 | 
			
		||||
                    for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
 | 
			
		||||
                        if i < len(main_keys):
 | 
			
		||||
                            fprint(tables[main_key][main_keys[i]])
 | 
			
		||||
                            tables[main_key][main_keys[i]] = (tables[main_key][main_keys[i]] + (cont_key,) + cont_values)
 | 
			
		||||
                            #fprint(tables[main_key][main_keys[i]])
 | 
			
		||||
                            tables[main_key][main_keys[i]] = (tuple(tables[main_key][main_keys[i]]) + (cont_key,) + cont_values)
 | 
			
		||||
    
 | 
			
		||||
                    del tables[table_name]
 | 
			
		||||
    
 | 
			
		||||
@@ -167,6 +177,10 @@ def parse(filename, output_dir, partnum, dstype):
 | 
			
		||||
 | 
			
		||||
        previous_table = table_name
 | 
			
		||||
    
 | 
			
		||||
    # remove renamed tables
 | 
			
		||||
    for table_name in torename.keys():
 | 
			
		||||
        tables[torename[table_name]] = tables[table_name]
 | 
			
		||||
        del tables[table_name]
 | 
			
		||||
    # remove multi-line values that occasionally squeak through
 | 
			
		||||
    def replace_newlines_in_dict(d):
 | 
			
		||||
        for key, value in d.items():
 | 
			
		||||
@@ -195,12 +209,12 @@ def parse(filename, output_dir, partnum, dstype):
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    print(output_table)
 | 
			
		||||
    #print(output_table)
 | 
			
		||||
 | 
			
		||||
    run_cmd("rm " + output_dir + "/*.json") # not reliable!
 | 
			
		||||
    run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable!
 | 
			
		||||
    with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
 | 
			
		||||
        json.dump(output_table["searchspecs"], json_file)
 | 
			
		||||
 | 
			
		||||
    touch(output_dir + "/parsed")
 | 
			
		||||
    return output_table
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -217,7 +231,7 @@ def flatten(tables):
 | 
			
		||||
                # If it fails again, return the original string.
 | 
			
		||||
                return s
 | 
			
		||||
    out = dict()
 | 
			
		||||
    print("{")
 | 
			
		||||
    #print("{")
 | 
			
		||||
    for table in tables.keys():
 | 
			
		||||
        for key in tables[table].keys():
 | 
			
		||||
            if len(key) < 64:
 | 
			
		||||
@@ -228,13 +242,19 @@ def flatten(tables):
 | 
			
		||||
            fullkeyname = (table + ": " + keyname).replace(".","")
 | 
			
		||||
            if type(tables[table][key]) is not tuple:
 | 
			
		||||
                out[fullkeyname] = convert_to_number(tables[table][key])
 | 
			
		||||
                print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
 | 
			
		||||
                #print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
 | 
			
		||||
            elif len(tables[table][key]) == 1:
 | 
			
		||||
                out[fullkeyname] = convert_to_number(tables[table][key][0])
 | 
			
		||||
                
 | 
			
		||||
                print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
 | 
			
		||||
                #print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
 | 
			
		||||
 | 
			
		||||
    print("}")
 | 
			
		||||
            # if the item has at least one comma in it, split it into a list of stripped parts
 | 
			
		||||
            if tables[table][key].count(',') > 0:
 | 
			
		||||
                out[fullkeyname] = list(map(lambda x: x.strip(), tables[table][key].split(",")))
 | 
			
		||||
                #print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    #print("}")
 | 
			
		||||
    return out
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								run.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								run.py
									
									
									
									
									
								
							@@ -139,6 +139,8 @@ def start_server_socket():
 | 
			
		||||
 | 
			
		||||
                    # TODO Helper for converting Python Dictionaries to JSON
 | 
			
		||||
                    # make function: pythonData --> { { "type": "...", "call": "...", "data": pythonData } }
 | 
			
		||||
                            
 | 
			
		||||
                    # to send: to_server_queue.put(("*", "JSON STRING HERE")) # replace * with UUID of client to send to one specific location
 | 
			
		||||
                    
 | 
			
		||||
                    case "cable_details":
 | 
			
		||||
                        fprint("cable_details message")
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								source.fish
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								source.fish
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1 @@
 | 
			
		||||
# Activate the Python virtual environment located in ./venv for the current fish session.
# Must be sourced (not executed) from the directory that contains venv/.
source venv/bin/activate.fish
 | 
			
		||||
							
								
								
									
										106
									
								
								test.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								test.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,106 @@
 | 
			
		||||
# Emit the ANSI SGR escape sequence ESC[37m (white foreground) before any other output.
print("\u001b[37m")
 | 
			
		||||
 | 
			
		||||
class Ring:
    """A ring of LED states with simple change tracking.

    Attributes:
        leds: list of integer LED states; a new ring has 24 entries, all 0.
        id: integer identifier of the ring (0 for a new ring).
        dirty: True once ``ring[i] = value`` has actually changed a state.

    A ring is truthy exactly when it is dirty, iterates over its LED states,
    and supports ``ring[i]`` reads/writes.  LED states can also be read by
    spelling the index out as an attribute name, e.g.
    ``getattr(ring, "twenty two")``; this needs the ``word2num`` package.
    """

    def __init__(self) -> None:
        self.leds = [0] * 24
        self.id = 0
        self.dirty = False

    def __iter__(self):
        """Yield the LED states in index order."""
        yield from self.leds

    def __repr__(self) -> str:
        # States are displayed one-based (stored value + 1).
        states = " ".join(str(state + 1) for state in self.leds)
        return f"Ring<id={self.id}, led_state={states}, dirty={self.dirty}>"

    def __add__(self, other):
        """Return a new ring holding this ring's LEDs followed by *other*'s.

        Neither operand is modified.  The result keeps this ring's ``id`` and
        ``dirty`` flag.  *other* may be any iterable of LED states.
        """
        combined = type(self)()
        combined.leds = self.leds + list(other)
        combined.id = self.id
        combined.dirty = self.dirty
        return combined

    def __iadd__(self, other):
        """Append the LED states of *other* to this ring in place (``+=``)."""
        # Snapshot first so that ``ring += ring`` terminates instead of
        # iterating over the very list that is being extended.
        self.leds.extend(list(other))
        return self

    def __bool__(self):
        """A ring is truthy when it has unsaved changes."""
        return self.dirty

    def __getitem__(self, index):
        """Return the LED state(s) at *index* (int or slice)."""
        return self.leds[index]

    def __setitem__(self, index, value):
        """Store *value* at *index*, marking the ring dirty only on a real change."""
        if self.leds[index] != value:
            self.dirty = True
            self.leds[index] = value

    def __getattr__(self, name):
        """Resolve a spelled-out number such as ``"twenty two"`` to an LED state.

        Only called when normal attribute lookup fails.

        Raises:
            AttributeError: if *name* is not a number word, the number is out
                of range, or ``word2num`` is unavailable.  Raising (rather
                than returning None or leaking another exception type) keeps
                ``hasattr`` and ``getattr(obj, name, default)`` working.
        """
        missing = f"{type(self).__name__!r} object has no attribute {name!r}"

        # Protocol probes (copy, pickle, ...) look up dunder names; answer
        # them without importing or parsing anything.
        if name.startswith("__"):
            raise AttributeError(missing)

        try:
            import word2num

            index = int(word2num.word2num(name))
        except (ImportError, ValueError, TypeError, KeyError) as err:
            # NOTE(review): the exact exceptions word2num raises for
            # unparseable input are assumed here; confirm against the package.
            raise AttributeError(missing) from err

        # Read through __dict__ so a half-initialised instance (no ``leds``
        # yet) cannot recurse back into __getattr__.
        leds = self.__dict__.get("leds", ())
        if 0 <= index < len(leds):
            return leds[index]
        raise AttributeError(missing)
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# --- Scratch demo: exercise the Ring container protocol ---------------------
a = Ring()
print(a)

b = Ring()
# Direct write to the underlying list: bypasses Ring.__setitem__, so the
# dirty flag is not touched by this line.
b.leds[2] = 3

print(a + b)

# Ordinary instance attribute; it is stored normally and is unrelated to `dirty`.
b.active = True

# Truthiness of a Ring is its dirty flag.
if b:
    print("Bexist")

c = [a, b, b, a, a]

# Keep only the rings that are currently truthy.
d = [ring for ring in c if bool(ring)]

print(d)

# The list holds repeated references, so each ring ends up with the index of
# its last occurrence in slot 0.
for i, ring in enumerate(c):
    ring[0] = i
    print(ring)

print(a, b)

print(f"\u001b[32m{a}")
# NOTE(review): no placeholder in this f-string, so it prints a literal "a"
# after the colour code; confirm whether {a} was intended.
print(f"\u001b[37ma")

# Attribute access by spelled-out index (handled by Ring.__getattr__).
print(getattr(a, "twenty two"))
 | 
			
		||||
 | 
			
		||||
# eval(f"getattr(a,\"{input()}\")")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# a = r"wow this string is cursed; for example \n"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# SEARCHDATA=r"""{ "q": "{QUERY}", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": "    [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"{QUERY}\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }"""
 | 
			
		||||
 | 
			
		||||
# QUERY = "AAAAAAAAAAAA"
 | 
			
		||||
 | 
			
		||||
# b = SEARCHDATA.replace(r"{QUERY}", QUERY)
 | 
			
		||||
 | 
			
		||||
# The first ten even numbers: 0, 2, ..., 18.
q = list(range(0, 20, 2))

# Map each position in q to the value stored at that position.
d = dict(enumerate(q))

print(q)
print(d)
 | 
			
		||||
 | 
			
		||||
def stalin_sort(a):
    """Return a new list in which every element is the arithmetic mean of *a*.

    Despite the name nothing is reordered: the result has the same length as
    *a* and each entry equals ``sum(a) / len(a)``.  The input is not modified.

    Args:
        a: a sized collection of numbers.

    Returns:
        A list of ``len(a)`` copies of the mean; an empty list when *a* is
        empty (previously this raised ZeroDivisionError).
    """
    count = len(a)
    if count == 0:
        return []
    mean = sum(a) / count
    return [mean] * count
 | 
			
		||||
 | 
			
		||||
def mao_sort(a):
    """"Sort" the list *a* in place by deleting elements, and return it.

    Any element that is followed (anywhere later) by a strictly smaller one
    is removed, so the survivors are in non-decreasing order.  Equal
    neighbours are kept.

    The previous implementation only compared each pair once and never
    re-checked the pair formed after a deletion, so inputs such as
    ``[2, 3, 1]`` came back unsorted as ``[2, 1]``.  It also used repeated
    ``del a[i]``, which is quadratic; this version is a single linear pass.

    Args:
        a: a mutable list of mutually comparable items.

    Returns:
        The same list object *a*, now containing only the survivors.
    """
    survivors = []
    for item in a:
        # A smaller newcomer evicts every larger element kept so far.
        while survivors and item < survivors[-1]:
            survivors.pop()
        survivors.append(item)
    # Slice-assign so the caller's list object is updated, as before.
    a[:] = survivors
    return a
 | 
			
		||||
 | 
			
		||||
# Show both joke "sorts" on small sample inputs.
print(stalin_sort([*range(10)]))
print(mao_sort([1, 3, 2, 4, 5, 8, 7, 6, 9]))
 | 
			
		||||
 | 
			
		||||
# i l
 | 
			
		||||
		Reference in New Issue
	
	Block a user