Improve search & parsing algorithm, easier to source venv files
This commit is contained in:
parent
1a07501d53
commit
dd0ac46662
291
get_specs.py
291
get_specs.py
@ -5,7 +5,7 @@ import sys
|
||||
import read_datasheet
|
||||
from alive_progress import alive_bar
|
||||
import requests
|
||||
#import time
|
||||
import time
|
||||
import json
|
||||
import subprocess
|
||||
from util import fprint
|
||||
@ -27,29 +27,78 @@ def check_internet(url='https://belden.com', timeout=5):
|
||||
|
||||
|
||||
def query_search(partnum, source):
|
||||
"""token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
|
||||
with requests.get(token_url) as r:
|
||||
out = json.loads(r.content)
|
||||
token = out["token"]
|
||||
search_url = "https://www.belden.com/coveo/rest/search"
|
||||
search_data ='{ "q": "' + str(partnum) + '", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en" }'
|
||||
#"aq": "", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))", "firstResult": "0", "categoryFacets": "[{\\"field\\":\\"@catalogitemcategories\\",\\"path\\":[],\\"injectionDepth\\":1000,\\"maximumNumberOfValues\\":6,\\"delimitingCharacter\\":\\"|\\"}]", "facetOptions": "{}", "groupBy": "" }'
|
||||
#fprint(search_data)
|
||||
fprint(json.loads(search_data))
|
||||
#search_data = '{ "q": "' + str(partnum) + '" }'
|
||||
fprint(search_data)
|
||||
headers = headers = {
|
||||
'Authorization': f'Bearer {token}',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
with requests.post(search_url, headers=headers, data=search_data) as r:
|
||||
fprint(r.text)"""
|
||||
|
||||
# TODO: Reimplement in python
|
||||
# Bash script uses some crazy json formatting that I could not figure out
|
||||
# Despite the fact that I wrote it
|
||||
# So I'll just leave it, because it works.
|
||||
if source == "Belden":
|
||||
token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
|
||||
with requests.get(token_url) as r:
|
||||
out = json.loads(r.content)
|
||||
token = out["token"]
|
||||
search_url = "https://www.belden.com/coveo/rest/search"
|
||||
|
||||
# Ridiculous search parameters extracted from website. Do not touch
|
||||
search_data = r"""{ "q": "{QUERY}", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"{QUERY}\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }"""
|
||||
search_data = search_data.replace(r"{QUERY}", partnum)
|
||||
#"aq": "", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))", "firstResult": "0", "categoryFacets": "[{\\"field\\":\\"@catalogitemcategories\\",\\"path\\":[],\\"injectionDepth\\":1000,\\"maximumNumberOfValues\\":6,\\"delimitingCharacter\\":\\"|\\"}]", "facetOptions": "{}", "groupBy": "" }'
|
||||
#fprint(search_data)
|
||||
#fprint(json.loads(search_data))
|
||||
#search_data = '{ "q": "' + str(partnum) + '" }'
|
||||
#fprint(search_data)
|
||||
headers = headers = {
|
||||
'Authorization': f'Bearer {token}',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
try:
|
||||
with requests.post(search_url, headers=headers, data=search_data) as r:
|
||||
a = r.text
|
||||
a = json.loads(a)
|
||||
idx = -1
|
||||
name = ""
|
||||
for partid in range(len(a["results"])):
|
||||
name = a["results"][partid]["title"]
|
||||
if name != partnum:
|
||||
if name.find(partnum) >= 0:
|
||||
idx = partid
|
||||
break
|
||||
elif partnum.find(name) >= 0:
|
||||
idx = partid
|
||||
break
|
||||
|
||||
else:
|
||||
idx = partid
|
||||
break
|
||||
|
||||
if idx < 0:
|
||||
fprint("Could not find part in API: " + partnum)
|
||||
return False
|
||||
fprint("Search result found: result " + str(idx) + ", for ID " + name)
|
||||
#urlname = a["results"][0]["raw"]["catalogitemurlname"]
|
||||
img = a["results"][idx]["raw"]["catalogitemimageurl"]
|
||||
img = img[0:img.index("?")]
|
||||
uri = a["results"][idx]["raw"]["clickableuri"]
|
||||
dsid = a["results"][idx]["raw"]["catalogitemdatasheetid"]
|
||||
brand = a["results"][idx]["raw"]["catalogitembrand"]
|
||||
desc = a["results"][idx]["raw"]["catalogitemlongdesc"]
|
||||
shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"]
|
||||
a = json.dumps(a["results"][idx], indent=2)
|
||||
#print(a, urlname, img, uri, dsurl)
|
||||
|
||||
out = dict()
|
||||
out["url"] = "https://www.belden.com/products/" + uri
|
||||
out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf"
|
||||
out["brand"] = brand
|
||||
out["name"] = shortdesc
|
||||
out["description"] = desc
|
||||
out["image"] = "https://www.belden.com" + img
|
||||
out["partnum"] = name
|
||||
#print(out)
|
||||
return out
|
||||
except:
|
||||
print("falied to search with API. Falling back to datasheet lookup.")
|
||||
return False
|
||||
|
||||
|
||||
# Original bash script
|
||||
# superseded by above
|
||||
if source == "Belden_shell":
|
||||
command = ["./query-search.sh", partnum]
|
||||
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||
if result.returncode != 0: # error
|
||||
@ -66,27 +115,28 @@ def query_search(partnum, source):
|
||||
#print(data)
|
||||
try:
|
||||
if data["Count"] > 0:
|
||||
print(data["Results"][0]["Url"])
|
||||
result = data["Results"][0]
|
||||
if result["Url"].split("/")[-1] == partnum:
|
||||
#print(partnum)
|
||||
print(result["Html"])
|
||||
try:
|
||||
imgidx = result["Html"].index("<img src=") + 10
|
||||
imgidx2 = result["Html"].index("?", imgidx)
|
||||
output["image"] = result["Html"][imgidx:imgidx2]
|
||||
if output["image"].index("http") != 0:
|
||||
output["image"] = ""
|
||||
#print(data["Results"][0]["Url"])
|
||||
for result in data["Results"]:
|
||||
if result["Url"].split("/")[-1] == partnum:
|
||||
#print(partnum)
|
||||
#print(result["Html"])
|
||||
try:
|
||||
imgidx = result["Html"].index("<img src=") + 10
|
||||
imgidx2 = result["Html"].index("?", imgidx)
|
||||
output["image"] = result["Html"][imgidx:imgidx2]
|
||||
if output["image"].index("http") != 0:
|
||||
output["image"] = ""
|
||||
print("No cable image found.")
|
||||
except:
|
||||
print("No cable image found.")
|
||||
except:
|
||||
print("No cable image found.")
|
||||
|
||||
dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
|
||||
dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
|
||||
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
|
||||
#"test".index()
|
||||
print(output)
|
||||
return output
|
||||
dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
|
||||
dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
|
||||
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
|
||||
output["partnum"] = partnum
|
||||
#"test".index()
|
||||
#print(output)
|
||||
return output
|
||||
|
||||
|
||||
except:
|
||||
@ -100,7 +150,7 @@ def touch(path):
|
||||
|
||||
|
||||
|
||||
def get_multi(partnums):
|
||||
def get_multi(partnums, delay=0.25):
|
||||
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
|
||||
|
||||
def _try_download_datasheet(partnum, output_dir): # Guess datasheet URL
|
||||
@ -190,10 +240,17 @@ def get_multi(partnums):
|
||||
fprint("Using cached datasheet for " + partnum)
|
||||
bar.text = "Using cached datasheet for " + partnum
|
||||
bar(skipped=True)
|
||||
fprint("Parsing Datasheet contents of " + partnum)
|
||||
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||
read_datasheet.parse(path, output_dir, partnum, dstype)
|
||||
bar(skipped=False)
|
||||
if not os.path.exists(output_dir + "/parsed"):
|
||||
|
||||
fprint("Parsing Datasheet contents of " + partnum)
|
||||
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||
|
||||
read_datasheet.parse(path, output_dir, partnum, dstype)
|
||||
bar(skipped=False)
|
||||
else:
|
||||
fprint("Datasheet already parsed for " + partnum)
|
||||
bar.text = "Datasheet already parsed for " + partnum + ".pdf"
|
||||
bar(skipped=True)
|
||||
|
||||
def __downloaded_datasheet(partnum, path, output_dir, dstype):
|
||||
fprint("Downloaded " + path)
|
||||
@ -204,13 +261,7 @@ def get_multi(partnums):
|
||||
read_datasheet.parse(path, output_dir, partnum, dstype)
|
||||
bar(skipped=False)
|
||||
|
||||
for fullpartnum in partnums:
|
||||
if fullpartnum[0:2] == "BL": # catalog.belden.com entry\
|
||||
partnum = fullpartnum[2:]
|
||||
dstype = "Belden"
|
||||
elif fullpartnum[0:2] == "AW":
|
||||
partnum = fullpartnum[2:]
|
||||
dstype = "Alphawire"
|
||||
def run_search(partnum):
|
||||
output_dir = "cables/" + partnum
|
||||
path = output_dir + "/datasheet.pdf"
|
||||
bartext = "Downloading files for part " + partnum
|
||||
@ -218,10 +269,16 @@ def get_multi(partnums):
|
||||
#
|
||||
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
|
||||
# Use query
|
||||
search_result = query_search(partnum.replace(" ", ""), dstype)
|
||||
search_result = query_search(partnum, dstype)
|
||||
# Try to use belden.com search
|
||||
if search_result is not False:
|
||||
# Download high resolution part image if available and needed
|
||||
partnum = search_result["partnum"]
|
||||
output_dir = "cables/" + partnum
|
||||
path = output_dir + "/datasheet.pdf"
|
||||
bartext = "Downloading files for part " + partnum
|
||||
bar.text = bartext
|
||||
|
||||
if not os.path.exists(output_dir + "/found_part_hires"):
|
||||
if _download_image(search_result["image"], output_dir):
|
||||
fprint("Downloaded hi-res part image for " + partnum)
|
||||
@ -245,17 +302,48 @@ def get_multi(partnums):
|
||||
|
||||
# Failed to download with search or guess :(
|
||||
else:
|
||||
fprint("Failed to download datasheet for part " + partnum)
|
||||
bar.text = "Failed to download datasheet for part " + partnum
|
||||
failed.append(partnum)
|
||||
bar(skipped=True)
|
||||
bar(skipped=True)
|
||||
return False
|
||||
return True
|
||||
|
||||
# We already have a hi-res image and the datasheet - perfect!
|
||||
else:
|
||||
fprint("Using cached hi-res part image for " + partnum)
|
||||
__use_cached_datasheet(partnum, path, output_dir, dstype)
|
||||
return True
|
||||
|
||||
for fullpartnum in partnums:
|
||||
if fullpartnum[0:2] == "BL": # catalog.belden.com entry
|
||||
partnum = fullpartnum[2:]
|
||||
dstype = "Belden"
|
||||
elif fullpartnum[0:2] == "AW":
|
||||
partnum = fullpartnum[2:]
|
||||
dstype = "Alphawire"
|
||||
else:
|
||||
dstype = "Belden" # guess
|
||||
partnum = fullpartnum
|
||||
if not run_search(partnum):
|
||||
success = False
|
||||
if len(partnum.split(" ")) > 1:
|
||||
for name in partnum.split(" "):
|
||||
fprint("Retrying with alternate name: " + name)
|
||||
if(run_search(name)):
|
||||
success = True
|
||||
break
|
||||
time.sleep(delay)
|
||||
if not success:
|
||||
namestripped = partnum.strip(" ")
|
||||
fprint("Retrying with alternate name: " + namestripped)
|
||||
if(run_search(namestripped)):
|
||||
success = True
|
||||
time.sleep(delay)
|
||||
if not success:
|
||||
fprint("Failed to download datasheet for part " + partnum)
|
||||
bar.text = "Failed to download datasheet for part " + partnum
|
||||
failed.append(partnum)
|
||||
bar(skipped=True)
|
||||
bar(skipped=True)
|
||||
time.sleep(delay)
|
||||
|
||||
if len(failed) > 0:
|
||||
fprint("Failed to download:")
|
||||
for partnum in failed:
|
||||
@ -268,22 +356,73 @@ def get_multi(partnums):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
partnums = ["BL7958A", "BL10GXS12", "BLRST 5L-RKT 5L-949",
|
||||
"BL10GXS13",
|
||||
"BL10GXW12",
|
||||
"BL10GXW13",
|
||||
"BL2412",
|
||||
"BL2413",
|
||||
"BLOSP6AU",
|
||||
"BLFI4D024P9",
|
||||
"BLFISD012R9",
|
||||
"BLFDSD012A9",
|
||||
"BLFSSL024NG",
|
||||
"BLFISX006W0",
|
||||
"BLFISX00103",
|
||||
"BLC6D1100007"
|
||||
# partnums = ["BLFISX012W0", "BL7958A", "BL10GXS12", "BLRST 5L-RKT 5L-949",
|
||||
# "BL10GXS13",
|
||||
# "BL10GXW12",
|
||||
# "BL10GXW13",
|
||||
# "BL2412",
|
||||
# "BL2413",
|
||||
# "BLOSP6AU",
|
||||
# "BLFI4D024P9",
|
||||
# "BLFISD012R9",
|
||||
# "BLFDSD012A9",
|
||||
# "BLFSSL024NG",
|
||||
# "BLFISX006W0",
|
||||
# "BLFISX00103",
|
||||
# "BLC6D1100007"
|
||||
# ]
|
||||
partnums = [
|
||||
# Actual cables in Jukebox
|
||||
"AW86104CY",
|
||||
"AW3050",
|
||||
"AW6714",
|
||||
"AW1172C",
|
||||
"AW2211/4",
|
||||
|
||||
"BLTF-1LF-006-RS5N",
|
||||
"BLTF-SD9-006-RI5N",
|
||||
"BLTT-SLG-024-HTNN",
|
||||
"BLFISX012W0",
|
||||
"BLFI4X012W0",
|
||||
"BLSPE101 006Q",
|
||||
"BLSPE102 006Q",
|
||||
"BL7922A 010Q",
|
||||
"BL7958A 008Q",
|
||||
"BLIOP6U 010Q",
|
||||
"BL10GXW13 D15Q",
|
||||
"BL10GXW53 D15Q",
|
||||
"BL29501F 010Q",
|
||||
"BL29512 010Q",
|
||||
"BL3106A 010Q",
|
||||
"BL9841 060Q",
|
||||
"BL3105A 010Q",
|
||||
"BL3092A 010Q",
|
||||
"BL8760 060Q",
|
||||
"BL6300UE 008Q",
|
||||
"BL6300FE 009Q",
|
||||
"BLRA500P 006Q",
|
||||
|
||||
|
||||
# Some ones I picked, including some invalid ones
|
||||
"BL10GXS12",
|
||||
"BLRST 5L-RKT 5L-949",
|
||||
"BL10GXS13",
|
||||
"BL10GXW12",
|
||||
"BL10GXW13",
|
||||
"BL2412",
|
||||
"BL2413",
|
||||
"BLOSP6AU",
|
||||
"BLFI4D024P9",
|
||||
"BLFISD012R9",
|
||||
"BLFDSD012A9",
|
||||
"BLFSSL024NG",
|
||||
"BLFISX006W0",
|
||||
"BLFISX00103",
|
||||
"BLC6D1100007"
|
||||
|
||||
]
|
||||
get_multi(partnums)
|
||||
#query_search("3248", "Alphawire")
|
||||
#query_search("86104CY", "Alphawire")
|
||||
get_multi(partnums, 0.25)
|
||||
#query_search("10GXS13", "Belden")
|
||||
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
# Parse Belden catalog techdata datasheets
|
||||
|
||||
import pandas as pd
|
||||
pd.set_option('future.no_silent_downcasting', True)
|
||||
from PyPDF2 import PdfReader
|
||||
import camelot
|
||||
import numpy as np
|
||||
@ -11,6 +13,11 @@ import json
|
||||
from util import fprint
|
||||
import uuid
|
||||
from util import run_cmd
|
||||
import os
|
||||
|
||||
def touch(path):
    """Create ``path`` if it is missing and set its timestamps to now.

    The file is opened in append mode, so existing content is left intact.
    """
    with open(path, mode="a"):
        # Omitting ``times`` is the same as passing None: use the current time.
        os.utime(path)
|
||||
|
||||
def parse(filename, output_dir, partnum, dstype):
|
||||
|
||||
@ -23,6 +30,7 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
reader = PdfReader(filename)
|
||||
page = reader.pages[0]
|
||||
table_list = {}
|
||||
|
||||
for table in tables:
|
||||
table.df.infer_objects(copy=False)
|
||||
table.df.replace('', np.nan, inplace=True)
|
||||
@ -90,6 +98,7 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
|
||||
# Table parsing and reordering
|
||||
tables = dict()
|
||||
torename = dict()
|
||||
previous_table = ""
|
||||
for table_name in table_list.keys():
|
||||
# determine shape: horizontal or vertical
|
||||
@ -121,7 +130,8 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
for table_name_2 in table_list.keys():
|
||||
if table_name_2.find(table.iloc[-1, 0]) >= 0:
|
||||
# Name taken from table directly above - this table does not have a name
|
||||
table_list["Specs " + str(len(tables))] = table_list.pop(table_name_2, None) # rename table to arbitrary altername name
|
||||
torename[table_name_2] = "Specs " + str(len(tables))
|
||||
#table_list["Specs " + str(len(tables))] = table_list[table_name_2] # rename table to arbitrary altername name
|
||||
break
|
||||
|
||||
if vertical:
|
||||
@ -142,21 +152,21 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
# multi-page table check
|
||||
if dstype == "Belden":
|
||||
if table_name.isdigit() and len(tables) > 1:
|
||||
fprint(table_name)
|
||||
fprint(previous_table)
|
||||
#fprint(table_name)
|
||||
#fprint(previous_table)
|
||||
|
||||
|
||||
|
||||
|
||||
main_key = previous_table
|
||||
cont_key = table_name
|
||||
fprint(tables)
|
||||
#fprint(tables)
|
||||
if vertical == False:
|
||||
main_keys = list(tables[main_key].keys())
|
||||
for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
|
||||
if i < len(main_keys):
|
||||
fprint(tables[main_key][main_keys[i]])
|
||||
tables[main_key][main_keys[i]] = (tables[main_key][main_keys[i]] + (cont_key,) + cont_values)
|
||||
#fprint(tables[main_key][main_keys[i]])
|
||||
tables[main_key][main_keys[i]] = (tuple(tables[main_key][main_keys[i]]) + (cont_key,) + cont_values)
|
||||
|
||||
del tables[table_name]
|
||||
|
||||
@ -167,6 +177,10 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
|
||||
previous_table = table_name
|
||||
|
||||
# remove renamed tables
|
||||
for table_name in torename.keys():
|
||||
tables[torename[table_name]] = tables[table_name]
|
||||
del tables[table_name]
|
||||
# remove multi-line values that occasionally squeak through
|
||||
def replace_newlines_in_dict(d):
|
||||
for key, value in d.items():
|
||||
@ -195,12 +209,12 @@ def parse(filename, output_dir, partnum, dstype):
|
||||
|
||||
|
||||
|
||||
print(output_table)
|
||||
#print(output_table)
|
||||
|
||||
run_cmd("rm " + output_dir + "/*.json") # not reliable!
|
||||
run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable!
|
||||
with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
|
||||
json.dump(output_table["searchspecs"], json_file)
|
||||
|
||||
touch(output_dir + "/parsed")
|
||||
return output_table
|
||||
|
||||
|
||||
@ -217,7 +231,7 @@ def flatten(tables):
|
||||
# If it fails again, return the original string.
|
||||
return s
|
||||
out = dict()
|
||||
print("{")
|
||||
#print("{")
|
||||
for table in tables.keys():
|
||||
for key in tables[table].keys():
|
||||
if len(key) < 64:
|
||||
@ -228,13 +242,19 @@ def flatten(tables):
|
||||
fullkeyname = (table + ": " + keyname).replace(".","")
|
||||
if type(tables[table][key]) is not tuple:
|
||||
out[fullkeyname] = convert_to_number(tables[table][key])
|
||||
print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
|
||||
#print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
|
||||
elif len(tables[table][key]) == 1:
|
||||
out[fullkeyname] = convert_to_number(tables[table][key][0])
|
||||
|
||||
print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
|
||||
#print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
|
||||
|
||||
print("}")
|
||||
# if the item has at least one comma in it, split it
|
||||
if tables[table][key].count(',') > 0:
|
||||
out[fullkeyname] = list(map(lambda x: x.strip(), tables[table][key].split(",")))
|
||||
#print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
|
||||
|
||||
|
||||
#print("}")
|
||||
return out
|
||||
|
||||
|
||||
|
2
run.py
2
run.py
@ -139,6 +139,8 @@ def start_server_socket():
|
||||
|
||||
# TODO Helper for converting Python Dictionaries to JSON
|
||||
# make function: pythonData --> { { "type": "...", "call": "...", "data": pythonData } }
|
||||
|
||||
# to send: to_server_queue.put(("*", "JSON STRING HERE")) # replace * with UUID of client to send to one specific location
|
||||
|
||||
case "cable_details":
|
||||
fprint("cable_details message")
|
||||
|
1
source.fish
Normal file
1
source.fish
Normal file
@ -0,0 +1 @@
|
||||
source venv/bin/activate.fish
|
106
test.py
Normal file
106
test.py
Normal file
@ -0,0 +1,106 @@
|
||||
print("\u001b[37m")
|
||||
|
||||
class Ring:
    """A ring of LED values with simple change tracking.

    Instance fields:
      leds  -- list of per-LED values (24 zeros after construction)
      id    -- integer identifier (0 after construction)
      dirty -- becomes True once item assignment changes a stored value
    """

    def __init__(self) -> None:
        self.leds = [0] * 24
        self.id = 0
        self.dirty = False

    def __iter__(self):
        """Yield the LED values in order."""
        yield from self.leds

    def __repr__(self) -> str:
        # Each value is displayed offset by one.
        led_state = ' '.join(str(value + 1) for value in self.leds)
        return f"Ring<id={self.id}, led_state={led_state}, dirty={self.dirty}>"

    def __add__(self, other):
        """Return a new Ring holding this ring's LEDs followed by ``other``'s.

        ``other`` may be any iterable of LED values, including another Ring.
        Neither operand is modified; the result copies ``id`` and ``dirty``
        from the left operand.  Use ``+=`` for in-place extension.
        """
        combined = Ring()
        combined.leds = self.leds + list(other)
        combined.id = self.id
        combined.dirty = self.dirty
        return combined

    def __iadd__(self, other):
        """Extend this ring's LEDs in place with the values from ``other``."""
        # Materialize first so ``ring += ring`` cannot iterate a growing list.
        self.leds.extend(list(other))
        return self

    def __bool__(self):
        """A ring is truthy only when it has been marked dirty."""
        return self.dirty

    def __getitem__(self, index):
        """Return the LED value stored at ``index``."""
        return self.leds[index]

    def __setitem__(self, index, value):
        """Store ``value`` at ``index``, marking the ring dirty if it changed."""
        if self.leds[index] != value:
            self.dirty = True
        self.leds[index] = value

    def __getattr__(self, name):
        """Resolve number-word attribute names (e.g. ``"twenty two"``) to LEDs.

        Python calls this only after normal attribute lookup has failed.

        Raises:
            AttributeError: for dunder names, for names that do not convert
                to an integer, and for indices outside the LED list.
        """
        # Protocol probes (copy, pickle, hasattr on dunders) must get a plain
        # AttributeError without reaching the word-to-number conversion.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        import word2num  # third-party module, imported lazily on first use
        try:
            index = int(word2num.word2num(name))
        except (TypeError, ValueError) as err:
            # NOTE(review): assumes an unparseable name surfaces as one of
            # these exceptions - confirm against the word2num package.
            raise AttributeError(name) from err
        print(index)
        if 0 <= index < len(self.leds):
            return self.leds[index]
        raise AttributeError(name)
|
||||
|
||||
|
||||
|
||||
a = Ring()
|
||||
print(a)
|
||||
b = Ring()
|
||||
b.leds[2] = 3
|
||||
|
||||
print(a + b)
|
||||
|
||||
b.active = True
|
||||
|
||||
if b:
|
||||
print("Bexist")
|
||||
|
||||
c = [a, b, b, a, a]
|
||||
|
||||
d = list(filter(lambda x: bool(x), c))
|
||||
|
||||
print(d)
|
||||
|
||||
for i, ring in enumerate(c):
|
||||
ring[0] = i
|
||||
print(ring)
|
||||
|
||||
print(a, b)
|
||||
|
||||
print(f"\u001b[32m{a}")
|
||||
print(f"\u001b[37ma")
|
||||
|
||||
print(getattr(a, "twenty two"))
|
||||
|
||||
# eval(f"getattr(a,\"{input()}\")")
|
||||
|
||||
|
||||
# a = r"wow this string is cursed; for example \n"
|
||||
|
||||
|
||||
# SEARCHDATA=r"""{ "q": "{QUERY}", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"{QUERY}\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }"""
|
||||
|
||||
# QUERY = "AAAAAAAAAAAA"
|
||||
|
||||
# b = SEARCHDATA.replace(r"{QUERY}", QUERY)
|
||||
|
||||
q = [i * 2 for i in range(10)]
|
||||
|
||||
d = {a : b for a,b in enumerate(q)}
|
||||
|
||||
print(q)
|
||||
|
||||
print(d)
|
||||
|
||||
def stalin_sort(a):
    """Return a list the same length as ``a`` with every entry set to its mean.

    Raises ZeroDivisionError when ``a`` is empty.
    """
    count = len(a)
    mean = sum(a) / count
    return [mean] * count
|
||||
|
||||
def mao_sort(a):
    """Delete, in place, each element whose right-hand neighbour is smaller.

    The scan moves left to right and never steps back after a deletion, so
    the result is not guaranteed to be fully sorted (``[2, 3, 1]`` becomes
    ``[2, 1]``).  The same list object that was passed in is returned.
    """
    pos = 0
    while pos + 1 < len(a):
        if not a[pos + 1] < a[pos]:
            # Neighbours are in order: move on to the next pair.
            pos += 1
            continue
        # The current element is larger than its successor: drop it and
        # compare the successor from the same position.
        del a[pos]
    return a
|
||||
|
||||
print(stalin_sort(list(range(10))))
|
||||
print(mao_sort([1, 3, 2, 4, 5, 8, 7, 6, 9]))
|
||||
|
||||
# i l
|
Loading…
x
Reference in New Issue
Block a user