Improve search & parsing algorithm; make it easier to source venv files

Cole Deck 2024-03-11 23:52:24 -05:00
parent 1a07501d53
commit dd0ac46662
6 changed files with 358 additions and 89 deletions


@@ -5,7 +5,7 @@ import sys
import read_datasheet
from alive_progress import alive_bar
import requests
import time
import json
import subprocess
from util import fprint
@@ -27,29 +27,78 @@ def check_internet(url='https://belden.com', timeout=5):
def query_search(partnum, source):
if source == "Belden":
token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
with requests.get(token_url) as r:
out = json.loads(r.content)
token = out["token"]
search_url = "https://www.belden.com/coveo/rest/search"
# Ridiculous search parameters extracted from website. Do not touch
search_data = r"""{ "q": "{QUERY}", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"{QUERY}\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR 
(@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }"""
search_data = search_data.replace(r"{QUERY}", partnum)
#"aq": "", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))", "firstResult": "0", "categoryFacets": "[{\\"field\\":\\"@catalogitemcategories\\",\\"path\\":[],\\"injectionDepth\\":1000,\\"maximumNumberOfValues\\":6,\\"delimitingCharacter\\":\\"|\\"}]", "facetOptions": "{}", "groupBy": "" }' #"aq": "", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))", "firstResult": "0", "categoryFacets": "[{\\"field\\":\\"@catalogitemcategories\\",\\"path\\":[],\\"injectionDepth\\":1000,\\"maximumNumberOfValues\\":6,\\"delimitingCharacter\\":\\"|\\"}]", "facetOptions": "{}", "groupBy": "" }'
#fprint(search_data) #fprint(search_data)
fprint(json.loads(search_data)) #fprint(json.loads(search_data))
#search_data = '{ "q": "' + str(partnum) + '" }' #search_data = '{ "q": "' + str(partnum) + '" }'
fprint(search_data) #fprint(search_data)
headers = headers = { headers = headers = {
'Authorization': f'Bearer {token}', 'Authorization': f'Bearer {token}',
'Content-Type': 'application/json' 'Content-Type': 'application/json'
} }
try:
with requests.post(search_url, headers=headers, data=search_data) as r: with requests.post(search_url, headers=headers, data=search_data) as r:
fprint(r.text)""" a = r.text
a = json.loads(a)
idx = -1
name = ""
for partid in range(len(a["results"])):
name = a["results"][partid]["title"]
if name != partnum:
if name.find(partnum) >= 0:
idx = partid
break
elif partnum.find(name) >= 0:
idx = partid
break
else:
idx = partid
break
if idx < 0:
fprint("Could not find part in API: " + partnum)
return False
fprint("Search result found: result " + str(idx) + ", for ID " + name)
#urlname = a["results"][0]["raw"]["catalogitemurlname"]
img = a["results"][idx]["raw"]["catalogitemimageurl"]
img = img[0:img.index("?")]
uri = a["results"][idx]["raw"]["clickableuri"]
dsid = a["results"][idx]["raw"]["catalogitemdatasheetid"]
brand = a["results"][idx]["raw"]["catalogitembrand"]
desc = a["results"][idx]["raw"]["catalogitemlongdesc"]
shortdesc = a["results"][idx]["raw"]["catalogitemshortdesc"]
a = json.dumps(a["results"][idx], indent=2)
#print(a, urlname, img, uri, dsurl)
out = dict()
out["url"] = "https://www.belden.com/products/" + uri
out["datasheet"] = "https://catalog.belden.com/techdata/EN/" + dsid + "_techdata.pdf"
out["brand"] = brand
out["name"] = shortdesc
out["description"] = desc
out["image"] = "https://www.belden.com" + img
out["partnum"] = name
#print(out)
return out
except:
print("falied to search with API. Falling back to datasheet lookup.")
return False
# Original bash script
# superseded by above
if source == "Belden_shell":
command = ["./query-search.sh", partnum]
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.returncode != 0: # error
@@ -66,11 +115,11 @@ def query_search(partnum, source):
#print(data)
try:
if data["Count"] > 0:
#print(data["Results"][0]["Url"])
for result in data["Results"]:
if result["Url"].split("/")[-1] == partnum:
#print(partnum)
#print(result["Html"])
try:
imgidx = result["Html"].index("<img src=") + 10
imgidx2 = result["Html"].index("?", imgidx)
@@ -84,8 +133,9 @@
dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
output["partnum"] = partnum
#"test".index()
#print(output)
return output
@@ -100,7 +150,7 @@ def touch(path):
def get_multi(partnums, delay=0.25):
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
def _try_download_datasheet(partnum, output_dir): # Guess datasheet URL
@@ -190,10 +240,17 @@ def get_multi(partnums):
fprint("Using cached datasheet for " + partnum)
bar.text = "Using cached datasheet for " + partnum
bar(skipped=True)
if not os.path.exists(output_dir + "/parsed"):
fprint("Parsing Datasheet contents of " + partnum)
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir, partnum, dstype)
bar(skipped=False)
else:
fprint("Datasheet already parsed for " + partnum)
bar.text = "Datasheet already parsed for " + partnum + ".pdf"
bar(skipped=True)
def __downloaded_datasheet(partnum, path, output_dir, dstype):
fprint("Downloaded " + path)
@@ -204,13 +261,7 @@ def get_multi(partnums):
read_datasheet.parse(path, output_dir, partnum, dstype)
bar(skipped=False)
def run_search(partnum):
output_dir = "cables/" + partnum
path = output_dir + "/datasheet.pdf"
bartext = "Downloading files for part " + partnum
@@ -218,10 +269,16 @@
#
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
# Use query
search_result = query_search(partnum, dstype)
# Try to use belden.com search
if search_result is not False:
# Download high resolution part image if available and needed
partnum = search_result["partnum"]
output_dir = "cables/" + partnum
path = output_dir + "/datasheet.pdf"
bartext = "Downloading files for part " + partnum
bar.text = bartext
if not os.path.exists(output_dir + "/found_part_hires"):
if _download_image(search_result["image"], output_dir):
fprint("Downloaded hi-res part image for " + partnum)
@@ -245,16 +302,47 @@
# Failed to download with search or guess :(
else:
return False
return True
# We already have a hi-res image and the datasheet - perfect!
else:
fprint("Using cached hi-res part image for " + partnum)
__use_cached_datasheet(partnum, path, output_dir, dstype)
return True
for fullpartnum in partnums:
if fullpartnum[0:2] == "BL": # catalog.belden.com entry
partnum = fullpartnum[2:]
dstype = "Belden"
elif fullpartnum[0:2] == "AW":
partnum = fullpartnum[2:]
dstype = "Alphawire"
else:
dstype = "Belden" # guess
partnum = fullpartnum
if not run_search(partnum):
success = False
if len(partnum.split(" ")) > 1:
for name in partnum.split(" "):
fprint("Retrying with alternate name: " + name)
if(run_search(name)):
success = True
break
time.sleep(delay)
if not success:
namestripped = partnum.strip(" ")
fprint("Retrying with alternate name: " + namestripped)
if(run_search(namestripped)):
success = True
time.sleep(delay)
if not success:
fprint("Failed to download datasheet for part " + partnum)
bar.text = "Failed to download datasheet for part " + partnum
failed.append(partnum)
bar(skipped=True)
bar(skipped=True)
time.sleep(delay)
if len(failed) > 0:
fprint("Failed to download:")
@@ -268,22 +356,73 @@ def get_multi(partnums):
if __name__ == "__main__":
# partnums = ["BLFISX012W0", "BL7958A", "BL10GXS12", "BLRST 5L-RKT 5L-949",
# "BL10GXS13",
# "BL10GXW12",
# "BL10GXW13",
# "BL2412",
# "BL2413",
# "BLOSP6AU",
# "BLFI4D024P9",
# "BLFISD012R9",
# "BLFDSD012A9",
# "BLFSSL024NG",
# "BLFISX006W0",
# "BLFISX00103",
# "BLC6D1100007"
# ]
partnums = [
# Actual cables in Jukebox
"AW86104CY",
"AW3050",
"AW6714",
"AW1172C",
"AW2211/4",
"BLTF-1LF-006-RS5N",
"BLTF-SD9-006-RI5N",
"BLTT-SLG-024-HTNN",
"BLFISX012W0",
"BLFI4X012W0",
"BLSPE101 006Q",
"BLSPE102 006Q",
"BL7922A 010Q",
"BL7958A 008Q",
"BLIOP6U 010Q",
"BL10GXW13 D15Q",
"BL10GXW53 D15Q",
"BL29501F 010Q",
"BL29512 010Q",
"BL3106A 010Q",
"BL9841 060Q",
"BL3105A 010Q",
"BL3092A 010Q",
"BL8760 060Q",
"BL6300UE 008Q",
"BL6300FE 009Q",
"BLRA500P 006Q",
# Some ones I picked, including some invalid ones
"BL10GXS12",
"BLRST 5L-RKT 5L-949",
"BL10GXS13",
"BL10GXW12",
"BL10GXW13",
"BL2412",
"BL2413",
"BLOSP6AU",
"BLFI4D024P9",
"BLFISD012R9",
"BLFDSD012A9",
"BLFSSL024NG",
"BLFISX006W0",
"BLFISX00103",
"BLC6D1100007"
]
#query_search("86104CY", "Alphawire")
get_multi(partnums, 0.25)
#query_search("10GXS13", "Belden")


@@ -2,6 +2,8 @@
# Parse Belden catalog techdata datasheets
import pandas as pd
pd.set_option('future.no_silent_downcasting', True)
from PyPDF2 import PdfReader
import camelot
import numpy as np
@@ -11,6 +13,11 @@ import json
from util import fprint
import uuid
from util import run_cmd
import os
def touch(path):
with open(path, 'a'):
os.utime(path, None)
def parse(filename, output_dir, partnum, dstype):
@@ -23,6 +30,7 @@ def parse(filename, output_dir, partnum, dstype):
reader = PdfReader(filename)
page = reader.pages[0]
table_list = {}
for table in tables:
table.df.infer_objects(copy=False)
table.df.replace('', np.nan, inplace=True)
@@ -90,6 +98,7 @@
# Table parsing and reordering
tables = dict()
torename = dict()
previous_table = ""
for table_name in table_list.keys():
# determine shape: horizontal or vertical
@@ -121,7 +130,8 @@
for table_name_2 in table_list.keys():
if table_name_2.find(table.iloc[-1, 0]) >= 0:
# Name taken from table directly above - this table does not have a name
torename[table_name_2] = "Specs " + str(len(tables))
#table_list["Specs " + str(len(tables))] = table_list[table_name_2] # rename table to arbitrary alternate name
break
if vertical:
@@ -142,21 +152,21 @@
# multi-page table check
if dstype == "Belden":
if table_name.isdigit() and len(tables) > 1:
#fprint(table_name)
#fprint(previous_table)
main_key = previous_table
cont_key = table_name
#fprint(tables)
if vertical == False:
main_keys = list(tables[main_key].keys())
for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
if i < len(main_keys):
#fprint(tables[main_key][main_keys[i]])
tables[main_key][main_keys[i]] = (tuple(tables[main_key][main_keys[i]]) + (cont_key,) + cont_values)
del tables[table_name]
@@ -167,6 +177,10 @@
previous_table = table_name
# remove renamed tables
for table_name in torename.keys():
tables[torename[table_name]] = tables[table_name]
del tables[table_name]
# remove multi-line values that occasionally squeak through
def replace_newlines_in_dict(d):
for key, value in d.items():
@@ -195,12 +209,12 @@
#print(output_table)
run_cmd("rm \"" + output_dir + "\"/*.json") # not reliable!
with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
json.dump(output_table["searchspecs"], json_file)
touch(output_dir + "/parsed")
return output_table
@@ -217,7 +231,7 @@
# If it fails again, return the original string.
return s
out = dict()
#print("{")
for table in tables.keys():
for key in tables[table].keys():
if len(key) < 64:
@@ -228,13 +242,19 @@
fullkeyname = (table + ": " + keyname).replace(".","")
if type(tables[table][key]) is not tuple:
out[fullkeyname] = convert_to_number(tables[table][key])
#print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
elif len(tables[table][key]) == 1:
out[fullkeyname] = convert_to_number(tables[table][key][0])
#print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
# if the item has at least two commas in it, split it
if tables[table][key].count(',') > 0:
out[fullkeyname] = list(map(lambda x: x.strip(), tables[table][key].split(",")))
#print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
#print("}")
return out
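The flatten() changes above amount to one rule per cell value: convert numeric strings to numbers, and turn comma-separated strings into lists of trimmed items instead of one long string. A rough standalone version of that rule (the helper name is mine, not the commit's):

def convert_cell(value):
    # Numeric strings become numbers
    try:
        return int(value)
    except ValueError:
        try:
            return float(value)
        except ValueError:
            pass
    # Comma-separated strings become lists of trimmed items
    if "," in value:
        return [part.strip() for part in value.split(",")]
    return value

# convert_cell("23")          -> 23
# convert_cell("9.5")         -> 9.5
# convert_cell("Foil, Braid") -> ['Foil', 'Braid']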

run.py (2 changes)

@@ -140,6 +140,8 @@ def start_server_socket():
# TODO Helper for converting Python Dictionaries to JSON
# make function: pythonData --> { { "type": "...", "call": "...", "data": pythonData } }
# to send: to_server_queue.put(("*", "JSON STRING HERE")) # replace * with UUID of client to send to one specific location
case "cable_details": case "cable_details":
fprint("cable_details message") fprint("cable_details message")
if call == "send": if call == "send":

source.fish (new file, 1 line)

@@ -0,0 +1 @@
source venv/bin/activate.fish

source.sh (new file, 1 line)

@@ -0,0 +1 @@
source venv/bin/activate

test.py (new file, 106 lines)

@@ -0,0 +1,106 @@
print("\u001b[37m")
class Ring:
def __init__(self) -> None:
self.leds = [0] * 24
self.id = 0
self.dirty = False
def __iter__(self) -> iter:
yield from self.leds
def __repr__(self) -> str:
return f"Ring<id={self.id}, led_state={' '.join(list(map(lambda x: str(x+1), self.leds)))}, dirty={self.dirty}>"
def __add__(self, other):
self.leds.extend(other)
return self
def __bool__(self):
return self.dirty
def __getitem__(self, index):
return self.leds[index]
def __setitem__(self, index, value):
ivalue = self.leds[index]
if ivalue != value:
self.dirty = True
self.leds[index] = value
def __getattr__(self, name):
import word2num
name = int(word2num.word2num(name))
print(name)
if 0 <= name < len(self.leds):
return self.leds[name]
a = Ring()
print(a)
b = Ring()
b.leds[2] = 3
print(a + b)
b.active = True
if b:
print("Bexist")
c = [a, b, b, a, a]
d = list(filter(lambda x: bool(x), c))
print(d)
for i, ring in enumerate(c):
ring[0] = i
print(ring)
print(a, b)
print(f"\u001b[32m{a}")
print(f"\u001b[37ma")
print(getattr(a, "twenty two"))
# eval(f"getattr(a,\"{input()}\")")
# a = r"wow this string is cursed; for example \n"
# SEARCHDATA=r"""{ "q": "{QUERY}", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"{QUERY}\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR 
(@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }"""
# QUERY = "AAAAAAAAAAAA"
# b = SEARCHDATA.replace(r"{QUERY}", QUERY)
q = [i * 2 for i in range(10)]
d = {a : b for a,b in enumerate(q)}
print(q)
print(d)
def stalin_sort(a):
b = sum(a)
b /= len(a)
return [b for _ in range(len(a))]
def mao_sort(a):
i = 0
while i < len(a) - 1:
if a[i+1] < a[i]:
del a[i]
else:
i += 1
return a
print(stalin_sort(list(range(10))))
print(mao_sort([1, 3, 2, 4, 5, 8, 7, 6, 9]))
# i l