Use search query to find hi-res part images. Improve cache quality
This commit is contained in:
parent
e35d41031a
commit
59f61f7ae3
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,2 +1,3 @@
|
|||||||
venv
|
venv
|
||||||
__pycache__
|
__pycache__
|
||||||
|
cables
|
194
get_specs.py
194
get_specs.py
@ -5,11 +5,13 @@ import sys
|
|||||||
import read_datasheet
|
import read_datasheet
|
||||||
from alive_progress import alive_bar
|
from alive_progress import alive_bar
|
||||||
import requests
|
import requests
|
||||||
|
#import time
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
|
||||||
bartext = ""
|
bartext = ""
|
||||||
|
|
||||||
def try_download_datasheet(partnum):
|
def try_download_datasheet(partnum, output_dir): # Guess datasheet URL
|
||||||
global bartext
|
global bartext
|
||||||
|
|
||||||
sanitized_name = partnum.replace(" ", "")
|
sanitized_name = partnum.replace(" ", "")
|
||||||
@ -22,8 +24,8 @@ def try_download_datasheet(partnum):
|
|||||||
return False
|
return False
|
||||||
if r.status_code == 404:
|
if r.status_code == 404:
|
||||||
return False
|
return False
|
||||||
os.mkdir(partnum)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
with open(partnum + "/datasheet.pdf", 'wb') as f:
|
with open(output_dir + "/datasheet.pdf", 'wb') as f:
|
||||||
for chunk in r.iter_content(chunk_size=131072):
|
for chunk in r.iter_content(chunk_size=131072):
|
||||||
# If you have chunk encoded response uncomment if
|
# If you have chunk encoded response uncomment if
|
||||||
# and set chunk_size parameter to None.
|
# and set chunk_size parameter to None.
|
||||||
@ -32,17 +34,105 @@ def try_download_datasheet(partnum):
|
|||||||
bar.text = bartext
|
bar.text = bartext
|
||||||
f.write(chunk)
|
f.write(chunk)
|
||||||
#print("")
|
#print("")
|
||||||
return sanitized_name + ".pdf"
|
return output_dir + "/datasheet.pdf"
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("Quitting!")
|
print("Quitting!")
|
||||||
os.remove(partnum + "/datasheet.pdf")
|
os.remove(partnum + "/datasheet.pdf")
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
def download_datasheet(url, output_dir): # Download datasheet with known URL
    """Stream the PDF at ``url`` into ``<output_dir>/datasheet.pdf``.

    Returns the path of the written file, or False when the response's
    Content-Type header is not exactly "application/pdf" or its status is
    404.  ``output_dir`` is created if it does not exist.

    One "." per received chunk is appended to the module-level ``bartext``
    and pushed to ``bar`` (the progress bar opened in the ``__main__``
    block), so this function only works while that bar is active.

    On Ctrl-C the partially written file is deleted and the process exits.
    """
    global bartext

    pdf_path = output_dir + "/datasheet.pdf"
    #print(url)
    try:
        with requests.get(url, stream=True) as r:
            #r.raise_for_status()
            if r.headers.get("Content-Type") != "application/pdf":
                return False
            if r.status_code == 404:
                return False
            os.makedirs(output_dir, exist_ok=True)
            with open(pdf_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=131072):
                    # If you have chunk encoded response uncomment if
                    # and set chunk_size parameter to None.
                    #if chunk:
                    bartext = bartext + "."
                    bar.text = bartext
                    f.write(chunk)
        #print("")
        return pdf_path
    except KeyboardInterrupt:
        print("Quitting!")
        # Delete the file this function was writing (not a path built from the
        # caller's part number): a truncated PDF left behind would pass the
        # exists/size cache check on the next run.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
def download_image(url, output_dir): # Download hi-res part image with known URL
    """Stream the image at ``url`` into ``<output_dir>/part-hires.<ext>``.

    ``<ext>`` is whatever follows the last "." in ``url``.  Returns the path
    of the written file, or False when the response status is 404.
    ``output_dir`` is created if it does not exist.

    One "." per received chunk is appended to the module-level ``bartext``
    and pushed to ``bar`` (the progress bar opened in the ``__main__``
    block), so this function only works while that bar is active.

    On Ctrl-C the partially written image is deleted and the process exits.
    """
    global bartext

    image_path = output_dir + "/part-hires." + url.split(".")[-1]
    #print(url)
    try:
        with requests.get(url, stream=True) as r:
            #r.raise_for_status()
            if r.status_code == 404:
                return False
            os.makedirs(output_dir, exist_ok=True)
            with open(image_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=131072):
                    # If you have chunk encoded response uncomment if
                    # and set chunk_size parameter to None.
                    #if chunk:
                    bartext = bartext + "."
                    bar.text = bartext
                    f.write(chunk)
        #print("")
        return image_path
    except KeyboardInterrupt:
        print("Quitting!")
        # Clean up the image being written; the previous code tried to remove
        # a datasheet path instead, leaving the truncated image on disk.
        if os.path.exists(image_path):
            os.remove(image_path)
        sys.exit()
|
||||||
|
|
||||||
|
def query_search(partnum):
    """Look up ``partnum`` through the ``./query-search.sh`` helper script.

    The script is run with ``partnum`` as its only argument.  Returns the
    decoded JSON the script printed on stdout, or False when the script
    exits with a non-zero status or its output is not valid JSON.
    """
    # TODO: Reimplement in python
    # Bash script uses some crazy json formatting that I could not figure out
    # Despite the fact that I wrote it
    # So I'll just leave it, because it works.
    # (A disabled pure-Python attempt used to live here: it fetched a token
    # from https://www.belden.com/coveo/rest/token and POSTed the query as
    # JSON to https://www.belden.com/coveo/rest/search with a Bearer header.)

    command = ["./query-search.sh", partnum]
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0: # error
        print("No results found in search database for " + partnum + ". No hi-res part image available.", result.stderr)
        return False

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError:
        # Exit status 0 with empty or garbled stdout: report it as "no
        # result" instead of letting the exception abort the whole run.
        print("Could not parse search results for " + partnum + ". No hi-res part image available.")
        return False
|
||||||
|
|
||||||
|
def touch(path):
    """Create ``path`` if it is missing and set its timestamps to now.

    Existing file contents are left untouched (the file is opened in append
    mode and nothing is written).
    """
    handle = open(path, 'a')
    try:
        # None means "use the current time" for both atime and mtime.
        os.utime(path, None)
    finally:
        handle.close()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
partnums = ["10GXS12", "RST 5L-RKT 5L-949",
|
partnums = ["10GXS12", "RST 5L-RKT 5L-949",
|
||||||
"10GXS13",
|
"10GXS13",
|
||||||
"10GXW12",
|
"10GXW12",
|
||||||
@ -58,28 +148,74 @@ if __name__ == "__main__":
|
|||||||
]
|
]
|
||||||
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
|
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
|
||||||
for partnum in partnums:
|
for partnum in partnums:
|
||||||
path = partnum + "/datasheet.pdf"
|
output_dir = "cables/" + partnum
|
||||||
bartext = "Downloading datasheet for part " + partnum
|
path = output_dir + "/datasheet.pdf"
|
||||||
|
bartext = "Downloading files for part " + partnum
|
||||||
bar.text = bartext
|
bar.text = bartext
|
||||||
if os.path.exists(path) and os.path.getsize(path) > 1:
|
#
|
||||||
print("Using cached " + path, end='')
|
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
|
||||||
bar.text = "Using cached " + path
|
# Use query
|
||||||
bar(skipped=True)
|
search_result = query_search(partnum.replace(" ", ""))
|
||||||
print("Parsing Datasheet contents of " + path, end='')
|
# Try to use belden.com search
|
||||||
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
if search_result is not False:
|
||||||
read_datasheet.parse(path, partnum)
|
# Download high resolution part image if available and needed
|
||||||
bar(skipped=False)
|
if not os.path.exists(output_dir + "/found_part_hires"):
|
||||||
elif try_download_datasheet(partnum) is not False:
|
if download_image(search_result["image"], output_dir):
|
||||||
print("Downloaded " + path, end='')
|
print("Downloaded hi-res part image for " + partnum)
|
||||||
bar.text = "Downloaded " + path
|
touch(output_dir + "/found_part_hires")
|
||||||
bar(skipped=False)
|
|
||||||
print("Parsing Datasheet contents of " + path, end='')
|
# Download datasheet from provided URL if needed
|
||||||
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
if os.path.exists(path) and os.path.getsize(path) > 1:
|
||||||
read_datasheet.parse(path, partnum)
|
print("Using cached " + path, end='')
|
||||||
bar(skipped=False)
|
bar.text = "Using cached " + path
|
||||||
else:
|
bar(skipped=True)
|
||||||
print("Failed to download datasheet for part " + partnum, end='')
|
print("Parsing Datasheet contents of " + partnum, end='')
|
||||||
bar.text = "Failed to download datasheet for part " + partnum
|
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||||
bar(skipped=True)
|
read_datasheet.parse(path, output_dir)
|
||||||
bar(skipped=True)
|
bar(skipped=False)
|
||||||
|
|
||||||
|
elif download_datasheet(search_result["datasheet"], output_dir) is not False:
|
||||||
|
print("Downloaded " + path, end='')
|
||||||
|
bar.text = "Downloaded " + path
|
||||||
|
bar(skipped=False)
|
||||||
|
print("Parsing Datasheet contents of " + partnum, end='')
|
||||||
|
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||||
|
read_datasheet.parse(path, output_dir)
|
||||||
|
bar(skipped=False)
|
||||||
|
|
||||||
|
|
||||||
|
elif os.path.exists(path) and os.path.getsize(path) > 1:
|
||||||
|
print("Using cached " + path, end='')
|
||||||
|
bar.text = "Using cached " + path
|
||||||
|
bar(skipped=True)
|
||||||
|
print("Parsing Datasheet contents of " + partnum, end='')
|
||||||
|
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||||
|
read_datasheet.parse(path, output_dir)
|
||||||
|
bar(skipped=False)
|
||||||
|
|
||||||
|
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
|
||||||
|
elif try_download_datasheet(partnum, output_dir) is not False:
|
||||||
|
print("Downloaded " + path, end='')
|
||||||
|
bar.text = "Downloaded " + path
|
||||||
|
bar(skipped=False)
|
||||||
|
print("Parsing Datasheet contents of " + partnum, end='')
|
||||||
|
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||||
|
read_datasheet.parse(path, output_dir)
|
||||||
|
bar(skipped=False)
|
||||||
|
|
||||||
|
# Failed to download with search or guess :(
|
||||||
|
else:
|
||||||
|
print("Failed to download datasheet for part " + partnum, end='')
|
||||||
|
bar.text = "Failed to download datasheet for part " + partnum
|
||||||
|
bar(skipped=True)
|
||||||
|
bar(skipped=True)
|
||||||
|
# We already have a hi-res image and the datasheet - perfect!
|
||||||
|
else:
|
||||||
|
if os.path.exists(path) and os.path.getsize(path) > 1:
|
||||||
|
print("Using cached " + path, end='')
|
||||||
|
bar.text = "Using cached " + path
|
||||||
|
bar(skipped=True)
|
||||||
|
print("Parsing Datasheet contents of " + partnum, end='')
|
||||||
|
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
|
||||||
|
read_datasheet.parse(path, output_dir)
|
||||||
|
bar(skipped=False)
|
@ -7,7 +7,7 @@ QUERY=$@
|
|||||||
|
|
||||||
# Get auth token (seems to be unnecessary - but just in case, match what the website does)
|
# Get auth token (seems to be unnecessary - but just in case, match what the website does)
|
||||||
TOKENURL="https://www.belden.com/coveo/rest/token?t="$(date +%s)
|
TOKENURL="https://www.belden.com/coveo/rest/token?t="$(date +%s)
|
||||||
TOKEN=$(curl "$TOKENURL" | jq -r ".token")
|
TOKEN=$(curl "$TOKENURL" 2>/dev/null | jq -r ".token")
|
||||||
|
|
||||||
# Coveo search URL
|
# Coveo search URL
|
||||||
SEARCHURL="https://www.belden.com/coveo/rest/search"
|
SEARCHURL="https://www.belden.com/coveo/rest/search"
|
||||||
@ -18,7 +18,7 @@ SEARCHURL="https://www.belden.com/coveo/rest/search"
|
|||||||
|
|
||||||
# Query data - includes the default filter, sort, etc options used by the website search
|
# Query data - includes the default filter, sort, etc options used by the website search
|
||||||
#SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "25", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}" }'
|
#SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "25", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}" }'
|
||||||
SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "25", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"'$QUERY'\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }'
|
SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"'$QUERY'\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }'
|
||||||
|
|
||||||
#SEARCHDATA='{ "q": "LioN-X" }'
|
#SEARCHDATA='{ "q": "LioN-X" }'
|
||||||
#echo $SEARCHDATA
|
#echo $SEARCHDATA
|
||||||
@ -26,35 +26,38 @@ SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "
|
|||||||
#echo curl \"$SEARCHURL\" $SEARCHOPTS -d "$SEARCHDATA"
|
#echo curl \"$SEARCHURL\" $SEARCHOPTS -d "$SEARCHDATA"
|
||||||
|
|
||||||
# Query the coveo search API.
|
# Query the coveo search API.
|
||||||
RESULTS=$(curl "$SEARCHURL" -H "Content-Type: application/json" -H "Authorization: Bearer $TOKEN" -d "$SEARCHDATA" )
|
RESULTS=$(curl "$SEARCHURL" -H "Content-Type: application/json" -H "Authorization: Bearer $TOKEN" -d "$SEARCHDATA" 2>/dev/null )
|
||||||
|
|
||||||
# Count number of results
|
# Count number of results
|
||||||
COUNT=$(echo "$RESULTS" | jq ".results | length")
|
COUNT=$(echo "$RESULTS" | jq ".results | length")
|
||||||
if [ "$COUNT" -ge "25" ]; then
|
#if [ "$COUNT" -ge "250" ]; then
|
||||||
echo "There are 25+ search results."
|
# echo "There are 250+ search results."
|
||||||
else
|
#else
|
||||||
echo "There are" $(echo "$RESULTS" | jq ".results | length") "search results."
|
# echo "There are" $(echo "$RESULTS" | jq ".results | length") "search results."
|
||||||
fi
|
#fi
|
||||||
#echo $RESULTS | jq
|
#echo $RESULTS | jq
|
||||||
# Reformat results to simpler json. Includes basic cable specs
|
# Reformat results to simpler json. Includes basic cable specs
|
||||||
echo "$RESULTS" | jq -C "[ .results.[] | { name: .raw.catalogitemshortdesc, partnum: .title } ]"
|
#echo "$RESULTS" | jq -C "[ .results.[] | { name: .raw.catalogitemshortdesc, partnum: .title } ]"
|
||||||
|
|
||||||
LIST=$(echo "$RESULTS" | jq "[ .results.[] | { name: .raw.catalogitemshortdesc, brand: .raw.catalogitembrand, description: .raw.catalogitemlongdesc, application: .raw.catalogitemapplication, partnum: .raw.catalogitempartnumber, friendlypartnum: .title, santitizedpartnum: .raw.catalogitemsanitiz122xedname, usagecategory: .raw.catalogitemfilterproductcategory, category: .raw.catalogitemoriginalcategories, gauge: .raw.catalogitemconductorsiz122xe, voltage: .raw.catalogitemulvoltageratingvoltage1, conductors: .raw.catalogitemconductorcombinedcount, fibercount: .raw.catalogitemfibrecountfiber1, maxtemp: .raw.catalogitemultemprating, temprange: .raw.catalogitemoperatingtemprange, shielding: .raw.catalogitemshielding, producttype: .raw.catalogitemproducttype, material: .raw.catalogitemconductormaterialandstranding, construction: .raw.catalogitemconstructiontype, insulation: .raw.catalogiteminsulationmaterial, jacket: .raw.catalogitemjacket, armor: .raw.catalogitemarmortypeandmaterial, flamerating: .raw.catalogitemflamerating, url: (\"https://www.belden.com/products\" + .raw.clickableuri), datasheet: (\"https://catalog.belden.com/techdata/EN/\" + .raw.catalogitemdatasheetid + \"_techdata.pdf\" ), image: (\"https://www.belden.com\" + .raw.catalogitemimageurl) | split(\"?\")[0] } ]")
|
LIST=$(echo "$RESULTS" | jq "[ .results.[] | { name: .raw.catalogitemshortdesc, brand: .raw.catalogitembrand, description: .raw.catalogitemlongdesc, application: .raw.catalogitemapplication, partnum: .raw.catalogitempartnumber, friendlypartnum: .title, santitizedpartnum: .raw.catalogitemsanitiz122xedname, usagecategory: .raw.catalogitemfilterproductcategory, category: .raw.catalogitemoriginalcategories, gauge: .raw.catalogitemconductorsiz122xe, voltage: .raw.catalogitemulvoltageratingvoltage1, conductors: .raw.catalogitemconductorcombinedcount, fibercount: .raw.catalogitemfibrecountfiber1, maxtemp: .raw.catalogitemultemprating, temprange: .raw.catalogitemoperatingtemprange, shielding: .raw.catalogitemshielding, producttype: .raw.catalogitemproducttype, material: .raw.catalogitemconductormaterialandstranding, construction: .raw.catalogitemconstructiontype, insulation: .raw.catalogiteminsulationmaterial, jacket: .raw.catalogitemjacket, armor: .raw.catalogitemarmortypeandmaterial, flamerating: .raw.catalogitemflamerating, url: (\"https://www.belden.com/products\" + .raw.clickableuri), datasheet: (\"https://catalog.belden.com/techdata/EN/\" + .raw.catalogitemdatasheetid + \"_techdata.pdf\" ), image: (\"https://www.belden.com\" + .raw.catalogitemimageurl) | split(\"?\")[0] } ]")
|
||||||
|
|
||||||
NUM=$(echo "$RESULTS" | jq -r "[ .results.[].title] | index(\"$QUERY\")")
|
NUM=$(echo "$RESULTS" | jq -r "[ .results.[].title] | index(\"$QUERY\")")
|
||||||
if [ "$NUM" -ge 0 ]; then
|
if ! [ "$NUM" = "null" ]; then
|
||||||
echo $LIST | jq ".[$NUM]"
|
echo $LIST | jq -r ".[$NUM]"
|
||||||
echo "Result matches exact part number input."
|
#echo "Result matches exact part number input."
|
||||||
echo "Result uncertainty:" $(( $COUNT * 1 ))
|
#echo "Result uncertainty:" $(( $COUNT * 1 ))
|
||||||
|
exit 0
|
||||||
else
|
else
|
||||||
NUM=$(echo "$RESULTS" | jq -r "[ .results.[].raw.catalogitemsanitiz122xedname ] | index(\"$QUERY\")")
|
NUM=$(echo "$RESULTS" | jq -r "[ .results.[].raw.catalogitemsanitiz122xedname ] | index(\"$QUERY\")")
|
||||||
if [ "$NUM" -ge 0 ]; then
|
if ! [ "$NUM" = "null" ]; then
|
||||||
echo $LIST | jq ".[$NUM]"
|
echo $LIST | jq -r ".[$NUM]"
|
||||||
echo "Result matches exact part number input."
|
exit 0
|
||||||
|
#echo "Result matches exact part number input."
|
||||||
else
|
else
|
||||||
echo $LIST | jq ".[0]"
|
exit 1
|
||||||
echo "Unable to find exact match."
|
#echo $LIST | jq ".[0]"
|
||||||
echo "Result uncertainty:" $(( $COUNT * 4 ))
|
#echo "Unable to find exact match."
|
||||||
|
#echo "Result uncertainty:" $(( $COUNT * 4 ))
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -48,14 +48,14 @@ def parse(filename, output_dir):
|
|||||||
|
|
||||||
|
|
||||||
table_list[text_body] = table.df
|
table_list[text_body] = table.df
|
||||||
table.to_html("table" + str(n) + ".html")
|
#table.to_html("table" + str(n) + ".html")
|
||||||
|
|
||||||
#print(table.df)
|
#print(table.df)
|
||||||
#camelot.plot(table, kind='grid').savefig("test" + str(n) + ".png")
|
#camelot.plot(table, kind='grid').savefig("test" + str(n) + ".png")
|
||||||
n=n+1
|
n=n+1
|
||||||
#camelot.plot(tables[0], kind='grid').savefig("test.png")
|
#camelot.plot(tables[0], kind='grid').savefig("test.png")
|
||||||
|
|
||||||
tables.export(output_dir + '/techdata.csv', f='csv')
|
tables.export(output_dir + '/techdata.json', f='json')
|
||||||
|
|
||||||
# print(table_list)
|
# print(table_list)
|
||||||
# Extract Basic details - part name & description, image, etc
|
# Extract Basic details - part name & description, image, etc
|
||||||
|
Loading…
x
Reference in New Issue
Block a user