Use search query to find hi-res part images. Improve cache quality

This commit is contained in:
Cole Deck 2024-01-05 20:03:13 -06:00
parent e35d41031a
commit 59f61f7ae3
4 changed files with 191 additions and 51 deletions

3
.gitignore vendored
View File

@ -1,2 +1,3 @@
venv
__pycache__
__pycache__
cables

View File

@ -5,11 +5,13 @@ import sys
import read_datasheet
from alive_progress import alive_bar
import requests
#import time
import json
import subprocess
bartext = ""
def try_download_datasheet(partnum):
def try_download_datasheet(partnum, output_dir): # Guess datasheet URL
global bartext
sanitized_name = partnum.replace(" ", "")
@ -22,8 +24,8 @@ def try_download_datasheet(partnum):
return False
if r.status_code == 404:
return False
os.mkdir(partnum)
with open(partnum + "/datasheet.pdf", 'wb') as f:
os.makedirs(output_dir, exist_ok=True)
with open(output_dir + "/datasheet.pdf", 'wb') as f:
for chunk in r.iter_content(chunk_size=131072):
# If you have chunk encoded response uncomment if
# and set chunk_size parameter to None.
@ -32,17 +34,105 @@ def try_download_datasheet(partnum):
bar.text = bartext
f.write(chunk)
#print("")
return sanitized_name + ".pdf"
return output_dir + "/datasheet.pdf"
except KeyboardInterrupt:
print("Quitting!")
os.remove(partnum + "/datasheet.pdf")
sys.exit()
def download_datasheet(url, output_dir): # Download datasheet with known URL
    """Stream the PDF at `url` into `output_dir`/datasheet.pdf.

    Creates `output_dir` if needed. For every chunk written, a "." is
    appended to the module-level `bartext` and mirrored onto the module-level
    progress bar `bar`.

    Returns the path of the written file, or False when the response is not
    served as "application/pdf" or has status 404.

    On Ctrl-C the partially written file is removed and the process exits.
    """
    global bartext
    target = output_dir + "/datasheet.pdf"
    #print(url)
    try:
        with requests.get(url, stream=True) as r:
            #r.raise_for_status()
            if r.headers.get("Content-Type") != "application/pdf":
                return False
            if r.status_code == 404:
                return False
            os.makedirs(output_dir, exist_ok=True)
            with open(target, 'wb') as f:
                for chunk in r.iter_content(chunk_size=131072):
                    # If you have chunk encoded response uncomment if
                    # and set chunk_size parameter to None.
                    #if chunk:
                    bartext = bartext + "."
                    bar.text = bartext
                    f.write(chunk)
        #print("")
        return target
    except KeyboardInterrupt:
        print("Quitting!")
        # Delete the file where it was actually written. A truncated PDF left
        # behind would pass the size-based cache check on the next run.
        # The interrupt may arrive before the file exists, hence the guard.
        if os.path.exists(target):
            os.remove(target)
        sys.exit()
def download_image(url, output_dir): # Download hi-res part image with known URL
    """Stream the image at `url` into `output_dir`/part-hires.<ext>.

    <ext> is whatever follows the last "." in `url`. Creates `output_dir` if
    needed. For every chunk written, a "." is appended to the module-level
    `bartext` and mirrored onto the module-level progress bar `bar`.

    Returns the path of the written file, or False when the response has
    status 404.

    On Ctrl-C the partially written image is removed and the process exits.
    """
    global bartext
    target = output_dir + "/part-hires." + url.split(".")[-1]
    #print(url)
    try:
        with requests.get(url, stream=True) as r:
            #r.raise_for_status()
            if r.status_code == 404:
                return False
            os.makedirs(output_dir, exist_ok=True)
            with open(target, 'wb') as f:
                for chunk in r.iter_content(chunk_size=131072):
                    # If you have chunk encoded response uncomment if
                    # and set chunk_size parameter to None.
                    #if chunk:
                    bartext = bartext + "."
                    bar.text = bartext
                    f.write(chunk)
        #print("")
        return target
    except KeyboardInterrupt:
        print("Quitting!")
        # Clean up the partial image itself (not a datasheet path); the
        # interrupt may arrive before the file exists, hence the guard.
        if os.path.exists(target):
            os.remove(target)
        sys.exit()
def query_search(partnum):
    """Look up `partnum` by running the ./query-search.sh helper script.

    The script is invoked with the part number as its only argument and its
    stdout is parsed as JSON.

    Returns the decoded JSON object (a dict) on success. Returns False when
    the script exits non-zero, or when its output is not a JSON object
    (for example empty output), so callers can index the result safely.
    """
    # Abandoned attempt at a pure-python implementation, kept for reference:
    #
    # token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
    # with requests.get(token_url) as r:
    #     out = json.loads(r.content)
    # token = out["token"]
    # search_url = "https://www.belden.com/coveo/rest/search"
    # search_data ='{ "q": "' + str(partnum) + '", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en" }'
    # #"aq": "", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))", "firstResult": "0", "categoryFacets": "[{\\"field\\":\\"@catalogitemcategories\\",\\"path\\":[],\\"injectionDepth\\":1000,\\"maximumNumberOfValues\\":6,\\"delimitingCharacter\\":\\"|\\"}]", "facetOptions": "{}", "groupBy": "" }'
    # #print(search_data)
    # print(json.loads(search_data))
    # #search_data = '{ "q": "' + str(partnum) + '" }'
    # print(search_data)
    # headers = headers = {
    #     'Authorization': f'Bearer {token}',
    #     'Content-Type': 'application/json'
    # }
    # with requests.post(search_url, headers=headers, data=search_data) as r:
    #     print(r.text)

    # TODO: Reimplement in python
    # Bash script uses some crazy json formatting that I could not figure out
    # Despite the fact that I wrote it
    # So I'll just leave it, because it works.
    command = ["./query-search.sh", partnum]
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0: # error
        print("No results found in search database for " + partnum + ". No hi-res part image available.", result.stderr)
        return False

    # The script can exit 0 and still print nothing usable; treat that as
    # "no result" rather than letting the exception abort the whole run.
    try:
        data_out = json.loads(result.stdout)
    except json.JSONDecodeError:
        data_out = None
    if not isinstance(data_out, dict):
        print("Could not parse search results for " + partnum + ". No hi-res part image available.")
        return False
    return data_out
def touch(path):
    """Create `path` if it does not exist and set its access and modification times to now."""
    # Append mode creates a missing file without truncating an existing one.
    handle = open(path, 'a')
    try:
        os.utime(path, None)
    finally:
        handle.close()
if __name__ == "__main__":
partnums = ["10GXS12", "RST 5L-RKT 5L-949",
"10GXS13",
"10GXW12",
@ -58,28 +148,74 @@ if __name__ == "__main__":
]
with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
for partnum in partnums:
path = partnum + "/datasheet.pdf"
bartext = "Downloading datasheet for part " + partnum
output_dir = "cables/" + partnum
path = output_dir + "/datasheet.pdf"
bartext = "Downloading files for part " + partnum
bar.text = bartext
if os.path.exists(path) and os.path.getsize(path) > 1:
print("Using cached " + path, end='')
bar.text = "Using cached " + path
bar(skipped=True)
print("Parsing Datasheet contents of " + path, end='')
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, partnum)
bar(skipped=False)
elif try_download_datasheet(partnum) is not False:
print("Downloaded " + path, end='')
bar.text = "Downloaded " + path
bar(skipped=False)
print("Parsing Datasheet contents of " + path, end='')
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, partnum)
bar(skipped=False)
else:
print("Failed to download datasheet for part " + partnum, end='')
bar.text = "Failed to download datasheet for part " + partnum
bar(skipped=True)
bar(skipped=True)
#
if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
# Use query
search_result = query_search(partnum.replace(" ", ""))
# Try to use belden.com search
if search_result is not False:
# Download high resolution part image if available and needed
if not os.path.exists(output_dir + "/found_part_hires"):
if download_image(search_result["image"], output_dir):
print("Downloaded hi-res part image for " + partnum)
touch(output_dir + "/found_part_hires")
# Download datasheet from provided URL if needed
if os.path.exists(path) and os.path.getsize(path) > 1:
print("Using cached " + path, end='')
bar.text = "Using cached " + path
bar(skipped=True)
print("Parsing Datasheet contents of " + partnum, end='')
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir)
bar(skipped=False)
elif download_datasheet(search_result["datasheet"], output_dir) is not False:
print("Downloaded " + path, end='')
bar.text = "Downloaded " + path
bar(skipped=False)
print("Parsing Datasheet contents of " + partnum, end='')
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir)
bar(skipped=False)
elif os.path.exists(path) and os.path.getsize(path) > 1:
print("Using cached " + path, end='')
bar.text = "Using cached " + path
bar(skipped=True)
print("Parsing Datasheet contents of " + partnum, end='')
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir)
bar(skipped=False)
# If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
elif try_download_datasheet(partnum, output_dir) is not False:
print("Downloaded " + path, end='')
bar.text = "Downloaded " + path
bar(skipped=False)
print("Parsing Datasheet contents of " + partnum, end='')
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir)
bar(skipped=False)
# Failed to download with search or guess :(
else:
print("Failed to download datasheet for part " + partnum, end='')
bar.text = "Failed to download datasheet for part " + partnum
bar(skipped=True)
bar(skipped=True)
# We already have a hi-res image and the datasheet - perfect!
else:
if os.path.exists(path) and os.path.getsize(path) > 1:
print("Using cached " + path, end='')
bar.text = "Using cached " + path
bar(skipped=True)
print("Parsing Datasheet contents of " + partnum, end='')
bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
read_datasheet.parse(path, output_dir)
bar(skipped=False)

View File

@ -7,7 +7,7 @@ QUERY=$@
# Get auth token (seems to be unnecessary - but just in case, match what the website does)
TOKENURL="https://www.belden.com/coveo/rest/token?t="$(date +%s)
TOKEN=$(curl "$TOKENURL" | jq -r ".token")
TOKEN=$(curl "$TOKENURL" 2>/dev/null | jq -r ".token")
# Coveo search URL
SEARCHURL="https://www.belden.com/coveo/rest/search"
@ -18,7 +18,7 @@ SEARCHURL="https://www.belden.com/coveo/rest/search"
# Query data - includes the default filter, sort, etc options used by the website search
#SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "25", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}" }'
SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "25", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"'$QUERY'\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }'
SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "250", "sortCriteria": "@catalogitemwebdisplaypriority ascending", "searchHub": "products-only-search", "pipeline": "Site Search", "maximumAge": "900000", "tab": "products-search", "locale": "en", "aq": "(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((@syssource==\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\" @catalogitemprimarycategorypublished==true)) ((@catalogitemregionavailable=Global) (@z95xlanguage==en))", "cq": "((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\"Coveo_web_index - rg-nc-prod-sitecore-prod\")) OR (@source==(\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\",\"website_001002_Category_index-rg-nc-prod-sitecore-prod\"))", "firstResult": "0" }, "categoryFacets": "[{\"field\":\"@catalogitemcategories\",\"path\":[],\"injectionDepth\":1000,\"maximumNumberOfValues\":6,\"delimitingCharacter\":\"|\"}]", "facetOptions": "{}", "groupBy": " [{\"field\":\"@contenttype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[\"Products\"],\"queryOverride\":\"'$QUERY'\",\"advancedQueryOverride\":\"(NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B)) ((((((((@z95xpath=3324AF2D58F64C0FB725521052F679D2 @z95xid<>3324AF2D58F64C0FB725521052F679D2) ((@z95xpath=C292F3A37B3A4E6BAB345DF87ADDE516 @z95xid<>C292F3A37B3A4E6BAB345DF87ADDE516) @z95xtemplate==E4EFEB787BDC4B1A908EFC64D56CB2A4)) OR ((@z95xpath=723501A864754FEEB8AE377E4C710271 @z95xid<>723501A864754FEEB8AE377E4C710271) ((@z95xpath=600114EAB0E5407A84AAA9F0985B6575 @z95xid<>600114EAB0E5407A84AAA9F0985B6575) @z95xtemplate==2BE4FD6B3B2C49EBBD9E1F6C92238B05))) OR (@syssource==\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\" @catalogitemprimarycategorypublished==true)) OR ((@z95xpath=3324AF2D58F64C0FB725521052F679D2 
@z95xid<>3324AF2D58F64C0FB725521052F679D2) @z95xpath<>C292F3A37B3A4E6BAB345DF87ADDE516)) OR @syssource==\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\") NOT @z95xtemplate==(ADB6CA4F03EF4F47B9AC9CE2BA53FF97,FE5DD82648C6436DB87A7C4210C7413B))) ((@catalogitemregionavailable=Global) (@z95xlanguage==en) OR (@contenttype=(Blogs,Resources,Other)) (NOT @ez120xcludefromcoveo==1))\",\"constantQueryOverride\":\"((@z95xlanguage==en) (@z95xlatestversion==1) (@source==\\"Coveo_web_index - rg-nc-prod-sitecore-prod\\")) OR (@source==(\\"website_001002_catalog_index-rg-nc-prod-sitecore-prod\\",\\"website_001002_Category_index-rg-nc-prod-sitecore-prod\\"))\"},{\"field\":\"@catalogitembrand\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemenvironment\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@catalogitemregionalavailability\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@prez45xtez120xt\",\"maximumNumberOfValues\":5,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@tags\",\"maximumNumberOfValues\":4,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetassettype\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetbrand\",\"maximumNumberOfValues\":3,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetmarket\",\"maximumNumberOfValues\":6,\"sortCriteria
\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsolution\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]},{\"field\":\"@facetsearchcontentpagetype\",\"maximumNumberOfValues\":6,\"sortCriteria\":\"occurrences\",\"injectionDepth\":1000,\"completeFacetWithStandardValues\":true,\"allowedValues\":[]}]" }'
#SEARCHDATA='{ "q": "LioN-X" }'
#echo $SEARCHDATA
@ -26,35 +26,38 @@ SEARCHDATA='{ "q": "'$QUERY'", "sortCriteria": "relevancy", "numberOfResults": "
#echo curl \"$SEARCHURL\" $SEARCHOPTS -d "$SEARCHDATA"
# Query the coveo search API.
RESULTS=$(curl "$SEARCHURL" -H "Content-Type: application/json" -H "Authorization: Bearer $TOKEN" -d "$SEARCHDATA" )
RESULTS=$(curl "$SEARCHURL" -H "Content-Type: application/json" -H "Authorization: Bearer $TOKEN" -d "$SEARCHDATA" 2>/dev/null )
# Count number of results
COUNT=$(echo "$RESULTS" | jq ".results | length")
if [ "$COUNT" -ge "25" ]; then
echo "There are 25+ search results."
else
echo "There are" $(echo "$RESULTS" | jq ".results | length") "search results."
fi
#if [ "$COUNT" -ge "250" ]; then
# echo "There are 250+ search results."
#else
# echo "There are" $(echo "$RESULTS" | jq ".results | length") "search results."
#fi
#echo $RESULTS | jq
# Reformat results to simpler json. Includes basic cable specs
echo "$RESULTS" | jq -C "[ .results.[] | { name: .raw.catalogitemshortdesc, partnum: .title } ]"
#echo "$RESULTS" | jq -C "[ .results.[] | { name: .raw.catalogitemshortdesc, partnum: .title } ]"
LIST=$(echo "$RESULTS" | jq "[ .results.[] | { name: .raw.catalogitemshortdesc, brand: .raw.catalogitembrand, description: .raw.catalogitemlongdesc, application: .raw.catalogitemapplication, partnum: .raw.catalogitempartnumber, friendlypartnum: .title, santitizedpartnum: .raw.catalogitemsanitiz122xedname, usagecategory: .raw.catalogitemfilterproductcategory, category: .raw.catalogitemoriginalcategories, gauge: .raw.catalogitemconductorsiz122xe, voltage: .raw.catalogitemulvoltageratingvoltage1, conductors: .raw.catalogitemconductorcombinedcount, fibercount: .raw.catalogitemfibrecountfiber1, maxtemp: .raw.catalogitemultemprating, temprange: .raw.catalogitemoperatingtemprange, shielding: .raw.catalogitemshielding, producttype: .raw.catalogitemproducttype, material: .raw.catalogitemconductormaterialandstranding, construction: .raw.catalogitemconstructiontype, insulation: .raw.catalogiteminsulationmaterial, jacket: .raw.catalogitemjacket, armor: .raw.catalogitemarmortypeandmaterial, flamerating: .raw.catalogitemflamerating, url: (\"https://www.belden.com/products\" + .raw.clickableuri), datasheet: (\"https://catalog.belden.com/techdata/EN/\" + .raw.catalogitemdatasheetid + \"_techdata.pdf\" ), image: (\"https://www.belden.com\" + .raw.catalogitemimageurl) | split(\"?\")[0] } ]")
NUM=$(echo "$RESULTS" | jq -r "[ .results.[].title] | index(\"$QUERY\")")
if [ "$NUM" -ge 0 ]; then
echo $LIST | jq ".[$NUM]"
echo "Result matches exact part number input."
echo "Result uncertainty:" $(( $COUNT * 1 ))
if ! [ "$NUM" = "null" ]; then
echo $LIST | jq -r ".[$NUM]"
#echo "Result matches exact part number input."
#echo "Result uncertainty:" $(( $COUNT * 1 ))
exit 0
else
NUM=$(echo "$RESULTS" | jq -r "[ .results.[].raw.catalogitemsanitiz122xedname ] | index(\"$QUERY\")")
if [ "$NUM" -ge 0 ]; then
echo $LIST | jq ".[$NUM]"
echo "Result matches exact part number input."
if ! [ "$NUM" = "null" ]; then
echo $LIST | jq -r ".[$NUM]"
exit 0
#echo "Result matches exact part number input."
else
echo $LIST | jq ".[0]"
echo "Unable to find exact match."
echo "Result uncertainty:" $(( $COUNT * 4 ))
exit 1
#echo $LIST | jq ".[0]"
#echo "Unable to find exact match."
#echo "Result uncertainty:" $(( $COUNT * 4 ))
fi
fi

View File

@ -48,14 +48,14 @@ def parse(filename, output_dir):
table_list[text_body] = table.df
table.to_html("table" + str(n) + ".html")
#table.to_html("table" + str(n) + ".html")
#print(table.df)
#camelot.plot(table, kind='grid').savefig("test" + str(n) + ".png")
n=n+1
#camelot.plot(tables[0], kind='grid').savefig("test.png")
tables.export(output_dir + '/techdata.csv', f='csv')
tables.export(output_dir + '/techdata.json', f='json')
# print(table_list)
# Extract Basic details - part name & description, image, etc