# jukebox-software/get_specs.py
#!/usr/bin/env python3
import os
import sys
import read_datasheet
from alive_progress import alive_bar
import requests
#import time
import json
import subprocess
from util import fprint
bartext = ""  # Shared progress-bar caption; download helpers append one "." per received chunk.
failed = []  # Part numbers whose datasheet could not be downloaded (reported at end of get_multi).
def check_internet(url='https://belden.com', timeout=5):
    """Return True if *url* answers an HTTP GET within *timeout* seconds.

    Fix: the original caught only ``requests.ConnectionError``, so a
    ``requests.Timeout`` (exactly what the *timeout* argument guards
    against) would propagate and crash the caller.  ``RequestException``
    is the common base of both, so any transport-level failure now maps
    to a clean ``False``.
    """
    try:
        requests.get(url, timeout=timeout)
        return True
    except requests.exceptions.RequestException:
        # Connection refused, DNS failure, timeout, ... -> "no internet".
        return False
def query_search(partnum):
    """Query the Belden search database for *partnum* via query-search.sh.

    Returns the parsed JSON result (a dict with at least "image" and
    "datasheet" URLs, per the callers in get_multi), or False when the
    script exits nonzero (no results found).
    """
    # TODO: Reimplement in python.
    # The bash script uses some crazy json formatting that I could not
    # figure out, despite the fact that I wrote it.  So I'll just leave
    # it, because it works.
    command = ["./query-search.sh", partnum]
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:  # script signals "no match" with a nonzero exit
        fprint("No results found in search database for " + partnum + ". No hi-res part image available.", result.stderr)
        return False
    return json.loads(result.stdout)
def touch(path):
    """Unix-style ``touch``: create *path* if absent and refresh its timestamps.

    Append mode creates the file without truncating existing contents.
    """
    handle = open(path, 'a')
    try:
        os.utime(path, None)
    finally:
        handle.close()
def get_multi(partnums):
    """Download and parse datasheets (and hi-res part images) for *partnums*.

    For each part number, populates "cables/<partnum>/" with datasheet.pdf
    (parsed via read_datasheet.parse) and, when the Belden search finds the
    part, a hi-res product image plus a "found_part_hires" marker file.

    Returns True when every part was handled, False when at least one
    datasheet could not be retrieved (offenders collected in the
    module-level ``failed`` list -> manual review upload page).

    Fixes vs. original:
    - _download_image's KeyboardInterrupt handler removed
      ``partnum + "/datasheet.pdf"`` instead of the partial image file.
    - When the search succeeded but its datasheet URL failed to download
      (and no cache existed), the part was silently skipped: no fallback
      to the guessed URL, no entry in ``failed``, and the progress bar
      stalled.  Both fallback and failure recording are now applied.
    - ``global bartext`` added: the loop's assignment previously created
      a local shadowing the module global the nested helpers append to.
    - ``requests.get`` calls now carry a timeout; duplicate download
      helpers were merged.
    """
    global bartext  # progress caption shared with the nested helpers

    # Two bar steps per part: download + parse.
    with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:

        def _download_datasheet(url, output_dir):
            """Stream *url* into <output_dir>/datasheet.pdf.

            Returns the saved path, or False on 404 / non-PDF response.
            """
            global bartext
            target = output_dir + "/datasheet.pdf"
            try:
                with requests.get(url, stream=True, timeout=30) as r:
                    if r.status_code == 404:
                        return False
                    # Belden serves an HTML error page for unknown parts.
                    if r.headers.get("Content-Type") != "application/pdf":
                        return False
                    os.makedirs(output_dir, exist_ok=True)
                    with open(target, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=131072):
                            bartext = bartext + "."
                            bar.text = bartext
                            f.write(chunk)
                    return target
            except KeyboardInterrupt:
                fprint("Quitting!")
                if os.path.exists(target):
                    os.remove(target)  # don't leave a truncated PDF behind
                sys.exit()

        def _try_download_datasheet(partnum, output_dir):
            """Guess the canonical techdata URL for *partnum* and download it."""
            sanitized_name = partnum.replace(" ", "")
            url = "https://catalog.belden.com/techdata/EN/" + sanitized_name + "_techdata.pdf"
            return _download_datasheet(url, output_dir)

        def _download_image(url, output_dir):
            """Stream the hi-res part image at *url* into *output_dir*.

            Returns the saved path, or False on 404.
            """
            global bartext
            # Keep the source file's extension (jpg/png/...).
            target = output_dir + "/part-hires." + url.split(".")[-1]
            try:
                with requests.get(url, stream=True, timeout=30) as r:
                    if r.status_code == 404:
                        return False
                    os.makedirs(output_dir, exist_ok=True)
                    with open(target, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=131072):
                            bartext = bartext + "."
                            bar.text = bartext
                            f.write(chunk)
                    return target
            except KeyboardInterrupt:
                fprint("Quitting!")
                # Fix: original deleted partnum + "/datasheet.pdf" here.
                if os.path.exists(target):
                    os.remove(target)
                sys.exit()

        def __use_cached_datasheet(partnum, path, output_dir):
            """Skip the download step and parse the already-cached PDF."""
            fprint("Using cached datasheet for " + partnum)
            bar.text = "Using cached datasheet for " + partnum
            bar(skipped=True)
            fprint("Parsing Datasheet contents of " + partnum)
            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
            read_datasheet.parse(path, output_dir, partnum)
            bar(skipped=False)

        def __downloaded_datasheet(partnum, path, output_dir):
            """Count the download step and parse the freshly-saved PDF."""
            fprint("Downloaded " + path)
            bar.text = "Downloaded " + path
            bar(skipped=False)
            fprint("Parsing Datasheet contents of " + partnum)
            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
            read_datasheet.parse(path, output_dir, partnum)
            bar(skipped=False)

        def __record_failure(partnum):
            """Log the failure and advance both bar steps for this part."""
            fprint("Failed to download datasheet for part " + partnum)
            bar.text = "Failed to download datasheet for part " + partnum
            failed.append(partnum)
            bar(skipped=True)
            bar(skipped=True)

        for partnum in partnums:
            output_dir = "cables/" + partnum
            path = output_dir + "/datasheet.pdf"
            bartext = "Downloading files for part " + partnum
            bar.text = bartext

            # size > 1 guards against zero/near-zero-byte placeholder files.
            have_datasheet = os.path.exists(path) and os.path.getsize(path) > 1
            have_image = os.path.exists(output_dir + "/found_part_hires")

            if have_image and have_datasheet:
                # We already have a hi-res image and the datasheet - perfect!
                fprint("Using cached hi-res part image for " + partnum)
                __use_cached_datasheet(partnum, path, output_dir)
                continue

            # Try to use belden.com search (spaces stripped for the query).
            search_result = query_search(partnum.replace(" ", ""))
            if search_result is not False:
                # Download high resolution part image if available and needed.
                if not have_image:
                    if _download_image(search_result["image"], output_dir):
                        fprint("Downloaded hi-res part image for " + partnum)
                        touch(output_dir + "/found_part_hires")  # marker file
                else:
                    fprint("Using cached hi-res part image for " + partnum)
                # Datasheet: cache, then the search-provided URL, then the
                # guessed URL (fallback missing in the original).
                if have_datasheet:
                    __use_cached_datasheet(partnum, path, output_dir)
                elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
                    __downloaded_datasheet(partnum, path, output_dir)
                elif _try_download_datasheet(partnum, output_dir) is not False:
                    __downloaded_datasheet(partnum, path, output_dir)
                else:
                    __record_failure(partnum)
            elif have_datasheet:
                __use_cached_datasheet(partnum, path, output_dir)
            # If search fails, and we don't already have the datasheet,
            # guess the datasheet URL and skip the hires image download.
            elif _try_download_datasheet(partnum, output_dir) is not False:
                __downloaded_datasheet(partnum, path, output_dir)
            # Failed to download with search or guess :(
            else:
                __record_failure(partnum)

    if len(failed) > 0:
        fprint("Failed to download:")
        for partnum in failed:
            fprint(partnum)
        return False  # Go to manual review upload page
    else:
        return True  # All cables downloaded; we are good to go
if __name__ == "__main__":
    # Ad-hoc smoke test: a sample of copper and fiber Belden part numbers.
    partnums = [
        "7958A", "10GXS12", "RST 5L-RKT 5L-949", "10GXS13", "10GXW12",
        "10GXW13", "2412", "2413", "OSP6AU", "FI4D024P9", "FISD012R9",
        "FDSD012A9", "FSSL024NG", "FISX006W0", "FISX00103", "C6D1100007",
    ]
    get_multi(partnums)