Add cables to meilisearch db

Commit 82a52dea5a (parent 77fdc43fce)
Author: Cole Deck
Date: 2024-03-26 15:09:26 -05:00
4 changed files with 224 additions and 53 deletions

get_specs.py

@@ -159,8 +159,8 @@ def touch(path):
-def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
-    with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic") as bar:
+def get_multi(partnums, delay=0.25, dir="cables/", cache=True, bar=None):
+    #with alive_bar(len(partnums) * 2, dual_line=True, calibrate=30, bar="classic2", spinner="classic", disable=True, file=sys.stdout) as bar:
     failed = list()
     actualpartnums = list()
     def _try_download_datasheet(partnum, output_dir, dstype): # Guess datasheet URL
@@ -188,7 +188,7 @@ def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
                 # and set chunk_size parameter to None.
                 #if chunk:
                 bartext = bartext + "."
-                bar.text = bartext
+                # bar.text = bartext
                 f.write(chunk)
         #fprint("")
         return output_dir + "/datasheet.pdf"
@@ -217,7 +217,7 @@ def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
                 # and set chunk_size parameter to None.
                 #if chunk:
                 bartext = bartext + "."
-                bar.text = bartext
+                # bar.text = bartext
                 f.write(chunk)
         #fprint("")
         return output_dir + "/datasheet.pdf"
@@ -244,7 +244,7 @@ def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
                 # and set chunk_size parameter to None.
                 #if chunk:
                 bartext = bartext + "."
-                bar.text = bartext
+                # bar.text = bartext
                 f.write(chunk)
         #fprint("")
         return output_dir + "/part-hires." + url.split(".")[-1]
@@ -255,29 +255,29 @@ def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
     def __use_cached_datasheet(partnum, path, output_dir, dstype):
         fprint("Using cached datasheet for " + partnum)
-        bar.text = "Using cached datasheet for " + partnum
-        bar(skipped=True)
+        # bar.text = "Using cached datasheet for " + partnum
+        # bar(skipped=True)
         if not os.path.exists(output_dir + "/parsed"):
             fprint("Parsing Datasheet contents of " + partnum)
-            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
+            # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
             out = read_datasheet.parse(path, output_dir, partnum, dstype)
-            bar(skipped=False)
+            # bar(skipped=False)
             return out
         else:
             fprint("Datasheet already parsed for " + partnum)
-            bar.text = "Datasheet already parsed for " + partnum + ".pdf"
-            bar(skipped=True)
+            # bar.text = "Datasheet already parsed for " + partnum + ".pdf"
+            # bar(skipped=True)
     def __downloaded_datasheet(partnum, path, output_dir, dstype):
         fprint("Downloaded " + path)
-        bar.text = "Downloaded " + path
-        bar(skipped=False)
+        # bar.text = "Downloaded " + path
+        # bar(skipped=False)
         fprint("Parsing Datasheet contents of " + partnum)
-        bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
+        # bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
         out = read_datasheet.parse(path, output_dir, partnum, dstype)
-        bar(skipped=False)
+        # bar(skipped=False)
         return out
     def run_search(partnum):
@@ -290,7 +290,7 @@ def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
         output_dir = dir + partnum
         path = output_dir + "/datasheet.pdf"
         bartext = "Downloading files for part " + partnum
-        bar.text = bartext
+        # bar.text = bartext
         partnum = oldpartnum.replace("_","/")
         returnval = [partnum, dstype, False, False]
         if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1) or not cache:
@@ -305,7 +305,7 @@ def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
         output_dir = dir + partnum
         path = output_dir + "/datasheet.pdf"
         bartext = "Downloading files for part " + partnum
-        bar.text = bartext
+        # bar.text = bartext
         if not os.path.exists(output_dir + "/found_part_hires") or not cache:
             if _download_image(search_result["image"], output_dir):
@@ -373,19 +373,19 @@ def get_multi(partnums, delay=0.25, dir="cables/", cache=True):
             time.sleep(delay)
         if not success:
             fprint("Failed to download datasheet for part " + partnum)
-            bar.text = "Failed to download datasheet for part " + partnum
+            # bar.text = "Failed to download datasheet for part " + partnum
             failed.append((partnum, dstype))
-            bar(skipped=True)
-            bar(skipped=True)
+            # bar(skipped=True)
+            # bar(skipped=True)
         time.sleep(delay)
     if len(failed) > 0:
         fprint("Failed to download:")
         for partnum in failed:
             fprint(partnum[1] + " " + partnum[0])
         return False, actualpartnums # Go to manual review upload page
     else:
         return True, actualpartnums # All cables downloaded; we are good to go
@@ -408,7 +408,7 @@ if __name__ == "__main__":
     # ]
     partnums = [
         # Actual cables in Jukebox
-        "BL3092A",
         "AW86104CY",
         "AW3050",
         "AW6714",
@@ -438,8 +438,9 @@ if __name__ == "__main__":
         "BL6300FE 009Q",
         "BLRA500P 006Q",
+    ]
     # Some ones I picked, including some invalid ones
+    a = [
         "BL10GXS12",
         "BLRST%205L-RKT%205L-949",
         "BL10GXS13",

read_datasheet.py

@@ -177,7 +177,7 @@ def parse(filename, output_dir, partnum, dstype):
         if dstype == "Alphawire" and table_name_2.find("\n") >= 0:
             torename[table_name_2] = table_name_2[0:table_name_2.find("\n")]
-        if table_name_2.find(table.iloc[-1, 0]) >= 0:
+        if dstype == "Alphawire" and table_name_2.find(table.iloc[-1, 0]) >= 0:
             # Name taken from table directly above - this table does not have a name
             torename[table_name_2] = "Specs " + str(len(tables))
             #table_list["Specs " + str(len(tables))] = table_list[table_name_2] # rename table to arbitrary altername name
@@ -251,9 +251,6 @@ def parse(filename, output_dir, partnum, dstype):
         #fprint(table_name)
         #fprint(previous_table)
         main_key = previous_table
         cont_key = table_name
         #fprint(tables)
@@ -267,15 +264,21 @@ def parse(filename, output_dir, partnum, dstype):
                 del tables[table_name]
             else:
+                #print(tables)
+                #print(main_key)
+                #print(cont_key)
                 for key in tables[cont_key].keys():
                     tables[main_key][key] = tables[cont_key][key]
                 del tables[table_name]
+        else:
             previous_table = table_name
+    else:
+        previous_table = table_name
     # remove & rename tables
+    #print(torename)
     for table_name in torename.keys():
-        tables[torename[table_name]] = tables[table_name]
+        tables[torename[str(table_name)]] = tables[str(table_name)]
         del tables[table_name]
     # remove multi-line values that occasionally squeak through
     def replace_newlines_in_dict(d):
@@ -313,9 +316,9 @@ def parse(filename, output_dir, partnum, dstype):
     for file_path in json_files:
         os.remove(file_path)
         #print(f"Deleted {file_path}")
-    with open(output_dir + "/search_" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
+    with open(output_dir + "/search.json", 'w') as json_file:
         json.dump(output_table["searchspecs"], json_file)
-    with open(output_dir + "/specs_" + output_table["partnum"] + ".json", 'w') as json_file:
+    with open(output_dir + "/specs.json", 'w') as json_file:
         json.dump(output_table["fullspecs"], json_file)
     #print(json.dumps(output_table, indent=2))
@@ -346,12 +349,20 @@ def flatten(tables):
             fullkeyname = (table + ": " + keyname).replace(".","")
             if type(tables[table][key]) is not tuple:
-                out[fullkeyname] = convert_to_number(tables[table][key])
+                if len(tables[table][key]) > 0:
+                    out[fullkeyname] = convert_to_number(tables[table][key])
                 #print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
             elif len(tables[table][key]) == 1:
-                out[fullkeyname] = convert_to_number(tables[table][key][0])
+                if len(tables[table][key][0]) > 0:
+                    out[fullkeyname] = convert_to_number(tables[table][key][0])
                 #print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
+            else:
+                tmp = []
+                for x in range(len(tables[table][key])):
+                    if len(tables[table][key][x]) > 0:
+                        tmp.append(tables[table][key][x].strip())
+                        #out[fullkeyname + " " + str(x+1)] = convert_to_number(tables[table][key][x])
+                out[fullkeyname] = tmp
             # if the item has at least two commas in it, split it
             if tables[table][key].count(',') > 0:
                 out[fullkeyname] = list(map(lambda x: x.strip(), tables[table][key].split(",")))
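The flatten() changes above guard against empty strings and add a branch for tuples with more than one element. A standalone sketch of the new behavior under simplified assumptions; convert_to_number is stubbed here as a stand-in, since only its name appears in this diff, and the real helper lives elsewhere in read_datasheet.py:

def convert_to_number(s):
    # Stand-in for the repo's own helper of the same name.
    try:
        return float(s) if "." in s else int(s)
    except ValueError:
        return s

tables = {"Specs 1": {"AWG": "24", "Colors": ("Blue", "", "Red"), "Notes": ""}}
out = {}
for table in tables:
    for key, val in tables[table].items():
        fullkeyname = (table + ": " + key).replace(".", "")
        if type(val) is not tuple:
            if len(val) > 0:                 # new guard: skip empty strings
                out[fullkeyname] = convert_to_number(val)
        elif len(val) == 1:
            if len(val[0]) > 0:
                out[fullkeyname] = convert_to_number(val[0])
        else:                                # new branch: multi-element tuples
            out[fullkeyname] = [v.strip() for v in val if len(v) > 0]

print(out)   # {'Specs 1: AWG': 24, 'Specs 1: Colors': ['Blue', 'Red']}

Without the guards, an empty cell from a parsed PDF table would land in the search index as an empty string; with them, it is simply dropped.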

run.py

@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
+from alive_progress import alive_bar
 import get_specs
 import traceback
 #import logging
@@ -23,6 +24,8 @@ import server
 import asyncio
 import json
 import process_video
+import search
+from search import JukeboxSearch
@@ -33,6 +36,7 @@ led_ready = False
 camera_ready = False
 sensor_ready = False
 vm_ready = False
+cable_search_ready = False
 killme = None
 #pool = None
 serverproc = None
@@ -41,6 +45,13 @@ ledsys = None
 arm = None
 to_server_queue = Queue()
 from_server_queue = Queue()
+mode = "Startup"
+counter = 0
+jbs = None
+scan_value = None
+arm_state = None
+cable_list = list()
+parse_res = None
 
 def arm_start_callback(res):
     global arm_ready
@@ -55,6 +66,8 @@ def led_start_callback(res):
 def camera_start_callback(res):
     global camera_ready
     camera_ready = True
+    global scan_value
+    scan_value = res
 
 def sensor_start_callback(res):
     global sensor_ready
@@ -64,6 +77,12 @@ def vm_start_callback(res):
     global vm_ready
     vm_ready = True
 
+def cable_search_callback(res):
+    global cable_search_ready
+    cable_search_ready = True
+    global parse_res
+    parse_res = res
+
 def wait_for(val, name):
     #global val
     if val is False:
@@ -236,6 +255,7 @@ def setup_server(pool):
     global serverproc
     global camera
     global arm
+    global jbs
     arm = Rob(config)
     pool.apply_async(arm.init_arm, callback=arm_start_callback)
     global ledsys
@@ -245,6 +265,7 @@ def setup_server(pool):
     serverproc = Process(target=start_server_socket)
     serverproc.start()
     if led_ready is False:
         fprint("waiting for " + "LED controller initialization" + " to complete...", sendqueue=to_server_queue)
         while led_ready is False:
@@ -260,7 +281,7 @@ def setup_server(pool):
     if camera_ready is False:
         fprint("waiting for " + "Camera initilization" + " to complete...", sendqueue=to_server_queue)
-        # camera = process_video.qr_reader(config["cameras"]["banner"]["ip"], config["cameras"]["banner"]["port"])
+        camera = process_video.qr_reader(config["cameras"]["banner"]["ip"], config["cameras"]["banner"]["port"])
     fprint("Camera initialized.", sendqueue=to_server_queue)
@@ -272,23 +293,156 @@ def setup_server(pool):
     fprint("Arm initialized.", sendqueue=to_server_queue)
+    jbs = JukeboxSearch()
     return True
 
 def mainloop_server(pool):
+    # NON-blocking loop
     global config
     global counter
     global killme
+    global mode
+    global jbs
+    global arm
+    global ledsys
+    global camera
+    global arm_ready
+    global arm_state
+    global camera_ready
+    global cable_search_ready
+    global cable_list
     if killme.value > 0:
         killall()
-    counter = counter + 1
-    # fprint("Looking for QR code...")
-    # print(camera.read_qr(30))
+    if mode == "Startup":
+        counter = 54
+        if counter < 54:
+            # scanning cables
+            if arm_state is None:
+                #pool.apply_async(arm.get cable to camera, callback=arm_start_callback)
+                #ur5_control.goto_holder_index(arm)
+                #ur5 get item
+                # ur5 bring to camera
+                fprint("Getting cable index " + str(counter) + " and scanning...")
+                arm_state = "GET"
+            elif arm_ready and arm_state == "GET":
+                fprint("Looking for QR code...")
+                pool.apply_async(camera.read_qr, (30,), callback=camera_start_callback)
+                arm_ready = False
+            elif camera_ready:
+                fprint("Adding cable to list...")
+                global scan_value
+                if scan_value.find("bldn.app/") > -1:
+                    scan_value = scan_value[scan_value.find("bldn.app/")+9:]
+                cable_list.append((counter, scan_value))
+                fprint(scan_value)
+                #pool.apply_async(arm.return cable, callback=arm_start_callback)
+                arm_state = "RETURN"
+                camera_ready = False
+            elif arm_ready and arm_state == "RETURN":
+                counter += 1
+                arm_state = None
+            else:
+                # just wait til arm/camera is ready
+                pass
+        else:
+            # scanned everything
+            tmp = list()
+            for cable in cable_list:
+                tmp.append(cable[1])
+            tmp = [
+                # Actual cables in Jukebox
+                "AW86104CY",
+                "AW3050",
+                "AW6714",
+                "AW1172C",
+                "AWFIT-221-1_4",
+                "BLTF-1LF-006-RS5",
+                "BLTF-SD9-006-RI5",
+                "BLTT-SLG-024-HTN",
+                "BLFISX012W0",
+                "BLFI4X012W0",
+                "BLSPE101",
+                "BLSPE102",
+                "BL7922A",
+                "BL7958A",
+                "BLIOP6U",
+                "BL10GXW13",
+                "BL10GXW53",
+                "BL29501F",
+                "BL29512",
+                "BL3106A",
+                "BL9841",
+                "BL3105A",
+                "BL3092A",
+                "BL8760",
+                "BL6300UE",
+                "BL6300FE",
+                "BLRA500P"
+            ]
+            cable_list = tmp
+            pool.apply_async(get_specs.get_multi, (tmp, 0.5), callback=cable_search_callback)
+            mode = "Parsing"
+            fprint("All cables scanned. Finding & parsing datasheets...")
+    if mode == "Parsing":
+        # waiting for search & parse to complete
+        #cable_search_ready = True
+        if cable_search_ready is False:
+            pass
+        else:
+            # done
+            global parse_res
+            success, partnums = parse_res
+            #partnums = list()
+            # debug
+            #success = True
+            #cable_list = list(range(len(partnums)))
+            if success:
+                # easy mode
+                fprint("All cables inventoried and parsed.")
+                for x in range(len(cable_list)):
+                    #cable_list[x] = (cable_list[x][0], partnums[x])
+                    cable_list[x] = (x, cable_list[x])
+                fprint("Adding to database...")
+                for idx,partnum in cable_list:
+                    with open("cables/" + partnum[2:] + "/search.json", "rb") as f:
+                        searchdata = json.load(f)
+                    searchdata["position"] = idx
+                    with open("cables/" + partnum[2:] + "/specs.json", "rb") as f:
+                        specs = json.load(f)
+                    searchdata["fullspecs"] = specs
+                    jbs.add_document(searchdata)
+                fprint("All cables added to database.")
+                mode = "Idle"
+            else:
+                # TODO: manual input
+                pass
+    if mode == "Idle":
+        # do nothing
+        if arm_ready is False:
+            pool.apply_async(ur5_control.move_to_home, (arm,), callback=arm_start_callback)
+            arm_ready = True
+        else:
+            # LED idle anim
+            pass
 
 def run_loading_app():
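The heart of this commit is the Parsing branch above: once get_specs.get_multi reports success, each cable's search.json gets its holder position and full spec sheet attached, then goes to Meilisearch via jbs.add_document. The same step distilled into a standalone sketch; the two-element cable_list is hypothetical, and partnum[2:] strips the two-character vendor prefix to match the cables/ directory layout, as in the code above:

import json
from search import JukeboxSearch

jbs = JukeboxSearch()
cable_list = [(0, "BL10GXW13"), (1, "AW3050")]   # hypothetical (slot, part number) pairs
for idx, partnum in cable_list:
    with open("cables/" + partnum[2:] + "/search.json", "rb") as f:
        searchdata = json.load(f)
    searchdata["position"] = idx                 # record which holder slot it lives in
    with open("cables/" + partnum[2:] + "/specs.json", "rb") as f:
        searchdata["fullspecs"] = json.load(f)
    jbs.add_document(searchdata)                 # push the merged record to Meilisearch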

search.py

@@ -1,8 +1,10 @@
+#!/usr/bin/env python3
 """Interactions with the Meilisearch API for adding and searching cables."""
 from meilisearch import Client
 from meilisearch.task import TaskInfo
 from meilisearch.errors import MeilisearchApiError
-import json
+import time
 
 DEFAULT_URL = "http://localhost:7700"
 DEFAULT_APIKEY = "fluffybunnyrabbit" # I WOULD RECOMMEND SOMETHING MORE SECURE
@@ -34,12 +36,15 @@ class JukeboxSearch:
         # create the index if it does not exist already
         try:
             self.client.get_index(self.index)
+            self.client.delete_index(self.index)
+            self.client.create_index(self.index)
         except MeilisearchApiError as _:
             self.client.create_index(self.index)
         # make a variable to easily reference the index
         self.idxref = self.client.index(self.index)
+        time.sleep(0.05)
         # update filterable attributes if needed
+        self.idxref.update_distinct_attribute('partnum')
         self.update_filterables(filterable_attrs)
 
     def add_document(self, document: dict) -> TaskInfo:
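The constructor now drops and recreates the index on every startup, so stale cable documents from a previous run never linger, and partnum becomes the distinct attribute so duplicate part numbers collapse to a single search hit. Index operations in Meilisearch are asynchronous tasks, which is presumably what the short sleep papers over. A minimal sketch of the same flow against the raw meilisearch-python client; the index name "cables" is an assumption, since the default is not shown in this diff:

import time
from meilisearch import Client
from meilisearch.errors import MeilisearchApiError

client = Client("http://localhost:7700", "fluffybunnyrabbit")
index_name = "cables"                      # assumed; the real default is not shown here
try:
    client.get_index(index_name)
    client.delete_index(index_name)        # wipe stale documents from a prior run
    client.create_index(index_name)
except MeilisearchApiError:
    client.create_index(index_name)        # first run: the index did not exist yet
idxref = client.index(index_name)
time.sleep(0.05)                           # crude wait; index creation is an async task
idxref.update_distinct_attribute("partnum")   # collapse duplicate part numbers in results

A more robust variant would poll the returned TaskInfo until the create task succeeds instead of sleeping a fixed 50 ms, but the sketch mirrors the commit as written.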