Fix get_specs

Update parsing and stuff
Adjust datasheet parsing for meilisearch, add dockerfile
2024-03-01 19:26:47 -06:00 · 2024-03-01 19:25:01 -06:00 · 2024-02-17 23:08:21 -06:00 · 2024-02-17 20:31:43 -06:00 · 2024-02-17 20:25:51 -06:00 · 2024-02-17 20:22:54 -06:00
8 changed files with 188 additions and 72 deletions
--- a/11
+++ b/11
@@ -0,0 +1,11 @@
+FROM python:latest
+# libgl1 replaces libgl1-mesa-glx: that was only a transitional package and newer Debian releases (which python:latest follows) no longer ship it.
+RUN apt-get update && apt-get install -y libgl1 ghostscript && apt-get clean && rm -rf /var/lib/apt/lists
+COPY . .
+#COPY config-server.yml config.yml
+RUN pip3 install -r requirements.txt
+
+CMD ["python3", "run.py"]
+EXPOSE 5000
+EXPOSE 8000
+EXPOSE 9000
--- a/config.yml
+++ b/config.yml
@@ -32,13 +32,13 @@ led:
      ledstart: 288
      ledend: 431
      mode: rgb
-    - universe: 1
-      ip: 192.168.68.130
+    - universe: 4
+      ip: 192.168.5.40
      ledstart: 432
      ledend: 575
      mode: rgb
-    - universe: 4
-      ip: 192.168.68.131
+    - universe: 1
+      ip: 192.168.5.4
      ledstart: 576
      ledend: 719
      mode: rgb
--- a/get_specs.py
+++ b/get_specs.py
@@ -26,7 +26,7 @@ def check_internet(url='https://belden.com', timeout=5):
    


-def query_search(partnum):
+def query_search(partnum, source):
    """token_url = "https://www.belden.com/coveo/rest/token?t=" + str(int(time.time()))
    with requests.get(token_url) as r:
        out = json.loads(r.content)
@@ -49,15 +49,50 @@ def query_search(partnum):
    # Bash script uses some crazy json formatting that I could not figure out
    # Despite the fact that I wrote it
    # So I'll just leave it, becuase it works.
+    if source == "Belden":
+        command = ["./query-search.sh", partnum]
+        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        if result.returncode != 0: # error
+            fprint("No results found in search database for " + partnum + ". No hi-res part image available.", result.stderr)
+            return False
+        else:
+            data_out = json.loads(result.stdout)
+            return data_out
+    elif source == "Alphawire":
+        alphaurl = "https://www.alphawire.com//sxa/search/results/?l=en&s={4A774076-6068-460C-9CC6-A2D8E85E407F}&itemid={BF82F58C-EFD9-4D8B-AE3E-097DD12CF7DA}&sig=&autoFireSearch=true&productpartnumber=*" + partnum + "*&v={B22CD56D-AB95-4048-8AA1-5BBDF2F2D17F}&p=10&e=0&o=ProductPartNumber%2CAscending"
+        r = requests.get(url=alphaurl)
+        data = r.json()
+        output = dict()
+        #print(data)
+        try:
+            if data["Count"] > 0:
+                print(data["Results"][0]["Url"])
+                result = data["Results"][0]
+                if result["Url"].split("/")[-1] == partnum:
+                    #print(partnum)
+                    print(result["Html"])
+                    try:
+                        imgidx = result["Html"].index("<img src=") + 10
+                        imgidx2 = result["Html"].index("?", imgidx)
+                        output["image"] = result["Html"][imgidx:imgidx2]
+                        if output["image"].index("http") != 0:
+                            output["image"] = ""
+                            print("No cable image found.")
+                    except:
+                        print("No cable image found.")

-    command = ["./query-search.sh", partnum]
-    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-    if result.returncode != 0: # error
-        fprint("No results found in search database for " + partnum + ". No hi-res part image available.", result.stderr)
+                    dsidx = result["Html"].index("<a href=\"/disteAPI/") + 9
+                    dsidx2 = result["Html"].index(partnum, dsidx) + len(partnum)
+                    output["datasheet"] = "https://www.alphawire.com" + result["Html"][dsidx:dsidx2]
+                    #"test".index()
+                    print(output)
+                    return output
+
+
+        except:
+            return False
        return False
-    else:
-        data_out = json.loads(result.stdout)
-        return data_out
+

 def touch(path):
    with open(path, 'a'):
@@ -126,7 +161,7 @@ def get_multi(partnums):
                sys.exit()


-        def _download_image(url, output_dir): # Download datasheet with known URL
+        def _download_image(url, output_dir): # Download image with known URL
            global bartext

            #fprint(url)
@@ -151,25 +186,31 @@ def get_multi(partnums):
                os.remove(partnum + "/datasheet.pdf")
                sys.exit()

-        def __use_cached_datasheet(partnum, path, output_dir):
+        def __use_cached_datasheet(partnum, path, output_dir, dstype):
            fprint("Using cached datasheet for " + partnum)
            bar.text = "Using cached datasheet for " + partnum
            bar(skipped=True)
            fprint("Parsing Datasheet contents of " + partnum)
            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-            read_datasheet.parse(path, output_dir)
+            read_datasheet.parse(path, output_dir, partnum, dstype)
            bar(skipped=False)

-        def __downloaded_datasheet(partnum, path, output_dir):
+        def __downloaded_datasheet(partnum, path, output_dir, dstype):
            fprint("Downloaded " + path)
            bar.text = "Downloaded " + path
            bar(skipped=False)
            fprint("Parsing Datasheet contents of " + partnum)
            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-            read_datasheet.parse(path, output_dir)
+            read_datasheet.parse(path, output_dir, partnum, dstype)
            bar(skipped=False)

-        for partnum in partnums:
+        for fullpartnum in partnums:
+            if fullpartnum[0:2] == "BL": # catalog.belden.com entry\
+                partnum = fullpartnum[2:]
+                dstype = "Belden"
+            elif fullpartnum[0:2] == "AW":
+                partnum = fullpartnum[2:]
+                dstype = "Alphawire"
            output_dir = "cables/" + partnum
            path = output_dir + "/datasheet.pdf"
            bartext = "Downloading files for part " + partnum
@@ -177,7 +218,7 @@ def get_multi(partnums):
            #
            if (not os.path.exists(output_dir + "/found_part_hires")) or not (os.path.exists(path) and os.path.getsize(path) > 1):
                # Use query
-                search_result = query_search(partnum.replace(" ", ""))
+                search_result = query_search(partnum.replace(" ", ""), dstype)
                # Try to use belden.com search
                if search_result is not False:
                    # Download high resolution part image if available and needed
@@ -190,17 +231,17 @@ def get_multi(partnums):

                    # Download datasheet from provided URL if needed
                    if os.path.exists(path) and os.path.getsize(path) > 1:
-                        __use_cached_datasheet(partnum, path, output_dir)
+                        __use_cached_datasheet(partnum, path, output_dir, dstype)

                    elif _download_datasheet(search_result["datasheet"], output_dir) is not False:
-                        __downloaded_datasheet(partnum, path, output_dir)
+                        __downloaded_datasheet(partnum, path, output_dir, dstype)
                
                elif os.path.exists(path) and os.path.getsize(path) > 1:
-                    __use_cached_datasheet(partnum, path, output_dir)
+                    __use_cached_datasheet(partnum, path, output_dir, dstype)
                
                # If search fails, and we don't already have the datasheet, guess datasheet URL and skip the hires image download
                elif _try_download_datasheet(partnum, output_dir) is not False:
-                    __downloaded_datasheet(partnum, path, output_dir)
+                    __downloaded_datasheet(partnum, path, output_dir, dstype)

                # Failed to download with search or guess :(
                else: 
@@ -213,7 +254,7 @@ def get_multi(partnums):
            # We already have a hi-res image and the datasheet - perfect!
            else:
                fprint("Using cached hi-res part image for " + partnum)
-                __use_cached_datasheet(partnum, path, output_dir)
+                __use_cached_datasheet(partnum, path, output_dir, dstype)
    
    if len(failed) > 0:
        fprint("Failed to download:")
@@ -227,21 +268,22 @@ def get_multi(partnums):


 if __name__ == "__main__":
-    partnums = ["10GXS12", "RST 5L-RKT 5L-949", 
-"10GXS13",
-"10GXW12",
-"10GXW13",
-"2412",
-"2413",
-"OSP6AU",
-"FI4D024P9",
-"FISD012R9",
-"FDSD012A9",
-"FSSL024NG",
-"FISX006W0",
-"FISX00103",
-"C6D1100007"
+    partnums = ["BL7958A", "BL10GXS12", "BLRST 5L-RKT 5L-949", 
+"BL10GXS13",
+"BL10GXW12",
+"BL10GXW13",
+"BL2412",
+"BL2413",
+"BLOSP6AU",
+"BLFI4D024P9",
+"BLFISD012R9",
+"BLFDSD012A9",
+"BLFSSL024NG",
+"BLFISX006W0",
+"BLFISX00103",
+"BLC6D1100007"
    ]
    get_multi(partnums)
+    #query_search("3248", "Alphawire")


--- a/led_control.py
+++ b/led_control.py
@@ -178,9 +178,9 @@ def init():
    sendall(data)
    #time.sleep(50000)    
    fprint("Running start-up test sequence...")
-    for y in range(1):
+    for y in range(100):
        for x in range(len(leds)):
-            setpixel(5,5,5,x)
+            setpixel(0,0,150,x)
        sendall(data)
        #time.sleep(2)
        #alloffsmooth()
@@ -290,7 +290,7 @@ def close():
    time.sleep(0.5)
    sender.stop()

-def mapimage(image, fps=60):
+def mapimage(image, fps=90):
    global start
    while uptime() - start < 1/fps:
        time.sleep(0.00001)
--- a/map.png
+++ b/map.png
--- a/read_datasheet.py
+++ b/read_datasheet.py
@@ -9,8 +9,10 @@ from PIL import Image
 import io
 import json
 from util import fprint
+import uuid
+from util import run_cmd

-def parse(filename, output_dir):
+def parse(filename, output_dir, partnum, dstype):

    # Extract table data

@@ -22,6 +24,7 @@ def parse(filename, output_dir):
    page = reader.pages[0]
    table_list = {}
    for table in tables:
+        table.df.infer_objects(copy=False)
        table.df.replace('', np.nan, inplace=True)
        table.df.dropna(inplace=True, how="all")
        table.df.dropna(inplace=True, axis="columns", how="all")
@@ -137,44 +140,104 @@ def parse(filename, output_dir):


        # multi-page table check
-        if table_name.isdigit() and len(tables) > 1:
-            fprint(table_name)
-            fprint(previous_table)
-            
-            
-            
-            
-            main_key = previous_table
-            cont_key = table_name
-            fprint(tables)
-            if vertical == False:
-                main_keys = list(tables[main_key].keys())
-                for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
-                    if i < len(main_keys):
-                        fprint(tables[main_key][main_keys[i]])
-                        tables[main_key][main_keys[i]] = (tables[main_key][main_keys[i]] + (cont_key,) + cont_values)
-
-                del tables[table_name]
-
-            else:
-                for key in tables[cont_key].keys():
-                    tables[main_key][key] = tables[cont_key][key]
-                del tables[table_name]
+        if dstype == "Belden":
+            if table_name.isdigit() and len(tables) > 1:
+                fprint(table_name)
+                fprint(previous_table)
+                
+                
+                
+                
+                main_key = previous_table
+                cont_key = table_name
+                fprint(tables)
+                if vertical == False:
+                    main_keys = list(tables[main_key].keys())
+                    for i, (cont_key, cont_values) in enumerate(tables[cont_key].items()):
+                        if i < len(main_keys):
+                            fprint(tables[main_key][main_keys[i]])
+                            tables[main_key][main_keys[i]] = (tables[main_key][main_keys[i]] + (cont_key,) + cont_values)
+    
+                    del tables[table_name]
+    
+                else:
+                    for key in tables[cont_key].keys():
+                        tables[main_key][key] = tables[cont_key][key]
+                    del tables[table_name]

        previous_table = table_name
    
+    # remove multi-line values that occasionally squeak through
+    def replace_newlines_in_dict(d):
+        for key, value in d.items():
+            if isinstance(value, str):
+                # Replace \n with " " if the value is a string
+                d[key] = value.replace('\n', ' ')
+            elif isinstance(value, dict):
+                # Recursively call the function if the value is another dictionary
+                replace_newlines_in_dict(value)
+        return d
+    
+    tables = replace_newlines_in_dict(tables)

-    fprint(tables)
-    with open(output_dir + "/tables.json", 'w') as json_file:
-        json.dump(tables, json_file)
+    # summary
+
+    output_table = dict()
+    output_table["partnum"] = partnum
+    id = str(uuid.uuid4())
+    output_table["id"] = id
+    #output_table["position"] = id
+    #output_table["brand"] = brand
+    output_table["fullspecs"] = tables
+    output_table["searchspecs"] = {"partnum": partnum, **flatten(tables)}
+    
+    output_table["searchspecs"]["id"] = id
+    


+    print(output_table)
+
+    run_cmd("rm " + output_dir + "/*.json") # not reliable!
+    with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
+        json.dump(output_table["searchspecs"], json_file)
+
+    return output_table


+def flatten(tables):
+    """Flatten ``{table: {key: value}}`` into one ``"table: key" -> value`` dict.
+
+    Keys are cut to 64 characters and "." is removed; numeric strings become int or float.
+    A tuple value is kept only when it holds exactly one element; longer tuples are skipped.
+    The pairs are also echoed to stdout in a JSON-like form for debugging.
+    """
+    def convert_to_number(s):
+        # Prefer int, then float; anything non-numeric (or not a string at all) is returned as-is.
+        try:
+            return int(s)
+        except (ValueError, TypeError):
+            try:
+                return float(s)
+            except (ValueError, TypeError):
+                return s
+
+    out = dict()
+    print("{")
+    for table, entries in tables.items():
+        for key, value in entries.items():
+            keyname = key[0:64]  # slicing is a no-op for keys shorter than 64 characters

-    return tables
+            fullkeyname = (table + ": " + keyname).replace(".","")
+            if isinstance(value, tuple):
+                if len(value) != 1:
+                    continue  # multi-element tuples are skipped, as before
+                value = value[0]
+            out[fullkeyname] = convert_to_number(value)
+            print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
+    print("}")
+    return out

    

 if __name__ == "__main__":
-    parse("test2.pdf", "10GXS13")
+    parse("test2.pdf", "cables/10GXS13", "10GXS13", "Belden")  # parse() requires dstype; 10GXS13 is listed as a Belden ("BL") part in get_specs.py
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ pypdf2==2.12.1
 alive-progress
 requests
 git+https://github.com/Byeongdulee/python-urx.git
-psycopg2
+psycopg2-binary
 pyyaml
 Flask
 selenium
--- a/util.py
+++ b/util.py
@@ -123,7 +123,7 @@ class Logger(object):
        self.terminal = sys.stdout

    def write(self, message):
-        self.log.write(message)
+        #self.log.write(message)
        #close(filename)
        #self.log = open(filename, "a")
        try:
Author	SHA1	Message	Date
Cole Deck	af6ffe451d	Fix get_specs	2024-03-01 19:26:47 -06:00
Cole Deck	50bf835d13	Update parsing and stuff	2024-03-01 19:25:01 -06:00
Cole Deck	fe5de4e54c	Adjust datasheet parsing for meilisearch, add dockerfile	2024-02-17 23:08:21 -06:00
Cole Deck	523915feb0	add partnum to parsing	2024-02-17 20:31:43 -06:00
Cole Deck	b5b2a936c1	Switch to binary dependency	2024-02-17 20:25:51 -06:00
Cole Deck	afd144bd32	FIx requirements.txt	2024-02-17 20:22:54 -06:00
Cole Deck	eb221a5206	Create main runner app, with async multithreading	2024-02-17 20:21:42 -06:00
Cole Deck	db7c8c4577	Add video abstraction class	2024-02-17 20:06:37 -06:00
BlueOceanWave	21b1bf7992	Added system exit	2024-02-16 20:36:43 -06:00
Cole Deck	d376dba67c	Add keyboard control scripts, add websocket server	2024-02-08 12:35:30 -06:00
Cole Deck	95631dbdbe	Add LED array mapping, load base config for tabletop rings, and image/video player mode	2024-01-25 20:06:10 -06:00
Cole Deck	9aef296763	More logging	2024-01-18 16:45:37 -06:00
Cole Deck	2b287492de	Make firefox start faster, verbose start	2024-01-18 16:41:57 -06:00
Cole Deck	d2a4d93590	Working network test	2024-01-18 16:35:02 -06:00
Cole Deck	58605dbe85	test server/client setup	2024-01-18 16:00:12 -06:00
Cole Deck	7bf3276ce9	Add basic windows control system. Needs VM functional to continue	2024-01-17 20:14:20 -06:00
Cole Deck	818688452b	Add local loading page	2024-01-17 16:46:20 -06:00
Cole Deck	01526524d4	Create main runner app, with async multithreading	2024-01-17 16:06:15 -06:00
Cole Deck	33671683ea	test rendering json in HTML table format	2024-01-17 09:23:06 -06:00
Cole Deck	fad885c610	Add UR5 control test, datasheet JSON output	2024-01-16 17:27:55 -06:00