Merge branch 'main' into dthomas_meilisearch

Adjust datasheet parsing for meilisearch, add dockerfile
add a module for using meilisearch
2024-02-20 10:04:33 -06:00 · 2024-02-17 23:08:21 -06:00 · 2024-02-17 22:46:11 -06:00 · 2024-02-17 22:45:30 -06:00 · 2024-02-17 20:31:43 -06:00 · 2024-02-17 20:25:51 -06:00
7 changed files with 96 additions and 11 deletions
--- a/11
+++ b/11
@@ -0,0 +1,11 @@
+FROM python:latest
+
+RUN apt-get update && apt-get install -y libgl1-mesa-glx ghostscript && apt-get clean && rm -rf /var/lib/apt/lists
+COPY . .
+#COPY config-server.yml config.yml
+RUN pip3 install -r requirements.txt
+
+CMD ["python3", "run.py"]
+EXPOSE 5000
+EXPOSE 8000
+EXPOSE 9000
--- a/compose.yml
+++ b/compose.yml
@@ -0,0 +1,13 @@
+services:
+  meilisearch:
+    image: "getmeili/meilisearch:v1.6.2"
+    ports:
+      - "7700:7700"
+    environment:
+      MEILI_MASTER_KEY: fluffybunnyrabbit
+      MEILI_NO_ANALYTICS: true
+    volumes:
+      - "meili_data:/meili_data"
+
+volumes:
+  meili_data:
--- a/get_specs.py
+++ b/get_specs.py
@@ -157,7 +157,7 @@ def get_multi(partnums):
            bar(skipped=True)
            fprint("Parsing Datasheet contents of " + partnum)
            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-            read_datasheet.parse(path, output_dir)
+            read_datasheet.parse(path, output_dir, partnum)
            bar(skipped=False)

        def __downloaded_datasheet(partnum, path, output_dir):
@@ -166,7 +166,7 @@ def get_multi(partnums):
            bar(skipped=False)
            fprint("Parsing Datasheet contents of " + partnum)
            bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-            read_datasheet.parse(path, output_dir)
+            read_datasheet.parse(path, output_dir, partnum)
            bar(skipped=False)

        for partnum in partnums:
@@ -227,7 +227,7 @@ def get_multi(partnums):


 if __name__ == "__main__":
-    partnums = ["10GXS12", "RST 5L-RKT 5L-949", 
+    partnums = ["7958A", "10GXS12", "RST 5L-RKT 5L-949", 
 "10GXS13",
 "10GXW12",
 "10GXW13",
--- a/read_datasheet.py
+++ b/read_datasheet.py
@@ -9,8 +9,10 @@ from PIL import Image
 import io
 import json
 from util import fprint
+import uuid
+from util import run_cmd

-def parse(filename, output_dir):
+def parse(filename, output_dir, partnum):

    # Extract table data

@@ -163,18 +165,77 @@ def parse(filename, output_dir):

        previous_table = table_name
    
+    # remove multi-line values that occasionally squeak through
+    def replace_newlines_in_dict(d):
+        for key, value in d.items():
+            if isinstance(value, str):
+                # Replace \n with " " if the value is a string
+                d[key] = value.replace('\n', ' ')
+            elif isinstance(value, dict):
+                # Recursively call the function if the value is another dictionary
+                replace_newlines_in_dict(value)
+        return d
+    
+    tables = replace_newlines_in_dict(tables)

-    fprint(tables)
-    with open(output_dir + "/tables.json", 'w') as json_file:
-        json.dump(tables, json_file)
+    # summary
+
+    output_table = dict()
+    output_table["partnum"] = partnum
+    id = str(uuid.uuid4())
+    output_table["id"] = id
+    #output_table["position"] = id
+    #output_table["brand"] = brand
+    output_table["fullspecs"] = tables
+    output_table["searchspecs"] = {"partnum": partnum, **flatten(tables)}
+    
+    output_table["searchspecs"]["id"] = id
+    


+    print(output_table)
+
+    run_cmd("rm " + output_dir + "/*.json") # not reliable!
+    with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
+        json.dump(output_table["searchspecs"], json_file)
+
+    return output_table


+def flatten(tables):
+    def convert_to_number(s):
+        try:
+            # First, try converting to an integer.
+            return int(s)
+        except ValueError:
+            # If that fails, try converting to a float.
+            try:
+                return float(s)
+            except ValueError:
+                # If it fails again, return the original string.
+                return s
+    out = dict()
+    print("{")
+    for table in tables.keys():
+        for key in tables[table].keys():
+            if len(key) < 64:
+                keyname = key
+            else:
+                keyname = key[0:64]

-    return tables
+            fullkeyname = (table + ": " + keyname).replace(".","")
+            if type(tables[table][key]) is not tuple:
+                out[fullkeyname] = convert_to_number(tables[table][key])
+                print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
+            elif len(tables[table][key]) == 1:
+                out[fullkeyname] = convert_to_number(tables[table][key][0])
+                
+                print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
+
+    print("}")
+    return out

    

 if __name__ == "__main__":
-    parse("test2.pdf", "10GXS13")
+    parse("test2.pdf", "cables/10GXS13", "10GXS13")
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ pypdf2==2.12.1
 alive-progress
 requests
 git+https://github.com/Byeongdulee/python-urx.git
-psycopg2
+meilisearch
 pyyaml
 Flask
 selenium
--- a/search.py
+++ b/search.py
--- a/util.py
+++ b/util.py
@@ -123,7 +123,7 @@ class Logger(object):
        self.terminal = sys.stdout

    def write(self, message):
-        self.log.write(message)
+        #self.log.write(message)
        #close(filename)
        #self.log = open(filename, "a")
        try:
Author	SHA1	Message	Date
Dustin Thomas	eea8c9f5fa	Merge branch 'main' into dthomas_meilisearch	2024-02-20 10:04:33 -06:00
Cole Deck	fe5de4e54c	Adjust datasheet parsing for meilisearch, add dockerfile	2024-02-17 23:08:21 -06:00
Dustin Thomas	68b95bfe17	add a module for using meilisearch	2024-02-17 22:46:11 -06:00
Dustin Thomas	e3e9b855f9	add compose file with meilisearch image	2024-02-17 22:45:30 -06:00
Cole Deck	523915feb0	add partnum to parsing	2024-02-17 20:31:43 -06:00
Cole Deck	b5b2a936c1	Switch to binary dependency	2024-02-17 20:25:51 -06:00
Cole Deck	afd144bd32	Fix requirements.txt	2024-02-17 20:22:54 -06:00
Cole Deck	eb221a5206	Create main runner app, with async multithreading	2024-02-17 20:21:42 -06:00
Cole Deck	db7c8c4577	Add video abstraction class	2024-02-17 20:06:37 -06:00
BlueOceanWave	21b1bf7992	Added system exit	2024-02-16 20:36:43 -06:00
Cole Deck	d376dba67c	Add keyboard control scripts, add websocket server	2024-02-08 12:35:30 -06:00
Cole Deck	95631dbdbe	Add LED array mapping, load base config for tabletop rings, and image/video player mode	2024-01-25 20:06:10 -06:00
Cole Deck	9aef296763	More logging	2024-01-18 16:45:37 -06:00
Cole Deck	2b287492de	Make firefox start faster, verbose start	2024-01-18 16:41:57 -06:00
Cole Deck	d2a4d93590	Working network test	2024-01-18 16:35:02 -06:00
Cole Deck	58605dbe85	test server/client setup	2024-01-18 16:00:12 -06:00
Cole Deck	7bf3276ce9	Add basic windows control system. Needs VM functional to continue	2024-01-17 20:14:20 -06:00
Cole Deck	818688452b	Add local loading page	2024-01-17 16:46:20 -06:00
Cole Deck	01526524d4	Create main runner app, with async multithreading	2024-01-17 16:06:15 -06:00
Cole Deck	33671683ea	test rendering json in HTML table format	2024-01-17 09:23:06 -06:00
Cole Deck	fad885c610	Add UR5 control test, datasheet JSON output	2024-01-16 17:27:55 -06:00