Adjust datasheet parsing for meilisearch, add dockerfile

This commit is contained in:
Cole Deck 2024-02-17 23:08:21 -06:00
parent 523915feb0
commit fe5de4e54c
4 changed files with 76 additions and 8 deletions

11
Dockerfile Normal file
View File

@ -0,0 +1,11 @@
FROM python:latest
RUN apt-get update && apt-get install -y libgl1-mesa-glx ghostscript && apt-get clean && rm -rf /var/lib/apt/lists
COPY . .
#COPY config-server.yml config.yml
RUN pip3 install -r requirements.txt
CMD ["python3", "run.py"]
EXPOSE 5000
EXPOSE 8000
EXPOSE 9000

View File

@ -227,7 +227,7 @@ def get_multi(partnums):
if __name__ == "__main__":
partnums = ["10GXS12", "RST 5L-RKT 5L-949",
partnums = ["7958A", "10GXS12", "RST 5L-RKT 5L-949",
"10GXS13",
"10GXW12",
"10GXW13",

View File

@ -9,6 +9,8 @@ from PIL import Image
import io
import json
from util import fprint
import uuid
from util import run_cmd
def parse(filename, output_dir, partnum):
@ -163,20 +165,75 @@ def parse(filename, output_dir, partnum):
previous_table = table_name
# remove multi-line values that occasionally squeak through
def replace_newlines_in_dict(d):
for key, value in d.items():
if isinstance(value, str):
# Replace \n with " " if the value is a string
d[key] = value.replace('\n', ' ')
elif isinstance(value, dict):
# Recursively call the function if the value is another dictionary
replace_newlines_in_dict(value)
return d
fprint(tables)
tables = replace_newlines_in_dict(tables)
# summary
tables["partnum"] = partnum
with open(output_dir + "/tables.json", 'w') as json_file:
json.dump(tables, json_file)
output_table = dict()
output_table["partnum"] = partnum
id = str(uuid.uuid4())
output_table["id"] = id
#output_table["position"] = id
#output_table["brand"] = brand
output_table["fullspecs"] = tables
output_table["searchspecs"] = {"partnum": partnum, **flatten(tables)}
output_table["searchspecs"]["id"] = id
print(output_table)
run_cmd("rm " + output_dir + "/*.json") # not reliable!
with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
json.dump(output_table["searchspecs"], json_file)
return output_table
return tables
def flatten(tables):
def convert_to_number(s):
try:
# First, try converting to an integer.
return int(s)
except ValueError:
# If that fails, try converting to a float.
try:
return float(s)
except ValueError:
# If it fails again, return the original string.
return s
out = dict()
print("{")
for table in tables.keys():
for key in tables[table].keys():
if len(key) < 64:
keyname = key
else:
keyname = key[0:64]
fullkeyname = (table + ": " + keyname).replace(".","")
if type(tables[table][key]) is not tuple:
out[fullkeyname] = convert_to_number(tables[table][key])
print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
elif len(tables[table][key]) == 1:
out[fullkeyname] = convert_to_number(tables[table][key][0])
print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
print("}")
return out

View File

@ -123,7 +123,7 @@ class Logger(object):
self.terminal = sys.stdout
def write(self, message):
self.log.write(message)
#self.log.write(message)
#close(filename)
#self.log = open(filename, "a")
try: