Compare commits
25 Commits
dthomas-db ... f12d8a8062
SHA1
f12d8a8062
fc9ff4c8b2
e903150fd4
d0ea696274
eea8c9f5fa
fe5de4e54c
68b95bfe17
e3e9b855f9
523915feb0
b5b2a936c1
afd144bd32
eb221a5206
db7c8c4577
21b1bf7992
d376dba67c
95631dbdbe
9aef296763
2b287492de
d2a4d93590
58605dbe85
7bf3276ce9
818688452b
01526524d4
33671683ea
fad885c610
.gitignore (vendored, 11 lines changed)

@@ -1,9 +1,16 @@
+# python
 venv
 __pycache__
+# cable data folder(s)
 cables
+cables-sample.zip
+# meilisearch (mainly where I've put the data volume for the container)
+meili_data
+# IDE things
 .vscode
-output.log
+.idea
+# videos
 *.webm
 output.mp4
+# log files
 output.log
-cables-sample.zip
Dockerfile (new file, 11 lines)

@@ -0,0 +1,11 @@
+FROM python:latest
+
+RUN apt-get update && apt-get install -y libgl1-mesa-glx ghostscript && apt-get clean && rm -rf /var/lib/apt/lists
+COPY . .
+#COPY config-server.yml config.yml
+RUN pip3 install -r requirements.txt
+
+CMD ["python3", "run.py"]
+EXPOSE 5000
+EXPOSE 8000
+EXPOSE 9000
compose.yml (new file, 13 lines)

@@ -0,0 +1,13 @@
+services:
+  meilisearch:
+    image: "getmeili/meilisearch:v1.6.2"
+    ports:
+      - "7700:7700"
+    environment:
+      MEILI_MASTER_KEY: fluffybunnyrabbit
+      MEILI_NO_ANALYTICS: true
+    volumes:
+      - "meili_data:/meili_data"
+
+volumes:
+  meili_data:
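For a quick sanity check that the Compose service is reachable before wiring it into the pipeline, something like the following can be used (an illustrative sketch, not part of this changeset; it assumes the container is running locally with the master key from compose.yml):

from meilisearch import Client

# Connection details mirror compose.yml (port 7700, MEILI_MASTER_KEY).
client = Client("http://localhost:7700", "fluffybunnyrabbit")

# health() reports server availability, e.g. {'status': 'available'}.
print(client.health())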
@@ -157,7 +157,7 @@ def get_multi(partnums):
             bar(skipped=True)
         fprint("Parsing Datasheet contents of " + partnum)
         bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-        read_datasheet.parse(path, output_dir)
+        read_datasheet.parse(path, output_dir, partnum)
         bar(skipped=False)
 
     def __downloaded_datasheet(partnum, path, output_dir):
@@ -166,7 +166,7 @@ def get_multi(partnums):
         bar(skipped=False)
         fprint("Parsing Datasheet contents of " + partnum)
         bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..."
-        read_datasheet.parse(path, output_dir)
+        read_datasheet.parse(path, output_dir, partnum)
         bar(skipped=False)
 
     for partnum in partnums:
@@ -227,7 +227,7 @@ def get_multi(partnums):
 
 
 if __name__ == "__main__":
-    partnums = ["10GXS12", "RST 5L-RKT 5L-949",
+    partnums = ["7958A", "10GXS12", "RST 5L-RKT 5L-949",
                 "10GXS13",
                 "10GXW12",
                 "10GXW13",
@@ -9,8 +9,10 @@ from PIL import Image
 import io
 import json
 from util import fprint
+import uuid
+from util import run_cmd
 
-def parse(filename, output_dir):
+def parse(filename, output_dir, partnum):
 
     # Extract table data
 
@@ -163,18 +165,83 @@ def parse(filename, output_dir):
 
             previous_table = table_name
 
+    # remove multi-line values that occasionally squeak through
+    def replace_newlines_in_dict(d):
+        for key, value in d.items():
+            if isinstance(value, str):
+                # Replace \n with " " if the value is a string
+                d[key] = value.replace('\n', ' ')
+            elif isinstance(value, dict):
+                # Recursively call the function if the value is another dictionary
+                replace_newlines_in_dict(value)
+        return d
+
-    fprint(tables)
-    with open(output_dir + "/tables.json", 'w') as json_file:
-        json.dump(tables, json_file)
+    tables = replace_newlines_in_dict(tables)
+    # summary
+    output_table = dict()
+    output_table["partnum"] = partnum
+    id = str(uuid.uuid4())
+    output_table["id"] = id
+    #output_table["position"] = id
+    #output_table["brand"] = brand
+    output_table["fullspecs"] = tables
+    output_table["searchspecs"] = {"partnum": partnum, **flatten(tables)}
+
+    output_table["searchspecs"]["id"] = id
+
+    print(output_table)
+
+    run_cmd("rm " + output_dir + "/*.json") # not reliable!
+    with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file:
+        json.dump(output_table["searchspecs"], json_file)
+
+    return output_table
+
-    return tables
+
+def flatten(tables):
+    def convert_to_number(s):
+        try:
+            # First, try converting to an integer.
+            return int(s)
+        except ValueError:
+            # If that fails, try converting to a float.
+            try:
+                return float(s)
+            except ValueError:
+                # If it fails again, return the original string.
+                return s
+    out = dict()
+    print("{")
+    for table in tables.keys():
+        for key in tables[table].keys():
+            if len(key) < 64:
+                keyname = key
+            else:
+                keyname = key[0:64]
+
+            fullkeyname = (table + ": " + keyname).replace(".","")
+            if type(tables[table][key]) is not tuple:
+                out[fullkeyname] = convert_to_number(tables[table][key])
+                print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
+            elif len(tables[table][key]) == 1:
+                out[fullkeyname] = convert_to_number(tables[table][key][0])
+                print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
+                # if the item has at least two commas in it, split it
+                if tables[table][key].count(',') >= 2:
+                    out[fullkeyname] = map(lambda x: x.strip(), tables[table][key].split(","))
+                    print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",")
+
+    print("}")
+    return out
+
+
 if __name__ == "__main__":
-    parse("test2.pdf", "10GXS13")
+    parse("test2.pdf", "cables/10GXS13", "10GXS13")
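As a rough illustration of what the new flatten() helper produces (a sketch with hypothetical table names and values, not taken from a real datasheet): keys are prefixed with their table name, periods are stripped, and numeric strings are converted to int or float where possible.

tables = {
    "Physical Characteristics (Overall)": {"Bulk Cable Weight": "32 lbs", "Diameter": "0.268"},
    "Electrical Characteristics": {"Nom Conductor DC Resistance": "6.9"},
}

flat = flatten(tables)
# Expected result, given the conversion rules above:
# {"Physical Characteristics (Overall): Bulk Cable Weight": "32 lbs",
#  "Physical Characteristics (Overall): Diameter": 0.268,
#  "Electrical Characteristics: Nom Conductor DC Resistance": 6.9}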
@@ -5,7 +5,7 @@ pypdf2==2.12.1
 alive-progress
 requests
 git+https://github.com/Byeongdulee/python-urx.git
-psycopg2
+meilisearch
 pyyaml
 Flask
 selenium
search.py (new file, 49 lines)

@@ -0,0 +1,49 @@
+"""Interactions with the Meilisearch API for adding and searching cables."""
+from meilisearch import Client
+from meilisearch.task import TaskInfo
+import json
+
+DEFAULT_URL = "http://localhost:7700"
+DEFAULT_APIKEY = "fluffybunnyrabbit"  # I WOULD RECOMMEND SOMETHING MORE SECURE
+DEFAULT_INDEX = "cables"
+
+
+class JukeboxSearch:
+    """Class for interacting with the Meilisearch API."""
+    def __init__(self, url: str = None, api_key: str = None, index: str = None):
+        """Connect to Meilisearch and perform first-run tasks as necessary.
+
+        :param url: Address of the Meilisearch server. Defaults to ``http://localhost:7700`` if unspecified.
+        :param api_key: API key used to authenticate with Meilisearch. It is highly recommended to set this as something
+        secure if you can access this endpoint publicly, but you can ignore this and set Meilisearch's default API key
+        to ``fluffybunnyrabbit``.
+        :param index: The name of the index to configure. Defaults to ``cables`` if unspecified."""
+        # connect to Meilisearch
+        url = url or DEFAULT_URL
+        api_key = api_key or DEFAULT_APIKEY
+        self.index = index or DEFAULT_INDEX
+        self.client = Client(url, api_key)
+        # create the index if it does not exist already
+        if self.client.get_index(self.index) is None:
+            self.client.create_index(self.index)
+
+    def add_document(self, document: dict) -> TaskInfo:
+        """Add a cable to the Meilisearch index.
+
+        :param document: Dictionary containing all the cable data.
+        :returns: A TaskInfo object for the addition of the new document."""
+        return self.client.index(self.index).add_documents(document)
+
+    def add_documents(self, documents: list):
+        """Add a list of cables to the Meilisearch index.
+
+        :param documents: List of dictionaries containing all the cable data.
+        :returns: A TaskInfo object for the last new document."""
+        taskinfo = None
+        for i in documents:
+            taskinfo = self.add_document(i)
+        return taskinfo
+
+    def query(self):
+        """Execute a search query on the Meilisearch index."""
+        pass
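A minimal sketch of how this class could be fed from the parser above (not code from this changeset; it assumes the Meilisearch container from compose.yml is running and uses the defaults defined in search.py):

import read_datasheet
from search import JukeboxSearch

jbs = JukeboxSearch()  # http://localhost:7700, "fluffybunnyrabbit", index "cables"

# parse() now returns output_table; its "searchspecs" dict already carries the
# generated document id, so it can be pushed straight into the index.
output_table = read_datasheet.parse("test2.pdf", "cables/10GXS13", "10GXS13")
jbs.add_document(output_table["searchspecs"])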
util.py (2 lines changed)

@@ -123,7 +123,7 @@ class Logger(object):
         self.terminal = sys.stdout
 
     def write(self, message):
-        self.log.write(message)
+        #self.log.write(message)
         #close(filename)
         #self.log = open(filename, "a")
         try: