From e3e9b855f933a4cc59bcb76215d32e2bea06a05e Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Sat, 17 Feb 2024 22:45:30 -0600 Subject: [PATCH 01/11] add compose file with meilisearch image --- compose.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 compose.yml diff --git a/compose.yml b/compose.yml new file mode 100644 index 0000000..51434a5 --- /dev/null +++ b/compose.yml @@ -0,0 +1,13 @@ +services: + meilisearch: + image: "getmeili/meilisearch:v1.6.2" + ports: + - "7700:7700" + environment: + MEILI_MASTER_KEY: fluffybunnyrabbit + MEILI_NO_ANALYTICS: true + volumes: + - "meili_data:/meili_data" + +volumes: + meili_data: \ No newline at end of file From 68b95bfe176950d7e0eae42e4e1eecbbf7430034 Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Sat, 17 Feb 2024 22:46:11 -0600 Subject: [PATCH 02/11] add a module for using meilisearch --- requirements.txt | 2 +- search.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 search.py diff --git a/requirements.txt b/requirements.txt index 76fc407..2ab841f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ pypdf2==2.12.1 alive-progress requests git+https://github.com/Byeongdulee/python-urx.git -psycopg2-binary +meilisearch pyyaml Flask selenium diff --git a/search.py b/search.py new file mode 100644 index 0000000..e69de29 From d0ea6962740695448362b3549e52db2823f0756a Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Tue, 20 Feb 2024 10:15:56 -0600 Subject: [PATCH 03/11] reorganize gitignore and add comments --- .gitignore | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9261e82..a21f53b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,16 @@ +# python venv __pycache__ +# cable data folder(s) cables +cables-sample.zip +# meilisearch (mainly where I've put the data volume for the container) +meili_data +# IDE things .vscode -output.log +.idea +# videos *.webm output.mp4 +# log files output.log 
-cables-sample.zip From e903150fd4164d07ea4af5edd60deab952bbbfb9 Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Tue, 20 Feb 2024 10:33:01 -0600 Subject: [PATCH 04/11] Add functions for connecting to Meilisearch and adding documents --- search.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/search.py b/search.py index e69de29..b45934a 100644 --- a/search.py +++ b/search.py @@ -0,0 +1,49 @@ +"""Interactions with the Meilisearch API for adding and searching cables.""" +from meilisearch import Client +from meilisearch.task import TaskInfo +import json + +DEFAULT_URL = "http://localhost:7700" +DEFAULT_APIKEY = "fluffybunnyrabbit" # I WOULD RECOMMEND SOMETHING MORE SECURE +DEFAULT_INDEX = "cables" + + +class JukeboxSearch: + """Class for interacting with the Meilisearch API.""" + def __init__(self, url: str = None, api_key: str = None, index: str = None): + """Connect to Meilisearch and perform first-run tasks as necessary. + + :param url: Address of the Meilisearch server. Defaults to ``http://localhost:7700`` if unspecified. + :param api_key: API key used to authenticate with Meilisearch. It is highly recommended to set this as something + secure if you can access this endpoint publicly, but you can ignore this and set Meilisearch's default API key + to ``fluffybunnyrabbit``. + :param index: The name of the index to configure. Defaults to ``cables`` if unspecified.""" + # connect to Meilisearch + url = url or DEFAULT_URL + api_key = api_key or DEFAULT_APIKEY + self.index = index or DEFAULT_INDEX + self.client = Client(url, api_key) + # create the index if it does not exist already + if self.client.get_index(self.index) is None: + self.client.create_index(self.index) + + def add_document(self, document: dict) -> TaskInfo: + """Add a cable to the Meilisearch index. + + :param document: Dictionary containing all the cable data. 
+ :returns: A TaskInfo object for the addition of the new document.""" + return self.client.index(self.index).add_documents(document) + + def add_documents(self, documents: list): + """Add a list of cables to the Meilisearch index. + + :param documents: List of dictionaries containing all the cable data. + :returns: A TaskInfo object for the last new document.""" + taskinfo = None + for i in documents: + taskinfo = self.add_document(i) + return taskinfo + + def query(self): + """Execute a search query on the Meilisearch index.""" + pass From fc9ff4c8b268acfa52f8a6fc39f920848e0186fd Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Fri, 1 Mar 2024 19:13:28 -0600 Subject: [PATCH 05/11] split lists if they contain more than 2 commas --- read_datasheet.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/read_datasheet.py b/read_datasheet.py index 729c8a9..6b3273d 100755 --- a/read_datasheet.py +++ b/read_datasheet.py @@ -232,6 +232,10 @@ def flatten(tables): print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") + # if the item has at least two commas in it, split it + if tables[table][key].count(',') >= 2: + out[fullkeyname] = map(lambda x: x.strip(), tables[table][key].split(",")) + print("}") return out From f12d8a8062c39545783a8c78472caba0abf00bf9 Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Fri, 1 Mar 2024 19:24:47 -0600 Subject: [PATCH 06/11] add print statement --- read_datasheet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/read_datasheet.py b/read_datasheet.py index 6b3273d..5bec00c 100755 --- a/read_datasheet.py +++ b/read_datasheet.py @@ -235,6 +235,8 @@ def flatten(tables): # if the item has at least two commas in it, split it if tables[table][key].count(',') >= 2: out[fullkeyname] = map(lambda x: x.strip(), tables[table][key].split(",")) + print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") + print("}") return out From 6edd0b4ef0f888074ea58c3c5314d5e49eb2994c Mon Sep 17 00:00:00 2001 From: Dustin Thomas 
Date: Fri, 1 Mar 2024 20:37:02 -0600 Subject: [PATCH 07/11] fix map datatype --- read_datasheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/read_datasheet.py b/read_datasheet.py index f19d30f..d22a51c 100755 --- a/read_datasheet.py +++ b/read_datasheet.py @@ -236,7 +236,7 @@ def flatten(tables): # if the item has at least two commas in it, split it if tables[table][key].count(',') >= 2: - out[fullkeyname] = map(lambda x: x.strip(), tables[table][key].split(",")) + out[fullkeyname] = list(map(lambda x: x.strip(), tables[table][key].split(","))) print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") From 4561b1c1a3a98ff6c3384694cbc33ee8cc30308a Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Fri, 1 Mar 2024 20:37:22 -0600 Subject: [PATCH 08/11] fix error when index does not exist --- search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/search.py b/search.py index b45934a..c962b86 100644 --- a/search.py +++ b/search.py @@ -1,6 +1,7 @@ """Interactions with the Meilisearch API for adding and searching cables.""" from meilisearch import Client from meilisearch.task import TaskInfo +from meilisearch.errors import MeilisearchApiError import json DEFAULT_URL = "http://localhost:7700" @@ -24,7 +25,9 @@ class JukeboxSearch: self.index = index or DEFAULT_INDEX self.client = Client(url, api_key) # create the index if it does not exist already - if self.client.get_index(self.index) is None: + try: + self.client.get_index(self.index) + except MeilisearchApiError as _: self.client.create_index(self.index) def add_document(self, document: dict) -> TaskInfo: From aadb6ba24d7964cc996bfb5046e2917575cbad6d Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Fri, 1 Mar 2024 21:24:37 -0600 Subject: [PATCH 09/11] add search functions to JukeboxSearch --- read_datasheet.py | 2 +- search.py | 60 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/read_datasheet.py 
b/read_datasheet.py index d22a51c..0517608 100755 --- a/read_datasheet.py +++ b/read_datasheet.py @@ -235,7 +235,7 @@ def flatten(tables): print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") # if the item has at least two commas in it, split it - if tables[table][key].count(',') >= 2: + if tables[table][key].count(',') > 0: out[fullkeyname] = list(map(lambda x: x.strip(), tables[table][key].split(","))) print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") diff --git a/search.py b/search.py index c962b86..88aef38 100644 --- a/search.py +++ b/search.py @@ -7,21 +7,28 @@ import json DEFAULT_URL = "http://localhost:7700" DEFAULT_APIKEY = "fluffybunnyrabbit" # I WOULD RECOMMEND SOMETHING MORE SECURE DEFAULT_INDEX = "cables" +DEFAULT_FILTERABLE_ATTRS = ["partnum", "uuid", "position"] # default filterable attributes class JukeboxSearch: """Class for interacting with the Meilisearch API.""" - def __init__(self, url: str = None, api_key: str = None, index: str = None): + def __init__(self, + url: str = None, + api_key: str = None, + index: str = None, + filterable_attrs: list = None): """Connect to Meilisearch and perform first-run tasks as necessary. :param url: Address of the Meilisearch server. Defaults to ``http://localhost:7700`` if unspecified. :param api_key: API key used to authenticate with Meilisearch. It is highly recommended to set this as something secure if you can access this endpoint publicly, but you can ignore this and set Meilisearch's default API key to ``fluffybunnyrabbit``. - :param index: The name of the index to configure. Defaults to ``cables`` if unspecified.""" + :param index: The name of the index to configure. Defaults to ``cables`` if unspecified. 
+ :param filterable_attrs: List of all the attributes we want to filter by.""" # connect to Meilisearch url = url or DEFAULT_URL api_key = api_key or DEFAULT_APIKEY + filterable_attrs = filterable_attrs or DEFAULT_FILTERABLE_ATTRS self.index = index or DEFAULT_INDEX self.client = Client(url, api_key) # create the index if it does not exist already @@ -29,13 +36,17 @@ class JukeboxSearch: self.client.get_index(self.index) except MeilisearchApiError as _: self.client.create_index(self.index) + # make a variable to easily reference the index + self.idxref = self.client.index(self.index) + + self.idxref.update_filterable_attributes(filterable_attrs) def add_document(self, document: dict) -> TaskInfo: """Add a cable to the Meilisearch index. :param document: Dictionary containing all the cable data. :returns: A TaskInfo object for the addition of the new document.""" - return self.client.index(self.index).add_documents(document) + return self.idxref.add_documents(document) def add_documents(self, documents: list): """Add a list of cables to the Meilisearch index. @@ -47,6 +58,43 @@ class JukeboxSearch: taskinfo = self.add_document(i) return taskinfo - def query(self): - """Execute a search query on the Meilisearch index.""" - pass + def search(self, query: str, filters: str = None): + """Execute a search query on the Meilisearch index. + + :param query: Search query + :param filters: A meilisearch compatible filter statement. + :returns: The search results dict. Actual results are in a list under "hits", but there are other nice values that are useful in the root element.""" + if filters: + q = self.idxref.search(query, {"filter": filters}) + else: + q = self.idxref.search(query) + return q + + def _filter_one(self, filter: str): + """Get the first item to match a filter. + + :param filter: A meilisearch compatible filter statement. 
+ :returns: A dict containing the results; If no results found, an empty dict.""" + q = self.search("", filter) + if q["estimatedTotalHits"] != 0: + return ["hits"][0] + else: + return dict() + + def get_position(self, position: str): + """Get a part by position. + + :param position: The position to search for.""" + return self._filter_one(f"position = {position}") + + def get_uuid(self, uuid: str): + """Get a specific UUID. + + :param uuid: The UUID to search for.""" + return self._filter_one(f"uuid = {uuid}") + + def get_partnum(self, partnum: str): + """Get a specific part number. + + :param partnum: The part number to search for.""" + return self._filter_one(f"partnum = {partnum}") From b18355fc14b989d5452745d64b0f9d87e4d0b3f2 Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Fri, 8 Mar 2024 19:12:41 -0600 Subject: [PATCH 10/11] nuke database.py --- database.py | 140 ---------------------------------------------------- 1 file changed, 140 deletions(-) delete mode 100644 database.py diff --git a/database.py b/database.py deleted file mode 100644 index 2befec2..0000000 --- a/database.py +++ /dev/null @@ -1,140 +0,0 @@ -"""This module contains functionality for interacting with a PostgreSQL database. It will automatically handle error -conditions (i.e. missing columns) without terminating the entire program. Use the :py:class:`DBConnector` class to -handle database interactions, either as a standalone object or in a context manager.""" -from __future__ import annotations - -import os -import psycopg2 -from psycopg2 import DatabaseError, OperationalError -from psycopg2.errors import UndefinedColumn - -DB_ADDRESS = os.getenv('DB_ADDRESS', 'localhost') -DB_PORT = os.getenv('DB_PORT', 5432) -DB_USER = os.getenv('DB_USER', 'postgres') -DB_PASSWORD = os.getenv('DB_PASSWORD', '') -DB_NAME = os.getenv('DB_NAME', 'postgres') -DB_TABLE = os.getenv('DB_TABLE', 'cables') - - -class DBConnector: - """Context managed database class. 
Use with statements to automatically open and close the database connection, like - so: - - .. code-block:: python - with DBConnector() as db: - db.read() - """ - - def _db_start(self): - """Setup the database connection and cursor.""" - try: - self.conn = psycopg2.connect( - f"host={DB_ADDRESS} port={DB_PORT} dbname={DB_NAME} user={DB_USER} password={DB_PASSWORD}") - self.cur = self.conn.cursor() - except OperationalError as e: - raise e - - def _db_stop(self): - """Close the cursor and connection.""" - self.cur.close() - self.conn.close() - - def __init__(self): - self._db_start() - - def __del__(self): - self._db_stop() - - def __enter__(self): - self._db_start() - - def __exit__(self): - self._db_stop() - - def _get_cols(self) -> set[str]: - """Get the list of columns in the database. - - :return: A list of column names.""" - query = f"select COLUMN_NAME from information_schema.columns where table_name={DB_TABLE}" - rows = {x["COLUMN_NAME"] for x in self._query(query)} - return rows - - def _column_parity(self, columns: list[str] | set[str]) -> set[str]: - """If the listed columns are not in the database, add them. - - :param columns: The columns we expect are in the database. - :return: The list of columns in the database after querying.""" - cols = set(columns) - existing = self._get_cols() - needs = cols.difference(existing.intersection(cols)) - if len(needs) > 0: - query = f"ALTER TABLE {DB_TABLE} {', '.join([f'ADD COLUMN {c}' for c in needs])}" - self._query(query) - existing = self._get_cols() - return existing - - def _query(self, sql) -> list[dict]: - """Basic function for running queries. - - :param sql: SQL query as plaintext. 
- :return: Results of the query, or an empty list if none.""" - result = [] - try: - self.cur.execute(sql) - result = self._read_dict() - except DatabaseError as e: - print(f"ERROR {e.pgcode}: {e.pgerror}\n" - f"Caused by query: {sql}") - finally: - return result - - def _read_dict(self) -> list[dict]: - """Read the cursor as a list of dictionaries. psycopg2 defaults to using a list of tuples, so we want to convert - each row into a dictionary before we return it.""" - cols = [i.name for i in self.cur.description] - results = [] - for row in self.cur: - row_dict = {} - for i in range(0, len(row)): - if row[i]: - row_dict = {**row_dict, cols[i]: row[i]} - results.append(row_dict) - return results - - def read(self, **kwargs) -> list[dict]: - """Read rows from a database that match the specified filters. - - :param kwargs: Column constraints; i.e. what value to filter by in what column. - :returns: A list of dictionaries of all matching rows, or an empty list if no match.""" - args = [] - for kw in kwargs.keys(): - args.append(f"{kw} ILIKE {kwargs['kw']}") - query = f"SELECT * FROM {DB_TABLE}" - if len(args) > 0: - query += f" WHERE {' AND '.join(args)}" - return self._query(query) - - def write(self, **kwargs) -> dict: - """Write a row to the database. - - :param kwargs: Values to write for each database; specify each column separately! - :returns: The row you just added.""" - self._column_parity(set(kwargs.keys())) - values = [] - for val in kwargs.keys(): - values.append(kwargs[val]) - query = f"INSERT INTO {DB_TABLE} ({', '.join(kwargs.keys())}) VALUES ({', '.join(values)})" - self._query(query) - return kwargs - - def write_all(self, items: list[dict]) -> list[dict]: - """Write multiple rows to the database. - - :param items: Rows to write, as a list of dictionaries. 
- :returns: The rows that were added successfully.""" - successes = [] - for i in items: - res0 = self.write(**i) - if res0: - successes.append(res0) - return successes From a63faba2aa666aca513f3cc2d2e8591f7afe86d9 Mon Sep 17 00:00:00 2001 From: Dustin Thomas Date: Tue, 12 Mar 2024 16:08:47 -0500 Subject: [PATCH 11/11] Add checks to updating filterable attributes to avoid hitting weird edge cases --- search.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/search.py b/search.py index 88aef38..6e6a720 100644 --- a/search.py +++ b/search.py @@ -39,7 +39,8 @@ class JukeboxSearch: # make a variable to easily reference the index self.idxref = self.client.index(self.index) - self.idxref.update_filterable_attributes(filterable_attrs) + # update filterable attributes if needed + self.update_filterables(filterable_attrs) def add_document(self, document: dict) -> TaskInfo: """Add a cable to the Meilisearch index. @@ -57,6 +58,18 @@ class JukeboxSearch: for i in documents: taskinfo = self.add_document(i) return taskinfo + + def update_filterables(self, filterables: list): + """Update filterable attributes and wait for database to fully index. If the filterable attributes matches the + current attributes in the database, don't update (saves reindexing). + + :param filterables: List of all filterable attributes""" + + existing_filterables = self.idxref.get_filterable_attributes() + if len(set(existing_filterables).difference(set(filterables))) > 0: + taskref = self.idxref.update_filterable_attributes(filterables) + + self.client.wait_for_task(taskref.index_uid) def search(self, query: str, filters: str = None): """Execute a search query on the Meilisearch index. @@ -98,3 +111,7 @@ class JukeboxSearch: :param partnum: The part number to search for.""" return self._filter_one(f"partnum = {partnum}") + +# entrypoint +if __name__ == "__main__": + jbs = JukeboxSearch() \ No newline at end of file