Compare commits
	
		
			21 Commits
		
	
	
		
			dthomas-db
			...
			eea8c9f5fa
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
|  | eea8c9f5fa |  |  |
|  | fe5de4e54c |  |  |
|  | 68b95bfe17 |  |  |
|  | e3e9b855f9 |  |  |
|  | 523915feb0 |  |  |
|  | b5b2a936c1 |  |  |
|  | afd144bd32 |  |  |
|  | eb221a5206 |  |  |
|  | db7c8c4577 |  |  |
|  | 21b1bf7992 |  |  |
|  | d376dba67c |  |  |
|  | 95631dbdbe |  |  |
|  | 9aef296763 |  |  |
|  | 2b287492de |  |  |
|  | d2a4d93590 |  |  |
|  | 58605dbe85 |  |  |
|  | 7bf3276ce9 |  |  |
|  | 818688452b |  |  |
|  | 01526524d4 |  |  |
|  | 33671683ea |  |  |
|  | fad885c610 |  |  |
							
								
								
									
11 changed lines · Dockerfile · Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| FROM python:latest | ||||
|  | ||||
| RUN apt-get update && apt-get install -y libgl1-mesa-glx ghostscript && apt-get clean && rm -rf /var/lib/apt/lists | ||||
| COPY . . | ||||
| #COPY config-server.yml config.yml | ||||
| RUN pip3 install -r requirements.txt | ||||
|  | ||||
| CMD ["python3", "run.py"] | ||||
| EXPOSE 5000 | ||||
| EXPOSE 8000 | ||||
| EXPOSE 9000 | ||||
							
								
								
									
13 changed lines · compose.yml · Normal file
									
								
							| @@ -0,0 +1,13 @@ | ||||
| services: | ||||
|   meilisearch: | ||||
|     image: "getmeili/meilisearch:v1.6.2" | ||||
|     ports: | ||||
|       - "7700:7700" | ||||
|     environment: | ||||
|       MEILI_MASTER_KEY: fluffybunnyrabbit | ||||
|       MEILI_NO_ANALYTICS: true | ||||
|     volumes: | ||||
|       - "meili_data:/meili_data" | ||||
|  | ||||
| volumes: | ||||
|   meili_data: | ||||
| @@ -157,7 +157,7 @@ def get_multi(partnums): | ||||
|             bar(skipped=True) | ||||
|             fprint("Parsing Datasheet contents of " + partnum) | ||||
|             bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." | ||||
|             read_datasheet.parse(path, output_dir) | ||||
|             read_datasheet.parse(path, output_dir, partnum) | ||||
|             bar(skipped=False) | ||||
|  | ||||
|         def __downloaded_datasheet(partnum, path, output_dir): | ||||
| @@ -166,7 +166,7 @@ def get_multi(partnums): | ||||
|             bar(skipped=False) | ||||
|             fprint("Parsing Datasheet contents of " + partnum) | ||||
|             bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." | ||||
|             read_datasheet.parse(path, output_dir) | ||||
|             read_datasheet.parse(path, output_dir, partnum) | ||||
|             bar(skipped=False) | ||||
|  | ||||
|         for partnum in partnums: | ||||
| @@ -227,7 +227,7 @@ def get_multi(partnums): | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     partnums = ["10GXS12", "RST 5L-RKT 5L-949",  | ||||
|     partnums = ["7958A", "10GXS12", "RST 5L-RKT 5L-949",  | ||||
| "10GXS13", | ||||
| "10GXW12", | ||||
| "10GXW13", | ||||
|   | ||||
| @@ -9,8 +9,10 @@ from PIL import Image | ||||
| import io | ||||
| import json | ||||
| from util import fprint | ||||
| import uuid | ||||
| from util import run_cmd | ||||
|  | ||||
| def parse(filename, output_dir): | ||||
| def parse(filename, output_dir, partnum): | ||||
|  | ||||
|     # Extract table data | ||||
|  | ||||
| @@ -163,18 +165,77 @@ def parse(filename, output_dir): | ||||
|  | ||||
|         previous_table = table_name | ||||
|      | ||||
|     # remove multi-line values that occasionally squeak through | ||||
|     def replace_newlines_in_dict(d): | ||||
|         for key, value in d.items(): | ||||
|             if isinstance(value, str): | ||||
|                 # Replace \n with " " if the value is a string | ||||
|                 d[key] = value.replace('\n', ' ') | ||||
|             elif isinstance(value, dict): | ||||
|                 # Recursively call the function if the value is another dictionary | ||||
|                 replace_newlines_in_dict(value) | ||||
|         return d | ||||
|      | ||||
|     tables = replace_newlines_in_dict(tables) | ||||
|  | ||||
|     fprint(tables) | ||||
|     with open(output_dir + "/tables.json", 'w') as json_file: | ||||
|         json.dump(tables, json_file) | ||||
|     # summary | ||||
|  | ||||
|     output_table = dict() | ||||
|     output_table["partnum"] = partnum | ||||
|     id = str(uuid.uuid4()) | ||||
|     output_table["id"] = id | ||||
|     #output_table["position"] = id | ||||
|     #output_table["brand"] = brand | ||||
|     output_table["fullspecs"] = tables | ||||
|     output_table["searchspecs"] = {"partnum": partnum, **flatten(tables)} | ||||
|      | ||||
|     output_table["searchspecs"]["id"] = id | ||||
|      | ||||
|  | ||||
|  | ||||
|     print(output_table) | ||||
|  | ||||
|     run_cmd("rm " + output_dir + "/*.json") # not reliable! | ||||
|     with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file: | ||||
|         json.dump(output_table["searchspecs"], json_file) | ||||
|  | ||||
|     return output_table | ||||
|  | ||||
|  | ||||
| def flatten(tables): | ||||
|     def convert_to_number(s): | ||||
|         try: | ||||
|             # First, try converting to an integer. | ||||
|             return int(s) | ||||
|         except ValueError: | ||||
|             # If that fails, try converting to a float. | ||||
|             try: | ||||
|                 return float(s) | ||||
|             except ValueError: | ||||
|                 # If it fails again, return the original string. | ||||
|                 return s | ||||
|     out = dict() | ||||
|     print("{") | ||||
|     for table in tables.keys(): | ||||
|         for key in tables[table].keys(): | ||||
|             if len(key) < 64: | ||||
|                 keyname = key | ||||
|             else: | ||||
|                 keyname = key[0:64] | ||||
|  | ||||
|     return tables | ||||
|             fullkeyname = (table + ": " + keyname).replace(".","") | ||||
|             if type(tables[table][key]) is not tuple: | ||||
|                 out[fullkeyname] = convert_to_number(tables[table][key]) | ||||
|                 print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") | ||||
|             elif len(tables[table][key]) == 1: | ||||
|                 out[fullkeyname] = convert_to_number(tables[table][key][0]) | ||||
|                  | ||||
|                 print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") | ||||
|  | ||||
|     print("}") | ||||
|     return out | ||||
|  | ||||
|      | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     parse("test2.pdf", "10GXS13") | ||||
|     parse("test2.pdf", "cables/10GXS13", "10GXS13") | ||||
| @@ -5,7 +5,7 @@ pypdf2==2.12.1 | ||||
| alive-progress | ||||
| requests | ||||
| git+https://github.com/Byeongdulee/python-urx.git | ||||
| psycopg2 | ||||
| meilisearch | ||||
| pyyaml | ||||
| Flask | ||||
| selenium | ||||
|   | ||||
		Reference in New Issue
	
	Block a user