Compare commits
	
		
			21 Commits
		
	
	
		
			dthomas-db
			...
			eea8c9f5fa
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| eea8c9f5fa | |||
| fe5de4e54c | |||
| 68b95bfe17 | |||
| e3e9b855f9 | |||
| 523915feb0 | |||
| b5b2a936c1 | |||
| afd144bd32 | |||
| eb221a5206 | |||
| db7c8c4577 | |||
|  | 21b1bf7992 | ||
| d376dba67c | |||
| 95631dbdbe | |||
| 9aef296763 | |||
| 2b287492de | |||
| d2a4d93590 | |||
| 58605dbe85 | |||
| 7bf3276ce9 | |||
| 818688452b | |||
| 01526524d4 | |||
| 33671683ea | |||
| fad885c610 | 
							
								
								
									
										11
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | |||||||
|  | FROM python:latest | ||||||
|  |  | ||||||
|  | RUN apt-get update && apt-get install -y libgl1-mesa-glx ghostscript && apt-get clean && rm -rf /var/lib/apt/lists | ||||||
|  | COPY . . | ||||||
|  | #COPY config-server.yml config.yml | ||||||
|  | RUN pip3 install -r requirements.txt | ||||||
|  |  | ||||||
|  | CMD ["python3", "run.py"] | ||||||
|  | EXPOSE 5000 | ||||||
|  | EXPOSE 8000 | ||||||
|  | EXPOSE 9000 | ||||||
							
								
								
									
										13
									
								
								compose.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								compose.yml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | services: | ||||||
|  |   meilisearch: | ||||||
|  |     image: "getmeili/meilisearch:v1.6.2" | ||||||
|  |     ports: | ||||||
|  |       - "7700:7700" | ||||||
|  |     environment: | ||||||
|  |       MEILI_MASTER_KEY: fluffybunnyrabbit | ||||||
|  |       MEILI_NO_ANALYTICS: true | ||||||
|  |     volumes: | ||||||
|  |       - "meili_data:/meili_data" | ||||||
|  |  | ||||||
|  | volumes: | ||||||
|  |   meili_data: | ||||||
| @@ -157,7 +157,7 @@ def get_multi(partnums): | |||||||
|             bar(skipped=True) |             bar(skipped=True) | ||||||
|             fprint("Parsing Datasheet contents of " + partnum) |             fprint("Parsing Datasheet contents of " + partnum) | ||||||
|             bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." |             bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." | ||||||
|             read_datasheet.parse(path, output_dir) |             read_datasheet.parse(path, output_dir, partnum) | ||||||
|             bar(skipped=False) |             bar(skipped=False) | ||||||
|  |  | ||||||
|         def __downloaded_datasheet(partnum, path, output_dir): |         def __downloaded_datasheet(partnum, path, output_dir): | ||||||
| @@ -166,7 +166,7 @@ def get_multi(partnums): | |||||||
|             bar(skipped=False) |             bar(skipped=False) | ||||||
|             fprint("Parsing Datasheet contents of " + partnum) |             fprint("Parsing Datasheet contents of " + partnum) | ||||||
|             bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." |             bar.text = "Parsing Datasheet contents of " + partnum + ".pdf..." | ||||||
|             read_datasheet.parse(path, output_dir) |             read_datasheet.parse(path, output_dir, partnum) | ||||||
|             bar(skipped=False) |             bar(skipped=False) | ||||||
|  |  | ||||||
|         for partnum in partnums: |         for partnum in partnums: | ||||||
| @@ -227,7 +227,7 @@ def get_multi(partnums): | |||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     partnums = ["10GXS12", "RST 5L-RKT 5L-949",  |     partnums = ["7958A", "10GXS12", "RST 5L-RKT 5L-949",  | ||||||
| "10GXS13", | "10GXS13", | ||||||
| "10GXW12", | "10GXW12", | ||||||
| "10GXW13", | "10GXW13", | ||||||
|   | |||||||
| @@ -9,8 +9,10 @@ from PIL import Image | |||||||
| import io | import io | ||||||
| import json | import json | ||||||
| from util import fprint | from util import fprint | ||||||
|  | import uuid | ||||||
|  | from util import run_cmd | ||||||
|  |  | ||||||
| def parse(filename, output_dir): | def parse(filename, output_dir, partnum): | ||||||
|  |  | ||||||
|     # Extract table data |     # Extract table data | ||||||
|  |  | ||||||
| @@ -163,18 +165,77 @@ def parse(filename, output_dir): | |||||||
|  |  | ||||||
|         previous_table = table_name |         previous_table = table_name | ||||||
|      |      | ||||||
|  |     # remove multi-line values that occasionally squeak through | ||||||
|  |     def replace_newlines_in_dict(d): | ||||||
|  |         for key, value in d.items(): | ||||||
|  |             if isinstance(value, str): | ||||||
|  |                 # Replace \n with " " if the value is a string | ||||||
|  |                 d[key] = value.replace('\n', ' ') | ||||||
|  |             elif isinstance(value, dict): | ||||||
|  |                 # Recursively call the function if the value is another dictionary | ||||||
|  |                 replace_newlines_in_dict(value) | ||||||
|  |         return d | ||||||
|      |      | ||||||
|     fprint(tables) |     tables = replace_newlines_in_dict(tables) | ||||||
|     with open(output_dir + "/tables.json", 'w') as json_file: |  | ||||||
|         json.dump(tables, json_file) |     # summary | ||||||
|  |  | ||||||
|  |     output_table = dict() | ||||||
|  |     output_table["partnum"] = partnum | ||||||
|  |     id = str(uuid.uuid4()) | ||||||
|  |     output_table["id"] = id | ||||||
|  |     #output_table["position"] = id | ||||||
|  |     #output_table["brand"] = brand | ||||||
|  |     output_table["fullspecs"] = tables | ||||||
|  |     output_table["searchspecs"] = {"partnum": partnum, **flatten(tables)} | ||||||
|  |      | ||||||
|  |     output_table["searchspecs"]["id"] = id | ||||||
|      |      | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     print(output_table) | ||||||
|  |  | ||||||
|  |     run_cmd("rm " + output_dir + "/*.json") # not reliable! | ||||||
|  |     with open(output_dir + "/" + output_table["searchspecs"]["id"] + ".json", 'w') as json_file: | ||||||
|  |         json.dump(output_table["searchspecs"], json_file) | ||||||
|  |  | ||||||
|  |     return output_table | ||||||
|  |  | ||||||
|  |  | ||||||
|     return tables | def flatten(tables): | ||||||
|  |     def convert_to_number(s): | ||||||
|  |         try: | ||||||
|  |             # First, try converting to an integer. | ||||||
|  |             return int(s) | ||||||
|  |         except ValueError: | ||||||
|  |             # If that fails, try converting to a float. | ||||||
|  |             try: | ||||||
|  |                 return float(s) | ||||||
|  |             except ValueError: | ||||||
|  |                 # If it fails again, return the original string. | ||||||
|  |                 return s | ||||||
|  |     out = dict() | ||||||
|  |     print("{") | ||||||
|  |     for table in tables.keys(): | ||||||
|  |         for key in tables[table].keys(): | ||||||
|  |             if len(key) < 64: | ||||||
|  |                 keyname = key | ||||||
|  |             else: | ||||||
|  |                 keyname = key[0:64] | ||||||
|  |  | ||||||
|  |             fullkeyname = (table + ": " + keyname).replace(".","") | ||||||
|  |             if type(tables[table][key]) is not tuple: | ||||||
|  |                 out[fullkeyname] = convert_to_number(tables[table][key]) | ||||||
|  |                 print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") | ||||||
|  |             elif len(tables[table][key]) == 1: | ||||||
|  |                 out[fullkeyname] = convert_to_number(tables[table][key][0]) | ||||||
|  |                  | ||||||
|  |                 print("\"" + keyname + "\":", "\"" + str(out[fullkeyname]) + "\",") | ||||||
|  |  | ||||||
|  |     print("}") | ||||||
|  |     return out | ||||||
|  |  | ||||||
|      |      | ||||||
|  |  | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     parse("test2.pdf", "10GXS13") |     parse("test2.pdf", "cables/10GXS13", "10GXS13") | ||||||
| @@ -5,7 +5,7 @@ pypdf2==2.12.1 | |||||||
| alive-progress | alive-progress | ||||||
| requests | requests | ||||||
| git+https://github.com/Byeongdulee/python-urx.git | git+https://github.com/Byeongdulee/python-urx.git | ||||||
| psycopg2 | meilisearch | ||||||
| pyyaml | pyyaml | ||||||
| Flask | Flask | ||||||
| selenium | selenium | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								util.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								util.py
									
									
									
									
									
								
							| @@ -123,7 +123,7 @@ class Logger(object): | |||||||
|         self.terminal = sys.stdout |         self.terminal = sys.stdout | ||||||
|  |  | ||||||
|     def write(self, message): |     def write(self, message): | ||||||
|         self.log.write(message) |         #self.log.write(message) | ||||||
|         #close(filename) |         #close(filename) | ||||||
|         #self.log = open(filename, "a") |         #self.log = open(filename, "a") | ||||||
|         try: |         try: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user