Ilyas KHIAT committed · Commit eeaf024
Parent(s): fe370a3

api first commit by me :)
main.py CHANGED

@@ -62,7 +62,8 @@ async def upload_file(file: UploadFile, enterprise_data: Json[EnterpriseData]):
 
     # Assign a new UUID if id is not provided
    if enterprise_data.id is None:
-
+        clean_name = remove_non_standard_ascii(enterprise_name)
+        enterprise_data.id = f"{clean_name}_{uuid4()}"
 
     # Open the file with PyMuPDF
    pdf_document = pymupdf.open(stream=contents, filetype="pdf")
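For illustration, a minimal sketch of the enterprise id format this change produces. The sample enterprise_name value and the import of the helper from rag.py are assumptions made for the sketch, not part of the commit:

    from uuid import uuid4
    from rag import remove_non_standard_ascii  # helper added in this commit (see rag.py below); import path assumed

    enterprise_name = "Société Générale"       # hypothetical request value
    clean_name = remove_non_standard_ascii(enterprise_name)
    enterprise_id = f"{clean_name}_{uuid4()}"
    # e.g. "Societe Generale_3f9c..." : accents are stripped before the UUID suffix is appended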
rag.py CHANGED

@@ -8,6 +8,13 @@ from langchain_openai import ChatOpenAI
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import PromptTemplate
 
+import unicodedata
+
+def remove_non_standard_ascii(input_string: str) -> str:
+    normalized_string = unicodedata.normalize('NFKD', input_string)
+    return ''.join(char for char in normalized_string if 'a' <= char <= 'z' or 'A' <= char <= 'Z' or char.isdigit() or char in ' .,!?')
+
+
 
 
 def get_text_from_content_for_doc(content):
@@ -44,7 +51,8 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
                 page_content=chunk,
                 metadata={"filename":filename,"file_type":file_type},
             )
-
+            clean_filename = remove_non_standard_ascii(file_name)
+            uuid = f"{clean_filename}_{i}"
             uuids.append(uuid)
             documents.append(document)
 
@@ -53,6 +61,7 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
         return True
 
     except Exception as e:
+        print(e)
         return False
 
 def get_retreive_answer(enterprise_id,prompt,index):
@@ -70,6 +79,7 @@ def get_retreive_answer(enterprise_id,prompt,index):
         return response
 
     except Exception as e:
+        print(e)
         return False
 
 def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini",context:str="",messages = []) :
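A self-contained sketch of how the new remove_non_standard_ascii helper behaves; the sample inputs are illustrative, not from the commit:

    import unicodedata

    def remove_non_standard_ascii(input_string: str) -> str:
        # NFKD decomposition splits accented characters into base letter + combining mark;
        # the filter then keeps only ASCII letters, digits, and the characters ' .,!?'.
        normalized_string = unicodedata.normalize('NFKD', input_string)
        return ''.join(char for char in normalized_string
                       if 'a' <= char <= 'z' or 'A' <= char <= 'Z' or char.isdigit() or char in ' .,!?')

    print(remove_non_standard_ascii("Café Über #1!"))     # -> "Cafe Uber 1!"
    print(remove_non_standard_ascii("rapport_2024.pdf"))  # -> "rapport2024.pdf" (underscore dropped)

The same filter feeds both the document ids built in get_vectorstore (f"{clean_filename}_{i}") and the enterprise id built in main.py, so both stay plain ASCII.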