Ilyas KHIAT committed · Commit eeaf024
Parent(s): fe370a3

api first commit by me :)
main.py CHANGED

@@ -62,7 +62,8 @@ async def upload_file(file: UploadFile, enterprise_data: Json[EnterpriseData]):
 
     # Assign a new UUID if id is not provided
    if enterprise_data.id is None:
-
+        clean_name = remove_non_standard_ascii(enterprise_name)
+        enterprise_data.id = f"{clean_name}_{uuid4()}"
 
     # Open the file with PyMuPDF
    pdf_document = pymupdf.open(stream=contents, filetype="pdf")
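For illustration, a minimal sketch of the enterprise id format this change produces. The sample enterprise_name value and the import of the helper from rag.py are assumptions made for the sketch, not part of the commit:

    from uuid import uuid4
    from rag import remove_non_standard_ascii  # helper added in this commit (see rag.py below); import path assumed

    enterprise_name = "Société Générale"       # hypothetical request value
    clean_name = remove_non_standard_ascii(enterprise_name)
    enterprise_id = f"{clean_name}_{uuid4()}"
    # e.g. "Societe Generale_3f9c..." : accents are stripped before the UUID suffix is appended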
rag.py CHANGED

@@ -8,6 +8,13 @@ from langchain_openai import ChatOpenAI
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import PromptTemplate
 
+import unicodedata
+
+def remove_non_standard_ascii(input_string: str) -> str:
+    normalized_string = unicodedata.normalize('NFKD', input_string)
+    return ''.join(char for char in normalized_string if 'a' <= char <= 'z' or 'A' <= char <= 'Z' or char.isdigit() or char in ' .,!?')
+
+
 
 
 def get_text_from_content_for_doc(content):
@@ -44,7 +51,8 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
                 page_content=chunk,
                 metadata={"filename":filename,"file_type":file_type},
             )
-
+            clean_filename = remove_non_standard_ascii(file_name)
+            uuid = f"{clean_filename}_{i}"
             uuids.append(uuid)
             documents.append(document)
 
@@ -53,6 +61,7 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
         return True
 
     except Exception as e:
+        print(e)
         return False
 
 def get_retreive_answer(enterprise_id,prompt,index):
@@ -70,6 +79,7 @@ def get_retreive_answer(enterprise_id,prompt,index):
         return response
 
     except Exception as e:
+        print(e)
         return False
 
 def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini",context:str="",messages = []) :
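A self-contained sketch of how the new remove_non_standard_ascii helper behaves; the sample inputs are illustrative, not from the commit:

    import unicodedata

    def remove_non_standard_ascii(input_string: str) -> str:
        # NFKD decomposition splits accented characters into base letter + combining mark;
        # the filter then keeps only ASCII letters, digits, and the characters ' .,!?'.
        normalized_string = unicodedata.normalize('NFKD', input_string)
        return ''.join(char for char in normalized_string
                       if 'a' <= char <= 'z' or 'A' <= char <= 'Z' or char.isdigit() or char in ' .,!?')

    print(remove_non_standard_ascii("Café Über #1!"))     # -> "Cafe Uber 1!"
    print(remove_non_standard_ascii("rapport_2024.pdf"))  # -> "rapport2024.pdf" (underscore dropped)

The same filter feeds both the document ids built in get_vectorstore (f"{clean_filename}_{i}") and the enterprise id built in main.py, so both stay plain ASCII.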