Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_huggingface.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
def setup_vector_db(pdf_path):
    """Build a FAISS vector store from the PDF located at ``pdf_path``.

    The PDF is loaded with ``PyPDFLoader``, split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=50``), embedded with the
    ``Snowflake/snowflake-arctic-embed-l-v2.0`` Hugging Face model and
    indexed with ``FAISS.from_documents``.

    Returns:
        The FAISS vector store built from the PDF chunks.
    """
    # Load the PDF and split it into chunks.
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    pieces = splitter.split_documents(pages)

    # Create the vector database from the embedded chunks.
    embedder = HuggingFaceEmbeddings(
        model_name="Snowflake/snowflake-arctic-embed-l-v2.0",
    )
    return FAISS.from_documents(pieces, embedder)
def get_local_content(vector_db, query, k=5):
    """Return the text of the stored chunks most similar to ``query``.

    Args:
        vector_db: Vector store exposing ``similarity_search(query, k=...)``
            whose results carry a ``page_content`` attribute.
        query: Text to search for.
        k: Maximum number of chunks to request from the store. Defaults to
            5, the value that used to be hard-coded.

    Returns:
        The ``page_content`` of every returned document joined by single
        spaces, in the order the store returned them. An empty string is
        returned when the search yields no documents.
    """
    docs = vector_db.similarity_search(query, k=k)
    return " ".join(doc.page_content for doc in docs)
def check_local_knowledge(query, vector_db, threshold=0.7):
    """Check whether the query can be answered from local knowledge.

    The single best match is fetched together with its relevance score and
    is considered relevant only when that score reaches ``threshold``.
    A plain ``similarity_search`` always returns the nearest document, no
    matter how dissimilar it is, so it cannot be used to decide relevance.

    Args:
        query: Text of the user query.
        vector_db: Vector store exposing
            ``similarity_search_with_relevance_scores(query, k=...)`` that
            returns ``(document, score)`` pairs.
        threshold: Minimum relevance score for the best match to count as
            relevant. NOTE(review): the score scale depends on the store's
            distance metric and on whether embeddings are normalised;
            confirm that 0.7 suits the configured index.

    Returns:
        True if the best match scores at least ``threshold``; False when
        nothing matches, the score is too low, or the lookup fails.
    """
    try:
        # Fetch the best match along with its relevance score.
        scored_hits = vector_db.similarity_search_with_relevance_scores(query, k=1)
        return any(score >= threshold for _doc, score in scored_hits)
    except Exception as e:
        # Deliberately best-effort: a failing lookup means "no local
        # knowledge" so the caller can fall back to another source.
        print(f"Erro ao verificar conhecimento local: {e}")
        return False