	| import os | |
| import streamlit as st | |
| from langchain_community.document_loaders import PDFPlumberLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.prompts import ChatPromptTemplate | |
| from langchain.chains import LLMChain | |
| from langchain.llms import CTransformers | |
| # === Configuration === | |
| pdfs_directory = 'pdfs' | |
| vectorstores_directory = 'vectorstores' | |
| os.makedirs(pdfs_directory, exist_ok=True) | |
| os.makedirs(vectorstores_directory, exist_ok=True) | |
| PREDEFINED_BOOKS = [f for f in os.listdir(pdfs_directory) if f.endswith(".pdf")] | |
| TEMPLATE = """ | |
| You are a helpful assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. | |
| If you don't know the answer, say "I don't know". Limit your answer to three concise sentences. | |
| Question: {question} | |
| Context: {context} | |
| Answer: | |
| """ | |
| # === Embeddings and LLM (CPU-friendly) === | |
| embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') | |
| llm = CTransformers( | |
| model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF', | |
| model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf', | |
| model_type='mistral', | |
| config={'max_new_tokens': 512, 'temperature': 0.5} | |
| ) | |
# === Functions ===
def upload_pdf(file):
    """Persist an uploaded PDF into the local pdfs directory.

    `file` is a Streamlit UploadedFile (provides `.name` and `.getbuffer()`).
    Returns the bare filename the PDF was saved under.
    """
    destination = os.path.join(pdfs_directory, file.name)
    with open(destination, "wb") as out:
        out.write(file.getbuffer())
    return file.name
def load_pdf(file_path):
    """Load a PDF from disk into LangChain Document objects via pdfplumber."""
    return PDFPlumberLoader(file_path).load()
def split_text(documents):
    """Split documents into overlapping chunks suitable for embedding.

    Uses 1000-character chunks with a 200-character overlap; start indices
    are recorded so each chunk can be traced back to its source position.
    """
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    return chunker.split_documents(documents)
def get_vectorstore_path(book_filename):
    """Map a book's filename to its vector-store cache directory.

    The extension is stripped, so 'book.pdf' caches under
    vectorstores/book/.
    """
    stem, _ext = os.path.splitext(book_filename)
    return os.path.join(vectorstores_directory, stem)
def load_or_create_vectorstore(book_filename, documents=None):
    """Return the FAISS store for a book, loading the on-disk cache if present.

    On a cache miss, `documents` must be supplied: they are chunked, embedded,
    and the resulting index is saved for next time.
    Raises ValueError when no cache exists and no documents were given.
    """
    vs_path = get_vectorstore_path(book_filename)
    cached_index = os.path.join(vs_path, "index.faiss")
    if os.path.exists(cached_index):
        # Index was written locally by this app, so deserializing it is trusted.
        return FAISS.load_local(vs_path, embedding_model, allow_dangerous_deserialization=True)
    if documents is None:
        raise ValueError("Documents must be provided when creating a new vectorstore.")
    os.makedirs(vs_path, exist_ok=True)
    store = FAISS.from_documents(split_text(documents), embedding_model)
    store.save_local(vs_path)
    return store
def retrieve_docs(vector_store, query):
    """Return the documents most similar to `query` (store's default top-k)."""
    return vector_store.similarity_search(query)
def answer_question(question, documents):
    """Answer `question` using the retrieved `documents` as context.

    Concatenates the documents' text, fills the module-level TEMPLATE,
    and runs it through the local LLM chain; returns the model's answer.
    """
    joined_context = "\n\n".join(doc.page_content for doc in documents)
    qa_prompt = ChatPromptTemplate.from_template(TEMPLATE)
    qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
    return qa_chain.run({"question": question, "context": joined_context})
# === UI ===
# NOTE(review): the "π"/"β" prefixes in several UI strings look like
# mojibake-damaged emoji from a bad encoding round-trip — confirm the
# intended glyphs before normalizing them.
st.set_page_config(page_title="π PDF Q&A (Cached FAISS)", layout="centered")
st.title("π Chat with PDF - Cached Vector Stores")

with st.sidebar:
    st.header("Select or Upload a Book")
    selected_book = st.selectbox("Choose a PDF", PREDEFINED_BOOKS + ["Upload new book"])
    if selected_book == "Upload new book":
        uploaded_file = st.file_uploader("Upload PDF", type="pdf")
        if uploaded_file:
            filename = upload_pdf(uploaded_file)
            # BUG FIX: the success message contained a dead placeholder and
            # never showed the saved filename computed on the line above.
            st.success(f"Uploaded: {filename}")
            selected_book = filename

# === Load or Create Vector Store ===
if selected_book and selected_book != "Upload new book":
    file_path = os.path.join(pdfs_directory, selected_book)
    vectorstore_path = get_vectorstore_path(selected_book)
    try:
        if os.path.exists(os.path.join(vectorstore_path, "index.faiss")):
            st.info("β Using cached vector store.")
            vector_store = load_or_create_vectorstore(selected_book)
        else:
            st.warning("β³ Creating new vector store (first-time load)...")
            documents = load_pdf(file_path)
            vector_store = load_or_create_vectorstore(selected_book, documents)
            st.success("β Vector store created and cached.")

        # Chat loop: retrieve relevant chunks, then ask the LLM to answer.
        question = st.chat_input("Ask a question about the book...")
        if question:
            st.chat_message("user").write(question)
            related_docs = retrieve_docs(vector_store, question)
            answer = answer_question(question, related_docs)
            st.chat_message("assistant").write(answer)
    except Exception as e:
        # Boundary handler: surface load/indexing failures in the UI
        # instead of crashing the Streamlit session.
        st.error(f"β Error loading PDF or vector store: {e}")