Spaces:

samim2024
/

bsnl-chatboot

Sleeping

App Files Files Community

samim2024 committed on May 16

Commit

aa2bec3

verified ·

1 Parent(s): 32ec859

Create app.py

Browse files

Files changed (1) hide show

app.py +170 -0

app.py ADDED Viewed

	@@ -0,0 +1,170 @@

+import streamlit as st
+import os
+import zipfile
+import shutil
+from io import BytesIO
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.docstore.in_memory import InMemoryDocstore
+from langchain_community.llms import HuggingFaceHub
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+import faiss
+import uuid
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+RAG_ACCESS_KEY = os.getenv("RAG_ACCESS_KEY")
+# Initialize session state
+if "vectorstore" not in st.session_state:
+    st.session_state.vectorstore = None
+if "history" not in st.session_state:
+    st.session_state.history = []
+if "authenticated" not in st.session_state:
+    st.session_state.authenticated = False
+# Sidebar
+with st.sidebar:
+    st.header("RAG Control Panel")
+    api_key_input = st.text_input("Enter RAG Access Key", type="password")
+    # Authentication
+    if st.button("Authenticate"):
+        if api_key_input == RAG_ACCESS_KEY:
+            st.session_state.authenticated = True
+            st.success("Authentication successful!")
+        else:
+            st.error("Invalid API key.")
+    # File uploader
+    if st.session_state.authenticated:
+        input_type = st.selectbox("Select Input Type", ["Single PDF", "Folder/Zip of PDFs"])
+        input_data = None
+        if input_type == "Single PDF":
+            input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
+        else:
+            input_data = st.file_uploader("Upload a folder or zip of PDFs", type=["zip"])
+        if st.button("Process Files") and input_data is not None:
+            with st.spinner("Processing files..."):
+                vector_store = process_input(input_type, input_data)
+                st.session_state.vectorstore = vector_store
+                st.success("Files processed successfully. You can now ask questions.")
+    # Display chat history
+    st.subheader("Chat History")
+    for i, (q, a) in enumerate(st.session_state.history):
+        st.write(f"**Q{i+1}:** {q}")
+        st.write(f"**A{i+1}:** {a}")
+        st.markdown("---")
+# Main app
+def main():
+    st.title("RAG Q&A App with Mistral AI")
+    if not st.session_state.authenticated:
+        st.warning("Please authenticate with your API key in the sidebar.")
+        return
+    if st.session_state.vectorstore is None:
+        st.info("Please upload and process a PDF or folder/zip of PDFs in the sidebar.")
+        return
+    query = st.text_input("Enter your question:")
+    if st.button("Submit") and query:
+        with st.spinner("Generating answer..."):
+            answer = answer_question(st.session_state.vectorstore, query)
+            st.session_state.history.append((query, answer))
+            st.write("**Answer:**", answer)
+def process_input(input_type, input_data):
+    # Create uploads directory
+    os.makedirs("uploads", exist_ok=True)
+    documents = ""
+    if input_type == "Single PDF":
+        pdf_reader = PdfReader(input_data)
+        for page in pdf_reader.pages:
+            documents += page.extract_text() or ""
+    else:
+        # Handle zip file
+        zip_path = "uploads/uploaded.zip"
+        with open(zip_path, "wb") as f:
+            f.write(input_data.getvalue())
+        with zipfile.ZipFile(zip_path, "r") as zip_ref:
+            zip_ref.extractall("uploads/extracted")
+        # Process all PDFs in extracted folder
+        for root, _, files in os.walk("uploads/extracted"):
+            for file in files:
+                if file.endswith(".pdf"):
+                    pdf_path = os.path.join(root, file)
+                    pdf_reader = PdfReader(pdf_path)
+                    for page in pdf_reader.pages:
+                        documents += page.extract_text() or ""
+        # Clean up extracted files
+        shutil.rmtree("uploads/extracted", ignore_errors=True)
+        os.remove(zip_path)
+    # Split text
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+    texts = text_splitter.split_text(documents)
+    # Create embeddings
+    hf_embeddings = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-mpnet-base-v2",
+        model_kwargs={'device': 'cpu'}
+    )
+    # Initialize FAISS
+    dimension = len(hf_embeddings.embed_query("sample text"))
+    index = faiss.IndexFlatL2(dimension)
+    vector_store = FAISS(
+        embedding_function=hf_embeddings,
+        index=index,
+        docstore=InMemoryDocstore({}),
+        index_to_docstore_id={}
+    )
+    # Add texts to vector store
+    uuids = [str(uuid.uuid4()) for _ in range(len(texts))]
+    vector_store.add_texts(texts, ids=uuids)
+    # Save vector store locally
+    vector_store.save_local("vectorstore/faiss_index")
+    return vector_store
+def answer_question(vectorstore, query):
+    llm = HuggingFaceHub(
+        repo_id="mistralai/Mistral-7B-Instruct-v0.1",
+        model_kwargs={"temperature": 0.7, "max_length": 512},
+        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
+    )
+    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+    prompt_template = PromptTemplate(
+        template="Use the provided context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
+        input_variables=["context", "question"]
+    )
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=False,
+        chain_type_kwargs={"prompt": prompt_template}
+    )
+    result = qa_chain({"query": query})
+    return result["result"].split("Answer:")[-1].strip()
+# Call main() only when this file is executed as the entry-point script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()