"""Streamlit RAG chatbot: upload PDFs, index them in a FAISS vector store,
and answer questions with a Deepseek model served via the Groq API.

(Originally a Hugging Face Space; the scraped "Spaces: Sleeping" status
lines from the page header are folded into this docstring.)
"""
import streamlit as st
from groq import Groq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import tempfile
import os

# Read the Groq API key from the environment.
# BUG FIX: os.getenv() takes the *name* of the environment variable; the
# original passed a placeholder value ("your_groq_api_key"), so the key was
# always None. Export GROQ_API_KEY before running the app.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)

# Streamlit App Title
st.title("🤖 RAG Chatbot Using Deepseek")

# File Upload
uploaded_files = st.file_uploader("📄 Upload PDFs", type=["pdf"], accept_multiple_files=True)

if uploaded_files:
    with st.spinner("Processing PDFs... ⏳"):
        docs = []
        for file in uploaded_files:
            # Persist the upload to a temp file so PyPDFLoader can read it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                temp_pdf.write(file.read())
                temp_pdf_path = temp_pdf.name
            try:
                # Load PDF and extract text
                loader = PyPDFLoader(temp_pdf_path)
                docs.extend(loader.load())
            finally:
                # BUG FIX: remove the temp file even if loading raises.
                os.remove(temp_pdf_path)

        # Split text into smaller overlapping chunks for retrieval.
        text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        split_docs = text_splitter.split_documents(docs)

        # Create embeddings & FAISS vector store. Stored in session_state so it
        # survives Streamlit reruns and is visible to the query branch below.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        st.session_state["vector_db"] = FAISS.from_documents(split_docs, embeddings)
    st.success("✅ PDFs processed and stored in the vector database!")

# User Input
query = st.text_input("🔍 Ask a question:")

if query:
    # BUG FIX: the original referenced a bare `vector_db` local that only
    # existed when PDFs had been uploaded in the same run, raising NameError
    # when a question was asked first. Guard on session_state instead.
    if "vector_db" not in st.session_state:
        st.warning("Please upload at least one PDF before asking a question.")
    else:
        with st.spinner("Retrieving answer... ⏳"):
            # Retrieve the chunks most relevant to the question.
            retriever = st.session_state["vector_db"].as_retriever()
            relevant_docs = retriever.get_relevant_documents(query)

            # Concatenate retrieved chunks into a single context string.
            context = "\n\n".join(doc.page_content for doc in relevant_docs)

            # Ask the Deepseek distill model on Groq, streaming the reply.
            completion = client.chat.completions.create(
                model="deepseek-r1-distill-llama-70b",
                messages=[
                    {"role": "system", "content": "You are an AI assistant providing answers based on the given context."},
                    {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
                ],
                temperature=0.6,
                max_completion_tokens=1024,
                top_p=0.95,
                stream=True,
                reasoning_format="raw",
            )

            # Stream the model's tokens into the UI as they arrive.
            response_text = ""
            response_container = st.empty()
            for chunk in completion:
                response_text += chunk.choices[0].delta.content or ""
                response_container.markdown(response_text)
        st.success("✅ Answer generated!")