prestiva committed · Commit 92e030f · 1 Parent(s): 20104e9

UPDATE: book3

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitattributes CHANGED
@@ -37,3 +37,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  vectorDB/.sqlite filter=lfs diff=lfs merge=lfs -text
  vectorDB/*.sqlite filter=lfs diff=lfs merge=lfs -text
  vectorDB/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ *.sqlite3 filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,4 +1,4 @@
- FROM python:3.10-slim
+ FROM python:3.12-slim

  COPY . /app

@@ -6,10 +6,12 @@ WORKDIR /app

  USER root

- RUN pip install -r requirements.txt
+ RUN pip install uv
+
+ RUN uv sync

  RUN chmod -R 777 /app

  EXPOSE 7860

- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["uv", "run", "main.py"]
__pycache__/main.cpython-310.pyc ADDED
Binary file (3.58 kB).
 
config.py ADDED
@@ -0,0 +1,19 @@
+ # config.py
+ # This file stores all the configuration variables and constants for the application.
+
+ # Embedding Model Configuration
+ MODEL_NAME = "jinaai/jina-embeddings-v3"
+ MODEL_KWARGS = {'device': 'cpu', 'trust_remote_code': True}
+ ENCODE_KWARGS = {'normalize_embeddings': False}
+
+ # Vector Store Configuration
+ VECTOR_STORE_DIRECTORY = "vectorStore"
+
+ # LLM Configuration
+ LLM_MODEL = "llama-3.3-70b-versatile"
+ LLM_TEMPERATURE = 0.8
+
+ # Retriever Configuration
+ DENSE_RETRIEVER_K = 3
+ KEYWORD_RETRIEVER_K = 3
+ ENSEMBLE_WEIGHTS = [0.5, 0.5]
llm_chain.py ADDED
@@ -0,0 +1,82 @@
+ # llm_chain.py
+ # This file configures the language model, prompt template, and the final processing chain.
+
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_groq import ChatGroq
+ from config import LLM_MODEL, LLM_TEMPERATURE
+
+ def get_llm():
+     """Initializes and returns the ChatGroq LLM."""
+     return ChatGroq(
+         model=LLM_MODEL,
+         temperature=LLM_TEMPERATURE
+     )
+
+ def get_prompt_template():
+     """Creates and returns the ChatPromptTemplate for the RAG chain."""
+     prompt_text = """
+ You are a chatbot AI assistant and an expert in mathematics, specialized exclusively in answering questions from the three mathematics books authored by Ice Venkatesh. Your primary role is to provide clear, precise, and complete answers that ensure user satisfaction, based strictly on the retrieved context from those books.
+
+ ---
+
+ ### Core Principles
+
+ 1. **Absolute Fidelity to the Source**
+ * Your answers must be 100% accurate and based **strictly** on the retrieved context from Ice Venkatesh’s books.
+ * You must **only** use the provided context. Do not invent, assume, or guess missing information.
+ * Never use external sources, the internet, or your prior training knowledge. Your knowledge is confined to the provided text.
+ * Never cite or refer to any source, including the books themselves. Do not mention words like "context," "retrieved," or "the book says."
+
+ 2. **Mathematical Rigor and Clarity**
+ * As a math expert, ensure all solutions are flawless and easily understandable.
+ * Always provide step-by-step solutions for calculations, proofs, or problem-solving.
+ * All reasoning must be mathematically correct and clearly explained, avoiding shortcuts unless the context itself provides them.
+
+ 3. **Professional and Concise Communication**
+ * Responses must be polite, professional, clear, and concise.
+ * Do not include speculation, unnecessary commentary, or conversational filler. You are a direct, to-the-point assistant.
+
+ 4. **Handling Specific Scenarios**
+ * **If the context is insufficient:** You must reply **only** with the following text and nothing else:
+ "The books by Ice Venkatesh do not provide enough information to answer this question."
+ * **If the query is off-topic:** If the user asks anything unrelated to mathematics or the content of Ice Venkatesh's books, you must decline using **exactly** this phrasing:
+ "I can only help with questions related to the three mathematics books by Ice Venkatesh. Unfortunately, I cannot assist with topics outside that scope."
+
+ ---
+
+ ### Output Format Instructions
+
+ * The entire response must be a single, continuous block of text.
+ * The entire block must be wrapped in double quotes (`"`).
+ * Do not include any prefixes like “Answer:” or “Here is the solution:”. No text should appear outside the quotes.
+
+ **Example:**
+ "Step 1: Divide the number by 3. Step 2: Multiply the result by 111. Final Answer: 37 × 24 = 888."
+
+ ---
+
+ Retrieved context:
+ {context}
+
+ User question:
+ {query}
+ """
+     return ChatPromptTemplate.from_template(prompt_text)
+
+ def create_rag_chain(retriever):
+     """Creates and returns the full RAG chain, accepting a retriever as an argument."""
+     print("Creating RAG chain...")
+     llm = get_llm()
+     prompt = get_prompt_template()
+     output_parser = StrOutputParser()
+
+     chain = (
+         {"context": retriever, "query": RunnablePassthrough()}
+         | prompt
+         | llm
+         | output_parser
+     )
+     print("RAG chain created successfully.")
+     return chain
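
Note: a minimal smoke test for the new chain (not part of this commit) could stub the retriever with a RunnableLambda, so the prompt and the Groq call can be exercised without loading the Chroma store. It assumes GROQ_API_KEY is set in the environment and that llm_chain.py is importable; the file name and stub text are hypothetical.

# smoke_test_chain.py (hypothetical, not in this commit)
from langchain_core.runnables import RunnableLambda
from llm_chain import create_rag_chain

# Stub retriever: returns a fixed "context" string for any query.
fake_retriever = RunnableLambda(lambda query: "37 x 3 = 111, so 37 x 24 = 888.")
chain = create_rag_chain(fake_retriever)
print(chain.invoke("How can 37 x 24 be computed quickly?"))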
main.py CHANGED
@@ -1,61 +1,94 @@
- from langchain_core.output_parsers import StrOutputParser
- from langchain_core.runnables import RunnablePassthrough
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_groq import ChatGroq
- from langchain_chroma import Chroma
- from fastapi import FastAPI
-
- app = FastAPI(title = "VibbaAIEndpoints")
-
- model_name = "BAAI/bge-large-en-v1.5"
- model_kwargs = {'device': 'cpu', "trust_remote_code": True}
- encode_kwargs = {'normalize_embeddings': False}
- hf = HuggingFaceEmbeddings(
-     model_name=model_name,
-     model_kwargs=model_kwargs,
-     encode_kwargs=encode_kwargs
- )
-
- vectorStore = Chroma(
-     collection_name="collection",
-     embedding_function=hf,
-     persist_directory="./vectorDB",
- )
- vectorStore = vectorStore.as_retriever(search_kwargs={"k": 4})
-
- llm = ChatGroq(model = "llama-3.3-70b-versatile", temperature = 0.75)
- outputParser = StrOutputParser()
-
- prompt = """
- You are a highly specialized chatbot designed to assist users with queries related to a specific book about mathematics.
- Your primary role is to answer user questions accurately and comprehensively using the retrieved context from the book.
-
- - Accuracy is paramount: Your answers must be 100% accurate and based strictly on the context you have been provided.
- - No additional information: Never introduce information or ideas outside the retrieved context. You must rely solely on the book's content to guide your responses.
- - User satisfaction: Your goal is to provide complete satisfaction to users by solving their doubts and answering their questions
- with clarity, precision, and politeness.
- - Math expertise: The book revolves around mathematics, and you are an expert in math. Ensure all solutions and explanations are
- flawless and easily understandable, offering correct guidance for any math-related queries.
- - Off-topic queries: If a user asks a question unrelated to the book or mathematics, politely respond that you are not designed
- to address topics beyond the scope of the book and math. You can assess the topic's relevance based on the context retrieved.
-
- Example response for off-topic queries:
- "I'm here to help with questions related to the mathematics book I was designed around. Unfortunately, I cannot assist with
- topics outside that scope. Please feel free to ask any math-related questions!"
-
- Always maintain professionalism, politeness, and clarity in every response. You are a reliable and expert guide for users
- seeking help with math through the context of the book.
-
- Here's the retrieved context:
- {context}
-
- Here's the question which user has asked:
- {query}
- """
- prompt = ChatPromptTemplate.from_template(prompt)
- chain = {"query": RunnablePassthrough(), "context": RunnablePassthrough() | vectorStore} | prompt | llm | outputParser
-
- @app.get("/getResponse")
- async def generateResponse(question: str):
-     return chain.invoke(question)
+ # main.py
+ # This is the main file that runs the Sanic web server.
+
+ from sanic import Sanic, response
+ from retriever import get_ensemble_retriever
+ from llm_chain import create_rag_chain
+
+ app = Sanic("VibbaBackend")
+
+ @app.before_server_start
+ async def setup_model(app_instance, loop):
+     """
+     Initializes the retriever and RAG chain and attaches them
+     to the application context before the server starts.
+     """
+     print("Server starting up... Initializing model pipeline.")
+     retriever = get_ensemble_retriever()
+     rag_chain = create_rag_chain(retriever)
+     app_instance.ctx.rag_chain = rag_chain
+     print("Model pipeline is ready.")
+
+ @app.get("/")
+ async def home(request):
+     """
+     Root endpoint showing app name and description.
+     """
+     html_content = """
+ <html>
+ <head>
+ <title>VibbaBackend</title>
+ <style>
+ body {
+ font-family: Arial, sans-serif;
+ margin: 40px;
+ background-color: #f9f9f9;
+ color: #333;
+ }
+ h1 { color: #0073e6; }
+ .container {
+ max-width: 800px;
+ margin: auto;
+ padding: 20px;
+ background: #fff;
+ border-radius: 8px;
+ box-shadow: 0 2px 6px rgba(0,0,0,0.1);
+ }
+ </style>
+ </head>
+ <body>
+ <div class="container">
+ <h1>VibbaBackend</h1>
+ <p>
+ Welcome to the <strong>VibbaBackend</strong> service! 🚀<br><br>
+ This backend powers a Retrieval-Augmented Generation (RAG) pipeline
+ using an ensemble retriever and a large language model.
+ </p>
+ <p>
+ <strong>Available endpoints:</strong>
+ <ul>
+ <li><code>/getResponse?question=Your+query</code> – Get an answer to your question.</li>
+ </ul>
+ </p>
+ </div>
+ </body>
+ </html>
+ """
+     return response.html(html_content)
+
+ @app.get("/getResponse")
+ async def get_response_endpoint(request):
+     """
+     Endpoint to get an answer to a question using the RAG chain.
+     Expects a 'question' query parameter.
+     """
+     question = request.args.get("question")
+     if not question:
+         return response.json(
+             {"error": "Please provide a 'question' query parameter."},
+             status=400
+         )
+
+     try:
+         chain = request.app.ctx.rag_chain
+         result = chain.invoke(question)
+         return response.text(result)
+     except Exception as e:
+         print(f"An error occurred during invocation: {e}")
+         return response.json(
+             {"error": "An internal error occurred while processing your request."},
+             status=500
+         )
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860)
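
Note: a minimal client for the rewritten endpoint (not part of this commit), assuming the Sanic server above is running locally on port 7860, needs only the standard library; the file name and sample question are hypothetical.

# query_backend.py (hypothetical, not in this commit)
import urllib.parse
import urllib.request

# /getResponse expects a 'question' query parameter and returns plain text.
params = urllib.parse.urlencode({"question": "What is 37 x 24?"})
with urllib.request.urlopen(f"http://localhost:7860/getResponse?{params}") as resp:
    print(resp.read().decode("utf-8"))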
 
pyproject.toml ADDED
@@ -0,0 +1,18 @@
+ [project]
+ name = "bookai"
+ version = "0.1.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "einops>=0.8.1",
+     "langchain>=0.3.27",
+     "langchain-chroma>=0.2.6",
+     "langchain-community>=0.3.29",
+     "langchain-core>=0.3.76",
+     "langchain-groq>=0.3.8",
+     "langchain-huggingface>=0.3.1",
+     "rank-bm25>=0.2.2",
+     "sanic>=25.3.0",
+     "sentence-transformers>=5.1.1",
+ ]
requirements.txt DELETED
@@ -1,9 +0,0 @@
- uvicorn
- fastapi
- pdfplumber
- langchain
- langchain-groq
- langchain-chroma
- langchain-community
- langchain-huggingface
- langchain_text_splitters
retriever.py ADDED
@@ -0,0 +1,61 @@
+ # retriever.py
+ # This file handles the setup of embeddings, vector stores, and the ensemble retriever.
+
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_community.retrievers import BM25Retriever
+ from langchain.retrievers import EnsembleRetriever
+ from config import (
+     MODEL_NAME, MODEL_KWARGS, ENCODE_KWARGS, VECTOR_STORE_DIRECTORY,
+     DENSE_RETRIEVER_K, KEYWORD_RETRIEVER_K, ENSEMBLE_WEIGHTS
+ )
+
+ def get_embedding_function():
+     """Initializes and returns the HuggingFace embedding model."""
+     return HuggingFaceEmbeddings(
+         model_name=MODEL_NAME,
+         model_kwargs=MODEL_KWARGS,
+         encode_kwargs=ENCODE_KWARGS
+     )
+
+ def get_vector_store(embedding_function):
+     """Initializes and returns the Chroma vector store."""
+     return Chroma(
+         embedding_function=embedding_function,
+         persist_directory=VECTOR_STORE_DIRECTORY
+     )
+
+ def get_ensemble_retriever():
+     """
+     Creates and returns an ensemble retriever combining dense and keyword-based search.
+     """
+     print("Initializing embeddings and vector store...")
+     embeddings = get_embedding_function()
+     vector_store = get_vector_store(embeddings)
+
+     dense_vector_retriever = vector_store.as_retriever(k=DENSE_RETRIEVER_K)
+
+     print("Loading documents for BM25 retriever...")
+     ids = vector_store.get().get("ids", [])
+
+     if not ids:
+         all_documents = []
+     else:
+         all_documents = vector_store.get_by_ids(ids)
+
+     keyword_search_retriever = BM25Retriever.from_documents(
+         documents=all_documents, k=KEYWORD_RETRIEVER_K
+     ) if all_documents else None
+
+     if keyword_search_retriever:
+         print("Creating ensemble retriever...")
+         ensemble_retriever = EnsembleRetriever(
+             retrievers=[dense_vector_retriever, keyword_search_retriever],
+             weights=ENSEMBLE_WEIGHTS
+         )
+     else:
+         print("Creating dense-only retriever...")
+         ensemble_retriever = dense_vector_retriever
+
+     print("Retriever setup complete.")
+     return ensemble_retriever
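
Note: a quick standalone check of the ensemble retriever (not part of this commit), assuming the persisted Chroma store in vectorStore/ from this commit is available, could look like the sketch below; the file name and query are hypothetical.

# check_retriever.py (hypothetical, not in this commit)
from retriever import get_ensemble_retriever

retriever = get_ensemble_retriever()
# Retrievers are Runnables, so invoke() returns the top-ranked Documents.
docs = retriever.invoke("How is 37 x 24 computed?")
for doc in docs:
    print(doc.page_content[:120])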
uv.lock ADDED
The diff for this file is too large to render.
 
{vectorDB → vectorStore}/chroma.sqlite3 RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7a7fa2d24362ebecf737fc691e886d750716b24adbdfbfad4c76a02c96400865
- size 3903488
+ oid sha256:7081f2a48908387f519e62c823ef7a9f3487b0e17a4f3a68d7ab81cd949a4a29
+ size 5992448