prestiva committed · Commit 92e030f · 1 Parent(s): 20104e9

UPDATE: book3

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitattributes CHANGED
@@ -37,3 +37,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  vectorDB/.sqlite filter=lfs diff=lfs merge=lfs -text
  vectorDB/*.sqlite filter=lfs diff=lfs merge=lfs -text
  vectorDB/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ *.sqlite3 filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,4 +1,4 @@
- FROM python:3.10-slim
+ FROM python:3.12-slim

  COPY . /app

@@ -6,10 +6,12 @@ WORKDIR /app

  USER root

- RUN pip install -r requirements.txt
+ RUN pip install uv
+
+ RUN uv sync

  RUN chmod -R 777 /app

  EXPOSE 7860

- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["uv", "run", "main.py"]
__pycache__/main.cpython-310.pyc ADDED
Binary file (3.58 kB).
 
config.py ADDED
@@ -0,0 +1,19 @@
+ # config.py
+ # This file stores all the configuration variables and constants for the application.
+
+ # Embedding Model Configuration
+ MODEL_NAME = "jinaai/jina-embeddings-v3"
+ MODEL_KWARGS = {'device': 'cpu', 'trust_remote_code': True}
+ ENCODE_KWARGS = {'normalize_embeddings': False}
+
+ # Vector Store Configuration
+ VECTOR_STORE_DIRECTORY = "vectorStore"
+
+ # LLM Configuration
+ LLM_MODEL = "llama-3.3-70b-versatile"
+ LLM_TEMPERATURE = 0.8
+
+ # Retriever Configuration
+ DENSE_RETRIEVER_K = 3
+ KEYWORD_RETRIEVER_K = 3
+ ENSEMBLE_WEIGHTS = [0.5, 0.5]
llm_chain.py ADDED
@@ -0,0 +1,82 @@
+ # llm_chain.py
+ # This file configures the language model, prompt template, and the final processing chain.
+
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_groq import ChatGroq
+ from config import LLM_MODEL, LLM_TEMPERATURE
+
+ def get_llm():
+     """Initializes and returns the ChatGroq LLM."""
+     return ChatGroq(
+         model=LLM_MODEL,
+         temperature=LLM_TEMPERATURE
+     )
+
+ def get_prompt_template():
+     """Creates and returns the ChatPromptTemplate for the RAG chain."""
+     prompt_text = """
+ You are a chatbot AI assistant and an expert in mathematics, specialized exclusively in answering questions from the three mathematics books authored by Ice Venkatesh. Your primary role is to provide clear, precise, and complete answers that ensure user satisfaction, based strictly on the retrieved context from those books.
+
+ ---
+
+ ### Core Principles
+
+ 1. **Absolute Fidelity to the Source**
+ * Your answers must be 100% accurate and based **strictly** on the retrieved context from Ice Venkatesh’s books.
+ * You must **only** use the provided context. Do not invent, assume, or guess missing information.
+ * Never use external sources, the internet, or your prior training knowledge. Your knowledge is confined to the provided text.
+ * Never cite or refer to any source, including the books themselves. Do not mention words like "context," "retrieved," or "the book says."
+
+ 2. **Mathematical Rigor and Clarity**
+ * As a math expert, ensure all solutions are flawless and easily understandable.
+ * Always provide step-by-step solutions for calculations, proofs, or problem-solving.
+ * All reasoning must be mathematically correct and clearly explained, avoiding shortcuts unless the context itself provides them.
+
+ 3. **Professional and Concise Communication**
+ * Responses must be polite, professional, clear, and concise.
+ * Do not include speculation, unnecessary commentary, or conversational filler. You are a direct, to-the-point assistant.
+
+ 4. **Handling Specific Scenarios**
+ * **If the context is insufficient:** You must reply **only** with the following text and nothing else:
+ "The books by Ice Venkatesh do not provide enough information to answer this question."
+ * **If the query is off-topic:** If the user asks anything unrelated to mathematics or the content of Ice Venkatesh's books, you must decline using **exactly** this phrasing:
+ "I can only help with questions related to the three mathematics books by Ice Venkatesh. Unfortunately, I cannot assist with topics outside that scope."
+
+ ---
+
+ ### Output Format Instructions
+
+ * The entire response must be a single, continuous block of text.
+ * The entire block must be wrapped in double quotes (`"`).
+ * Do not include any prefixes like “Answer:” or “Here is the solution:”. No text should appear outside the quotes.
+
+ **Example:**
+ "Step 1: Divide the number by 3. Step 2: Multiply the result by 111. Final Answer: 37 × 24 = 888."
+
+ ---
+
+ Retrieved context:
+ {context}
+
+ User question:
+ {query}
+ """
+     return ChatPromptTemplate.from_template(prompt_text)
+
+ def create_rag_chain(retriever):
+     """Creates and returns the full RAG chain, accepting a retriever as an argument."""
+     print("Creating RAG chain...")
+     llm = get_llm()
+     prompt = get_prompt_template()
+     output_parser = StrOutputParser()
+
+     chain = (
+         {"context": retriever, "query": RunnablePassthrough()}
+         | prompt
+         | llm
+         | output_parser
+     )
+     print("RAG chain created successfully.")
+     return chain
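
Note: a minimal smoke test for the new chain (not part of this commit) could stub the retriever with a RunnableLambda, so the prompt and the Groq call can be exercised without loading the Chroma store. It assumes GROQ_API_KEY is set in the environment and that llm_chain.py is importable; the file name and stub text are hypothetical.

# smoke_test_chain.py (hypothetical, not in this commit)
from langchain_core.runnables import RunnableLambda
from llm_chain import create_rag_chain

# Stub retriever: returns a fixed "context" string for any query.
fake_retriever = RunnableLambda(lambda query: "37 x 3 = 111, so 37 x 24 = 888.")
chain = create_rag_chain(fake_retriever)
print(chain.invoke("How can 37 x 24 be computed quickly?"))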
main.py CHANGED
@@ -1,61 +1,94 @@
- from langchain_core.output_parsers import StrOutputParser
- from langchain_core.runnables import RunnablePassthrough
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_groq import ChatGroq
- from langchain_chroma import Chroma
- from fastapi import FastAPI
-
- app = FastAPI(title = "VibbaAIEndpoints")
-
- model_name = "BAAI/bge-large-en-v1.5"
- model_kwargs = {'device': 'cpu', "trust_remote_code": True}
- encode_kwargs = {'normalize_embeddings': False}
- hf = HuggingFaceEmbeddings(
-     model_name=model_name,
-     model_kwargs=model_kwargs,
-     encode_kwargs=encode_kwargs
- )
-
- vectorStore = Chroma(
-     collection_name="collection",
-     embedding_function=hf,
-     persist_directory="./vectorDB",
- )
- vectorStore = vectorStore.as_retriever(search_kwargs={"k": 4})
-
- llm = ChatGroq(model = "llama-3.3-70b-versatile", temperature = 0.75)
- outputParser = StrOutputParser()
-
- prompt = """
- You are a highly specialized chatbot designed to assist users with queries related to a specific book about mathematics.
- Your primary role is to answer user questions accurately and comprehensively using the retrieved context from the book.
-
- - Accuracy is paramount: Your answers must be 100% accurate and based strictly on the context you have been provided.
- - No additional information: Never introduce information or ideas outside the retrieved context. You must rely solely on the book's content to guide your responses.
- - User satisfaction: Your goal is to provide complete satisfaction to users by solving their doubts and answering their questions
- with clarity, precision, and politeness.
- - Math expertise: The book revolves around mathematics, and you are an expert in math. Ensure all solutions and explanations are
- flawless and easily understandable, offering correct guidance for any math-related queries.
- - Off-topic queries: If a user asks a question unrelated to the book or mathematics, politely respond that you are not designed
- to address topics beyond the scope of the book and math. You can assess the topic's relevance based on the context retrieved.
-
- Example response for off-topic queries:
- "I'm here to help with questions related to the mathematics book I was designed around. Unfortunately, I cannot assist with
- topics outside that scope. Please feel free to ask any math-related questions!"
-
- Always maintain professionalism, politeness, and clarity in every response. You are a reliable and expert guide for users
- seeking help with math through the context of the book.
-
- Here's the retrieved context:
- {context}
-
- Here's the question which user has asked:
- {query}
- """
- prompt = ChatPromptTemplate.from_template(prompt)
- chain = {"query": RunnablePassthrough(), "context": RunnablePassthrough() | vectorStore} | prompt | llm | outputParser
-
- @app.get("/getResponse")
- async def generateResponse(question: str):
-     return chain.invoke(question)
+ # main.py
+ # This is the main file that runs the Sanic web server.
+
+ from sanic import Sanic, response
+ from retriever import get_ensemble_retriever
+ from llm_chain import create_rag_chain
+
+ app = Sanic("VibbaBackend")
+
+ @app.before_server_start
+ async def setup_model(app_instance, loop):
+     """
+     Initializes the retriever and RAG chain and attaches them
+     to the application context before the server starts.
+     """
+     print("Server starting up... Initializing model pipeline.")
+     retriever = get_ensemble_retriever()
+     rag_chain = create_rag_chain(retriever)
+     app_instance.ctx.rag_chain = rag_chain
+     print("Model pipeline is ready.")
+
+ @app.get("/")
+ async def home(request):
+     """
+     Root endpoint showing app name and description.
+     """
+     html_content = """
+ <html>
+ <head>
+ <title>VibbaBackend</title>
+ <style>
+ body {
+ font-family: Arial, sans-serif;
+ margin: 40px;
+ background-color: #f9f9f9;
+ color: #333;
+ }
+ h1 { color: #0073e6; }
+ .container {
+ max-width: 800px;
+ margin: auto;
+ padding: 20px;
+ background: #fff;
+ border-radius: 8px;
+ box-shadow: 0 2px 6px rgba(0,0,0,0.1);
+ }
+ </style>
+ </head>
+ <body>
+ <div class="container">
+ <h1>VibbaBackend</h1>
+ <p>
+ Welcome to the <strong>VibbaBackend</strong> service! 🚀<br><br>
+ This backend powers a Retrieval-Augmented Generation (RAG) pipeline
+ using an ensemble retriever and a large language model.
+ </p>
+ <p>
+ <strong>Available endpoints:</strong>
+ <ul>
+ <li><code>/getResponse?question=Your+query</code> – Get an answer to your question.</li>
+ </ul>
+ </p>
+ </div>
+ </body>
+ </html>
+ """
+     return response.html(html_content)
+
+ @app.get("/getResponse")
+ async def get_response_endpoint(request):
+     """
+     Endpoint to get an answer to a question using the RAG chain.
+     Expects a 'question' query parameter.
+     """
+     question = request.args.get("question")
+     if not question:
+         return response.json(
+             {"error": "Please provide a 'question' query parameter."},
+             status=400
+         )
+
+     try:
+         chain = request.app.ctx.rag_chain
+         result = chain.invoke(question)
+         return response.text(result)
+     except Exception as e:
+         print(f"An error occurred during invocation: {e}")
+         return response.json(
+             {"error": "An internal error occurred while processing your request."},
+             status=500
+         )
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860)
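
Note: a minimal client for the rewritten endpoint (not part of this commit), assuming the Sanic server above is running locally on port 7860, needs only the standard library; the file name and sample question are hypothetical.

# query_backend.py (hypothetical, not in this commit)
import urllib.parse
import urllib.request

# /getResponse expects a 'question' query parameter and returns plain text.
params = urllib.parse.urlencode({"question": "What is 37 x 24?"})
with urllib.request.urlopen(f"http://localhost:7860/getResponse?{params}") as resp:
    print(resp.read().decode("utf-8"))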
 
pyproject.toml ADDED
@@ -0,0 +1,18 @@
+ [project]
+ name = "bookai"
+ version = "0.1.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "einops>=0.8.1",
+     "langchain>=0.3.27",
+     "langchain-chroma>=0.2.6",
+     "langchain-community>=0.3.29",
+     "langchain-core>=0.3.76",
+     "langchain-groq>=0.3.8",
+     "langchain-huggingface>=0.3.1",
+     "rank-bm25>=0.2.2",
+     "sanic>=25.3.0",
+     "sentence-transformers>=5.1.1",
+ ]
requirements.txt DELETED
@@ -1,9 +0,0 @@
- uvicorn
- fastapi
- pdfplumber
- langchain
- langchain-groq
- langchain-chroma
- langchain-community
- langchain-huggingface
- langchain_text_splitters
retriever.py ADDED
@@ -0,0 +1,61 @@
+ # retriever.py
+ # This file handles the setup of embeddings, vector stores, and the ensemble retriever.
+
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_community.retrievers import BM25Retriever
+ from langchain.retrievers import EnsembleRetriever
+ from config import (
+     MODEL_NAME, MODEL_KWARGS, ENCODE_KWARGS, VECTOR_STORE_DIRECTORY,
+     DENSE_RETRIEVER_K, KEYWORD_RETRIEVER_K, ENSEMBLE_WEIGHTS
+ )
+
+ def get_embedding_function():
+     """Initializes and returns the HuggingFace embedding model."""
+     return HuggingFaceEmbeddings(
+         model_name=MODEL_NAME,
+         model_kwargs=MODEL_KWARGS,
+         encode_kwargs=ENCODE_KWARGS
+     )
+
+ def get_vector_store(embedding_function):
+     """Initializes and returns the Chroma vector store."""
+     return Chroma(
+         embedding_function=embedding_function,
+         persist_directory=VECTOR_STORE_DIRECTORY
+     )
+
+ def get_ensemble_retriever():
+     """
+     Creates and returns an ensemble retriever combining dense and keyword-based search.
+     """
+     print("Initializing embeddings and vector store...")
+     embeddings = get_embedding_function()
+     vector_store = get_vector_store(embeddings)
+
+     dense_vector_retriever = vector_store.as_retriever(k=DENSE_RETRIEVER_K)
+
+     print("Loading documents for BM25 retriever...")
+     ids = vector_store.get().get("ids", [])
+
+     if not ids:
+         all_documents = []
+     else:
+         all_documents = vector_store.get_by_ids(ids)
+
+     keyword_search_retriever = BM25Retriever.from_documents(
+         documents=all_documents, k=KEYWORD_RETRIEVER_K
+     ) if all_documents else None
+
+     if keyword_search_retriever:
+         print("Creating ensemble retriever...")
+         ensemble_retriever = EnsembleRetriever(
+             retrievers=[dense_vector_retriever, keyword_search_retriever],
+             weights=ENSEMBLE_WEIGHTS
+         )
+     else:
+         print("Creating dense-only retriever...")
+         ensemble_retriever = dense_vector_retriever
+
+     print("Retriever setup complete.")
+     return ensemble_retriever
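
Note: a quick standalone check of the ensemble retriever (not part of this commit), assuming the persisted Chroma store in vectorStore/ from this commit is available, could look like the sketch below; the file name and query are hypothetical.

# check_retriever.py (hypothetical, not in this commit)
from retriever import get_ensemble_retriever

retriever = get_ensemble_retriever()
# Retrievers are Runnables, so invoke() returns the top-ranked Documents.
docs = retriever.invoke("How is 37 x 24 computed?")
for doc in docs:
    print(doc.page_content[:120])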
uv.lock ADDED
The diff for this file is too large to render.
 
{vectorDB → vectorStore}/chroma.sqlite3 RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7a7fa2d24362ebecf737fc691e886d750716b24adbdfbfad4c76a02c96400865
- size 3903488
+ oid sha256:7081f2a48908387f519e62c823ef7a9f3487b0e17a4f3a68d7ab81cd949a4a29
+ size 5992448