Spaces:
Sleeping
Sleeping
Commit
·
10e8a0c
0
Parent(s):
new commit
Browse files- .gitignore +40 -0
- Dockerfile +31 -0
- Kubernetes/deployment.yml +30 -0
- Kubernetes/namespace.yml +4 -0
- Kubernetes/service.yml +13 -0
- Procfile +1 -0
- README.md +108 -0
- data/my_document.txt +33 -0
- data/sample.pdf +0 -0
- endpoints.py +13 -0
- main.py +20 -0
- rag.py +69 -0
- requirements.txt +13 -0
- static/script.js +276 -0
- static/styles.css +364 -0
- templates/index.html +81 -0
- vector_rag.py +54 -0
.gitignore
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore virtual environment
|
| 2 |
+
venv/
|
| 3 |
+
ragenv/
|
| 4 |
+
ENV/
|
| 5 |
+
env/
|
| 6 |
+
.venv/
|
| 7 |
+
|
| 8 |
+
# Python compiled files
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.py[cod]
|
| 11 |
+
*.so
|
| 12 |
+
|
| 13 |
+
# Environment variables
|
| 14 |
+
.env
|
| 15 |
+
|
| 16 |
+
# VS Code settings
|
| 17 |
+
.vscode/
|
| 18 |
+
*.code-workspace
|
| 19 |
+
|
| 20 |
+
# OS-specific
|
| 21 |
+
.DS_Store
|
| 22 |
+
Thumbs.db
|
| 23 |
+
|
| 24 |
+
# Logs and databases (optional)
|
| 25 |
+
*.log
|
| 26 |
+
*.sqlite3
|
| 27 |
+
|
| 28 |
+
# Jupyter/IPython
|
| 29 |
+
.ipynb_checkpoints/
|
| 30 |
+
|
| 31 |
+
# Cache
|
| 32 |
+
*.cache
|
| 33 |
+
*.pkl
|
| 34 |
+
*.db
|
| 35 |
+
|
| 36 |
+
# Node modules (if ever added)
|
| 37 |
+
node_modules/
|
| 38 |
+
|
| 39 |
+
Kubernetes/secret.yml
|
| 40 |
+
|
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image
FROM python:3.10-slim

# PYTHONDONTWRITEBYTECODE=1: do not write .pyc files inside the image.
# PYTHONUNBUFFERED=1: send stdout/stderr straight to the container log.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Set work directory inside the container
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy dependency list first so the install layer below is cached until
# requirements.txt changes.
COPY requirements.txt .

# Install dependencies in one layer, without keeping pip's download cache.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy entire project into the container.
# NOTE(review): this commit adds no .dockerignore, so local files such as .env
# or a virtualenv directory would be copied too if present — confirm.
COPY . .

# Expose the port your app runs on
EXPOSE 8000

# Command to run the application using uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
Kubernetes/deployment.yml
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
apiVersion: apps/v1
|
| 2 |
+
kind: Deployment
|
| 3 |
+
metadata:
|
| 4 |
+
name: rag-app
|
| 5 |
+
namespace: rag
|
| 6 |
+
spec:
|
| 7 |
+
replicas: 1
|
| 8 |
+
selector:
|
| 9 |
+
matchLabels:
|
| 10 |
+
app: rag-app
|
| 11 |
+
template:
|
| 12 |
+
metadata:
|
| 13 |
+
labels:
|
| 14 |
+
app: rag-app
|
| 15 |
+
spec:
|
| 16 |
+
containers:
|
| 17 |
+
- name: rag-container
|
| 18 |
+
image: yadavkapil23/rag-app:latest
|
| 19 |
+
ports:
|
| 20 |
+
- containerPort: 8000
|
| 21 |
+
# --- NEW CODE: INJECT HUGGINGFACE TOKEN FROM A SECRET ---
|
| 22 |
+
env:
|
| 23 |
+
- name: HUGGINGFACE_API_TOKEN
|
| 24 |
+
valueFrom:
|
| 25 |
+
secretKeyRef:
|
| 26 |
+
# You must create a secret named 'huggingface-secret' beforehand
|
| 27 |
+
name: huggingface-secret
|
| 28 |
+
# Assuming the key inside the secret is also named HUGGINGFACE_API_TOKEN
|
| 29 |
+
key: HUGGINGFACE_API_TOKEN
|
| 30 |
+
# --------------------------------------------------------
|
Kubernetes/namespace.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
apiVersion: v1
|
| 2 |
+
kind: Namespace
|
| 3 |
+
metadata:
|
| 4 |
+
name: rag
|
Kubernetes/service.yml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
apiVersion: v1
|
| 2 |
+
kind: Service
|
| 3 |
+
metadata:
|
| 4 |
+
name: rag-service
|
| 5 |
+
namespace: rag
|
| 6 |
+
spec:
|
| 7 |
+
type: NodePort
|
| 8 |
+
selector:
|
| 9 |
+
app: rag-app
|
| 10 |
+
ports:
|
| 11 |
+
- port: 8000
|
| 12 |
+
targetPort: 8000
|
| 13 |
+
nodePort: 30036 # optional fixed port, else Kubernetes assigns a random one
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: uvicorn main:app --host=0.0.0.0 --port=8000
|
README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
# 🚀 RAG System with LangChain and FastAPI 🌐
|
| 4 |
+
|
| 5 |
+
Welcome to this repository! This project demonstrates how to build a Retrieval-Augmented Generation (RAG) system using **LangChain** and **FastAPI**. It generates contextually relevant and accurate responses by integrating external data into the generative process.
|
| 6 |
+
|
| 7 |
+
## 📋 Project Overview
|
| 8 |
+
|
| 9 |
+
The RAG system combines retrieval and generation to provide smarter AI-driven responses. Using **LangChain** for document handling and embeddings, and **FastAPI** for deploying a fast, scalable API, this project includes:
|
| 10 |
+
|
| 11 |
+
- 🗂️ **Document Loading**: Load data from various sources (text, PDFs, etc.).
|
| 12 |
+
- ✂️ **Text Splitting**: Break large documents into manageable chunks.
|
| 13 |
+
- 🧠 **Embeddings**: Generate vector embeddings for efficient search and retrieval.
|
| 14 |
+
- 🔍 **Vector Stores**: Store embeddings in a vector store for fast similarity searches.
|
| 15 |
+
- 🔧 **Retrieval**: Retrieve the most relevant document chunks based on user queries.
|
| 16 |
+
- 💬 **Generative Response**: Use retrieved data with language models (LLMs) to generate accurate, context-aware answers.
|
| 17 |
+
- 🌐 **FastAPI**: Deploy the RAG system as a scalable API for easy interaction.
|
| 18 |
+
|
| 19 |
+
## ⚙️ Setup and Installation
|
| 20 |
+
|
| 21 |
+
### Prerequisites
|
| 22 |
+
|
| 23 |
+
Make sure you have the following installed:
|
| 24 |
+
- 🐍 Python 3.10+
|
| 25 |
+
- 🐳 Docker (optional, for deployment)
|
| 26 |
+
- 🛠️ PostgreSQL or FAISS (for vector storage)
|
| 27 |
+
|
| 28 |
+
### Installation Steps
|
| 29 |
+
|
| 30 |
+
1. **Clone the repository**:
|
| 31 |
+
```bash
|
| 32 |
+
git clone https://github.com/yadavkapil23/RAG_Project.git
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
2. **Set up a virtual environment**:
|
| 36 |
+
```bash
|
| 37 |
+
python -m venv venv
|
| 38 |
+
source venv/bin/activate # For Linux/Mac
|
| 39 |
+
venv\Scripts\activate # For Windows
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
3. **Install dependencies**:
|
| 43 |
+
```bash
|
| 44 |
+
pip install -r requirements.txt
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
4. **Run the FastAPI server**:
|
| 48 |
+
```bash
|
| 49 |
+
uvicorn main:app --reload
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
Now, your FastAPI app will be running at `http://127.0.0.1:8000` 🎉!
|
| 53 |
+
|
| 54 |
+
### Set up Ollama 🦙
|
| 55 |
+
|
| 56 |
+
This project uses Ollama to run local large language models.
|
| 57 |
+
|
| 58 |
+
1. **Install Ollama:** Follow the instructions on the [Ollama website](https://ollama.ai/) to download and install Ollama.
|
| 59 |
+
|
| 60 |
+
2. **Pull a model:** Pull a model to use with the application. This project uses `llama3`.
|
| 61 |
+
```bash
|
| 62 |
+
ollama pull llama3
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
## 🛠️ Features
|
| 67 |
+
|
| 68 |
+
- **Retrieval-Augmented Generation**: Combines the best of both worlds—retrieving relevant data and generating insightful responses.
|
| 69 |
+
- **Scalable API**: FastAPI makes it easy to deploy and scale the RAG system.
|
| 70 |
+
- **Document Handling**: Supports multiple document types for loading and processing.
|
| 71 |
+
- **Vector Embeddings**: Efficient search with FAISS or other vector stores.
|
| 72 |
+
|
| 73 |
+
## 🛡️ Security
|
| 74 |
+
|
| 75 |
+
- 🔐 **OAuth2 and API Key** authentication support for secure API access.
|
| 76 |
+
- 🔒 **TLS/SSL** for encrypting data in transit.
|
| 77 |
+
- 🛡️ **Data encryption** for sensitive document storage.
|
| 78 |
+
|
| 79 |
+
## 🚀 Deployment
|
| 80 |
+
|
| 81 |
+
### Docker Deployment
|
| 82 |
+
If you want to deploy your RAG system using Docker, simply build the Docker image and run the container:
|
| 83 |
+
|
| 84 |
+
```bash
|
| 85 |
+
docker build -t rag-system .
|
| 86 |
+
docker run -p 8000:8000 rag-system
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
### Cloud Deployment
|
| 90 |
+
Deploy your RAG system to the cloud using platforms like **AWS**, **Azure**, or **Google Cloud** with minimal setup.
|
| 91 |
+
|
| 92 |
+
## 🧠 Future Enhancements
|
| 93 |
+
|
| 94 |
+
- 🔄 **Real-time Data Integration**: Add real-time data sources for dynamic responses.
|
| 95 |
+
- 🤖 **Advanced Retrieval Techniques**: Implement deep learning-based retrievers for better query understanding.
|
| 96 |
+
- 📊 **Monitoring Tools**: Add monitoring with tools like Prometheus or Grafana for performance insights.
|
| 97 |
+
|
| 98 |
+
## 🤝 Contributing
|
| 99 |
+
|
| 100 |
+
Want to contribute? Feel free to fork this repository, submit a pull request, or open an issue. We welcome all contributions! 🛠️
|
| 101 |
+
|
| 102 |
+
## 📄 License
|
| 103 |
+
|
| 104 |
+
This project is licensed under the MIT License.
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
🎉 **Thank you for checking out the RAG System with LangChain and FastAPI!** If you have any questions or suggestions, feel free to reach out or open an issue. Let's build something amazing!
|
data/my_document.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Knowledge Base
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
Quantum computing uses qubits that can represent both 0 and 1 simultaneously, offering immense parallelism for computation.
|
| 5 |
+
A transformer model uses self-attention to weigh the importance of each word in a sentence for tasks like translation or summarization.
|
| 6 |
+
Python 3.12 introduced new error messages, better performance, and support for isolated subinterpreters.
|
| 7 |
+
|
| 8 |
+
The French Revolution (1789–1799) radically transformed French society, ending monarchy and spreading ideas of liberty and equality.
|
| 9 |
+
Mahatma Gandhi led the Indian independence movement through nonviolent civil disobedience, notably during the Salt March.
|
| 10 |
+
|
| 11 |
+
A Random Forest is an ensemble of decision trees used for classification or regression. It reduces overfitting and improves accuracy.
|
| 12 |
+
LangChain is a framework for developing LLM-powered apps with components like chains, tools, memory, and agents.
|
| 13 |
+
|
| 14 |
+
Meditation helps in reducing stress, enhancing concentration, and improving emotional regulation. Regular practice can reduce anxiety.
|
| 15 |
+
Intermittent fasting involves alternating periods of eating and fasting. It can help in weight loss and metabolic health.
|
| 16 |
+
|
| 17 |
+
GDP (Gross Domestic Product) measures a country's economic output. A growing GDP usually indicates a healthy economy.
|
| 18 |
+
Inflation refers to the general rise in prices over time, reducing purchasing power. Central banks use interest rates to control inflation.
|
| 19 |
+
|
| 20 |
+
Photosynthesis is the process where green plants use sunlight, CO₂, and water to produce oxygen and glucose.
|
| 21 |
+
Black holes are regions in space where gravity is so strong that nothing—not even light—can escape.
|
| 22 |
+
|
| 23 |
+
A binary search tree is a node-based data structure where left children are smaller and right children are larger than the parent node.
|
| 24 |
+
Recursion is a function calling itself until a base condition is met. It’s used in tree traversal, backtracking, and divide-and-conquer.
|
| 25 |
+
|
| 26 |
+
Japan is an island country in East Asia known for its technology, cherry blossoms, and cultural traditions like tea ceremony and sumo.
|
| 27 |
+
The Eiffel Tower was constructed in 1889 in Paris and is one of the most visited monuments in the world.
|
| 28 |
+
|
| 29 |
+
Q: What is a black hole?
|
| 30 |
+
A: A black hole is a region in space where gravity is so strong that nothing, not even light, can escape its pull.
|
| 31 |
+
|
| 32 |
+
Q: How do neural networks work?
|
| 33 |
+
A: Neural networks consist of layers of nodes that process inputs through weighted connections and activation functions to detect patterns.
|
data/sample.pdf
ADDED
|
Binary file (71.9 kB). View file
|
|
|
endpoints.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
|
| 3 |
+
router = APIRouter()
|
| 4 |
+
|
| 5 |
+
from rag import get_smart_rag_response
|
| 6 |
+
|
| 7 |
+
@router.get("/query/")
async def query_rag_system(query: str):
    """Answer a user question through the RAG pipeline.

    Args:
        query: The question text, taken from the ``query`` URL parameter.

    Returns:
        A dict holding the original ``query`` and the generated ``response``.

    Raises:
        HTTPException: 400 when the query is blank; 500 when the pipeline
            raises, with the error text as the detail.
    """
    if not query.strip():
        # Reject blank input up front instead of sending it to the pipeline.
        raise HTTPException(status_code=400, detail="Query must not be empty.")
    try:
        response = await get_smart_rag_response(query)
    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"query": query, "response": response}
|
main.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.staticfiles import StaticFiles
|
| 3 |
+
from fastapi.templating import Jinja2Templates
|
| 4 |
+
from fastapi.requests import Request
|
| 5 |
+
from endpoints import router
|
| 6 |
+
|
| 7 |
+
app = FastAPI()
|
| 8 |
+
|
| 9 |
+
# Serve static files (CSS, JS)
|
| 10 |
+
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 11 |
+
|
| 12 |
+
# Serve HTML templates
|
| 13 |
+
templates = Jinja2Templates(directory="templates")
|
| 14 |
+
|
| 15 |
+
@app.get("/")
def home(request: Request):
    """Render the ``index.html`` template for the site root."""
    # The incoming request is passed to the template under the "request" key.
    return templates.TemplateResponse("index.html", {"request": request})
|
| 18 |
+
|
| 19 |
+
# Include your API endpoints
|
| 20 |
+
app.include_router(router)
|
rag.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from vector_rag import query_vector_store
|
| 2 |
+
import wikipedia
|
| 3 |
+
from langchain_community.llms import HuggingFacePipeline
|
| 4 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 5 |
+
import os
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
|
| 8 |
+
# Load variables from a .env file, if one exists, into the environment.
load_dotenv()
# Hugging Face model id used for answer generation.
model_name = "Qwen/Qwen2-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="cpu" places the model on the CPU.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
# Text-generation pipeline with sampling enabled, capped at 512 new tokens.
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
# LangChain wrapper around the pipeline; the query function below calls it.
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Ask the wikipedia package for English-language content.
wikipedia.set_lang("en")
|
| 24 |
+
|
| 25 |
+
async def get_smart_rag_response(query: str) -> str:
    """Answer *query* from Wikipedia, then the bare LLM, then local documents.

    The sources are tried in that order and the first usable answer is
    returned, prefixed with a tag naming where it came from.

    Args:
        query: The user's question.

    Returns:
        The tagged answer; a "did you mean" hint when Wikipedia reports the
        query as ambiguous; or an apology string when no source produced an
        answer.
    """
    # Function-scope import: only this coroutine needs asyncio.
    import asyncio

    print(" Received Query:", query)

    # First: try Wikipedia. The lookup and the model calls are blocking, so
    # they run in worker threads to keep the event loop free for other
    # requests while this one is being answered.
    summary = None
    try:
        summary = await asyncio.to_thread(wikipedia.summary, query, sentences=5)
    except wikipedia.exceptions.PageError:
        print("Wikipedia page not found.")
    except wikipedia.exceptions.DisambiguationError as e:
        return f"The query is ambiguous. Did you mean: {', '.join(e.options[:5])}?"
    except wikipedia.exceptions.WikipediaException as e:
        # Any other library error should not prevent the fallbacks below.
        print("Wikipedia lookup failed:", e)

    if summary is not None:
        print("Wikipedia summary found.")
        prompt = (
            "Use the following Wikipedia information to answer the question as clearly as possible.\n"
            "\n"
            "Wikipedia Context:\n"
            f"{summary}\n"
            "\n"
            f"Question: {query}\n"
            "Answer:"
        )
        # `invoke` is the supported replacement for the deprecated `predict`.
        result = await asyncio.to_thread(llm.invoke, prompt)
        # The pipeline may echo the prompt; strip it so only the answer remains.
        answer = result.replace(prompt, "").strip()
        return f"[Wikipedia]\n{answer}"

    # Second: fall back to the LLM with no retrieved context.
    try:
        print("Fallback: LLM with no context")
        fallback_prompt = f"You are a knowledgeable assistant. Please answer the following question clearly:\n\n{query}"
        llm_answer = await asyncio.to_thread(llm.invoke, fallback_prompt)
        answer = llm_answer.replace(fallback_prompt, "").strip()
        # An empty or self-declared uncertain answer falls through to local docs.
        if answer and "not sure" not in answer.lower():
            return f"[LLM Fallback]\n{answer}"
    except Exception as e:
        # Best-effort stage: report the failure and try the next source.
        print("Error during LLM fallback:", e)

    # Finally: fall back to the local document vector store.
    try:
        print("Fallback: Local vector search")
        vector_answer = await asyncio.to_thread(query_vector_store, query)
        if vector_answer:
            return f"[Local Document]\n{vector_answer}"
    except Exception as e:
        print("Error during local vector search:", e)

    return "Sorry, I couldn’t find any information to answer your question."
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
langchain
|
| 4 |
+
langchain-community
|
| 5 |
+
python-dotenv
|
| 6 |
+
faiss-cpu
|
| 7 |
+
jinja2
|
| 8 |
+
wikipedia
|
| 9 |
+
pypdf
|
| 10 |
+
sentence-transformers
|
| 11 |
+
torch
|
| 12 |
+
transformers
|
| 13 |
+
accelerate
|
static/script.js
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 2 |
+
const queryInput = document.getElementById('queryInput');
|
| 3 |
+
const askButton = document.getElementById('askButton');
|
| 4 |
+
const responseContainer = document.getElementById('responseContainer');
|
| 5 |
+
const themeToggle = document.getElementById('themeToggle');
|
| 6 |
+
const exampleChipsContainer = document.querySelector('.example-chips');
|
| 7 |
+
|
| 8 |
+
// 🎨 Apply a random light theme
|
| 9 |
+
// NOTE(review): a second `function applyRandomTheme()` is declared further down
// in this same callback. Function declarations are hoisted, so the later one
// appears to replace this one and leave this body unused — confirm, then
// consider removing one of the two.
function applyRandomTheme() {
    const themes = ['theme-blue', 'theme-green', 'theme-orange', 'theme-purple'];
    // Reuse the theme stored under 'color-theme'; otherwise pick one at random.
    const saved = localStorage.getItem('color-theme');
    const theme = saved || themes[Math.floor(Math.random() * themes.length)];
    document.body.classList.add(theme);
    // Store the choice so later page loads get the same theme.
    localStorage.setItem('color-theme', theme);
}
|
| 16 |
+
|
| 17 |
+
// 🌙 Theme (dark/light) toggle
|
| 18 |
+
/**
 * Restore the stored dark/light preference (default: light) and make the
 * toggle button show the icon for the opposite mode.
 */
function initTheme() {
    const storedMode = localStorage.getItem('theme') || 'light';
    const darkEnabled = storedMode === 'dark';

    // Second argument forces the class on or off instead of flipping it.
    document.body.classList.toggle('dark-mode', darkEnabled);

    let iconMarkup;
    if (darkEnabled) {
        iconMarkup = '<i class="fas fa-sun"></i>';
    } else {
        iconMarkup = '<i class="fas fa-moon"></i>';
    }
    themeToggle.innerHTML = iconMarkup;
}
|
| 25 |
+
|
| 26 |
+
// Flip dark mode on click, store the new mode, and swap the button icon.
themeToggle.addEventListener('click', () => {
    // classList.toggle returns true when the class is present afterwards.
    const darkNow = document.body.classList.toggle('dark-mode');
    const modeName = darkNow ? 'dark' : 'light';
    localStorage.setItem('theme', modeName);

    if (darkNow) {
        themeToggle.innerHTML = '<i class="fas fa-sun"></i>';
    } else {
        themeToggle.innerHTML = '<i class="fas fa-moon"></i>';
    }
});
|
| 34 |
+
|
| 35 |
+
// 🧠 Example questions
|
| 36 |
+
const examples = [
|
| 37 |
+
// 🌐 Tech & AI
|
| 38 |
+
"How does a quantum computer differ from a classical one?",
|
| 39 |
+
"What is Retrieval-Augmented Generation (RAG)?",
|
| 40 |
+
"How do self-driving cars detect obstacles?",
|
| 41 |
+
"What is the impact of AI on job markets?",
|
| 42 |
+
"How do recommender systems work?",
|
| 43 |
+
"What is Web3 and how is it different from Web2?",
|
| 44 |
+
"How does natural language processing power chatbots?",
|
| 45 |
+
"What is blockchain and how does it ensure security?",
|
| 46 |
+
"Explain edge computing with an example.",
|
| 47 |
+
"What is federated learning in AI?",
|
| 48 |
+
|
| 49 |
+
// 🧠 Science & Space
|
| 50 |
+
"What causes auroras near the poles?",
|
| 51 |
+
"How do vaccines trigger immunity?",
|
| 52 |
+
"Why is Pluto no longer a planet?",
|
| 53 |
+
"What is antimatter?",
|
| 54 |
+
"Explain the theory of relativity in simple terms.",
|
| 55 |
+
"What causes lightning during a storm?",
|
| 56 |
+
"What is DNA and how does it function?",
|
| 57 |
+
"How are black holes formed?",
|
| 58 |
+
"What is the greenhouse effect?",
|
| 59 |
+
"Why do some materials conduct electricity?",
|
| 60 |
+
|
| 61 |
+
// 🏛️ History & Politics
|
| 62 |
+
"Who was the first emperor of China?",
|
| 63 |
+
"What triggered World War I?",
|
| 64 |
+
"Explain the Cold War in brief.",
|
| 65 |
+
"What was the Silk Road and why was it important?",
|
| 66 |
+
"Tell me about the Mughal Empire.",
|
| 67 |
+
"What is the United Nations and what does it do?",
|
| 68 |
+
"How did Gandhi influence India's independence?",
|
| 69 |
+
"What is the Magna Carta?",
|
| 70 |
+
"Who was Nelson Mandela?",
|
| 71 |
+
"Explain the French Revolution.",
|
| 72 |
+
|
| 73 |
+
// 🌍 Geography & Culture
|
| 74 |
+
"Why is the Amazon rainforest important?",
|
| 75 |
+
"What are the Seven Wonders of the World?",
|
| 76 |
+
"Tell me about the culture of Japan.",
|
| 77 |
+
"What are the major rivers of Africa?",
|
| 78 |
+
"Why is Mount Everest so difficult to climb?",
|
| 79 |
+
"What are the key differences between North and South Korea?",
|
| 80 |
+
"What are traditional foods of Italy?",
|
| 81 |
+
"What is Diwali and how is it celebrated?",
|
| 82 |
+
"What is the origin of the Olympics?",
|
| 83 |
+
"What languages are spoken in Switzerland?",
|
| 84 |
+
|
| 85 |
+
// 📚 Education & Career
|
| 86 |
+
"How can I improve my time management?",
|
| 87 |
+
"What are the top universities in the world?",
|
| 88 |
+
"How to write an impressive resume?",
|
| 89 |
+
"Tips to crack a coding interview.",
|
| 90 |
+
"What are essential skills for data analysts?",
|
| 91 |
+
"How to become a product manager?",
|
| 92 |
+
"What are MOOCs and how do they help?",
|
| 93 |
+
"What are some popular scholarships abroad?",
|
| 94 |
+
"Best ways to learn a new language?",
|
| 95 |
+
"What is the difference between MBA and MTech?",
|
| 96 |
+
|
| 97 |
+
// 💡 Philosophy & Psychology
|
| 98 |
+
"What is the meaning of life?",
|
| 99 |
+
"What is Maslow’s hierarchy of needs?",
|
| 100 |
+
"Why do humans dream?",
|
| 101 |
+
"What is existentialism?",
|
| 102 |
+
"What is the placebo effect?",
|
| 103 |
+
"How does the brain store memories?",
|
| 104 |
+
"What causes anxiety and how can it be managed?",
|
| 105 |
+
"What is cognitive dissonance?",
|
| 106 |
+
"What is emotional intelligence?",
|
| 107 |
+
"How do habits form in the brain?",
|
| 108 |
+
|
| 109 |
+
// 💰 Business & Economics
|
| 110 |
+
"What is inflation and how is it measured?",
|
| 111 |
+
"What are cryptocurrencies?",
|
| 112 |
+
"What is the stock market and how does it work?",
|
| 113 |
+
"What caused the 2008 financial crisis?",
|
| 114 |
+
"What is supply chain management?",
|
| 115 |
+
"How does international trade work?",
|
| 116 |
+
"What is GDP and why is it important?",
|
| 117 |
+
"What are NFTs and why are they controversial?",
|
| 118 |
+
"How do banks create money?",
|
| 119 |
+
"What is microfinance?",
|
| 120 |
+
|
| 121 |
+
// 🧘 Lifestyle & Health
|
| 122 |
+
"What is a healthy sleep cycle?",
|
| 123 |
+
"What are the benefits of yoga?",
|
| 124 |
+
"How can I eat healthier on a budget?",
|
| 125 |
+
"What is intermittent fasting?",
|
| 126 |
+
"How much water should I drink daily?",
|
| 127 |
+
"How does regular exercise affect mental health?",
|
| 128 |
+
"What is mindfulness meditation?",
|
| 129 |
+
"How to improve focus while studying?",
|
| 130 |
+
"How can one build better habits?",
|
| 131 |
+
"What are superfoods?",
|
| 132 |
+
|
| 133 |
+
// 🌎 Current Affairs & Trends
|
| 134 |
+
"What is the Paris Climate Agreement?",
|
| 135 |
+
"What are the key outcomes of COP28?",
|
| 136 |
+
"What is 5G technology?",
|
| 137 |
+
"Why are electric vehicles important?",
|
| 138 |
+
"What is the metaverse?",
|
| 139 |
+
"How is climate change affecting the Arctic?",
|
| 140 |
+
"What are the major goals of the UN's SDGs?",
|
| 141 |
+
"What are smart cities?",
|
| 142 |
+
"What is TikTok's global impact?",
|
| 143 |
+
"What is the controversy around AI regulation?",
|
| 144 |
+
|
| 145 |
+
// 🎨 Arts, Music & Literature
|
| 146 |
+
"Who painted the Mona Lisa?",
|
| 147 |
+
"What is minimalism in art?",
|
| 148 |
+
"Tell me about Shakespeare’s major works.",
|
| 149 |
+
"What is the difference between classical and jazz music?",
|
| 150 |
+
"Who are some famous Indian authors?",
|
| 151 |
+
"What is the story behind Van Gogh’s Starry Night?",
|
| 152 |
+
"What are the Nobel Prizes in Literature?",
|
| 153 |
+
"What is haiku poetry?",
|
| 154 |
+
"Explain surrealism in art.",
|
| 155 |
+
"What is calligraphy and where is it practiced?"
|
| 156 |
+
];
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
/**
 * Show three randomly chosen example questions as clickable chips.
 * Clicking a chip copies its question into the query input and focuses it.
 */
function updateExampleChips() {
    // Fisher–Yates shuffle: sorting with a random comparator is biased, and
    // its outcome depends on the engine's sort algorithm.
    const pool = [...examples];
    for (let i = pool.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [pool[i], pool[j]] = [pool[j], pool[i]];
    }

    // Build the chips as DOM nodes so question text is never parsed as HTML,
    // and attach each listener directly instead of re-querying the document.
    const chips = pool.slice(0, 3).map(question => {
        const chip = document.createElement('span');
        chip.className = 'chip';
        chip.textContent = question;
        chip.addEventListener('click', () => {
            queryInput.value = question;
            queryInput.focus();
        });
        return chip;
    });

    exampleChipsContainer.replaceChildren(...chips);
}
|
| 171 |
+
|
| 172 |
+
// ❄️ Snowflake animation
|
| 173 |
+
/**
 * Add a batch of 50 animated snowflake elements to the `.snowfall` container,
 * one every 300 ms, and remove each one after its animation has run.
 */
function createSnowflakes() {
    const snowfall = document.querySelector('.snowfall');
    // Without the container there is nowhere to put flakes; skip quietly.
    if (!snowfall) return;

    const count = 50;
    for (let i = 0; i < count; i++) {
        setTimeout(() => {
            const flake = document.createElement('div');
            flake.className = 'snowflake';
            const size = Math.random() * 4 + 2;
            const posX = Math.random() * window.innerWidth;
            const duration = Math.random() * 10 + 5;
            const delay = Math.random() * 5;
            const opacity = Math.random() * 0.5 + 0.3;

            flake.style.width = `${size}px`;
            flake.style.height = `${size}px`;
            flake.style.left = `${posX}px`;
            flake.style.animationDuration = `${duration}s`;
            flake.style.animationDelay = `${delay}s`;
            flake.style.opacity = opacity;

            snowfall.appendChild(flake);
            // The animation starts only after `delay`, so wait for delay plus
            // duration; removing after `duration` alone cuts the fall short.
            setTimeout(() => flake.remove(), (duration + delay) * 1000);
        }, i * 300);
    }
}
|
| 198 |
+
|
| 199 |
+
// 🔍 Query handler
|
| 200 |
+
/**
 * Send the trimmed input text to `/query/` and render the answer, or an error
 * card when the request fails. Does nothing when the input is blank.
 */
async function handleQuery() {
    const query = queryInput.value.trim();
    if (!query) return;

    // The echoed query and error messages are untrusted text: escape them
    // before interpolating into innerHTML so they cannot inject markup.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => htmlEntities[ch]);

    responseContainer.innerHTML = `
        <div class="loading-state">
            <div class="loader"></div>
            <p class="loading-text">Searching knowledge base...</p>
        </div>
    `;

    try {
        const res = await fetch(`/query/?query=${encodeURIComponent(query)}`);
        if (!res.ok) throw new Error(`Server returned ${res.status}`);
        const data = await res.json();

        // formatAnswer returns paragraph markup, so its result is inserted as HTML.
        responseContainer.innerHTML = `
            <div class="answer-container">
                <div class="question-text">
                    <i class="fas fa-question-circle"></i> ${escapeHtml(data.query)}
                </div>
                <div class="answer-text">${formatAnswer(data.response)}</div>
            </div>
        `;
    } catch (err) {
        responseContainer.innerHTML = `
            <div class="answer-container error">
                <div class="question-text">
                    <i class="fas fa-exclamation-triangle"></i> Error
                </div>
                <div class="answer-text">
                    Failed to get response: ${escapeHtml(err.message)}
                </div>
            </div>
        `;
    }
}
|
| 237 |
+
|
| 238 |
+
/**
 * Turn answer text into HTML paragraphs, one `<p>` per non-blank line.
 *
 * The text comes from the server (Wikipedia / model output), so each line is
 * HTML-escaped before it is wrapped; callers insert the result via innerHTML.
 *
 * @param {*} text Answer text; non-strings are stringified, and null/undefined
 *     become "No response received.".
 * @returns {string} Concatenated `<p>…</p>` markup ('' when there are no lines).
 */
function formatAnswer(text) {
    if (typeof text !== "string") {
        text = String(text ?? "No response received.");
    }
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = line => line.replace(/[&<>"']/g, ch => htmlEntities[ch]);

    return text
        .split('\n')
        .filter(line => line.trim())
        .map(line => `<p>${escapeHtml(line)}</p>`)
        .join('');
}
|
| 248 |
+
|
| 249 |
+
// 🔗 Event listeners
|
| 250 |
+
// Submit on button click or on Enter in the input.
askButton.addEventListener('click', handleQuery);
// `keypress` is deprecated; `keydown` is the supported replacement. The
// isComposing check keeps an Enter that confirms IME text from submitting.
queryInput.addEventListener('keydown', e => {
    if (e.key === 'Enter' && !e.isComposing) handleQuery();
});
|
| 254 |
+
|
| 255 |
+
/**
 * Apply the colour theme stored under 'color-theme'; when none is stored,
 * pick one at random and store it for later visits.
 */
function applyRandomTheme() {
    const palette = ['theme-blue', 'theme-green', 'theme-orange', 'theme-purple', 'theme-pink', 'theme-teal'];
    let chosen = localStorage.getItem('color-theme');

    if (!chosen) {
        const index = Math.floor(Math.random() * palette.length);
        chosen = palette[index];
        localStorage.setItem('color-theme', chosen);
    }

    document.body.classList.add(chosen);
}
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
// 🔄 Init everything
|
| 270 |
+
applyRandomTheme();
|
| 271 |
+
initTheme();
|
| 272 |
+
updateExampleChips();
|
| 273 |
+
setInterval(updateExampleChips, 10000); // refresh chips every 10s
|
| 274 |
+
createSnowflakes();
|
| 275 |
+
setInterval(createSnowflakes, 15000); // new flakes every 15s
|
| 276 |
+
});
|
static/styles.css
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
:root {
|
| 2 |
+
--primary: #f23817;
|
| 3 |
+
--primary-dark: #2c3e50;
|
| 4 |
+
--text: #333333;
|
| 5 |
+
--bg: #f9f9f9;
|
| 6 |
+
--card-bg: #ffffff;
|
| 7 |
+
--border: #dddddd;
|
| 8 |
+
--shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
|
| 9 |
+
--header-gradient-start: #1e3c72;
|
| 10 |
+
--header-gradient-end: #2a5298;
|
| 11 |
+
--question-bar: #e8f0fe;
|
| 12 |
+
--explorer-highlight: #f13434;
|
| 13 |
+
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
/* Dark mode overrides */
|
| 17 |
+
body.dark-mode {
|
| 18 |
+
--primary: #66d9ef; /* A softer, pastel blue */
|
| 19 |
+
--text: #f9f9f9; /* Light gray */
|
| 20 |
+
--bg: #2c3e50; /* Darker blue */
|
| 21 |
+
--card-bg: #333333; /* Deep gray */
|
| 22 |
+
--border: #444444; /* Medium gray */
|
| 23 |
+
--shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.3);
|
| 24 |
+
--question-bar: #444444;
|
| 25 |
+
--send-button: #8BC34A;
|
| 26 |
+
--explorer-text: #cccccc;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
/* Color theme variations */
|
| 30 |
+
body.theme-green {
|
| 31 |
+
--primary: #8bc34a;
|
| 32 |
+
--primary-dark: #3e8e41;
|
| 33 |
+
--send-button: #03A9F4;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
body.theme-orange {
|
| 37 |
+
--primary: #ffa07a;
|
| 38 |
+
--primary-dark: #ff9900;
|
| 39 |
+
--send-button: #FF69B4;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
body.theme-purple {
|
| 43 |
+
--primary: #33c4b5;
|
| 44 |
+
--primary-dark: #9ab6b6;
|
| 45 |
+
--send-button: #9C27B0;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
/* Update question bar styles */
|
| 49 |
+
#queryInput {
|
| 50 |
+
background-color: var(--question-bar);
|
| 51 |
+
border: 2px solid var(--border);
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
/* Update send button styles */
|
| 55 |
+
.primary-btn {
|
| 56 |
+
background-color: var(--send-button);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.primary-btn:hover {
|
| 60 |
+
background-color: var(--primary-dark);
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
/* Update RAG Explorer text styles */
|
| 64 |
+
.examples p {
|
| 65 |
+
color: var(--explorer-text);
|
| 66 |
+
}
|
| 67 |
+
/* Base styles */
|
| 68 |
+
* {
|
| 69 |
+
margin: 0;
|
| 70 |
+
padding: 0;
|
| 71 |
+
box-sizing: border-box;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
body {
|
| 75 |
+
font-family: 'Poppins', sans-serif;
|
| 76 |
+
background-color: var(--bg);
|
| 77 |
+
color: var(--text);
|
| 78 |
+
transition: all 0.3s ease;
|
| 79 |
+
min-height: 100vh;
|
| 80 |
+
overflow-x: hidden;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
/* Layout */
|
| 84 |
+
.app-container {
|
| 85 |
+
display: flex;
|
| 86 |
+
justify-content: center;
|
| 87 |
+
align-items: center;
|
| 88 |
+
min-height: 100vh;
|
| 89 |
+
padding: 2rem;
|
| 90 |
+
position: relative;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.main-card {
|
| 94 |
+
background-color: var(--card-bg);
|
| 95 |
+
border-radius: 16px;
|
| 96 |
+
box-shadow: var(--shadow);
|
| 97 |
+
width: 100%;
|
| 98 |
+
max-width: 800px;
|
| 99 |
+
overflow: hidden;
|
| 100 |
+
position: relative;
|
| 101 |
+
z-index: 10;
|
| 102 |
+
transition: transform 0.3s ease, box-shadow 0.3s ease;
|
| 103 |
+
}
|
| 104 |
+
.main-card:hover {
|
| 105 |
+
transform: translateY(-5px);
|
| 106 |
+
box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1);
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
/* Header */
|
| 110 |
+
header {
|
| 111 |
+
padding: 1.5rem 2rem;
|
| 112 |
+
border-bottom: 1px solid var(--border);
|
| 113 |
+
background: linear-gradient(135deg, var(--header-gradient-start), var(--header-gradient-end));
|
| 114 |
+
color: white;
|
| 115 |
+
}
|
| 116 |
+
.header-content {
|
| 117 |
+
display: flex;
|
| 118 |
+
justify-content: space-between;
|
| 119 |
+
align-items: center;
|
| 120 |
+
}
|
| 121 |
+
h1 {
|
| 122 |
+
font-size: 1.75rem;
|
| 123 |
+
font-weight: 600;
|
| 124 |
+
display: flex;
|
| 125 |
+
align-items: center;
|
| 126 |
+
gap: 0.75rem;
|
| 127 |
+
}
|
| 128 |
+
.subtitle {
|
| 129 |
+
font-size: 0.875rem;
|
| 130 |
+
opacity: 0.9;
|
| 131 |
+
margin-top: 0.25rem;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
/* Theme toggle */
|
| 135 |
+
#themeToggle {
|
| 136 |
+
background: transparent;
|
| 137 |
+
border: none;
|
| 138 |
+
color: white;
|
| 139 |
+
font-size: 1.25rem;
|
| 140 |
+
cursor: pointer;
|
| 141 |
+
transition: transform 0.3s ease;
|
| 142 |
+
}
|
| 143 |
+
#themeToggle:hover {
|
| 144 |
+
transform: rotate(25deg);
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
/* Search section */
|
| 148 |
+
.search-container {
|
| 149 |
+
padding: 2rem;
|
| 150 |
+
}
|
| 151 |
+
.search-box {
|
| 152 |
+
display: flex;
|
| 153 |
+
gap: 0.75rem;
|
| 154 |
+
margin-bottom: 1.5rem;
|
| 155 |
+
}
|
| 156 |
+
#queryInput {
|
| 157 |
+
flex: 1;
|
| 158 |
+
padding: 0.875rem 1.25rem;
|
| 159 |
+
border: 2px solid var(--border);
|
| 160 |
+
border-radius: 12px;
|
| 161 |
+
font-size: 1rem;
|
| 162 |
+
background-color: var(--card-bg);
|
| 163 |
+
color: var(--text);
|
| 164 |
+
transition: all 0.3s ease;
|
| 165 |
+
}
|
| 166 |
+
#queryInput:focus {
|
| 167 |
+
outline: none;
|
| 168 |
+
border-color: var(--primary);
|
| 169 |
+
box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.2);
|
| 170 |
+
}
|
| 171 |
+
.primary-btn {
|
| 172 |
+
background-color: var(--primary);
|
| 173 |
+
color: white;
|
| 174 |
+
border: none;
|
| 175 |
+
padding: 0 1.75rem;
|
| 176 |
+
border-radius: 12px;
|
| 177 |
+
font-weight: 500;
|
| 178 |
+
cursor: pointer;
|
| 179 |
+
display: flex;
|
| 180 |
+
align-items: center;
|
| 181 |
+
gap: 0.5rem;
|
| 182 |
+
transition: all 0.3s ease;
|
| 183 |
+
}
|
| 184 |
+
.primary-btn:hover {
|
| 185 |
+
background-color: var(--primary-dark);
|
| 186 |
+
transform: translateY(-2px);
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
/* Examples */
|
| 190 |
+
.examples p {
|
| 191 |
+
font-size: 0.875rem;
|
| 192 |
+
color: var(--text);
|
| 193 |
+
opacity: 0.7;
|
| 194 |
+
margin-bottom: 0.5rem;
|
| 195 |
+
}
|
| 196 |
+
.example-chips {
|
| 197 |
+
display: flex;
|
| 198 |
+
gap: 0.5rem;
|
| 199 |
+
flex-wrap: wrap;
|
| 200 |
+
}
|
| 201 |
+
.chip {
|
| 202 |
+
background-color: var(--border);
|
| 203 |
+
padding: 0.5rem 1rem;
|
| 204 |
+
border-radius: 999px;
|
| 205 |
+
font-size: 0.875rem;
|
| 206 |
+
cursor: pointer;
|
| 207 |
+
transition: all 0.2s ease;
|
| 208 |
+
}
|
| 209 |
+
.chip:hover {
|
| 210 |
+
background-color: var(--primary);
|
| 211 |
+
color: white;
|
| 212 |
+
transform: translateY(-1px);
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
/* Response section */
|
| 216 |
+
.response-container {
|
| 217 |
+
padding: 0 2rem 2rem;
|
| 218 |
+
min-height: 300px;
|
| 219 |
+
}
|
| 220 |
+
.welcome-message {
|
| 221 |
+
text-align: center;
|
| 222 |
+
padding: 2rem 0;
|
| 223 |
+
}
|
| 224 |
+
.welcome-image {
|
| 225 |
+
width: 150px;
|
| 226 |
+
height: auto;
|
| 227 |
+
margin-bottom: 1rem;
|
| 228 |
+
opacity: 0.9;
|
| 229 |
+
}
|
| 230 |
+
.welcome-message h2 {
|
| 231 |
+
font-size: 1.5rem;
|
| 232 |
+
margin-bottom: 0.5rem;
|
| 233 |
+
color: var(--explorer-highlight);
|
| 234 |
+
}
|
| 235 |
+
.welcome-message p {
|
| 236 |
+
color: var(--text);
|
| 237 |
+
opacity: 0.8;
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
/* Answer block */
|
| 241 |
+
.answer-container {
|
| 242 |
+
background-color: var(--bg);
|
| 243 |
+
border-radius: 12px;
|
| 244 |
+
padding: 1.5rem;
|
| 245 |
+
margin-top: 1rem;
|
| 246 |
+
border: 1px solid var(--border);
|
| 247 |
+
animation: fadeIn 0.5s ease;
|
| 248 |
+
}
|
| 249 |
+
.question-text {
|
| 250 |
+
font-weight: 600;
|
| 251 |
+
margin-bottom: 1rem;
|
| 252 |
+
color: var(--primary);
|
| 253 |
+
display: flex;
|
| 254 |
+
align-items: center;
|
| 255 |
+
gap: 0.5rem;
|
| 256 |
+
}
|
| 257 |
+
.answer-text p {
|
| 258 |
+
line-height: 1.6;
|
| 259 |
+
margin-bottom: 0.75rem;
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
/* Loading state */
|
| 263 |
+
.loading-state {
|
| 264 |
+
display: flex;
|
| 265 |
+
flex-direction: column;
|
| 266 |
+
align-items: center;
|
| 267 |
+
padding: 2rem 0;
|
| 268 |
+
}
|
| 269 |
+
.loader {
|
| 270 |
+
border: 4px solid var(--border);
|
| 271 |
+
border-top: 4px solid var(--primary);
|
| 272 |
+
border-radius: 50%;
|
| 273 |
+
width: 40px;
|
| 274 |
+
height: 40px;
|
| 275 |
+
animation: spin 1s linear infinite;
|
| 276 |
+
margin-bottom: 1rem;
|
| 277 |
+
}
|
| 278 |
+
.loading-text {
|
| 279 |
+
font-size: 0.875rem;
|
| 280 |
+
color: var(--text);
|
| 281 |
+
opacity: 0.7;
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
/* Footer */
|
| 285 |
+
footer {
|
| 286 |
+
padding: 1.5rem;
|
| 287 |
+
text-align: center;
|
| 288 |
+
font-size: 0.875rem;
|
| 289 |
+
color: var(--text);
|
| 290 |
+
opacity: 0.7;
|
| 291 |
+
border-top: 1px solid var(--border);
|
| 292 |
+
}
|
| 293 |
+
.footer-links {
|
| 294 |
+
display: flex;
|
| 295 |
+
justify-content: center;
|
| 296 |
+
gap: 1rem;
|
| 297 |
+
margin-top: 0.5rem;
|
| 298 |
+
}
|
| 299 |
+
.footer-link {
|
| 300 |
+
color: var(--primary);
|
| 301 |
+
text-decoration: none;
|
| 302 |
+
display: flex;
|
| 303 |
+
align-items: center;
|
| 304 |
+
gap: 0.25rem;
|
| 305 |
+
transition: all 0.2s ease;
|
| 306 |
+
}
|
| 307 |
+
.footer-link:hover {
|
| 308 |
+
color: var(--primary-dark);
|
| 309 |
+
transform: translateY(-1px);
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
/* Snowfall effect */
|
| 313 |
+
.snowfall {
|
| 314 |
+
position: fixed;
|
| 315 |
+
top: 0;
|
| 316 |
+
left: 0;
|
| 317 |
+
width: 100%;
|
| 318 |
+
height: 100%;
|
| 319 |
+
pointer-events: none;
|
| 320 |
+
z-index: 1;
|
| 321 |
+
background: linear-gradient(to bottom, transparent 95%, var(--bg) 100%);
|
| 322 |
+
}
|
| 323 |
+
.snowflake {
|
| 324 |
+
position: absolute;
|
| 325 |
+
background: white;
|
| 326 |
+
border-radius: 50%;
|
| 327 |
+
opacity: 0.8;
|
| 328 |
+
animation: fall linear infinite;
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
/* Animations */
|
| 332 |
+
@keyframes spin {
|
| 333 |
+
to { transform: rotate(360deg); }
|
| 334 |
+
}
|
| 335 |
+
@keyframes fadeIn {
|
| 336 |
+
from { opacity: 0; transform: translateY(10px); }
|
| 337 |
+
to { opacity: 1; transform: translateY(0); }
|
| 338 |
+
}
|
| 339 |
+
@keyframes fall {
|
| 340 |
+
to { transform: translateY(100vh); }
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
/* Responsive */
|
| 344 |
+
@media (max-width: 768px) {
|
| 345 |
+
.app-container {
|
| 346 |
+
padding: 1rem;
|
| 347 |
+
}
|
| 348 |
+
.search-box {
|
| 349 |
+
flex-direction: column;
|
| 350 |
+
}
|
| 351 |
+
.primary-btn {
|
| 352 |
+
width: 100%;
|
| 353 |
+
justify-content: center;
|
| 354 |
+
padding: 0.875rem;
|
| 355 |
+
}
|
| 356 |
+
.header-content {
|
| 357 |
+
flex-direction: column;
|
| 358 |
+
align-items: flex-start;
|
| 359 |
+
gap: 1rem;
|
| 360 |
+
}
|
| 361 |
+
#themeToggle {
|
| 362 |
+
align-self: flex-end;
|
| 363 |
+
}
|
| 364 |
+
}
|
templates/index.html
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<meta name="theme-color" content="#3b82f6" />
|
| 7 |
+
<title>RAG Explorer | Intelligent Knowledge Assistant</title>
|
| 8 |
+
|
| 9 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" />
|
| 10 |
+
<link rel="stylesheet" href="/static/styles.css" />
|
| 11 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 12 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 13 |
+
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&display=swap" rel="stylesheet" />
|
| 14 |
+
</head>
|
| 15 |
+
<body>
|
| 16 |
+
<div class="app-container">
|
| 17 |
+
<!-- Snow Background -->
|
| 18 |
+
<div class="snowfall"></div>
|
| 19 |
+
|
| 20 |
+
<!-- Main Card -->
|
| 21 |
+
<div class="main-card">
|
| 22 |
+
<!-- Header -->
|
| 23 |
+
<header>
|
| 24 |
+
<div class="header-content">
|
| 25 |
+
<div>
|
| 26 |
+
<h1>NeoBot</h1>
|
| 27 |
+
<p class="subtitle">Powered by RAG (Retrieval-Augmented Generation)</p>
|
| 28 |
+
</div>
|
| 29 |
+
<button id="themeToggle" aria-label="Toggle dark mode">
|
| 30 |
+
<i class="fas fa-moon"></i>
|
| 31 |
+
</button>
|
| 32 |
+
</div>
|
| 33 |
+
</header>
|
| 34 |
+
|
| 35 |
+
<!-- Main Content -->
|
| 36 |
+
<main>
|
| 37 |
+
<div class="search-container">
|
| 38 |
+
<div class="search-box">
|
| 39 |
+
<input
|
| 40 |
+
type="text"
|
| 41 |
+
id="queryInput"
|
| 42 |
+
placeholder="Ask your question here..."
|
| 43 |
+
autocomplete="off"
|
| 44 |
+
aria-label="Question input"
|
| 45 |
+
/>
|
| 46 |
+
<button id="askButton" class="primary-btn">
|
| 47 |
+
<i class="fas fa-search"></i> Ask
|
| 48 |
+
</button>
|
| 49 |
+
</div>
|
| 50 |
+
|
| 51 |
+
<div class="examples">
|
| 52 |
+
<p>Try asking:</p>
|
| 53 |
+
<div class="example-chips" id="exampleChips">
|
| 54 |
+
<!-- Questions will be dynamically added here -->
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<!-- Answer Response -->
|
| 59 |
+
<div class="response-container" id="responseContainer">
|
| 60 |
+
<div class="welcome-message">
|
| 61 |
+
<h2>Welcome to RAG Explorer!</h2>
|
| 62 |
+
<p>Ask any question to get accurate, document-backed answers instantly.</p>
|
| 63 |
+
</div>
|
| 64 |
+
</div>
|
| 65 |
+
</main>
|
| 66 |
+
|
| 67 |
+
<!-- Footer -->
|
| 68 |
+
<footer>
|
| 69 |
+
<p>ChatBot</p>
|
| 70 |
+
<div class="footer-links">
|
| 71 |
+
<a href="#" class="footer-link"><i class="fab fa-github"></i> Source</a>
|
| 72 |
+
<a href="#" class="footer-link"><i class="fas fa-info-circle"></i> About</a>
|
| 73 |
+
</div>
|
| 74 |
+
</footer>
|
| 75 |
+
</div>
|
| 76 |
+
</div>
|
| 77 |
+
|
| 78 |
+
<script src="/static/script.js"></script>
|
| 79 |
+
</body>
|
| 80 |
+
</html>
|
| 81 |
+
|
vector_rag.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 2 |
+
from langchain_community.vectorstores import FAISS
|
| 3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 4 |
+
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
|
| 5 |
+
from langchain_community.llms import HuggingFacePipeline
|
| 6 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 7 |
+
import os
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
loader = PyPDFLoader("data/sample.pdf")
|
| 13 |
+
documents = loader.load()
|
| 14 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 15 |
+
chunks = text_splitter.split_documents(documents)
|
| 16 |
+
|
| 17 |
+
if not chunks:
|
| 18 |
+
raise ValueError("No document chunks found. Ensure 'sample.pdf' exists and is readable.")
|
| 19 |
+
|
| 20 |
+
# Embed & store (HuggingFace Embeddings are free and fast)
|
| 21 |
+
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
|
| 22 |
+
vectorstore = FAISS.from_documents(chunks, embeddings)
|
| 23 |
+
|
| 24 |
+
retriever = vectorstore.as_retriever()
|
| 25 |
+
|
| 26 |
+
# 1. NEW MODEL NAME
|
| 27 |
+
model_name = "Qwen/Qwen2-1.5B-Instruct"
|
| 28 |
+
|
| 29 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 30 |
+
|
| 31 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
|
| 32 |
+
|
| 33 |
+
# 2. Use the pipeline for text generation
|
| 34 |
+
llm_pipeline = pipeline(
|
| 35 |
+
"text-generation",
|
| 36 |
+
model=model,
|
| 37 |
+
tokenizer=tokenizer,
|
| 38 |
+
max_new_tokens=512,
|
| 39 |
+
do_sample=True,
|
| 40 |
+
temperature=0.7,
|
| 41 |
+
top_p=0.9,
|
| 42 |
+
)
|
| 43 |
+
llm = HuggingFacePipeline(pipeline=llm_pipeline)
|
| 44 |
+
|
| 45 |
+
def query_vector_store(query: str) -> str | None:
    """Answer *query* using context retrieved from the vector store.

    The module-level ``retriever`` is asked for documents relevant to the
    query. Their page contents are joined into a context section of a
    prompt, which is sent to the module-level ``llm``.

    Args:
        query: The user's question.

    Returns:
        The generated answer with any echo of the prompt removed and
        surrounding whitespace stripped, or ``None`` when the retriever
        returns no documents.
    """
    # `invoke` is the Runnable entry point that replaces the deprecated
    # `get_relevant_documents` / `predict` helpers.
    docs = retriever.invoke(query)
    if not docs:
        return None

    context = "\n\n".join(doc.page_content for doc in docs)
    prompt = f"""Use the following context to answer the question:\n\n{context}\n\nQuestion: {query}\nAnswer:"""

    raw_output = llm.invoke(prompt)
    # Remove the prompt in case the generation output echoes it back.
    return raw_output.replace(prompt, "").strip()
|