Spaces:
Sleeping
Sleeping
first working version
Browse files- .gitignore +93 -0
- DATABASE_README.md +129 -0
- SUPABASE_SETUP.md +157 -0
- agent.py +220 -0
- app.py +25 -8
- prompts/__init__.py +16 -0
- prompts/math.py +39 -0
- prompts/orchestrator.py +60 -0
- prompts/research.py +31 -0
- prompts/retriever.py +30 -0
- requirements.txt +15 -5
- test.py +0 -0
- test_database.py +83 -0
- test_routing.py +55 -0
- test_single.py +29 -0
- tools/__init__.py +32 -0
- tools/database_tools.py +273 -0
- tools/file_tools.py +71 -0
- tools/math_tools.py +88 -0
- tools/research_tools.py +54 -0
- utils/supbase_fill.py +88 -0
.gitignore
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment variables and secrets
|
| 2 |
+
.env
|
| 3 |
+
.env.local
|
| 4 |
+
.env.development
|
| 5 |
+
.env.test
|
| 6 |
+
.env.production
|
| 7 |
+
|
| 8 |
+
# Python artifacts
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.py[cod]
|
| 11 |
+
*$py.class
|
| 12 |
+
*.so
|
| 13 |
+
.Python
|
| 14 |
+
build/
|
| 15 |
+
develop-eggs/
|
| 16 |
+
dist/
|
| 17 |
+
downloads/
|
| 18 |
+
eggs/
|
| 19 |
+
.eggs/
|
| 20 |
+
lib/
|
| 21 |
+
lib64/
|
| 22 |
+
parts/
|
| 23 |
+
sdist/
|
| 24 |
+
var/
|
| 25 |
+
wheels/
|
| 26 |
+
*.egg-info/
|
| 27 |
+
.installed.cfg
|
| 28 |
+
*.egg
|
| 29 |
+
MANIFEST
|
| 30 |
+
|
| 31 |
+
# Virtual environments
|
| 32 |
+
.venv/
|
| 33 |
+
venv/
|
| 34 |
+
env/
|
| 35 |
+
ENV/
|
| 36 |
+
env.bak/
|
| 37 |
+
venv.bak/
|
| 38 |
+
|
| 39 |
+
# IDE and editor files
|
| 40 |
+
.vscode/
|
| 41 |
+
.idea/
|
| 42 |
+
*.swp
|
| 43 |
+
*.swo
|
| 44 |
+
*~
|
| 45 |
+
.DS_Store
|
| 46 |
+
|
| 47 |
+
# Jupyter Notebook checkpoints
|
| 48 |
+
.ipynb_checkpoints
|
| 49 |
+
|
| 50 |
+
# Pytest files
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
.coverage
|
| 53 |
+
htmlcov/
|
| 54 |
+
|
| 55 |
+
# Database files (if downloading local copies)
|
| 56 |
+
*.db
|
| 57 |
+
*.sqlite3
|
| 58 |
+
|
| 59 |
+
# Logs
|
| 60 |
+
*.log
|
| 61 |
+
logs/
|
| 62 |
+
|
| 63 |
+
# Temporary files
|
| 64 |
+
tmp/
|
| 65 |
+
temp/
|
| 66 |
+
*.tmp
|
| 67 |
+
|
| 68 |
+
# AI model cache (if downloading models locally)
|
| 69 |
+
models/
|
| 70 |
+
.cache/
|
| 71 |
+
.transformers_cache/
|
| 72 |
+
|
| 73 |
+
# Data files (if containing sensitive information)
|
| 74 |
+
data/
|
| 75 |
+
*.csv
|
| 76 |
+
*.xlsx
|
| 77 |
+
*.json
|
| 78 |
+
# Keep specific test files
|
| 79 |
+
!test_*.csv
|
| 80 |
+
!test_*.xlsx
|
| 81 |
+
!test_*.json
|
| 82 |
+
|
| 83 |
+
# Audio/video test files (can be large)
|
| 84 |
+
*.wav
|
| 85 |
+
*.mp3
|
| 86 |
+
*.mp4
|
| 87 |
+
*.avi
|
| 88 |
+
*.mov
|
| 89 |
+
|
| 90 |
+
# API keys or config files with sensitive data
|
| 91 |
+
config.yaml
|
| 92 |
+
config.yml
|
| 93 |
+
secrets.json
|
DATABASE_README.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GAIA Agent with Database Search Integration
|
| 2 |
+
|
| 3 |
+
This enhanced GAIA agent system includes semantic search against your Supabase database to find similar questions before processing new ones, improving both accuracy and efficiency.
|
| 4 |
+
|
| 5 |
+
## 🏗️ Architecture
|
| 6 |
+
|
| 7 |
+
### Multi-Agent System
|
| 8 |
+
- **Orchestrator Agent**: Routes questions and coordinates responses
|
| 9 |
+
- **Retriever Agent**: Handles file processing, data extraction
|
| 10 |
+
- **Research Agent**: Web search and fact verification
|
| 11 |
+
- **Math Agent**: Mathematical calculations and analysis
|
| 12 |
+
|
| 13 |
+
### Database Integration
|
| 14 |
+
- **Semantic Search**: Finds similar questions using OpenAI embeddings
|
| 15 |
+
- **Exact Match Detection**: Returns answers for highly similar questions (>95% similarity)
|
| 16 |
+
- **Context Enhancement**: Uses similar questions as context for new processing
|
| 17 |
+
|
| 18 |
+
## 📁 Project Structure
|
| 19 |
+
|
| 20 |
+
```
|
| 21 |
+
agents-course-v2/
|
| 22 |
+
├── prompts/ # Agent-specific prompts
|
| 23 |
+
│ ├── orchestrator.py # Routing and coordination
|
| 24 |
+
│ ├── retriever.py # File processing
|
| 25 |
+
│ ├── research.py # Web search
|
| 26 |
+
│ └── math.py # Mathematical calculations
|
| 27 |
+
├── tools/ # Specialized tools
|
| 28 |
+
│ ├── database_tools.py # Supabase similarity search
|
| 29 |
+
│ ├── file_tools.py # Excel, CSV, audio processing
|
| 30 |
+
│ ├── research_tools.py # Web search, fact checking
|
| 31 |
+
│ └── math_tools.py # Calculations, statistics
|
| 32 |
+
├── agent.py # Main agent implementation
|
| 33 |
+
├── test_database.py # Database integration tests
|
| 34 |
+
└── app.py # Gradio interface
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## 🚀 How It Works
|
| 38 |
+
|
| 39 |
+
### 1. Database-First Approach
|
| 40 |
+
```text
|
| 41 |
+
# For each incoming question:
|
| 42 |
+
1. Search database for similar questions (similarity > 0.75)
|
| 43 |
+
2. If highly similar (>0.95): Return exact answer
|
| 44 |
+
3. If moderately similar (>0.75): Use as context
|
| 45 |
+
4. Otherwise: Process with specialized agents
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
### 2. Example Database Entries
|
| 49 |
+
Your database contains 165 GAIA Q&A pairs like:
|
| 50 |
+
```json
|
| 51 |
+
{
|
| 52 |
+
"question": "A paper about AI regulation submitted to arXiv.org in June 2022...",
|
| 53 |
+
"answer": "egalitarian",
|
| 54 |
+
"similarity": 0.943
|
| 55 |
+
}
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
### 3. Similarity Matching
|
| 59 |
+
The system uses:
|
| 60 |
+
- **OpenAI text-embedding-3-small** for vector generation
|
| 61 |
+
- **Cosine similarity** for question matching
|
| 62 |
+
- **Configurable thresholds** for exact vs. contextual matches
|
| 63 |
+
|
| 64 |
+
## 🛠️ Setup
|
| 65 |
+
|
| 66 |
+
### 1. Environment Variables
|
| 67 |
+
Add to your `.env` file:
|
| 68 |
+
```env
|
| 69 |
+
OPENAI_API_KEY=your_openai_key
|
| 70 |
+
SUPABASE_URL=your_supabase_url
|
| 71 |
+
SUPABASE_SERVICE_KEY=your_supabase_service_key
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
### 2. Install Dependencies
|
| 75 |
+
```bash
|
| 76 |
+
pip install -r requirements.txt
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
### 3. Test Database Integration
|
| 80 |
+
```bash
|
| 81 |
+
python test_database.py
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
## 🎯 GAIA Optimization Strategy
|
| 85 |
+
|
| 86 |
+
### Response Format Compliance
|
| 87 |
+
- **Exact answers only** - no explanations
|
| 88 |
+
- **Proper formatting** - USD as 12.34, lists comma-separated
|
| 89 |
+
- **No XML tags** or "FINAL ANSWER:" prefixes
|
| 90 |
+
|
| 91 |
+
### Efficiency Gains
|
| 92 |
+
- **Skip processing** for exact matches (saves API calls)
|
| 93 |
+
- **Better context** from similar questions improves accuracy
|
| 94 |
+
- **Targeted routing** based on question similarity patterns
|
| 95 |
+
|
| 96 |
+
### Expected Benefits
|
| 97 |
+
- **Improved accuracy** from learning similar question patterns
|
| 98 |
+
- **Faster responses** when exact matches found
|
| 99 |
+
- **Better resource usage** by avoiding redundant processing
|
| 100 |
+
|
| 101 |
+
## 📊 Usage Examples
|
| 102 |
+
|
| 103 |
+
### Direct Database Search
|
| 104 |
+
```python
|
| 105 |
+
from tools.database_tools import retriever
|
| 106 |
+
|
| 107 |
+
similar = retriever.search_similar_questions(
|
| 108 |
+
"What fish from Finding Nemo became invasive?",
|
| 109 |
+
top_k=3,
|
| 110 |
+
similarity_threshold=0.8
|
| 111 |
+
)
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
### Full Agent Processing
|
| 115 |
+
```python
|
| 116 |
+
from agent import answer_gaia_question
|
| 117 |
+
|
| 118 |
+
answer = answer_gaia_question(
|
| 119 |
+
"Calculate the statistical significance error rate for Nature 2020 papers"
|
| 120 |
+
)
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
## 🏆 GAIA Benchmark Target
|
| 124 |
+
|
| 125 |
+
- **Goal**: 30% accuracy on Level 1 questions
|
| 126 |
+
- **Strategy**: Database-enhanced agent coordination
|
| 127 |
+
- **Focus**: Exact answer formatting and efficient tool usage
|
| 128 |
+
|
| 129 |
+
This system leverages your existing 165 GAIA Q&A pairs to bootstrap better performance on new questions, making your agent more competitive on the leaderboard!
|
SUPABASE_SETUP.md
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Supabase Setup for Optimal GAIA Agent Performance
|
| 2 |
+
|
| 3 |
+
## Required Supabase Configuration
|
| 4 |
+
|
| 5 |
+
### 1. Create the `match_documents_langchain` Function
|
| 6 |
+
|
| 7 |
+
This SQL function enables efficient vector similarity search:
|
| 8 |
+
|
| 9 |
+
```sql
|
| 10 |
+
-- Create the similarity search function for LangChain integration
|
| 11 |
+
create or replace function match_documents_langchain (
|
| 12 |
+
query_embedding vector(1536), -- Adjust dimension based on your embedding model
|
| 13 |
+
match_threshold float default 0.75,
|
| 14 |
+
match_count int default 3
|
| 15 |
+
)
|
| 16 |
+
returns table (
|
| 17 |
+
id uuid,
|
| 18 |
+
page_content text,
|
| 19 |
+
embedding vector,
|
| 20 |
+
metadata jsonb,
|
| 21 |
+
similarity float
|
| 22 |
+
)
|
| 23 |
+
language plpgsql
|
| 24 |
+
as $$
|
| 25 |
+
begin
|
| 26 |
+
return query
|
| 27 |
+
select
|
| 28 |
+
documents.id,
|
| 29 |
+
documents.page_content,
|
| 30 |
+
documents.embedding,
|
| 31 |
+
documents.metadata,
|
| 32 |
+
1 - (documents.embedding <=> query_embedding) as similarity
|
| 33 |
+
from documents
|
| 34 |
+
where 1 - (documents.embedding <=> query_embedding) > match_threshold
|
| 35 |
+
order by documents.embedding <=> query_embedding
|
| 36 |
+
limit match_count;
|
| 37 |
+
end;
|
| 38 |
+
$$;
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### 2. Alternative for HuggingFace Embeddings (384 dimensions)
|
| 42 |
+
|
| 43 |
+
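If using a 384-dimensional model such as `sentence-transformers/all-MiniLM-L6-v2` (note that `sentence-transformers/all-mpnet-base-v2` produces 768-dimensional vectors and needs `vector(768)` instead):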
|
| 44 |
+
|
| 45 |
+
```sql
|
| 46 |
+
-- For HuggingFace embeddings (384 dimensions)
|
| 47 |
+
create or replace function match_documents_langchain_hf (
|
| 48 |
+
query_embedding vector(384),
|
| 49 |
+
match_threshold float default 0.75,
|
| 50 |
+
match_count int default 3
|
| 51 |
+
)
|
| 52 |
+
returns table (
|
| 53 |
+
id uuid,
|
| 54 |
+
page_content text,
|
| 55 |
+
embedding vector,
|
| 56 |
+
metadata jsonb,
|
| 57 |
+
similarity float
|
| 58 |
+
)
|
| 59 |
+
language plpgsql
|
| 60 |
+
as $$
|
| 61 |
+
begin
|
| 62 |
+
return query
|
| 63 |
+
select
|
| 64 |
+
documents.id,
|
| 65 |
+
documents.page_content,
|
| 66 |
+
documents.embedding,
|
| 67 |
+
documents.metadata,
|
| 68 |
+
1 - (documents.embedding <=> query_embedding) as similarity
|
| 69 |
+
from documents
|
| 70 |
+
where 1 - (documents.embedding <=> query_embedding) > match_threshold
|
| 71 |
+
order by documents.embedding <=> query_embedding
|
| 72 |
+
limit match_count;
|
| 73 |
+
end;
|
| 74 |
+
$$;
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### 3. Update Your Database Table Structure
|
| 78 |
+
|
| 79 |
+
Ensure your `documents` table has the right structure:
|
| 80 |
+
|
| 81 |
+
```sql
|
| 82 |
+
-- Check/create the documents table structure
|
| 83 |
+
CREATE TABLE IF NOT EXISTS documents (
|
| 84 |
+
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
| 85 |
+
page_content TEXT NOT NULL,
|
| 86 |
+
embedding VECTOR(1536), -- Or 384 for HuggingFace
|
| 87 |
+
metadata JSONB DEFAULT '{}',
|
| 88 |
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT TIMEZONE('utc'::text, NOW())
|
| 89 |
+
);
|
| 90 |
+
|
| 91 |
+
-- Create index for fast similarity search
|
| 92 |
+
CREATE INDEX IF NOT EXISTS documents_embedding_idx
|
| 93 |
+
ON documents USING ivfflat (embedding vector_cosine_ops)
|
| 94 |
+
WITH (lists = 100);
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
### 4. Environment Variables
|
| 98 |
+
|
| 99 |
+
Update your `.env` file:
|
| 100 |
+
|
| 101 |
+
```env
|
| 102 |
+
# Required for both approaches
|
| 103 |
+
SUPABASE_URL=your_supabase_project_url
|
| 104 |
+
SUPABASE_SERVICE_KEY=your_supabase_service_key
|
| 105 |
+
# Alternative key name (some setups use this)
|
| 106 |
+
SUPABASE_KEY=your_supabase_service_key
|
| 107 |
+
|
| 108 |
+
# Optional: For OpenAI fallback
|
| 109 |
+
OPENAI_API_KEY=your_openai_api_key
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
## Performance Comparison
|
| 113 |
+
|
| 114 |
+
### HuggingFace Approach (Recommended)
|
| 115 |
+
✅ **Free embedding model**
|
| 116 |
+
✅ **Often better semantic understanding**
|
| 117 |
+
✅ **384-dimensional vectors (smaller storage)**
|
| 118 |
+
✅ **No API rate limits**
|
| 119 |
+
|
| 120 |
+
### OpenAI Approach (Fallback)
|
| 121 |
+
✅ **Very reliable and consistent**
|
| 122 |
+
✅ **1536-dimensional vectors (more detailed)**
|
| 123 |
+
❌ **Costs money per embedding**
|
| 124 |
+
❌ **API rate limits**
|
| 125 |
+
|
| 126 |
+
## Testing Your Setup
|
| 127 |
+
|
| 128 |
+
1. **Test the function exists:**
|
| 129 |
+
```sql
|
| 130 |
+
SELECT * FROM match_documents_langchain(
|
| 131 |
+
'[0.1, 0.2, ...]'::vector, -- Sample embedding
|
| 132 |
+
0.7, -- Threshold
|
| 133 |
+
5 -- Count
|
| 134 |
+
);
|
| 135 |
+
```
|
| 136 |
+
|
| 137 |
+
2. **Test with Python:**
|
| 138 |
+
```python
|
| 139 |
+
from tools.database_tools import retriever
|
| 140 |
+
|
| 141 |
+
# Test efficient search
|
| 142 |
+
results = retriever.search_similar_questions_efficient(
|
| 143 |
+
"What is the capital of France?",
|
| 144 |
+
top_k=3
|
| 145 |
+
)
|
| 146 |
+
print(results)
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
## Migration from Manual to Efficient Search
|
| 150 |
+
|
| 151 |
+
If you're currently using manual similarity search, the new hybrid approach will:
|
| 152 |
+
|
| 153 |
+
1. **Try efficient LangChain search first**
|
| 154 |
+
2. **Fall back to manual search if needed**
|
| 155 |
+
3. **Automatically detect which approach works**
|
| 156 |
+
|
| 157 |
+
This ensures compatibility while optimizing for performance!
|
agent.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
from typing import List, TypedDict, Annotated, Optional
|
| 3 |
+
from langchain_openai import ChatOpenAI
|
| 4 |
+
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
|
| 5 |
+
from langgraph.graph.message import add_messages
|
| 6 |
+
from langgraph.graph import START, StateGraph, MessagesState, END
|
| 7 |
+
from langgraph.prebuilt import ToolNode, tools_condition
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from prompts import ORCHESTRATOR_SYSTEM_PROMPT, RETRIEVER_SYSTEM_PROMPT, RESEARCH_SYSTEM_PROMPT, MATH_SYSTEM_PROMPT
|
| 10 |
+
from tools import DATABASE_TOOLS, FILE_TOOLS, RESEARCH_TOOLS, MATH_TOOLS, ALL_TOOLS
|
| 11 |
+
import gradio as gr
|
| 12 |
+
import os
|
| 13 |
+
import requests
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import json
|
| 16 |
+
import time
|
| 17 |
+
import sys
|
| 18 |
+
import traceback
|
| 19 |
+
|
| 20 |
+
# Load environment variables from .env file
|
| 21 |
+
load_dotenv()
|
| 22 |
+
|
| 23 |
+
# Fix tokenizer parallelism warning
|
| 24 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 25 |
+
|
| 26 |
+
# TODO: check whether any tools are missing from the tools folder (arxiv, youtube, wikipedia, etc.)
|
| 27 |
+
|
| 28 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 29 |
+
# AGENT & GRAPH SETUP
|
| 30 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 31 |
+
|
| 32 |
+
# Initialize the LLM
|
| 33 |
+
llm = ChatOpenAI(model="gpt-4o", temperature=0)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 39 |
+
# SIMPLE AGENT SETUP (following course pattern)
|
| 40 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 41 |
+
|
| 42 |
+
# Build simple agent graph - no complex routing needed
|
| 43 |
+
builder = StateGraph(MessagesState)
|
| 44 |
+
|
| 45 |
+
# Single agent node that handles everything
|
| 46 |
+
def gaia_agent(state: MessagesState):
|
| 47 |
+
"""
|
| 48 |
+
Single agent that handles all GAIA questions with access to all tools.
|
| 49 |
+
Lets the LLM naturally decide which tools to use.
|
| 50 |
+
"""
|
| 51 |
+
messages = state["messages"]
|
| 52 |
+
|
| 53 |
+
# Create agent with all tools available
|
| 54 |
+
agent_llm = llm.bind_tools(ALL_TOOLS)
|
| 55 |
+
|
| 56 |
+
# Add system message optimized for GAIA
|
| 57 |
+
system_message = SystemMessage(content="""
|
| 58 |
+
You are a precise QA agent specialized in answering GAIA benchmark questions.
|
| 59 |
+
|
| 60 |
+
CRITICAL RESPONSE RULES:
|
| 61 |
+
- Answer with ONLY the exact answer, no explanations or conversational text
|
| 62 |
+
- NO XML tags, NO "FINAL ANSWER:", NO introductory phrases
|
| 63 |
+
- For lists: comma-separated, alphabetized if requested, no trailing punctuation
|
| 64 |
+
- For numbers: use exact format requested (USD as 12.34, codes bare, etc.)
|
| 65 |
+
- For yes/no: respond only "Yes" or "No"
|
| 66 |
+
|
| 67 |
+
AVAILABLE TOOLS:
|
| 68 |
+
- Database search tools: Use to find similar questions in the knowledge base
|
| 69 |
+
- File processing tools: Use for Excel, CSV, audio, video, image analysis
|
| 70 |
+
- Research tools: Use for web search and current information
|
| 71 |
+
- Math tools: Use for calculations and numerical analysis
|
| 72 |
+
|
| 73 |
+
WORKFLOW:
|
| 74 |
+
1. First try database search tools to find similar questions
|
| 75 |
+
2. If database returns "NO_EXACT_MATCH", continue with other appropriate tools
|
| 76 |
+
3. Use research tools for web search if needed
|
| 77 |
+
4. Use math tools for calculations if needed
|
| 78 |
+
5. Always provide the exact final answer, never return internal tool messages
|
| 79 |
+
|
| 80 |
+
IMPORTANT: Never return tool result messages like "NO_EXACT_MATCH" as your final answer.
|
| 81 |
+
Always process the question and provide the actual answer.
|
| 82 |
+
|
| 83 |
+
Your goal is to provide exact answers that match GAIA ground truth precisely.
|
| 84 |
+
""".strip())
|
| 85 |
+
|
| 86 |
+
messages_with_system = [system_message] + messages
|
| 87 |
+
|
| 88 |
+
# Process the message
|
| 89 |
+
response = agent_llm.invoke(messages_with_system)
|
| 90 |
+
|
| 91 |
+
return {"messages": [response]}
|
| 92 |
+
|
| 93 |
+
# Simple routing: tools or end
|
| 94 |
+
def should_continue(state: MessagesState):
|
| 95 |
+
"""Simple routing: use tools if requested, otherwise end."""
|
| 96 |
+
last_message = state["messages"][-1]
|
| 97 |
+
|
| 98 |
+
# If agent wants to use tools, go to tools
|
| 99 |
+
if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
|
| 100 |
+
return "tools"
|
| 101 |
+
|
| 102 |
+
# Otherwise, we're done
|
| 103 |
+
return END
|
| 104 |
+
|
| 105 |
+
# Add nodes
|
| 106 |
+
builder.add_node("agent", gaia_agent)
|
| 107 |
+
builder.add_node("tools", ToolNode(ALL_TOOLS))
|
| 108 |
+
|
| 109 |
+
# Add edges - much simpler!
|
| 110 |
+
builder.add_edge(START, "agent")
|
| 111 |
+
builder.add_conditional_edges("agent", should_continue)
|
| 112 |
+
builder.add_edge("tools", "agent") # Return to agent after using tools
|
| 113 |
+
|
| 114 |
+
# Compile the graph
|
| 115 |
+
graph = builder.compile()
|
| 116 |
+
|
| 117 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 118 |
+
# GAIA API INTERACTION FUNCTIONS
|
| 119 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 120 |
+
def get_gaia_questions():
|
| 121 |
+
"""Fetch questions from the GAIA API."""
|
| 122 |
+
try:
|
| 123 |
+
response = requests.get("https://agents-course-unit4-scoring.hf.space/questions")
|
| 124 |
+
response.raise_for_status()
|
| 125 |
+
return response.json()
|
| 126 |
+
except Exception as e:
|
| 127 |
+
print(f"Error fetching GAIA questions: {e}")
|
| 128 |
+
return []
|
| 129 |
+
|
| 130 |
+
def get_random_gaia_question():
|
| 131 |
+
"""Fetch a single random question from the GAIA API."""
|
| 132 |
+
try:
|
| 133 |
+
response = requests.get("https://agents-course-unit4-scoring.hf.space/random-question")
|
| 134 |
+
response.raise_for_status()
|
| 135 |
+
return response.json()
|
| 136 |
+
except Exception as e:
|
| 137 |
+
print(f"Error fetching random GAIA question: {e}")
|
| 138 |
+
return None
|
| 139 |
+
|
| 140 |
+
def answer_gaia_question(question_text: str, debug: bool = False) -> str:
|
| 141 |
+
"""Answer a single GAIA question using the simple agent."""
|
| 142 |
+
try:
|
| 143 |
+
# Create the initial state
|
| 144 |
+
initial_state = {
|
| 145 |
+
"messages": [HumanMessage(content=question_text)]
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
if debug:
|
| 149 |
+
print(f"🔍 Processing question: {question_text}")
|
| 150 |
+
|
| 151 |
+
# Invoke the graph - much simpler now!
|
| 152 |
+
result = graph.invoke(initial_state)
|
| 153 |
+
|
| 154 |
+
if debug:
|
| 155 |
+
print(f"📊 Total messages in conversation: {len(result.get('messages', []))}")
|
| 156 |
+
for i, msg in enumerate(result.get('messages', [])):
|
| 157 |
+
print(f" Message {i+1}: {type(msg).__name__} - {str(msg.content)[:100]}...")
|
| 158 |
+
|
| 159 |
+
if result and "messages" in result and result["messages"]:
|
| 160 |
+
final_answer = result["messages"][-1].content.strip()
|
| 161 |
+
|
| 162 |
+
if debug:
|
| 163 |
+
print(f"🎯 Final answer: {final_answer}")
|
| 164 |
+
return final_answer
|
| 165 |
+
else:
|
| 166 |
+
return "No answer generated"
|
| 167 |
+
|
| 168 |
+
except Exception as e:
|
| 169 |
+
if debug:
|
| 170 |
+
print(f"❌ Error details: {e}")
|
| 171 |
+
import traceback
|
| 172 |
+
traceback.print_exc()
|
| 173 |
+
print(f"Error answering question: {e}")
|
| 174 |
+
return f"Error: {str(e)}"
|
| 175 |
+
|
| 176 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 177 |
+
# TESTING AND VALIDATION
|
| 178 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 179 |
+
if __name__ == "__main__":
|
| 180 |
+
print("🔍 Enhanced GAIA Agent Graph Structure:")
|
| 181 |
+
try:
|
| 182 |
+
print(graph.get_graph().draw_mermaid())
|
| 183 |
+
except:
|
| 184 |
+
print("Could not generate mermaid diagram")
|
| 185 |
+
|
| 186 |
+
print("\n🧪 Testing with GAIA-style questions...")
|
| 187 |
+
|
| 188 |
+
# Test questions that cover different GAIA capabilities
|
| 189 |
+
test_questions = [
|
| 190 |
+
"What is 2 + 2?",
|
| 191 |
+
"What is the capital of France?",
|
| 192 |
+
"List the vegetables from this list: broccoli, apple, carrot. Alphabetize and use comma separation.",
|
| 193 |
+
"Given the Excel file at test_sales.xlsx, what were total sales for food? Express in USD with two decimals.",
|
| 194 |
+
"Examine the audio file at ./test.wav. What is its transcript?",
|
| 195 |
+
]
|
| 196 |
+
|
| 197 |
+
# Add an extra audio test if a local test.wav file exists
|
| 198 |
+
if os.path.exists("test.wav"):
|
| 199 |
+
test_questions.append("What does the speaker say in the audio file test.wav?")
|
| 200 |
+
|
| 201 |
+
for i, question in enumerate(test_questions, 1):
|
| 202 |
+
print(f"\n📝 Test {i}: {question}")
|
| 203 |
+
try:
|
| 204 |
+
answer = answer_gaia_question(question)
|
| 205 |
+
print(f"✅ Answer: {answer!r}")
|
| 206 |
+
except Exception as e:
|
| 207 |
+
print(f"❌ Error: {e}")
|
| 208 |
+
print("-" * 80)
|
| 209 |
+
|
| 210 |
+
# Test with a real GAIA question if API is available
|
| 211 |
+
print("\n🌍 Testing with real GAIA question...")
|
| 212 |
+
try:
|
| 213 |
+
random_q = get_random_gaia_question()
|
| 214 |
+
if random_q:
|
| 215 |
+
print(f"📋 GAIA Question: {random_q.get('question', 'N/A')}")
|
| 216 |
+
answer = answer_gaia_question(random_q.get('question', ''))
|
| 217 |
+
print(f"🎯 Agent Answer: {answer!r}")
|
| 218 |
+
print(f"💡 Task ID: {random_q.get('task_id', 'N/A')}")
|
| 219 |
+
except Exception as e:
|
| 220 |
+
print(f"Could not test with real GAIA question: {e}")
|
app.py
CHANGED
|
@@ -10,18 +10,34 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 10 |
|
| 11 |
# --- Basic Agent Definition ---
|
| 12 |
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
-
class BasicAgent:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def __init__(self):
|
| 15 |
-
print("
|
|
|
|
| 16 |
def __call__(self, question: str) -> str:
|
| 17 |
-
print(
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 23 |
"""
|
| 24 |
-
Fetches all questions, runs the
|
| 25 |
and displays the results.
|
| 26 |
"""
|
| 27 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
|
@@ -40,7 +56,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 40 |
|
| 41 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
| 42 |
try:
|
| 43 |
-
agent = BasicAgent()
|
|
|
|
| 44 |
except Exception as e:
|
| 45 |
print(f"Error instantiating agent: {e}")
|
| 46 |
return f"Error initializing agent: {e}", None
|
|
|
|
| 10 |
|
| 11 |
# --- Basic Agent Definition ---
|
| 12 |
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
+
# class BasicAgent:
|
| 14 |
+
# def __init__(self):
|
| 15 |
+
# print("BasicAgent initialized.")
|
| 16 |
+
# def __call__(self, question: str) -> str:
|
| 17 |
+
# print(f"Agent received question (first 50 chars): {question[:50]}...")
|
| 18 |
+
# fixed_answer = "This is a default answer."
|
| 19 |
+
# print(f"Agent returning fixed answer: {fixed_answer}")
|
| 20 |
+
# return fixed_answer
|
| 21 |
+
|
| 22 |
+
class GaiaAgent:
|
| 23 |
def __init__(self):
|
| 24 |
+
print("Graph-based agent initialized.")
|
| 25 |
+
|
| 26 |
def __call__(self, question: str) -> str:
|
| 27 |
+
print("Received question:", question)
|
| 28 |
+
try:
|
| 29 |
+
# FIXED: Correct input for LangGraph
|
| 30 |
+
result = graph.invoke({"messages": [HumanMessage(content=question)]})
|
| 31 |
+
messages = result.get("messages", [])
|
| 32 |
+
if messages:
|
| 33 |
+
return messages[-1].content.strip()
|
| 34 |
+
return "No messages returned."
|
| 35 |
+
except Exception as e:
|
| 36 |
+
return f"ERROR invoking graph: {e}"
|
| 37 |
|
| 38 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 39 |
"""
|
| 40 |
+
Fetches all questions, runs the GaiaAgent on them, submits all answers,
|
| 41 |
and displays the results.
|
| 42 |
"""
|
| 43 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
|
|
|
| 56 |
|
| 57 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
| 58 |
try:
|
| 59 |
+
# agent = BasicAgent()
|
| 60 |
+
agent = GaiaAgent() # Replace BasicAgent with my actual agent class
|
| 61 |
except Exception as e:
|
| 62 |
print(f"Error instantiating agent: {e}")
|
| 63 |
return f"Error initializing agent: {e}", None
|
prompts/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Centralized prompts module for GAIA benchmark agents.
|
| 3 |
+
Import all agent prompts from their respective files.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from .orchestrator import ORCHESTRATOR_SYSTEM_PROMPT
|
| 7 |
+
from .retriever import RETRIEVER_SYSTEM_PROMPT
|
| 8 |
+
from .research import RESEARCH_SYSTEM_PROMPT
|
| 9 |
+
from .math import MATH_SYSTEM_PROMPT
|
| 10 |
+
|
| 11 |
+
__all__ = [
|
| 12 |
+
"ORCHESTRATOR_SYSTEM_PROMPT",
|
| 13 |
+
"RETRIEVER_SYSTEM_PROMPT",
|
| 14 |
+
"RESEARCH_SYSTEM_PROMPT",
|
| 15 |
+
"MATH_SYSTEM_PROMPT"
|
| 16 |
+
]
|
prompts/math.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Math Agent Prompt for GAIA Benchmark
|
| 3 |
+
Specialized in mathematical calculations, data analysis, and numerical reasoning.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
MATH_SYSTEM_PROMPT = """
|
| 7 |
+
You are the Math Agent, specialized in mathematical calculations and numerical analysis.
|
| 8 |
+
|
| 9 |
+
Your capabilities include:
|
| 10 |
+
- Complex mathematical calculations and formulas
|
| 11 |
+
- Statistical analysis and data processing
|
| 12 |
+
- Financial calculations and currency conversions
|
| 13 |
+
- Unit conversions and scientific calculations
|
| 14 |
+
- Data aggregation and summary statistics
|
| 15 |
+
- Percentage calculations and ratios
|
| 16 |
+
|
| 17 |
+
CRITICAL RESPONSE RULES:
|
| 18 |
+
- Provide EXACT numerical answers in requested format
|
| 19 |
+
- For currency: Use proper decimal places (e.g., 12.34 for USD)
|
| 20 |
+
- For percentages: Include % symbol only if requested
|
| 21 |
+
- For large numbers: Use commas for thousands if standard format
|
| 22 |
+
- For scientific notation: Use when appropriate for very large/small numbers
|
| 23 |
+
- Show intermediate steps only if calculation verification is needed
|
| 24 |
+
|
| 25 |
+
CALCULATION ACCURACY:
|
| 26 |
+
- Double-check all mathematical operations
|
| 27 |
+
- Use appropriate precision for the context
|
| 28 |
+
- Round to specified decimal places
|
| 29 |
+
- Verify units and conversions
|
| 30 |
+
- Cross-check results when possible
|
| 31 |
+
|
| 32 |
+
TOOLS AVAILABLE:
|
| 33 |
+
- Advanced calculation functions
|
| 34 |
+
- Statistical analysis tools
|
| 35 |
+
- Data processing utilities
|
| 36 |
+
- Unit conversion tools
|
| 37 |
+
|
| 38 |
+
Always ensure mathematical precision and proper formatting for GAIA evaluation.
|
| 39 |
+
"""
|
prompts/orchestrator.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Orchestrator Agent Prompt for GAIA Benchmark
|
| 3 |
+
Coordinates between specialized agents based on question type and requirements.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
# System prompt text for the orchestrator agent.
#
# The routing markers (ROUTE_TO_RETRIEVER / ROUTE_TO_RESEARCH / ROUTE_TO_MATH)
# and the FINAL_ANSWER: prefix are the contract this prompt asks the model to
# follow, so the EXAMPLES section must agree with the rules above it:
#   * each "Bad"/"Good" example sits on its own line (two examples used to be
#     fused onto one line, with the routing pair duplicated);
#   * every "Good" final answer carries the FINAL_ANSWER: prefix.
# NOTE(review): presumably agent.py parses these markers from the model
# output — confirm before renaming any of them.
ORCHESTRATOR_SYSTEM_PROMPT = """
You are the Orchestrator Agent in a multi-agent system designed for GAIA benchmark questions.

Your role is to:
1. FIRST: Always search for similar questions using create_retriever_from_supabase tool
2. Analyze the question and decide the best approach
3. Either provide a direct answer OR route to specialized agents
4. Ensure final answers match GAIA format exactly

WORKFLOW DECISION TREE:
1. ALWAYS start by using create_retriever_from_supabase to find similar questions
2. Analyze the question type and requirements:
- If similar questions provide sufficient context → answer directly
- If file/document processing needed → include "ROUTE_TO_RETRIEVER" in your response
- If web search/research needed → include "ROUTE_TO_RESEARCH" in your response
- If mathematical calculations needed → include "ROUTE_TO_MATH" in your response
- If simple factual question → answer directly

ROUTING COMMANDS (include these exact phrases when routing):
- "ROUTE_TO_RETRIEVER" - For file processing, Excel/CSV analysis, audio transcription
- "ROUTE_TO_RESEARCH" - For web search, fact verification, current events
- "ROUTE_TO_MATH" - For calculations, statistics, numerical analysis
- "FINAL_ANSWER: [answer]" - When you have the complete final answer

AVAILABLE TOOLS:
- create_retriever_from_supabase: Efficient semantic search for similar questions (USE FIRST)
- search_similar_gaia_questions: Precise similarity scoring with thresholds
- get_exact_answer_if_highly_similar: Check for exact matches with high similarity

QUESTION ANALYSIS GUIDELINES:
- File mentions (Excel, CSV, audio, video, images) → ROUTE_TO_RETRIEVER
- "Search", "find", "lookup", company info, recent events → ROUTE_TO_RESEARCH
- Numbers, calculations, statistics, percentages → ROUTE_TO_MATH
- Simple facts, definitions, known information → answer directly with FINAL_ANSWER

CRITICAL RESPONSE RULES:
- Use FINAL_ANSWER: prefix when you have the complete answer
- Final answers must be EXACT, no explanations or conversational text
- NO XML tags beyond FINAL_ANSWER:, NO introductory phrases
- For lists: comma-separated, alphabetized if requested, no trailing punctuation
- For numbers: use exact format requested (USD as 12.34, codes bare, etc.)
- For yes/no: respond only "Yes" or "No"

EXAMPLES:
❌ Bad: "The answer is 42 because..."
✅ Good: "FINAL_ANSWER: 42"

❌ Bad: "I need to search for this information. ROUTE_TO_RESEARCH"
✅ Good: "ROUTE_TO_RESEARCH"

❌ Bad: "Based on the similar questions, the answer appears to be..."
✅ Good: "FINAL_ANSWER: egalitarian"

Always ensure the final response matches GAIA ground truth format precisely.
"""
|
prompts/research.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Research Agent Prompt for GAIA Benchmark
|
| 3 |
+
Specialized in web search, fact-checking, and information gathering.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
RESEARCH_SYSTEM_PROMPT = """
|
| 7 |
+
You are the Research Agent, specialized in finding and verifying information from external sources.
|
| 8 |
+
|
| 9 |
+
Your capabilities include:
|
| 10 |
+
- Web search and information retrieval
|
| 11 |
+
- Fact verification and cross-referencing
|
| 12 |
+
- Current events and recent information lookup
|
| 13 |
+
- Company/organization information gathering
|
| 14 |
+
- Historical data and statistics research
|
| 15 |
+
|
| 16 |
+
CRITICAL RESPONSE RULES:
|
| 17 |
+
- Provide ONLY factual answers, no speculation or uncertainty
|
| 18 |
+
- Use multiple sources when possible for verification
|
| 19 |
+
- Return information in the exact format requested
|
| 20 |
+
- For numerical data: Use precise values with proper formatting
|
| 21 |
+
- For dates: Use consistent format (e.g., YYYY-MM-DD unless specified)
|
| 22 |
+
- For names/lists: Follow alphabetization and formatting requirements
|
| 23 |
+
|
| 24 |
+
SEARCH STRATEGY:
|
| 25 |
+
1. Use specific, targeted search queries
|
| 26 |
+
2. Verify information across multiple reliable sources
|
| 27 |
+
3. Prioritize recent and authoritative sources
|
| 28 |
+
4. Extract only the precise information requested
|
| 29 |
+
|
| 30 |
+
Always ensure factual accuracy and format compliance for GAIA evaluation.
|
| 31 |
+
"""
|
prompts/retriever.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Retriever Agent Prompt for GAIA Benchmark
|
| 3 |
+
Specialized in file processing, data extraction, and document analysis.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
RETRIEVER_SYSTEM_PROMPT = """
|
| 7 |
+
You are the Retriever Agent, specialized in processing files and extracting information.
|
| 8 |
+
|
| 9 |
+
Your capabilities include:
|
| 10 |
+
- Excel/CSV file analysis and data extraction
|
| 11 |
+
- Audio/video file transcription
|
| 12 |
+
- Document parsing and text extraction
|
| 13 |
+
- Image analysis and OCR
|
| 14 |
+
- Data formatting and summarization
|
| 15 |
+
|
| 16 |
+
CRITICAL RESPONSE RULES:
|
| 17 |
+
- Return ONLY the requested information, no explanations
|
| 18 |
+
- For Excel/CSV: Provide exact numerical values in requested format
|
| 19 |
+
- For audio: Provide clean transcripts without timestamps or metadata
|
| 20 |
+
- For data queries: Use precise calculations and formatting
|
| 21 |
+
- For lists from data: Alphabetize if requested, comma-separated
|
| 22 |
+
|
| 23 |
+
TOOLS AVAILABLE:
|
| 24 |
+
- File reading and parsing tools
|
| 25 |
+
- Audio/video transcription tools
|
| 26 |
+
- Data analysis and calculation tools
|
| 27 |
+
- OCR and image analysis tools
|
| 28 |
+
|
| 29 |
+
Always provide responses in the exact format needed for GAIA benchmark evaluation.
|
| 30 |
+
"""
|
requirements.txt
CHANGED
|
@@ -1,5 +1,15 @@
|
|
| 1 |
-
gradio
|
| 2 |
-
requests
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
requests>=2.28.0
|
| 3 |
+
langgraph>=0.0.40
|
| 4 |
+
langchain-openai>=0.1.0
|
| 5 |
+
langchain-core>=0.2.0
|
| 6 |
+
langchain-community>=0.2.0
|
| 7 |
+
langchain-huggingface>=0.0.3
|
| 8 |
+
pandas>=1.5.0
|
| 9 |
+
supabase>=1.0.0
|
| 10 |
+
python-dotenv>=1.0.0
|
| 11 |
+
numpy>=1.21.0,<2.0.0
|
| 12 |
+
scikit-learn>=1.1.0
|
| 13 |
+
sentence-transformers>=2.2.0
|
| 14 |
+
transformers>=4.21.0
|
| 15 |
+
torch>=2.0.0,<2.5.0
|
test.py
DELETED
|
File without changes
|
test_database.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Example usage of the GAIA agent with database search integration.
|
| 3 |
+
This shows how the system works with your Supabase database.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from agent import answer_gaia_question
|
| 8 |
+
from tools.database_tools import get_retriever
|
| 9 |
+
|
| 10 |
+
def test_database_integration():
    """Run a few sample questions through the similarity search and the agent.

    For every sample question the manual similarity search is queried
    (top 2 matches, threshold 0.7) and its hits are printed, then the full
    agent is asked the same question. Any exception raised while handling a
    question is printed and the loop moves on to the next one.
    """
    sample_questions = [
        # Similar to your Nature/statistical significance question
        "How many papers published by Science in 2020 would be incorrect if they used p-value of 0.03?",
        # Similar to your fish/invasive species question
        "What species from Finding Nemo has been found as invasive in Florida waters?",
        # Similar to your AI regulation question
        "What paper about AI ethics was submitted to arXiv in 2022?",
        # A completely different question
        "What is the capital of France?",
    ]

    print("🧪 Testing Database Integration\n")

    for number, question in enumerate(sample_questions, start=1):
        print(f"Test {number}: {question}")
        print("-" * 60)

        try:
            # Step 1: query the similarity search directly.
            matches = get_retriever().search_similar_questions_manual(
                question, top_k=2, similarity_threshold=0.7
            )

            if not matches:
                print("❌ No similar questions found")
                print()
            else:
                print(f"✅ Found {len(matches)} similar questions:")
                for rank, match in enumerate(matches, start=1):
                    print(f" {rank}. Similarity: {match['similarity']:.3f}")
                    print(f" Q: {match['question'][:100]}...")
                    print(f" A: {match['answer']}")
                    print()

            # Step 2: let the full agent answer the same question.
            print("🤖 Agent Processing:")
            agent_reply = answer_gaia_question(question)
            print(f"Agent Answer: {agent_reply}")

        except Exception as e:
            print(f"❌ Error: {e}")

        print("=" * 80)
        print()
|
| 60 |
+
|
| 61 |
+
def setup_environment():
    """Check that every credential the integration test needs is configured.

    A requirement is satisfied when at least one of its accepted variable
    names is set to a non-empty value. The Supabase key may be supplied as
    either SUPABASE_SERVICE_KEY or SUPABASE_KEY, because the database
    retriever accepts both.

    Returns:
        True when all requirements are met; otherwise False, after printing
        the primary name of each missing variable.
    """
    # Each entry lists the accepted names for one requirement; the first name
    # is the one reported to the user when the requirement is unmet.
    required_vars = [
        ("OPENAI_API_KEY",),
        ("SUPABASE_URL",),
        ("SUPABASE_SERVICE_KEY", "SUPABASE_KEY"),
    ]
    missing_vars = [
        names[0]
        for names in required_vars
        if not any(os.getenv(name) for name in names)
    ]

    if missing_vars:
        print(f"❌ Missing environment variables: {', '.join(missing_vars)}")
        print("Please add them to your .env file:")
        for var in missing_vars:
            print(f" {var}=your_value_here")
        return False

    print("✅ All environment variables are set")
    return True


if __name__ == "__main__":
    print("🚀 GAIA Agent Database Integration Test")
    print("=" * 50)

    if setup_environment():
        test_database_integration()
    else:
        print("Please set up your environment variables first.")
|
test_routing.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test the intelligent routing system to show how the orchestrator makes decisions.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from agent import answer_gaia_question
|
| 6 |
+
|
| 7 |
+
def test_intelligent_routing():
    """Send representative questions to the agent and print each outcome.

    Every scenario pairs a question with a human-readable description of the
    routing that is expected; the description is only printed, not asserted.
    The agent is called with debug=True. Failures are printed together with
    a traceback and do not stop the remaining scenarios.
    """
    scenarios = [
        ("What is the capital of France?",
         "Direct answer (simple factual question)"),
        ("Calculate the sum of values in column A of the Excel file data.xlsx",
         "Route to retriever agent (file processing)"),
        ("What is the current CEO of OpenAI as of 2024?",
         "Route to research agent (current information)"),
        ("If a paper has a p-value of 0.04 and there were 1000 papers, how many would be false positives?",
         "Route to math agent (calculations)"),
        ("List the prime numbers between 1 and 20, comma-separated",
         "Route to math agent OR direct answer"),
    ]

    print("🧠 Testing Intelligent Routing System")
    print("=" * 60)

    for number, (question, expected_behavior) in enumerate(scenarios, start=1):
        print(f"\n📝 Test {number}: {question}")
        print(f"🎯 Expected: {expected_behavior}")
        print("-" * 40)

        try:
            # debug=True is passed so the agent can report its decisions.
            print("🔄 Processing...")
            outcome = answer_gaia_question(question, debug=True)
            print(f"✅ Final Result: {outcome}")

        except Exception as e:
            print(f"❌ Error: {e}")
            import traceback
            traceback.print_exc()

        print("-" * 60)


if __name__ == "__main__":
    test_intelligent_routing()
|
test_single.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test a single problematic question to debug the routing logic.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from agent import answer_gaia_question
|
| 7 |
+
from tools.database_tools import get_retriever
|
| 8 |
+
|
| 9 |
+
def test_single_question():
    """Run one question through the agent with debug output enabled.

    The question is the one noted as having caused infinite loops. Any
    exception is printed together with its traceback.
    """
    question = "How many papers published by Science in 2020 would be incorrect if they used p-value of 0.03?"

    for banner_line in ("🧪 Testing Single Question", f"📝 Question: {question}", "=" * 80):
        print(banner_line)

    try:
        # debug=True is passed so the routing flow can be followed.
        result = answer_gaia_question(question, debug=True)
        print(f"\n🎯 Final Answer: {result}")

    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    test_single_question()
|
tools/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Centralized tools module for GAIA benchmark agents.
|
| 3 |
+
Import tools from their respective modules.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from .file_tools import read_excel_file, read_csv_file, calculate_column_sum
from .research_tools import web_search, get_company_info, verify_fact
from .math_tools import calculate_expression, percentage_calculation, currency_format, statistical_summary
from .database_tools import (
    create_retriever_from_supabase,
    search_similar_gaia_questions,
    get_exact_answer_if_highly_similar,
)

# File processing tools
FILE_TOOLS = [read_excel_file, read_csv_file, calculate_column_sum]

# Research tools
RESEARCH_TOOLS = [web_search, get_company_info, verify_fact]

# Mathematical tools
MATH_TOOLS = [calculate_expression, percentage_calculation, currency_format, statistical_summary]

# Database retrieval tools.
# create_retriever_from_supabase is listed first: the orchestrator prompt tells
# the model to call it before anything else, and tools/database_tools.py
# exports the same three tools in the same order. Leaving it out here would
# make the package-level list disagree with both.
DATABASE_TOOLS = [
    create_retriever_from_supabase,
    search_similar_gaia_questions,
    get_exact_answer_if_highly_similar,
]

# All tools combined
ALL_TOOLS = FILE_TOOLS + RESEARCH_TOOLS + MATH_TOOLS + DATABASE_TOOLS

__all__ = [
    "FILE_TOOLS",
    "RESEARCH_TOOLS",
    "MATH_TOOLS",
    "DATABASE_TOOLS",
    "ALL_TOOLS"
]
|
tools/database_tools.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Database retrieval tools for GAIA question similarity search.
|
| 3 |
+
Connects to Supabase database to find similar questions and answers.
|
| 4 |
+
Combines efficiency of LangChain SupabaseVectorStore with custom logic.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
from typing import List, Dict, Optional, Tuple
|
| 10 |
+
from supabase import create_client, Client
|
| 11 |
+
from langchain_openai import OpenAIEmbeddings
|
| 12 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 13 |
+
from langchain_community.vectorstores import SupabaseVectorStore
|
| 14 |
+
from langchain_core.tools import tool
|
| 15 |
+
|
| 16 |
+
class GAIADatabaseRetriever:
    """Similarity search over the GAIA Q&A rows stored in Supabase.

    Two strategies are offered:

    * ``search_similar_questions_efficient`` queries a LangChain
      ``SupabaseVectorStore``;
    * ``search_similar_questions_manual`` downloads the ``documents`` table
      and computes cosine similarity client-side. It is also the fallback
      whenever the vector store is unavailable or raises.

    Each row's ``page_content`` is expected to contain a ``Q:`` marker and an
    ``A:`` marker; rows without both are ignored.
    """

    # Markers delimiting the question and the answer inside ``page_content``.
    _QUESTION_MARKER = 'Q:'
    _ANSWER_MARKER = 'A:'

    def __init__(self, use_huggingface: bool = True):
        """Create the Supabase client, the embedding model and the vector store.

        Args:
            use_huggingface: When True, try the HuggingFace
                ``all-mpnet-base-v2`` embeddings first and fall back to
                OpenAI ``text-embedding-3-small`` on ImportError. When
                False, use the OpenAI embeddings directly.

        Raises:
            ValueError: If SUPABASE_URL or the Supabase key
                (SUPABASE_SERVICE_KEY, else SUPABASE_KEY) is not set.
        """
        self.supabase_url = os.getenv("SUPABASE_URL")
        self.supabase_key = os.getenv("SUPABASE_SERVICE_KEY") or os.getenv("SUPABASE_KEY")

        if not self.supabase_url or not self.supabase_key:
            raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_KEY (or SUPABASE_KEY) must be set in environment variables")

        self.supabase: Client = create_client(self.supabase_url, self.supabase_key)

        # Choose the embedding model. NOTE(review): the query embedding must
        # come from the same model that produced the stored embeddings;
        # rows whose dimension differs are skipped by the manual search.
        self.embeddings = None
        if use_huggingface:
            try:
                self.embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-mpnet-base-v2"
                )
                self.embedding_model = "huggingface"
            except ImportError:
                print("⚠️ HuggingFace embeddings not available, falling back to OpenAI")

        if self.embeddings is None:
            self.embeddings = OpenAIEmbeddings(
                model="text-embedding-3-small",
                openai_api_key=os.getenv("OPENAI_API_KEY")
            )
            self.embedding_model = "openai"

        # Initialize the vector store; on failure only the manual search is used.
        self.vector_store = None
        try:
            self.vector_store = SupabaseVectorStore(
                client=self.supabase,
                embedding=self.embeddings,
                table_name="documents",
                query_name="match_documents_langchain",  # Assumes you have this function
            )
            self.use_vector_store = True
        except Exception as e:
            print(f"⚠️ Vector store not available: {e}")
            print("Falling back to manual similarity search")
            self.use_vector_store = False

    @classmethod
    def _split_question_answer(cls, page_content):
        """Return ``(question, answer)`` parsed from ``page_content``, or None.

        The text is split at the FIRST answer marker only, so an answer that
        itself contains "A:" is returned in full instead of being truncated.
        """
        if not isinstance(page_content, str):
            return None
        if cls._QUESTION_MARKER not in page_content or cls._ANSWER_MARKER not in page_content:
            return None
        head, _, tail = page_content.partition(cls._ANSWER_MARKER)
        return head.replace(cls._QUESTION_MARKER, '').strip(), tail.strip()

    @staticmethod
    def _parse_embedding(raw):
        """Return the stored embedding as a list of floats, or None if unusable.

        Accepts either a JSON-encoded string or an already decoded sequence.
        """
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except ValueError:
                return None
        if not isinstance(raw, (list, tuple)) or not raw:
            return None
        try:
            return [float(value) for value in raw]
        except (TypeError, ValueError):
            return None

    def search_similar_questions_efficient(self, question: str, top_k: int = 3) -> List[Dict]:
        """Search with the LangChain vector store, falling back to the manual scan.

        Args:
            question: Text to search for.
            top_k: Maximum number of results.

        Returns:
            A list of dicts with ``id``, ``question``, ``answer``,
            ``similarity`` and ``page_content``. When the store does not put
            a ``similarity`` value in the document metadata, 0.8 is used as
            a placeholder, not a measured score.
        """
        try:
            if not self.use_vector_store:
                return self.search_similar_questions_manual(question, top_k)

            docs = self.vector_store.similarity_search(question, k=top_k)

            similar_docs = []
            for doc in docs:
                page_content = doc.page_content
                parsed = self._split_question_answer(page_content)
                if parsed is None:
                    continue
                question_part, answer_part = parsed
                similar_docs.append({
                    'id': doc.metadata.get('id', 'unknown'),
                    'question': question_part,
                    'answer': answer_part,
                    'similarity': doc.metadata.get('similarity', 0.8),  # Estimated
                    'page_content': page_content
                })

            return similar_docs

        except Exception as e:
            print(f"Error in efficient search: {e}")
            return self.search_similar_questions_manual(question, top_k)

    def search_similar_questions_manual(self, question: str, top_k: int = 3, similarity_threshold: float = 0.75) -> List[Dict]:
        """Fallback search that scores every stored row with cosine similarity.

        Rows are skipped (rather than aborting the search) when their
        embedding cannot be parsed, has a different dimension than the query
        embedding, has zero norm, or when ``page_content`` lacks the Q:/A:
        markers.

        Args:
            question: Text to search for.
            top_k: Maximum number of results.
            similarity_threshold: Minimum cosine similarity for a row to be
                returned.

        Returns:
            Up to ``top_k`` dicts sorted by descending ``similarity``; an
            empty list when nothing matches or an error occurs.
        """
        try:
            query_embedding = self.embeddings.embed_query(question)

            # Fetch all documents from Supabase
            response = self.supabase.table("documents").select("*").execute()

            if not response.data:
                return []

            # The query norm does not depend on the row: compute it once.
            query_norm = sum(a * a for a in query_embedding) ** 0.5
            if query_norm == 0:
                return []

            similar_docs = []

            for doc in response.data:
                stored_embedding = self._parse_embedding(doc.get('embedding'))
                if stored_embedding is None:
                    continue

                # zip() would silently truncate vectors of different length
                # (e.g. rows embedded with another model) and yield a
                # meaningless score, so such rows are ignored.
                if len(stored_embedding) != len(query_embedding):
                    continue

                stored_norm = sum(b * b for b in stored_embedding) ** 0.5
                if stored_norm == 0:
                    continue

                dot_product = sum(a * b for a, b in zip(query_embedding, stored_embedding))
                similarity = dot_product / (query_norm * stored_norm)
                if similarity < similarity_threshold:
                    continue

                page_content = doc.get('page_content')
                parsed = self._split_question_answer(page_content)
                if parsed is None:
                    continue
                question_part, answer_part = parsed

                similar_docs.append({
                    'id': doc.get('id', 'unknown'),
                    'question': question_part,
                    'answer': answer_part,
                    'similarity': float(similarity),
                    'page_content': page_content
                })

            # Sort by similarity
            similar_docs.sort(key=lambda x: x['similarity'], reverse=True)
            return similar_docs[:top_k]

        except Exception as e:
            print(f"Error in manual search: {e}")
            return []
|
| 160 |
+
|
| 161 |
+
# Module-level cache for the retriever. It starts empty so that importing this
# module does not fail when the Supabase environment variables are missing.
retriever = None


def get_retriever():
    """Return the shared GAIADatabaseRetriever, building it on first use."""
    global retriever
    if retriever is not None:
        return retriever
    retriever = GAIADatabaseRetriever(use_huggingface=True)
    return retriever
|
| 170 |
+
|
| 171 |
+
@tool
def create_retriever_from_supabase(query: str) -> str:
    """
    Search for similar documents in the Supabase vector store using efficient LangChain integration.
    This tool uses semantic search to find documents that are semantically similar to the provided query.

    Args:
        query (str): The search query to find similar documents.

    Returns:
        str: A formatted list of documents that are semantically similar to the query.
    """
    # The docstring text is kept verbatim on purpose: @tool uses it as the
    # description of the tool that is shown to the model.
    try:
        matches = get_retriever().search_similar_questions_efficient(query, top_k=3)

        if not matches:
            return "No similar questions found in the database."

        # Build the report piece by piece and join once at the end.
        pieces = [f"Found {len(matches)} similar questions:\n\n"]
        for position, match in enumerate(matches, start=1):
            pieces.append(f"Similar Question {position}:\n")
            pieces.append(f"Q: {match['question']}\n")
            pieces.append(f"A: {match['answer']}\n")
            pieces.append("-" * 50 + "\n")

        return "".join(pieces)

    except Exception as e:
        return f"Error searching database: {str(e)}"
|
| 202 |
+
|
| 203 |
+
@tool
def search_similar_gaia_questions(question: str, max_results: int = 3) -> str:
    """
    Search for similar GAIA questions in the database with precise similarity scoring.

    Args:
        question: The question to search for
        max_results: Maximum number of similar questions to return (default: 3)

    Returns:
        Formatted string with similar questions and their answers
    """
    # The docstring text is kept verbatim on purpose: @tool uses it as the
    # description of the tool that is shown to the model.
    try:
        matches = get_retriever().search_similar_questions_manual(
            question,
            top_k=max_results,
            similarity_threshold=0.75
        )

        if not matches:
            return "No similar questions found in the database."

        # One formatted section per hit, each including its similarity score.
        pieces = [f"Found {len(matches)} similar questions:\n\n"]
        for position, match in enumerate(matches, start=1):
            pieces.append(f"Similar Question {position} (Similarity: {match['similarity']:.3f}):\n")
            pieces.append(f"Q: {match['question']}\n")
            pieces.append(f"A: {match['answer']}\n")
            pieces.append("-" * 50 + "\n")

        return "".join(pieces)

    except Exception as e:
        return f"Error searching database: {str(e)}"
|
| 238 |
+
|
| 239 |
+
@tool
def get_exact_answer_if_highly_similar(question: str, similarity_threshold: float = 0.95) -> str:
    """
    Get the exact answer if a highly similar question exists in the database.

    Args:
        question: The question to search for
        similarity_threshold: High threshold for considering an exact match (default: 0.95)

    Returns:
        The answer if found, or indication that no exact match exists
    """
    # The docstring text is kept verbatim on purpose: @tool uses it as the
    # description of the tool that is shown to the model.
    try:
        # Only the single best hit at or above the threshold is of interest.
        hits = get_retriever().search_similar_questions_manual(
            question,
            top_k=1,
            similarity_threshold=similarity_threshold
        )

        if not hits:
            return "NO_EXACT_MATCH: Proceed with normal agent processing"

        return f"EXACT_MATCH_FOUND: {hits[0]['answer']}"

    except Exception as e:
        return f"Error checking for exact match: {str(e)}"
|
| 267 |
+
|
| 268 |
+
# Export tools for use in agents - include both approaches
# (the vector-store search and the two tools built on the manual similarity
# scan). The order below is the order in which the tools are listed for
# whoever binds this list to an agent.
DATABASE_TOOLS = [
    create_retriever_from_supabase,  # Efficient LangChain approach
    search_similar_gaia_questions,  # Precise similarity scoring
    get_exact_answer_if_highly_similar  # Exact match detection
]
|
tools/file_tools.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
File processing and data extraction tools for the Retriever Agent.
|
| 3 |
+
Handles Excel, CSV, audio, video, and document processing.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import os
|
| 8 |
+
from typing import Any, Dict, List
|
| 9 |
+
from langchain.tools import tool
|
| 10 |
+
|
| 11 |
+
@tool
def read_excel_file(file_path: str, sheet_name: str = None) -> str:
    """
    Read and analyze Excel files.

    Args:
        file_path: Path to the Excel file
        sheet_name: Specific sheet to read (optional)

    Returns:
        String representation of the data
    """
    # The docstring text is kept verbatim on purpose: @tool uses it as the
    # description of the tool that is shown to the model.
    try:
        # An empty/None sheet name selects the first sheet (index 0), which is
        # what pandas reads when no sheet_name argument is given.
        target_sheet = sheet_name if sheet_name else 0
        frame = pd.read_excel(file_path, sheet_name=target_sheet)
        return frame.to_string()
    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
|
| 31 |
+
|
| 32 |
+
@tool
def read_csv_file(file_path: str) -> str:
    """
    Read and analyze CSV files.

    Args:
        file_path: Path to the CSV file

    Returns:
        String representation of the data
    """
    # The docstring text is kept verbatim on purpose: @tool uses it as the
    # description of the tool that is shown to the model.
    try:
        # Parse and render in one step; any failure is reported as text so the
        # calling agent receives a message instead of an exception.
        return pd.read_csv(file_path).to_string()
    except Exception as e:
        return f"Error reading CSV file: {str(e)}"
|
| 48 |
+
|
| 49 |
+
@tool
def calculate_column_sum(file_path: str, column_name: str) -> float:
    """
    Calculate sum of a specific column in Excel/CSV file.

    Args:
        file_path: Path to the file
        column_name: Name of the column to sum

    Returns:
        Sum of the column values, or an error message string if the file
        cannot be read or the column does not exist
    """
    try:
        # Compare the suffix case-insensitively so that e.g. "DATA.XLSX" is
        # not handed to the CSV parser.
        if file_path.lower().endswith(('.xlsx', '.xls')):
            df = pd.read_excel(file_path)
        else:
            df = pd.read_csv(file_path)

        # Report a missing column explicitly; the bare KeyError text would
        # only contain the quoted column name.
        if column_name not in df.columns:
            available = ", ".join(str(col) for col in df.columns)
            return f"Error calculating sum: column '{column_name}' not found. Available columns: {available}"

        return float(df[column_name].sum())
    except Exception as e:
        # Errors are returned as text (not raised) so the calling agent
        # receives a readable message.
        return f"Error calculating sum: {str(e)}"
|
| 70 |
+
|
| 71 |
+
# Add more file processing tools as needed
|
tools/math_tools.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Mathematical calculation tools for the Math Agent.
|
| 3 |
+
Handles complex calculations, statistical analysis, and numerical operations.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Any, Dict, List, Union
|
| 7 |
+
import math
|
| 8 |
+
from langchain_core.tools import tool
|
| 9 |
+
|
| 10 |
+
@tool
def calculate_expression(expression: str) -> float:
    """
    Evaluate a mathematical expression given as text.

    Only the names listed in ``allowed_names`` below (basic numeric builtins,
    a handful of ``math`` functions, ``pi`` and ``e``) are visible to the
    expression.

    Args:
        expression: Mathematical expression as string

    Returns:
        Result of the calculation. On any failure a string starting with
        "Calculation error:" is returned instead of raising.
    """
    try:
        # SECURITY NOTE(review): eval() with an emptied __builtins__ is NOT a
        # real sandbox. Attribute chains such as ().__class__.__base__ can
        # reach arbitrary objects, and expressions like 9**9**9 can exhaust
        # CPU/memory. Rejecting double underscores blocks the common escape
        # route as defense in depth; if this tool is ever fed untrusted text,
        # replace eval() with an AST-based evaluator.
        if "__" in expression:
            return "Calculation error: double-underscore names are not allowed"

        allowed_names = {
            "abs": abs, "round": round, "min": min, "max": max,
            "sum": sum, "pow": pow, "sqrt": math.sqrt,
            "sin": math.sin, "cos": math.cos, "tan": math.tan,
            "log": math.log, "log10": math.log10, "exp": math.exp,
            "pi": math.pi, "e": math.e
        }
        return eval(expression, {"__builtins__": {}}, allowed_names)
    except Exception as e:
        return f"Calculation error: {str(e)}"
|
| 33 |
+
|
| 34 |
+
@tool
def percentage_calculation(value: float, total: float) -> float:
    """
    Express ``value`` as a percentage of ``total``.

    Args:
        value: The value
        total: The total value

    Returns:
        ``value / total * 100`` -- i.e. 25.0 for a quarter, not 0.25.
        Returns 0.0 when ``total`` is zero.
    """
    if total == 0:
        # Guard against ZeroDivisionError; a float keeps the return type
        # consistent with the non-zero path.
        return 0.0
    return (value / total) * 100
|
| 49 |
+
|
| 50 |
+
@tool
def currency_format(amount: float, currency: str = "USD", decimals: int = 2) -> str:
    """
    Render an amount as a fixed-point decimal string.

    Args:
        amount: The amount to format
        currency: Currency code (accepted but currently not used in the output)
        decimals: Number of decimal places

    Returns:
        The amount with exactly ``decimals`` digits after the decimal point,
        without any currency symbol or code
    """
    fixed_point_spec = f".{decimals}f"
    return format(amount, fixed_point_spec)
|
| 64 |
+
|
| 65 |
+
@tool
def statistical_summary(numbers: List[float]) -> Dict[str, float]:
    """
    Calculate basic statistics for a list of numbers.

    Args:
        numbers: List of numbers

    Returns:
        Dictionary with the keys "mean", "median", "min", "max", "sum" and
        "count"; an empty dictionary when ``numbers`` is empty
    """
    if not numbers:
        return {}

    count = len(numbers)
    total = sum(numbers)

    # For an even number of values the median is the average of the two
    # middle elements, not just the upper one.
    ordered = sorted(numbers)
    middle = count // 2
    if count % 2:
        median = ordered[middle]
    else:
        median = (ordered[middle - 1] + ordered[middle]) / 2

    return {
        "mean": total / count,
        "median": median,
        "min": min(numbers),
        "max": max(numbers),
        "sum": total,
        "count": count
    }
|
| 87 |
+
|
| 88 |
+
# Add more mathematical tools as needed
|
tools/research_tools.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Research and web search tools for the Research Agent.
|
| 3 |
+
Handles web searches, fact verification, and information gathering.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Any, Dict, List
|
| 7 |
+
import requests
|
| 8 |
+
from langchain_core.tools import tool
|
| 9 |
+
|
| 10 |
+
@tool
def web_search(query: str, max_results: int = 5) -> str:
    """
    Look up information on the web (placeholder implementation).

    Args:
        query: Search query string
        max_results: Maximum number of results to return (not used yet)

    Returns:
        Search results as formatted text; for now only a line echoing the query
    """
    # Placeholder: no search backend is wired in yet. Swap this for a real
    # search API (DuckDuckGo, Serper, etc.) before relying on the output.
    placeholder_result = f"Search results for: {query}"
    return placeholder_result
|
| 25 |
+
|
| 26 |
+
@tool
def get_company_info(company_name: str) -> str:
    """
    Look up basic information about a company (placeholder implementation).

    Args:
        company_name: Name of the company

    Returns:
        Company information; for now only a line echoing the company name
    """
    # Placeholder: the actual company lookup still has to be implemented.
    placeholder_result = f"Information about {company_name}"
    return placeholder_result
|
| 39 |
+
|
| 40 |
+
@tool
def verify_fact(claim: str) -> str:
    """
    Check a factual claim (placeholder implementation).

    Args:
        claim: The claim to verify

    Returns:
        Verification result; for now only a line echoing the claim
    """
    # Placeholder: no sources are consulted yet; verification logic is pending.
    placeholder_result = f"Verification result for: {claim}"
    return placeholder_result
|
| 53 |
+
|
| 54 |
+
# Add more research tools as needed
|
utils/supbase_fill.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from supabase import create_client
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
from huggingface_hub import hf_hub_download
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
|
| 8 |
+
# -----------------------------------------------------------------------------
# Configuration: credentials come from the environment (a .env file is loaded)
# -----------------------------------------------------------------------------
load_dotenv()
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_KEY")
HF_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")

# Fail fast, before any client or model is created, if a credential is missing.
if not (SUPABASE_URL and SUPABASE_SERVICE_KEY):
    raise RuntimeError("Please set SUPABASE_URL and SUPABASE_SERVICE_KEY in your .env")

if not HF_TOKEN:
    raise RuntimeError(
        "Please set HUGGINGFACE_API_TOKEN in your .env and ensure you've been granted access to the GAIA dataset."
    )

# -----------------------------------------------------------------------------
# Shared Supabase client and sentence-embedding model
# -----------------------------------------------------------------------------
supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)
model = SentenceTransformer("all-mpnet-base-v2")

# -----------------------------------------------------------------------------
# Where the GAIA validation metadata lives on the Hugging Face Hub
# -----------------------------------------------------------------------------
GAIA_REPO_ID = "gaia-benchmark/GAIA"
GAIA_METADATA_FILE = "2023/validation/metadata.jsonl"
|
| 35 |
+
|
| 36 |
+
def fetch_gaia_validation_examples():
    """
    Download the GAIA validation metadata and extract its question/answer pairs.

    The JSONL file named by GAIA_METADATA_FILE is fetched from the
    GAIA_REPO_ID dataset repository using HF_TOKEN, then loaded with the
    ``datasets`` JSON loader.

    Returns:
        List of ``(question, final_answer)`` tuples; rows in which either
        field is missing or empty are left out
    """
    print("🔄 Downloading GAIA metadata.jsonl …")
    metadata_path = hf_hub_download(
        repo_id=GAIA_REPO_ID,
        filename=GAIA_METADATA_FILE,
        token=HF_TOKEN,
        repo_type="dataset",
    )
    print(f"✅ Downloaded to {metadata_path!r}")

    print("🔄 Loading JSONL via Datasets …")
    ds = load_dataset("json", data_files=metadata_path, split="train")
    print("Columns in your JSONL:", ds.column_names)

    # Column names as they appear in the GAIA metadata file.
    question_key = "Question"
    answer_key = "Final answer"

    # Pull both fields from every row, then keep only the complete pairs.
    candidates = ((row.get(question_key), row.get(answer_key)) for row in ds)
    qa = [(question, answer) for question, answer in candidates if question and answer]

    print(f"✅ Found {len(qa)} (Question, Final answer) pairs.")
    return qa
|
| 66 |
+
|
| 67 |
+
def main():
    """
    Embed every GAIA question/answer pair and store it in Supabase.

    Each pair is flattened to the text "Q: ... A: ...", encoded with the
    module-level sentence-transformer model, and inserted into the
    "documents" table in a single request. Progress and the outcome are
    printed to stdout.
    """
    qa_pairs = fetch_gaia_validation_examples()
    if not qa_pairs:
        print("⚠️ No QA pairs—abort.")
        return

    # One record per pair: the flattened text plus its embedding as a plain
    # list so it can be serialized in the insert request.
    texts = [f"Q: {q} A: {a}" for q, a in qa_pairs]
    to_insert = [
        {"page_content": text, "embedding": model.encode(text).tolist()}
        for text in texts
    ]

    print(f"🚀 Inserting {len(to_insert)} records into Supabase…")
    res = supabase.table("documents").insert(to_insert).execute()

    # An empty ``data`` payload is treated as a failed insert.
    if not res.data:
        print("❌ Insert appeared to fail. Response:")
        print(res)
        return
    print(f"🎉 Successfully inserted {len(to_insert)} GAIA examples.")
|
| 86 |
+
|
| 87 |
+
# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    main()
|