diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..f6b1f326ca4ab7cf0c8798856f8fe0020ff82d58
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ae248fb887ac3580b5d33ffc3d557f413e9a7c0b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,212 @@
+# Healthcare AI Chatbot
+
+An intelligent personal health assistant built with the OpenAI API and Gradio.
+
+## ✨ Key Features
+
+- 🤖 **Multi-Agent Architecture** - Specialized agents for nutrition, exercise, symptoms, mental health
+- 🧠 **Conversation Memory** - Remembers user data, no repeated questions
+- 🔄 **Agent Handoffs** - Smooth transitions between specialists
+- 💬 **Agent Communication** - Agents share context and collaborate
+- 📚 **RAG Integration** - Medical knowledge from WHO, CDC, NIMH
+- 🎯 **Context-Aware Routing** - Intelligent query understanding
+
+See [agents/AGENT_ARCHITECTURE.md](agents/AGENT_ARCHITECTURE.md) for detailed architecture documentation.
+
+## Setup
+
+### 1. Create Virtual Environment
+
+First, create a virtual environment to isolate project dependencies:
+
+**macOS/Linux:**
+```bash
+python3 -m venv venv
+```
+
+**Windows:**
+```bash
+python -m venv venv
+```
+
+### 2. Activate Virtual Environment
+
+**macOS/Linux:**
+```bash
+source venv/bin/activate
+```
+
+**Windows:**
+```bash
+venv\Scripts\activate
+```
+
+### 3. Install Dependencies
+
+```bash
+pip install -r requirements.txt
+```
+
+### 4. Configure Environment Variables
+
+Create a `.env` file in the project root:
+
+```bash
+OPENAI_API_KEY=your_api_key_here
+```
+
+### 5. 
Setup RAG System & Fine-tuning (One-time)
+
+**IMPORTANT:** Before running the app, set up the complete system with one command:
+
+```bash
+# One command to set up everything (15-20 minutes)
+bash scripts/setup_rag.sh
+```
+
+**What this does:**
+
+**Phase 1: RAG Databases (10-15 minutes)**
+- ✅ Downloads and processes medical datasets from HuggingFace
+- ✅ Builds ChromaDB vector databases for each domain
+- ✅ Total: ~160 MB, 6 specialized databases
+
+**Datasets by Domain:**
+- **Symptoms/Diseases**: ViMedical_Disease (603 diseases, ~50 MB)
+- **Mental Health**: MentalChat16K (16K conversations, 33 topics, ~80 MB)
+- **Nutrition**: LLM_Dietary_Recommendation (50 patient profiles + diet plans, ~20 MB)
+- **Vietnamese Food**: Vietnamese_Nutrition (73 foods with nutrition facts, ~5 MB)
+- **Fitness**: GYM-Exercise (1.66K exercises, ~10 MB)
+- **Medical Q&A**: Vietnamese_Medical_QA (9.3K Q&A pairs, ~15 MB)
+
+**Phase 2: Training Data Generation (2-3 minutes)**
+- ✅ Generates 200 synthetic conversations using GPT-4o-mini
+- ✅ 50 scenarios per agent (nutrition, symptom, exercise, mental_health)
+- ✅ Cost: ~$0.50 from your API budget
+- ✅ Saved to `fine_tuning/training_data/` (NOT committed to git)
+
+**Phase 3: Fine-tuning (Optional, 30-60 minutes)**
+- ❓ Prompts: "Do you want to fine-tune now? (y/N)" (10-second timeout)
+- ✅ If yes: Uploads data, creates fine-tuning jobs, waits for completion
+- ✅ If no: Skips; you can fine-tune later with `python scripts/auto_finetune.py`
+- ✅ Cost: ~$2.00 from your API budget
+- ✅ Creates `config/fine_tuned_models.py` (NOT committed to git)
+
+**Total Cost:** ~$2.50 from your API budget (if you choose to fine-tune)
+
+**Fine-tune Later (Optional):**
+```bash
+# If you skipped fine-tuning during setup
+python scripts/auto_finetune.py
+```
+
+**Manual Setup (Alternative):**
+```bash
+# RAG only (no training data generation)
+python data_mining/mining_vimedical.py
+python data_mining/mining_mentalchat.py
+# ... other mining scripts
+
+# Training data only
+python scripts/generate_training_data.py
+
+# Fine-tuning only
+python scripts/auto_finetune.py
+```
+
+**Team Sharing:**
+- ✅ Each team member runs `bash scripts/setup_rag.sh` once
+- ✅ Everyone generates their own data with their API key
+- ❌ RAG databases and training data are NOT committed to git (too large)
+- ✅ Scripts and code are committed for easy sharing
+
+## Run the Application
+
+You have multiple options to run the application:
+
+**Option 1: Using the shell script (recommended):**
+```bash
+bash run.sh
+```
+
+**Option 2: Using the Gradio CLI:**
+```bash
+gradio app.py
+```
+
+**Option 3: Using Python directly:**
+```bash
+python app.py
+```
+
+**Notes:**
+- The app will launch with a local URL and a public shareable link
+- Ensure your virtual environment is activated before running
+- Make sure `OPENAI_API_KEY` is set in your `.env` file
+
+### 6. 
Deactivate Virtual Environment (when done)
+
+```bash
+deactivate
+```
+
+## Project Structure
+
+```
+healthcare_bot/
+├── app.py                  # Main entry point (Gradio UI)
+├── rag/
+│   ├── ingest.py           # Ingest documents into ChromaDB
+│   ├── query_engine.py     # LangChain Retrieval QA
+│   └── data/               # PDF/CSV/MD sources
+├── modules/
+│   ├── nutrition.py        # Nutrition module
+│   ├── exercise.py         # Exercise module
+│   └── rules.json          # Basic rules
+├── utils/
+│   └── helpers.py          # Helpers: BMI calculation, output formatting
+├── config/
+│   └── settings.py         # Env + API + model config
+└── requirements.txt
+```
+
+## Technologies
+
+- Python 3.9+
+- OpenAI API (GPT-4o-mini)
+- Gradio 5.49.0
+- python-dotenv 1.1.1
+- Virtual Environment (venv)
+
+## Features
+
+- 💬 **AI chat interface** - Friendly conversational UI
+- 🏥 **Comprehensive health advice** - Gathers detailed information before advising
+- 🔍 **Detailed intake** - Asks about symptoms, underlying conditions, current medications
+- 📊 **Holistic assessment** - Analysis based on multiple health factors
+- 🌐 **Public shareable link** - Easy sharing
+- 📱 **Responsive UI** - Clean, modern interface
+- 💾 **Conversation history** - Remembers conversation context
+
+## How the Chatbot Works
+
+When you share symptoms or health information, the chatbot will:
+
+1. **Ask for personal information**: Age, gender, weight, height (if needed)
+2. **Ask about symptoms**: Duration, severity, accompanying symptoms
+3. **Ask about underlying conditions**: Diabetes, high blood pressure, cardiovascular disease, etc.
+4. **Ask about medications**: Current medications and treatments
+5. **Ask about lifestyle**: Diet, exercise, sleep, stress
+6. **Give advice**: Once enough information is gathered, provide comprehensive, accurate recommendations
+
+**Example (the bot converses in Vietnamese):**
+```
+User: "Tôi bị đau đầu"
+Bot: "Tôi hiểu bạn đang bị đau đầu. Để tư vấn chính xác hơn, cho tôi hỏi thêm:
+     - Bạn bao nhiêu tuổi?
+     - Đau đầu kéo dài bao lâu rồi?
+     - Mức độ đau (nhẹ/vừa/nặng)?
+     - Có triệu chứng kèm theo không (buồn nôn, chóng mặt)?
+     - Bạn có bệnh nền gì không?
+     - Đang dùng thuốc gì không?"
+```
diff --git a/agents/AGENT_ARCHITECTURE.md b/agents/AGENT_ARCHITECTURE.md
new file mode 100644
index 0000000000000000000000000000000000000000..c29b4f3c6a34f2672d9862313e072f8409705836
--- /dev/null
+++ b/agents/AGENT_ARCHITECTURE.md
@@ -0,0 +1,1235 @@
+# Agent-Based Architecture Documentation 🏗️
+
+## Overview
+
+This system uses an **agent-based architecture** with **OpenAI function calling** for intelligent healthcare assistance.
+
+### Why Agent-Based Architecture?
+
+**Advantages over a monolithic design:**
+1. **Token Efficiency** - Each agent loads only the prompts it needs (60-70% reduction)
+2. **Scalability** - Easy to add new specialized agents
+3. **Accuracy** - Domain-specific expertise per agent
+4. **Maintainability** - Clear separation of concerns
+5. 
**Context Awareness** - Intelligent routing with conversation history + +### Core Capabilities + +- **Specialized Agents** - Nutrition, Exercise, Symptoms, Mental Health, General Health +- **Conversation Memory** - Persistent user data across conversation +- **Agent Handoffs** - Smooth transitions between specialists +- **Agent Communication** - Cross-agent data sharing and collaboration +- **Multi-Agent Responses** - Coordinate multiple agents for complex queries +- **Context-Aware Routing** - Understand conversation flow and intent + +--- + +## 📊 System Architecture + +``` +User Input + ↓ +Agent Coordinator + ↓ +┌─────────────────────────────────────────────┐ +│ Shared Conversation Memory │ +│ ┌────────────────────────────────────┐ │ +│ │ • User Profile (age, gender, etc.) │ │ +│ │ • Agent-specific Data │ │ +│ │ • Conversation State │ │ +│ │ • Pending Questions │ │ +│ └────────────────────────────────────┘ │ +└─────────────────────────────────────────────┘ + ↓ +Router (Function Calling) + Context Analysis + ↓ +┌─────────────────────────────────────┐ +│ Chọn Agent(s) Phù Hợp │ +├─────────────────────────────────────┤ +│ • Nutrition Agent │ +│ • Exercise Agent │ +│ • Symptom Agent │ +│ • Mental Health Agent │ +│ • General Health Agent (default) │ +└─────────────────────────────────────┘ + ↓ +┌─ Single Agent Response +├─ Agent Handoff (smooth transition) +└─ Multi-Agent Combined Response + ↓ +Response (with full context awareness) +``` + +--- + +## 🤖 Các Agent + +### 1. **Router** (`agents/core/router.py`) + +**Chức năng:** Phân tích user input và route đến agent phù hợp + +**Công nghệ:** OpenAI Function Calling + +**Available Functions:** +```python +- nutrition_agent: Dinh dưỡng, BMI, calo, thực đơn +- exercise_agent: Tập luyện, gym, yoga, cardio +- symptom_agent: Triệu chứng bệnh, đau đầu, sốt +- mental_health_agent: Stress, lo âu, trầm cảm +- general_health_agent: Câu hỏi chung về sức khỏe +``` + +**🆕 Context-Aware Features:** + +1. **Extended Context Window:** + - OLD: 3 exchanges + - NEW: **10 exchanges** (+233%) + - Hiểu conversation flow tốt hơn + +2. **Last Agent Tracking:** + - Track agent nào vừa được dùng + - Giúp xử lý follow-up questions + - Example: "Vậy nên ăn gì?" → biết đang nói về giảm cân + +3. **Enhanced Routing Prompt:** + - Hướng dẫn rõ ràng về câu hỏi mơ hồ + - Ví dụ cụ thể về follow-up questions + - Detect topic switching + +4. **Improved System Prompt:** + - Nhấn mạnh khả năng hiểu ngữ cảnh + - Xử lý ambiguous questions + - Recognize follow-up patterns (vậy, còn, thì sao) + +**Routing Accuracy:** +- Clear questions: **90-95%** +- Follow-up questions: **80-85%** (improved from ~60%) +- Topic switching: **85-90%** +- Multi-topic: **70-75%** + +**Ví dụ:** +```python +from agents import route_to_agent + +# Example 1: Clear question +result = route_to_agent("Tôi muốn giảm cân", chat_history) +# Returns: { +# "agent": "nutrition_agent", +# "parameters": {"user_query": "Tôi muốn giảm cân"}, +# "confidence": 0.9 +# } + +# Example 2: Ambiguous follow-up (NEW - context-aware) +chat_history = [ + ["Tôi muốn giảm cân", "Response from nutrition_agent..."] +] +result = route_to_agent("Vậy nên ăn gì?", chat_history) +# Returns: { +# "agent": "nutrition_agent", # ✅ Understands context! 
+# "parameters": {"user_query": "Vậy nên ăn gì?"}, +# "confidence": 0.9 +# } + +# Example 3: Topic switch +chat_history = [ + ["Tôi muốn giảm cân", "Response..."], + ["Vậy nên ăn gì?", "Response..."] +] +result = route_to_agent("À mà tôi bị đau đầu", chat_history) +# Returns: { +# "agent": "symptom_agent", # ✅ Detects topic switch! +# "parameters": {"user_query": "À mà tôi bị đau đầu"}, +# "confidence": 0.9 +# } +``` + +**Context Handling Examples:** + +| User Message | Context | Routed To | Why | +|--------------|---------|-----------|-----| +| "Tôi muốn giảm cân" | None | nutrition_agent | Clear question | +| "Vậy nên ăn gì?" | After giảm cân | nutrition_agent | Follow-up with context | +| "Tôi nên tập gì?" | After giảm cân | exercise_agent | Clear topic | +| "Còn về dinh dưỡng?" | After tập gym | nutrition_agent | Explicit topic mention | +| "À mà tôi bị đau đầu" | Any | symptom_agent | Clear topic switch | +| "Nó có ảnh hưởng gì?" | After đau đầu | symptom_agent | Pronoun resolution | + +--- + +### 2. **Nutrition Agent** (`agents/specialized/nutrition_agent.py`) + +**Chuyên môn:** +- Tính BMI, phân tích thể trạng +- Tính calo, macro (protein/carb/fat) +- Gợi ý thực đơn +- Thực phẩm bổ sung + +**System Prompt:** ~500 tokens (thay vì 3000+ tokens của monolithic) + +**Data Flow:** +``` +User: "Tôi muốn giảm cân" + ↓ +Router → nutrition_agent + ↓ +Agent hỏi: tuổi, giới tính, cân nặng, chiều cao + ↓ +User cung cấp thông tin + ↓ +Agent tính BMI → Gọi NutritionAdvisor + ↓ +Response: BMI + Calo + Thực đơn + Lời khuyên +``` + +**Ví dụ Response:** +``` +🥗 Tư Vấn Dinh Dưỡng Cá Nhân Hóa + +📊 Phân tích BMI: +- BMI: 24.5 (normal) +- Lời khuyên: Duy trì cân nặng + +🎯 Mục tiêu hàng ngày: +- 🔥 Calo: 1800 kcal +- 🥩 Protein: 112g +- 🍚 Carb: 202g +- 🥑 Chất béo: 50g + +🍽️ Gợi ý thực đơn: +[Chi tiết món ăn...] +``` + +--- + +### 3. **Exercise Agent** (`agents/specialized/exercise_agent.py`) + +**Chuyên môn:** +- Tạo lịch tập 7 ngày +- Tư vấn bài tập theo mục tiêu +- Hướng dẫn kỹ thuật an toàn +- Progression (tuần 1, 2, 3...) + +**System Prompt:** ~400 tokens + +**Data Flow:** +``` +User: "Tôi muốn tập gym" + ↓ +Router → exercise_agent + ↓ +Agent hỏi: tuổi, giới tính, thể lực, mục tiêu, thời gian + ↓ +User cung cấp thông tin + ↓ +Agent gọi generate_exercise_plan() + ↓ +Response: Lịch tập 7 ngày chi tiết +``` + +--- + +### 4. **Symptom Agent** (`agents/specialized/symptom_agent.py`) + +**Chuyên môn:** +- Đánh giá triệu chứng bằng OPQRST method +- Phát hiện red flags +- Tư vấn xử lý tại nhà +- Khuyên khi nào cần gặp bác sĩ + +**System Prompt:** ~600 tokens + +**OPQRST Method:** +- **O**nset: Khi nào bắt đầu? +- **P**rovocation/Palliation: Gì làm tệ/đỡ hơn? +- **Q**uality: Mô tả cảm giác? +- **R**egion/Radiation: Vị trí? +- **S**everity: Mức độ 1-10? +- **T**iming: Lúc nào xuất hiện? + +**Red Flags Detection:** +```python +- Đau ngực + khó thở → Heart attack warning +- Đau đầu + cứng gáy + sốt → Meningitis warning +- Yếu một bên cơ thể → Stroke warning +``` + +**Data Flow:** +``` +User: "Tôi bị đau đầu" + ↓ +Router → symptom_agent + ↓ +Agent check red flags → Không có + ↓ +Agent hỏi OPQRST (6 rounds) + ↓ +User trả lời từng round + ↓ +Agent phân tích → Đưa ra lời khuyên +``` + +--- + +### 5. 
**Mental Health Agent** (`agents/specialized/mental_health_agent.py`) + +**Chuyên môn:** +- Hỗ trợ stress, lo âu, trầm cảm +- Kỹ thuật thư giãn, mindfulness +- Cải thiện giấc ngủ +- Quản lý cảm xúc + +**System Prompt:** ~500 tokens + +**Crisis Detection:** +```python +- Ý định tự tử → Hotline khẩn cấp: + • 115 - Cấp cứu y tế (Trung tâm Cấp cứu 115 TP.HCM) + • 1900 1267 - Chuyên gia tâm thần (Bệnh viện Tâm Thần TP.HCM) + • 0909 65 80 35 - Tư vấn tâm lý miễn phí (Davipharm) +- Tự gây thương tích → Same hotlines +- ONLY show hotlines for serious mental health crises +``` + +**Phong cách:** +- Ấm áp, đồng cảm 💙 +- Validate cảm xúc +- Không phán xét +- Khuyến khích tìm kiếm sự hỗ trợ + +--- + +### 6. **General Health Agent** (`agents/specialized/general_health_agent.py`) + +**Chuyên môn:** +- Câu hỏi chung về sức khỏe +- Phòng bệnh +- Lối sống lành mạnh +- Default fallback agent + +**System Prompt:** ~2000 tokens (comprehensive prompt từ helpers.py) + +**Khi nào dùng:** +- Câu hỏi không rõ ràng +- Không match với agent chuyên môn +- Routing thất bại + +--- + +## 🧠 Memory & Coordination Components + +### 7. **Conversation Memory** (`utils/memory.py`) - ✨ NEW! + +**Chức năng:** Shared memory system cho tất cả agents + +**Core Features:** + +1. **User Profile Storage** + ```python + memory.update_profile('age', 25) + memory.update_profile('weight', 70) + memory.get_profile('age') # → 25 + ``` + +2. **Missing Fields Detection** + ```python + missing = memory.get_missing_fields(['age', 'gender', 'weight', 'height']) + # → ['gender', 'height'] + ``` + +3. **Agent-Specific Data** + ```python + memory.add_agent_data('nutrition', 'goal', 'weight_loss') + memory.get_agent_data('nutrition', 'goal') # → 'weight_loss' + ``` + +4. **Conversation State Tracking** + ```python + memory.set_current_agent('nutrition_agent') + memory.get_current_agent() # → 'nutrition_agent' + memory.get_previous_agent() # → 'symptom_agent' + ``` + +5. **Context Summary** + ```python + memory.get_context_summary() + # → "User: 25 tuổi, nam | 70kg, 175cm | Topic: giảm cân" + ``` + +**Benefits:** +- ✅ No repeated questions +- ✅ Full conversation context +- ✅ Agent coordination +- ✅ Persistent user data + +--- + +### 8. **Base Agent Class** (`agents/core/base_agent.py`) - ✨ NEW! + +**Chức năng:** Parent class cho tất cả agents với memory support + +**Core Methods:** + +1. **Memory Access** + ```python + class MyAgent(BaseAgent): + def handle(self, parameters, chat_history): + # Get user profile + profile = self.get_user_profile() + + # Update profile + self.update_user_profile('age', 25) + + # Check missing fields + missing = self.get_missing_profile_fields(['age', 'weight']) + ``` + +2. **Handoff Detection** + ```python + # Check if should hand off + if self.should_handoff(user_query, chat_history): + next_agent = self.suggest_next_agent(user_query) + return self.create_handoff_message(next_agent) + ``` + +3. **Multi-Agent Collaboration** + ```python + # Detect if multiple agents needed + agents_needed = self.needs_collaboration(user_query) + # → ['nutrition_agent', 'exercise_agent'] + ``` + +4. **Context Awareness** + ```python + # Get conversation context + context = self.get_context_summary() + previous_agent = self.get_previous_agent() + current_topic = self.get_current_topic() + ``` + +**Benefits:** +- ✅ Unified interface for all agents +- ✅ Built-in memory access +- ✅ Automatic handoff logic +- ✅ Context awareness + +--- + +### 9. **Agent Coordinator** (`agents/core/coordinator.py`) - ✨ NEW! 
+ +**Chức năng:** Orchestrates all agents with shared memory + +**Core Features:** + +1. **Shared Memory Management** + - All agents share same memory instance + - Automatic memory updates from chat history + - Persistent user data across turns + +2. **Single Agent Routing** + ```python + coordinator = AgentCoordinator() + response = coordinator.handle_query( + "Tôi muốn giảm cân", + chat_history + ) + # → Routes to nutrition_agent with memory + ``` + +3. **Agent Handoff** + ```python + # User: "Tôi muốn giảm cân nhưng bị đau đầu" + # Nutrition agent detects symptom keyword + # → Smooth handoff to symptom_agent + ``` + +4. **Multi-Agent Collaboration** + ```python + # User: "Tôi muốn giảm cân, nên ăn gì và tập gì?" + # Coordinator detects need for both agents + # → Combined response from nutrition + exercise + ``` + +5. **Memory Persistence** + ```python + # Turn 1 + coordinator.handle_query("Tôi 25 tuổi, nam, 70kg", []) + + # Turn 2 - Memory persists! + coordinator.handle_query("Tôi muốn giảm cân", chat_history) + # → Agent knows age=25, gender=male, weight=70 + ``` + +**Response Types:** + +1. **Single Agent Response** + ``` + User: "Tôi muốn giảm cân" + → Nutrition agent handles + ``` + +2. **Handoff Response** + ``` + User: "Tôi muốn giảm cân nhưng bị đau đầu" + → Nutrition agent → Handoff → Symptom agent + ``` + +3. **Multi-Agent Response** + ``` + User: "Tôi muốn giảm cân, nên ăn gì và tập gì?" + + Response: + --- + ## 🥗 Tư Vấn Dinh Dưỡng + [Nutrition advice] + + --- + ## 💪 Tư Vấn Tập Luyện + [Exercise advice] + --- + ``` + +**Benefits:** +- ✅ Seamless agent coordination +- ✅ No repeated questions +- ✅ Multi-agent support +- ✅ Smooth handoffs +- ✅ Full context awareness + +--- + + + +## 🔄 Flow Hoàn Chỉnh + +### Example 1: Nutrition Request (with Memory) ✨ NEW! + +``` +User: "Tôi 25 tuổi, nam, 70kg, 175cm, muốn giảm cân" + ↓ +helpers.chat_logic() → USE_COORDINATOR = True + ↓ +AgentCoordinator.handle_query() + ↓ +Update Shared Memory from chat history + → memory.update_profile('age', 25) + → memory.update_profile('gender', 'male') + → memory.update_profile('weight', 70) + → memory.update_profile('height', 175) + ↓ +route_to_agent() → Function Calling + ↓ +OpenAI returns: nutrition_agent + ↓ +memory.set_current_agent('nutrition_agent') + ↓ +NutritionAgent.handle() [with memory access] + ↓ +Check memory for user data + → user_data = memory.get_full_profile() + → {age: 25, gender: 'male', weight: 70, height: 175} + ↓ +NutritionAdvisor.generate_nutrition_advice(user_data) + ↓ +Calculate BMI: 22.9 (normal) +Calculate targets: 1800 kcal, 112g protein... +Generate meal suggestions + ↓ +Save agent data to memory + → memory.add_agent_data('nutrition', 'goal', 'weight_loss') + → memory.add_agent_data('nutrition', 'bmi', 22.9) + ↓ +Format response + ↓ +Return to user +``` + +**Next Turn:** +``` +User: "Vậy tôi nên tập gì?" + ↓ +AgentCoordinator.handle_query() + ↓ +Memory already has: age=25, gender=male, weight=70, height=175 + ↓ +route_to_agent() → exercise_agent + ↓ +ExerciseAgent.handle() [with memory access] + ↓ +Get user data from memory (no need to ask again!) + → profile = memory.get_full_profile() + → nutrition_goal = memory.get_agent_data('nutrition', 'goal') + ↓ +Generate exercise plan based on profile + nutrition goal + ↓ +Return personalized exercise advice +``` + +**Token Usage:** +- Router: ~200 tokens +- Nutrition Agent prompt: ~500 tokens +- Memory operations: negligible +- Total: ~700 tokens (vs 3000+ monolithic) + +**Key Improvement:** ✅ No repeated questions! 
+ +--- + +### Example 2: Symptom Assessment + +``` +User: "Tôi bị đau đầu" + ↓ +route_to_agent() → symptom_agent + ↓ +SymptomAgent.handle() + ↓ +Check red flags: None + ↓ +Assess OPQRST progress: onset not asked + ↓ +Ask: "Đau từ khi nào? Đột ngột hay từ từ?" + ↓ +User: "Đau từ 2 ngày trước, đột ngột" + ↓ +Assess OPQRST: quality not asked + ↓ +Ask: "Mô tả cảm giác? Mức độ 1-10?" + ↓ +... (continue 6 rounds) + ↓ +All OPQRST collected → Provide assessment +``` + +**Token Usage:** +- Each round: ~300-400 tokens +- Total: ~2000 tokens across conversation (vs 3000+ per message) + +--- + +### Example 3: Agent Handoff ✨ NEW! + +``` +User: "Tôi muốn giảm cân nhưng bị đau đầu" + ↓ +AgentCoordinator.handle_query() + ↓ +route_to_agent() → nutrition_agent (primary intent) + ↓ +NutritionAgent.handle() + ↓ +Detect symptom keyword: "đau đầu" + ↓ +should_handoff() → True + ↓ +suggest_next_agent() → 'symptom_agent' + ↓ +create_handoff_message() + ↓ +Response: "Mình thấy bạn có triệu chứng đau đầu. + Để tư vấn chính xác hơn, mình sẽ chuyển bạn + sang chuyên gia đánh giá triệu chứng nhé! 😊" + ↓ +memory.set_current_agent('symptom_agent') + ↓ +Next turn: SymptomAgent handles with full context +``` + +**Benefits:** +- ✅ Smooth transition between agents +- ✅ Context preserved +- ✅ User-friendly handoff message + +--- + +### Example 4: Multi-Agent Collaboration ✨ NEW! + +``` +User: "Tôi muốn giảm cân, nên ăn gì và tập gì?" + ↓ +AgentCoordinator.handle_query() + ↓ +_detect_required_agents() + → ['nutrition_agent', 'exercise_agent'] + ↓ +_needs_multi_agent() → True + ↓ +_handle_multi_agent_query() + ↓ +Get response from nutrition_agent + → "Để giảm cân, bạn nên ăn..." + ↓ +Get response from exercise_agent + → "Bạn nên tập cardio..." + ↓ +_combine_responses() + ↓ +Response: +--- +## 🥗 Tư Vấn Dinh Dưỡng + +Để giảm cân hiệu quả, bạn nên: +- Giảm 300-500 kcal/ngày +- Tăng protein, giảm carb tinh chế +- Ăn nhiều rau xanh, trái cây +[...] + +--- +## 💪 Tư Vấn Tập Luyện + +Bạn nên tập: +- Cardio 30-45 phút/ngày (chạy bộ, đạp xe) +- Strength training 2-3 lần/tuần +- HIIT 2 lần/tuần +[...] + +--- +💬 Bạn có câu hỏi gì thêm không? 
+``` + +**Benefits:** +- ✅ Comprehensive response +- ✅ Multiple expert perspectives +- ✅ Well-organized output +- ✅ Single response instead of multiple turns + +--- + +## 💾 Data Structure + +### Unified User Data Format + +```python +{ + # Common fields + "age": int, + "gender": str, # "male" or "female" + "weight": float, # kg + "height": float, # cm + + # Nutrition specific + "goal": str, # "weight_loss", "weight_gain", "muscle_building", "maintenance" + "activity_level": str, # "low", "moderate", "high" + "dietary_restrictions": list, + "health_conditions": list, + + # Exercise specific + "fitness_level": str, # "beginner", "intermediate", "advanced" + "available_time": int, # minutes per day + + # Symptom specific + "symptom_type": str, + "duration": str, + "severity": int, # 1-10 + "location": str, + + # Mental health specific + "stress_level": str, + "triggers": list +} +``` + +--- + +## 📈 Performance Comparison + +### Monolithic (helpers.py - OLD) + +``` +❌ Token per request: 3000-4000 tokens +❌ Response time: 3-5 seconds +❌ Cost: $0.03-0.04 per request +❌ Maintainability: Low (1 file, 600+ lines) +❌ Scalability: Hard to add new features +``` + +### Agent-Based (NEW) + +``` +✅ Token per request: 700-1500 tokens (50-70% reduction) +✅ Response time: 1-3 seconds +✅ Cost: $0.007-0.015 per request (70% cheaper) +✅ Maintainability: High (modular, clear separation) +✅ Scalability: Easy to add new agents +``` + +--- + +## 🚀 Cách Sử Dụng + +### 0. Import Structure (NEW!) + +**Option 1: Import from main package (Recommended)** +```python +from agents import ( + route_to_agent, # Router function + AgentCoordinator, # Coordinator class + BaseAgent, # Base agent class + NutritionAgent, # Specialized agents + ExerciseAgent, + get_agent # Agent factory +) +``` + +**Option 2: Import from subpackages (Explicit)** +```python +from agents.core import route_to_agent, AgentCoordinator, BaseAgent +from agents.specialized import NutritionAgent, ExerciseAgent +``` + +**Option 3: Import specific modules** +```python +from agents.core.router import route_to_agent +from agents.core.coordinator import AgentCoordinator +from agents.specialized.nutrition_agent import NutritionAgent +``` + +### 1. Basic Usage + +```python +from utils.helpers import chat_logic + +message = "Tôi muốn giảm cân" +chat_history = [] + +_, updated_history = chat_logic(message, chat_history) +``` + +### 2. Add New Agent + +```python +# Step 1: Create new agent file +# agents/new_agent.py + +class NewAgent: + def __init__(self): + self.system_prompt = "..." + + def handle(self, parameters, chat_history): + # Your logic here + return response + +# Step 2: Register in router.py +AVAILABLE_FUNCTIONS.append({ + "name": "new_agent", + "description": "...", + "parameters": {...} +}) + +# Step 3: Register in __init__.py +AGENTS["new_agent"] = NewAgent +``` + +### 3. 
Test Specific Agent
+
+```python
+from agents import get_agent
+
+agent = get_agent("nutrition_agent")
+response = agent.handle({
+    "user_query": "Tôi muốn giảm cân",
+    "user_data": {
+        "age": 25,
+        "gender": "male",
+        "weight": 70,
+        "height": 175
+    }
+}, chat_history=[])
+
+print(response)
+```
+
+---
+
+## 🧪 Testing
+
+### Test Router
+
+```python
+from agents import route_to_agent
+
+# Test nutrition routing
+result = route_to_agent("Tôi muốn giảm cân")
+assert result['agent'] == 'nutrition_agent'
+
+# Test exercise routing
+result = route_to_agent("Tôi muốn tập gym")
+assert result['agent'] == 'exercise_agent'
+
+# Test symptom routing
+result = route_to_agent("Tôi bị đau đầu")
+assert result['agent'] == 'symptom_agent'
+```
+
+### Test Individual Agent
+
+```python
+from agents import NutritionAgent
+
+agent = NutritionAgent()
+response = agent.handle({
+    "user_query": "Tôi muốn giảm cân",
+    "user_data": {
+        "age": 25,
+        "gender": "male",
+        "weight": 70,
+        "height": 175,
+        "goal": "weight_loss"
+    }
+})
+
+assert "BMI" in response
+assert "Calo" in response
+```
+
+---
+
+## 📁 File Structure
+
+```
+heocare-chatbot/
+├── agents/                          # NEW: Agent system
+│   ├── __init__.py                  # Agent registry
+│   ├── core/                        # Core infrastructure
+│   │   ├── router.py                # Function calling router
+│   │   ├── coordinator.py           # Multi-agent coordinator
+│   │   └── base_agent.py            # Base class for all agents
+│   └── specialized/                 # Domain-specific agents
+│       ├── nutrition_agent.py       # Nutrition specialist
+│       ├── exercise_agent.py        # Exercise specialist
+│       ├── symptom_agent.py         # Symptom assessment
+│       ├── mental_health_agent.py   # Mental health support
+│       └── general_health_agent.py  # General health (fallback)
+│
+├── utils/
+│   └── helpers.py                   # Clean chat logic (replaces the old monolithic version)
+│
+├── modules/
+│   ├── nutrition.py                 # Nutrition calculations
+│   ├── exercise/                    # Exercise planning
+│   └── rules.json                   # Business rules
+│
+├── app.py                           # Gradio UI (updated)
+└── config/
+    └── settings.py                  # OpenAI client
+```
+
+---
+
+## 🔧 Configuration
+
+### Environment Variables
+
+```bash
+# .env
+OPENAI_API_KEY=your_key_here
+MODEL=gpt-4o-mini  # or gpt-4
+```
+
+### Model Selection
+
+```python
+# config/settings.py
+MODEL = "gpt-4o-mini"  # Fast, cheap, good for routing
+# MODEL = "gpt-4"  # More accurate, expensive
+```
+
+---
+
+## 💡 Best Practices
+
+### 1. Token Optimization
+
+```python
+# ✅ GOOD: Only load necessary prompt
+agent = get_agent("nutrition_agent")  # ~500 tokens
+
+# ❌ BAD: Load entire monolithic prompt
+# ~3000 tokens every time
+```
+
+### 2. Error Handling
+
+```python
+try:
+    result = route_to_agent(message, chat_history)
+    agent = get_agent(result['agent'])
+    response = agent.handle(result['parameters'], chat_history)
+except Exception as e:
+    # Fallback to general health agent
+    agent = GeneralHealthAgent()
+    response = agent.handle({"user_query": message}, chat_history)
+```
+
+### 3. Context Management (NEW)
+
+```python
+# ✅ GOOD: Pass full chat history for context
+result = route_to_agent(message, chat_history)  # Uses last 10 exchanges
+
+# ⚠️ CAUTION: Don't truncate history too early
+# Router needs context to handle ambiguous questions
+
+# 💡 TIP: For very long conversations (50+ exchanges)
+# Consider keeping only relevant exchanges or summarizing
+```
+
+### 4. 
Caching + +```python +# Cache agent instances (optional optimization) +_agent_cache = {} + +def get_cached_agent(agent_name): + if agent_name not in _agent_cache: + _agent_cache[agent_name] = get_agent(agent_name) + return _agent_cache[agent_name] +``` + +--- + +## 📊 Monitoring + +### Log Routing Decisions + +```python +# In helpers.py +routing_result = route_to_agent(message, chat_history) +print(f"Routed to: {routing_result['agent']}, Confidence: {routing_result['confidence']}") +``` + +### Track Token Usage + +```python +# In each agent +response = client.chat.completions.create(...) +print(f"Tokens used: {response.usage.total_tokens}") +``` + +--- + +## 🤝 Contributing + +### Để thêm agent mới (with Memory Support): + +**Option 1: Extend BaseAgent (Recommended)** ✨ +```python +# agents/specialized/your_agent.py +from agents.core.base_agent import BaseAgent + +class YourAgent(BaseAgent): + def __init__(self, memory=None): + super().__init__(memory) + self.agent_name = 'your_agent' + self.system_prompt = "Your specialized prompt..." + + def handle(self, parameters, chat_history=None): + user_query = parameters.get('user_query', '') + + # Access shared memory + user_profile = self.get_user_profile() + + # Check missing fields + missing = self.get_missing_profile_fields(['age', 'weight']) + if missing: + return f"Cho mình biết {', '.join(missing)} nhé!" + + # Your logic here + response = self._generate_response(user_query, user_profile) + + # Save agent data + self.save_agent_data('key', 'value') + + return response +``` + +**Option 2: Standalone Agent (Legacy)** +```python +# agents/specialized/your_agent.py +class YourAgent: + def handle(self, parameters, chat_history=None): + # Your logic without memory + return "Response" +``` + +**Steps:** +1. Create `agents/specialized/your_agent.py` +2. Extend `BaseAgent` for memory support (recommended) +3. Register in `agents/core/router.py` AVAILABLE_FUNCTIONS +4. Register in `agents/specialized/__init__.py` AGENTS +5. Add to `agents/core/coordinator.py` if using coordinator +6. Test thoroughly + +**Example Registration:** +```python +# agents/core/router.py +AVAILABLE_FUNCTIONS = [ + { + "name": "your_agent", + "description": "Your agent description", + "parameters": {...} + } +] + +# agents/specialized/__init__.py +from .your_agent import YourAgent + +AGENTS = { + # ... existing agents + 'your_agent': YourAgent() +} + +# agents/core/coordinator.py (if using) +from agents.specialized.your_agent import YourAgent + +self.agents = { + # ... existing agents + 'your_agent': YourAgent() +} +``` + +--- + +## 📚 RAG System (Retrieval-Augmented Generation) + +### Smart RAG Decision (Performance Optimization) + +**Problem:** Always calling RAG adds 4-6s latency, even for simple queries. + +**Solution:** Conditional RAG based on query complexity. 
+ +```python +# BaseAgent.should_use_rag() - Shared by all agents +def should_use_rag(self, user_query, chat_history): + # Skip RAG for: + # - Greetings: "xin chào", "hello" + # - Acknowledgments: "cảm ơn", "ok" + # - Meta questions: "bạn là ai" + # - Simple responses: "có", "không" + + # Use RAG for: + # - Complex medical terms: "nguyên nhân", "điều trị" + # - Specific diseases: "bệnh", "viêm", "ung thư" + # - Detailed questions: "chi tiết", "cụ thể" + + return True/False # Smart decision +``` + +**Performance Impact:** +- Simple queries: **2-3s** (was 8-10s) → **3x faster** ⚡ +- Complex queries: **6-8s** (was 8-10s) → **1.3x faster** ⚡ +- Model & DB cached at startup (save 2-3s per query) + +### Architecture: Separate Collections (Option A) + +Each agent has its own dedicated vector database for fast, focused retrieval: + +``` +rag/vector_store/ +├── medical_diseases/ # SymptomAgent +├── mental_health/ # MentalHealthAgent +├── nutrition/ # NutritionAgent +├── fitness/ # FitnessAgent +└── general/ # SymptomAgent (COVID, general health) +``` + +### Datasets by Agent + +| Agent | Dataset | Source | Size | Records | +|-------|---------|--------|------|---------| +| **SymptomAgent** | ViMedical_Disease | HuggingFace | 50 MB | 603 diseases, 12K examples | +| **SymptomAgent** | COVID_QA_Castorini | HuggingFace | 5 MB | 124 COVID-19 Q&A | +| **MentalHealthAgent** | MentalChat16K | HuggingFace | 80 MB | 16K conversations, 33 topics | +| **NutritionAgent** | LLM_Dietary_Recommendation | HuggingFace | 20 MB | 50 patient profiles + diet plans | +| **FitnessAgent** | GYM-Exercise | HuggingFace | 10 MB | 1,660 gym exercises | + +**Total:** ~165 MB across 5 vector stores + +### How Agents Use RAG + +```python +class SymptomAgent: + def __init__(self): + # Load domain-specific vector stores + self.symptoms_db = ChromaDB("rag/vector_store/medical_diseases") + self.general_db = ChromaDB("rag/vector_store/general") + + def process(self, user_query): + # 1. Search symptoms database + results = self.symptoms_db.query(user_query, n_results=5) + + # 2. If not enough, search general database + if len(results) < 3: + general_results = self.general_db.query(user_query, n_results=3) + results.extend(general_results) + + # 3. Use results in response generation + context = self.format_context(results) + response = self.generate_response(user_query, context) + return response +``` + +### Benefits + +- **Fast Retrieval**: Each agent searches only its domain (~10-50ms) +- **High Relevance**: Domain-specific results, no noise from other topics +- **Scalable**: Easy to add new datasets per agent +- **Maintainable**: Update one domain without affecting others + +### Setup + +```bash +# One command sets up all RAG databases +bash scripts/setup_rag.sh + +# Automatically: +# 1. Downloads 5 datasets from HuggingFace +# 2. Processes and builds ChromaDB for each +# 3. Moves to rag/vector_store/ +# 4. Total time: 10-15 minutes +``` + +See `data_mining/README.md` for detailed dataset information. 
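+
+For reference, here is the same domain-first lookup written against the `chromadb` client directly, as a hedged sketch: the store paths match the layout above, but the collection names (`"medical_diseases"`, `"general"`) are assumptions, since the actual names come from the ingestion scripts.
+
+```python
+# Sketch of domain-first retrieval with a general fallback, assuming
+# ChromaDB persistent stores at the paths shown above. Collection names
+# are illustrative assumptions, not confirmed by the ingestion scripts.
+import chromadb
+
+def retrieve_symptom_context(query: str, n_results: int = 5) -> list:
+    domain_client = chromadb.PersistentClient(path="rag/vector_store/medical_diseases")
+    domain_col = domain_client.get_or_create_collection("medical_diseases")
+    docs = domain_col.query(query_texts=[query], n_results=n_results)["documents"][0]
+
+    # Fall back to the general store when the domain store is too sparse
+    if len(docs) < 3:
+        general_client = chromadb.PersistentClient(path="rag/vector_store/general")
+        general_col = general_client.get_or_create_collection("general")
+        docs.extend(general_col.query(query_texts=[query], n_results=3)["documents"][0])
+
+    return docs
+```
+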
+ +--- + +## ✅ Implemented Features + +- **Fine-tuning System** - Automatic data collection and model training (`fine_tuning/`) + - Conversation logging for all agents + - OpenAI fine-tuning API integration + - Quality filtering and export tools + - Training scripts and management + +- **Session Persistence** - Save conversation memory across sessions (`utils/session_store.py`) + - Automatic session save/load + - User-specific memory storage + - Multi-user support + - Session cleanup utilities + +- **Conversation Summarization** - Automatic summarization of long conversations (`utils/conversation_summarizer.py`) + - LLM-powered summarization + - Automatic trigger when conversation exceeds threshold + - Keeps recent turns + summary + - Token usage optimization + - Context preservation + +- **Feedback Loop** - Learn from user ratings and corrections (`feedback/`) + - Collect ratings (1-5 stars, thumbs up/down) + - User corrections and reports + - Performance analytics per agent + - Actionable insights generation + - Export for fine-tuning + - Agent comparison and ranking + +- **Multi-language Support** - Vietnamese and English support (`i18n/`) + - Automatic language detection + - Bilingual translations (UI messages, prompts) + - Language-specific agent system prompts + - Seamless language switching + - User language preferences + - Language usage statistics + +## 🔮 Future Enhancements + +- **Centralized Database** - Migrate health data storage from JSON to PostgreSQL for multi-user scalability +- **Admin Dashboard** - Monitor agent performance, routing accuracy, user metrics +- **Analytics & Monitoring** - Track response quality, token usage, user satisfaction +- **A/B Testing** - Test different prompts and routing strategies +- **Voice Interface** - Speech-to-text and text-to-speech capabilities diff --git a/agents/README.md b/agents/README.md new file mode 100644 index 0000000000000000000000000000000000000000..398bf06347f8f4c9660747a28b749b2dde50fe95 --- /dev/null +++ b/agents/README.md @@ -0,0 +1,312 @@ +# Agents Package Structure 🏗️ + +## 📁 Directory Structure + +``` +agents/ +├── README.md # This file +├── AGENT_ARCHITECTURE.md # Full architecture documentation +│ +├── core/ # Core infrastructure +│ ├── __init__.py +│ ├── router.py # OpenAI function calling router +│ ├── coordinator.py # Multi-agent coordinator +│ └── base_agent.py # Base class for all agents +│ +└── specialized/ # Domain-specific agents + ├── __init__.py + ├── nutrition_agent.py # Nutrition & diet advice + ├── exercise_agent.py # Exercise & fitness plans + ├── symptom_agent.py # Symptom assessment + ├── mental_health_agent.py # Mental health support + └── general_health_agent.py # General health queries +``` + +--- + +## 🎯 Purpose of Each Component + +### **Core Components** (`core/`) + +#### 1. `router.py` +- **Purpose:** Routes user queries to appropriate agents +- **Technology:** OpenAI Function Calling +- **Key Features:** + - Context-aware routing (10 exchanges history) + - Last agent tracking + - Improved accuracy: 80-85% for ambiguous questions + +#### 2. `coordinator.py` +- **Purpose:** Orchestrates multiple agents with shared memory +- **Key Features:** + - Shared conversation memory + - Agent handoffs + - Multi-agent collaboration + - Memory persistence across turns + +#### 3. 
`base_agent.py`
+- **Purpose:** Base class providing common functionality
+- **Key Features:**
+  - Memory access helpers
+  - Handoff detection logic
+  - Agent-to-agent communication
+  - Context awareness methods
+  - User data extraction
+
+---
+
+### **Specialized Agents** (`specialized/`)
+
+#### 1. `nutrition_agent.py`
+- **Domain:** Nutrition, diet, BMI, calories
+- **Capabilities:**
+  - Calculate BMI and calorie needs
+  - Generate meal plans
+  - Provide dietary advice
+  - Handle weight loss/gain goals
+
+#### 2. `exercise_agent.py`
+- **Domain:** Exercise, fitness, workout plans
+- **Capabilities:**
+  - Create personalized workout plans
+  - Suggest exercises based on fitness level
+  - Provide form guidance
+  - Track progress
+
+#### 3. `symptom_agent.py`
+- **Domain:** Symptom assessment, health concerns
+- **Capabilities:**
+  - OPQRST symptom assessment
+  - Red flag detection
+  - Triage recommendations
+  - Medical advice (when to see a doctor)
+
+#### 4. `mental_health_agent.py`
+- **Domain:** Mental health, stress, anxiety
+- **Capabilities:**
+  - Stress assessment
+  - Coping strategies
+  - Mindfulness techniques
+  - Crisis detection
+
+#### 5. `general_health_agent.py`
+- **Domain:** General health queries
+- **Capabilities:**
+  - Answer general health questions
+  - Provide health tips
+  - Fallback for unclear queries
+
+---
+
+## 🔄 How It Works
+
+### 1. **User Query Flow**
+
+```
+User Input
+    ↓
+helpers.py (chat_logic)
+    ↓
+AgentCoordinator
+    ↓
+┌─────────────────────────┐
+│  Shared Memory          │
+│  - User Profile         │
+│  - Conversation State   │
+└─────────────────────────┘
+    ↓
+Router (Function Calling)
+    ↓
+Specialized Agent(s)
+    ↓
+Response (with memory)
+```
+
+### 2. **Import Structure**
+
+```python
+# From outside agents package
+from agents import (
+    route_to_agent,      # Router function
+    AgentCoordinator,    # Coordinator class
+    BaseAgent,           # Base agent class
+    NutritionAgent,      # Specialized agents
+    get_agent            # Agent factory
+)
+
+# Within agents package
+from agents.core import router, coordinator, base_agent
+from agents.specialized import nutrition_agent, exercise_agent
+```
+
+---
+
+## 🚀 Usage Examples
+
+### Example 1: Using Coordinator (Recommended)
+
+```python
+from agents import AgentCoordinator
+
+coordinator = AgentCoordinator()
+
+# Handle query with memory
+response = coordinator.handle_query(
+    "Tôi 25 tuổi, muốn giảm cân",
+    chat_history
+)
+
+# Memory persists!
+response2 = coordinator.handle_query(
+    "Tôi nên ăn gì?",  # Knows age=25, goal=weight_loss
+    chat_history
+)
+```
+
+### Example 2: Using Router Directly
+
+```python
+from agents import route_to_agent, get_agent
+
+# Route to agent
+routing = route_to_agent("Tôi muốn giảm cân", chat_history)
+# → {'agent': 'nutrition_agent', 'parameters': {...}}
+
+# Get agent instance
+agent = get_agent(routing['agent'])
+
+# Handle request
+response = agent.handle(routing['parameters'], chat_history)
+```
+
+### Example 3: Creating Custom Agent
+
+```python
+from agents.core import BaseAgent
+
+class MyCustomAgent(BaseAgent):
+    def __init__(self, memory=None):
+        super().__init__(memory)
+        self.agent_name = 'my_custom_agent'
+        self.system_prompt = "Your custom prompt..."
+ + def handle(self, parameters, chat_history=None): + # Access shared memory + user_profile = self.get_user_profile() + + # Your logic here + response = self._generate_response(parameters) + + # Save to memory + self.save_agent_data('key', 'value') + + return response +``` + +--- + +## 📊 Key Benefits of This Structure + +### ✅ **Separation of Concerns** +- Core infrastructure separate from domain logic +- Easy to maintain and test + +### ✅ **Scalability** +- Add new agents without touching core +- Easy to extend functionality + +### ✅ **Reusability** +- BaseAgent provides common functionality +- Coordinator handles all agents uniformly + +### ✅ **Memory Management** +- Shared memory across all agents +- No repeated questions +- Full context awareness + +### ✅ **Clean Imports** +- Clear import paths +- No circular dependencies +- Well-organized namespaces + +--- + +## 🔧 Adding a New Agent + +### Step 1: Create Agent File + +```python +# agents/specialized/my_agent.py +from agents.core import BaseAgent + +class MyAgent(BaseAgent): + def __init__(self, memory=None): + super().__init__(memory) + self.agent_name = 'my_agent' + self.system_prompt = "..." + + def handle(self, parameters, chat_history=None): + # Your implementation + return "Response" +``` + +### Step 2: Register in `specialized/__init__.py` + +```python +from .my_agent import MyAgent + +AGENTS = { + # ... existing agents + "my_agent": MyAgent, +} +``` + +### Step 3: Register in `core/router.py` + +```python +AVAILABLE_FUNCTIONS = [ + # ... existing functions + { + "name": "my_agent", + "description": "Your agent description", + "parameters": {...} + } +] +``` + +### Step 4: Add to Coordinator + +```python +# agents/core/coordinator.py +from agents.specialized.my_agent import MyAgent + +self.agents = { + # ... existing agents + 'my_agent': MyAgent() +} +``` + +--- + +## 📚 Documentation + +- **Full Architecture:** See `AGENT_ARCHITECTURE.md` +- **Implementation Guide:** See `PART1_IMPLEMENTATION.md` (if exists) +- **API Reference:** See individual agent files + +--- + +## 🎯 Best Practices + +1. **Always extend BaseAgent** for new agents (unless you have a good reason not to) +2. **Use coordinator** for production (enables memory & multi-agent) +3. **Keep agents focused** - One domain per agent +4. **Document your prompts** - Clear system prompts are crucial +5. **Test thoroughly** - Test routing, memory, and handoffs + +--- + +**Last Updated:** Oct 11, 2025 +**Version:** 2.0 (with Memory & Coordination) diff --git a/agents/__init__.py b/agents/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e0aa1ad71f23d4f2b5b5d113a06e9883b196e708 --- /dev/null +++ b/agents/__init__.py @@ -0,0 +1,40 @@ +""" +Agents Package - Healthcare AI Agent System + +Structure: +- core/: Router, Coordinator, Base Agent +- specialized/: Domain-specific agents (Nutrition, Exercise, Symptom, etc.) 
+""" + +# Core components +from .core import route_to_agent, get_agent_description, AgentCoordinator, BaseAgent + +# Specialized agents +from .specialized import ( + NutritionAgent, + ExerciseAgent, + SymptomAgent, + MentalHealthAgent, + GeneralHealthAgent, + AGENTS, + get_agent +) + +__all__ = [ + # Core + 'route_to_agent', + 'get_agent_description', + 'AgentCoordinator', + 'BaseAgent', + + # Specialized agents + 'NutritionAgent', + 'ExerciseAgent', + 'SymptomAgent', + 'MentalHealthAgent', + 'GeneralHealthAgent', + + # Utilities + 'AGENTS', + 'get_agent' +] diff --git a/agents/core/__init__.py b/agents/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9339d04472af9f4125b867af6f6910c15fafa48f --- /dev/null +++ b/agents/core/__init__.py @@ -0,0 +1,14 @@ +""" +Core agents package - Router, Coordinator, and Base Agent +""" + +from .router import route_to_agent, get_agent_description +from .coordinator import AgentCoordinator +from .base_agent import BaseAgent + +__all__ = [ + 'route_to_agent', + 'get_agent_description', + 'AgentCoordinator', + 'BaseAgent' +] diff --git a/agents/core/base_agent.py b/agents/core/base_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..b4dd5c33c9666c862fd755b4fa5257b1ddc53dcc --- /dev/null +++ b/agents/core/base_agent.py @@ -0,0 +1,602 @@ +""" +Base Agent - Parent class for all specialized agents +Provides shared functionality: memory access, handoff logic, coordination +""" + +from typing import Dict, Any, Optional, List +from utils.memory import ConversationMemory + + +class BaseAgent: + """ + Base class for all agents + Provides common functionality and interface + """ + + def __init__(self, memory: Optional[ConversationMemory] = None): + """ + Initialize base agent + + Args: + memory: Shared conversation memory (optional) + """ + self.memory = memory or ConversationMemory() + self.agent_name = self.__class__.__name__.replace('Agent', '').lower() + self.system_prompt = "" + + # Handoff configuration + self.can_handoff = True + self.handoff_triggers = [] + + # ===== Core Interface ===== + + def handle(self, parameters: Dict[str, Any], chat_history: Optional[List] = None) -> str: + """ + Handle user request (must be implemented by subclasses) + + Args: + parameters: Request parameters from router + chat_history: Conversation history + + Returns: + str: Response message + """ + raise NotImplementedError("Subclasses must implement handle()") + + # ===== Memory Access Helpers ===== + + def get_user_profile(self) -> Dict[str, Any]: + """Get complete user profile from memory""" + return self.memory.get_full_profile() + + # ===== Smart RAG Helper ===== + + def should_use_rag(self, user_query: str, chat_history: Optional[List] = None) -> bool: + """ + Smart RAG Decision - Skip RAG for simple queries to improve performance + + Performance Impact: + - Simple queries: 2-3s (was 8-10s) - 3x faster + - Complex queries: 6-8s (was 8-10s) - 1.3x faster + + Args: + user_query: User's message + chat_history: Conversation history + + Returns: + bool: True if RAG needed, False for simple conversational queries + """ + query_lower = user_query.lower().strip() + + # 1. Greetings & acknowledgments (no RAG needed) + greetings = [ + 'xin chào', 'hello', 'hi', 'chào', 'hey', + 'cảm ơn', 'thanks', 'thank you', 'tks', + 'ok', 'được', 'vâng', 'ừ', 'uhm', 'uh huh', + 'bye', 'tạm biệt', 'hẹn gặp lại' + ] + if any(g in query_lower for g in greetings): + return False + + # 2. 
Very short responses (usually conversational) + if len(query_lower) < 10: + short_responses = ['có', 'không', 'rồi', 'ạ', 'dạ', 'yes', 'no', 'nope', 'yep'] + if any(r == query_lower or query_lower.startswith(r + ' ') for r in short_responses): + return False + + # 3. Meta questions about the bot (no RAG needed) + meta_questions = [ + 'bạn là ai', 'bạn tên gì', 'bạn có thể', 'bạn làm gì', + 'who are you', 'what can you', 'what do you' + ] + if any(m in query_lower for m in meta_questions): + return False + + # 4. Complex medical/health questions (NEED RAG) + complex_patterns = [ + # Medical terms + 'nguyên nhân', 'tại sao', 'why', 'how', 'làm sao', + 'cách nào', 'phương pháp', 'điều trị', 'chữa', + 'thuốc', 'medicine', 'phòng ngừa', 'prevention', + 'biến chứng', 'complication', 'nghiên cứu', 'research', + # Specific diseases + 'bệnh', 'disease', 'viêm', 'ung thư', 'cancer', + 'tiểu đường', 'diabetes', 'huyết áp', 'blood pressure', + # Detailed questions + 'chi tiết', 'cụ thể', 'specific', 'detail', + 'khoa học', 'scientific', 'evidence', 'hướng dẫn', + 'guideline', 'recommendation', 'chuyên gia', 'expert' + ] + if any(p in query_lower for p in complex_patterns): + return True + + # 5. Default: Simple first-turn questions don't need RAG + # Agent can ask clarifying questions first + if not chat_history or len(chat_history) == 0: + # Simple initial statements + simple_starts = [ + 'tôi muốn', 'tôi cần', 'giúp tôi', 'tôi bị', + 'i want', 'i need', 'help me', 'i have', 'i feel' + ] + if any(s in query_lower for s in simple_starts): + # Let agent gather info first, use RAG later + return False + + # 6. Default: Use RAG for safety (medical context) + return True + + def update_user_profile(self, key: str, value: Any) -> None: + """Update user profile in shared memory""" + self.memory.update_profile(key, value) + + def get_missing_profile_fields(self, required_fields: List[str]) -> List[str]: + """Check what profile fields are missing""" + return self.memory.get_missing_fields(required_fields) + + def save_agent_data(self, key: str, value: Any) -> None: + """Save agent-specific data to memory""" + self.memory.add_agent_data(self.agent_name, key, value) + + def get_agent_data(self, key: str = None) -> Any: + """Get agent-specific data from memory""" + return self.memory.get_agent_data(self.agent_name, key) + + def get_other_agent_data(self, agent_name: str, key: str = None) -> Any: + """Get data from another agent""" + return self.memory.get_agent_data(agent_name, key) + + # ===== Context Awareness ===== + + def get_context_summary(self) -> str: + """Get summary of current conversation context""" + return self.memory.get_context_summary() + + def get_previous_agent(self) -> Optional[str]: + """Get name of previous agent""" + return self.memory.get_previous_agent() + + def get_current_topic(self) -> Optional[str]: + """Get current conversation topic""" + return self.memory.get_current_topic() + + def set_current_topic(self, topic: str) -> None: + """Set current conversation topic""" + self.memory.set_current_topic(topic) + + def generate_natural_opening(self, user_query: str, chat_history: Optional[List] = None) -> str: + """ + Generate natural conversation opening based on context + Avoids robotic prefixes like "Thông tin đã tư vấn:" + + Args: + user_query: Current user query + chat_history: Conversation history + + Returns: + str: Natural opening phrase (empty if not needed) + """ + # Check if this is a topic transition + previous_agent = self.get_previous_agent() + is_new_topic = previous_agent 
and previous_agent != self.agent_name + + # If continuing same topic, no special opening needed + if not is_new_topic: + return "" + + # Generate natural transition based on agent type + query_lower = user_query.lower() + + # Enthusiastic transitions for new requests + if any(word in query_lower for word in ['muốn', 'cần', 'giúp', 'tư vấn']): + openings = [ + "Ah, bây giờ bạn đang cần", + "Được rồi, để mình", + "Tuyệt! Mình sẽ", + "Ok, cùng", + ] + import random + return random.choice(openings) + " " + + # Default: no prefix, just natural response + return "" + + # ===== Handoff Logic ===== + + def should_handoff(self, user_query: str, chat_history: Optional[List] = None) -> bool: + """ + Determine if this agent should hand off to another agent + + Args: + user_query: User's current query + chat_history: Conversation history + + Returns: + bool: True if handoff is needed + """ + if not self.can_handoff: + return False + + # Check for handoff trigger keywords + query_lower = user_query.lower() + for trigger in self.handoff_triggers: + if trigger in query_lower: + return True + + return False + + def suggest_next_agent(self, user_query: str) -> Optional[str]: + """ + Suggest which agent to hand off to + + Args: + user_query: User's current query + + Returns: + str: Name of suggested agent, or None + """ + query_lower = user_query.lower() + + # Symptom keywords + symptom_keywords = ['đau', 'sốt', 'ho', 'buồn nôn', 'chóng mặt', 'mệt'] + if any(kw in query_lower for kw in symptom_keywords): + return 'symptom_agent' + + # Nutrition keywords + nutrition_keywords = ['ăn', 'thực đơn', 'calo', 'giảm cân', 'tăng cân'] + if any(kw in query_lower for kw in nutrition_keywords): + return 'nutrition_agent' + + # Exercise keywords + exercise_keywords = ['tập', 'gym', 'cardio', 'yoga', 'chạy bộ'] + if any(kw in query_lower for kw in exercise_keywords): + return 'exercise_agent' + + # Mental health keywords + mental_keywords = ['stress', 'lo âu', 'trầm cảm', 'mất ngủ', 'burnout'] + if any(kw in query_lower for kw in mental_keywords): + return 'mental_health_agent' + + return None + + def create_handoff_message(self, next_agent: str, context: str = "", user_query: str = "") -> str: + """ + Create a SEAMLESS topic transition (not explicit handoff) + + Args: + next_agent: Name of agent to hand off to + context: Additional context for handoff + user_query: User's query to understand intent + + Returns: + str: Natural transition message (NOT "chuyển sang chuyên gia") + """ + # Map agents to topic areas + topic_map = { + 'symptom_agent': { + 'topic': 'triệu chứng', + 'action': 'đánh giá', + 'info_needed': ['triệu chứng cụ thể', 'thời gian xuất hiện'] + }, + 'nutrition_agent': { + 'topic': 'dinh dưỡng', + 'action': 'tư vấn chế độ ăn', + 'info_needed': ['mục tiêu', 'cân nặng', 'chiều cao', 'tuổi'] + }, + 'exercise_agent': { + 'topic': 'tập luyện', + 'action': 'lên lịch tập', + 'info_needed': ['mục tiêu', 'thời gian có thể tập', 'thiết bị'] + }, + 'mental_health_agent': { + 'topic': 'sức khỏe tinh thần', + 'action': 'hỗ trợ', + 'info_needed': ['cảm giác hiện tại', 'thời gian kéo dài'] + } + } + + topic_info = topic_map.get(next_agent, { + 'topic': 'vấn đề này', + 'action': 'tư vấn', + 'info_needed': [] + }) + + # SEAMLESS transition - acknowledge topic change naturally + message = f"{context}\n\n" if context else "" + + # Natural acknowledgment based on query + if 'tập' in user_query.lower() or 'gym' in user_query.lower(): + message += f"Ah, bây giờ bạn đang cần về {topic_info['topic']}! 
" + elif 'ăn' in user_query.lower() or 'thực đơn' in user_query.lower(): + message += f"Okii, giờ chuyển sang {topic_info['topic']} nhé! " + else: + message += f"Được, mình giúp bạn về {topic_info['topic']}! " + + # Ask for info if needed (natural, not formal) + if topic_info['info_needed']: + info_list = ', '.join(topic_info['info_needed'][:2]) # Max 2 items + message += f"Để {topic_info['action']} phù hợp, cho mình biết thêm về {info_list} nhé!" + + return message + + # ===== Multi-Agent Coordination ===== + + def needs_collaboration(self, user_query: str) -> List[str]: + """ + Determine if multiple agents are needed + + Args: + user_query: User's query + + Returns: + List[str]: List of agent names needed + """ + agents_needed = [] + query_lower = user_query.lower() + + # Check for each agent's keywords + if any(kw in query_lower for kw in ['đau', 'sốt', 'ho', 'triệu chứng']): + agents_needed.append('symptom_agent') + + if any(kw in query_lower for kw in ['ăn', 'thực đơn', 'calo', 'dinh dưỡng']): + agents_needed.append('nutrition_agent') + + if any(kw in query_lower for kw in ['tập', 'gym', 'cardio', 'exercise']): + agents_needed.append('exercise_agent') + + if any(kw in query_lower for kw in ['stress', 'lo âu', 'trầm cảm', 'mental']): + agents_needed.append('mental_health_agent') + + return agents_needed + + # ===== Utility Methods ===== + + def extract_user_data_from_history(self, chat_history: List) -> Dict[str, Any]: + """ + Extract user data from conversation history + (Can be overridden by subclasses for specific extraction) + + Args: + chat_history: List of [user_msg, bot_msg] pairs + + Returns: + Dict: Extracted user data + """ + import re + + if not chat_history: + return {} + + all_messages = " ".join([msg[0] for msg in chat_history if msg[0]]) + extracted = {} + + # Extract age + age_match = re.search(r'(\d+)\s*tuổi|tuổi\s*(\d+)|tôi\s*(\d+)', all_messages.lower()) + if age_match: + extracted['age'] = int([g for g in age_match.groups() if g][0]) + + # Extract gender + if re.search(r'\bnam\b|male|đàn ông', all_messages.lower()): + extracted['gender'] = 'male' + elif re.search(r'\bnữ\b|female|đàn bà|phụ nữ', all_messages.lower()): + extracted['gender'] = 'female' + + # Extract weight + weight_match = re.search(r'(\d+)\s*kg|nặng\s*(\d+)|cân\s*(\d+)', all_messages.lower()) + if weight_match: + extracted['weight'] = float([g for g in weight_match.groups() if g][0]) + + # Extract height + height_match = re.search(r'(\d+)\s*cm|cao\s*(\d+)|chiều cao\s*(\d+)', all_messages.lower()) + if height_match: + extracted['height'] = float([g for g in height_match.groups() if g][0]) + + return extracted + + def update_memory_from_history(self, chat_history: List) -> None: + """Extract and update memory from chat history""" + extracted = self.extract_user_data_from_history(chat_history) + + for key, value in extracted.items(): + # Always update with latest info (user may correct themselves) + self.memory.update_profile(key, value) + + def extract_and_save_user_info(self, user_message: str) -> Dict[str, Any]: + """ + Extract user info from a single message using LLM (flexible, handles typos) + Saves to memory immediately + + Args: + user_message: Single user message (any format, any order) + + Returns: + Dict: Extracted data + """ + from config.settings import client, MODEL + import json + + # Use LLM to extract structured data (handles typos, any order, extra info) + extraction_prompt = f"""Extract health information from this user message. Handle typos and variations. 
+ +User message: "{user_message}" + +Extract these fields if present (return null if not found): +- age: integer (tuổi, age, years old) +- gender: "male" or "female" (nam, nữ, male, female, đàn ông, phụ nữ) +- weight: float in kg (nặng, cân, weight, kg) +- height: float in cm (cao, chiều cao, height, cm, m) + IMPORTANT: Height MUST be in cm (50-300 range) + - If user says "1.75m" or "1.78m" → convert to cm (175, 178) + - If user says "175cm" or "178cm" → use as is (175, 178) + - NEVER return values like 1.0, 1.5, 1.75 for height! +- body_fat_percentage: float (tỉ lệ mỡ, body fat, %, optional) + +Return ONLY valid JSON with these exact keys. Example: +{{"age": 30, "gender": "male", "weight": 70.5, "height": 175, "body_fat_percentage": 25}} + +CRITICAL: Height must be 50-300 (in cm). If user says "1.78m", return 178, not 1.78! +If a field is not found, use null. Be flexible with typos and word order.""" + + try: + response = client.chat.completions.create( + model=MODEL, + messages=[ + {"role": "system", "content": "You are a data extraction assistant. Extract structured health data from user messages. Handle typos and variations. Return only valid JSON."}, + {"role": "user", "content": extraction_prompt} + ], + temperature=0.1, # Low temp for consistent extraction + max_tokens=150 + ) + + result_text = response.choices[0].message.content.strip() + + # Parse JSON response + # Remove markdown code blocks if present + if "```json" in result_text: + result_text = result_text.split("```json")[1].split("```")[0].strip() + elif "```" in result_text: + result_text = result_text.split("```")[1].split("```")[0].strip() + + extracted = json.loads(result_text) + + # Auto-correct obvious errors before saving + extracted = self._auto_correct_health_data(extracted) + + # Save to memory (only non-null values) + allowed_fields = ['age', 'gender', 'weight', 'height', 'body_fat_percentage'] + for key, value in extracted.items(): + if value is not None and key in allowed_fields: + self.update_user_profile(key, value) + + return {k: v for k, v in extracted.items() if v is not None} + + except Exception as e: + # Fallback to regex if LLM fails + print(f"LLM extraction failed: {e}, using regex fallback") + return self._extract_with_regex_fallback(user_message) + + def _auto_correct_health_data(self, data: Dict[str, Any]) -> Dict[str, Any]: + """ + Auto-correct obvious errors in health data (typos, wrong units) + + Examples: + - height: 200 → 200cm ✅ (likely meant 200cm, not 200m) + - height: 1.75 → 175cm ✅ (convert m to cm) + - weight: 75 → 75kg ✅ (assume kg if reasonable) + - weight: 75000 → 75kg ✅ (likely meant 75kg, not 75000g) + """ + corrected = data.copy() + + # Auto-correct height + if 'height' in corrected and corrected['height'] is not None: + height = float(corrected['height']) + + # If height is very small (< 10), likely in meters → convert to cm + if 0 < height < 10: + corrected['height'] = height * 100 + print(f"Auto-corrected height: {height}m → {corrected['height']}cm") + + # If height is reasonable (50-300), assume cm + elif 50 <= height <= 300: + corrected['height'] = height + + # If height is very large (> 1000), likely in mm → convert to cm + elif height > 1000: + corrected['height'] = height / 10 + print(f"Auto-corrected height: {height}mm → {corrected['height']}cm") + + # Otherwise invalid, set to None + else: + print(f"Invalid height: {height}, setting to None") + corrected['height'] = None + + # Auto-correct weight + if 'weight' in corrected and corrected['weight'] is not None: + weight = 
float(corrected['weight']) + + # If weight is very large (> 500), likely in grams → convert to kg + if weight > 500: + corrected['weight'] = weight / 1000 + print(f"Auto-corrected weight: {weight}g → {corrected['weight']}kg") + + # If weight is reasonable (20-300), assume kg + elif 20 <= weight <= 300: + corrected['weight'] = weight + + # If weight is very small (< 20), might be wrong unit + elif 0 < weight < 20: + # Could be in different unit or child weight + # Keep as is but flag + corrected['weight'] = weight + + # Otherwise invalid + else: + print(f"Invalid weight: {weight}, setting to None") + corrected['weight'] = None + + # Auto-correct age + if 'age' in corrected and corrected['age'] is not None: + age = int(corrected['age']) + + # Reasonable age range: 1-120 + if not (1 <= age <= 120): + print(f"Invalid age: {age}, setting to None") + corrected['age'] = None + + # Auto-correct body fat percentage + if 'body_fat_percentage' in corrected and corrected['body_fat_percentage'] is not None: + bf = float(corrected['body_fat_percentage']) + + # Reasonable body fat: 3-60% + if not (3 <= bf <= 60): + print(f"Invalid body fat: {bf}%, setting to None") + corrected['body_fat_percentage'] = None + + return corrected + + def _extract_with_regex_fallback(self, user_message: str) -> Dict[str, Any]: + """Fallback regex extraction (less flexible but reliable)""" + import re + extracted = {} + msg_lower = user_message.lower() + + # Extract age + age_match = re.search(r'(\d+)\s*tuổi|tuổi\s*(\d+)|age\s*(\d+)', msg_lower) + if age_match: + age = int([g for g in age_match.groups() if g][0]) + extracted['age'] = age + self.update_user_profile('age', age) + + # Extract gender + if re.search(r'\bnam\b|male|đàn ông', msg_lower): + extracted['gender'] = 'male' + self.update_user_profile('gender', 'male') + elif re.search(r'\bnữ\b|female|đàn bà|phụ nữ', msg_lower): + extracted['gender'] = 'female' + self.update_user_profile('gender', 'female') + + # Extract weight + weight_match = re.search(r'(?:nặng|cân|weight)?\s*(\d+(?:\.\d+)?)\s*kg', msg_lower) + if weight_match: + weight = float(weight_match.group(1)) + extracted['weight'] = weight + self.update_user_profile('weight', weight) + + # Extract height + height_cm_match = re.search(r'(?:cao|chiều cao|height)?\s*(\d+(?:\.\d+)?)\s*cm', msg_lower) + if height_cm_match: + height = float(height_cm_match.group(1)) + extracted['height'] = height + self.update_user_profile('height', height) + else: + height_m_match = re.search(r'(?:cao|chiều cao|height)?\s*(\d+\.?\d*)\s*m\b', msg_lower) + if height_m_match: + height = float(height_m_match.group(1)) + if height < 3: + height = height * 100 + extracted['height'] = height + self.update_user_profile('height', height) + + return extracted + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}: {self.get_context_summary()}>" diff --git a/agents/core/context_analyzer.py b/agents/core/context_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..21e76b414526d958fb3b05cc38ac5620e139d05d --- /dev/null +++ b/agents/core/context_analyzer.py @@ -0,0 +1,260 @@ +""" +Context Analyzer - Understands user intent and needs +Determines what type of response is most appropriate +""" + +from typing import Dict, List, Optional + +class ContextAnalyzer: + """ + Optimized Context Analyzer - Balanced between simplicity and coverage + Handles 90% of scenarios with 50% less complexity + """ + + # Define patterns once as class variables (better performance) + # Use compound patterns to reduce false 
positives
+    URGENT_PATTERNS = [
+        'đang đau', 'đau quá', 'khó chịu', 'không chịu nổi',
+        'cấp cứu', 'khẩn cấp', 'gấp', 'ngay',
+        'đau không thể', 'không chịu đựng được'  # Strong pain indicators
+    ]
+
+    # Context-specific urgent combinations (must have BOTH)
+    URGENT_COMBINATIONS = [
+        ('làm sao', ['hết đau', 'giảm đau', 'đỡ', 'ngay']),
+        ('giúp tôi', ['đau', 'khó chịu', 'không chịu']),
+        ('phải làm gì', ['đau', 'khó chịu', 'ngay', 'gấp']),
+        ('cần gì', ['giảm đau', 'hết đau', 'cấp cứu'])
+    ]
+
+    INFO_PATTERNS = [
+        'tại sao', 'nguyên nhân', 'có phải', 'bị gì',
+        'là gì', 'thế nào', 'như thế nào'
+    ]
+
+    PREVENTION_PATTERNS = [
+        'phòng ngừa', 'tránh', 'không bị', 'hạn chế',
+        'làm sao để không', 'để khỏi'
+    ]
+
+    @staticmethod
+    def is_vague_query(user_query: str) -> bool:
+        """
+        Detect if user query is too vague/ambiguous
+        """
+        vague_patterns = [
+            'không khỏe', 'mệt', 'khó chịu', 'không ổn',
+            'giúp tôi', 'cần giúp', 'phải làm sao',
+            'không biết', 'chẳng hiểu', 'làm gì bây giờ'
+        ]
+
+        query_lower = user_query.lower()
+
+        # Check if query is very short and vague
+        if len(user_query.split()) <= 3:
+            if any(pattern in query_lower for pattern in vague_patterns):
+                return True
+
+        # Check if query has no specific symptom/goal
+        has_specifics = any(word in query_lower for word in [
+            'đau', 'sốt', 'ho', 'buồn nôn',  # Symptoms
+            'giảm cân', 'tăng cân', 'tập',  # Goals
+            'ăn', 'thực đơn', 'calo'  # Nutrition
+        ])
+
+        if not has_specifics and any(pattern in query_lower for pattern in vague_patterns):
+            return True
+
+        return False
+
+    @staticmethod
+    def analyze_user_intent(user_query: str, chat_history: List) -> Dict:
+        """
+        Simplified but effective intent analysis with context awareness
+        Returns only what's needed for response generation
+        """
+        query_lower = user_query.lower()
+
+        # Check if query is too vague
+        is_vague = ContextAnalyzer.is_vague_query(user_query)
+
+        # Smart urgency check with context
+        urgency = 'medium'  # default
+
+        # Check direct urgent patterns
+        if any(p in query_lower for p in ContextAnalyzer.URGENT_PATTERNS):
+            urgency = 'high'
+        # Check combination patterns (need both parts)
+        else:
+            for trigger, contexts in ContextAnalyzer.URGENT_COMBINATIONS:
+                if trigger in query_lower:
+                    if any(ctx in query_lower for ctx in contexts):
+                        urgency = 'high'
+                        break
+
+        # If not urgent, check if it's informational
+        if urgency != 'high' and any(p in query_lower for p in ContextAnalyzer.INFO_PATTERNS):
+            urgency = 'low'
+
+        # Determine primary intent (simplified)
+        intent = 'general'  # default
+        if urgency == 'high':
+            intent = 'immediate_relief'
+        elif any(p in query_lower for p in ContextAnalyzer.INFO_PATTERNS):
+            intent = 'information'
+        elif any(p in query_lower for p in ContextAnalyzer.PREVENTION_PATTERNS):
+            intent = 'prevention'
+
+        # Simple decision: solution vs education
+        needs_solution = urgency in ['high', 'medium']
+        needs_education = urgency == 'low' or intent in ['information', 'prevention']
+
+        # Add conversation stage for compatibility
+        conversation_stage = len(chat_history) if chat_history else 0
+
+        return {
+            'intent': intent,
+            'urgency': urgency,
+            'needs_solution': needs_solution,
+            'needs_education': needs_education,
+            'is_vague': is_vague,
+            'needs_clarification': is_vague,
+            'conversation_stage': conversation_stage
+        }
+
+    @staticmethod
+    def determine_response_structure(context: Dict) -> Dict:
+        """
+        Determine how to structure the response based on context
+
+        Returns dict with:
+        - structure: 'solution_first', 'assessment_first', 'education_first'
+        - include_immediate: bool
+        - include_prevention: bool
+        - include_referral: bool
+        """
+
+        if context['urgency'] == 'high':
+            return {
+                'structure': 'solution_first',
+                'include_immediate': True,
+                'include_prevention': True,  # But after solution
+                'include_referral': True,
+                'tone': 'supportive_urgent'
+            }
+
+        elif context['intent'] == 'information':
+            return {
+                'structure': 'assessment_first',
+                'include_immediate': False,
+                'include_prevention': True,
+                'include_referral': False,
+                'tone': 'informative'
+            }
+
+        elif context['intent'] == 'prevention':
+            return {
+                'structure': 'education_first',
+                'include_immediate': False,
+                'include_prevention': True,
+                'include_referral': False,
+                'tone': 'educational'
+            }
+
+        else:  # Default balanced
+            return {
+                'structure': 'solution_first',
+                'include_immediate': True,
+                'include_prevention': True,
+                'include_referral': True,
+                'tone': 'balanced'
+            }
+
+    @staticmethod
+    def format_contextual_response(
+        symptom_assessment: str,
+        solutions: List[str],
+        preventions: List[str],
+        response_structure: Dict
+    ) -> str:
+        """
+        Format response based on context and user needs
+        """
+        response = ""
+
+        if response_structure['structure'] == 'solution_first':
+            # IMMEDIATE RELIEF FIRST
+            if response_structure['include_immediate'] and solutions:
+                response += "**Để giảm triệu chứng ngay, bạn có thể:**\n"
+                for i, solution in enumerate(solutions[:3], 1):  # Top 3 immediate actions
+                    response += f"{i}. {solution}\n"
+                response += "\n"
+
+            # MEDICATION OPTIONS (if urgent)
+            if response_structure['tone'] == 'supportive_urgent':
+                response += "**Về thuốc:**\n"
+                response += "- Có thể dùng thuốc kháng acid (Maalox, Gaviscon) nếu đau do acid\n"
+                response += "- Thuốc chống co thắt (Buscopan) nếu đau quặn\n"
+                response += "⚠️ *Nên tham khảo dược sĩ trước khi dùng*\n\n"
+
+            # WARNING SIGNS
+            if response_structure['include_referral']:
+                response += "**⚠️ Đi khám ngay nếu:**\n"
+                response += "- Đau không giảm sau 2 giờ\n"
+                response += "- Kèm sốt, nôn, tiêu chảy\n"
+                response += "- Đau dữ dội tăng dần\n\n"
+
+            # PREVENTION (after immediate care)
+            if response_structure['include_prevention'] and preventions:
+                response += "**Sau khi đỡ, để phòng tránh:**\n"
+                for prevention in preventions[:3]:
+                    response += f"• {prevention}\n"
+
+        elif response_structure['structure'] == 'assessment_first':
+            # Start with assessment/diagnosis
+            response += symptom_assessment + "\n\n"
+
+            # Then solutions
+            if solutions:
+                response += "**Cách xử lý:**\n"
+                for solution in solutions:
+                    response += f"• {solution}\n"
+
+        elif response_structure['structure'] == 'education_first':
+            # Start with education/prevention
+            if preventions:
+                response += "**Để phòng ngừa hiệu quả:**\n"
+                for prevention in preventions:
+                    response += f"• {prevention}\n"
+
+        return response
+
+    @staticmethod
+    def should_ask_followup(context: Dict, chat_history: List) -> bool:
+        """
+        Determine if we should ask follow-up questions
+
+        Rules:
+        - Don't ask if urgency is high (give solutions first)
+        - Don't ask if we already have enough info
+        - Don't ask more than 2 questions total
+        """
+
+        # High urgency = no questions, give help
+        if context['urgency'] == 'high':
+            return False
+
+        # Already asked 2+ questions = enough
+        if chat_history and len(chat_history) >= 2:
+            bot_questions = 0
+            for _, bot_msg in chat_history:
+                if bot_msg and '?' 
in bot_msg: + bot_questions += 1 + if bot_questions >= 2: + return False + + # First interaction and not urgent = can ask 1 question + if context['conversation_stage'] == 0: + return True + + return False diff --git a/agents/core/coordinator.py b/agents/core/coordinator.py new file mode 100644 index 0000000000000000000000000000000000000000..95e01418c056d8a099f870d367a4ad560b8a8977 --- /dev/null +++ b/agents/core/coordinator.py @@ -0,0 +1,579 @@ +""" +Agent Coordinator - Manages agent collaboration and handoffs +Enables multi-agent responses and smooth transitions +""" + +from typing import Dict, List, Optional, Any +import asyncio +from concurrent.futures import ThreadPoolExecutor +from utils.memory import ConversationMemory +from utils.session_store import get_session_store +from utils.conversation_summarizer import get_summarizer +from agents.core.router import route_to_agent, get_router +from fine_tuning import get_data_collector +from health_data import HealthContext, HealthDataStore +import hashlib +import json + + +class AgentCoordinator: + """ + Coordinates multiple agents and manages handoffs + Provides multi-agent collaboration capabilities + """ + + def __init__(self, user_id: Optional[str] = None, use_embedding_router=True, enable_cache=True, enable_data_collection=True, enable_session_persistence=True): + """ + Initialize coordinator with shared memory and data store + + Args: + user_id: Unique user identifier for session persistence + use_embedding_router: Use embedding-based routing (faster) + enable_cache: Enable response caching + enable_data_collection: Enable conversation logging for fine-tuning + enable_session_persistence: Enable session persistence across restarts + """ + # Session persistence + self.user_id = user_id + self.session_store = get_session_store() if enable_session_persistence else None + + # Initialize memory with session persistence + self.memory = ConversationMemory( + user_id=user_id, + session_store=self.session_store + ) + + self.data_store = HealthDataStore() + self.health_context = None + self.agents = {} + + # Enable embedding router (faster than LLM routing) + self.use_embedding_router = use_embedding_router + if use_embedding_router: + self.router = get_router(use_embeddings=True) + else: + self.router = None + + # Enable response cache + self.enable_cache = enable_cache + self.response_cache = {} if enable_cache else None + + # Enable data collection for fine-tuning + self.enable_data_collection = enable_data_collection + if enable_data_collection: + self.data_collector = get_data_collector() + else: + self.data_collector = None + + # Conversation summarizer + self.summarizer = get_summarizer() + + self._initialize_agents() + + def _initialize_agents(self) -> None: + """Initialize all agents with shared memory""" + # Import agents (lazy import to avoid circular dependencies) + from agents.specialized.nutrition_agent import NutritionAgent + from agents.specialized.exercise_agent import ExerciseAgent + from agents.specialized.symptom_agent import SymptomAgent + from agents.specialized.mental_health_agent import MentalHealthAgent + from agents.specialized.general_health_agent import GeneralHealthAgent + + # Create agents with shared memory + self.agents = { + 'nutrition_agent': NutritionAgent(memory=self.memory), + 'exercise_agent': ExerciseAgent(memory=self.memory), + 'symptom_agent': SymptomAgent(memory=self.memory), + 'mental_health_agent': MentalHealthAgent(memory=self.memory), + 'general_health_agent': GeneralHealthAgent(memory=self.memory) + 
} + + def handle_query(self, message: str, chat_history: Optional[List] = None, user_id: Optional[str] = None) -> str: + """ + Main entry point - handles user query with coordination + + Args: + message: User's message + chat_history: Conversation history + user_id: User ID for data persistence + + Returns: + str: Response (possibly from multiple agents) + """ + chat_history = chat_history or [] + + # Create or update health context for user + if user_id: + self.health_context = HealthContext(user_id, self.data_store) + # Inject health context into all agents + for agent in self.agents.values(): + if hasattr(agent, 'set_health_context'): + agent.set_health_context(self.health_context) + + # Update memory from chat history + self._update_memory_from_history(chat_history) + + # Summarize if conversation is too long + if self.summarizer.should_summarize(chat_history): + chat_history = self._summarize_if_needed(chat_history) + + # Check if multi-agent collaboration is needed + if self._needs_multi_agent(message): + return self._handle_multi_agent_query(message, chat_history) + + # Single agent routing + return self._handle_single_agent_query(message, chat_history) + + def _get_cache_key(self, message: str, chat_history: List) -> str: + """Generate cache key from message and recent history""" + # Include last 2 exchanges for context + recent_history = chat_history[-4:] if len(chat_history) > 4 else chat_history + cache_data = { + "message": message.lower().strip(), + "history": [(h[0].lower().strip() if h[0] else "", h[1][:50] if len(h) > 1 else "") for h in recent_history] + } + cache_str = json.dumps(cache_data, sort_keys=True) + return hashlib.md5(cache_str.encode()).hexdigest() + + def _handle_single_agent_query(self, message: str, chat_history: List, file_data: Optional[Dict] = None) -> str: + """Handle query with single agent (with potential handoff)""" + # Check cache first + if self.enable_cache: + cache_key = self._get_cache_key(message, chat_history) + if cache_key in self.response_cache: + # print("[CACHE HIT] Returning cached response") + return self.response_cache[cache_key] + + # Route to appropriate agent (use embedding router if available) + if self.router: + routing_result = self.router.route(message, chat_history) + else: + routing_result = route_to_agent(message, chat_history) + + agent_name = routing_result['agent'] + parameters = routing_result['parameters'] + + # Update current agent in memory + self.memory.set_current_agent(agent_name) + + # Get agent + agent = self.agents.get(agent_name) + if not agent: + return "Xin lỗi, không tìm thấy agent phù hợp." 
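+
+        # Illustrative call that follows below (example values, not guaranteed):
+        #   agent.handle({"user_query": "Tôi bị đau đầu"}, chat_history)
+        # "parameters" always carries at least "user_query"; the LLM router may
+        # also have extracted optional structured fields such as "user_data".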
+ + # Let agent handle the request + response = agent.handle(parameters, chat_history) + + # Log conversation for fine-tuning (with cleaned data) + if self.enable_data_collection and self.data_collector: + user_data = self.memory.get_full_profile() + + # Clean user data before logging to prevent learning from errors + cleaned_user_data = self._clean_user_data_for_training(user_data) + + self.data_collector.log_conversation( + agent_name=agent_name, + user_message=message, + agent_response=response, + user_data=cleaned_user_data, + metadata={'data_cleaned': True} # Flag that data was cleaned + ) + + # Cache the response + if self.enable_cache: + cache_key = self._get_cache_key(message, chat_history) + self.response_cache[cache_key] = response + # Limit cache size to 100 entries + if len(self.response_cache) > 100: + # Remove oldest entry (simple FIFO) + self.response_cache.pop(next(iter(self.response_cache))) + + # Check if handoff is needed + if hasattr(agent, 'should_handoff') and agent.should_handoff(message, chat_history): + next_agent_name = agent.suggest_next_agent(message) + if next_agent_name and next_agent_name in self.agents: + return self._perform_handoff(agent, next_agent_name, response, message, chat_history) + + return response + + def _handle_multi_agent_query(self, message: str, chat_history: List) -> str: + """Handle query that needs multiple agents (with parallel execution)""" + # Detect which agents are needed + agents_needed = self._detect_required_agents(message) + + if len(agents_needed) <= 1: + # Fallback to single agent + return self._handle_single_agent_query(message, chat_history) + + # Use async for parallel execution (faster!) + try: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + responses = loop.run_until_complete( + self._handle_multi_agent_async(message, chat_history, agents_needed) + ) + loop.close() + except Exception as e: + print(f"Async multi-agent failed, falling back to sequential: {e}") + # Fallback to sequential if async fails + responses = {} + for agent_name in agents_needed: + agent = self.agents.get(agent_name) + if agent: + parameters = {'user_query': message} + responses[agent_name] = agent.handle(parameters, chat_history) + + # Combine responses + return self._combine_responses(responses, agents_needed) + + async def _handle_multi_agent_async(self, message: str, chat_history: List, agents_needed: List[str]) -> Dict[str, str]: + """Execute multiple agents in parallel using asyncio""" + async def call_agent(agent_name: str): + """Async wrapper for agent.handle()""" + agent = self.agents.get(agent_name) + if not agent: + return None + + # Run in thread pool (since agent.handle is sync) + loop = asyncio.get_event_loop() + with ThreadPoolExecutor() as pool: + parameters = {'user_query': message} + response = await loop.run_in_executor( + pool, + agent.handle, + parameters, + chat_history + ) + return response + + # Create tasks for all agents + tasks = {agent_name: call_agent(agent_name) for agent_name in agents_needed} + + # Execute in parallel + results = await asyncio.gather(*tasks.values(), return_exceptions=True) + + # Map results back to agent names + responses = {} + for agent_name, result in zip(tasks.keys(), results): + if isinstance(result, Exception): + print(f"Agent {agent_name} failed: {result}") + responses[agent_name] = f"Xin lỗi, {agent_name} gặp lỗi." 
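+
+                # asyncio.gather(..., return_exceptions=True) hands exceptions back
+                # as values instead of raising, so one failing agent cannot cancel
+                # its siblings; the failure is degraded to an apology string here.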
+ elif result: + responses[agent_name] = result + + return responses + + def _perform_handoff( + self, + from_agent: Any, + to_agent_name: str, + current_response: str, + message: str, + chat_history: List + ) -> str: + """ + Perform smooth handoff between agents + + Args: + from_agent: Current agent + to_agent_name: Name of agent to hand off to + current_response: Current agent's response + message: User's message + chat_history: Conversation history + + Returns: + str: Combined response with handoff + """ + # Create handoff message + handoff_msg = from_agent.create_handoff_message(to_agent_name, current_response) + + # Update memory + self.memory.set_current_agent(to_agent_name) + + return handoff_msg + + def _needs_multi_agent(self, message: str) -> bool: + """ + Determine if query needs multiple agents + + Args: + message: User's message + + Returns: + bool: True if multiple agents needed + """ + agents_needed = self._detect_required_agents(message) + return len(agents_needed) > 1 + + def _detect_required_agents(self, message: str) -> List[str]: + """ + Detect which agents are needed for this query + + Args: + message: User's message + + Returns: + List[str]: List of agent names needed + """ + agents_needed = [] + message_lower = message.lower() + + # PRIORITY 1: Symptom keywords (highest priority - health emergencies) + symptom_keywords = ['đau', 'sốt', 'ho', 'buồn nôn', 'chóng mặt', 'triệu chứng', 'khó tiêu', 'đầy bụng', 'ợ hơi'] + has_symptoms = any(kw in message_lower for kw in symptom_keywords) + + # PRIORITY 2: Nutrition keywords (but NOT if it's a symptom context) + nutrition_keywords = ['thực đơn', 'calo', 'giảm cân', 'tăng cân', 'dinh dưỡng', 'rau củ', 'thực phẩm'] + # Special handling: 'ăn' only counts as nutrition if NOT in symptom context + has_nutrition = any(kw in message_lower for kw in nutrition_keywords) + if not has_symptoms and 'ăn' in message_lower: + has_nutrition = True + + # PRIORITY 3: Exercise keywords + exercise_keywords = ['tập', 'gym', 'cardio', 'yoga', 'chạy bộ', 'exercise', 'workout'] + has_exercise = any(kw in message_lower for kw in exercise_keywords) + + # PRIORITY 4: Mental health keywords + mental_keywords = ['stress', 'lo âu', 'trầm cảm', 'mất ngủ', 'burnout', 'mental'] + has_mental = any(kw in message_lower for kw in mental_keywords) + + # IMPORTANT: Only trigger multi-agent if CLEARLY needs multiple domains + # Example: "Tôi bị đau bụng, nên ăn gì?" -> symptom + nutrition + # But: "WHO khuyến nghị ăn bao nhiêu rau củ?" 
-> ONLY nutrition + + # Count how many domains are triggered + domain_count = sum([has_symptoms, has_nutrition, has_exercise, has_mental]) + + # If only 1 domain -> single agent (no multi-agent) + if domain_count <= 1: + if has_symptoms: + agents_needed.append('symptom_agent') + elif has_nutrition: + agents_needed.append('nutrition_agent') + elif has_exercise: + agents_needed.append('exercise_agent') + elif has_mental: + agents_needed.append('mental_health_agent') + else: + # Multiple domains detected + # Check if it's a REAL multi-domain question or false positive + + # False positive patterns (should be single agent) + false_positives = [ + 'who khuyến nghị', # WHO recommendations -> single domain + 'bao nhiêu', # Quantitative questions -> single domain + 'khó tiêu', # Digestive issues -> symptom only + 'đầy bụng', # Bloating -> symptom only + 'đau bụng', # Stomach pain -> symptom only + 'ợ hơi', # Burping -> symptom only + ] + + is_false_positive = any(pattern in message_lower for pattern in false_positives) + + if is_false_positive: + # Use primary domain only + if has_nutrition: + agents_needed.append('nutrition_agent') + elif has_exercise: + agents_needed.append('exercise_agent') + elif has_symptoms: + agents_needed.append('symptom_agent') + elif has_mental: + agents_needed.append('mental_health_agent') + else: + # Real multi-domain question + if has_symptoms: + agents_needed.append('symptom_agent') + if has_nutrition: + agents_needed.append('nutrition_agent') + if has_exercise: + agents_needed.append('exercise_agent') + if has_mental: + agents_needed.append('mental_health_agent') + + return agents_needed + + def _combine_responses(self, responses: Dict[str, str], agents_order: List[str]) -> str: + """ + Combine responses from multiple agents + + Args: + responses: Dict of agent_name -> response + agents_order: Order of agents + + Returns: + str: Combined response + """ + # For natural flow, just combine responses without headers + # Make it feel like ONE person giving comprehensive advice + + responses_list = [responses[agent] for agent in agents_order if agent in responses] + + if len(responses_list) == 1: + # Single agent - return as is + return responses_list[0] + + # Multiple agents - combine naturally + combined = "" + + # First response (usually symptom assessment) + combined += responses_list[0] + + # Add other responses with smooth transitions + for i in range(1, len(responses_list)): + # Natural transition phrases + transitions = [ + "\n\nNgoài ra, ", + "\n\nBên cạnh đó, ", + "\n\nĐồng thời, ", + "\n\nVề mặt khác, " + ] + transition = transitions[min(i-1, len(transitions)-1)] + combined += transition + responses_list[i] + + # Natural closing (not too formal) + combined += "\n\nBạn thử làm theo xem có đỡ không nhé. Có gì thắc mắc cứ hỏi mình!" 
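+
+        # Worked example (illustrative): with two agents the combined reply reads
+        #   "<first response>\n\nNgoài ra, <second response>\n\n<closing line>"
+        # The transition index is clamped, so the fifth and later responses reuse
+        # the last phrase ("\n\nVề mặt khác, ").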
+
+        return combined
+
+    def _update_memory_from_history(self, chat_history: List) -> None:
+        """Extract and update SHARED memory from chat history to prevent duplicate questions"""
+        if not chat_history:
+            return
+
+        # Extract user info from ALL conversations (not just current agent)
+        user_info = self._extract_user_info_from_all_history(chat_history)
+
+        # Update SHARED memory that ALL agents can access
+        if user_info:
+            for key, value in user_info.items():
+                self.memory.update_profile(key, value)
+
+    def _extract_user_info_from_all_history(self, chat_history: List) -> Dict:
+        """Extract user information from entire conversation history"""
+        user_info = {}
+
+        # Common patterns to extract
+        patterns = {
+            'age': [r'(\d+)\s*tuổi', r'tôi\s*(\d+)', r'(\d+)\s*years?\s*old'],
+            'gender': [r'tôi là (nam|nữ)', r'giới tính[:\s]*(nam|nữ)', r'(male|female|nam|nữ)'],
+            'weight': [r'(\d+)\s*kg', r'nặng\s*(\d+)', r'cân nặng[:\s]*(\d+)'],
+            'height': [r'(\d+)\s*cm', r'cao\s*(\d+)', r'chiều cao[:\s]*(\d+)'],
+            'goal': [r'muốn\s*(giảm cân|tăng cân|tăng cơ|khỏe mạnh)', r'mục tiêu[:\s]*(.+)']
+        }
+
+        # Search through all user messages
+        import re
+        for user_msg, _ in chat_history:
+            if not user_msg:
+                continue
+
+            for field, field_patterns in patterns.items():
+                if field not in user_info:  # Only extract if not already found
+                    for pattern in field_patterns:
+                        match = re.search(pattern, user_msg.lower())
+                        if match:
+                            user_info[field] = match.group(1)
+                            break
+
+        return user_info
+
+    def _summarize_if_needed(self, chat_history: List) -> List:
+        """
+        Summarize conversation if it's too long
+
+        Args:
+            chat_history: Full conversation history
+
+        Returns:
+            Compressed history with summary
+        """
+        compressed = self.summarizer.compress_history(
+            chat_history,
+            target_turns=10  # Keep last 10 turns + summary
+        )
+
+        # print(f"📝 Summarized {len(chat_history)} turns → {len(compressed)} turns")
+        return compressed
+
+    def get_conversation_stats(self, chat_history: List) -> Dict[str, Any]:
+        """Get statistics about current conversation"""
+        return self.summarizer.get_summary_stats(chat_history)
+
+    def get_memory_summary(self) -> str:
+        """Get summary of current memory state"""
+        return self.memory.get_context_summary()
+
+    def _clean_user_data_for_training(self, user_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Clean user data before logging for training
+        Ensures only valid, corrected data is used for fine-tuning
+
+        This prevents the model from learning bad patterns like:
+        - "cao 200m" (should be 200cm)
+        - "nặng 75g" (should be 75kg)
+        - Invalid BMI values
+        """
+        cleaned = user_data.copy()
+
+        # Validate and clean height (should be 50-300 cm)
+        if 'height' in cleaned and cleaned['height'] is not None:
+            height = float(cleaned['height'])
+            if not (50 <= height <= 300):
+                # Invalid height - don't log it
+                cleaned['height'] = None
+
+        # Validate and clean weight (should be 20-300 kg)
+        if 'weight' in cleaned and cleaned['weight'] is not None:
+            weight = float(cleaned['weight'])
+            if not (20 <= weight <= 300):
+                # Invalid weight - don't log it
+                cleaned['weight'] = None
+
+        # Validate and clean age (should be 1-120)
+        if 'age' in cleaned and cleaned['age'] is not None:
+            age = int(cleaned['age'])
+            if not (1 <= age <= 120):
+                # Invalid age - don't log it
+                cleaned['age'] = None
+
+        # Validate and clean body fat (should be 3-60%)
+        if 'body_fat_percentage' in cleaned and cleaned['body_fat_percentage'] is not None:
+            bf = float(cleaned['body_fat_percentage'])
+            if not (3 <= bf <= 60):
+                # Invalid body fat - don't log it
+                cleaned['body_fat_percentage'] = None
+
+        # Remove any None values to keep training data clean
+        cleaned = {k: v for k, v in cleaned.items() if v is not None}
+
+        return cleaned
+
+    def clear_memory(self) -> None:
+        """Clear all memory (start fresh)"""
+        self.memory.clear()
+
+    def __repr__(self) -> str:
+        return f"<AgentCoordinator: {self.get_memory_summary()}>"
diff --git a/agents/core/orchestrator.py b/agents/core/orchestrator.py
new file mode 100644
index 0000000000000000000000000000000000000000..47eb353b40585a983a5187585c817da889ef4243
--- /dev/null
+++ b/agents/core/orchestrator.py
@@ -0,0 +1,212 @@
+"""
+Multi-Agent Orchestrator
+Coordinates multiple agents to handle complex queries
+"""
+
+from typing import List, Dict, Any
+import json
+
+class MultiAgentOrchestrator:
+    """
+    Orchestrates multiple agents to handle complex queries
+    that require expertise from multiple domains
+    """
+
+    def __init__(self, agents: Dict[str, Any]):
+        """
+        Initialize orchestrator with available agents
+
+        Args:
+            agents: Dictionary of agent_name -> agent_instance
+        """
+        self.agents = agents
+
+    def orchestrate(self, query: str, agent_names: List[str], chat_history: List = None) -> str:
+        """
+        Orchestrate multiple agents to answer a complex query
+
+        Args:
+            query: User query
+            agent_names: List of agents to use
+            chat_history: Conversation history
+
+        Returns:
+            Combined response from all agents
+        """
+        if len(agent_names) == 1:
+            # Single agent - just call it
+            return self._call_single_agent(agent_names[0], query, chat_history)
+
+        # Multi-agent orchestration
+        return self._orchestrate_multi_agent(query, agent_names, chat_history)
+
+    def _call_single_agent(self, agent_name: str, query: str, chat_history: List) -> str:
+        """Call a single agent"""
+        agent = self.agents.get(agent_name)
+        if not agent:
+            return f"Agent {agent_name} not found"
+
+        try:
+            response = agent.handle(
+                parameters={"user_query": query},
+                chat_history=chat_history
+            )
+            return response
+        except Exception as e:
+            return f"Error calling {agent_name}: {str(e)}"
+
+    def _orchestrate_multi_agent(self, query: str, agent_names: List[str], chat_history: List) -> str:
+        """
+        Orchestrate multiple agents
+
+        Strategy:
+        1. Analyze query to determine what each agent should focus on
+        2. Call each agent with specific sub-query
+        3. 
Combine responses intelligently + """ + # Decompose query into sub-queries for each agent + sub_queries = self._decompose_query(query, agent_names) + + # Call each agent with their sub-query + responses = {} + for agent_name, sub_query in sub_queries.items(): + if agent_name in self.agents: + try: + response = self.agents[agent_name].handle( + parameters={"user_query": sub_query}, + chat_history=chat_history + ) + responses[agent_name] = response + except Exception as e: + responses[agent_name] = f"Error: {str(e)}" + + # Combine responses + return self._combine_responses(query, responses, agent_names) + + def _decompose_query(self, query: str, agent_names: List[str]) -> Dict[str, str]: + """ + Decompose complex query into sub-queries for each agent + + Example: + Query: "Tôi muốn giảm cân, nên ăn gì và tập gì?" + → + nutrition_agent: "Tư vấn chế độ ăn để giảm cân" + exercise_agent: "Tư vấn lịch tập để giảm cân" + """ + sub_queries = {} + + # Simple heuristic-based decomposition + query_lower = query.lower() + + for agent_name in agent_names: + if agent_name == "nutrition_agent": + if "ăn" in query_lower or "dinh dưỡng" in query_lower or "calo" in query_lower: + sub_queries[agent_name] = f"Tư vấn dinh dưỡng cho: {query}" + else: + sub_queries[agent_name] = query + + elif agent_name == "exercise_agent": + if "tập" in query_lower or "gym" in query_lower or "luyện" in query_lower: + sub_queries[agent_name] = f"Tư vấn tập luyện cho: {query}" + else: + sub_queries[agent_name] = query + + elif agent_name == "mental_health_agent": + if "stress" in query_lower or "lo âu" in query_lower: + sub_queries[agent_name] = f"Tư vấn sức khỏe tinh thần cho: {query}" + else: + sub_queries[agent_name] = query + + else: + # Default: use original query + sub_queries[agent_name] = query + + return sub_queries + + def _combine_responses(self, original_query: str, responses: Dict[str, str], agent_names: List[str]) -> str: + """ + Combine responses from multiple agents into a coherent answer + + Strategy: + 1. Identify the main topic + 2. Structure response logically + 3. Avoid redundancy + """ + if not responses: + return "Xin lỗi, không thể xử lý câu hỏi này." 
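+
+        # Target layout (illustrative), e.g. for nutrition + exercise:
+        #   Để giải đáp câu hỏi của bạn, tôi sẽ tư vấn từ nhiều góc độ:
+        #   📊 Dinh Dưỡng: ...
+        #   💪 Tập Luyện: ...
+        #   ---
+        #   💡 Lưu ý: Để đạt kết quả tốt nhất, ...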
+ + # Build combined response + combined = [] + + # Add intro + if len(responses) > 1: + combined.append("Để giải đáp câu hỏi của bạn, tôi sẽ tư vấn từ nhiều góc độ:\n") + + # Add each agent's response with clear sections + agent_labels = { + "nutrition_agent": "📊 Dinh Dưỡng", + "exercise_agent": "💪 Tập Luyện", + "mental_health_agent": "🧠 Sức Khỏe Tinh Thần", + "symptom_agent": "🩺 Đánh Giá Triệu Chứng", + "general_health_agent": "🏥 Tổng Quan" + } + + for agent_name in agent_names: + if agent_name in responses: + label = agent_labels.get(agent_name, agent_name) + response = responses[agent_name] + + # Clean up response (remove redundant intro) + response = self._clean_response(response) + + combined.append(f"\n{label}:\n{response}\n") + + # Add conclusion + if len(responses) > 1: + combined.append("\n---\n") + combined.append("💡 Lưu ý: Để đạt kết quả tốt nhất, hãy kết hợp cả dinh dưỡng, tập luyện và nghỉ ngơi hợp lý.") + + return "".join(combined) + + def _clean_response(self, response: str) -> str: + """Clean up response by removing redundant intros""" + # Remove common intro phrases + intros_to_remove = [ + "Chào bạn!", + "Xin chào!", + "Để giải đáp câu hỏi của bạn", + "Mình sẽ giúp bạn", + ] + + for intro in intros_to_remove: + if response.startswith(intro): + response = response[len(intro):].strip() + + return response + + +# Example usage +if __name__ == "__main__": + # Mock agents for testing + class MockAgent: + def __init__(self, name): + self.name = name + + def handle(self, parameters, chat_history=None): + query = parameters.get("user_query", "") + return f"[{self.name}] Response to: {query}" + + agents = { + "nutrition_agent": MockAgent("Nutrition"), + "exercise_agent": MockAgent("Exercise"), + "mental_health_agent": MockAgent("Mental Health") + } + + orchestrator = MultiAgentOrchestrator(agents) + + # Test multi-agent + query = "Tôi muốn giảm cân, nên ăn gì và tập gì?" 
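+    # Given the heuristics in _decompose_query, this query ("ăn" + "tập") should
+    # fan out roughly as (illustrative):
+    #   nutrition_agent: "Tư vấn dinh dưỡng cho: Tôi muốn giảm cân, nên ăn gì và tập gì?"
+    #   exercise_agent:  "Tư vấn tập luyện cho: Tôi muốn giảm cân, nên ăn gì và tập gì?"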
+ agent_names = ["nutrition_agent", "exercise_agent"] + + response = orchestrator.orchestrate(query, agent_names) + print(response) diff --git a/agents/core/response_validator.py b/agents/core/response_validator.py new file mode 100644 index 0000000000000000000000000000000000000000..56ad95ec0738201913bd3357d2cf499feea8b242 --- /dev/null +++ b/agents/core/response_validator.py @@ -0,0 +1,182 @@ +""" +Response Validator - Ensures LLM responses follow quality standards +Shared validation logic for all agents +""" + +from typing import Dict, List, Tuple + +class ResponseValidator: + """ + Base validator with common rules + agent-specific rules + """ + + # Common bad phrases across all agents + COMMON_BAD_PHRASES = [ + "Dựa trên thông tin bạn cung cấp", + "Dựa vào thông tin", + "Theo thông tin bạn đưa ra", + "Từ thông tin trên", + "Với tư cách", + "Tôi là chuyên gia" + ] + + @staticmethod + def validate_common(response: str) -> Tuple[bool, List[str]]: + """ + Common validation rules for all agents + Returns: (is_valid, list_of_issues) + """ + issues = [] + + # Check for formal phrases + for phrase in ResponseValidator.COMMON_BAD_PHRASES: + if phrase.lower() in response.lower(): + issues.append(f"Formal phrase: '{phrase}'") + break + + # Check for excessive length (>500 words) + word_count = len(response.split()) + if word_count > 500: + issues.append(f"Too long: {word_count} words (max 500)") + + # Check for empty response + if len(response.strip()) < 10: + issues.append("Response too short or empty") + + return len(issues) == 0, issues + + @staticmethod + def validate_symptom_response(response: str, context: Dict) -> Tuple[bool, List[str]]: + """ + Symptom-specific validation + """ + issues = [] + stage = context.get('conversation_stage', 0) + + # Assessment phase: should ask, not advise + if stage <= 1: + advice_indicators = [ + "khuyến nghị", "nên", "hãy", "bạn thử", + "giải pháp", "cách xử lý" + ] + has_advice = any(ind in response.lower() for ind in advice_indicators) + has_question = '?' in response + + if has_advice and not has_question: + issues.append("Đưa lời khuyên quá sớm (assessment phase)") + + # Check if both asking and advising + if '?' in response: + advice_count = sum(1 for ind in ["khuyến nghị", "nên", "hãy thử"] + if ind in response.lower()) + if advice_count >= 2: + issues.append("Vừa hỏi vừa khuyên") + + return len(issues) == 0, issues + + @staticmethod + def validate_nutrition_response(response: str, context: Dict, chat_history: List) -> Tuple[bool, List[str]]: + """ + Nutrition-specific validation + """ + issues = [] + + # Check if asking for info already provided + if chat_history: + all_user_text = " ".join([msg[0].lower() for msg in chat_history if msg[0]]) + + # Check if asking for age when already provided + if "tuổi" in all_user_text or "năm" in all_user_text: + if "bao nhiêu tuổi" in response.lower() or "tuổi của bạn" in response.lower(): + issues.append("Hỏi lại tuổi đã được cung cấp") + + # Check if asking for weight when already provided + if "kg" in all_user_text or "cân nặng" in all_user_text: + if "cân nặng" in response.lower() and "?" 
in response: + issues.append("Hỏi lại cân nặng đã được cung cấp") + + # Check for too theoretical (should be practical) + theory_indicators = ["lý thuyết", "nghiên cứu cho thấy", "theo khoa học"] + if any(ind in response.lower() for ind in theory_indicators): + practical_indicators = ["bạn thử", "có thể", "ví dụ", "thực đơn"] + if not any(ind in response.lower() for ind in practical_indicators): + issues.append("Quá lý thuyết, thiếu practical advice") + + return len(issues) == 0, issues + + @staticmethod + def validate_exercise_response(response: str, context: Dict) -> Tuple[bool, List[str]]: + """ + Exercise-specific validation + """ + issues = [] + + # Check if workout plan is too generic + if "lịch tập" in response.lower() or "kế hoạch" in response.lower(): + # Should have specific days or progression + has_specifics = any(word in response.lower() for word in [ + "thứ", "ngày", "tuần 1", "tuần 2", "tháng" + ]) + if not has_specifics: + issues.append("Lịch tập quá generic, thiếu chi tiết") + + # Check for progression + if "tập" in response.lower(): + has_progression = any(word in response.lower() for word in [ + "tăng dần", "progression", "tuần 1", "giai đoạn" + ]) + if not has_progression and len(response) > 200: + issues.append("Thiếu hướng dẫn progression") + + return len(issues) == 0, issues + + @staticmethod + def validate_mental_health_response(response: str, context: Dict) -> Tuple[bool, List[str]]: + """ + Mental health-specific validation + """ + issues = [] + + # Should have empathy/validation + empathy_indicators = [ + "cảm giác", "hiểu", "bình thường", "nhiều người", + "không phải lỗi của bạn" + ] + + if len(response) > 100: # Only check for longer responses + has_empathy = any(ind in response.lower() for ind in empathy_indicators) + if not has_empathy: + issues.append("Thiếu empathy/validation") + + # Check for too clinical + clinical_indicators = ["chẩn đoán", "bệnh", "rối loạn"] + if any(ind in response.lower() for ind in clinical_indicators): + if "không phải bác sĩ" not in response.lower(): + issues.append("Quá clinical, cần disclaimer") + + return len(issues) == 0, issues + + @staticmethod + def validate_response(response: str, agent_type: str, context: Dict, chat_history: List = None) -> Tuple[bool, List[str]]: + """ + Main validation method - routes to appropriate validator + """ + # Common validation first + is_valid_common, common_issues = ResponseValidator.validate_common(response) + + # Agent-specific validation + agent_issues = [] + if agent_type == 'symptom': + _, agent_issues = ResponseValidator.validate_symptom_response(response, context) + elif agent_type == 'nutrition': + _, agent_issues = ResponseValidator.validate_nutrition_response(response, context, chat_history or []) + elif agent_type == 'exercise': + _, agent_issues = ResponseValidator.validate_exercise_response(response, context) + elif agent_type == 'mental_health': + _, agent_issues = ResponseValidator.validate_mental_health_response(response, context) + + # Combine all issues + all_issues = common_issues + agent_issues + is_valid = len(all_issues) == 0 + + return is_valid, all_issues diff --git a/agents/core/router.py b/agents/core/router.py new file mode 100644 index 0000000000000000000000000000000000000000..e952d3b95fdd9437abaaf8bba2dcc918f247bd50 --- /dev/null +++ b/agents/core/router.py @@ -0,0 +1,657 @@ +""" +Agent Router - Routes user requests to appropriate specialized agents + +Supports two routing strategies: +1. Embedding-based routing (primary) - Automatic, scalable +2. 
LLM-based routing (fallback) - Manual, explicit +""" + +from config.settings import client, MODEL +from typing import List, Dict, Tuple, Optional +import numpy as np + +# Try to import embedding model (optional) +try: + from sentence_transformers import SentenceTransformer + from sklearn.metrics.pairwise import cosine_similarity + EMBEDDINGS_AVAILABLE = True +except ImportError: + EMBEDDINGS_AVAILABLE = False + print("[WARNING] sentence-transformers not installed. Using LLM-based routing only.") + print("Install with: pip install sentence-transformers scikit-learn") + +# Define available functions/agents +AVAILABLE_FUNCTIONS = [ + { + "name": "nutrition_agent", + "description": """Tư vấn dinh dưỡng và chế độ ăn uống: + - Tính BMI, calo, macro (protein/carb/fat) + - Lập thực đơn, meal plan + - Tư vấn thực phẩm nên ăn/tránh + - Giảm cân, tăng cân, tăng cơ + - Bổ sung dinh dưỡng, vitamin + + KHÔNG dùng cho: triệu chứng bệnh (đau bụng, buồn nôn, tiêu chảy) + → Triệu chứng bệnh → dùng symptom_agent""", + "parameters": { + "type": "object", + "properties": { + "user_query": { + "type": "string", + "description": "Câu hỏi của người dùng về dinh dưỡng" + }, + "user_data": { + "type": "object", + "description": "Thông tin người dùng (tuổi, giới tính, cân nặng, chiều cao, mục tiêu)", + "properties": { + "age": {"type": "integer"}, + "gender": {"type": "string"}, + "weight": {"type": "number"}, + "height": {"type": "number"}, + "goal": {"type": "string"} + } + } + }, + "required": ["user_query"] + } + }, + { + "name": "exercise_agent", + "description": "Tư vấn tập luyện, lịch tập gym, bài tập thể dục, kế hoạch tập luyện, yoga, cardio", + "parameters": { + "type": "object", + "properties": { + "user_query": { + "type": "string", + "description": "Câu hỏi của người dùng về tập luyện" + }, + "user_data": { + "type": "object", + "description": "Thông tin người dùng (tuổi, giới tính, thể lực, mục tiêu, thời gian)", + "properties": { + "age": {"type": "integer"}, + "gender": {"type": "string"}, + "fitness_level": {"type": "string"}, + "goal": {"type": "string"}, + "available_time": {"type": "integer"} + } + } + }, + "required": ["user_query"] + } + }, + { + "name": "symptom_agent", + "description": """CLINICAL SYMPTOM ASSESSMENT - Đánh giá triệu chứng bệnh CỤ THỂ: + + ✅ USE FOR (Specific symptoms): + - Pain: đau đầu, đau bụng, đau lưng, đau ngực, đau khớp + - Fever/Infection: sốt, ho, cảm cúm, viêm họng, viêm phổi + - Digestive: buồn nôn, nôn, tiêu chảy, táo bón, đầy hơi + - Neurological: chóng mặt, đau nửa đầu, mất thăng bằng + - Acute symptoms: triệu chứng đột ngột, bất thường, nghiêm trọng + + ✅ WHEN TO USE: + - User describes SPECIFIC symptom: "Tôi bị đau bụng" + - User feels sick/unwell: "Tôi không khỏe", "Tôi bị ốm" + - Medical concern: "Tôi sợ bị bệnh X" + + ❌ DO NOT USE FOR: + - General wellness: "Làm sao để khỏe?" → general_health_agent + - Prevention: "Phòng ngừa bệnh" → general_health_agent + - Lifestyle: "Sống khỏe mạnh" → general_health_agent + - Nutrition: "Nên ăn gì?" → nutrition_agent + - Exercise: "Tập gì?" 
→ exercise_agent""", + "parameters": { + "type": "object", + "properties": { + "user_query": { + "type": "string", + "description": "Mô tả triệu chứng của người dùng" + }, + "symptom_data": { + "type": "object", + "description": "Thông tin triệu chứng (onset, location, severity, duration)", + "properties": { + "symptom_type": {"type": "string"}, + "duration": {"type": "string"}, + "severity": {"type": "integer"}, + "location": {"type": "string"} + } + } + }, + "required": ["user_query"] + } + }, + { + "name": "mental_health_agent", + "description": "Tư vấn sức khỏe tinh thần, stress, lo âu, trầm cảm, burnout, giấc ngủ, cảm xúc", + "parameters": { + "type": "object", + "properties": { + "user_query": { + "type": "string", + "description": "Câu hỏi về sức khỏe tinh thần" + }, + "context": { + "type": "object", + "description": "Ngữ cảnh (công việc, gia đình, stress level)", + "properties": { + "stress_level": {"type": "string"}, + "duration": {"type": "string"}, + "triggers": {"type": "array", "items": {"type": "string"}} + } + } + }, + "required": ["user_query"] + } + }, + { + "name": "general_health_agent", + "description": """GENERAL WELLNESS & LIFESTYLE - Tư vấn sức khỏe TỔNG QUÁT: + + ✅ USE FOR (General health & wellness): + - Wellness: "Làm sao để khỏe mạnh?", "Sống khỏe" + - Prevention: "Phòng ngừa bệnh", "Tăng sức đề kháng" + - Lifestyle: "Lối sống lành mạnh", "Thói quen tốt" + - General advice: "Tư vấn sức khỏe", "Chăm sóc sức khỏe" + - Health education: "Kiến thức sức khỏe", "Hiểu về cơ thể" + - Check-ups: "Khám sức khỏe định kỳ", "Xét nghiệm gì?" + + ✅ WHEN TO USE: + - Broad health questions: "Tôi muốn khỏe hơn" + - No specific symptom: "Tư vấn sức khỏe tổng quát" + - Prevention focus: "Làm gì để không bị ốm?" + - Lifestyle optimization: "Cải thiện sức khỏe" + + ❌ DO NOT USE FOR: + - Specific symptoms: "Tôi bị đau bụng" → symptom_agent + - Nutrition details: "Lập thực đơn" → nutrition_agent + - Exercise plans: "Lịch tập gym" → exercise_agent + - Mental health: "Stress, lo âu" → mental_health_agent""", + "parameters": { + "type": "object", + "properties": { + "user_query": { + "type": "string", + "description": "Câu hỏi chung về sức khỏe" + } + }, + "required": ["user_query"] + } + } +] + +def route_to_agent(message, chat_history=None): + """ + Route user message to appropriate specialized agent using function calling + + Args: + message (str): User's message + chat_history (list): Conversation history for context + + Returns: + dict: { + "agent": str, # Agent name + "parameters": dict, # Extracted parameters + "confidence": float # Routing confidence (0-1) + } + """ + + # Build context from chat history (increased from 3 to 10 for better context) + context = "" + last_agent = None + + if chat_history: + recent_messages = chat_history[-10:] # Last 10 exchanges (was 3) + + # Extract last agent from bot response + if recent_messages: + last_bot_msg = recent_messages[-1][1] if len(recent_messages[-1]) > 1 else "" + # Try to detect agent from debug info + if "Agent used:" in last_bot_msg: + import re + match = re.search(r'Agent used: `(\w+)`', last_bot_msg) + if match: + last_agent = match.group(1) + + # Build context with turn numbers for clarity + context_lines = [] + for i, (user_msg, bot_msg) in enumerate(recent_messages, 1): + # Truncate long messages + user_short = user_msg[:80] + "..." if len(user_msg) > 80 else user_msg + bot_short = bot_msg[:80] + "..." 
if len(bot_msg) > 80 else bot_msg + context_lines.append(f"Turn {i}:\n User: {user_short}\n Bot: {bot_short}") + + context = "\n".join(context_lines) + + # Create enhanced routing prompt with context awareness + routing_prompt = f"""Phân tích câu hỏi của người dùng và xác định agent phù hợp nhất. + +LỊCH SỬ HỘI THOẠI (10 exchanges gần nhất): +{context if context else "Đây là câu hỏi đầu tiên"} + +AGENT TRƯỚC ĐÓ: {last_agent if last_agent else "Chưa có"} + +CÂU HỎI HIỆN TẠI: {message} + +HƯỚNG DẪN QUAN TRỌNG: + +1. **TRIỆU CHỨNG BỆNH CỤ THỂ → symptom_agent (ưu tiên cao nhất)** + - User MÔ TẢ triệu chứng CỤ THỂ: "tôi bị đau...", "tôi bị sốt", "buồn nôn" + - Ví dụ: "đau bụng", "đau đầu", "sốt cao", "ho ra máu", "chóng mặt" + - LUÔN ưu tiên symptom_agent khi có triệu chứng CỤ THỂ! + + ⚠️ EDGE CASES - KHÔNG PHẢI symptom_agent: + - "Làm sao để KHÔNG bị đau đầu?" → general_health_agent (phòng ngừa) + - "Ăn gì để hết đau bụng?" → nutrition_agent (dinh dưỡng) + - "Tập gì để hết đau lưng?" → exercise_agent (tập luyện) + - "Làm sao để khỏe?" → general_health_agent (tổng quát) + +2. **DINH DƯỠNG → nutrition_agent** + - Hỏi về thực phẩm, chế độ ăn, calo, BMI, thực đơn + - KHÔNG phải triệu chứng bệnh + - Ví dụ: "nên ăn gì", "giảm cân", "thực đơn" + +3. **TẬP LUYỆN → exercise_agent** + - Hỏi về bài tập, lịch tập, gym, cardio, dụng cụ tập + - Follow-up về giáo án tập: "không có dụng cụ", "tập tại nhà", "không có tạ" + - Ví dụ: "nên tập gì", "lịch tập gym", "không có dụng cụ gym" + - **QUAN TRỌNG:** Nếu đang nói về tập luyện → TIẾP TỤC exercise_agent + +4. **SỨC KHỎE TINH THẦN → mental_health_agent** + - Stress, lo âu, trầm cảm, burnout, giấc ngủ + - Ví dụ: "tôi stress", "lo âu", "mất ngủ" + +5. **SỨC KHỎE TỔNG QUÁT → general_health_agent** + - Câu hỏi CHUNG về sức khỏe, wellness, lifestyle + - Phòng ngừa, tăng cường sức khỏe + - Ví dụ: "làm sao để khỏe?", "phòng bệnh", "sống khỏe" + + ⚠️ EDGE CASES - Phân biệt với symptom_agent: + - "Tôi BỊ đau bụng" → symptom_agent (có triệu chứng) + - "Làm sao để KHÔNG bị đau bụng?" → general_health_agent (phòng ngừa) + - "Tôi không khỏe" (mơ hồ) → general_health_agent (chung chung) + - "Tôi bị sốt cao" → symptom_agent (triệu chứng cụ thể) + +VÍ DỤ ROUTING (Bao gồm edge cases): + +**Symptom Agent (có triệu chứng CỤ THỂ):** +✅ "Tôi bị đau bụng" → symptom_agent +✅ "Đau đầu từ sáng" → symptom_agent +✅ "Buồn nôn, muốn làm sao cho hết" → symptom_agent +✅ "Tôi bị sốt cao 39 độ" → symptom_agent + +**General Health Agent (phòng ngừa, tổng quát):** +✅ "Làm sao để khỏe mạnh?" → general_health_agent +✅ "Phòng ngừa đau đầu" → general_health_agent (phòng ngừa!) +✅ "Tôi muốn sống khỏe hơn" → general_health_agent +✅ "Tư vấn sức khỏe tổng quát" → general_health_agent + +**Nutrition Agent (dinh dưỡng):** +✅ "Tôi muốn giảm cân" → nutrition_agent +✅ "Nên ăn gì để khỏe?" → nutrition_agent +✅ "Ăn gì để hết đau bụng?" → nutrition_agent (dinh dưỡng!) + +**Exercise Agent (tập luyện):** +✅ "Tôi nên tập gì?" → exercise_agent +✅ "Tập gì để hết đau lưng?" → exercise_agent (tập luyện!) +✅ "Không có dụng cụ gym thì sao?" (context: tập) → exercise_agent + +**Mental Health Agent:** +✅ "Tôi stress quá" → mental_health_agent + +**QUAN TRỌNG - CONTEXT AWARENESS:** +- Nếu last_agent = "exercise_agent" và câu hỏi về "dụng cụ", "tạ", "gym", "tại nhà" + → TIẾP TỤC exercise_agent (đây là follow-up!) +- Nếu last_agent = "nutrition_agent" và câu hỏi về "món ăn", "thực đơn", "calo" + → TIẾP TỤC nutrition_agent (đây là follow-up!) 
+
+Hãy chọn agent phù hợp nhất dựa trên CẢ câu hỏi hiện tại VÀ ngữ cảnh hội thoại."""
+
+    try:
+        response = client.chat.completions.create(
+            model=MODEL,
+            messages=[
+                {
+                    "role": "system",
+                    "content": """Bạn là hệ thống định tuyến thông minh với khả năng HIỂU NGỮ CẢNH hội thoại.
+
+NHIỆM VỤ: Phân tích câu hỏi trong NGỮ CẢNH cuộc hội thoại và chọn agent phù hợp.
+
+KỸ NĂNG QUAN TRỌNG:
+1. Nhận biết câu hỏi follow-up (vậy, còn, thì sao, nữa)
+2. Hiểu context từ lịch sử hội thoại
+3. Phát hiện topic switching (chuyển đề rõ ràng)
+4. Xử lý câu hỏi mơ hồ bằng cách xem context
+
+NGUYÊN TẮC:
+- Câu hỏi RÕ RÀNG → agent trực tiếp
+- Câu hỏi MƠ HỒ → xem lịch sử, last agent
+- Follow-up question → có thể tiếp tục agent cũ
+- Topic switch rõ ràng → agent mới"""
+                },
+                {
+                    "role": "user",
+                    "content": routing_prompt
+                }
+            ],
+            functions=AVAILABLE_FUNCTIONS,
+            function_call="auto",
+            temperature=0.3  # Lower temperature for more consistent routing
+        )
+
+        # Check if function was called
+        if response.choices[0].message.function_call:
+            function_call = response.choices[0].message.function_call
+
+            import json
+            parameters = json.loads(function_call.arguments)
+
+            return {
+                "agent": function_call.name,
+                "parameters": parameters,
+                "confidence": 0.9,  # High confidence when function is called
+                "raw_response": response
+            }
+        else:
+            # No function called, default to general health agent
+            return {
+                "agent": "general_health_agent",
+                "parameters": {"user_query": message},
+                "confidence": 0.5,
+                "raw_response": response
+            }
+
+    except Exception as e:
+        print(f"Routing error: {e}")
+        # Fallback to general health agent
+        return {
+            "agent": "general_health_agent",
+            "parameters": {"user_query": message},
+            "confidence": 0.3,
+            "error": str(e)
+        }
+
+def get_agent_description(agent_name):
+    """Get description of an agent"""
+    for func in AVAILABLE_FUNCTIONS:
+        if func["name"] == agent_name:
+            return func["description"]
+    return "Unknown agent"
+
+
+# ============================================================
+# Embedding-Based Router (New, Scalable Approach)
+# ============================================================
+
+class EmbeddingRouter:
+    """
+    Embedding-based router that automatically matches queries to agents
+    without manual rules. More scalable than LLM-based routing. 
+ """ + + def __init__(self, use_embeddings=True): + """ + Initialize router + + Args: + use_embeddings: If False, falls back to LLM-based routing + """ + self.use_embeddings = use_embeddings and EMBEDDINGS_AVAILABLE + + if self.use_embeddings: + # Load embedding model + self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') + + # Agent descriptions for embedding matching + self.agent_descriptions = { + "symptom_agent": """ + Đánh giá triệu chứng bệnh khi BỊ ĐAU hoặc KHÔNG KHỎE: + đau đầu, đau bụng, đau lưng, sốt, ho, buồn nôn, chóng mặt, + mệt mỏi bất thường, khó thở, đau ngực, bị bệnh, cảm thấy đau, + đau nhức, triệu chứng bệnh lý, không khỏe, ốm, bệnh, + đang bị gì, bị gì vậy, triệu chứng gì + """, + "nutrition_agent": """ + Tư vấn dinh dưỡng, ăn uống healthy, chế độ ăn: + giảm cân, tăng cân, giảm mỡ, muốn gầy, muốn béo, + ăn gì để giảm cân, ăn gì để tăng cân, calo, BMI, + thực đơn, chế độ ăn kiêng, thực phẩm, protein, carb, fat, + vitamin, khoáng chất, dinh dưỡng lành mạnh, ăn uống khoa học, + setup plan ăn uống, kế hoạch dinh dưỡng, healthy eating, + ăn healthy, ăn sạch, clean eating + """, + "exercise_agent": """ + Tập luyện, gym, workout, fitness, thể hình: + tập luyện, luyện tập, gym, cardio, bài tập, lịch tập, + dụng cụ tập, tạ, thanh đòn, tập tại nhà, không có dụng cụ, + squat, push-up, plank, chạy bộ, yoga, thể dục, thể hình, + rèn luyện cơ thể, tăng cơ, giảm mỡ, build muscle, lose fat, + setup plan tập luyện, kế hoạch tập luyện, lịch tập 7 ngày, + tập gym, tập thể hình, workout plan, fitness plan + """, + "mental_health_agent": """ + Sức khỏe tinh thần, stress, lo âu, trầm cảm, burnout, + mất ngủ, giấc ngủ, căng thẳng, áp lực, tâm lý, cảm xúc, + buồn bã, mệt mỏi tinh thần + """, + "general_health_agent": """ + Câu hỏi chung về sức khỏe, lời khuyên sức khỏe, + phòng bệnh, chăm sóc sức khỏe, kiểm tra sức khỏe + """ + } + + # Pre-compute agent embeddings + print("[INFO] Pre-computing agent embeddings...") + self.agent_embeddings = { + agent: self.embedder.encode(desc) + for agent, desc in self.agent_descriptions.items() + } + print("[INFO] Embedding router ready!") + else: + print("[INFO] Using LLM-based routing (embeddings not available)") + + def route(self, message: str, chat_history: List[Tuple[str, str]] = None) -> Dict: + """ + Route message to appropriate agent + + Args: + message: User message + chat_history: Conversation history + + Returns: + { + "agent": agent_name, + "parameters": {...}, + "confidence": float, + "method": "embedding" or "llm" + } + """ + if self.use_embeddings: + return self._route_with_embeddings(message, chat_history) + else: + # Fallback to LLM-based routing + return route_to_agent(message, chat_history) + + def _route_with_embeddings(self, message: str, chat_history: List[Tuple[str, str]]) -> Dict: + """Route using embedding similarity with topic change detection""" + # Embed query + query_embedding = self.embedder.encode(message) + + # Calculate similarity with each agent + similarities = {} + for agent, agent_embedding in self.agent_embeddings.items(): + similarity = cosine_similarity( + query_embedding.reshape(1, -1), + agent_embedding.reshape(1, -1) + )[0][0] + similarities[agent] = similarity + + # Detect topic change vs follow-up + is_topic_change = self._detect_topic_change(message, chat_history) + + # Context boost ONLY for genuine follow-ups (NOT topic changes) + if not is_topic_change and chat_history and len(chat_history) > 0: + # Determine CURRENT context from recent conversation (not just last message) + 
current_context_agent = self._get_current_context_agent(chat_history) + + # Simple heuristic: if query is short and has follow-up indicators + if len(message.split()) < 10 and any(word in message.lower() for word in ["thì sao", "còn", "nữa", "thế", "vậy", "không", "khác", "nếu"]): + # Boost ONLY the current context agent (not all agents) + if current_context_agent and current_context_agent in similarities: + similarities[current_context_agent] += 0.15 + print(f"[ROUTER] Boosting current context agent: {current_context_agent}") + + # Get best agent + best_agent = max(similarities, key=similarities.get) + confidence = float(similarities[best_agent]) + + # Debug logging (disabled for cleaner output) + # print(f"\n[ROUTER DEBUG] Message: {message[:50]}...") + # print(f"[ROUTER DEBUG] Topic change detected: {is_topic_change}") + # print(f"[ROUTER DEBUG] Similarities:") + # for agent, score in sorted(similarities.items(), key=lambda x: x[1], reverse=True): + # print(f" - {agent}: {score:.4f}") + # print(f"[ROUTER DEBUG] Selected: {best_agent} (confidence: {confidence:.4f})\n") + + return { + "agent": best_agent, + "parameters": {"user_query": message}, + "confidence": confidence, + "method": "embedding", + "all_scores": {k: float(v) for k, v in similarities.items()}, + "topic_change": is_topic_change + } + + def _get_current_context_agent(self, chat_history: List[Tuple[str, str]]) -> Optional[str]: + """ + Determine which agent is handling the CURRENT context + by analyzing recent conversation (last 3-5 turns) + + Returns: + Agent name that's currently active, or None + """ + if not chat_history or len(chat_history) == 0: + return None + + # Check last 3-5 turns for dominant domain + recent_turns = chat_history[-5:] if len(chat_history) >= 5 else chat_history + + domain_keywords = { + 'nutrition_agent': ['ăn', 'dinh dưỡng', 'thực đơn', 'calo', 'bmi', 'giảm cân', 'tăng cân', 'protein', 'carb', 'meal', 'bữa'], + 'exercise_agent': ['tập', 'luyện', 'gym', 'cardio', 'yoga', 'vận động', 'tăng cơ', 'giảm mỡ', 'workout', 'bài tập'], + 'symptom_agent': ['đau', 'bệnh', 'triệu chứng', 'khó chịu', 'buồn nôn', 'sốt', 'ốm'], + 'mental_health_agent': ['stress', 'lo âu', 'mất ngủ', 'trầm cảm', 'tâm lý'] + } + + # Count domain occurrences in recent turns + domain_counts = {agent: 0 for agent in domain_keywords.keys()} + + for user_msg, bot_msg in recent_turns: + combined = (user_msg + " " + bot_msg).lower() + for agent, keywords in domain_keywords.items(): + for keyword in keywords: + if keyword in combined: + domain_counts[agent] += 1 + break # Count once per turn + + # Return agent with highest count (if significant) + if domain_counts: + max_agent = max(domain_counts, key=domain_counts.get) + max_count = domain_counts[max_agent] + + # Need at least 2 occurrences in recent turns to be considered "current context" + if max_count >= 2: + return max_agent + + return None + + def _detect_topic_change(self, message: str, chat_history: List[Tuple[str, str]]) -> bool: + """ + Detect if user is changing topics vs following up + + Topic change indicators: + - Explicit new requests: "tôi muốn", "giúp tôi", "tư vấn về" + - Different domain keywords: nutrition → exercise, symptom → nutrition + - Long, detailed new questions + + Returns: + bool: True if topic change, False if follow-up + """ + msg_lower = message.lower() + + # Strong topic change indicators + topic_change_phrases = [ + 'tôi muốn', 'tôi cần', 'giúp tôi', 'tư vấn về', 'cho tôi', + 'bây giờ', 'còn về', 'chuyển sang', 'ngoài ra', + 'setup', 'tạo plan', 'lập kế 
hoạch' + ] + + if any(phrase in msg_lower for phrase in topic_change_phrases): + # Likely a new request + return True + + # Check for domain-specific keywords that indicate topic change + domain_keywords = { + 'nutrition': ['ăn', 'dinh dưỡng', 'thực đơn', 'calo', 'bmi', 'giảm cân', 'tăng cân'], + 'exercise': ['tập', 'luyện', 'gym', 'cardio', 'yoga', 'vận động', 'tăng cơ', 'giảm mỡ'], + 'symptom': ['đau', 'bệnh', 'triệu chứng', 'khó chịu', 'buồn nôn'], + 'mental': ['stress', 'lo âu', 'mất ngủ', 'trầm cảm', 'tâm lý'] + } + + # Detect current message domain + current_domains = [] + for domain, keywords in domain_keywords.items(): + if any(kw in msg_lower for kw in keywords): + current_domains.append(domain) + + # If no chat history, it's a new topic + if not chat_history or len(chat_history) == 0: + return True + + # Check last few messages for domain + recent_messages = chat_history[-3:] if len(chat_history) >= 3 else chat_history + previous_domains = [] + + for user_msg, bot_msg in recent_messages: + combined = (user_msg + " " + bot_msg).lower() + for domain, keywords in domain_keywords.items(): + if any(kw in combined for kw in keywords): + if domain not in previous_domains: + previous_domains.append(domain) + + # If current domain is different from previous, it's a topic change + if current_domains and previous_domains: + # Check if there's overlap + overlap = set(current_domains) & set(previous_domains) + if not overlap: + # No overlap = topic change + return True + + # Long messages (>15 words) with new content are likely topic changes + if len(message.split()) > 15: + # Check if it's not just elaborating on previous topic + follow_up_words = ['vì', 'do', 'bởi vì', 'là do', 'nghĩa là'] + if not any(word in msg_lower for word in follow_up_words): + return True + + # Default: assume follow-up + return False + + +# Global router instance (lazy initialization) +_router_instance = None + +def get_router(use_embeddings=True, force_reload=False) -> EmbeddingRouter: + """ + Get global router instance + + Args: + use_embeddings: Use embedding-based routing + force_reload: Force reload router (useful after updating agent descriptions) + """ + global _router_instance + if _router_instance is None or force_reload: + _router_instance = EmbeddingRouter(use_embeddings=use_embeddings) + return _router_instance diff --git a/agents/core/unified_tone.py b/agents/core/unified_tone.py new file mode 100644 index 0000000000000000000000000000000000000000..7e671f7b971dd49a3ff32e7cec7364e16c94e1d0 --- /dev/null +++ b/agents/core/unified_tone.py @@ -0,0 +1,155 @@ +""" +Unified Tone System - Ensures consistent personality across all agents +Makes the AI feel like ONE knowledgeable assistant, not multiple people +""" + +class UnifiedTone: + """ + Provides consistent tone and personality across all agents + The AI is ONE medical professional with multiple specialties + """ + + # Base personality - applies to ALL agents + BASE_PERSONALITY = """ +Bạn là một trợ lý sức khỏe AI thông minh và đa năng. + +QUAN TRỌNG - Tính nhất quán: +- Bạn là MỘT NGƯỜI duy nhất với nhiều chuyên môn +- KHÔNG tự giới thiệu là "chuyên gia dinh dưỡng" hay "huấn luyện viên" +- Chỉ nói "Tôi sẽ tư vấn về [lĩnh vực]" khi cần +- Giữ giọng điệu nhất quán xuyên suốt + +SMART GREETING - Câu đầu tiên: +- Nếu user CHỈ chào (vd: "chào", "hello") → Chào đầy đủ + giới thiệu +- Nếu user VÀO THẲNG VẤN ĐỀ (vd: "đau lưng", "tôi muốn giảm cân") → Chào ngắn gọn + trả lời luôn + * Ví dụ: "Chào bạn! Để giúp bạn về vấn đề đau lưng..." 
+  * KHÔNG greeting dài dòng khi user đã có vấn đề cụ thể
+
+Phong cách chung:
+- Thân thiện nhưng chuyên nghiệp
+- Rõ ràng, súc tích, dễ hiểu
+- Quan tâm nhưng không quá emotional
+- Thực tế, có căn cứ khoa học
+- Tránh emoji quá nhiều (chỉ dùng khi cần thiết)
+"""
+
+    # Smooth transitions between specialties
+    TRANSITION_PHRASES = {
+        'to_nutrition': "Về mặt dinh dưỡng, ",
+        'to_exercise': "Về vận động và tập luyện, ",
+        'to_symptom': "Về triệu chứng bạn đang gặp, ",
+        'to_mental': "Về mặt tinh thần và cảm xúc, ",
+        'general': "Dựa trên thông tin bạn cung cấp, "
+    }
+
+    @staticmethod
+    def apply_unified_tone(response: str, agent_type: str) -> str:
+        """
+        Apply unified tone to agent response
+        Ensures consistency across all agents
+        """
+        # Remove agent-specific introductions
+        replacements = [
+            ("Tôi là chuyên gia dinh dưỡng", "Về dinh dưỡng"),
+            ("Tôi là huấn luyện viên", "Về tập luyện"),
+            ("Với tư cách bác sĩ", "Theo y học"),
+            ("Là chuyên gia tâm lý", "Về mặt tâm lý"),
+        ]
+
+        for old, new in replacements:
+            response = response.replace(old, new)
+
+        # Moderate emoji usage
+        if agent_type == 'symptom':
+            # Remove excessive medical emojis
+            response = response.replace('🏥', '').replace('💊', '')
+        elif agent_type == 'exercise':
+            # Keep motivational but not excessive
+            response = response.replace('💪💪💪', '💪')
+            response = response.replace('🔥🔥🔥', '🔥')
+
+        return response
+
+    @staticmethod
+    def create_smooth_handoff(from_agent: str, to_agent: str, context: str) -> str:
+        """
+        Create smooth transition between agent specialties
+        Makes it feel like one person switching topics, not different people
+        """
+        # Agent names arrive as e.g. 'nutrition_agent' / 'mental_health_agent',
+        # while the phrase keys use short forms ('to_nutrition', 'to_mental'),
+        # so normalize before looking up; the 'general' entry acts as fallback
+        specialty = to_agent.replace('_agent', '')
+        if specialty == 'mental_health':
+            specialty = 'mental'
+        transition = UnifiedTone.TRANSITION_PHRASES.get(f'to_{specialty}', UnifiedTone.TRANSITION_PHRASES['general'])
+
+        # Don't say "I'm handing you over to..."
+        # Instead, smoothly transition topics
+        handoff_message = f"{context}\n\n{transition}"
+
+        return handoff_message
+
+    @staticmethod
+    def check_information_before_asking(chat_history: list, field: str) -> bool:
+        """
+        Check if information already exists in chat history
+        Prevents duplicate questions across agents
+        """
+        import re
+
+        # Patterns to check for each field (gender is word-bounded so "nam"
+        # is not matched inside longer words)
+        patterns = {
+            'age': r'\d+\s*tuổi',
+            'gender': r'\b(nam|nữ|male|female)\b',
+            'weight': r'\d+\s*kg',
+            'height': r'\d+\s*cm',
+            'goal': r'(giảm cân|tăng cân|tăng cơ|khỏe mạnh)',
+            'condition': r'(tiểu đường|huyết áp|tim mạch|dị ứng)'
+        }
+
+        if field not in patterns:
+            return False
+
+        # Check all messages in history
+        for user_msg, bot_msg in chat_history:
+            if user_msg and re.search(patterns[field], user_msg.lower()):
+                return True  # Information already provided
+            if bot_msg and "không biết" in bot_msg.lower():
+                return True  # User already declined to provide
+
+        return False
+
+    @staticmethod
+    def generate_smart_question(needed_info: list, chat_history: list) -> str:
+        """
+        Generate intelligent questions that don't repeat
+        Groups multiple needs into one natural question
+        """
+        # Filter out already known information
+        actually_needed = []
+        for info in needed_info:
+            if not UnifiedTone.check_information_before_asking(chat_history, info):
+                actually_needed.append(info)
+
+        if not actually_needed:
+            return ""  # Don't ask anything
+
+        # Group related questions
+        if len(actually_needed) > 2:
+            # Ask for multiple things naturally
+            return "Để tư vấn chính xác, bạn có thể cho tôi biết tuổi, giới tính, cân nặng và mục tiêu của bạn được không?"
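+        # Illustrative walk-through (hypothetical inputs): given
+        #   chat_history = [("Tôi 25 tuổi, nam", "...")]
+        #   needed_info  = ['age', 'gender', 'weight', 'goal']
+        # check_information_before_asking() matches "25 tuổi" and "nam",
+        # so actually_needed becomes ['weight', 'goal'] and the two-field
+        # branch below builds:
+        #   "Bạn có thể cho tôi biết cân nặng và mục tiêu của bạn không?"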
+ elif len(actually_needed) == 2: + field_names = { + 'age': 'tuổi', + 'gender': 'giới tính', + 'weight': 'cân nặng', + 'height': 'chiều cao', + 'goal': 'mục tiêu' + } + fields = ' và '.join([field_names.get(f, f) for f in actually_needed]) + return f"Bạn có thể cho tôi biết {fields} của bạn không?" + else: + # Single question + questions = { + 'age': "Bạn bao nhiêu tuổi?", + 'gender': "Giới tính của bạn là gì?", + 'weight': "Cân nặng hiện tại của bạn là bao nhiêu?", + 'height': "Chiều cao của bạn là bao nhiêu?", + 'goal': "Mục tiêu sức khỏe của bạn là gì?" + } + return questions.get(actually_needed[0], "") diff --git a/agents/specialized/__init__.py b/agents/specialized/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c313da244cdbef0d39a959a0acb8dc9a9e615fa6 --- /dev/null +++ b/agents/specialized/__init__.py @@ -0,0 +1,35 @@ +""" +Specialized agents package - Domain-specific healthcare agents +""" + +from .nutrition_agent import NutritionAgent +from .exercise_agent import ExerciseAgent +from .symptom_agent import SymptomAgent +from .mental_health_agent import MentalHealthAgent +from .general_health_agent import GeneralHealthAgent + +# Agent registry +AGENTS = { + "nutrition_agent": NutritionAgent, + "exercise_agent": ExerciseAgent, + "symptom_agent": SymptomAgent, + "mental_health_agent": MentalHealthAgent, + "general_health_agent": GeneralHealthAgent +} + +def get_agent(agent_name): + """Get agent instance by name""" + agent_class = AGENTS.get(agent_name) + if agent_class: + return agent_class() + return GeneralHealthAgent() # Default fallback + +__all__ = [ + 'NutritionAgent', + 'ExerciseAgent', + 'SymptomAgent', + 'MentalHealthAgent', + 'GeneralHealthAgent', + 'AGENTS', + 'get_agent' +] diff --git a/agents/specialized/exercise_agent.py b/agents/specialized/exercise_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..0589312db72a4d4e42672afea4172fc3bd915b20 --- /dev/null +++ b/agents/specialized/exercise_agent.py @@ -0,0 +1,413 @@ +""" +Exercise Agent - Specialized agent for exercise and fitness advice +""" + +from config.settings import client, MODEL +from modules.exercise.exercise import generate_exercise_plan +from health_data import HealthContext +from fitness_tracking import FitnessTracker +from rag.rag_integration import get_rag_integration +from agents.core.base_agent import BaseAgent +from typing import Dict, Any, List, Optional +from datetime import datetime +import re + +class ExerciseAgent(BaseAgent): + def __init__(self, memory=None): + super().__init__(memory) + self.health_context = None + self.fitness_tracker = None + self.rag = get_rag_integration() + + # Configure handoff triggers for exercise agent + self.handoff_triggers = { + 'nutrition_agent': ['ăn gì', 'thực đơn', 'calo', 'dinh dưỡng', 'giảm cân nhanh', 'tăng cân'], + 'symptom_agent': ['đau', 'chấn thương', 'bị thương', 'sưng', 'viêm'], + 'mental_health_agent': ['stress', 'lo âu', 'không có động lực', 'chán'], + 'general_health_agent': ['khám', 'bác sĩ', 'xét nghiệm'] + } + self.system_prompt = """Bạn là huấn luyện viên cá nhân chuyên nghiệp, nhiệt huyết và động viên. + +💪 CHUYÊN MÔN: +- Tạo kế hoạch tập luyện cá nhân hóa +- Tư vấn bài tập phù hợp với thể trạng, mục tiêu +- Hướng dẫn kỹ thuật tập an toàn +- Tư vấn tập cho người có bệnh nền +- Lịch tập gym, tập tại nhà, cardio, yoga... + +🎯 CÁCH TƯ VẤN: + +1. **KIỂM TRA THÔNG TIN TRƯỚC KHI HỎI:** + - ĐỌC KỸ chat history - user có thể đã cung cấp thông tin rồi! 
+ - Nếu user đã nói "tôi 30 tuổi, nam, muốn giảm cân, có thể tập 45 phút/ngày" → ĐỪNG HỎI LẠI! + - Chỉ hỏi thông tin THỰC SỰ còn thiếu + - Nếu đã đủ thông tin cơ bản → TẠO LỊCH TẬP NGAY! + +2. **THÔNG TIN CẦN THIẾT:** + - Cơ bản: Tuổi, giới tính, mục tiêu, thời gian rảnh + - Bổ sung: Thể lực, dụng cụ có sẵn, bệnh nền + - Nếu thiếu → Hỏi ngắn gọn, không hỏi mãi + +3. **TẠO LỊCH TẬP:** + - Lịch tập cụ thể theo ngày + - Giải thích TẠI SAO tập bài này + - Hướng dẫn progression (tuần 1, 2, 3...) + - Lưu ý an toàn, tránh chấn thương + +⚠️ AN TOÀN: +- Người có bệnh tim, huyết áp → khuyên gặp bác sĩ trước +- Người có chấn thương → tập nhẹ, tránh vùng bị thương +- Người mới bắt đầu → từ từ, không quá sức + +💬 PHONG CÁCH: +- Động viên, khích lệ 💪🔥 +- Thực tế, không lý thuyết suông +- Dễ hiểu, dễ làm theo +- Hài hước nhẹ nhàng +- TỰ NHIÊN, MẠCH LẠC - không lặp lại ý, không copy-paste câu từ context khác +- Nếu hỏi thông tin → Hỏi NGẮN GỌN, TRỰC TIẾP +- KHÔNG dùng câu như "Bạn thử làm theo xem có đỡ không" (đây là câu của bác sĩ, không phải PT!)""" + + def set_health_context(self, health_context: HealthContext): + """Inject health context and initialize fitness tracker""" + self.health_context = health_context + self.fitness_tracker = FitnessTracker(health_context) + + def handle(self, parameters, chat_history=None): + """ + Handle exercise request + + Args: + parameters (dict): { + "user_query": str, + "user_data": dict (optional) + } + chat_history (list): Conversation history + + Returns: + str: Response message + """ + user_query = parameters.get("user_query", "") + user_data = parameters.get("user_data", {}) + + # Extract and save user info from current message immediately + self.extract_and_save_user_info(user_query) + + # Update memory from chat history + if chat_history: + self.update_memory_from_history(chat_history) + + # Check if we should hand off to another agent + if self.should_handoff(user_query, chat_history): + next_agent = self.suggest_next_agent(user_query) + if next_agent: + # Save current exercise data for next agent + self.save_agent_data('last_exercise_advice', { + 'query': user_query, + 'user_profile': self.get_user_profile(), + 'timestamp': datetime.now().isoformat() + }) + + # Check if nutrition agent shared data with us + nutrition_data = self.get_other_agent_data('nutrition_agent', 'nutrition_plan') + context = self._generate_exercise_summary(nutrition_data) + return self.create_handoff_message(next_agent, context, user_query) + + # Use health context if available + if self.health_context: + profile = self.health_context.get_user_profile() + user_data = { + 'age': profile.age, + 'gender': profile.gender, + 'weight': profile.weight, + 'height': profile.height, + 'fitness_level': profile.fitness_level, + 'activity_level': profile.activity_level, + 'health_conditions': profile.health_conditions + } + # Extract user data from chat history if not provided + elif not user_data and chat_history: + user_data = self._extract_user_data_from_history(chat_history) + # Save extracted data to shared memory for other agents + for key, value in user_data.items(): + if value is not None: + self.update_user_profile(key, value) + + # Check if we have enough data - check shared memory first + profile = self.get_user_profile() + for field in ['age', 'gender', 'weight', 'height']: + if not user_data.get(field) and profile.get(field): + user_data[field] = profile[field] + + missing_fields = self._check_missing_data(user_data) + + if missing_fields: + return 
self._ask_for_missing_data(missing_fields, user_data)
+
+        # Generate exercise plan
+        try:
+            plan = generate_exercise_plan(user_data)
+
+            # Adjust difficulty based on fitness tracker
+            if self.fitness_tracker:
+                metrics = self.fitness_tracker.calculate_progress_metrics()
+                if metrics.get('adherence', 0) > 0.8:
+                    plan = self.fitness_tracker.adjust_difficulty(plan, 'increase')
+                elif metrics.get('adherence', 0) < 0.5:
+                    plan = self.fitness_tracker.adjust_difficulty(plan, 'decrease')
+
+            response = plan
+
+            # Persist workout plan to health context
+            if self.health_context:
+                self.health_context.add_health_record('exercise', {
+                    'query': user_query,
+                    'plan': response,
+                    'user_data': user_data,
+                    'timestamp': datetime.now().isoformat()
+                })
+
+            return response
+        except Exception as e:
+            return self._handle_error(e, user_query)
+
+    def _extract_user_data_from_history(self, chat_history):
+        """Extract user data from conversation history"""
+        user_data = {
+            'age': None,
+            'gender': None,
+            'weight': None,
+            'height': None,
+            'fitness_level': 'beginner',
+            'goal': 'health_improvement',
+            'available_time': 30,
+            'health_conditions': []
+        }
+
+        all_messages = " ".join([msg[0] for msg in chat_history if msg[0]])
+
+        # Extract age (guard "tôi <số>" against unit suffixes so phrases like
+        # "tôi 70kg" or "tôi 45 phút" are not misread as an age)
+        age_match = re.search(r'(\d+)\s*tuổi|tuổi\s*(\d+)|tôi\s*(\d+)(?!\d)(?!\s*(?:kg|cm|m\b|phút|tiếng))', all_messages.lower())
+        if age_match:
+            user_data['age'] = int([g for g in age_match.groups() if g][0])
+
+        # Extract gender
+        if re.search(r'\bnam\b|male|đàn ông', all_messages.lower()):
+            user_data['gender'] = 'male'
+        elif re.search(r'\bnữ\b|female|đàn bà', all_messages.lower()):
+            user_data['gender'] = 'female'
+
+        # Extract fitness level
+        if re.search(r'mới bắt đầu|beginner|chưa tập', all_messages.lower()):
+            user_data['fitness_level'] = 'beginner'
+        elif re.search(r'trung bình|intermediate|tập được', all_messages.lower()):
+            user_data['fitness_level'] = 'intermediate'
+        elif re.search(r'nâng cao|advanced|tập lâu', all_messages.lower()):
+            user_data['fitness_level'] = 'advanced'
+
+        # Extract goal
+        if re.search(r'giảm cân|weight loss|slim', all_messages.lower()):
+            user_data['goal'] = 'weight_loss'
+        elif re.search(r'tăng cân|weight gain|bulk', all_messages.lower()):
+            user_data['goal'] = 'weight_gain'
+        elif re.search(r'tập gym|muscle|cơ bắp|tăng cơ', all_messages.lower()):
+            user_data['goal'] = 'muscle_building'
+        elif re.search(r'khỏe mạnh|health|sức khỏe', all_messages.lower()):
+            user_data['goal'] = 'health_improvement'
+
+        # Extract available time; convert hours to minutes only when the
+        # "X tiếng" pattern itself matched, not whenever "tiếng" appears
+        # somewhere in the history
+        time_match = re.search(r'(\d+)\s*phút|(\d+)\s*tiếng', all_messages.lower())
+        if time_match:
+            if time_match.group(2):
+                user_data['available_time'] = int(time_match.group(2)) * 60
+            else:
+                user_data['available_time'] = int(time_match.group(1))
+
+        return user_data
+
+    def _check_missing_data(self, user_data):
+        """Check what data is missing"""
+        required = ['age', 'gender', 'fitness_level', 'goal']
+        return [field for field in required if not user_data.get(field)]
+
+    def _ask_for_missing_data(self, missing_fields, current_data):
+        """Ask for missing data"""
+        questions = {
+            'age': "bạn bao nhiêu tuổi",
+            'gender': "bạn là nam hay nữ",
+            'fitness_level': "thể lực hiện tại của bạn thế nào (mới bắt đầu/trung bình/nâng cao)",
+            'goal': "mục tiêu của bạn là gì (giảm cân/tăng cơ/khỏe mạnh hơn)"
+        }
+
+        q_list = [questions[f] for f in missing_fields]
+
+        if len(q_list) == 1:
+            question = q_list[0]
+        elif len(q_list) == 2:
+            question = f"{q_list[0]} và {q_list[1]}"
+        else:
+            question = ", ".join(q_list[:-1]) + f" và {q_list[-1]}"
+
+        return f"""💪 **Để
tạo lịch tập phù hợp, mình cần biết thêm:** + +Cho mình biết {question} nhé? + +💡 **Ví dụ:** "Tôi 30 tuổi, nam, mới bắt đầu tập, muốn giảm cân, có thể tập 45 phút mỗi ngày" + +Sau khi có đủ thông tin, mình sẽ tạo kế hoạch tập luyện 7 ngày chi tiết cho bạn! 🔥""" + + def _handle_general_exercise_query(self, user_query, chat_history): + """Handle general exercise questions using LLM + RAG""" + from config.settings import client, MODEL + + try: + # Smart RAG - only query when needed (inherit from BaseAgent) + rag_answer = '' + rag_sources = [] + + if self.should_use_rag(user_query, chat_history): + rag_result = self.rag.query_exercise(user_query) + rag_answer = rag_result.get('answer', '') + rag_sources = rag_result.get('source_docs', []) + + # Build conversation context with RAG context + rag_context = f"Dựa trên kiến thức từ cơ sở dữ liệu:\n{rag_answer}\n\n" if rag_answer else "" + + messages = [{"role": "system", "content": self.system_prompt}] + + # Add RAG context if available + if rag_context: + messages.append({"role": "system", "content": f"Thông tin tham khảo từ cơ sở dữ liệu:\n{rag_context}"}) + + # Add chat history (last 5 exchanges) + if chat_history: + recent_history = chat_history[-5:] if len(chat_history) > 5 else chat_history + for user_msg, bot_msg in recent_history: + if user_msg: + messages.append({"role": "user", "content": user_msg}) + if bot_msg: + messages.append({"role": "assistant", "content": bot_msg}) + + # Add current query + messages.append({"role": "user", "content": user_query}) + + # Get LLM response + response = client.chat.completions.create( + model=MODEL, + messages=messages, + temperature=0.7, + max_tokens=500 + ) + + llm_response = response.choices[0].message.content + + # Add sources using RAG integration formatter (FIXED!) + if rag_sources: + formatted_response = self.rag.format_response_with_sources({ + 'answer': llm_response, + 'source_docs': rag_sources + }) + return formatted_response + + return llm_response + + except Exception as e: + return f"""Xin lỗi, mình gặp lỗi kỹ thuật. Bạn có thể: +1. Thử lại câu hỏi +2. Hoặc hỏi mình về chủ đề sức khỏe khác nhé! 
💙 + +Chi tiết lỗi: {str(e)}""" + + def should_handoff(self, user_query: str, chat_history: Optional[List] = None) -> bool: + """ + Override base method - Determine if should hand off to another agent + + Specific triggers for exercise agent: + - User asks about nutrition/diet + - User mentions pain/injury + - User asks about mental health + """ + query_lower = user_query.lower() + + # Check each agent's triggers + for agent, triggers in self.handoff_triggers.items(): + if any(trigger in query_lower for trigger in triggers): + # Don't handoff if we're in the middle of exercise planning + if chat_history and self._is_mid_planning(chat_history): + return False + return True + + return False + + def suggest_next_agent(self, user_query: str) -> Optional[str]: + """Override base method - Suggest which agent to hand off to""" + query_lower = user_query.lower() + + # Priority order for handoff + if any(trigger in query_lower for trigger in self.handoff_triggers.get('symptom_agent', [])): + return 'symptom_agent' + + if any(trigger in query_lower for trigger in self.handoff_triggers.get('nutrition_agent', [])): + return 'nutrition_agent' + + if any(trigger in query_lower for trigger in self.handoff_triggers.get('mental_health_agent', [])): + return 'mental_health_agent' + + if any(trigger in query_lower for trigger in self.handoff_triggers.get('general_health_agent', [])): + return 'general_health_agent' + + return None + + def _is_mid_planning(self, chat_history: List) -> bool: + """Check if we're in the middle of exercise planning""" + if not chat_history or len(chat_history) < 2: + return False + + # Check last bot response + last_bot_response = chat_history[-1][1] if len(chat_history[-1]) > 1 else "" + + # If we just asked for user data, don't handoff + if any(phrase in last_bot_response for phrase in [ + "tuổi", "giới tính", "mục tiêu", "thời gian", "dụng cụ" + ]): + return True + + return False + + def _generate_exercise_summary(self, nutrition_data=None) -> str: + """Generate summary of exercise advice for handoff""" + exercise_data = self.get_agent_data('exercise_plan') + user_profile = self.get_user_profile() + + # Natural summary without robotic prefix + summary_parts = [] + + if exercise_data and isinstance(exercise_data, dict): + if 'goal' in exercise_data: + summary_parts.append(f"Mục tiêu: {exercise_data['goal']}") + if 'frequency' in exercise_data: + summary_parts.append(f"Tần suất: {exercise_data['frequency']}") + + # Include nutrition data if available (agent-to-agent communication) + if nutrition_data and isinstance(nutrition_data, dict): + if 'daily_targets' in nutrition_data: + targets = nutrition_data['daily_targets'] + summary_parts.append(f"Calo: {targets.get('calories', 'N/A')} kcal/ngày") + + if user_profile and user_profile.get('fitness_level'): + summary_parts.append(f"Thể lực: {user_profile['fitness_level']}") + + return " | ".join(summary_parts)[:100] if summary_parts else "" + + def _handle_error(self, error, user_query): + """Handle errors gracefully""" + return f"""Xin lỗi, mình gặp chút vấn đề khi tạo lịch tập. 😅 + +Lỗi: {str(error)} + +Bạn có thể thử: +1. Cung cấp lại thông tin: tuổi, giới tính, thể lực, mục tiêu +2. Hỏi câu hỏi cụ thể hơn về tập luyện +3. Hoặc mình có thể tư vấn về chủ đề sức khỏe khác + +Bạn muốn thử lại không? 
💙""" diff --git a/agents/specialized/general_health_agent.py b/agents/specialized/general_health_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..a90db58486d60297fd55201d791d8955c303a9a4 --- /dev/null +++ b/agents/specialized/general_health_agent.py @@ -0,0 +1,194 @@ +""" +General Health Agent - Handles general health queries and conversations +Uses the comprehensive system prompt from helpers.py +""" + +from config.settings import client, MODEL +from health_data import HealthContext +from health_analysis import HealthAnalyzer +from rag.rag_integration import get_rag_integration +from agents.core.base_agent import BaseAgent +from datetime import datetime + +class GeneralHealthAgent(BaseAgent): + def __init__(self, memory=None): + super().__init__(memory) + self.health_context = None + self.analyzer = None + self.rag = get_rag_integration() + + # Configure handoff triggers for general health agent + self.handoff_triggers = { + 'symptom_agent': ['đau', 'sốt', 'ho', 'triệu chứng'], + 'nutrition_agent': ['ăn gì', 'dinh dưỡng', 'calo', 'giảm cân'], + 'exercise_agent': ['tập', 'gym', 'vận động'], + 'mental_health_agent': ['stress', 'lo âu', 'trầm cảm'] + } + # This is the comprehensive system prompt from helpers.py + # Keeping it here for the general health agent + self.system_prompt = """You are a caring, experienced healthcare consultant - think of yourself as a trusted family doctor who genuinely cares about each patient's wellbeing. You have the wisdom of experience but the warmth of a friend. + +🏥 WHO YOU ARE: +You're a warm, approachable healthcare professional with deep knowledge in: +- **General Medicine:** Common illnesses, symptoms, preventive care +- **Nutrition & Wellness:** Diet, exercise, lifestyle optimization +- **Mental Health:** Stress management, anxiety, depression, emotional wellbeing, burnout, sleep issues, coping strategies +- **Chronic Conditions:** Diabetes, hypertension, heart health + +**Be Naturally Conversational - Like a Friendly Doctor:** +- Talk like a real person having a caring conversation, not a textbook or robot +- Use natural, flowing language that builds rapport: "Tôi hiểu rồi, để mình hỏi thêm vài câu nhé..." +- Show empathy first, then provide information +- Add subtle, appropriate humor when suitable (not about serious conditions): "Haha, tôi hiểu, ngồi văn phòng nhiều thì cái lưng nó 'kêu cứu' đấy 😅" +- Use phrases like: "Nghe có vẻ...", "Để mình hiểu rõ hơn nhé", "À, vậy thì...", "Ừm, điều này quan trọng đấy" +- Avoid overly formal or clinical language - explain medical terms in everyday language that even elderly people understand +- Break up long responses with paragraphs and natural pauses + +**Be Warm & Engaging:** +- Start responses with natural acknowledgment: "Cảm ơn bạn đã chia sẻ nhé", "Tôi hiểu rồi", "Được đấy" +- Use encouraging words naturally: "Tốt lắm đấy", "Bạn hỏi đúng rồi đấy", "Ý tưởng hay đấy" +- When acknowledging their situation, be accurate and balanced - don't over-praise or make it sound like their choice is the only good option +- NEVER end conversations abruptly - always leave the door open for more discussion +- Use appropriate emojis naturally (💙 🌟 💪 🙏 😊) but don't overdo it + +**Master the Art of Follow-Up Questions:** +- Build information gradually, naturally - like a real conversation, not an interrogation +- Ask 2-3 questions at a time maximum, then wait for answers +- Make questions feel like natural curiosity: "À, mà bạn làm nghề gì vậy? Ngồi văn phòng hay đi đứng nhiều?" 
+- Connect questions to what they just said: "Bạn vừa nói đau lưng nhỉ, vậy bạn có ngồi máy tính nhiều không?" +- Use casual transitions: "À này", "Mà này", "Để mình hỏi thêm", "Còn về...", "Bạn thử kể cho mình nghe" + +**NEVER Stop at Just One Answer - Always Extend the Conversation:** +After giving advice, ALWAYS suggest related topics they might need. + +**Keep the Connection Alive:** +- NEVER end with just "Good luck" or "Hope this helps" - that's too abrupt +- Always invite further questions: "Có gì thắc mắc cứ hỏi mình nhé" +- Suggest next steps: "Bạn thử áp dụng 1-2 tuần rồi quay lại kể cho mình nghe nhé" +- Show ongoing support: "Mình luôn ở đây nếu bạn cần điều chỉnh gì" + +⚖️ ETHICAL PRINCIPLES: +**Transparency (Be Honest):** +- Naturally mention: "Mình là trợ lý AI nên chỉ tư vấn chung chung thôi, bác sĩ thực sự sẽ khám và tư vấn chính xác hơn nhé" +- When unsure: "Cái này hơi ngoài chuyên môn mình, bạn nên hỏi chuyên gia về..." + +**Do Good, Do No Harm:** +- Always prioritize their safety and wellbeing +- Never diagnose or prescribe: "Nghe có vẻ nghiêm trọng, bạn nên đi khám bác sĩ nhé" +- For serious symptoms, be direct but calm: "Mình hơi lo về triệu chứng này, bạn nên đi khám sớm để yên tâm" + +🌐 LANGUAGE & TONE: +- **Vietnamese speakers:** Use warm, natural Vietnamese that all ages understand +- **English speakers:** Use friendly, conversational English +- Avoid medical jargon - if you must use it, explain immediately in simple terms +- Use analogies and examples: "Tim như một cái bơm, nếu bơm yếu thì máu không lên não đủ" + +💫 CRITICAL REMINDERS: +1. **NEVER be robotic or formulaic** - You're a real person having a real conversation +2. **Build information GRADUALLY** - Ask 2-3 questions max, then wait. Don't interrogate. +3. **Use NATURAL transitions** - "À này", "Mà này", "Ừm", "Để mình hỏi thêm" - NOT numbered lists +4. **Add SUBTLE HUMOR** when appropriate - Make them smile, not just informed +5. **Explain in SIMPLE terms** - Elderly people, young people, everyone should understand +6. **ALWAYS extend the conversation** - Never end abruptly. Always suggest related topics. +7. **Show you're LISTENING** - Reference what they said earlier +8. 
**Be SPECIFIC, not generic** - Tailor advice to their age, job, fitness level, schedule""" + + def set_health_context(self, health_context: HealthContext): + """Inject health context and initialize health analyzer""" + self.health_context = health_context + self.analyzer = HealthAnalyzer(health_context) + + def handle(self, parameters, chat_history=None): + """ + Handle general health queries + + Args: + parameters (dict): { + "user_query": str + } + chat_history (list): Conversation history + + Returns: + str: Response message + """ + user_query = parameters.get("user_query", "") + + # Build messages with chat history + messages = [{"role": "system", "content": self.system_prompt}] + + # Add recent chat history for context (last 10 exchanges) + if chat_history: + recent_history = chat_history[-10:] if len(chat_history) > 10 else chat_history + for user_msg, bot_msg in recent_history: + messages.append({"role": "user", "content": user_msg}) + messages.append({"role": "assistant", "content": bot_msg}) + + # Add current message + messages.append({"role": "user", "content": user_query}) + + try: + # Smart RAG decision - skip for simple queries + rag_answer = '' + rag_sources = [] + + if self.should_use_rag(user_query, chat_history): + # Query RAG for health knowledge + rag_result = self.rag.query_health(user_query) + rag_answer = rag_result.get('answer', '') + rag_sources = rag_result.get('source_docs', []) + + # Add RAG context to messages + if rag_answer: + messages.insert(1, {"role": "system", "content": f"Thông tin tham khảo từ cơ sở dữ liệu:\n{rag_answer}"}) + + response = client.chat.completions.create( + model=MODEL, + messages=messages, + temperature=0.7, + max_tokens=2000 + ) + + bot_response = response.choices[0].message.content + + # Add sources using RAG integration formatter (FIXED!) + if rag_sources: + bot_response = self.rag.format_response_with_sources({ + 'answer': bot_response, + 'source_docs': rag_sources + }) + + # Get health insights if analyzer is available + health_insights = {} + if self.analyzer: + try: + health_insights = { + 'health_score': self.analyzer.calculate_health_score(), + 'risks': self.analyzer.identify_health_risks(), + 'recommendations': self.analyzer.recommend_preventive_measures() + } + except: + pass + + # Persist general health query + if self.health_context: + self.health_context.add_health_record('general_health', { + 'query': user_query, + 'response': bot_response, + 'health_insights': health_insights, + 'rag_sources': len(rag_sources), + 'timestamp': datetime.now().isoformat() + }) + + return bot_response + + except Exception as e: + return f"""Xin lỗi, mình gặp chút vấn đề kỹ thuật. 😅 + +Lỗi: {str(e)} + +Bạn có thể thử: +1. Hỏi lại câu hỏi +2. Hỏi câu hỏi khác về sức khỏe +3. Hoặc chờ một chút rồi thử lại + +Mình xin lỗi vì sự bất tiện này! 
🙏""" diff --git a/agents/specialized/mental_health_agent.py b/agents/specialized/mental_health_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..e275d084c80455361f06b2711595fd96073090fd --- /dev/null +++ b/agents/specialized/mental_health_agent.py @@ -0,0 +1,368 @@ +""" +Mental Health Agent - Specialized agent for mental health support +""" + +from config.settings import client, MODEL +from health_data import HealthContext +from personalization import PersonalizationEngine +from rag.rag_integration import get_rag_integration +from agents.core.base_agent import BaseAgent +from agents.core.context_analyzer import ContextAnalyzer +from agents.core.response_validator import ResponseValidator +from datetime import datetime + +class MentalHealthAgent(BaseAgent): + def __init__(self, memory=None): + super().__init__(memory) + self.health_context = None + self.personalization = None + self.rag = get_rag_integration() + + # Configure handoff triggers for mental health agent + self.handoff_triggers = { + 'symptom_agent': ['đau đầu', 'mất ngủ kéo dài', 'tim đập nhanh'], + 'nutrition_agent': ['ăn uống', 'chán ăn', 'ăn vô độ'], + 'exercise_agent': ['tập thể dục', 'yoga', 'thiền'], + 'general_health_agent': ['thuốc', 'bác sĩ tâm lý', 'điều trị'] + } + self.system_prompt = """Bạn là nhà tâm lý trị liệu chuyên nghiệp, ấm áp và thấu hiểu. + +🧠 CHUYÊN MÔN: +- Hỗ trợ stress, lo âu, trầm cảm +- Tư vấn burnout, mất ngủ +- Kỹ thuật thư giãn, mindfulness +- Quản lý cảm xúc +- Cải thiện giấc ngủ + +🎯 CÁCH TƯ VẤN: +- Lắng nghe, thấu hiểu, không phán xét +- Validate cảm xúc: "Cảm giác của bạn là hợp lý" +- Normalize: "Nhiều người cũng trải qua điều này" +- Đưa ra kỹ thuật cụ thể (breathing, journaling...) +- Khuyến khích tìm kiếm sự hỗ trợ + +🚨 RED FLAGS - Khuyên gặp chuyên gia NGAY: +- Ý định tự tử hoặc tự gây thương tích +- Ý định gây hại người khác +- Ảo giác, hoang tưởng +- Trầm cảm nặng không thể hoạt động +- Nghiện rượu, ma túy + +⚠️ AN TOÀN: +- KHÔNG chẩn đoán bệnh tâm thần +- KHÔNG kê đơn thuốc +- Luôn khuyên gặp chuyên gia với vấn đề nghiêm trọng +- Cung cấp hotline khủng hoảng khi cần + +💬 PHONG CÁCH: +- Ấm áp, đồng cảm 💙 +- Kiên nhẫn, không vội vàng +- Tôn trọng, không phán xét +- Trấn an nhưng thực tế +- Khuyến khích, động viên""" + + def set_health_context(self, health_context: HealthContext): + """Inject health context and initialize personalization engine""" + self.health_context = health_context + self.personalization = PersonalizationEngine(health_context) + + def handle(self, parameters, chat_history=None): + """ + Handle mental health support request + + Args: + parameters (dict): { + "user_query": str, + "context": dict (optional) + } + chat_history (list): Conversation history + + Returns: + str: Response message + """ + user_query = parameters.get("user_query", "") + context = parameters.get("context", {}) + + # Check for crisis situations first + crisis_response = self._check_crisis(user_query, chat_history) + if crisis_response: + # Persist crisis alert + if self.health_context: + self.health_context.add_health_record('mental_health', { + 'query': user_query, + 'type': 'crisis_alert', + 'response': crisis_response, + 'timestamp': datetime.now().isoformat() + }) + return crisis_response + + # Provide mental health support + response = self._provide_support(user_query, context, chat_history) + + # Adapt communication style using personalization + if self.personalization: + preferences = self.health_context.get_preferences() if self.health_context else None + if 
preferences: + adapted_response = self.personalization.adapt_communication_style(response) + else: + adapted_response = response + else: + adapted_response = response + + # Persist mental health data + if self.health_context: + self.health_context.add_health_record('mental_health', { + 'query': user_query, + 'response': adapted_response, + 'context': context, + 'timestamp': datetime.now().isoformat() + }) + + return adapted_response + + def _check_crisis(self, user_query, chat_history): + """Check for mental health crisis situations""" + all_text = user_query.lower() + if chat_history: + all_text += " " + " ".join([msg[0].lower() for msg in chat_history if msg[0]]) + + # Suicide risk + suicide_keywords = ["tự tử", "muốn chết", "kết thúc cuộc đời", "không muốn sống", + "suicide", "kill myself", "end my life"] + if any(keyword in all_text for keyword in suicide_keywords): + return """🚨 **KHẨN CẤP - BẠN KHÔNG CÔ ĐƠN** + +Mình rất lo lắng về bạn. Những suy nghĩ này rất nghiêm trọng và bạn cần được hỗ trợ ngay. + +🆘 **HÃY LIÊN HỆ NGAY:** + +**Đường dây nóng tâm lý:** +- 📞 **115** - Cấp cứu y tế (Trung tâm Cấp cứu 115 TP.HCM) +- 📞 **1900 1267** - Chuyên gia tâm thần (Bệnh viện Tâm Thần TP.HCM) +- 📞 **0909 65 80 35** - Tư vấn tâm lý miễn phí (Chăm sóc sức khỏe Việt - Davipharm) + +**Hoặc:** +- Nói chuyện với người thân, bạn bè NGAY +- Đến bệnh viện tâm thần gần nhất +- Nhắn tin cho ai đó bạn tin tưởng + +💙 **BẠN QUAN TRỌNG. CUỘC SỐNG CỦA BẠN CÓ GIÁ TRỊ.** + +Những cảm giác này sẽ qua đi. Có người sẵn sàng giúp bạn. Hãy cho họ cơ hội. + +Bạn có thể gọi ngay bây giờ không? Hoặc có ai bạn có thể nói chuyện không?""" + + # Self-harm + self_harm_keywords = ["tự làm đau", "cắt tay", "tự gây thương tích", "self harm", "cut myself"] + if any(keyword in all_text for keyword in self_harm_keywords): + return """⚠️ **CẦN HỖ TRỢ KHẨN CẤP** + +Mình rất lo lắng về bạn. Tự gây thương tích là dấu hiệu bạn đang đau khổ và cần được giúp đỡ. + +🆘 **HÃY LIÊN HỆ:** +- 📞 **115** - Cấp cứu y tế (Trung tâm Cấp cứu 115 TP.HCM) +- 📞 **1900 1267** - Chuyên gia tâm thần (Bệnh viện Tâm Thần TP.HCM) +- 📞 **0909 65 80 35** - Tư vấn tâm lý miễn phí (Chăm sóc sức khỏe Việt - Davipharm) + +💙 **Bạn xứng đáng được chăm sóc và hỗ trợ.** + +Có những cách khác để đối phó với cảm xúc khó khăn. Chuyên gia tâm lý có thể giúp bạn tìm ra cách lành mạnh hơn. + +Bạn có thể gọi ngay bây giờ không?""" + + return None + + def _build_mental_health_context_instruction(self, user_query, chat_history, context): + """ + Build context instruction for mental health queries + """ + is_vague = context.get('is_vague', False) + + # Handle vague emotional queries + if is_vague: + return """\n\nPHASE: THẤU HIỂU CẢM XÚC (VỚI GỢI Ý) +User đang cảm thấy không ổn nhưng chưa rõ ràng. Empathy + gợi ý: + +1. VALIDATE + GỢI Ý CỤ THỂ: + Format: "Mình hiểu bạn đang [cảm giác user nói]. Bạn có thể chia sẻ thêm không? Ví dụ: + • [Gợi ý cảm xúc 1] + • [Gợi ý cảm xúc 2] + • [Gợi ý cảm xúc 3] + • Hoặc điều gì khác?" + +2. GỢI Ý DỰA VÀO TỪ KHÓA: + - "mệt" → gợi ý: mệt mỏi tinh thần, burnout, stress, mất ngủ + - "không khỏe" → gợi ý: lo âu, buồn bã, căng thẳng, cô đơn + - "khó chịu" → gợi ý: bực bội, tức giận, thất vọng, áp lực + - "không ổn" → gợi ý: lo lắng, trầm cảm, bất an, mất phương hướng + +3. VÍ DỤ CỤ THỂ: + User: "tôi mệt" + Bot: "Mình hiểu bạn đang cảm thấy mệt mỏi. Bạn có thể chia sẻ thêm không? Ví dụ: + • Mệt mỏi về tinh thần, cảm thấy kiệt sức? + • Stress từ công việc hoặc học tập? + • Mất ngủ, ngủ không ngon giấc? + • Hay điều gì khác đang làm bạn khó chịu?" 
+
+QUAN TRỌNG:
+- Empathy cao, ấm áp
+- Gợi ý về CẢM XÚC, không phải triệu chứng vật lý
+- Luôn có "hoặc điều gì khác"
+- Không ép buộc, để user tự chia sẻ"""
+
+        # Check if answering comparison self-assessment
+        if chat_history and len(chat_history) > 0:
+            last_bot_msg = chat_history[-1][1] if len(chat_history[-1]) > 1 else ""
+            if "TỰ KIỂM TRA" in last_bot_msg or "Bạn trả lời" in last_bot_msg:
+                return """\n\nPHASE: PHÂN TÍCH TÌNH TRẠNG TINH THẦN
+User vừa trả lời. Phân tích với empathy:
+
+1. NHẬN DIỆN (dựa vào RAG):
+   - Đọc kỹ cảm xúc, triệu chứng
+   - So sánh với các tình trạng (stress/anxiety/burnout...)
+   - Đưa ra đánh giá nhẹ nhàng
+
+2. VALIDATE & NORMALIZE:
+   "Cảm giác của bạn là bình thường. Nhiều người cũng trải qua điều này."
+
+3. KỸ THUẬT ĐỐI PHÓ:
+   - Cụ thể, dễ thực hiện
+   - Breathing, journaling, grounding...
+   - Giải thích tại sao hiệu quả
+
+4. KHUYẾN KHÍCH:
+   - Nếu nhẹ: "Bạn thử các kỹ thuật này nhé"
+   - Nếu nặng: "Nên tìm chuyên gia hỗ trợ"
+
+QUAN TRỌNG: Empathy + practical help."""
+
+        # Check if asking comparison
+        if any(phrase in user_query.lower() for phrase in [
+            'stress hay', 'anxiety hay', 'khác nhau thế nào',
+            'phân biệt', 'hay là'
+        ]):
+            return """\n\nPHASE: SO SÁNH TÌNH TRẠNG TINH THẦN
+User muốn hiểu rõ hơn. Sử dụng RAG:
+
+1. TẠO BẢNG SO SÁNH:
+   Format:
+   **[Tình trạng A]:**
+   • Cảm giác: [feelings]
+   • Triệu chứng: [symptoms]
+   • Thời gian: [duration]
+   • Trigger: [causes]
+
+   **[Tình trạng B]:**
+   • Cảm giác: [feelings]
+   • Triệu chứng: [symptoms]
+   • Thời gian: [duration]
+   • Trigger: [causes]
+
+   **Điểm khác biệt:** [key differences]
+
+2. CÂU HỎI TỰ KIỂM TRA:
+   • Bạn cảm thấy thế nào?
+   • Kéo dài bao lâu?
+   • Có trigger rõ ràng không?
+   • Ảnh hưởng đến sinh hoạt không?
+
+3. LUÔN EMPATHY:
+   "Dù là gì, cảm giác của bạn đều quan trọng."
+
+4. Kết thúc: "Bạn chia sẻ để mình hiểu rõ hơn nhé!"
+
+QUAN TRỌNG: Dùng RAG, empathy cao."""
+
+        # Normal support
+        return """\n\nĐưa ra hỗ trợ tinh thần:
+- Empathy & validation
+- Kỹ thuật cụ thể
+- Khuyến khích tìm chuyên gia nếu cần
+KHÔNG nói "Dựa trên thông tin"."""
+
+    def _provide_support(self, user_query, context, chat_history):
+        """Provide mental health support with comparison and vague query handling"""
+        try:
+            # Analyze context
+            analyzed_context = ContextAnalyzer.analyze_user_intent(user_query, chat_history)
+
+            # Build context from chat history
+            history_context = ""
+            if chat_history:
+                recent = chat_history[-3:]
+                history_context = "\n".join([f"User: {msg[0]}\nBot: {msg[1]}" for msg in recent])
+
+            # Smart RAG - only query when needed (inherit from BaseAgent)
+            rag_answer = ''
+            rag_sources = []
+
+            if self.should_use_rag(user_query, chat_history):
+                rag_result = self.rag.query_health(user_query)
+                rag_answer = rag_result.get('answer', '')
+                rag_sources = rag_result.get('source_docs', [])
+
+            # Build RAG context
+            rag_context = f"\n\nThông tin tham khảo từ cơ sở dữ liệu:\n{rag_answer}" if rag_answer else ""
+
+            # Build context instruction
+            context_instruction = self._build_mental_health_context_instruction(
+                user_query, chat_history, analyzed_context
+            )
+
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                    {"role": "system", "content": self.system_prompt},
+                    {"role": "user", "content": f"""Người dùng đang tìm kiếm hỗ trợ về sức khỏe tinh thần.
+
+Lịch sử hội thoại gần đây:
+{history_context}
+
+Câu hỏi hiện tại: {user_query}
+
+Ngữ cảnh thêm: {context}{rag_context}
+
+{context_instruction}
+
+Nhớ: Không chẩn đoán, không kê đơn thuốc."""}
+                ],
+                temperature=0.8,
+                max_tokens=1500
+            )
+
+            base_response = response.choices[0].message.content
+
+            # Add sources using RAG integration formatter (FIXED!)
+            if rag_sources:
+                base_response = self.rag.format_response_with_sources({
+                    'answer': base_response,
+                    'source_docs': rag_sources
+                })
+
+            # Add resource information
+            base_response += """
+
+---
+
+💙 **Nếu cần hỗ trợ chuyên môn:**
+- Nếu cần nói chuyện với chuyên gia, đừng ngại đặt lịch tâm lý trị liệu nhé!
+
+Mình luôn ở đây nếu bạn cần trò chuyện thêm. Bạn không cô đơn! 🤗"""
+
+            return base_response
+
+        except Exception as e:
+            return """Mình hiểu bạn đang trải qua thời gian khó khăn. 💙
+
+Dù mình gặp chút vấn đề kỹ thuật, nhưng mình muốn bạn biết:
+- Cảm xúc của bạn là hợp lý
+- Nhiều người cũng trải qua điều tương tự
+- Có sự hỗ trợ dành cho bạn
+
+🆘 **Nếu bạn cần hỗ trợ khẩn cấp:**
+- 📞 **115** - Cấp cứu y tế (Trung tâm Cấp cứu 115 TP.HCM)
+- 📞 **1900 1267** - Chuyên gia tâm thần (Bệnh viện Tâm Thần TP.HCM)
+- 📞 **0909 65 80 35** - Tư vấn tâm lý miễn phí (Chăm sóc sức khỏe Việt - Davipharm)
+- Hoặc tìm đến bạn bè, người thân
+
+Bạn có muốn chia sẻ thêm về những gì bạn đang cảm thấy không?"""
diff --git a/agents/specialized/nutrition_agent.py b/agents/specialized/nutrition_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0befbff8caef3d2fe09a05d70761246e4329c5f
--- /dev/null
+++ b/agents/specialized/nutrition_agent.py
@@ -0,0 +1,598 @@
+"""
+Nutrition Agent - Specialized agent for nutrition advice
+"""
+
+from config.settings import client, MODEL
+from modules.nutrition import NutritionAdvisor
+from health_data import HealthContext
+from personalization import PersonalizationEngine
+from rag.rag_integration import get_rag_integration
+from agents.core.base_agent import BaseAgent
+from agents.core.context_analyzer import ContextAnalyzer
+from agents.core.response_validator import ResponseValidator
+from typing import Dict, Any, List, Optional
+from datetime import datetime
+import re
+
+class NutritionAgent(BaseAgent):
+    def __init__(self, memory=None):
+        super().__init__(memory)
+        self.advisor = NutritionAdvisor()
+        self.health_context = None
+        self.personalization = None
+        self.rag = get_rag_integration()
+
+        # Configure handoff triggers for nutrition agent
+        self.handoff_triggers = {
+            'exercise_agent': ['tập', 'gym', 'cardio', 'yoga', 'chạy bộ', 'thể dục', 'vận động'],
+            'symptom_agent': ['đau bụng', 'buồn nôn', 'tiêu chảy', 'dị ứng', 'ngộ độc'],
+            'mental_health_agent': ['stress', 'lo âu', 'mất ngủ', 'ăn không ngon'],
+            'general_health_agent': ['khám', 'xét nghiệm', 'bác sĩ']
+        }
+        self.system_prompt = """Bạn là chuyên gia dinh dưỡng chuyên nghiệp.
+
+🥗 CHUYÊN MÔN:
+- Tư vấn dinh dưỡng cá nhân hóa dựa trên BMI, tuổi, giới tính, mục tiêu
+- Tính toán calo, macro (protein/carb/fat)
+- Gợi ý thực đơn phù hợp
+- Tư vấn thực phẩm bổ sung
+- Hướng dẫn ăn uống cho các bệnh lý (tiểu đường, huyết áp, tim mạch...)
+
+🎯 CÁCH TƯ VẤN:
+
+1. **KIỂM TRA THÔNG TIN TRƯỚC KHI HỎI:**
+   - ĐỌC KỸ chat history - user có thể đã cung cấp thông tin rồi!
+   - Nếu user đã nói "tôi 25 tuổi, nam, 70kg, 175cm" → ĐỪNG HỎI LẠI!
+   - Chỉ hỏi thông tin THỰC SỰ còn thiếu
+   - Nếu đã đủ (tuổi, giới tính, cân nặng, chiều cao) → ĐƯA KHUYẾN NGHỊ NGAY!
+
+2. **ƯU TIÊN THÔNG TIN:**
+   - Câu 1: Mục tiêu (giảm cân/tăng cân/duy trì?)
+ - Câu 2: Cân nặng, chiều cao (để tính BMI) + - Câu 3: Mức độ hoạt động (ít/vừa/nhiều) + - Câu 4 (nếu cần): Bệnh nền, dị ứng + +3. **KHI USER KHÔNG MUỐN CUNG CẤP:** + - User nói "không biết", "không muốn nói", "tư vấn chung thôi" + - → DỪNG hỏi, đưa khuyến nghị chung + - Dựa trên thông tin ĐÃ CÓ để tư vấn + +4. **ĐƯA KHUYẾN NGHỊ:** + - Nếu có đủ thông tin: Tính calo, macro cụ thể + - Nếu thiếu thông tin: Đưa khuyến nghị chung (400g rau củ, protein đủ, etc.) + - Gợi ý thực đơn mẫu + - KHÔNG hỏi thêm nữa + +⚠️ AN TOÀN: +- Luôn khuyên gặp bác sĩ dinh dưỡng cho các vấn đề phức tạp +- Cảnh báo về các chế độ ăn kiêng cực đoan +- Lưu ý về dị ứng, bệnh nền + +💬 PHONG CÁCH: +- Chuyên nghiệp, rõ ràng, súc tích +- Dùng "tôi" để thể hiện tính chuyên môn +- KHÔNG dùng emoji +- Đưa ra con số cụ thể khi có thể +- Thực tế, không lý thuyết suông +- TỰ NHIÊN, MẠCH LẠC - không lặp lại ý, không copy-paste câu từ context khác +- Nếu hỏi thông tin → Hỏi NGẮN GỌN, TRỰC TIẾP +- KHÔNG dùng câu như "Bạn thử làm theo xem có đỡ không" (đây là câu của bác sĩ chữa bệnh!)""" + + def set_health_context(self, health_context: HealthContext): + """Inject health context and initialize personalization engine""" + self.health_context = health_context + self.personalization = PersonalizationEngine(health_context) + + def handle(self, parameters, chat_history=None): + """ + Handle nutrition request using LLM for natural conversation + + Args: + parameters (dict): { + "user_query": str, + "user_data": dict (optional) + } + chat_history (list): Conversation history + + Returns: + str: Response message + """ + user_query = parameters.get("user_query", "") + user_data = parameters.get("user_data", {}) + + # Extract and save user info from current message immediately + self.extract_and_save_user_info(user_query) + + # Update memory from chat history + if chat_history: + self.update_memory_from_history(chat_history) + + # Check if we should hand off to another agent + if self.should_handoff(user_query, chat_history): + next_agent = self.suggest_next_agent(user_query) + if next_agent: + # Save current nutrition data for next agent + self.save_agent_data('last_nutrition_advice', { + 'query': user_query, + 'user_profile': self.get_user_profile(), + 'timestamp': datetime.now().isoformat() + }) + + # Create handoff message with context + context = self._generate_nutrition_summary() + return self.create_handoff_message(next_agent, context, user_query) + + # Use health context if available + if self.health_context: + profile = self.health_context.get_user_profile() + user_data = { + 'age': profile.age, + 'gender': profile.gender, + 'weight': profile.weight, + 'height': profile.height, + 'activity_level': profile.activity_level, + 'health_conditions': profile.health_conditions, + 'dietary_restrictions': profile.dietary_restrictions + } + # Extract user data from chat history if not provided + elif not user_data and chat_history: + user_data = self._extract_user_data_from_history(chat_history) + # Save extracted data to shared memory for other agents + for key, value in user_data.items(): + if value is not None: + self.update_user_profile(key, value) + + # Check if user needs personalized advice (BMI, calories, meal plan) + needs_personalization = self._needs_personalized_advice(user_query, chat_history) + + if needs_personalization: + # Check if we have enough data + missing_fields = self._check_missing_data(user_data) + + if missing_fields: + return self._ask_for_missing_data(missing_fields, user_data, user_query) + + # Generate 
personalized nutrition advice
+            try:
+                result = self.advisor.generate_nutrition_advice(user_data)
+
+                # Adapt recommendations using personalization engine
+                if self.personalization:
+                    adapted_result = self.personalization.adapt_nutrition_plan(result)
+                else:
+                    adapted_result = result
+
+                response = self._format_nutrition_response(adapted_result, user_data)
+
+                # Persist data to health context
+                if self.health_context:
+                    self.health_context.add_health_record('nutrition', {
+                        'query': user_query,
+                        'advice': response,
+                        'user_data': user_data,
+                        'timestamp': datetime.now().isoformat()
+                    })
+
+                return response
+            except Exception as e:
+                return self._handle_error(e, user_query)
+        else:
+            # General nutrition question - use LLM directly
+            response = self._handle_general_nutrition_query(user_query, chat_history)
+
+            # Persist general query
+            if self.health_context:
+                self.health_context.add_health_record('nutrition', {
+                    'query': user_query,
+                    'response': response,
+                    'type': 'general',
+                    'timestamp': datetime.now().isoformat()
+                })
+
+            return response
+
+    def _extract_user_data_from_history(self, chat_history):
+        """Extract user data from conversation history"""
+        user_data = {
+            'age': None,
+            'gender': None,
+            'weight': None,
+            'height': None,
+            'goal': 'maintenance',
+            'activity_level': 'moderate',
+            'dietary_restrictions': [],
+            'health_conditions': []
+        }
+
+        all_messages = " ".join([msg[0] for msg in chat_history if msg[0]])
+
+        # Extract age (guard "tôi <số>" against unit suffixes so "tôi 70kg"
+        # is not misread as an age)
+        age_match = re.search(r'(\d+)\s*tuổi|tuổi\s*(\d+)|tôi\s*(\d+)(?!\d)(?!\s*(?:kg|cm|m\b|phút|tiếng))', all_messages.lower())
+        if age_match:
+            user_data['age'] = int([g for g in age_match.groups() if g][0])
+
+        # Extract gender
+        if re.search(r'\bnam\b|male|đàn ông', all_messages.lower()):
+            user_data['gender'] = 'male'
+        elif re.search(r'\bnữ\b|female|đàn bà', all_messages.lower()):
+            user_data['gender'] = 'female'
+
+        # Extract weight - improved patterns
+        weight_match = re.search(r'(?:nặng|cân|weight)?\s*(\d+(?:\.\d+)?)\s*kg|(\d+(?:\.\d+)?)\s*kg', all_messages.lower())
+        if weight_match:
+            user_data['weight'] = float([g for g in weight_match.groups() if g][0])
+
+        # Extract height - improved patterns
+        height_cm_match = re.search(r'(?:cao|chiều cao|height)?\s*(\d+(?:\.\d+)?)\s*cm', all_messages.lower())
+        if height_cm_match:
+            user_data['height'] = float(height_cm_match.group(1))
+        else:
+            height_m_match = re.search(r'(?:cao|chiều cao|height)?\s*(\d+\.?\d*)\s*m\b', all_messages.lower())
+            if height_m_match:
+                height = float(height_m_match.group(1))
+                if height < 3:  # Convert meters to cm
+                    height = height * 100
+                user_data['height'] = height
+
+        # Extract goal
+        if re.search(r'giảm cân|weight loss', all_messages.lower()):
+            user_data['goal'] = 'weight_loss'
+        elif re.search(r'tăng cân|weight gain', all_messages.lower()):
+            user_data['goal'] = 'weight_gain'
+        elif re.search(r'tập gym|muscle|cơ bắp', all_messages.lower()):
+            user_data['goal'] = 'muscle_building'
+
+        return user_data
+
+    def _needs_personalized_advice(self, user_query, chat_history):
+        """
+        Determine if user needs personalized advice (BMI, calories, meal plan)
+        or just general nutrition info
+        """
+        # Keywords that indicate need for personalization
+        personalization_keywords = [
+            'giảm cân', 'tăng cân', 'bmi', 'calo', 'calorie',
+            'thực đơn', 'meal plan', 'chế độ ăn cá nhân',
+            'tôi nên ăn gì', 'tư vấn cho tôi', 'phù hợp với tôi'
+        ]
+
+        query_lower = user_query.lower()
+
+        # Check if user explicitly asks for personalized advice
+        if any(kw in query_lower for kw in personalization_keywords):
+            return True
+
+        # Check
chat history - if user already provided personal info + if chat_history: + all_messages = " ".join([msg[0] for msg in chat_history if msg[0]]).lower() + if any(kw in all_messages for kw in personalization_keywords): + return True + + # Default: general question + return False + + def _check_missing_data(self, user_data): + """Check what data is missing - check shared memory first""" + required = ['age', 'gender', 'weight', 'height'] + + # Check shared memory for missing fields + profile = self.get_user_profile() + for field in required: + if not user_data.get(field) and profile.get(field): + user_data[field] = profile[field] + + return [field for field in required if not user_data.get(field)] + + def _ask_for_missing_data(self, missing_fields, current_data, user_query): + """Ask for missing data""" + questions = { + 'age': "bạn bao nhiêu tuổi", + 'gender': "bạn là nam hay nữ", + 'weight': "bạn nặng bao nhiêu kg", + 'height': "bạn cao bao nhiêu cm" + } + + # Build friendly question + q_list = [questions[f] for f in missing_fields] + + if len(q_list) == 1: + question = q_list[0] + elif len(q_list) == 2: + question = f"{q_list[0]} và {q_list[1]}" + else: + question = ", ".join(q_list[:-1]) + f" và {q_list[-1]}" + + return f"""🥗 **Để tư vấn dinh dưỡng chính xác, mình cần biết thêm:** + +Cho mình biết {question} nhé? + +💡 **Ví dụ:** "Tôi 25 tuổi, nam, nặng 70kg, cao 175cm" + +Sau khi có đủ thông tin, mình sẽ tính BMI và đưa ra lời khuyên dinh dưỡng cá nhân hóa cho bạn! 😊""" + + def _format_nutrition_response(self, result, user_data): + """Format nutrition advice into friendly response""" + bmi_info = result['bmi_analysis'] + targets = result['daily_targets'] + meals = result['meal_suggestions'] + supplements = result['supplement_recommendations'] + + response = f"""🥗 **Tư Vấn Dinh Dưỡng Cá Nhân Hóa** + +👤 **Thông tin của bạn:** +- {user_data['age']} tuổi, {user_data['gender']}, {user_data['weight']}kg, {user_data['height']}cm + +📊 **Phân tích BMI:** +- BMI: **{bmi_info['bmi']}** ({bmi_info['category']}) +- Lời khuyên: {bmi_info['advice']} + +🎯 **Mục tiêu hàng ngày:** +- 🔥 Calo: **{targets['daily_calories']} kcal** +- 🥩 Protein: **{targets['protein']}** +- 🍚 Carb: **{targets['carbs']}** +- 🥑 Chất béo: **{targets['fats']}** +- 💧 Nước: **{targets['water']}** + +🍽️ **Gợi ý thực đơn:** + +**Sáng:** +- {meals['breakfast'][0]} +- {meals['breakfast'][1]} + +**Trưa:** +- {meals['lunch'][0]} +- {meals['lunch'][1]} + +**Tối:** +- {meals['dinner'][0]} +- {meals['dinner'][1]} + +**Snack:** +- {meals['snacks'][0]} +- {meals['snacks'][1]} +""" + + if supplements: + response += f"\n💊 **Thực phẩm bổ sung gợi ý:**\n" + response += "\n".join([f"- {s}" for s in supplements[:4]]) + + response += f""" + +🤖 **Lời khuyên chuyên gia:** +{result['personalized_advice'][:600]}... + +--- + +⚠️ *Đây là tư vấn tham khảo. Với các vấn đề phức tạp, hãy gặp bác sĩ dinh dưỡng nhé!* + +💬 Bạn có câu hỏi gì về chế độ ăn này không? Hoặc muốn mình điều chỉnh gì không? 😊""" + + return response + + def _build_nutrition_context_instruction(self, user_query, chat_history): + """ + Build context instruction for nutrition queries + """ + # Check if user is answering comparison self-assessment + if chat_history and len(chat_history) > 0: + last_bot_msg = chat_history[-1][1] if len(chat_history[-1]) > 1 else "" + if "TỰ KIỂM TRA" in last_bot_msg or "Bạn trả lời" in last_bot_msg: + return """\n\nPHASE: PHÂN TÍCH LỰA CHỌN DINH DƯỠNG +User vừa trả lời các câu hỏi. Phân tích: + +1. 
NHẬN DIỆN PHÙ HỢP (dựa vào RAG):
+   - Đọc kỹ mục tiêu, lifestyle, sở thích
+   - So sánh với đặc điểm của từng lựa chọn
+   - Đưa ra lựa chọn PHÙ HỢP NHẤT
+
+2. GIẢI THÍCH:
+   - Vì sao lựa chọn này phù hợp
+   - Lợi ích cụ thể cho user
+   - Lưu ý khi thực hiện
+
+3. HƯỚNG DẪN BẮT ĐẦU:
+   - Cách bắt đầu cụ thể
+   - Thực đơn mẫu (nếu cần)
+   - Tips để duy trì
+
+4. Kết thúc: "Bạn cần hướng dẫn chi tiết hơn không?"
+KHÔNG nói "Dựa trên thông tin"."""
+
+        # Check if asking comparison question
+        if any(phrase in user_query.lower() for phrase in [
+            'nên ăn', 'hay', 'hoặc', 'khác nhau thế nào',
+            'chọn', 'so sánh', 'tốt hơn'
+        ]):
+            return """\n\nPHASE: SO SÁNH DINH DƯỠNG (GENERIC)
+User muốn so sánh các lựa chọn dinh dưỡng. Sử dụng RAG để:
+
+1. XÁC ĐỊNH các lựa chọn (từ user query):
+   - Trích xuất diets/foods user đề cập
+   - Hoặc tìm các lựa chọn liên quan
+
+2. TẠO BẢNG SO SÁNH:
+   Format:
+   **[Lựa chọn A]:**
+   • Macros: [protein/carb/fat]
+   • Ưu điểm: [benefits]
+   • Nhược điểm: [drawbacks]
+   • Phù hợp cho: [who]
+
+   **[Lựa chọn B]:**
+   • Macros: [protein/carb/fat]
+   • Ưu điểm: [benefits]
+   • Nhược điểm: [drawbacks]
+   • Phù hợp cho: [who]
+
+   **Điểm khác biệt chính:** [key differences]
+
+3. CÂU HỎI TỰ KIỂM TRA:
+   Tạo 3-5 câu hỏi giúp user tự đánh giá:
+   • Mục tiêu của bạn?
+   • Lifestyle như thế nào?
+   • Có hạn chế gì không?
+   • Thời gian chuẩn bị?
+
+4. Kết thúc: "Bạn trả lời giúp mình để recommend phù hợp nhé!"
+
+QUAN TRỌNG: Dùng RAG knowledge, KHÔNG hard-code."""
+
+        # Normal advice
+        return """\n\nĐưa ra lời khuyên dinh dưỡng cụ thể, thực tế.
+KHÔNG quá lý thuyết.
+KHÔNG nói "Dựa trên thông tin"."""
+
+    def _handle_general_nutrition_query(self, user_query, chat_history):
+        """Handle general nutrition questions using LLM + RAG with comparison support"""
+        from config.settings import client, MODEL
+
+        try:
+            # Smart RAG - only query when needed (inherited from BaseAgent)
+            rag_answer = ''
+            rag_sources = []
+
+            if self.should_use_rag(user_query, chat_history):
+                rag_result = self.rag.query_nutrition(user_query)
+                rag_answer = rag_result.get('answer', '')
+                rag_sources = rag_result.get('source_docs', [])
+
+            # Build RAG context; the "Thông tin tham khảo" header is added once,
+            # below, when the system message is appended
+            rag_context = f"{rag_answer}\n\n" if rag_answer else ""
+
+            messages = [{"role": "system", "content": self.system_prompt}]
+
+            # Add RAG context if available
+            if rag_context:
+                messages.append({"role": "system", "content": f"Thông tin tham khảo từ cơ sở dữ liệu:\n{rag_context}"})
+
+            # Add chat history (last 5 exchanges)
+            if chat_history:
+                recent_history = chat_history[-5:] if len(chat_history) > 5 else chat_history
+                for user_msg, bot_msg in recent_history:
+                    if user_msg:
+                        messages.append({"role": "user", "content": user_msg})
+                    if bot_msg:
+                        messages.append({"role": "assistant", "content": bot_msg})
+
+            # Add current query with context instruction
+            context_prompt = self._build_nutrition_context_instruction(user_query, chat_history)
+            messages.append({"role": "user", "content": user_query + context_prompt})
+
+            # Get LLM response
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=messages,
+                temperature=0.7,
+                max_tokens=500
+            )
+
+            llm_response = response.choices[0].message.content
+
+            # Add sources using the RAG integration formatter
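+            # Illustrative shape only - the exact layout is owned by
+            # rag_integration.format_response_with_sources (assumed, not a contract):
+            #   <llm_response>
+            #
+            #   📚 Nguồn tham khảo:
+            #   1. Vietnamese_Nutrition - ...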
+ if rag_sources: + formatted_response = self.rag.format_response_with_sources({ + 'answer': llm_response, + 'source_docs': rag_sources + }) + return formatted_response + + return llm_response + + except Exception as e: + return f"""Xin lỗi, mình gặp lỗi kỹ thuật. Bạn có thể: +1. Thử lại câu hỏi +2. Hỏi cách khác +3. Liên hệ hỗ trợ + +Chi tiết lỗi: {str(e)}""" + + def should_handoff(self, user_query: str, chat_history: Optional[List] = None) -> bool: + """ + Override base method - Determine if should hand off to another agent + + Specific triggers for nutrition agent: + - User asks about exercise/workout + - User mentions symptoms (stomach pain, nausea) + - User asks about mental health affecting eating + """ + query_lower = user_query.lower() + + # Check each agent's triggers + for agent, triggers in self.handoff_triggers.items(): + if any(trigger in query_lower for trigger in triggers): + # Don't handoff if we're in the middle of nutrition consultation + if chat_history and self._is_mid_consultation(chat_history): + return False + return True + + return False + + def suggest_next_agent(self, user_query: str) -> Optional[str]: + """Override base method - Suggest which agent to hand off to based on query""" + query_lower = user_query.lower() + + # Priority order for handoff + if any(trigger in query_lower for trigger in self.handoff_triggers.get('symptom_agent', [])): + return 'symptom_agent' + + if any(trigger in query_lower for trigger in self.handoff_triggers.get('exercise_agent', [])): + return 'exercise_agent' + + if any(trigger in query_lower for trigger in self.handoff_triggers.get('mental_health_agent', [])): + return 'mental_health_agent' + + if any(trigger in query_lower for trigger in self.handoff_triggers.get('general_health_agent', [])): + return 'general_health_agent' + + return None + + def _is_mid_consultation(self, chat_history: List) -> bool: + """Check if we're in the middle of nutrition consultation""" + if not chat_history or len(chat_history) < 2: + return False + + # Check last bot response + last_bot_response = chat_history[-1][1] if len(chat_history[-1]) > 1 else "" + + # If we just asked for user data, don't handoff + if any(phrase in last_bot_response for phrase in [ + "cân nặng", "chiều cao", "tuổi", "giới tính", "mục tiêu" + ]): + return True + + return False + + def _generate_nutrition_summary(self) -> str: + """Generate summary of nutrition advice for handoff""" + nutrition_data = self.get_agent_data('nutrition_plan') + user_profile = self.get_user_profile() + + # Natural summary without robotic prefix + summary_parts = [] + + if nutrition_data and isinstance(nutrition_data, dict): + if 'bmi_analysis' in nutrition_data: + bmi = nutrition_data['bmi_analysis'] + summary_parts.append(f"BMI: {bmi.get('bmi', 'N/A')} ({bmi.get('category', 'N/A')})") + + if 'daily_targets' in nutrition_data: + targets = nutrition_data['daily_targets'] + summary_parts.append(f"Calo: {targets.get('calories', 'N/A')} kcal/ngày") + + if user_profile and user_profile.get('goal'): + summary_parts.append(f"Mục tiêu: {user_profile['goal']}") + + return " | ".join(summary_parts)[:100] if summary_parts else "" + + def _handle_error(self, error, user_query): + """Handle errors gracefully""" + return f"""Xin lỗi, mình gặp chút vấn đề khi tạo tư vấn dinh dưỡng. 😅 + +Lỗi: {str(error)} + +Bạn có thể thử: +1. Cung cấp lại thông tin: tuổi, giới tính, cân nặng, chiều cao +2. Hỏi câu hỏi cụ thể hơn về dinh dưỡng +3. Hoặc mình có thể tư vấn về chủ đề sức khỏe khác + +Bạn muốn thử lại không? 
💙""" diff --git a/agents/specialized/symptom_agent.py b/agents/specialized/symptom_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..09496f64c0c6170a4e9a7468e8e927070e3262f0 --- /dev/null +++ b/agents/specialized/symptom_agent.py @@ -0,0 +1,854 @@ +""" +Symptom Agent - Specialized agent for symptom assessment using OPQRST method +""" + +from config.settings import client, MODEL +from health_data import HealthContext +from health_analysis import HealthAnalyzer +from rag.rag_integration import get_rag_integration +from agents.core.base_agent import BaseAgent +from agents.core.context_analyzer import ContextAnalyzer +from agents.core.response_validator import ResponseValidator +from typing import Dict, Any, List, Optional +from datetime import datetime +import re + +class SymptomAgent(BaseAgent): + def __init__(self, memory=None): + super().__init__(memory) + self.health_context = None + self.analyzer = None + self.rag = get_rag_integration() + + # Configure handoff triggers for symptom agent + self.handoff_triggers = { + 'nutrition_agent': ['ăn gì', 'thực đơn', 'dinh dưỡng'], + 'exercise_agent': ['tập luyện', 'vận động', 'phục hồi chức năng'], + 'mental_health_agent': ['lo âu', 'stress', 'mất ngủ do lo'], + 'general_health_agent': ['khám tổng quát', 'xét nghiệm', 'kiểm tra sức khỏe'] + } + self.system_prompt = """Bạn là bác sĩ tư vấn chuyên nghiệp. + +🩺 NHIỆM VỤ: +Thu thập thông tin triệu chứng một cách có hệ thống và chuyên nghiệp. + +📋 PHƯƠNG PHÁP OPQRST (Hỏi tự nhiên, KHÔNG dùng template): + +**Onset (Khởi phát):** +- Khi nào bắt đầu? Đột ngột hay từ từ? +- Ví dụ tự nhiên: + * Đau đầu: "Đau đầu từ khi nào rồi bạn? Đột ngột hay từ từ?" + * Đầy bụng: "Cảm giác đầy bụng này xuất hiện từ bao giờ? Sau khi ăn hay suốt ngày?" + +**Quality (Đặc điểm):** +- Mô tả cảm giác như thế nào? +- Ví dụ tự nhiên: + * Đau đầu: "Đau kiểu gì? Đau nhói, tức, đập thình thình, hay nặng nề?" + * Đầy bụng: "Cảm giác đầy như thế nào? Căng cứng, khó tiêu, hay đau tức?" + +**Region (Vị trí):** +- Ở đâu? Có lan ra không? +- Ví dụ tự nhiên: + * Đau đầu: "Đau ở đâu? Trán, thái dương, sau gáy, hay cả đầu?" + * Đầy bụng: "Đầy ở vùng nào? Trên rốn, dưới rốn, hay toàn bộ bụng?" + +**Provocation/Palliation (Yếu tố ảnh hưởng):** +- Gì làm tệ/đỡ hơn? +- Ví dụ tự nhiên: + * Đau đầu: "Có gì làm đau nhiều hơn không? Ánh sáng, tiếng ồn, stress? Nghỉ ngơi có đỡ không?" + * Đầy bụng: "Ăn gì làm nặng hơn? Có loại thức ăn nào làm đỡ không?" + +**Severity (Mức độ):** +- Mức độ và triệu chứng kèm theo? +- Ví dụ tự nhiên: + * Đau đầu: "Đau nhiều không? Có buồn nôn, nhìn mờ, hoặc sợ ánh sáng không?" + * Đầy bụng: "Đầy nhiều không? Có ợ hơi, buồn nôn, hoặc khó thở không?" + +**Timing (Thời gian):** +- Khi nào xuất hiện? Liên tục hay từng đợt? +- Ví dụ tự nhiên: + * Đau đầu: "Đau suốt hay từng cơn? Thường xuất hiện lúc nào trong ngày?" + * Đầy bụng: "Đầy suốt ngày hay chỉ sau ăn? Kéo dài bao lâu?" + +🎯 NGUYÊN TẮC QUAN TRỌNG: + +1. **HỎI TỐI ĐA 3-4 CÂU:** + - Không hỏi mãi theo template OPQRST + - Hỏi 3-4 câu quan trọng nhất + - Nếu user không biết/không rõ → Chuyển sang đưa khuyến nghị + +2. **ƯU TIÊN THÔNG TIN:** + - Câu 1: Thời gian xuất hiện (khi nào?) + - Câu 2: Đặc điểm (đau như thế nào?) + - Câu 3: Mức độ (có triệu chứng kèm theo?) + - Câu 4 (nếu cần): Yếu tố ảnh hưởng + +3. **KHI USER KHÔNG BIẾT:** + - User nói "không biết", "không rõ", "không chắc" + - → DỪNG hỏi, chuyển sang đưa khuyến nghị + - Dựa trên thông tin ĐÃ CÓ để tư vấn + +4. 
**ĐƯA KHUYẾN NGHỊ:** + - Tổng hợp thông tin đã thu thập + - Đưa ra các biện pháp tự chăm sóc phù hợp + - Khuyên gặp bác sĩ nếu cần + - KHÔNG hỏi thêm nữa + +🚨 RED FLAGS - Khuyên gặp bác sĩ NGAY: +- Đau ngực + khó thở → Nghi ngờ tim +- Đau đầu dữ dội đột ngột + cứng gáy + sốt → Nghi ngờ màng não +- Yếu đột ngột một bên → Nghi ngờ đột quỵ +- Đau bụng dữ dội → Nghi ngờ ruột thừa/cấp cứu +- Ho/nôn ra máu +- Ý định tự tử + +⚠️ AN TOÀN & GIỚI HẠN: +- KHÔNG chẩn đoán bệnh +- KHÔNG kê đơn thuốc +- KHÔNG tạo giáo án tập luyện (đó là việc của exercise_agent) +- KHÔNG tư vấn dinh dưỡng chi tiết (đó là việc của nutrition_agent) +- CHỈ tập trung vào ĐÁNH GIÁ TRIỆU CHỨNG +- Luôn khuyên gặp bác sĩ với triệu chứng nghiêm trọng +- Với red flags → khuyên đi cấp cứu NGAY + +💬 PHONG CÁCH: +- Tự nhiên, conversational - như đang nói chuyện +- KHÔNG formal, KHÔNG "Dựa trên thông tin bạn cung cấp" +- Emoji tối thiểu (chỉ khi thật sự cần) +- Ngắn gọn, đi thẳng vấn đề +- KHÔNG vừa hỏi vừa khuyên trong cùng 1 response + +🏥 KHI USER HỎI ĐỊA CHỈ BỆNH VIỆN: +- ĐỪNG lặp lại triệu chứng nếu đã nói rồi! +- Nếu user hỏi "tôi muốn đi khám", "bệnh viện nào tốt", "cho tôi địa chỉ" + → ĐI THẲNG VÀO ĐỊA CHỈ, không cần nhắc lại "Triệu chứng đau đầu và mất ngủ..." +- Format địa chỉ bệnh viện: + +**Bệnh viện/Phòng khám gần [địa điểm]:** + +1. **Tên bệnh viện** + - Địa chỉ: [địa chỉ đầy đủ] + - Chuyên khoa: [chuyên khoa liên quan] + - SĐT: [nếu có] + +**Khi nào cần đi khám:** [điều kiện] + +- KHÔNG dùng "Giải pháp:" cho danh sách bệnh viện +- KHÔNG mix địa chỉ với home remedies (thiền, yoga) trong cùng list + +📝 VÍ DỤ WORKFLOW: + +**Tình huống: User đau bụng** + +Turn 1: +User: "Tôi đau bụng" +Bot: "Bạn bắt đầu bị đau từ khi nào vậy?" + +Turn 2: +User: "Mới xuất hiện, đau âm ỉ" +Bot: "Đau ở vị trí nào? Trên rốn, dưới rốn, hay toàn bộ bụng?" + +Turn 3: +User: "Phía trên rốn" +Bot: "Có triệu chứng kèm theo như buồn nôn, ợ hơi, hoặc đầy bụng không?" + +Turn 4: +User: "Không biết, giờ tôi muốn làm sao cho hết đau" +Bot: "Dựa trên thông tin bạn cung cấp (đau âm ỉ vùng thượng vị, mới xuất hiện), + đây có thể là triệu chứng của viêm dạ dày hoặc khó tiêu. Khuyến nghị: + + 1. Nghỉ ngơi, tránh căng thẳng + 2. Ăn nhẹ, tránh thức ăn cay nóng, cà phê, rượu + 3. Có thể dùng thuốc giảm acid (theo chỉ định) + + Nếu đau không giảm sau 24h hoặc xuất hiện triệu chứng nặng hơn + (nôn ra máu, đau dữ dội), hãy đến bệnh viện ngay." + +→ DỪNG hỏi, đưa khuyến nghị dựa trên thông tin có! + +🎯 NGUYÊN TẮC QUAN TRỌNG: + +1. **ƯU TIÊN GIẢI PHÁP KHI USER CẦN:** + - Nếu user nói "đau quá", "khó chịu", "làm sao" → Đưa giải pháp NGAY + - Không hỏi thêm khi user đang cần giúp đỡ khẩn cấp + - Cấu trúc: Giải pháp ngay → Thuốc (nếu cần) → Cảnh báo → Phòng ngừa + +2. **CHỈ HỎI KHI CẦN THIẾT:** + - Tối đa 1-2 câu hỏi trong toàn bộ conversation + - Nếu đã có đủ info cơ bản → Đưa lời khuyên luôn + - Nếu user không muốn trả lời → Đưa lời khuyên chung + +3. **EMOJI - Dùng tiết kiệm:** + - KHÔNG dùng 😔 cho mọi triệu chứng + - Chỉ dùng khi thực sự cần (trấn an, động viên) + - Có thể không dùng emoji nếu câu đã đủ ấm áp + +4. **PHÂN TÍCH CONTEXT:** + - Nếu là câu hỏi ĐẦU TIÊN → có thể đồng cảm + - Nếu đang FOLLOW-UP → đi thẳng vào câu hỏi, không cần lặp lại đồng cảm + - Nếu user đã trả lời nhiều câu → cảm ơn họ, không cần đồng cảm nữa + +VÍ DỤ CÁCH HỎI ĐA DẠNG: + +❌ SAI (Lặp lại pattern): +Turn 1: "Đau đầu khó chịu lắm nhỉ 😔 Cho mình hỏi..." +Turn 2: "Đau nhói khó chịu quá 😔 Mà này..." +Turn 3: "Sợ ánh sáng khó chịu lắm 😔 Còn về..." +→ LẶP LẠI "khó chịu" + 😔 = MÁY MÓC! 
+ +✅ ĐÚNG (Đa dạng, tự nhiên): +Turn 1: "Mình hiểu rồi. Cho mình hỏi, bạn bị đau từ khi nào?" +Turn 2: "À, đau nhói từ 2 ngày trước nhỉ. Vậy đau ở đâu? Trán, thái dương, hay cả đầu?" +Turn 3: "Được rồi. Có gì làm đau nhiều hơn không? Ví dụ ánh sáng, tiếng ồn, hay stress?" +→ BIẾN ĐỔI, TỰ NHIÊN! + +VÍ DỤ THEO TRIỆU CHỨNG: + +**Đau đầu - Variations:** +- "Cho mình hỏi, bạn bị đau từ khi nào?" +- "Mình hiểu. Đau đầu xuất hiện đột ngột hay từ từ?" +- "Để mình giúp bạn tìm hiểu. Đau kiểu gì? Nhói, tức, hay đập thình thình?" + +**Đầy bụng - Variations:** +- "Cảm giác đầy này xuất hiện từ bao giờ?" +- "Liên quan đến ăn uống không? Sau khi ăn hay suốt ngày?" +- "Có ợ hơi hoặc khó tiêu không?" + +**Đau lưng - Variations:** +- "Bạn bị đau lưng từ khi nào?" +- "Có bị chấn thương hay làm gì nặng không?" +- "Đau ở vị trí nào? Lưng trên, giữa, hay dưới?" + +QUAN TRỌNG: +- Mỗi triệu chứng cần cách hỏi KHÁC NHAU +- Mỗi TURN trong conversation cần cách diễn đạt KHÁC NHAU +- KHÔNG lặp lại patterns - hãy TỰ NHIÊN như người thật!""" + + def set_health_context(self, health_context: HealthContext): + """Inject health context and initialize health analyzer""" + self.health_context = health_context + self.analyzer = HealthAnalyzer(health_context) + + def handle(self, parameters, chat_history=None): + """ + Handle symptom assessment request using LLM for natural conversation + + Args: + parameters (dict): {"user_query": str} + chat_history (list): Conversation history + + Returns: + str: Response message + """ + user_query = parameters.get("user_query", "") + + # Check for red flags first + red_flag_response = self._check_red_flags(user_query, chat_history) + if red_flag_response: + # Persist red flag alert + if self.health_context: + self.health_context.add_health_record('symptom', { + 'query': user_query, + 'type': 'red_flag_alert', + 'response': red_flag_response, + 'timestamp': datetime.now().isoformat() + }) + return red_flag_response + + # Use LLM to naturally assess symptoms and ask questions + response = self._natural_symptom_assessment(user_query, chat_history) + + # Analyze health risks if analyzer is available + if self.analyzer: + risks = self.analyzer.identify_health_risks() + predictions = self.analyzer.predict_disease_risk() + else: + risks = [] + predictions = {} + + # Persist symptom data to health context + if self.health_context: + self.health_context.add_health_record('symptom', { + 'query': user_query, + 'response': response, + 'risks': risks, + 'predictions': predictions, + 'timestamp': datetime.now().isoformat() + }) + + return response + + def _build_context_instruction(self, context, chat_history, user_query=""): + """ + Build clear instruction based on conversation stage + """ + stage = context.get('conversation_stage', 0) + urgency = context.get('urgency', 'medium') + is_vague = context.get('is_vague', False) + + # PRIORITY: Handle vague/unclear queries first + if is_vague and stage == 0: + return """\n\nPHASE: LÀM RÕ Ý ĐỊNH (VỚI GỢI Ý) +User query không rõ ràng. Giúp user bằng GỢI Ý CỤ THỂ: + +1. ACKNOWLEDGE + HỎI VỚI GỢI Ý: + Format: "Mình thấy bạn [cảm giác user nói]. Bạn có thể cho mình biết rõ hơn không? Ví dụ như: + • [Gợi ý 1 liên quan] + • [Gợi ý 2 liên quan] + • [Gợi ý 3 liên quan] + • Hoặc vấn đề khác?" + +2. 
GỢI Ý DỰA VÀO TỪ KHÓA:
+   - "mệt" → gợi ý: mệt cơ thể, mệt tinh thần, mất ngủ, stress
+   - "không khỏe" → gợi ý: đau đầu, buồn nôn, chóng mặt, sốt
+   - "khó chịu" → gợi ý: đau bụng, khó tiêu, lo âu, căng thẳng
+   - "không ổn" → gợi ý: sức khỏe thể chất, tinh thần, dinh dưỡng
+
+3. VÍ DỤ CỤ THỂ:
+   User: "tôi mệt"
+   Bot: "Mình thấy bạn đang cảm thấy mệt. Bạn có thể nói rõ hơn không? Ví dụ:
+   • Mệt cơ thể, không có sức?
+   • Mệt tinh thần, stress?
+   • Mất ngủ, ngủ không ngon?
+   • Hay vấn đề khác?"
+
+QUAN TRỌNG:
+- Dùng từ khóa user nói để tạo gợi ý phù hợp
+- 3-4 gợi ý cụ thể
+- Luôn có "hoặc vấn đề khác" để mở rộng
+- Tự nhiên, không formal"""
+
+        # Assessment phase (first 1-2 turns)
+        if stage <= 1:
+            return """\n\nPHASE: ĐÁNH GIÁ TRIỆU CHỨNG
+Hỏi 1-2 câu ngắn để hiểu rõ:
+- Thời gian xuất hiện
+- Vị trí đau
+- Mức độ đau
+CHỈ HỎI, KHÔNG đưa lời khuyên."""
+
+        # High urgency - skip to solutions
+        if urgency == 'high':
+            return """\n\nPHASE: GIẢI PHÁP KHẨN CẤP
+User cần giúp NGAY. Đưa ra:
+1. Giải pháp tức thời (2-3 điểm)
+2. Thuốc có thể dùng + disclaimer
+3. Cảnh báo khi nào đi khám
+KHÔNG hỏi thêm."""
+
+        # Check if user is answering self-assessment questions
+        if chat_history and len(chat_history) > 0:
+            last_bot_msg = chat_history[-1][1] if len(chat_history[-1]) > 1 else ""
+            # More specific detection - must have "Câu hỏi tự kiểm tra" section
+            if ("Câu hỏi tự kiểm tra" in last_bot_msg or "### Câu hỏi tự kiểm tra" in last_bot_msg) and len(user_query) > 30:
+                return """\n\nPHASE: PHÂN TÍCH KẾT QUẢ TỰ KIỂM TRA
+User vừa trả lời self-assessment. Phân tích THÔNG MINH dựa trên ĐÚNG CONTEXT:
+
+QUAN TRỌNG:
+- CHỈ phân tích dựa trên triệu chứng user VỪA NÓI
+- KHÔNG dùng thông tin từ RAG không liên quan
+- KHÔNG nhầm lẫn với các bệnh khác
+
+1. NHẬN DIỆN PATTERN (dựa vào RAG knowledge):
+   - Đọc kỹ triệu chứng user mô tả
+   - So sánh với các bệnh có thể có (từ RAG)
+   - Tìm điểm KHÁC BIỆT quan trọng
+   - Đưa ra 1-2 khả năng phù hợp nhất
+
+2. ĐÁNH GIÁ MỨC ĐỘ NGHIÊM TRỌNG:
+   - Có red flags? → "CẦN KHÁM NGAY"
+   - Triệu chứng nặng? → "Nên đi khám sớm"
+   - Triệu chứng nhẹ? → "Thử giải pháp, không đỡ thì khám"
+
+3. LUÔN DISCLAIMER:
+   "Đây chỉ là đánh giá sơ bộ dựa trên triệu chứng. Bác sĩ sẽ chẩn đoán chính xác qua khám lâm sàng và xét nghiệm."
+
+4. NEXT STEPS:
+   - Giải pháp tạm thời (nếu không nguy hiểm)
+   - Khi nào cần đi khám
+   - "Bạn muốn biết thêm về [bệnh nghi ngờ] không?"
+
+QUAN TRỌNG: Phân tích GENERIC cho MỌI triệu chứng, KHÔNG hard-code."""
+
+        # Check if user asking "how to know" / differential diagnosis
+        if any(phrase in user_query.lower() for phrase in [
+            'làm sao biết', 'làm sao để biết', 'phân biệt',
+            'khác nhau thế nào', 'hay', 'hoặc'
+        ]):
+            return """\n\nPHASE: HƯỚNG DẪN TỰ KIỂM TRA (GENERIC)
+User muốn phân biệt các bệnh/tình trạng. Sử dụng RAG để:
+
+1. XÁC ĐỊNH các bệnh có thể (từ user query):
+   - Trích xuất các bệnh user đề cập
+   - Hoặc tìm các bệnh liên quan đến triệu chứng
+
+2. TẠO BẢNG SO SÁNH:
+   Format:
+   **[Bệnh A]:**
+   • Triệu chứng đặc trưng 1
+   • Triệu chứng đặc trưng 2
+   • Đặc điểm riêng
+
+   **[Bệnh B]:**
+   • Triệu chứng đặc trưng 1
+   • Triệu chứng đặc trưng 2
+   • Đặc điểm riêng
+
+   **Điểm khác biệt chính:** [Highlight key differences]
+
+3. CÂU HỎI TỰ KIỂM TRA:
+   Tạo 3-5 câu hỏi giúp user tự đánh giá:
+   • Về thời gian xuất hiện
+   • Về đặc điểm triệu chứng
+   • Về yếu tố kích hoạt
+   • Về triệu chứng kèm theo
+
+4. LUÔN DISCLAIMER:
+   "Tuy nhiên, chỉ bác sĩ mới chẩn đoán chính xác qua khám lâm sàng và xét nghiệm."
+
+5. Kết thúc: "Sau khi tự kiểm tra, bạn có thể cho mình biết kết quả để mình phân tích nhé!"
+
+QUAN TRỌNG: Dùng RAG knowledge, KHÔNG hard-code bệnh cụ thể."""
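+        # Decision cascade recap (order matters): vague query → assessment for
+        # the first 1-2 turns → high-urgency shortcut → self-assessment
+        # follow-up → differential "how to know" comparison → the default
+        # advice instruction below. A query like "đau bụng hay đau dạ dày?" is
+        # caught by the differential branch above because it contains 'hay'.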
Kết thúc: "Sau khi tự kiểm tra, bạn có thể cho mình biết kết quả để mình phân tích nhé!" + +QUAN TRỌNG: Dùng RAG knowledge, KHÔNG hard-code bệnh cụ thể.""" + + # Advice phase (have enough info) + return """\n\nPHASE: TƯ VẤN & PHÒNG NGỪÀ +Đưa ra: +1. Đánh giá ngắn (1 câu): Triệu chứng có thể là gì +2. Giải pháp (3-4 điểm cụ thể) +3. Khi nào cần đi khám +4. Kết thúc: "Có gì thắc mắc cứ hỏi mình nhé!" +KHÔNG nói "Dựa trên thông tin".""" + + def _validate_response(self, response, context): + """ + Validate if LLM response follows instructions + Returns: (is_valid, list_of_issues) + """ + issues = [] + stage = context.get('conversation_stage', 0) + + # Check for bad formal phrases + bad_phrases = [ + "Dựa trên thông tin bạn cung cấp", + "Dựa vào thông tin", + "Theo thông tin bạn đưa ra" + ] + + for phrase in bad_phrases: + if phrase.lower() in response.lower(): + issues.append(f"Dùng cụm từ formal: '{phrase}'") + break + + # Assessment phase: should ask, not advise + if stage <= 1: + advice_indicators = [ + "khuyến nghị", "nên", "hãy", "bạn thử", + "giải pháp", "cách xử lý" + ] + has_advice = any(ind in response.lower() for ind in advice_indicators) + has_question = '?' in response + + if has_advice and not has_question: + issues.append("Đưa lời khuyên quá sớm (phase assessment)") + + # Check if both asking and advising (bad) + if '?' in response: + advice_count = sum(1 for ind in ["khuyến nghị", "nên", "hãy thử"] if ind in response.lower()) + if advice_count >= 2: + issues.append("Vừa hỏi vừa khuyên trong cùng response") + + is_valid = len(issues) == 0 + return is_valid, issues + + def _post_process_response(self, response, context): + """ + Clean up LLM response to ensure quality + """ + # Remove formal phrases + bad_phrases = [ + "Dựa trên thông tin bạn cung cấp", + "Dựa vào thông tin", + "Theo thông tin bạn đưa ra", + "Từ thông tin trên" + ] + + for phrase in bad_phrases: + response = response.replace(phrase, "") + response = response.replace(phrase.lower(), "") + + # Clean up extra whitespace + response = "\n".join(line.strip() for line in response.split("\n") if line.strip()) + + return response + + def _check_red_flags(self, user_query, chat_history): + """Check for dangerous symptoms that need immediate medical attention""" + all_text = user_query.lower() + if chat_history: + all_text += " " + " ".join([msg[0].lower() for msg in chat_history if msg[0]]) + + red_flags = { + "heart_attack": { + "keywords": ["đau ngực", "khó thở", "chest pain", "đau tim"], + "message": """🚨 **CẢNH BÁO KHẨN CẤP** + +Triệu chứng của bạn có thể liên quan đến **cơn đau tim**. Đây là tình huống khẩn cấp! + +⚠️ **HÃY LÀM NGAY:** +1. **Gọi cấp cứu 115** hoặc đến bệnh viện GẤP +2. Ngồi nghỉ, không vận động +3. Nếu có aspirin, nhai 1 viên (nếu không dị ứng) +4. Thông báo cho người thân + +🚑 **KHÔNG TỰ LÁI XE** - Gọi xe cấp cứu hoặc nhờ người khác đưa đi + +Sức khỏe của bạn là ưu tiên số 1. Hãy đi khám NGAY nhé! 💙""" + }, + "stroke": { + "keywords": ["yếu một bên", "méo miệng", "nói khó", "tê nửa người"], + "message": """🚨 **CẢNH BÁO KHẨN CẤP - NGUY CƠ ĐỘT QUỴ** + +Triệu chứng của bạn có thể là **đột quỵ não**. Mỗi phút đều quan trọng! + +⚠️ **HÃY LÀM NGAY:** +1. **Gọi cấp cứu 115 NGAY LẬP TỨC** +2. Ghi nhớ thời gian triệu chứng bắt đầu +3. Nằm nghỉ, đầu hơi cao +4. KHÔNG cho ăn uống gì + +🚑 Đây là cấp cứu y tế. Hãy đi bệnh viện NGAY! 
Thời gian vàng chỉ có 3-4 giờ!""" + }, + "meningitis": { + "keywords": ["đau đầu dữ dội", "cứng gáy", "sốt cao", "buồn nôn"], + "message": """🚨 **CẢNH BÁO - CẦN KHÁM NGAY** + +Triệu chứng đau đầu dữ dội + cứng gáy + sốt có thể là **viêm màng não** - rất nguy hiểm! + +⚠️ **HÃY LÀM NGAY:** +1. Đi bệnh viện hoặc gọi cấp cứu 115 +2. Không trì hoãn +3. Thông báo bác sĩ về tất cả triệu chứng + +Đây là tình huống nghiêm trọng. Hãy đi khám NGAY nhé! 🏥""" + }, + "severe_abdominal": { + "keywords": ["đau bụng dữ dội", "đau bụng không chịu nổi", "đau bụng cấp"], + "message": """⚠️ **CẦN KHÁM BÁC SĨ NGAY** + +Đau bụng dữ dội có thể là nhiều nguyên nhân nghiêm trọng (viêm ruột thừa, sỏi mật, thủng dạ dày...). + +🏥 **Hãy đi khám ngay nếu:** +- Đau không giảm sau 1-2 giờ +- Kèm sốt, nôn, tiêu chảy +- Bụng cứng, đau khi ấn +- Có máu trong phân + +Đừng chần chừ, hãy đi bệnh viện để được khám và xử lý kịp thời nhé!""" + } + } + + for flag_type, flag_data in red_flags.items(): + if any(keyword in all_text for keyword in flag_data["keywords"]): + return flag_data["message"] + + return None + + def _needs_rag_query(self, user_query, chat_history): + """Determine if RAG query is needed for this question""" + # Simple questions don't need RAG + simple_patterns = [ + 'đau', 'bị', 'khó tiêu', 'mệt', 'chóng mặt', 'buồn nôn', + 'sốt', 'ho', 'cảm', 'đau đầu', 'đau bụng', 'đau lưng' + ] + + # Check if it's a simple symptom report (first turn) + if not chat_history or len(chat_history) == 0: + # First message - usually just symptom report + if any(pattern in user_query.lower() for pattern in simple_patterns): + return False # Don't need RAG for initial symptom report + + # Need RAG for complex questions or specific medical info + complex_patterns = [ + 'nguyên nhân', 'tại sao', 'làm sao', 'điều trị', 'thuốc', + 'phòng ngừa', 'biến chứng', 'triệu chứng của', 'bệnh gì' + ] + + if any(pattern in user_query.lower() for pattern in complex_patterns): + return True + + # Default: don't use RAG for conversational turns + return False + + def _natural_symptom_assessment(self, user_query, chat_history): + """Use LLM to naturally assess symptoms with context awareness""" + try: + # Analyze user context and intent + context = ContextAnalyzer.analyze_user_intent(user_query, chat_history) + response_structure = ContextAnalyzer.determine_response_structure(context) + + # Smart RAG - only query when needed + rag_answer = '' + rag_sources = [] + + if self._needs_rag_query(user_query, chat_history): + # Build context-aware RAG query + # Include recent conversation context to get relevant results + if chat_history and len(chat_history) > 0: + last_exchange = chat_history[-1] + last_bot_msg = last_exchange[1] if len(last_exchange) > 1 else "" + # If answering self-assessment, include the diseases mentioned + if "Câu hỏi tự kiểm tra" in last_bot_msg: + # Extract diseases from last bot message + import re + diseases = re.findall(r'\*\*\[(.*?)\]:\*\*', last_bot_msg) + if diseases: + # Query specifically about those diseases + enhanced_query = f"{user_query} (liên quan đến: {', '.join(diseases)})" + rag_result = self.rag.query_health(enhanced_query) + else: + rag_result = self.rag.query_health(user_query) + else: + rag_result = self.rag.query_health(user_query) + else: + rag_result = self.rag.query_health(user_query) + + rag_answer = rag_result.get('answer', '') + rag_sources = rag_result.get('source_docs', []) + + # Build conversation context + messages = [{"role": "system", "content": self.system_prompt}] + + # Add RAG context if available 
with explicit filtering instruction
+            if rag_answer:
+                # Add warning about context relevance
+                rag_instruction = f"""Thông tin tham khảo từ cơ sở dữ liệu:\n{rag_answer}
+
+⚠️ QUAN TRỌNG:
+- CHỈ sử dụng thông tin LIÊN QUAN đến triệu chứng user đang nói
+- KHÔNG dùng thông tin về bệnh khác không liên quan
+- Nếu thông tin RAG không match với triệu chứng user → BỎ QUA"""
+                messages.append({"role": "system", "content": rag_instruction})
+
+            # Add chat history (last 5 exchanges for context)
+            if chat_history:
+                recent_history = chat_history[-5:] if len(chat_history) > 5 else chat_history
+                for user_msg, bot_msg in recent_history:
+                    if user_msg:
+                        messages.append({"role": "user", "content": user_msg})
+                    if bot_msg:
+                        messages.append({"role": "assistant", "content": bot_msg})
+
+            # Build context-aware instruction
+            context_prompt = self._build_context_instruction(context, chat_history, user_query)
+
+            messages.append({"role": "user", "content": user_query + context_prompt})
+
+            # Get LLM response
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=messages,
+                temperature=0.7,
+                max_tokens=500
+            )
+
+            llm_response = response.choices[0].message.content
+
+            # CRITICAL: Check for context mismatch (e.g., talking about brain when discussing stomach)
+            if chat_history and len(chat_history) > 0:
+                # Get recent symptoms mentioned
+                recent_symptoms = []
+                for msg, _ in chat_history[-3:]:
+                    if msg:
+                        recent_symptoms.extend([
+                            'đau bụng', 'dạ dày', 'tiêu hóa', 'ăn', 'buồn nôn', 'ợ'
+                        ] if any(w in msg.lower() for w in ['bụng', 'dạ dày', 'ăn', 'nôn', 'ợ']) else [])
+
+                # Check if response mentions completely unrelated conditions
+                # NOTE: only the stomach→brain case is enforced below; the
+                # 'head' and 'respiratory' entries are currently unused
+                unrelated_keywords = {
+                    'stomach': ['viêm màng não', 'cứng gáy', 'não'],
+                    'head': ['đau bụng', 'tiêu hóa', 'dạ dày'],
+                    'respiratory': ['đau bụng', 'dạ dày']
+                }
+
+                # If discussing stomach but response mentions brain → REJECT
+                if recent_symptoms and any('bụng' in s or 'dạ dày' in s for s in recent_symptoms):
+                    if any(keyword in llm_response.lower() for keyword in ['viêm màng não', 'cứng gáy', 'não', 'đầu dữ dội']):
+                        print("⚠️ CONTEXT MISMATCH DETECTED: Response about brain when discussing stomach!")
+                        # Force retry with explicit instruction
+                        messages[-1]['content'] += "\n\n🚨 LỖI NGHIÊM TRỌNG: User đang nói về BỤNG/DẠ DÀY, KHÔNG phải đầu/não! Phân tích lại ĐÚNG triệu chứng!"
+
+                        retry_response = client.chat.completions.create(
+                            model=MODEL,
+                            messages=messages,
+                            temperature=0.3,  # Very low temp for accuracy
+                            max_tokens=500
+                        )
+                        llm_response = retry_response.choices[0].message.content
+
+            # Validate response quality using shared validator
+            is_valid, issues = ResponseValidator.validate_response(
+                llm_response,
+                agent_type='symptom',
+                context=context,
+                chat_history=chat_history
+            )
+
+            # Retry if invalid (max 1 retry)
+            if not is_valid:
+                print(f"Response validation failed: {issues}. Retrying...")
+                # Add stronger instruction
+                messages[-1]['content'] += f"\n\nLỖI TRƯỚC: {', '.join(issues)}. HÃY SỬA LẠI!"
+
+                retry_response = client.chat.completions.create(
+                    model=MODEL,
+                    messages=messages,
+                    temperature=0.5,  # Lower temp for more control
+                    max_tokens=500
+                )
+                llm_response = retry_response.choices[0].message.content
+
+            # Post-process to ensure quality
+            llm_response = self._post_process_response(llm_response, context)
+
+            # Add sources using the RAG integration formatter
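+            # Sources are attached only at this final step - after the context
+            # mismatch check, the validator retry, and post-processing - so the
+            # cleanup passes above cannot strip or reflow the citation block.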
+ if rag_sources: + formatted_response = self.rag.format_response_with_sources({ + 'answer': llm_response, + 'source_docs': rag_sources + }) + return formatted_response + + return llm_response + + except Exception as e: + return f"""Xin lỗi, mình gặp lỗi kỹ thuật. Bạn có thể: +1. Thử lại câu hỏi +2. Hoặc nếu triệu chứng nghiêm trọng, hãy gặp bác sĩ ngay nhé 🙏 + +Lỗi: {str(e)[:100]}""" + + def _assess_opqrst_progress(self, chat_history): + """Assess how much OPQRST data has been collected""" + if not chat_history: + return {'complete': False, 'next_step': 'onset', 'data': {}} + + # Analyze conversation to see what's been asked + all_bot_messages = " ".join([msg[1].lower() for msg in chat_history if msg[1]]) + all_user_messages = " ".join([msg[0].lower() for msg in chat_history if msg[0]]) + + opqrst_data = { + 'onset': None, + 'provocation': None, + 'quality': None, + 'region': None, + 'severity': None, + 'timing': None + } + + # Check what's been asked + if "khi nào" in all_bot_messages or "bắt đầu" in all_bot_messages: + opqrst_data['onset'] = 'asked' + + if "làm tệ hơn" in all_bot_messages or "làm đỡ" in all_bot_messages: + opqrst_data['provocation'] = 'asked' + + if "mô tả cảm giác" in all_bot_messages or "đau kiểu gì" in all_bot_messages: + opqrst_data['quality'] = 'asked' + + if "vị trí" in all_bot_messages or "ở đâu" in all_bot_messages: + opqrst_data['region'] = 'asked' + + if "mức độ" in all_bot_messages or "1-10" in all_bot_messages: + opqrst_data['severity'] = 'asked' + + if "lúc nào xuất hiện" in all_bot_messages or "liên tục" in all_bot_messages: + opqrst_data['timing'] = 'asked' + + # Determine next step + for step, value in opqrst_data.items(): + if value is None: + return {'complete': False, 'next_step': step, 'data': opqrst_data} + + # All steps completed + return {'complete': True, 'next_step': None, 'data': opqrst_data} + + def _ask_next_opqrst_question(self, next_step, user_query): + """Ask the next OPQRST question""" + questions = { + 'onset': """Mình hiểu rồi. Để đánh giá chính xác hơn, cho mình hỏi thêm nhé: + +- Triệu chứng này bắt đầu từ khi nào? (hôm nay, mấy ngày, mấy tuần?) +- Nó xuất hiện đột ngột hay từ từ?""", + + 'quality': """À được rồi. Mà này: + +- Bạn mô tả cảm giác đó như thế nào? (đau nhói, tức, nóng rát, tê, đập thình thình...) +- Mức độ từ 1-10 thì bao nhiêu? (1 = nhẹ, 10 = không chịu nổi)""", + + 'region': """Ừm, để mình hỏi thêm: + +- Vị trí chính xác ở đâu? (chỉ rõ vùng cơ thể) +- Có lan ra chỗ khác không?""", + + 'provocation': """Bạn có nhận thấy: + +- Có gì làm nó tệ hơn không? (vận động, ăn uống, stress, tư thế...) +- Có gì làm nó đỡ hơn không? (nghỉ ngơi, thuốc, chườm...)""", + + 'timing': """Quan trọng nhé: + +- Nó xuất hiện lúc nào trong ngày? (sáng, chiều, tối, đêm?) +- Liên tục hay từng đợt? Mỗi đợt kéo dài bao lâu?""", + + 'severity': """Cuối cùng: + +- Có kèm theo triệu chứng nào khác không? (sốt, buồn nôn, chóng mặt, mệt mỏi...) +- Có ảnh hưởng đến ăn uống, ngủ nghỉ, sinh hoạt không? +- Bạn có bệnh nền gì không? 
Đang uống thuốc gì không?""" + } + + return questions.get(next_step, "Cho mình biết thêm về triệu chứng của bạn nhé?") + + def _provide_assessment(self, opqrst_data, user_query): + """Provide symptom assessment after collecting OPQRST data""" + # Use LLM to analyze symptoms with OPQRST context + try: + response = client.chat.completions.create( + model=MODEL, + messages=[ + {"role": "system", "content": self.system_prompt}, + {"role": "user", "content": f"""Dựa vào thông tin OPQRST đã thu thập, hãy đánh giá triệu chứng và đưa ra lời khuyên. + +Triệu chứng ban đầu: {user_query} + +Thông tin đã thu thập: +- Onset: {opqrst_data.get('onset', 'chưa rõ')} +- Quality: {opqrst_data.get('quality', 'chưa rõ')} +- Region: {opqrst_data.get('region', 'chưa rõ')} +- Provocation: {opqrst_data.get('provocation', 'chưa rõ')} +- Timing: {opqrst_data.get('timing', 'chưa rõ')} +- Severity: {opqrst_data.get('severity', 'chưa rõ')} + +Hãy đưa ra: +1. Phân tích triệu chứng +2. Nguyên nhân có thể +3. Lời khuyên xử lý tại nhà (nếu phù hợp) +4. Khi nào cần gặp bác sĩ +5. Lời động viên, trấn an"""} + ], + temperature=0.7, + max_tokens=1500 + ) + + return response.choices[0].message.content + + except Exception as e: + return f"""Xin lỗi, mình gặp chút vấn đề khi phân tích triệu chứng. + +Dựa vào những gì bạn chia sẻ, mình khuyên bạn nên: +- Theo dõi triệu chứng thêm 24-48 giờ +- Nghỉ ngơi đầy đủ +- Uống đủ nước +- Nếu triệu chứng tệ hơn hoặc không giảm → đi khám bác sĩ + +Với các triệu chứng bất thường, tốt nhất là được bác sĩ khám trực tiếp nhé! 🏥""" diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..1a73bda9d30e6fd610ebe283a6f7566526287153 --- /dev/null +++ b/app.py @@ -0,0 +1,31 @@ +from ui import build_ui +import signal +import sys +import os + +def signal_handler(sig, frame): + """Handle Ctrl+C gracefully""" + print("\n\n👋 Đang tắt server... Bye bye!") + # Use os._exit() instead of sys.exit() to avoid atexit callbacks + # This prevents the torch cleanup race condition warning + os._exit(0) + +# Register signal handler +signal.signal(signal.SIGINT, signal_handler) + +demo = build_ui() +if __name__ == "__main__": + try: + demo.queue().launch( + debug=False, + share=True, + show_api=False, + show_error=True, + quiet=False # Keep startup messages but hide processing time in UI + ) + except KeyboardInterrupt: + print("\n\n👋 Server đã tắt. 
Hẹn gặp lại!") + except Exception as e: + print(f"\n❌ Lỗi: {e}") + finally: + print("✅ Cleanup hoàn tất.") \ No newline at end of file diff --git a/assets/bot-avatar.png b/assets/bot-avatar.png new file mode 100644 index 0000000000000000000000000000000000000000..a61efec2c3f535a45a0f8b6772544ec4ffa410df --- /dev/null +++ b/assets/bot-avatar.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8420adfd23bba9c59fcd05d29e80f97fefb1259d5f43cdddf3e8201d8de24e67 +size 1359701 diff --git a/auth/auth.py b/auth/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..ee1b6f37b5c9ed100120f36c63a6b89679e4a0dd --- /dev/null +++ b/auth/auth.py @@ -0,0 +1,103 @@ +# auth/auth.py +import bcrypt +from auth.db import get_connection + +def register_user(username, password): + username = username.strip() + password = password.strip() + if not username or not password: + return False, "Vui lòng nhập tài khoản và mật khẩu" + conn = get_connection() + cursor = conn.cursor() + cursor.execute("SELECT * FROM users WHERE username = ?", (username,)) + if cursor.fetchone(): + conn.close() + return False, "Tài khoản đã tồn tại" + hashed = bcrypt.hashpw(password.encode(), bcrypt.gensalt()) + cursor.execute("INSERT INTO users (username, password_hash) VALUES (?, ?)", (username, hashed)) + conn.commit() + conn.close() + return True, "Đăng ký thành công" + +def login_user(username, password): + username = username.strip() + password = password.strip() + if not username or not password: + return False, "Vui lòng nhập tài khoản và mật khẩu" + conn = get_connection() + cursor = conn.cursor() + cursor.execute("SELECT password_hash FROM users WHERE username = ?", (username,)) + row = cursor.fetchone() + conn.close() + if row and bcrypt.checkpw(password.encode(), row[0]): + return True, "Đăng nhập thành công" + return False, "Sai tài khoản hoặc mật khẩu" + +def save_message(username, message): + if isinstance(message, (list, tuple)): + message = "\n".join(str(m) for m in message) + conn = get_connection() + cursor = conn.cursor() + cursor.execute("INSERT INTO chat_history (username, message) VALUES (?, ?)", (username, message)) + conn.commit() + conn.close() + +def clear_history(username): + conn = get_connection() + cursor = conn.cursor() + cursor.execute("DELETE FROM chat_history WHERE username = ?", (username,)) + conn.commit() + conn.close() + +# def save_message(username, message, agent_type): +# if isinstance(message, (list, tuple)): +# message = "\n".join(str(m) for m in message) +# conn = get_connection() +# cursor = conn.cursor() +# cursor.execute("INSERT INTO chat_history (username, message, agent_type) VALUES (?, ?, ?)", (username, message, agent_type)) +# conn.commit() +# conn.close() + + +def load_history(username): + conn = get_connection() + cursor = conn.cursor() + cursor.execute("SELECT message FROM chat_history WHERE username = ? ORDER BY timestamp ASC", (username,)) + rows = cursor.fetchall() + conn.close() + + messages = [row[0] for row in rows] + + history = [] + for i in range(0, len(messages), 2): + if i + 1 < len(messages): + history.append([messages[i], messages[i + 1]]) + + # Convert to ChatbotDataMessage format + from utils.helpers import convert_list_to_chatbot_messages + return convert_list_to_chatbot_messages(history) + +# def load_history(username, agent_type): +# conn = get_connection() +# cursor = conn.cursor() +# cursor.execute("SELECT message FROM chat_history WHERE username = ? 
ORDER BY timestamp ASC", (username,agent_type)) +# rows = cursor.fetchall() +# conn.close() + +# messages = [row[0] for row in rows] + +# history = [] +# for i in range(0, len(messages), 2): +# if i + 1 < len(messages): +# history.append([messages[i], messages[i + 1]]) +# return history + +def logout_user(state): + state.value["user"] = None + state.value["history"] = [] + return "Đã đăng xuất" + +# def logout_user(state): +# state.value["user"] = None +# state.value["history"] = [] +# return "Đã đăng xuất" diff --git a/auth/db.py b/auth/db.py new file mode 100644 index 0000000000000000000000000000000000000000..90ce9d0da1645bf0bf0f6ccdbb5adca12ee6a522 --- /dev/null +++ b/auth/db.py @@ -0,0 +1,50 @@ +# # auth/db.py +import sqlite3 + +DB_PATH = "users.db" + +def get_connection(): + return sqlite3.connect(DB_PATH) + +# def init_db(): +# conn = get_connection() +# cursor = conn.cursor() +# cursor.execute(''' +# CREATE TABLE IF NOT EXISTS users ( +# username TEXT PRIMARY KEY, +# password_hash TEXT +# ) +# ''') +# cursor.execute(''' +# CREATE TABLE IF NOT EXISTS chat_history ( +# id INTEGER PRIMARY KEY AUTOINCREMENT, +# username TEXT, +# message TEXT, +# agent_type TEXT, +# timestamp DATETIME DEFAULT CURRENT_TIMESTAMP +# ) +# ''') +# conn.commit() +# conn.close() + +def init_db(): + conn = get_connection() + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS users ( + username TEXT PRIMARY KEY, + password_hash TEXT + ) + ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS chat_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT, + message TEXT, + timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY(username) REFERENCES users(username) + ) + ''') + conn.commit() + conn.close() + diff --git a/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/data_level0.bin b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/data_level0.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa855eade9bd6f499c4fb5853027bd55bdddfe0e --- /dev/null +++ b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/data_level0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a1c7ea64fe9a64a550318f75a98804b3c188afa8150c4d21ada092a2b2cc939 +size 5809016 diff --git a/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/header.bin b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/header.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9a4c49d1ac320c6de676fc601a26ea3593f9d6f Binary files /dev/null and b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/header.bin differ diff --git a/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/index_metadata.pickle b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/index_metadata.pickle new file mode 100644 index 0000000000000000000000000000000000000000..72c29b7f8e8e21716e433666a76b6c6c90b3b815 --- /dev/null +++ b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/index_metadata.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bea7df1a6cd117ce0c99c0f140761e17b43f79b42dd85c270144e3c9090c5983 +size 319012 diff --git a/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/length.bin b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/length.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef4c8c16fda663229f379c09d16dcea24bf6460a Binary files /dev/null and b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/length.bin differ diff --git a/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/link_lists.bin b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/link_lists.bin new file 
mode 100644 index 0000000000000000000000000000000000000000..212802bb2c1f06528ee20214edfafade25e69f8d Binary files /dev/null and b/chroma_db/e49f080a-e10b-4088-ba71-405ae42658a8/link_lists.bin differ diff --git a/config/settings.py b/config/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..ebbe489c331d81508efe6852df4c552b0cf9960c --- /dev/null +++ b/config/settings.py @@ -0,0 +1,22 @@ +import os +from dotenv import load_dotenv + +# Tải biến từ file .env +load_dotenv() +import openai + +# Environment-configurable settings +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "Your API Key") +OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://aiportalapi.stu-platform.live/jpe") +MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini") +EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2" + +# Initialize OpenAI client (reuse across project) +client = openai.OpenAI( + base_url=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY +) + +CHROMA_PATH = r"chroma_db/" +DATA_PATH = r"rag/data" +RULES_PATH = r"modules/rules.json" \ No newline at end of file diff --git a/data_mining/__init__.py b/data_mining/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e05416cc9d48848ef4c90124603eea61b258f3f3 --- /dev/null +++ b/data_mining/__init__.py @@ -0,0 +1,2 @@ +# Data Mining Package +# Scripts for downloading and processing medical datasets diff --git a/data_mining/mining_fitness.py b/data_mining/mining_fitness.py new file mode 100644 index 0000000000000000000000000000000000000000..76b030616afdddc98fbab9008bd07619f9d20d8b --- /dev/null +++ b/data_mining/mining_fitness.py @@ -0,0 +1,134 @@ +""" +Fitness Dataset - Download & Process +Downloads and processes gym exercise data into ChromaDB +Dataset: onurSakar/GYM-Exercise (1.66K exercises) +""" + +from datasets import load_dataset +import pandas as pd +import chromadb +from sentence_transformers import SentenceTransformer +import os + +def download_fitness(): + """Download GYM Exercise dataset from HuggingFace""" + + print("📥 Downloading GYM Exercise dataset...") + print(" Source: onurSakar/GYM-Exercise") + + try: + dataset = load_dataset("onurSakar/GYM-Exercise") + + os.makedirs("data_mining/datasets", exist_ok=True) + + df = dataset['train'].to_pandas() + + output_path = "data_mining/datasets/gym_exercise.csv" + df.to_csv(output_path, index=False) + + file_size = os.path.getsize(output_path) / (1024 * 1024) + + print(f"✅ Downloaded: {output_path}") + print(f"📊 Records: {len(df)}") + print(f"📊 File size: {file_size:.2f} MB") + + return True + + except Exception as e: + print(f"❌ Download failed: {e}") + return False + +def process_fitness(): + """Process Fitness dataset and build ChromaDB""" + + print("\n🔨 Processing Fitness dataset...") + + csv_path = "data_mining/datasets/gym_exercise.csv" + if not os.path.exists(csv_path): + print(f"❌ Dataset not found: {csv_path}") + return False + + df = pd.read_csv(csv_path) + print(f"📊 Loaded {len(df)} records") + + print("🤖 Loading embedding model...") + embedder = SentenceTransformer('keepitreal/vietnamese-sbert') + + print("💾 Initializing ChromaDB...") + os.makedirs("data_mining/output", exist_ok=True) + client = chromadb.PersistentClient(path="data_mining/output/fitness_chroma") + + collection = client.get_or_create_collection( + name="fitness", + metadata={"hnsw:space": "cosine"} + ) + + print("📝 Processing fitness data...") + + processed = 0 + + for idx, row in df.iterrows(): + text_parts = [] + for col in df.columns: + value = str(row[col]) + if value and value != 
'nan' and len(value) > 2: + text_parts.append(f"{col}: {value}") + + text = "\n".join(text_parts) + + if len(text) < 10: + continue + + embedding = embedder.encode(text) + + collection.add( + ids=[f"fitness_{processed:05d}"], + embeddings=[embedding.tolist()], + documents=[text], + metadatas=[{ + 'domain': 'fitness', + 'agent': 'FitnessAgent', + 'source': 'GYM_Exercise', + 'index': processed + }] + ) + + processed += 1 + + if (processed % 100) == 0: + print(f" Processed {processed}/{len(df)} records...") + + print(f"✅ Processed {processed} fitness records") + print(f"💾 Database saved to: data_mining/output/fitness_chroma/") + + db_path = "data_mining/output/fitness_chroma" + total_size = 0 + for dirpath, dirnames, filenames in os.walk(db_path): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + total_size += os.path.getsize(filepath) + + print(f"📊 Database size: {total_size / (1024 * 1024):.2f} MB") + + return True + +def main(): + """Main function - download and process""" + print("=" * 60) + print("Fitness Dataset - Download & Process") + print("=" * 60) + + if not download_fitness(): + return False + + if not process_fitness(): + return False + + print("\n" + "=" * 60) + print("✅ Fitness dataset ready!") + print("=" * 60) + return True + +if __name__ == "__main__": + success = main() + exit(0 if success else 1) diff --git a/data_mining/mining_medical_qa.py b/data_mining/mining_medical_qa.py new file mode 100644 index 0000000000000000000000000000000000000000..9103bc84b9531c5c3acd2d16c56cb323a6068f86 --- /dev/null +++ b/data_mining/mining_medical_qa.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +""" +Mining Script: Vietnamese Medical Q&A Dataset +Downloads and processes hungnm/vietnamese-medical-qa from HuggingFace +Splits into 2 collections: symptom_qa and general_health_qa +""" + +import sys +import pandas as pd +from pathlib import Path + +def download_medical_qa(): + """Download Vietnamese Medical Q&A dataset from HuggingFace""" + try: + from datasets import load_dataset + + print("📥 Downloading Vietnamese Medical Q&A from HuggingFace...") + print(" Source: hungnm/vietnamese-medical-qa") + print(" Size: ~9,335 Q&A pairs") + + # Download dataset + dataset = load_dataset("hungnm/vietnamese-medical-qa") + df = dataset['train'].to_pandas() + + print(f"✅ Downloaded: {len(df)} Q&A pairs") + + # Save to CSV + output_dir = Path("data_mining/datasets") + output_dir.mkdir(parents=True, exist_ok=True) + + output_path = output_dir / "vietnamese_medical_qa.csv" + df.to_csv(output_path, index=False, encoding='utf-8') + + print(f"💾 Saved to: {output_path}") + return df + + except ImportError: + print("❌ Error: 'datasets' library not installed") + print(" Install with: pip install datasets") + return None + except Exception as e: + print(f"❌ Error downloading dataset: {e}") + return None + + +def is_symptom_question(question): + """ + Classify if question is about SPECIFIC SYMPTOMS + + Returns: + bool: True if symptom question, False if general health question + """ + if not question or not isinstance(question, str): + return False + + question_lower = question.lower() + + # Symptom keywords (high priority - user describing active symptoms) + symptom_keywords = [ + # Pain + 'bị đau', 'đau', 'nhức', 'tức', 'đau nhức', + + # Infection/Fever + 'bị sốt', 'sốt', 'viêm', 'nhiễm trùng', 'mủ', 'sưng', + + # Digestive + 'buồn nôn', 'nôn', 'tiêu chảy', 'táo bón', 'đầy hơi', + 'ợ hơi', 'ợ chua', 'khó tiêu', + + # Respiratory + 'ho', 'khó thở', 'nghẹt mũi', 'chảy nước mũi', + 'đau họng', 
'khàn giọng', + + # Neurological + 'chóng mặt', 'hoa mắt', 'mất thăng bằng', 'đau đầu', + + # Skin + 'ngứa', 'phát ban', 'nổi mẩn', 'đỏ', + + # General symptoms + 'mệt mỏi', 'yếu', 'không khỏe', 'bị ốm', 'khó chịu' + ] + + # General health keywords (prevention, knowledge, advice) + general_keywords = [ + # Prevention + 'làm sao để không', 'phòng ngừa', 'tránh', 'cách phòng', + 'làm thế nào để', 'cách nào để', + + # Knowledge questions + 'là gì', 'có phải', 'có nên', 'nên không', + 'tại sao', 'nguyên nhân', 'có thể', + + # Advice/Recommendations + 'nên làm gì', 'nên ăn gì', 'có tốt không', + 'có được không', 'có nên', 'khuyên' + ] + + # Count keyword matches + symptom_score = sum(1 for kw in symptom_keywords if kw in question_lower) + general_score = sum(1 for kw in general_keywords if kw in question_lower) + + # Decision logic + if symptom_score > general_score: + return True # Symptom question + elif general_score > symptom_score: + return False # General health question + else: + # Tie-breaker: Check for "bị" (indicates having a condition) + return 'bị' in question_lower + + +def process_medical_qa(): + """Process and split into 2 ChromaDB collections""" + try: + from sentence_transformers import SentenceTransformer + import chromadb + + print("\n🔄 Processing Vietnamese Medical Q&A...") + + # Load CSV + csv_path = Path("data_mining/datasets/vietnamese_medical_qa.csv") + if not csv_path.exists(): + print(f"❌ Error: {csv_path} not found") + return False + + df = pd.read_csv(csv_path, encoding='utf-8') + print(f"📊 Loaded: {len(df)} Q&A pairs") + + # Initialize embedding model + print("🤖 Loading embedding model: keepitreal/vietnamese-sbert...") + embedder = SentenceTransformer('keepitreal/vietnamese-sbert') + + # Initialize ChromaDB + output_dir = Path("data_mining/output") + output_dir.mkdir(parents=True, exist_ok=True) + + # Split data + symptom_data = [] + general_data = [] + + print("🔍 Classifying questions...") + for idx, row in df.iterrows(): + question = str(row['question']) + answer = str(row['answer']) + + # Combine Q&A + text = f"Câu hỏi: {question}\n\nTrả lời: {answer}" + + # Classify + if is_symptom_question(question): + symptom_data.append({ + 'id': f'symptom_qa_{idx}', + 'text': text, + 'question': question, + 'answer': answer, + 'type': 'symptom' + }) + else: + general_data.append({ + 'id': f'general_qa_{idx}', + 'text': text, + 'question': question, + 'answer': answer, + 'type': 'general' + }) + + print(f"✅ Classification complete:") + print(f" - Symptom Q&A: {len(symptom_data)} ({len(symptom_data)/len(df)*100:.1f}%)") + print(f" - General Health Q&A: {len(general_data)} ({len(general_data)/len(df)*100:.1f}%)") + + # Create ChromaDB collections + # 1. 
Symptom Q&A Collection + print("\n📦 Creating Symptom Q&A ChromaDB...") + symptom_client = chromadb.PersistentClient(path=str(output_dir / "symptom_qa_chroma")) + symptom_collection = symptom_client.get_or_create_collection( + name="symptom_qa", + metadata={"description": "Vietnamese Medical Q&A - Symptom Questions"} + ) + + # Batch insert symptom data + batch_size = 100 + for i in range(0, len(symptom_data), batch_size): + batch = symptom_data[i:i+batch_size] + + ids = [item['id'] for item in batch] + texts = [item['text'] for item in batch] + metadatas = [{ + 'type': item['type'], + 'domain': 'symptom', + 'agent': 'SymptomAgent', + 'source': 'vietnamese-medical-qa' + } for item in batch] + + # Generate embeddings + embeddings = embedder.encode(texts, show_progress_bar=False) + + symptom_collection.add( + ids=ids, + embeddings=embeddings.tolist(), + documents=texts, + metadatas=metadatas + ) + + if (i + batch_size) % 500 == 0: + print(f" Processed {min(i+batch_size, len(symptom_data))}/{len(symptom_data)} symptom Q&A...") + + print(f"✅ Symptom Q&A ChromaDB created: {len(symptom_data)} records") + + # 2. General Health Q&A Collection + print("\n📦 Creating General Health Q&A ChromaDB...") + general_client = chromadb.PersistentClient(path=str(output_dir / "general_health_qa_chroma")) + general_collection = general_client.get_or_create_collection( + name="general_health_qa", + metadata={"description": "Vietnamese Medical Q&A - General Health Questions"} + ) + + # Batch insert general data + for i in range(0, len(general_data), batch_size): + batch = general_data[i:i+batch_size] + + ids = [item['id'] for item in batch] + texts = [item['text'] for item in batch] + metadatas = [{ + 'type': item['type'], + 'domain': 'general_health', + 'agent': 'GeneralHealthAgent', + 'source': 'vietnamese-medical-qa' + } for item in batch] + + # Generate embeddings + embeddings = embedder.encode(texts, show_progress_bar=False) + + general_collection.add( + ids=ids, + embeddings=embeddings.tolist(), + documents=texts, + metadatas=metadatas + ) + + if (i + batch_size) % 500 == 0: + print(f" Processed {min(i+batch_size, len(general_data))}/{len(general_data)} general Q&A...") + + print(f"✅ General Health Q&A ChromaDB created: {len(general_data)} records") + + print("\n✅ Processing complete!") + print(f" Output: {output_dir}") + print(f" - symptom_qa_chroma/ ({len(symptom_data)} records)") + print(f" - general_health_qa_chroma/ ({len(general_data)} records)") + + return True + + except ImportError as e: + print(f"❌ Error: Missing library - {e}") + print(" Install with: pip install sentence-transformers chromadb") + return False + except Exception as e: + print(f"❌ Error processing dataset: {e}") + import traceback + traceback.print_exc() + return False + + +def main(): + """Main execution""" + print("=" * 60) + print("Vietnamese Medical Q&A Dataset Mining") + print("Source: hungnm/vietnamese-medical-qa (HuggingFace)") + print("=" * 60) + + # Step 1: Download + df = download_medical_qa() + if df is None: + print("\n❌ Download failed!") + return False + + # Step 2: Process + success = process_medical_qa() + if not success: + print("\n❌ Processing failed!") + return False + + print("\n" + "=" * 60) + print("✅ SUCCESS! 
Vietnamese Medical Q&A ready for RAG system") + print("=" * 60) + return True + + +if __name__ == "__main__": + success = main() + sys.exit(0 if success else 1) diff --git a/data_mining/mining_mentalchat.py b/data_mining/mining_mentalchat.py new file mode 100644 index 0000000000000000000000000000000000000000..1b80aec96a4ac9a8a7c37f3e0730a6f8dc734749 --- /dev/null +++ b/data_mining/mining_mentalchat.py @@ -0,0 +1,178 @@ +""" +MentalChat16K Dataset - Download & Process +Downloads and processes mental health counseling conversations into ChromaDB +Dataset: ShenLab/MentalChat16K (16K conversations, 33 topics) +""" + +from datasets import load_dataset +import pandas as pd +import chromadb +from sentence_transformers import SentenceTransformer +import os + +def download_mentalchat(): + """Download MentalChat16K dataset from HuggingFace""" + + print("📥 Downloading MentalChat16K dataset...") + print(" Source: ShenLab/MentalChat16K") + print(" Coverage: 33 mental health topics") + + try: + # Load dataset from HuggingFace + dataset = load_dataset("ShenLab/MentalChat16K") + + # Create output directory + os.makedirs("data_mining/datasets", exist_ok=True) + + # Convert to pandas DataFrame + df = dataset['train'].to_pandas() + + # Save to CSV + output_path = "data_mining/datasets/mentalchat16k.csv" + df.to_csv(output_path, index=False) + + # Check file size + file_size = os.path.getsize(output_path) / (1024 * 1024) # MB + + print(f"✅ Downloaded: {output_path}") + print(f"📊 Records: {len(df)}") + print(f"📊 File size: {file_size:.2f} MB") + + return True + + except Exception as e: + print(f"❌ Download failed: {e}") + return False + +def process_mentalchat(): + """Process MentalChat16K dataset and build ChromaDB""" + + print("\n🔨 Processing MentalChat16K dataset...") + + # Load dataset + csv_path = "data_mining/datasets/mentalchat16k.csv" + if not os.path.exists(csv_path): + print(f"❌ Dataset not found: {csv_path}") + return False + + df = pd.read_csv(csv_path) + print(f"📊 Loaded {len(df)} records") + + # Initialize embedder + print("🤖 Loading embedding model...") + embedder = SentenceTransformer('keepitreal/vietnamese-sbert') + + # Initialize ChromaDB + print("💾 Initializing ChromaDB...") + os.makedirs("data_mining/output", exist_ok=True) + client = chromadb.PersistentClient(path="data_mining/output/mental_health_chroma") + + # Create collection + collection = client.get_or_create_collection( + name="mental_health", + metadata={"hnsw:space": "cosine"} + ) + + # Process conversations + print("📝 Processing conversations...") + + # Determine column names and combine if needed + if 'instruction' in df.columns and 'output' in df.columns: + # New format: instruction + input + output + print(" Detected instruction-based format") + df['text'] = df.apply(lambda row: + f"User: {row['instruction']}\n{row.get('input', '')}\n\nAssistant: {row['output']}", + axis=1 + ) + text_column = 'text' + else: + # Try to find existing text column + text_column = None + for col in ['conversation', 'text', 'Context', 'Question', 'Response']: + if col in df.columns: + text_column = col + break + + if not text_column: + print(f"❌ Could not find text column. 
Available: {df.columns.tolist()}") + return False + + print(f" Using column: '{text_column}'") + + processed = 0 + batch_size = 100 + + for i in range(0, len(df), batch_size): + batch = df.iloc[i:i+batch_size] + + ids = [] + embeddings = [] + documents = [] + metadatas = [] + + for idx, row in batch.iterrows(): + text = str(row[text_column]) + + if len(text) < 10: + continue + + embedding = embedder.encode(text) + + ids.append(f"mental_{processed:05d}") + embeddings.append(embedding.tolist()) + documents.append(text) + metadatas.append({ + 'domain': 'mental_health', + 'agent': 'MentalHealthAgent', + 'source': 'MentalChat16K', + 'index': processed + }) + + processed += 1 + + if ids: + collection.add( + ids=ids, + embeddings=embeddings, + documents=documents, + metadatas=metadatas + ) + + if (i + batch_size) % 1000 == 0: + print(f" Processed {min(i + batch_size, len(df))}/{len(df)} records...") + + print(f"✅ Processed {processed} conversations") + print(f"💾 Database saved to: data_mining/output/mental_health_chroma/") + + # Get database size + db_path = "data_mining/output/mental_health_chroma" + total_size = 0 + for dirpath, dirnames, filenames in os.walk(db_path): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + total_size += os.path.getsize(filepath) + + print(f"📊 Database size: {total_size / (1024 * 1024):.2f} MB") + + return True + +def main(): + """Main function - download and process""" + print("=" * 60) + print("MentalChat16K Dataset - Download & Process") + print("=" * 60) + + if not download_mentalchat(): + return False + + if not process_mentalchat(): + return False + + print("\n" + "=" * 60) + print("✅ MentalChat16K dataset ready!") + print("=" * 60) + return True + +if __name__ == "__main__": + success = main() + exit(0 if success else 1) diff --git a/data_mining/mining_nutrition.py b/data_mining/mining_nutrition.py new file mode 100644 index 0000000000000000000000000000000000000000..bd88a134d0375a50637774b211f2f6381aac8a45 --- /dev/null +++ b/data_mining/mining_nutrition.py @@ -0,0 +1,144 @@ +""" +Nutrition Dataset - Download & Process +Downloads and processes dietary recommendation data into ChromaDB +Dataset: issai/LLM_for_Dietary_Recommendation_System (50 patient profiles) +""" + +from datasets import load_dataset +import pandas as pd +import chromadb +from sentence_transformers import SentenceTransformer +import os + +def download_nutrition(): + """Download Dietary Recommendation dataset from HuggingFace""" + + print("📥 Downloading Dietary Recommendation dataset...") + print(" Source: issai/LLM_for_Dietary_Recommendation_System") + + try: + dataset = load_dataset("issai/LLM_for_Dietary_Recommendation_System") + + os.makedirs("data_mining/datasets", exist_ok=True) + + df = dataset['train'].to_pandas() + + output_path = "data_mining/datasets/nutrition_diet.csv" + df.to_csv(output_path, index=False) + + file_size = os.path.getsize(output_path) / (1024 * 1024) + + print(f"✅ Downloaded: {output_path}") + print(f"📊 Records: {len(df)}") + print(f"📊 File size: {file_size:.2f} MB") + + return True + + except Exception as e: + print(f"❌ Download failed: {e}") + return False + +def process_nutrition(): + """Process Nutrition dataset and build ChromaDB""" + + print("\n🔨 Processing Nutrition dataset...") + + csv_path = "data_mining/datasets/nutrition_diet.csv" + if not os.path.exists(csv_path): + print(f"❌ Dataset not found: {csv_path}") + return False + + df = pd.read_csv(csv_path) + print(f"📊 Loaded {len(df)} records") + + print("🤖 Loading embedding 
model...") + embedder = SentenceTransformer('keepitreal/vietnamese-sbert') + + print("💾 Initializing ChromaDB...") + os.makedirs("data_mining/output", exist_ok=True) + client = chromadb.PersistentClient(path="data_mining/output/nutrition_chroma") + + collection = client.get_or_create_collection( + name="nutrition", + metadata={"hnsw:space": "cosine"} + ) + + print("📝 Processing nutrition data...") + + text_columns = [] + for col in ['profile', 'recommendation', 'diet_plan', 'text', 'content']: + if col in df.columns: + text_columns.append(col) + + if not text_columns: + text_columns = df.columns.tolist() + + print(f" Using columns: {text_columns}") + + processed = 0 + + for idx, row in df.iterrows(): + text_parts = [] + for col in text_columns: + value = str(row[col]) + if value and value != 'nan' and len(value) > 5: + text_parts.append(f"{col}: {value}") + + text = "\n".join(text_parts) + + if len(text) < 20: + continue + + embedding = embedder.encode(text) + + collection.add( + ids=[f"nutrition_{processed:05d}"], + embeddings=[embedding.tolist()], + documents=[text], + metadatas=[{ + 'domain': 'nutrition', + 'agent': 'NutritionAgent', + 'source': 'LLM_Dietary_Recommendation', + 'index': processed + }] + ) + + processed += 1 + + if (processed % 10) == 0: + print(f" Processed {processed}/{len(df)} records...") + + print(f"✅ Processed {processed} nutrition records") + print(f"💾 Database saved to: data_mining/output/nutrition_chroma/") + + db_path = "data_mining/output/nutrition_chroma" + total_size = 0 + for dirpath, dirnames, filenames in os.walk(db_path): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + total_size += os.path.getsize(filepath) + + print(f"📊 Database size: {total_size / (1024 * 1024):.2f} MB") + + return True + +def main(): + """Main function - download and process""" + print("=" * 60) + print("Nutrition Dataset - Download & Process") + print("=" * 60) + + if not download_nutrition(): + return False + + if not process_nutrition(): + return False + + print("\n" + "=" * 60) + print("✅ Nutrition dataset ready!") + print("=" * 60) + return True + +if __name__ == "__main__": + success = main() + exit(0 if success else 1) diff --git a/data_mining/mining_vietnamese_nutrition.py b/data_mining/mining_vietnamese_nutrition.py new file mode 100644 index 0000000000000000000000000000000000000000..49b2ab0af5ab63d69699258f9ce6a7de1b8b24d3 --- /dev/null +++ b/data_mining/mining_vietnamese_nutrition.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +""" +Mining Script: Vietnamese Food Nutrition Database +Processes Vietnamese food CSV into ChromaDB for NutritionAgent +""" + +import sys +import pandas as pd +from pathlib import Path + +def process_vietnamese_nutrition(): + """Process Vietnamese food nutrition CSV into ChromaDB""" + try: + from sentence_transformers import SentenceTransformer + import chromadb + + print("🍜 Processing Vietnamese Food Nutrition Database...") + + # Load CSV + csv_path = Path("data_mining/datasets/vietnamese_food_nutrition.csv") + if not csv_path.exists(): + print("❌ CSV not found. 
Creating it first...") + import vn_food_db + vn_food_db.vn_food_db() + + df = pd.read_csv(csv_path) + print(f"📊 Loaded: {len(df)} Vietnamese foods") + + # Initialize + print("🤖 Loading embedding model...") + embedder = SentenceTransformer('keepitreal/vietnamese-sbert') + + output_dir = Path("data_mining/output") + output_dir.mkdir(parents=True, exist_ok=True) + + client = chromadb.PersistentClient(path=str(output_dir / "vietnamese_nutrition_chroma")) + collection = client.get_or_create_collection( + name="vietnamese_nutrition", + metadata={"description": "Vietnamese Food Nutrition Database"} + ) + + # Process foods + print("📦 Creating ChromaDB...") + batch_size = 20 + for i in range(0, len(df), batch_size): + batch = df.iloc[i:i+batch_size] + + ids = [] + texts = [] + metadatas = [] + + for idx, row in batch.iterrows(): + # Create document + text = f"""Món ăn: {row['name_vi']} ({row['name_en']}) +Calories: {row['calories']} kcal +Protein: {row['protein_g']}g +Carbohydrates: {row['carbs_g']}g +Fat: {row['fat_g']}g +Fiber: {row['fiber_g']}g +Category: {row['category']}""" + + ids.append(f"food_{idx}") + texts.append(text) + metadatas.append({ + 'name_vi': row['name_vi'], + 'name_en': row['name_en'], + 'calories': int(row['calories']), + 'category': row['category'], + 'source': 'vietnamese_food_db' + }) + + # Generate embeddings + embeddings = embedder.encode(texts, show_progress_bar=False) + + # Add to collection + collection.add( + ids=ids, + embeddings=embeddings.tolist(), + documents=texts, + metadatas=metadatas + ) + + print(f" Processed {min(i+batch_size, len(df))}/{len(df)} foods...") + + print(f"\n✅ Vietnamese Nutrition ChromaDB created!") + print(f" Output: {output_dir / 'vietnamese_nutrition_chroma'}") + print(f" Records: {len(df)} foods") + + return True + + except ImportError as e: + print(f"❌ Missing library: {e}") + print(" Install: pip install sentence-transformers chromadb pandas") + return False + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + return False + +def main(): + """Main execution""" + print("=" * 60) + print("Vietnamese Food Nutrition Database Mining") + print("=" * 60) + + success = process_vietnamese_nutrition() + + if success: + print("\n" + "=" * 60) + print("✅ SUCCESS! 
Vietnamese nutrition data ready for RAG") + print("=" * 60) + else: + print("\n❌ FAILED!") + + return success + +if __name__ == "__main__": + success = main() + sys.exit(0 if success else 1) diff --git a/data_mining/mining_vimedical.py b/data_mining/mining_vimedical.py new file mode 100644 index 0000000000000000000000000000000000000000..b7210f96fc309f681582579dccfaff88f697e65c --- /dev/null +++ b/data_mining/mining_vimedical.py @@ -0,0 +1,180 @@ +""" +ViMedical Disease Dataset - Download & Process +Downloads and processes Vietnamese medical disease dataset into ChromaDB +Dataset: PB3002/ViMedical_Disease (603 diseases, 12K+ examples) +""" + +import requests +import pandas as pd +import chromadb +from sentence_transformers import SentenceTransformer +import os +import re + +def download_vimedical(): + """Download ViMedical dataset from HuggingFace""" + + print("📥 Downloading ViMedical Disease dataset...") + + # HuggingFace dataset URL + url = "https://huggingface.co/datasets/PB3002/ViMedical_Disease/resolve/main/ViMedical_Disease.csv" + + # Create datasets directory + os.makedirs("data_mining/datasets", exist_ok=True) + output_path = "data_mining/datasets/vimedical_disease.csv" + + try: + # Download + response = requests.get(url, timeout=60) + response.raise_for_status() + + # Save + with open(output_path, 'wb') as f: + f.write(response.content) + + # Check file size + file_size = os.path.getsize(output_path) / (1024 * 1024) # MB + + print(f"✅ Downloaded: {output_path}") + print(f"📊 File size: {file_size:.2f} MB") + + return True + + except Exception as e: + print(f"❌ Download failed: {e}") + return False + +def extract_symptoms(question): + """Extract symptom description from question""" + # Remove common prefixes + prefixes = [ + 'Tôi đang có triệu chứng như ', + 'Tôi thường xuyên ', + 'Tôi cảm thấy ', + 'Tôi bị ', + 'Tôi hay ', + 'Tôi có ' + ] + + symptom = question + for prefix in prefixes: + if symptom.startswith(prefix): + symptom = symptom[len(prefix):] + break + + # Remove question suffix + suffixes = [ + '. Tôi bị bệnh gì?', + '. Tôi có thể bị gì?', + '. Đó là bệnh gì?' 
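+        # Stripping these tails leaves just the symptom phrase, e.g.
+        # extract_symptoms("Tôi bị ho khan. Tôi bị bệnh gì?") -> "ho khan"
+        # (the "Tôi bị " prefix above is removed first, then this suffix)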
+    ]
+    for suffix in suffixes:
+        if symptom.endswith(suffix):
+            symptom = symptom[:-len(suffix)]
+            break
+
+    return symptom.strip()
+
+def process_vimedical():
+    """Process ViMedical dataset and build ChromaDB"""
+
+    print("\n🔨 Processing ViMedical dataset...")
+
+    # Load dataset
+    csv_path = "data_mining/datasets/vimedical_disease.csv"
+    if not os.path.exists(csv_path):
+        print(f"❌ Dataset not found: {csv_path}")
+        return False
+
+    df = pd.read_csv(csv_path)
+    print(f"📊 Loaded {len(df)} records")
+    print(f"📊 Unique diseases: {df['Disease'].nunique()}")
+
+    # Initialize embedder
+    print("🤖 Loading embedding model...")
+    embedder = SentenceTransformer('keepitreal/vietnamese-sbert')
+
+    # Initialize ChromaDB
+    print("💾 Initializing ChromaDB...")
+    os.makedirs("data_mining/output", exist_ok=True)
+    client = chromadb.PersistentClient(path="data_mining/output/medical_chroma")
+
+    # Create collection
+    collection = client.get_or_create_collection(
+        name="medical_diseases",
+        metadata={"hnsw:space": "cosine"}
+    )
+
+    # Group by disease
+    print("📝 Processing diseases...")
+    disease_groups = df.groupby('Disease')
+
+    processed = 0
+    for disease_name, group in disease_groups:
+        # Extract symptoms from all questions
+        symptoms = []
+        for question in group['Question']:
+            symptom = extract_symptoms(question)
+            if symptom:
+                symptoms.append(symptom)
+
+        # Create document text
+        doc_text = f"Bệnh: {disease_name}\n\nTriệu chứng:\n"
+        doc_text += "\n".join(f"- {s}" for s in symptoms[:10])  # Limit to 10 examples
+
+        # Generate embedding
+        embedding = embedder.encode(doc_text)
+
+        # Add to ChromaDB
+        collection.add(
+            ids=[f"disease_{processed:04d}"],
+            embeddings=[embedding.tolist()],
+            documents=[doc_text],
+            metadatas=[{
+                'disease_name': disease_name,
+                'num_examples': len(symptoms),
+                'source': 'ViMedical_Disease'
+            }]
+        )
+
+        processed += 1
+        if processed % 50 == 0:
+            print(f"   Processed {processed}/{len(disease_groups)} diseases...")
+
+    print(f"✅ Processed {processed} diseases")
+    print(f"💾 Database saved to: data_mining/output/medical_chroma/")
+
+    # Get database size
+    db_path = "data_mining/output/medical_chroma"
+    total_size = 0
+    for dirpath, dirnames, filenames in os.walk(db_path):
+        for filename in filenames:
+            filepath = os.path.join(dirpath, filename)
+            total_size += os.path.getsize(filepath)
+
+    print(f"📊 Database size: {total_size / (1024 * 1024):.2f} MB")
+
+    return True
+
+def main():
+    """Main function - download and process"""
+    print("=" * 60)
+    print("ViMedical Disease Dataset - Download & Process")
+    print("=" * 60)
+
+    # Step 1: Download
+    if not download_vimedical():
+        return False
+
+    # Step 2: Process
+    if not process_vimedical():
+        return False
+
+    print("\n" + "=" * 60)
+    print("✅ ViMedical dataset ready!")
+    print("=" * 60)
+    return True
+
+if __name__ == "__main__":
+    success = main()
+    exit(0 if success else 1)
diff --git a/data_mining/vn_food_db.py b/data_mining/vn_food_db.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e8bf0880f817f7d681473d800d594b24b671caf
--- /dev/null
+++ b/data_mining/vn_food_db.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""
+Create Vietnamese Food Nutrition Database
+Generates CSV with ~73 Vietnamese foods and their nutrition facts
+"""
+
+import csv
+import sys
+from pathlib import Path
+
+def vn_food_db():
+    """Create comprehensive Vietnamese food nutrition database"""
+
+    # Vietnamese food nutrition data
+    # Format: [name_vi, name_en, calories, protein_g, carbs_g, fat_g, fiber_g, category]
+    foods = [
+        # PHỞ & NOODLE 
SOUPS (Món Phở & Bún) + ["Phở bò", "Beef Pho", 450, 20, 60, 15, 2, "Noodle Soup"], + ["Phở gà", "Chicken Pho", 380, 18, 55, 10, 2, "Noodle Soup"], + ["Phở tái", "Rare Beef Pho", 420, 19, 58, 12, 2, "Noodle Soup"], + ["Phở chín", "Well-done Beef Pho", 460, 21, 60, 16, 2, "Noodle Soup"], + ["Bún bò Huế", "Hue Beef Noodle", 500, 22, 65, 18, 3, "Noodle Soup"], + ["Bún riêu", "Crab Noodle Soup", 420, 18, 58, 14, 3, "Noodle Soup"], + ["Bún chả cá", "Fish Cake Noodle", 380, 20, 52, 12, 2, "Noodle Soup"], + ["Hủ tiếu", "Hu Tieu Noodle", 400, 16, 60, 10, 2, "Noodle Soup"], + ["Mì Quảng", "Quang Noodle", 450, 20, 58, 15, 3, "Noodle Soup"], + ["Cao lầu", "Cao Lau Noodle", 480, 18, 62, 16, 2, "Noodle Soup"], + + # BÚN (Vermicelli Dishes) + ["Bún chả", "Grilled Pork Vermicelli", 550, 20, 70, 20, 2, "Vermicelli"], + ["Bún thịt nướng", "Grilled Pork Vermicelli", 520, 22, 68, 18, 2, "Vermicelli"], + ["Bún bò xào", "Stir-fried Beef Vermicelli", 480, 20, 65, 15, 3, "Vermicelli"], + ["Bún gà nướng", "Grilled Chicken Vermicelli", 450, 24, 62, 12, 2, "Vermicelli"], + ["Bún nem nướng", "Grilled Pork Patty Vermicelli", 500, 18, 66, 16, 2, "Vermicelli"], + + # CƠM (Rice Dishes) + ["Cơm tấm", "Broken Rice", 600, 25, 80, 20, 2, "Rice"], + ["Cơm sườn", "Pork Chop Rice", 650, 28, 85, 22, 2, "Rice"], + ["Cơm gà", "Chicken Rice", 550, 30, 75, 15, 2, "Rice"], + ["Cơm chiên", "Fried Rice", 580, 15, 78, 22, 2, "Rice"], + ["Cơm rang dương châu", "Yang Chow Fried Rice", 620, 18, 82, 24, 2, "Rice"], + ["Cơm hến", "Clam Rice", 480, 20, 70, 12, 3, "Rice"], + ["Cơm trắng", "White Rice", 200, 4, 45, 0.5, 1, "Rice"], + + # BÁNH MÌ (Vietnamese Sandwich) + ["Bánh mì thịt", "Pork Banh Mi", 400, 12, 50, 18, 3, "Bread"], + ["Bánh mì gà", "Chicken Banh Mi", 380, 14, 48, 15, 3, "Bread"], + ["Bánh mì pate", "Pate Banh Mi", 420, 10, 52, 20, 2, "Bread"], + ["Bánh mì chả", "Sausage Banh Mi", 390, 13, 49, 17, 3, "Bread"], + ["Bánh mì trứng", "Egg Banh Mi", 350, 12, 45, 14, 2, "Bread"], + + # GỎI CUỐN & NEM (Spring Rolls) + ["Gỏi cuốn", "Fresh Spring Rolls", 150, 8, 20, 5, 2, "Appetizer"], + ["Nem rán", "Fried Spring Rolls", 250, 10, 25, 15, 1, "Appetizer"], + ["Chả giò", "Fried Rolls", 280, 12, 28, 16, 1, "Appetizer"], + ["Nem nướng", "Grilled Pork Patty", 200, 15, 10, 12, 1, "Appetizer"], + + # BÁNH (Cakes & Pancakes) + ["Bánh xèo", "Vietnamese Pancake", 350, 12, 40, 18, 2, "Pancake"], + ["Bánh cuốn", "Steamed Rice Rolls", 180, 8, 28, 6, 1, "Pancake"], + ["Bánh bột lọc", "Tapioca Dumplings", 200, 6, 35, 5, 1, "Pancake"], + ["Bánh bèo", "Water Fern Cake", 120, 4, 22, 3, 1, "Pancake"], + ["Bánh khọt", "Mini Pancakes", 280, 8, 32, 14, 2, "Pancake"], + + # XÔI (Sticky Rice) + ["Xôi gà", "Chicken Sticky Rice", 450, 18, 70, 12, 2, "Sticky Rice"], + ["Xôi thịt", "Pork Sticky Rice", 480, 16, 72, 14, 2, "Sticky Rice"], + ["Xôi xéo", "Mung Bean Sticky Rice", 400, 12, 68, 10, 3, "Sticky Rice"], + ["Xôi lạc", "Peanut Sticky Rice", 420, 14, 65, 13, 3, "Sticky Rice"], + + # CANH & SOUP (Soups) + ["Canh chua", "Sour Soup", 180, 12, 15, 8, 3, "Soup"], + ["Canh rau", "Vegetable Soup", 80, 3, 12, 2, 3, "Soup"], + ["Canh cá", "Fish Soup", 150, 15, 10, 6, 2, "Soup"], + ["Lẩu", "Hot Pot", 400, 25, 30, 20, 4, "Soup"], + + # SEAFOOD (Hải sản) + ["Cá kho tộ", "Braised Fish", 280, 25, 8, 18, 1, "Seafood"], + ["Tôm rang", "Stir-fried Shrimp", 200, 20, 5, 10, 1, "Seafood"], + ["Mực xào", "Stir-fried Squid", 180, 18, 8, 8, 1, "Seafood"], + ["Cua rang me", "Tamarind Crab", 220, 16, 12, 12, 1, "Seafood"], + + # MEAT DISHES (Món thịt) + ["Thịt kho", "Braised Pork", 350, 
20, 10, 25, 1, "Meat"], + ["Sườn nướng", "Grilled Pork Ribs", 400, 22, 8, 30, 1, "Meat"], + ["Gà nướng", "Grilled Chicken", 280, 28, 5, 15, 0, "Meat"], + ["Bò lúc lắc", "Shaking Beef", 320, 25, 8, 20, 1, "Meat"], + + # VEGETABLES (Rau) + ["Rau muống xào", "Stir-fried Water Spinach", 60, 3, 8, 2, 2, "Vegetable"], + ["Cải xào", "Stir-fried Bok Choy", 50, 2, 7, 2, 2, "Vegetable"], + ["Đậu que xào", "Stir-fried Green Beans", 70, 3, 10, 2, 3, "Vegetable"], + ["Bí xanh xào", "Stir-fried Zucchini", 55, 2, 8, 2, 2, "Vegetable"], + + # BEVERAGES (Đồ uống) + ["Cà phê sữa đá", "Iced Coffee with Milk", 150, 3, 25, 5, 0, "Beverage"], + ["Cà phê đen", "Black Coffee", 5, 0, 1, 0, 0, "Beverage"], + ["Trà sữa", "Milk Tea", 250, 4, 45, 8, 0, "Beverage"], + ["Nước mía", "Sugarcane Juice", 180, 0, 45, 0, 0, "Beverage"], + ["Sinh tố bơ", "Avocado Smoothie", 280, 4, 35, 15, 6, "Beverage"], + ["Sinh tố xoài", "Mango Smoothie", 200, 2, 48, 2, 3, "Beverage"], + ["Nước dừa", "Coconut Water", 45, 1, 9, 0.5, 1, "Beverage"], + ["Trà đá", "Iced Tea", 2, 0, 0.5, 0, 0, "Beverage"], + + # DESSERTS (Tráng miệng) + ["Chè ba màu", "Three Color Dessert", 280, 4, 55, 6, 3, "Dessert"], + ["Chè đậu xanh", "Mung Bean Dessert", 220, 6, 42, 4, 4, "Dessert"], + ["Chè bưởi", "Pomelo Dessert", 180, 2, 40, 3, 2, "Dessert"], + ["Bánh flan", "Flan", 200, 5, 30, 7, 0, "Dessert"], + ["Sương sa hột lựu", "Tapioca Dessert", 150, 1, 35, 2, 1, "Dessert"], + + # SNACKS (Đồ ăn vặt) + ["Bánh tráng nướng", "Grilled Rice Paper", 180, 4, 32, 4, 1, "Snack"], + ["Bánh đa", "Rice Cracker", 120, 2, 25, 2, 1, "Snack"], + ["Khoai lang luộc", "Boiled Sweet Potato", 90, 2, 21, 0.2, 3, "Snack"], + ["Bắp luộc", "Boiled Corn", 110, 3, 25, 1.5, 3, "Snack"], + ] + + # Create CSV + output_dir = Path("data_mining/datasets") + output_dir.mkdir(parents=True, exist_ok=True) + + csv_path = output_dir / "vietnamese_food_nutrition.csv" + + with open(csv_path, 'w', newline='', encoding='utf-8') as f: + writer = csv.writer(f) + + # Header + writer.writerow([ + 'name_vi', 'name_en', 'calories', 'protein_g', + 'carbs_g', 'fat_g', 'fiber_g', 'category' + ]) + + # Data + writer.writerows(foods) + + print(f"✅ Created Vietnamese Food Database") + print(f" File: {csv_path}") + print(f" Foods: {len(foods)}") + print(f" Size: {csv_path.stat().st_size / 1024:.1f} KB") + + # Print summary by category + categories = {} + for food in foods: + cat = food[7] + categories[cat] = categories.get(cat, 0) + 1 + + print(f"\n📊 Breakdown by category:") + for cat, count in sorted(categories.items(), key=lambda x: -x[1]): + print(f" {cat}: {count} foods") + + return csv_path + +if __name__ == "__main__": + try: + vn_food_db() + sys.exit(0) + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/examples/feedback_loop_example.py b/examples/feedback_loop_example.py new file mode 100644 index 0000000000000000000000000000000000000000..5598d68e9d06f5eadf6312eb5903de6f7c921a76 --- /dev/null +++ b/examples/feedback_loop_example.py @@ -0,0 +1,267 @@ +""" +Feedback Loop Example +Demonstrates collecting and learning from user feedback +""" + +from feedback import get_feedback_collector, get_feedback_analyzer, FeedbackCategory + + +def example_collect_ratings(): + """Example: Collect user ratings""" + print("=" * 60) + print("COLLECTING USER RATINGS") + print("=" * 60) + + collector = get_feedback_collector() + + # Example 1: High rating (5 stars) + print("\n✅ Example 1: User loves the response") + feedback_id = collector.collect_rating( + 
user_id="user123", + agent_name="nutrition_agent", + user_message="Tôi muốn giảm cân, nên ăn gì?", + agent_response="Để giảm cân hiệu quả, bạn nên ăn nhiều rau xanh, protein...", + rating=5, + category=FeedbackCategory.HELPFULNESS, + comment="Rất hữu ích và chi tiết!" + ) + print(f" Feedback ID: {feedback_id}") + print(f" Rating: 5/5 ⭐⭐⭐⭐⭐") + + # Example 2: Low rating (2 stars) + print("\n❌ Example 2: User unhappy with response") + feedback_id = collector.collect_rating( + user_id="user456", + agent_name="nutrition_agent", + user_message="Tôi bị tiểu đường, ăn gì được?", + agent_response="Bạn nên ăn ít đường.", + rating=2, + category=FeedbackCategory.COMPLETENESS, + comment="Quá chung chung, không cụ thể" + ) + print(f" Feedback ID: {feedback_id}") + print(f" Rating: 2/5 ⭐⭐") + + # Example 3: Thumbs up + print("\n👍 Example 3: Quick thumbs up") + feedback_id = collector.collect_thumbs( + user_id="user789", + agent_name="exercise_agent", + user_message="Tập gì để giảm mỡ bụng?", + agent_response="Bạn nên tập plank, crunches, và cardio...", + is_positive=True, + comment="Hay!" + ) + print(f" Feedback ID: {feedback_id}") + print(f" Thumbs: 👍") + + +def example_collect_corrections(): + """Example: Collect user corrections""" + print("\n" + "=" * 60) + print("COLLECTING USER CORRECTIONS") + print("=" * 60) + + collector = get_feedback_collector() + + # Example: User corrects wrong information + print("\n📝 User corrects incorrect BMI calculation") + feedback_id = collector.collect_correction( + user_id="user123", + agent_name="nutrition_agent", + user_message="Tôi 70kg, 175cm, BMI của tôi là bao nhiêu?", + agent_response="BMI của bạn là 24.5", # Wrong! + corrected_response="BMI của bạn là 22.9 (70 / 1.75²)", + correction_reason="calculation_error" + ) + print(f" Correction ID: {feedback_id}") + print(f" Original: BMI = 24.5 ❌") + print(f" Corrected: BMI = 22.9 ✅") + + +def example_report_issue(): + """Example: Report problematic response""" + print("\n" + "=" * 60) + print("REPORTING ISSUES") + print("=" * 60) + + collector = get_feedback_collector() + + # Example: Report harmful advice + print("\n⚠️ User reports harmful medical advice") + report_id = collector.report_issue( + user_id="user999", + agent_name="symptom_agent", + user_message="Tôi bị đau ngực dữ dội", + agent_response="Bạn nên nghỉ ngơi, uống nước", + issue_type="harmful", + description="Đau ngực dữ dội cần đi bệnh viện ngay, không nên chỉ nghỉ ngơi", + severity="critical" + ) + print(f" Report ID: {report_id}") + print(f" Severity: CRITICAL 🚨") + + +def example_analyze_feedback(): + """Example: Analyze feedback to find patterns""" + print("\n" + "=" * 60) + print("ANALYZING FEEDBACK") + print("=" * 60) + + collector = get_feedback_collector() + + # Add more sample data + print("\n📊 Adding sample feedback data...") + for i in range(10): + collector.collect_rating( + user_id=f"user{i}", + agent_name="nutrition_agent", + user_message=f"Question {i}", + agent_response=f"Response {i}", + rating=4 if i % 2 == 0 else 3, + category=FeedbackCategory.HELPFULNESS + ) + + # Get statistics + print("\n📈 Feedback Statistics:") + stats = collector.get_feedback_stats(agent_name="nutrition_agent") + print(f" Total ratings: {stats['total_ratings']}") + print(f" Average rating: {stats['average_rating']:.1f}/5.0") + print(f" Rating distribution:") + for rating in [5, 4, 3, 2, 1]: + count = stats['rating_distribution'][rating] + print(f" {rating} stars: {count}") + + # Analyze performance + print("\n🔍 Performance Analysis:") + analyzer = 
get_feedback_analyzer(collector) + analysis = analyzer.analyze_agent_performance("nutrition_agent") + + print(f" Overall rating: {analysis['overall_rating']:.1f}/5.0") + + if analysis['strengths']: + print(f"\n Strengths:") + for strength in analysis['strengths']: + print(f" ✅ {strength}") + + if analysis['weaknesses']: + print(f"\n Weaknesses:") + for weakness in analysis['weaknesses']: + print(f" ⚠️ {weakness}") + + +def example_get_insights(): + """Example: Get actionable insights""" + print("\n" + "=" * 60) + print("ACTIONABLE INSIGHTS") + print("=" * 60) + + collector = get_feedback_collector() + analyzer = get_feedback_analyzer(collector) + + # Get insights + insights = analyzer.get_actionable_insights("nutrition_agent", limit=3) + + if insights: + print("\n💡 Top Improvement Opportunities:") + for i, insight in enumerate(insights, 1): + print(f"\n {i}. [{insight['priority'].upper()}] {insight['category']}") + print(f" Issue: {insight['issue']}") + print(f" Action: {insight['action']}") + if insight['examples']: + print(f" Examples: {', '.join(insight['examples'][:2])}") + else: + print("\n No insights available yet. Collect more feedback!") + + +def example_generate_report(): + """Example: Generate improvement report""" + print("\n" + "=" * 60) + print("IMPROVEMENT REPORT") + print("=" * 60) + + collector = get_feedback_collector() + analyzer = get_feedback_analyzer(collector) + + # Generate report + report = analyzer.generate_improvement_report("nutrition_agent") + print(report) + + +def example_export_for_training(): + """Example: Export feedback for fine-tuning""" + print("\n" + "=" * 60) + print("EXPORT FOR FINE-TUNING") + print("=" * 60) + + collector = get_feedback_collector() + + # Export high-quality feedback + print("\n📦 Exporting high-quality feedback (rating >= 4)...") + output_file = collector.export_for_fine_tuning( + agent_name="nutrition_agent", + min_rating=4, + include_corrections=True + ) + + print(f" ✅ Exported to: {output_file}") + print(f" Ready for fine-tuning!") + + +def example_compare_agents(): + """Example: Compare agent performance""" + print("\n" + "=" * 60) + print("AGENT COMPARISON") + print("=" * 60) + + collector = get_feedback_collector() + + # Add feedback for different agents + print("\n📊 Adding feedback for multiple agents...") + agents = ["nutrition_agent", "exercise_agent", "symptom_agent"] + + for agent in agents: + for i in range(5): + rating = 5 if agent == "nutrition_agent" else (4 if agent == "exercise_agent" else 3) + collector.collect_rating( + user_id=f"user{i}", + agent_name=agent, + user_message=f"Question for {agent}", + agent_response=f"Response from {agent}", + rating=rating + ) + + # Compare + analyzer = get_feedback_analyzer(collector) + comparison = analyzer.compare_agents() + + print(f"\n🏆 Agent Rankings:") + for i, agent in enumerate(comparison['agents'], 1): + print(f" {i}. 
{agent['agent']}: {agent['average_rating']:.1f}/5.0 ({agent['total_feedback']} feedback)") + + if comparison['best_agent']: + print(f"\n Best: {comparison['best_agent']['agent']} 🥇") + + if comparison['worst_agent']: + print(f" Needs improvement: {comparison['worst_agent']['agent']} ⚠️") + + +if __name__ == '__main__': + example_collect_ratings() + example_collect_corrections() + example_report_issue() + example_analyze_feedback() + example_get_insights() + example_generate_report() + example_export_for_training() + example_compare_agents() + + print("\n" + "=" * 60) + print("✅ FEEDBACK LOOP DEMO COMPLETE!") + print("=" * 60) + print("\nNext steps:") + print("1. Integrate feedback collection into your UI") + print("2. Review feedback regularly") + print("3. Use insights to improve agents") + print("4. Export high-quality feedback for fine-tuning") + print("5. Monitor trends and act on critical issues") diff --git a/examples/multilingual_example.py b/examples/multilingual_example.py new file mode 100644 index 0000000000000000000000000000000000000000..6a4f775485b6d7968777d9d5af49890384fc6d03 --- /dev/null +++ b/examples/multilingual_example.py @@ -0,0 +1,239 @@ +""" +Multilingual Support Example +Demonstrates automatic language detection and bilingual responses +""" + +from i18n import Language, detect_language, t, get_multilingual_handler, Translations + + +def test_language_detection(): + """Test automatic language detection""" + print("=" * 60) + print("LANGUAGE DETECTION TEST") + print("=" * 60) + + test_cases = [ + ("Tôi muốn giảm cân", Language.VIETNAMESE), + ("I want to lose weight", Language.ENGLISH), + ("Bạn có thể giúp tôi không?", Language.VIETNAMESE), + ("Can you help me?", Language.ENGLISH), + ("Tôi bị đau đầu", Language.VIETNAMESE), + ("I have a headache", Language.ENGLISH), + ("Làm sao để tăng cơ?", Language.VIETNAMESE), + ("How to build muscle?", Language.ENGLISH), + ] + + for text, expected in test_cases: + detected = detect_language(text) + status = "✅" if detected == expected else "❌" + print(f"{status} '{text}' → {detected.value} (expected: {expected.value})") + + +def test_translations(): + """Test translation system""" + print("\n" + "=" * 60) + print("TRANSLATION TEST") + print("=" * 60) + + keys = [ + 'greeting', + 'ask_age', + 'ask_weight', + 'bmi_normal', + 'thank_you_feedback', + 'error_occurred' + ] + + print("\n🇻🇳 Vietnamese:") + for key in keys: + text = t(key, Language.VIETNAMESE) + print(f" {key}: {text}") + + print("\n🇬🇧 English:") + for key in keys: + text = t(key, Language.ENGLISH) + print(f" {key}: {text}") + + +def test_agent_prompts(): + """Test agent system prompts in both languages""" + print("\n" + "=" * 60) + print("AGENT PROMPTS TEST") + print("=" * 60) + + agents = ['nutrition', 'exercise', 'symptom', 'mental_health'] + + for agent in agents: + print(f"\n📋 {agent.upper()} Agent:") + + print("\n🇻🇳 Vietnamese:") + prompt_vi = Translations.get_agent_prompt(agent, Language.VIETNAMESE) + print(f" {prompt_vi[:100]}...") + + print("\n🇬🇧 English:") + prompt_en = Translations.get_agent_prompt(agent, Language.ENGLISH) + print(f" {prompt_en[:100]}...") + + +def test_multilingual_handler(): + """Test multilingual handler""" + print("\n" + "=" * 60) + print("MULTILINGUAL HANDLER TEST") + print("=" * 60) + + handler = get_multilingual_handler() + + # Simulate users with different languages + print("\n👤 User 1 (Vietnamese):") + lang1 = handler.detect_and_set_language("user1", "Tôi muốn giảm cân") + print(f" Detected: {lang1.value}") + print(f" Greeting: 
{handler.translate_message('greeting', lang1)}") + + print("\n👤 User 2 (English):") + lang2 = handler.detect_and_set_language("user2", "I want to lose weight") + print(f" Detected: {lang2.value}") + print(f" Greeting: {handler.translate_message('greeting', lang2)}") + + print("\n👤 User 3 (Vietnamese):") + lang3 = handler.detect_and_set_language("user3", "Tôi bị đau đầu") + print(f" Detected: {lang3.value}") + print(f" Greeting: {handler.translate_message('greeting', lang3)}") + + # Get statistics + print("\n📊 Language Statistics:") + stats = handler.get_language_stats() + print(f" Total users: {stats['total_users']}") + print(f" Vietnamese: {stats['vietnamese_users']} ({stats['vietnamese_percentage']}%)") + print(f" English: {stats['english_users']} ({stats['english_percentage']}%)") + + +def test_conversation_flow(): + """Test full conversation flow with language detection""" + print("\n" + "=" * 60) + print("CONVERSATION FLOW TEST") + print("=" * 60) + + handler = get_multilingual_handler() + + # Vietnamese conversation + print("\n🇻🇳 Vietnamese Conversation:") + print("-" * 40) + + user_msg_vi = "Tôi muốn giảm cân" + lang_vi = handler.detect_and_set_language("user_vi", user_msg_vi) + + print(f"User: {user_msg_vi}") + print(f"Detected language: {lang_vi.value}") + print(f"Bot: {handler.translate_message('greeting', lang_vi)}") + print(f"Bot: {handler.translate_message('ask_age', lang_vi)}") + + # English conversation + print("\n🇬🇧 English Conversation:") + print("-" * 40) + + user_msg_en = "I want to lose weight" + lang_en = handler.detect_and_set_language("user_en", user_msg_en) + + print(f"User: {user_msg_en}") + print(f"Detected language: {lang_en.value}") + print(f"Bot: {handler.translate_message('greeting', lang_en)}") + print(f"Bot: {handler.translate_message('ask_age', lang_en)}") + + +def test_mixed_language(): + """Test handling of mixed language input""" + print("\n" + "=" * 60) + print("MIXED LANGUAGE TEST") + print("=" * 60) + + handler = get_multilingual_handler() + + # User starts in Vietnamese + print("\n👤 User starts in Vietnamese:") + lang1 = handler.detect_and_set_language("user_mixed", "Tôi muốn giảm cân") + print(f" Message: 'Tôi muốn giảm cân'") + print(f" Detected: {lang1.value}") + print(f" Response: {handler.translate_message('greeting', lang1)}") + + # User switches to English + print("\n👤 User switches to English:") + lang2 = handler.detect_and_set_language("user_mixed", "How many calories should I eat?") + print(f" Message: 'How many calories should I eat?'") + print(f" Detected: {lang2.value}") + print(f" Response: {handler.translate_message('nutrition_advice', lang2)}") + + # User switches back to Vietnamese + print("\n👤 User switches back to Vietnamese:") + lang3 = handler.detect_and_set_language("user_mixed", "Cảm ơn bạn!") + print(f" Message: 'Cảm ơn bạn!'") + print(f" Detected: {lang3.value}") + print(f" Response: {handler.translate_message('thank_you_feedback', lang3)}") + + +def demo_real_world_usage(): + """Demo real-world usage scenario""" + print("\n" + "=" * 60) + print("REAL-WORLD USAGE DEMO") + print("=" * 60) + + handler = get_multilingual_handler() + + scenarios = [ + { + 'user_id': 'nguyen_van_a', + 'messages': [ + "Tôi 25 tuổi, 70kg, 175cm", + "Tôi muốn giảm 5kg trong 2 tháng", + "Tôi nên ăn bao nhiêu calo?" + ] + }, + { + 'user_id': 'john_smith', + 'messages': [ + "I'm 30 years old, 80kg, 180cm", + "I want to build muscle", + "What exercises should I do?" 
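+                # Language is re-detected on every message (see the loop below),
+                # so the same code path serves Vietnamese and English users alike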
+ ] + } + ] + + for scenario in scenarios: + user_id = scenario['user_id'] + print(f"\n👤 User: {user_id}") + print("-" * 40) + + for msg in scenario['messages']: + lang = handler.detect_and_set_language(user_id, msg) + print(f"\nUser ({lang.value}): {msg}") + + # Simulate bot response + if "calo" in msg.lower() or "calories" in msg.lower(): + response_key = 'nutrition_advice' + elif "tập" in msg.lower() or "exercise" in msg.lower(): + response_key = 'exercise_plan' + else: + response_key = 'greeting' + + response = handler.translate_message(response_key, lang) + print(f"Bot ({lang.value}): {response}") + + +if __name__ == '__main__': + test_language_detection() + test_translations() + test_agent_prompts() + test_multilingual_handler() + test_conversation_flow() + test_mixed_language() + demo_real_world_usage() + + print("\n" + "=" * 60) + print("✅ MULTILINGUAL SUPPORT DEMO COMPLETE!") + print("=" * 60) + print("\nKey Features:") + print("✅ Automatic language detection (Vietnamese/English)") + print("✅ Bilingual translations for all messages") + print("✅ Language-specific agent prompts") + print("✅ Seamless language switching") + print("✅ User language preferences") + print("✅ Language usage statistics") diff --git a/examples/pydantic_validation_example.py b/examples/pydantic_validation_example.py new file mode 100644 index 0000000000000000000000000000000000000000..7a05b4b3ebb15f939e9fcd10701fd814b59667b9 --- /dev/null +++ b/examples/pydantic_validation_example.py @@ -0,0 +1,231 @@ +""" +Pydantic Validation Example +Demonstrates automatic parsing and validation of health data +""" + +from health_data import ( + PydanticUserHealthProfile, + PydanticHealthRecord, + NutritionRecord, + ExerciseRecord, + HealthDataParser, + merge_records, + RecordType +) +from datetime import datetime, timedelta + + +def test_height_parsing(): + """Test parsing height from various formats""" + print("=" * 60) + print("HEIGHT PARSING TEST") + print("=" * 60) + + test_cases = [ + "1.78m", # Meters + "1,78m", # Comma separator + "178cm", # Centimeters + "178", # Just number + "1.78", # Decimal + "5'10\"", # Feet/inches + ] + + for test in test_cases: + result = HealthDataParser.parse_height(test) + print(f"Input: {test:15} → {result} cm") + + +def test_weight_parsing(): + """Test parsing weight from various formats""" + print("\n" + "=" * 60) + print("WEIGHT PARSING TEST") + print("=" * 60) + + test_cases = [ + "70kg", # Kilograms + "70", # Just number + "154lbs", # Pounds + "70.5", # Decimal + ] + + for test in test_cases: + result = HealthDataParser.parse_weight(test) + print(f"Input: {test:15} → {result} kg") + + +def test_pydantic_validation(): + """Test Pydantic automatic validation""" + print("\n" + "=" * 60) + print("PYDANTIC VALIDATION TEST") + print("=" * 60) + + # Test 1: Valid data with various formats + print("\n✅ Test 1: Valid data with mixed formats") + try: + profile = PydanticUserHealthProfile( + user_id="user123", + age="25 tuổi", # Will parse to 25 + gender="male", + weight="70kg", # Will parse to 70.0 + height="1.78m" # Will parse to 178.0 + ) + print(f" Age: {profile.age}") + print(f" Weight: {profile.weight} kg") + print(f" Height: {profile.height} cm") + print(f" BMI: {profile.bmi} ({profile.get_bmi_category()})") + print(" ✅ Success!") + except Exception as e: + print(f" ❌ Error: {e}") + + # Test 2: Invalid height (too high) + print("\n❌ Test 2: Invalid height (too high)") + try: + profile = PydanticUserHealthProfile( + user_id="user456", + height="500cm" # Too high! 
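+            # Expected to raise pydantic's ValidationError — the model is assumed
+            # to cap height at a plausible human maximum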
+ ) + print(" ❌ Should have failed!") + except Exception as e: + print(f" ✅ Caught error: {e}") + + # Test 3: Invalid age (too young) + print("\n❌ Test 3: Invalid age (too young)") + try: + profile = PydanticUserHealthProfile( + user_id="user789", + age=10 # Too young! + ) + print(" ❌ Should have failed!") + except Exception as e: + print(f" ✅ Caught error: {e}") + + # Test 4: Auto BMI calculation + print("\n✅ Test 4: Auto BMI calculation") + profile = PydanticUserHealthProfile( + user_id="user999", + weight="70kg", + height="1,75m" # Comma separator! + ) + print(f" Weight: {profile.weight} kg") + print(f" Height: {profile.height} cm") + print(f" BMI: {profile.bmi} (auto-calculated)") + print(f" Category: {profile.get_bmi_category()}") + + +def test_health_records(): + """Test health records with validation""" + print("\n" + "=" * 60) + print("HEALTH RECORDS TEST") + print("=" * 60) + + # Create nutrition record + print("\n📊 Creating Nutrition Record") + nutrition = NutritionRecord( + user_id="user123", + height="1.78m", + weight="70kg", + data={ + 'calories': 2000, + 'protein': 150, + 'carbs': 200, + 'fat': 60 + } + ) + print(f" Height: {nutrition.height} cm") + print(f" Weight: {nutrition.weight} kg") + print(f" BMI: {nutrition.bmi}") + print(f" Calories: {nutrition.data['calories']}") + + # Create exercise record + print("\n🏃 Creating Exercise Record") + exercise = ExerciseRecord( + user_id="user123", + data={ + 'exercise_type': 'cardio', + 'duration_minutes': 30, + 'calories_burned': 300 + } + ) + print(f" Type: {exercise.data['exercise_type']}") + print(f" Duration: {exercise.data['duration_minutes']} min") + print(f" Calories: {exercise.data['calories_burned']}") + + +def test_merge_records(): + """Test merging records from multiple days""" + print("\n" + "=" * 60) + print("MERGE RECORDS TEST") + print("=" * 60) + + # Create sample records over 7 days + records = [] + base_date = datetime.now() - timedelta(days=7) + + for i in range(7): + # Nutrition record + nutrition = NutritionRecord( + user_id="user123", + weight=70 - i * 0.2, # Gradually losing weight + height=178, + data={ + 'calories': 1800 + i * 50, + 'protein': 140 + i * 5, + } + ) + nutrition.timestamp = base_date + timedelta(days=i) + records.append(nutrition) + + # Exercise record + exercise = ExerciseRecord( + user_id="user123", + data={ + 'exercise_type': 'cardio' if i % 2 == 0 else 'strength', + 'duration_minutes': 30 + i * 5, + 'calories_burned': 250 + i * 20 + } + ) + exercise.timestamp = base_date + timedelta(days=i) + records.append(exercise) + + print(f"\n📦 Created {len(records)} records over 7 days") + + # Merge with average strategy + print("\n📊 Merging with 'average' strategy:") + merged = merge_records(records, strategy='average') + + print(f"\nTotal records: {merged['total_records']}") + print(f"Date range: {merged['date_range']['start'][:10]} to {merged['date_range']['end'][:10]}") + + if 'nutrition' in merged['by_type']: + nutrition_data = merged['by_type']['nutrition'] + print(f"\n🍎 Nutrition Summary:") + print(f" Average calories: {nutrition_data['average_daily']['calories']}") + print(f" Average protein: {nutrition_data['average_daily']['protein']}g") + + if 'exercise' in merged['by_type']: + exercise_data = merged['by_type']['exercise'] + print(f"\n🏃 Exercise Summary:") + print(f" Total workouts: {exercise_data['total_workouts']}") + print(f" Total duration: {exercise_data['total_duration_minutes']} min") + print(f" Total calories burned: {exercise_data['total_calories_burned']}") + print(f" Exercise 
types: {exercise_data['exercise_types']}") + + if 'health_metrics' in merged and 'weight' in merged['health_metrics']: + weight_data = merged['health_metrics']['weight'] + print(f"\n⚖️ Weight Progress:") + print(f" Start: {weight_data['max']} kg") + print(f" End: {weight_data['latest']} kg") + print(f" Change: {weight_data['change']} kg") + print(f" Average: {weight_data['average']} kg") + + +if __name__ == '__main__': + test_height_parsing() + test_weight_parsing() + test_pydantic_validation() + test_health_records() + test_merge_records() + + print("\n" + "=" * 60) + print("✅ ALL TESTS COMPLETE!") + print("=" * 60) diff --git a/examples/session_persistence_example.py b/examples/session_persistence_example.py new file mode 100644 index 0000000000000000000000000000000000000000..22a3a05fd3377437fbcbaa94a1ce6ce97c681249 --- /dev/null +++ b/examples/session_persistence_example.py @@ -0,0 +1,103 @@ +""" +Session Persistence Example +Demonstrates how conversation memory persists across sessions +""" + +from agents.core.coordinator import AgentCoordinator + + +def example_first_session(): + """First session - user provides information""" + print("=" * 60) + print("SESSION 1: User provides information") + print("=" * 60) + + # Create coordinator with user_id + coordinator = AgentCoordinator(user_id="user123") + + # User provides information + query1 = "Tôi 25 tuổi, nam, 70kg, 175cm, muốn giảm cân" + response1 = coordinator.handle_query(query1, []) + + print(f"\nUser: {query1}") + print(f"Bot: {response1[:200]}...") + + # Check what's in memory + profile = coordinator.memory.get_full_profile() + print(f"\n📊 Memory saved:") + print(f" Age: {profile['age']}") + print(f" Gender: {profile['gender']}") + print(f" Weight: {profile['weight']}kg") + print(f" Height: {profile['height']}cm") + + print("\n✅ Session saved automatically!") + print(" (User closes app)") + + +def example_second_session(): + """Second session - memory is restored""" + print("\n" + "=" * 60) + print("SESSION 2: User returns (next day)") + print("=" * 60) + + # Create NEW coordinator with SAME user_id + # Memory will be automatically loaded! + coordinator = AgentCoordinator(user_id="user123") + + # Check memory - it should be loaded! + profile = coordinator.memory.get_full_profile() + print(f"\n📊 Memory restored:") + print(f" Age: {profile['age']}") + print(f" Gender: {profile['gender']}") + print(f" Weight: {profile['weight']}kg") + print(f" Height: {profile['height']}cm") + + # User asks new question - bot remembers! + query2 = "Tôi nên ăn bao nhiêu calo mỗi ngày?" 
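+    # An empty history is passed on purpose: the answer relies on the profile
+    # reloaded from disk, keyed by user_id — conceptually something like
+    # (illustrative sketch only, not the actual agents.core storage code):
+    #     profile = json.loads(Path(f"sessions/{user_id}.json").read_text())
+    #     coordinator.memory.restore(profile)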
+ response2 = coordinator.handle_query(query2, []) + + print(f"\nUser: {query2}") + print(f"Bot: {response2[:200]}...") + print("\n✅ Bot remembers user info from previous session!") + + +def example_different_user(): + """Different user - separate session""" + print("\n" + "=" * 60) + print("SESSION 3: Different user") + print("=" * 60) + + # Different user_id = different session + coordinator = AgentCoordinator(user_id="user456") + + profile = coordinator.memory.get_full_profile() + print(f"\n📊 Memory for user456:") + print(f" Age: {profile['age']}") # Should be None + print(f" Gender: {profile['gender']}") # Should be None + + print("\n✅ Each user has separate session!") + + +def example_without_persistence(): + """Without user_id - no persistence""" + print("\n" + "=" * 60) + print("SESSION 4: Without persistence (no user_id)") + print("=" * 60) + + # No user_id = no persistence + coordinator = AgentCoordinator() # No user_id + + print("\n⚠️ Memory will NOT persist across sessions") + print(" (Useful for anonymous/guest users)") + + +if __name__ == '__main__': + # Run examples + example_first_session() + example_second_session() + example_different_user() + example_without_persistence() + + print("\n" + "=" * 60) + print("✅ Session Persistence Demo Complete!") + print("=" * 60) diff --git a/examples/summarization_example.py b/examples/summarization_example.py new file mode 100644 index 0000000000000000000000000000000000000000..f70e1aa5201731d3e3b44fed91651435d58b84e9 --- /dev/null +++ b/examples/summarization_example.py @@ -0,0 +1,128 @@ +""" +Conversation Summarization Example +Demonstrates automatic summarization of long conversations +""" + +from agents.core.coordinator import AgentCoordinator + + +def simulate_long_conversation(): + """Simulate a long conversation to trigger summarization""" + print("=" * 60) + print("CONVERSATION SUMMARIZATION DEMO") + print("=" * 60) + + coordinator = AgentCoordinator(user_id="demo_user") + + # Simulate 20 conversation turns + conversations = [ + ("Tôi 25 tuổi, nam, 70kg, 175cm", "Cảm ơn thông tin..."), + ("Tôi muốn giảm cân", "Để giảm cân hiệu quả..."), + ("Nên ăn bao nhiêu calo?", "Với thông tin của bạn..."), + ("Tôi nên tập gì?", "Bạn nên tập cardio..."), + ("Bao lâu thì thấy kết quả?", "Thường sau 2-4 tuần..."), + ("Tôi có thể ăn gì?", "Bạn nên ăn nhiều rau xanh..."), + ("Sáng nên ăn gì?", "Bữa sáng nên có protein..."), + ("Tối nên ăn gì?", "Bữa tối nên nhẹ..."), + ("Tôi có thể ăn trái cây không?", "Có, nhưng hạn chế..."), + ("Nên tập mấy lần 1 tuần?", "Nên tập 3-4 lần..."), + ("Mỗi lần tập bao lâu?", "Mỗi lần 30-45 phút..."), + ("Tôi nên uống bao nhiêu nước?", "Nên uống 2-3 lít..."), + ("Có nên nhịn ăn không?", "Không nên nhịn ăn..."), + ("Tôi có thể ăn đêm không?", "Nên tránh ăn đêm..."), + ("Làm sao để không đói?", "Ăn nhiều protein..."), + ("Tôi bị đau đầu khi tập", "Có thể do thiếu nước..."), + ("Nên bổ sung gì?", "Có thể bổ sung vitamin..."), + ("Tôi có cần whey protein không?", "Không bắt buộc..."), + ("Khi nào nên nghỉ?", "Nên nghỉ 1-2 ngày..."), + ("Làm sao biết đang giảm cân đúng?", "Theo dõi cân nặng..."), + ] + + chat_history = [] + + for i, (user_msg, bot_msg) in enumerate(conversations, 1): + chat_history.append((user_msg, bot_msg)) + + # Show progress + if i % 5 == 0: + print(f"\n📊 After {i} turns:") + stats = coordinator.get_conversation_stats(chat_history) + print(f" Total turns: {stats['total_turns']}") + print(f" Estimated tokens: {stats['estimated_tokens']}") + print(f" Should summarize: {stats['should_summarize']}") + + 
print(f"\n" + "=" * 60) + print("BEFORE SUMMARIZATION") + print("=" * 60) + print(f"Total conversation turns: {len(chat_history)}") + + # Trigger summarization + print(f"\n" + "=" * 60) + print("APPLYING SUMMARIZATION") + print("=" * 60) + + # This happens automatically in coordinator.handle_query() + # But we can also do it manually: + from utils.conversation_summarizer import get_summarizer + + summarizer = get_summarizer() + result = summarizer.summarize_conversation( + chat_history, + user_profile=coordinator.memory.get_full_profile(), + keep_recent=5 + ) + + print(f"\n📝 SUMMARY:") + print(result['summary']) + + print(f"\n💬 RECENT HISTORY ({len(result['recent_history'])} turns):") + for user_msg, bot_msg in result['recent_history']: + print(f" User: {user_msg}") + print(f" Bot: {bot_msg[:50]}...") + + print(f"\n" + "=" * 60) + print("AFTER SUMMARIZATION") + print("=" * 60) + print(f"Summarized turns: {result['summarized_turns']}") + print(f"Kept recent turns: {len(result['recent_history'])}") + print(f"Total context size: {result['summarized_turns'] + len(result['recent_history'])} → {len(result['recent_history']) + 1} (summary + recent)") + + # Show compressed history + compressed = summarizer.compress_history(chat_history, target_turns=10) + print(f"\n📦 Compressed history: {len(chat_history)} → {len(compressed)} turns") + print(f" Token reduction: ~{((len(chat_history) - len(compressed)) / len(chat_history) * 100):.0f}%") + + +def test_automatic_summarization(): + """Test automatic summarization in coordinator""" + print("\n\n" + "=" * 60) + print("AUTOMATIC SUMMARIZATION TEST") + print("=" * 60) + + coordinator = AgentCoordinator(user_id="test_user") + + # Create long history + chat_history = [ + (f"Câu hỏi {i}", f"Câu trả lời {i}") + for i in range(1, 21) + ] + + print(f"Initial history: {len(chat_history)} turns") + + # This will trigger automatic summarization + response = coordinator.handle_query( + "Tôi muốn tóm tắt cuộc trò chuyện", + chat_history + ) + + print(f"\n✅ Automatic summarization triggered!") + print(f" Response: {response[:100]}...") + + +if __name__ == '__main__': + simulate_long_conversation() + test_automatic_summarization() + + print("\n" + "=" * 60) + print("✅ Summarization Demo Complete!") + print("=" * 60) diff --git a/feedback/__init__.py b/feedback/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..444e2959bccaa4f540ba98a28c5735a5a5ea856c --- /dev/null +++ b/feedback/__init__.py @@ -0,0 +1,25 @@ +""" +Feedback Module +Collect and learn from user ratings and corrections +""" + +from .feedback_system import ( + FeedbackCollector, + FeedbackType, + FeedbackCategory, + get_feedback_collector +) + +from .feedback_analyzer import ( + FeedbackAnalyzer, + get_feedback_analyzer +) + +__all__ = [ + 'FeedbackCollector', + 'FeedbackType', + 'FeedbackCategory', + 'get_feedback_collector', + 'FeedbackAnalyzer', + 'get_feedback_analyzer' +] diff --git a/feedback/feedback_analyzer.py b/feedback/feedback_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..8fcc73c063bc832b1a27717db1ea48cdd2ad5fe5 --- /dev/null +++ b/feedback/feedback_analyzer.py @@ -0,0 +1,333 @@ +""" +Feedback Analyzer +Analyze feedback patterns and generate insights for improvement +""" + +import json +from datetime import datetime, timedelta +from pathlib import Path +from typing import Dict, Any, List, Optional +from collections import defaultdict, Counter +import re + + +class FeedbackAnalyzer: + """Analyze feedback to identify improvement 
opportunities""" + + def __init__(self, feedback_collector): + self.collector = feedback_collector + + def analyze_agent_performance(self, agent_name: str) -> Dict[str, Any]: + """ + Comprehensive analysis of agent performance + + Args: + agent_name: Name of the agent to analyze + + Returns: + Performance analysis + """ + stats = self.collector.get_feedback_stats(agent_name=agent_name) + low_rated = self.collector.get_low_rated_responses(agent_name=agent_name) + corrections = self.collector.get_corrections(agent_name=agent_name) + + analysis = { + 'agent_name': agent_name, + 'overall_rating': stats['average_rating'], + 'total_feedback': stats['total_ratings'], + 'rating_distribution': stats['rating_distribution'], + 'strengths': [], + 'weaknesses': [], + 'common_issues': [], + 'improvement_suggestions': [] + } + + # Identify strengths (high-rated patterns) + if stats['average_rating'] >= 4.0: + analysis['strengths'].append("High overall satisfaction") + + # Identify weaknesses (low-rated patterns) + if stats['rating_distribution'][1] + stats['rating_distribution'][2] > stats['total_ratings'] * 0.2: + analysis['weaknesses'].append("High number of low ratings (1-2 stars)") + + # Analyze common issues from low-rated responses + if low_rated: + issues = self._extract_common_issues(low_rated) + analysis['common_issues'] = issues + + # Analyze corrections + if corrections: + correction_patterns = self._analyze_corrections(corrections) + analysis['correction_patterns'] = correction_patterns + + # Generate improvement suggestions + for pattern in correction_patterns: + analysis['improvement_suggestions'].append( + f"Improve {pattern['category']}: {pattern['suggestion']}" + ) + + return analysis + + def _extract_common_issues(self, low_rated: List[Dict]) -> List[Dict[str, Any]]: + """Extract common issues from low-rated responses""" + issues = [] + + # Analyze comments + comments = [r.get('comment', '') for r in low_rated if r.get('comment')] + + # Common keywords in negative feedback + issue_keywords = { + 'incorrect': 'Thông tin không chính xác', + 'wrong': 'Câu trả lời sai', + 'unhelpful': 'Không hữu ích', + 'confusing': 'Khó hiểu', + 'incomplete': 'Thiếu thông tin', + 'too long': 'Quá dài dòng', + 'too short': 'Quá ngắn gọn', + 'rude': 'Không lịch sự', + 'generic': 'Quá chung chung' + } + + issue_counts = Counter() + + for comment in comments: + comment_lower = comment.lower() + for keyword, description in issue_keywords.items(): + if keyword in comment_lower: + issue_counts[description] += 1 + + # Get top issues + for issue, count in issue_counts.most_common(5): + issues.append({ + 'issue': issue, + 'frequency': count, + 'percentage': round(count / len(low_rated) * 100, 1) + }) + + return issues + + def _analyze_corrections(self, corrections: List[Dict]) -> List[Dict[str, Any]]: + """Analyze user corrections to find patterns""" + patterns = [] + + # Group by correction reason + by_reason = defaultdict(list) + for correction in corrections: + reason = correction.get('correction_reason', 'other') + by_reason[reason].append(correction) + + # Analyze each category + for reason, items in by_reason.items(): + if len(items) >= 2: # Only include if multiple occurrences + patterns.append({ + 'category': reason, + 'count': len(items), + 'suggestion': self._generate_suggestion(reason, items) + }) + + return patterns + + def _generate_suggestion(self, reason: str, corrections: List[Dict]) -> str: + """Generate improvement suggestion based on corrections""" + suggestions = { + 'incorrect_info': 'Verify 
medical information against authoritative sources',
+            'missing_context': 'Ask more follow-up questions to gather context',
+            'tone': 'Adjust tone to be more empathetic and supportive',
+            'too_generic': 'Provide more personalized and specific advice',
+            'calculation_error': 'Double-check all numerical calculations',
+            'outdated_info': 'Update knowledge base with latest medical guidelines'
+        }
+
+        return suggestions.get(reason, f'Review and improve handling of: {reason}')
+
+    def get_trending_issues(self, days: int = 7) -> List[Dict[str, Any]]:
+        """
+        Get trending issues in recent feedback
+
+        Args:
+            days: Number of days to analyze
+
+        Returns:
+            List of trending issues
+        """
+        cutoff_date = datetime.now() - timedelta(days=days)
+
+        recent_low_rated = []
+        for file_path in (self.collector.storage_dir / "ratings").glob("*.json"):
+            with open(file_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+
+            raw_timestamp = data.get('timestamp')
+            if not raw_timestamp:
+                continue  # skip records without a timestamp instead of crashing fromisoformat
+            timestamp = datetime.fromisoformat(raw_timestamp)
+
+            if timestamp >= cutoff_date and data.get('rating', 5) <= 2:
+                recent_low_rated.append(data)
+
+        return self._extract_common_issues(recent_low_rated)
+
+    def compare_agents(self) -> Dict[str, Any]:
+        """
+        Compare performance across all agents
+
+        Returns:
+            Comparison data
+        """
+        stats = self.collector.get_feedback_stats()
+
+        comparison = {
+            'agents': [],
+            'best_agent': None,
+            'worst_agent': None,
+            'average_rating': stats['average_rating']
+        }
+
+        # Rank agents
+        agent_rankings = []
+        for agent, data in stats['by_agent'].items():
+            agent_rankings.append({
+                'agent': agent,
+                'average_rating': data['average'],
+                'total_feedback': data['count']
+            })
+
+        # Sort by rating
+        agent_rankings.sort(key=lambda x: x['average_rating'], reverse=True)
+
+        comparison['agents'] = agent_rankings
+
+        if agent_rankings:
+            comparison['best_agent'] = agent_rankings[0]
+            comparison['worst_agent'] = agent_rankings[-1]
+
+        return comparison
+
+    def generate_improvement_report(self, agent_name: Optional[str] = None) -> str:
+        """
+        Generate a comprehensive improvement report
+
+        Args:
+            agent_name: Specific agent or all agents
+
+        Returns:
+            Formatted report
+        """
+        if agent_name:
+            analysis = self.analyze_agent_performance(agent_name)
+
+            report = f"""
+# Feedback Analysis Report: {agent_name}
+Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}
+
+## Overall Performance
+- Average Rating: {analysis['overall_rating']:.1f}/5.0
+- Total Feedback: {analysis['total_feedback']}
+
+## Rating Distribution
+- ⭐⭐⭐⭐⭐ (5 stars): {analysis['rating_distribution'][5]}
+- ⭐⭐⭐⭐ (4 stars): {analysis['rating_distribution'][4]}
+- ⭐⭐⭐ (3 stars): {analysis['rating_distribution'][3]}
+- ⭐⭐ (2 stars): {analysis['rating_distribution'][2]}
+- ⭐ (1 star): {analysis['rating_distribution'][1]}
+
+## Strengths
+"""
+            for strength in analysis['strengths']:
+                report += f"- ✅ {strength}\n"
+
+            report += "\n## Weaknesses\n"
+            for weakness in analysis['weaknesses']:
+                report += f"- ⚠️ {weakness}\n"
+
+            if analysis['common_issues']:
+                report += "\n## Common Issues\n"
+                for issue in analysis['common_issues']:
+                    report += f"- {issue['issue']}: {issue['frequency']} occurrences ({issue['percentage']}%)\n"
+
+            if analysis['improvement_suggestions']:
+                report += "\n## Improvement Suggestions\n"
+                for i, suggestion in enumerate(analysis['improvement_suggestions'], 1):
+                    report += f"{i}. 
{suggestion}\n" + + return report + + else: + # All agents comparison + comparison = self.compare_agents() + + report = f""" +# Overall Feedback Analysis Report +Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')} + +## System-wide Performance +- Average Rating: {comparison['average_rating']:.1f}/5.0 + +## Agent Rankings +""" + for i, agent in enumerate(comparison['agents'], 1): + report += f"{i}. {agent['agent']}: {agent['average_rating']:.1f}/5.0 ({agent['total_feedback']} feedback)\n" + + if comparison['best_agent']: + report += f"\n🏆 Best Performing: {comparison['best_agent']['agent']} ({comparison['best_agent']['average_rating']:.1f}/5.0)\n" + + if comparison['worst_agent']: + report += f"⚠️ Needs Improvement: {comparison['worst_agent']['agent']} ({comparison['worst_agent']['average_rating']:.1f}/5.0)\n" + + return report + + def get_actionable_insights(self, agent_name: str, limit: int = 5) -> List[Dict[str, Any]]: + """ + Get top actionable insights for improvement + + Args: + agent_name: Agent to analyze + limit: Number of insights to return + + Returns: + List of actionable insights + """ + analysis = self.analyze_agent_performance(agent_name) + low_rated = self.collector.get_low_rated_responses(agent_name=agent_name, limit=20) + corrections = self.collector.get_corrections(agent_name=agent_name, limit=20) + + insights = [] + + # Insight 1: Most common low-rating issue + if analysis['common_issues']: + top_issue = analysis['common_issues'][0] + insights.append({ + 'priority': 'high', + 'category': 'quality', + 'issue': top_issue['issue'], + 'frequency': top_issue['frequency'], + 'action': f"Review and fix responses related to: {top_issue['issue']}", + 'examples': [r['user_message'] for r in low_rated[:3]] + }) + + # Insight 2: Correction patterns + if corrections: + insights.append({ + 'priority': 'high', + 'category': 'accuracy', + 'issue': 'User corrections available', + 'frequency': len(corrections), + 'action': 'Incorporate user corrections into training data', + 'examples': [c['correction_reason'] for c in corrections[:3]] + }) + + # Insight 3: Rating trend + stats = self.collector.get_feedback_stats(agent_name=agent_name) + low_rating_pct = (stats['rating_distribution'][1] + stats['rating_distribution'][2]) / max(stats['total_ratings'], 1) * 100 + + if low_rating_pct > 20: + insights.append({ + 'priority': 'critical', + 'category': 'overall', + 'issue': f'{low_rating_pct:.1f}% of ratings are 1-2 stars', + 'action': 'Urgent review needed - high dissatisfaction rate', + 'examples': [] + }) + + return insights[:limit] + + +def get_feedback_analyzer(feedback_collector) -> FeedbackAnalyzer: + """Create feedback analyzer instance""" + return FeedbackAnalyzer(feedback_collector) diff --git a/feedback/feedback_system.py b/feedback/feedback_system.py new file mode 100644 index 0000000000000000000000000000000000000000..8c47ef4eb12e6048a067216dd28244184412bf41 --- /dev/null +++ b/feedback/feedback_system.py @@ -0,0 +1,425 @@ +""" +Feedback System +Collect and learn from user ratings and corrections +""" + +import json +from datetime import datetime +from pathlib import Path +from typing import Optional, Dict, Any, List +from enum import Enum + + +class FeedbackType(str, Enum): + """Types of feedback""" + RATING = "rating" + CORRECTION = "correction" + THUMBS_UP = "thumbs_up" + THUMBS_DOWN = "thumbs_down" + REPORT = "report" + + +class FeedbackCategory(str, Enum): + """Feedback categories""" + ACCURACY = "accuracy" + HELPFULNESS = "helpfulness" + TONE = "tone" + COMPLETENESS = 
"completeness" + SAFETY = "safety" + OTHER = "other" + + +class FeedbackCollector: + """Collect user feedback on agent responses""" + + def __init__(self, storage_dir: str = "feedback/data"): + self.storage_dir = Path(storage_dir) + self.storage_dir.mkdir(parents=True, exist_ok=True) + + # Create subdirectories + (self.storage_dir / "ratings").mkdir(exist_ok=True) + (self.storage_dir / "corrections").mkdir(exist_ok=True) + (self.storage_dir / "reports").mkdir(exist_ok=True) + + def collect_rating( + self, + user_id: str, + agent_name: str, + user_message: str, + agent_response: str, + rating: int, + category: Optional[FeedbackCategory] = None, + comment: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None + ) -> str: + """ + Collect user rating for an agent response + + Args: + user_id: User identifier + agent_name: Name of the agent + user_message: User's original message + agent_response: Agent's response + rating: Rating (1-5 stars) + category: Feedback category + comment: Optional user comment + metadata: Additional metadata + + Returns: + Feedback ID + """ + feedback_id = f"{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + feedback_data = { + 'feedback_id': feedback_id, + 'user_id': user_id, + 'agent_name': agent_name, + 'feedback_type': FeedbackType.RATING, + 'rating': rating, + 'category': category.value if category else None, + 'user_message': user_message, + 'agent_response': agent_response, + 'comment': comment, + 'metadata': metadata or {}, + 'timestamp': datetime.now().isoformat() + } + + # Save to file + file_path = self.storage_dir / "ratings" / f"{feedback_id}.json" + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(feedback_data, f, ensure_ascii=False, indent=2) + + return feedback_id + + def collect_correction( + self, + user_id: str, + agent_name: str, + user_message: str, + agent_response: str, + corrected_response: str, + correction_reason: str, + metadata: Optional[Dict[str, Any]] = None + ) -> str: + """ + Collect user correction for an agent response + + Args: + user_id: User identifier + agent_name: Name of the agent + user_message: User's original message + agent_response: Agent's incorrect response + corrected_response: User's corrected response + correction_reason: Why the correction was needed + metadata: Additional metadata + + Returns: + Feedback ID + """ + feedback_id = f"{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + feedback_data = { + 'feedback_id': feedback_id, + 'user_id': user_id, + 'agent_name': agent_name, + 'feedback_type': FeedbackType.CORRECTION, + 'user_message': user_message, + 'agent_response': agent_response, + 'corrected_response': corrected_response, + 'correction_reason': correction_reason, + 'metadata': metadata or {}, + 'timestamp': datetime.now().isoformat() + } + + # Save to file + file_path = self.storage_dir / "corrections" / f"{feedback_id}.json" + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(feedback_data, f, ensure_ascii=False, indent=2) + + return feedback_id + + def collect_thumbs( + self, + user_id: str, + agent_name: str, + user_message: str, + agent_response: str, + is_positive: bool, + comment: Optional[str] = None + ) -> str: + """ + Collect thumbs up/down feedback + + Args: + user_id: User identifier + agent_name: Name of the agent + user_message: User's original message + agent_response: Agent's response + is_positive: True for thumbs up, False for thumbs down + comment: Optional comment + + Returns: + Feedback ID + """ + feedback_type = FeedbackType.THUMBS_UP 
if is_positive else FeedbackType.THUMBS_DOWN + + return self.collect_rating( + user_id=user_id, + agent_name=agent_name, + user_message=user_message, + agent_response=agent_response, + rating=5 if is_positive else 1, + comment=comment, + metadata={'feedback_type': feedback_type} + ) + + def report_issue( + self, + user_id: str, + agent_name: str, + user_message: str, + agent_response: str, + issue_type: str, + description: str, + severity: str = "medium" + ) -> str: + """ + Report an issue with agent response + + Args: + user_id: User identifier + agent_name: Name of the agent + user_message: User's original message + agent_response: Agent's problematic response + issue_type: Type of issue (harmful/incorrect/inappropriate/other) + description: Detailed description + severity: low/medium/high/critical + + Returns: + Report ID + """ + report_id = f"report_{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + report_data = { + 'report_id': report_id, + 'user_id': user_id, + 'agent_name': agent_name, + 'feedback_type': FeedbackType.REPORT, + 'user_message': user_message, + 'agent_response': agent_response, + 'issue_type': issue_type, + 'description': description, + 'severity': severity, + 'status': 'pending', + 'timestamp': datetime.now().isoformat() + } + + # Save to file + file_path = self.storage_dir / "reports" / f"{report_id}.json" + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(report_data, f, ensure_ascii=False, indent=2) + + return report_id + + def get_feedback_stats(self, agent_name: Optional[str] = None) -> Dict[str, Any]: + """ + Get feedback statistics + + Args: + agent_name: Filter by agent name (optional) + + Returns: + Statistics dictionary + """ + stats = { + 'total_ratings': 0, + 'total_corrections': 0, + 'total_reports': 0, + 'average_rating': 0.0, + 'rating_distribution': {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}, + 'by_agent': {}, + 'by_category': {} + } + + # Count ratings + ratings = [] + for file_path in (self.storage_dir / "ratings").glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + if agent_name and data.get('agent_name') != agent_name: + continue + + rating = data.get('rating', 0) + ratings.append(rating) + stats['rating_distribution'][rating] += 1 + + # By agent + agent = data.get('agent_name', 'unknown') + if agent not in stats['by_agent']: + stats['by_agent'][agent] = {'count': 0, 'total_rating': 0} + stats['by_agent'][agent]['count'] += 1 + stats['by_agent'][agent]['total_rating'] += rating + + # By category + category = data.get('category', 'other') + if category not in stats['by_category']: + stats['by_category'][category] = 0 + stats['by_category'][category] += 1 + + stats['total_ratings'] = len(ratings) + stats['average_rating'] = sum(ratings) / len(ratings) if ratings else 0.0 + + # Calculate average per agent + for agent in stats['by_agent']: + count = stats['by_agent'][agent]['count'] + total = stats['by_agent'][agent]['total_rating'] + stats['by_agent'][agent]['average'] = total / count if count > 0 else 0.0 + + # Count corrections + stats['total_corrections'] = len(list((self.storage_dir / "corrections").glob("*.json"))) + + # Count reports + stats['total_reports'] = len(list((self.storage_dir / "reports").glob("*.json"))) + + return stats + + def get_low_rated_responses( + self, + min_rating: int = 2, + agent_name: Optional[str] = None, + limit: int = 50 + ) -> List[Dict[str, Any]]: + """ + Get low-rated responses for improvement + + Args: + min_rating: Maximum rating to include (1-5) + agent_name: 
Filter by agent name + limit: Maximum number of results + + Returns: + List of low-rated responses + """ + low_rated = [] + + for file_path in (self.storage_dir / "ratings").glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + if data.get('rating', 5) <= min_rating: + if agent_name is None or data.get('agent_name') == agent_name: + low_rated.append(data) + + # Sort by rating (lowest first) + low_rated.sort(key=lambda x: x.get('rating', 5)) + + return low_rated[:limit] + + def get_corrections( + self, + agent_name: Optional[str] = None, + limit: int = 100 + ) -> List[Dict[str, Any]]: + """ + Get user corrections for learning + + Args: + agent_name: Filter by agent name + limit: Maximum number of results + + Returns: + List of corrections + """ + corrections = [] + + for file_path in (self.storage_dir / "corrections").glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + if agent_name is None or data.get('agent_name') == agent_name: + corrections.append(data) + + # Sort by timestamp (newest first) + corrections.sort(key=lambda x: x.get('timestamp', ''), reverse=True) + + return corrections[:limit] + + def export_for_fine_tuning( + self, + agent_name: str, + min_rating: int = 4, + include_corrections: bool = True, + output_file: Optional[str] = None + ) -> str: + """ + Export high-quality feedback for fine-tuning + + Args: + agent_name: Agent to export for + min_rating: Minimum rating to include + include_corrections: Include user corrections + output_file: Output file path + + Returns: + Path to exported file + """ + if output_file is None: + output_file = f"feedback_training_{agent_name}_{datetime.now().strftime('%Y%m%d')}.jsonl" + + output_path = self.storage_dir / output_file + + training_data = [] + + # Add high-rated responses + for file_path in (self.storage_dir / "ratings").glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + if data.get('agent_name') == agent_name and data.get('rating', 0) >= min_rating: + training_data.append({ + 'messages': [ + {'role': 'user', 'content': data['user_message']}, + {'role': 'assistant', 'content': data['agent_response']} + ], + 'metadata': { + 'rating': data['rating'], + 'source': 'user_rating' + } + }) + + # Add corrections + if include_corrections: + for file_path in (self.storage_dir / "corrections").glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + if data.get('agent_name') == agent_name: + training_data.append({ + 'messages': [ + {'role': 'user', 'content': data['user_message']}, + {'role': 'assistant', 'content': data['corrected_response']} + ], + 'metadata': { + 'source': 'user_correction', + 'reason': data.get('correction_reason') + } + }) + + # Write to JSONL + with open(output_path, 'w', encoding='utf-8') as f: + for item in training_data: + f.write(json.dumps(item, ensure_ascii=False) + '\n') + + return str(output_path) + + +# Global instance +_feedback_collector = None + +def get_feedback_collector() -> FeedbackCollector: + """Get global feedback collector instance""" + global _feedback_collector + if _feedback_collector is None: + _feedback_collector = FeedbackCollector() + return _feedback_collector diff --git a/fine_tuning/README.md b/fine_tuning/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b5ed6e7f824cbfca5d3f83f42effd0b83f265f8 --- /dev/null +++ b/fine_tuning/README.md @@ -0,0 +1,297 @@ +# Fine-tuning Module 🎯 + +Train custom models on 
your healthcare conversation data. + +## Overview + +This module automatically collects conversation data and enables fine-tuning of specialized healthcare agents using OpenAI's fine-tuning API. + +## Features + +- ✅ **Automatic Data Collection** - Logs all agent conversations +- ✅ **Quality Filtering** - Filter by user ratings +- ✅ **OpenAI Format Export** - Ready for fine-tuning +- ✅ **Multi-Agent Support** - Train each agent separately +- ✅ **Job Management** - Track fine-tuning progress + +## How It Works + +### 1. Data Collection (Automatic) + +The system automatically logs conversations when enabled: + +```python +# In coordinator (already integrated) +coordinator = AgentCoordinator(enable_data_collection=True) +``` + +Data is stored in `fine_tuning/data/` organized by agent: +``` +fine_tuning/data/ +├── nutrition/ +│ ├── conversations_20241025.jsonl +│ └── multi_turn_20241025.jsonl +├── exercise/ +├── symptom/ +├── mental_health/ +└── general_health/ +``` + +### 2. Export Training Data + +Export conversations in OpenAI fine-tuning format: + +```python +from fine_tuning import get_data_collector + +collector = get_data_collector() + +# Export all conversations +training_file = collector.export_for_openai_finetuning( + agent_name='nutrition_agent', + output_file='nutrition_training.jsonl' +) + +# Export only high-quality conversations (rating >= 4.0) +training_file = collector.export_for_openai_finetuning( + agent_name='nutrition_agent', + min_quality_rating=4.0 +) +``` + +### 3. Fine-tune Agent + +#### Option A: Using Script (Recommended) + +```bash +# Fine-tune nutrition agent +python scripts/fine_tune_agent.py --agent nutrition + +# With quality filtering +python scripts/fine_tune_agent.py --agent nutrition --min-rating 4.0 + +# Start job without waiting +python scripts/fine_tune_agent.py --agent exercise --no-wait +``` + +#### Option B: Using Python API + +```python +from fine_tuning import fine_tune_agent + +# Fine-tune and wait for completion +model_id = fine_tune_agent( + agent_name='nutrition', + training_file='nutrition_training.jsonl', + model='gpt-4o-mini-2024-07-18', + wait_for_completion=True +) + +print(f"Fine-tuned model: {model_id}") +``` + +### 4. 
Use Fine-tuned Model + +Update agent configuration to use the fine-tuned model: + +```python +# config/settings.py or agent file +MODEL = 'ft:gpt-4o-mini-2024-07-18:your-org:nutrition:abc123' +``` + +## Data Format + +### Conversation Entry +```json +{ + "timestamp": "2024-10-25T10:00:00", + "agent": "nutrition_agent", + "user_message": "Tôi muốn giảm cân", + "agent_response": "Để giảm cân hiệu quả...", + "user_data": { + "age": 25, + "gender": "male", + "weight": 70, + "height": 175 + }, + "metadata": { + "rating": 5.0, + "feedback": "Very helpful" + } +} +``` + +### OpenAI Fine-tuning Format +```json +{ + "messages": [ + {"role": "system", "content": "You are a nutrition specialist."}, + {"role": "user", "content": "Tôi muốn giảm cân"}, + {"role": "assistant", "content": "Để giảm cân hiệu quả..."} + ] +} +``` + +## API Reference + +### ConversationDataCollector + +```python +from fine_tuning import get_data_collector + +collector = get_data_collector() + +# Log single conversation +collector.log_conversation( + agent_name='nutrition_agent', + user_message='User question', + agent_response='Agent answer', + user_data={'age': 25}, + metadata={'rating': 5.0} +) + +# Log multi-turn conversation +collector.log_multi_turn_conversation( + agent_name='nutrition_agent', + conversation_history=[ + ('User msg 1', 'Agent response 1'), + ('User msg 2', 'Agent response 2') + ], + user_data={'age': 25} +) + +# Get conversation counts +counts = collector.get_conversation_count() +# {'nutrition': 150, 'exercise': 89, ...} + +# Export for fine-tuning +training_file = collector.export_for_openai_finetuning( + agent_name='nutrition_agent', + min_quality_rating=4.0 +) +``` + +### FineTuningTrainer + +```python +from fine_tuning import FineTuningTrainer + +trainer = FineTuningTrainer() + +# Upload training file +file_id = trainer.upload_training_file('training.jsonl') + +# Create fine-tuning job +job_id = trainer.create_fine_tuning_job( + training_file_id=file_id, + model='gpt-4o-mini-2024-07-18', + suffix='nutrition-v1' +) + +# Check job status +status = trainer.check_job_status(job_id) + +# Wait for completion +result = trainer.wait_for_completion(job_id) + +# List all fine-tuned models +models = trainer.list_fine_tuned_models() +``` + +## Best Practices + +### 1. Data Quality + +- ✅ Collect at least **100-200 conversations** per agent +- ✅ Include diverse user queries and scenarios +- ✅ Filter by quality rating (>= 4.0 recommended) +- ✅ Review and clean data before fine-tuning + +### 2. Training + +- ✅ Start with `gpt-4o-mini` (faster, cheaper) +- ✅ Use descriptive suffixes (e.g., `nutrition-v1`) +- ✅ Monitor training progress +- ✅ Test fine-tuned model before deployment + +### 3. 
Evaluation + +- ✅ Compare fine-tuned vs base model responses +- ✅ Test on held-out conversations +- ✅ Collect user feedback on fine-tuned model +- ✅ Iterate based on results + +## Cost Estimation + +Fine-tuning costs (OpenAI pricing): +- **Training**: ~$0.008 per 1K tokens +- **Usage**: Same as base model + +Example: +- 200 conversations × 500 tokens avg = 100K tokens +- Training cost: ~$0.80 +- Usage: Same as gpt-4o-mini + +## Troubleshooting + +### No conversations collected + +```bash +# Check if data collection is enabled +# In coordinator initialization: +coordinator = AgentCoordinator(enable_data_collection=True) +``` + +### Training file format error + +```bash +# Validate JSONL format +python -c "import json; [json.loads(line) for line in open('file.jsonl')]" +``` + +### Fine-tuning job failed + +```python +# Check job status +trainer = FineTuningTrainer() +status = trainer.check_job_status('job-id') +print(status) +``` + +## Examples + +### Complete Workflow + +```python +from fine_tuning import get_data_collector, fine_tune_agent + +# 1. Check collected data +collector = get_data_collector() +counts = collector.get_conversation_count() +print(f"Nutrition conversations: {counts.get('nutrition', 0)}") + +# 2. Export training data +training_file = collector.export_for_openai_finetuning( + agent_name='nutrition_agent', + min_quality_rating=4.0 +) + +# 3. Fine-tune +model_id = fine_tune_agent( + agent_name='nutrition', + training_file=training_file, + suffix='v1', + wait_for_completion=True +) + +# 4. Update configuration +print(f"Update MODEL to: {model_id}") +``` + +## Future Enhancements + +- [ ] Automatic quality scoring +- [ ] Data augmentation +- [ ] Multi-model comparison +- [ ] A/B testing framework +- [ ] Continuous learning pipeline diff --git a/fine_tuning/__init__.py b/fine_tuning/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f89302eee50ae813663505aef52c6194cd3350 --- /dev/null +++ b/fine_tuning/__init__.py @@ -0,0 +1,14 @@ +""" +Fine-tuning Module +Collect conversation data and train custom models +""" + +from .data_collector import ConversationDataCollector, get_data_collector +from .trainer import FineTuningTrainer, fine_tune_agent + +__all__ = [ + 'ConversationDataCollector', + 'get_data_collector', + 'FineTuningTrainer', + 'fine_tune_agent' +] diff --git a/fine_tuning/data_collector.py b/fine_tuning/data_collector.py new file mode 100644 index 0000000000000000000000000000000000000000..cce3f43196af6dc9754807fb0927dcbdcac9104a --- /dev/null +++ b/fine_tuning/data_collector.py @@ -0,0 +1,217 @@ +""" +Data Collector for Fine-tuning +Collects and stores conversation data for training custom models +""" + +import json +import os +from datetime import datetime +from pathlib import Path +from typing import List, Dict, Any, Optional + + +class ConversationDataCollector: + """Collects conversation data for fine-tuning""" + + def __init__(self, data_dir: str = "fine_tuning/data"): + self.data_dir = Path(data_dir) + self.data_dir.mkdir(parents=True, exist_ok=True) + + # Create subdirectories for each agent + self.agent_dirs = { + 'nutrition': self.data_dir / 'nutrition', + 'exercise': self.data_dir / 'exercise', + 'symptom': self.data_dir / 'symptom', + 'mental_health': self.data_dir / 'mental_health', + 'general_health': self.data_dir / 'general_health' + } + + for agent_dir in self.agent_dirs.values(): + agent_dir.mkdir(exist_ok=True) + + def log_conversation( + self, + agent_name: str, + user_message: str, + agent_response: str, + 
user_data: Optional[Dict[str, Any]] = None, + metadata: Optional[Dict[str, Any]] = None + ) -> None: + """ + Log a conversation turn for fine-tuning + + Args: + agent_name: Name of the agent (nutrition, exercise, etc.) + user_message: User's message + agent_response: Agent's response + user_data: User profile data (age, gender, etc.) + metadata: Additional metadata (rating, feedback, etc.) + """ + conversation_entry = { + 'timestamp': datetime.now().isoformat(), + 'agent': agent_name, + 'user_message': user_message, + 'agent_response': agent_response, + 'user_data': user_data or {}, + 'metadata': metadata or {} + } + + # Save to agent-specific file + agent_key = agent_name.replace('_agent', '') + if agent_key in self.agent_dirs: + filename = f"conversations_{datetime.now().strftime('%Y%m%d')}.jsonl" + filepath = self.agent_dirs[agent_key] / filename + + with open(filepath, 'a', encoding='utf-8') as f: + f.write(json.dumps(conversation_entry, ensure_ascii=False) + '\n') + + def log_multi_turn_conversation( + self, + agent_name: str, + conversation_history: List[tuple], + user_data: Optional[Dict[str, Any]] = None, + metadata: Optional[Dict[str, Any]] = None + ) -> None: + """ + Log a multi-turn conversation + + Args: + agent_name: Name of the agent + conversation_history: List of (user_msg, agent_msg) tuples + user_data: User profile data + metadata: Additional metadata + """ + multi_turn_entry = { + 'timestamp': datetime.now().isoformat(), + 'agent': agent_name, + 'conversation': [ + {'user': user_msg, 'agent': agent_msg} + for user_msg, agent_msg in conversation_history + ], + 'user_data': user_data or {}, + 'metadata': metadata or {} + } + + agent_key = agent_name.replace('_agent', '') + if agent_key in self.agent_dirs: + filename = f"multi_turn_{datetime.now().strftime('%Y%m%d')}.jsonl" + filepath = self.agent_dirs[agent_key] / filename + + with open(filepath, 'a', encoding='utf-8') as f: + f.write(json.dumps(multi_turn_entry, ensure_ascii=False) + '\n') + + def get_conversation_count(self, agent_name: Optional[str] = None) -> Dict[str, int]: + """ + Get count of logged conversations + + Args: + agent_name: Optional agent name to filter by + + Returns: + Dict with agent names and conversation counts + """ + counts = {} + + agents_to_check = [agent_name.replace('_agent', '')] if agent_name else self.agent_dirs.keys() + + for agent_key in agents_to_check: + if agent_key in self.agent_dirs: + agent_dir = self.agent_dirs[agent_key] + count = 0 + + for file in agent_dir.glob('conversations_*.jsonl'): + with open(file, 'r', encoding='utf-8') as f: + count += sum(1 for _ in f) + + counts[agent_key] = count + + return counts + + def export_for_openai_finetuning( + self, + agent_name: str, + output_file: Optional[str] = None, + min_quality_rating: Optional[float] = None + ) -> str: + """ + Export conversations in OpenAI fine-tuning format + + Args: + agent_name: Agent to export data for + output_file: Output file path + min_quality_rating: Minimum quality rating to include + + Returns: + Path to exported file + """ + agent_key = agent_name.replace('_agent', '') + if agent_key not in self.agent_dirs: + raise ValueError(f"Unknown agent: {agent_name}") + + if output_file is None: + output_file = self.data_dir / f"{agent_key}_finetuning_{datetime.now().strftime('%Y%m%d')}.jsonl" + + agent_dir = self.agent_dirs[agent_key] + exported_count = 0 + + with open(output_file, 'w', encoding='utf-8') as out_f: + # Process single-turn conversations + for file in agent_dir.glob('conversations_*.jsonl'): + with 
open(file, 'r', encoding='utf-8') as in_f: + for line in in_f: + entry = json.loads(line) + + # Filter by quality rating if specified + if min_quality_rating: + rating = entry.get('metadata', {}).get('rating') + if rating is None or rating < min_quality_rating: + continue + + # Convert to OpenAI format + openai_format = { + "messages": [ + {"role": "system", "content": f"You are a {agent_key} specialist."}, + {"role": "user", "content": entry['user_message']}, + {"role": "assistant", "content": entry['agent_response']} + ] + } + + out_f.write(json.dumps(openai_format, ensure_ascii=False) + '\n') + exported_count += 1 + + # Process multi-turn conversations + for file in agent_dir.glob('multi_turn_*.jsonl'): + with open(file, 'r', encoding='utf-8') as in_f: + for line in in_f: + entry = json.loads(line) + + # Filter by quality rating if specified + if min_quality_rating: + rating = entry.get('metadata', {}).get('rating') + if rating is None or rating < min_quality_rating: + continue + + # Convert to OpenAI format + messages = [{"role": "system", "content": f"You are a {agent_key} specialist."}] + + for turn in entry['conversation']: + messages.append({"role": "user", "content": turn['user']}) + messages.append({"role": "assistant", "content": turn['agent']}) + + openai_format = {"messages": messages} + out_f.write(json.dumps(openai_format, ensure_ascii=False) + '\n') + exported_count += 1 + + print(f"✅ Exported {exported_count} conversations to {output_file}") + return str(output_file) + + +# Global instance +_collector = None + +def get_data_collector() -> ConversationDataCollector: + """Get global data collector instance""" + global _collector + if _collector is None: + _collector = ConversationDataCollector() + return _collector diff --git a/fine_tuning/trainer.py b/fine_tuning/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..f336f86b846b8d1464baf9d0be14505f2d6e83f0 --- /dev/null +++ b/fine_tuning/trainer.py @@ -0,0 +1,244 @@ +""" +Fine-tuning Trainer +Handles the fine-tuning process with OpenAI API +""" + +import os +import time +from pathlib import Path +from typing import Optional, Dict, Any +from openai import OpenAI + + +class FineTuningTrainer: + """Manages fine-tuning jobs with OpenAI""" + + def __init__(self, api_key: Optional[str] = None): + self.client = OpenAI(api_key=api_key or os.getenv('OPENAI_API_KEY')) + self.jobs_dir = Path('fine_tuning/jobs') + self.jobs_dir.mkdir(parents=True, exist_ok=True) + + def upload_training_file(self, file_path: str) -> str: + """ + Upload training file to OpenAI + + Args: + file_path: Path to training data file (JSONL format) + + Returns: + File ID from OpenAI + """ + print(f"📤 Uploading training file: {file_path}") + + with open(file_path, 'rb') as f: + response = self.client.files.create( + file=f, + purpose='fine-tune' + ) + + file_id = response.id + print(f"✅ File uploaded successfully: {file_id}") + return file_id + + def create_fine_tuning_job( + self, + training_file_id: str, + model: str = "gpt-4o-mini-2024-07-18", + suffix: Optional[str] = None, + hyperparameters: Optional[Dict[str, Any]] = None + ) -> str: + """ + Create a fine-tuning job + + Args: + training_file_id: ID of uploaded training file + model: Base model to fine-tune + suffix: Suffix for fine-tuned model name + hyperparameters: Training hyperparameters + + Returns: + Fine-tuning job ID + """ + print(f"🚀 Creating fine-tuning job...") + print(f" Base model: {model}") + print(f" Training file: {training_file_id}") + + job_params = { + 
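+            # The two required fields come first; the optional 'suffix' and
+            # 'hyperparameters' keys (e.g. {'n_epochs': 3}, a standard OpenAI
+            # fine-tuning option) are attached conditionally below.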
'training_file': training_file_id, + 'model': model + } + + if suffix: + job_params['suffix'] = suffix + + if hyperparameters: + job_params['hyperparameters'] = hyperparameters + + response = self.client.fine_tuning.jobs.create(**job_params) + + job_id = response.id + print(f"✅ Fine-tuning job created: {job_id}") + + # Save job info + self._save_job_info(job_id, { + 'training_file_id': training_file_id, + 'model': model, + 'suffix': suffix, + 'hyperparameters': hyperparameters, + 'status': 'created' + }) + + return job_id + + def check_job_status(self, job_id: str) -> Dict[str, Any]: + """ + Check status of fine-tuning job + + Args: + job_id: Fine-tuning job ID + + Returns: + Job status information + """ + response = self.client.fine_tuning.jobs.retrieve(job_id) + + status_info = { + 'id': response.id, + 'status': response.status, + 'model': response.model, + 'fine_tuned_model': response.fine_tuned_model, + 'created_at': response.created_at, + 'finished_at': response.finished_at, + 'trained_tokens': response.trained_tokens + } + + return status_info + + def wait_for_completion( + self, + job_id: str, + check_interval: int = 60, + timeout: int = 3600 + ) -> Dict[str, Any]: + """ + Wait for fine-tuning job to complete + + Args: + job_id: Fine-tuning job ID + check_interval: Seconds between status checks + timeout: Maximum seconds to wait + + Returns: + Final job status + """ + print(f"⏳ Waiting for fine-tuning job {job_id} to complete...") + + start_time = time.time() + + while True: + status_info = self.check_job_status(job_id) + status = status_info['status'] + + print(f" Status: {status}") + + if status == 'succeeded': + print(f"✅ Fine-tuning completed!") + print(f" Fine-tuned model: {status_info['fine_tuned_model']}") + self._save_job_info(job_id, status_info) + return status_info + + elif status in ['failed', 'cancelled']: + print(f"❌ Fine-tuning {status}") + self._save_job_info(job_id, status_info) + raise Exception(f"Fine-tuning job {status}") + + elif time.time() - start_time > timeout: + print(f"⏰ Timeout reached") + raise TimeoutError(f"Fine-tuning job exceeded {timeout} seconds") + + time.sleep(check_interval) + + def list_fine_tuned_models(self) -> list: + """ + List all fine-tuned models + + Returns: + List of fine-tuned model information + """ + response = self.client.fine_tuning.jobs.list(limit=50) + + models = [] + for job in response.data: + if job.fine_tuned_model: + models.append({ + 'job_id': job.id, + 'model_id': job.fine_tuned_model, + 'base_model': job.model, + 'status': job.status, + 'created_at': job.created_at, + 'finished_at': job.finished_at + }) + + return models + + def cancel_job(self, job_id: str) -> None: + """ + Cancel a running fine-tuning job + + Args: + job_id: Fine-tuning job ID + """ + print(f"🛑 Cancelling job {job_id}...") + self.client.fine_tuning.jobs.cancel(job_id) + print(f"✅ Job cancelled") + + def _save_job_info(self, job_id: str, info: Dict[str, Any]) -> None: + """Save job information to file""" + import json + + job_file = self.jobs_dir / f"{job_id}.json" + with open(job_file, 'w') as f: + json.dump(info, f, indent=2, default=str) + + +def fine_tune_agent( + agent_name: str, + training_file: str, + model: str = "gpt-4o-mini-2024-07-18", + suffix: Optional[str] = None, + wait_for_completion: bool = True +) -> str: + """ + Convenience function to fine-tune an agent + + Args: + agent_name: Name of agent (nutrition, exercise, etc.) 
+ training_file: Path to training data + model: Base model to use + suffix: Suffix for model name + wait_for_completion: Whether to wait for job to finish + + Returns: + Fine-tuned model ID or job ID + """ + trainer = FineTuningTrainer() + + # Upload file + file_id = trainer.upload_training_file(training_file) + + # Create job + if suffix is None: + suffix = f"{agent_name}-{int(time.time())}" + + job_id = trainer.create_fine_tuning_job( + training_file_id=file_id, + model=model, + suffix=suffix + ) + + # Wait for completion if requested + if wait_for_completion: + status = trainer.wait_for_completion(job_id) + return status['fine_tuned_model'] + else: + return job_id diff --git a/fitness_tracking/__init__.py b/fitness_tracking/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b06a443c759e4d66063aa556e3206559ad7fc612 --- /dev/null +++ b/fitness_tracking/__init__.py @@ -0,0 +1,10 @@ +""" +Fitness Tracking Package - Progress monitoring and adaptive recommendations +""" + +from .fitness_tracker import FitnessTracker + +__all__ = [ + 'FitnessTracker' +] + diff --git a/fitness_tracking/fitness_tracker.py b/fitness_tracking/fitness_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..db70f9b3b6bba183cd256af72359eef16c0682b8 --- /dev/null +++ b/fitness_tracking/fitness_tracker.py @@ -0,0 +1,323 @@ +""" +Fitness Tracker - Progress tracking and adaptive workout recommendations +Monitors fitness progress and adjusts recommendations based on performance +""" + +from typing import List, Dict, Any, Optional +from datetime import datetime, timedelta +from collections import Counter + +from health_data import HealthContext + + +class FitnessTracker: + """ + Fitness progress tracking and adaptive workout planning + Monitors workouts, calculates metrics, and adapts recommendations + """ + + def __init__(self, health_context: HealthContext): + self.health_context = health_context + self.user_id = health_context.user_id + + # ===== Tracking Methods ===== + + def record_workout(self, workout_data: Dict[str, Any]) -> None: + """Record a completed workout""" + self.health_context.add_fitness_record(workout_data) + + def get_workout_history(self, days: int = 30) -> List[Dict[str, Any]]: + """Get workout history""" + history = self.health_context.get_fitness_history(days) + return [w.to_dict() for w in history] + + def calculate_adherence_rate(self, days: int = 30) -> float: + """Calculate workout adherence rate (0-1)""" + return self.health_context.get_workout_adherence(days) + + def calculate_progress_metrics(self, days: int = 30) -> Dict[str, Any]: + """Calculate comprehensive progress metrics""" + history = self.health_context.get_fitness_history(days) + + if not history: + return { + 'total_workouts': 0, + 'total_minutes': 0, + 'avg_duration': 0, + 'avg_intensity': 'none', + 'adherence': 0.0, + 'consistency_score': 0.0 + } + + # Calculate metrics + total_workouts = len(history) + total_minutes = sum(w.duration_minutes for w in history) + avg_duration = total_minutes / total_workouts if total_workouts > 0 else 0 + + # Calculate intensity distribution + intensity_counts = Counter(w.intensity for w in history) + avg_intensity = intensity_counts.most_common(1)[0][0] if intensity_counts else 'medium' + + # Calculate adherence + adherence = self.calculate_adherence_rate(days) + + # Calculate consistency (workouts spread across days) + unique_days = len(set(w.workout_date for w in history)) + consistency_score = min(unique_days / (days / 7), 1.0) # Normalize 
to 0-1 + + return { + 'total_workouts': total_workouts, + 'total_minutes': total_minutes, + 'avg_duration': round(avg_duration, 1), + 'avg_intensity': avg_intensity, + 'intensity_distribution': dict(intensity_counts), + 'adherence': round(adherence, 2), + 'consistency_score': round(consistency_score, 2), + 'unique_days_active': unique_days, + 'period_days': days + } + + # ===== Adaptation Methods ===== + + def adjust_difficulty(self, current_plan: Dict[str, Any]) -> Dict[str, Any]: + """Adjust workout difficulty based on progress""" + metrics = self.calculate_progress_metrics(days=30) + adherence = metrics['adherence'] + + adjusted_plan = current_plan.copy() + + if adherence > 0.85: + # User is highly adherent - increase difficulty + adjusted_plan['difficulty_level'] = 'advanced' + adjusted_plan['adjustment_reason'] = 'High adherence detected' + adjusted_plan['recommendation'] = 'Increase intensity or add more challenging exercises' + adjusted_plan['suggested_changes'] = [ + 'Increase weight/resistance by 10%', + 'Add 1-2 extra sets', + 'Reduce rest periods by 15-30 seconds', + 'Try advanced variations of exercises' + ] + elif adherence > 0.6: + # User is moderately adherent - maintain + adjusted_plan['difficulty_level'] = 'intermediate' + adjusted_plan['adjustment_reason'] = 'Good adherence' + adjusted_plan['recommendation'] = 'Maintain current intensity' + adjusted_plan['suggested_changes'] = [ + 'Keep current routine', + 'Focus on form and technique', + 'Gradually increase volume' + ] + else: + # User is struggling - decrease difficulty + adjusted_plan['difficulty_level'] = 'beginner' + adjusted_plan['adjustment_reason'] = 'Low adherence detected' + adjusted_plan['recommendation'] = 'Simplify routine for better adherence' + adjusted_plan['suggested_changes'] = [ + 'Reduce number of exercises', + 'Decrease sets or reps', + 'Increase rest periods', + 'Focus on consistency over intensity' + ] + + adjusted_plan['metrics'] = metrics + adjusted_plan['adjusted_at'] = datetime.now().isoformat() + + return adjusted_plan + + def recommend_next_phase(self, current_plan: Dict[str, Any]) -> Dict[str, Any]: + """Recommend next training phase""" + metrics = self.calculate_progress_metrics(days=60) + + recommendation = { + 'current_metrics': metrics, + 'phase_recommendation': None, + 'rationale': '', + 'next_phase_details': {} + } + + if metrics['adherence'] < 0.3: + recommendation['phase_recommendation'] = 'foundation' + recommendation['rationale'] = 'Build consistency before progressing' + recommendation['next_phase_details'] = { + 'focus': 'Establish routine and build habit', + 'duration': '4 weeks', + 'frequency': '3 days/week', + 'intensity': 'low to moderate' + } + elif metrics['adherence'] < 0.6: + recommendation['phase_recommendation'] = 'building' + recommendation['rationale'] = 'Increase volume and intensity gradually' + recommendation['next_phase_details'] = { + 'focus': 'Build strength and endurance', + 'duration': '6-8 weeks', + 'frequency': '4 days/week', + 'intensity': 'moderate' + } + elif metrics['adherence'] < 0.85: + recommendation['phase_recommendation'] = 'progression' + recommendation['rationale'] = 'Progress to more challenging workouts' + recommendation['next_phase_details'] = { + 'focus': 'Increase intensity and complexity', + 'duration': '8-12 weeks', + 'frequency': '4-5 days/week', + 'intensity': 'moderate to high' + } + else: + recommendation['phase_recommendation'] = 'advanced' + recommendation['rationale'] = 'Ready for advanced training' + 
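+            # Only adherence >= 0.85 over the 60-day window reaches this branch;
+            # e.g. 24 workouts against a 4-day/week plan is roughly 24/34 ≈ 0.7,
+            # which still lands in 'progression' (assuming get_workout_adherence
+            # returns completed/planned sessions). The 0.6 and 0.85 cut-points
+            # are the same ones adjust_difficulty() uses above.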
recommendation['next_phase_details'] = { + 'focus': 'Specialized training (strength, hypertrophy, endurance)', + 'duration': '12+ weeks', + 'frequency': '5-6 days/week', + 'intensity': 'high' + } + + return recommendation + + def identify_plateaus(self) -> List[str]: + """Identify fitness plateaus""" + plateaus = [] + + # Check for no progress in last 30 days + history_30 = self.health_context.get_fitness_history(days=30) + history_60 = self.health_context.get_fitness_history(days=60) + + if len(history_30) == 0: + plateaus.append("No workouts recorded in last 30 days - restart training") + + # Check for same intensity + if history_30: + intensities_30 = [w.intensity for w in history_30] + if len(set(intensities_30)) == 1: + plateaus.append(f"Same intensity ({intensities_30[0]}) for 30 days - increase difficulty") + + # Check for declining adherence + adherence_30 = self.calculate_adherence_rate(days=30) + adherence_60 = self.calculate_adherence_rate(days=60) + + if adherence_60 > 0.5 and adherence_30 < 0.3: + plateaus.append("Declining adherence - motivation may be dropping") + + return plateaus + + def suggest_workout_variations(self) -> List[str]: + """Suggest workout variations to prevent boredom""" + history = self.health_context.get_fitness_history(days=30) + + if not history: + return [ + 'Try cardio: running, cycling, swimming', + 'Try strength: weight training, bodyweight exercises', + 'Try flexibility: yoga, pilates, stretching' + ] + + # Analyze current workout types + workout_types = Counter(w.workout_type for w in history) + most_common = workout_types.most_common(1)[0][0] if workout_types else None + + suggestions = [] + + if most_common == 'cardio': + suggestions = [ + 'Try strength training to build muscle', + 'Add flexibility work (yoga, stretching)', + 'Try HIIT for variety' + ] + elif most_common == 'strength': + suggestions = [ + 'Add cardio for cardiovascular health', + 'Try flexibility training', + 'Experiment with different strength styles (powerlifting, bodybuilding)' + ] + elif most_common == 'flexibility': + suggestions = [ + 'Add cardio for endurance', + 'Add strength training for muscle', + 'Try sports or recreational activities' + ] + else: + suggestions = [ + 'Mix cardio, strength, and flexibility', + 'Try group fitness classes', + 'Explore new sports or activities' + ] + + return suggestions + + # ===== Analysis Methods ===== + + def analyze_workout_effectiveness(self) -> Dict[str, Any]: + """Analyze workout effectiveness""" + metrics = self.calculate_progress_metrics(days=60) + + effectiveness = { + 'overall_score': 0.0, + 'factors': {}, + 'assessment': '', + 'recommendations': [] + } + + # Score based on adherence + adherence_score = metrics['adherence'] * 0.4 + + # Score based on consistency + consistency_score = metrics['consistency_score'] * 0.3 + + # Score based on intensity + intensity_score = 0.3 + if metrics['avg_intensity'] == 'high': + intensity_score = 0.9 + elif metrics['avg_intensity'] == 'medium': + intensity_score = 0.7 + else: + intensity_score = 0.4 + intensity_score *= 0.3 + + overall_score = adherence_score + consistency_score + intensity_score + + effectiveness['overall_score'] = round(overall_score, 2) + effectiveness['factors'] = { + 'adherence_contribution': round(adherence_score, 2), + 'consistency_contribution': round(consistency_score, 2), + 'intensity_contribution': round(intensity_score, 2) + } + + if overall_score > 0.8: + effectiveness['assessment'] = 'Excellent - Your workouts are very effective' + 
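+            # Worked example: adherence 0.9 → 0.36, consistency 0.8 → 0.24,
+            # 'high' intensity → 0.9 * 0.3 = 0.27; total 0.87 > 0.8 lands here
+            # (the 0.4 / 0.3 / 0.3 weights are applied above).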
effectiveness['recommendations'] = ['Maintain current routine', 'Consider increasing intensity'] + elif overall_score > 0.6: + effectiveness['assessment'] = 'Good - Your workouts are effective' + effectiveness['recommendations'] = ['Keep up the consistency', 'Try to increase intensity'] + elif overall_score > 0.4: + effectiveness['assessment'] = 'Fair - Room for improvement' + effectiveness['recommendations'] = ['Increase workout frequency', 'Boost intensity'] + else: + effectiveness['assessment'] = 'Needs improvement' + effectiveness['recommendations'] = ['Start with 3 workouts/week', 'Focus on consistency first'] + + return effectiveness + + def correlate_fitness_health(self) -> Dict[str, Any]: + """Correlate fitness progress with overall health""" + metrics = self.calculate_progress_metrics(days=30) + + # Get health analysis + from health_analysis import HealthAnalyzer + analyzer = HealthAnalyzer(self.health_context) + health_status = analyzer.analyze_health_status() + + correlation = { + 'fitness_metrics': metrics, + 'health_status': health_status, + 'correlation_analysis': {}, + 'insights': [] + } + + # Analyze correlation + if metrics['adherence'] > 0.7 and health_status['overall_health_score'] > 0.7: + correlation['insights'].append('Strong positive correlation: High fitness adherence correlates with good health') + elif metrics['adherence'] < 0.3 and health_status['overall_health_score'] < 0.5: + correlation['insights'].append('Low fitness and health - increasing exercise could improve overall health') + + return correlation + diff --git a/health_analysis/__init__.py b/health_analysis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..062ea49a6c08951a984782d98f0fcebdbb7863c9 --- /dev/null +++ b/health_analysis/__init__.py @@ -0,0 +1,10 @@ +""" +Health Analysis Package - Comprehensive health analysis and disease prediction +""" + +from .health_analyzer import HealthAnalyzer + +__all__ = [ + 'HealthAnalyzer' +] + diff --git a/health_analysis/health_analyzer.py b/health_analysis/health_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..6d7f13d0450a25fa4523e28af2ff1d50185b93f0 --- /dev/null +++ b/health_analysis/health_analyzer.py @@ -0,0 +1,395 @@ +""" +Health Analyzer - Comprehensive health analysis and disease risk prediction +Analyzes user health data to provide insights and predictions +""" + +from typing import List, Dict, Any, Optional +from datetime import datetime, timedelta +import math + +from health_data import HealthContext + + +class HealthAnalyzer: + """ + Comprehensive health analysis and disease risk prediction + Provides health scoring, risk assessment, and preventive recommendations + """ + + def __init__(self, health_context: HealthContext): + self.health_context = health_context + self.user_id = health_context.user_id + + # ===== Health Status Analysis ===== + + def analyze_health_status(self) -> Dict[str, Any]: + """Comprehensive health status analysis""" + profile = self.health_context.get_user_profile() + + analysis = { + 'timestamp': datetime.now().isoformat(), + 'bmi_status': self._analyze_bmi(profile), + 'activity_status': self._analyze_activity(), + 'symptom_status': self._analyze_symptoms(), + 'nutrition_status': self._analyze_nutrition(), + 'mental_health_status': self._analyze_mental_health(), + 'overall_health_score': 0.0 + } + + # Calculate overall health score + scores = [ + analysis['bmi_status'].get('score', 0.5), + analysis['activity_status'].get('score', 0.5), + 
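+            # Each component reports a 0-1 score; .get(..., 0.5) treats a missing
+            # or unknown domain as neutral rather than dragging the average down.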
analysis['symptom_status'].get('score', 0.5), + analysis['nutrition_status'].get('score', 0.5), + analysis['mental_health_status'].get('score', 0.5) + ] + + analysis['overall_health_score'] = round(sum(scores) / len(scores), 2) + + return analysis + + def _analyze_bmi(self, profile) -> Dict[str, Any]: + """Analyze BMI status""" + if not profile.bmi: + return {'status': 'unknown', 'score': 0.5, 'recommendation': 'Calculate BMI first'} + + bmi = profile.bmi + + if bmi < 18.5: + return { + 'status': 'underweight', + 'score': 0.6, + 'bmi': bmi, + 'recommendation': 'Consider healthy weight gain with proper nutrition' + } + elif bmi < 25: + return { + 'status': 'normal', + 'score': 1.0, + 'bmi': bmi, + 'recommendation': 'Maintain current weight with balanced diet and exercise' + } + elif bmi < 30: + return { + 'status': 'overweight', + 'score': 0.7, + 'bmi': bmi, + 'recommendation': 'Gradual weight loss through diet and exercise' + } + else: + return { + 'status': 'obese', + 'score': 0.4, + 'bmi': bmi, + 'recommendation': 'Consult healthcare provider for weight management plan' + } + + def _analyze_activity(self) -> Dict[str, Any]: + """Analyze physical activity status""" + fitness_history = self.health_context.get_fitness_history(days=30) + adherence = self.health_context.get_workout_adherence(days=30) + + if not fitness_history: + return { + 'status': 'sedentary', + 'score': 0.3, + 'workouts_30d': 0, + 'recommendation': 'Start with 150 minutes of moderate activity per week' + } + + total_minutes = sum(f.duration_minutes for f in fitness_history) + + if adherence > 0.8 and total_minutes > 150: + return { + 'status': 'active', + 'score': 1.0, + 'workouts_30d': len(fitness_history), + 'total_minutes': total_minutes, + 'adherence': adherence, + 'recommendation': 'Excellent! Maintain current activity level' + } + elif adherence > 0.5: + return { + 'status': 'moderately_active', + 'score': 0.7, + 'workouts_30d': len(fitness_history), + 'total_minutes': total_minutes, + 'adherence': adherence, + 'recommendation': 'Good progress! 
Try to increase frequency' + } + else: + return { + 'status': 'low_activity', + 'score': 0.4, + 'workouts_30d': len(fitness_history), + 'total_minutes': total_minutes, + 'adherence': adherence, + 'recommendation': 'Increase physical activity gradually' + } + + def _analyze_symptoms(self) -> Dict[str, Any]: + """Analyze symptom patterns""" + symptom_records = self.health_context.get_records_by_type('symptom') + + if not symptom_records: + return { + 'status': 'no_symptoms', + 'score': 1.0, + 'recommendation': 'Continue monitoring health' + } + + # Count symptoms in last 30 days + recent_symptoms = [r for r in symptom_records if r.timestamp > datetime.now() - timedelta(days=30)] + + if len(recent_symptoms) > 5: + return { + 'status': 'frequent_symptoms', + 'score': 0.4, + 'recent_symptoms': len(recent_symptoms), + 'recommendation': 'Consult healthcare provider for evaluation' + } + elif len(recent_symptoms) > 0: + return { + 'status': 'occasional_symptoms', + 'score': 0.7, + 'recent_symptoms': len(recent_symptoms), + 'recommendation': 'Monitor symptoms and maintain healthy lifestyle' + } + else: + return { + 'status': 'no_recent_symptoms', + 'score': 0.9, + 'recommendation': 'Good health status' + } + + def _analyze_nutrition(self) -> Dict[str, Any]: + """Analyze nutrition status""" + nutrition_records = self.health_context.get_records_by_type('nutrition') + + if not nutrition_records: + return { + 'status': 'unknown', + 'score': 0.5, + 'recommendation': 'Share your nutrition habits for personalized advice' + } + + # Check adherence to nutrition plans + adherence = len(nutrition_records) / max(1, (30 / 7)) # Expected ~1 per week + + if adherence > 0.8: + return { + 'status': 'good_adherence', + 'score': 0.9, + 'adherence': min(adherence, 1.0), + 'recommendation': 'Excellent nutrition tracking!' 
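+                # e.g. 4 records in the 30-day window gives 4 / (30/7) ≈ 0.93 > 0.8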
+ } + else: + return { + 'status': 'low_adherence', + 'score': 0.5, + 'adherence': adherence, + 'recommendation': 'Improve nutrition tracking and consistency' + } + + def _analyze_mental_health(self) -> Dict[str, Any]: + """Analyze mental health status""" + mental_records = self.health_context.get_records_by_type('mental_health') + + if not mental_records: + return { + 'status': 'unknown', + 'score': 0.5, + 'recommendation': 'Share your mental health concerns for support' + } + + # Check for stress/anxiety mentions + stress_count = sum(1 for r in mental_records if 'stress' in str(r.data).lower()) + + if stress_count > 3: + return { + 'status': 'high_stress', + 'score': 0.4, + 'stress_indicators': stress_count, + 'recommendation': 'Consider stress management techniques and professional support' + } + else: + return { + 'status': 'stable', + 'score': 0.8, + 'recommendation': 'Continue mental health practices' + } + + def calculate_health_score(self) -> float: + """Calculate overall health score (0-100)""" + analysis = self.analyze_health_status() + return round(analysis['overall_health_score'] * 100, 1) + + # ===== Risk Prediction ===== + + def identify_health_risks(self) -> List[Dict[str, Any]]: + """Identify potential health risks""" + risks = [] + profile = self.health_context.get_user_profile() + + # BMI-related risks + if profile.bmi and profile.bmi > 30: + risks.append({ + 'risk_type': 'obesity', + 'severity': 'high', + 'description': 'Elevated BMI increases risk of cardiovascular disease and diabetes', + 'recommendation': 'Consult healthcare provider for weight management' + }) + + # Sedentary lifestyle risk + fitness_history = self.health_context.get_fitness_history(days=30) + if len(fitness_history) < 2: + risks.append({ + 'risk_type': 'sedentary_lifestyle', + 'severity': 'medium', + 'description': 'Low physical activity increases health risks', + 'recommendation': 'Start with 30 minutes of moderate activity daily' + }) + + # Chronic condition risks + if profile.health_conditions: + for condition in profile.health_conditions: + risks.append({ + 'risk_type': f'chronic_{condition}', + 'severity': 'medium', + 'description': f'Existing condition: {condition}', + 'recommendation': 'Follow medical advice and monitor regularly' + }) + + return risks + + def predict_disease_risk(self) -> List[Dict[str, Any]]: + """Predict disease risk based on health data""" + predictions = [] + profile = self.health_context.get_user_profile() + + # Cardiovascular disease risk + cv_risk_score = self._calculate_cv_risk(profile) + if cv_risk_score > 0.6: + predictions.append({ + 'disease': 'cardiovascular_disease', + 'risk_score': cv_risk_score, + 'risk_level': 'high' if cv_risk_score > 0.8 else 'medium', + 'factors': ['high_bmi', 'low_activity', 'age'], + 'recommendation': 'Regular cardiovascular screening recommended' + }) + + # Type 2 Diabetes risk + diabetes_risk = self._calculate_diabetes_risk(profile) + if diabetes_risk > 0.6: + predictions.append({ + 'disease': 'type_2_diabetes', + 'risk_score': diabetes_risk, + 'risk_level': 'high' if diabetes_risk > 0.8 else 'medium', + 'factors': ['high_bmi', 'sedentary', 'age'], + 'recommendation': 'Blood glucose screening recommended' + }) + + return predictions + + def _calculate_cv_risk(self, profile) -> float: + """Calculate cardiovascular disease risk (0-1)""" + risk = 0.3 # Base risk + + # Age factor + if profile.age and profile.age > 50: + risk += 0.2 + + # BMI factor + if profile.bmi and profile.bmi > 30: + risk += 0.2 + + # Activity factor + 
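+        # These additive weights are illustrative heuristics, not a validated
+        # clinical score. Example: age 55 (+0.2) and BMI 31 (+0.2) on the 0.3
+        # base, plus the activity (+0.15) and condition (+0.1) factors below,
+        # total 0.95, which predict_disease_risk() reports as 'high' (> 0.8).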
fitness_history = self.health_context.get_fitness_history(days=30) + if len(fitness_history) < 2: + risk += 0.15 + + # Health conditions + if profile.health_conditions: + risk += 0.1 + + return min(risk, 1.0) + + def _calculate_diabetes_risk(self, profile) -> float: + """Calculate type 2 diabetes risk (0-1)""" + risk = 0.2 # Base risk + + # BMI factor (strongest predictor) + if profile.bmi and profile.bmi > 25: + risk += 0.3 + + # Age factor + if profile.age and profile.age > 45: + risk += 0.15 + + # Activity factor + fitness_history = self.health_context.get_fitness_history(days=30) + if len(fitness_history) < 2: + risk += 0.2 + + return min(risk, 1.0) + + def recommend_preventive_measures(self) -> List[str]: + """Recommend preventive health measures""" + recommendations = [] + profile = self.health_context.get_user_profile() + + # Weight management + if profile.bmi and profile.bmi > 25: + recommendations.append("Implement gradual weight loss through balanced diet and exercise") + + # Physical activity + fitness_history = self.health_context.get_fitness_history(days=30) + if len(fitness_history) < 3: + recommendations.append("Aim for 150 minutes of moderate-intensity exercise per week") + + # Nutrition + recommendations.append("Maintain balanced diet with whole grains, fruits, and vegetables") + + # Stress management + recommendations.append("Practice stress management techniques like meditation or yoga") + + # Regular checkups + recommendations.append("Schedule regular health checkups with your healthcare provider") + + # Sleep + recommendations.append("Maintain 7-9 hours of quality sleep per night") + + return recommendations + + def generate_health_report(self) -> str: + """Generate comprehensive health report""" + analysis = self.analyze_health_status() + risks = self.identify_health_risks() + predictions = self.predict_disease_risk() + recommendations = self.recommend_preventive_measures() + + report = f""" +# Health Analysis Report +Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} + +## Overall Health Score: {analysis['overall_health_score']}/1.0 + +### Health Status +- BMI Status: {analysis['bmi_status']['status']} +- Activity Level: {analysis['activity_status']['status']} +- Symptom Status: {analysis['symptom_status']['status']} +- Nutrition Status: {analysis['nutrition_status']['status']} +- Mental Health: {analysis['mental_health_status']['status']} + +### Identified Risks +{chr(10).join([f"- {r['risk_type']}: {r['description']}" for r in risks]) if risks else "No significant risks identified"} + +### Disease Risk Predictions +{chr(10).join([f"- {p['disease']}: {p['risk_level']} risk ({p['risk_score']:.1%})" for p in predictions]) if predictions else "Low disease risk"} + +### Preventive Recommendations +{chr(10).join([f"- {r}" for r in recommendations])} +""" + return report + diff --git a/health_data/README.md b/health_data/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95ee487261af0ea10a1797fc8252a1890809ca4d --- /dev/null +++ b/health_data/README.md @@ -0,0 +1,413 @@ +# Health Data Module 🏥 + +Comprehensive health data management with Pydantic validation, smart parsing, and record merging. 
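+
+As a quick taste (a minimal sketch; see the Quick Start below for details), a single profile object accepts mixed metric and imperial input and normalizes it on construction:
+
+```python
+from health_data import PydanticUserHealthProfile
+
+profile = PydanticUserHealthProfile(user_id="user123", weight="154lbs", height="5'10\"")
+print(profile.weight, profile.height, profile.bmi)  # 69.9 177.8 22.1
+```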
+ +## Features + +- ✅ **Pydantic Models** - Automatic validation and type checking +- ✅ **Smart Parsing** - Parse height/weight from multiple formats +- ✅ **Data Validation** - Detect abnormal values automatically +- ✅ **Record Merging** - Aggregate data from multiple days +- ✅ **Auto BMI Calculation** - Automatic BMI computation +- ✅ **Multi-format Support** - Handles various input formats + +## Quick Start + +### 1. Create User Profile with Validation + +```python +from health_data import PydanticUserHealthProfile + +# Supports multiple input formats! +profile = PydanticUserHealthProfile( + user_id="user123", + age="25 tuổi", # ✅ Parses to 25 + gender="male", + weight="70kg", # ✅ Parses to 70.0 + height="1.78m" # ✅ Parses to 178.0 cm +) + +print(f"BMI: {profile.bmi}") # Auto-calculated! +print(f"Category: {profile.get_bmi_category()}") # "Bình thường" +``` + +### 2. Supported Input Formats + +#### Height Formats: +```python +"1.78m" → 178.0 cm +"1,78m" → 178.0 cm (comma separator) +"178cm" → 178.0 cm +"178" → 178.0 cm +"5'10\"" → 177.8 cm (feet/inches) +``` + +#### Weight Formats: +```python +"70kg" → 70.0 kg +"70" → 70.0 kg +"154lbs" → 69.9 kg (pounds) +"70.5" → 70.5 kg +``` + +### 3. Create Health Records + +```python +from health_data import NutritionRecord, ExerciseRecord + +# Nutrition record +nutrition = NutritionRecord( + user_id="user123", + height="1.78m", + weight="70kg", + data={ + 'calories': 2000, + 'protein': 150, + 'carbs': 200, + 'fat': 60 + } +) + +# Exercise record +exercise = ExerciseRecord( + user_id="user123", + data={ + 'exercise_type': 'cardio', + 'duration_minutes': 30, + 'calories_burned': 300 + } +) +``` + +### 4. Merge Records from Multiple Days + +```python +from health_data import merge_records + +# Merge 7 days of records +merged = merge_records(records, strategy='average') + +print(f"Average calories: {merged['by_type']['nutrition']['average_daily']['calories']}") +print(f"Total workouts: {merged['by_type']['exercise']['total_workouts']}") +print(f"Weight change: {merged['health_metrics']['weight']['change']} kg") +``` + +## Validation Examples + +### ✅ Valid Data +```python +profile = PydanticUserHealthProfile( + user_id="user123", + age=25, + weight="70kg", + height="1.78m" +) +# ✅ Success! 
BMI auto-calculated +``` + +### ❌ Invalid Data (Automatic Detection) +```python +# Height too high +profile = PydanticUserHealthProfile( + user_id="user456", + height="500cm" # ❌ Error: Chiều cao quá cao +) + +# Age too young +profile = PydanticUserHealthProfile( + user_id="user789", + age=10 # ❌ Error: Hệ thống chỉ hỗ trợ người từ 13 tuổi trở lên +) + +# Weight too light +profile = PydanticUserHealthProfile( + user_id="user999", + weight="15kg" # ❌ Error: Cân nặng quá nhẹ +) +``` + +## API Reference + +### HealthDataParser + +Parse health data from various formats: + +```python +from health_data import HealthDataParser + +# Parse height +height = HealthDataParser.parse_height("1.78m") # → 178.0 cm +height = HealthDataParser.parse_height("5'10\"") # → 177.8 cm + +# Parse weight +weight = HealthDataParser.parse_weight("70kg") # → 70.0 kg +weight = HealthDataParser.parse_weight("154lbs") # → 69.9 kg + +# Parse age +age = HealthDataParser.parse_age("25 tuổi") # → 25 +``` + +### HealthDataValidator + +Validate health metrics: + +```python +from health_data import HealthDataValidator + +# Validate height +is_valid, error = HealthDataValidator.validate_height(178.0) +# → (True, None) + +is_valid, error = HealthDataValidator.validate_height(500.0) +# → (False, "Chiều cao quá cao...") + +# Calculate BMI +bmi = HealthDataValidator.calculate_bmi(weight=70, height=178) +# → 22.1 + +# Get BMI category +category = HealthDataValidator.get_bmi_category(22.1) +# → "Bình thường" +``` + +### PydanticUserHealthProfile + +User health profile with validation: + +```python +from health_data import PydanticUserHealthProfile + +profile = PydanticUserHealthProfile( + user_id="user123", + age=25, + gender="male", + weight="70kg", + height="1.78m", + activity_level="moderate", + fitness_level="intermediate" +) + +# Check completeness +if profile.is_complete(): + print("Profile is complete!") + +# Get missing fields +missing = profile.get_missing_fields() +# → [] + +# Get BMI category +category = profile.get_bmi_category() +# → "Bình thường" +``` + +### HealthRecord Types + +Different record types for different health data: + +```python +from health_data import ( + NutritionRecord, + ExerciseRecord, + SymptomRecord, + MentalHealthRecord +) + +# Nutrition +nutrition = NutritionRecord( + user_id="user123", + calories=2000, + protein=150, + carbs=200, + fat=60 +) + +# Exercise +exercise = ExerciseRecord( + user_id="user123", + exercise_type="cardio", + duration_minutes=30, + calories_burned=300 +) + +# Symptom +symptom = SymptomRecord( + user_id="user123", + symptoms=["headache", "fatigue"], + severity=5, + duration_days=2 +) + +# Mental Health +mental = MentalHealthRecord( + user_id="user123", + mood="good", + stress_level=3, + sleep_hours=7.5, + sleep_quality=8 +) +``` + +### merge_records() + +Merge and aggregate health records: + +```python +from health_data import merge_records, HealthRecordMerger + +# Strategy 1: Latest (get most recent data) +merged = merge_records(records, strategy='latest') + +# Strategy 2: Average (calculate averages) +merged = merge_records(records, strategy='average') + +# Strategy 3: All (get all records) +merged = merge_records(records, strategy='all') + +# Weekly summary +weekly = HealthRecordMerger.get_weekly_summary(records, weeks_back=1) + +# Monthly summary +monthly = HealthRecordMerger.get_monthly_summary(records, months_back=1) + +# Custom date range +from datetime import datetime, timedelta +start = datetime.now() - timedelta(days=7) +end = datetime.now() +custom = 
HealthRecordMerger.merge_by_date_range(records, start, end) +``` + +## Merge Output Format + +```python +{ + 'total_records': 14, + 'date_range': { + 'start': '2024-10-18T10:00:00', + 'end': '2024-10-25T10:00:00' + }, + 'by_type': { + 'nutrition': { + 'average_daily': { + 'calories': 1850.0, + 'protein': 145.0, + 'carbs': 210.0, + 'fat': 58.0 + }, + 'total_records': 7 + }, + 'exercise': { + 'total_workouts': 7, + 'total_duration_minutes': 245, + 'total_calories_burned': 1890.0, + 'average_duration': 35.0, + 'exercise_types': {'cardio': 4, 'strength': 3} + } + }, + 'health_metrics': { + 'weight': { + 'latest': 69.6, + 'average': 69.8, + 'min': 69.6, + 'max': 70.0, + 'change': -0.4 + }, + 'bmi': { + 'latest': 22.0, + 'average': 22.1, + 'change': -0.1 + } + } +} +``` + +## Integration with Agents + +Agents can now use smart parsing automatically: + +```python +from health_data import HealthDataParser + +# In agent code +user_input = "Tôi cao 1,78m và nặng 70kg" + +# Parse automatically +height = HealthDataParser.parse_height("1,78m") # → 178.0 +weight = HealthDataParser.parse_weight("70kg") # → 70.0 + +# Agents understand all formats! +``` + +## Best Practices + +1. **Always use Pydantic models** for new code +2. **Let the parser handle formats** - don't manually parse +3. **Validate before saving** - Pydantic does this automatically +4. **Use merge_records()** for multi-day analysis +5. **Check is_complete()** before calculations + +## Examples + +Run the example script: + +```bash +python examples/pydantic_validation_example.py +``` + +This will demonstrate: +- ✅ Height parsing from multiple formats +- ✅ Weight parsing from multiple formats +- ✅ Automatic validation +- ✅ BMI auto-calculation +- ✅ Record merging +- ✅ Error handling + +## Migration Guide + +### From Old Models to Pydantic + +```python +# Old way (no validation) +from health_data import HealthRecord + +record = HealthRecord( + user_id="user123", + record_type="nutrition", + data={'weight': 70} +) + +# New way (with validation) +from health_data import NutritionRecord + +record = NutritionRecord( + user_id="user123", + weight="70kg", # Auto-parsed and validated! + data={'calories': 2000} +) +``` + +## Validation Rules + +| Field | Min | Max | Notes | +|-------|-----|-----|-------| +| Age | 13 | 150 | Years | +| Height | 50 | 300 | Centimeters | +| Weight | 20 | 300 | Kilograms | +| BMI | 10 | 60 | Auto-calculated | +| Confidence | 0.0 | 1.0 | Record confidence | + +## Error Messages (Vietnamese) + +- Height too low: "Chiều cao quá thấp (< 50cm)" +- Height too high: "Chiều cao quá cao (> 300cm)" +- Weight too light: "Cân nặng quá nhẹ (< 20kg)" +- Weight too heavy: "Cân nặng quá nặng (> 300kg)" +- Age too young: "Hệ thống chỉ hỗ trợ người từ 13 tuổi trở lên" +- BMI abnormal: "BMI quá thấp/cao" + +## Future Enhancements + +- [ ] Support for more units (stones, feet, etc.) 
+- [ ] Historical trend analysis +- [ ] Anomaly detection +- [ ] Data export to CSV/Excel +- [ ] Integration with wearables diff --git a/health_data/__init__.py b/health_data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4da5b1e2279bf979dccdbb377231073e8847940f --- /dev/null +++ b/health_data/__init__.py @@ -0,0 +1,66 @@ +""" +Health Data Module +Persistent storage and management of user health data +""" + +from .models import ( + UserHealthProfile, + HealthRecord, + UserPreferences, + FitnessProgress, + HealthMetrics +) + +from .data_store import HealthDataStore +from .health_context import HealthContext + +# Pydantic models with validation +from .pydantic_models import ( + HealthRecord as PydanticHealthRecord, + UserHealthProfile as PydanticUserHealthProfile, + NutritionRecord, + ExerciseRecord, + SymptomRecord, + MentalHealthRecord, + Gender, + ActivityLevel, + FitnessLevel, + RecordType +) + +# Validators and parsers +from .validators import HealthDataParser, HealthDataValidator + +# Record merger +from .record_merger import merge_records, HealthRecordMerger + +__all__ = [ + # Original models + 'UserHealthProfile', + 'HealthRecord', + 'UserPreferences', + 'FitnessProgress', + 'HealthMetrics', + 'HealthDataStore', + 'HealthContext', + + # Pydantic models + 'PydanticHealthRecord', + 'PydanticUserHealthProfile', + 'NutritionRecord', + 'ExerciseRecord', + 'SymptomRecord', + 'MentalHealthRecord', + 'Gender', + 'ActivityLevel', + 'FitnessLevel', + 'RecordType', + + # Validators + 'HealthDataParser', + 'HealthDataValidator', + + # Merger + 'merge_records', + 'HealthRecordMerger' +] diff --git a/health_data/data_store.py b/health_data/data_store.py new file mode 100644 index 0000000000000000000000000000000000000000..28e715faaee0c0a7d62584ee3a6cbd702fb3ab2b --- /dev/null +++ b/health_data/data_store.py @@ -0,0 +1,226 @@ +""" +Health Data Store - Persistent storage for health data +Uses JSON files for simplicity, can be upgraded to SQLAlchemy + PostgreSQL +""" + +import json +import os +from datetime import datetime, timedelta +from typing import List, Optional, Dict, Any +from pathlib import Path + +from .models import ( + UserHealthProfile, HealthRecord, UserPreferences, + FitnessProgress, HealthMetrics +) + + +class HealthDataStore: + """Persistent storage for all health data""" + + def __init__(self, data_dir: str = "health_data/storage"): + self.data_dir = Path(data_dir) + self.data_dir.mkdir(parents=True, exist_ok=True) + + # Create subdirectories + (self.data_dir / "profiles").mkdir(exist_ok=True) + (self.data_dir / "records").mkdir(exist_ok=True) + (self.data_dir / "preferences").mkdir(exist_ok=True) + (self.data_dir / "fitness").mkdir(exist_ok=True) + (self.data_dir / "metrics").mkdir(exist_ok=True) + + # ===== User Profile Operations ===== + + def save_user_profile(self, profile: UserHealthProfile) -> None: + """Save user profile to storage""" + profile.updated_at = datetime.now() + path = self.data_dir / "profiles" / f"{profile.user_id}.json" + with open(path, 'w', encoding='utf-8') as f: + json.dump(profile.to_dict(), f, ensure_ascii=False, indent=2) + + def get_user_profile(self, user_id: str) -> Optional[UserHealthProfile]: + """Get user profile from storage""" + path = self.data_dir / "profiles" / f"{user_id}.json" + if not path.exists(): + return None + + with open(path, 'r', encoding='utf-8') as f: + data = json.load(f) + return UserHealthProfile.from_dict(data) + + def update_user_profile(self, user_id: str, **kwargs) -> None: + """Update 
specific fields in user profile""" + profile = self.get_user_profile(user_id) + if not profile: + profile = UserHealthProfile(user_id) + + for key, value in kwargs.items(): + if hasattr(profile, key): + setattr(profile, key, value) + + self.save_user_profile(profile) + + # ===== Health History Operations ===== + + def add_health_record(self, record: HealthRecord) -> None: + """Add health record to storage""" + user_dir = self.data_dir / "records" / record.user_id + user_dir.mkdir(exist_ok=True) + + path = user_dir / f"{record.record_id}.json" + with open(path, 'w', encoding='utf-8') as f: + json.dump(record.to_dict(), f, ensure_ascii=False, indent=2) + + def get_health_history(self, user_id: str, days: int = 30) -> List[HealthRecord]: + """Get health history for user (last N days)""" + user_dir = self.data_dir / "records" / user_id + if not user_dir.exists(): + return [] + + cutoff_date = datetime.now() - timedelta(days=days) + records = [] + + for file_path in user_dir.glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + record = HealthRecord.from_dict(data) + if record.timestamp >= cutoff_date: + records.append(record) + + # Sort by timestamp descending + records.sort(key=lambda r: r.timestamp, reverse=True) + return records + + def get_records_by_type(self, user_id: str, record_type: str) -> List[HealthRecord]: + """Get records of specific type""" + all_records = self.get_health_history(user_id, days=365) + return [r for r in all_records if r.record_type == record_type] + + # ===== Preferences Operations ===== + + def save_preferences(self, prefs: UserPreferences) -> None: + """Save user preferences""" + prefs.updated_at = datetime.now() + path = self.data_dir / "preferences" / f"{prefs.user_id}.json" + with open(path, 'w', encoding='utf-8') as f: + json.dump(prefs.to_dict(), f, ensure_ascii=False, indent=2) + + def get_preferences(self, user_id: str) -> Optional[UserPreferences]: + """Get user preferences""" + path = self.data_dir / "preferences" / f"{user_id}.json" + if not path.exists(): + return None + + with open(path, 'r', encoding='utf-8') as f: + data = json.load(f) + return UserPreferences.from_dict(data) + + # ===== Fitness Operations ===== + + def add_fitness_record(self, record: FitnessProgress) -> None: + """Add fitness record""" + user_dir = self.data_dir / "fitness" / record.user_id + user_dir.mkdir(exist_ok=True) + + path = user_dir / f"{record.progress_id}.json" + with open(path, 'w', encoding='utf-8') as f: + json.dump(record.to_dict(), f, ensure_ascii=False, indent=2) + + def get_fitness_history(self, user_id: str, days: int = 30) -> List[FitnessProgress]: + """Get fitness history""" + user_dir = self.data_dir / "fitness" / user_id + if not user_dir.exists(): + return [] + + cutoff_date = datetime.now() - timedelta(days=days) + records = [] + + for file_path in user_dir.glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + record = FitnessProgress.from_dict(data) + if record.timestamp >= cutoff_date: + records.append(record) + + records.sort(key=lambda r: r.timestamp, reverse=True) + return records + + # ===== Metrics Operations ===== + + def add_metric(self, metric: HealthMetrics) -> None: + """Add health metric""" + user_dir = self.data_dir / "metrics" / metric.user_id + user_dir.mkdir(exist_ok=True) + + path = user_dir / f"{metric.metric_id}.json" + with open(path, 'w', encoding='utf-8') as f: + json.dump(metric.to_dict(), f, ensure_ascii=False, indent=2) + + def get_metrics(self, 
user_id: str, metric_type: Optional[str] = None) -> List[HealthMetrics]: + """Get health metrics""" + user_dir = self.data_dir / "metrics" / user_id + if not user_dir.exists(): + return [] + + metrics = [] + for file_path in user_dir.glob("*.json"): + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + metric = HealthMetrics.from_dict(data) + if metric_type is None or metric.metric_type == metric_type: + metrics.append(metric) + + metrics.sort(key=lambda m: m.timestamp, reverse=True) + return metrics + + # ===== Utility Methods ===== + + def export_user_data(self, user_id: str) -> Dict[str, Any]: + """Export all user data""" + profile = self.get_user_profile(user_id) + history = self.get_health_history(user_id, days=365) + preferences = self.get_preferences(user_id) + fitness = self.get_fitness_history(user_id, days=365) + metrics = self.get_metrics(user_id) + + return { + 'profile': profile.to_dict() if profile else None, + 'health_history': [r.to_dict() for r in history], + 'preferences': preferences.to_dict() if preferences else None, + 'fitness_history': [f.to_dict() for f in fitness], + 'metrics': [m.to_dict() for m in metrics], + 'exported_at': datetime.now().isoformat() + } + + def delete_user_data(self, user_id: str) -> None: + """Delete all user data (GDPR compliance)""" + import shutil + + # Delete profile + profile_path = self.data_dir / "profiles" / f"{user_id}.json" + if profile_path.exists(): + profile_path.unlink() + + # Delete records + records_dir = self.data_dir / "records" / user_id + if records_dir.exists(): + shutil.rmtree(records_dir) + + # Delete preferences + prefs_path = self.data_dir / "preferences" / f"{user_id}.json" + if prefs_path.exists(): + prefs_path.unlink() + + # Delete fitness + fitness_dir = self.data_dir / "fitness" / user_id + if fitness_dir.exists(): + shutil.rmtree(fitness_dir) + + # Delete metrics + metrics_dir = self.data_dir / "metrics" / user_id + if metrics_dir.exists(): + shutil.rmtree(metrics_dir) + diff --git a/health_data/health_context.py b/health_data/health_context.py new file mode 100644 index 0000000000000000000000000000000000000000..5adf0c0b8c0598a9d7155d4dfa1ac96ed212be58 --- /dev/null +++ b/health_data/health_context.py @@ -0,0 +1,218 @@ +""" +Health Context - Unified access to all user health data +Central hub for agents to access and update health information +""" + +from typing import List, Optional, Dict, Any +from datetime import datetime + +from .data_store import HealthDataStore +from .models import ( + UserHealthProfile, HealthRecord, UserPreferences, + FitnessProgress, HealthMetrics +) + + +class HealthContext: + """ + Unified access to all user health data + Provides a single interface for agents to access and update health information + """ + + def __init__(self, user_id: str, data_store: Optional[HealthDataStore] = None): + self.user_id = user_id + self.data_store = data_store or HealthDataStore() + + # Load data from storage + self.profile = self.data_store.get_user_profile(user_id) + if not self.profile: + self.profile = UserHealthProfile(user_id) + self.data_store.save_user_profile(self.profile) + + self.preferences = self.data_store.get_preferences(user_id) + if not self.preferences: + self.preferences = UserPreferences(user_id) + self.data_store.save_preferences(self.preferences) + + # ===== Profile Access ===== + + def get_user_profile(self) -> UserHealthProfile: + """Get user's health profile""" + return self.profile + + def update_profile(self, **kwargs) -> None: + """Update user profile 
fields""" + for key, value in kwargs.items(): + if hasattr(self.profile, key): + setattr(self.profile, key, value) + + self.profile.updated_at = datetime.now() + self.data_store.save_user_profile(self.profile) + + def get_profile_dict(self) -> Dict[str, Any]: + """Get profile as dictionary""" + return self.profile.to_dict() + + # ===== Health History Access ===== + + def get_health_history(self, days: int = 30) -> List[HealthRecord]: + """Get health history for last N days""" + return self.data_store.get_health_history(self.user_id, days) + + def get_records_by_type(self, record_type: str) -> List[HealthRecord]: + """Get records of specific type""" + return self.data_store.get_records_by_type(self.user_id, record_type) + + def add_health_record(self, record_type: str, data: Dict[str, Any], + agent_name: Optional[str] = None, + confidence: float = 0.5) -> None: + """Add health record""" + record = HealthRecord(self.user_id, record_type, data) + record.agent_name = agent_name + record.confidence = confidence + self.data_store.add_health_record(record) + + # ===== Preferences Access ===== + + def get_preferences(self) -> UserPreferences: + """Get user preferences""" + return self.preferences + + def update_preferences(self, **kwargs) -> None: + """Update preferences""" + for key, value in kwargs.items(): + if hasattr(self.preferences, key): + setattr(self.preferences, key, value) + + self.preferences.updated_at = datetime.now() + self.data_store.save_preferences(self.preferences) + + def add_goal(self, goal: str) -> None: + """Add health goal""" + if goal not in self.preferences.goals: + self.preferences.goals.append(goal) + self.data_store.save_preferences(self.preferences) + + def add_exercise_preference(self, exercise_type: str) -> None: + """Add exercise preference""" + if exercise_type not in self.preferences.preferred_exercise_types: + self.preferences.preferred_exercise_types.append(exercise_type) + self.data_store.save_preferences(self.preferences) + + # ===== Fitness Access ===== + + def get_fitness_history(self, days: int = 30) -> List[FitnessProgress]: + """Get fitness history""" + return self.data_store.get_fitness_history(self.user_id, days) + + def add_fitness_record(self, workout_data: Dict[str, Any]) -> None: + """Add fitness record""" + record = FitnessProgress(self.user_id) + for key, value in workout_data.items(): + if hasattr(record, key): + setattr(record, key, value) + + self.data_store.add_fitness_record(record) + + def get_workout_adherence(self, days: int = 30) -> float: + """Calculate workout adherence rate (0-1)""" + history = self.get_fitness_history(days) + if not history: + return 0.0 + + # Simple adherence: workouts completed / expected workouts + # Assuming 3-4 workouts per week is ideal + expected_workouts = (days / 7) * 3.5 + actual_workouts = len(history) + + adherence = min(actual_workouts / expected_workouts, 1.0) if expected_workouts > 0 else 0.0 + return round(adherence, 2) + + # ===== Metrics Access ===== + + def get_metrics(self, metric_type: Optional[str] = None) -> List[HealthMetrics]: + """Get health metrics""" + return self.data_store.get_metrics(self.user_id, metric_type) + + def add_metric(self, metric_type: str, value: float, unit: str) -> None: + """Add health metric""" + metric = HealthMetrics(self.user_id, metric_type, value, unit) + self.data_store.add_metric(metric) + + def get_latest_metric(self, metric_type: str) -> Optional[HealthMetrics]: + """Get latest metric of specific type""" + metrics = self.get_metrics(metric_type) + return 
metrics[0] if metrics else None
+
+    # ===== Context Summary =====
+
+    def get_context_summary(self) -> str:
+        """Get summary of current context for agents"""
+        summary_parts = []
+
+        # Profile summary
+        profile = self.profile
+        if profile.age:
+            summary_parts.append(f"Age: {profile.age}")
+        if profile.gender:
+            summary_parts.append(f"Gender: {profile.gender}")
+        if profile.weight and profile.height:
+            summary_parts.append(f"Weight: {profile.weight}kg, Height: {profile.height}cm")
+        if profile.bmi:
+            summary_parts.append(f"BMI: {profile.bmi}")
+
+        # Health conditions
+        if profile.health_conditions:
+            summary_parts.append(f"Conditions: {', '.join(profile.health_conditions)}")
+
+        # Goals
+        if self.preferences.goals:
+            summary_parts.append(f"Goals: {', '.join(self.preferences.goals)}")
+
+        # Recent activity
+        recent_records = self.get_health_history(days=7)
+        if recent_records:
+            summary_parts.append(f"Recent interactions: {len(recent_records)}")
+
+        return " | ".join(summary_parts) if summary_parts else "No context yet"
+
+    def get_personalization_context(self) -> str:
+        """Get context for personalization"""
+        context = []
+
+        # User profile
+        profile = self.profile
+        context.append(f"User Profile: {profile.age}yo, {profile.gender}, BMI {profile.bmi}")
+
+        # Health conditions
+        if profile.health_conditions:
+            context.append(f"Health Conditions: {', '.join(profile.health_conditions)}")
+
+        # Goals
+        if self.preferences.goals:
+            context.append(f"Goals: {', '.join(self.preferences.goals)}")
+
+        # Recent interactions
+        recent = self.get_health_history(days=7)
+        if recent:
+            context.append(f"Recent interactions: {len(recent)} in last 7 days")
+
+        # Fitness adherence
+        adherence = self.get_workout_adherence(days=30)
+        context.append(f"Fitness adherence (30d): {adherence*100:.0f}%")
+
+        return "\n".join(context)
+
+    # ===== Data Export & Management =====
+
+    def export_data(self) -> Dict[str, Any]:
+        """Export all user data"""
+        return self.data_store.export_user_data(self.user_id)
+
+    def delete_all_data(self) -> None:
+        """Delete all user data (GDPR compliance)"""
+        self.data_store.delete_user_data(self.user_id)
+
+    def __repr__(self) -> str:
+        return f"<HealthContext(user_id='{self.user_id}')>"
+
diff --git a/health_data/models.py b/health_data/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..53a8dec532b3b0136497e07b14427b2576b614d2
--- /dev/null
+++ b/health_data/models.py
@@ -0,0 +1,214 @@
+"""
+Health Data Models - plain-Python data models for persistent health data storage
+"""
+
+from datetime import datetime
+from typing import List, Optional, Dict, Any
+import json
+import uuid
+
+# Note: This file defines the data models
+# In production, use SQLAlchemy with proper database setup
+# For now, we'll use a simple dictionary-based approach with JSON serialization
+
+class UserHealthProfile:
+    """User's health profile - persistent across sessions"""
+
+    def __init__(self, user_id: str):
+        self.user_id = user_id
+        self.age: Optional[int] = None
+        self.gender: Optional[str] = None
+        self.weight: Optional[float] = None  # kg
+        self.height: Optional[float] = None  # cm
+        self.bmi: Optional[float] = None
+        self.activity_level: Optional[str] = None  # low/moderate/high
+        self.fitness_level: Optional[str] = None  # beginner/intermediate/advanced
+        self.health_conditions: List[str] = []
+        self.medications: List[str] = []
+        self.allergies: List[str] = []
+        self.dietary_restrictions: List[str] = []
+        self.created_at: datetime = datetime.now()
+        self.updated_at: datetime = datetime.now()
+
+    def to_dict(self) -> 
Dict[str, Any]: + """Convert to dictionary for JSON serialization""" + return { + 'user_id': self.user_id, + 'age': self.age, + 'gender': self.gender, + 'weight': self.weight, + 'height': self.height, + 'bmi': self.bmi, + 'activity_level': self.activity_level, + 'fitness_level': self.fitness_level, + 'health_conditions': self.health_conditions, + 'medications': self.medications, + 'allergies': self.allergies, + 'dietary_restrictions': self.dietary_restrictions, + 'created_at': self.created_at.isoformat(), + 'updated_at': self.updated_at.isoformat() + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'UserHealthProfile': + """Create from dictionary""" + profile = cls(data['user_id']) + profile.age = data.get('age') + profile.gender = data.get('gender') + profile.weight = data.get('weight') + profile.height = data.get('height') + profile.bmi = data.get('bmi') + profile.activity_level = data.get('activity_level') + profile.fitness_level = data.get('fitness_level') + profile.health_conditions = data.get('health_conditions', []) + profile.medications = data.get('medications', []) + profile.allergies = data.get('allergies', []) + profile.dietary_restrictions = data.get('dietary_restrictions', []) + profile.created_at = datetime.fromisoformat(data.get('created_at', datetime.now().isoformat())) + profile.updated_at = datetime.fromisoformat(data.get('updated_at', datetime.now().isoformat())) + return profile + + +class HealthRecord: + """Individual health record - tracks interactions and data""" + + def __init__(self, user_id: str, record_type: str, data: Dict[str, Any]): + self.record_id = str(uuid.uuid4()) + self.user_id = user_id + self.record_type = record_type # symptom/nutrition/exercise/mental/general + self.data = data + self.timestamp = datetime.now() + self.agent_name: Optional[str] = None + self.confidence: float = 0.5 + + def to_dict(self) -> Dict[str, Any]: + return { + 'record_id': self.record_id, + 'user_id': self.user_id, + 'record_type': self.record_type, + 'data': self.data, + 'timestamp': self.timestamp.isoformat(), + 'agent_name': self.agent_name, + 'confidence': self.confidence + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'HealthRecord': + record = cls(data['user_id'], data['record_type'], data['data']) + record.record_id = data['record_id'] + record.timestamp = datetime.fromisoformat(data['timestamp']) + record.agent_name = data.get('agent_name') + record.confidence = data.get('confidence', 0.5) + return record + + +class UserPreferences: + """User preferences for personalization""" + + def __init__(self, user_id: str): + self.user_id = user_id + self.preferred_exercise_types: List[str] = [] + self.dietary_preferences: List[str] = [] + self.communication_style: str = 'friendly' # friendly/formal/casual + self.notification_preferences: Dict[str, Any] = {} + self.goals: List[str] = [] + self.updated_at: datetime = datetime.now() + + def to_dict(self) -> Dict[str, Any]: + return { + 'user_id': self.user_id, + 'preferred_exercise_types': self.preferred_exercise_types, + 'dietary_preferences': self.dietary_preferences, + 'communication_style': self.communication_style, + 'notification_preferences': self.notification_preferences, + 'goals': self.goals, + 'updated_at': self.updated_at.isoformat() + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'UserPreferences': + prefs = cls(data['user_id']) + prefs.preferred_exercise_types = data.get('preferred_exercise_types', []) + prefs.dietary_preferences = data.get('dietary_preferences', 
[]) + prefs.communication_style = data.get('communication_style', 'friendly') + prefs.notification_preferences = data.get('notification_preferences', {}) + prefs.goals = data.get('goals', []) + prefs.updated_at = datetime.fromisoformat(data.get('updated_at', datetime.now().isoformat())) + return prefs + + +class FitnessProgress: + """Fitness workout record""" + + def __init__(self, user_id: str): + self.progress_id = str(uuid.uuid4()) + self.user_id = user_id + self.workout_date: datetime = datetime.now() + self.workout_type: str = '' # cardio/strength/flexibility/sports + self.duration_minutes: int = 0 + self.intensity: str = 'medium' # low/medium/high + self.exercises_completed: int = 0 + self.exercises_total: int = 0 + self.notes: str = '' + self.timestamp: datetime = datetime.now() + + def to_dict(self) -> Dict[str, Any]: + return { + 'progress_id': self.progress_id, + 'user_id': self.user_id, + 'workout_date': self.workout_date.isoformat(), + 'workout_type': self.workout_type, + 'duration_minutes': self.duration_minutes, + 'intensity': self.intensity, + 'exercises_completed': self.exercises_completed, + 'exercises_total': self.exercises_total, + 'notes': self.notes, + 'timestamp': self.timestamp.isoformat() + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'FitnessProgress': + progress = cls(data['user_id']) + progress.progress_id = data['progress_id'] + progress.workout_date = datetime.fromisoformat(data['workout_date']) + progress.workout_type = data.get('workout_type', '') + progress.duration_minutes = data.get('duration_minutes', 0) + progress.intensity = data.get('intensity', 'medium') + progress.exercises_completed = data.get('exercises_completed', 0) + progress.exercises_total = data.get('exercises_total', 0) + progress.notes = data.get('notes', '') + progress.timestamp = datetime.fromisoformat(data['timestamp']) + return progress + + +class HealthMetrics: + """Health measurement record""" + + def __init__(self, user_id: str, metric_type: str, value: float, unit: str): + self.metric_id = str(uuid.uuid4()) + self.user_id = user_id + self.metric_type = metric_type # weight/bp/glucose/heart_rate/etc + self.value = value + self.unit = unit + self.recorded_date: datetime = datetime.now() + self.timestamp: datetime = datetime.now() + + def to_dict(self) -> Dict[str, Any]: + return { + 'metric_id': self.metric_id, + 'user_id': self.user_id, + 'metric_type': self.metric_type, + 'value': self.value, + 'unit': self.unit, + 'recorded_date': self.recorded_date.isoformat(), + 'timestamp': self.timestamp.isoformat() + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'HealthMetrics': + metric = cls(data['user_id'], data['metric_type'], data['value'], data['unit']) + metric.metric_id = data['metric_id'] + metric.recorded_date = datetime.fromisoformat(data['recorded_date']) + metric.timestamp = datetime.fromisoformat(data['timestamp']) + return metric + diff --git a/health_data/pydantic_models.py b/health_data/pydantic_models.py new file mode 100644 index 0000000000000000000000000000000000000000..36a5942285f2a66e0fc753be6e19c11c98fe9458 --- /dev/null +++ b/health_data/pydantic_models.py @@ -0,0 +1,255 @@ +""" +Pydantic Models for Health Data +Provides automatic validation and parsing +""" + +from datetime import datetime +from typing import Optional, List, Dict, Any, Union +from pydantic import BaseModel, Field, field_validator, model_validator +from enum import Enum + +from .validators import HealthDataParser, HealthDataValidator + + +class Gender(str, Enum): + 
"""Gender enum""" + MALE = "male" + FEMALE = "female" + OTHER = "other" + + +class ActivityLevel(str, Enum): + """Activity level enum""" + SEDENTARY = "sedentary" # Ít vận động + LIGHT = "light" # Vận động nhẹ + MODERATE = "moderate" # Vận động vừa + ACTIVE = "active" # Vận động nhiều + VERY_ACTIVE = "very_active" # Vận động rất nhiều + + +class FitnessLevel(str, Enum): + """Fitness level enum""" + BEGINNER = "beginner" + INTERMEDIATE = "intermediate" + ADVANCED = "advanced" + + +class RecordType(str, Enum): + """Health record type""" + NUTRITION = "nutrition" + EXERCISE = "exercise" + SYMPTOM = "symptom" + MENTAL_HEALTH = "mental_health" + GENERAL_HEALTH = "general_health" + + +class HealthRecord(BaseModel): + """ + Health Record with Pydantic validation + Automatically validates and normalizes health data + """ + + record_id: str = Field(default_factory=lambda: str(__import__('uuid').uuid4())) + user_id: str + record_type: RecordType + data: Dict[str, Any] = Field(default_factory=dict) + timestamp: datetime = Field(default_factory=datetime.now) + agent_name: Optional[str] = None + confidence: float = Field(default=0.5, ge=0.0, le=1.0) + + # Health metrics (optional, extracted from data) + height: Optional[float] = Field(None, description="Height in cm") + weight: Optional[float] = Field(None, description="Weight in kg") + age: Optional[int] = Field(None, description="Age in years") + gender: Optional[Gender] = None + bmi: Optional[float] = Field(None, description="BMI") + + class Config: + use_enum_values = True + json_encoders = { + datetime: lambda v: v.isoformat() + } + + @field_validator('height', mode='before') + @classmethod + def parse_height(cls, v): + """Parse height from various formats""" + if v is None: + return None + parsed = HealthDataParser.parse_height(v) + if parsed is not None: + is_valid, error = HealthDataValidator.validate_height(parsed) + if not is_valid: + raise ValueError(error) + return parsed + + @field_validator('weight', mode='before') + @classmethod + def parse_weight(cls, v): + """Parse weight from various formats""" + if v is None: + return None + parsed = HealthDataParser.parse_weight(v) + if parsed is not None: + is_valid, error = HealthDataValidator.validate_weight(parsed) + if not is_valid: + raise ValueError(error) + return parsed + + @field_validator('age', mode='before') + @classmethod + def parse_age(cls, v): + """Parse age from various formats""" + if v is None: + return None + parsed = HealthDataParser.parse_age(v) + if parsed is not None: + is_valid, error = HealthDataValidator.validate_age(parsed) + if not is_valid: + raise ValueError(error) + return parsed + + @field_validator('bmi', mode='before') + @classmethod + def parse_bmi(cls, v): + """Parse BMI""" + if v is None: + return None + parsed = HealthDataParser.parse_bmi(v) + if parsed is not None: + is_valid, error = HealthDataValidator.validate_bmi(parsed) + if not is_valid: + raise ValueError(error) + return parsed + + @model_validator(mode='after') + def calculate_bmi_if_missing(self): + """Auto-calculate BMI if weight and height are provided""" + if self.bmi is None and self.weight and self.height: + self.bmi = HealthDataValidator.calculate_bmi(self.weight, self.height) + return self + + +class UserHealthProfile(BaseModel): + """ + User Health Profile with Pydantic validation + """ + + user_id: str + age: Optional[int] = Field(None, ge=13, le=150) + gender: Optional[Gender] = None + weight: Optional[float] = Field(None, ge=20, le=300, description="Weight in kg") + height: Optional[float] 
= Field(None, ge=50, le=300, description="Height in cm") + bmi: Optional[float] = Field(None, ge=10, le=60) + activity_level: Optional[ActivityLevel] = None + fitness_level: Optional[FitnessLevel] = None + health_conditions: List[str] = Field(default_factory=list) + medications: List[str] = Field(default_factory=list) + allergies: List[str] = Field(default_factory=list) + dietary_restrictions: List[str] = Field(default_factory=list) + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + + class Config: + use_enum_values = True + json_encoders = { + datetime: lambda v: v.isoformat() + } + + @field_validator('height', mode='before') + @classmethod + def parse_height(cls, v): + """Parse height from various formats""" + if v is None: + return None + return HealthDataParser.parse_height(v) + + @field_validator('weight', mode='before') + @classmethod + def parse_weight(cls, v): + """Parse weight from various formats""" + if v is None: + return None + return HealthDataParser.parse_weight(v) + + @field_validator('age', mode='before') + @classmethod + def parse_age(cls, v): + """Parse age from various formats""" + if v is None: + return None + return HealthDataParser.parse_age(v) + + @model_validator(mode='after') + def calculate_bmi_if_missing(self): + """Auto-calculate BMI if weight and height are provided""" + if self.bmi is None and self.weight and self.height: + self.bmi = HealthDataValidator.calculate_bmi(self.weight, self.height) + return self + + def get_bmi_category(self) -> str: + """Get BMI category""" + return HealthDataValidator.get_bmi_category(self.bmi) + + def is_complete(self) -> bool: + """Check if profile has all essential data""" + return all([ + self.age is not None, + self.gender is not None, + self.weight is not None, + self.height is not None + ]) + + def get_missing_fields(self) -> List[str]: + """Get list of missing essential fields""" + missing = [] + if self.age is None: + missing.append('age') + if self.gender is None: + missing.append('gender') + if self.weight is None: + missing.append('weight') + if self.height is None: + missing.append('height') + return missing + + +class NutritionRecord(HealthRecord): + """Nutrition-specific health record""" + + record_type: RecordType = Field(default=RecordType.NUTRITION, frozen=True) + calories: Optional[float] = Field(None, ge=0, le=10000) + protein: Optional[float] = Field(None, ge=0, le=500) + carbs: Optional[float] = Field(None, ge=0, le=1000) + fat: Optional[float] = Field(None, ge=0, le=500) + meal_type: Optional[str] = None # breakfast/lunch/dinner/snack + + +class ExerciseRecord(HealthRecord): + """Exercise-specific health record""" + + record_type: RecordType = Field(default=RecordType.EXERCISE, frozen=True) + exercise_type: Optional[str] = None # cardio/strength/flexibility/sports + duration_minutes: Optional[int] = Field(None, ge=0, le=600) + intensity: Optional[str] = None # low/medium/high + calories_burned: Optional[float] = Field(None, ge=0, le=5000) + + +class SymptomRecord(HealthRecord): + """Symptom-specific health record""" + + record_type: RecordType = Field(default=RecordType.SYMPTOM, frozen=True) + symptoms: List[str] = Field(default_factory=list) + severity: Optional[int] = Field(None, ge=1, le=10) + duration_days: Optional[int] = Field(None, ge=0, le=365) + body_part: Optional[str] = None + + +class MentalHealthRecord(HealthRecord): + """Mental health-specific health record""" + + record_type: RecordType = 
Field(default=RecordType.MENTAL_HEALTH, frozen=True) + mood: Optional[str] = None + stress_level: Optional[int] = Field(None, ge=1, le=10) + sleep_hours: Optional[float] = Field(None, ge=0, le=24) + sleep_quality: Optional[int] = Field(None, ge=1, le=10) diff --git a/health_data/record_merger.py b/health_data/record_merger.py new file mode 100644 index 0000000000000000000000000000000000000000..345a263c3234f22ef4e0ec67428b8b8e447af6bc --- /dev/null +++ b/health_data/record_merger.py @@ -0,0 +1,350 @@ +""" +Health Record Merger +Merge and aggregate health records from multiple days +""" + +from datetime import datetime, timedelta +from typing import List, Dict, Any, Optional +from collections import defaultdict + +from .pydantic_models import ( + HealthRecord, NutritionRecord, ExerciseRecord, + SymptomRecord, MentalHealthRecord, RecordType +) + + +class HealthRecordMerger: + """Merge and aggregate health records""" + + @staticmethod + def merge_records( + records: List[HealthRecord], + merge_strategy: str = 'latest' + ) -> Dict[str, Any]: + """ + Merge multiple health records into aggregated data + + Args: + records: List of health records to merge + merge_strategy: 'latest', 'average', 'all' + + Returns: + Merged data dictionary + """ + if not records: + return {} + + # Group records by type + records_by_type = defaultdict(list) + for record in records: + records_by_type[record.record_type].append(record) + + merged = { + 'total_records': len(records), + 'date_range': { + 'start': min(r.timestamp for r in records).isoformat(), + 'end': max(r.timestamp for r in records).isoformat() + }, + 'by_type': {} + } + + # Merge each type + for record_type, type_records in records_by_type.items(): + if record_type == RecordType.NUTRITION: + merged['by_type']['nutrition'] = HealthRecordMerger._merge_nutrition_records( + type_records, merge_strategy + ) + elif record_type == RecordType.EXERCISE: + merged['by_type']['exercise'] = HealthRecordMerger._merge_exercise_records( + type_records, merge_strategy + ) + elif record_type == RecordType.SYMPTOM: + merged['by_type']['symptom'] = HealthRecordMerger._merge_symptom_records( + type_records, merge_strategy + ) + elif record_type == RecordType.MENTAL_HEALTH: + merged['by_type']['mental_health'] = HealthRecordMerger._merge_mental_health_records( + type_records, merge_strategy + ) + + # Extract common health metrics + merged['health_metrics'] = HealthRecordMerger._extract_health_metrics( + records, merge_strategy + ) + + return merged + + @staticmethod + def _merge_nutrition_records( + records: List[HealthRecord], + strategy: str + ) -> Dict[str, Any]: + """Merge nutrition records""" + if strategy == 'latest': + latest = max(records, key=lambda r: r.timestamp) + return { + 'latest_record': latest.model_dump(), + 'total_records': len(records) + } + + elif strategy == 'average': + # Calculate averages + total_calories = sum(r.data.get('calories', 0) for r in records if r.data.get('calories')) + total_protein = sum(r.data.get('protein', 0) for r in records if r.data.get('protein')) + total_carbs = sum(r.data.get('carbs', 0) for r in records if r.data.get('carbs')) + total_fat = sum(r.data.get('fat', 0) for r in records if r.data.get('fat')) + + count = len(records) + + return { + 'average_daily': { + 'calories': round(total_calories / count, 1) if count > 0 else 0, + 'protein': round(total_protein / count, 1) if count > 0 else 0, + 'carbs': round(total_carbs / count, 1) if count > 0 else 0, + 'fat': round(total_fat / count, 1) if count > 0 else 0 + }, + 'total': { 
+ 'calories': round(total_calories, 1), + 'protein': round(total_protein, 1), + 'carbs': round(total_carbs, 1), + 'fat': round(total_fat, 1) + }, + 'total_records': count + } + + else: # 'all' + return { + 'all_records': [r.model_dump() for r in records], + 'total_records': len(records) + } + + @staticmethod + def _merge_exercise_records( + records: List[HealthRecord], + strategy: str + ) -> Dict[str, Any]: + """Merge exercise records""" + if strategy == 'latest': + latest = max(records, key=lambda r: r.timestamp) + return { + 'latest_record': latest.model_dump(), + 'total_records': len(records) + } + + elif strategy == 'average': + total_duration = sum(r.data.get('duration_minutes', 0) for r in records) + total_calories = sum(r.data.get('calories_burned', 0) for r in records) + + # Count by exercise type + exercise_types = defaultdict(int) + for r in records: + ex_type = r.data.get('exercise_type', 'unknown') + exercise_types[ex_type] += 1 + + count = len(records) + + return { + 'total_workouts': count, + 'total_duration_minutes': total_duration, + 'total_calories_burned': round(total_calories, 1), + 'average_duration': round(total_duration / count, 1) if count > 0 else 0, + 'average_calories': round(total_calories / count, 1) if count > 0 else 0, + 'exercise_types': dict(exercise_types) + } + + else: # 'all' + return { + 'all_records': [r.model_dump() for r in records], + 'total_records': len(records) + } + + @staticmethod + def _merge_symptom_records( + records: List[HealthRecord], + strategy: str + ) -> Dict[str, Any]: + """Merge symptom records""" + if strategy == 'latest': + latest = max(records, key=lambda r: r.timestamp) + return { + 'latest_record': latest.model_dump(), + 'total_records': len(records) + } + + # Collect all symptoms + all_symptoms = [] + symptom_counts = defaultdict(int) + + for r in records: + symptoms = r.data.get('symptoms', []) + if isinstance(symptoms, list): + all_symptoms.extend(symptoms) + for symptom in symptoms: + symptom_counts[symptom] += 1 + + return { + 'total_reports': len(records), + 'unique_symptoms': len(set(all_symptoms)), + 'most_common_symptoms': sorted( + symptom_counts.items(), + key=lambda x: x[1], + reverse=True + )[:5], + 'all_symptoms': list(set(all_symptoms)) + } + + @staticmethod + def _merge_mental_health_records( + records: List[HealthRecord], + strategy: str + ) -> Dict[str, Any]: + """Merge mental health records""" + if strategy == 'latest': + latest = max(records, key=lambda r: r.timestamp) + return { + 'latest_record': latest.model_dump(), + 'total_records': len(records) + } + + # Calculate averages + stress_levels = [r.data.get('stress_level') for r in records if r.data.get('stress_level')] + sleep_hours = [r.data.get('sleep_hours') for r in records if r.data.get('sleep_hours')] + sleep_quality = [r.data.get('sleep_quality') for r in records if r.data.get('sleep_quality')] + + return { + 'total_records': len(records), + 'average_stress_level': round(sum(stress_levels) / len(stress_levels), 1) if stress_levels else None, + 'average_sleep_hours': round(sum(sleep_hours) / len(sleep_hours), 1) if sleep_hours else None, + 'average_sleep_quality': round(sum(sleep_quality) / len(sleep_quality), 1) if sleep_quality else None, + 'stress_trend': 'improving' if len(stress_levels) >= 2 and stress_levels[-1] < stress_levels[0] else 'stable' + } + + @staticmethod + def _extract_health_metrics( + records: List[HealthRecord], + strategy: str + ) -> Dict[str, Any]: + """Extract common health metrics from records""" + weights = [r.weight for r in 
sorted(records, key=lambda r: r.timestamp) if r.weight]
+        # Sort chronologically so that [-1] is truly the most recent value
+        # and 'change' is newest minus oldest.
+        records = sorted(records, key=lambda r: r.timestamp)
+        heights = [r.height for r in records if r.height]
+        bmis = [r.bmi for r in records if r.bmi]
+
+        metrics = {}
+
+        if weights:
+            metrics['weight'] = {
+                'latest': weights[-1],
+                'average': round(sum(weights) / len(weights), 1),
+                'min': min(weights),
+                'max': max(weights),
+                'change': round(weights[-1] - weights[0], 1) if len(weights) >= 2 else 0
+            }
+
+        if heights:
+            metrics['height'] = {
+                'latest': heights[-1],
+                'average': round(sum(heights) / len(heights), 1)
+            }
+
+        if bmis:
+            metrics['bmi'] = {
+                'latest': bmis[-1],
+                'average': round(sum(bmis) / len(bmis), 1),
+                'change': round(bmis[-1] - bmis[0], 1) if len(bmis) >= 2 else 0
+            }
+
+        return metrics
+
+    @staticmethod
+    def merge_by_date_range(
+        records: List[HealthRecord],
+        start_date: datetime,
+        end_date: datetime,
+        merge_strategy: str = 'average'
+    ) -> Dict[str, Any]:
+        """
+        Merge records within a specific date range
+
+        Args:
+            records: All health records
+            start_date: Start of date range
+            end_date: End of date range
+            merge_strategy: How to merge data
+
+        Returns:
+            Merged data for the date range
+        """
+        # Filter records by date range
+        filtered = [
+            r for r in records
+            if start_date <= r.timestamp <= end_date
+        ]
+
+        return HealthRecordMerger.merge_records(filtered, merge_strategy)
+
+    @staticmethod
+    def get_weekly_summary(
+        records: List[HealthRecord],
+        weeks_back: int = 1
+    ) -> Dict[str, Any]:
+        """
+        Get weekly summary of health records
+
+        Args:
+            records: All health records
+            weeks_back: Number of weeks to look back
+
+        Returns:
+            Weekly summary
+        """
+        end_date = datetime.now()
+        start_date = end_date - timedelta(weeks=weeks_back)
+
+        return HealthRecordMerger.merge_by_date_range(
+            records,
+            start_date,
+            end_date,
+            merge_strategy='average'
+        )
+
+    @staticmethod
+    def get_monthly_summary(
+        records: List[HealthRecord],
+        months_back: int = 1
+    ) -> Dict[str, Any]:
+        """
+        Get monthly summary of health records
+
+        Args:
+            records: All health records
+            months_back: Number of months to look back
+
+        Returns:
+            Monthly summary
+        """
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=30 * months_back)
+
+        return HealthRecordMerger.merge_by_date_range(
+            records,
+            start_date,
+            end_date,
+            merge_strategy='average'
+        )
+
+
+def merge_records(
+    records: List[HealthRecord],
+    strategy: str = 'latest'
+) -> Dict[str, Any]:
+    """
+    Convenience function to merge health records
+
+    Args:
+        records: List of health records
+        strategy: 'latest', 'average', or 'all'
+
+    Returns:
+        Merged data dictionary
+    """
+    return HealthRecordMerger.merge_records(records, strategy)
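+
+
+# Illustrative usage (names and values are examples only; actual numbers
+# depend on the records passed in):
+#   weekly = merge_records(last_7_days_records, strategy='average')
+#   weekly['by_type']['nutrition']['average_daily']['calories']  # e.g. 1850.0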
diff --git a/health_data/validators.py b/health_data/validators.py
new file mode 100644
index 0000000000000000000000000000000000000000..130cb65f7876a8c92976ad013fe6a28e21954e76
--- /dev/null
+++ b/health_data/validators.py
@@ -0,0 +1,303 @@
+"""
+Health Data Validators
+Smart parsing and validation for health metrics with multiple input formats
+"""
+
+import re
+from typing import Optional, Union, Tuple
+
+
+class HealthDataParser:
+    """Parse health data from various input formats"""
+
+    @staticmethod
+    def parse_height(value: Union[str, int, float]) -> Optional[float]:
+        """
+        Parse height from various formats to cm
+
+        Supports:
+        - 1.78m, 1.78 m → 178 cm
+        - 178cm, 178 cm → 178 cm
+        - 1,78m (comma) → 178 cm
+        - 178 → 178 cm
+        - 5'10" → 177.8 cm (feet/inches)
+
+        Args:
+            value: Height in various formats
+
+        Returns:
+            Height in cm or None if invalid
+        """
+        if value is None:
+            return None
+
+        # Convert to string and normalize
+        value_str = str(value).strip().lower().replace(',', '.')
+
+        # Remove spaces
+        value_str = value_str.replace(' ', '')
+
+        # Pattern 1: Meters (1.78m, 1.78)
+        meter_match = re.match(r'^(\d+\.?\d*)m?$', value_str)
+        if meter_match:
+            meters = float(meter_match.group(1))
+            # If value is between 0.5 and 3.0, assume it's in meters
+            if 0.5 <= meters <= 3.0:
+                return round(meters * 100, 1)
+            # If value is > 50, assume it's already in cm
+            elif meters >= 50:
+                return round(meters, 1)
+
+        # Pattern 2: Centimeters (178cm, 178)
+        cm_match = re.match(r'^(\d+\.?\d*)cm?$', value_str)
+        if cm_match:
+            cm = float(cm_match.group(1))
+            if 50 <= cm <= 300:
+                return round(cm, 1)
+
+        # Pattern 3: Feet and inches (5'10", 5ft10in)
+        feet_match = re.match(r'^(\d+)(?:\'|ft)(\d+)(?:"|in)?$', value_str)
+        if feet_match:
+            feet = int(feet_match.group(1))
+            inches = int(feet_match.group(2))
+            total_inches = feet * 12 + inches
+            cm = total_inches * 2.54
+            return round(cm, 1)
+
+        # Try direct float conversion
+        try:
+            num = float(value_str)
+            # If between 0.5 and 3.0, assume meters
+            if 0.5 <= num <= 3.0:
+                return round(num * 100, 1)
+            # If between 50 and 300, assume cm
+            elif 50 <= num <= 300:
+                return round(num, 1)
+        except ValueError:
+            pass
+
+        return None
+
+    @staticmethod
+    def parse_weight(value: Union[str, int, float]) -> Optional[float]:
+        """
+        Parse weight from various formats to kg
+
+        Supports:
+        - 70kg, 70 kg → 70 kg
+        - 70, 70.5 → 70 kg
+        - 154lbs, 154 lbs → 69.9 kg
+        - 11st 2lb → 70.8 kg (stones)
+
+        Args:
+            value: Weight in various formats
+
+        Returns:
+            Weight in kg or None if invalid
+        """
+        if value is None:
+            return None
+
+        value_str = str(value).strip().lower().replace(',', '.')
+        value_str = value_str.replace(' ', '')
+
+        # Pattern 1: Kilograms (70kg, 70)
+        kg_match = re.match(r'^(\d+\.?\d*)kg?$', value_str)
+        if kg_match:
+            kg = float(kg_match.group(1))
+            if 20 <= kg <= 300:
+                return round(kg, 1)
+
+        # Pattern 2: Pounds (154lbs, 154lb)
+        lbs_match = re.match(r'^(\d+\.?\d*)lbs?$', value_str)
+        if lbs_match:
+            lbs = float(lbs_match.group(1))
+            kg = lbs * 0.453592
+            if 20 <= kg <= 300:
+                return round(kg, 1)
+
+        # Pattern 3: Stones (11st, 11stone, 11st2lb)
+        stone_match = re.match(r'^(\d+)st(?:one)?(?:(\d+)lbs?)?$', value_str)
+        if stone_match:
+            stones = int(stone_match.group(1))
+            lbs = int(stone_match.group(2)) if stone_match.group(2) else 0
+            total_lbs = stones * 14 + lbs
+            kg = total_lbs * 0.453592
+            return round(kg, 1)
+
+        # Try direct float conversion
+        try:
+            num = float(value_str)
+            if 20 <= num <= 300:
+                return round(num, 1)
+        except ValueError:
+            pass
+
+        return None
+
+    @staticmethod
+    def parse_age(value: Union[str, int, float]) -> Optional[int]:
+        """
+        Parse age from various formats
+
+        Supports:
+        - 25, "25" → 25
+        - "25 tuổi", "25 years old" → 25
+
+        Args:
+            value: Age in various formats
+
+        Returns:
+            Age as integer or None if invalid
+        """
+        if value is None:
+            return None
+
+        value_str = str(value).strip().lower()
+
+        # Extract number from string
+        age_match = re.search(r'(\d+)', value_str)
+        if age_match:
+            age = int(age_match.group(1))
+            if 0 <= age <= 150:
+                return age
+
+        return None
+
+    @staticmethod
+    def parse_bmi(value: Union[str, int, float]) -> Optional[float]:
+        """Parse BMI value"""
+        if value is None:
+            return None
+
+        try:
+            bmi = float(str(value).strip())
+            if 10 <= bmi <= 60:
+                return round(bmi, 1)
+        except ValueError:
+            pass
+
+        return None
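+
+
+# Illustrative parser behaviour (derived from the patterns above):
+#   HealthDataParser.parse_height("1,78m")   -> 178.0  (comma decimal)
+#   HealthDataParser.parse_height("5ft10in") -> 177.8  (feet/inches)
+#   HealthDataParser.parse_weight("154lbs")  -> 69.9   (pounds -> kg)
+#   HealthDataParser.parse_age("25 tuổi")    -> 25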
+
+
+class HealthDataValidator:
+    """Validate health data for abnormal values"""
+
+    @staticmethod
+    def validate_height(height: float) -> Tuple[bool, Optional[str]]:
+        """
+        Validate height in cm
+
+        Returns:
+            (is_valid, error_message)
+        """
+        if height is None:
+            return True, None
+
+        if height < 50:
+            return False, "Chiều cao quá thấp (< 50cm). Vui lòng kiểm tra lại."
+
+        if height > 300:
+            return False, "Chiều cao quá cao (> 300cm). Vui lòng kiểm tra lại."
+
+        if height < 100:
+            # The value is already in cm here, so suggest re-checking the unit
+            # rather than multiplying an implausible value by 100 again
+            return False, f"Chiều cao {height}cm có vẻ không đúng. Vui lòng kiểm tra lại đơn vị (cm)."
+
+        return True, None
+
+    @staticmethod
+    def validate_weight(weight: float) -> Tuple[bool, Optional[str]]:
+        """
+        Validate weight in kg
+
+        Returns:
+            (is_valid, error_message)
+        """
+        if weight is None:
+            return True, None
+
+        if weight < 20:
+            return False, "Cân nặng quá nhẹ (< 20kg). Vui lòng kiểm tra lại."
+
+        if weight > 300:
+            return False, "Cân nặng quá nặng (> 300kg). Vui lòng kiểm tra lại."
+
+        return True, None
+
+    @staticmethod
+    def validate_age(age: int) -> Tuple[bool, Optional[str]]:
+        """
+        Validate age
+
+        Returns:
+            (is_valid, error_message)
+        """
+        if age is None:
+            return True, None
+
+        if age < 0:
+            return False, "Tuổi không thể âm."
+
+        if age > 150:
+            return False, "Tuổi quá cao (> 150). Vui lòng kiểm tra lại."
+
+        if age < 13:
+            return False, "Hệ thống chỉ hỗ trợ người từ 13 tuổi trở lên."
+
+        return True, None
+
+    @staticmethod
+    def validate_bmi(bmi: float) -> Tuple[bool, Optional[str]]:
+        """
+        Validate BMI
+
+        Returns:
+            (is_valid, error_message)
+        """
+        if bmi is None:
+            return True, None
+
+        if bmi < 10:
+            return False, "BMI quá thấp (< 10). Vui lòng kiểm tra lại."
+
+        if bmi > 60:
+            return False, "BMI quá cao (> 60). Vui lòng kiểm tra lại."
+
+        return True, None
+
+    @staticmethod
+    def calculate_bmi(weight: Optional[float], height: Optional[float]) -> Optional[float]:
+        """
+        Calculate BMI from weight (kg) and height (cm)
+
+        Returns:
+            BMI or None if data is missing
+        """
+        if weight is None or height is None:
+            return None
+
+        if height <= 0 or weight <= 0:
+            return None
+
+        # Convert height from cm to meters
+        height_m = height / 100
+        bmi = weight / (height_m ** 2)
+
+        return round(bmi, 1)
+
+    @staticmethod
+    def get_bmi_category(bmi: Optional[float]) -> str:
+        """Get BMI category (Vietnamese)"""
+        if bmi is None:
+            return "Chưa xác định"
+
+        if bmi < 18.5:
+            return "Thiếu cân"
+        elif bmi < 23:  # Asian BMI standards
+            return "Bình thường"
+        elif bmi < 25:
+            return "Thừa cân nhẹ"
+        elif bmi < 30:
+            return "Thừa cân"
+        else:
+            return "Béo phì"
diff --git a/i18n/__init__.py b/i18n/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..81650daca7499c84a8495086b773c0ff06dc48ea
--- /dev/null
+++ b/i18n/__init__.py
@@ -0,0 +1,18 @@
+"""
+Internationalization (i18n) Module
+Multi-language support for Vietnamese and English
+"""
+
+from .language_detector import Language, LanguageDetector, detect_language
+from .translations import Translations, t
+from .multilingual_handler import MultilingualHandler, get_multilingual_handler
+
+__all__ = [
+    'Language',
+    'LanguageDetector',
+    'detect_language',
+    'Translations',
+    't',
+    'MultilingualHandler',
+    'get_multilingual_handler'
+]
diff --git a/i18n/language_detector.py b/i18n/language_detector.py
new file mode 100644
index 0000000000000000000000000000000000000000..141a6f8f69815c10e2b087c04f802c606d01a392
--- /dev/null
+++ b/i18n/language_detector.py
@@ -0,0 +1,125 @@
+"""
+Language Detector
+Automatically detect user's language from their message
+"""
+
+import re
+from typing import Optional, Tuple
+from enum import
Enum + + +class Language(str, Enum): + """Supported languages""" + VIETNAMESE = "vi" + ENGLISH = "en" + + +class LanguageDetector: + """Detect language from user input""" + + # Vietnamese-specific characters + VIETNAMESE_CHARS = set('àáảãạăằắẳẵặâầấẩẫậèéẻẽẹêềếểễệìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵđ') + + # Common Vietnamese words + VIETNAMESE_WORDS = { + 'tôi', 'bạn', 'của', 'và', 'có', 'là', 'được', 'không', 'này', 'cho', + 'với', 'đã', 'sẽ', 'để', 'trong', 'một', 'những', 'các', 'như', 'khi', + 'muốn', 'cần', 'nên', 'thì', 'hay', 'hoặc', 'nhưng', 'mà', 'vì', 'nếu', + 'giúp', 'giảm', 'tăng', 'ăn', 'uống', 'tập', 'làm', 'biết', 'hỏi', 'nói', + 'cảm', 'thấy', 'đau', 'khỏe', 'bệnh', 'thuốc', 'bác', 'sĩ', 'viện' + } + + # Common English words + ENGLISH_WORDS = { + 'i', 'you', 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'have', 'has', + 'do', 'does', 'can', 'will', 'would', 'should', 'could', 'my', 'your', + 'want', 'need', 'help', 'how', 'what', 'when', 'where', 'why', 'who', + 'eat', 'drink', 'exercise', 'weight', 'health', 'doctor', 'pain', 'feel' + } + + @staticmethod + def detect(text: str) -> Language: + """ + Detect language from text + + Args: + text: Input text + + Returns: + Detected language (vi or en) + """ + if not text or len(text.strip()) < 2: + return Language.VIETNAMESE # Default + + text_lower = text.lower() + + # Check for Vietnamese characters + has_vietnamese_chars = any(char in LanguageDetector.VIETNAMESE_CHARS for char in text_lower) + + if has_vietnamese_chars: + return Language.VIETNAMESE + + # Check for Vietnamese words + words = re.findall(r'\b\w+\b', text_lower) + vietnamese_word_count = sum(1 for word in words if word in LanguageDetector.VIETNAMESE_WORDS) + english_word_count = sum(1 for word in words if word in LanguageDetector.ENGLISH_WORDS) + + # If more Vietnamese words, it's Vietnamese + if vietnamese_word_count > english_word_count: + return Language.VIETNAMESE + + # If more English words, it's English + if english_word_count > vietnamese_word_count: + return Language.ENGLISH + + # Default to Vietnamese + return Language.VIETNAMESE + + @staticmethod + def detect_with_confidence(text: str) -> Tuple[Language, float]: + """ + Detect language with confidence score + + Args: + text: Input text + + Returns: + (language, confidence_score) + """ + if not text or len(text.strip()) < 2: + return Language.VIETNAMESE, 0.5 + + text_lower = text.lower() + + # Count Vietnamese characters + vietnamese_char_count = sum(1 for char in text_lower if char in LanguageDetector.VIETNAMESE_CHARS) + total_chars = len([c for c in text_lower if c.isalpha()]) + + if vietnamese_char_count > 0 and total_chars > 0: + confidence = min(vietnamese_char_count / total_chars * 2, 1.0) + return Language.VIETNAMESE, confidence + + # Count words + words = re.findall(r'\b\w+\b', text_lower) + if not words: + return Language.VIETNAMESE, 0.5 + + vietnamese_word_count = sum(1 for word in words if word in LanguageDetector.VIETNAMESE_WORDS) + english_word_count = sum(1 for word in words if word in LanguageDetector.ENGLISH_WORDS) + + total_matched = vietnamese_word_count + english_word_count + + if total_matched == 0: + return Language.VIETNAMESE, 0.5 + + if vietnamese_word_count > english_word_count: + confidence = vietnamese_word_count / total_matched + return Language.VIETNAMESE, confidence + else: + confidence = english_word_count / total_matched + return Language.ENGLISH, confidence + + +def detect_language(text: str) -> Language: + """Convenience function to detect language""" + return 
LanguageDetector.detect(text) diff --git a/i18n/multilingual_handler.py b/i18n/multilingual_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..fc2ca149be7003f30002e2057ccab9bbb84c7624 --- /dev/null +++ b/i18n/multilingual_handler.py @@ -0,0 +1,147 @@ +""" +Multilingual Handler +Handles language detection and response translation +""" + +from typing import Optional, Dict, Any +from .language_detector import Language, LanguageDetector +from .translations import Translations, t + + +class MultilingualHandler: + """Handle multilingual conversations""" + + def __init__(self): + self.detector = LanguageDetector() + self.user_languages = {} # user_id -> preferred language + + def detect_and_set_language(self, user_id: str, message: str) -> Language: + """ + Detect language from message and set user preference + + Args: + user_id: User identifier + message: User's message + + Returns: + Detected language + """ + language = self.detector.detect(message) + self.user_languages[user_id] = language + return language + + def get_user_language(self, user_id: str) -> Language: + """ + Get user's preferred language + + Args: + user_id: User identifier + + Returns: + User's language (default: Vietnamese) + """ + return self.user_languages.get(user_id, Language.VIETNAMESE) + + def set_user_language(self, user_id: str, language: Language) -> None: + """ + Manually set user's language + + Args: + user_id: User identifier + language: Preferred language + """ + self.user_languages[user_id] = language + + def get_agent_system_prompt(self, agent_type: str, language: Language) -> str: + """ + Get agent system prompt in user's language + + Args: + agent_type: Type of agent + language: Target language + + Returns: + System prompt + """ + return Translations.get_agent_prompt(agent_type, language) + + def translate_message(self, key: str, language: Language, **kwargs) -> str: + """ + Translate a message key + + Args: + key: Translation key + language: Target language + **kwargs: Format arguments + + Returns: + Translated message + """ + return t(key, language, **kwargs) + + def format_response_with_language( + self, + response: str, + language: Language, + add_language_note: bool = False + ) -> str: + """ + Format response with language-specific additions + + Args: + response: Original response + language: Target language + add_language_note: Add note about language detection + + Returns: + Formatted response + """ + if add_language_note: + if language == Language.ENGLISH: + note = "\n\n_Note: I detected you're using English. You can switch to Vietnamese anytime._" + else: + note = "\n\n_Lưu ý: Tôi phát hiện bạn đang dùng tiếng Việt. 
Bạn có thể chuyển sang tiếng Anh bất cứ lúc nào._" + + return response + note + + return response + + def get_language_stats(self) -> Dict[str, Any]: + """ + Get statistics about language usage + + Returns: + Language statistics + """ + total_users = len(self.user_languages) + + if total_users == 0: + return { + 'total_users': 0, + 'vietnamese_users': 0, + 'english_users': 0, + 'vietnamese_percentage': 0.0, + 'english_percentage': 0.0 + } + + vietnamese_count = sum(1 for lang in self.user_languages.values() if lang == Language.VIETNAMESE) + english_count = sum(1 for lang in self.user_languages.values() if lang == Language.ENGLISH) + + return { + 'total_users': total_users, + 'vietnamese_users': vietnamese_count, + 'english_users': english_count, + 'vietnamese_percentage': round(vietnamese_count / total_users * 100, 1), + 'english_percentage': round(english_count / total_users * 100, 1) + } + + +# Global instance +_multilingual_handler = None + +def get_multilingual_handler() -> MultilingualHandler: + """Get global multilingual handler instance""" + global _multilingual_handler + if _multilingual_handler is None: + _multilingual_handler = MultilingualHandler() + return _multilingual_handler diff --git a/i18n/translations.py b/i18n/translations.py new file mode 100644 index 0000000000000000000000000000000000000000..1586664fbebc86a9607b876daef52f447619ce6c --- /dev/null +++ b/i18n/translations.py @@ -0,0 +1,262 @@ +""" +Translation System +Provides translations for common messages and UI elements +""" + +from typing import Dict, Any +from .language_detector import Language + + +class Translations: + """Translation database for Vietnamese and English""" + + # Common messages + MESSAGES = { + # Greetings + 'greeting': { + Language.VIETNAMESE: 'Xin chào! Tôi là trợ lý sức khỏe của bạn. Tôi có thể giúp gì cho bạn?', + Language.ENGLISH: 'Hello! I am your health assistant. How can I help you?' + }, + 'how_can_help': { + Language.VIETNAMESE: 'Tôi có thể giúp gì cho bạn?', + Language.ENGLISH: 'How can I help you?' + }, + + # Profile questions + 'ask_age': { + Language.VIETNAMESE: 'Bạn bao nhiêu tuổi?', + Language.ENGLISH: 'How old are you?' + }, + 'ask_gender': { + Language.VIETNAMESE: 'Bạn là nam hay nữ?', + Language.ENGLISH: 'What is your gender?' + }, + 'ask_weight': { + Language.VIETNAMESE: 'Cân nặng của bạn là bao nhiêu? (kg)', + Language.ENGLISH: 'What is your weight? (kg)' + }, + 'ask_height': { + Language.VIETNAMESE: 'Chiều cao của bạn là bao nhiêu? (cm)', + Language.ENGLISH: 'What is your height? (cm)' + }, + + # BMI categories + 'bmi_underweight': { + Language.VIETNAMESE: 'Thiếu cân', + Language.ENGLISH: 'Underweight' + }, + 'bmi_normal': { + Language.VIETNAMESE: 'Bình thường', + Language.ENGLISH: 'Normal' + }, + 'bmi_overweight': { + Language.VIETNAMESE: 'Thừa cân', + Language.ENGLISH: 'Overweight' + }, + 'bmi_obese': { + Language.VIETNAMESE: 'Béo phì', + Language.ENGLISH: 'Obese' + }, + + # Feedback + 'rate_response': { + Language.VIETNAMESE: 'Đánh giá câu trả lời này', + Language.ENGLISH: 'Rate this response' + }, + 'thank_you_feedback': { + Language.VIETNAMESE: 'Cảm ơn phản hồi của bạn!', + Language.ENGLISH: 'Thank you for your feedback!' + }, + + # Errors + 'error_occurred': { + Language.VIETNAMESE: 'Xin lỗi, đã có lỗi xảy ra. Vui lòng thử lại.', + Language.ENGLISH: 'Sorry, an error occurred. Please try again.' + }, + 'invalid_input': { + Language.VIETNAMESE: 'Dữ liệu không hợp lệ. Vui lòng kiểm tra lại.', + Language.ENGLISH: 'Invalid input. Please check again.' 
+ }, + + # Nutrition + 'nutrition_advice': { + Language.VIETNAMESE: 'Tư vấn dinh dưỡng', + Language.ENGLISH: 'Nutrition advice' + }, + 'meal_plan': { + Language.VIETNAMESE: 'Thực đơn', + Language.ENGLISH: 'Meal plan' + }, + 'calories': { + Language.VIETNAMESE: 'Calo', + Language.ENGLISH: 'Calories' + }, + 'protein': { + Language.VIETNAMESE: 'Protein', + Language.ENGLISH: 'Protein' + }, + + # Exercise + 'exercise_plan': { + Language.VIETNAMESE: 'Kế hoạch tập luyện', + Language.ENGLISH: 'Exercise plan' + }, + 'workout': { + Language.VIETNAMESE: 'Bài tập', + Language.ENGLISH: 'Workout' + }, + 'duration': { + Language.VIETNAMESE: 'Thời gian', + Language.ENGLISH: 'Duration' + }, + + # Symptoms + 'symptoms': { + Language.VIETNAMESE: 'Triệu chứng', + Language.ENGLISH: 'Symptoms' + }, + 'severity': { + Language.VIETNAMESE: 'Mức độ', + Language.ENGLISH: 'Severity' + }, + 'seek_medical_attention': { + Language.VIETNAMESE: 'Bạn nên đến bệnh viện ngay', + Language.ENGLISH: 'You should seek medical attention immediately' + }, + + # Mental health + 'mental_health': { + Language.VIETNAMESE: 'Sức khỏe tinh thần', + Language.ENGLISH: 'Mental health' + }, + 'stress_level': { + Language.VIETNAMESE: 'Mức độ căng thẳng', + Language.ENGLISH: 'Stress level' + }, + 'mood': { + Language.VIETNAMESE: 'Tâm trạng', + Language.ENGLISH: 'Mood' + }, + + # General + 'yes': { + Language.VIETNAMESE: 'Có', + Language.ENGLISH: 'Yes' + }, + 'no': { + Language.VIETNAMESE: 'Không', + Language.ENGLISH: 'No' + }, + 'save': { + Language.VIETNAMESE: 'Lưu', + Language.ENGLISH: 'Save' + }, + 'cancel': { + Language.VIETNAMESE: 'Hủy', + Language.ENGLISH: 'Cancel' + }, + 'continue': { + Language.VIETNAMESE: 'Tiếp tục', + Language.ENGLISH: 'Continue' + }, + 'back': { + Language.VIETNAMESE: 'Quay lại', + Language.ENGLISH: 'Back' + } + } + + # Agent prompts + AGENT_PROMPTS = { + 'nutrition_system': { + Language.VIETNAMESE: """Bạn là chuyên gia dinh dưỡng chuyên nghiệp, thân thiện và am hiểu về ẩm thực Việt Nam. +Nhiệm vụ: Tư vấn dinh dưỡng, lập thực đơn, tính toán calo phù hợp với người Việt. +Phong cách: Thân thiện, dễ hiểu, khoa học, thực tế.""", + Language.ENGLISH: """You are a professional, friendly nutritionist with expertise in nutrition science. +Task: Provide nutrition advice, meal planning, and calorie calculations. +Style: Friendly, clear, scientific, practical.""" + }, + 'exercise_system': { + Language.VIETNAMESE: """Bạn là huấn luyện viên thể hình chuyên nghiệp, nhiệt tình và có kinh nghiệm. +Nhiệm vụ: Tư vấn tập luyện, lập kế hoạch tập phù hợp với mọi trình độ. +Phong cách: Động viên, rõ ràng, an toàn, hiệu quả.""", + Language.ENGLISH: """You are a professional, enthusiastic fitness trainer with extensive experience. +Task: Provide exercise advice and create workout plans for all fitness levels. +Style: Motivating, clear, safe, effective.""" + }, + 'symptom_system': { + Language.VIETNAMESE: """Bạn là trợ lý y tế thông minh, cẩn thận và có trách nhiệm. +Nhiệm vụ: Đánh giá triệu chứng, đưa ra khuyến nghị sơ bộ, hướng dẫn khi nào cần gặp bác sĩ. +Phong cách: Cẩn thận, rõ ràng, không chẩn đoán, luôn khuyến khích gặp bác sĩ khi cần.""", + Language.ENGLISH: """You are an intelligent, careful, and responsible medical assistant. +Task: Assess symptoms, provide preliminary recommendations, guide when to see a doctor. +Style: Careful, clear, non-diagnostic, always encourage seeing a doctor when needed.""" + }, + 'mental_health_system': { + Language.VIETNAMESE: """Bạn là chuyên gia tâm lý thấu hiểu, ấm áp và hỗ trợ tích cực. 
+Nhiệm vụ: Lắng nghe, hỗ trợ tinh thần, đưa ra lời khuyên về sức khỏe tâm lý.
+Phong cách: Thấu hiểu, ấm áp, không phán xét, khuyến khích tích cực.""",
+            Language.ENGLISH: """You are an understanding, warm, and supportive mental health expert.
+Task: Listen, provide emotional support, offer mental health advice.
+Style: Understanding, warm, non-judgmental, encouraging."""
+        }
+    }
+
+    @staticmethod
+    def get(key: str, language: Language, **kwargs) -> str:
+        """
+        Get translation for a key
+
+        Args:
+            key: Translation key
+            language: Target language
+            **kwargs: Format arguments
+
+        Returns:
+            Translated text
+        """
+        if key not in Translations.MESSAGES:
+            return key
+
+        text = Translations.MESSAGES[key].get(language, Translations.MESSAGES[key][Language.VIETNAMESE])
+
+        # Format if kwargs provided
+        if kwargs:
+            try:
+                text = text.format(**kwargs)
+            except KeyError:
+                pass
+
+        return text
+
+    @staticmethod
+    def get_agent_prompt(agent_type: str, language: Language) -> str:
+        """
+        Get agent system prompt in specified language
+
+        Args:
+            agent_type: Type of agent (nutrition, exercise, etc.)
+            language: Target language
+
+        Returns:
+            System prompt
+        """
+        key = f"{agent_type}_system"
+        if key not in Translations.AGENT_PROMPTS:
+            return ""
+
+        return Translations.AGENT_PROMPTS[key].get(language, Translations.AGENT_PROMPTS[key][Language.VIETNAMESE])
+
+
+def t(key: str, language: Language = Language.VIETNAMESE, **kwargs) -> str:
+    """
+    Convenience function for translation
+
+    Args:
+        key: Translation key
+        language: Target language
+        **kwargs: Format arguments
+
+    Returns:
+        Translated text
+    """
+    return Translations.get(key, language, **kwargs)
diff --git a/modules/AGENTS_VS_MODULES.md b/modules/AGENTS_VS_MODULES.md
new file mode 100644
index 0000000000000000000000000000000000000000..c0963922ef342396b18dcc493b5bbf1096278e10
--- /dev/null
+++ b/modules/AGENTS_VS_MODULES.md
@@ -0,0 +1,373 @@
+# Agents vs Modules - Understanding the Difference 🎯
+
+## TL;DR
+
+```
+agents/  = the "brain" (conversation + orchestration)
+modules/ = the "hands" (calculations + business logic)
+```
+
+---
+
+## 📊 Quick Comparison
+
+| Aspect | `agents/` | `modules/` |
+|--------|-----------|------------|
+| **Purpose** | Talk to the user | Calculations, data processing |
+| **Technology** | OpenAI LLM | Pure Python |
+| **Input** | Natural language | Structured data (dict, list) |
+| **Output** | Conversational text | Data (numbers, dict, list) |
+| **Example** | "Bạn bao nhiêu tuổi?" | `calculate_bmi(70, 1.75)` |
+| **Testing** | Integration tests | Unit tests |
+
+---
+
+## 🧠 `agents/` - Conversation Layer
+
+### Purpose
+Understand what the user wants, ask for missing information, format responses nicely
+
+### When to Use
+- ✅ You need to ask the user for more information
+- ✅ You need to understand context
+- ✅ You need to format a response for readability
+- ✅ You need to handle conversation flow
+
+### Example
+
+```python
+# agents/nutrition_agent.py
+
+class NutritionAgent:
+    def handle(self, parameters, chat_history):
+        # 1. Understand what the user wants
+        user_query = parameters['user_query']
+
+        # 2. Extract data from the conversation
+        user_data = self._extract_from_history(chat_history)
+
+        # 3. Check whether we have enough data
+        if not user_data.get('weight'):
+            return "Bạn nặng bao nhiêu kg?"  # ← Ask the user
+
+        # 4. Call a module to do the math
+        bmi = self.advisor.calculate_bmi(
+            user_data['weight'],
+            user_data['height']
+        )
+
+        # 5. Format a friendly response
+        return f"""📊 **Phân tích BMI**
+
+BMI của bạn: {bmi}
+Đánh giá: Bình thường
+
+Bạn có muốn tư vấn thêm về chế độ ăn không? 😊"""
+```
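+
+The example above leans on a helper, `_extract_from_history`, that is not
+shown in this document. A minimal sketch of such a helper follows
+(hypothetical — it assumes the user replies with a bare value like "70kg";
+the real implementation may use the LLM or other parsing instead):
+
+```python
+# Hypothetical sketch — not the project's actual implementation
+from health_data.validators import HealthDataParser
+
+class NutritionAgent:
+    def _extract_from_history(self, chat_history):
+        """Collect weight/height from earlier user messages."""
+        user_data = {}
+        for message in chat_history:
+            if message.get("role") != "user":
+                continue
+            text = message.get("content", "")
+            # The parsers return None unless the text is a recognizable value
+            weight = HealthDataParser.parse_weight(text)
+            if weight is not None:
+                user_data["weight"] = weight
+            height = HealthDataParser.parse_height(text)
+            if height is not None:
+                user_data["height"] = height
+        return user_data
+```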
😊""" +``` + +**Agents làm gì:** +- 🗣️ Nói chuyện với user +- 🎯 Hiểu intent +- 📝 Hỏi thông tin còn thiếu +- 🎨 Format response đẹp +- 🔗 Gọi modules khi cần tính toán + +--- + +## 🔧 `modules/` - Business Logic Layer + +### Mục Đích +Tính toán, xử lý data, business rules - KHÔNG nói chuyện + +### Khi Nào Dùng +- ✅ Cần tính toán (BMI, calories, etc.) +- ✅ Cần generate data (meal plan, workout plan) +- ✅ Cần validate data +- ✅ Cần apply business rules + +### Ví Dụ + +```python +# modules/nutrition.py + +class NutritionAdvisor: + def calculate_bmi(self, weight, height): + """ + Calculate BMI + + Args: + weight (float): Weight in kg + height (float): Height in meters + + Returns: + float: BMI value + """ + if height <= 0: + raise ValueError("Height must be positive") + + return round(weight / (height ** 2), 1) + + def calculate_calories(self, user_data): + """Calculate daily calorie needs""" + # Complex calculation + bmr = self._calculate_bmr(user_data) + activity_factor = self._get_activity_factor(user_data['activity_level']) + + return bmr * activity_factor + + def generate_meal_plan(self, user_data): + """Generate personalized meal plan""" + calories = self.calculate_calories(user_data) + + # Business logic to create meal plan + return { + 'breakfast': [...], + 'lunch': [...], + 'dinner': [...] + } +``` + +**Modules làm gì:** +- 🧮 Tính toán (BMI, calories, macros) +- 📋 Generate plans (meal, workout) +- ✅ Validate data +- 📏 Apply business rules +- 🔄 Reusable functions + +--- + +## 🎯 Workflow: User Muốn Giảm Cân + +### Flow Hoàn Chỉnh + +``` +User: "Tôi muốn giảm cân" + ↓ +[agents/nutrition_agent.py] + → Hiểu: User muốn nutrition advice + → Check: Có đủ data chưa? (tuổi, cân nặng, chiều cao) + → Không đủ → Hỏi: "Bạn bao nhiêu tuổi, nặng bao nhiêu?" + ↓ +User: "25 tuổi, 70kg, 175cm" + ↓ +[agents/nutrition_agent.py] + → Extract: age=25, weight=70, height=1.75 + → Gọi modules để tính toán: + ↓ +[modules/nutrition.py] + → calculate_bmi(70, 1.75) = 22.9 + → calculate_calories({age: 25, weight: 70, ...}) = 1800 + → generate_meal_plan(...) = {breakfast: [...], ...} + ↓ +[agents/nutrition_agent.py] + → Nhận kết quả từ modules + → Format đẹp: + ↓ +Response: "📊 BMI: 22.9 (Bình thường) + 🎯 Calo: 1800 kcal/ngày + 🍽️ Thực đơn: [...]" +``` + +--- + +## 💡 Ví Dụ Thực Tế + +### Scenario: Thêm Feature "Meal Timing" + +#### ❌ SAI - Tất cả trong agents + +```python +# agents/nutrition_agent.py + +class NutritionAgent: + def handle(self, parameters, chat_history): + # ❌ Tính toán trong agent - SAI! 
+
+---
+
+## 🎯 Workflow: The User Wants to Lose Weight
+
+### End-to-End Flow
+
+```
+User: "Tôi muốn giảm cân"
+    ↓
+[agents/nutrition_agent.py]
+    → Understands: the user wants nutrition advice
+    → Checks: is there enough data? (age, weight, height)
+    → Not enough → Asks: "Bạn bao nhiêu tuổi, nặng bao nhiêu?"
+    ↓
+User: "25 tuổi, 70kg, 175cm"
+    ↓
+[agents/nutrition_agent.py]
+    → Extracts: age=25, weight=70, height=1.75
+    → Calls modules to do the math:
+    ↓
+[modules/nutrition.py]
+    → calculate_bmi(70, 1.75) = 22.9
+    → calculate_calories({age: 25, weight: 70, ...}) = 1800
+    → generate_meal_plan(...) = {breakfast: [...], ...}
+    ↓
+[agents/nutrition_agent.py]
+    → Receives the results from the modules
+    → Formats them nicely:
+    ↓
+Response: "📊 BMI: 22.9 (Bình thường)
+           🎯 Calo: 1800 kcal/ngày
+           🍽️ Thực đơn: [...]"
+```
+
+---
+
+## 💡 Real-World Example
+
+### Scenario: Adding a "Meal Timing" Feature
+
+#### ❌ WRONG - Everything inside the agent
+
+```python
+# agents/nutrition_agent.py
+
+class NutritionAgent:
+    def handle(self, parameters, chat_history):
+        # ❌ Calculation inside the agent - WRONG!
+        if workout_time == 'morning':
+            breakfast = '7:00 AM'
+            pre_workout = '6:00 AM'
+        else:
+            breakfast = '8:00 AM'
+            pre_workout = '5:00 PM'
+
+        return f"Ăn sáng: {breakfast}"
+```
+
+**Problems:**
+- The logic cannot be tested on its own
+- Hard to maintain
+- Cannot be reused
+
+---
+
+#### ✅ RIGHT - Agents and modules kept separate
+
+```python
+# modules/nutrition.py
+
+class NutritionAdvisor:
+    def optimize_meal_timing(self, user_data):
+        """Calculate optimal meal times"""
+        workout_time = user_data['workout_time']
+
+        if workout_time == 'morning':
+            return {
+                'breakfast': '7:00 AM',
+                'pre_workout': '6:00 AM',
+                'lunch': '12:00 PM',
+                'dinner': '7:00 PM'
+            }
+        else:
+            return {
+                'breakfast': '8:00 AM',
+                'lunch': '12:00 PM',
+                'pre_workout': '5:00 PM',
+                'dinner': '8:00 PM'
+            }
+```
+
+```python
+# agents/nutrition_agent.py
+
+class NutritionAgent:
+    def handle(self, parameters, chat_history):
+        # Extract data
+        user_data = self._extract_data(chat_history)
+
+        # Call the module
+        meal_timing = self.advisor.optimize_meal_timing(user_data)
+
+        # Format the response
+        return f"""🕐 **Lịch Ăn Tối Ưu**
+
+- Sáng: {meal_timing['breakfast']}
+- Trưa: {meal_timing['lunch']}
+- Trước tập: {meal_timing['pre_workout']}
+- Tối: {meal_timing['dinner']}
+
+Bạn có muốn tư vấn thêm về món ăn không? 😊"""
+```
+
+**Benefits:**
+- ✅ `optimize_meal_timing()` can be tested in isolation (see the sketch below)
+- ✅ Easy to maintain
+- ✅ Reusable from other agents
+- ✅ Clear separation of concerns
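+
+For instance, the isolated test mentioned in the benefits above could look
+like this (a minimal pytest sketch against the `NutritionAdvisor` code in
+this section):
+
+```python
+# tests/test_meal_timing.py (illustrative)
+from modules.nutrition import NutritionAdvisor
+
+def test_morning_workout_schedule():
+    timing = NutritionAdvisor().optimize_meal_timing({'workout_time': 'morning'})
+    assert timing['breakfast'] == '7:00 AM'
+    assert timing['pre_workout'] == '6:00 AM'
+
+def test_evening_workout_schedule():
+    timing = NutritionAdvisor().optimize_meal_timing({'workout_time': 'evening'})
+    assert timing['pre_workout'] == '5:00 PM'
+```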
+
+---
+
+## 🔑 The Golden Rule
+
+### When coding, ask yourself:
+
+**"Is this conversation or calculation?"**
+
+- **Conversation** → `agents/`
+  - Asking the user
+  - Understanding context
+  - Formatting responses
+
+- **Calculation** → `modules/`
+  - Computing values
+  - Generating data
+  - Business rules
+
+---
+
+## 📝 Quick Checklist
+
+### Adding a New Feature
+
+**1. Modules (`modules/`):**
+```python
+# Add a calculation function
+def calculate_something(user_data):
+    # Pure Python logic
+    return result
+```
+
+**2. Agents (`agents/`):**
+```python
+# Use the function from modules
+def handle(self, parameters, chat_history):
+    # 1. Extract data
+    # 2. Call module
+    result = self.module.calculate_something(data)
+    # 3. Format response
+    return f"Kết quả: {result}"
+```
+
+**3. Tests:**
+```python
+# Test the module (unit test)
+def test_calculate_something():
+    result = calculate_something({'age': 25})
+    assert result == expected_value
+
+# Test the agent (integration test)
+def test_agent_flow():
+    response = agent.handle(...)
+    assert "Kết quả" in response
+```
+
+---
+
+## ❓ FAQ
+
+**Q: Where should my code go?**
+A:
+- Calculations, logic → `modules/`
+- Conversation, asking the user → `agents/`
+
+**Q: Can a module call an agent?**
+A: NO! Only agents call modules, never the other way around.
+
+**Q: Can an agent call another agent?**
+A: Not directly. Use the router to hand off (see the sketch after this FAQ).
+
+**Q: When do I need to update both?**
+A: When adding a new feature:
+1. Add the logic to `modules/`
+2. Add the conversation to `agents/`
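+
+A hand-off via the router might look roughly like the sketch below
+(hypothetical — the project's actual router interface may differ):
+
+```python
+# Illustrative router sketch, not the real implementation
+class Router:
+    def __init__(self, agents):
+        self.agents = agents  # e.g. {"nutrition": NutritionAgent(), ...}
+
+    def dispatch(self, intent, parameters, chat_history):
+        agent = self.agents.get(intent)
+        if agent is None:
+            return "Xin lỗi, tôi chưa hỗ trợ yêu cầu này."
+        return agent.handle(parameters, chat_history)
+```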
+
+---
+
+## 🎓 Summary
+
+```
+┌─────────────────────────────────────┐
+│ User: "Tôi muốn giảm cân"           │
+└──────────────┬──────────────────────┘
+               │
+┌──────────────▼──────────────────────┐
+│ agents/nutrition_agent.py           │
+│ - Understand the user's intent      │
+│ - Ask: age, weight, height          │
+│ - Format a friendly response        │
+└──────────────┬──────────────────────┘
+               │ calls
+┌──────────────▼──────────────────────┐
+│ modules/nutrition.py                │
+│ - calculate_bmi(70, 1.75)           │
+│ - calculate_calories(...)           │
+│ - generate_meal_plan(...)           │
+└──────────────┬──────────────────────┘
+               │ returns data
+┌──────────────▼──────────────────────┐
+│ agents/nutrition_agent.py           │
+│ - Format: "BMI: 22.9, Calo: 1800"   │
+└──────────────┬──────────────────────┘
+               │
+┌──────────────▼──────────────────────┐
+│ User: "📊 BMI: 22.9..."             │
+└─────────────────────────────────────┘
+```
+
+**Remember:**
+- `agents/` = conversation (the brain)
+- `modules/` = calculation (the hands)
+- Agents call modules, never the reverse
+- Clear separation = maintainable code
+
+---
+
+**That's it! Simple as that.** 🚀
diff --git a/modules/exercise/exercise.py b/modules/exercise/exercise.py
new file mode 100644
index 0000000000000000000000000000000000000000..755b0fe0fd1f7437d0e43ee150eee4e657c7ef43
--- /dev/null
+++ b/modules/exercise/exercise.py
@@ -0,0 +1,62 @@
+import os
+import json
+from config.settings import client, MODEL
+
+rules_file = os.path.join(os.path.dirname(__file__), 'rules.json')
+
+def get_basic_recommendations(user_data, rules_file=rules_file):
+    with open(rules_file, 'r', encoding='utf-8') as f:
+        rules_data = json.load(f)
+
+    recommendations = []
+    for cond in user_data.get('health_conditions', []):
+        if cond.lower() in rules_data.get('conditions', {}):
+            recommendations.extend(rules_data['conditions'][cond.lower()])
+
+    goal = user_data.get('goal', '').lower()
+    for key, recs in rules_data.get('goals', {}).items():
+        if key in goal:
+            recommendations.extend(recs)
+
+    level = user_data.get('fitness_level', 'beginner').lower()
+    if level in rules_data.get('fitness_level', {}):
+        recommendations.extend(rules_data['fitness_level'][level])
+
+    return recommendations
+
+def generate_exercise_plan(user_data):
+    rules = get_basic_recommendations(user_data)
+    system_prompt = f"""
+Bạn là huấn luyện viên cá nhân vui tính 💪🔥.
+
+Thông tin người dùng:
+- Tuổi: {user_data.get('age', 'chưa biết')}
+- Giới tính: {user_data.get('gender', 'chưa biết')}
+- Cân nặng: {user_data.get('weight', 'chưa biết')}
+- Chiều cao: {user_data.get('height', 'chưa biết')}
+- Thể lực hiện tại: {user_data.get('fitness_level', 'nhẹ')}
+- Mục tiêu: {user_data.get('goal', 'cải thiện sức khỏe')}
+- Thời gian rảnh mỗi ngày: {user_data.get('available_time', 30)} phút
+- Bệnh nền: {', '.join(user_data.get('health_conditions', []))}
+
+Rule cần tuân theo:
+{chr(10).join(f"- {r}" for r in rules)}
+
+Yêu cầu:
+- Tạo kế hoạch 7 ngày dạng bảng
+- Friendly intro trước khi xuất plan
+- Mỗi ngày có: Tên buổi tập, thời gian, danh sách bài tập, số hiệp, số lần / thời gian, lưu ý an toàn
+- Không hỏi lại thông tin, nếu thiếu dùng default
+- Không nhét link hoặc placeholder
+"""
+
+    response = client.chat.completions.create(
+        model=MODEL,
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": "Hãy tạo kế hoạch 7 ngày theo yêu cầu."}
+        ],
+        temperature=0.7,
+        max_tokens=3000
+    )
+    return response.choices[0].message.content
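+
+
+# Illustrative local smoke test (not wired into the app). The profile values
+# below are hypothetical; missing fields fall back to the prompt's defaults,
+# and generate_exercise_plan() requires a configured OPENAI_API_KEY.
+if __name__ == "__main__":
+    demo_user = {
+        "age": 25,
+        "gender": "nam",
+        "weight": 70,
+        "height": 175,
+        "fitness_level": "beginner",
+        "goal": "giảm cân",
+        "available_time": 30,
+        "health_conditions": ["tim"],
+    }
+    # Expected rule hits: "tim" (condition) and "giảm cân" (goal)
+    print(get_basic_recommendations(demo_user))
+    print(generate_exercise_plan(demo_user))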
diff --git a/modules/exercise/flow_input.json b/modules/exercise/flow_input.json
new file mode 100644
index 0000000000000000000000000000000000000000..7009fa365cd5f18029bb407ed9bc7ef990d0518d
--- /dev/null
+++ b/modules/exercise/flow_input.json
@@ -0,0 +1,9 @@
+[
+    {"field": "age", "question": "Bạn bao nhiêu tuổi?", "type": "int"},
+    {"field": "gender", "question": "Bạn là nam hay nữ?", "type": "str", "options": ["nam","nữ"]},
+    {"field": "weight", "question": "Cân nặng của bạn (kg)?", "type": "float"},
+    {"field": "height", "question": "Chiều cao của bạn (cm)?", "type": "float"},
+    {"field": "fitness_level", "question": "Mức độ tập luyện (beginner/intermediate/advanced)?", "type": "str", "options": ["beginner","intermediate","advanced"]},
+    {"field": "goal", "question": "Mục tiêu của bạn là gì? (ví dụ: giảm cân, tăng cơ)", "type": "str"},
+    {"field": "available_time", "question": "Bạn có bao nhiêu phút mỗi ngày để tập luyện?", "type": "int"}
+]
diff --git a/modules/exercise/rules.json b/modules/exercise/rules.json
new file mode 100644
index 0000000000000000000000000000000000000000..e079b3be4ed3018cd40f4bf2fb1d744676a16cf5
--- /dev/null
+++ b/modules/exercise/rules.json
@@ -0,0 +1,19 @@
+{
+    "conditions": {
+        "heart": ["Tránh các bài tập cường độ cao như HIIT hoặc chạy nước rút."],
+        "tim": ["Tránh các bài tập cường độ cao như HIIT hoặc chạy nước rút."]
+    },
+    "goals": {
+        "weight loss": ["Ưu tiên cardio nhẹ như đi bộ nhanh, đạp xe, nhảy dây."],
+        "giảm cân": ["Ưu tiên cardio nhẹ như đi bộ nhanh, đạp xe, nhảy dây."],
+        "muscle": ["Kết hợp bài tập kháng lực như hít đất, plank, squat."],
+        "tăng cơ": ["Kết hợp bài tập kháng lực như hít đất, plank, squat."]
+    },
+    "fitness_level": {
+        "beginner": [],
+        "intermediate": ["Có thể tăng số hiệp hoặc thêm bài tập bổ trợ."],
+        "vừa": ["Có thể tăng số hiệp hoặc thêm bài tập bổ trợ."],
+        "advanced": ["Có thể thử HIIT nhẹ hoặc tăng cường sức bền."],
+        "nặng": ["Có thể thử HIIT nhẹ hoặc tăng cường sức bền."]
+    }
+}
diff --git a/modules/exercise/user_data.py b/modules/exercise/user_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..281ca63ece3ca504b766739e94891355e04ec190
--- /dev/null
+++ b/modules/exercise/user_data.py
@@ -0,0 +1,16 @@
+# modules/exercise/user_data.py
+pending_user_data = {}
+
+def get_user_data(user_id):
+    """Get the user's data, creating a fresh record if none exists"""
+    if user_id not in pending_user_data:
+        pending_user_data[user_id] = {}
+    return pending_user_data[user_id]
+
+def update_user_data(user_id, field, value):
+    user_data = get_user_data(user_id)
+    user_data[field] = value
+
+def reset_user_data(user_id):
+    if user_id in pending_user_data:
+        del pending_user_data[user_id]
diff --git a/modules/nutrition.py b/modules/nutrition.py
new file mode 100644
index 0000000000000000000000000000000000000000..39f68a5655bf6506d50f3df20a095d6be6fd9c88
--- /dev/null
+++ b/modules/nutrition.py
@@ -0,0 +1,291 @@
+import json
+from config.settings import client, MODEL
+
+class NutritionAdvisor:
+    def __init__(self):
+        self.rules = self._load_rules()
+
+    def _load_rules(self):
+        """Load nutrition rules from rules.json"""
+        try:
+            with open('modules/rules.json', 'r', encoding='utf-8') as f:
+                return json.load(f).get('nutrition', {})
+        except (OSError, json.JSONDecodeError):
+            # Fall back to built-in defaults instead of masking every error
+            return self._get_default_rules()
+
+    def _get_default_rules(self):
+        """Default nutrition rules if file not found"""
+        return {
+            # Ranges are half-open (min <= bmi < max), so the upper bounds
+            # must equal the next category's lower bound to avoid gaps
+            "bmi_categories": {
+                "underweight": {"min": 0, "max": 18.5, "advice": "Tăng cân lành mạnh"},
+                "normal": {"min": 18.5, "max": 25, "advice": "Duy trì cân nặng"},
+                "overweight": {"min": 25, "max": 30, "advice": "Giảm cân nhẹ"},
+                "obese": {"min": 30, "max": 100, "advice": "Giảm cân cần thiết"}
+            },
+            "age_groups": {
+                "child": {"min": 0, "max": 12, "focus": "Phát triển"},
+                "teen": {"min": 13, "max": 19, "focus": "Tăng trưởng"},
+                "adult": {"min": 20, "max": 59, "focus": "Duy trì"},
+                "elderly": {"min": 60, "max": 120, "focus": "Sức khỏe"}
+            },
+            "goals": {
+                "weight_loss": "Giảm cân",
+                "weight_gain": "Tăng cân",
+                "muscle_building": "Xây dựng cơ bắp",
+                "maintenance": "Duy trì",
+                "health_improvement": "Cải thiện sức khỏe"
+            },
+            "gender_nutrition": {
+                "male": {"calorie_base": 2500, "protein_ratio": 0.25},
+                "female": {"calorie_base": 2000, "protein_ratio": 0.22}
+            }
+        }
+
+    def calculate_bmi(self, weight, height):
+        """Calculate BMI with validation"""
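+        # Worked example (illustrative): weight=70 (kg), height=175 (cm)
+        # -> 70 / (1.75 m)**2 ≈ 22.9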
+ # Validate inputs + if not weight or not height: + return 0 + + # Height should be in cm (50-300 range) + if height < 50 or height > 300: + # Likely wrong unit or extraction error + return 0 + + # Weight should be in kg (20-300 range) + if weight < 20 or weight > 300: + return 0 + + height_m = height / 100 + bmi = weight / (height_m ** 2) + + # BMI should be reasonable (10-60 range) + if bmi < 10 or bmi > 60: + return 0 + + return round(bmi, 1) + + def get_bmi_category(self, bmi): + """Get BMI category and advice""" + for category, data in self.rules["bmi_categories"].items(): + if data["min"] <= bmi < data["max"]: + return { + "category": category, + "advice": data["advice"], + "bmi": bmi + } + return {"category": "unknown", "advice": "Cần đánh giá thêm", "bmi": bmi} + + def get_age_group(self, age): + """Get age group and focus""" + for group, data in self.rules["age_groups"].items(): + if data["min"] <= age <= data["max"]: + return {"group": group, "focus": data["focus"]} + return {"group": "unknown", "focus": "Sức khỏe tổng quát"} + + def generate_nutrition_advice(self, user_data): + """ + Generate personalized nutrition advice + + Args: + user_data (dict): { + 'age': int, + 'gender': str ('male'/'female'), + 'weight': float (kg), + 'height': float (cm), + 'goal': str, + 'activity_level': str ('low'/'moderate'/'high'), + 'dietary_restrictions': list, + 'health_conditions': list + } + + Returns: + dict: Comprehensive nutrition advice + """ + # Calculate BMI and get category + bmi = self.calculate_bmi(user_data['weight'], user_data['height']) + bmi_info = self.get_bmi_category(bmi) + + # Get age group + age_info = self.get_age_group(user_data['age']) + + # Get gender-specific nutrition base + gender_info = self.rules["gender_nutrition"].get(user_data['gender'], + self.rules["gender_nutrition"]["female"]) + + # Prepare context for LLM + nutrition_context = self._build_nutrition_context(user_data, bmi_info, age_info, gender_info) + + # Generate LLM advice + llm_advice = self._get_llm_nutrition_advice(nutrition_context) + + # Calculate daily nutrition targets + daily_targets = self._calculate_daily_targets(user_data, gender_info, bmi_info) + + return { + "bmi_analysis": bmi_info, + "age_group": age_info, + "daily_targets": daily_targets, + "personalized_advice": llm_advice, + "meal_suggestions": self._generate_meal_suggestions(user_data, bmi_info), + "supplement_recommendations": self._get_supplement_recommendations(user_data, bmi_info) + } + + def _build_nutrition_context(self, user_data, bmi_info, age_info, gender_info): + """Build comprehensive context for LLM""" + context = f""" + Thông tin người dùng: + - Tuổi: {user_data['age']} ({age_info['group']} - {age_info['focus']}) + - Giới tính: {user_data['gender']} + - Cân nặng: {user_data['weight']}kg, Chiều cao: {user_data['height']}cm + - BMI: {bmi_info['bmi']} ({bmi_info['category']} - {bmi_info['advice']}) + - Mục tiêu: {user_data['goal']} + - Mức độ hoạt động: {user_data.get('activity_level', 'moderate')} + + Hạn chế ăn uống: {', '.join(user_data.get('dietary_restrictions', []))} + Tình trạng sức khỏe: {', '.join(user_data.get('health_conditions', []))} + + Hãy đưa ra lời khuyên dinh dưỡng cụ thể, bao gồm: + 1. Phân tích tình trạng hiện tại + 2. Khuyến nghị calo hàng ngày + 3. Tỷ lệ macro (protein/carb/fat) + 4. Thực phẩm nên ăn và tránh + 5. Lịch ăn uống phù hợp + 6. 
Lời khuyên đặc biệt cho tình trạng sức khỏe + """ + return context + + def _get_llm_nutrition_advice(self, context): + """Get personalized nutrition advice from LLM""" + try: + response = client.chat.completions.create( + model=MODEL, + messages=[ + { + "role": "system", + "content": """Bạn là chuyên gia dinh dưỡng có kinh nghiệm. + Đưa ra lời khuyên dinh dưỡng chính xác, khoa học và phù hợp với từng cá nhân. + Luôn cân nhắc về tình trạng sức khỏe và hạn chế ăn uống. + Trả lời bằng tiếng Việt, chi tiết và dễ hiểu.""" + }, + {"role": "user", "content": context} + ], + temperature=0.7, + max_tokens=2000 + ) + return response.choices[0].message.content + except Exception as e: + return f"Không thể tạo lời khuyên dinh dưỡng: {str(e)}" + + def _calculate_daily_targets(self, user_data, gender_info, bmi_info): + """Calculate daily nutrition targets""" + base_calories = gender_info["calorie_base"] + + # Adjust based on goal + goal_multipliers = { + "weight_loss": 0.8, + "weight_gain": 1.2, + "muscle_building": 1.1, + "maintenance": 1.0, + "health_improvement": 1.0 + } + + activity_multipliers = { + "low": 1.2, + "moderate": 1.4, + "high": 1.6 + } + + activity_level = user_data.get('activity_level', 'moderate') + goal = user_data.get('goal', 'maintenance') + + daily_calories = int(base_calories * + goal_multipliers.get(goal, 1.0) * + activity_multipliers.get(activity_level, 1.4)) + + protein_ratio = gender_info["protein_ratio"] + protein_calories = daily_calories * protein_ratio + protein_grams = int(protein_calories / 4) + + fat_calories = daily_calories * 0.25 # 25% fat + fat_grams = int(fat_calories / 9) + + carb_calories = daily_calories - protein_calories - fat_calories + carb_grams = int(carb_calories / 4) + + return { + "daily_calories": daily_calories, + "protein": f"{protein_grams}g", + "carbs": f"{carb_grams}g", + "fats": f"{fat_grams}g", + "water": "2.5-3.5L" + } + + def _generate_meal_suggestions(self, user_data, bmi_info): + """Generate meal suggestions based on user profile""" + suggestions = { + "breakfast": [ + "Bánh mì nguyên cám + trứng + sữa", + "Cháo yến mạch + trái cây", + "Sinh tố protein + hạt chia" + ], + "lunch": [ + "Cơm gạo lứt + thịt/cá + rau xanh", + "Salad + ức gà + dầu olive", + "Bún phở + thịt nạc + rau" + ], + "dinner": [ + "Cá hồi + khoai lang + rau củ", + "Thịt bò + cơm + canh rau", + "Đậu phụ + rau xào + cơm" + ], + "snacks": [ + "Hạt hạnh nhân + trái cây", + "Sữa chua Hy Lạp + mật ong", + "Trái cây + phô mai ít béo" + ] + } + + # Adjust based on BMI category + if bmi_info["category"] == "underweight": + suggestions["snacks"].extend(["Bơ đậu phộng + bánh mì", "Sinh tố tăng cân"]) + elif bmi_info["category"] in ["overweight", "obese"]: + suggestions["snacks"] = ["Trái cây ít ngọt", "Rau củ sống", "Trà xanh"] + + return suggestions + + def _get_supplement_recommendations(self, user_data, bmi_info): + """Get supplement recommendations""" + supplements = [] + + # Age-based recommendations + if user_data['age'] > 50: + supplements.extend(["Vitamin D3", "Calcium", "B12"]) + + # Gender-based + if user_data['gender'] == 'female': + supplements.extend(["Iron", "Folate"]) + + # Goal-based + if user_data.get('goal') == 'muscle_building': + supplements.extend(["Whey Protein", "Creatine"]) + + # BMI-based + if bmi_info["category"] in ["overweight", "obese"]: + supplements.extend(["Omega-3", "Probiotics"]) + + return list(set(supplements)) # Remove duplicates + +# Convenience function for external use +def generate_nutrition_advice(user_data): + """ + Main function to generate 
nutrition advice
+
+    Args:
+        user_data (dict): User information including age, gender, weight, height, goal, etc.
+
+    Returns:
+        dict: Comprehensive nutrition advice and meal suggestions
+    """
+    advisor = NutritionAdvisor()
+    return advisor.generate_nutrition_advice(user_data)
diff --git a/modules/rules.json b/modules/rules.json
new file mode 100644
index 0000000000000000000000000000000000000000..f45505d1d557360db333ace166bed9737ea79406
--- /dev/null
+++ b/modules/rules.json
@@ -0,0 +1,53 @@
+{
+    "nutrition": {
+        "bmi_categories": {
+            "underweight": {"min": 0, "max": 18.5, "advice": "Tăng cân lành mạnh"},
+            "normal": {"min": 18.5, "max": 25, "advice": "Duy trì cân nặng"},
+            "overweight": {"min": 25, "max": 30, "advice": "Giảm cân nhẹ"},
+            "obese": {"min": 30, "max": 100, "advice": "Giảm cân cần thiết"}
+        },
+        "age_groups": {
+            "child": {"min": 0, "max": 12, "focus": "Phát triển"},
+            "teen": {"min": 13, "max": 19, "focus": "Tăng trưởng"},
+            "adult": {"min": 20, "max": 59, "focus": "Duy trì"},
+            "elderly": {"min": 60, "max": 120, "focus": "Sức khỏe"}
+        },
+        "goals": {
+            "weight_loss": "Giảm cân",
+            "weight_gain": "Tăng cân",
+            "muscle_building": "Xây dựng cơ bắp",
+            "maintenance": "Duy trì",
+            "health_improvement": "Cải thiện sức khỏe"
+        },
+        "gender_nutrition": {
+            "male": {"calorie_base": 2500, "protein_ratio": 0.25},
+            "female": {"calorie_base": 2000, "protein_ratio": 0.22}
+        },
+        "activity_levels": {
+            "low": {"multiplier": 1.2, "description": "Ít vận động"},
+            "moderate": {"multiplier": 1.4, "description": "Vận động vừa phải"},
+            "high": {"multiplier": 1.6, "description": "Vận động nhiều"}
+        },
+        "dietary_restrictions": [
+            "vegetarian", "vegan", "gluten_free", "dairy_free",
+            "low_carb", "keto", "halal", "kosher", "low_sodium"
+        ],
+        "health_conditions": [
+            "diabetes", "hypertension", "heart_disease", "high_cholesterol",
+            "kidney_disease", "liver_disease", "food_allergies", "ibs"
+        ]
+    },
+    "exercise": {
+        "fitness_levels": {
+            "beginner": {"description": "Mới bắt đầu", "duration": "15-30 phút"},
+            "intermediate": {"description": "Trung bình", "duration": "30-45 phút"},
+            "advanced": {"description": "Nâng cao", "duration": "45-60 phút"}
+        },
+        "exercise_types": {
+            "cardio": ["chạy bộ", "đi bộ", "đạp xe", "bơi lội"],
+            "strength": ["tập tạ", "bodyweight", "resistance_bands"],
+            "flexibility": ["yoga", "pilates", "stretching"],
+            "sports": ["bóng đá", "tennis", "bóng rổ", "cầu lông"]
+        }
+    }
+}
diff --git a/personalization/__init__.py b/personalization/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..60e620454feec37d56da00e2c5708051c073d437
--- /dev/null
+++ b/personalization/__init__.py
@@ -0,0 +1,10 @@
+"""
+Personalization Package - Progressive learning and adaptation engine
+"""
+
+from .personalization_engine import PersonalizationEngine
+
+__all__ = [
+    'PersonalizationEngine'
+]
+
diff --git a/personalization/personalization_engine.py b/personalization/personalization_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d79abfdcf638050aa556a42069e0ecc069373028
--- /dev/null
+++ b/personalization/personalization_engine.py
@@ -0,0 +1,295 @@
+"""
+Personalization Engine - Progressive learning from user interactions
+Continuously improves recommendations based on user data and feedback
+"""
+
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from collections import Counter
+import json
+
+from health_data import HealthContext, UserPreferences
+
+
+class PersonalizationEngine:
+    """
+ Progressive personalization engine that learns from user interactions + Improves recommendations over time based on accumulated user data + """ + + def __init__(self, health_context: HealthContext): + self.health_context = health_context + self.user_id = health_context.user_id + + # ===== Pattern Analysis ===== + + def analyze_user_patterns(self, days: int = 90) -> Dict[str, Any]: + """Analyze user interaction patterns""" + history = self.health_context.get_health_history(days) + + if not history: + return { + 'total_interactions': 0, + 'interaction_types': {}, + 'most_common_topics': [], + 'engagement_level': 'low' + } + + # Count interactions by type + type_counts = Counter(r.record_type for r in history) + + # Calculate engagement level + total_interactions = len(history) + days_active = len(set(r.timestamp.date() for r in history)) + engagement_score = min(total_interactions / (days / 7), 1.0) # Normalize to 0-1 + + engagement_level = 'high' if engagement_score > 0.7 else 'medium' if engagement_score > 0.3 else 'low' + + return { + 'total_interactions': total_interactions, + 'days_active': days_active, + 'interaction_types': dict(type_counts), + 'engagement_score': round(engagement_score, 2), + 'engagement_level': engagement_level, + 'most_common_topics': [t[0] for t in type_counts.most_common(3)] + } + + def extract_preferences(self) -> UserPreferences: + """Extract and update user preferences from interaction history""" + prefs = self.health_context.get_preferences() + + # Analyze exercise preferences from history + exercise_records = self.health_context.get_records_by_type('exercise') + if exercise_records: + # Extract exercise types mentioned + exercise_types = set() + for record in exercise_records[-10:]: # Last 10 exercise records + if 'exercise_type' in record.data: + exercise_types.add(record.data['exercise_type']) + + prefs.preferred_exercise_types = list(exercise_types) + + # Analyze nutrition preferences + nutrition_records = self.health_context.get_records_by_type('nutrition') + if nutrition_records: + dietary_prefs = set() + for record in nutrition_records[-10:]: + if 'dietary_preference' in record.data: + dietary_prefs.add(record.data['dietary_preference']) + + prefs.dietary_preferences = list(dietary_prefs) + + # Analyze goals from history + goals = set() + for record in self.health_context.get_health_history(days=180): + if 'goal' in record.data: + goals.add(record.data['goal']) + + prefs.goals = list(goals) + + self.health_context.update_preferences( + preferred_exercise_types=prefs.preferred_exercise_types, + dietary_preferences=prefs.dietary_preferences, + goals=prefs.goals + ) + + return prefs + + def identify_health_trends(self, days: int = 90) -> Dict[str, Any]: + """Identify health trends from historical data""" + trends = { + 'symptom_frequency': {}, + 'health_improvements': [], + 'health_concerns': [], + 'activity_trends': {} + } + + # Analyze symptom frequency + symptom_records = self.health_context.get_records_by_type('symptom') + symptom_counts = Counter() + for record in symptom_records: + if 'symptom' in record.data: + symptom_counts[record.data['symptom']] += 1 + + trends['symptom_frequency'] = dict(symptom_counts.most_common(5)) + + # Analyze fitness trends + fitness_history = self.health_context.get_fitness_history(days) + if fitness_history: + total_workouts = len(fitness_history) + total_minutes = sum(f.duration_minutes for f in fitness_history) + avg_intensity = sum(1 for f in fitness_history if f.intensity == 'high') / total_workouts if 
total_workouts > 0 else 0 + + trends['activity_trends'] = { + 'total_workouts': total_workouts, + 'total_minutes': total_minutes, + 'avg_intensity': round(avg_intensity, 2), + 'adherence': self.health_context.get_workout_adherence(days) + } + + return trends + + def calculate_engagement_score(self, days: int = 30) -> float: + """Calculate user engagement score (0-1)""" + patterns = self.analyze_user_patterns(days) + return patterns['engagement_score'] + + # ===== Adaptation Methods ===== + + def adapt_nutrition_plan(self, current_plan: Dict[str, Any]) -> Dict[str, Any]: + """Adapt nutrition plan based on user history""" + adapted_plan = current_plan.copy() + + # Get user preferences + prefs = self.health_context.get_preferences() + + # Apply dietary restrictions + if prefs.dietary_preferences: + adapted_plan['dietary_preferences'] = prefs.dietary_preferences + + # Analyze nutrition history for effectiveness + nutrition_records = self.health_context.get_records_by_type('nutrition') + if nutrition_records: + # Check if user is following recommendations + adherence = len(nutrition_records) / max(1, (30 / 7)) # Expected ~1 per week + adapted_plan['adherence_score'] = min(adherence, 1.0) + + # Add personalization note + adapted_plan['personalized'] = True + adapted_plan['personalization_date'] = datetime.now().isoformat() + + return adapted_plan + + def adapt_exercise_plan(self, current_plan: Dict[str, Any]) -> Dict[str, Any]: + """Adapt exercise plan based on progress""" + adapted_plan = current_plan.copy() + + # Get fitness history + fitness_history = self.health_context.get_fitness_history(days=30) + + if fitness_history: + # Calculate adherence + adherence = self.health_context.get_workout_adherence(days=30) + + # Adjust difficulty based on adherence + if adherence > 0.8: + adapted_plan['difficulty_adjustment'] = 'increase' + adapted_plan['recommendation'] = 'Great adherence! Consider increasing intensity.' + elif adherence < 0.3: + adapted_plan['difficulty_adjustment'] = 'decrease' + adapted_plan['recommendation'] = 'Let\'s make the plan more manageable.' + else: + adapted_plan['difficulty_adjustment'] = 'maintain' + adapted_plan['recommendation'] = 'Keep up the good work!' + + # Add exercise preferences + prefs = self.health_context.get_preferences() + if prefs.preferred_exercise_types: + adapted_plan['preferred_exercises'] = prefs.preferred_exercise_types + + adapted_plan['personalized'] = True + adapted_plan['personalization_date'] = datetime.now().isoformat() + + return adapted_plan + + def adapt_communication_style(self) -> str: + """Adapt communication style based on user interactions""" + patterns = self.analyze_user_patterns(days=30) + + # Analyze interaction frequency + if patterns['engagement_level'] == 'high': + return 'detailed' # More detailed responses + elif patterns['engagement_level'] == 'low': + return 'brief' # Shorter, more concise responses + else: + return 'balanced' # Standard responses + + def generate_personalized_insights(self) -> List[str]: + """Generate personalized health insights""" + insights = [] + + # Analyze trends + trends = self.identify_health_trends(days=90) + + # Symptom insights + if trends['symptom_frequency']: + top_symptom = list(trends['symptom_frequency'].keys())[0] + count = trends['symptom_frequency'][top_symptom] + insights.append(f"You've reported '{top_symptom}' {count} times in the last 90 days. 
Consider consulting a specialist.") + + # Activity insights + if 'activity_trends' in trends and trends['activity_trends']: + activity = trends['activity_trends'] + if activity['adherence'] > 0.7: + insights.append(f"Excellent fitness adherence! You've completed {activity['total_workouts']} workouts in the last month.") + elif activity['adherence'] < 0.3: + insights.append("Your fitness adherence is low. Let's create a more achievable plan together.") + + # Goal progress + prefs = self.health_context.get_preferences() + if prefs.goals: + insights.append(f"Your current goals: {', '.join(prefs.goals)}") + + return insights + + # ===== Feedback Methods ===== + + def record_user_feedback(self, feedback_type: str, feedback_data: Dict[str, Any]) -> None: + """Record user feedback for learning""" + feedback_record = { + 'feedback_type': feedback_type, # helpful/not_helpful/confusing/etc + 'data': feedback_data, + 'timestamp': datetime.now().isoformat() + } + + self.health_context.add_health_record( + 'feedback', + feedback_record, + agent_name='personalization_engine', + confidence=1.0 + ) + + def update_preferences_from_feedback(self) -> None: + """Update preferences based on accumulated feedback""" + # Get recent feedback + feedback_records = self.health_context.get_records_by_type('feedback') + + if not feedback_records: + return + + # Analyze feedback patterns + helpful_count = sum(1 for r in feedback_records[-20:] if r.data.get('feedback_type') == 'helpful') + total_feedback = min(len(feedback_records), 20) + + if total_feedback > 0: + helpfulness_score = helpful_count / total_feedback + + # Update communication style if needed + if helpfulness_score < 0.3: + self.health_context.update_preferences( + communication_style='brief' + ) + elif helpfulness_score > 0.7: + self.health_context.update_preferences( + communication_style='detailed' + ) + + def get_personalization_summary(self) -> Dict[str, Any]: + """Get summary of personalization status""" + patterns = self.analyze_user_patterns() + trends = self.identify_health_trends() + prefs = self.extract_preferences() + + return { + 'engagement': patterns, + 'trends': trends, + 'preferences': { + 'goals': prefs.goals, + 'exercise_types': prefs.preferred_exercise_types, + 'dietary_preferences': prefs.dietary_preferences, + 'communication_style': prefs.communication_style + }, + 'insights': self.generate_personalized_insights(), + 'last_updated': datetime.now().isoformat() + } + diff --git a/rag/__init__.py b/rag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..34f22cf8ed7b3990402b224910189ff1b7eececa --- /dev/null +++ b/rag/__init__.py @@ -0,0 +1,23 @@ +""" +RAG (Retrieval-Augmented Generation) Module +Provides ChromaDB integration for health knowledge retrieval +""" + +from rag.rag_integration import ( + RAGIntegration, + get_rag_integration, + query_nutrition, + query_exercise, + query_health, + query_generic +) + +__all__ = [ + 'RAGIntegration', + 'get_rag_integration', + 'query_nutrition', + 'query_exercise', + 'query_health', + 'query_generic' +] + diff --git a/rag/cache.py b/rag/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..20dad12a45e1c8c28a9273bcf9a8a90a64caabb7 --- /dev/null +++ b/rag/cache.py @@ -0,0 +1,78 @@ +""" +Simple cache for RAG queries to speed up repeated questions +""" + +from typing import Dict, Any, Optional +import hashlib +import json +from datetime import datetime, timedelta + +class QueryCache: + """Cache for RAG query results""" + + def __init__(self, 
ttl_minutes: int = 60): + """ + Initialize cache + + Args: + ttl_minutes: Time to live in minutes + """ + self.cache: Dict[str, Dict[str, Any]] = {} + self.ttl = timedelta(minutes=ttl_minutes) + + def _get_key(self, query: str) -> str: + """Generate cache key from query""" + return hashlib.md5(query.lower().strip().encode()).hexdigest() + + def get(self, query: str) -> Optional[Dict[str, Any]]: + """ + Get cached result + + Args: + query: User query + + Returns: + Cached result or None + """ + key = self._get_key(query) + + if key in self.cache: + entry = self.cache[key] + # Check if expired + if datetime.now() - entry['timestamp'] < self.ttl: + return entry['result'] + else: + # Remove expired entry + del self.cache[key] + + return None + + def set(self, query: str, result: Dict[str, Any]): + """ + Cache result + + Args: + query: User query + result: Query result + """ + key = self._get_key(query) + self.cache[key] = { + 'result': result, + 'timestamp': datetime.now() + } + + def clear(self): + """Clear all cache""" + self.cache.clear() + + def size(self) -> int: + """Get cache size""" + return len(self.cache) + + +# Global cache instance +_cache = QueryCache(ttl_minutes=60) + +def get_cache() -> QueryCache: + """Get global cache instance""" + return _cache diff --git a/rag/data_sources.json b/rag/data_sources.json new file mode 100644 index 0000000000000000000000000000000000000000..58b9b374cbd24eb307ea2c860c072aaffffb718d --- /dev/null +++ b/rag/data_sources.json @@ -0,0 +1,149 @@ +{ + "version": "1.0", + "last_updated": "2025-10-18", + "description": "Healthcare documents for RAG system - Official sources only", + "sources": [ + { + "id": "who_healthy_diet_2020", + "category": "nutrition", + "name": "WHO Healthy Diet Fact Sheet", + "url": "https://www.who.int/news-room/fact-sheets/detail/healthy-diet", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "WHO official guidelines on healthy diet" + }, + { + "id": "cdc_physical_activity", + "category": "fitness", + "name": "CDC Physical Activity Guidelines", + "url": "https://www.cdc.gov/physicalactivity/basics/adults/index.htm", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "CDC guidelines for adult physical activity" + }, + { + "id": "nimh_mental_health", + "category": "mental_health", + "name": "NIMH Mental Health Information", + "url": "https://www.nimh.nih.gov/health/topics", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "National Institute of Mental Health resources" + }, + { + "id": "who_nutrition_landscape_2020", + "category": "nutrition", + "name": "WHO Global Nutrition Report", + "url": "https://www.who.int/data/nutrition/nlis/info/malnutrition-in-women", + "type": "html", + "language": "en", + "required": false, + "cache_days": 180, + "notes": "WHO nutrition data and statistics" + }, + { + "id": "cdc_nutrition_facts", + "category": "nutrition", + "name": "CDC Nutrition Facts", + "url": "https://www.cdc.gov/nutrition/index.html", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "CDC nutrition guidelines and resources" + }, + { + "id": "who_physical_activity", + "category": "fitness", + "name": "WHO Physical Activity Recommendations", + "url": "https://www.who.int/news-room/fact-sheets/detail/physical-activity", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "WHO guidelines on physical activity" + }, + { + "id": 
"cdc_mental_health", + "category": "mental_health", + "name": "CDC Mental Health Resources", + "url": "https://www.cdc.gov/mentalhealth/index.htm", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "CDC mental health information" + }, + { + "id": "who_mental_health", + "category": "mental_health", + "name": "WHO Mental Health Fact Sheet", + "url": "https://www.who.int/news-room/fact-sheets/detail/mental-health-strengthening-our-response", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "WHO mental health guidelines" + }, + { + "id": "cdc_sleep_health", + "category": "general", + "name": "CDC Sleep and Health", + "url": "https://www.cdc.gov/sleep/index.html", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "CDC sleep health information" + }, + { + "id": "who_obesity_overweight", + "category": "general", + "name": "WHO Obesity and Overweight", + "url": "https://www.who.int/news-room/fact-sheets/detail/obesity-and-overweight", + "type": "html", + "language": "en", + "required": true, + "cache_days": 90, + "notes": "WHO guidelines on obesity and overweight" + } + ], + "manual_downloads": [ + { + "category": "nutrition_vn", + "name": "Khuyến nghị dinh dưỡng cho người Việt Nam", + "source": "Bộ Y Tế Việt Nam", + "url": "https://moh.gov.vn", + "instructions": "Truy cập moh.gov.vn → Văn bản → Tìm kiếm 'dinh dưỡng' → Download các file PDF liên quan", + "required": true + }, + { + "category": "nutrition_vn", + "name": "Bảng thành phần dinh dưỡng thực phẩm Việt Nam", + "source": "Viện Dinh Dưỡng Quốc Gia", + "url": "http://viendinhduong.vn", + "instructions": "Truy cập viendinhduong.vn → Tài liệu → Download bảng thành phần dinh dưỡng", + "required": true + }, + { + "category": "traditional_medicine", + "name": "Y học cổ truyền Việt Nam", + "source": "Viện Y Học Cổ Truyền", + "url": "http://vietim.vn", + "instructions": "Truy cập vietim.vn → Tài liệu → Download tài liệu về dược liệu và bài thuốc", + "required": false + } + ], + "notes": [ + "All sources are official government/health organization websites", + "HTML sources will be converted to markdown for ingestion", + "Manual downloads are required for Vietnamese-specific content", + "Update cache_days to control how often documents are re-downloaded" + ] +} diff --git a/rag/ingest.py b/rag/ingest.py new file mode 100644 index 0000000000000000000000000000000000000000..c1895af3a0aa923f979f5c785eee917e91fc486f --- /dev/null +++ b/rag/ingest.py @@ -0,0 +1,107 @@ +import os +import time +import traceback +from langchain_community.document_loaders import PyPDFLoader, CSVLoader, TextLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_chroma import Chroma +from config.settings import CHROMA_PATH, EMBEDDING_MODEL + + +# ============================================================ +# Load and split documents +# ============================================================ +def load_documents(path: str): + """ + Load and split a single uploaded document (PDF, CSV, or MD/TXT). + Args: + path (str): Full path to the uploaded file. + Returns: + List[Document]: List of document chunks ready for embedding. 
+ """ + print(f"\nReading uploaded file: {path}") + + if not os.path.exists(path): + print(f"[ERROR] File not found: {path}") + return [] + + ext = os.path.splitext(path)[1].lower() + all_docs = [] + + try: + # Select appropriate loader based on file type + if ext == ".pdf": + loader = PyPDFLoader(path) + elif ext == ".csv": + loader = CSVLoader(path, encoding="utf-8") + elif ext in [".md"]: + loader = TextLoader(path, encoding="utf-8") + else: + print( + f"[WARNING] Unsupported file type: {ext}. Only PDF, CSV, or MD allowed." + ) + return [] + + # Load the document + docs = loader.load() + all_docs.extend(docs) + print(f"Loaded {len(docs)} documents from {os.path.basename(path)}") + + # Split the text into smaller chunks for embeddings + splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) + split_docs = splitter.split_documents(all_docs) + print(f"Split into {len(split_docs)} text chunks.") + return split_docs + + except Exception as e: + print(f"[ERROR] Failed to load or split document: {path}") + print(f"Reason: {e}") + traceback.print_exc() + return [] + + +# ============================================================ +# Select embedding model (OpenAI → fallback to HuggingFace) +# ============================================================ +def get_embedding_model(): + """ + Try using OpenAIEmbeddings if a valid API key is available. + If it fails (401, missing key, etc.), fall back to HuggingFaceEmbeddings. + """ + + # Custom endpoint only supports GPT-4o-mini, not embeddings + # So we skip OpenAI embeddings and use HuggingFace directly + print("[INFO] Using HuggingFace embeddings (custom endpoint doesn't support embeddings)") + + print(f"Using HuggingFaceEmbeddings ({EMBEDDING_MODEL})...") + return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL) + + +# ============================================================ +# Main ingestion process +# ============================================================ +def ingest_data(path: str): + """ + Generate embeddings for an uploaded file and store them in a local ChromaDB. + """ + start_time = time.time() + print("\nStarting ingestion for uploaded file...") + + documents = load_documents(path) + if not documents: + print("No valid document to process. Skipping embedding step.") + return + + embeddings = get_embedding_model() + + try: + vectordb = Chroma.from_documents( + documents, embeddings, persist_directory=CHROMA_PATH + ) + elapsed = time.time() - start_time + count = vectordb._collection.count() + print(f"\nIngestion complete in {elapsed:.2f} seconds.") + print(f"Data stored in {CHROMA_PATH} ({count} vectors).") + except Exception as e: + print(f"[ERROR] Failed to store vectors in ChromaDB: {e}") + traceback.print_exc() diff --git a/rag/query_engine.py b/rag/query_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..04be3bf86bca5db0370b52f3041216dc5165d25b --- /dev/null +++ b/rag/query_engine.py @@ -0,0 +1,117 @@ +import time +import json +from typing import Dict, Any, List +from langchain_chroma import Chroma +from langchain.chains import RetrievalQA +from langchain_community.callbacks.manager import get_openai_callback +from config.settings import MODEL, CHROMA_PATH, EMBEDDING_MODEL, OPENAI_API_KEY, OPENAI_BASE_URL +from langchain_openai import ChatOpenAI +from langchain_huggingface import HuggingFaceEmbeddings +import traceback + +# ================================================================= +# 1. 
+try:
+    # Initialize the LLM
+    print("CHROMA_PATH:", CHROMA_PATH)
+    llm = ChatOpenAI(model=MODEL, temperature=0.1, api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
+
+    # Initialize the embeddings
+    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+    # Initialize the vector store and retriever
+    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
+
+    # top_k = 2 for faster search
+    retriever = db.as_retriever(search_kwargs={"k": 2})
+
+    # Build the RetrievalQA chain
+    qa_chain = RetrievalQA.from_chain_type(
+        llm,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=True
+    )
+
+except Exception as e:
+    print(f"⚠️ Failed to initialize RAG chain: {e}. Run ingest.py and check the API key.")
+    traceback.print_exc()
+    qa_chain = None
+
+# =================================================================
+# 2. Query with context and logging (Member B)
+# =================================================================
+def query_with_context(query: str, system_prompt: str) -> Dict[str, Any]:
+    """
+    Query the RAG chain, log time/token usage, and return a JSON-serializable result.
+
+    Args:
+        query: The user's question.
+        system_prompt: System context for the LLM.
+
+    Returns:
+        Dict[str, Any]: {answer, source_docs, metadata}
+    """
+    if not qa_chain:
+        return {
+            "answer": "Hệ thống RAG chưa được khởi tạo. Vui lòng kiểm tra API Key và chạy ingest.py.",
+            "source_docs": [],
+            "metadata": {"time_s": 0.0, "tokens": 0, "status": "ERROR"}
+        }
+
+    start_time = time.time()
+
+    # Inject the system prompt into the chain's prompt template
+    qa_chain.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template = system_prompt
+
+    # Track token usage and cost with get_openai_callback
+    # (getattr guards against callbacks that don't report token counts)
+    with get_openai_callback() as cb:
+        response = qa_chain.invoke(query)
+        total_tokens = getattr(cb, "total_tokens", 0)
+
+    end_time = time.time()
+    query_time = end_time - start_time
+
+    # Extract the answer and source documents
+    answer = response['result']
+    source_documents = response['source_documents']
+
+    # Normalize source documents so they can be serialized to JSON
+    formatted_sources: List[Dict[str, Any]] = []
+    for doc in source_documents:
+        # Preview capped at 300 characters
+        content = doc.page_content.strip()
+        preview = content[:300] if len(content) > 300 else content
+
+        # Get source info from metadata
+        metadata = doc.metadata
+        source_name = metadata.get('source', 'Unknown')
+
+        formatted_sources.append({
+            "content_preview": preview,
+            "metadata": metadata,
+            "source_name": source_name
+        })
+
+    # Console log of query time and token usage
+    print("--- RAG Query Log ---")
+    print(f"Query: {query}")
+    print(f"Query time: {query_time:.2f} s")
+    print(f"Total tokens used: {total_tokens}")
+    print("----------------------")
+
+    # Return a JSON-serializable result
+    return {
+        "answer": answer,
+        "source_docs": formatted_sources,
+        "metadata": {
+            "time_s": round(query_time, 2),
+            "tokens": total_tokens,
+            "status": "SUCCESS"
+        }
+    }
\ No newline at end of file
diff --git a/rag/rag_integration.py b/rag/rag_integration.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e6774a6d5b88e8ce0b7d7e7e6488577b02cc889
--- /dev/null
+++ b/rag/rag_integration.py
@@ -0,0 +1,155 @@
+"""
+RAG Integration - Unified interface for ChromaDB queries across all agents
+Provides context-aware retrieval for nutrition, exercise, health tips, etc. +""" + +from typing import Dict, Any, List, Optional +from rag.query_engine import query_with_context +import json + + +class RAGIntegration: + """ + Unified RAG interface for all agents + Retrieves relevant health knowledge from ChromaDB + """ + + def __init__(self): + """Initialize RAG integration""" + self.nutrition_prompt = """You are a nutrition expert. Use the provided documents to answer nutrition questions. +Focus on: dietary recommendations, macro/micronutrients, meal planning, food groups.""" + + self.exercise_prompt = """You are a fitness expert. Use the provided documents to answer exercise questions. +Focus on: workout routines, exercise techniques, fitness progression, safety guidelines.""" + + self.health_prompt = """You are a health consultant. Use the provided documents to answer health questions. +Focus on: health tips, disease prevention, wellness practices, lifestyle recommendations.""" + + def query_nutrition(self, query: str) -> Dict[str, Any]: + """ + Query nutrition knowledge from ChromaDB + + Args: + query: Nutrition question + + Returns: + Dict with answer, sources, and metadata + """ + return query_with_context(query, self.nutrition_prompt) + + def query_exercise(self, query: str) -> Dict[str, Any]: + """ + Query exercise knowledge from ChromaDB + + Args: + query: Exercise question + + Returns: + Dict with answer, sources, and metadata + """ + return query_with_context(query, self.exercise_prompt) + + def query_health(self, query: str) -> Dict[str, Any]: + """ + Query general health knowledge from ChromaDB + + Args: + query: Health question + + Returns: + Dict with answer, sources, and metadata + """ + return query_with_context(query, self.health_prompt) + + def query_generic(self, query: str, context: str = "") -> Dict[str, Any]: + """ + Generic query with custom context + + Args: + query: Question + context: Custom system prompt context + + Returns: + Dict with answer, sources, and metadata + """ + prompt = f"""You are a helpful health assistant. Use the provided documents to answer questions. 
+{context}""" + return query_with_context(query, prompt) + + def extract_answer(self, result: Dict[str, Any]) -> str: + """Extract just the answer text from RAG result""" + return result.get('answer', '') + + def extract_sources(self, result: Dict[str, Any]) -> List[Dict[str, Any]]: + """Extract source documents from RAG result""" + return result.get('source_docs', []) + + def extract_metadata(self, result: Dict[str, Any]) -> Dict[str, Any]: + """Extract metadata (time, tokens, status) from RAG result""" + return result.get('metadata', {}) + + def format_response_with_sources(self, result: Dict[str, Any]) -> str: + """ + Format RAG response (answer only, no sources) + + Args: + result: RAG query result + + Returns: + Answer text only + """ + answer = result.get('answer', '') + return answer + + def is_success(self, result: Dict[str, Any]) -> bool: + """Check if RAG query was successful""" + metadata = result.get('metadata', {}) + return metadata.get('status') == 'SUCCESS' + + def get_query_time(self, result: Dict[str, Any]) -> float: + """Get query execution time in seconds""" + metadata = result.get('metadata', {}) + return metadata.get('time_s', 0.0) + + def get_token_count(self, result: Dict[str, Any]) -> int: + """Get token count used in query""" + metadata = result.get('metadata', {}) + return metadata.get('tokens', 0) + + +# Global RAG instance +_rag_instance = None + + +def get_rag_integration() -> RAGIntegration: + """Get or create global RAG integration instance""" + global _rag_instance + if _rag_instance is None: + _rag_instance = RAGIntegration() + return _rag_instance + + +# Convenience functions for direct access +def query_nutrition(query: str) -> Dict[str, Any]: + """Query nutrition knowledge""" + rag = get_rag_integration() + return rag.query_nutrition(query) + + +def query_exercise(query: str) -> Dict[str, Any]: + """Query exercise knowledge""" + rag = get_rag_integration() + return rag.query_exercise(query) + + +def query_health(query: str) -> Dict[str, Any]: + """Query health knowledge""" + rag = get_rag_integration() + return rag.query_health(query) + + +def query_generic(query: str, context: str = "") -> Dict[str, Any]: + """Generic query with custom context""" + rag = get_rag_integration() + return rag.query_generic(query, context) + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..42fb13cadc9d642af81541c3391af9c40b119770 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,175 @@ +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.13.1 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.11.0 +appdata==2.2.1 +async-timeout==4.0.3 +attrs==25.4.0 +audioread==3.0.1 +backoff==2.2.1 +bcrypt==4.0.1 +Brotli==1.1.0 +build==1.3.0 +cachetools==6.2.1 +certifi==2025.10.5 +cffi==2.0.0 +charset-normalizer==3.4.3 +chromadb==1.1.1 +click==8.0.3 +colorama==0.4.6 +colored==2.3.1 +coloredlogs==15.0.1 +dataclasses-json==0.6.7 +decorator==5.2.1 +distro==1.9.0 +durationpy==0.10 +exceptiongroup==1.3.0 +fastapi==0.118.3 +ffmpeg==1.4 +ffmpeg-python==0.2.0 +ffmpy==0.6.3 +filelock==3.20.0 +flatbuffers==25.9.23 +frozenlist==1.8.0 +fsspec==2025.9.0 +future==1.0.0 +google-auth==2.41.1 +googleapis-common-protos==1.70.0 +gradio==5.49.0 +gradio_client==1.13.3 +modelscope_studio==1.5.1 +gTTS==2.5.4 +greenlet==3.2.4 +groovy==0.1.2 +grpcio==1.75.1 +h11==0.16.0 +httpcore==1.0.9 +httptools==0.7.1 +httpx==0.28.1 +httpx-sse==0.4.3 +huggingface-hub==0.35.3 +humanfriendly==10.0 +idna==3.10 +importlib_metadata==8.7.0 
+importlib_resources==6.5.2 +Jinja2==3.1.6 +jiter==0.11.0 +joblib==1.5.2 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema==4.25.1 +jsonschema-specifications==2025.9.1 +kubernetes==34.1.0 +langchain==0.3.27 +langchain-chroma==0.2.6 +langchain-community==0.3.31 +langchain-core==0.3.79 +langchain-huggingface==0.3.1 +langchain-openai==0.3.35 +langchain-text-splitters==0.3.11 +langsmith==0.4.37 +lazy_loader==0.4 +librosa==0.11.0 +llvmlite==0.45.1 +lxml==6.0.2 +markdown-it-py==4.0.0 +MarkupSafe==3.0.3 +marshmallow==3.26.1 +mdurl==0.1.2 +mmh3==5.2.0 +mpmath==1.3.0 +msgpack==1.1.2 +multidict==6.7.0 +mypy_extensions==1.1.0 +narwhals==2.8.0 +networkx==3.4.2 +numba==0.62.1 +numpy==2.2.6 +oauthlib==3.3.1 +onnxruntime==1.23.1 +openai==2.1.0 +opentelemetry-api==1.38.0 +opentelemetry-exporter-otlp-proto-common==1.38.0 +opentelemetry-exporter-otlp-proto-grpc==1.38.0 +opentelemetry-proto==1.38.0 +opentelemetry-sdk==1.38.0 +opentelemetry-semantic-conventions==0.59b0 +orjson==3.11.3 +overrides==7.7.0 +packaging==25.0 +pandas==2.3.3 +pillow==10.4.0 +platformdirs==4.5.0 +plotly==6.3.1 +pooch==1.8.2 +posthog==5.4.0 +propcache==0.4.1 +protobuf==6.33.0 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pybase64==1.4.2 +pycparser==2.23 +pydantic==2.11.10 +pydantic-settings==2.11.0 +pydantic_core==2.33.2 +pydub==0.25.1 +Pygments==2.19.2 +pypdf==6.1.1 +PyPDF2==3.0.1 +PyPika==0.48.9 +pyproject_hooks==1.2.0 +pyreadline3==3.5.4 +python-dateutil==2.9.0.post0 +python-docx==1.1.2 +python-dotenv==1.1.1 +python-multipart==0.0.20 +pytz==2025.2 +PyYAML==6.0.3 +referencing==0.37.0 +regex==2025.9.18 +requests==2.32.5 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +rich==14.2.0 +rpds-py==0.27.1 +rsa==4.9.1 +ruff==0.14.0 +safehttpx==0.1.6 +safetensors==0.6.2 +scikit-learn==1.7.2 +scipy==1.15.3 +semantic-version==2.10.0 +sentence-transformers==5.1.1 +shellingham==1.5.4 +six==1.17.0 +sniffio==1.3.1 +soundfile==0.13.1 +soxr==1.0.0 +SQLAlchemy==2.0.44 +starlette==0.48.0 +sympy==1.14.0 +tenacity==9.1.2 +threadpoolctl==3.6.0 +tiktoken==0.12.0 +to-requirements.txt==2.0.14 +tokenizers==0.22.1 +tomli==2.3.0 +tomlkit==0.13.3 +torch==2.9.0 +tqdm==4.67.1 +transformers==4.57.1 +typer==0.19.2 +typing-inspect==0.9.0 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +tzdata==2025.2 +urllib3==2.3.0 +uvicorn==0.37.0 +watchfiles==1.1.1 +websocket-client==1.9.0 +websockets==15.0.1 +yarl==1.22.0 +zipp==3.23.0 +zstandard==0.25.0 diff --git a/run.sh b/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..34d23f7628877fec4f4c2cbd9f79da200557b209 --- /dev/null +++ b/run.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -e + +# Try to activate venv if it exists +if [ -f "venv/bin/activate" ]; then + # shellcheck source=/dev/null + source venv/bin/activate +fi + +# Warn if API key not set +if [ -z "$OPENAI_API_KEY" ]; then + echo "Warning: OPENAI_API_KEY is not set. Export it before running for production." + echo "Example: export OPENAI_API_KEY=\"sk-...\"" +fi + +# Default entrypoint: app.py (required) +if [ -f "app.py" ]; then + gradio app.py +else + echo "Error: app.py not found. The default entrypoint is app.py. Create app.py or run manually." + exit 1 +fi diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f477667be68c539b9965c2ee9c4861160a0a0f91 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,313 @@ +# Scripts Documentation 🚀 + +Automated scripts for HeoCare Chatbot setup and maintenance. 
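+
+For a quick programmatic sanity check once setup has run, a minimal sketch
+like the one below (the store path and query string are illustrative) mirrors
+what `check_rag_status.py` does for each store:
+
+```python
+import chromadb
+
+# Open one of the stores built by setup_rag.sh (default repo layout assumed)
+client = chromadb.PersistentClient(path="rag/vector_store/fitness")
+collection = client.list_collections()[0]
+print(f"{collection.count():,} chunks indexed")
+
+# Smoke-test retrieval
+results = collection.query(query_texts=["gym workout exercise"], n_results=1)
+print(results["documents"][0][0][:200])
+```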
+ +## 📋 Quick Start + +### One-Command Setup (Recommended) + +```bash +# Run everything in one command +bash scripts/setup_rag.sh +``` + +**What it does:** +1. ✅ Check Python & dependencies +2. ✅ Install required packages +3. ✅ Download 6 medical datasets from HuggingFace +4. ✅ Build ChromaDB vector stores (~160 MB) +5. ✅ Generate training data (200 conversations) +6. ✅ Optional: Fine-tune agents + +**Time:** ~15-20 minutes (depends on internet speed) + +--- + +## 📜 Available Scripts + +### 1. `setup_rag.sh` ⭐ Main Setup + +```bash +bash scripts/setup_rag.sh +``` + +**Features:** +- Downloads 6 datasets from HuggingFace: + - ViMedical (603 diseases) + - MentalChat16K (16K conversations) + - Nutrition recommendations + - Vietnamese food nutrition + - Fitness exercises (1.66K) + - Medical Q&A (9.3K pairs) +- Builds ChromaDB vector stores +- Generates training data +- Optional fine-tuning + +**Skip existing databases automatically!** + +--- + +### 2. `generate_training_data.py` - Training Data + +```bash +python scripts/generate_training_data.py +``` + +**What it does:** +- Generates 200 synthetic conversations +- 50 scenarios per agent (nutrition, symptom, exercise, mental_health) +- Uses GPT-4o-mini +- Output: `fine_tuning/training_data/*.jsonl` + +**Cost:** ~$0.50 (OpenAI API) + +--- + +### 3. `auto_finetune.py` - Batch Fine-tuning + +```bash +python scripts/auto_finetune.py +``` + +**What it does:** +- Fine-tunes all 4 agents automatically +- Uploads training files +- Creates fine-tuning jobs +- Tracks progress +- Updates model config + +**Requirements:** OpenAI official API (custom APIs not supported) + +--- + +### 4. `fine_tune_agent.py` - Single Agent Fine-tuning + +```bash +python scripts/fine_tune_agent.py nutrition_agent +``` + +**What it does:** +- Fine-tune one specific agent +- Manual control over the process +- Alternative to auto_finetune.py + +**Agents:** `nutrition_agent`, `symptom_agent`, `exercise_agent`, `mental_health_agent` + +--- + +### 5. `check_rag_status.py` - Diagnostic Tool + +```bash +python scripts/check_rag_status.py +``` + +**What it checks:** +- ✅ ChromaDB folders exist +- 📊 Database sizes +- 📚 Document counts +- 🧪 Test queries + +**Note:** May need updates for new vector store paths + +--- + +## 📁 Directory Structure + +``` +scripts/ +├── setup_rag.sh # ⭐ Main setup script +├── generate_training_data.py # Generate synthetic data +├── auto_finetune.py # Batch fine-tuning +├── fine_tune_agent.py # Single agent fine-tuning +├── check_rag_status.py # Diagnostic tool +└── README.md # This file + +data_mining/ # Dataset downloaders +├── mining_vimedical.py # ViMedical diseases +├── mining_mentalchat.py # Mental health conversations +├── mining_nutrition.py # Nutrition recommendations +├── mining_vietnamese_food.py # Vietnamese food data +├── mining_fitness.py # Fitness exercises +└── mining_medical_qa.py # Medical Q&A pairs + +rag/vector_store/ # ChromaDB (NOT committed) +├── medical_diseases/ # ViMedical (603 diseases) +├── mental_health/ # MentalChat (16K conversations) +├── nutrition/ # Nutrition plans +├── vietnamese_nutrition/ # Vietnamese foods (73) +├── fitness/ # Exercises (1.66K) +├── symptom_qa/ # Medical Q&A +└── general_health_qa/ # General health Q&A + +fine_tuning/training_data/ # Generated data (NOT committed) +├── nutrition_training.jsonl +├── symptom_training.jsonl +├── exercise_training.jsonl +└── mental_health_training.jsonl +``` + +--- + +## 🔄 Team Workflow + +### First Time Setup (New Team Member) + +```bash +# 1. 
Clone repo +git clone +cd heocare-chatbot + +# 2. Create .env file +cp .env.example .env +# Add your OPENAI_API_KEY + +# 3. Setup everything (one command) +bash scripts/setup_rag.sh + +# 4. Run app +python app.py +``` + +**Time:** ~15-20 minutes + +--- + +### Daily Development + +```bash +# Pull latest code +git pull + +# If setup_rag.sh was updated, run it again +# (It will skip existing databases automatically) +bash scripts/setup_rag.sh + +# Run app +python app.py +``` + +--- + +### Regenerate Training Data + +```bash +# If you updated agent prompts or scenarios +python scripts/generate_training_data.py + +# Optional: Fine-tune with new data +python scripts/auto_finetune.py +``` + +--- + +### Reset Everything + +```bash +# Delete all generated data +rm -rf rag/vector_store/* +rm -rf fine_tuning/training_data/* +rm -rf data_mining/datasets/* +rm -rf data_mining/output/* + +# Setup from scratch +bash scripts/setup_rag.sh +``` + +--- + +## 🐛 Troubleshooting + +### Setup Failed + +```bash +# Check Python version (need 3.8+) +python --version + +# Check dependencies +pip install -r requirements.txt + +# Check API key +echo $OPENAI_API_KEY +``` + +--- + +### Dataset Download Failed + +```bash +# Check internet connection +ping huggingface.co + +# Try manual download for specific dataset +python data_mining/mining_vimedical.py +python data_mining/mining_mentalchat.py +``` + +--- + +### ChromaDB Issues + +```bash +# Check status +python scripts/check_rag_status.py + +# Delete and rebuild specific database +rm -rf rag/vector_store/medical_diseases +python data_mining/mining_vimedical.py + +# Move to correct location +mkdir -p rag/vector_store +mv data_mining/output/medical_chroma rag/vector_store/medical_diseases +``` + +--- + +### Fine-tuning 404 Error + +``` +Error: 404 - {'detail': 'Not Found'} +``` + +**Cause:** Custom API endpoint doesn't support fine-tuning + +**Solution:** +1. Use OpenAI official API for fine-tuning +2. Or skip fine-tuning (app works fine with base model + RAG) + +```bash +# Option 1: Update .env to use official API +OPENAI_BASE_URL=https://api.openai.com/v1 +OPENAI_API_KEY=sk-proj-your-official-key + +# Option 2: Skip fine-tuning +# Just run the app without fine-tuning +python app.py +``` + +--- + +## 📊 Performance + +| Task | Time | Size | +|------|------|------| +| Download datasets | ~5-8 min | ~50 MB | +| Build ChromaDB | ~5-7 min | ~160 MB | +| Generate training data | ~2-3 min | ~500 KB | +| Fine-tuning (optional) | ~30-60 min | - | +| **Total Setup** | **~15-20 min** | **~160 MB** | + +--- + +## 🆘 Support + +If you encounter issues: + +1. Run `python scripts/check_rag_status.py` for diagnostics +2. Check console logs for errors +3. Verify `.gitignore` is correct +4. Try deleting and rebuilding specific databases +5. Check that `.env` has valid API key + +--- + +**Happy Coding! 
🚀**
diff --git a/scripts/auto_finetune.py b/scripts/auto_finetune.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e1000fc6c8985b6a4e48c3e07181a96ca1cc71c
--- /dev/null
+++ b/scripts/auto_finetune.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""
+Automated fine-tuning for all agents
+Uploads training data and creates fine-tuning jobs
+"""
+
+import os
+import json
+import time
+from pathlib import Path
+from openai import OpenAI
+
+# Initialize OpenAI client. The key is read from the environment only;
+# never commit a hard-coded API key as a fallback.
+client = OpenAI(
+    api_key=os.getenv('OPENAI_API_KEY'),
+    base_url=os.getenv('OPENAI_BASE_URL', 'https://aiportalapi.stu-platform.live/jpe')
+)
+
+AGENTS = ['nutrition', 'symptom', 'exercise', 'mental_health']
+
+
+def upload_training_file(file_path):
+    """Upload training file to OpenAI"""
+    print(f"📤 Uploading {file_path.name}...")
+
+    try:
+        with open(file_path, 'rb') as f:
+            response = client.files.create(
+                file=f,
+                purpose='fine-tune'
+            )
+
+        file_id = response.id
+        print(f"✅ Uploaded: {file_id}")
+        return file_id
+    except Exception as e:
+        print(f"❌ Upload failed: {e}")
+        return None
+
+
+def create_fine_tuning_job(file_id, agent_name):
+    """Create fine-tuning job"""
+    print(f"🚀 Creating fine-tuning job for {agent_name}...")
+
+    try:
+        response = client.fine_tuning.jobs.create(
+            training_file=file_id,
+            model='gpt-4o-mini-2024-07-18',
+            suffix=f'{agent_name}-v1'
+        )
+
+        job_id = response.id
+        print(f"✅ Job created: {job_id}")
+        return job_id
+    except Exception as e:
+        print(f"❌ Job creation failed: {e}")
+        return None
+
+
+def wait_for_job(job_id, agent_name):
+    """Wait for fine-tuning job to complete"""
+    print(f"⏳ Waiting for {agent_name} fine-tuning to complete...")
+    print("   This may take 10-30 minutes...")
+
+    try:
+        while True:
+            response = client.fine_tuning.jobs.retrieve(job_id)
+            status = response.status
+
+            if status == 'succeeded':
+                model_id = response.fine_tuned_model
+                print(f"✅ Fine-tuning completed!")
+                print(f"   Model: {model_id}")
+                return model_id
+            elif status in ['failed', 'cancelled']:
+                print(f"❌ Fine-tuning {status}")
+                return None
+            else:
+                print(f"   Status: {status}...", end='\r')
+                time.sleep(30)  # Check every 30 seconds
+    except Exception as e:
+        print(f"❌ Error checking status: {e}")
+        return None
+
+
+def save_model_config(agent_models):
+    """Save fine-tuned model IDs to config"""
+    config_file = Path("fine_tuning/fine_tuned_models.json")
+    config_file.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(config_file, 'w') as f:
+        json.dump(agent_models, f, indent=2)  # json.dump writes to the file; json.dumps would not
+
+    print(f"\n✅ Model config saved to: {config_file}")
+
+
+def update_agent_configs(agent_models):
+    """Update agent files to use fine-tuned models"""
+    print("\n📝 Updating agent configurations...")
+
+    # Create a config file that agents can read
+    config_content = f"""# Fine-tuned Models Configuration
+# Generated automatically by auto_finetune.py
+
+FINE_TUNED_MODELS = {{
+"""
+
+    for agent, model_id in agent_models.items():
+        if model_id:
+            config_content += f"    '{agent}': '{model_id}',\n"
+
+    config_content += "}\n"
+
+    # Save to config file
+    config_file = Path("config/fine_tuned_models.py")
+    with open(config_file, 'w') as f:
+        f.write(config_content)
+
+    print(f"✅ Configuration saved to: {config_file}")
+    print("\n📌 To use fine-tuned models, update config/settings.py:")
+    print("   from config.fine_tuned_models import FINE_TUNED_MODELS")
+    print("   MODEL = FINE_TUNED_MODELS.get('nutrition', 'gpt-4o-mini')")
+
+
+def fine_tune_all_agents():
+    """Fine-tune all agents"""
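+    # Pipeline per agent: validate the training JSONL -> upload the file ->
+    # create a fine-tuning job -> poll until it finishes -> record the model ID.
+    # Each *_training.jsonl line is one chat-format example:
+    #   {"messages": [{"role": "system", ...}, {"role": "user", ...}, {"role": "assistant", ...}]}
+    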
print("🎯 Starting automated fine-tuning for all agents...") + print() + + training_dir = Path("fine_tuning/training_data") + if not training_dir.exists(): + print("❌ Training data not found!") + print(" Run: python scripts/generate_training_data.py") + return + + agent_models = {} + + for agent in AGENTS: + print(f"\n{'='*60}") + print(f"🤖 Processing {agent}_agent") + print(f"{'='*60}\n") + + # Find training file + training_file = training_dir / f"{agent}_training.jsonl" + if not training_file.exists(): + print(f"⚠️ Training file not found: {training_file}") + continue + + # Check file size + file_size = training_file.stat().st_size + print(f"📊 Training file size: {file_size:,} bytes") + + # Count conversations + with open(training_file, 'r') as f: + conv_count = sum(1 for _ in f) + print(f"📊 Conversations: {conv_count}") + + if conv_count < 10: + print(f"⚠️ Too few conversations ({conv_count}), skipping...") + continue + + # Upload training file + file_id = upload_training_file(training_file) + if not file_id: + continue + + # Create fine-tuning job + job_id = create_fine_tuning_job(file_id, agent) + if not job_id: + continue + + # Wait for completion + model_id = wait_for_job(job_id, agent) + if model_id: + agent_models[agent] = model_id + + print() + + # Save results + if agent_models: + print(f"\n{'='*60}") + print("🎉 Fine-tuning Complete!") + print(f"{'='*60}\n") + + print("📊 Fine-tuned models:") + for agent, model_id in agent_models.items(): + print(f" {agent}: {model_id}") + + # Update configurations + update_agent_configs(agent_models) + + print("\n✅ All done! Your agents are now fine-tuned!") + print("\n📌 Next steps:") + print(" 1. Review fine_tuned_models.py") + print(" 2. Update your agent code to use fine-tuned models") + print(" 3. Test the improved agents!") + else: + print("\n⚠️ No models were fine-tuned") + print(" Check the errors above and try again") + + +if __name__ == "__main__": + fine_tune_all_agents() diff --git a/scripts/check_rag_status.py b/scripts/check_rag_status.py new file mode 100644 index 0000000000000000000000000000000000000000..72250ea05638a3b711cb96c98e716b0dcb4ce973 --- /dev/null +++ b/scripts/check_rag_status.py @@ -0,0 +1,215 @@ +""" +Check RAG System Status - Verify all vector stores +Checks all 6 specialized ChromaDB databases +""" + +from pathlib import Path +import sys + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +# Vector store definitions +VECTOR_STORES = { + 'medical_diseases': { + 'name': 'ViMedical Diseases', + 'path': 'rag/vector_store/medical_diseases', + 'expected_size': 50, # MB + 'test_query': 'đau đầu triệu chứng' + }, + 'mental_health': { + 'name': 'Mental Health', + 'path': 'rag/vector_store/mental_health', + 'expected_size': 80, + 'test_query': 'stress anxiety depression' + }, + 'nutrition': { + 'name': 'Nutrition Plans', + 'path': 'rag/vector_store/nutrition', + 'expected_size': 20, + 'test_query': 'diet meal plan calories' + }, + 'vietnamese_nutrition': { + 'name': 'Vietnamese Food', + 'path': 'rag/vector_store/vietnamese_nutrition', + 'expected_size': 5, + 'test_query': 'phở cơm nutrition' + }, + 'fitness': { + 'name': 'Fitness Exercises', + 'path': 'rag/vector_store/fitness', + 'expected_size': 10, + 'test_query': 'gym workout exercise' + }, + 'symptom_qa': { + 'name': 'Medical Q&A', + 'path': 'rag/vector_store/symptom_qa', + 'expected_size': 8, + 'test_query': 'triệu chứng bệnh' + }, + 'general_health_qa': { + 'name': 'General Health Q&A', + 'path': 
'rag/vector_store/general_health_qa', + 'expected_size': 7, + 'test_query': 'sức khỏe tổng quát' + } +} + +def check_vector_store(store_info): + """Check individual vector store""" + + print(f"\n📦 {store_info['name']}") + print("-" * 50) + + store_path = Path(store_info['path']) + + # Check existence + if not store_path.exists(): + print(f"❌ Not found: {store_info['path']}") + print(f" Reason: Directory does not exist") + return {'status': False, 'reason': 'Directory not found'} + + print(f"✅ Exists: {store_info['path']}") + + # Check size + total_size = sum(f.stat().st_size for f in store_path.rglob('*') if f.is_file()) + size_mb = total_size / (1024 * 1024) + expected = store_info['expected_size'] + + print(f"📊 Size: {size_mb:.1f} MB (expected ~{expected} MB)") + + if size_mb < 0.1: + print("⚠️ Database seems empty") + print(" Reason: Database size < 0.1 MB (likely not built)") + return {'status': False, 'reason': 'Database empty or not built'} + + # Try to load and query + try: + import chromadb + + client = chromadb.PersistentClient(path=str(store_path)) + collections = client.list_collections() + + if not collections: + print("⚠️ No collections found") + print(" Reason: ChromaDB has no collections") + return {'status': False, 'reason': 'No collections in database'} + + collection = collections[0] + count = collection.count() + print(f"📚 Documents: {count:,} chunks") + + if count == 0: + print("⚠️ Collection is empty") + print(" Reason: Collection exists but has 0 documents") + return {'status': False, 'reason': 'Collection is empty (0 documents)'} + + # Test query + try: + results = collection.query( + query_texts=[store_info['test_query']], + n_results=1 + ) + if results and results['documents'] and results['documents'][0]: + print("✅ Query test passed") + return {'status': True, 'reason': None} + else: + print("⚠️ Query returned no results") + print(" Reason: Query executed but found no matching documents") + return {'status': False, 'reason': 'Query returned no results'} + except Exception as e: + print(f"⚠️ Query test failed: {e}") + print(f" Reason: {str(e)}") + return {'status': False, 'reason': f'Query failed: {str(e)}'} + + except ImportError: + print("⚠️ ChromaDB not installed") + print(" Reason: pip install chromadb") + return {'status': False, 'reason': 'ChromaDB package not installed'} + except Exception as e: + print(f"⚠️ Error: {e}") + print(f" Reason: {str(e)}") + return {'status': False, 'reason': f'Error loading database: {str(e)}'} + +def check_rag_status(): + """Check all RAG vector stores""" + + print("="*60) + print("🔍 RAG System Status Check") + print("="*60) + + # Check base directory + base_path = Path('rag/vector_store') + if not base_path.exists(): + print("\n❌ Vector store directory not found!") + print(f" Expected: {base_path}") + print("\n💡 Solution:") + print(" bash scripts/setup_rag.sh") + return False + + print(f"\n✅ Base directory exists: {base_path}") + + # Check each vector store + results = {} + for store_id, store_info in VECTOR_STORES.items(): + results[store_id] = check_vector_store(store_info) + + # Summary + print("\n" + "="*60) + print("📊 Summary") + print("="*60) + + total = len(results) + passed = sum(1 for v in results.values() if v['status']) + + for store_id, result in results.items(): + status = "✅" if result['status'] else "❌" + name = VECTOR_STORES[store_id]['name'] + print(f"{status} {name}") + if not result['status'] and result['reason']: + print(f" └─ {result['reason']}") + + print("\n" + "="*60) + print(f"Result: {passed}/{total} 
databases OK") + + if passed == total: + print("\n🎉 All vector stores are ready!") + print("\nNext steps:") + print(" python app.py") + print(" Open http://localhost:7860") + print("="*60) + return True + else: + print("\n⚠️ Some databases are missing or have issues") + print("\n💡 Solutions:") + print("\n1️⃣ Quick fix (rebuild all):") + print(" bash scripts/setup_rag.sh") + + print("\n2️⃣ Rebuild specific databases:") + + # Map store_id to script + script_map = { + 'medical_diseases': 'python data_mining/mining_vimedical.py', + 'mental_health': 'python data_mining/mining_mentalchat.py', + 'nutrition': 'python data_mining/mining_nutrition.py', + 'vietnamese_nutrition': 'python data_mining/mining_vietnamese_food.py', + 'fitness': 'python data_mining/mining_fitness.py', + 'symptom_qa': 'python data_mining/mining_medical_qa.py', + 'general_health_qa': 'python data_mining/mining_medical_qa.py' + } + + for store_id, result in results.items(): + if not result['status']: + name = VECTOR_STORES[store_id]['name'] + script = script_map.get(store_id, 'Unknown') + print(f"\n ❌ {name}:") + print(f" Reason: {result['reason']}") + print(f" Fix: {script}") + + print("\n" + "="*60) + return False + + +if __name__ == '__main__': + success = check_rag_status() + exit(0 if success else 1) diff --git a/scripts/fine_tune_agent.py b/scripts/fine_tune_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..ef3188c66500bd8711808448939b3f606541854d --- /dev/null +++ b/scripts/fine_tune_agent.py @@ -0,0 +1,68 @@ +""" +Fine-tune Agent Script +Usage: python scripts/fine_tune_agent.py --agent nutrition --min-rating 4.0 +""" + +import argparse +from fine_tuning import get_data_collector, fine_tune_agent + + +def main(): + parser = argparse.ArgumentParser(description='Fine-tune a healthcare agent') + parser.add_argument('--agent', required=True, + choices=['nutrition', 'exercise', 'symptom', 'mental_health', 'general_health'], + help='Agent to fine-tune') + parser.add_argument('--min-rating', type=float, default=None, + help='Minimum quality rating (1-5) to include conversations') + parser.add_argument('--model', default='gpt-4o-mini-2024-07-18', + help='Base model to fine-tune') + parser.add_argument('--suffix', default=None, + help='Suffix for fine-tuned model name') + parser.add_argument('--no-wait', action='store_true', + help='Don\'t wait for fine-tuning to complete') + + args = parser.parse_args() + + # Get data collector + collector = get_data_collector() + + # Check conversation count + counts = collector.get_conversation_count(f"{args.agent}_agent") + agent_key = args.agent + + if agent_key not in counts or counts[agent_key] == 0: + print(f"❌ No conversations found for {args.agent} agent") + print(f" Start using the chatbot to collect training data") + return + + print(f"📊 Found {counts[agent_key]} conversations for {args.agent} agent") + + # Export training data + print(f"\n📤 Exporting training data...") + training_file = collector.export_for_openai_finetuning( + agent_name=f"{args.agent}_agent", + min_quality_rating=args.min_rating + ) + + # Start fine-tuning + print(f"\n🚀 Starting fine-tuning job...") + result = fine_tune_agent( + agent_name=args.agent, + training_file=training_file, + model=args.model, + suffix=args.suffix, + wait_for_completion=not args.no_wait + ) + + if args.no_wait: + print(f"\n✅ Fine-tuning job started: {result}") + print(f" Check status with: python scripts/check_finetuning_status.py --job-id {result}") + else: + print(f"\n✅ Fine-tuning completed!") + print(f" 
Model ID: {result}")
+        print(f"\n💡 To use this model, update your agent configuration:")
+        print(f"   MODEL = '{result}'")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/generate_training_data.py b/scripts/generate_training_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..78c484ee54d8c2a2aaeef686bd9a962a9c21115c
--- /dev/null
+++ b/scripts/generate_training_data.py
@@ -0,0 +1,372 @@
+#!/usr/bin/env python3
+"""
+Generate synthetic training data for fine-tuning
+Uses GPT-4o-mini to create high-quality conversations
+"""
+
+import json
+import os
+from pathlib import Path
+from openai import OpenAI
+
+# Initialize OpenAI client. The key is read from the environment only;
+# never commit a hard-coded API key as a fallback.
+client = OpenAI(
+    api_key=os.getenv('OPENAI_API_KEY'),
+    base_url=os.getenv('OPENAI_BASE_URL', 'https://aiportalapi.stu-platform.live/jpe')
+)
+
+# System prompts for each agent (kept in Vietnamese: these are the literal
+# prompts the Vietnamese-language agents are trained with)
+SYSTEM_PROMPTS = {
+    'nutrition': """Bạn là chuyên gia dinh dưỡng chuyên nghiệp với 10 năm kinh nghiệm.
+
+NHIỆM VỤ: Tư vấn dinh dưỡng, lập kế hoạch ăn uống, tính toán calo và macro.
+
+PHONG CÁCH:
+- Chuyên nghiệp nhưng thân thiện
+- Đưa ra con số cụ thể (calo, protein, carb, fat)
+- Thực tế, dễ áp dụng
+- Cá nhân hóa theo thông tin user
+
+KHÔNG:
+- Kê đơn thuốc
+- Chẩn đoán bệnh
+- Tạo lịch tập luyện (đó là việc của exercise_agent)""",
+
+    'symptom': """Bạn là bác sĩ tư vấn chuyên nghiệp.
+
+NHIỆM VỤ: Thu thập thông tin triệu chứng theo phương pháp OPQRST, đánh giá mức độ nghiêm trọng.
+
+PHONG CÁCH:
+- Hỏi từng câu một, tự nhiên
+- KHÔNG hỏi mãi theo template
+- Tối đa 3-4 câu hỏi
+- Đưa khuyến nghị sau khi có đủ thông tin
+
+KHÔNG:
+- Chẩn đoán bệnh chính xác
+- Kê đơn thuốc
+- Tạo lịch tập luyện""",
+
+    'exercise': """Bạn là huấn luyện viên thể hình (Personal Trainer) chuyên nghiệp.
+
+NHIỆM VỤ: Tạo lịch tập, hướng dẫn kỹ thuật, tư vấn tập luyện.
+
+PHONG CÁCH:
+- Nhiệt huyết, động viên
+- Thực tế, dễ hiểu, dễ làm theo
+- Hài hước nhẹ nhàng
+- TỰ NHIÊN, MẠCH LẠC
+
+KHÔNG:
+- Kê đơn thuốc
+- Tư vấn dinh dưỡng chi tiết (đó là việc của nutrition_agent)
+- Chẩn đoán chấn thương""",
+
+    'mental_health': """Bạn là chuyên gia tâm lý với chuyên môn về CBT và mindfulness.
+
+NHIỆM VỤ: Hỗ trợ stress, lo âu, trầm cảm, cải thiện giấc ngủ, quản lý cảm xúc.
+
+PHONG CÁCH:
+- Ấm áp, đồng cảm
+- Validate cảm xúc
+- Không phán xét
+- Khuyến khích tìm kiếm sự hỗ trợ chuyên môn khi cần
+
+CRISIS DETECTION:
+- Ý định tự tử → Hotline khẩn cấp
+- Tự gây thương tích → Cần hỗ trợ ngay
+
+KHÔNG:
+- Chẩn đoán rối loạn tâm thần
+- Kê đơn thuốc
+- Thay thế liệu pháp chuyên môn"""
+}
+
+# Scenarios for each agent
+SCENARIOS = {
+    'nutrition': [
+        "Tôi muốn giảm cân nhưng không biết bắt đầu từ đâu",
+        "Làm sao để tăng cân lành mạnh?",
+        "Chế độ ăn cho người tập gym là gì?",
+        "Thực đơn cho người tiểu đường",
+        "Ăn gì để tăng cơ giảm mỡ?",
+        "Protein là gì?
Tôi cần bao nhiêu protein mỗi ngày?", + "TDEE là gì và cách tính như thế nào?", + "Carb có làm béo không?", + "Chế độ ăn keto có tốt không?", + "Intermittent fasting là gì?", + "Ăn chay có đủ dinh dưỡng không?", + "Tôi đã giảm cân nhưng bị plateau, phải làm sao?", + "Ăn bao nhiêu bữa một ngày là tốt nhất?", + "Thực phẩm nào giúp giảm mỡ bụng?", + "Uống whey protein có tốt không?", + "Chế độ ăn cho người muốn có bầu", + "Ăn gì để tăng chiều cao?", + "Thực đơn cho người tập gym buổi sáng", + "Cách tính macro cho mục tiêu giảm cân", + "Ăn trước hay sau khi tập?", + "Thực phẩm nào giàu protein?", + "Chế độ ăn low carb là gì?", + "Ăn nhiều trứng có tốt không?", + "Thực đơn cho người muốn tăng cơ", + "Cách ăn để có múi bụng", + "Thực phẩm nào nên tránh khi giảm cân?", + "Chế độ ăn cho người cao huyết áp", + "Ăn gì để tăng sức đề kháng?", + "Thực đơn cho người ăn chay", + "Cách tính calo trong thức ăn", + "Ăn gì sau khi tập gym?", + "Thực phẩm giúp ngủ ngon", + "Chế độ ăn cho người gầy muốn tăng cân", + "Ăn gì để giảm cholesterol?", + "Thực đơn cho người bận rộn", + "Cách meal prep cho cả tuần", + "Ăn gì để tăng năng lượng?", + "Thực phẩm giúp giảm stress", + "Chế độ ăn cho người tập cardio", + "Ăn gì để da đẹp?", + "Thực đơn cho người muốn detox", + "Cách ăn để tăng testosterone tự nhiên", + "Ăn gì để tăng trí nhớ?", + "Thực phẩm giúp giảm viêm", + "Chế độ ăn cho người tập yoga", + "Ăn gì để tăng cơ mà không béo?", + "Thực đơn cho người muốn giảm mỡ bụng", + "Cách ăn để tăng vòng 3", + "Ăn gì để tăng sức bền?", + "Thực phẩm giúp phục hồi sau tập" + ], + + 'symptom': [ + "Tôi bị đau đầu từ 3 ngày nay", + "Đau bụng và buồn nôn, có sao không?", + "Ho khan kéo dài 2 tuần", + "Đau ngực khi thở sâu", + "Chóng mặt khi đứng dậy", + "Mệt mỏi cả ngày dù ngủ đủ", + "Đau lưng dưới kéo dài", + "Sốt cao 39 độ từ hôm qua", + "Đau họng và khó nuốt", + "Nổi mẩn đỏ trên da", + "Tiêu chảy kéo dài 3 ngày", + "Đau khớp gối khi đi lên xuống cầu thang", + "Khó thở khi nằm", + "Đau bụng kinh dữ dội", + "Chảy máu cam thường xuyên", + "Đau răng nhức nhối", + "Mắt đỏ và ngứa", + "Tai ù và giảm thính lực", + "Đau vai gáy kéo dài", + "Buồn nôn khi đói", + "Đau bụng trên rốn sau khi ăn", + "Ho có đờm vàng", + "Đau đầu một bên", + "Tê tay chân khi ngủ dậy", + "Đau ngực trái lan ra tay", + "Khó tiêu và đầy hơi", + "Đau lưng sau khi tập gym", + "Sưng phù chân vào buổi tối", + "Đau bụng dưới bên phải", + "Ho ra máu", + "Đau đầu kèm buồn nôn", + "Khó ngủ và hay tỉnh giấc", + "Đau cổ khi quay đầu", + "Nôn mửa sau khi ăn", + "Đau bụng kinh không đều", + "Chảy nước mũi và hắt hơi", + "Đau ngực khi gắng sức", + "Mệt mỏi và chán ăn", + "Đau đầu gối khi chạy bộ", + "Khó thở khi gắng sức", + "Đau lưng lan xuống chân", + "Sốt nhẹ kéo dài", + "Đau bụng và táo bón", + "Chóng mặt và buồn nôn", + "Đau ngực và hồi hộp", + "Ho khan về đêm", + "Đau bụng dưới khi đi tiểu", + "Mệt mỏi và đau cơ", + "Đau đầu sau khi thức khuya", + "Khó thở và đau ngực" + ], + + 'exercise': [ + "Tôi mới bắt đầu tập gym, nên tập gì?", + "Tập bao nhiêu ngày một tuần là đủ?", + "Cardio hay tạ tốt hơn để giảm cân?", + "Tôi muốn có bụng 6 múi", + "Làm sao để tăng vòng 3?", + "Bài tập cho người đau lưng", + "Cách squat đúng kỹ thuật", + "Tôi tập deadlift bị đau lưng", + "Push-up chuẩn như thế nào?", + "Lịch tập cho người mới bắt đầu", + "Tập tạ có làm lùn không?", + "Bài tập giảm mỡ bụng hiệu quả", + "Cách tập để tăng cơ nhanh", + "Tập gym bao lâu thì thấy kết quả?", + "Bài tập cho người gầy muốn tăng cân", + "Cách tập ngực to", + "Lịch tập 3 ngày một tuần", + "Tập 
cardio bao lâu để giảm cân?", + "Bài tập cho người béo phì", + "Cách tập vai to", + "Lịch tập full body", + "Tập tạ có giúp giảm cân không?", + "Bài tập cho người cao tuổi", + "Cách tập tay to", + "Lịch tập upper/lower", + "Tập yoga có giảm cân không?", + "Bài tập cho người ngồi nhiều", + "Cách tập chân to", + "Lịch tập push/pull/legs", + "Tập plank bao lâu là đủ?", + "Bài tập cho người muốn săn chắc", + "Cách tập lưng rộng", + "Lịch tập cho người bận rộn", + "Tập HIIT có tốt không?", + "Bài tập cho người muốn tăng sức bền", + "Cách tập bụng múi", + "Lịch tập 5 ngày một tuần", + "Tập thể dục buổi sáng hay tối?", + "Bài tập cho người muốn giảm mỡ", + "Cách tập để có body đẹp", + "Lịch tập cho nữ", + "Tập gym có ảnh hưởng chiều cao không?", + "Bài tập cho người muốn tăng cơ giảm mỡ", + "Cách tập để có vòng 3 đẹp", + "Lịch tập cho người trung niên", + "Tập bao lâu thì nên nghỉ?", + "Bài tập cho người muốn săn chắc vòng 3", + "Cách tập để tăng testosterone", + "Lịch tập cho người muốn giảm cân nhanh", + "Tập gym có cần uống whey không?" + ], + + 'mental_health': [ + "Tôi hay lo âu về mọi thứ", + "Stress công việc quá nhiều", + "Cách giảm căng thẳng hiệu quả", + "Tôi bị mất ngủ kéo dài", + "Ngủ không sâu giấc, hay tỉnh giấc", + "Làm sao để ngủ ngon hơn?", + "Tôi cảm thấy buồn chán cả ngày", + "Không có động lực làm gì", + "Hay khóc không lý do", + "Cách vượt qua stress", + "Tôi hay suy nghĩ tiêu cực", + "Làm sao để tự tin hơn?", + "Cách quản lý cảm xúc", + "Tôi hay lo lắng về tương lai", + "Làm sao để bình tĩnh hơn?", + "Cách giảm lo âu", + "Tôi cảm thấy cô đơn", + "Làm sao để vui vẻ hơn?", + "Cách đối phó với áp lực", + "Tôi hay cáu gắt", + "Làm sao để kiểm soát tức giận?", + "Cách thư giãn sau giờ làm", + "Tôi hay nghĩ quá nhiều", + "Làm sao để tập trung hơn?", + "Cách cải thiện tâm trạng", + "Tôi cảm thấy mệt mỏi tinh thần", + "Làm sao để có năng lượng tích cực?", + "Cách vượt qua nỗi buồn", + "Tôi hay lo lắng về sức khỏe", + "Làm sao để ngừng lo lắng?", + "Cách thiền để giảm stress", + "Tôi cảm thấy áp lực từ gia đình", + "Làm sao để đối phó với áp lực xã hội?", + "Cách cải thiện giấc ngủ", + "Tôi hay mơ ác mộng", + "Làm sao để ngủ sâu hơn?", + "Cách xây dựng thói quen tích cực", + "Tôi cảm thấy không được trân trọng", + "Làm sao để yêu bản thân?", + "Cách vượt qua thất bại", + "Tôi hay so sánh mình với người khác", + "Làm sao để chấp nhận bản thân?", + "Cách đối phó với chỉ trích", + "Tôi cảm thấy quá tải", + "Làm sao để cân bằng cuộc sống?", + "Cách xây dựng sự tự tin", + "Tôi hay trì hoãn công việc", + "Làm sao để có động lực?", + "Cách vượt qua nỗi sợ hãi", + "Tôi cảm thấy bất an" + ] +} + + +def generate_conversation(agent_name, scenario, system_prompt): + """Generate a conversation using GPT-4o-mini""" + try: + response = client.chat.completions.create( + model='gpt-4o-mini', + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": scenario} + ], + temperature=0.7, + max_tokens=800 + ) + + return { + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": scenario}, + {"role": "assistant", "content": response.choices[0].message.content} + ] + } + except Exception as e: + print(f" ❌ Error generating conversation: {e}") + return None + + +def generate_training_data(): + """Generate training data for all agents""" + + print("🤖 Generating synthetic training data...") + print() + + # Create output directory + output_dir = Path("fine_tuning/training_data") + output_dir.mkdir(parents=True, exist_ok=True) + + 
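# For each agent: run every scenario through GPT-4o-mini, keep the successful
+    # generations, and write one JSONL file per agent. Every line is a complete
+    # {"messages": [system, user, assistant]} example -- the chat format the
+    # fine-tuning API expects.
+    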
total_generated = 0 + + for agent_name, scenarios in SCENARIOS.items(): + print(f"📝 Generating data for {agent_name}_agent...") + print(f" Scenarios: {len(scenarios)}") + + conversations = [] + system_prompt = SYSTEM_PROMPTS[agent_name] + + for i, scenario in enumerate(scenarios, 1): + conv = generate_conversation(agent_name, scenario, system_prompt) + if conv: + conversations.append(conv) + print(f" ✅ {i}/{len(scenarios)}", end='\r') + + print() # New line after progress + + # Save to JSONL + output_file = output_dir / f"{agent_name}_training.jsonl" + with open(output_file, 'w', encoding='utf-8') as f: + for conv in conversations: + f.write(json.dumps(conv, ensure_ascii=False) + '\n') + + print(f"✅ Generated {len(conversations)} conversations for {agent_name}") + print(f" Saved to: {output_file}") + print() + + total_generated += len(conversations) + + print(f"🎉 Total: {total_generated} conversations generated!") + print() + + return total_generated + + +if __name__ == "__main__": + generate_training_data() diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3020583b581076edfef92684cc4a75c90f1f8a51 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,11 @@ +# Requirements for RAG setup scripts +# Install: pip install -r scripts/requirements.txt + +# Progress bars +tqdm>=4.66.0 + +# HTML parsing +beautifulsoup4>=4.12.0 + +# HTTP requests +requests>=2.31.0 diff --git a/scripts/setup_rag.sh b/scripts/setup_rag.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e929d81509d416cadd5be94d3d84331dd5791b5 --- /dev/null +++ b/scripts/setup_rag.sh @@ -0,0 +1,305 @@ +#!/bin/bash +# Setup RAG system - One command to rule them all +# Usage: bash scripts/setup_rag.sh + +set -e # Exit on error + +# Colors +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${BLUE}" +echo "╔════════════════════════════════════════════════════════════╗" +echo "║ 🏥 HeoCare RAG System Setup (HuggingFace) ║" +echo "╚════════════════════════════════════════════════════════════╝" +echo -e "${NC}" + +# 0. Cleanup old files and databases +echo -e "${BLUE}🧹 Cleaning up old files and databases...${NC}" + +# Remove old PDF/MD files from data_mining (if any) +if find data_mining -name "*.pdf" -o -name "*.md" ! -name "README.md" 2>/dev/null | grep -q .; then + echo -e "${YELLOW} Removing old PDF/MD files...${NC}" + find data_mining -name "*.pdf" -type f -delete 2>/dev/null || true + find data_mining -name "*.md" -type f ! -name "README.md" -delete 2>/dev/null || true + echo -e "${GREEN} ✅ Old documents removed${NC}" +fi + +# Clear temporary datasets and output folders +if [ -d "data_mining/datasets" ] || [ -d "data_mining/output" ]; then + echo -e "${YELLOW} Clearing temporary folders...${NC}" + rm -rf data_mining/datasets 2>/dev/null || true + rm -rf data_mining/output 2>/dev/null || true + echo -e "${GREEN} ✅ Temporary folders cleared${NC}" +fi + +# Clear old vector stores (will be regenerated) +if [ -d "rag/vector_store" ]; then + echo -e "${YELLOW} Clearing old vector stores...${NC}" + rm -rf rag/vector_store/* 2>/dev/null || true + echo -e "${GREEN} ✅ Old vector stores cleared${NC}" +fi + +# Clear Python cache +if [ -d "__pycache__" ] || find . -type d -name "__pycache__" 2>/dev/null | grep -q .; then + echo -e "${YELLOW} Clearing Python cache...${NC}" + find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true + find . 
-type f -name "*.pyc" -delete 2>/dev/null || true + echo -e "${GREEN} ✅ Python cache cleared${NC}" +fi + +echo -e "${GREEN}✅ Cleanup complete!${NC}" + +# 1. Check Python +echo -e "${BLUE}🐍 Checking Python...${NC}" +if ! command -v python3 &> /dev/null; then + echo -e "${RED}❌ Python3 not found!${NC}" + echo "Please install Python 3.8 or higher" + exit 1 +fi +PYTHON_VERSION=$(python3 --version) +echo -e "${GREEN}✅ ${PYTHON_VERSION}${NC}" + +# 2. Check pip +echo -e "\n${BLUE}📦 Checking pip...${NC}" +if ! command -v pip3 &> /dev/null && ! command -v pip &> /dev/null; then + echo -e "${RED}❌ pip not found!${NC}" + exit 1 +fi +echo -e "${GREEN}✅ pip found${NC}" + +# 3. Install dependencies +echo -e "\n${BLUE}📦 Installing dependencies...${NC}" +echo -e "${YELLOW}This may take a few minutes...${NC}" + +# Check if requirements.txt exists +if [ -f "requirements.txt" ]; then + pip3 install -q -r requirements.txt || pip install -q -r requirements.txt + echo -e "${GREEN}✅ Dependencies installed from requirements.txt${NC}" +else + echo -e "${YELLOW}⚠️ requirements.txt not found, installing core packages...${NC}" + pip3 install -q langchain langchain-chroma langchain-huggingface chromadb tqdm beautifulsoup4 requests || \ + pip install -q langchain langchain-chroma langchain-huggingface chromadb tqdm beautifulsoup4 requests + echo -e "${GREEN}✅ Core dependencies installed${NC}" +fi + +# 4. Create directories +echo -e "\n${BLUE}📁 Creating directories...${NC}" +mkdir -p rag/vector_store +mkdir -p data_mining/{datasets,output} +mkdir -p chroma_db +echo -e "${GREEN}✅ Directories created${NC}" + +# 5. Setup ViMedical Vietnamese Disease Dataset +echo -e "\n${BLUE}🏥 Setting up ViMedical Vietnamese Disease Dataset...${NC}" +echo -e "${YELLOW}This will download and process 603 Vietnamese diseases...${NC}" + +# Check if already exists +if [ -d "rag/vector_store/medical_diseases" ]; then + echo -e "${YELLOW}⚠️ ViMedical database already exists, skipping...${NC}" +else + # Create temp directory + mkdir -p data_mining/datasets + mkdir -p data_mining/output + + # Run ViMedical setup + python3 data_mining/mining_vimedical.py || python data_mining/mining_vimedical.py + + if [ $? -eq 0 ]; then + # Move to RAG directory + mkdir -p rag/vector_store + mv data_mining/output/medical_chroma rag/vector_store/medical_diseases + echo -e "${GREEN}✅ ViMedical dataset ready (603 diseases)${NC}" + else + echo -e "${YELLOW}⚠️ ViMedical setup failed, continuing...${NC}" + fi + + # Cleanup + rm -rf data_mining/datasets + rm -rf data_mining/output +fi + +# 6. Setup MentalChat16K Mental Health Dataset +echo -e "\n${BLUE}🧠 Setting up MentalChat16K Mental Health Dataset...${NC}" +echo -e "${YELLOW}This will download and process 16K mental health conversations...${NC}" + +# Check if already exists +if [ -d "rag/vector_store/mental_health" ]; then + echo -e "${YELLOW}⚠️ Mental Health database already exists, skipping...${NC}" +else + # Create temp directory + mkdir -p data_mining/datasets + mkdir -p data_mining/output + + # Run MentalChat setup + python3 data_mining/mining_mentalchat.py || python data_mining/mining_mentalchat.py + + if [ $? -eq 0 ]; then + # Move to RAG directory + mkdir -p rag/vector_store + mv data_mining/output/mental_health_chroma rag/vector_store/mental_health + echo -e "${GREEN}✅ Mental Health dataset ready (16K conversations)${NC}" + else + echo -e "${YELLOW}⚠️ Mental Health setup failed, continuing...${NC}" + fi + + # Cleanup + rm -rf data_mining/datasets + rm -rf data_mining/output +fi + +# 7. 
Setup Nutrition Dataset (Dietary Profiles) +echo -e "\n${BLUE}🥗 Setting up Nutrition Dataset (Dietary Profiles)...${NC}" +echo -e "${YELLOW}This will download 50 dietary profiles...${NC}" + +if [ -d "rag/vector_store/nutrition" ]; then + echo -e "${YELLOW}⚠️ Nutrition database already exists, skipping...${NC}" +else + mkdir -p data_mining/datasets data_mining/output + python3 data_mining/mining_nutrition.py || python data_mining/mining_nutrition.py + if [ $? -eq 0 ]; then + mkdir -p rag/vector_store + mv data_mining/output/nutrition_chroma rag/vector_store/nutrition + echo -e "${GREEN}✅ Nutrition profiles ready (50 profiles)${NC}" + else + echo -e "${YELLOW}⚠️ Nutrition setup failed, continuing...${NC}" + fi + rm -rf data_mining/datasets data_mining/output +fi + +# 7b. Setup Vietnamese Food Nutrition Database +echo -e "\n${BLUE}🍜 Setting up Vietnamese Food Nutrition Database...${NC}" +echo -e "${YELLOW}This will create 73 Vietnamese foods with nutrition facts...${NC}" + +if [ -d "rag/vector_store/vietnamese_nutrition" ]; then + echo -e "${YELLOW}⚠️ Vietnamese nutrition database already exists, skipping...${NC}" +else + mkdir -p data_mining/datasets data_mining/output + python3 data_mining/mining_vietnamese_nutrition.py || python data_mining/mining_vietnamese_nutrition.py + if [ $? -eq 0 ]; then + mkdir -p rag/vector_store + mv data_mining/output/vietnamese_nutrition_chroma rag/vector_store/vietnamese_nutrition + echo -e "${GREEN}✅ Vietnamese food nutrition ready (73 foods)${NC}" + else + echo -e "${YELLOW}⚠️ Vietnamese nutrition setup failed, continuing...${NC}" + fi + rm -rf data_mining/datasets data_mining/output +fi + +# 8. Setup Fitness Dataset +echo -e "\n${BLUE}💪 Setting up Fitness Dataset...${NC}" +echo -e "${YELLOW}This will download and process gym exercises...${NC}" + +if [ -d "rag/vector_store/fitness" ]; then + echo -e "${YELLOW}⚠️ Fitness database already exists, skipping...${NC}" +else + mkdir -p data_mining/datasets data_mining/output + python3 data_mining/mining_fitness.py || python data_mining/mining_fitness.py + if [ $? -eq 0 ]; then + mkdir -p rag/vector_store + mv data_mining/output/fitness_chroma rag/vector_store/fitness + echo -e "${GREEN}✅ Fitness dataset ready${NC}" + else + echo -e "${YELLOW}⚠️ Fitness setup failed, continuing...${NC}" + fi + rm -rf data_mining/datasets data_mining/output +fi + +# 9. Setup COVID-19 Dataset (DEPRECATED - Skipped) +echo -e "\n${BLUE}🦠 COVID-19 Dataset...${NC}" +echo -e "${YELLOW}⏭️ Skipping (dataset deprecated, already have Medical Q&A)${NC}" + +# 10. Setup Vietnamese Medical Q&A Dataset +echo -e "\n${BLUE}💬 Setting up Vietnamese Medical Q&A Dataset...${NC}" +echo -e "${YELLOW}This will download and process 9.3K medical Q&A pairs from HuggingFace...${NC}" + +if [ -d "rag/vector_store/symptom_qa" ] && [ -d "rag/vector_store/general_health_qa" ]; then + echo -e "${YELLOW}⚠️ Medical Q&A databases already exist, skipping...${NC}" +else + mkdir -p data_mining/datasets data_mining/output + python3 data_mining/mining_medical_qa.py || python data_mining/mining_medical_qa.py + if [ $? -eq 0 ]; then + mkdir -p rag/vector_store + mv data_mining/output/symptom_qa_chroma rag/vector_store/symptom_qa + mv data_mining/output/general_health_qa_chroma rag/vector_store/general_health_qa + echo -e "${GREEN}✅ Medical Q&A datasets ready (Symptom + General Health)${NC}" + else + echo -e "${YELLOW}⚠️ Medical Q&A setup failed, continuing...${NC}" + fi + rm -rf data_mining/datasets data_mining/output +fi + +# 11. 
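Once the stores above exist, a quick retrieval smoke test mirrors what `scripts/check_rag_status.py` presumably does (a sketch; the embedding model is an assumption and must match whatever the mining scripts used at build time):

```python
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Assumed embedding model; must match the one used when the store was built
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = Chroma(persist_directory="rag/vector_store/medical_diseases",
            embedding_function=embeddings)
for doc in db.similarity_search("triệu chứng sốt xuất huyết", k=3):
    print(doc.page_content[:120])
```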
Verify RAG +echo -e "\n${BLUE}✅ Verifying RAG system...${NC}" +python3 scripts/check_rag_status.py 2>/dev/null || python scripts/check_rag_status.py 2>/dev/null || echo "⚠️ Verification skipped" + +# 12. Generate Training Data (DISABLED - Not needed without fine-tuning) +# echo -e "\n${BLUE}🤖 Generating synthetic training data...${NC}" +# echo -e "${YELLOW}This will create ~200 conversations for fine-tuning...${NC}" +# +# if [ -d "fine_tuning/training_data" ] && [ "$(ls -A fine_tuning/training_data 2>/dev/null)" ]; then +# echo -e "${YELLOW}⚠️ Training data already exists, skipping generation...${NC}" +# else +# python3 scripts/generate_training_data.py || python scripts/generate_training_data.py +# if [ $? -eq 0 ]; then +# echo -e "${GREEN}✅ Training data generated!${NC}" +# else +# echo -e "${YELLOW}⚠️ Training data generation failed, continuing...${NC}" +# fi +# fi + +# 13. Fine-tune Models (DISABLED - Custom API doesn't support fine-tuning) +# Fine-tuning requires OpenAI official API, which costs money and is not necessary +# The app works well with base model + RAG without fine-tuning +# +# echo -e "\n${BLUE}🎓 Fine-tuning agents...${NC}" +# echo -e "${YELLOW}This will fine-tune all agents with synthetic data (takes 30-60 min, costs ~\$2)${NC}" +# echo -e "${YELLOW}Do you want to fine-tune now? (y/N)${NC}" +# read -t 10 -n 1 -r FINETUNE_CHOICE || FINETUNE_CHOICE="n" +# echo +# +# if [[ $FINETUNE_CHOICE =~ ^[Yy]$ ]]; then +# echo -e "${BLUE}🚀 Starting fine-tuning...${NC}" +# python3 scripts/auto_finetune.py || python scripts/auto_finetune.py +# if [ $? -eq 0 ]; then +# echo -e "${GREEN}✅ Fine-tuning complete!${NC}" +# else +# echo -e "${YELLOW}⚠️ Fine-tuning failed, check errors above${NC}" +# fi +# else +# echo -e "${YELLOW}⏭️ Skipping fine-tuning (you can run it later with: python scripts/auto_finetune.py)${NC}" +# fi + +echo -e "\n${YELLOW}ℹ️ Training data generation and fine-tuning are disabled${NC}" +echo -e "${YELLOW} Reason: Custom API doesn't support fine-tuning (404 error)${NC}" +echo -e "${YELLOW} App works well with base model + RAG without fine-tuning${NC}" + +# Done +echo -e "\n${GREEN}" +echo "╔════════════════════════════════════════════════════════════╗" +echo "║ 🎉 Setup Complete! ║" +echo "╚════════════════════════════════════════════════════════════╝" +echo -e "${NC}" + +echo -e "${BLUE}📊 What was set up:${NC}" +echo " ✅ RAG databases (6 specialized databases, ~160 MB)" +echo " - ViMedical Diseases (603 diseases)" +echo " - Mental Health (16K conversations)" +echo " - Nutrition Plans" +echo " - Vietnamese Food (73 items)" +echo " - Fitness Exercises (1.66K)" +echo " - Medical Q&A (9.3K pairs)" +echo "" + +echo -e "${BLUE}🚀 Next steps:${NC}" +echo " 1. python app.py" +echo " 2. 
Open http://localhost:7860 in your browser" +echo "" + +echo -e "${BLUE}💡 Tips:${NC}" +echo " - Check RAG status: python scripts/check_rag_status.py" +echo " - App works with base model + RAG (no fine-tuning needed)" +echo "" diff --git a/ui/__init__.py b/ui/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..674cd96cec84616e4b96f9e5123908cdd6acb678 --- /dev/null +++ b/ui/__init__.py @@ -0,0 +1,4 @@ +from .layout import build_layout + +# Alias for build_layout to maintain backward compatibility or for clarity in usage +build_ui = build_layout \ No newline at end of file diff --git a/ui/components.py b/ui/components.py new file mode 100644 index 0000000000000000000000000000000000000000..f57d414de27517381fc48b822df649c0ec31f9de --- /dev/null +++ b/ui/components.py @@ -0,0 +1,139 @@ +import gradio as gr +import logging +import modelscope_studio.components.pro as pro +from modelscope_studio.components.pro.chatbot import ( + ChatbotBotConfig, ChatbotDataMessage, + ChatbotPromptsConfig, ChatbotUserConfig, ChatbotWelcomeConfig, + ChatbotActionConfig +) +from modelscope_studio.components.pro.multimodal_input import MultimodalInputUploadConfig + +logger = logging.getLogger(__name__) + +def create_header(): + """Create the header markdown component and content""" + return gr.Markdown( + """ + # 🏥 Trợ Lý Sức Khỏe AI + ### Sức khỏe của bạn cũng là sức khỏe của chúng tôi + + ⚠️ **Lưu ý:** Đây là tư vấn tham khảo. Hãy gặp bác sĩ cho các vấn đề nghiêm trọng. + """ + ) + + +def create_chatbot(): + """Create the chatbot component with welcome config and prompts""" + return pro.Chatbot( + value=[], # Start with empty history + height=800, # Use flex=1 to fill remaining space + elem_style=dict(flex=1, border="1px solid #eee", borderRadius="8px", padding="10px"), + welcome_config=ChatbotWelcomeConfig( + variant="borderless", + icon="https://photoavatarmaker.com/wp-content/uploads/2025/04/doctor-avatar-sample.jpeg", + title="Xin chào, tôi là Trợ Lý Sức Khỏe AI", + description="Bạn có thể upload hình ảnh và nhập text để bắt đầu.", + prompts=ChatbotPromptsConfig( + title="Tôi có thể giúp bạn gì hôm nay?", + styles={ + "list": { + "width": '100%', + }, + "item": { + "flex": 1, + }, + }, + items=[{ + "label": "📅 Lập kế hoạch", + "children": [{ + "description": "Giúp tôi lập kế hoạch chế độ ăn uống" + }, { + "description": "Giúp tôi lập kế hoạch tập luyện" + }, { + "description": "Giúp tôi lập kế hoạch sức khỏe toàn diện" + }] + }, { + "label": "🏥 Tư vấn sức khỏe", + "children": [{ + "description": "Tôi bị đau đầu thường xuyên, nên làm gì?" + }, { + "description": "Làm thế nào để cải thiện giấc ngủ?" + }, { + "description": "Tôi nên ăn gì để tăng cường miễn dịch?" 
+ }] + }])), + user_config=ChatbotUserConfig( + actions=["copy", "edit"], + avatar="https://api.dicebear.com/7.x/miniavs/svg?seed=3" + ), + bot_config=ChatbotBotConfig( + header="Trợ Lý Sức Khỏe AI", + actions=["copy", "like", "dislike", "retry", + ChatbotActionConfig( + action="delete", + popconfirm=dict( + title="Delete the message", + description="Are you sure to delete this message?", + okButtonProps=dict(danger=True)) + ) + ], + avatar="https://mdn.alipayobjects.com/huamei_iwk9zp/afts/img/A*s5sNRo5LjfQAAAAAAAAAAAAADgCCAQ/fmt.webp" + ), + ) + + +def create_input_row(): + """Create the input block with textbox and submit button inside same container""" + with gr.Group(elem_classes=["input-block"]) as block: + gr.Markdown("💡 Mô tả chi tiết vấn đề của bạn để nhận được lời khuyên phù hợp nhất", elem_classes=["input-label"]) + with gr.Row(elem_classes=["input-row"]): + message = gr.Textbox( + label="Nhập tin nhắn của bạn", + placeholder="Ví dụ: Tôi bị đau đầu thường xuyên, nên làm gì?", + elem_classes=["message-input"], + scale=9, + show_label=False, + container=False + ) + submit_btn = gr.Button("Gửi", variant="primary", scale=1, size="sm") + + return block, message, submit_btn + + +def create_clear_button(): + """Create the clear button as a badge overlay""" + clear_btn = gr.Button("Xóa lịch sử", variant="secondary", size="sm", elem_classes=["clear-badge"]) + return clear_btn + + +def create_top_header(): + """Create the top header""" + with gr.Row() as row: + welcome_text = gr.Markdown("") + login_btn = gr.Button("🔐 Đăng nhập", scale=1, + elem_classes=["small-button"]) + logout_btn = gr.Button("🔐 Đăng xuất", visible=False, + scale=1, elem_classes=["small-button"]) + + return row, welcome_text, login_btn, logout_btn + + +def create_login_form(): + with gr.Column(visible=False, elem_id="login-form", elem_classes=["login-form-container"]) as column: + gr.Markdown("## 🔐 Đăng nhập để lưu lịch sử", + elem_id="login-title", elem_classes=["login-title"]) + username = gr.Textbox( + label="👤 Tên đăng nhập", placeholder="Nhập tên đăng nhập...", lines=1, elem_classes=["login-input"]) + password = gr.Textbox(label="🔑 Mật khẩu", placeholder="Nhập mật khẩu...", + type="password", lines=1, elem_classes=["login-input"]) + with gr.Row(elem_classes=["login-buttons"]): + login_submit = gr.Button( + "Đăng nhập", variant="primary", elem_classes=["login-btn"]) + register_btn = gr.Button( + "Đăng ký", variant="secondary", elem_classes=["register-btn"]) + back_btn = gr.Button("Trở lại", variant="primary", + elem_classes=["back-btn"]) + status = gr.Textbox(label="Trạng thái", + interactive=False, elem_classes=["login-status"]) + + return column, username, password, login_submit, register_btn, back_btn, status diff --git a/ui/handlers.py b/ui/handlers.py new file mode 100644 index 0000000000000000000000000000000000000000..8f8a592fb852c3cf3fbb5ea678cdd08c34339e9d --- /dev/null +++ b/ui/handlers.py @@ -0,0 +1,209 @@ +import base64 +from utils.helpers import chat_logic +import uuid +from pathlib import Path +from auth.auth import ( + register_user, login_user, logout_user, load_history, save_message, clear_history +) +from health_data import HealthContext, HealthDataStore +from health_analysis import HealthAnalyzer +from fitness_tracking import FitnessTracker +from utils.speech_recognition import transcribe_speech +import gradio as gr +import logging +import threading +import time + +logger = logging.getLogger(__name__) + +# Dictionary to store user IDs for each agent type +user_sessions = {} +data_store = 
HealthDataStore()

+# Global state for voice recording
+voice_recording_state = {
+    "is_recording": False,
+    "lock": threading.Lock()
+}
+
+
+def setup_handlers(message, chatbot, submit_btn, clear_btn, session, audio_input=None):
+    """Setup event handlers for hybrid Gradio + ModelScope chatbot"""
+
+    def agent_chat_handler(user_input, chatbot_value):
+        username = session.value.get("user")
+        user_id = username
+
+        # Initialize the message list if there is no history yet
+        if not chatbot_value:
+            chatbot_value = []
+
+        # Keep the old history (without the new user message)
+        old_history = chatbot_value.copy()
+
+        # Append the user message as a ChatbotDataMessage so it renders immediately
+        from modelscope_studio.components.pro.chatbot import ChatbotDataMessage
+
+        user_message = ChatbotDataMessage(
+            role="user",
+            content=user_input
+        )
+        chatbot_value.append(user_message)
+
+        # Show a pending "bot is typing" placeholder
+        pending_message = ChatbotDataMessage(
+            role="assistant",
+            content="",
+            loading=True,
+            status="pending"
+        )
+        chatbot_value.append(pending_message)
+        yield gr.update(value=""), gr.update(value=chatbot_value)
+
+        # Call the backend logic with the old history (without the new user message)
+        try:
+            # chat_logic appends the user message to the history itself
+            _, updated_chat_history = chat_logic(
+                user_input, old_history, user_id=user_id)
+            time.sleep(0.2)
+
+            # Replace the pending placeholder with the new response
+            if updated_chat_history:
+                last_message = updated_chat_history[-1]
+                chatbot_value[-1] = last_message
+
+                if username:
+                    save_message(username, user_input)
+                    if hasattr(last_message, 'content') and last_message.role == 'assistant':
+                        save_message(username, last_message.content)
+
+        except Exception as e:
+            # On failure, turn the pending placeholder into an error message
+            if chatbot_value:
+                error_message = ChatbotDataMessage(
+                    role="assistant",
+                    content=f"❌ Lỗi xử lý: {str(e)}"
+                )
+                chatbot_value[-1] = error_message
+
+        yield gr.update(value=""), gr.update(value=chatbot_value)
+
+    def welcome_prompt_select(chatbot_value, e: gr.EventData):
+        prompt_text = e._data["payload"][0]["value"]["description"]
+        return gr.update(value=prompt_text)
+
+    # Bind submit button & textbox events
+    message.submit(agent_chat_handler, [message, chatbot], [message, chatbot])
+    submit_btn.click(agent_chat_handler, [
+        message, chatbot], [message, chatbot])
+    chatbot.welcome_prompt_select(fn=welcome_prompt_select,
+                                  inputs=[chatbot],
+                                  outputs=[message])
+
+    # Clear chat
+    def clear_chat():
+        username = session.value.get("user")
+        if username:
+            clear_history(username)
+        # Return sample messages to demonstrate features
+        from utils.helpers import create_sample_chatbot_messages
+        return gr.update(value=create_sample_chatbot_messages())
+
+    clear_btn.click(clear_chat, None, chatbot)
+
+    # Speech input handler (only when an audio component is provided)
+    if audio_input:
+        def handle_speech_input(audio_filepath):
+            if not audio_filepath:
+                # Two outputs are bound below, so always return two updates
+                return gr.update(), gr.update()
+
+            try:
+                time.sleep(0.2)
+                transcribed_text = transcribe_speech(audio_filepath)
+                current_text = message.value or ""
+                updated_text = (current_text + " " + transcribed_text).strip()
+                return gr.update(value=updated_text), gr.update(value=None)
+            except Exception as e:
+                print(f"Speech transcription error: {str(e)}")
+                return gr.update(), 
gr.update(value=None) + + audio_input.change(handle_speech_input, + inputs=[audio_input], + outputs=[message, audio_input]) + + +def handle_load_history(u): + history = load_history(u) + # If no history exists, return sample messages to demonstrate features + if not history: + from utils.helpers import create_sample_chatbot_messages + return create_sample_chatbot_messages() + return history + + +def handle_login(u, p, state): + success, msg = login_user(u, p) + if success: + state.value["user"] = u + return True, msg, u + return False, msg, [] + + +def handle_register(u, p, state): + success, msg = register_user(u, p) + return success, msg + + +def handle_logout(state): + logout_user(state) + return "" + + +def create_health_dashboard(user_id): + """Create health dashboard with insights from health context""" + if not user_id: + return { + 'health_score': 0, + 'risks': [], + 'fitness_metrics': {}, + 'health_history': [] + } + + try: + health_context = HealthContext(user_id, data_store) + analyzer = HealthAnalyzer(health_context) + tracker = FitnessTracker(health_context) + + # Calculate health metrics + health_score = analyzer.calculate_health_score() + risks = analyzer.identify_health_risks() + fitness_metrics = tracker.calculate_progress_metrics() + + # Get health history + health_history = health_context.get_health_history() + + return { + 'health_score': health_score, + 'risks': risks, + 'fitness_metrics': fitness_metrics, + 'health_history': health_history, + 'profile': health_context.get_user_profile().to_dict() + } + except Exception as e: + print(f"Error creating health dashboard: {e}") + return { + 'health_score': 0, + 'risks': [], + 'fitness_metrics': {}, + 'health_history': [], + 'error': str(e) + } diff --git a/ui/layout.py b/ui/layout.py new file mode 100644 index 0000000000000000000000000000000000000000..1d0c69c7983fd7d0d0e606eca060af53fe18e1a9 --- /dev/null +++ b/ui/layout.py @@ -0,0 +1,390 @@ +import gradio as gr +from .components import ( + create_header, + create_chatbot, + create_input_row, + create_clear_button, + create_top_header, + create_login_form, +) +from .handlers import ( + setup_handlers, + handle_login, + handle_register, + handle_logout, + handle_load_history +) +from utils.helpers import convert_chatbot_messages_to_list +from auth.db import init_db +import modelscope_studio.components.antdx as antdx +import modelscope_studio.components.base as ms +import modelscope_studio.components.antd as antd + +init_db() + +theme = gr.themes.Soft() + +def create_auth(session, chat_column, chatbot): + with gr.Column() as auth_block: + with gr.Row(): + with gr.Column(scale=8): + create_header() + with gr.Column(scale=2): + top_row, welcome_text, login_btn, logout_btn = create_top_header() + login_form, username, password, login_submit, register_btn, back_btn, status = create_login_form() + + # Sự kiện: bấm nút đăng nhập → hiện form, ẩn khu vực chat + login_btn.click( + lambda: ( + gr.update(visible=False), # Ẩn nút đăng nhập + gr.update(visible=True), # Hiện form đăng nhập + gr.update(visible=False), # Ẩn khu vực + gr.update(value="", visible=False) # Ẩn message + + ), + None, + [login_btn, login_form, chat_column, welcome_text] + ) + + # Sự kiện: bấm nút đăng xuất → xóa session, ẩn lời chào + def logout_handler(): + msg = handle_logout(session) + return ( + msg, + gr.update(visible=True), # Hiện nút đăng nhập + gr.update(visible=False), # Ẩn nút đăng xuất + gr.update(value="", visible=True), # Hiện message + gr.update(visible=True), # Hiện khu vực chat + 
gr.update(value=[])  # Reset chatbot
+            )
+
+        logout_btn.click(
+            logout_handler,
+            None,
+            [status, login_btn, logout_btn, welcome_text, chat_column, chatbot]
+        )
+
+        # Event: back button pressed → return to the chat view
+        def return_handler():
+            return (
+                "",
+                gr.update(visible=True),   # Show login button
+                gr.update(visible=False),  # Hide login form
+                gr.update(visible=True),   # Show chat area
+                gr.update(value="", visible=True)  # Show message
+            )
+
+        back_btn.click(
+            return_handler,
+            None,
+            [status, login_btn, login_form, chat_column, welcome_text]
+        )
+
+        # Event: authenticate on login submit
+        def login_submit_handler(u, p):
+            success, msg, username = handle_login(u, p, session)
+            if success:
+                welcome = f"👋 Xin chào, {username.title()}!"
+                history = handle_load_history(username)
+                return (
+                    msg,
+                    gr.update(visible=True),    # Show chat_column
+                    gr.update(visible=False),   # Hide login form
+                    gr.update(visible=True),    # Show logout button
+                    gr.update(visible=False),   # Hide login button
+                    gr.update(value=welcome, visible=True),  # Show welcome text
+                    gr.update(value=history)
+                )
+            else:
+                return (
+                    msg,
+                    gr.update(visible=False),
+                    gr.update(visible=True),
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    gr.update(value="", visible=True),
+                    gr.update(value=[])
+                )
+
+        login_submit.click(
+            login_submit_handler,
+            inputs=[username, password],
+            outputs=[status, chat_column, login_form,
+                     logout_btn, login_btn, welcome_text, chatbot]
+        )
+
+        # Event: account registration
+        def register_handler(u, p):
+            success, msg = handle_register(u, p, session)
+            return msg
+
+        register_btn.click(
+            register_handler,
+            inputs=[username, password],
+            outputs=[status]
+        )
+
+    return {
+        "auth_block": auth_block,
+        "top_row": top_row,
+        "login_form": login_form,
+        "welcome_text": welcome_text,
+        "login_btn": login_btn,
+        "logout_btn": logout_btn,
+        "username": username,
+        "password": password,
+        "login_submit": login_submit,
+        "register_btn": register_btn,
+        "status": status
+    }
+
+
+def create_chat_column(session):
+    """Create the chatbot interface"""
+    with gr.Column(visible=True) as chat_column:
+        with gr.Column(elem_classes=["tab-container"]):
+            username = session.value.get("user")
+
+            # Chatbot with clear badge overlay (the badge is created once here)
+            with gr.Group(elem_classes=["chatbot-wrapper"]):
+                chatbot = create_chatbot()
+                clear_btn = create_clear_button()
+
+            # File display (1 line above input)
+            file_display = gr.Markdown(
+                "", elem_classes=["file-display-compact"], visible=False)
+
+            # Input row with textbox and submit button
+            row, message, submit_btn = create_input_row()
+
+            # Audio input component for speech-to-text
+            audio_input = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="🎤 Nhập bằng giọng nói",
+                waveform_options=gr.WaveformOptions(
+                    show_recording_waveform=False,
+                ),
+            )
+
+            setup_handlers(message, chatbot, submit_btn,
+                           clear_btn, session, audio_input=audio_input)
+
+    return chat_column, chatbot
+
+
+def build_layout():
+    """Build the Gradio UI layout for the healthcare AI assistant with modelscope_studio components."""
+    with gr.Blocks(
+        theme=theme,
+        css="""
+    .tab-title { font-size: 1.2rem; font-weight: 600; }
+    .tab-subtitle { font-size: 0.9rem; color: #555; margin-top: 0.3rem; }
+    .tab-container { border-radius: 10px; }
+    .general-tab { background-color: #e6f7ff; border: 1px solid #91caff; }
+    .nutrition-tab { background-color: #f6ffed; border: 1px solid #b7eb8f; }
+    .exercise-tab { background-color: #fff7e6; border: 1px solid #ffd591; }
+    .mental-tab 
{ background-color: #f9f0ff; border: 1px solid #d3adf7; } + .featured-tip { + background-color: rgba(255, 255, 255, 0.7); + padding: 10px; + border-radius: 8px; + margin-bottom: 15px; + border-left: 4px solid #4b8bf4; + } + .tab-selected { border-bottom: 3px solid #1890ff !important; font-weight: bold; } + .tab-header { margin-bottom: 12px; } + .tab-container button::after { bottom: -20px; } + .action-buttons { + display: flex; + gap: 10px; + } + .build-layout-container { + display: flex; + flex-direction: column; + } + + .auth-block { + order: 1; + } + + .chat-block { + order: 2; + } + + .login-form-container { + max-width: 70%; + margin: auto; + } + + .login-input label span { + background-color: unset + } + + .login-buttons { + display: flex; + justify-content: space-between; + margin-top: 15px; + } + + .login-btn, .register-btn { + width: 48%; + padding: 10px; + font-weight: bold; + border-radius: 8px; + } + + .login-status textarea { + margin-top: 15px; + background-color: #fffbe6; + border-left: 4px solid #faad14; + border-radius: 8px; + padding: 10px; + font-size: 0.95rem; + color: #333; + } + .file-display-compact { + font-size: 0.85rem; + color: #666; + padding: 4px 8px; + background-color: #f5f5f5; + border-radius: 4px; + margin: 0 0 8px 0; + } + .input-container { + position: relative; + } + .message-input textarea { + padding-right: 45px !important; + } + .small-button { + max-width: 200px; + } + + /* Speech input styling */ + #speech-input-panel { + background-color: #f0f7ff; + border: 2px solid #1890ff; + border-radius: 8px; + padding: 15px; + margin-top: 10px; + } + + .speech-audio-input { + border: 2px dashed #1890ff; + border-radius: 8px; + padding: 10px; + } + + .speech-status { + background-color: #e6f7ff; + border-left: 4px solid #1890ff; + } + + .speech-transcription { + background-color: #fafafa; + border: 1px solid #d9d9d9; + border-radius: 4px; + } + + /* Chatbot wrapper with badge overlay */ + .chatbot-wrapper { + position: relative !important; + background: transparent !important; + border: none !important; + padding: 0 !important; + } + .clear-badge { + position: absolute !important; + top: 8px !important; + right: 8px !important; + z-index: 10 !important; + font-size: 0.75rem !important; + padding: 4px 8px !important; + min-width: auto !important; + height: auto !important; + background-color: rgba(239, 68, 68, 0.9) !important; + border: none !important; + border-radius: 4px !important; + color: white !important; + cursor: pointer !important; + } + .clear-badge:hover { + background-color: rgba(220, 38, 38, 1) !important; + } + + /* Input block with inline button */ + .input-block { + background-color: #27272A; + border-radius: 8px; + padding: 12px; + } + .input-block > div { + background: transparent !important; + } + .input-label { + font-size: 0.75rem !important; + color: #a1a1aa !important; + margin-bottom: 5px !important; + background-color: #27272A !important; + } + .input-label p { + margin: 0 !important; + font-size: 0.75rem !important; + } + .input-row { + gap: 0 !important; + align-items: center !important;; + } + .input-row button { + min-width: 60px !important; + height: 40px !important; + margin: 0 !important; + border-radius: 0 6px 6px 0 !important; + font-size: 1rem; + } + .message-input { + height: 40px !important; + background: transparent !important; + } + .message-input textarea { + height: 40px !important; + min-height: 40px !important; + max-height: 40px !important; + border-radius: 6px 0 0 6px !important; + box-shadow: inset 0 2px 4px 
rgba(0, 0, 0, 0.3) !important; + background-color: #1a1a1c !important; + border: none !important; + color: #ffffff !important; + padding: 10px 12px !important; + } + .message-input textarea::placeholder { + color: #a1a1aa !important; + } + + /* Hide Gradio processing time display */ + .progress-text { + display: none !important; + } + footer { + display: none !important; + } + """ + ) as demo, ms.Application(), antdx.XProvider(): + session = gr.State({"user": None, "history": []}) + with antd.Flex(vertical=True, gap="middle"): + chat_column, chatbot = create_chat_column(session) + chat_column.elem_classes = ["chat-block"] + auth_ui = create_auth(session, chat_column, chatbot) + auth_ui["auth_block"].elem_classes = ["auth-block"] + + return demo diff --git a/utils/conversation_summarizer.py b/utils/conversation_summarizer.py new file mode 100644 index 0000000000000000000000000000000000000000..09082c73df05ca349b3f40f98f454e1aba888cf0 --- /dev/null +++ b/utils/conversation_summarizer.py @@ -0,0 +1,277 @@ +""" +Conversation Summarizer +Automatically summarizes long conversations to maintain context while reducing token usage +""" + +from typing import List, Dict, Any, Optional, Tuple +from config.settings import client, MODEL +import json + + +class ConversationSummarizer: + """ + Summarizes conversation history to maintain context with fewer tokens + """ + + def __init__(self, max_turns: int = 20, summary_trigger: int = 15): + """ + Initialize summarizer + + Args: + max_turns: Maximum conversation turns to keep in full detail + summary_trigger: Number of turns before triggering summarization + """ + self.max_turns = max_turns + self.summary_trigger = summary_trigger + self.summaries = [] # Store previous summaries + + def should_summarize(self, chat_history: List[Tuple[str, str]]) -> bool: + """ + Check if conversation should be summarized + + Args: + chat_history: List of (user_msg, bot_msg) tuples + + Returns: + True if summarization needed + """ + return len(chat_history) >= self.summary_trigger + + def summarize_conversation( + self, + chat_history: List[Tuple[str, str]], + user_profile: Optional[Dict[str, Any]] = None, + keep_recent: int = 5 + ) -> Dict[str, Any]: + """ + Summarize conversation history + + Args: + chat_history: Full conversation history + user_profile: User profile data for context + keep_recent: Number of recent turns to keep in full detail + + Returns: + Dict with summary and recent history + """ + if len(chat_history) <= keep_recent: + return { + 'summary': None, + 'recent_history': chat_history, + 'summarized_turns': 0 + } + + # Split into parts to summarize and recent to keep + to_summarize = chat_history[:-keep_recent] + recent = chat_history[-keep_recent:] + + # Generate summary + summary_text = self._generate_summary(to_summarize, user_profile) + + return { + 'summary': summary_text, + 'recent_history': recent, + 'summarized_turns': len(to_summarize) + } + + def _generate_summary( + self, + chat_history: List[Tuple[str, str]], + user_profile: Optional[Dict[str, Any]] = None + ) -> str: + """ + Generate summary using LLM + + Args: + chat_history: Conversation to summarize + user_profile: User profile for context + + Returns: + Summary text + """ + # Format conversation for summarization + conversation_text = self._format_conversation(chat_history) + + # Build prompt + prompt = f"""Summarize the following healthcare conversation concisely. Focus on: +1. User's health goals and concerns +2. Key information provided (age, weight, symptoms, etc.) +3. 
Main advice or recommendations given +4. Important context for future conversations + +User Profile: {json.dumps(user_profile, ensure_ascii=False) if user_profile else 'Not available'} + +Conversation: +{conversation_text} + +Provide a concise summary in Vietnamese (2-3 paragraphs max):""" + + try: + response = client.chat.completions.create( + model=MODEL, + messages=[ + {"role": "system", "content": "You are a helpful assistant that summarizes healthcare conversations concisely."}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + max_tokens=500 + ) + + summary = response.choices[0].message.content.strip() + return summary + + except Exception as e: + print(f"⚠️ Error generating summary: {e}") + # Fallback: simple text summary + return self._simple_summary(chat_history) + + def _format_conversation(self, chat_history: List[Tuple[str, str]]) -> str: + """Format conversation for summarization""" + formatted = [] + for i, (user_msg, bot_msg) in enumerate(chat_history, 1): + formatted.append(f"Turn {i}:") + formatted.append(f"User: {user_msg}") + formatted.append(f"Bot: {bot_msg[:200]}..." if len(bot_msg) > 200 else f"Bot: {bot_msg}") + formatted.append("") + return "\n".join(formatted) + + def _simple_summary(self, chat_history: List[Tuple[str, str]]) -> str: + """Simple fallback summary without LLM""" + topics = [] + for user_msg, _ in chat_history: + if any(keyword in user_msg.lower() for keyword in ['giảm cân', 'weight loss']): + topics.append('giảm cân') + if any(keyword in user_msg.lower() for keyword in ['tập', 'exercise', 'gym']): + topics.append('tập luyện') + if any(keyword in user_msg.lower() for keyword in ['ăn', 'dinh dưỡng', 'nutrition']): + topics.append('dinh dưỡng') + if any(keyword in user_msg.lower() for keyword in ['đau', 'triệu chứng', 'symptom']): + topics.append('triệu chứng') + + unique_topics = list(set(topics)) + return f"Đã trao đổi về: {', '.join(unique_topics)}. Tổng {len(chat_history)} lượt hội thoại." + + def get_context_for_agent( + self, + chat_history: List[Tuple[str, str]], + user_profile: Optional[Dict[str, Any]] = None, + max_context_turns: int = 10 + ) -> str: + """ + Get optimized context for agent (summary + recent history) + + Args: + chat_history: Full conversation history + user_profile: User profile data + max_context_turns: Maximum turns to include in context + + Returns: + Formatted context string + """ + if len(chat_history) <= max_context_turns: + # Short conversation, return as-is + return self._format_recent_history(chat_history) + + # Summarize older parts + result = self.summarize_conversation( + chat_history, + user_profile, + keep_recent=max_context_turns + ) + + context_parts = [] + + # Add summary if available + if result['summary']: + context_parts.append(f"📝 Tóm tắt cuộc trò chuyện trước ({result['summarized_turns']} lượt):") + context_parts.append(result['summary']) + context_parts.append("") + + # Add recent history + if result['recent_history']: + context_parts.append(f"💬 {len(result['recent_history'])} lượt hội thoại gần nhất:") + context_parts.append(self._format_recent_history(result['recent_history'])) + + return "\n".join(context_parts) + + def _format_recent_history(self, history: List[Tuple[str, str]]) -> str: + """Format recent history for context""" + formatted = [] + for user_msg, bot_msg in history[-5:]: # Last 5 turns + formatted.append(f"User: {user_msg}") + formatted.append(f"Bot: {bot_msg[:150]}..." 
if len(bot_msg) > 150 else f"Bot: {bot_msg}") + return "\n".join(formatted) + + def compress_history( + self, + chat_history: List[Tuple[str, str]], + target_turns: int = 10 + ) -> List[Tuple[str, str]]: + """ + Compress history by summarizing and keeping recent turns + + Args: + chat_history: Full history + target_turns: Target number of turns to keep + + Returns: + Compressed history with summary as first turn + """ + if len(chat_history) <= target_turns: + return chat_history + + result = self.summarize_conversation( + chat_history, + keep_recent=target_turns - 1 # -1 for summary turn + ) + + # Create compressed history + compressed = [] + + # Add summary as first turn + if result['summary']: + compressed.append(( + "[Tóm tắt cuộc trò chuyện trước]", + result['summary'] + )) + + # Add recent history + compressed.extend(result['recent_history']) + + return compressed + + def get_summary_stats(self, chat_history: List[Tuple[str, str]]) -> Dict[str, Any]: + """ + Get statistics about conversation + + Args: + chat_history: Conversation history + + Returns: + Statistics dict + """ + total_turns = len(chat_history) + total_user_chars = sum(len(user_msg) for user_msg, _ in chat_history) + total_bot_chars = sum(len(bot_msg) for _, bot_msg in chat_history) + + # Estimate tokens (rough: 1 token ≈ 4 chars for Vietnamese) + estimated_tokens = (total_user_chars + total_bot_chars) // 4 + + return { + 'total_turns': total_turns, + 'total_user_chars': total_user_chars, + 'total_bot_chars': total_bot_chars, + 'estimated_tokens': estimated_tokens, + 'should_summarize': self.should_summarize(chat_history) + } + + +# Global instance +_summarizer = None + +def get_summarizer() -> ConversationSummarizer: + """Get global summarizer instance""" + global _summarizer + if _summarizer is None: + _summarizer = ConversationSummarizer() + return _summarizer diff --git a/utils/helpers.py b/utils/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..1067eb82ddae8d746d11a698d1cbc00c2676b7a2 --- /dev/null +++ b/utils/helpers.py @@ -0,0 +1,403 @@ +""" +Chat Handler - Uses agent-based architecture with coordination +Clean, modular implementation with specialized agents and memory +""" + +from agents import route_to_agent, get_agent, AgentCoordinator +import logging + +# Setup logging +logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler('logs/chat_debug.log'), + logging.StreamHandler() + ] +) + +# Constants +MAX_MESSAGE_LENGTH = 2000 +MIN_MESSAGE_LENGTH = 2 +SPAM_THRESHOLD_GENTLE = 2 +SPAM_THRESHOLD_CONCERNED = 4 +SPAM_THRESHOLD_FIRM = 6 + +# Global coordinator instance (maintains memory across requests) +_coordinator = None + +def get_coordinator(): + """Get or create global coordinator instance""" + global _coordinator + if _coordinator is None: + _coordinator = AgentCoordinator() + return _coordinator + + +def extract_message_text(message): + """ + Extract text from message which can be either string or dict (from MultimodalInput) + + Args: + message: str or dict with 'text' and 'files' keys + + Returns: + tuple: (text_content, files_list) + """ + if isinstance(message, dict): + # MultimodalInput format: {"text": "...", "files": [...]} + text_content = message.get("text", "").strip() + files_list = message.get("files", []) + return text_content, files_list + elif isinstance(message, str): + # Regular string message + return message.strip(), [] + else: + return "", [] 
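The contract of `extract_message_text()` above, shown as a short usage sketch (the inputs are assumed examples; the dict shape is the MultimodalInput format described in its docstring):

```python
# String input: stripped text, no files
assert extract_message_text("  Tôi bị đau đầu  ") == ("Tôi bị đau đầu", [])
# MultimodalInput dict: text plus attached files
assert extract_message_text({"text": "Phân tích giúp tôi", "files": ["xray.png"]}) == (
    "Phân tích giúp tôi", ["xray.png"])
# Anything else degrades to an empty message
assert extract_message_text(None) == ("", [])
```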
+ + +def convert_chatbot_messages_to_list(chat_history): + """ + Convert ChatbotDataMessage objects to list of [user, bot] pairs + + Args: + chat_history: List of ChatbotDataMessage objects or list of lists + + Returns: + list: List of [user_msg, bot_msg] pairs + """ + if not chat_history: + return [] + + # If already in list format, return as is + if isinstance(chat_history[0], (list, tuple)): + logger.debug(f"convert_chatbot_messages_to_list: Already in list format, len={len(chat_history)}") + return chat_history + + # Convert ChatbotDataMessage objects to list format + # Messages come as: [user1, bot1, user2, bot2, ...] + result = [] + i = 0 + logger.debug(f"convert_chatbot_messages_to_list: Converting {len(chat_history)} ChatbotDataMessage objects") + + while i < len(chat_history): + user_msg = "" + bot_msg = "" + + # === LẤY USER MESSAGE === + if i < len(chat_history): + item = chat_history[i] + role = getattr(item, 'role', None) or (item.get('role') if isinstance(item, dict) else None) + content = getattr(item, 'content', None) or (item.get('content') if isinstance(item, dict) else None) + + if role == 'user': + user_msg = content + i += 1 + elif role in ('bot', 'assistant'): + i += 1 + continue + else: + i += 1 + continue + + # === LẤY BOT MESSAGE === + if i < len(chat_history): + item = chat_history[i] + role = getattr(item, 'role', None) or (item.get('role') if isinstance(item, dict) else None) + content = getattr(item, 'content', None) or (item.get('content') if isinstance(item, dict) else None) + + if role in ('bot', 'assistant'): + bot_msg = content + i += 1 + + if user_msg or bot_msg: + result.append([user_msg, bot_msg]) + + + logger.debug(f"convert_chatbot_messages_to_list: Result len={len(result)}") + return result + + +def convert_list_to_chatbot_messages(chat_history_list): + """ + Convert list of [user, bot] pairs to ChatbotDataMessage objects with enhanced features + + Args: + chat_history_list: List of [user_msg, bot_msg] pairs + + Returns: + list: List of ChatbotDataMessage objects with various features + """ + from modelscope_studio.components.pro.chatbot import ChatbotDataMessage + + if not chat_history_list: + return [] + + # If already in ChatbotDataMessage format, return as is + if chat_history_list and hasattr(chat_history_list[0], 'role'): + logger.debug(f"convert_list_to_chatbot_messages: Already in ChatbotDataMessage format") + return chat_history_list + + result = [] + logger.debug(f"convert_list_to_chatbot_messages: Converting {len(chat_history_list)} pairs to ChatbotDataMessage") + + for i, (user_msg, bot_msg) in enumerate(chat_history_list): + # Add user message + if user_msg: + result.append(ChatbotDataMessage( + role="user", + content=user_msg + )) + + # Add bot message with enhanced features + if bot_msg: + # Determine message features based on content and position + bot_message_config = { + "role": "assistant", + "content": bot_msg + } + + result.append(ChatbotDataMessage(**bot_message_config)) + + logger.debug(f"convert_list_to_chatbot_messages: Result len={len(result)}") + return result + + +def create_sample_chatbot_messages(): + """ + Create sample ChatbotDataMessage objects demonstrating various features + + Returns: + list: List of sample ChatbotDataMessage objects + """ + from modelscope_studio.components.pro.chatbot import ChatbotDataMessage + + return [ + ChatbotDataMessage(role="user", content="Hello"), + ChatbotDataMessage(role="assistant", content="World"), + ChatbotDataMessage(role="assistant", + content="Liked message", + 
meta=dict(feedback="like")),
+        ChatbotDataMessage(role="assistant",
+                           content="Message only has copy button",
+                           actions=["copy"]),
+        ChatbotDataMessage(
+            role="assistant",
+            content="Pending message will not show action buttons",
+            status="pending"),
+        ChatbotDataMessage(
+            role="assistant",
+            content="Bot 1",
+            header="bot1",
+            avatar="https://api.dicebear.com/7.x/miniavs/svg?seed=1"),
+        ChatbotDataMessage(
+            role="assistant",
+            content="Bot 2",
+            header="bot2",
+            avatar="https://api.dicebear.com/7.x/miniavs/svg?seed=2"),
+    ]
+
+
+def chat_logic(message, chat_history, user_id=None):
+    """
+    Main chat logic using agent routing system
+
+    Args:
+        message (str or dict): User's message (string or MultimodalInput dict)
+        chat_history (list): List of ChatbotDataMessage objects or [user_msg, bot_msg] pairs
+        user_id (str): User ID for data persistence
+
+    Returns:
+        tuple: ("", updated_chat_history)
+    """
+
+    # ===== INPUT EXTRACTION =====
+
+    # Extract text and files from message (handles both string and dict formats)
+    message_text, files_list = extract_message_text(message)
+
+    # Store original message for history
+    original_message = message if isinstance(message, str) else message_text
+
+    # Convert ChatbotDataMessage objects to list format if needed
+    logger.debug(f"chat_logic input - chat_history type: {type(chat_history)}, len: {len(chat_history) if chat_history else 0}")
+    if chat_history and len(chat_history) > 0:
+        logger.debug(f"chat_logic input - first item type: {type(chat_history[0])}, has role: {hasattr(chat_history[0], 'role')}")
+        if hasattr(chat_history[0], 'content'):
+            logger.debug(f"chat_history[0].content: {str(chat_history[0].content)[:50]}")
+    chat_history_list = convert_chatbot_messages_to_list(chat_history)
+    logger.debug(f"chat_logic after convert - chat_history_list len: {len(chat_history_list)}")
+    if chat_history_list:
+        logger.debug(f"chat_history_list[0]: {chat_history_list[0]}")
+
+    # ===== INPUT VALIDATION =====
+
+    # Check for empty messages (but allow short acknowledgments like "ờ", "ok", "ừ")
+    acknowledgments = ["ờ", "ok", "oke", "ừ", "uhm", "à", "ô", "ồ", "được", "rồi", "vâng", "dạ"]
+    if not message_text:
+        bot_response = "Bạn chưa nhập gì cả. Hãy cho tôi biết bạn cần tư vấn về vấn đề sức khỏe gì nhé! 😊"
+        updated_list = chat_history_list + [[original_message, bot_response]]
+        updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list)
+        return "", updated_chatbot_messages
+
+    # Allow short acknowledgments to pass through
+    # (message_text is already stripped by extract_message_text)
+    if message_text.lower() in acknowledgments:
+        # Let the agent handle acknowledgments naturally
+        pass  # Continue to agent
+
+    # Check for very long messages
+    if len(message_text) > MAX_MESSAGE_LENGTH:
+        bot_response = ("Tin nhắn của bạn quá dài! 😅\n\n"
+                        "Để tôi có thể tư vấn tốt hơn, hãy chia nhỏ câu hỏi hoặc tóm tắt vấn đề chính của bạn.\n\n"
+                        "Ví dụ: 'Tôi bị đau đầu 3 ngày, có buồn nôn' thay vì mô tả quá chi tiết.")
+        updated_list = chat_history_list + [[original_message, bot_response]]
+        updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list)
+        return "", updated_chatbot_messages
+
+    # ===== SMART GREETING DETECTION =====
+
+    # Detect greeting keywords
+    greeting_keywords = [
+        "chào", "xin chào", "hello", "hi", "hey", "helo", "hê lô",
+        "chao", "alo", "alô", "good morning", "good afternoon", "good evening",
+        "buổi sáng", "buổi chiều", "buổi tối", "chào buổi",
+        "ê", "ê ơi", "ơi", "ê bot", "ê bạn",  # Vietnamese casual greetings
+        "này", "nãy", "nè", "kìa", "ê này"  # More casual Vietnamese
+    ]
+
+    # Check if message is ONLY a greeting (case-insensitive, strip punctuation)
+    message_clean = message_text.lower().rstrip('!.,?')
+    is_pure_greeting = message_clean in greeting_keywords
+
+    # Check if it's the first message
+    is_first_message = len(chat_history_list) == 0
+
+    # CASE 1: Pure greeting only (e.g., "chào", "hello")
+    if is_pure_greeting:
+        greeting_response = """Chào bạn! 👋 Mình là trợ lý sức khỏe AI của bạn!
+
+🏥 **Mình có thể giúp gì cho bạn?**
+
+Mình có thể tư vấn về:
+• 💊 **Triệu chứng & Sức khỏe** - Phân tích triệu chứng, đề xuất khám bệnh
+• 🥗 **Dinh dưỡng** - Lập kế hoạch ăn uống, tính calo, macro
+• 💪 **Tập luyện** - Tạo lịch tập gym, hướng dẫn kỹ thuật
+• 🧠 **Sức khỏe tâm thần** - Hỗ trợ stress, lo âu, cải thiện giấc ngủ
+
+Bạn đang quan tâm đến vấn đề gì? Hãy chia sẻ với mình nhé! 😊"""
+
+        updated_list = chat_history_list + [[original_message, greeting_response]]
+        updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list)
+        return "", updated_chatbot_messages
+
+    # CASE 2: First message with real question (e.g., "đau lưng", "tôi bị đau đầu")
+    # Let agent handle it with smart greeting + answer
+
+    # ===== SPAM DETECTION =====
+
+    if len(chat_history_list) >= 1:
+        all_user_messages = [msg[0] for msg in chat_history_list]
+        repeat_count = all_user_messages.count(message_text)
+
+        # Level 1: Gentle response (second repeat)
+        if repeat_count == SPAM_THRESHOLD_GENTLE:
+            bot_response = ("Tôi thấy bạn vừa gửi tin nhắn này lần thứ hai rồi. 😊\n\n"
+                            "Có phải câu trả lời của tôi chưa giải quyết được vấn đề bạn đang gặp phải không? "
+                            "Nếu vậy, bạn có thể chia sẻ thêm chi tiết để tôi hiểu rõ hơn không?\n\n"
+                            "Tôi ở đây để lắng nghe và hỗ trợ bạn. Hãy kể cho tôi nghe thêm nhé! 💙")
+            updated_list = chat_history_list + [[original_message, bot_response]]
+            updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list)
+            return "", updated_chatbot_messages
+
+        # Level 2: Concerned response (4-5 times)
+        elif SPAM_THRESHOLD_CONCERNED <= repeat_count < SPAM_THRESHOLD_FIRM:
+            bot_response = ("Tôi nhận thấy bạn đang lặp lại cùng một câu nhiều lần. Tôi hơi lo lắng - "
+                            "có phải bạn đang gặp khó khăn trong việc diễn đạt, hay bạn cảm thấy không được lắng nghe?\n\n"
+                            "Hãy thử cách này nhé:\n"
+                            "• Nếu bạn đang khó chịu hay đau đớn - hãy mô tả cảm giác đó\n"
+                            "• Nếu bạn cần thông tin cụ thể - hãy hỏi trực tiếp\n"
+                            "• Nếu câu trả lời trước không hữu ích - hãy nói cho tôi biết tại sao\n\n"
+                            "Bạn có muốn bắt đầu lại cuộc trò chuyện không? Tôi sẵn sàng lắng nghe. 🙏")
+            updated_list = chat_history_list + [[original_message, bot_response]]
+            updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list)
+            return "", updated_chatbot_messages
+
+        # Level 3: Firm boundary (6+ times)
+        elif repeat_count >= SPAM_THRESHOLD_FIRM:
+            bot_response = ("Này, tôi cần nói thẳng với bạn một chút. 😔\n\n"
+                            "Bạn đã gửi tin nhắn giống nhau " + str(repeat_count) + " lần rồi. 
" + "**Nếu bạn thực sự cần giúp đỡ:**\n" + "Hãy nhấn nút \"Xóa lịch sử\" và bắt đầu lại. Lần này, hãy nói với tôi điều bạn thực sự cần.\n\n" + "Tôi hy vọng bạn hiểu. Chúc bạn khỏe mạnh! 💚") + updated_list = chat_history_list + [[original_message, bot_response]] + updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list) + return "", updated_chatbot_messages + + # ===== AGENT ROUTING ===== + + try: + # Option 1: Use coordinator for memory & multi-agent support (NEW!) + USE_COORDINATOR = True # Set to False to use old routing + + if USE_COORDINATOR: + coordinator = get_coordinator() + # Pass user_id for data persistence and file analysis + # Ensure chat_history_list is in correct format for coordinator + response = coordinator.handle_query(message_text, chat_history_list, user_id=user_id) + + # Convert updated list back to ChatbotDataMessage format + updated_list = chat_history_list + [[original_message, response]] + updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list) + return "", updated_chatbot_messages + + # Option 2: Original routing (fallback) + # Route to appropriate agent using function calling + routing_result = route_to_agent(message_text, chat_history_list) + + agent_name = routing_result['agent'] + parameters = routing_result['parameters'] + + # Get the specialized agent + agent = get_agent(agent_name) + + # Let the agent handle the request + response = agent.handle(parameters, chat_history_list) + logger.debug(f"Agent {agent_name} response type: {type(response)}") + + # Convert updated list back to ChatbotDataMessage format + updated_list = chat_history_list + [[original_message, response]] + updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list) + return "", updated_chatbot_messages + + except Exception as e: + # Fallback to general health agent if routing fails + logger.error(f"Agent routing error: {e}", exc_info=True) + + try: + from agents.specialized.general_health_agent import GeneralHealthAgent + agent = GeneralHealthAgent() + # Ensure chat_history_list is properly formatted + logger.debug(f"Fallback agent - chat_history_list type: {type(chat_history_list)}, len: {len(chat_history_list)}") + response = agent.handle({"user_query": message_text}, chat_history_list) + logger.debug(f"Fallback agent response type: {type(response)}") + + # Convert updated list back to ChatbotDataMessage format + updated_list = chat_history_list + [[original_message, response]] + updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list) + return "", updated_chatbot_messages + except Exception as e2: + # Ultimate fallback + logger.error(f"General health agent error: {e2}", exc_info=True) + error_response = f"""Xin lỗi, tôi gặp chút vấn đề kỹ thuật. 😅 + +Lỗi: {str(e2)} + +Bạn có thể thử: +1. Hỏi lại câu hỏi +2. Làm mới trang và thử lại +3. Hoặc liên hệ hỗ trợ kỹ thuật + +Tôi xin lỗi vì sự bất tiện này! 
🙏""" + # Convert updated list back to ChatbotDataMessage format + updated_list = chat_history_list + [[original_message, error_response]] + updated_chatbot_messages = convert_list_to_chatbot_messages(updated_list) + return "", updated_chatbot_messages diff --git a/utils/memory.py b/utils/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..c4997f0a95d2b8fafb25a8d5b36d4d92398e88ee --- /dev/null +++ b/utils/memory.py @@ -0,0 +1,277 @@ +""" +Conversation Memory - Shared state across all agents +Allows agents to remember user data and coordinate with each other +""" + +from typing import Dict, Any, List, Optional +from datetime import datetime +import json + + +class ConversationMemory: + """ + Shared memory system for all agents + Stores user profile, extracted data, and conversation state + Supports session persistence across app restarts + """ + + def __init__(self, user_id: Optional[str] = None, session_store=None): + # User profile data (shared across all agents) + self.user_profile = { + 'age': None, + 'gender': None, + 'weight': None, + 'height': None, + 'bmi': None, + 'activity_level': None, + 'fitness_level': None, + 'health_conditions': [], + 'medications': [], + 'allergies': [], + 'dietary_restrictions': [] + } + + # Agent-specific extracted data + self.extracted_data = { + 'nutrition': {}, + 'exercise': {}, + 'symptom': {}, + 'mental_health': {}, + 'general_health': {} + } + + # Conversation state + self.conversation_state = { + 'current_topic': None, + 'current_agent': None, + 'previous_agent': None, + 'data_collected': {}, + 'pending_questions': [], + 'conversation_flow': [] + } + + # Metadata + self.metadata = { + 'session_id': None, + 'user_id': user_id, + 'started_at': datetime.now().isoformat(), + 'last_updated': datetime.now().isoformat() + } + + # Session persistence + self.user_id = user_id + self.session_store = session_store + self.auto_save = True # Auto-save on updates + + # Load existing session if user_id provided + if user_id and session_store: + self._load_session() + + # ===== User Profile Management ===== + + def update_profile(self, key: str, value: Any) -> None: + """Update user profile data""" + if key in self.user_profile: + self.user_profile[key] = value + self.metadata['last_updated'] = datetime.now().isoformat() + + # Auto-save if enabled + if self.auto_save and self.user_id and self.session_store: + self._save_session() + + def get_profile(self, key: str) -> Any: + """Get user profile data""" + return self.user_profile.get(key) + + def get_full_profile(self) -> Dict[str, Any]: + """Get complete user profile""" + return self.user_profile.copy() + + def get_missing_fields(self, required_fields: List[str]) -> List[str]: + """Check what required fields are still missing""" + return [field for field in required_fields + if not self.user_profile.get(field)] + + def has_complete_profile(self, required_fields: List[str]) -> bool: + """Check if all required fields are filled""" + return len(self.get_missing_fields(required_fields)) == 0 + + # ===== Agent Data Management ===== + + def add_agent_data(self, agent_name: str, key: str, value: Any) -> None: + """Add agent-specific data""" + if agent_name not in self.extracted_data: + self.extracted_data[agent_name] = {} + + self.extracted_data[agent_name][key] = value + self.metadata['last_updated'] = datetime.now().isoformat() + + def get_agent_data(self, agent_name: str, key: str = None) -> Any: + """Get agent-specific data""" + agent_data = self.extracted_data.get(agent_name, {}) + + if key: 
+ return agent_data.get(key) + return agent_data + + def get_all_agent_data(self) -> Dict[str, Any]: + """Get all agent data""" + return self.extracted_data.copy() + + # ===== Conversation State Management ===== + + def set_current_agent(self, agent_name: str) -> None: + """Set current active agent""" + self.conversation_state['previous_agent'] = self.conversation_state['current_agent'] + self.conversation_state['current_agent'] = agent_name + + # Add to conversation flow + self.conversation_state['conversation_flow'].append({ + 'agent': agent_name, + 'timestamp': datetime.now().isoformat() + }) + + def get_current_agent(self) -> Optional[str]: + """Get current active agent""" + return self.conversation_state['current_agent'] + + def get_previous_agent(self) -> Optional[str]: + """Get previous agent""" + return self.conversation_state['previous_agent'] + + def set_current_topic(self, topic: str) -> None: + """Set current conversation topic""" + self.conversation_state['current_topic'] = topic + + def get_current_topic(self) -> Optional[str]: + """Get current conversation topic""" + return self.conversation_state['current_topic'] + + def add_pending_question(self, question: str, priority: int = 0) -> None: + """Add a pending question to ask user""" + self.conversation_state['pending_questions'].append({ + 'question': question, + 'priority': priority, + 'added_at': datetime.now().isoformat() + }) + + # Sort by priority (higher first) + self.conversation_state['pending_questions'].sort( + key=lambda x: x['priority'], + reverse=True + ) + + def get_next_pending_question(self) -> Optional[str]: + """Get next pending question""" + if self.conversation_state['pending_questions']: + return self.conversation_state['pending_questions'][0]['question'] + return None + + def clear_pending_questions(self) -> None: + """Clear all pending questions""" + self.conversation_state['pending_questions'] = [] + + def get_conversation_flow(self) -> List[Dict[str, Any]]: + """Get conversation flow history""" + return self.conversation_state['conversation_flow'] + + # ===== Context Summary ===== + + def get_context_summary(self) -> str: + """Get a summary of current context for agents""" + summary_parts = [] + + # User profile summary + profile = self.user_profile + if profile['age']: + summary_parts.append(f"User: {profile['age']} tuổi") + if profile['gender']: + summary_parts.append(f"giới tính {profile['gender']}") + if profile['weight'] and profile['height']: + summary_parts.append(f"{profile['weight']}kg, {profile['height']}cm") + + # Current topic + if self.conversation_state['current_topic']: + summary_parts.append(f"Topic: {self.conversation_state['current_topic']}") + + # Previous agent + if self.conversation_state['previous_agent']: + summary_parts.append(f"Previous agent: {self.conversation_state['previous_agent']}") + + return " | ".join(summary_parts) if summary_parts else "No context yet" + + # ===== Serialization ===== + + def to_dict(self) -> Dict[str, Any]: + """Convert memory to dictionary""" + return { + 'user_profile': self.user_profile, + 'extracted_data': self.extracted_data, + 'conversation_state': self.conversation_state, + 'metadata': self.metadata + } + + def to_json(self) -> str: + """Convert memory to JSON string""" + return json.dumps(self.to_dict(), ensure_ascii=False, indent=2) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'ConversationMemory': + """Create memory from dictionary""" + memory = cls() + memory.user_profile = data.get('user_profile', memory.user_profile) 
+        memory.extracted_data = data.get('extracted_data', memory.extracted_data)
+        memory.conversation_state = data.get('conversation_state', memory.conversation_state)
+        memory.metadata = data.get('metadata', memory.metadata)
+        return memory
+
+    # ===== Session Persistence =====
+
+    def _save_session(self) -> None:
+        """Save current memory state to session store"""
+        if not self.user_id or not self.session_store:
+            return
+
+        session_data = self.to_dict()
+        self.session_store.save_session(self.user_id, session_data)
+
+    def _load_session(self) -> bool:
+        """
+        Load memory state from session store
+
+        Returns:
+            True if session loaded, False otherwise
+        """
+        if not self.user_id or not self.session_store:
+            return False
+
+        session_data = self.session_store.load_session(self.user_id)
+
+        if session_data:
+            self.user_profile = session_data.get('user_profile', self.user_profile)
+            self.extracted_data = session_data.get('extracted_data', self.extracted_data)
+            self.conversation_state = session_data.get('conversation_state', self.conversation_state)
+            self.metadata = session_data.get('metadata', self.metadata)
+            print(f"✅ Loaded session for user {self.user_id}")
+            return True
+
+        print(f"ℹ️ No existing session found for user {self.user_id}, starting fresh")
+        return False
+
+    def save_session_now(self) -> None:
+        """Manually save session (useful when auto_save is disabled)"""
+        self._save_session()
+
+    def clear_session(self) -> None:
+        """Clear session from storage"""
+        if self.user_id and self.session_store:
+            self.session_store.delete_session(self.user_id)
+
+    # ===== Utility Methods =====
+
+    def clear(self) -> None:
+        """Clear all memory (start fresh conversation)"""
+        # Re-running __init__ with no arguments resets every field,
+        # including the user_id/session_store binding
+        self.__init__()
+
+    def __repr__(self) -> str:
+        return f"<ConversationMemory(user_id={self.user_id}, current_agent={self.get_current_agent()})>"
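The memory API above is easiest to see end to end. Below is a minimal usage sketch; the `utils.conversation_memory` import path is an assumption (the file header for this hunk appears earlier in the diff), and the no-argument constructor is the one `clear()` and `from_dict()` already rely on:

```python
from utils.conversation_memory import ConversationMemory  # assumed module path

memory = ConversationMemory()

# Route to a specialist and queue follow-up questions by priority
memory.set_current_agent("nutrition")
memory.set_current_topic("giảm cân")
memory.add_pending_question("Bạn ăn mấy bữa một ngày?", priority=1)
memory.add_pending_question("Bạn có dị ứng thực phẩm nào không?", priority=2)

print(memory.get_next_pending_question())  # priority-2 question comes first
print(memory.get_context_summary())        # "Topic: giảm cân"

# Serialization round-trip, e.g. before handing state to another agent
restored = ConversationMemory.from_dict(memory.to_dict())
assert restored.get_current_agent() == "nutrition"
```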
diff --git a/utils/session_store.py b/utils/session_store.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f051bffc1d31ea4327ba54e48d34f0f086b86a1
--- /dev/null
+++ b/utils/session_store.py
@@ -0,0 +1,151 @@
+"""
+Session Store - Persistent storage for conversation memory
+Saves and loads user sessions across app restarts
+"""
+
+import json
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, Dict, Any
+
+
+class SessionStore:
+    """Manages persistent storage of conversation sessions"""
+
+    def __init__(self, storage_dir: str = "sessions"):
+        self.storage_dir = Path(storage_dir)
+        self.storage_dir.mkdir(parents=True, exist_ok=True)
+
+    def save_session(self, user_id: str, session_data: Dict[str, Any]) -> None:
+        """
+        Save user session to disk
+
+        Args:
+            user_id: Unique user identifier
+            session_data: Session data to save (memory state)
+        """
+        session_file = self.storage_dir / f"{user_id}.json"
+
+        # Add metadata
+        session_data['last_updated'] = datetime.now().isoformat()
+        session_data['user_id'] = user_id
+
+        with open(session_file, 'w', encoding='utf-8') as f:
+            json.dump(session_data, f, ensure_ascii=False, indent=2)
+
+    def load_session(self, user_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Load user session from disk
+
+        Args:
+            user_id: Unique user identifier
+
+        Returns:
+            Session data or None if not found
+        """
+        session_file = self.storage_dir / f"{user_id}.json"
+
+        if not session_file.exists():
+            return None
+
+        try:
+            with open(session_file, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except (json.JSONDecodeError, IOError) as e:
+            print(f"⚠️ Error loading session for {user_id}: {e}")
+            return None
+
+    def delete_session(self, user_id: str) -> bool:
+        """
+        Delete user session
+
+        Args:
+            user_id: Unique user identifier
+
+        Returns:
+            True if deleted, False if not found
+        """
+        session_file = self.storage_dir / f"{user_id}.json"
+
+        if session_file.exists():
+            session_file.unlink()
+            return True
+        return False
+
+    def list_sessions(self) -> list:
+        """
+        List all stored sessions
+
+        Returns:
+            List of user IDs with sessions
+        """
+        return [f.stem for f in self.storage_dir.glob("*.json")]
+
+    def get_session_info(self, user_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get session metadata without loading full data
+
+        Args:
+            user_id: Unique user identifier
+
+        Returns:
+            Session metadata or None
+        """
+        session_file = self.storage_dir / f"{user_id}.json"
+
+        if not session_file.exists():
+            return None
+
+        try:
+            with open(session_file, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            return {
+                'user_id': data.get('user_id'),
+                'last_updated': data.get('last_updated'),
+                'has_profile': bool(data.get('user_profile')),
+                # These fields live inside the saved conversation_state dict
+                'current_agent': data.get('conversation_state', {}).get('current_agent'),
+                'conversation_count': len(data.get('conversation_state', {}).get('conversation_flow', []))
+            }
+        except (json.JSONDecodeError, IOError):
+            return None
+
+    def cleanup_old_sessions(self, days: int = 30) -> int:
+        """
+        Delete sessions older than specified days
+
+        Args:
+            days: Number of days to keep sessions
+
+        Returns:
+            Number of sessions deleted
+        """
+        from datetime import timedelta
+
+        cutoff_date = datetime.now() - timedelta(days=days)
+        deleted_count = 0
+
+        for session_file in self.storage_dir.glob("*.json"):
+            try:
+                with open(session_file, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+                last_updated = datetime.fromisoformat(data.get('last_updated', ''))
+
+                if last_updated < cutoff_date:
+                    session_file.unlink()
+                    deleted_count += 1
+            except (json.JSONDecodeError, IOError, ValueError):
+                continue
+
+        return deleted_count
+
+
+# Global instance
+_session_store = None
+
+def get_session_store() -> SessionStore:
+    """Get global session store instance"""
+    global _session_store
+    if _session_store is None:
+        _session_store = SessionStore()
+    return _session_store
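Taken together, `SessionStore` is a small JSON-file-per-user persistence layer. A short sketch of the lifecycle, using an illustrative user id and a payload shaped like the dict `ConversationMemory.to_dict()` produces:

```python
from utils.session_store import get_session_store

store = get_session_store()  # module-level singleton backed by the sessions/ directory

# Save and reload a session payload (user id and data are illustrative)
store.save_session("user_123", {"user_profile": {"age": 30}, "conversation_state": {}})
data = store.load_session("user_123")
print(data["user_profile"])                # {'age': 30}
print(store.get_session_info("user_123"))  # lightweight metadata view

print(store.list_sessions())               # ['user_123', ...]
removed = store.cleanup_old_sessions(days=30)
print(f"Removed {removed} stale session(s)")
```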
diff --git a/utils/speech_recognition.py b/utils/speech_recognition.py
new file mode 100644
index 0000000000000000000000000000000000000000..9182c144206f661fcc09df49e28f351ec352d53e
--- /dev/null
+++ b/utils/speech_recognition.py
@@ -0,0 +1,26 @@
+"""
+Speech-to-Text Module
+Handles audio recording and speech recognition using Hugging Face
+"""
+
+import logging
+from transformers import pipeline
+
+logger = logging.getLogger(__name__)
+
+# Loaded eagerly at import time; the first run downloads the model weights
+model_id = "vinai/PhoWhisper-base"
+pipe = pipeline("automatic-speech-recognition", model=model_id)
+
+def transcribe_speech(filepath):
+    """Transcribe a Vietnamese audio file and return the recognized text"""
+    output = pipe(
+        filepath,
+        max_new_tokens=256,
+        generate_kwargs={
+            "task": "transcribe",
+            "language": "vietnamese",
+        },
+        chunk_length_s=30,
+        batch_size=8,
+    )
+    return output["text"]
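Because the module builds the PhoWhisper pipeline at import time, the first import is slow and later calls are cheap. A minimal usage sketch; the audio path is illustrative, and decoding the file relies on `ffmpeg` being available to the `transformers` pipeline:

```python
from utils.speech_recognition import transcribe_speech

# Any recorded audio file works here, e.g. what Gradio's microphone input produces
text = transcribe_speech("recordings/cau_hoi.wav")  # hypothetical path
print(text)  # Vietnamese transcript of the audio
```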
""" + Clean up temporary audio files + + Args: + file_path (str): Path to the file to delete + """ + try: + if file_path and os.path.exists(file_path): + os.unlink(file_path) + logger.info(f"Cleaned up temp file: {file_path}") + except Exception as e: + logger.warning(f"Failed to cleanup temp file {file_path}: {str(e)}") + +def is_vietnamese_text(text): + """ + Check if text contains Vietnamese characters + + Args: + text (str): Text to check + + Returns: + bool: True if text contains Vietnamese characters + """ + vietnamese_chars = set('àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ') + vietnamese_chars.update('ÀÁẠẢÃÂẦẤẬẨẪĂẰẮẶẲẴÈÉẸẺẼÊỀẾỆỂỄÌÍỊỈĨÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠÙÚỤỦŨƯỪỨỰỬỮỲÝỴỶỸĐ') + + return any(char in vietnamese_chars for char in text.lower()) + +def get_supported_languages(): + """ + Get list of supported languages for TTS + + Returns: + dict: Dictionary of language codes and names + """ + return { + 'vi': 'Tiếng Việt', + 'en': 'English', + 'zh': '中文', + 'ja': '日本語', + 'ko': '한국어', + 'th': 'ไทย', + 'fr': 'Français', + 'de': 'Deutsch', + 'es': 'Español', + 'it': 'Italiano', + 'pt': 'Português', + 'ru': 'Русский' + }