"""
Conversation Summarizer
Automatically summarizes long conversations to maintain context while reducing token usage
"""
from typing import List, Dict, Any, Optional, Tuple
from config.settings import client, MODEL
import json
class ConversationSummarizer:
    """
    Summarizes conversation history to maintain context with fewer tokens.

    Older turns are condensed into an LLM-generated summary (with a
    keyword-based fallback when the LLM call fails); only the most recent
    turns are kept verbatim.
    """

    def __init__(self, max_turns: int = 20, summary_trigger: int = 15):
        """
        Initialize summarizer.

        Args:
            max_turns: Maximum conversation turns to keep in full detail.
                NOTE(review): currently stored but not read by any method.
            summary_trigger: Number of turns before triggering summarization.
        """
        self.max_turns = max_turns
        self.summary_trigger = summary_trigger
        # Storage for previous summaries; kept for callers that want to
        # accumulate them — no method in this class reads it.
        self.summaries: List[str] = []

    def should_summarize(self, chat_history: List[Tuple[str, str]]) -> bool:
        """
        Check if conversation should be summarized.

        Args:
            chat_history: List of (user_msg, bot_msg) tuples.
        Returns:
            True if the history has reached the summarization trigger length.
        """
        return len(chat_history) >= self.summary_trigger

    def summarize_conversation(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None,
        keep_recent: int = 5
    ) -> Dict[str, Any]:
        """
        Summarize conversation history.

        Args:
            chat_history: Full conversation history as (user, bot) tuples.
            user_profile: User profile data for context (optional).
            keep_recent: Number of recent turns to keep in full detail.
                A value <= 0 summarizes the entire history.
        Returns:
            Dict with keys 'summary' (str or None), 'recent_history'
            (list of turns kept verbatim) and 'summarized_turns' (int).
        """
        if len(chat_history) <= keep_recent:
            # Nothing old enough to summarize; return history unchanged.
            return {
                'summary': None,
                'recent_history': chat_history,
                'summarized_turns': 0
            }

        if keep_recent > 0:
            # Split into the part to summarize and the recent tail to keep.
            to_summarize = chat_history[:-keep_recent]
            recent = chat_history[-keep_recent:]
        else:
            # keep_recent <= 0 means "summarize everything".  The naive
            # slices chat_history[:-0] / chat_history[-0:] would invert the
            # split (empty summary input, full recent history), so handle
            # this case explicitly.
            to_summarize = list(chat_history)
            recent = []

        summary_text = self._generate_summary(to_summarize, user_profile)
        return {
            'summary': summary_text,
            'recent_history': recent,
            'summarized_turns': len(to_summarize)
        }

    def _generate_summary(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Generate summary using the LLM, falling back to a keyword-based
        summary on any error.

        Args:
            chat_history: Conversation to summarize.
            user_profile: User profile for context (optional).
        Returns:
            Summary text (Vietnamese).
        """
        conversation_text = self._format_conversation(chat_history)

        # Prompt asks for a Vietnamese summary focused on health context.
        prompt = f"""Summarize the following healthcare conversation concisely. Focus on:
1. User's health goals and concerns
2. Key information provided (age, weight, symptoms, etc.)
3. Main advice or recommendations given
4. Important context for future conversations
User Profile: {json.dumps(user_profile, ensure_ascii=False) if user_profile else 'Not available'}
Conversation:
{conversation_text}
Provide a concise summary in Vietnamese (2-3 paragraphs max):"""

        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that summarizes healthcare conversations concisely."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,  # low temperature for stable, factual summaries
                max_tokens=500
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            # Deliberate best-effort: summarization must never break the
            # main conversation flow, so fall back to a local summary.
            print(f"⚠️ Error generating summary: {e}")
            return self._simple_summary(chat_history)

    def _format_conversation(self, chat_history: List[Tuple[str, str]]) -> str:
        """Format conversation for summarization (bot replies truncated to 200 chars)."""
        formatted = []
        for i, (user_msg, bot_msg) in enumerate(chat_history, 1):
            formatted.append(f"Turn {i}:")
            formatted.append(f"User: {user_msg}")
            formatted.append(f"Bot: {bot_msg[:200]}..." if len(bot_msg) > 200 else f"Bot: {bot_msg}")
            formatted.append("")
        return "\n".join(formatted)

    def _simple_summary(self, chat_history: List[Tuple[str, str]]) -> str:
        """Simple fallback summary without LLM, based on keyword matching."""
        # (keywords to look for in the user message) -> topic label
        keyword_topics = [
            (('giảm cân', 'weight loss'), 'giảm cân'),
            (('tập', 'exercise', 'gym'), 'tập luyện'),
            (('ăn', 'dinh dưỡng', 'nutrition'), 'dinh dưỡng'),
            (('đau', 'triệu chứng', 'symptom'), 'triệu chứng'),
        ]
        topics: List[str] = []
        for user_msg, _ in chat_history:
            text = user_msg.lower()  # lowercase once per turn, not per keyword group
            for keywords, topic in keyword_topics:
                if any(keyword in text for keyword in keywords):
                    topics.append(topic)
        # dict.fromkeys dedupes while preserving first-seen order
        # (list(set(...)) made the topic order nondeterministic).
        unique_topics = list(dict.fromkeys(topics))
        # Avoid the awkward "Đã trao đổi về: ." when no keyword matched.
        topic_text = ', '.join(unique_topics) if unique_topics else 'sức khỏe tổng quát'
        return f"Đã trao đổi về: {topic_text}. Tổng {len(chat_history)} lượt hội thoại."

    def get_context_for_agent(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None,
        max_context_turns: int = 10
    ) -> str:
        """
        Get optimized context for agent (summary + recent history).

        Args:
            chat_history: Full conversation history.
            user_profile: User profile data (optional).
            max_context_turns: Maximum turns to include verbatim in context.
        Returns:
            Formatted context string.
        """
        if len(chat_history) <= max_context_turns:
            # Short conversation: no summarization needed.
            return self._format_recent_history(chat_history)

        # Summarize older parts, keep the recent tail verbatim.
        result = self.summarize_conversation(
            chat_history,
            user_profile,
            keep_recent=max_context_turns
        )

        context_parts = []
        if result['summary']:
            context_parts.append(f"📝 Tóm tắt cuộc trò chuyện trước ({result['summarized_turns']} lượt):")
            context_parts.append(result['summary'])
            context_parts.append("")
        if result['recent_history']:
            context_parts.append(f"💬 {len(result['recent_history'])} lượt hội thoại gần nhất:")
            context_parts.append(self._format_recent_history(result['recent_history']))
        return "\n".join(context_parts)

    def _format_recent_history(self, history: List[Tuple[str, str]]) -> str:
        """Format recent history for context (last 5 turns, bot replies truncated to 150 chars)."""
        formatted = []
        for user_msg, bot_msg in history[-5:]:
            formatted.append(f"User: {user_msg}")
            formatted.append(f"Bot: {bot_msg[:150]}..." if len(bot_msg) > 150 else f"Bot: {bot_msg}")
        return "\n".join(formatted)

    def compress_history(
        self,
        chat_history: List[Tuple[str, str]],
        target_turns: int = 10
    ) -> List[Tuple[str, str]]:
        """
        Compress history by summarizing old turns and keeping recent ones.

        Args:
            chat_history: Full history.
            target_turns: Target number of turns to keep (including the
                synthetic summary turn).
        Returns:
            Compressed history with the summary as the first turn.
        """
        if len(chat_history) <= target_turns:
            return chat_history

        result = self.summarize_conversation(
            chat_history,
            # Reserve one slot for the summary turn; clamp at 0 so
            # target_turns=1 summarizes everything instead of misbehaving.
            keep_recent=max(target_turns - 1, 0)
        )

        compressed: List[Tuple[str, str]] = []
        if result['summary']:
            compressed.append((
                "[Tóm tắt cuộc trò chuyện trước]",
                result['summary']
            ))
        compressed.extend(result['recent_history'])
        return compressed

    def get_summary_stats(self, chat_history: List[Tuple[str, str]]) -> Dict[str, Any]:
        """
        Get statistics about a conversation.

        Args:
            chat_history: Conversation history.
        Returns:
            Dict with turn count, character counts, a rough token estimate
            and whether summarization should be triggered.
        """
        total_turns = len(chat_history)
        total_user_chars = sum(len(user_msg) for user_msg, _ in chat_history)
        total_bot_chars = sum(len(bot_msg) for _, bot_msg in chat_history)
        # Rough heuristic: 1 token ≈ 4 chars for Vietnamese text.
        estimated_tokens = (total_user_chars + total_bot_chars) // 4
        return {
            'total_turns': total_turns,
            'total_user_chars': total_user_chars,
            'total_bot_chars': total_bot_chars,
            'estimated_tokens': estimated_tokens,
            'should_summarize': self.should_summarize(chat_history)
        }
# Module-level singleton, created lazily by get_summarizer().
_summarizer = None


def get_summarizer() -> ConversationSummarizer:
    """Get global summarizer instance, creating it on first use."""
    global _summarizer
    instance = _summarizer
    if instance is None:
        instance = ConversationSummarizer()
        _summarizer = instance
    return instance