Spaces:
Runtime error
Runtime error
File size: 9,728 Bytes
eeb0f9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 |
"""
Conversation Summarizer
Automatically summarizes long conversations to maintain context while reducing token usage
"""
from typing import List, Dict, Any, Optional, Tuple
from config.settings import client, MODEL
import json
class ConversationSummarizer:
    """
    Summarizes conversation history to maintain context with fewer tokens.

    Older turns are condensed into a short summary (LLM-generated, with a
    keyword-based fallback) while the most recent turns are kept verbatim.
    """

    # Topic label -> trigger keywords, used by the non-LLM fallback summary.
    _TOPIC_KEYWORDS = {
        'giảm cân': ('giảm cân', 'weight loss'),
        'tập luyện': ('tập', 'exercise', 'gym'),
        'dinh dưỡng': ('ăn', 'dinh dưỡng', 'nutrition'),
        'triệu chứng': ('đau', 'triệu chứng', 'symptom'),
    }

    def __init__(self, max_turns: int = 20, summary_trigger: int = 15):
        """
        Initialize summarizer.

        Args:
            max_turns: Maximum conversation turns to keep in full detail.
            summary_trigger: Number of turns before triggering summarization.
        """
        self.max_turns = max_turns
        self.summary_trigger = summary_trigger
        self.summaries: List[str] = []  # Generated summaries, oldest first

    def should_summarize(self, chat_history: List[Tuple[str, str]]) -> bool:
        """
        Check if conversation should be summarized.

        Args:
            chat_history: List of (user_msg, bot_msg) tuples.

        Returns:
            True if summarization is needed.
        """
        return len(chat_history) >= self.summary_trigger

    def summarize_conversation(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None,
        keep_recent: int = 5
    ) -> Dict[str, Any]:
        """
        Summarize conversation history.

        Args:
            chat_history: Full conversation history.
            user_profile: User profile data for context.
            keep_recent: Number of recent turns to keep in full detail.

        Returns:
            Dict with 'summary' (str or None), 'recent_history' (list of
            turns) and 'summarized_turns' (int).
        """
        # Clamp: a negative keep_recent would make the slices below wrap around.
        keep_recent = max(keep_recent, 0)
        if len(chat_history) <= keep_recent:
            return {
                'summary': None,
                'recent_history': chat_history,
                'summarized_turns': 0
            }
        # Split into the part to summarize and the recent part to keep.
        # keep_recent == 0 needs a special case: [:-0] is an EMPTY slice and
        # [-0:] is the WHOLE list, which would invert the split.
        if keep_recent:
            to_summarize = chat_history[:-keep_recent]
            recent = chat_history[-keep_recent:]
        else:
            to_summarize = list(chat_history)
            recent = []
        summary_text = self._generate_summary(to_summarize, user_profile)
        # Record the summary (the attribute existed before but was never
        # written, despite being documented as "store previous summaries").
        self.summaries.append(summary_text)
        return {
            'summary': summary_text,
            'recent_history': recent,
            'summarized_turns': len(to_summarize)
        }

    def _generate_summary(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Generate summary using the LLM, falling back to a keyword summary.

        Args:
            chat_history: Conversation to summarize.
            user_profile: User profile for context.

        Returns:
            Summary text (never raises; degrades to _simple_summary).
        """
        conversation_text = self._format_conversation(chat_history)
        prompt = f"""Summarize the following healthcare conversation concisely. Focus on:
1. User's health goals and concerns
2. Key information provided (age, weight, symptoms, etc.)
3. Main advice or recommendations given
4. Important context for future conversations
User Profile: {json.dumps(user_profile, ensure_ascii=False) if user_profile else 'Not available'}
Conversation:
{conversation_text}
Provide a concise summary in Vietnamese (2-3 paragraphs max):"""
        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that summarizes healthcare conversations concisely."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,  # low temperature: summaries should be stable
                max_tokens=500
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            # Best-effort: any API failure degrades to the keyword summary.
            print(f"⚠️ Error generating summary: {e}")
            return self._simple_summary(chat_history)

    def _format_conversation(self, chat_history: List[Tuple[str, str]]) -> str:
        """Format conversation for summarization, truncating long bot replies."""
        formatted = []
        for i, (user_msg, bot_msg) in enumerate(chat_history, 1):
            formatted.append(f"Turn {i}:")
            formatted.append(f"User: {user_msg}")
            # Cap bot replies at 200 chars to keep the summarization prompt small.
            formatted.append(f"Bot: {bot_msg[:200]}..." if len(bot_msg) > 200 else f"Bot: {bot_msg}")
            formatted.append("")
        return "\n".join(formatted)

    def _simple_summary(self, chat_history: List[Tuple[str, str]]) -> str:
        """Simple fallback summary without LLM (keyword-based topic detection)."""
        topics: List[str] = []
        for user_msg, _ in chat_history:
            msg = user_msg.lower()  # hoisted: was lowercased once per keyword group
            for topic, keywords in self._TOPIC_KEYWORDS.items():
                if any(keyword in msg for keyword in keywords):
                    topics.append(topic)
        # dict.fromkeys dedupes while preserving first-seen order
        # (list(set(...)) produced a nondeterministic ordering).
        unique_topics = list(dict.fromkeys(topics))
        return f"Đã trao đổi về: {', '.join(unique_topics)}. Tổng {len(chat_history)} lượt hội thoại."

    def get_context_for_agent(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None,
        max_context_turns: int = 10
    ) -> str:
        """
        Get optimized context for agent (summary + recent history).

        Args:
            chat_history: Full conversation history.
            user_profile: User profile data.
            max_context_turns: Maximum turns to include in context.

        Returns:
            Formatted context string.
        """
        if len(chat_history) <= max_context_turns:
            # Short conversation: no summary needed, render it in full.
            return self._format_recent_history(chat_history, limit=len(chat_history))
        # Summarize older parts, keep the most recent turns verbatim.
        result = self.summarize_conversation(
            chat_history,
            user_profile,
            keep_recent=max_context_turns
        )
        context_parts = []
        if result['summary']:
            context_parts.append(f"📝 Tóm tắt cuộc trò chuyện trước ({result['summarized_turns']} lượt):")
            context_parts.append(result['summary'])
            context_parts.append("")
        if result['recent_history']:
            # Render every kept turn so the header count matches the content
            # (previously only the last 5 were shown regardless of the header).
            context_parts.append(f"💬 {len(result['recent_history'])} lượt hội thoại gần nhất:")
            context_parts.append(
                self._format_recent_history(result['recent_history'], limit=len(result['recent_history']))
            )
        return "\n".join(context_parts)

    def _format_recent_history(self, history: List[Tuple[str, str]], limit: int = 5) -> str:
        """
        Format the last `limit` turns of history for context.

        Args:
            history: Conversation turns.
            limit: Number of trailing turns to render (default 5 preserves the
                original behavior for existing callers).
        """
        formatted = []
        # limit == 0 means "no turns" — [-0:] would wrongly select everything.
        for user_msg, bot_msg in (history[-limit:] if limit else []):
            formatted.append(f"User: {user_msg}")
            formatted.append(f"Bot: {bot_msg[:150]}..." if len(bot_msg) > 150 else f"Bot: {bot_msg}")
        return "\n".join(formatted)

    def compress_history(
        self,
        chat_history: List[Tuple[str, str]],
        target_turns: int = 10
    ) -> List[Tuple[str, str]]:
        """
        Compress history by summarizing and keeping recent turns.

        Args:
            chat_history: Full history.
            target_turns: Target number of turns to keep (including the
                synthetic summary turn).

        Returns:
            Compressed history with the summary as the first turn.
        """
        if len(chat_history) <= target_turns:
            return chat_history
        result = self.summarize_conversation(
            chat_history,
            keep_recent=target_turns - 1  # reserve one slot for the summary turn
        )
        compressed: List[Tuple[str, str]] = []
        if result['summary']:
            # Synthetic turn: the summary is injected as a (user, bot) pair.
            compressed.append((
                "[Tóm tắt cuộc trò chuyện trước]",
                result['summary']
            ))
        compressed.extend(result['recent_history'])
        return compressed

    def get_summary_stats(self, chat_history: List[Tuple[str, str]]) -> Dict[str, Any]:
        """
        Get statistics about a conversation.

        Args:
            chat_history: Conversation history.

        Returns:
            Dict with turn count, per-side character counts, a rough token
            estimate, and whether summarization is due.
        """
        total_turns = len(chat_history)
        total_user_chars = sum(len(user_msg) for user_msg, _ in chat_history)
        total_bot_chars = sum(len(bot_msg) for _, bot_msg in chat_history)
        # Rough estimate: ~1 token per 4 characters for Vietnamese text.
        estimated_tokens = (total_user_chars + total_bot_chars) // 4
        return {
            'total_turns': total_turns,
            'total_user_chars': total_user_chars,
            'total_bot_chars': total_bot_chars,
            'estimated_tokens': estimated_tokens,
            'should_summarize': self.should_summarize(chat_history)
        }
# Module-level singleton shared across the application.
_summarizer: Optional[ConversationSummarizer] = None


def get_summarizer() -> ConversationSummarizer:
    """Return the shared ConversationSummarizer, creating it on first use."""
    global _summarizer
    instance = _summarizer
    if instance is None:
        instance = ConversationSummarizer()
        _summarizer = instance
    return instance
|