"""
Conversation Summarizer
Automatically summarizes long conversations to maintain context while reducing token usage
"""

from typing import List, Dict, Any, Optional, Tuple
from config.settings import client, MODEL
import json


class ConversationSummarizer:
    """
    Summarizes conversation history to maintain context with fewer tokens
    """
    
    def __init__(self, max_turns: int = 20, summary_trigger: int = 15):
        """
        Initialize summarizer
        
        Args:
            max_turns: Maximum conversation turns to keep in full detail
            summary_trigger: Number of turns before triggering summarization
        """
        self.max_turns = max_turns
        self.summary_trigger = summary_trigger
        self.summaries = []  # Store previous summaries
    
    def should_summarize(self, chat_history: List[Tuple[str, str]]) -> bool:
        """
        Check if conversation should be summarized
        
        Args:
            chat_history: List of (user_msg, bot_msg) tuples
            
        Returns:
            True if summarization needed
        """
        return len(chat_history) >= self.summary_trigger
    
    def summarize_conversation(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None,
        keep_recent: int = 5
    ) -> Dict[str, Any]:
        """
        Summarize conversation history
        
        Args:
            chat_history: Full conversation history
            user_profile: User profile data for context
            keep_recent: Number of recent turns to keep in full detail
            
        Returns:
            Dict with summary and recent history
        """
        if len(chat_history) <= keep_recent:
            return {
                'summary': None,
                'recent_history': chat_history,
                'summarized_turns': 0
            }
        
        # Split into the part to summarize and the recent turns to keep
        # (guard keep_recent <= 0: chat_history[:-0] is empty, so nothing would be summarized)
        if keep_recent > 0:
            to_summarize = chat_history[:-keep_recent]
            recent = chat_history[-keep_recent:]
        else:
            to_summarize, recent = chat_history, []
        
        # Generate summary
        summary_text = self._generate_summary(to_summarize, user_profile)
        
        return {
            'summary': summary_text,
            'recent_history': recent,
            'summarized_turns': len(to_summarize)
        }
    
    def _generate_summary(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Generate summary using LLM
        
        Args:
            chat_history: Conversation to summarize
            user_profile: User profile for context
            
        Returns:
            Summary text
        """
        # Format conversation for summarization
        conversation_text = self._format_conversation(chat_history)
        
        # Build prompt
        prompt = f"""Summarize the following healthcare conversation concisely. Focus on:
1. User's health goals and concerns
2. Key information provided (age, weight, symptoms, etc.)
3. Main advice or recommendations given
4. Important context for future conversations

User Profile: {json.dumps(user_profile, ensure_ascii=False) if user_profile else 'Not available'}

Conversation:
{conversation_text}

Provide a concise summary in Vietnamese (2-3 paragraphs max):"""
        
        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that summarizes healthcare conversations concisely."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=500
            )
            
            summary = response.choices[0].message.content.strip()
            return summary
            
        except Exception as e:
            print(f"⚠️ Error generating summary: {e}")
            # Fallback: simple text summary
            return self._simple_summary(chat_history)
    
    def _format_conversation(self, chat_history: List[Tuple[str, str]]) -> str:
        """Format conversation for summarization"""
        formatted = []
        for i, (user_msg, bot_msg) in enumerate(chat_history, 1):
            formatted.append(f"Turn {i}:")
            formatted.append(f"User: {user_msg}")
            bot_text = bot_msg[:200] + "..." if len(bot_msg) > 200 else bot_msg
            formatted.append(f"Bot: {bot_text}")
            formatted.append("")
        return "\n".join(formatted)
    
    def _simple_summary(self, chat_history: List[Tuple[str, str]]) -> str:
        """Simple keyword-based fallback summary used when the LLM call fails"""
        topic_keywords = {
            'giảm cân': ['giảm cân', 'weight loss'],
            'tập luyện': ['tập', 'exercise', 'gym'],
            'dinh dưỡng': ['ăn', 'dinh dưỡng', 'nutrition'],
            'triệu chứng': ['đau', 'triệu chứng', 'symptom'],
        }
        
        topics = set()
        for user_msg, _ in chat_history:
            msg_lower = user_msg.lower()
            for topic, keywords in topic_keywords.items():
                if any(keyword in msg_lower for keyword in keywords):
                    topics.add(topic)
        
        topics_text = ', '.join(sorted(topics)) if topics else 'sức khỏe tổng quát'
        return f"Đã trao đổi về: {topics_text}. Tổng {len(chat_history)} lượt hội thoại."
    
    def get_context_for_agent(
        self,
        chat_history: List[Tuple[str, str]],
        user_profile: Optional[Dict[str, Any]] = None,
        max_context_turns: int = 10
    ) -> str:
        """
        Get optimized context for agent (summary + recent history)
        
        Args:
            chat_history: Full conversation history
            user_profile: User profile data
            max_context_turns: Maximum turns to include in context
            
        Returns:
            Formatted context string
        """
        if len(chat_history) <= max_context_turns:
            # Short conversation, return as-is
            return self._format_recent_history(chat_history)
        
        # Summarize older parts
        result = self.summarize_conversation(
            chat_history,
            user_profile,
            keep_recent=max_context_turns
        )
        
        context_parts = []
        
        # Add summary if available
        if result['summary']:
            context_parts.append(f"📝 Tóm tắt cuộc trò chuyện trước ({result['summarized_turns']} lượt):")
            context_parts.append(result['summary'])
            context_parts.append("")
        
        # Add recent history
        if result['recent_history']:
            context_parts.append(f"💬 {len(result['recent_history'])} lượt hội thoại gần nhất:")
            context_parts.append(self._format_recent_history(result['recent_history']))
        
        return "\n".join(context_parts)
    
    def _format_recent_history(self, history: List[Tuple[str, str]]) -> str:
        """Format recent history for context (callers pre-slice to the desired length)"""
        formatted = []
        for user_msg, bot_msg in history:
            bot_text = bot_msg[:150] + "..." if len(bot_msg) > 150 else bot_msg
            formatted.append(f"User: {user_msg}")
            formatted.append(f"Bot: {bot_text}")
        return "\n".join(formatted)
    
    def compress_history(
        self,
        chat_history: List[Tuple[str, str]],
        target_turns: int = 10
    ) -> List[Tuple[str, str]]:
        """
        Compress history by summarizing and keeping recent turns
        
        Args:
            chat_history: Full history
            target_turns: Target number of turns to keep
            
        Returns:
            Compressed history with summary as first turn
        """
        if len(chat_history) <= target_turns:
            return chat_history
        
        result = self.summarize_conversation(
            chat_history,
            keep_recent=target_turns - 1  # -1 for summary turn
        )
        
        # Create compressed history
        compressed = []
        
        # Add summary as first turn
        if result['summary']:
            compressed.append((
                "[Tóm tắt cuộc trò chuyện trước]",
                result['summary']
            ))
        
        # Add recent history
        compressed.extend(result['recent_history'])
        
        return compressed
    
    def get_summary_stats(self, chat_history: List[Tuple[str, str]]) -> Dict[str, Any]:
        """
        Get statistics about conversation
        
        Args:
            chat_history: Conversation history
            
        Returns:
            Statistics dict
        """
        total_turns = len(chat_history)
        total_user_chars = sum(len(user_msg) for user_msg, _ in chat_history)
        total_bot_chars = sum(len(bot_msg) for _, bot_msg in chat_history)
        
        # Estimate tokens (rough: 1 token ≈ 4 chars for Vietnamese)
        estimated_tokens = (total_user_chars + total_bot_chars) // 4
        
        return {
            'total_turns': total_turns,
            'total_user_chars': total_user_chars,
            'total_bot_chars': total_bot_chars,
            'estimated_tokens': estimated_tokens,
            'should_summarize': self.should_summarize(chat_history)
        }


# Global instance (created lazily by get_summarizer)
_summarizer: Optional[ConversationSummarizer] = None

def get_summarizer() -> ConversationSummarizer:
    """Get global summarizer instance"""
    global _summarizer
    if _summarizer is None:
        _summarizer = ConversationSummarizer()
    return _summarizer
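

# --- Illustrative usage sketch (assumption: run directly as a script; not part of the app) ---
# A minimal, hypothetical example of how this module is typically used: build a short
# chat history, check whether summarization would trigger, and fetch the optimized
# context string. The sample messages are placeholders. With a history this short,
# get_context_for_agent returns the formatted history without any LLM call, so the
# demo runs without API access (only the module-level import of config.settings).
if __name__ == "__main__":
    sample_history = [
        ("Tôi muốn giảm cân", "Bạn có thể bắt đầu với chế độ ăn cân bằng và tập luyện đều đặn."),
        ("Tôi nên tập gym mấy buổi một tuần?", "Khoảng 3-4 buổi mỗi tuần là hợp lý cho người mới bắt đầu."),
    ]

    summarizer = get_summarizer()

    stats = summarizer.get_summary_stats(sample_history)
    print(f"Turns: {stats['total_turns']}, estimated tokens: {stats['estimated_tokens']}")
    print(f"Should summarize: {stats['should_summarize']}")

    # Short histories are returned verbatim; longer ones get an LLM summary prepended
    context = summarizer.get_context_for_agent(sample_history)
    print(context)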