Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed 25 days ago

Commit

cc8629b

verified ·

1 Parent(s): b5e51ac

Update services/streaming_voice_service.py

Browse files

Files changed (1) hide show

services/streaming_voice_service.py +297 -215

services/streaming_voice_service.py CHANGED Viewed

@@ -1,193 +1,14 @@
-# import io
-# import numpy as np
-# import soundfile as sf
-# import threading
-# import time
-# import sounddevice as sd
-# from groq import Groq
-# from typing import Optional, Callable
-# from config.settings import settings
-# from core.speechbrain_vad import SpeechBrainVAD
-# from core.rag_system import EnhancedRAGSystem
-# from core.tts_service import EnhancedTTSService
-# class StreamingVoiceService:
-#     def __init__(self, groq_client: Groq, rag_system: EnhancedRAGSystem, tts_service: EnhancedTTSService):
-#         self.client = groq_client
-#         self.rag_system = rag_system
-#         self.tts_service = tts_service
-#         self.vad_processor = SpeechBrainVAD()
-#         # Streaming state
-#         self.is_listening = False
-#         self.audio_stream = None
-#         self.callback_handler = None
-#         # Conversation context
-#         self.conversation_history = []
-#         self.current_transcription = ""
-#     def start_listening(self, callback_handler: Callable):
-#         """Bắt đầu lắng nghe với sounddevice"""
-#         if self.is_listening:
-#             return False
-#         try:
-#             self.callback_handler = callback_handler
-#             self.is_listening = True
-#             self.conversation_history = []
-#             # Start VAD processing thread
-#             self.vad_processor.start_stream(self._process_speech_segment)
-#             # Khởi động thread lắng nghe
-#             threading.Thread(target=self._listen_loop, daemon=True).start()
-#             print("🎙️ Bắt đầu lắng nghe (sounddevice)...")
-#             return True
-#         except Exception as e:
-#             print(f"❌ Lỗi khởi động stream: {e}")
-#             self.stop_listening()
-#             return False
-#     def stop_listening(self):
-#         """Dừng lắng nghe"""
-#         self.is_listening = False
-#         self.vad_processor.stop_stream()
-#         print("🛑 Đã dừng lắng nghe")
-#     def _listen_loop(self):
-#         """Luồng lấy mẫu âm thanh liên tục"""
-#         try:
-#             with sd.InputStream(
-#                 samplerate=settings.SAMPLE_RATE,
-#                 channels=1,
-#                 dtype="float32",
-#                 blocksize=1024,
-#                 callback=self._audio_callback
-#             ):
-#                 while self.is_listening:
-#                     time.sleep(0.05)
-#         except Exception as e:
-#             print(f"❌ Lỗi luồng âm thanh: {e}")
-#             self.stop_listening()
-#     def _audio_callback(self, in_data, frames, time_info, status):
-#         """Callback xử lý audio input real-time"""
-#         if status:
-#             print(f"⚠️ Trạng thái âm thanh: {status}")
-#         if self.is_listening:
-#             audio_data = np.copy(in_data[:, 0])  # Mono
-#             self.vad_processor.process_stream(audio_data, settings.SAMPLE_RATE)
-#     def _process_speech_segment(self, speech_audio: np.ndarray, sample_rate: int):
-#         """Xử lý đoạn giọng nói"""
-#         if not self.is_listening or len(speech_audio) == 0:
-#             return
-#         print(f"🎯 Đang xử lý segment giọng nói ({len(speech_audio)} samples)...")
-#         transcription = self._transcribe_audio(speech_audio, sample_rate)
-#         if transcription and len(transcription.strip()) > 0:
-#             self.current_transcription = transcription
-#             print(f"📝 Transcription: {transcription}")
-#             response = self._generate_ai_response(transcription)
-#             tts_audio = self._text_to_speech(response)
-#             if self.callback_handler:
-#                 self.callback_handler({
-#                     'transcription': transcription,
-#                     'response': response,
-#                     'tts_audio': tts_audio,
-#                     'speech_audio': speech_audio
-#                 })
-#     def _transcribe_audio(self, audio_data: np.ndarray, sample_rate: int) -> Optional[str]:
-#         """Chuyển audio -> text"""
-#         try:
-#             buffer = io.BytesIO()
-#             sf.write(buffer, audio_data, sample_rate, format='wav')
-#             buffer.seek(0)
-#             transcription = self.client.audio.transcriptions.create(
-#                 model=settings.WHISPER_MODEL,
-#                 file=("speech.wav", buffer.read()),
-#                 response_format="text",
-#                 language="vi"
-#             )
-#             return transcription.strip()
-#         except Exception as e:
-#             print(f"❌ Lỗi transcription: {e}")
-#             return None
-#     def _generate_ai_response(self, user_input: str) -> str:
-#         """Sinh phản hồi AI"""
-#         try:
-#             self.conversation_history.append({"role": "user", "content": user_input})
-#             rag_results = self.rag_system.semantic_search(user_input, top_k=2)
-#             context_text = "\n".join([f"- {doc.text}" for doc in rag_results]) if rag_results else ""
-#             system_prompt = f"""Bạn là trợ lý AI thông minh chuyên về tiếng Việt.
-# Hãy trả lời ngắn gọn, tự nhiên và hữu ích.
-# Thông tin tham khảo:
-# {context_text}
-# """
-#             messages = [{"role": "system", "content": system_prompt}]
-#             messages.extend(self.conversation_history[-6:])
-#             completion = self.client.chat.completions.create(
-#                 model=settings.LLM_MODEL,
-#                 messages=messages,
-#                 max_tokens=150,
-#                 temperature=0.7
-#             )
-#             response = completion.choices[0].message.content
-#             self.conversation_history.append({"role": "assistant", "content": response})
-#             if len(self.conversation_history) > 10:
-#                 self.conversation_history = self.conversation_history[-10:]
-#             return response
-#         except Exception as e:
-#             return f"Xin lỗi, tôi gặp lỗi: {str(e)}"
-#     def _text_to_speech(self, text: str) -> Optional[str]:
-#         """Chuyển văn bản thành giọng nói"""
-#         try:
-#             tts_bytes = self.tts_service.text_to_speech(text, 'vi')
-#             if tts_bytes:
-#                 return self.tts_service.save_audio_to_file(tts_bytes)
-#         except Exception as e:
-#             print(f"❌ Lỗi TTS: {e}")
-#         return None
-#     def get_conversation_state(self) -> dict:
-#         """Lấy trạng thái hội thoại"""
-#         return {
-#             'is_listening': self.is_listening,
-#             'history_length': len(self.conversation_history),
-#             'current_transcription': self.current_transcription
-#         }
 import io
 import numpy as np
 import soundfile as sf
-import time  # THÊM IMPORT NÀY
 import traceback
 from groq import Groq
 from typing import Optional, Dict, Any
 from config.settings import settings
 from core.rag_system import EnhancedRAGSystem
 from core.tts_service import EnhancedTTSService
 class StreamingVoiceService:
@@ -196,12 +17,85 @@ class StreamingVoiceService:
         self.rag_system = rag_system
         self.tts_service = tts_service
         # Conversation context
         self.conversation_history = []
         self.current_transcription = ""
     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
-        """Xử lý audio streaming từ Gradio microphone component"""
         if not audio_data:
             return {
                 'transcription': "❌ Không có dữ liệu âm thanh",
@@ -210,23 +104,22 @@ class StreamingVoiceService:
             }
         try:
-            # Lấy dữ liệu audio từ Gradio
             sample_rate, audio_array = audio_data
             print(f"🎯 Nhận audio: {len(audio_array)} samples, SR: {sample_rate}")
-            # Kiểm tra audio có dữ liệu không
-            if len(audio_array) == 0 or np.max(np.abs(audio_array)) < 0.01:
                 return {
-                    'transcription': "❌ Âm thanh quá yếu",
-                    'response': "Xin vui lòng nói to hơn và rõ hơn",
                     'tts_audio': None
                 }
             # Chuyển đổi thành văn bản
             transcription = self._transcribe_audio(audio_array, sample_rate)
-            if not transcription or len(transcription.strip()) == 0:
                 return {
                     'transcription': "❌ Không nghe rõ",
                     'response': "Xin vui lòng nói lại rõ hơn",
@@ -234,8 +127,6 @@ class StreamingVoiceService:
                 }
             print(f"📝 Đã chuyển đổi: {transcription}")
-            # Cập nhật transcription hiện tại
             self.current_transcription = transcription
             # Tạo phản hồi AI
@@ -252,45 +143,46 @@ class StreamingVoiceService:
         except Exception as e:
             print(f"❌ Lỗi xử lý streaming audio: {e}")
-            print(f"Chi tiết lỗi: {traceback.format_exc()}")
             return {
                 'transcription': f"❌ Lỗi: {str(e)}",
-                'response': "Xin lỗi, có lỗi xảy ra trong quá trình xử lý",
                 'tts_audio': None
             }
     def _transcribe_audio(self, audio_data: np.ndarray, sample_rate: int) -> Optional[str]:
-        """Chuyển audio -> text"""
         try:
-            # Chuẩn hóa audio data
             if audio_data.ndim > 1:
-                audio_data = np.mean(audio_data, axis=1)  # Chuyển sang mono
-            # Normalize âm lượng
             audio_max = np.max(np.abs(audio_data))
-            if audio_max > 0:
-                audio_data = audio_data / audio_max
-            # Giới hạn độ dài audio (tránh quá dài)
-            max_duration = 10  # giây
             max_samples = sample_rate * max_duration
             if len(audio_data) > max_samples:
                 audio_data = audio_data[:max_samples]
-                print(f"⚠️ Cắt audio xuống còn {max_duration} giây")
             buffer = io.BytesIO()
             sf.write(buffer, audio_data, sample_rate, format='wav', subtype='PCM_16')
             buffer.seek(0)
-            # Gọi API Whisper
             transcription = self.client.audio.transcriptions.create(
                 model=settings.WHISPER_MODEL,
                 file=("speech.wav", buffer.read(), "audio/wav"),
                 response_format="text",
-                language="vi"
             )
-            # Xử lý response
             if hasattr(transcription, 'text'):
                 result = transcription.text.strip()
             elif isinstance(transcription, str):
@@ -298,7 +190,6 @@ class StreamingVoiceService:
             else:
                 result = str(transcription).strip()
-            print(f"✅ Transcription thành công: {result}")
             return result
         except Exception as e:
@@ -306,23 +197,21 @@ class StreamingVoiceService:
             return None
     def _generate_ai_response(self, user_input: str) -> str:
-        """Sinh phản hồi AI"""
         try:
-            # Thêm vào lịch sử
             self.conversation_history.append({"role": "user", "content": user_input})
-            # Tìm kiếm RAG
             rag_results = self.rag_system.semantic_search(user_input, top_k=2)
             context_text = "\n".join([f"- {result.get('text', str(result))}" for result in rag_results]) if rag_results else ""
             system_prompt = f"""Bạn là trợ lý AI thông minh chuyên về tiếng Việt.
-Hãy trả lời ngắn gọn, tự nhiên và hữu ích (dưới 100 từ).
 Thông tin tham khảo:
 {context_text}
 """
             messages = [{"role": "system", "content": system_prompt}]
-            # Giữ lại 4 tin nhắn gần nhất
             messages.extend(self.conversation_history[-4:])
             completion = self.client.chat.completions.create(
@@ -335,17 +224,16 @@ Thông tin tham khảo:
             response = completion.choices[0].message.content
             self.conversation_history.append({"role": "assistant", "content": response})
-            # Giới hạn lịch sử
             if len(self.conversation_history) > 8:
                 self.conversation_history = self.conversation_history[-8:]
             return response
         except Exception as e:
-            return f"Xin lỗi, tôi gặp lỗi khi tạo phản hồi: {str(e)}"
     def _text_to_speech(self, text: str) -> Optional[str]:
-        """Chuyển văn bản thành giọng nói"""
         try:
             if not text or text.startswith("❌") or text.startswith("Xin lỗi"):
                 return None
@@ -353,7 +241,6 @@ Thông tin tham khảo:
             tts_bytes = self.tts_service.text_to_speech(text, 'vi')
             if tts_bytes:
                 audio_path = self.tts_service.save_audio_to_file(tts_bytes)
-                print(f"✅ Đã tạo TTS: {audio_path}")
                 return audio_path
         except Exception as e:
             print(f"❌ Lỗi TTS: {e}")
@@ -368,7 +255,202 @@ Thông tin tham khảo:
     def get_conversation_state(self) -> dict:
         """Return a snapshot of the conversation state.

         The dict holds the number of entries in ``conversation_history``
         (``history_length``), the most recent transcription
         (``current_transcription``) and the current time formatted as
         ``HH:MM:SS`` (``last_update``).
         """
         return {
             'history_length': len(self.conversation_history),
             'current_transcription': self.current_transcription,
             'last_update': time.strftime("%H:%M:%S")
-        }

 import io
 import numpy as np
 import soundfile as sf
+import time
 import traceback
 from groq import Groq
 from typing import Optional, Dict, Any
 from config.settings import settings
 from core.rag_system import EnhancedRAGSystem
 from core.tts_service import EnhancedTTSService
+from core.speechbrain_vad import SpeechBrainVAD  # THÊM IMPORT
 class StreamingVoiceService:
         self.rag_system = rag_system
         self.tts_service = tts_service
+        # Khởi tạo VAD
+        self.vad_processor = SpeechBrainVAD()
         # Conversation context
         self.conversation_history = []
         self.current_transcription = ""
+        self.is_listening = False
+    def start_listening(self) -> bool:
+        """Start listening with VAD.
+
+        Passes ``_on_speech_detected`` as the callback to
+        ``self.vad_processor.start_stream`` and sets ``is_listening`` only
+        when that call returns a truthy value.
+
+        Returns:
+            False if the service is already listening; otherwise the value
+            returned by ``start_stream`` (presumably a bool -- TODO confirm
+            against SpeechBrainVAD).
+        """
+        # Guard: a second start while already listening is rejected.
+        if self.is_listening:
+            return False
+        success = self.vad_processor.start_stream(self._on_speech_detected)
+        if success:
+            self.is_listening = True
+            print("🎙️ Đã bắt đầu lắng nghe với VAD")
+        return success
+    def stop_listening(self):
+        """Stop listening.
+
+        Stops the VAD stream first, then clears ``is_listening`` and prints
+        a status message. The flag is cleared unconditionally, whether or
+        not the service was listening.
+        """
+        self.vad_processor.stop_stream()
+        self.is_listening = False
+        print("🛑 Đã dừng lắng nghe")
+    def process_audio_chunk(self, audio_data: tuple) -> Dict[str, Any]:
+        """Process an audio chunk with VAD (used for real-time streaming).
+
+        Args:
+            audio_data: A ``(sample_rate, audio_array)`` pair, as unpacked
+                below (presumably the Gradio microphone payload -- TODO
+                confirm against the caller).
+
+        Returns:
+            A dict with the keys ``transcription``, ``response`` and
+            ``tts_audio``. This method never returns an actual
+            transcription or response: it yields a "listening" placeholder
+            after forwarding the chunk to the VAD, and empty strings when
+            there is no input, the service is not listening, or an error
+            occurs.
+        """
+        # Nothing to do without audio or before start_listening() succeeded.
+        if not audio_data or not self.is_listening:
+            return {
+                'transcription': "",
+                'response': "",
+                'tts_audio': None
+            }
+        try:
+            sample_rate, audio_array = audio_data
+            # Process with VAD
+            self.vad_processor.process_stream(audio_array, sample_rate)
+            return {
+                'transcription': "Đang lắng nghe...",
+                'response': "",
+                'tts_audio': None
+            }
+        except Exception as e:
+            # Best-effort: the error is only printed and an empty result returned.
+            print(f"❌ Lỗi xử lý audio chunk: {e}")
+            return {
+                'transcription': "",
+                'response': "",
+                'tts_audio': None
+            }
+    def _on_speech_detected(self, speech_audio: np.ndarray, sample_rate: int):
+        """Callback invoked when VAD detects speech.
+
+        Transcribes the segment, stores the text in
+        ``current_transcription``, generates an AI response and synthesizes
+        it to speech. Segments whose transcription is empty or shorter than
+        two characters after stripping are dropped.
+
+        Args:
+            speech_audio: Samples of the detected speech segment.
+            sample_rate: Sample rate of ``speech_audio``; used to compute
+                the logged duration and passed on to transcription.
+        """
+        print(f"🎯 VAD phát hiện speech segment: {len(speech_audio)/sample_rate:.2f}s")
+        # Convert speech to text
+        transcription = self._transcribe_audio(speech_audio, sample_rate)
+        if not transcription or len(transcription.strip()) < 2:
+            print("⚠️ Transcription quá ngắn hoặc trống")
+            return
+        print(f"📝 VAD Transcription: {transcription}")
+        self.current_transcription = transcription
+        # Generate the AI response
+        response = self._generate_ai_response(transcription)
+        # Generate TTS
+        # NOTE(review): tts_audio_path (and response) are never returned or
+        # forwarded from this method, so the result does not leave it.
+        tts_audio_path = self._text_to_speech(response)
+        # The result could be sent to the UI through a callback
+        # (needs integration with Gradio events)
     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
+        """Xử lý audio streaming (phương thức cũ cho compatibility)"""
         if not audio_data:
             return {
                 'transcription': "❌ Không có dữ liệu âm thanh",
             }
         try:
             sample_rate, audio_array = audio_data
             print(f"🎯 Nhận audio: {len(audio_array)} samples, SR: {sample_rate}")
+            # Sử dụng VAD để kiểm tra speech
+            if not self.vad_processor.is_speech(audio_array, sample_rate):
                 return {
+                    'transcription': "❌ Không phát hiện giọng nói",
+                    'response': "Vui lòng nói rõ hơn",
                     'tts_audio': None
                 }
             # Chuyển đổi thành văn bản
             transcription = self._transcribe_audio(audio_array, sample_rate)
+            if not transcription or len(transcription.strip()) < 2:
                 return {
                     'transcription': "❌ Không nghe rõ",
                     'response': "Xin vui lòng nói lại rõ hơn",
                 }
             print(f"📝 Đã chuyển đổi: {transcription}")
             self.current_transcription = transcription
             # Tạo phản hồi AI
         except Exception as e:
             print(f"❌ Lỗi xử lý streaming audio: {e}")
             return {
                 'transcription': f"❌ Lỗi: {str(e)}",
+                'response': "Xin lỗi, có lỗi xảy ra",
                 'tts_audio': None
             }
     def _transcribe_audio(self, audio_data: np.ndarray, sample_rate: int) -> Optional[str]:
+        """Chuyển audio -> text (giữ nguyên)"""
+        # ... giữ nguyên code cũ ...
         try:
             if audio_data.ndim > 1:
+                audio_data = np.mean(audio_data, axis=1)
             audio_max = np.max(np.abs(audio_data))
+            if audio_max > 0.1:
+                audio_data = audio_data / audio_max * 0.9
+            max_duration = 15
             max_samples = sample_rate * max_duration
             if len(audio_data) > max_samples:
                 audio_data = audio_data[:max_samples]
+            min_duration = 1.0
+            min_samples = sample_rate * min_duration
+            if len(audio_data) < min_samples:
+                padding = np.zeros(min_samples - len(audio_data))
+                audio_data = np.concatenate([audio_data, padding])
             buffer = io.BytesIO()
             sf.write(buffer, audio_data, sample_rate, format='wav', subtype='PCM_16')
             buffer.seek(0)
             transcription = self.client.audio.transcriptions.create(
                 model=settings.WHISPER_MODEL,
                 file=("speech.wav", buffer.read(), "audio/wav"),
                 response_format="text",
+                language="vi",
+                temperature=0.0,
             )
             if hasattr(transcription, 'text'):
                 result = transcription.text.strip()
             elif isinstance(transcription, str):
             else:
                 result = str(transcription).strip()
             return result
         except Exception as e:
             return None
     def _generate_ai_response(self, user_input: str) -> str:
+        """Sinh phản hồi AI (giữ nguyên)"""
+        # ... giữ nguyên code cũ ...
         try:
             self.conversation_history.append({"role": "user", "content": user_input})
             rag_results = self.rag_system.semantic_search(user_input, top_k=2)
             context_text = "\n".join([f"- {result.get('text', str(result))}" for result in rag_results]) if rag_results else ""
             system_prompt = f"""Bạn là trợ lý AI thông minh chuyên về tiếng Việt.
+Hãy trả lời ngắn gọn, tự nhiên và hữu ích.
 Thông tin tham khảo:
 {context_text}
 """
             messages = [{"role": "system", "content": system_prompt}]
             messages.extend(self.conversation_history[-4:])
             completion = self.client.chat.completions.create(
             response = completion.choices[0].message.content
             self.conversation_history.append({"role": "assistant", "content": response})
             if len(self.conversation_history) > 8:
                 self.conversation_history = self.conversation_history[-8:]
             return response
         except Exception as e:
+            return f"Xin lỗi, tôi gặp lỗi: {str(e)}"
     def _text_to_speech(self, text: str) -> Optional[str]:
+        """Chuyển văn bản thành giọng nói (giữ nguyên)"""
         try:
             if not text or text.startswith("❌") or text.startswith("Xin lỗi"):
                 return None
             tts_bytes = self.tts_service.text_to_speech(text, 'vi')
             if tts_bytes:
                 audio_path = self.tts_service.save_audio_to_file(tts_bytes)
                 return audio_path
         except Exception as e:
             print(f"❌ Lỗi TTS: {e}")
     def get_conversation_state(self) -> dict:
         """Return a snapshot of the conversation state.

         The dict holds the listening flag (``is_listening``), the number
         of entries in ``conversation_history`` (``history_length``), the
         most recent transcription (``current_transcription``) and the
         current time formatted as ``HH:MM:SS`` (``last_update``).
         """
         return {
+            'is_listening': self.is_listening,
             'history_length': len(self.conversation_history),
             'current_transcription': self.current_transcription,
             'last_update': time.strftime("%H:%M:%S")
+        }
+# import io
+# import numpy as np
+# import soundfile as sf
+# import time  # THÊM IMPORT NÀY
+# import traceback
+# from groq import Groq
+# from typing import Optional, Dict, Any
+# from config.settings import settings
+# from core.rag_system import EnhancedRAGSystem
+# from core.tts_service import EnhancedTTSService
+# class StreamingVoiceService:
+#     def __init__(self, groq_client: Groq, rag_system: EnhancedRAGSystem, tts_service: EnhancedTTSService):
+#         self.client = groq_client
+#         self.rag_system = rag_system
+#         self.tts_service = tts_service
+#         # Conversation context
+#         self.conversation_history = []
+#         self.current_transcription = ""
+#     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
+#         """Xử lý audio streaming từ Gradio microphone component"""
+#         if not audio_data:
+#             return {
+#                 'transcription': "❌ Không có dữ liệu âm thanh",
+#                 'response': "Vui lòng nói lại",
+#                 'tts_audio': None
+#             }
+#         try:
+#             # Lấy dữ liệu audio từ Gradio
+#             sample_rate, audio_array = audio_data
+#             print(f"🎯 Nhận audio: {len(audio_array)} samples, SR: {sample_rate}")
+#             # Kiểm tra audio có dữ liệu không
+#             if len(audio_array) == 0 or np.max(np.abs(audio_array)) < 0.01:
+#                 return {
+#                     'transcription': "❌ Âm thanh quá yếu",
+#                     'response': "Xin vui lòng nói to hơn và rõ hơn",
+#                     'tts_audio': None
+#                 }
+#             # Chuyển đổi thành văn bản
+#             transcription = self._transcribe_audio(audio_array, sample_rate)
+#             if not transcription or len(transcription.strip()) == 0:
+#                 return {
+#                     'transcription': "❌ Không nghe rõ",
+#                     'response': "Xin vui lòng nói lại rõ hơn",
+#                     'tts_audio': None
+#                 }
+#             print(f"📝 Đã chuyển đổi: {transcription}")
+#             # Cập nhật transcription hiện tại
+#             self.current_transcription = transcription
+#             # Tạo phản hồi AI
+#             response = self._generate_ai_response(transcription)
+#             # Tạo TTS
+#             tts_audio_path = self._text_to_speech(response)
+#             return {
+#                 'transcription': transcription,
+#                 'response': response,
+#                 'tts_audio': tts_audio_path
+#             }
+#         except Exception as e:
+#             print(f"❌ Lỗi xử lý streaming audio: {e}")
+#             print(f"Chi tiết lỗi: {traceback.format_exc()}")
+#             return {
+#                 'transcription': f"❌ Lỗi: {str(e)}",
+#                 'response': "Xin lỗi, có lỗi xảy ra trong quá trình xử lý",
+#                 'tts_audio': None
+#             }
+#     def _transcribe_audio(self, audio_data: np.ndarray, sample_rate: int) -> Optional[str]:
+#         """Chuyển audio -> text"""
+#         try:
+#             # Chuẩn hóa audio data
+#             if audio_data.ndim > 1:
+#                 audio_data = np.mean(audio_data, axis=1)  # Chuyển sang mono
+#             # Normalize âm lượng
+#             audio_max = np.max(np.abs(audio_data))
+#             if audio_max > 0:
+#                 audio_data = audio_data / audio_max
+#             # Giới hạn độ dài audio (tránh quá dài)
+#             max_duration = 10  # giây
+#             max_samples = sample_rate * max_duration
+#             if len(audio_data) > max_samples:
+#                 audio_data = audio_data[:max_samples]
+#                 print(f"⚠️ Cắt audio xuống còn {max_duration} giây")
+#             buffer = io.BytesIO()
+#             sf.write(buffer, audio_data, sample_rate, format='wav', subtype='PCM_16')
+#             buffer.seek(0)
+#             # Gọi API Whisper
+#             transcription = self.client.audio.transcriptions.create(
+#                 model=settings.WHISPER_MODEL,
+#                 file=("speech.wav", buffer.read(), "audio/wav"),
+#                 response_format="text",
+#                 language="vi"
+#             )
+#             # Xử lý response
+#             if hasattr(transcription, 'text'):
+#                 result = transcription.text.strip()
+#             elif isinstance(transcription, str):
+#                 result = transcription.strip()
+#             else:
+#                 result = str(transcription).strip()
+#             print(f"✅ Transcription thành công: {result}")
+#             return result
+#         except Exception as e:
+#             print(f"❌ Lỗi transcription: {e}")
+#             return None
+#     def _generate_ai_response(self, user_input: str) -> str:
+#         """Sinh phản hồi AI"""
+#         try:
+#             # Thêm vào lịch sử
+#             self.conversation_history.append({"role": "user", "content": user_input})
+#             # Tìm kiếm RAG
+#             rag_results = self.rag_system.semantic_search(user_input, top_k=2)
+#             context_text = "\n".join([f"- {result.get('text', str(result))}" for result in rag_results]) if rag_results else ""
+#             system_prompt = f"""Bạn là trợ lý AI thông minh chuyên về tiếng Việt.
+# Hãy trả lời ngắn gọn, tự nhiên và hữu ích (dưới 100 từ).
+# Thông tin tham khảo:
+# {context_text}
+# """
+#             messages = [{"role": "system", "content": system_prompt}]
+#             # Giữ lại 4 tin nhắn gần nhất
+#             messages.extend(self.conversation_history[-4:])
+#             completion = self.client.chat.completions.create(
+#                 model="llama-3.1-8b-instant",
+#                 messages=messages,
+#                 max_tokens=150,
+#                 temperature=0.7
+#             )
+#             response = completion.choices[0].message.content
+#             self.conversation_history.append({"role": "assistant", "content": response})
+#             # Giới hạn lịch sử
+#             if len(self.conversation_history) > 8:
+#                 self.conversation_history = self.conversation_history[-8:]
+#             return response
+#         except Exception as e:
+#             return f"Xin lỗi, tôi gặp lỗi khi tạo phản hồi: {str(e)}"
+#     def _text_to_speech(self, text: str) -> Optional[str]:
+#         """Chuyển văn bản thành giọng nói"""
+#         try:
+#             if not text or text.startswith("❌") or text.startswith("Xin lỗi"):
+#                 return None
+#             tts_bytes = self.tts_service.text_to_speech(text, 'vi')
+#             if tts_bytes:
+#                 audio_path = self.tts_service.save_audio_to_file(tts_bytes)
+#                 print(f"✅ Đã tạo TTS: {audio_path}")
+#                 return audio_path
+#         except Exception as e:
+#             print(f"❌ Lỗi TTS: {e}")
+#         return None
+#     def clear_conversation(self):
+#         """Xóa lịch sử hội thoại"""
+#         self.conversation_history = []
+#         self.current_transcription = ""
+#         print("🗑️ Đã xóa lịch sử hội thoại")
+#     def get_conversation_state(self) -> dict:
+#         """Lấy trạng thái hội thoại"""
+#         return {
+#             'history_length': len(self.conversation_history),
+#             'current_transcription': self.current_transcription,
+#             'last_update': time.strftime("%H:%M:%S")
+#         }