Spaces:

datbkpro
/

voicebot

Running

App Files Files Community

datbkpro committed 8 days ago

Commit

1b5721c

verified ·

1 Parent(s): 47163d5

Update services/streaming_voice_service.py

Browse files

Files changed (1) hide show

services/streaming_voice_service.py +110 -1

services/streaming_voice_service.py CHANGED Viewed

@@ -586,7 +586,116 @@ class StreamingVoiceService:
         finally:
             with self.processing_lock:
                 self.is_processing = False
     def process_audio_chunk(self, audio_data: tuple) -> Dict[str, Any]:
         """Xử lý audio chunk với VAD"""
         if not audio_data or not self.is_listening:

         finally:
             with self.processing_lock:
                 self.is_processing = False
+    # THÊM LẠI PHƯƠNG THỨC process_streaming_audio ĐÃ BỊ THIẾU
+    def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
+        """Xử lý audio streaming (phương thức cũ cho compatibility với Gradio)"""
+        if not audio_data:
+            return {
+                'transcription': "❌ Không có dữ liệu âm thanh",
+                'response': "Vui lòng nói lại",
+                'tts_audio': None,
+                'status': 'error'
+            }
+        # Nếu đang xử lý VAD, trả về trạng thái processing
+        if self.is_processing:
+            return {
+                'transcription': "Đang xử lý...",
+                'response': "",
+                'tts_audio': None,
+                'status': 'processing'
+            }
+        try:
+            # Lấy dữ liệu audio từ Gradio
+            sample_rate, audio_array = audio_data
+            print(f"🎯 Nhận audio manual: {len(audio_array)} samples, SR: {sample_rate}")
+            # Kiểm tra kiểu dữ liệu và chuyển đổi nếu cần
+            if isinstance(audio_array, np.ndarray):
+                if audio_array.dtype == np.float32 or audio_array.dtype == np.float64:
+                    # Chuyển từ float sang int16
+                    audio_array = (audio_array * 32767).astype(np.int16)
+            # Kiểm tra audio có dữ liệu không
+            if len(audio_array) == 0:
+                return {
+                    'transcription': "❌ Âm thanh trống",
+                    'response': "Vui lòng nói lại",
+                    'tts_audio': None,
+                    'status': 'error'
+                }
+            # Tính toán âm lượng
+            audio_abs = np.abs(audio_array.astype(np.float32))
+            audio_rms = np.sqrt(np.mean(audio_abs**2)) / 32767.0
+            print(f"📊 Âm lượng RMS: {audio_rms:.4f}")
+            if audio_rms < 0.005:
+                return {
+                    'transcription': "❌ Âm thanh quá yếu",
+                    'response': "Xin vui lòng nói to hơn",
+                    'tts_audio': None,
+                    'status': 'error'
+                }
+            # Sử dụng VAD để kiểm tra speech
+            if not self.vad_processor.is_speech(audio_array, sample_rate):
+                return {
+                    'transcription': "❌ Không phát hiện giọng nói",
+                    'response': "Vui lòng nói rõ hơn",
+                    'tts_audio': None,
+                    'status': 'error'
+                }
+            # Chuyển đổi thành văn bản
+            transcription = self._transcribe_audio(audio_array, sample_rate)
+            if not transcription or len(transcription.strip()) == 0:
+                return {
+                    'transcription': "❌ Không nghe rõ",
+                    'response': "Xin vui lòng nói lại rõ hơn",
+                    'tts_audio': None,
+                    'status': 'error'
+                }
+            # Kiểm tra nếu transcription quá ngắn
+            if len(transcription.strip()) < 2:
+                return {
+                    'transcription': "❌ Câu nói quá ngắn",
+                    'response': "Xin vui lòng nói câu dài hơn",
+                    'tts_audio': None,
+                    'status': 'error'
+                }
+            print(f"📝 Đã chuyển đổi: {transcription}")
+            # Cập nhật transcription hiện tại
+            self.current_transcription = transcription
+            # Tạo phản hồi AI
+            response = self._generate_ai_response(transcription)
+            # Tạo TTS
+            tts_audio_path = self._text_to_speech(response)
+            return {
+                'transcription': transcription,
+                'response': response,
+                'tts_audio': tts_audio_path,
+                'status': 'completed'
+            }
+        except Exception as e:
+            print(f"❌ Lỗi xử lý streaming audio: {e}")
+            print(f"Chi tiết lỗi: {traceback.format_exc()}")
+            return {
+                'transcription': f"❌ Lỗi: {str(e)}",
+                'response': "Xin lỗi, có lỗi xảy ra trong quá trình xử lý",
+                'tts_audio': None,
+                'status': 'error'
+            }
     def process_audio_chunk(self, audio_data: tuple) -> Dict[str, Any]:
         """Xử lý audio chunk với VAD"""
         if not audio_data or not self.is_listening: