Spaces:

ABAO77
/

Run_code_api

Sleeping

App Files Files Community

ABAO77 committed on Sep 8

Commit

45a0e83

1 Parent(s): 85fa45c

feat: implement new IPA assessment API with detailed phoneme analysis and Vietnamese-specific feedback

Browse files

Files changed (3) hide show

src/apis/routes/ipa_route.py +0 -158
src/apis/routes/speaking_route.py +288 -1
test_new_ipa_api.py +124 -0

src/apis/routes/ipa_route.py CHANGED Viewed

@@ -1488,165 +1488,7 @@ def _get_common_mistakes(phonemes: List[str]) -> List[Dict]:
     return mistakes
-@router.post("/assess-pronunciation")
-async def assess_ipa_pronunciation(
-    audio_file: UploadFile = File(
-        ..., description="Audio file for IPA pronunciation assessment"
-    ),
-    word: str = Form(..., description="Target word to assess"),
-    target_ipa: str = Form(None, description="Target IPA transcription (optional)"),
-    focus_phonemes: str = Form(
-        None, description="Comma-separated list of phonemes to focus on (optional)"
-    ),
-):
-    """
-    Specialized IPA pronunciation assessment with detailed phoneme analysis
-    Optimized for IPA learning with Vietnamese speaker feedback
-    """
-    import tempfile
-    import os
-    try:
-        # Get the global assessor instance (singleton)
-        assessor = get_assessor()
-        # Save uploaded audio file
-        file_extension = ".wav"
-        if audio_file.filename and "." in audio_file.filename:
-            file_extension = f".{audio_file.filename.split('.')[-1]}"
-        with tempfile.NamedTemporaryFile(
-            delete=False, suffix=file_extension
-        ) as tmp_file:
-            content = await audio_file.read()
-            tmp_file.write(content)
-            tmp_file.flush()
-            # Run standard pronunciation assessment
-            result = assessor.assess_pronunciation(tmp_file.name, word, "word")
-            # Get target IPA and phonemes
-            if not target_ipa:
-                target_phonemes_data = g2p.text_to_phonemes(word)[0]
-                target_ipa = target_phonemes_data["ipa"]
-                target_phonemes = target_phonemes_data["phonemes"]
-            else:
-                # Parse IPA to phonemes (simplified)
-                target_phonemes = target_ipa.replace("/", "").split()
-            # Focus phonemes analysis
-            focus_phonemes_list = []
-            if focus_phonemes:
-                focus_phonemes_list = [p.strip() for p in focus_phonemes.split(",")]
-            # Enhanced IPA-specific analysis
-            ipa_analysis = {
-                "target_word": word,
-                "target_ipa": target_ipa,
-                "target_phonemes": target_phonemes,
-                "user_transcript": result.get("transcript", ""),
-                "user_ipa": result.get("user_ipa", ""),
-                "user_phonemes": result.get("user_phonemes", ""),
-                "overall_score": result.get("overall_score", 0.0),
-                "phoneme_accuracy": result.get("phoneme_comparison", {}).get(
-                    "accuracy_percentage", 0
-                ),
-                "focus_phonemes_analysis": [],
-                "vietnamese_specific_tips": [],
-                "practice_recommendations": [],
-            }
-            # Focus phonemes detailed analysis
-            if focus_phonemes_list and result.get("phoneme_differences"):
-                for phoneme_diff in result["phoneme_differences"]:
-                    ref_phoneme = phoneme_diff.get("reference_phoneme", "")
-                    if ref_phoneme in focus_phonemes_list:
-                        analysis = {
-                            "phoneme": ref_phoneme,
-                            "status": phoneme_diff.get("status", "unknown"),
-                            "score": phoneme_diff.get("score", 0.0),
-                            "difficulty": g2p.get_difficulty_score(ref_phoneme),
-                            "vietnamese_tip": IPA_SYMBOLS_DATA.get(ref_phoneme, {}).get(
-                                "tip", ""
-                            ),
-                            "practice_tip": _get_practice_tips(ref_phoneme),
-                        }
-                        ipa_analysis["focus_phonemes_analysis"].append(analysis)
-            # Vietnamese-specific pronunciation tips
-            all_target_phonemes = target_phonemes + focus_phonemes_list
-            vietnamese_tips = []
-            for phoneme in set(all_target_phonemes):
-                if phoneme in [
-                    "θ",
-                    "ð",
-                    "v",
-                    "z",
-                    "ʒ",
-                    "r",
-                    "w",
-                    "æ",
-                    "ɪ",
-                    "ʊ",
-                ]:  # Difficult for Vietnamese
-                    tip_data = IPA_SYMBOLS_DATA.get(phoneme, {})
-                    if tip_data:
-                        vietnamese_tips.append(
-                            {
-                                "phoneme": phoneme,
-                                "tip": tip_data.get("tip", ""),
-                                "difficulty": tip_data.get("difficulty", "medium"),
-                                "category": tip_data.get("category", "unknown"),
-                            }
-                        )
-            ipa_analysis["vietnamese_specific_tips"] = vietnamese_tips
-            # Practice recommendations based on score
-            if result.get("overall_score", 0) < 0.7:
-                recommendations = [
-                    "Nghe từ mẫu nhiều lần trước khi phát âm",
-                    "Phát âm chậm và rõ ràng từng âm vị",
-                    "Chú ý đến vị trí lưỡi và môi khi phát âm",
-                ]
-                # Add specific recommendations for low-scoring phonemes
-                if result.get("wrong_words"):
-                    for wrong_word in result["wrong_words"][
-                        :2
-                    ]:  # Top 2 problematic words
-                        for wrong_phoneme in wrong_word.get("wrong_phonemes", [])[:2]:
-                            phoneme = wrong_phoneme.get("expected", "")
-                            if phoneme in IPA_SYMBOLS_DATA:
-                                recommendations.append(
-                                    f"Luyện đặc biệt âm /{phoneme}/: {IPA_SYMBOLS_DATA[phoneme]['tip']}"
-                                )
-                ipa_analysis["practice_recommendations"] = recommendations
-            # Combine with original result
-            enhanced_result = {
-                **result,  # Original assessment result
-                "ipa_analysis": ipa_analysis,  # IPA-specific analysis
-                "assessment_type": "ipa_focused",
-                "target_ipa": target_ipa,
-                "focus_phonemes": focus_phonemes_list,
-            }
-            # Clean up temp file
-            os.unlink(tmp_file.name)
-            logger.info(
-                f"IPA assessment completed for word '{word}' with score {result.get('overall_score', 0):.2f}"
-            )
-            return enhanced_result
-    except Exception as e:
-        logger.error(f"IPA pronunciation assessment error: {e}")
-        raise HTTPException(status_code=500, detail=f"Assessment failed: {str(e)}")
 @router.get("/practice-session/{lesson_id}")


1488	return mistakes
1489
1490


















































































































































1491












1492
1493
1494	@router.get("/practice-session/{lesson_id}")

src/apis/routes/speaking_route.py CHANGED Viewed

@@ -36,6 +36,33 @@ class PronunciationAssessmentResult(BaseModel):
     assessment_mode: Optional[str] = None
     character_level_analysis: Optional[bool] = None
 # Global assessor instance - singleton pattern for performance
 global_assessor = None
@@ -178,6 +205,239 @@ async def assess_pronunciation(
         raise HTTPException(status_code=500, detail=f"Assessment failed: {str(e)}")
 # =============================================================================
 # UTILITY ENDPOINTS
 # =============================================================================
@@ -238,5 +498,32 @@ def get_vietnamese_tip(phoneme: str) -> str:
         "z": "Như 's' nhưng rung dây thanh",
         "ʒ": "Như 'ʃ' nhưng rung dây thanh",
         "w": "Tròn môi như 'u'",
     }
-    return tips.get(phoneme, f"Luyện âm {phoneme}")

     assessment_mode: Optional[str] = None
     character_level_analysis: Optional[bool] = None
+# Declared as the response_model of the POST /assess-ipa endpoint in this file.
+# Fields without a default are required when the model is constructed.
+class IPAAssessmentResult(BaseModel):
+    """Optimized response model for IPA-focused pronunciation assessment"""
+    # Core assessment data
+    transcript: str  # What the user actually said
+    user_ipa: Optional[str] = None  # User's IPA transcription
+    # The endpoint stores the word stripped and lower-cased
+    target_word: str  # Target word being assessed
+    # Either the value supplied by the client or the one produced by G2P
+    target_ipa: str  # Target IPA transcription
+    overall_score: float  # Overall pronunciation score (0-1)
+    # Character-level analysis for IPA mapping
+    # Entries built by the endpoint carry: character, phoneme, score,
+    # color_class, is_focus
+    character_analysis: List[Dict]  # Each character with its IPA and score
+    # Phoneme-specific analysis
+    # Entries built by the endpoint carry: phoneme, score, color_class,
+    # percentage, is_focus
+    phoneme_scores: List[Dict]  # Individual phoneme scores with colors
+    # Entries built by the endpoint carry: phoneme, score, status,
+    # vietnamese_tip, difficulty, color_class
+    focus_phonemes_analysis: List[Dict]  # Detailed analysis of target phonemes
+    # Feedback and recommendations
+    vietnamese_tips: List[str]  # Vietnamese-specific pronunciation tips
+    practice_recommendations: List[str]  # Practice suggestions
+    feedback: List[str]  # General feedback messages
+    # Assessment metadata
+    # The endpoint fills: processing_time, mode, model_used, confidence,
+    # enhanced_features
+    processing_info: Dict  # Processing details
+    # Constant tag identifying this response shape
+    assessment_type: str = "ipa_focused"
+    # Copied from the base assessment's "error" entry when present, else None
+    error: Optional[str] = None
 # Global assessor instance - singleton pattern for performance
 global_assessor = None
         raise HTTPException(status_code=500, detail=f"Assessment failed: {str(e)}")
+# CSS class triples ordered (good, fair, poor); shared by every score-to-colour mapping below
+_IPA_TEXT_COLORS = ("text-green-600", "text-yellow-600", "text-red-600")
+_IPA_BADGE_COLORS = (
+    "bg-green-100 text-green-800",
+    "bg-yellow-100 text-yellow-800",
+    "bg-red-100 text-red-800",
+)
+def _ipa_color_class(score: float, palette: tuple) -> str:
+    """Map a score onto the (good, fair, poor) CSS classes in *palette*."""
+    good, fair, poor = palette
+    # One set of cut-offs for the whole response: above 0.8 is good, above 0.6 is fair
+    if score > 0.8:
+        return good
+    return fair if score > 0.6 else poor
+def _find_phoneme_diff(phoneme_differences, phoneme: str) -> Optional[Dict]:
+    """Return the first entry whose "reference_phoneme" equals *phoneme*, or None."""
+    for phoneme_diff in phoneme_differences or []:
+        if phoneme_diff.get("reference_phoneme") == phoneme:
+            return phoneme_diff
+    return None
+@router.post("/assess-ipa", response_model=IPAAssessmentResult)
+async def assess_ipa_pronunciation(
+    audio_file: UploadFile = File(..., description="Audio file (.wav, .mp3, .m4a)"),
+    target_word: str = Form(..., description="Target word to assess (e.g., 'bed')"),
+    target_ipa: str = Form(None, description="Target IPA notation (e.g., '/bɛd/')"),
+    focus_phonemes: str = Form(None, description="Comma-separated focus phonemes (e.g., 'ɛ,b')"),
+):
+    """
+    Optimized IPA pronunciation assessment for phoneme-focused learning
+    Evaluates:
+    - Overall word pronunciation accuracy
+    - Character-to-phoneme mapping accuracy
+    - Specific phoneme pronunciation (e.g., /ɛ/ in 'bed')
+    - Vietnamese-optimized feedback and tips
+    - Dynamic color scoring for UI visualization
+    Example: Assessing 'bed' /bɛd/ with focus on /ɛ/ phoneme
+    """
+    # Contract:
+    #   audio_file     - uploaded recording; written to a temp file for the assessor
+    #   target_word    - word to assess; stripped and lower-cased before use
+    #   target_ipa     - optional IPA; derived with EnhancedG2P when omitted
+    #   focus_phonemes - optional comma-separated phonemes to analyse in detail
+    # Returns an IPAAssessmentResult.
+    # Raises HTTPException 400 for an empty or over-long target word and
+    # HTTPException 500 when anything in the assessment itself fails.
+    import os
+    import time
+    start_time = time.time()
+    # Clean first so the checks apply to the word that is actually assessed
+    target_word = target_word.strip().lower()
+    if not target_word:
+        raise HTTPException(status_code=400, detail="Target word cannot be empty")
+    if len(target_word) > 50:
+        raise HTTPException(status_code=400, detail="Target word too long (max 50 characters)")
+    # The filename is client-controlled: keep only a plain extension so the
+    # temp-file suffix can never contain a path separator
+    file_extension = ".wav"
+    if audio_file.filename:
+        file_extension = os.path.splitext(os.path.basename(audio_file.filename))[1] or ".wav"
+    tmp_path = None
+    try:
+        # delete=False because the assessor re-opens the file by name; it is
+        # removed explicitly in the finally block below
+        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
+            tmp_path = tmp_file.name
+            tmp_file.write(await audio_file.read())
+        # The handle is closed here, so the data is flushed and the file can
+        # be re-opened on platforms that forbid a second open
+        logger.info(f"IPA assessment for word '{target_word}' with IPA '{target_ipa}'")
+        # Get the assessor instance
+        assessor = get_assessor()
+        # Run base pronunciation assessment in word mode
+        base_result = assessor.assess_pronunciation(tmp_path, target_word, "word")
+        overall_score = base_result.get("overall_score", 0.0)
+        phoneme_differences = base_result.get("phoneme_differences")
+        if not target_ipa:
+            # G2P is only needed (and only constructed) when no IPA was supplied
+            target_phonemes_data = EnhancedG2P().text_to_phonemes(target_word)[0]
+            target_ipa = target_phonemes_data["ipa"]
+            target_phonemes = target_phonemes_data["phonemes"]
+        else:
+            # NOTE(review): one code point per phoneme; stress and length marks
+            # and multi-character phonemes are not grouped - confirm this matches
+            # the "reference_phoneme" values the assessor emits
+            target_phonemes = list(target_ipa.replace("/", "").strip())
+        # Parse focus phonemes; blank entries (e.g. a trailing comma) are dropped
+        # so the empty phoneme of a padded character is never flagged as focus
+        focus_phonemes_list = []
+        if focus_phonemes:
+            focus_phonemes_list = [p.strip() for p in focus_phonemes.split(",") if p.strip()]
+        # Character-level analysis for UI mapping
+        # NOTE(review): characters and IPA symbols are paired by position, which
+        # only lines up when the word has one letter per phoneme
+        character_analysis = []
+        target_phoneme_chars = list(target_ipa.replace("/", ""))
+        for i, char in enumerate(target_word):
+            char_phoneme = target_phoneme_chars[i] if i < len(target_phoneme_chars) else ""
+            # Prefer the phoneme's own score; fall back to the overall score
+            phoneme_diff = _find_phoneme_diff(phoneme_differences, char_phoneme)
+            char_score = overall_score if phoneme_diff is None else phoneme_diff.get("score", overall_score)
+            character_analysis.append({
+                "character": char,
+                "phoneme": char_phoneme,
+                "score": float(char_score),
+                "color_class": _ipa_color_class(char_score, _IPA_TEXT_COLORS),
+                "is_focus": char_phoneme in focus_phonemes_list
+            })
+        # Phoneme-specific scoring for visualization
+        phoneme_scores = []
+        for phoneme in target_phonemes:
+            phoneme_diff = _find_phoneme_diff(phoneme_differences, phoneme)
+            phoneme_score = overall_score if phoneme_diff is None else phoneme_diff.get("score", overall_score)
+            phoneme_scores.append({
+                "phoneme": phoneme,
+                "score": float(phoneme_score),
+                "color_class": _ipa_color_class(phoneme_score, _IPA_BADGE_COLORS),
+                "percentage": int(phoneme_score * 100),
+                "is_focus": phoneme in focus_phonemes_list
+            })
+        # Focus phonemes detailed analysis
+        focus_phonemes_analysis = []
+        for focus_phoneme in focus_phonemes_list:
+            # Defaults apply when the assessor reported nothing for this phoneme
+            phoneme_analysis = {
+                "phoneme": focus_phoneme,
+                "score": overall_score,
+                "status": "correct",
+                "vietnamese_tip": get_vietnamese_tip(focus_phoneme),
+                "difficulty": get_phoneme_difficulty(focus_phoneme),
+                "color_class": "bg-green-100 text-green-800"
+            }
+            phoneme_diff = _find_phoneme_diff(phoneme_differences, focus_phoneme)
+            if phoneme_diff is not None:
+                score = phoneme_diff.get("score", 0.0)
+                phoneme_analysis.update({
+                    "score": float(score),
+                    "status": phoneme_diff.get("status", "unknown"),
+                    "color_class": _ipa_color_class(score, _IPA_BADGE_COLORS)
+                })
+            focus_phonemes_analysis.append(phoneme_analysis)
+        # Vietnamese-specific tips; dict.fromkeys de-duplicates while keeping
+        # first-seen order so the response is stable between calls
+        vietnamese_tips = []
+        difficult_phonemes = {"θ", "ð", "v", "z", "ʒ", "r", "w", "æ", "ɪ", "ʊ", "ɛ"}
+        for phoneme in dict.fromkeys(target_phonemes + focus_phonemes_list):
+            if phoneme in difficult_phonemes:
+                tip = get_vietnamese_tip(phoneme)
+                if tip not in vietnamese_tips:
+                    vietnamese_tips.append(tip)
+        # Practice recommendations based on score
+        practice_recommendations = []
+        if overall_score < 0.7:
+            practice_recommendations.extend([
+                "Nghe từ mẫu nhiều lần trước khi phát âm",
+                "Phát âm chậm và rõ ràng từng âm vị",
+                "Chú ý đến vị trí lưỡi và môi khi phát âm"
+            ])
+            # Add specific recommendations for weak focus phonemes
+            for analysis in focus_phonemes_analysis:
+                if analysis["score"] < 0.6:
+                    practice_recommendations.append(
+                        f"Luyện đặc biệt âm /{analysis['phoneme']}/: {analysis['vietnamese_tip']}"
+                    )
+        if overall_score >= 0.8:
+            practice_recommendations.append("Phát âm rất tốt! Tiếp tục luyện tập để duy trì chất lượng")
+        elif overall_score >= 0.6:
+            practice_recommendations.append("Phát âm khá tốt, cần cải thiện một số âm vị")
+        # An error reported by the assessor replaces the regular feedback
+        error_message = None
+        feedback = base_result.get("feedback", [])
+        if base_result.get("error"):
+            error_message = base_result["error"]
+            feedback = [f"Lỗi: {error_message}"]
+        # Processing information
+        processing_time = time.time() - start_time
+        processing_info = {
+            "processing_time": processing_time,
+            "mode": "ipa_focused",
+            "model_used": "Wav2Vec2-Enhanced",
+            "confidence": base_result.get("processing_info", {}).get("confidence", 0.0),
+            "enhanced_features": True
+        }
+        # Create final result
+        result = IPAAssessmentResult(
+            transcript=base_result.get("transcript", ""),
+            user_ipa=base_result.get("user_ipa", ""),
+            target_word=target_word,
+            target_ipa=target_ipa,
+            overall_score=float(overall_score),
+            character_analysis=character_analysis,
+            phoneme_scores=phoneme_scores,
+            focus_phonemes_analysis=focus_phonemes_analysis,
+            vietnamese_tips=vietnamese_tips,
+            practice_recommendations=practice_recommendations,
+            feedback=feedback,
+            processing_info=processing_info,
+            error=error_message
+        )
+        logger.info(f"IPA assessment completed for '{target_word}' in {processing_time:.2f}s with score {overall_score:.2f}")
+        return result
+    except HTTPException:
+        # Never re-wrap a deliberate HTTP error as a 500
+        raise
+    except Exception as e:
+        logger.error(f"IPA assessment error: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=f"IPA assessment failed: {str(e)}") from e
+    finally:
+        # Always remove the upload, on success and on failure alike; a failed
+        # cleanup must not mask the real response
+        if tmp_path is not None:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                logger.warning(f"Could not remove temporary file '{tmp_path}'")
 # =============================================================================
 # UTILITY ENDPOINTS
 # =============================================================================
         "z": "Như 's' nhưng rung dây thanh",
         "ʒ": "Như 'ʃ' nhưng rung dây thanh",
         "w": "Tròn môi như 'u'",
+        "ɛ": "Mở miệng vừa phải, lưỡi hạ thấp như 'e' tiếng Việt",
+        "æ": "Mở miệng rộng, lưỡi thấp như nói 'a' nhưng ngắn hơn",
+        "ɪ": "Âm 'i' ngắn, lưỡi không căng như 'i' tiếng Việt",
+        "ʊ": "Âm 'u' ngắn, môi tròn nhẹ",
+        "ə": "Âm trung tính, miệng thả lỏng",
+        "ɔ": "Mở miệng tròn như 'o' nhưng rộng hơn",
+        "ʌ": "Miệng mở vừa, lưỡi ở giữa",
+        "f": "Răng trên chạm môi dưới, thổi nhẹ",
+        "b": "Hai môi chạm nhau, rung dây thanh",
+        "p": "Hai môi chạm nhau, không rung dây thanh",
+        "d": "Lưỡi chạm nướu răng trên, rung dây thanh",
+        "t": "Lưỡi chạm nướu răng trên, không rung dây thanh",
+        "k": "Lưỡi chạm vòm miệng, không rung dây thanh",
+        "g": "Lưỡi chạm vòm miệng, rung dây thanh"
     }
+    return tips.get(phoneme, f"Luyện tập phát âm /{phoneme}/")
+def get_phoneme_difficulty(phoneme: str) -> str:
+    """Classify how hard a phoneme is for Vietnamese speakers.
+    Returns "hard" or "medium" for the phonemes listed below and "easy"
+    for anything else.
+    """
+    # Checked top to bottom; the first tier that lists the phoneme wins
+    tiers = (
+        ("hard", ("θ", "ð", "r", "w", "æ", "ʌ", "ɪ", "ʊ")),
+        ("medium", ("v", "z", "ʒ", "ɛ", "ə", "ɔ", "f")),
+    )
+    for level, members in tiers:
+        if phoneme in members:
+            return level
+    return "easy"

test_new_ipa_api.py ADDED Viewed

	@@ -0,0 +1,124 @@

+#!/usr/bin/env python3
+"""
+Test script for the new IPA assessment API
+"""
+import requests
+import json
+import os
+# Server under test and the full URL of the IPA assessment route
+API_BASE = "http://localhost:8000"
+ENDPOINT = API_BASE + "/speaking/assess-ipa"
+def _write_silent_wav(path):
+    """Write a valid zero-frame WAV file (mono, 16-bit, 44.1 kHz) to *path*."""
+    import wave
+    # The wave module emits the 44-byte RIFF/fmt/data header on close, so the
+    # header fields no longer have to be assembled by hand
+    with wave.open(path, "wb") as wav_file:
+        wav_file.setnchannels(1)
+        wav_file.setsampwidth(2)
+        wav_file.setframerate(44100)
+        wav_file.writeframes(b"")
+def test_ipa_assessment():
+    """Post a silent WAV to the IPA assessment endpoint for a few sample words.
+    This is a manual smoke check: it prints the outcome of each request and
+    reports connection or other errors instead of raising. The temporary
+    audio file is removed afterwards in every case.
+    """
+    test_audio_path = "test_audio.wav"
+    # Upper bound so a stuck server cannot hang the script forever
+    request_timeout_seconds = 120
+    try:
+        # Created inside the try so the finally block also cleans up a
+        # partially written file
+        _write_silent_wav(test_audio_path)
+        # Test data
+        test_cases = [
+            {
+                "target_word": "bed",
+                "target_ipa": "/bɛd/",
+                "focus_phonemes": "ɛ,b,d"
+            },
+            {
+                "target_word": "cat",
+                "target_ipa": "/kæt/",
+                "focus_phonemes": "æ"
+            },
+            {
+                "target_word": "think",
+                "target_ipa": "/θɪŋk/",
+                "focus_phonemes": "θ"
+            }
+        ]
+        for i, test_case in enumerate(test_cases, 1):
+            print(f"\n{'='*50}")
+            print(f"Test Case {i}: {test_case['target_word']}")
+            print(f"{'='*50}")
+            # Prepare the request
+            data = {
+                'target_word': test_case['target_word'],
+                'target_ipa': test_case['target_ipa'],
+                'focus_phonemes': test_case['focus_phonemes']
+            }
+            print(f"Request data: {data}")
+            # The with-block closes the upload handle even when the request raises
+            with open(test_audio_path, 'rb') as audio_handle:
+                files = {
+                    'audio_file': ('test.wav', audio_handle, 'audio/wav')
+                }
+                response = requests.post(
+                    ENDPOINT, files=files, data=data, timeout=request_timeout_seconds
+                )
+            print(f"Response status: {response.status_code}")
+            if response.status_code == 200:
+                result = response.json()
+                print("✅ Success!")
+                print(f"Overall Score: {result.get('overall_score', 0) * 100:.1f}%")
+                print(f"Character Analysis: {len(result.get('character_analysis', []))} characters")
+                print(f"Phoneme Scores: {len(result.get('phoneme_scores', []))} phonemes")
+                print(f"Focus Phonemes: {len(result.get('focus_phonemes_analysis', []))} analyzed")
+                print(f"Vietnamese Tips: {len(result.get('vietnamese_tips', []))} tips")
+                print(f"Recommendations: {len(result.get('practice_recommendations', []))} recommendations")
+                # Print sample character analysis
+                if result.get('character_analysis'):
+                    print("\nCharacter Analysis Sample:")
+                    for char_analysis in result['character_analysis'][:3]:
+                        print(f"  '{char_analysis['character']}' -> /{char_analysis['phoneme']}/ ({char_analysis['score']*100:.1f}%)")
+                # Print focus phonemes
+                if result.get('focus_phonemes_analysis'):
+                    print("\nFocus Phonemes Analysis:")
+                    for phoneme_analysis in result['focus_phonemes_analysis']:
+                        print(f"  /{phoneme_analysis['phoneme']}/ - {phoneme_analysis['score']*100:.1f}% ({phoneme_analysis['status']})")
+                        print(f"    Tip: {phoneme_analysis['vietnamese_tip']}")
+            else:
+                print(f"❌ Failed: {response.text}")
+    except requests.exceptions.ConnectionError:
+        print("❌ Connection Error: Make sure the API server is running on port 8000")
+        print("Start the server with: uvicorn app:app --host 0.0.0.0 --port 8000")
+    except Exception as e:
+        # Best-effort script: report anything else (including timeouts) and carry on to cleanup
+        print(f"❌ Error: {e}")
+    finally:
+        # Clean up test file
+        if os.path.exists(test_audio_path):
+            os.remove(test_audio_path)
+if __name__ == "__main__":
+    # Print the banner, then run the single manual check against the live server
+    print("Testing New IPA Assessment API", "=" * 50, sep="\n")
+    test_ipa_assessment()