ABAO77 committed on
Commit
45a0e83
·
1 Parent(s): 85fa45c

feat: implement new IPA assessment API with detailed phoneme analysis and Vietnamese-specific feedback

Browse files
src/apis/routes/ipa_route.py CHANGED
@@ -1488,165 +1488,7 @@ def _get_common_mistakes(phonemes: List[str]) -> List[Dict]:
1488
  return mistakes
1489
 
1490
 
1491
# Phonemes flagged in this endpoint as difficult for Vietnamese speakers.
_VIETNAMESE_DIFFICULT_PHONEMES = frozenset(
    {"θ", "ð", "v", "z", "ʒ", "r", "w", "æ", "ɪ", "ʊ"}
)


@router.post("/assess-pronunciation")
async def assess_ipa_pronunciation(
    audio_file: UploadFile = File(
        ..., description="Audio file for IPA pronunciation assessment"
    ),
    word: str = Form(..., description="Target word to assess"),
    target_ipa: str = Form(None, description="Target IPA transcription (optional)"),
    focus_phonemes: str = Form(
        None, description="Comma-separated list of phonemes to focus on (optional)"
    ),
):
    """
    Specialized IPA pronunciation assessment with detailed phoneme analysis
    Optimized for IPA learning with Vietnamese speaker feedback
    """
    # Implementation notes (kept out of the docstring, which FastAPI publishes
    # as the endpoint description):
    #   * The upload is spooled to a temporary file because the assessor is
    #     called with a file path; the file is always removed in `finally`.
    #   * Returns the assessor's result dict extended with "ipa_analysis",
    #     "assessment_type", "target_ipa" and "focus_phonemes".
    #   * Any failure is logged and surfaced as HTTP 500.
    import os
    import tempfile

    tmp_path = None
    try:
        # Get the global assessor instance (singleton)
        assessor = get_assessor()

        # Derive the suffix from the base name only, so a client-supplied
        # filename containing path separators cannot break temp-file creation.
        file_extension = ".wav"
        if audio_file.filename:
            file_extension = (
                os.path.splitext(os.path.basename(audio_file.filename))[1] or ".wav"
            )

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=file_extension
        ) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(await audio_file.read())

        # Run standard pronunciation assessment
        result = assessor.assess_pronunciation(tmp_path, word, "word")
        overall_score = result.get("overall_score", 0.0)

        # Get target IPA and phonemes
        if not target_ipa:
            target_phonemes_data = g2p.text_to_phonemes(word)[0]
            target_ipa = target_phonemes_data["ipa"]
            target_phonemes = list(target_phonemes_data["phonemes"])
        else:
            # Parse IPA to phonemes (simplified: whitespace-separated symbols)
            target_phonemes = target_ipa.replace("/", "").split()

        # Drop empty entries (e.g. from a trailing comma): an empty string
        # would otherwise match the "" default of a missing reference phoneme.
        focus_phonemes_list = []
        if focus_phonemes:
            focus_phonemes_list = [
                p.strip() for p in focus_phonemes.split(",") if p.strip()
            ]

        # Focus phonemes detailed analysis
        focus_analysis = []
        if focus_phonemes_list:
            for phoneme_diff in result.get("phoneme_differences") or []:
                ref_phoneme = phoneme_diff.get("reference_phoneme", "")
                if ref_phoneme not in focus_phonemes_list:
                    continue
                focus_analysis.append(
                    {
                        "phoneme": ref_phoneme,
                        "status": phoneme_diff.get("status", "unknown"),
                        "score": phoneme_diff.get("score", 0.0),
                        "difficulty": g2p.get_difficulty_score(ref_phoneme),
                        "vietnamese_tip": IPA_SYMBOLS_DATA.get(ref_phoneme, {}).get(
                            "tip", ""
                        ),
                        "practice_tip": _get_practice_tips(ref_phoneme),
                    }
                )

        # Vietnamese-specific pronunciation tips. dict.fromkeys de-duplicates
        # while keeping first-seen order, so the response is deterministic.
        vietnamese_tips = []
        for phoneme in dict.fromkeys([*target_phonemes, *focus_phonemes_list]):
            if phoneme not in _VIETNAMESE_DIFFICULT_PHONEMES:
                continue
            tip_data = IPA_SYMBOLS_DATA.get(phoneme, {})
            if tip_data:
                vietnamese_tips.append(
                    {
                        "phoneme": phoneme,
                        "tip": tip_data.get("tip", ""),
                        "difficulty": tip_data.get("difficulty", "medium"),
                        "category": tip_data.get("category", "unknown"),
                    }
                )

        # Practice recommendations based on score
        recommendations = []
        if overall_score < 0.7:
            recommendations = [
                "Nghe từ mẫu nhiều lần trước khi phát âm",
                "Phát âm chậm và rõ ràng từng âm vị",
                "Chú ý đến vị trí lưỡi và môi khi phát âm",
            ]

            # Add specific recommendations for the first two wrong phonemes of
            # the first two problematic words.
            for wrong_word in (result.get("wrong_words") or [])[:2]:
                for wrong_phoneme in wrong_word.get("wrong_phonemes", [])[:2]:
                    phoneme = wrong_phoneme.get("expected", "")
                    if phoneme in IPA_SYMBOLS_DATA:
                        tip = IPA_SYMBOLS_DATA[phoneme].get("tip", "")
                        recommendations.append(
                            f"Luyện đặc biệt âm /{phoneme}/: {tip}"
                        )

        # Enhanced IPA-specific analysis
        ipa_analysis = {
            "target_word": word,
            "target_ipa": target_ipa,
            "target_phonemes": target_phonemes,
            "user_transcript": result.get("transcript", ""),
            "user_ipa": result.get("user_ipa", ""),
            "user_phonemes": result.get("user_phonemes", ""),
            "overall_score": overall_score,
            "phoneme_accuracy": result.get("phoneme_comparison", {}).get(
                "accuracy_percentage", 0
            ),
            "focus_phonemes_analysis": focus_analysis,
            "vietnamese_specific_tips": vietnamese_tips,
            "practice_recommendations": recommendations,
        }

        # Combine with original result
        enhanced_result = {
            **result,  # Original assessment result
            "ipa_analysis": ipa_analysis,  # IPA-specific analysis
            "assessment_type": "ipa_focused",
            "target_ipa": target_ipa,
            "focus_phonemes": focus_phonemes_list,
        }

        logger.info(
            f"IPA assessment completed for word '{word}' with score {result.get('overall_score', 0):.2f}"
        )

        return enhanced_result

    except Exception as e:
        logger.error(f"IPA pronunciation assessment error: {e}")
        raise HTTPException(status_code=500, detail=f"Assessment failed: {str(e)}")

    finally:
        # Clean up the temp file on success AND on failure; the previous
        # success-only unlink leaked one file per failed request.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
1650
 
1651
 
1652
  @router.get("/practice-session/{lesson_id}")
 
1488
  return mistakes
1489
 
1490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1491
 
 
 
 
 
 
 
 
 
 
 
 
 
1492
 
1493
 
1494
  @router.get("/practice-session/{lesson_id}")
src/apis/routes/speaking_route.py CHANGED
@@ -36,6 +36,33 @@ class PronunciationAssessmentResult(BaseModel):
36
  assessment_mode: Optional[str] = None
37
  character_level_analysis: Optional[bool] = None
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Global assessor instance - singleton pattern for performance
40
  global_assessor = None
41
 
@@ -178,6 +205,239 @@ async def assess_pronunciation(
178
  raise HTTPException(status_code=500, detail=f"Assessment failed: {str(e)}")
179
 
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  # =============================================================================
182
  # UTILITY ENDPOINTS
183
  # =============================================================================
@@ -238,5 +498,32 @@ def get_vietnamese_tip(phoneme: str) -> str:
238
  "z": "Như 's' nhưng rung dây thanh",
239
  "ʒ": "Như 'ʃ' nhưng rung dây thanh",
240
  "w": "Tròn môi như 'u'",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  }
242
- return tips.get(phoneme, f"Luyện âm {phoneme}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  assessment_mode: Optional[str] = None
37
  character_level_analysis: Optional[bool] = None
38
 
39
+
40
class IPAAssessmentResult(BaseModel):
    """Optimized response model for IPA-focused pronunciation assessment"""

    # ---- Core assessment data -------------------------------------------
    # transcript:    what the user actually said
    # user_ipa:      IPA transcription of the user's attempt (may be absent)
    # target_word:   the word being assessed
    # target_ipa:    IPA transcription the user is aiming for
    # overall_score: overall pronunciation score in the 0-1 range
    transcript: str
    user_ipa: Optional[str] = None
    target_word: str
    target_ipa: str
    overall_score: float

    # ---- Character-level analysis for IPA mapping -----------------------
    # One entry per character, carrying its IPA symbol and score.
    character_analysis: List[Dict]

    # ---- Phoneme-specific analysis --------------------------------------
    # phoneme_scores:          per-phoneme scores together with colors
    # focus_phonemes_analysis: detailed analysis of the targeted phonemes
    phoneme_scores: List[Dict]
    focus_phonemes_analysis: List[Dict]

    # ---- Feedback and recommendations -----------------------------------
    # vietnamese_tips:          Vietnamese-specific pronunciation tips
    # practice_recommendations: practice suggestions
    # feedback:                 general feedback messages
    vietnamese_tips: List[str]
    practice_recommendations: List[str]
    feedback: List[str]

    # ---- Assessment metadata --------------------------------------------
    # processing_info: processing details
    processing_info: Dict
    assessment_type: str = "ipa_focused"
    error: Optional[str] = None
65
+
66
  # Global assessor instance - singleton pattern for performance
67
  global_assessor = None
68
 
 
205
  raise HTTPException(status_code=500, detail=f"Assessment failed: {str(e)}")
206
 
207
 
208
# CSS class palettes ordered (good, fair, poor); see _ipa_color_class.
_IPA_TEXT_COLORS = ("text-green-600", "text-yellow-600", "text-red-600")
_IPA_BADGE_COLORS = (
    "bg-green-100 text-green-800",
    "bg-yellow-100 text-yellow-800",
    "bg-red-100 text-red-800",
)

# Phonemes for which a Vietnamese-specific tip is added to the response.
_IPA_DIFFICULT_PHONEMES = frozenset(
    {"θ", "ð", "v", "z", "ʒ", "r", "w", "æ", "ɪ", "ʊ", "ɛ"}
)


def _ipa_color_class(score, palette):
    """Pick a CSS class from a (good, fair, poor) palette: > 0.8 good, > 0.6 fair."""
    good, fair, poor = palette
    if score > 0.8:
        return good
    if score > 0.6:
        return fair
    return poor


@router.post("/assess-ipa", response_model=IPAAssessmentResult)
async def assess_ipa_pronunciation(
    audio_file: UploadFile = File(..., description="Audio file (.wav, .mp3, .m4a)"),
    target_word: str = Form(..., description="Target word to assess (e.g., 'bed')"),
    target_ipa: str = Form(None, description="Target IPA notation (e.g., '/bɛd/')"),
    focus_phonemes: str = Form(None, description="Comma-separated focus phonemes (e.g., 'ɛ,b')"),
):
    """
    Optimized IPA pronunciation assessment for phoneme-focused learning

    Evaluates:
    - Overall word pronunciation accuracy
    - Character-to-phoneme mapping accuracy
    - Specific phoneme pronunciation (e.g., /ɛ/ in 'bed')
    - Vietnamese-optimized feedback and tips
    - Dynamic color scoring for UI visualization

    Example: Assessing 'bed' /bɛd/ with focus on /ɛ/ phoneme
    """
    # Implementation notes (kept out of the docstring, which FastAPI publishes
    # as the endpoint description):
    #   * Raises HTTP 400 for an empty or over-long target word, HTTP 500 for
    #     any failure during assessment.
    #   * The upload is written to a temporary file because the assessor is
    #     called with a file path; the file is always removed in `finally`.
    #   * Returns an IPAAssessmentResult built from the assessor's result dict.
    import os
    import time

    start_time = time.time()

    # Normalize first so the length limit applies to the word actually assessed.
    target_word = target_word.strip().lower()

    if not target_word:
        raise HTTPException(status_code=400, detail="Target word cannot be empty")

    if len(target_word) > 50:
        raise HTTPException(status_code=400, detail="Target word too long (max 50 characters)")

    tmp_path = None
    try:
        # Derive the suffix from the base name only, so a client-supplied
        # filename containing path separators cannot break temp-file creation.
        file_extension = ".wav"
        if audio_file.filename:
            file_extension = (
                os.path.splitext(os.path.basename(audio_file.filename))[1] or ".wav"
            )

        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(await audio_file.read())

        logger.info(f"IPA assessment for word '{target_word}' with IPA '{target_ipa}'")

        # Run base pronunciation assessment in word mode
        assessor = get_assessor()
        base_result = assessor.assess_pronunciation(tmp_path, target_word, "word")
        overall_score = base_result.get("overall_score", 0.0)

        # Resolve target IPA and phonemes; G2P is only needed when the caller
        # did not supply an IPA string.
        if not target_ipa:
            target_phonemes_data = EnhancedG2P().text_to_phonemes(target_word)[0]
            target_ipa = target_phonemes_data["ipa"]
            target_phonemes = list(target_phonemes_data["phonemes"])
        else:
            # NOTE(review): one symbol per code point — multi-character
            # phonemes (e.g. affricates, long vowels) are split apart.
            target_phonemes = list(target_ipa.replace("/", "").strip())

        # Drop empty entries (e.g. from a trailing comma): an empty string
        # would otherwise flag every unmapped character as a focus phoneme.
        focus_phonemes_list = []
        if focus_phonemes:
            focus_phonemes_list = [
                p.strip() for p in focus_phonemes.split(",") if p.strip()
            ]

        # Index the first reported difference per reference phoneme once,
        # instead of rescanning the list for every character and phoneme.
        first_diff = {}
        for phoneme_diff in base_result.get("phoneme_differences") or []:
            first_diff.setdefault(phoneme_diff.get("reference_phoneme"), phoneme_diff)

        # Character-level analysis for UI mapping. Characters are paired with
        # IPA symbols purely by position; extra characters get no phoneme.
        ipa_symbols = list(target_ipa.replace("/", ""))
        character_analysis = []
        for index, char in enumerate(target_word):
            char_phoneme = ipa_symbols[index] if index < len(ipa_symbols) else ""
            diff = first_diff.get(char_phoneme)
            char_score = overall_score if diff is None else diff.get("score", overall_score)
            character_analysis.append({
                "character": char,
                "phoneme": char_phoneme,
                "score": float(char_score),
                "color_class": _ipa_color_class(char_score, _IPA_TEXT_COLORS),
                "is_focus": char_phoneme in focus_phonemes_list,
            })

        # Phoneme-specific scoring for visualization
        phoneme_scores = []
        for phoneme in target_phonemes:
            diff = first_diff.get(phoneme)
            phoneme_score = overall_score if diff is None else diff.get("score", overall_score)
            phoneme_scores.append({
                "phoneme": phoneme,
                "score": float(phoneme_score),
                "color_class": _ipa_color_class(phoneme_score, _IPA_BADGE_COLORS),
                "percentage": int(phoneme_score * 100),
                "is_focus": phoneme in focus_phonemes_list,
            })

        # Focus phonemes detailed analysis
        focus_phonemes_analysis = []
        for focus_phoneme in focus_phonemes_list:
            # Defaults apply when the assessor reported no difference for
            # this phoneme.
            phoneme_analysis = {
                "phoneme": focus_phoneme,
                "score": overall_score,
                "status": "correct",
                "vietnamese_tip": get_vietnamese_tip(focus_phoneme),
                # Previously hard-coded to "medium" for every phoneme.
                "difficulty": get_phoneme_difficulty(focus_phoneme),
                "color_class": "bg-green-100 text-green-800",
            }

            diff = first_diff.get(focus_phoneme)
            if diff is not None:
                score = diff.get("score", 0.0)
                phoneme_analysis.update({
                    "score": float(score),
                    "status": diff.get("status", "unknown"),
                    "color_class": _ipa_color_class(score, _IPA_BADGE_COLORS),
                })

            focus_phonemes_analysis.append(phoneme_analysis)

        # Vietnamese-specific tips. dict.fromkeys de-duplicates while keeping
        # first-seen order, so the response is deterministic.
        vietnamese_tips = []
        for phoneme in dict.fromkeys([*target_phonemes, *focus_phonemes_list]):
            if phoneme in _IPA_DIFFICULT_PHONEMES:
                tip = get_vietnamese_tip(phoneme)
                if tip not in vietnamese_tips:
                    vietnamese_tips.append(tip)

        # Practice recommendations based on score
        practice_recommendations = []
        if overall_score < 0.7:
            practice_recommendations.extend([
                "Nghe từ mẫu nhiều lần trước khi phát âm",
                "Phát âm chậm và rõ ràng từng âm vị",
                "Chú ý đến vị trí lưỡi và môi khi phát âm",
            ])

        # Add specific recommendations for weak focus phonemes
        for analysis in focus_phonemes_analysis:
            if analysis["score"] < 0.6:
                practice_recommendations.append(
                    f"Luyện đặc biệt âm /{analysis['phoneme']}/: {analysis['vietnamese_tip']}"
                )

        if overall_score >= 0.8:
            practice_recommendations.append("Phát âm rất tốt! Tiếp tục luyện tập để duy trì chất lượng")
        elif overall_score >= 0.6:
            practice_recommendations.append("Phát âm khá tốt, cần cải thiện một số âm vị")

        # An error reported by the assessor replaces the regular feedback.
        error_message = None
        feedback = base_result.get("feedback", [])
        if base_result.get("error"):
            error_message = base_result["error"]
            feedback = [f"Lỗi: {error_message}"]

        # Processing information
        processing_time = time.time() - start_time
        processing_info = {
            "processing_time": processing_time,
            "mode": "ipa_focused",
            "model_used": "Wav2Vec2-Enhanced",
            "confidence": base_result.get("processing_info", {}).get("confidence", 0.0),
            "enhanced_features": True,
        }

        result = IPAAssessmentResult(
            transcript=base_result.get("transcript", ""),
            user_ipa=base_result.get("user_ipa", ""),
            target_word=target_word,
            target_ipa=target_ipa,
            overall_score=float(overall_score),
            character_analysis=character_analysis,
            phoneme_scores=phoneme_scores,
            focus_phonemes_analysis=focus_phonemes_analysis,
            vietnamese_tips=vietnamese_tips,
            practice_recommendations=practice_recommendations,
            feedback=feedback,
            processing_info=processing_info,
            error=error_message,
        )

        logger.info(f"IPA assessment completed for '{target_word}' in {processing_time:.2f}s with score {overall_score:.2f}")

        return result

    except Exception as e:
        # logger.exception records the traceback through the logging setup
        # instead of printing it to stderr.
        logger.exception(f"IPA assessment error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"IPA assessment failed: {str(e)}")

    finally:
        # delete=False means nothing removes the upload automatically; without
        # this every request left a file behind in the temp directory.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
439
+
440
+
441
  # =============================================================================
442
  # UTILITY ENDPOINTS
443
  # =============================================================================
 
498
  "z": "Như 's' nhưng rung dây thanh",
499
  "ʒ": "Như 'ʃ' nhưng rung dây thanh",
500
  "w": "Tròn môi như 'u'",
501
+ "ɛ": "Mở miệng vừa phải, lưỡi hạ thấp như 'e' tiếng Việt",
502
+ "æ": "Mở miệng rộng, lưỡi thấp như nói 'a' nhưng ngắn hơn",
503
+ "ɪ": "Âm 'i' ngắn, lưỡi không căng như 'i' tiếng Việt",
504
+ "ʊ": "Âm 'u' ngắn, môi tròn nhẹ",
505
+ "ə": "Âm trung tính, miệng thả lỏng",
506
+ "ɔ": "Mở miệng tròn như 'o' nhưng rộng hơn",
507
+ "ʌ": "Miệng mở vừa, lưỡi ở giữa",
508
+ "f": "Răng trên chạm môi dưới, thổi nhẹ",
509
+ "b": "Hai môi chạm nhau, rung dây thanh",
510
+ "p": "Hai môi chạm nhau, không rung dây thanh",
511
+ "d": "Lưỡi chạm nướu răng trên, rung dây thanh",
512
+ "t": "Lưỡi chạm nướu răng trên, không rung dây thanh",
513
+ "k": "Lưỡi chạm vòm miệng, không rung dây thanh",
514
+ "g": "Lưỡi chạm vòm miệng, rung dây thanh"
515
  }
516
+ return tips.get(phoneme, f"Luyện tập phát âm /{phoneme}/")
517
+
518
+
519
# Difficulty buckets for Vietnamese speakers; anything not listed is "easy".
_HARD_PHONEMES = frozenset({"θ", "ð", "r", "w", "æ", "ʌ", "ɪ", "ʊ"})
_MEDIUM_PHONEMES = frozenset({"v", "z", "ʒ", "ɛ", "ə", "ɔ", "f"})


def get_phoneme_difficulty(phoneme: str) -> str:
    """Get difficulty level for Vietnamese speakers.

    Args:
        phoneme: An IPA symbol, optionally wrapped in slashes and/or
            surrounded by whitespace (e.g. "θ", "/θ/", " θ ").

    Returns:
        "hard", "medium" or "easy". Symbols that are in neither bucket,
        including the empty string, are reported as "easy".
    """
    # Accept the slash-delimited notation used elsewhere in this API
    # (e.g. '/bɛd/') so "/θ/" is not silently classified as "easy".
    symbol = phoneme.strip().strip("/").strip()

    if symbol in _HARD_PHONEMES:
        return "hard"
    if symbol in _MEDIUM_PHONEMES:
        return "medium"
    return "easy"
test_new_ipa_api.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for the new IPA assessment API
4
+ """
5
+
6
+ import requests
7
+ import json
8
+ import os
9
+
10
+ # API endpoint
11
+ API_BASE = "http://localhost:8000"
12
+ ENDPOINT = f"{API_BASE}/speaking/assess-ipa"
13
+
14
def _build_silent_wav() -> bytes:
    """Return a minimal 16-bit mono 44.1 kHz PCM WAV file with no audio frames."""
    import io
    import wave

    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # bytes per sample -> 16 bits
        wav_file.setframerate(44100)
        wav_file.writeframes(b"")  # header only, zero-length data chunk
    return buffer.getvalue()


def test_ipa_assessment():
    """Test the new IPA assessment endpoint

    Posts a header-only WAV file to ENDPOINT once per test case and prints a
    summary of each response. The audio is built in memory, so no file is
    created in (or deleted from) the working directory and no file handle
    can be left open if the request fails.
    """
    wav_bytes = _build_silent_wav()

    # Test data
    test_cases = [
        {
            "target_word": "bed",
            "target_ipa": "/bɛd/",
            "focus_phonemes": "ɛ,b,d"
        },
        {
            "target_word": "cat",
            "target_ipa": "/kæt/",
            "focus_phonemes": "æ"
        },
        {
            "target_word": "think",
            "target_ipa": "/θɪŋk/",
            "focus_phonemes": "θ"
        }
    ]

    try:
        for i, test_case in enumerate(test_cases, 1):
            print(f"\n{'='*50}")
            print(f"Test Case {i}: {test_case['target_word']}")
            print(f"{'='*50}")

            # Prepare the request; requests accepts raw bytes as file content
            files = {
                'audio_file': ('test.wav', wav_bytes, 'audio/wav')
            }

            data = {
                'target_word': test_case['target_word'],
                'target_ipa': test_case['target_ipa'],
                'focus_phonemes': test_case['focus_phonemes']
            }

            print(f"Request data: {data}")

            # Make the request
            response = requests.post(ENDPOINT, files=files, data=data)

            print(f"Response status: {response.status_code}")

            if response.status_code != 200:
                print(f"❌ Failed: {response.text}")
                continue

            result = response.json()
            print("✅ Success!")
            print(f"Overall Score: {result.get('overall_score', 0) * 100:.1f}%")
            print(f"Character Analysis: {len(result.get('character_analysis', []))} characters")
            print(f"Phoneme Scores: {len(result.get('phoneme_scores', []))} phonemes")
            print(f"Focus Phonemes: {len(result.get('focus_phonemes_analysis', []))} analyzed")
            print(f"Vietnamese Tips: {len(result.get('vietnamese_tips', []))} tips")
            print(f"Recommendations: {len(result.get('practice_recommendations', []))} recommendations")

            # Print sample character analysis (first three characters)
            if result.get('character_analysis'):
                print("\nCharacter Analysis Sample:")
                for char_analysis in result['character_analysis'][:3]:
                    print(f"  '{char_analysis['character']}' -> /{char_analysis['phoneme']}/ ({char_analysis['score']*100:.1f}%)")

            # Print focus phonemes
            if result.get('focus_phonemes_analysis'):
                print("\nFocus Phonemes Analysis:")
                for phoneme_analysis in result['focus_phonemes_analysis']:
                    print(f"  /{phoneme_analysis['phoneme']}/ - {phoneme_analysis['score']*100:.1f}% ({phoneme_analysis['status']})")
                    print(f"    Tip: {phoneme_analysis['vietnamese_tip']}")

    except requests.exceptions.ConnectionError:
        print("❌ Connection Error: Make sure the API server is running on port 8000")
        print("Start the server with: uvicorn app:app --host 0.0.0.0 --port 8000")

    except Exception as e:
        print(f"❌ Error: {e}")
120
+
121
# Script entry point: print a banner, then run the manual endpoint check.
if __name__ == "__main__":
    print("Testing New IPA Assessment API", "=" * 50, sep="\n")
    test_ipa_assessment()