Run_code_api / src /apis /routes /ipa_route.py
ABAO77's picture
feat: implement new IPA assessment API with detailed phoneme analysis and Vietnamese-specific feedback
45a0e83
raw
history blame
53.8 kB
from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form
from pydantic import BaseModel
from typing import List, Dict, Optional, Union, Any
import json
import random
import re
import tempfile
import os
import base64
import subprocess
from loguru import logger
from src.apis.controllers.speaking_controller import (
EnhancedG2P,
ProductionPronunciationAssessor,
)
class CharacterMapping(BaseModel):
    """Link between a span of letters in a word and an IPA symbol.

    Every field is optional and defaults to ``None``. Two parallel sets of
    field names are declared (long form and short form).
    NOTE(review): the short-form fields (``chars``, ``ipa``, ``start``,
    ``end``) are not populated by the mapping helpers visible in this file;
    presumably they exist for other producers/consumers - confirm.
    """

    # Long-form fields (the ones the mapping helpers in this module fill in).
    ipa_symbol: Optional[str] = None
    grapheme: Optional[str] = None
    # Slice bounds into the word: start inclusive, end exclusive.
    start_index: Optional[int] = None
    end_index: Optional[int] = None
    characters: Optional[str] = None
    # Short-form counterparts of the fields above.
    chars: Optional[str] = None
    ipa: Optional[str] = None
    start: Optional[int] = None
    end: Optional[int] = None
# All routes declared in this module are mounted under the "/ipa" prefix.
router = APIRouter(prefix="/ipa", tags=["IPA Training"])
# Module-level G2P converter, created once at import time and shared by the
# route handlers below.
g2p = EnhancedG2P()
# Lazily created assessor: stays None until get_assessor() is first called,
# after which the same instance is reused.
global_assessor = None
def get_assessor():
    """Return the shared assessor, constructing it on first use.

    The instance is cached in the module-level ``global_assessor`` so later
    calls return the same object without constructing a new one.
    """
    global global_assessor
    if global_assessor is not None:
        return global_assessor
    logger.info("Creating global ProductionPronunciationAssessor instance...")
    global_assessor = ProductionPronunciationAssessor()
    return global_assessor
def map_ipa_to_characters(word: str, ipa_symbol: str) -> List[CharacterMapping]:
    """Locate the letters of ``word`` that spell ``ipa_symbol``.

    Candidate spellings for the symbol are tried in table order and the
    first one that occurs in the lower-cased word wins (its first
    occurrence is used). If no candidate matches but the symbol itself
    appears literally in the word, that occurrence is used instead.

    Returns a list holding at most one ``CharacterMapping``; it is empty
    when nothing matched.
    """
    # IPA symbol -> candidate spellings, in priority order.
    grapheme_table = {
        # Vowels
        "i": ["ee", "ea", "e", "ie", "ei", "i"],  # see, eat, me, piece, receive, machine
        "ɪ": ["i", "y", "ui", "e"],  # sit, gym, build, women
        "u": ["oo", "u", "ou", "ue", "ui", "o"],  # food, flu, soup, true, fruit, do
        "ʊ": ["oo", "u", "ou"],  # book, put, could
        "ɛ": ["e", "ea", "ai", "a"],  # bed, head, said, many
        # about, taken, pencil, lemon, circus, famous, dollar, butter, doctor
        "ə": ["a", "e", "i", "o", "u", "ou", "ar", "er", "or"],
        "ʌ": ["u", "o", "ou", "oo"],  # cup, love, country, blood
        "ɑ": ["a", "o", "au"],  # father, hot, aunt
        "æ": ["a"],  # cat, apple
        "ɔ": ["o", "aw", "au", "a", "ou"],  # saw, law, caught, all, thought
        # Diphthongs
        "eɪ": ["a", "ai", "ay", "ei", "ey", "ea"],  # say, wait, day, eight, grey, break
        "aɪ": ["i", "y", "ie", "uy", "ai", "igh"],  # my, fly, pie, buy, aisle, night
        "ɔɪ": ["oy", "oi"],  # boy, coin
        "aʊ": ["ou", "ow"],  # how, house
        "oʊ": ["o", "oa", "ow", "oe", "ou"],  # go, boat, show, toe, soul
        # Consonants
        "p": ["p", "pp"],  # pen, apple
        "b": ["b", "bb"],  # boy, rabbit
        "t": ["t", "tt", "ed"],  # top, butter, walked
        "d": ["d", "dd", "ed"],  # dog, ladder, played
        "k": ["c", "k", "ck", "ch", "qu"],  # cat, key, back, school, queen
        "g": ["g", "gg", "gh", "gu"],  # go, egg, ghost, guard
        "f": ["f", "ff", "ph", "gh"],  # fish, off, phone, laugh
        "v": ["v", "ve"],  # very, have
        "θ": ["th"],  # think
        "ð": ["th"],  # this
        "s": ["s", "ss", "c", "sc", "ps"],  # see, miss, city, scene, psychology
        "z": ["z", "zz", "s", "se", "ze"],  # zoo, buzz, is, rose, froze
        # ship, sure, mission, machine, special, nation
        "ʃ": ["sh", "s", "ss", "ch", "ci", "ti"],
        "ʒ": ["s", "si", "ge"],  # measure, vision, garage
        "tʃ": ["ch", "tch", "t"],  # chair, watch, nature
        "dʒ": ["j", "ge", "dge", "g"],  # job, age, bridge, gym
        "m": ["m", "mm", "mb"],  # man, hammer, lamb
        "n": ["n", "nn", "kn", "gn"],  # no, dinner, knee, sign
        "ŋ": ["ng", "n"],  # sing, think
        "l": ["l", "ll"],  # love, hello
        "r": ["r", "rr", "wr"],  # red, sorry, write
        "j": ["y", "i", "j"],  # yes, onion, hallelujah
        "w": ["w", "wh", "qu", "u"],  # we, what, queen, language
        "h": ["h", "wh"],  # house, who
    }

    lowered = word.lower()
    candidates = grapheme_table.get(ipa_symbol, [])

    # First candidate spelling that occurs anywhere in the word.
    matched = next((spelling for spelling in candidates if spelling in lowered), None)

    # Fall back to the symbol itself when it appears verbatim in the word.
    if matched is None and ipa_symbol in lowered:
        matched = ipa_symbol

    if matched is None:
        return []

    begin = lowered.find(matched)
    stop = begin + len(matched)
    return [
        CharacterMapping(
            ipa_symbol=ipa_symbol,
            grapheme=matched,
            start_index=begin,
            end_index=stop,
            # Sliced from the original word so the caller sees its casing.
            characters=word[begin:stop],
        )
    ]
# Hand-written letter-group -> IPA segmentations for the sample words.
# Each entry lists (characters, ipa_symbol) pairs in reading order; the
# groups are contiguous, so character offsets are derived from their lengths.
_PREDEFINED_WORD_SEGMENTS = {
    # Easy words
    "cat": (("c", "k"), ("a", "æ"), ("t", "t")),
    "dog": (("d", "d"), ("o", "ɔ"), ("g", "g")),
    "pen": (("p", "p"), ("e", "ɛ"), ("n", "n")),
    "see": (("s", "s"), ("ee", "i")),
    "bed": (("b", "b"), ("e", "ɛ"), ("d", "d")),
    "fish": (("f", "f"), ("i", "ɪ"), ("sh", "ʃ")),
    "book": (("b", "b"), ("oo", "ʊ"), ("k", "k")),
    "food": (("f", "f"), ("oo", "u"), ("d", "d")),
    "man": (("m", "m"), ("a", "æ"), ("n", "n")),
    "sun": (("s", "s"), ("u", "ʌ"), ("n", "n")),
    # Medium words
    "chair": (("ch", "tʃ"), ("ai", "ɛ"), ("r", "r")),
    "water": (("w", "w"), ("a", "ɔ"), ("t", "t"), ("er", "ər")),
    "house": (("h", "h"), ("ou", "aʊ"), ("se", "s")),
    "yellow": (("y", "j"), ("e", "ɛ"), ("ll", "l"), ("ow", "oʊ")),
    "about": (("a", "ə"), ("b", "b"), ("ou", "aʊ"), ("t", "t")),
    # Hard words
    "think": (("th", "θ"), ("i", "ɪ"), ("nk", "ŋk")),
    "this": (("th", "ð"), ("i", "ɪ"), ("s", "s")),
    "very": (("v", "v"), ("e", "ɛ"), ("r", "r"), ("y", "i")),
    "through": (("th", "θ"), ("r", "r"), ("ough", "u")),
    "measure": (("m", "m"), ("ea", "ɛ"), ("s", "ʒ"), ("ure", "ər")),
}


def map_word_to_phonemes(word: str, ipa_transcription: str) -> List[CharacterMapping]:
    """Map a whole word to a sequence of character-to-IPA mappings.

    Args:
        word: The word to segment. Lookup in the predefined table is
            case-insensitive.
        ipa_transcription: Accepted for interface compatibility; it is not
            currently used to derive the mapping.

    Returns:
        For words in the predefined table, one ``CharacterMapping`` per
        letter group with contiguous ``start_index``/``end_index`` offsets.
        For any other word, a simplified fallback: one mapping per
        alphabetic character, whose ``ipa_symbol`` is just the lower-cased
        letter (not a real phoneme).
    """
    segments = _PREDEFINED_WORD_SEGMENTS.get(word.lower())
    if segments is not None:
        mappings = []
        offset = 0
        for letters, symbol in segments:
            stop = offset + len(letters)
            mappings.append(
                CharacterMapping(
                    characters=letters,
                    ipa_symbol=symbol,
                    start_index=offset,
                    end_index=stop,
                )
            )
            offset = stop
        return mappings

    # Fallback: iterate the original word rather than word.lower(), because
    # lower-casing can change the string length for some Unicode characters
    # and would then misalign (or overrun) the indices into ``word``.
    return [
        CharacterMapping(
            characters=char,
            ipa_symbol=char.lower(),  # Simplified - not an actual phoneme
            start_index=index,
            end_index=index + 1,
        )
        for index, char in enumerate(word)
        if char.isalpha()
    ]
class IPASymbol(BaseModel):
    """One IPA symbol as returned by the symbol and lesson endpoints."""

    symbol: str
    description: str
    example_word: str
    # Not set by the endpoints visible in this file, so it stays None there.
    audio_example: Optional[str] = None
    category: str  # vowel, consonant, diphthong
    difficulty_level: str  # easy, medium, hard
    # Pronunciation tip written in Vietnamese.
    vietnamese_tip: str
    # Where the symbol is spelled inside example_word, when a match was found.
    character_mapping: Optional[List[CharacterMapping]] = None
class IPALesson(BaseModel):
    """A named group of IPA symbols presented together as one lesson."""

    id: str
    title: str
    description: str
    symbols: List[IPASymbol]
    difficulty: str
    estimated_time: int  # minutes
class IPAWord(BaseModel):
    """A practice word with its transcription, phonemes and usage example."""

    word: str
    # IPA transcription string, e.g. "/kæt/" in the sample data.
    ipa: str
    phonemes: List[str]
    difficulty: str
    meaning: str
    example_sentence: str
    # Letter-group to IPA alignment for the word, when available.
    character_mapping: Optional[List[CharacterMapping]] = None
class IPAExercise(BaseModel):
    """A single pronunciation exercise: a word plus hints for the learner."""

    word: str
    ipa: str
    phonemes: List[str]
    # Free-text hints shown to the learner (meaning, example, sound tips).
    hints: List[str]
    difficulty: str
# IPA symbol reference data for Vietnamese learners.
# Maps each symbol to {"desc", "word", "tip", "category", "difficulty"}.
# Built from a row table; insertion order follows the rows below, and the
# lesson endpoints rely on that order.
IPA_SYMBOLS_DATA = {
    symbol: {
        "desc": desc,
        "word": word,
        "tip": tip,
        "category": category,
        "difficulty": difficulty,
    }
    for symbol, desc, word, tip, category, difficulty in (
        # Vowels - Easy
        ("i", "High front unrounded vowel", "see", "Như âm 'i' trong tiếng Việt nhưng dài hơn", "vowel", "easy"),
        ("u", "High back rounded vowel", "food", "Như âm 'u' trong tiếng Việt nhưng dài hơn", "vowel", "easy"),
        ("ɑ", "Low back unrounded vowel", "father", "Mở miệng rộng, âm 'a' sâu", "vowel", "easy"),
        ("ɛ", "Mid front unrounded vowel", "bed", "Giống âm 'e' trong 'đẹp'", "vowel", "easy"),
        ("ɔ", "Mid back rounded vowel", "saw", "Âm 'o' tròn môi", "vowel", "easy"),
        # Vowels - Medium
        ("ɪ", "Near-close near-front unrounded vowel", "sit", "Âm 'i' ngắn, không kéo dài", "vowel", "medium"),
        ("ʊ", "Near-close near-back rounded vowel", "put", "Âm 'u' ngắn, tròn môi nhẹ", "vowel", "medium"),
        ("ʌ", "Mid central unrounded vowel", "cup", "Âm 'ơ' nhưng mở miệng hơn", "vowel", "medium"),
        ("æ", "Near-open front unrounded vowel", "cat", "Mở miệng rộng, âm 'a' phẳng", "vowel", "medium"),
        ("ə", "Schwa - mid central vowel", "about", "Âm yếu 'ơ', thư giãn cơ miệng", "vowel", "medium"),
        # Diphthongs
        ("eɪ", "Diphthong from e to i", "say", "Từ 'e' trượt lên 'i'", "diphthong", "medium"),
        ("aɪ", "Diphthong from a to i", "my", "Từ 'a' trượt lên 'i'", "diphthong", "medium"),
        ("ɔɪ", "Diphthong from o to i", "boy", "Từ 'o' trượt lên 'i'", "diphthong", "medium"),
        ("aʊ", "Diphthong from a to u", "how", "Từ 'a' trượt lên 'u'", "diphthong", "medium"),
        ("oʊ", "Diphthong from o to u", "go", "Từ 'o' trượt lên 'u'", "diphthong", "medium"),
        # Consonants - Easy
        ("p", "Voiceless bilabial plosive", "pen", "Âm 'p' không thở ra", "consonant", "easy"),
        ("b", "Voiced bilabial plosive", "boy", "Âm 'b' có rung dây thanh", "consonant", "easy"),
        ("t", "Voiceless alveolar plosive", "top", "Âm 't' lưỡi chạm nướu", "consonant", "easy"),
        ("d", "Voiced alveolar plosive", "dog", "Âm 'd' có rung dây thanh", "consonant", "easy"),
        ("k", "Voiceless velar plosive", "cat", "Âm 'k' cuống họng", "consonant", "easy"),
        ("g", "Voiced velar plosive", "go", "Âm 'g' có rung dây thanh", "consonant", "easy"),
        ("m", "Bilabial nasal", "man", "Âm 'm' qua mũi", "consonant", "easy"),
        ("n", "Alveolar nasal", "no", "Âm 'n' lưỡi chạm nướu", "consonant", "easy"),
        ("s", "Voiceless alveolar fricative", "see", "Âm 's' rít", "consonant", "easy"),
        ("f", "Voiceless labiodental fricative", "fish", "Môi dưới chạm răng trên", "consonant", "easy"),
        # Consonants - Medium
        ("ʃ", "Voiceless postalveolar fricative", "ship", "Âm 'sh', lưỡi cong", "consonant", "medium"),
        ("ʒ", "Voiced postalveolar fricative", "measure", "Như 'ʃ' nhưng có rung dây thanh", "consonant", "medium"),
        ("tʃ", "Voiceless postalveolar affricate", "chair", "Âm 'ch', từ 't' + 'ʃ'", "consonant", "medium"),
        ("dʒ", "Voiced postalveolar affricate", "job", "Từ 'd' + 'ʒ'", "consonant", "medium"),
        ("l", "Lateral approximant", "love", "Lưỡi chạm nướu, âm thoát hai bên", "consonant", "medium"),
        ("r", "Approximant", "red", "Cuộn lưỡi nhẹ, không chạm vòm", "consonant", "medium"),
        ("j", "Palatal approximant", "yes", "Âm 'y', lưỡi gần vòm miệng", "consonant", "medium"),
        ("w", "Labial-velar approximant", "we", "Tròn môi như 'u', không dùng răng", "consonant", "medium"),
        ("h", "Glottal fricative", "house", "Thở ra nhẹ từ họng", "consonant", "medium"),
        ("z", "Voiced alveolar fricative", "zoo", "Như 's' nhưng có rung dây thanh", "consonant", "medium"),
        # Consonants - Hard (for Vietnamese speakers)
        ("θ", "Voiceless dental fricative", "think", "Lưỡi giữa răng, thổi nhẹ", "consonant", "hard"),
        ("ð", "Voiced dental fricative", "this", "Lưỡi giữa răng, rung dây thanh", "consonant", "hard"),
        ("v", "Voiced labiodental fricative", "very", "Môi dưới chạm răng trên, rung dây thanh", "consonant", "hard"),
        ("ŋ", "Velar nasal", "sing", "Âm 'ng' cuối từ", "consonant", "hard"),
    )
}
# Sample practice words grouped by difficulty level.
# Each level maps to a list of {"word", "ipa", "meaning", "sentence"} dicts,
# in the row order given below.
SAMPLE_WORDS = {
    level: [
        {"word": word, "ipa": ipa, "meaning": meaning, "sentence": sentence}
        for word, ipa, meaning, sentence in rows
    ]
    for level, rows in (
        (
            "easy",
            (
                ("cat", "/kæt/", "con mèo", "The cat is sleeping."),
                ("dog", "/dɔg/", "con chó", "I love my dog."),
                ("man", "/mæn/", "người đàn ông", "The man is tall."),
                ("pen", "/pɛn/", "cái bút", "I need a pen."),
                ("sun", "/sʌn/", "mặt trời", "The sun is bright."),
                ("fish", "/fɪʃ/", "con cá", "Fish live in water."),
                ("book", "/bʊk/", "quyển sách", "I read a book."),
                ("food", "/fud/", "thức ăn", "I like good food."),
                ("see", "/si/", "nhìn thấy", "I can see you."),
                ("bed", "/bɛd/", "giường", "I sleep in my bed."),
            ),
        ),
        (
            "medium",
            (
                ("water", "/ˈwɔtər/", "nước", "I drink water every day."),
                ("chair", "/tʃɛr/", "cái ghế", "Please sit on the chair."),
                ("school", "/skul/", "trường học", "Children go to school."),
                ("mother", "/ˈmʌðər/", "mẹ", "My mother is kind."),
                ("house", "/haʊs/", "ngôi nhà", "I live in a big house."),
                ("yellow", "/ˈjɛloʊ/", "màu vàng", "The sun is yellow."),
                ("measure", "/ˈmɛʒər/", "đo lường", "Please measure the length."),
                ("pleasure", "/ˈplɛʒər/", "niềm vui", "It's a pleasure to meet you."),
                ("about", "/əˈbaʊt/", "về", "Tell me about your day."),
                ("family", "/ˈfæməli/", "gia đình", "I love my family."),
            ),
        ),
        (
            "hard",
            (
                ("think", "/θɪŋk/", "suy nghĩ", "I think you are right."),
                ("this", "/ðɪs/", "cái này", "This is my book."),
                ("very", "/ˈvɛri/", "rất", "You are very smart."),
                ("through", "/θru/", "qua", "Walk through the door."),
                ("weather", "/ˈwɛðər/", "thời tiết", "The weather is nice."),
                ("voice", "/vɔɪs/", "giọng nói", "She has a beautiful voice."),
                ("clothes", "/kloʊðz/", "quần áo", "I need new clothes."),
                ("breathe", "/brið/", "thở", "Breathe slowly and deeply."),
                ("although", "/ɔlˈðoʊ/", "mặc dù", "Although it's cold, I'm happy."),
                ("rhythm", "/ˈrɪðəm/", "nhịp điệu", "Music has a good rhythm."),
            ),
        ),
    )
}
@router.get("/symbols", response_model=List[IPASymbol])
async def get_ipa_symbols(
    category: Optional[str] = Query(
        None, description="Filter by category: vowel, consonant, diphthong"
    )
):
    """List IPA symbols, optionally restricted to a single category.

    Each symbol carries its Vietnamese tip and the character mapping of the
    symbol inside its example word. Results are ordered by difficulty
    (easy, medium, hard) and then by symbol. Any failure is logged and
    reported as HTTP 500.
    """
    try:
        rank = {"easy": 0, "medium": 1, "hard": 2}
        selected = [
            IPASymbol(
                symbol=key,
                description=info["desc"],
                example_word=info["word"],
                category=info["category"],
                difficulty_level=info["difficulty"],
                vietnamese_tip=info["tip"],
                # Highlight where the symbol is spelled in the example word.
                character_mapping=map_ipa_to_characters(info["word"], key),
            )
            for key, info in IPA_SYMBOLS_DATA.items()
            # An empty/absent filter keeps every symbol.
            if not category or info["category"] == category
        ]
        return sorted(
            selected, key=lambda item: (rank[item.difficulty_level], item.symbol)
        )
    except Exception as e:
        logger.error(f"Error getting IPA symbols: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/lessons", response_model=List[IPALesson])
async def get_ipa_lessons():
    """Return the fixed sequence of IPA lessons for progressive learning.

    Each lesson groups the symbols from ``IPA_SYMBOLS_DATA`` that match a
    category and/or difficulty, in table order. Any failure is logged and
    reported as HTTP 500.
    """

    def symbols_matching(category=None, difficulty=None):
        """Keys of IPA_SYMBOLS_DATA matching the given filters (None = any)."""
        return [
            key
            for key, info in IPA_SYMBOLS_DATA.items()
            if (category is None or info["category"] == category)
            and (difficulty is None or info["difficulty"] == difficulty)
        ]

    def build_symbol(key):
        """Assemble the IPASymbol response object for one table key."""
        info = IPA_SYMBOLS_DATA[key]
        return IPASymbol(
            symbol=key,
            description=info["desc"],
            example_word=info["word"],
            category=info["category"],
            difficulty_level=info["difficulty"],
            vietnamese_tip=info["tip"],
            character_mapping=map_ipa_to_characters(info["word"], key),
        )

    try:
        # (id, title, description, symbol keys, difficulty, minutes)
        lesson_plan = [
            (
                "vowels_basic",
                "Nguyên âm cơ bản (Basic Vowels)",
                "Học các nguyên âm đơn giản nhất trong tiếng Anh",
                symbols_matching(category="vowel", difficulty="easy"),
                "easy",
                15,
            ),
            (
                "consonants_basic",
                "Phụ âm cơ bản (Basic Consonants)",
                "Các phụ âm dễ phát âm cho người Việt",
                symbols_matching(category="consonant", difficulty="easy"),
                "easy",
                20,
            ),
            (
                "vowels_intermediate",
                "Nguyên âm nâng cao (Intermediate Vowels)",
                "Các nguyên âm khó hơn, cần luyện tập kỹ",
                symbols_matching(category="vowel", difficulty="medium"),
                "medium",
                25,
            ),
            (
                "diphthongs",
                "Nguyên âm đôi (Diphthongs)",
                "Học cách phát âm nguyên âm đôi tự nhiên",
                symbols_matching(category="diphthong"),
                "medium",
                20,
            ),
            (
                "consonants_intermediate",
                "Phụ âm trung cấp (Intermediate Consonants)",
                "Các phụ âm cần luyện tập cho người Việt",
                symbols_matching(category="consonant", difficulty="medium"),
                "medium",
                30,
            ),
            (
                "difficult_sounds",
                "Âm khó (Difficult Sounds)",
                "Những âm khó nhất cho người Việt: th, v, z",
                symbols_matching(difficulty="hard"),
                "hard",
                40,
            ),
        ]

        return [
            IPALesson(
                id=lesson_id,
                title=title,
                description=description,
                symbols=[build_symbol(key) for key in keys],
                difficulty=level,
                estimated_time=minutes,
            )
            for lesson_id, title, description, keys, level, minutes in lesson_plan
        ]
    except Exception as e:
        logger.error(f"Error getting IPA lessons: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/words", response_model=List[IPAWord])
async def get_practice_words(
    difficulty: str = Query("easy", description="Difficulty level: easy, medium, hard")
):
    """Return the practice words for a difficulty level.

    Args:
        difficulty: One of the ``SAMPLE_WORDS`` levels; any other value
            falls back to "easy".

    Returns:
        A list of ``IPAWord`` objects. ``phonemes`` comes from the G2P
        converter, or from the word's letters when conversion fails.
        ``difficulty`` on each word is derived from the average per-phoneme
        difficulty score (> 0.6 hard, > 0.4 medium, else easy), not from
        the requested level.

    Raises:
        HTTPException: 500 on any unexpected failure.
    """
    try:
        # Unknown levels fall back to the easy list.
        words_data = SAMPLE_WORDS.get(difficulty, SAMPLE_WORDS["easy"])

        words = []
        for word_data in words_data:
            # Get phonemes using G2P
            try:
                phonemes = g2p.text_to_phonemes(word_data["word"])[0]["phonemes"]
            except Exception as g2p_error:
                # Best-effort fallback to the letters, but leave a trace.
                # ``Exception`` (not a bare except) so that interpreter-exit
                # signals are not swallowed.
                logger.warning(
                    f"G2P failed for '{word_data['word']}', "
                    f"falling back to letters: {g2p_error}"
                )
                phonemes = list(word_data["word"].lower())

            # Average per-phoneme difficulty; 0.3 when there are no phonemes.
            if phonemes:
                avg_difficulty = sum(
                    g2p.get_difficulty_score(phoneme) for phoneme in phonemes
                ) / len(phonemes)
            else:
                avg_difficulty = 0.3

            if avg_difficulty > 0.6:
                word_difficulty = "hard"
            elif avg_difficulty > 0.4:
                word_difficulty = "medium"
            else:
                word_difficulty = "easy"

            words.append(
                IPAWord(
                    word=word_data["word"],
                    ipa=word_data["ipa"],
                    phonemes=phonemes,
                    difficulty=word_difficulty,
                    meaning=word_data["meaning"],
                    example_sentence=word_data["sentence"],
                    character_mapping=map_word_to_phonemes(
                        word_data["word"], word_data["ipa"]
                    ),
                )
            )
        return words
    except Exception as e:
        logger.error(f"Error getting practice words: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/exercises", response_model=List[IPAExercise])
async def get_ipa_exercises(
    count: int = Query(5, ge=1, le=20), difficulty: str = Query("mixed")
):
    """Generate random IPA pronunciation exercises.

    Args:
        count: Number of exercises requested (1-20); capped at the size of
            the available word pool.
        difficulty: "mixed" to draw from every level, or a ``SAMPLE_WORDS``
            level name; unknown values fall back to "easy".

    Returns:
        A list of ``IPAExercise`` objects. Each exercise reports the level
        its word actually belongs to, including in "mixed" mode.

    Raises:
        HTTPException: 500 on any unexpected failure.
    """
    # Sounds that get an extra pronunciation hint when present in a word.
    hinted_sounds = {"θ", "ð", "v", "z", "ʒ", "r", "w"}

    try:
        # Keep each word paired with its level so mixed-mode exercises can
        # report the real difficulty instead of a hard-coded "easy".
        if difficulty == "mixed":
            pool = [
                (level, entry)
                for level, entries in SAMPLE_WORDS.items()
                for entry in entries
            ]
        else:
            if difficulty not in SAMPLE_WORDS:
                difficulty = "easy"
            pool = [(difficulty, entry) for entry in SAMPLE_WORDS[difficulty]]

        selected = random.sample(pool, min(count, len(pool)))

        exercises = []
        for level, word_data in selected:
            # Get phonemes
            try:
                phonemes = g2p.text_to_phonemes(word_data["word"])[0]["phonemes"]
            except Exception as g2p_error:
                # Best-effort fallback to letters; ``Exception`` rather than
                # a bare except so interpreter-exit signals still propagate.
                logger.warning(
                    f"G2P failed for '{word_data['word']}', "
                    f"falling back to letters: {g2p_error}"
                )
                phonemes = list(word_data["word"].lower())

            # The count below is a phoneme count, so it is labelled as
            # phonemes ("âm vị") rather than syllables ("âm tiết").
            hints = [
                f"Nghĩa: {word_data['meaning']}",
                f"Ví dụ: {word_data['sentence']}",
                f"Số âm vị: {len(phonemes)}",
            ]

            # One tip per distinct difficult sound, in order of appearance.
            already_hinted = set()
            for phoneme in phonemes:
                if phoneme not in hinted_sounds or phoneme in already_hinted:
                    continue
                already_hinted.add(phoneme)
                if phoneme in IPA_SYMBOLS_DATA:
                    hints.append(
                        f"Âm /{phoneme}/: {IPA_SYMBOLS_DATA[phoneme]['tip']}"
                    )

            exercises.append(
                IPAExercise(
                    word=word_data["word"],
                    ipa=word_data["ipa"],
                    phonemes=phonemes,
                    hints=hints,
                    difficulty=level,
                )
            )
        return exercises
    except Exception as e:
        logger.error(f"Error generating IPA exercises: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/symbol/{symbol}")
async def get_symbol_details(symbol: str):
    """Return the reference entry for one IPA symbol plus practice material.

    Responds with 404 when the symbol is not in ``IPA_SYMBOLS_DATA``. Up to
    five sample words whose IPA string contains the symbol (a plain
    substring test) are included, scanning levels and words in table order.
    Any other failure is logged and reported as HTTP 500.
    """
    try:
        info = IPA_SYMBOLS_DATA.get(symbol)
        if symbol not in IPA_SYMBOLS_DATA:
            raise HTTPException(
                status_code=404, detail=f"IPA symbol '{symbol}' not found"
            )

        # All matching sample words in table order, trimmed to the first 5.
        example_words = [
            {
                "word": entry["word"],
                "ipa": entry["ipa"],
                "meaning": entry["meaning"],
                "difficulty": level,
            }
            for level, entries in SAMPLE_WORDS.items()
            for entry in entries
            if symbol in entry["ipa"]
        ][:5]

        return {
            "symbol": symbol,
            "description": info["desc"],
            "example_word": info["word"],
            "category": info["category"],
            "difficulty_level": info["difficulty"],
            "vietnamese_tip": info["tip"],
            "difficulty_score": g2p.get_difficulty_score(symbol),
            "example_words": example_words,
            "practice_tips": _get_practice_tips(symbol),
        }
    except HTTPException:
        # Let the deliberate 404 through untouched.
        raise
    except Exception as e:
        logger.error(f"Error getting symbol details: {e}")
        raise HTTPException(status_code=500, detail=str(e))
def _get_practice_tips(symbol: str) -> List[str]:
"""Get specific practice tips for a symbol"""
tips_map = {
"θ": [
"Đặt đầu lưỡi giữa răng trên và răng dưới",
"Thổi khí nhẹ qua kẽ răng",
"Không rung dây thanh âm",
"Luyện với từ: think, three, thank",
],
"ð": [
"Vị trí lưỡi giống như âm θ",
"Nhưng phải rung dây thanh âm",
"Cảm nhận rung động ở cổ họng",
"Luyện với từ: this, that, brother",
],
"v": [
"Môi dưới chạm vào răng trên",
"Không dùng cả hai môi như tiếng Việt",
"Rung dây thanh âm",
"Luyện với từ: very, voice, love",
],
"r": [
"Cuộn lưỡi nhẹ nhàng",
"Không để lưỡi chạm vào vòm miệng",
"Không lăn lưỡi như tiếng Việt",
"Luyện với từ: red, run, car",
],
"w": [
"Tròn môi như phát âm 'u'",
"Không dùng răng như âm 'v'",
"Môi tròn rồi mở ra nhanh",
"Luyện với từ: we, water, window",
],
}
return tips_map.get(
symbol,
[
f"Luyện phát âm âm /{symbol}/ thường xuyên",
"Nghe và bắt chước người bản ngữ",
"Tập trung vào vị trí lưỡi và môi",
"Luyện tập với từ đơn giản trước",
],
)
@router.get("/word-analysis/{word}")
async def get_word_analysis(word: str):
    """Return a per-phoneme analysis of ``word`` for IPA learning.

    The response contains the word's IPA and phonemes as produced by the
    G2P converter, an overall difficulty (average phoneme score: > 0.6
    hard, > 0.4 medium, else easy; 0.3 when there are no phonemes), a
    breakdown per phoneme, up to five sample words that share one of the
    word's difficult phonemes (score > 0.5), a practice sequence and common
    mistakes.

    Raises:
        HTTPException: 500 on any failure, including G2P errors.
    """
    try:
        # Get phoneme data
        phoneme_data = g2p.text_to_phonemes(word)[0]
        phonemes = phoneme_data["phonemes"]

        # Score each phoneme once and reuse the scores below.
        # NOTE(review): assumes get_difficulty_score returns the same value
        # for the same phoneme on repeated calls - confirm.
        difficulty_scores = [g2p.get_difficulty_score(p) for p in phonemes]
        avg_difficulty = (
            sum(difficulty_scores) / len(difficulty_scores)
            if difficulty_scores
            else 0.3
        )
        word_difficulty = (
            "hard"
            if avg_difficulty > 0.6
            else "medium" if avg_difficulty > 0.4 else "easy"
        )

        # Detailed per-phoneme analysis
        phoneme_analysis = []
        for position, (phoneme, score) in enumerate(zip(phonemes, difficulty_scores)):
            symbol_info = IPA_SYMBOLS_DATA.get(phoneme, {})
            phoneme_analysis.append(
                {
                    "phoneme": phoneme,
                    "position": position,
                    "difficulty_score": score,
                    "difficulty_level": (
                        "hard"
                        if score > 0.6
                        else "medium" if score > 0.4 else "easy"
                    ),
                    "category": symbol_info.get("category", "unknown"),
                    "vietnamese_tip": symbol_info.get("tip", f"Luyện âm {phoneme}"),
                    "practice_tips": _get_practice_tips(phoneme),
                }
            )

        # Distinct difficult phonemes of the requested word, in order of
        # first appearance, so shared_sounds never lists a sound twice.
        difficult_phonemes = []
        for phoneme, score in zip(phonemes, difficulty_scores):
            if score > 0.5 and phoneme not in difficult_phonemes:
                difficult_phonemes.append(phoneme)

        # Find similar words for practice
        similar_words = []
        requested = word.lower()
        if difficult_phonemes:
            for difficulty_level, words in SAMPLE_WORDS.items():
                for word_data in words:
                    # Case-insensitive so "Think" does not list "think".
                    if word_data["word"].lower() == requested:
                        continue
                    candidate_phonemes = g2p.text_to_phonemes(word_data["word"])[0][
                        "phonemes"
                    ]
                    shared_difficult = [
                        p for p in difficult_phonemes if p in candidate_phonemes
                    ]
                    if shared_difficult:
                        similar_words.append(
                            {
                                "word": word_data["word"],
                                "ipa": word_data["ipa"],
                                "meaning": word_data["meaning"],
                                "shared_sounds": shared_difficult,
                                "difficulty": difficulty_level,
                            }
                        )
                    if len(similar_words) >= 5:
                        break
                if len(similar_words) >= 5:
                    break

        return {
            "word": word,
            "ipa": phoneme_data["ipa"],
            "phonemes": phonemes,
            "phoneme_string": phoneme_data["phoneme_string"],
            "difficulty": word_difficulty,
            "difficulty_score": avg_difficulty,
            "phoneme_analysis": phoneme_analysis,
            "similar_words": similar_words,
            "practice_sequence": _generate_practice_sequence(phoneme_analysis),
            "common_mistakes": _get_common_mistakes(phonemes),
        }
    except Exception as e:
        logger.error(f"Error analyzing word '{word}': {e}")
        raise HTTPException(status_code=500, detail=str(e))
def _generate_practice_sequence(phoneme_analysis: List[Dict]) -> List[Dict]:
    """Turn per-phoneme analysis entries into numbered practice steps.

    Steps are ordered from the lowest to the highest ``difficulty_score`` so
    that the learner starts with the easier sounds.

    Args:
        phoneme_analysis: Analysis dicts; each one must provide the
            ``"phoneme"``, ``"difficulty_score"`` and ``"vietnamese_tip"``
            keys.

    Returns:
        One dict per input entry with the keys ``"step"`` (1-based position),
        ``"phoneme"``, ``"focus"``, ``"tip"`` and ``"practice_words"``.
    """
    # sorted() is stable, so entries with equal scores keep their input order.
    easiest_first = sorted(
        phoneme_analysis, key=lambda entry: entry["difficulty_score"]
    )
    return [
        {
            "step": step_number,
            "phoneme": entry["phoneme"],
            "focus": "Tập trung vào âm này",
            "tip": entry["vietnamese_tip"],
            "practice_words": _get_practice_words_for_phoneme(entry["phoneme"]),
        }
        for step_number, entry in enumerate(easiest_first, start=1)
    ]
def _get_practice_words_for_phoneme(phoneme: str) -> List[str]:
    """Return simple example words that contain the given phoneme.

    Args:
        phoneme: IPA symbol to look up.

    Returns:
        The example words listed for ``phoneme``, or an empty list when the
        table has no entry for it.
    """
    words_by_phoneme = {
        "θ": ["think", "three", "month", "tooth"],
        "ð": ["this", "that", "mother", "brother"],
        "v": ["very", "voice", "love", "give"],
        "r": ["red", "run", "car", "tree"],
        "w": ["we", "water", "window", "want"],
        "z": ["zoo", "zero", "buzz", "pizza"],
        "ʒ": ["measure", "pleasure", "treasure", "vision"],
        "æ": ["cat", "hat", "man", "bad"],
        "ɪ": ["sit", "big", "win", "ship"],
        "ʊ": ["put", "look", "book", "good"],
    }
    if phoneme in words_by_phoneme:
        return words_by_phoneme[phoneme]
    return []
def _get_common_mistakes(phonemes: List[str]) -> List[Dict]:
    """Get common pronunciation mistakes for Vietnamese speakers.

    Each phoneme that has an entry in the mistakes table contributes exactly
    one result, even when it occurs several times in ``phonemes``; repeating
    identical advice would only add noise to the response.

    Args:
        phonemes: IPA symbols of the word being analysed, in order.

    Returns:
        A list of dicts with the keys ``"phoneme"``, ``"common_mistake"``,
        ``"correction"`` and ``"examples"``, ordered by the first occurrence
        of each phoneme. Phonemes without a table entry are skipped.
    """
    common_mistakes_map = {
        "θ": {
            "mistake": "Phát âm thành 'f' hoặc 's'",
            "correction": "Đặt lưỡi giữa răng, thổi nhẹ",
            "examples": ["think → fink/sink (sai), think (đúng)"],
        },
        "ð": {
            "mistake": "Phát âm thành 'd' hoặc 'z'",
            "correction": "Lưỡi giữa răng + rung dây thanh",
            "examples": ["this → dis/zis (sai), this (đúng)"],
        },
        "v": {
            "mistake": "Phát âm thành 'w' hoặc 'b'",
            "correction": "Môi dưới chạm răng trên",
            "examples": ["very → wery/bery (sai), very (đúng)"],
        },
        "r": {
            "mistake": "Lăn lưỡi như tiếng Việt",
            "correction": "Cuộn lưỡi nhẹ, không chạm vòm",
            "examples": ["red → rrred (sai), red (đúng)"],
        },
        "w": {
            "mistake": "Phát âm thành 'v'",
            "correction": "Tròn môi, không dùng răng",
            "examples": ["we → ve (sai), we (đúng)"],
        },
    }

    mistakes = []
    already_reported = set()
    for phoneme in phonemes:
        # Skip sounds with no known mistake and sounds already covered.
        if phoneme in already_reported or phoneme not in common_mistakes_map:
            continue
        already_reported.add(phoneme)
        mistake_info = common_mistakes_map[phoneme]
        mistakes.append(
            {
                "phoneme": phoneme,
                "common_mistake": mistake_info["mistake"],
                "correction": mistake_info["correction"],
                "examples": mistake_info["examples"],
            }
        )
    return mistakes
@router.get("/practice-session/{lesson_id}")
async def create_ipa_practice_session(lesson_id: str):
    """Build a structured IPA practice session for a lesson.

    The content is hard-coded sample data selected by ``lesson_id``:
    ``"vowels_basic"`` and ``"difficult_sounds"`` have dedicated word lists,
    and any other id receives the default list.

    Args:
        lesson_id: Lesson identifier taken from the URL path.

    Returns:
        A dict with ``"session_id"``, ``"title"``, ``"words"`` (each with its
        IPA, focus phonemes and grapheme-to-phoneme mapping),
        ``"estimated_time"`` and ``"instructions"``.

    Raises:
        HTTPException: With status 500 when building the session fails.
    """
    try:
        # Sample data standing in for what would typically come from a
        # database. Each entry is (word, IPA transcription, focus phonemes).
        lesson_catalog = {
            "vowels_basic": [
                ("cat", "/kæt/", ["æ"]),
                ("bed", "/bɛd/", ["ɛ"]),
                ("see", "/si/", ["i"]),
                ("cup", "/kʌp/", ["ʌ"]),
                ("book", "/bʊk/", ["ʊ"]),
            ],
            "difficult_sounds": [
                ("think", "/θɪŋk/", ["θ"]),
                ("this", "/ðɪs/", ["ð"]),
                ("very", "/ˈvɛri/", ["v"]),
                ("water", "/ˈwɔtər/", ["w"]),
                ("red", "/rɛd/", ["r"]),
            ],
        }
        # Used for every lesson id that has no dedicated entry above.
        default_lesson = [
            ("hello", "/həˈloʊ/", ["ə", "oʊ"]),
            ("world", "/wɜrld/", ["w", "ɜr"]),
            ("practice", "/ˈpræktɪs/", ["æ", "ɪ"]),
        ]

        selected_entries = lesson_catalog.get(lesson_id, default_lesson)
        # Mappings are computed only for the words of the selected lesson.
        session_words = [
            {
                "word": text,
                "ipa": transcription,
                "focus_phonemes": focus,
                "mapping": map_word_to_phonemes(text, transcription),
            }
            for text, transcription, focus in selected_entries
        ]

        readable_name = lesson_id.replace("_", " ").title()
        minutes_per_word = 3
        return {
            "session_id": lesson_id,
            "title": f"IPA Practice Session: {readable_name}",
            "words": session_words,
            "estimated_time": len(session_words) * minutes_per_word,
            "instructions": [
                "Nghe mẫu từng từ carefully",
                "Tập trung vào âm vị được highlight",
                "Ghi âm nhiều lần cho đến khi đạt điểm tốt",
                "Đọc feedback để cải thiện",
            ],
        }
    except Exception as e:
        logger.error(f"Error creating practice session: {e}")
        raise HTTPException(status_code=500, detail=str(e))