Spaces:

ABAO77
/

Run_code_api

Sleeping

App Files Files Community

Run_code_api / src /apis /routes /ipa_route.py

ABAO77

feat: implement new IPA assessment API with detailed phoneme analysis and Vietnamese-specific feedback

45a0e83 2 months ago

raw

history blame

53.8 kB

	from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form
	from pydantic import BaseModel
	from typing import List, Dict, Optional, Union, Any
	import json
	import random
	import re
	import tempfile
	import os
	import base64
	import subprocess
	from loguru import logger
	from src.apis.controllers.speaking_controller import (
	EnhancedG2P,
	ProductionPronunciationAssessor,
	)


	class CharacterMapping(BaseModel):
	"""Span of a word's spelling paired with the IPA symbol it represents.

	Every field is optional and defaults to None, so callers may fill in
	whichever subset of names they use.
	"""
	# Long-form names: these are the fields set by the mapping helpers visible in this module.
	ipa_symbol: Optional[str] = None
	grapheme: Optional[str] = None
	# start_index is inclusive and end_index exclusive in the helpers visible here (end = start + len).
	start_index: Optional[int] = None
	end_index: Optional[int] = None
	characters: Optional[str] = None
	# Short-form names. NOTE(review): these look like aliases of the fields above
	# kept for a different client/payload shape — confirm before removing.
	chars: Optional[str] = None
	ipa: Optional[str] = None
	start: Optional[int] = None
	end: Optional[int] = None


	# Router for this module: every route below is mounted under the /ipa prefix
	# and tagged "IPA Training".
	router = APIRouter(prefix="/ipa", tags=["IPA Training"])

	# Grapheme-to-phoneme converter, constructed once at import time and shared by all handlers.
	g2p = EnhancedG2P()
	# Shared pronunciation assessor. It starts as None and is created lazily by
	# get_assessor() on first use, then reused for later calls.
	global_assessor = None


	def get_assessor():
	    """Return the shared ProductionPronunciationAssessor, building it on first use.

	    The instance is cached in the module-level ``global_assessor`` so that
	    later calls reuse it instead of constructing a new one.
	    """
	    global global_assessor
	    # Fast path: an instance already exists.
	    if global_assessor is not None:
	        return global_assessor
	    logger.info("Creating global ProductionPronunciationAssessor instance...")
	    global_assessor = ProductionPronunciationAssessor()
	    return global_assessor


	# Candidate spellings (graphemes) for each IPA symbol, listed in the priority
	# order in which map_ipa_to_characters() tries them. Built once at import
	# time instead of on every call.
	_IPA_GRAPHEME_CANDIDATES = {
	    # Vowels
	    "i": ("ee", "ea", "e", "ie", "ei", "i"),  # see, eat, me, piece, receive, machine
	    "ɪ": ("i", "y", "ui", "e"),  # sit, gym, build, women
	    "u": ("oo", "u", "ou", "ue", "ui", "o"),  # food, flu, soup, true, fruit, do
	    "ʊ": ("oo", "u", "ou"),  # book, put, could
	    "ɛ": ("e", "ea", "ai", "a"),  # bed, head, said, many
	    # about, taken, pencil, lemon, circus, famous, dollar, butter, doctor
	    "ə": ("a", "e", "i", "o", "u", "ou", "ar", "er", "or"),
	    "ʌ": ("u", "o", "ou", "oo"),  # cup, love, country, blood
	    "ɑ": ("a", "o", "au"),  # father, hot, aunt
	    "æ": ("a",),  # cat, apple
	    "ɔ": ("o", "aw", "au", "a", "ou"),  # saw, law, caught, all, thought
	    # Diphthongs
	    "eɪ": ("a", "ai", "ay", "ei", "ey", "ea"),  # say, wait, day, eight, grey, break
	    "aɪ": ("i", "y", "ie", "uy", "ai", "igh"),  # my, fly, pie, buy, aisle, night
	    "ɔɪ": ("oy", "oi"),  # boy, coin
	    "aʊ": ("ou", "ow"),  # how, house
	    "oʊ": ("o", "oa", "ow", "oe", "ou"),  # go, boat, show, toe, soul
	    # Consonants
	    "p": ("p", "pp"),  # pen, apple
	    "b": ("b", "bb"),  # boy, rabbit
	    "t": ("t", "tt", "ed"),  # top, butter, walked
	    "d": ("d", "dd", "ed"),  # dog, ladder, played
	    "k": ("c", "k", "ck", "ch", "qu"),  # cat, key, back, school, queen
	    "g": ("g", "gg", "gh", "gu"),  # go, egg, ghost, guard
	    "f": ("f", "ff", "ph", "gh"),  # fish, off, phone, laugh
	    "v": ("v", "ve"),  # very, have
	    "θ": ("th",),  # think
	    "ð": ("th",),  # this
	    "s": ("s", "ss", "c", "sc", "ps"),  # see, miss, city, scene, psychology
	    "z": ("z", "zz", "s", "se", "ze"),  # zoo, buzz, is, rose, froze
	    # ship, sure, mission, machine, special, nation
	    "ʃ": ("sh", "s", "ss", "ch", "ci", "ti"),
	    "ʒ": ("s", "si", "ge"),  # measure, vision, garage
	    "tʃ": ("ch", "tch", "t"),  # chair, watch, nature
	    "dʒ": ("j", "ge", "dge", "g"),  # job, age, bridge, gym
	    "m": ("m", "mm", "mb"),  # man, hammer, lamb
	    "n": ("n", "nn", "kn", "gn"),  # no, dinner, knee, sign
	    "ŋ": ("ng", "n"),  # sing, think
	    "l": ("l", "ll"),  # love, hello
	    "r": ("r", "rr", "wr"),  # red, sorry, write
	    "j": ("y", "i", "j"),  # yes, onion, hallelujah
	    "w": ("w", "wh", "qu", "u"),  # we, what, queen, language
	    "h": ("h", "wh"),  # house, who
	}


	def map_ipa_to_characters(word: str, ipa_symbol: str) -> List[CharacterMapping]:
	    """Locate the letters in ``word`` that spell ``ipa_symbol``, for highlighting.

	    The known spellings of the symbol are tried in priority order and the
	    first one that occurs in the (lower-cased) word wins; its first
	    occurrence is reported. If none of them occurs, the symbol itself is
	    looked up literally in the word as a last resort.

	    Args:
	        word: The example word to search, in any letter case.
	        ipa_symbol: The IPA symbol whose spelling should be located.

	    Returns:
	        A list holding at most one CharacterMapping. ``start_index`` is
	        inclusive, ``end_index`` is exclusive, and ``characters`` is the
	        matched slice of ``word`` in its original case. The list is empty
	        when nothing matches or when either argument is empty.
	    """
	    # An empty symbol used to "match" at position 0 (the empty string is a
	    # substring of everything) and produced a zero-width mapping.
	    if not word or not ipa_symbol:
	        return []

	    word_lower = word.lower()
	    known_spellings = _IPA_GRAPHEME_CANDIDATES.get(ipa_symbol, ())

	    # The symbol itself goes last so it is only used when no known spelling matched.
	    for grapheme in (*known_spellings, ipa_symbol):
	        start_pos = word_lower.find(grapheme)
	        if start_pos == -1:
	            continue
	        end_pos = start_pos + len(grapheme)
	        return [
	            CharacterMapping(
	                ipa_symbol=ipa_symbol,
	                grapheme=grapheme,
	                start_index=start_pos,
	                end_index=end_pos,
	                characters=word[start_pos:end_pos],
	            )
	        ]

	    return []


	# Hand-written spelling-to-sound alignments, keyed by lower-case word.
	# Each span is (characters, ipa_symbol, start_index, end_index) with an
	# exclusive end index. Plain tuples are stored so every call can build fresh
	# CharacterMapping objects without rebuilding the table itself.
	_WORD_PHONEME_SPANS = {
	    # Easy words
	    "cat": (("c", "k", 0, 1), ("a", "æ", 1, 2), ("t", "t", 2, 3)),
	    "dog": (("d", "d", 0, 1), ("o", "ɔ", 1, 2), ("g", "g", 2, 3)),
	    "pen": (("p", "p", 0, 1), ("e", "ɛ", 1, 2), ("n", "n", 2, 3)),
	    "see": (("s", "s", 0, 1), ("ee", "i", 1, 3)),
	    "bed": (("b", "b", 0, 1), ("e", "ɛ", 1, 2), ("d", "d", 2, 3)),
	    "fish": (("f", "f", 0, 1), ("i", "ɪ", 1, 2), ("sh", "ʃ", 2, 4)),
	    "book": (("b", "b", 0, 1), ("oo", "ʊ", 1, 3), ("k", "k", 3, 4)),
	    "food": (("f", "f", 0, 1), ("oo", "u", 1, 3), ("d", "d", 3, 4)),
	    "man": (("m", "m", 0, 1), ("a", "æ", 1, 2), ("n", "n", 2, 3)),
	    "sun": (("s", "s", 0, 1), ("u", "ʌ", 1, 2), ("n", "n", 2, 3)),
	    # Medium words
	    "chair": (("ch", "tʃ", 0, 2), ("ai", "ɛ", 2, 4), ("r", "r", 4, 5)),
	    "water": (("w", "w", 0, 1), ("a", "ɔ", 1, 2), ("t", "t", 2, 3), ("er", "ər", 3, 5)),
	    "house": (("h", "h", 0, 1), ("ou", "aʊ", 1, 3), ("se", "s", 3, 5)),
	    "yellow": (("y", "j", 0, 1), ("e", "ɛ", 1, 2), ("ll", "l", 2, 4), ("ow", "oʊ", 4, 6)),
	    "about": (("a", "ə", 0, 1), ("b", "b", 1, 2), ("ou", "aʊ", 2, 4), ("t", "t", 4, 5)),
	    # Hard words
	    "think": (("th", "θ", 0, 2), ("i", "ɪ", 2, 3), ("nk", "ŋk", 3, 5)),
	    "this": (("th", "ð", 0, 2), ("i", "ɪ", 2, 3), ("s", "s", 3, 4)),
	    "very": (("v", "v", 0, 1), ("e", "ɛ", 1, 2), ("r", "r", 2, 3), ("y", "i", 3, 4)),
	    "through": (("th", "θ", 0, 2), ("r", "r", 2, 3), ("ough", "u", 3, 7)),
	    "measure": (("m", "m", 0, 1), ("ea", "ɛ", 1, 3), ("s", "ʒ", 3, 4), ("ure", "ər", 4, 7)),
	}


	def map_word_to_phonemes(word: str, ipa_transcription: str) -> List[CharacterMapping]:
	    """Align the letters of ``word`` with the phonemes they spell.

	    Words present in the hand-written table get an exact alignment. Any other
	    word falls back to one mapping per alphabetic character in which the
	    letter itself stands in for the IPA symbol. That fallback is a
	    placeholder, not a real transcription.

	    Args:
	        word: The word to align, in any letter case.
	        ipa_transcription: Accepted for interface compatibility. It is not
	            consulted: the previous implementation cleaned it into a local
	            variable that was never read.

	    Returns:
	        CharacterMapping objects in spelling order, with ``characters``,
	        ``ipa_symbol``, ``start_index`` (inclusive) and ``end_index``
	        (exclusive) set.
	    """
	    word_lower = word.lower()

	    predefined = _WORD_PHONEME_SPANS.get(word_lower)
	    if predefined is not None:
	        return [
	            CharacterMapping(
	                characters=chars,
	                ipa_symbol=symbol,
	                start_index=start,
	                end_index=end,
	            )
	            for chars, symbol, start, end in predefined
	        ]

	    # Fallback: character-by-character mapping. Non-letters (digits, hyphens,
	    # apostrophes, spaces) are skipped but still advance the index.
	    return [
	        CharacterMapping(
	            characters=word[i],
	            ipa_symbol=char,  # Simplified - would need actual phoneme mapping
	            start_index=i,
	            end_index=i + 1,
	        )
	        for i, char in enumerate(word_lower)
	        if char.isalpha()
	    ]


	class IPASymbol(BaseModel):
	"""One IPA symbol with the teaching metadata returned by the symbol and lesson endpoints."""
	symbol: str
	description: str
	example_word: str
	# Not set by the handlers visible in this file, so it stays None in their responses.
	audio_example: Optional[str] = None
	category: str # vowel, consonant, diphthong
	difficulty_level: str # easy, medium, hard
	# Pronunciation tip; the values stored in IPA_SYMBOLS_DATA are written in Vietnamese.
	vietnamese_tip: str
	# Where the symbol is spelled inside example_word (filled from map_ipa_to_characters).
	character_mapping: Optional[List[CharacterMapping]] = None


	class IPALesson(BaseModel):
	"""A named group of IPA symbols presented together as one lesson."""
	id: str
	title: str
	description: str
	# Fully expanded symbol records, not just symbol keys.
	symbols: List[IPASymbol]
	# Lesson-level difficulty label; the lessons built in this file use easy, medium or hard.
	difficulty: str
	estimated_time: int # minutes


	class IPAWord(BaseModel):
	"""A practice word with its transcription, meaning and spelling-to-sound alignment."""
	word: str
	# Transcription string; the sample data in this file wraps it in slashes, e.g. "/kæt/".
	ipa: str
	phonemes: List[str]
	difficulty: str
	# Gloss of the word; the sample data in this file provides it in Vietnamese.
	meaning: str
	example_sentence: str
	# Letter spans aligned to phonemes (filled from map_word_to_phonemes).
	character_mapping: Optional[List[CharacterMapping]] = None


	class IPAExercise(BaseModel):
	"""A single pronunciation exercise: a word, its transcription and learner hints."""
	word: str
	ipa: str
	phonemes: List[str]
	# Free-text hints shown to the learner (meaning, example sentence, sound tips).
	hints: List[str]
	difficulty: str


	# IPA symbol catalogue for Vietnamese learners.
	# Each row is (symbol, desc, word, tip, category, difficulty); the rows are
	# expanded into a dict keyed by symbol whose values carry those five fields
	# under the keys "desc", "word", "tip", "category" and "difficulty".
	IPA_SYMBOLS_DATA = {
	    symbol: {
	        "desc": desc,
	        "word": word,
	        "tip": tip,
	        "category": category,
	        "difficulty": difficulty,
	    }
	    for symbol, desc, word, tip, category, difficulty in (
	        # Vowels - Easy
	        ("i", "High front unrounded vowel", "see", "Như âm 'i' trong tiếng Việt nhưng dài hơn", "vowel", "easy"),
	        ("u", "High back rounded vowel", "food", "Như âm 'u' trong tiếng Việt nhưng dài hơn", "vowel", "easy"),
	        ("ɑ", "Low back unrounded vowel", "father", "Mở miệng rộng, âm 'a' sâu", "vowel", "easy"),
	        ("ɛ", "Mid front unrounded vowel", "bed", "Giống âm 'e' trong 'đẹp'", "vowel", "easy"),
	        ("ɔ", "Mid back rounded vowel", "saw", "Âm 'o' tròn môi", "vowel", "easy"),
	        # Vowels - Medium
	        ("ɪ", "Near-close near-front unrounded vowel", "sit", "Âm 'i' ngắn, không kéo dài", "vowel", "medium"),
	        ("ʊ", "Near-close near-back rounded vowel", "put", "Âm 'u' ngắn, tròn môi nhẹ", "vowel", "medium"),
	        ("ʌ", "Mid central unrounded vowel", "cup", "Âm 'ơ' nhưng mở miệng hơn", "vowel", "medium"),
	        ("æ", "Near-open front unrounded vowel", "cat", "Mở miệng rộng, âm 'a' phẳng", "vowel", "medium"),
	        ("ə", "Schwa - mid central vowel", "about", "Âm yếu 'ơ', thư giãn cơ miệng", "vowel", "medium"),
	        # Diphthongs
	        ("eɪ", "Diphthong from e to i", "say", "Từ 'e' trượt lên 'i'", "diphthong", "medium"),
	        ("aɪ", "Diphthong from a to i", "my", "Từ 'a' trượt lên 'i'", "diphthong", "medium"),
	        ("ɔɪ", "Diphthong from o to i", "boy", "Từ 'o' trượt lên 'i'", "diphthong", "medium"),
	        ("aʊ", "Diphthong from a to u", "how", "Từ 'a' trượt lên 'u'", "diphthong", "medium"),
	        ("oʊ", "Diphthong from o to u", "go", "Từ 'o' trượt lên 'u'", "diphthong", "medium"),
	        # Consonants - Easy
	        ("p", "Voiceless bilabial plosive", "pen", "Âm 'p' không thở ra", "consonant", "easy"),
	        ("b", "Voiced bilabial plosive", "boy", "Âm 'b' có rung dây thanh", "consonant", "easy"),
	        ("t", "Voiceless alveolar plosive", "top", "Âm 't' lưỡi chạm nướu", "consonant", "easy"),
	        ("d", "Voiced alveolar plosive", "dog", "Âm 'd' có rung dây thanh", "consonant", "easy"),
	        ("k", "Voiceless velar plosive", "cat", "Âm 'k' cuống họng", "consonant", "easy"),
	        ("g", "Voiced velar plosive", "go", "Âm 'g' có rung dây thanh", "consonant", "easy"),
	        ("m", "Bilabial nasal", "man", "Âm 'm' qua mũi", "consonant", "easy"),
	        ("n", "Alveolar nasal", "no", "Âm 'n' lưỡi chạm nướu", "consonant", "easy"),
	        ("s", "Voiceless alveolar fricative", "see", "Âm 's' rít", "consonant", "easy"),
	        ("f", "Voiceless labiodental fricative", "fish", "Môi dưới chạm răng trên", "consonant", "easy"),
	        # Consonants - Medium
	        ("ʃ", "Voiceless postalveolar fricative", "ship", "Âm 'sh', lưỡi cong", "consonant", "medium"),
	        ("ʒ", "Voiced postalveolar fricative", "measure", "Như 'ʃ' nhưng có rung dây thanh", "consonant", "medium"),
	        ("tʃ", "Voiceless postalveolar affricate", "chair", "Âm 'ch', từ 't' + 'ʃ'", "consonant", "medium"),
	        ("dʒ", "Voiced postalveolar affricate", "job", "Từ 'd' + 'ʒ'", "consonant", "medium"),
	        ("l", "Lateral approximant", "love", "Lưỡi chạm nướu, âm thoát hai bên", "consonant", "medium"),
	        ("r", "Approximant", "red", "Cuộn lưỡi nhẹ, không chạm vòm", "consonant", "medium"),
	        ("j", "Palatal approximant", "yes", "Âm 'y', lưỡi gần vòm miệng", "consonant", "medium"),
	        ("w", "Labial-velar approximant", "we", "Tròn môi như 'u', không dùng răng", "consonant", "medium"),
	        ("h", "Glottal fricative", "house", "Thở ra nhẹ từ họng", "consonant", "medium"),
	        ("z", "Voiced alveolar fricative", "zoo", "Như 's' nhưng có rung dây thanh", "consonant", "medium"),
	        # Consonants - Hard (for Vietnamese speakers)
	        ("θ", "Voiceless dental fricative", "think", "Lưỡi giữa răng, thổi nhẹ", "consonant", "hard"),
	        ("ð", "Voiced dental fricative", "this", "Lưỡi giữa răng, rung dây thanh", "consonant", "hard"),
	        ("v", "Voiced labiodental fricative", "very", "Môi dưới chạm răng trên, rung dây thanh", "consonant", "hard"),
	        ("ŋ", "Velar nasal", "sing", "Âm 'ng' cuối từ", "consonant", "hard"),
	    )
	}

	# Practice-word bank, grouped by difficulty level.
	# Each row is (word, ipa, meaning, sentence); the rows are expanded into dicts
	# with the keys "word", "ipa", "meaning" and "sentence".
	SAMPLE_WORDS = {
	    level: [
	        {"word": word, "ipa": ipa, "meaning": meaning, "sentence": sentence}
	        for word, ipa, meaning, sentence in rows
	    ]
	    for level, rows in (
	        (
	            "easy",
	            (
	                ("cat", "/kæt/", "con mèo", "The cat is sleeping."),
	                ("dog", "/dɔg/", "con chó", "I love my dog."),
	                ("man", "/mæn/", "người đàn ông", "The man is tall."),
	                ("pen", "/pɛn/", "cái bút", "I need a pen."),
	                ("sun", "/sʌn/", "mặt trời", "The sun is bright."),
	                ("fish", "/fɪʃ/", "con cá", "Fish live in water."),
	                ("book", "/bʊk/", "quyển sách", "I read a book."),
	                ("food", "/fud/", "thức ăn", "I like good food."),
	                ("see", "/si/", "nhìn thấy", "I can see you."),
	                ("bed", "/bɛd/", "giường", "I sleep in my bed."),
	            ),
	        ),
	        (
	            "medium",
	            (
	                ("water", "/ˈwɔtər/", "nước", "I drink water every day."),
	                ("chair", "/tʃɛr/", "cái ghế", "Please sit on the chair."),
	                ("school", "/skul/", "trường học", "Children go to school."),
	                ("mother", "/ˈmʌðər/", "mẹ", "My mother is kind."),
	                ("house", "/haʊs/", "ngôi nhà", "I live in a big house."),
	                ("yellow", "/ˈjɛloʊ/", "màu vàng", "The sun is yellow."),
	                ("measure", "/ˈmɛʒər/", "đo lường", "Please measure the length."),
	                ("pleasure", "/ˈplɛʒər/", "niềm vui", "It's a pleasure to meet you."),
	                ("about", "/əˈbaʊt/", "về", "Tell me about your day."),
	                ("family", "/ˈfæməli/", "gia đình", "I love my family."),
	            ),
	        ),
	        (
	            "hard",
	            (
	                ("think", "/θɪŋk/", "suy nghĩ", "I think you are right."),
	                ("this", "/ðɪs/", "cái này", "This is my book."),
	                ("very", "/ˈvɛri/", "rất", "You are very smart."),
	                ("through", "/θru/", "qua", "Walk through the door."),
	                ("weather", "/ˈwɛðər/", "thời tiết", "The weather is nice."),
	                ("voice", "/vɔɪs/", "giọng nói", "She has a beautiful voice."),
	                ("clothes", "/kloʊðz/", "quần áo", "I need new clothes."),
	                ("breathe", "/brið/", "thở", "Breathe slowly and deeply."),
	                ("although", "/ɔlˈðoʊ/", "mặc dù", "Although it's cold, I'm happy."),
	                ("rhythm", "/ˈrɪðəm/", "nhịp điệu", "Music has a good rhythm."),
	            ),
	        ),
	    )
	}


	@router.get("/symbols", response_model=List[IPASymbol])
	async def get_ipa_symbols(
	    category: Optional[str] = Query(
	        None, description="Filter by category: vowel, consonant, diphthong"
	    )
	):
	    """Get all IPA symbols with Vietnamese tips and character mappings.

	    When ``category`` is given, only symbols of that category are returned;
	    an unknown category therefore yields an empty list. The result is ordered
	    by difficulty (easy, medium, hard) and then by symbol.

	    Raises:
	        HTTPException: 500 carrying the error text if anything fails.
	    """
	    try:
	        difficulty_rank = {"easy": 0, "medium": 1, "hard": 2}

	        catalogue = [
	            IPASymbol(
	                symbol=key,
	                description=info["desc"],
	                example_word=info["word"],
	                category=info["category"],
	                difficulty_level=info["difficulty"],
	                vietnamese_tip=info["tip"],
	                # Highlight span of the symbol inside its example word.
	                character_mapping=map_ipa_to_characters(info["word"], key),
	            )
	            for key, info in IPA_SYMBOLS_DATA.items()
	            if not category or info["category"] == category
	        ]

	        return sorted(
	            catalogue,
	            key=lambda entry: (difficulty_rank[entry.difficulty_level], entry.symbol),
	        )
	    except Exception as e:
	        logger.error(f"Error getting IPA symbols: {e}")
	        raise HTTPException(status_code=500, detail=str(e))


	@router.get("/lessons", response_model=List[IPALesson])
	async def get_ipa_lessons():
	    """Get structured IPA lessons for progressive learning.

	    Six lessons are returned in a fixed order. Each lesson's symbols are
	    selected from IPA_SYMBOLS_DATA by category and/or difficulty and keep the
	    order in which they appear in that table.

	    Raises:
	        HTTPException: 500 carrying the error text if anything fails.
	    """
	    try:
	        # (id, title, description, category filter, symbol-difficulty filter,
	        #  lesson difficulty, estimated minutes). A filter of None means
	        # "do not restrict on this field".
	        lesson_plan = (
	            (
	                "vowels_basic",
	                "Nguyên âm cơ bản (Basic Vowels)",
	                "Học các nguyên âm đơn giản nhất trong tiếng Anh",
	                "vowel",
	                "easy",
	                "easy",
	                15,
	            ),
	            (
	                "consonants_basic",
	                "Phụ âm cơ bản (Basic Consonants)",
	                "Các phụ âm dễ phát âm cho người Việt",
	                "consonant",
	                "easy",
	                "easy",
	                20,
	            ),
	            (
	                "vowels_intermediate",
	                "Nguyên âm nâng cao (Intermediate Vowels)",
	                "Các nguyên âm khó hơn, cần luyện tập kỹ",
	                "vowel",
	                "medium",
	                "medium",
	                25,
	            ),
	            (
	                "diphthongs",
	                "Nguyên âm đôi (Diphthongs)",
	                "Học cách phát âm nguyên âm đôi tự nhiên",
	                "diphthong",
	                None,
	                "medium",
	                20,
	            ),
	            (
	                "consonants_intermediate",
	                "Phụ âm trung cấp (Intermediate Consonants)",
	                "Các phụ âm cần luyện tập cho người Việt",
	                "consonant",
	                "medium",
	                "medium",
	                30,
	            ),
	            (
	                "difficult_sounds",
	                "Âm khó (Difficult Sounds)",
	                "Những âm khó nhất cho người Việt: th, v, z",
	                None,
	                "hard",
	                "hard",
	                40,
	            ),
	        )

	        built_lessons = []
	        for (
	            lesson_id,
	            title,
	            description,
	            wanted_category,
	            wanted_difficulty,
	            lesson_difficulty,
	            minutes,
	        ) in lesson_plan:
	            members = [
	                IPASymbol(
	                    symbol=key,
	                    description=info["desc"],
	                    example_word=info["word"],
	                    category=info["category"],
	                    difficulty_level=info["difficulty"],
	                    vietnamese_tip=info["tip"],
	                    # Highlight span of the symbol inside its example word.
	                    character_mapping=map_ipa_to_characters(info["word"], key),
	                )
	                for key, info in IPA_SYMBOLS_DATA.items()
	                if wanted_category in (None, info["category"])
	                and wanted_difficulty in (None, info["difficulty"])
	            ]
	            built_lessons.append(
	                IPALesson(
	                    id=lesson_id,
	                    title=title,
	                    description=description,
	                    symbols=members,
	                    difficulty=lesson_difficulty,
	                    estimated_time=minutes,
	                )
	            )

	        return built_lessons
	    except Exception as e:
	        logger.error(f"Error getting IPA lessons: {e}")
	        raise HTTPException(status_code=500, detail=str(e))


	@router.get("/words", response_model=List[IPAWord])
	async def get_practice_words(
	    difficulty: str = Query("easy", description="Difficulty level: easy, medium, hard")
	):
	    """Get practice words with IPA transcription and character mappings.

	    Args:
	        difficulty: Which word bank to serve. Any value that is not a key of
	            SAMPLE_WORDS falls back to "easy".

	    Returns:
	        One IPAWord per entry of the selected bank. Each word's own
	        ``difficulty`` is derived from the average per-phoneme score reported
	        by the G2P converter (> 0.6 hard, > 0.4 medium, otherwise easy), so it
	        can differ from the requested level.

	    Raises:
	        HTTPException: 500 carrying the error text if anything fails.
	    """
	    try:
	        if difficulty not in SAMPLE_WORDS:
	            difficulty = "easy"

	        words = []
	        for word_data in SAMPLE_WORDS[difficulty]:
	            # Get phonemes using G2P. Failures are tolerated, but only ordinary
	            # exceptions: a bare except would also swallow cancellation and
	            # interpreter-exit signals.
	            try:
	                phonemes = g2p.text_to_phonemes(word_data["word"])[0]["phonemes"]
	            except Exception as g2p_error:
	                logger.warning(
	                    f"G2P failed for '{word_data['word']}', using letters instead: {g2p_error}"
	                )
	                # Fallback to simple conversion
	                phonemes = list(word_data["word"].lower())

	            # Average per-phoneme difficulty; 0.3 is the default for an empty list.
	            if phonemes:
	                total_score = sum(
	                    (g2p.get_difficulty_score(phoneme) for phoneme in phonemes), 0.0
	                )
	                avg_difficulty = total_score / len(phonemes)
	            else:
	                avg_difficulty = 0.3

	            if avg_difficulty > 0.6:
	                word_difficulty = "hard"
	            elif avg_difficulty > 0.4:
	                word_difficulty = "medium"
	            else:
	                word_difficulty = "easy"

	            words.append(
	                IPAWord(
	                    word=word_data["word"],
	                    ipa=word_data["ipa"],
	                    phonemes=phonemes,
	                    difficulty=word_difficulty,
	                    meaning=word_data["meaning"],
	                    example_sentence=word_data["sentence"],
	                    character_mapping=map_word_to_phonemes(
	                        word_data["word"], word_data["ipa"]
	                    ),
	                )
	            )

	        return words
	    except Exception as e:
	        logger.error(f"Error getting practice words: {e}")
	        raise HTTPException(status_code=500, detail=str(e))


	@router.get("/exercises", response_model=List[IPAExercise])
	async def get_ipa_exercises(
	    count: int = Query(5, ge=1, le=20), difficulty: str = Query("mixed")
	):
	    """Generate random IPA pronunciation exercises.

	    Args:
	        count: How many exercises to return (1-20). Fewer are returned when
	            the selected word pool is smaller.
	        difficulty: "mixed" draws from every level; any other value selects
	            that level's word bank, falling back to "easy" when unknown.

	    Returns:
	        Randomly sampled exercises without repetition. Each exercise is
	        labelled with the level of the bank its word came from. In "mixed"
	        mode every exercise used to be labelled "easy" regardless of origin.

	    Raises:
	        HTTPException: 500 carrying the error text if anything fails.
	    """
	    # Sounds that get an extra pronunciation hint when they occur in a word.
	    hinted_sounds = {"θ", "ð", "v", "z", "ʒ", "r", "w"}

	    try:
	        # Build the pool as (level, word entry) pairs so the level survives sampling.
	        if difficulty == "mixed":
	            word_pool = [
	                (level, entry)
	                for level, entries in SAMPLE_WORDS.items()
	                for entry in entries
	            ]
	        else:
	            if difficulty not in SAMPLE_WORDS:
	                difficulty = "easy"
	            word_pool = [(difficulty, entry) for entry in SAMPLE_WORDS[difficulty]]

	        selected = random.sample(word_pool, min(count, len(word_pool)))

	        exercises = []
	        for level, word_data in selected:
	            # Get phonemes. Only ordinary exceptions trigger the letter
	            # fallback; a bare except would also swallow cancellation and
	            # interpreter-exit signals.
	            try:
	                phonemes = g2p.text_to_phonemes(word_data["word"])[0]["phonemes"]
	            except Exception as g2p_error:
	                logger.warning(
	                    f"G2P failed for '{word_data['word']}', using letters instead: {g2p_error}"
	                )
	                phonemes = list(word_data["word"].lower())

	            hints = [
	                f"Nghĩa: {word_data['meaning']}",
	                f"Ví dụ: {word_data['sentence']}",
	                # The number shown is a phoneme count, so it is labelled as
	                # phonemes ("âm vị") rather than syllables ("âm tiết").
	                f"Số âm vị: {len(phonemes)}",
	            ]

	            # One tip per distinct difficult sound, in order of first occurrence.
	            for sound in dict.fromkeys(p for p in phonemes if p in hinted_sounds):
	                if sound in IPA_SYMBOLS_DATA:
	                    hints.append(f"Âm /{sound}/: {IPA_SYMBOLS_DATA[sound]['tip']}")

	            exercises.append(
	                IPAExercise(
	                    word=word_data["word"],
	                    ipa=word_data["ipa"],
	                    phonemes=phonemes,
	                    hints=hints,
	                    difficulty=level,
	                )
	            )

	        return exercises
	    except Exception as e:
	        logger.error(f"Error generating IPA exercises: {e}")
	        raise HTTPException(status_code=500, detail=str(e))


	def _split_ipa_into_symbols(ipa: str) -> List[str]:
	    """Split an IPA string into symbols, keeping known two-character symbols whole.

	    At each position a two-character symbol listed in IPA_SYMBOLS_DATA (an
	    affricate such as "tʃ" or a diphthong such as "aʊ") is preferred; anything
	    else, including slashes and stress marks, becomes a one-character token.
	    """
	    tokens = []
	    position = 0
	    while position < len(ipa):
	        pair = ipa[position : position + 2]
	        if len(pair) == 2 and pair in IPA_SYMBOLS_DATA:
	            tokens.append(pair)
	            position += 2
	        else:
	            tokens.append(ipa[position])
	            position += 1
	    return tokens


	@router.get("/symbol/{symbol}")
	async def get_symbol_details(symbol: str):
	    """Get detailed information about a specific IPA symbol.

	    Args:
	        symbol: The IPA symbol to look up; it must be a key of IPA_SYMBOLS_DATA.

	    Returns:
	        A dict with the symbol's stored metadata, its G2P difficulty score, up
	        to five sample words whose transcription contains the symbol, and
	        practice tips.

	    Raises:
	        HTTPException: 404 when the symbol is unknown, 500 for any other failure.
	    """
	    max_examples = 5

	    try:
	        if symbol not in IPA_SYMBOLS_DATA:
	            raise HTTPException(
	                status_code=404, detail=f"IPA symbol '{symbol}' not found"
	            )

	        data = IPA_SYMBOLS_DATA[symbol]

	        # Find words containing this symbol. The transcription is compared
	        # symbol by symbol: a plain substring test would list "chair" /tʃɛr/
	        # under /t/ and "voice" /vɔɪs/ under /ɔ/.
	        example_words = []
	        for difficulty_level, words in SAMPLE_WORDS.items():
	            for word_data in words:
	                if symbol not in _split_ipa_into_symbols(word_data["ipa"]):
	                    continue
	                example_words.append(
	                    {
	                        "word": word_data["word"],
	                        "ipa": word_data["ipa"],
	                        "meaning": word_data["meaning"],
	                        "difficulty": difficulty_level,
	                    }
	                )
	                if len(example_words) >= max_examples:
	                    break
	            if len(example_words) >= max_examples:
	                break

	        return {
	            "symbol": symbol,
	            "description": data["desc"],
	            "example_word": data["word"],
	            "category": data["category"],
	            "difficulty_level": data["difficulty"],
	            "vietnamese_tip": data["tip"],
	            "difficulty_score": g2p.get_difficulty_score(symbol),
	            "example_words": example_words,
	            "practice_tips": _get_practice_tips(symbol),
	        }
	    except HTTPException:
	        # Let the deliberate 404 through instead of converting it to a 500.
	        raise
	    except Exception as e:
	        logger.error(f"Error getting symbol details: {e}")
	        raise HTTPException(status_code=500, detail=str(e))


	def _get_practice_tips(symbol: str) -> List[str]:
	"""Get specific practice tips for a symbol"""
	tips_map = {
	"θ": [
	"Đặt đầu lưỡi giữa răng trên và răng dưới",
	"Thổi khí nhẹ qua kẽ răng",
	"Không rung dây thanh âm",
	"Luyện với từ: think, three, thank",
	],
	"ð": [
	"Vị trí lưỡi giống như âm θ",
	"Nhưng phải rung dây thanh âm",
	"Cảm nhận rung động ở cổ họng",
	"Luyện với từ: this, that, brother",
	],
	"v": [
	"Môi dưới chạm vào răng trên",
	"Không dùng cả hai môi như tiếng Việt",
	"Rung dây thanh âm",
	"Luyện với từ: very, voice, love",
	],
	"r": [
	"Cuộn lưỡi nhẹ nhàng",
	"Không để lưỡi chạm vào vòm miệng",
	"Không lăn lưỡi như tiếng Việt",
	"Luyện với từ: red, run, car",
	],
	"w": [
	"Tròn môi như phát âm 'u'",
	"Không dùng răng như âm 'v'",
	"Môi tròn rồi mở ra nhanh",
	"Luyện với từ: we, water, window",
	],
	}

	return tips_map.get(
	symbol,
	[
	f"Luyện phát âm âm /{symbol}/ thường xuyên",
	"Nghe và bắt chước người bản ngữ",
	"Tập trung vào vị trí lưỡi và môi",
	"Luyện tập với từ đơn giản trước",
	],
	)


	@router.get("/word-analysis/{word}")
	async def get_word_analysis(word: str):
	    """Get comprehensive analysis of a word for IPA learning.

	    Args:
	        word: The word to analyse.

	    Returns:
	        A dict with the word's transcription and phonemes as reported by the
	        G2P converter, an overall difficulty (average per-phoneme score:
	        > 0.6 hard, > 0.4 medium, otherwise easy; 0.3 when there are no
	        phonemes), a per-phoneme breakdown, up to five sample words sharing a
	        difficult phoneme (score > 0.5), a practice sequence and common
	        mistakes.

	    Raises:
	        HTTPException: 500 carrying the error text if anything fails.
	    """
	    max_similar = 5

	    def level_for(score):
	        # Same thresholds for the whole word and for individual phonemes.
	        if score > 0.6:
	            return "hard"
	        if score > 0.4:
	            return "medium"
	        return "easy"

	    try:
	        # Get phoneme data
	        phoneme_data = g2p.text_to_phonemes(word)[0]
	        phonemes = phoneme_data["phonemes"]

	        # Score every phoneme once and reuse the result below.
	        # NOTE(review): assumes get_difficulty_score is deterministic — confirm.
	        difficulty_scores = [g2p.get_difficulty_score(p) for p in phonemes]
	        avg_difficulty = (
	            sum(difficulty_scores) / len(difficulty_scores)
	            if difficulty_scores
	            else 0.3
	        )
	        word_difficulty = level_for(avg_difficulty)

	        # Get detailed phoneme analysis
	        phoneme_analysis = []
	        for position, (phoneme, score) in enumerate(zip(phonemes, difficulty_scores)):
	            symbol_info = IPA_SYMBOLS_DATA.get(phoneme, {})
	            phoneme_analysis.append(
	                {
	                    "phoneme": phoneme,
	                    "position": position,
	                    "difficulty_score": score,
	                    "difficulty_level": level_for(score),
	                    "category": symbol_info.get("category", "unknown"),
	                    "vietnamese_tip": symbol_info.get("tip", f"Luyện âm {phoneme}"),
	                    "practice_tips": _get_practice_tips(phoneme),
	                }
	            )

	        # Distinct difficult phonemes of the requested word, in order of first
	        # occurrence, so a repeated phoneme is reported only once.
	        difficult_phonemes = list(
	            dict.fromkeys(
	                p for p, score in zip(phonemes, difficulty_scores) if score > 0.5
	            )
	        )

	        # Find similar words for practice. Nothing can match when the word has
	        # no difficult phonemes, so the sample bank is not scanned in that case.
	        similar_words = []
	        if difficult_phonemes:
	            requested = word.lower()
	            for difficulty_level, words in SAMPLE_WORDS.items():
	                for word_data in words:
	                    # Case-insensitive, so "Cat" does not get "cat" suggested back.
	                    if word_data["word"].lower() == requested:
	                        continue
	                    word_phonemes = g2p.text_to_phonemes(word_data["word"])[0][
	                        "phonemes"
	                    ]
	                    shared_difficult = [
	                        p for p in difficult_phonemes if p in word_phonemes
	                    ]
	                    if shared_difficult:
	                        similar_words.append(
	                            {
	                                "word": word_data["word"],
	                                "ipa": word_data["ipa"],
	                                "meaning": word_data["meaning"],
	                                "shared_sounds": shared_difficult,
	                                "difficulty": difficulty_level,
	                            }
	                        )
	                    if len(similar_words) >= max_similar:
	                        break
	                if len(similar_words) >= max_similar:
	                    break

	        return {
	            "word": word,
	            "ipa": phoneme_data["ipa"],
	            "phonemes": phonemes,
	            "phoneme_string": phoneme_data["phoneme_string"],
	            "difficulty": word_difficulty,
	            "difficulty_score": avg_difficulty,
	            "phoneme_analysis": phoneme_analysis,
	            "similar_words": similar_words,
	            "practice_sequence": _generate_practice_sequence(phoneme_analysis),
	            "common_mistakes": _get_common_mistakes(phonemes),
	        }

	    except Exception as e:
	        logger.error(f"Error analyzing word '{word}': {e}")
	        raise HTTPException(status_code=500, detail=str(e))


	def _generate_practice_sequence(phoneme_analysis: List[Dict]) -> List[Dict]:
	"""Generate a practice sequence starting with easier sounds"""
	# Sort by difficulty
	sorted_phonemes = sorted(phoneme_analysis, key=lambda x: x["difficulty_score"])

	sequence = []
	for phoneme_data in sorted_phonemes:
	step = {
	"step": len(sequence) + 1,
	"phoneme": phoneme_data["phoneme"],
	"focus": "Tập trung vào âm này",
	"tip": phoneme_data["vietnamese_tip"],
	"practice_words": _get_practice_words_for_phoneme(phoneme_data["phoneme"]),
	}
	sequence.append(step)

	return sequence


	def _get_practice_words_for_phoneme(phoneme: str) -> List[str]:
	"""Get simple words containing the phoneme"""
	practice_words = {
	"θ": ["think", "three", "month", "tooth"],
	"ð": ["this", "that", "mother", "brother"],
	"v": ["very", "voice", "love", "give"],
	"r": ["red", "run", "car", "tree"],
	"w": ["we", "water", "window", "want"],
	"z": ["zoo", "zero", "buzz", "pizza"],
	"ʒ": ["measure", "pleasure", "treasure", "vision"],
	"æ": ["cat", "hat", "man", "bad"],
	"ɪ": ["sit", "big", "win", "ship"],
	"ʊ": ["put", "look", "book", "good"],
	}

	return practice_words.get(phoneme, [])


	def _get_common_mistakes(phonemes: List[str]) -> List[Dict]:
	"""Get common pronunciation mistakes for Vietnamese speakers"""
	mistakes = []

	common_mistakes_map = {
	"θ": {
	"mistake": "Phát âm thành 'f' hoặc 's'",
	"correction": "Đặt lưỡi giữa răng, thổi nhẹ",
	"examples": ["think → fink/sink (sai), think (đúng)"],
	},
	"ð": {
	"mistake": "Phát âm thành 'd' hoặc 'z'",
	"correction": "Lưỡi giữa răng + rung dây thanh",
	"examples": ["this → dis/zis (sai), this (đúng)"],
	},
	"v": {
	"mistake": "Phát âm thành 'w' hoặc 'b'",
	"correction": "Môi dưới chạm răng trên",
	"examples": ["very → wery/bery (sai), very (đúng)"],
	},
	"r": {
	"mistake": "Lăn lưỡi như tiếng Việt",
	"correction": "Cuộn lưỡi nhẹ, không chạm vòm",
	"examples": ["red → rrred (sai), red (đúng)"],
	},
	"w": {
	"mistake": "Phát âm thành 'v'",
	"correction": "Tròn môi, không dùng răng",
	"examples": ["we → ve (sai), we (đúng)"],
	},
	}

	for phoneme in phonemes:
	if phoneme in common_mistakes_map:
	mistake_info = common_mistakes_map[phoneme]
	mistakes.append(
	{
	"phoneme": phoneme,
	"common_mistake": mistake_info["mistake"],
	"correction": mistake_info["correction"],
	"examples": mistake_info["examples"],
	}
	)

	return mistakes





	@router.get("/practice-session/{lesson_id}")
	async def create_ipa_practice_session(lesson_id: str):
	    """Assemble a structured IPA practice session for a lesson.

	    The lessons "vowels_basic" and "difficult_sounds" have fixed word lists;
	    any other ``lesson_id`` receives a three-word default session. Every
	    word is returned with its IPA, focus phonemes and the result of
	    ``map_word_to_phonemes``. Any failure is logged and surfaced as an
	    HTTP 500 error.
	    """
	    # Hard-coded sample data as (word, ipa, focus_phonemes); a real
	    # implementation would typically load this from a database.
	    lesson_plans = {
	        "vowels_basic": [
	            ("cat", "/kæt/", ["æ"]),
	            ("bed", "/bɛd/", ["ɛ"]),
	            ("see", "/si/", ["i"]),
	            ("cup", "/kʌp/", ["ʌ"]),
	            ("book", "/bʊk/", ["ʊ"]),
	        ],
	        "difficult_sounds": [
	            ("think", "/θɪŋk/", ["θ"]),
	            ("this", "/ðɪs/", ["ð"]),
	            ("very", "/ˈvɛri/", ["v"]),
	            ("water", "/ˈwɔtər/", ["w"]),
	            ("red", "/rɛd/", ["r"]),
	        ],
	    }
	    # Used for every lesson_id that has no dedicated plan.
	    default_plan = [
	        ("hello", "/həˈloʊ/", ["ə", "oʊ"]),
	        ("world", "/wɜrld/", ["w", "ɜr"]),
	        ("practice", "/ˈpræktɪs/", ["æ", "ɪ"]),
	    ]
	    minutes_per_word = 3

	    try:
	        chosen_plan = lesson_plans.get(lesson_id, default_plan)
	        session_words = [
	            {
	                "word": text,
	                "ipa": ipa,
	                "focus_phonemes": focus,
	                "mapping": map_word_to_phonemes(text, ipa),
	            }
	            for text, ipa, focus in chosen_plan
	        ]

	        return {
	            "session_id": lesson_id,
	            "title": f"IPA Practice Session: {lesson_id.replace('_', ' ').title()}",
	            "words": session_words,
	            "estimated_time": len(session_words) * minutes_per_word,
	            "instructions": [
	                "Nghe mẫu từng từ carefully",
	                "Tập trung vào âm vị được highlight",
	                "Ghi âm nhiều lần cho đến khi đạt điểm tốt",
	                "Đọc feedback để cải thiện",
	            ],
	        }

	    except Exception as e:
	        logger.error(f"Error creating practice session: {e}")
	        raise HTTPException(status_code=500, detail=str(e))