Ravi-9's picture
Update evaluate.py
f488928 verified
raw
history blame
1.11 kB
from metrics import (
calculate_msd,
calculate_f0_correlation,
calculate_phoneme_accuracy,
calculate_spectral_convergence
)
from inference import run_tts
def evaluate_bd_tts(model, test_dataset):
    """Run TTS over a test set and score the output against the targets.

    Every ``(text, target)`` item in ``test_dataset`` is synthesized with
    ``run_tts(text)``. The predictions and the targets are collected into
    two parallel lists, which are passed unchanged to each metric function.

    Args:
        model: Not used by this function; ``run_tts`` is called with the
            text only.
        test_dataset: Iterable of ``(text, target)`` pairs.

    Returns:
        A dict with the keys ``mel_spectral_distance``, ``f0_correlation``,
        ``phoneme_accuracy``, ``spectral_convergence`` and ``accent_score``.
        The first four hold whatever the matching ``calculate_*`` function
        returns; ``accent_score`` is a fixed placeholder value of 0.85.
    """
    # Synthesize in dataset order, keeping each prediction beside its target.
    samples = [(run_tts(text), target) for text, target in test_dataset]
    pred_audio = [pred for pred, _ in samples]
    target_audio = [target for _, target in samples]

    return {
        'mel_spectral_distance': calculate_msd(pred_audio, target_audio),
        'f0_correlation': calculate_f0_correlation(pred_audio, target_audio),
        'phoneme_accuracy': calculate_phoneme_accuracy(pred_audio, target_audio),
        'spectral_convergence': calculate_spectral_convergence(pred_audio, target_audio),
        # Placeholder: an accent classifier is usually a pretrained model;
        # a Bangla accent classifier would be plugged in here. Until then
        # this is a constant, not a measurement.
        'accent_score': 0.85,
    }
if __name__ == "__main__":
    # Dummy dataset: one (text, target) pair, where the target is the
    # string "reference.wav".
    test_dataset = [("আমি বাংলা বলি।", "reference.wav")]
    # No model object is supplied; evaluate_bd_tts is called with None.
    results = evaluate_bd_tts(None, test_dataset)
    print(results)