from TTS.api import TTS
import torch
import json
# Path to your local config and checkpoint
config_path = "./config.json"
checkpoint_path = "./pytorch_model.pth"
# Detect GPU availability and load the model from the local checkpoint
gpu = torch.cuda.is_available()
tts = TTS(config_path=config_path, model_path=checkpoint_path, progress_bar=False, gpu=gpu)
# Alternative: load by model name from the Hugging Face Hub instead of a local checkpoint
# model_id = "bangla-speech-processing/bangla_tts_female"
# tts = TTS(model_name=model_id, progress_bar=False, gpu=gpu)
def run_tts(text, output_path="output.wav"):
    """Convert text to speech and save it to a WAV file."""
    tts.tts_to_file(text=text, file_path=output_path)
    return output_path
# def text_to_speech(text):
#     output_path = "output.wav"
#     tts.tts_to_file(text=text, file_path=output_path)
#     return output_path
# Optional Gradio demo (requires: import gradio as gr)
# demo = gr.Interface(
#     fn=text_to_speech,
#     inputs="text",
#     outputs="audio",
#     title="Bangla Text to Speech",
#     description="Enter Bangla text and get speech output",
# )
# if __name__ == "__main__":
#     demo.launch()
# Equivalent synthesis with the Coqui TTS command-line tool:
# tts --model_path bangla_tts_female/pytorch_model.pth \
#     --config_path bangla_tts_female/config.json \
#     --text "আমি বাংলাদেশ থেকে এসেছি।" \
#     --out_path baseline.wav
# Play the result in a notebook:
# from IPython.display import Audio
# Audio("baseline.wav")
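
# Minimal usage sketch when the script is run directly: synthesize one sentence and
# report where the audio was written. The Bangla sentence is only an example input
# (the same sentence used in the CLI example above); any Bangla text works.
if __name__ == "__main__":
    wav_path = run_tts("আমি বাংলাদেশ থেকে এসেছি।", output_path="output.wav")
    print(f"Saved synthesized speech to {wav_path}")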