Spaces:
Paused
Paused
| import os | |
| import noisereduce as nr | |
| import soundfile as sf | |
| # from moviepy.editor import * | |
| import string | |
| import json | |
| from glob import glob | |
| import torchaudio | |
| import subprocess | |
| import shutil | |
| import pyloudnorm as pyln | |
| import torch | |
| from TTS.api import TTS | |
| import string | |
def remove_punctuation(sentence):
    """Return *sentence* without ASCII punctuation and without line breaks.

    Every character listed in ``string.punctuation`` is deleted, each line
    feed is turned into a single space, and each carriage return is dropped.
    """
    # One translation table covers all three rewrites in a single pass;
    # mapping a code point to None deletes that character.
    char_map = dict.fromkeys(map(ord, string.punctuation))
    char_map[ord('\n')] = ' '
    char_map[ord('\r')] = None
    return sentence.translate(char_map)
def _spell_out_acronyms(text):
    """Return *text* with every all-caps word spaced out ("NASA" -> "N A S A").

    A word qualifies when, after punctuation removal, it is at least two
    characters long and ``str.isupper()`` is true. Only stand-alone
    occurrences are rewritten, so spelling out "US" never damages "USA".
    """
    import re  # local import: the module header does not import ``re``

    acronyms = {
        word
        for word in remove_punctuation(text).split()
        if len(word) >= 2 and word.isupper()
    }
    if not acronyms:
        return text

    # Longest alternative first so the regex prefers the full word when one
    # acronym is a prefix of another.
    ordered = sorted(acronyms, key=lambda word: (-len(word), word))
    alternatives = "|".join(re.escape(word) for word in ordered)
    # The look-arounds reject a match that is glued to a letter or digit,
    # i.e. one that is merely a fragment of a longer word.
    pattern = re.compile(rf"(?<![^\W_])(?:{alternatives})(?![^\W_])")
    return pattern.sub(lambda match: " ".join(match.group()), text)


def run_audio_generation_v1(new_text, accent='None'):
    """Synthesize *new_text* with XTTS v2, cloning the stored speaker sample.

    Steps:
      1. Flatten line breaks and spell out all-caps words letter by letter.
      2. Dump the list returned by ``TTS().list_models()`` to ``models.txt``.
      3. Read ``./tmp/audio/input_src/0.wav``, down-mix it to mono, run noise
         reduction, and save it as 16-bit PCM to
         ``./tmp/audio/speaker_wav.wav``.
      4. Generate English speech into ``./tmp/audio/generated-custom.wav``
         using that cleaned sample as the speaker reference.

    Args:
        new_text: The text to speak.
        accent: Accepted for interface compatibility; not used by this
            function at present.
    """
    new_text = new_text.replace('\n', ' ').replace('\r', '')
    new_text = _spell_out_acronyms(new_text)

    models = TTS().list_models()
    with open('models.txt', 'w') as f:
        f.writelines(f"{model}\n" for model in models)

    # Run on CUDA whenever it is available, otherwise fall back to CPU.
    gpu = torch.cuda.is_available()
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu)

    # Pre-process the speaker reference: mono, de-noised, 16-bit PCM.
    speaker_wav_data, speaker_wav_rate = sf.read("./tmp/audio/input_src/0.wav")
    if speaker_wav_data.ndim > 1:
        # soundfile yields (frames, channels) for multi-channel files, which
        # is not the layout noisereduce expects; average down to mono first.
        speaker_wav_data = speaker_wav_data.mean(axis=1)
    speaker_wav_data_no_noise = nr.reduce_noise(y=speaker_wav_data, sr=speaker_wav_rate)
    sf.write('./tmp/audio/speaker_wav.wav', speaker_wav_data_no_noise, speaker_wav_rate, subtype='PCM_16')

    tts.tts_to_file(
        new_text,
        speaker_wav="./tmp/audio/speaker_wav.wav",
        language="en",
        file_path="./tmp/audio/generated-custom.wav"
    )