Change phonemizer to proprietary one
Files changed:
- .gitignore    +11 -1
- app.py        +27 -6
- packages.txt   +0 -3
.gitignore CHANGED

@@ -157,4 +157,14 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+#.idea/
+
+deletion_token.txt
+
+.vscode
+
+en_us
+input.txt
+input.xml
+phn.zip
+xml_nlp
app.py CHANGED

@@ -1,11 +1,11 @@
 from espnet2.bin.tts_inference import Text2Speech
 import torch
 from parallel_wavegan.utils import download_pretrained_model, load_model
-from phonemizer import phonemize
-from phonemizer.separator import Separator
 import gradio as gr
+import os
+import subprocess
+from zipfile import ZipFile

-s = Separator(word=None, phone=" ")
 config_path = "config.yaml"
 model_path = "model.pth"

@@ -14,6 +14,13 @@ vocoder_tag = "ljspeech_parallel_wavegan.v3"
 vocoder = load_model(download_pretrained_model(vocoder_tag)).to("cpu").eval()
 vocoder.remove_weight_norm()

+url = os.environ.get("PHN_URL")
+subprocess.call(["wget", url, "-q"])
+
+with ZipFile("phn.zip", "r") as zip_ref:
+    zip_ref.extractall()
+subprocess.call(["chmod", "+x", "xml_nlp"])
+
 global_styles = {
     "Style 1": torch.load("style1.pt"),
     "Style 2": torch.load("style2.pt"),

@@ -24,6 +31,22 @@ global_styles = {
 }


+def phonemize(text):
+    with open("input.txt", "w+") as f:
+        f.write(text)
+
+    with open("input.xml", "w") as f:
+        pass
+
+    subprocess.call(["./xml_nlp", "input", "180", "en_us/enu.ini", "en_us"])
+
+    phoneme_string = ""
+    with open("input.xml", "r") as f:
+        for line in f.readlines():
+            phoneme_string += line.split("[")[-1][:-2]
+    return phoneme_string
+
+
 def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
     with torch.no_grad():
         text2speech = Text2Speech(

@@ -44,9 +67,7 @@ def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):

         style_emb = torch.flatten(global_styles[global_style])

-        phoneme_string = phonemize(
-            text, language="mb-us1", backend="espeak-mbrola", separator=s
-        )
+        phoneme_string = phonemize(text)
         phonemes = phoneme_string.split(" ")

         max_edit_index = -1
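The new phonemize() shells out to the bundled xml_nlp binary and then scrapes a bracketed phoneme list out of each line of input.xml. As a minimal sketch of that parsing step, assuming (hypothetically) that the tool writes lines ending in "[...]" with space-separated phonemes, the expression line.split("[")[-1][:-2] keeps everything after the last "[" and drops the closing "]" plus the newline:

# Hypothetical xml_nlp output line; the real output format is not shown in this commit.
sample_line = '<token text="hello"> [h @ l oU]\n'

# Same expression as in phonemize(): take the text after the last "[",
# then strip the final two characters ("]" and the newline).
phonemes_part = sample_line.split("[")[-1][:-2]
print(phonemes_part)             # h @ l oU
print(phonemes_part.split(" "))  # ['h', '@', 'l', 'oU']

Note that phonemize() concatenates these fragments without any separator, so if xml_nlp emitted more than one matching line, the last phoneme of one line would run into the first phoneme of the next; whether that can happen depends on the tool's output, which is not shown here.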
packages.txt DELETED

@@ -1,3 +0,0 @@
-mbrola
-mbrola-us1
-espeak-ng
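For reference, the code removed from app.py is what needed these system packages: phonemizer's espeak-mbrola backend relies on espeak-ng plus the mbrola engine and the us1 voice, which is presumably why mbrola, mbrola-us1, and espeak-ng were listed in packages.txt. A minimal sketch of the old call, with a hypothetical input string, looked like this:

from phonemizer import phonemize
from phonemizer.separator import Separator

# Old separator settings: phonemes separated by spaces, no word separator.
s = Separator(word=None, phone=" ")

# "hello world" stands in for the user's input text.
phoneme_string = phonemize(
    "hello world", language="mb-us1", backend="espeak-mbrola", separator=s
)
print(phoneme_string.split(" "))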