Change phonemizer to proprietary one
Files changed:
- .gitignore    +11 -1
- app.py        +27 -6
- packages.txt   +0 -3
.gitignore CHANGED

@@ -157,4 +157,14 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+#.idea/
+
+deletion_token.txt
+
+.vscode
+
+en_us
+input.txt
+input.xml
+phn.zip
+xml_nlp
app.py CHANGED

@@ -1,11 +1,11 @@
 from espnet2.bin.tts_inference import Text2Speech
 import torch
 from parallel_wavegan.utils import download_pretrained_model, load_model
-from phonemizer import phonemize
-from phonemizer.separator import Separator
 import gradio as gr
+import os
+import subprocess
+from zipfile import ZipFile

-s = Separator(word=None, phone=" ")
 config_path = "config.yaml"
 model_path = "model.pth"

@@ -14,6 +14,13 @@ vocoder_tag = "ljspeech_parallel_wavegan.v3"
 vocoder = load_model(download_pretrained_model(vocoder_tag)).to("cpu").eval()
 vocoder.remove_weight_norm()

+url = os.environ.get("PHN_URL")
+subprocess.call(["wget", url, "-q"])
+
+with ZipFile("phn.zip", "r") as zip_ref:
+    zip_ref.extractall()
+subprocess.call(["chmod", "+x", "xml_nlp"])
+
 global_styles = {
     "Style 1": torch.load("style1.pt"),
     "Style 2": torch.load("style2.pt"),

@@ -24,6 +31,22 @@ global_styles = {
 }


+def phonemize(text):
+    with open("input.txt", "w+") as f:
+        f.write(text)
+
+    with open("input.xml", "w") as f:
+        pass
+
+    subprocess.call(["./xml_nlp", "input", "180", "en_us/enu.ini", "en_us"])
+
+    phoneme_string = ""
+    with open("input.xml", "r") as f:
+        for line in f.readlines():
+            phoneme_string += line.split("[")[-1][:-2]
+    return phoneme_string
+
+
 def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
     with torch.no_grad():
         text2speech = Text2Speech(

@@ -44,9 +67,7 @@ def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):

         style_emb = torch.flatten(global_styles[global_style])

-        phoneme_string = phonemize(
-            text, language="mb-us1", backend="espeak-mbrola", separator=s
-        )
+        phoneme_string = phonemize(text)
         phonemes = phoneme_string.split(" ")

         max_edit_index = -1
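The new phonemize() shells out to the bundled xml_nlp binary and then scrapes a bracketed phoneme list out of each line of input.xml. As a minimal sketch of that parsing step, assuming (hypothetically) that the tool writes lines ending in "[...]" with space-separated phonemes, the expression line.split("[")[-1][:-2] keeps everything after the last "[" and drops the closing "]" plus the newline:

# Hypothetical xml_nlp output line; the real output format is not shown in this commit.
sample_line = '<token text="hello"> [h @ l oU]\n'

# Same expression as in phonemize(): take the text after the last "[",
# then strip the final two characters ("]" and the newline).
phonemes_part = sample_line.split("[")[-1][:-2]
print(phonemes_part)             # h @ l oU
print(phonemes_part.split(" "))  # ['h', '@', 'l', 'oU']

Note that phonemize() concatenates these fragments without any separator, so if xml_nlp emitted more than one matching line, the last phoneme of one line would run into the first phoneme of the next; whether that can happen depends on the tool's output, which is not shown here.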
packages.txt DELETED

@@ -1,3 +0,0 @@
-mbrola
-mbrola-us1
-espeak-ng
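For reference, the code removed from app.py is what needed these system packages: phonemizer's espeak-mbrola backend relies on espeak-ng plus the mbrola engine and the us1 voice, which is presumably why mbrola, mbrola-us1, and espeak-ng were listed in packages.txt. A minimal sketch of the old call, with a hypothetical input string, looked like this:

from phonemizer import phonemize
from phonemizer.separator import Separator

# Old separator settings: phonemes separated by spaces, no word separator.
s = Separator(word=None, phone=" ")

# "hello world" stands in for the user's input text.
phoneme_string = phonemize(
    "hello world", language="mb-us1", backend="espeak-mbrola", separator=s
)
print(phoneme_string.split(" "))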