johnwang2026 committed
Commit d88cd51 · verified · 1 Parent(s): f9f1879

Update app.py

Files changed (1)
  1. app.py +29 -42
app.py CHANGED
@@ -1,64 +1,51 @@
  import gradio as gr
- from transformers import AutoModelForTextToSpeech, AutoTokenizer, pipeline
  import soundfile as sf
  import torch
  import os

- # Initialize the Chinese/English bilingual TTS pipelines (lightweight models, total size < 5GB)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # English TTS (FastSpeech2, ~2GB)
- en_tokenizer = AutoTokenizer.from_pretrained("facebook/fastspeech2-en-ljspeech")
- en_model = AutoModelForTextToSpeech.from_pretrained("facebook/fastspeech2-en-ljspeech").to(device)

- # Chinese TTS (Chinese-FastSpeech2, ~3GB)
- zh_tokenizer = AutoTokenizer.from_pretrained("bakerk1234/Chinese-FastSpeech2")
- zh_model = AutoModelForTextToSpeech.from_pretrained("bakerk1234/Chinese-FastSpeech2").to(device)

- # Speech generation function (auto-detects the language and switches models)
  def generate_speech(text):
      if not text.strip():
          return None, "Error: please enter valid text!"

-     # Simple language detection (Chinese text contains CJK characters, English text does not)
-     is_chinese = any('\u4e00' <= char <= '\u9fff' for char in text)

-     if is_chinese:
-         tokenizer = zh_tokenizer
-         model = zh_model
-         samplerate = 22050  # sample rate of the Chinese model
-     else:
-         tokenizer = en_tokenizer
-         model = en_model
-         samplerate = 22050  # sample rate of the English model
-
-     # Encode the text and generate speech
-     inputs = tokenizer(text, return_tensors="pt").to(device)
      with torch.no_grad():
-         audio_output = model.generate(**inputs).cpu().numpy()

-     # Save the audio
      output_path = "output.wav"
-     sf.write(output_path, audio_output[0].T, samplerate=samplerate)  # transpose to the layout soundfile expects

-     return output_path, f"Speech generated successfully! (using the {'Chinese' if is_chinese else 'English'} lightweight model)"

- # Interface unchanged
- with gr.Blocks(title="Chinese/English Bilingual TTS (Lightweight)") as demo:
-     gr.Markdown("# 🎤 Lightweight Chinese/English Bilingual Text-to-Speech")
-     gr.Markdown("Based on FastSpeech2 models; small footprint (<5GB), fits a free Space, supports Chinese and English input")
-
-     with gr.Row():
-         text_input = gr.Textbox(
-             label="Input text",
-             placeholder="Enter Chinese or English text (≤300 characters recommended)...",
-             lines=5
-         )
-         audio_output = gr.Audio(label="Generated speech", type="filepath")
-
      status_text = gr.Textbox(label="Status", interactive=False)
-     generate_btn = gr.Button("🚀 Generate", variant="primary")

      generate_btn.click(
          fn=generate_speech,
          inputs=text_input,
 
  import gradio as gr
+ from transformers import AutoModel, AutoTokenizer  # drop AutoModelForTextToSpeech entirely
  import soundfile as sf
  import torch
  import os

+ # Switch to an ultra-lightweight Chinese TTS model (only ~1.2GB, easily fits a free Space)
+ model_name = "yeyupiaoling/PP-TTS-v2"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModel.from_pretrained(model_name)

+ # Assign the device manually (GPU if available, otherwise CPU; avoids dependency conflicts)
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = model.to(device)

+ # Speech generation function (simplified logic for stability)
  def generate_speech(text):
      if not text.strip():
          return None, "Error: please enter valid text!"

+     # Encode the text (matching the model's input requirements)
+     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)

+     # Generate speech (gradients disabled to save memory)
      with torch.no_grad():
+         output = model.generate(**inputs)
+         audio_output = output["wav"].cpu().numpy()[0]  # extract the audio data

+     # Save the audio (24000 Hz sample rate, matching the model output)
      output_path = "output.wav"
+     sf.write(output_path, audio_output, samplerate=24000)

+     return output_path, "Speech generated successfully! (ultra-lightweight model, fits a free Space)"

+ # Minimal interface (lower resource usage)
+ with gr.Blocks(title="Lightweight Chinese TTS") as demo:
+     gr.Markdown("# 🎤 Free Chinese Text-to-Speech")
+     gr.Markdown("Based on the PP-TTS-v2 model (1.2GB); fits a free Space, fast and stable generation")
+
+     text_input = gr.Textbox(
+         label="Input Chinese text",
+         placeholder="Enter Chinese text (≤500 characters recommended)...",
+         lines=4
+     )
+     audio_output = gr.Audio(label="Generated speech", type="filepath")

      status_text = gr.Textbox(label="Status", interactive=False)

+     generate_btn = gr.Button("🚀 Generate", variant="primary")
      generate_btn.click(
          fn=generate_speech,
          inputs=text_input,
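
The hunk ends inside the generate_btn.click(...) call, so the rest of the event wiring is not shown. As a rough sketch only (not part of commit d88cd51; it reuses the component names from the diff and stubs out the model call with a silent clip), this is how a Gradio click handler that returns an audio file path plus a status string is typically connected and launched:

import numpy as np
import soundfile as sf
import gradio as gr

def generate_speech(text):
    # Stub standing in for the TTS call in the diff: writes one second of
    # silence so the interface wiring can be exercised end to end.
    if not text.strip():
        return None, "Error: please enter valid text!"
    sf.write("output.wav", np.zeros(24000, dtype=np.float32), samplerate=24000)
    return "output.wav", "Speech generated successfully!"

with gr.Blocks(title="Lightweight Chinese TTS") as demo:
    text_input = gr.Textbox(label="Input Chinese text", lines=4)
    audio_output = gr.Audio(label="Generated speech", type="filepath")
    status_text = gr.Textbox(label="Status", interactive=False)
    generate_btn = gr.Button("🚀 Generate", variant="primary")

    # A handler that returns two values maps them, in order, to two output components.
    generate_btn.click(
        fn=generate_speech,
        inputs=text_input,
        outputs=[audio_output, status_text],
    )

demo.launch()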