johnwang2026 committed
Commit d88cd51 · verified · 1 Parent(s): f9f1879

Update app.py

Files changed (1)
  1. app.py +29 -42
app.py CHANGED
@@ -1,64 +1,51 @@
  import gradio as gr
- from transformers import AutoModelForTextToSpeech, AutoTokenizer, pipeline
  import soundfile as sf
  import torch
  import os

- # Initialize the Chinese/English bilingual TTS pipelines (lightweight models, total size < 5GB)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # English TTS (FastSpeech2, ~2GB)
- en_tokenizer = AutoTokenizer.from_pretrained("facebook/fastspeech2-en-ljspeech")
- en_model = AutoModelForTextToSpeech.from_pretrained("facebook/fastspeech2-en-ljspeech").to(device)

- # Chinese TTS (Chinese-FastSpeech2, ~3GB)
- zh_tokenizer = AutoTokenizer.from_pretrained("bakerk1234/Chinese-FastSpeech2")
- zh_model = AutoModelForTextToSpeech.from_pretrained("bakerk1234/Chinese-FastSpeech2").to(device)

- # Speech generation function (auto-detects the language and switches models)
  def generate_speech(text):
      if not text.strip():
          return None, "Error: please enter valid text!"

-     # Simple language detection (Chinese text contains CJK characters, English text does not)
-     is_chinese = any('\u4e00' <= char <= '\u9fff' for char in text)

-     if is_chinese:
-         tokenizer = zh_tokenizer
-         model = zh_model
-         samplerate = 22050  # sample rate of the Chinese model
-     else:
-         tokenizer = en_tokenizer
-         model = en_model
-         samplerate = 22050  # sample rate of the English model
-
-     # Encode the text and generate speech
-     inputs = tokenizer(text, return_tensors="pt").to(device)
      with torch.no_grad():
-         audio_output = model.generate(**inputs).cpu().numpy()

-     # Save the audio
      output_path = "output.wav"
-     sf.write(output_path, audio_output[0].T, samplerate=samplerate)  # transpose to the layout soundfile expects

-     return output_path, f"Speech generated successfully! (using the {'Chinese' if is_chinese else 'English'} lightweight model)"

- # Interface unchanged
- with gr.Blocks(title="Chinese/English Bilingual TTS (Lightweight)") as demo:
-     gr.Markdown("# 🎤 Lightweight Chinese/English Bilingual Text-to-Speech")
-     gr.Markdown("Based on FastSpeech2 models; small footprint (<5GB), fits a free Space, supports Chinese and English input")
-
-     with gr.Row():
-         text_input = gr.Textbox(
-             label="Input text",
-             placeholder="Enter Chinese or English text (≤300 characters recommended)...",
-             lines=5
-         )
-         audio_output = gr.Audio(label="Generated speech", type="filepath")
-
      status_text = gr.Textbox(label="Status", interactive=False)
-     generate_btn = gr.Button("🚀 Generate", variant="primary")

      generate_btn.click(
          fn=generate_speech,
          inputs=text_input,
 
  import gradio as gr
+ from transformers import AutoModel, AutoTokenizer  # drop AutoModelForTextToSpeech entirely
  import soundfile as sf
  import torch
  import os

+ # Switch to an ultra-lightweight Chinese TTS model (only ~1.2GB, easily fits a free Space)
+ model_name = "yeyupiaoling/PP-TTS-v2"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModel.from_pretrained(model_name)

+ # Assign the device manually (GPU if available, otherwise CPU; avoids dependency conflicts)
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = model.to(device)

+ # Speech generation function (simplified logic for stability)
  def generate_speech(text):
      if not text.strip():
          return None, "Error: please enter valid text!"

+     # Encode the text (matching the model's input requirements)
+     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)

+     # Generate speech (gradients disabled to save memory)
      with torch.no_grad():
+         output = model.generate(**inputs)
+         audio_output = output["wav"].cpu().numpy()[0]  # extract the audio data

+     # Save the audio (24000 Hz sample rate, matching the model output)
      output_path = "output.wav"
+     sf.write(output_path, audio_output, samplerate=24000)

+     return output_path, "Speech generated successfully! (ultra-lightweight model, fits a free Space)"

+ # Minimal interface (lower resource usage)
+ with gr.Blocks(title="Lightweight Chinese TTS") as demo:
+     gr.Markdown("# 🎤 Free Chinese Text-to-Speech")
+     gr.Markdown("Based on the PP-TTS-v2 model (1.2GB); fits a free Space, fast and stable generation")
+
+     text_input = gr.Textbox(
+         label="Input Chinese text",
+         placeholder="Enter Chinese text (≤500 characters recommended)...",
+         lines=4
+     )
+     audio_output = gr.Audio(label="Generated speech", type="filepath")

      status_text = gr.Textbox(label="Status", interactive=False)

+     generate_btn = gr.Button("🚀 Generate", variant="primary")
      generate_btn.click(
          fn=generate_speech,
          inputs=text_input,
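
The hunk ends inside the generate_btn.click(...) call, so the rest of the event wiring is not shown. As a rough sketch only (not part of commit d88cd51; it reuses the component names from the diff and stubs out the model call with a silent clip), this is how a Gradio click handler that returns an audio file path plus a status string is typically connected and launched:

import numpy as np
import soundfile as sf
import gradio as gr

def generate_speech(text):
    # Stub standing in for the TTS call in the diff: writes one second of
    # silence so the interface wiring can be exercised end to end.
    if not text.strip():
        return None, "Error: please enter valid text!"
    sf.write("output.wav", np.zeros(24000, dtype=np.float32), samplerate=24000)
    return "output.wav", "Speech generated successfully!"

with gr.Blocks(title="Lightweight Chinese TTS") as demo:
    text_input = gr.Textbox(label="Input Chinese text", lines=4)
    audio_output = gr.Audio(label="Generated speech", type="filepath")
    status_text = gr.Textbox(label="Status", interactive=False)
    generate_btn = gr.Button("🚀 Generate", variant="primary")

    # A handler that returns two values maps them, in order, to two output components.
    generate_btn.click(
        fn=generate_speech,
        inputs=text_input,
        outputs=[audio_output, status_text],
    )

demo.launch()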