johnwang2026 committed on
Commit
f9f1879
·
verified ·
1 Parent(s): 6dbf71c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -22
app.py CHANGED
@@ -1,47 +1,57 @@
1
  import gradio as gr
2
- from transformers import AutoModel, AutoTokenizer
3
  import soundfile as sf
4
  import torch
5
  import os
6
 
7
- # 加载模型和Tokenizer(修复参数+移除device_map)
8
- model_name = "Soul-AILab/SoulX-Podcast-1.7B"
9
- tokenizer = AutoTokenizer.from_pretrained(model_name)
10
- model = AutoModel.from_pretrained(
11
- model_name,
12
- dtype=torch.float16, # 替换 deprecated 的 torch_dtype
13
- # 移除 device_map="auto",改用手动分配设备(兼容无accelerate环境)
14
- )
15
- # 手动将模型移到GPU(无GPU自动用CPU)
16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
- model = model.to(device)
18
 
19
- # 语音生成函数(补充设备适配)
 
 
 
 
 
 
 
 
20
  def generate_speech(text):
21
  if not text.strip():
22
  return None, "错误:请输入有效文本!"
23
 
24
- # 文本编码并移到对应设备
25
- inputs = tokenizer(text, return_tensors="pt").to(device)
26
 
 
 
 
 
 
 
 
 
 
 
 
27
  with torch.no_grad():
28
- audio_output = model.generate(**inputs)
29
 
30
  # 保存音频
31
  output_path = "output.wav"
32
- sf.write(output_path, audio_output[0].cpu().numpy(), samplerate=24000)
33
 
34
- return output_path, "语音生成成功!"
35
 
36
- # 界面部分不变
37
- with gr.Blocks(title="SoulX-Podcast-1.7B 中英双语TTS") as demo:
38
- gr.Markdown("# 🎤 SoulX-Podcast-1.7B 文本转语音")
39
- gr.Markdown("支持中英双语输入,生成自然流畅的语音(采样率24000Hz)")
40
 
41
  with gr.Row():
42
  text_input = gr.Textbox(
43
  label="输入文本",
44
- placeholder="请输入要转换的文本(建议≤200字),支持中英双语...",
45
  lines=5
46
  )
47
  audio_output = gr.Audio(label="生成的语音", type="filepath")
 
1
  import gradio as gr
2
+ from transformers import AutoModelForTextToSpeech, AutoTokenizer, pipeline
3
  import soundfile as sf
4
  import torch
5
  import os
6
 
7
+ # 初始化中英双语TTS管道(轻量模型,总体积<5GB)
 
 
 
 
 
 
 
 
8
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
9
 
10
+ # 英文TTS(fastspeech2,体积~2GB)
11
+ en_tokenizer = AutoTokenizer.from_pretrained("facebook/fastspeech2-en-ljspeech")
12
+ en_model = AutoModelForTextToSpeech.from_pretrained("facebook/fastspeech2-en-ljspeech").to(device)
13
+
14
+ # 中文TTS(Chinese-FastSpeech2,体积~3GB)
15
+ zh_tokenizer = AutoTokenizer.from_pretrained("bakerk1234/Chinese-FastSpeech2")
16
+ zh_model = AutoModelForTextToSpeech.from_pretrained("bakerk1234/Chinese-FastSpeech2").to(device)
17
+
18
+ # 语音生成函数(自动识别语言,切换模型)
19
  def generate_speech(text):
20
  if not text.strip():
21
  return None, "错误:请输入有效文本!"
22
 
23
+ # 简单语言识别(中文含中文字符,英文不含)
24
+ is_chinese = any('\u4e00' <= char <= '\u9fff' for char in text)
25
 
26
+ if is_chinese:
27
+ tokenizer = zh_tokenizer
28
+ model = zh_model
29
+ samplerate = 22050 # 中文模型采样率
30
+ else:
31
+ tokenizer = en_tokenizer
32
+ model = en_model
33
+ samplerate = 22050 # 英文模型采样率
34
+
35
+ # 文本编码+生成语音
36
+ inputs = tokenizer(text, return_tensors="pt").to(device)
37
  with torch.no_grad():
38
+ audio_output = model.generate(**inputs).cpu().numpy()
39
 
40
  # 保存音频
41
  output_path = "output.wav"
42
+ sf.write(output_path, audio_output[0].T, samplerate=samplerate) # 调整维度适配保存
43
 
44
+ return output_path, f"语音生成成功!(使用{'中文' if is_chinese else '英文'}轻量模型)"
45
 
46
+ # 界面保持不变
47
+ with gr.Blocks(title="中英双语TTS(轻量版)") as demo:
48
+ gr.Markdown("# 🎤 轻量中英双语文本转语音")
49
+ gr.Markdown("基于FastSpeech2模型,体积小(<5GB),适配免费Space,支持中英双语输入")
50
 
51
  with gr.Row():
52
  text_input = gr.Textbox(
53
  label="输入文本",
54
+ placeholder="请输入中文或英文文本(建议≤300字)...",
55
  lines=5
56
  )
57
  audio_output = gr.Audio(label="生成的语音", type="filepath")