Spaces:
Running
on
Zero
Running
on
Zero
add logo
Browse files
- app.py +7 -2
- diffrhythm/infer/infer_utils.py +6 -14
app.py
CHANGED
|
@@ -139,6 +139,11 @@ css = """
|
|
| 139 |
|
| 140 |
with gr.Blocks(css=css) as demo:
|
| 141 |
gr.HTML(f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
<div style="flex: 1; text-align: center;">
|
| 143 |
<div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
|
| 144 |
Di♪♪Rhythm (谛韵)
|
|
@@ -194,7 +199,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 194 |
- Shorter clips may lead to incoherent generation
|
| 195 |
|
| 196 |
4. **Supported Languages**
|
| 197 |
-
- Chinese and English
|
| 198 |
- More languages comming soon
|
| 199 |
""")
|
| 200 |
|
|
@@ -255,7 +260,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 255 |
)
|
| 256 |
|
| 257 |
# page 2
|
| 258 |
-
with gr.Tab("
|
| 259 |
with gr.Row():
|
| 260 |
with gr.Column():
|
| 261 |
with gr.Accordion("Notice", open=False):
|
|
|
|
| 139 |
|
| 140 |
with gr.Blocks(css=css) as demo:
|
| 141 |
gr.HTML(f"""
|
| 142 |
+
<div style="display: flex; align-items: center;">
|
| 143 |
+
<img src='https://raw.githubusercontent.com/ASLP-lab/DiffRhythm/refs/heads/main/src/DiffRhythm_logo.jpg'
|
| 144 |
+
style='width: 200px; height: 40%; display: block; margin: 0 auto 20px;'>
|
| 145 |
+
</div>
|
| 146 |
+
|
| 147 |
<div style="flex: 1; text-align: center;">
|
| 148 |
<div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
|
| 149 |
Di♪♪Rhythm (谛韵)
|
|
|
|
| 199 |
- Shorter clips may lead to incoherent generation
|
| 200 |
|
| 201 |
4. **Supported Languages**
|
| 202 |
+
- **Chinese and English**
|
| 203 |
- More languages comming soon
|
| 204 |
""")
|
| 205 |
|
|
|
|
| 260 |
)
|
| 261 |
|
| 262 |
# page 2
|
| 263 |
+
with gr.Tab("Lyrics Generate", id=1):
|
| 264 |
with gr.Row():
|
| 265 |
with gr.Column():
|
| 266 |
with gr.Accordion("Notice", open=False):
|
diffrhythm/infer/infer_utils.py
CHANGED
|
@@ -53,25 +53,17 @@ def get_negative_style_prompt(device):
|
|
| 53 |
|
| 54 |
def get_style_prompt(model, wav_path):
|
| 55 |
mulan = model
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
if ext == '.mp3':
|
| 59 |
-
meta = MP3(wav_path)
|
| 60 |
-
audio_len = meta.info.length
|
| 61 |
-
elif ext in ['.wav', '.flac']:
|
| 62 |
-
audio_len = librosa.get_duration(path=wav_path)
|
| 63 |
-
else:
|
| 64 |
-
raise ValueError("Unsupported file format: {}".format(ext))
|
| 65 |
|
| 66 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
| 67 |
|
| 68 |
-
if audio_len
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
wav, _ = librosa.load(wav_path, sr=24000, offset=start_time, duration=10)
|
| 72 |
|
| 73 |
else:
|
| 74 |
-
wav
|
| 75 |
wav = torch.tensor(wav).unsqueeze(0).to(model.device)
|
| 76 |
|
| 77 |
with torch.no_grad():
|
|
|
|
| 53 |
|
| 54 |
def get_style_prompt(model, wav_path):
|
| 55 |
mulan = model
|
| 56 |
+
audio, _ = librosa.load(wav_path, sr=24000)
|
| 57 |
+
audio_len = librosa.get_duration(y=audio, sr=24000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
| 60 |
|
| 61 |
+
if audio_len > 10:
|
| 62 |
+
start_time = int(audio_len // 2 - 5)
|
| 63 |
+
wav = audio[start_time*24000:(start_time+10)*24000]
|
|
|
|
| 64 |
|
| 65 |
else:
|
| 66 |
+
wav = audio
|
| 67 |
wav = torch.tensor(wav).unsqueeze(0).to(model.device)
|
| 68 |
|
| 69 |
with torch.no_grad():
|