DiffRhythm

Running on Zero

ing0 committed on Mar 6

Commit

b311ba6

1 Parent(s): 1866ab0

add logo

Files changed (2) hide show

app.py CHANGED Viewed

@@ -139,6 +139,11 @@ css = """
 with gr.Blocks(css=css) as demo:
     gr.HTML(f"""
             <div style="flex: 1; text-align: center;">
                 <div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
                     Di♪♪Rhythm (谛韵)
@@ -194,7 +199,7 @@ with gr.Blocks(css=css) as demo:
                         - Shorter clips may lead to incoherent generation
                         4. **Supported Languages**
-                        - Chinese and English
                         - More languages comming soon
                         """)
@@ -255,7 +260,7 @@ with gr.Blocks(css=css) as demo:
             )
         # page 2
-        with gr.Tab("LLM Generate LRC", id=1):
             with gr.Row():
                 with gr.Column():
                     with gr.Accordion("Notice", open=False):

 with gr.Blocks(css=css) as demo:
     gr.HTML(f"""
+            <div style="display: flex; align-items: center;">
+                <img src='https://raw.githubusercontent.com/ASLP-lab/DiffRhythm/refs/heads/main/src/DiffRhythm_logo.jpg'
+                    style='width: 200px; height: 40%; display: block; margin: 0 auto 20px;'>
+            </div>
             <div style="flex: 1; text-align: center;">
                 <div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
                     Di♪♪Rhythm (谛韵)
                         - Shorter clips may lead to incoherent generation
                         4. **Supported Languages**
+                        - **Chinese and English**
                         - More languages comming soon
                         """)
             )
         # page 2
+        with gr.Tab("Lyrics Generate", id=1):
             with gr.Row():
                 with gr.Column():
                     with gr.Accordion("Notice", open=False):

diffrhythm/infer/infer_utils.py CHANGED Viewed

@@ -53,25 +53,17 @@ def get_negative_style_prompt(device):
 def get_style_prompt(model, wav_path):
     mulan = model
-    ext = os.path.splitext(wav_path)[-1].lower()
-    if ext == '.mp3':
-        meta = MP3(wav_path)
-        audio_len = meta.info.length
-    elif ext in ['.wav', '.flac']:
-        audio_len = librosa.get_duration(path=wav_path)
-    else:
-        raise ValueError("Unsupported file format: {}".format(ext))
     assert audio_len >= 1, "Input audio length shorter than 1 second"
-    if audio_len >= 10:
-        mid_time = audio_len // 2
-        start_time = mid_time - 5
-        wav, _ = librosa.load(wav_path, sr=24000, offset=start_time, duration=10)
     else:
-        wav, _ = librosa.load(wav_path, sr=24000)
     wav = torch.tensor(wav).unsqueeze(0).to(model.device)
     with torch.no_grad():

 def get_style_prompt(model, wav_path):
     mulan = model
+    audio, _ = librosa.load(wav_path, sr=24000)
+    audio_len = librosa.get_duration(y=audio, sr=24000)
     assert audio_len >= 1, "Input audio length shorter than 1 second"
+    if audio_len > 10:
+        start_time = int(audio_len // 2 - 5)
+        wav = audio[start_time*24000:(start_time+10)*24000]
     else:
+        wav = audio
     wav = torch.tensor(wav).unsqueeze(0).to(model.device)
     with torch.no_grad():