Anime-XCodec2-44.1kHz-Demo

Running on Zero

App Files Files Community

OmniAICreator committed 15 days ago

Commit

8340c5c

verified ·

1 Parent(s): 2aba824

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -8,9 +8,9 @@ import spaces  # For ZeroGPU
 from xcodec2.modeling_xcodec2 import XCodec2Model
 # ====== Settings ======
-BASE_REPO = os.getenv("BASE_REPO", "NandemoGHS/Anime-XCodec2")               # Baseline (pretrained)
-FT_REPO   = os.getenv("FT_REPO",   "NandemoGHS/Anime-XCodec2-44.1kHz")         # Fine-tuned (yours)
-TARGET_SR = 16000                                                       # XCodec2 expects 16 kHz
 MAX_SECONDS_DEFAULT = 30                                               # Default max duration (seconds)
 def _ensure_models():
@@ -113,12 +113,13 @@ def run(audio_path, max_seconds):
     recon_ft = _reconstruct(ft, waveform, device)
     # Gradio Audio expects (sample_rate, np.ndarray)
     return (sr, recon_base), (44100, recon_ft)
 # ====== UI ======
 DESCRIPTION = """
-# Anime‑XCodec2 / XCodec2 Reconstruction Demo
-Compare **Baseline (HKUSTAudio/xcodec2)** and **Fine‑tuned (NandemoGHS/Anime‑XCodec2)** reconstructions side by side.
 - Supported inputs: wav / flac / ogg / mp3
 - Input is automatically converted to **16 kHz** (as required by XCodec2).
@@ -143,15 +144,15 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {visibility: hidden}") as dem
             )
             run_btn = gr.Button("Run", variant="primary")
             gr.Markdown(
-                f"**Baseline model**: `{BASE_REPO}`  \n"
-                f"**Fine‑tuned model**: `{FT_REPO}`  \n"
                 f"**Inference device**: auto (GPU on ZeroGPU)"
             )
         with gr.Column(scale=1):
             with gr.Row():
                 out_base = gr.Audio(
-                    label="Baseline reconstruction (NandemoGHS/Anime‑XCodec2)",
                     show_download_button=True, format="wav"
                 )
                 out_ft = gr.Audio(

 from xcodec2.modeling_xcodec2 import XCodec2Model
 # ====== Settings ======
+BASE_REPO = os.getenv("BASE_REPO", "NandemoGHS/Anime-XCodec2")               # Baseline (16 kHz)
+FT_REPO   = os.getenv("FT_REPO",   "NandemoGHS/Anime-XCodec2-44.1kHz")         # Fine-tuned (44.1 kHz)
+TARGET_SR = 16000                                                       # XCodec2 expects 16 kHz input
 MAX_SECONDS_DEFAULT = 30                                               # Default max duration (seconds)
 def _ensure_models():
     recon_ft = _reconstruct(ft, waveform, device)
     # Gradio Audio expects (sample_rate, np.ndarray)
+    # The 44.1 kHz model's reconstruction is returned with a 44.1 kHz sample rate
     return (sr, recon_base), (44100, recon_ft)
 # ====== UI ======
 DESCRIPTION = """
+# Anime‑XCodec2-44.1kHz Reconstruction Demo
+Compare **16 kHz (NandemoGHS/Anime‑XCodec2)** and **44.1 kHz (NandemoGHS/Anime‑XCodec2-44.1kHz)** reconstructions side by side.
 - Supported inputs: wav / flac / ogg / mp3
 - Input is automatically converted to **16 kHz** (as required by XCodec2).
             )
             run_btn = gr.Button("Run", variant="primary")
             gr.Markdown(
+                f"**16 kHz model**: `{BASE_REPO}`  \n"
+                f"**44.1 kHz model**: `{FT_REPO}`  \n"
                 f"**Inference device**: auto (GPU on ZeroGPU)"
             )
         with gr.Column(scale=1):
             with gr.Row():
                 out_base = gr.Audio(
+                    label="16 kHz reconstruction (NandemoGHS/Anime‑XCodec2)",
                     show_download_button=True, format="wav"
                 )
                 out_ft = gr.Audio(