OmniAICreator committed on
Commit
8340c5c
·
verified ·
1 Parent(s): 2aba824

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -8,9 +8,9 @@ import spaces # For ZeroGPU
8
  from xcodec2.modeling_xcodec2 import XCodec2Model
9
 
10
  # ====== Settings ======
11
- BASE_REPO = os.getenv("BASE_REPO", "NandemoGHS/Anime-XCodec2") # Baseline (pretrained)
12
- FT_REPO = os.getenv("FT_REPO", "NandemoGHS/Anime-XCodec2-44.1kHz") # Fine-tuned (yours)
13
- TARGET_SR = 16000 # XCodec2 expects 16 kHz
14
  MAX_SECONDS_DEFAULT = 30 # Default max duration (seconds)
15
 
16
  def _ensure_models():
@@ -113,12 +113,13 @@ def run(audio_path, max_seconds):
113
  recon_ft = _reconstruct(ft, waveform, device)
114
 
115
  # Gradio Audio expects (sample_rate, np.ndarray)
 
116
  return (sr, recon_base), (44100, recon_ft)
117
 
118
  # ====== UI ======
119
  DESCRIPTION = """
120
- # Anime‑XCodec2 / XCodec2 Reconstruction Demo
121
- Compare **Baseline (HKUSTAudio/xcodec2)** and **Fine‑tuned (NandemoGHS/Anime‑XCodec2)** reconstructions side by side.
122
 
123
  - Supported inputs: wav / flac / ogg / mp3
124
  - Input is automatically converted to **16 kHz** (as required by XCodec2).
@@ -143,15 +144,15 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {visibility: hidden}") as dem
143
  )
144
  run_btn = gr.Button("Run", variant="primary")
145
  gr.Markdown(
146
- f"**Baseline model**: `{BASE_REPO}` \n"
147
- f"**Fine‑tuned model**: `{FT_REPO}` \n"
148
  f"**Inference device**: auto (GPU on ZeroGPU)"
149
  )
150
 
151
  with gr.Column(scale=1):
152
  with gr.Row():
153
  out_base = gr.Audio(
154
- label="Baseline reconstruction (NandemoGHS/Anime‑XCodec2)",
155
  show_download_button=True, format="wav"
156
  )
157
  out_ft = gr.Audio(
 
8
  from xcodec2.modeling_xcodec2 import XCodec2Model
9
 
10
  # ====== Settings ======
11
+ BASE_REPO = os.getenv("BASE_REPO", "NandemoGHS/Anime-XCodec2") # Baseline (16 kHz)
12
+ FT_REPO = os.getenv("FT_REPO", "NandemoGHS/Anime-XCodec2-44.1kHz") # Fine-tuned (44.1 kHz)
13
+ TARGET_SR = 16000 # XCodec2 expects 16 kHz input
14
  MAX_SECONDS_DEFAULT = 30 # Default max duration (seconds)
15
 
16
  def _ensure_models():
 
113
  recon_ft = _reconstruct(ft, waveform, device)
114
 
115
  # Gradio Audio expects (sample_rate, np.ndarray)
116
+ # 44.1 kHz version returns 44.1kHz sr
117
  return (sr, recon_base), (44100, recon_ft)
118
 
119
  # ====== UI ======
120
  DESCRIPTION = """
121
+ # Anime‑XCodec2-44.1kHz Reconstruction Demo
122
+ Compare **16 kHz (NandemoGHS/Anime‑XCodec2)** and **44.1 kHz (NandemoGHS/Anime‑XCodec2-44.1kHz)** reconstructions side by side.
123
 
124
  - Supported inputs: wav / flac / ogg / mp3
125
  - Input is automatically converted to **16 kHz** (as required by XCodec2).
 
144
  )
145
  run_btn = gr.Button("Run", variant="primary")
146
  gr.Markdown(
147
+ f"**16 kHz model**: `{BASE_REPO}` \n"
148
+ f"**44.1 kHz model**: `{FT_REPO}` \n"
149
  f"**Inference device**: auto (GPU on ZeroGPU)"
150
  )
151
 
152
  with gr.Column(scale=1):
153
  with gr.Row():
154
  out_base = gr.Audio(
155
+ label="16 kHz reconstruction (NandemoGHS/Anime‑XCodec2)",
156
  show_download_button=True, format="wav"
157
  )
158
  out_ft = gr.Audio(