ambujm22 committed · verified
Commit: e22a58e · Parent(s): 48bfead

Update app.py

Files changed (1):
  1. app.py +226 -140
app.py CHANGED
@@ -1,203 +1,289 @@
 
 # ---------- MUST BE FIRST: Gradio CDN + ZeroGPU probe ----------
 import os
 os.environ.setdefault("GRADIO_USE_CDN", "true")

-# A GPU-decorated function MUST exist at import time for ZeroGPU.
-# Import spaces unconditionally and register a tiny probe.
 import spaces

 @spaces.GPU(duration=10)
-def _gpu_probe() -> str:
-    # Never called; only here so ZeroGPU startup check passes.
-    return "ok"

 # ---------- Standard imports ----------
-import sys
-import subprocess
 from pathlib import Path
-from typing import Tuple, Optional

 import gradio as gr
 import numpy as np
 import soundfile as sf
 from huggingface_hub import hf_hub_download

-# Detect ZeroGPU to decide whether to CALL the GPU function.
-USE_ZEROGPU = os.getenv("SPACE_RUNTIME", "").lower() == "zerogpu"
-
 SPACE_ROOT = Path(__file__).parent.resolve()
 REPO_DIR = SPACE_ROOT / "SonicMasterRepo"
 WEIGHTS_REPO = "amaai-lab/SonicMaster"
 WEIGHTS_FILE = "model.safetensors"
 CACHE_DIR = SPACE_ROOT / "weights"
 CACHE_DIR.mkdir(parents=True, exist_ok=True)

-# ---------- 1) Pull weights from HF Hub ----------
-def get_weights_path() -> Path:
-    return Path(
-        hf_hub_download(
             repo_id=WEIGHTS_REPO,
             filename=WEIGHTS_FILE,
-            local_dir=CACHE_DIR.as_posix(),
             local_dir_use_symlinks=False,
             force_download=False,
             resume_download=True,
         )
-    )

-# ---------- 2) Clone GitHub repo ----------
-def ensure_repo() -> Path:
-    if not REPO_DIR.exists():
-        subprocess.run(
-            ["git", "clone", "--depth", "1",
-             "https://github.com/AMAAI-Lab/SonicMaster",
-             REPO_DIR.as_posix()],
-            check=True,
-        )
-    if REPO_DIR.as_posix() not in sys.path:
-        sys.path.append(REPO_DIR.as_posix())
     return REPO_DIR

-# ---------- 3) Examples ----------
-def build_examples():
-    repo = ensure_repo()
-    wav_dir = repo / "samples" / "inputs"
-    wav_paths = sorted(p for p in wav_dir.glob("*.wav") if p.is_file())
-    prompts = [
-        "Increase the clarity of this song by emphasizing treble frequencies.",
-        "Make this song sound more boomy by amplifying the low end bass frequencies.",
-        "Can you make this sound louder, please?",
-        "Make the audio smoother and less distorted.",
-        "Improve the balance in this song.",
-        "Disentangle the left and right channels to give this song a stereo feeling.",
-        "Correct the unnatural frequency emphasis. Reduce the roominess or echo.",
-        "Raise the level of the vocals, please.",
-        "Increase the clarity of this song by emphasizing treble frequencies.",
-        "Please, dereverb this audio.",
-    ]
-    return [[p.as_posix(), prompts[i] if i < len(prompts) else prompts[-1]]
-            for i, p in enumerate(wav_paths[:10])]
-
-# ---------- 4) I/O helpers ----------
 def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
-    if wav.ndim == 2 and wav.shape[0] < wav.shape[1]:
-        wav = wav.T
-    sf.write(path.as_posix(), wav, sr)

 def read_audio(path: str) -> Tuple[np.ndarray, int]:
     wav, sr = sf.read(path, always_2d=False)
-    return wav.astype(np.float32) if wav.dtype == np.float64 else wav, sr
-
-# ---------- 5) Core inference (subprocess calling your repo script) ----------
-def run_sonicmaster_cli(input_wav_path: Path,
-                        prompt: str,
-                        out_path: Path,
-                        _logs: list,
-                        progress: Optional[gr.Progress] = None) -> bool:
-    if progress: progress(0.15, desc="Loading weights & repo")
-    ckpt = get_weights_path()
-    repo = ensure_repo()

     py = sys.executable or "python3"
-    script_candidates = [repo / "infer_single.py"]
-
-    CANDIDATE_CMDS = []
-    for script in script_candidates:
-        if script.exists():
-            CANDIDATE_CMDS.append([
-                py, script.as_posix(),
-                "--ckpt", ckpt.as_posix(),
-                "--input", input_wav_path.as_posix(),
-                "--prompt", prompt,
-                "--output", out_path.as_posix(),
-            ])
-            CANDIDATE_CMDS.append([
-                py, script.as_posix(),
-                "--weights", ckpt.as_posix(),
-                "--input", input_wav_path.as_posix(),
-                "--text", prompt,
-                "--out", out_path.as_posix(),
-            ])
-
-    for idx, cmd in enumerate(CANDIDATE_CMDS, start=1):
-        try:
-            if progress: progress(0.35 + 0.05*idx, desc=f"Running inference (try {idx})")
-            # inherit env so CUDA_VISIBLE_DEVICES from ZeroGPU reaches subprocess
-            subprocess.run(cmd, capture_output=True, text=True, check=True, env=os.environ.copy())
-            if out_path.exists() and out_path.stat().st_size > 0:
-                if progress: progress(0.9, desc="Post-processing output")
-                return True
-        except Exception:
-            continue
-    return False
-
-# ---------- 6) REAL GPU function (always defined; only CALLED on ZeroGPU) ----------
 @spaces.GPU(duration=180)
-def enhance_on_gpu(input_path: str, prompt: str, output_path: str) -> bool:
-    # Import torch here so CUDA initializes inside GPU context
     try:
         import torch  # noqa: F401
     except Exception:
         pass
     from pathlib import Path as _P
-    return run_sonicmaster_cli(_P(input_path), prompt, _P(output_path), _logs=[], progress=None)
-
-# ---------- 7) Gradio callback ----------
-def enhance_audio_ui(audio_path: str,
-                     prompt: str,
-                     progress=gr.Progress(track_tqdm=True)) -> Tuple[int, np.ndarray]:
-    if not audio_path or not prompt:
-        raise gr.Error("Please provide audio and a text prompt.")
-
-    wav, sr = read_audio(audio_path)
-    tmp_in, tmp_out = SPACE_ROOT / "tmp_in.wav", SPACE_ROOT / "tmp_out.wav"
-    if tmp_out.exists():
-        try: tmp_out.unlink()
-        except: pass
-    save_temp_wav(wav, sr, tmp_in)
-
-    if progress: progress(0.3, desc="Starting inference")
-    if USE_ZEROGPU:
-        ok = enhance_on_gpu(tmp_in.as_posix(), prompt, tmp_out.as_posix())
-    else:
-        ok = run_sonicmaster_cli(tmp_in, prompt, tmp_out, _logs=[], progress=progress)

-    if ok and tmp_out.exists() and tmp_out.stat().st_size > 0:
-        out_wav, out_sr = read_audio(tmp_out.as_posix())
-        return (out_sr, out_wav)
-    else:
-        return (sr, wav)

-# ---------- 8) Gradio UI ----------
 with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as demo:
-    gr.Markdown("## 🎧 SonicMaster\nUpload or choose an example, write a text prompt, then click **Enhance**.")
     with gr.Row():
-        with gr.Column():
             in_audio = gr.Audio(label="Input Audio", type="filepath")
-            prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., reduce reverb")
-            run_btn = gr.Button("🚀 Enhance", variant="primary")
-            gr.Examples(examples=build_examples(), inputs=[in_audio, prompt])
-        with gr.Column():
             out_audio = gr.Audio(label="Enhanced Audio (output)")
-    run_btn.click(fn=enhance_audio_ui,
-                  inputs=[in_audio, prompt],
-                  outputs=[out_audio],
-                  concurrency_limit=1)

-# ---------- 9) FastAPI mount & disconnect handler ----------
 from fastapi import FastAPI, Request
 from starlette.responses import PlainTextResponse
-from starlette.requests import ClientDisconnect
-
-_ = get_weights_path(); _ = ensure_repo()

 app = FastAPI()

 @app.exception_handler(ClientDisconnect)
 async def client_disconnect_handler(request: Request, exc: ClientDisconnect):
     return PlainTextResponse("Client disconnected", status_code=499)

-app = gr.mount_gradio_app(app, demo.queue(max_size=16), path="/")

 if __name__ == "__main__":
     import uvicorn
 
+
 # ---------- MUST BE FIRST: Gradio CDN + ZeroGPU probe ----------
 import os
 os.environ.setdefault("GRADIO_USE_CDN", "true")

 import spaces

 @spaces.GPU(duration=10)
+def _gpu_probe(a: int = 1, b: int = 1) -> int:
+    # Never called; exists so the ZeroGPU startup check passes.
+    return a + b

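Aside on the probe pattern: ZeroGPU only admits a Space whose code registers at least one `@spaces.GPU`-decorated function at import time, which is all `_gpu_probe` does. A minimal standalone sketch of how such a function behaves (assumes the `spaces` package and torch are installed; `cuda_report` is a hypothetical name):

```python
import spaces

@spaces.GPU(duration=30)  # duration bounds the GPU lease, in seconds
def cuda_report() -> str:
    # On ZeroGPU hardware, CUDA becomes visible only inside a decorated call.
    import torch
    return f"cuda available: {torch.cuda.is_available()}"

if __name__ == "__main__":
    print(cuda_report())  # on CPU-only hardware this simply reports False
```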
 # ---------- Standard imports ----------
 from pathlib import Path
+from typing import Optional, Tuple, List
+import subprocess
+import sys
+import traceback

 import gradio as gr
 import numpy as np
 import soundfile as sf
 from huggingface_hub import hf_hub_download

+# ---------- Config ----------
 SPACE_ROOT = Path(__file__).parent.resolve()
 REPO_DIR = SPACE_ROOT / "SonicMasterRepo"
+REPO_URL = "https://github.com/AMAAI-Lab/SonicMaster"
 WEIGHTS_REPO = "amaai-lab/SonicMaster"
 WEIGHTS_FILE = "model.safetensors"
 CACHE_DIR = SPACE_ROOT / "weights"
 CACHE_DIR.mkdir(parents=True, exist_ok=True)

+# ZeroGPU detection (heuristic)
+USE_ZEROGPU = os.getenv("SPACE_RUNTIME", "").lower() == "zerogpu"
+
+# ---------- Lazy resources ----------
+_weights_path: Optional[Path] = None
+_repo_ready: bool = False
+
+def get_weights_path(progress: Optional[gr.Progress] = None) -> Path:
+    """Fetch model weights lazily and cache the resolved path."""
+    global _weights_path
+    if _weights_path is None:
+        if progress:
+            progress(0.10, desc="Downloading model weights (first run)")
+        wp = hf_hub_download(
             repo_id=WEIGHTS_REPO,
             filename=WEIGHTS_FILE,
+            local_dir=str(CACHE_DIR),
             local_dir_use_symlinks=False,
             force_download=False,
             resume_download=True,
         )
+        _weights_path = Path(wp)
+    return _weights_path

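Note that `hf_hub_download` is already idempotent: repeat calls resolve from the local cache, so the `_weights_path` global mainly skips the hub lookup on later calls. A quick standalone check of the caching behavior (assumes network access to the public weights repo):

```python
from pathlib import Path
from huggingface_hub import hf_hub_download

# First call downloads (or resumes); later calls resolve from the local cache.
wp = Path(hf_hub_download(repo_id="amaai-lab/SonicMaster",
                          filename="model.safetensors"))
print(wp, f"{wp.stat().st_size / 1e6:.1f} MB")
```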
+def ensure_repo(progress: Optional[gr.Progress] = None) -> Path:
+    """Clone the inference repo lazily and put it on sys.path."""
+    global _repo_ready
+    if not _repo_ready:
+        if not REPO_DIR.exists():
+            if progress:
+                progress(0.18, desc="Cloning SonicMaster repo (first run)")
+            subprocess.run(
+                ["git", "clone", "--depth", "1", REPO_URL, REPO_DIR.as_posix()],
+                check=True,
+            )
+        if REPO_DIR.as_posix() not in sys.path:
+            sys.path.append(REPO_DIR.as_posix())
+        _repo_ready = True
     return REPO_DIR

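One caveat: `sys.path.append` only affects this interpreter, while the actual inference below runs in a subprocess that is invoked by script path, not by import. If a repo script ever needed to import sibling modules as packages, the path would have to travel via the environment instead; a hedged sketch (the `PYTHONPATH` forwarding is an assumption, not something the current scripts require):

```python
import os
import subprocess
import sys
from pathlib import Path

REPO_DIR = Path("SonicMasterRepo")  # hypothetical local checkout

env = os.environ.copy()
# Prepend the repo so child interpreters can import modules from it.
env["PYTHONPATH"] = os.pathsep.join(
    filter(None, [REPO_DIR.as_posix(), env.get("PYTHONPATH")])
)
subprocess.run(
    [sys.executable, "-c", "import sys; print(sys.path[1])"],
    env=env, check=True,
)
```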
+# ---------- Audio helpers ----------
 def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
+    # Ensure (frames, channels) shape for soundfile
+    if wav.ndim == 1:
+        data = wav
+    else:
+        # (channels, samples) -> (samples, channels)
+        data = wav.T if wav.shape[0] < wav.shape[1] else wav
+    if data.dtype == np.float64:
+        data = data.astype(np.float32)
+    sf.write(path.as_posix(), data, sr)

 def read_audio(path: str) -> Tuple[np.ndarray, int]:
     wav, sr = sf.read(path, always_2d=False)
+    if wav.dtype == np.float64:
+        wav = wav.astype(np.float32)
+    return wav, sr
+
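The transpose heuristic exists because soundfile expects (frames, channels) while audio arrays often arrive as (channels, samples). A standalone round-trip showing the convention the helpers normalize to (assumes channels < samples, which holds for any real recording):

```python
import numpy as np
import soundfile as sf

stereo = np.random.randn(2, 48000).astype(np.float32)  # (channels, samples)
sf.write("check.wav", stereo.T, 48000)                 # soundfile wants (frames, channels)

wav, sr = sf.read("check.wav", always_2d=False)
print(wav.shape, sr)  # -> (48000, 2) 48000
```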
+# ---------- CLI runner ----------
+def _candidate_commands(py: str, script: Path, ckpt: Path, inp: Path, prompt: str, out: Path) -> List[List[str]]:
+    """Try multiple arg styles commonly found in repos."""
+    combos = [
+        # infer_single.py (common)
+        [py, script.as_posix(), "--ckpt", ckpt.as_posix(), "--input", inp.as_posix(), "--prompt", prompt, "--output", out.as_posix()],
+        [py, script.as_posix(), "--weights", ckpt.as_posix(), "--input", inp.as_posix(), "--text", prompt, "--out", out.as_posix()],
+        # other possible entrypoints
+        [py, script.as_posix(), "--ckpt", ckpt.as_posix(), "--input", inp.as_posix(), "--text", prompt, "--output", out.as_posix()],
+    ]
+    return combos
+
+def run_sonicmaster_cli(
+    input_wav_path: Path,
+    prompt: str,
+    out_path: Path,
+    progress: Optional[gr.Progress] = None,
+) -> Tuple[bool, str]:
+    """
+    Returns (ok, message). Captures stdout/stderr and succeeds on the first
+    command that leaves a non-empty output file.
+    """
+    if progress:
+        progress(0.14, desc="Preparing inference")
+    ckpt = get_weights_path(progress=progress)
+    repo = ensure_repo(progress=progress)
+
+    # Candidate scripts to try
+    script_candidates = [
+        repo / "infer_single.py",
+        repo / "inference_fullsong.py",
+        repo / "inference_ptload_batch.py",
+    ]
+    scripts = [s for s in script_candidates if s.exists()]
+    if not scripts:
+        return False, "No inference script found in the repo (expected infer_single.py or similar)."

     py = sys.executable or "python3"
+    env = os.environ.copy()  # keep CUDA_VISIBLE_DEVICES etc.
+
+    last_err = ""
+    for idx, script in enumerate(scripts, start=1):
+        for jdx, cmd in enumerate(_candidate_commands(py, script, ckpt, input_wav_path, prompt, out_path), start=1):
+            try:
+                if progress:
+                    progress(min(0.20 + 0.08 * (idx + jdx), 0.70), desc=f"Running {script.name} (try {idx}.{jdx})")
+                res = subprocess.run(cmd, capture_output=True, text=True, check=True, env=env)
+                if out_path.exists() and out_path.stat().st_size > 0:
+                    if progress:
+                        progress(0.88, desc="Post-processing output")
+                    # Return any informative stdout as the message
+                    msg = (res.stdout or "").strip()
+                    return True, msg if msg else "Inference completed."
+                else:
+                    last_err = f"{script.name} produced no output file."
+            except subprocess.CalledProcessError as e:
+                # Collect stderr/stdout for the user
+                snippet = "\n".join(filter(None, [e.stdout or "", e.stderr or ""])).strip()
+                last_err = snippet if snippet else f"{script.name} failed with return code {e.returncode}."
+            except Exception as e:
+                last_err = f"Unexpected error: {e}\n{traceback.format_exc()}"
+
+    return False, last_err or "All candidate commands failed without an error message."
+
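For reference, the runner's contract in isolation: it returns a `(bool, str)` pair rather than raising, so callers can surface the captured stderr/stdout directly. A hypothetical invocation (assumes this file imports cleanly as `app` and that the placeholder paths exist):

```python
from pathlib import Path

from app import run_sonicmaster_cli  # assumes this module is importable

ok, msg = run_sonicmaster_cli(
    Path("tmp_in.wav"),                        # placeholder input
    "Reduce reverb and brighten the vocals.",
    Path("tmp_out.wav"),                       # placeholder output
)
print("success" if ok else "failed")
print(msg[:500])  # stdout on success, captured stderr/stdout on failure
```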
+# ---------- REAL GPU function (called only if using ZeroGPU / GPU available) ----------
 @spaces.GPU(duration=180)
+def enhance_on_gpu(input_path: str, prompt: str, output_path: str) -> Tuple[bool, str]:
     try:
+        # Initialize CUDA inside the GPU context
         import torch  # noqa: F401
     except Exception:
         pass
     from pathlib import Path as _P
+    return run_sonicmaster_cli(_P(input_path), prompt, _P(output_path), progress=None)

+def _has_cuda() -> bool:
+    try:
+        import torch
+        return torch.cuda.is_available()
+    except Exception:
+        return False
+
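The two-signal dispatch below exists because on ZeroGPU the main process typically reports no CUDA device outside a `@spaces.GPU` call, so `torch.cuda.is_available()` alone would route everything to CPU. A condensed sketch of the same decision:

```python
import os

def pick_backend() -> str:
    # Env heuristic first: ZeroGPU exposes CUDA only inside @spaces.GPU calls.
    if os.getenv("SPACE_RUNTIME", "").lower() == "zerogpu":
        return "zerogpu"
    try:
        import torch
        if torch.cuda.is_available():
            return "cuda"  # classic GPU Space, or a local machine with a GPU
    except Exception:
        pass
    return "cpu"

print(pick_backend())
```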
+# ---------- UI callback ----------
+def enhance_audio_ui(
+    audio_path: str,
+    prompt: str,
+    progress=gr.Progress(track_tqdm=True),
+) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
+    """
+    Returns (audio, message). On failure, audio is None and message carries the error text.
+    """
+    try:
+        if not prompt:
+            raise gr.Error("Please provide a text prompt.")
+        if not audio_path:
+            raise gr.Error("Please upload or select an input audio file.")
+
+        wav, sr = read_audio(audio_path)
+
+        tmp_in = SPACE_ROOT / "tmp_in.wav"
+        tmp_out = SPACE_ROOT / "tmp_out.wav"
+        if tmp_out.exists():
+            try:
+                tmp_out.unlink()
+            except Exception:
+                pass
+
+        if progress:
+            progress(0.06, desc="Preparing audio")
+        save_temp_wav(wav, sr, tmp_in)
+
+        # Choose execution path: prefer a real GPU if available, else CPU
+        use_gpu_call = USE_ZEROGPU or _has_cuda()
+
+        if progress:
+            progress(0.12, desc="Starting inference")
+        if use_gpu_call:
+            ok, msg = enhance_on_gpu(tmp_in.as_posix(), prompt, tmp_out.as_posix())
+        else:
+            ok, msg = run_sonicmaster_cli(tmp_in, prompt, tmp_out, progress=progress)
+
+        if ok and tmp_out.exists() and tmp_out.stat().st_size > 0:
+            # gr.Audio expects (sample_rate, data) for numpy output
+            out_wav, out_sr = read_audio(tmp_out.as_posix())
+            return ((out_sr, out_wav), msg or "Done.")
+        else:
+            # On failure: DON'T echo input audio — return None and the error message
+            if not msg:
+                msg = "Inference failed without a specific error message."
+            return (None, msg.strip())
+
+    except gr.Error as e:
+        return (None, str(e))
+    except Exception as e:
+        return (None, f"Unexpected error: {e}\n{traceback.format_exc()}")
+
+# ---------- Gradio UI ----------
+PROMPT_EXAMPLES = [
+    ["Increase the clarity of this song by emphasizing treble frequencies."],
+    ["Make this song sound more boomy by amplifying the low end bass frequencies."],
+    ["Make the audio smoother and less distorted."],
+    ["Improve the balance in this song."],
+    ["Reduce roominess/echo (dereverb)."],
+    ["Raise the level of the vocals."],
+    ["Give the song a wider stereo image."],
+]

 with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as demo:
+    gr.Markdown("## 🎧 SonicMaster\nUpload audio, enter a prompt, then click **Enhance**.\n"
+                "- Progress appears below during the first run (weights/repo download).\n"
+                "- If something fails, you'll see the **error message** instead of the input audio.")
     with gr.Row():
+        with gr.Column(scale=1):
             in_audio = gr.Audio(label="Input Audio", type="filepath")
+            prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., Reduce reverb and brighten the vocals.")
+            run_btn = gr.Button("🚀 Enhance", variant="primary")
+            gr.Examples(
+                examples=PROMPT_EXAMPLES,
+                inputs=[prompt],  # prompt-only examples to avoid heavy file ops at startup
+                label="Prompt Examples",
+            )
+        with gr.Column(scale=1):
             out_audio = gr.Audio(label="Enhanced Audio (output)")
+            status = gr.Textbox(label="Status / Messages", interactive=False, lines=6)

+    # On click, return audio + message
+    run_btn.click(
+        fn=enhance_audio_ui,
+        inputs=[in_audio, prompt],
+        outputs=[out_audio, status],
+        concurrency_limit=1,
+    )
+
+# Queue BEFORE mounting so the mounted app is ready immediately.
+# Concurrency is already capped per event via concurrency_limit above;
+# Gradio 4's queue() no longer accepts concurrency_count.
+demo = demo.queue(max_size=16)
+
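On the queue change above: Gradio 4 removed `queue(concurrency_count=...)`; concurrency is now capped per event (as with `concurrency_limit=1` on the click handler) or queue-wide via `default_concurrency_limit`. A minimal sketch of the current idiom:

```python
import gradio as gr

with gr.Blocks() as d:
    btn = gr.Button("go")
    out = gr.Textbox()
    # Per-event cap: at most one of these handlers runs at a time.
    btn.click(lambda: "done", outputs=out, concurrency_limit=1)

d.queue(max_size=16)  # or d.queue(default_concurrency_limit=1, max_size=16)
```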
+# ---------- FastAPI mount & health ----------
 from fastapi import FastAPI, Request
 from starlette.responses import PlainTextResponse
+from starlette.requests import ClientDisconnect  # canonical location in Starlette

 app = FastAPI()

+@app.get("/health")
+def _health():
+    return {"ok": True}
+
 @app.exception_handler(ClientDisconnect)
 async def client_disconnect_handler(request: Request, exc: ClientDisconnect):
     return PlainTextResponse("Client disconnected", status_code=499)

+# Mount Gradio at root (Spaces looks here)
+app = gr.mount_gradio_app(app, demo, path="/")

 if __name__ == "__main__":
     import uvicorn
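The hunk ends at `import uvicorn`, so whatever closes the `__main__` block is not shown. For local testing outside Spaces, a typical tail would look like the following (host and port are assumptions; 7860 is just the usual Spaces port):

```python
if __name__ == "__main__":
    import uvicorn
    # Hypothetical local entrypoint for the FastAPI app defined above.
    uvicorn.run(app, host="0.0.0.0", port=7860)
```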