# Hugging Face Spaces page header (extraction artifact): "Spaces: Running on Zero"
| """ | |
| Hugging Face Space (Gradio) app for running ZipVoice inference by cloning | |
| https://github.com/k2-fsa/ZipVoice and calling the inference script. | |
| Files: this single file (app.py). Save alongside a requirements.txt in the Space | |
| that contains at least: gradio | |
| Notes: | |
| - This script attempts to `git clone` the ZipVoice repo on first run and install | |
| it (pip). On Spaces this can take time and may require a GPU-enabled runner | |
| (recommended). If your Space already has the ZipVoice code and deps installed, | |
| the startup will be faster. | |
| - The app calls the repo's inference CLI: `python -m zipvoice.bin.infer_zipvoice`. | |
| - You can change `REPO_URL` or the default model names if you have your own HF | |
| checkpoint or local weights. | |
| - This is a practical example; depending on available hardware (CPU-only), you | |
| may need to use the `zipvoice_distill` or quantized models for speed. | |
| Usage in Space: | |
| - Create a new Space (Gradio, Python). | |
| - Add this file as `app.py`. | |
| - Add a small `requirements.txt` with: gradio | |
| - Optionally add a `start.sh` or enable internet to let the app clone and | |
| install the ZipVoice repo on startup. | |
| """ | |
| import os | |
| import subprocess | |
| import shlex | |
| import tempfile | |
| import time | |
| from pathlib import Path | |
| import spaces | |
| import gradio as gr | |
def gpuCheck():
    """Return the fixed status string "GPU OK"."""
    status = "GPU OK"
    return status
# CONFIG - change if needed
REPO_URL = "https://github.com/k2-fsa/ZipVoice.git"  # upstream repo cloned on first run
REPO_DIR = Path("/tmp/ZipVoice")  # local checkout location (ephemeral on Spaces)
PYTHON_CMD = "python3"  # interpreter used for pip installs and the inference CLI
DEFAULT_MODEL = "zipvoice"  # default model name passed to the CLI
DEFAULT_DISTILL_MODEL = "zipvoice_distill"  # distilled variant, intended for speed (see module docstring)
def run_cmd(cmd, cwd=None, env=None, timeout=1800):
    """Run a shell command and capture its output.

    Args:
        cmd: Command line as a single string; tokenized with shlex.split,
            so no shell is involved (no globbing, pipes, or redirection).
        cwd: Working directory for the child process, or None.
        env: Environment mapping for the child, or None to inherit.
        timeout: Seconds to wait before aborting the command.

    Returns:
        Tuple (returncode, stdout, stderr). On timeout, or when the
        executable cannot be launched at all, returncode is -1 and
        stderr describes the failure instead of an exception escaping
        to the caller.
    """
    try:
        proc = subprocess.run(
            shlex.split(cmd),
            cwd=cwd,
            env=env,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        return proc.returncode, proc.stdout, proc.stderr
    except subprocess.TimeoutExpired as e:
        return -1, "", f"Timeout: {e}"
    except OSError as e:
        # Fix: a missing executable (FileNotFoundError) previously escaped
        # this function and crashed the Gradio callback; report it through
        # the same (rc, out, err) contract as every other failure.
        return -1, "", f"Failed to launch command: {e}"
def ensure_zipvoice_installed():
    """Clone the ZipVoice repo and pip-install it if not already present.

    Returns:
        Tuple (ok, logs): ok is True when a usable checkout is available
        (and, on first run, the install commands succeeded); logs is a
        newline-joined transcript of everything attempted.
    """
    logs = []
    # Fix: require the .git marker rather than the bare directory — a
    # directory left behind by a previously failed clone would otherwise be
    # mistaken for a working checkout and short-circuit to success.
    if (REPO_DIR / ".git").exists():
        logs.append(f"Found existing repo at {REPO_DIR}")
        return True, "\n".join(logs)
    logs.append(f"Cloning {REPO_URL} into {REPO_DIR} ...")
    code, out, err = run_cmd(f"git clone {REPO_URL} {REPO_DIR}")
    logs.append(out)
    logs.append(err)
    if code != 0:
        logs.append("Failed to clone repository.")
        return False, "\n".join(logs)
    # Try to pip install requirements if present. This may be heavy (torch
    # etc.). If it fails, the user can preinstall deps in the Space.
    req_txt = REPO_DIR / "requirements.txt"
    if req_txt.exists():
        logs.append("Installing requirements.txt (this may take several minutes)...")
        code, out, err = run_cmd(f"{PYTHON_CMD} -m pip install -r {req_txt}")
        logs.append(out)
        logs.append(err)
        if code != 0:
            logs.append("requirements install returned non-zero exit code, trying package install...")
    # Install the package itself (setup.py or pyproject), editable so the
    # checkout's CLI modules are importable.
    logs.append("Attempting to install ZipVoice package (pip install -e .)")
    code, out, err = run_cmd(f"{PYTHON_CMD} -m pip install -e {REPO_DIR}")
    logs.append(out)
    logs.append(err)
    if code != 0:
        logs.append("Editable install failed; try installing dependencies manually in the Space.")
        # Even if install failed, the CLI may still run if requirements are
        # available. Return success=False so the UI can warn the user.
        return False, "\n".join(logs)
    logs.append("ZipVoice installed successfully.")
    return True, "\n".join(logs)
def infer_zipvoice(prompt_wav_path: str, prompt_text: str, text: str, model_name: str, num_steps: int = 6):
    """Run the ZipVoice inference CLI and return the generated wav path.

    Args:
        prompt_wav_path: Path to the reference/prompt wav file.
        prompt_text: Transcription of the prompt audio.
        text: Text to synthesize.
        model_name: Model identifier passed to `--model-name`
            (e.g. "zipvoice" or "zipvoice_distill").
        num_steps: Sampling steps passed to `--num-steps`.

    Returns:
        Tuple (wav_path, logs): wav_path is the result file path on
        success, or None on failure; logs always contains the CLI's
        return code, stdout, and stderr.
    """
    out_dir = Path("/tmp/zipvoice_out")
    out_dir.mkdir(parents=True, exist_ok=True)
    # Fix: nanosecond timestamp — int(time.time()) collided for two requests
    # in the same second, letting one run clobber/return another's output.
    res_wav = out_dir / f"result_{time.time_ns()}.wav"
    # All user-supplied values are shlex.quote()d; run_cmd re-tokenizes with
    # shlex.split and never invokes a shell, so no injection is possible.
    cmd = (
        f"{PYTHON_CMD} -m zipvoice.bin.infer_zipvoice"
        f" --model-name {shlex.quote(model_name)}"
        f" --prompt-wav {shlex.quote(prompt_wav_path)}"
        f" --prompt-text {shlex.quote(prompt_text)}"
        f" --text {shlex.quote(text)}"
        f" --res-wav-path {shlex.quote(str(res_wav))}"
        f" --num-steps {int(num_steps)}"
    )
    rc, out, err = run_cmd(cmd, cwd=str(REPO_DIR), timeout=900)
    logs = f"RETURN_CODE={rc}\nSTDOUT:\n{out}\nSTDERR:\n{err}"
    if rc == 0 and res_wav.exists():
        return str(res_wav), logs
    else:
        return None, logs
# Build Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# ZipVoice Hugging Face Space - Quick Runner")
    # Shared status area: filled by startup_check() on page load and by
    # on_generate() after each run.
    status_box = gr.Textbox(label="Setup / Status logs", lines=8, interactive=False)
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs: reference audio + its transcript, target text, model, steps.
            # type="filepath" makes Gradio hand on_generate a path string.
            prompt_audio = gr.Audio(label="Prompt audio (wav) - short, clean, single speaker", type="filepath")
            prompt_text = gr.Textbox(label="Transcription of prompt audio", value="", placeholder="Type the transcription of the prompt wav here")
            text_to_speak = gr.Textbox(label="Text to synthesize", value="Hello, this is a test from ZipVoice.")
            model_choice = gr.Radio(choices=[DEFAULT_MODEL, DEFAULT_DISTILL_MODEL], value=DEFAULT_MODEL, label="Model")
            num_steps = gr.Slider(label="Number of sampling steps (lower = faster)", minimum=1, maximum=16, value=6, step=1)
            generate_btn = gr.Button("Generate")
        with gr.Column(scale=1):
            # Outputs: synthesized audio plus raw CLI logs for debugging.
            output_audio = gr.Audio(label="Generated audio (result.wav)")
            logs_out = gr.Textbox(label="Inference logs", lines=12, interactive=False)

    def startup_check():
        """Run once on page load: clone/install ZipVoice and report status text."""
        ok, logs = ensure_zipvoice_installed()
        if not ok:
            msg = (
                "Warning: automatic install failed.\n"
                "Please preinstall model dependencies (torch, soundfile, etc.) or enable internet for this Space.\n"
                "Install logs:\n"
            )
            return msg + logs
        return "Setup complete. You can upload prompt audio and run inference.\n" + logs

    def on_generate(prompt_wav, p_text, text, model_name, n_steps):
        """Generate-button handler.

        Returns a 3-tuple matching the wired outputs
        (output_audio, logs_out, status_box).
        """
        if not prompt_wav:
            return None, "", "Please upload a prompt WAV file."
        # ensure installed (re-checked per click; fast when already cloned)
        ok, logs = ensure_zipvoice_installed()
        if not ok:
            return None, "", "ZipVoice is not installed correctly. See logs:\n" + logs
        # Gradio provides a temporary file path for the uploaded prompt;
        # verify it is still on disk before handing it to the CLI.
        tmp_prompt = Path(prompt_wav)
        if not tmp_prompt.exists():
            return None, "", f"Prompt file not found: {prompt_wav}"
        res_path, infer_logs = infer_zipvoice(str(tmp_prompt), p_text or "", text or "", model_name, int(n_steps))
        if res_path:
            return res_path, infer_logs, "Generation successful"
        else:
            return None, infer_logs, "Generation failed. See logs."

    # Wire events
    demo.load(startup_check, outputs=[status_box])
    generate_btn.click(on_generate, inputs=[prompt_audio, prompt_text, text_to_speak, model_choice, num_steps], outputs=[output_audio, logs_out, status_box])

if __name__ == "__main__":
    demo.launch()