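"""ParlerVoice TTS inference CLI.

Illustrative invocations (the module path ``parlervoice.cli`` is an assumption;
substitute the actual package name this file lives in):

    # Single utterance
    python -m parlervoice.cli --checkpoint ./checkpoint --prompt "Hello there" \
        --speaker Connor --preset natural --output hello.wav

    # Batch mode: one JSON object per line of a JSONL file (see --jobs)
    python -m parlervoice.cli --checkpoint ./checkpoint --jobs jobs.jsonl --output-dir outputs
"""
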
import argparse
import json
import logging
from typing import Optional

from .config import GenerationConfig
from .engine import ParlerVoiceInference

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def _parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(description="ParlerVoice TTS Inference CLI")
    p.add_argument("--checkpoint", required=True, help="Path to fine-tuned checkpoint")
    p.add_argument("--base-model", default="parler-tts/parler-tts-mini-v1.1", help="Base model path")
    p.add_argument("--prompt", help="Text to speak")
    p.add_argument("--speaker", default="Connor", help="Speaker name")
    p.add_argument("--preset", default="natural", help="Preset name")
    p.add_argument("--description", help="Override auto-built description")
    p.add_argument("--output", default="output.wav", help="Output wav path")
    p.add_argument("--jobs", help="JSONL file of batch jobs (keys: prompt, speaker, preset, output, description)")
    p.add_argument("--output-dir", default="outputs", help="Dir for batch outputs")

    # Generation arguments
    p.add_argument("--temperature", type=float, default=0.9)
    p.add_argument("--top-k", type=int, default=50)
    p.add_argument("--top-p", type=float, default=0.95)
    p.add_argument("--repetition-penalty", type=float, default=1.1)
    p.add_argument("--max-length", type=int, default=2048)
    p.add_argument("--min-length", type=int, default=10)
    p.add_argument("--num-beams", type=int, default=1)
    p.add_argument("--no-sample", action="store_true", help="Disable sampling")
    return p.parse_args()
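
# Illustrative --jobs file contents (one JSON object per line; values below are
# examples only). Missing keys fall back to the CLI flags or to auto-generated
# values, as handled in main():
#
#   {"prompt": "Hello there.", "speaker": "Connor", "preset": "natural", "output": "outputs/hello.wav"}
#   {"prompt": "A calmer line.", "description": "A calm male voice with clear audio."}
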
def main() -> int:
    args = _parse_args()

    config = GenerationConfig(
        temperature=args.temperature,
        top_k=args.top_k,
        top_p=args.top_p,
        repetition_penalty=args.repetition_penalty,
        max_length=args.max_length,
        min_length=args.min_length,
        do_sample=not args.no_sample,
        num_beams=args.num_beams,
    )

    infer = ParlerVoiceInference(checkpoint_path=args.checkpoint, base_model_path=args.base_model)

    # Batch mode: one JSON job per line of a JSONL file.
    if args.jobs:
        count = 0
        with open(args.jobs, "r") as f:
            for line in f:
                if not line.strip():
                    continue
                job = json.loads(line)
                prompt: str = job["prompt"]
                speaker: str = job.get("speaker", args.speaker)
                preset: str = job.get("preset", args.preset)
                output: str = job.get("output", f"{args.output_dir}/job_{count:03d}.wav")
                desc = job.get("description")
                if desc:
                    # An explicit description overrides the preset (same precedence
                    # as the single-job path below).
                    _, _ = infer.generate_audio(prompt=prompt, description=desc, config=config, output_path=output)
                elif preset:
                    _, _ = infer.generate_with_speaker_preset(
                        prompt=prompt, speaker=speaker, preset=preset, config=config, output_path=output
                    )
                else:
                    # No description or preset: build a default description for the speaker.
                    desc = infer.build_advanced_description(speaker=speaker)
                    _, _ = infer.generate_audio(prompt=prompt, description=desc, config=config, output_path=output)
                count += 1
        return 0

    # Single-job path
    if not args.prompt:
        logging.error("--prompt is required when --jobs is not given")
        return 1

    description: Optional[str] = args.description
    if not description:
        # No explicit description: use the speaker preset builder.
        _, _ = infer.generate_with_speaker_preset(
            prompt=args.prompt,
            speaker=args.speaker,
            preset=args.preset,
            config=config,
            output_path=args.output,
        )
    else:
        _, _ = infer.generate_audio(
            prompt=args.prompt,
            description=description,
            config=config,
            output_path=args.output,
        )
    return 0

if __name__ == "__main__":
    raise SystemExit(main())