Spaces:
Running
on
Zero
Running
on
Zero
Create app_cli.py
Browse files- app_cli.py +44 -0
app_cli.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import shutil
|
| 3 |
+
import soundfile as sf
|
| 4 |
+
|
| 5 |
+
import app
|
| 6 |
+
from app import infer
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _build_parser():
    """Build the argument parser that declares every CLI option."""
    parser = argparse.ArgumentParser(description="F5 TTS - CLI interface")
    parser.add_argument("--ref_audio", required=True, help="Path to the reference audio file (wav, mp3)")
    parser.add_argument("--ref_text", default="", help="Reference text. If empty, audio transcription will be performed.")
    parser.add_argument("--gen_text", required=True, help="Text to generate")
    parser.add_argument("--exp_name", default="Multi", help="Experiment name / model selection (default: Multi)")
    parser.add_argument("--remove_silence", action="store_true", help="Remove silence from the output audio")
    parser.add_argument("--cross_fade_duration", type=float, default=0.15, help="Cross-fade duration (s)")
    parser.add_argument("--output_audio", default="output.wav", help="Path to the output WAV file")
    parser.add_argument("--output_spectrogram", default="spectrogram.png", help="Path to save the spectrogram (PNG)")
    parser.add_argument("--language", default="en-us", help="Synthesized language (default: en-us)")
    parser.add_argument("--ref_language", default="en-us", help="Reference language (default: en-us)")
    parser.add_argument("--speed", type=float, default=1.0, help="Audio speed factor (default: 1.0)")
    return parser


def main(argv=None):
    """Run one inference from command-line options and save its outputs.

    Parses the options, calls ``infer`` with the reference audio/text and the
    text to generate, writes the returned audio to ``--output_audio`` and
    copies the returned spectrogram file to ``--output_spectrogram``.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) keeps the normal
            command-line behaviour.

    Raises:
        SystemExit: Raised by argparse when required options are missing or
            invalid, or when ``--help`` is requested.
    """
    args = _build_parser().parse_args(argv)

    # These three settings are not arguments of infer(); they are set as
    # attributes on the `app` module before the call.
    # NOTE(review): presumably `app` reads them during inference - confirm.
    app.language = args.language
    app.ref_language = args.ref_language
    app.speed = args.speed

    (sr, audio_data), spectrogram_path = infer(
        args.ref_audio,
        args.ref_text,
        args.gen_text,
        args.exp_name,
        args.remove_silence,
        args.cross_fade_duration,
    )

    sf.write(args.output_audio, audio_data.astype("float32"), sr)
    try:
        shutil.copy(spectrogram_path, args.output_spectrogram)
    except shutil.SameFileError:
        # The requested destination already is the file infer() produced,
        # so there is nothing to copy and no reason to fail the run.
        pass

    print(f"Audio saved in: {args.output_audio}")
    print(f"Spectrogram saved in: {args.output_spectrogram}")


if __name__ == "__main__":
    main()
|