Amamrnaf committed
Commit · e77a799
1 Parent(s): 53f6df5
yeep

Files changed:
- app.py +3 -3
- metaVoice.py +4 -4
app.py
CHANGED
@@ -37,10 +37,10 @@ def process_audio(input_text, speaker_audio, speaker_name, option_selected):
             return f"The option is not implemented yet."

         # Save the output audio under the speaker's name
-        speaker_output_path = f"audio/{speaker_name}.wav"
-        os.rename("audio/output.wav", speaker_output_path)
+        # speaker_output_path = f"audio/{speaker_name}.wav"
+        # os.rename("audio/output.wav", speaker_output_path)

-        return
+        return "./tmp/audio/generated-custom.wav"

     except Exception as e:
         return str(e)
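For reference, the lines commented out in app.py renamed the generated file audio/output.wav to a per-speaker path before returning. A minimal sketch of that step, should it be restored, is shown below; the helper name save_under_speaker_name and the assumption that generation still writes audio/output.wav are illustrative, not taken from the repo, and os.replace is used in place of os.rename so an existing file is overwritten.

    import os

    def save_under_speaker_name(speaker_name: str,
                                source_path: str = "audio/output.wav") -> str:
        # Hypothetical helper: move the generated audio to a per-speaker
        # filename, mirroring the speaker_output_path / os.rename lines
        # commented out in this commit.
        speaker_output_path = f"audio/{speaker_name}.wav"
        os.replace(source_path, speaker_output_path)
        return speaker_output_path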
metaVoice.py
CHANGED
@@ -19,7 +19,7 @@ from fam.llm.model import GPT, GPTConfig
 from fam.llm.utils import (
     check_audio_file,
     get_default_dtype,
-    get_default_use_kv_cache,
+    # get_default_use_kv_cache,
     normalize_text,
 )
 from fam.quantiser.audio.speaker_encoder.model import SpeakerEncoder

@@ -724,9 +724,9 @@ class SamplingControllerConfig:
     init_from: str = "resume"
     """Either 'resume' (from an out_dir) or a gpt2 variant (e.g. 'gpt2-xl')."""

-    use_kv_cache: Optional[Literal["flash_decoding", "vanilla"]] = get_default_use_kv_cache()
-    """Type of kv caching to use for inference: 1) [none] no kv caching, 2) [flash_decoding] use the
-    flash decoding kernel, 3) [vanilla] use torch attention with hand implemented kv-cache."""
+    # use_kv_cache: Optional[Literal["flash_decoding", "vanilla"]] = get_default_use_kv_cache()
+    # """Type of kv caching to use for inference: 1) [none] no kv caching, 2) [flash_decoding] use the
+    # flash decoding kernel, 3) [vanilla] use torch attention with hand implemented kv-cache."""

     output_dir: str = "samples/"
     """Relative path to output directory"""
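The metaVoice.py hunks comment out both the get_default_use_kv_cache import and the use_kv_cache field of SamplingControllerConfig. If other code still reads that attribute, a stand-in field that keeps it without the helper import could look like the sketch below; the None default (meaning no kv caching) is an assumption, not something this commit introduces.

    from typing import Literal, Optional

    # Assumed stand-in, not part of this commit: keep the use_kv_cache field
    # available with kv caching disabled by default, so callers can still read
    # it without importing get_default_use_kv_cache.
    use_kv_cache: Optional[Literal["flash_decoding", "vanilla"]] = None
    """Type of kv caching to use for inference; None disables kv caching."""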