Spaces:
Build error
Build error
| """Get tokens using the SpeechTokenizer. | |
| Apply SpeechTokenizer to extract acoustic and semantic tokens. | |
| The tokens will be extracted to | |
| encoding_output/acoustic and encoding_output/semantic. | |
| python utils/get_tokens_speech_tokenizer.py \ | |
| --config_path ckpt/speechtokenizer/config.json \ | |
| --ckpt_path ckpt/speechtokenizer/SpeechTokenizer.pt \ | |
| --encoding_input datasets/example/audios \ | |
| --encoding_output datasets/example/audios-speech-tokenizer | |
| Copyright PolyAI Limited. | |
| """ | |
| import argparse | |
| import pathlib | |
| from modules.speech_tokenizer import SpeechTokenizer | |
| MQTTS_ROOT_PATH = str(pathlib.Path(__file__).parent.resolve()) | |
| if __name__ == "__main__": | |
| parser = argparse.ArgumentParser() | |
| parser.add_argument( | |
| "--config_path", | |
| type=str, | |
| help="Path to the SpeechTokenizer config", | |
| default=MQTTS_ROOT_PATH + "/ckpt/speechtokenizer/config.json", | |
| ) | |
| parser.add_argument( | |
| "--ckpt_path", | |
| type=str, | |
| help="Path to the SpeechTokenizer checkpoint", | |
| default=MQTTS_ROOT_PATH + "/ckpt/speechtokenizer/SpeechTokenizer.pt", | |
| ) | |
| parser.add_argument( | |
| "--encoding_input", | |
| type=str, | |
| help="Path to the input folder for encoding", | |
| default=MQTTS_ROOT_PATH + "/datasets/giga-training-data/audios", | |
| ) | |
| parser.add_argument( | |
| "--encoding_output", | |
| type=str, | |
| help="Path where to save the encoded tokens", | |
| default="/tmp/encoding_output", | |
| ) | |
| parser.add_argument( | |
| "--start_percent", | |
| type=int, | |
| default=0, | |
| ) | |
| parser.add_argument( | |
| "--end_percent", | |
| type=int, | |
| default=100, | |
| ) | |
| args = parser.parse_args() | |
| print("Parsed args") | |
| print(args) | |
| tokenizer = SpeechTokenizer( | |
| config_path=args.config_path, | |
| ckpt_path=args.ckpt_path, | |
| ) | |
| tokenizer.encode_files_with_model_concurrent( | |
| folder_path=args.encoding_input, destination_folder=args.encoding_output, | |
| start_percent=args.start_percent, end_percent=args.end_percent | |
| ) | |