Spaces:
Runtime error
Runtime error
| import os | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| from pathlib import Path | |
| os.system("pip uninstall -y gradio") | |
| os.system("pip install gradio==3.2") | |
| from demo_inference.demo_tts import DemoTTS | |
| from demo_inference.demo_asr import DemoASR | |
| from demo_inference.demo_anonymization import DemoAnonymizer | |
def pcm2float(sig, dtype='float32'):
    """Convert integer PCM samples to floating point, nominally in [-1.0, 1.0).

    Adapted from
    https://gist.github.com/HudsonHuang/fbdf8e9af7993fe2a91620d3fb86a182

    Signed input is divided by ``2 ** (bits - 1)``. Unsigned input is first
    re-centred around zero by subtracting its mid-point.

    Args:
        sig: Array-like of signed or unsigned integers.
        dtype: Floating point dtype of the returned samples.

    Returns:
        The scaled samples as ``dtype``, with the same shape as ``sig``.

    Raises:
        TypeError: If ``sig`` is not an integer array or ``dtype`` is not a
            floating point type.
    """
    sig = np.asarray(sig)
    if sig.dtype.kind not in 'iu':
        raise TypeError("'sig' must be an array of integers")
    dtype = np.dtype(dtype)
    if dtype.kind != 'f':
        raise TypeError("'dtype' must be a floating point type")

    info = np.iinfo(sig.dtype)
    abs_max = 2 ** (info.bits - 1)
    offset = info.min + abs_max  # 0 for signed input, mid-point for unsigned

    # Subtract the offset in (at least) double precision and only then cast
    # to the requested dtype. Casting first would round wide unsigned samples
    # (e.g. uint32 -> float32) before re-centring and wipe out everything
    # close to the mid-point.
    work_dtype = np.result_type(dtype, np.float64)
    scaled = (sig.astype(work_dtype) - offset) / abs_max
    return scaled.astype(dtype)
def float2pcm(sig, dtype='int16'):
    """Convert floating point samples (nominally in [-1.0, 1.0)) to integer PCM.

    Adapted from
    https://gist.github.com/HudsonHuang/fbdf8e9af7993fe2a91620d3fb86a182

    Samples are scaled by ``2 ** (bits - 1)``, shifted by the mid-point for
    unsigned targets, clipped to the range of the target type and converted
    with ``astype`` (which drops the fractional part).

    Args:
        sig: Array-like of floating point samples.
        dtype: Integer dtype of the returned samples.

    Returns:
        The converted samples as ``dtype``, with the same shape as ``sig``.

    Raises:
        TypeError: If ``sig`` is not a float array or ``dtype`` is not an
            integer type.
    """
    sig = np.asarray(sig)
    if sig.dtype.kind != 'f':
        raise TypeError("'sig' must be a float array")
    dtype = np.dtype(dtype)
    if dtype.kind not in 'iu':
        raise TypeError("'dtype' must be an integer type")

    info = np.iinfo(dtype)
    abs_max = 2 ** (info.bits - 1)
    offset = info.min + abs_max  # 0 for signed targets, mid-point for unsigned

    # Work in (at least) double precision: in float32 the upper bound of a
    # 32-bit target is not representable, rounds up to 2 ** 31 and makes the
    # final integer cast overflow for full-scale samples.
    work_dtype = np.result_type(sig.dtype, np.float64)
    scaled = sig.astype(work_dtype) * abs_max + offset

    lower = work_dtype.type(info.min)
    upper = work_dtype.type(info.max)
    if int(upper) > info.max:
        # 64-bit targets: even the working precision rounds the bound up, so
        # step down to the largest representable value that still fits.
        upper = np.nextafter(upper, work_dtype.type(0))
    return scaled.clip(lower, upper).astype(dtype)
class VPInterface:
    """Glue object that chains the ASR, anonymization and TTS demo models.

    ``read`` is the callback handed to the Gradio interface: it takes a
    recording plus the name of an anonymization model and returns newly
    synthesized audio.
    """

    # Sample rate reported alongside the synthesized audio.
    # NOTE(review): presumably the native output rate of the TTS model - confirm.
    OUTPUT_SAMPLE_RATE = 48000

    def __init__(self):
        """Load all three models, on the GPU when one is available."""
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.path_to_tts_models = Path('models', 'tts')
        self.path_to_asr_model = Path('models', 'asr')
        self.path_to_anon_model = Path('models', 'anonymization')

        self.synthesis_model = DemoTTS(model_paths=self.path_to_tts_models, device=self.device)
        self.asr_model = DemoASR(model_path=self.path_to_asr_model, device=self.device)
        self.anon_model = DemoAnonymizer(model_path=self.path_to_anon_model, model_tag='gan', device=self.device)

    def read(self, recording, anon_model_tag):
        """Anonymize a recording and return it as re-synthesized speech.

        Args:
            recording: ``(sample_rate, samples)`` tuple with integer PCM
                samples, or ``None`` when nothing was recorded.
            anon_model_tag: Tag of the anonymization model to use.

        Returns:
            ``(sample_rate, samples)`` tuple with the synthesized audio as
            int16 PCM.

        Raises:
            ValueError: If ``recording`` is ``None``.
            TypeError: If the recorded samples are not integers (raised by
                ``pcm2float``).
        """
        if recording is None:
            # Without this guard the tuple unpacking below fails with an
            # opaque "cannot unpack non-iterable NoneType" error.
            raise ValueError('No recording received; please record some speech before submitting.')

        sr, audio = recording
        audio = pcm2float(audio)

        self._check_models(anon_model_tag)

        # The recognizer output is handed to the TTS flagged as phonemes.
        text = self.asr_model.recognize_speech(audio, sr)
        speaker_embedding = self.anon_model.anonymize_embedding(audio, sr)
        syn_audio = self.synthesis_model.read_text(transcription=text, speaker_embedding=speaker_embedding,
                                                   text_is_phonemes=True)

        return self.OUTPUT_SAMPLE_RATE, float2pcm(syn_audio.cpu().numpy())

    def _check_models(self, anon_model_tag):
        """Swap in a new anonymizer when the requested tag differs from the loaded one."""
        if anon_model_tag != self.anon_model.model_tag:
            self.anon_model = DemoAnonymizer(model_path=self.path_to_anon_model, model_tag=anon_model_tag,
                                             device=self.device)
# Load all models once at start-up; every request reuses this instance.
model = VPInterface()

# Markdown shown below the interface.
article = """
This demo allows you to anonymize your input speech by defining different anonymization models. If
you want to know more about each model, please read the paper linked above. Every time you click the *submit* button,
you should receive a new voice.
Note that for *pool* anonymization in this demo, we are using a different scaling approach (
sklearn.preprocessing.StandardScaler instead of sklearn.preprocessing.MinMaxScaler) because we are processing only
one sample at a time and would otherwise always end up with the same voice.
This demo is still work in progress, so please be lenient with possible low quality and errors. Also, be aware that
this Huggingface space runs on CPU which makes the demo quite slow.
For more information about this system, visit our Github page: [https://github.com/DigitalPhonetics/speaker-anonymization](https://github.com/DigitalPhonetics/speaker-anonymization/tree/gan_embeddings)
"""

# Markdown shown above the interface.
description = """
## Test demo corresponding to the models in our paper [Anonymizing Speech with Generative Adversarial Networks to Preserve Speaker Privacy](https://arxiv.org/abs/2210.07002)
"""

# Custom styling for the submit button. CSS declarations are separated by
# ';' - the previous ',' made the whole rule body invalid.
css = """
.gr-button-primary {background-color: green !important; border-color: green}
"""

iface = gr.Interface(
    fn=model.read,
    inputs=[
        gr.inputs.Audio(source='microphone', type='numpy', label='Say a sentence in English.'),
        gr.inputs.Dropdown(['gan', 'pool', 'random'], type='value', default='gan',
                           label='Anonymization'),
    ],
    outputs=gr.outputs.Audio(type='numpy', label=None),
    layout='vertical',
    title='IMS Speaker Anonymization',
    description=description,
    theme='default',
    allow_flagging='never',
    article=article,
    allow_screenshot=False,
    css=css,  # previously defined but never handed to the interface
)
iface.launch(enable_queue=True)