Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Initial commit
Browse files
Co-authored-by: hysts <hysts@users.noreply.huggingface.co>
- Dockerfile +65 -0
- README.md +3 -2
- app.py +221 -0
- mlg_config.json +186 -0
- requirements.txt +4 -0
- style.css +16 -0
    	
        Dockerfile
    ADDED
    
    | @@ -0,0 +1,65 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # CUDA 11.7 + cuDNN 8 development image on Ubuntu 22.04.
            FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04

            # Build-time only: keeps apt non-interactive without persisting the
            # setting into the environment of the running container.
            ARG DEBIAN_FRONTEND=noninteractive

            # Run every RUN step under bash with pipefail so that a failed download
            # in a `curl ... | bash` pipeline aborts the build instead of being masked.
            SHELL ["/bin/bash", "-o", "pipefail", "-c"]

            # OS packages; the apt lists are removed in the same layer that created them.
            RUN apt-get update && \
                apt-get upgrade -y && \
                apt-get install -y --no-install-recommends \
                git \
                git-lfs \
                wget \
                curl \
                # python build dependencies \
                build-essential \
                libssl-dev \
                zlib1g-dev \
                libbz2-dev \
                libreadline-dev \
                libsqlite3-dev \
                libncursesw5-dev \
                xz-utils \
                tk-dev \
                libxml2-dev \
                libxmlsec1-dev \
                libffi-dev \
                liblzma-dev \
                # gradio dependencies \
                ffmpeg \
                # fairseq2 dependencies \
                libsndfile-dev && \
                apt-get clean && \
                rm -rf /var/lib/apt/lists/*

            # Everything below runs as an unprivileged user with UID 1000.
            RUN useradd -m -u 1000 user
            USER user
            ENV HOME=/home/user \
                PATH=/home/user/.local/bin:${PATH}
            WORKDIR ${HOME}/app

            # Install pyenv and build the requested Python version with it.
            # -f makes curl fail on HTTP errors, -L follows redirects.
            RUN curl -fsSL https://pyenv.run | bash
            ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
            ARG PYTHON_VERSION=3.10.12
            RUN pyenv install ${PYTHON_VERSION} && \
                pyenv global ${PYTHON_VERSION} && \
                pyenv rehash && \
                pip install --no-cache-dir -U pip setuptools wheel

            # Pinned framework dependencies; fairseq2 is fetched via the TestPyPI extra index.
            RUN pip install --no-cache-dir torch==2.0.1 gradio==3.40.1 && \
                pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ fairseq2==0.1.0rc0

            # Install seamless_communication from the private repository. The token is
            # provided through a BuildKit secret mount, and the checkout (including the
            # .git/config that records the token-bearing URL) is deleted in the same layer.
            RUN --mount=type=secret,id=GITHUB_TOKEN,mode=0444,required=true \
                git clone "https://$(cat /run/secrets/GITHUB_TOKEN)@github.com/fairinternal/seamless_communication" && \
                pip install --no-cache-dir ./seamless_communication && \
                rm -rf seamless_communication

            # Copy the requirements file on its own so this layer is rebuilt only when it changes.
            COPY ./requirements.txt /tmp/requirements.txt
            RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt

            # Application sources, owned by the unprivileged user.
            COPY --chown=1000 . ${HOME}/app
            ENV PYTHONPATH=${HOME}/app \
                PYTHONUNBUFFERED=1 \
                GRADIO_ALLOW_FLAGGING=never \
                GRADIO_NUM_PORTS=1 \
                GRADIO_SERVER_NAME=0.0.0.0 \
                GRADIO_THEME=huggingface \
                SYSTEM=spaces
            CMD ["python", "app.py"]
         | 
    	
        README.md
    CHANGED
    
    | @@ -1,10 +1,11 @@ | |
| 1 | 
             
            ---
         | 
| 2 | 
            -
            title: Seamless  | 
| 3 | 
            -
            emoji:  | 
| 4 | 
             
            colorFrom: blue
         | 
| 5 | 
             
            colorTo: yellow
         | 
| 6 | 
             
            sdk: docker
         | 
| 7 | 
             
            pinned: false
         | 
|  | |
| 8 | 
             
            ---
         | 
| 9 |  | 
| 10 | 
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         | 
|  | |
| 1 | 
             
            ---
         | 
| 2 | 
            +
            title: Seamless M4T
         | 
| 3 | 
            +
            emoji: 📞
         | 
| 4 | 
             
            colorFrom: blue
         | 
| 5 | 
             
            colorTo: yellow
         | 
| 6 | 
             
            sdk: docker
         | 
| 7 | 
             
            pinned: false
         | 
| 8 | 
            +
            suggested_hardware: t4-medium
         | 
| 9 | 
             
            ---
         | 
| 10 |  | 
| 11 | 
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,221 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import json
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import gradio as gr
         | 
| 5 | 
            +
            import numpy as np
         | 
| 6 | 
            +
            import torch
         | 
| 7 | 
            +
            import torchaudio
         | 
| 8 | 
            +
            from seamless_communication.models.inference.translator import Translator
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            # Markdown heading rendered at the top of the demo page.
            DESCRIPTION = "# SeamlessM4T"

            # The language codes offered in the dropdowns are the keys of the
            # "multilingual" table in mlg_config.json.
            with open("./mlg_config.json", "r") as f:
                lang_idx_map = json.load(f)
            # Materialised as a list: a dict_keys view is neither indexable nor
            # JSON-serialisable, and the value is handed to UI components as `choices`.
            LANGUAGES = list(lang_idx_map["multilingual"].keys())

            # Dropdown labels; the first whitespace-separated token is the task code
            # that the handlers extract with `task_name.split()[0]`.
            TASK_NAMES = [
                "S2ST (Speech to Speech translation)",
                "S2TT (Speech to Text translation)",
                "T2ST (Text to Speech translation)",
                "T2TT (Text to Text translation)",
                "ASR (Automatic Speech Recognition)",
            ]

            AUDIO_SAMPLE_RATE = 16000.0
            MAX_INPUT_AUDIO_LENGTH = 60  # in seconds

            # Use the first CUDA device when one is available, otherwise the CPU.
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            translator = Translator(
                model_name_or_card="multitask_unity_large",
                vocoder_name_or_card="vocoder_36langs",
                device=device,
                sample_rate=AUDIO_SAMPLE_RATE,
            )
         | 
| 34 | 
            +
             | 
| 35 | 
            +
             | 
| 36 | 
            +
            def predict(
                task_name: str,
                audio_source: str,
                input_audio_mic: str,
                input_audio_file: str,
                input_text: str,
                source_language: str,
                target_language: str,
            ) -> tuple[tuple[int, np.ndarray] | None, str]:
                """Run the selected task and return ``(audio, text)`` for the output widgets.

                Args:
                    task_name: Dropdown label; its first whitespace-separated token is the
                        task code (``S2ST``, ``S2TT``, ``T2ST``, ``T2TT`` or ``ASR``).
                    audio_source: ``"microphone"`` selects ``input_audio_mic``; any other
                        value selects ``input_audio_file``.
                    input_audio_mic: Path of the recorded audio file.
                    input_audio_file: Path of the uploaded audio file.
                    input_text: Source text, used by the text-input tasks.
                    source_language: Passed to the translator as ``src_lang``.
                    target_language: Passed to the translator as ``tgt_lang``.

                Returns:
                    ``((sample_rate, waveform), text)`` for ``S2ST`` and ``T2ST``;
                    ``(None, text)`` for every other task.

                Raises:
                    gr.Error: If a speech-input task is run without any audio.
                """
                task_name = task_name.split()[0]
                if task_name in ["S2ST", "S2TT", "ASR"]:
                    if audio_source == "microphone":
                        input_data = input_audio_mic
                    else:
                        input_data = input_audio_file

                    # Without this guard a missing recording/upload reaches
                    # torchaudio.load(None) and fails with an opaque error.
                    if not input_data:
                        raise gr.Error("No input audio was provided.")

                    # AUDIO_SAMPLE_RATE is stored as a float; use one integer value for
                    # both resampling and saving.
                    target_sr = int(AUDIO_SAMPLE_RATE)
                    arr, org_sr = torchaudio.load(input_data)
                    new_arr = torchaudio.functional.resample(arr, orig_freq=org_sr, new_freq=target_sr)
                    max_length = int(MAX_INPUT_AUDIO_LENGTH * AUDIO_SAMPLE_RATE)
                    if new_arr.shape[1] > max_length:
                        new_arr = new_arr[:, :max_length]
                        gr.Warning(f"Input audio is too long. Only the first {MAX_INPUT_AUDIO_LENGTH} seconds is used.")
                    # The resampled (and possibly truncated) audio overwrites the input
                    # file, whose path is then handed to the translator.
                    torchaudio.save(input_data, new_arr, sample_rate=target_sr)
                else:
                    input_data = input_text
                text_out, wav, sr = translator.predict(
                    input=input_data,
                    task_str=task_name,
                    tgt_lang=target_language,
                    src_lang=source_language,
                )
                if task_name in ["S2ST", "T2ST"]:
                    return (sr, wav.cpu().detach().numpy()), text_out
                else:
                    return None, text_out
         | 
| 71 | 
            +
             | 
| 72 | 
            +
             | 
| 73 | 
            +
            def update_audio_ui(audio_source: str) -> tuple[dict, dict]:
                """Show the audio widget matching ``audio_source`` and clear both widgets.

                Returns the updates for ``input_audio_mic`` and ``input_audio_file``,
                in that order.
                """
                use_microphone = audio_source == "microphone"
                mic_update = gr.update(visible=use_microphone, value=None)
                file_update = gr.update(visible=not use_microphone, value=None)
                return mic_update, file_update
         | 
| 79 | 
            +
             | 
| 80 | 
            +
             | 
| 81 | 
            +
            def update_input_ui(task_name: str) -> tuple[dict, dict, dict, dict]:
                """Toggle the input widgets according to the selected task.

                Speech-input tasks (``S2ST``, ``S2TT``, ``ASR``) show the audio box and
                hide the text box and source-language dropdown; text-input tasks
                (``T2ST``, ``T2TT``) do the opposite. The target-language dropdown is
                always shown.

                Raises:
                    ValueError: If the task code is not one of the five known codes.
                """
                task_code = task_name.split()[0]
                if task_code in ("S2ST", "S2TT", "ASR"):
                    speech_input = True
                elif task_code in ("T2ST", "T2TT"):
                    speech_input = False
                else:
                    raise ValueError(f"Unknown task: {task_code}")

                return (
                    gr.update(visible=speech_input),  # audio_box
                    gr.update(visible=not speech_input),  # input_text
                    gr.update(visible=not speech_input),  # source_language
                    gr.update(visible=True),  # target_language
                )
         | 
| 106 | 
            +
             | 
| 107 | 
            +
             | 
| 108 | 
            +
            def update_output_ui(task_name: str) -> tuple[dict, dict]:
                """Clear both output widgets and show the audio output only for speech-output tasks.

                Returns the updates for ``output_audio`` and ``output_text``, in that order.

                Raises:
                    ValueError: If the task code is not one of the five known codes.
                """
                task_code = task_name.split()[0]
                if task_code not in ("S2ST", "T2ST", "S2TT", "T2TT", "ASR"):
                    raise ValueError(f"Unknown task: {task_code}")

                produces_speech = task_code in ("S2ST", "T2ST")
                audio_update = gr.update(visible=produces_speech, value=None)
                text_update = gr.update(value=None)
                return audio_update, text_update
         | 
| 122 | 
            +
             | 
| 123 | 
            +
             | 
| 124 | 
            +
            with gr.Blocks(css="style.css") as demo:
                # Header, plus a duplicate button that is visible only when the
                # SHOW_DUPLICATE_BUTTON environment variable equals "1".
                gr.Markdown(DESCRIPTION)
                gr.DuplicateButton(
                    value="Duplicate Space for private use",
                    elem_id="duplicate-button",
                    visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
                )
                with gr.Group():
                    task_name = gr.Dropdown(label="Task", choices=TASK_NAMES, value=TASK_NAMES[0])
                    with gr.Row():
                        # Hidden initially; update_input_ui shows it for the text-input tasks.
                        source_language = gr.Dropdown(label="Source language", choices=LANGUAGES, value="eng", visible=False)
                        target_language = gr.Dropdown(label="Target language", choices=LANGUAGES, value="fra")
                    with gr.Row() as audio_box:
                        audio_source = gr.Radio(label="Audio source", choices=["file", "microphone"], value="file")
                        # Two audio widgets share one slot; update_audio_ui toggles which is visible.
                        input_audio_mic = gr.Audio(label="Input speech", type="filepath", source="microphone", visible=False)
                        input_audio_file = gr.Audio(label="Input speech", type="filepath", source="upload", visible=True)
                    input_text = gr.Textbox(label="Input text", visible=False)
                    btn = gr.Button("Translate")
                    with gr.Column():
                        output_audio = gr.Audio(label="Translated speech", autoplay=False, streaming=False, type="numpy")
                        output_text = gr.Textbox(label="Translated text")

                # Switching the audio source swaps the visible input widget.
                audio_source.change(
                    fn=update_audio_ui,
                    inputs=audio_source,
                    outputs=[input_audio_mic, input_audio_file],
                    queue=False,
                    api_name=False,
                )
                # Changing the task first reconfigures the inputs, then resets the outputs.
                task_name.change(
                    fn=update_input_ui,
                    inputs=task_name,
                    outputs=[audio_box, input_text, source_language, target_language],
                    queue=False,
                    api_name=False,
                ).then(
                    fn=update_output_ui,
                    inputs=task_name,
                    outputs=[output_audio, output_text],
                    queue=False,
                    api_name=False,
                )

                # The Translate button runs the prediction handler.
                btn.click(
                    fn=predict,
                    inputs=[
                        task_name,
                        audio_source,
                        input_audio_mic,
                        input_audio_file,
                        input_text,
                        source_language,
                        target_language,
                    ],
                    outputs=[output_audio, output_text],
                    api_name="run",
                )
            demo.queue(max_size=50).launch()
         | 
    	
        mlg_config.json
    ADDED
    
    | @@ -0,0 +1,186 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "multilingual": {
         | 
| 3 | 
            +
                    "arb": 0,
         | 
| 4 | 
            +
                    "ben": 1,
         | 
| 5 | 
            +
                    "cat": 2,
         | 
| 6 | 
            +
                    "ces": 3,
         | 
| 7 | 
            +
                    "cmn": 4,
         | 
| 8 | 
            +
                    "cym": 5,
         | 
| 9 | 
            +
                    "dan": 6,
         | 
| 10 | 
            +
                    "deu": 7,
         | 
| 11 | 
            +
                    "eng": 8,
         | 
| 12 | 
            +
                    "est": 9,
         | 
| 13 | 
            +
                    "fin": 10,
         | 
| 14 | 
            +
                    "fra": 11,
         | 
| 15 | 
            +
                    "hin": 12,
         | 
| 16 | 
            +
                    "ind": 13,
         | 
| 17 | 
            +
                    "ita": 14,
         | 
| 18 | 
            +
                    "jpn": 15,
         | 
| 19 | 
            +
                    "kor": 16,
         | 
| 20 | 
            +
                    "mlt": 17,
         | 
| 21 | 
            +
                    "nld": 18,
         | 
| 22 | 
            +
                    "pes": 19,
         | 
| 23 | 
            +
                    "pol": 20,
         | 
| 24 | 
            +
                    "por": 21,
         | 
| 25 | 
            +
                    "ron": 22,
         | 
| 26 | 
            +
                    "rus": 23,
         | 
| 27 | 
            +
                    "slk": 24,
         | 
| 28 | 
            +
                    "spa": 25,
         | 
| 29 | 
            +
                    "swe": 26,
         | 
| 30 | 
            +
                    "swh": 27,
         | 
| 31 | 
            +
                    "tel": 28,
         | 
| 32 | 
            +
                    "tgl": 29,
         | 
| 33 | 
            +
                    "tha": 30,
         | 
| 34 | 
            +
                    "tur": 31,
         | 
| 35 | 
            +
                    "ukr": 32,
         | 
| 36 | 
            +
                    "urd": 33,
         | 
| 37 | 
            +
                    "uzn": 34,
         | 
| 38 | 
            +
                    "vie": 35
         | 
| 39 | 
            +
                },
         | 
| 40 | 
            +
                "multispkr": {
         | 
| 41 | 
            +
                    "arb": [
         | 
| 42 | 
            +
                        0
         | 
| 43 | 
            +
                    ],
         | 
| 44 | 
            +
                    "ben": [
         | 
| 45 | 
            +
                        2,
         | 
| 46 | 
            +
                        1
         | 
| 47 | 
            +
                    ],
         | 
| 48 | 
            +
                    "cat": [
         | 
| 49 | 
            +
                        3
         | 
| 50 | 
            +
                    ],
         | 
| 51 | 
            +
                    "ces": [
         | 
| 52 | 
            +
                        4
         | 
| 53 | 
            +
                    ],
         | 
| 54 | 
            +
                    "cmn": [
         | 
| 55 | 
            +
                        5
         | 
| 56 | 
            +
                    ],
         | 
| 57 | 
            +
                    "cym": [
         | 
| 58 | 
            +
                        6
         | 
| 59 | 
            +
                    ],
         | 
| 60 | 
            +
                    "dan": [
         | 
| 61 | 
            +
                        7,
         | 
| 62 | 
            +
                        8
         | 
| 63 | 
            +
                    ],
         | 
| 64 | 
            +
                    "deu": [
         | 
| 65 | 
            +
                        9
         | 
| 66 | 
            +
                    ],
         | 
| 67 | 
            +
                    "eng": [
         | 
| 68 | 
            +
                        10
         | 
| 69 | 
            +
                    ],
         | 
| 70 | 
            +
                    "est": [
         | 
| 71 | 
            +
                        11,
         | 
| 72 | 
            +
                        12,
         | 
| 73 | 
            +
                        13
         | 
| 74 | 
            +
                    ],
         | 
| 75 | 
            +
                    "fin": [
         | 
| 76 | 
            +
                        14
         | 
| 77 | 
            +
                    ],
         | 
| 78 | 
            +
                    "fra": [
         | 
| 79 | 
            +
                        15
         | 
| 80 | 
            +
                    ],
         | 
| 81 | 
            +
                    "hin": [
         | 
| 82 | 
            +
                        16
         | 
| 83 | 
            +
                    ],
         | 
| 84 | 
            +
                    "ind": [
         | 
| 85 | 
            +
                        17,
         | 
| 86 | 
            +
                        24,
         | 
| 87 | 
            +
                        18,
         | 
| 88 | 
            +
                        20,
         | 
| 89 | 
            +
                        19,
         | 
| 90 | 
            +
                        21,
         | 
| 91 | 
            +
                        23,
         | 
| 92 | 
            +
                        27,
         | 
| 93 | 
            +
                        26,
         | 
| 94 | 
            +
                        22,
         | 
| 95 | 
            +
                        25
         | 
| 96 | 
            +
                    ],
         | 
| 97 | 
            +
                    "ita": [
         | 
| 98 | 
            +
                        29,
         | 
| 99 | 
            +
                        28
         | 
| 100 | 
            +
                    ],
         | 
| 101 | 
            +
                    "jpn": [
         | 
| 102 | 
            +
                        30
         | 
| 103 | 
            +
                    ],
         | 
| 104 | 
            +
                    "kor": [
         | 
| 105 | 
            +
                        31
         | 
| 106 | 
            +
                    ],
         | 
| 107 | 
            +
                    "mlt": [
         | 
| 108 | 
            +
                        32,
         | 
| 109 | 
            +
                        33,
         | 
| 110 | 
            +
                        34
         | 
| 111 | 
            +
                    ],
         | 
| 112 | 
            +
                    "nld": [
         | 
| 113 | 
            +
                        35
         | 
| 114 | 
            +
                    ],
         | 
| 115 | 
            +
                    "pes": [
         | 
| 116 | 
            +
                        36
         | 
| 117 | 
            +
                    ],
         | 
| 118 | 
            +
                    "pol": [
         | 
| 119 | 
            +
                        37
         | 
| 120 | 
            +
                    ],
         | 
| 121 | 
            +
                    "por": [
         | 
| 122 | 
            +
                        38
         | 
| 123 | 
            +
                    ],
         | 
| 124 | 
            +
                    "ron": [
         | 
| 125 | 
            +
                        39
         | 
| 126 | 
            +
                    ],
         | 
| 127 | 
            +
                    "rus": [
         | 
| 128 | 
            +
                        40
         | 
| 129 | 
            +
                    ],
         | 
| 130 | 
            +
                    "slk": [
         | 
| 131 | 
            +
                        41
         | 
| 132 | 
            +
                    ],
         | 
| 133 | 
            +
                    "spa": [
         | 
| 134 | 
            +
                        42
         | 
| 135 | 
            +
                    ],
         | 
| 136 | 
            +
                    "swe": [
         | 
| 137 | 
            +
                        43,
         | 
| 138 | 
            +
                        45,
         | 
| 139 | 
            +
                        44
         | 
| 140 | 
            +
                    ],
         | 
| 141 | 
            +
                    "swh": [
         | 
| 142 | 
            +
                        46,
         | 
| 143 | 
            +
                        48,
         | 
| 144 | 
            +
                        47
         | 
| 145 | 
            +
                    ],
         | 
| 146 | 
            +
                    "tel": [
         | 
| 147 | 
            +
                        49
         | 
| 148 | 
            +
                    ],
         | 
| 149 | 
            +
                    "tgl": [
         | 
| 150 | 
            +
                        50
         | 
| 151 | 
            +
                    ],
         | 
| 152 | 
            +
                    "tha": [
         | 
| 153 | 
            +
                        51,
         | 
| 154 | 
            +
                        54,
         | 
| 155 | 
            +
                        55,
         | 
| 156 | 
            +
                        52,
         | 
| 157 | 
            +
                        53
         | 
| 158 | 
            +
                    ],
         | 
| 159 | 
            +
                    "tur": [
         | 
| 160 | 
            +
                        58,
         | 
| 161 | 
            +
                        57,
         | 
| 162 | 
            +
                        56
         | 
| 163 | 
            +
                    ],
         | 
| 164 | 
            +
                    "ukr": [
         | 
| 165 | 
            +
                        59
         | 
| 166 | 
            +
                    ],
         | 
| 167 | 
            +
                    "urd": [
         | 
| 168 | 
            +
                        60,
         | 
| 169 | 
            +
                        61,
         | 
| 170 | 
            +
                        62
         | 
| 171 | 
            +
                    ],
         | 
| 172 | 
            +
                    "uzn": [
         | 
| 173 | 
            +
                        63,
         | 
| 174 | 
            +
                        64,
         | 
| 175 | 
            +
                        65
         | 
| 176 | 
            +
                    ],
         | 
| 177 | 
            +
                    "vie": [
         | 
| 178 | 
            +
                        66,
         | 
| 179 | 
            +
                        67,
         | 
| 180 | 
            +
                        70,
         | 
| 181 | 
            +
                        71,
         | 
| 182 | 
            +
                        68,
         | 
| 183 | 
            +
                        69
         | 
| 184 | 
            +
                    ]
         | 
| 185 | 
            +
                }
         | 
| 186 | 
            +
            }
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,4 @@ | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            gradio==3.40.1
         | 
| 2 | 
            +
            huggingface_hub==0.16.4
         | 
| 3 | 
            +
            torch==2.0.1
         | 
| 4 | 
            +
            torchaudio==2.0.2
         | 
    	
        style.css
    ADDED
    
    | @@ -0,0 +1,16 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            h1 {
         | 
| 2 | 
            +
              text-align: center;
         | 
| 3 | 
            +
            }
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            #duplicate-button {
         | 
| 6 | 
            +
              margin: auto;
         | 
| 7 | 
            +
              color: #fff;
         | 
| 8 | 
            +
              background: #1565c0;
         | 
| 9 | 
            +
              border-radius: 100vh;
         | 
| 10 | 
            +
            }
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            #component-0 {
         | 
| 13 | 
            +
              max-width: 730px;
         | 
| 14 | 
            +
              margin: auto;
         | 
| 15 | 
            +
              padding-top: 1.5rem;
         | 
| 16 | 
            +
            }
         | 
 
			
