Update app.py
app.py CHANGED
@@ -1,6 +1,13 @@
 import gradio as gr
 from transformers import pipeline
 
+pretrained_model: str = "facebook/m2m100_1.2B"
+cache_dir: str = "models/"
+
+tokenizer = M2M100Tokenizer.from_pretrained(pretrained_model, cache_dir=cache_dir)
+translation_model = M2M100ForConditionalGeneration.from_pretrained(
+    pretrained_model, cache_dir=cache_dir)
+
 transcription = pipeline("automatic-speech-recognition", model= "openai/whisper-base")
 clasification = pipeline(
     "audio-classification",
@@ -117,20 +124,33 @@ lang_id = {
     "Chinese": "zh",
     "Zulu": "zu",
 }
-
+def translation(source_lang, target_lang):
+    src_lang = lang_id[source_lang]
+    trg_lang = lang_id[target_lang]
+    tokenizer.src_lang = src_lang
+    with torch.no_grad():
+        encoded_input = tokenizer(user_input, return_tensors="pt").to(device)
+        generated_tokens = model.generate(
+            **encoded_input, forced_bos_token_id=tokenizer.get_lang_id(trg_lang))
+    translated_text = tokenizer.batch_decode(
+        generated_tokens, skip_special_tokens=True)[0]
+    return translated_text
 demo = gr.Blocks()
 
 with demo:
     gr.Markdown("Speech analyzer")
     audio = gr.Audio(type="filepath", label = "Upload a file")
+    text0 = gr.Textbox()
     text = gr.Textbox()
     source_lang = gr.Dropdown(label="Source lang", choices=list(lang_id.keys()))
+    target_lang = gr.Dropdown(label="target lang", choices=list(lang_id.keys()))
+
     #gr.Examples(examples = list(lang_id.keys()),
     #             inputs=[
     #             source_lang])
     b1 = gr.Button("convert to text")
-
-
+    b3 = gr.Button("translate")
+    b3.Dropdown(translation, input = text0, output = text)
     b1.click(audio_a_text, inputs=audio, outputs=text)
 
     b2 = gr.Button("Classification of speech")
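
Note: the setup added at the top of app.py references M2M100Tokenizer, M2M100ForConditionalGeneration, torch, and a device without importing or defining any of them, so the Space will raise a NameError on startup. A minimal sketch of the missing pieces, assuming the names used in the diff (the device selection line is my addition, not part of the commit):

import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

pretrained_model: str = "facebook/m2m100_1.2B"
cache_dir: str = "models/"

# Assumed: pick a GPU when available; the diff uses `device` without defining it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = M2M100Tokenizer.from_pretrained(pretrained_model, cache_dir=cache_dir)
translation_model = M2M100ForConditionalGeneration.from_pretrained(
    pretrained_model, cache_dir=cache_dir).to(device)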
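Inside translation, user_input is read but never passed in, and model.generate refers to a name that does not exist (the diff calls the model translation_model). A corrected sketch under those assumptions, taking the text to translate as an explicit first argument:

def translation(user_input, source_lang, target_lang):
    # Map the dropdown's human-readable names to M2M100 language codes.
    src_lang = lang_id[source_lang]
    trg_lang = lang_id[target_lang]
    tokenizer.src_lang = src_lang
    with torch.no_grad():
        encoded_input = tokenizer(user_input, return_tensors="pt").to(device)
        # forced_bos_token_id makes the decoder start in the target language.
        generated_tokens = translation_model.generate(
            **encoded_input, forced_bos_token_id=tokenizer.get_lang_id(trg_lang))
    translated_text = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True)[0]
    return translated_text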
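Likewise, b3.Dropdown(translation, input = text0, output = text) is not valid Gradio: gr.Button has no Dropdown method, and event handlers take the plural inputs/outputs keywords. Mirroring the b1.click line already in the file, the wiring would look like the sketch below (the input list assumes the corrected three-argument translation above, with the source text read from text0):

b3 = gr.Button("translate")
b3.click(translation, inputs=[text0, source_lang, target_lang], outputs=text)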