Spaces:
Runtime error
Runtime error
Zeimoto
committed on
Commit
·
c438acc
1
Parent(s):
0536b78
added translation task in whisper model
Browse files- app.py +19 -18
- speech2text.py +19 -1
app.py
CHANGED
|
@@ -2,8 +2,8 @@ import streamlit as st
|
|
| 2 |
from st_audiorec import st_audiorec
|
| 3 |
|
| 4 |
from nameder import init_model_ner, get_entity_labels
|
| 5 |
-
from speech2text import init_model_trans, transcribe
|
| 6 |
-
from translation import get_translation
|
| 7 |
from resources import audit_elapsedtime, set_start
|
| 8 |
import subprocess
|
| 9 |
|
|
@@ -12,25 +12,26 @@ def main ():
|
|
| 12 |
print(f"Running main")
|
| 13 |
|
| 14 |
#print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
|
| 15 |
-
text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
|
| 16 |
-
st.write(text)
|
| 17 |
-
traducao = get_translation(text_to_translate=text, languageCode="pt")
|
| 18 |
-
st.write(traducao)
|
| 19 |
-
|
| 20 |
# ner = init_model_ner() #async
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 34 |
|
| 35 |
# if text is not None and ner is not None:
|
| 36 |
# st.write('Entities: ', get_entity_labels(model=ner, text=text))
|
|
|
|
| 2 |
from st_audiorec import st_audiorec
|
| 3 |
|
| 4 |
from nameder import init_model_ner, get_entity_labels
|
| 5 |
+
from speech2text import init_model_trans, transcribe, translate
|
| 6 |
+
# from translation import get_translation
|
| 7 |
from resources import audit_elapsedtime, set_start
|
| 8 |
import subprocess
|
| 9 |
|
|
|
|
| 12 |
print(f"Running main")
|
| 13 |
|
| 14 |
#print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
|
| 15 |
+
# text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
|
| 16 |
+
# st.write(text)
|
| 17 |
+
# traducao = get_translation(text_to_translate=text, languageCode="pt")
|
| 18 |
+
# st.write(traducao)
|
| 19 |
+
s2t = init_model_trans()
|
| 20 |
# ner = init_model_ner() #async
|
| 21 |
|
| 22 |
+
print("Rendering UI...")
|
| 23 |
+
start_render = set_start()
|
| 24 |
+
wav_audio_data = st_audiorec()
|
| 25 |
+
audit_elapsedtime(function="Rendering UI", start=start_render)
|
| 26 |
|
| 27 |
+
if wav_audio_data is not None and s2t is not None:
|
| 28 |
+
print("Loading data...")
|
| 29 |
+
start_loading = set_start()
|
| 30 |
+
st.audio(wav_audio_data, format='audio/wav')
|
| 31 |
+
original = transcribe(wav_audio_data, s2t)
|
| 32 |
+
print("translating audio...")
|
| 33 |
+
translation = translate(original)
|
| 34 |
+
st.write(f"Original: {original}/nTranscription: {translation}")
|
| 35 |
|
| 36 |
# if text is not None and ner is not None:
|
| 37 |
# st.write('Entities: ', get_entity_labels(model=ner, text=text))
|
speech2text.py
CHANGED
|
@@ -14,7 +14,7 @@ def init_model_trans ():
|
|
| 14 |
model_id = "openai/whisper-large-v3"
|
| 15 |
|
| 16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 17 |
-
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=
|
| 18 |
)
|
| 19 |
model.to(device)
|
| 20 |
|
|
@@ -41,9 +41,27 @@ def transcribe (audio_sample: bytes, pipe) -> str:
|
|
| 41 |
start = set_start()
|
| 42 |
# dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
|
| 43 |
# sample = dataset[0]["audio"]
|
|
|
|
|
|
|
| 44 |
result = pipe(audio_sample)
|
|
|
|
| 45 |
audit_elapsedtime(function="Transcription", start=start)
|
| 46 |
print("transcription result",result)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
#st.write('trancription: ', result["text"])
|
| 49 |
return result["text"]
|
|
|
|
| 14 |
model_id = "openai/whisper-large-v3"
|
| 15 |
|
| 16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 17 |
+
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
|
| 18 |
)
|
| 19 |
model.to(device)
|
| 20 |
|
|
|
|
| 41 |
start = set_start()
|
| 42 |
# dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
|
| 43 |
# sample = dataset[0]["audio"]
|
| 44 |
+
|
| 45 |
+
#result = pipe(audio_sample)
|
| 46 |
result = pipe(audio_sample)
|
| 47 |
+
|
| 48 |
audit_elapsedtime(function="Transcription", start=start)
|
| 49 |
print("transcription result",result)
|
| 50 |
|
| 51 |
+
#st.write('trancription: ', result["text"])
|
| 52 |
+
return result["text"]
|
| 53 |
+
|
| 54 |
+
def translate(audio_sample: bytes, pipe) -> str:
    """Translate spoken audio into English text with a Whisper ASR pipeline.

    Args:
        audio_sample: Raw audio bytes (e.g. WAV data captured by the recorder).
        pipe: A Hugging Face automatic-speech-recognition pipeline built from a
            Whisper checkpoint. NOTE(review): Whisper's "translate" task always
            targets English output, regardless of the source language.

    Returns:
        The translated transcript text (``result["text"]``).
    """
    print("Initiating Translation...")
    start = set_start()
    # Whisper switches from transcription to speech translation when the
    # generation task is set to "translate" instead of the default "transcribe".
    result = pipe(audio_sample, generate_kwargs={"task": "translate"})
    audit_elapsedtime(function="Translation", start=start)
    print("Translation result", result)
    return result["text"]
|