Commit 074cf4f: small fixes
1 parent: 0eef9b6

app.py CHANGED

@@ -19,6 +19,7 @@
 # References:
 # https://gradio.app/docs/#dropdown
 
+import logging
 import os
 import time
 from datetime import datetime
@@ -34,7 +35,7 @@ languages = sorted(language_to_models.keys())
 def convert_to_wav(in_filename: str) -> str:
     """Convert the input audio file to a wave file"""
     out_filename = in_filename + ".wav"
-
+    logging.info(f"Converting '{in_filename}' to '{out_filename}'")
     _ = os.system(f"ffmpeg -hide_banner -i '{in_filename}' '{out_filename}'")
     return out_filename
 
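
The hunk above only adds a log line before the ffmpeg call; the conversion itself still goes through os.system with the filename spliced into a shell command. For comparison, a rough sketch of the same step written with subprocess.run (an alternative shown purely for illustration; it is not what the commit or the app uses):

import logging
import subprocess


def convert_to_wav_sketch(in_filename: str) -> str:
    """Hypothetical variant of convert_to_wav(); the real app shells out via os.system."""
    out_filename = in_filename + ".wav"
    logging.info(f"Converting '{in_filename}' to '{out_filename}'")
    # Passing the arguments as a list sidesteps shell quoting of odd filenames,
    # and check=True raises CalledProcessError if ffmpeg exits with an error.
    subprocess.run(
        ["ffmpeg", "-hide_banner", "-i", in_filename, out_filename],
        check=True,
    )
    return out_filename
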
@@ -46,23 +47,23 @@ def process(
     decoding_method: str,
     num_active_paths: int,
 ) -> str:
-
-
-
-
-
+    logging.info(f"in_filename: {in_filename}")
+    logging.info(f"language: {language}")
+    logging.info(f"repo_id: {repo_id}")
+    logging.info(f"decoding_method: {decoding_method}")
+    logging.info(f"num_active_paths: {num_active_paths}")
 
     filename = convert_to_wav(in_filename)
 
     now = datetime.now()
     date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
-
+    logging.info(f"Started at {date_time}")
 
     start = time.time()
     wave, wave_sample_rate = torchaudio.load(filename)
 
     if wave_sample_rate != sample_rate:
-
+        logging.info(
             f"Expected sample rate: {sample_rate}. Given: {wave_sample_rate}. "
             f"Resampling to {sample_rate}."
         )
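
The added log message announces resampling when the file's sample rate differs from the model's, but the resampling call itself sits outside this hunk. Assuming it is done with torchaudio (an assumption; the actual call is not visible in this diff), the step would look roughly like this:

import torchaudio

# Illustrative values only: the real app gets `filename` from convert_to_wav()
# and `sample_rate` from the loaded model's configuration.
filename = "example.wav"
sample_rate = 16000

wave, wave_sample_rate = torchaudio.load(filename)
if wave_sample_rate != sample_rate:
    # Resample the waveform to the rate the model expects.
    wave = torchaudio.functional.resample(
        wave, orig_freq=wave_sample_rate, new_freq=sample_rate
    )
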
@@ -86,22 +87,12 @@ def process(
     duration = wave.shape[0] / sample_rate
     rtf = (end - start) / duration
 
-
-
-
-
-    print(hyp)
+    logging.info(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
+    logging.info(f"Duration {duration: .3f} s")
+    logging.info(f"RTF {rtf: .3f}")
+    logging.info(f"hyp:\n{hyp}")
 
-
-    <div class='result'>
-    <div class='result_item result_item_success'>
-    {hyp}
-    <br/>
-    </div>
-    </div>
-    """
-
-    return html_output
+    return hyp
 
 
 title = "# Automatic Speech Recognition with Next-gen Kaldi"
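
One note on the rtf value logged above: the real-time factor is processing time divided by audio duration, so decoding 10 seconds of audio in 2.5 seconds gives an RTF of 0.25, and anything below 1 means the model runs faster than real time.
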
@@ -125,16 +116,7 @@ def update_model_dropdown(language: str):
     raise ValueError(f"Unsupported language: {language}")
 
 
-
-# https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L112
-demo = gr.Blocks(
-    css="""
-    .result {display:flex;flex-direction:column}
-    .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
-    .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
-    .result_item_error {background-color:#ff7070;color:white;align-self:start}
-    """,
-)
+demo = gr.Blocks()
 
 with demo:
     gr.Markdown(title)
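
Dropping the css argument here is consistent with the earlier hunk that removes the HTML output from process(): the .result and .result_item_* classes styled the <div class='result'> markup that the function no longer produces, so the custom stylesheet has nothing left to target.
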
@@ -178,8 +160,8 @@ with demo:
                 optional=False,
                 label="Upload from disk",
             )
-            uploaded_output = gr.HTML(label="Recognized speech from uploaded file")
             upload_button = gr.Button("Submit for recognition")
+            uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
 
         with gr.TabItem("Record from microphone"):
             microphone = gr.Audio(
@@ -190,7 +172,7 @@ with demo:
             )
 
             record_button = gr.Button("Submit for recognition")
-            recorded_output = gr.
+            recorded_output = gr.Textbox(label="Recognized speech from recordings")
 
     upload_button.click(
         process,
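
With process() now returning the hypothesis as a plain string, the recognized text lands directly in the gr.Textbox widgets wired to the buttons above. A minimal, self-contained sketch of that wiring pattern (the widget parameters and the dummy recognize() function are stand-ins, not copied from the app):

import gradio as gr


def recognize(audio_path: str) -> str:
    # Stand-in for process(); the real function runs the ASR model on the file.
    return f"recognized text for {audio_path}"


demo = gr.Blocks()

with demo:
    audio = gr.Audio(type="filepath", label="Upload from disk")
    button = gr.Button("Submit for recognition")
    output = gr.Textbox(label="Recognized speech from uploaded file")
    # Clicking the button calls recognize() with the audio path and writes the
    # returned string into the Textbox, mirroring upload_button.click(process, ...).
    button.click(recognize, inputs=[audio], outputs=[output])

if __name__ == "__main__":
    demo.launch()
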
@@ -217,4 +199,8 @@ with demo:
     gr.Markdown(description)
 
 if __name__ == "__main__":
+    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+
+    logging.basicConfig(format=formatter, level=logging.INFO)
+
     demo.launch()
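
For reference, the format string added in this final hunk tags every logging.info call in the file with a timestamp, level, and source location. A small sketch of the configuration and the shape of the lines it produces (the sample output below is made up):

import logging

formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
logging.basicConfig(format=formatter, level=logging.INFO)

# In app.py, a call such as logging.info(f"in_filename: {in_filename}") then
# prints a line roughly like:
#   2023-01-01 12:00:00,000 INFO [app.py:50] in_filename: /tmp/audio.mp3
logging.info("in_filename: /tmp/audio.mp3")  # illustrative message
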