Spaces:
Runtime error
Runtime error
“siddhu001”
committed on
Commit
·
71fd664
1
Parent(s):
6ebbb2b
Add input text box
Browse files
app.py
CHANGED
|
@@ -283,6 +283,7 @@ def start_warmup():
|
|
| 283 |
except Exception:
|
| 284 |
print("Removing " + opt + " from ASR options since it cannot be loaded.")
|
| 285 |
ASR_options = ASR_options[:opt_count] + ASR_options[(opt_count + 1) :]
|
|
|
|
| 286 |
if opt == ASR_name:
|
| 287 |
ASR_name = ASR_options[0]
|
| 288 |
for opt_count in range(len(LLM_options)):
|
|
@@ -345,6 +346,7 @@ def transcribe(
|
|
| 345 |
ASR_option: str,
|
| 346 |
LLM_option: str,
|
| 347 |
type_option: str,
|
|
|
|
| 348 |
):
|
| 349 |
"""
|
| 350 |
Processes and transcribes an audio stream in real-time.
|
|
@@ -420,6 +422,15 @@ def transcribe(
|
|
| 420 |
audio_output1 = None
|
| 421 |
else:
|
| 422 |
stream = np.concatenate((stream, y))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
(
|
| 424 |
asr_output_str,
|
| 425 |
text_str,
|
|
@@ -512,6 +523,13 @@ with gr.Blocks(
|
|
| 512 |
(https://github.com/siddhu001/espnet/tree/sds_demo_recipe/egs2/TEMPLATE/sds1#how-to-use).
|
| 513 |
"""
|
| 514 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
with gr.Row():
|
| 516 |
with gr.Column(scale=1):
|
| 517 |
user_audio = gr.Audio(
|
|
@@ -519,6 +537,12 @@ with gr.Blocks(
|
|
| 519 |
streaming=True,
|
| 520 |
waveform_options=gr.WaveformOptions(sample_rate=16000),
|
| 521 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
with gr.Row():
|
| 523 |
type_radio = gr.Radio(
|
| 524 |
choices=["Cascaded", "E2E"],
|
|
@@ -686,7 +710,7 @@ with gr.Blocks(
|
|
| 686 |
)
|
| 687 |
user_audio.stream(
|
| 688 |
transcribe,
|
| 689 |
-
inputs=[state, user_audio, radio, ASR_radio, LLM_radio, type_radio],
|
| 690 |
outputs=[state, output_asr_text, output_text, output_audio, output_audio1],
|
| 691 |
).then(
|
| 692 |
lambda *args: callback.flag(list(args)), [user_audio], None, preprocess=False
|
|
|
|
| 283 |
except Exception:
|
| 284 |
print("Removing " + opt + " from ASR options since it cannot be loaded.")
|
| 285 |
ASR_options = ASR_options[:opt_count] + ASR_options[(opt_count + 1) :]
|
| 286 |
+
opt_count -=1
|
| 287 |
if opt == ASR_name:
|
| 288 |
ASR_name = ASR_options[0]
|
| 289 |
for opt_count in range(len(LLM_options)):
|
|
|
|
| 346 |
ASR_option: str,
|
| 347 |
LLM_option: str,
|
| 348 |
type_option: str,
|
| 349 |
+
input_text: str,
|
| 350 |
):
|
| 351 |
"""
|
| 352 |
Processes and transcribes an audio stream in real-time.
|
|
|
|
| 422 |
audio_output1 = None
|
| 423 |
else:
|
| 424 |
stream = np.concatenate((stream, y))
|
| 425 |
+
# import pdb;pdb.set_trace()
|
| 426 |
+
dialogue_model.chat.init_chat(
|
| 427 |
+
{
|
| 428 |
+
"role": "system",
|
| 429 |
+
"content": (
|
| 430 |
+
input_text
|
| 431 |
+
),
|
| 432 |
+
}
|
| 433 |
+
)
|
| 434 |
(
|
| 435 |
asr_output_str,
|
| 436 |
text_str,
|
|
|
|
| 523 |
(https://github.com/siddhu001/espnet/tree/sds_demo_recipe/egs2/TEMPLATE/sds1#how-to-use).
|
| 524 |
"""
|
| 525 |
)
|
| 526 |
+
default_instruct=(
|
| 527 |
+
"You are a helpful and friendly AI "
|
| 528 |
+
"assistant. "
|
| 529 |
+
"You are polite, respectful, and aim to "
|
| 530 |
+
"provide concise and complete responses of "
|
| 531 |
+
"less than 15 words."
|
| 532 |
+
)
|
| 533 |
with gr.Row():
|
| 534 |
with gr.Column(scale=1):
|
| 535 |
user_audio = gr.Audio(
|
|
|
|
| 537 |
streaming=True,
|
| 538 |
waveform_options=gr.WaveformOptions(sample_rate=16000),
|
| 539 |
)
|
| 540 |
+
input_text=gr.Textbox(
|
| 541 |
+
label="LLM prompt",
|
| 542 |
+
visible=True,
|
| 543 |
+
interactive=True,
|
| 544 |
+
value=default_instruct
|
| 545 |
+
)
|
| 546 |
with gr.Row():
|
| 547 |
type_radio = gr.Radio(
|
| 548 |
choices=["Cascaded", "E2E"],
|
|
|
|
| 710 |
)
|
| 711 |
user_audio.stream(
|
| 712 |
transcribe,
|
| 713 |
+
inputs=[state, user_audio, radio, ASR_radio, LLM_radio, type_radio, input_text],
|
| 714 |
outputs=[state, output_asr_text, output_text, output_audio, output_audio1],
|
| 715 |
).then(
|
| 716 |
lambda *args: callback.flag(list(args)), [user_audio], None, preprocess=False
|