Spaces:

Fabriwin
/

Convx

Build error

App Files Files Community

Fabriwin committed on Jan 3

Commit

3175dca

verified ·

1 Parent(s): 24e657d

Upload 3 files

Browse files

Files changed (3) hide show

README.md +14 -14
app.py +73 -0
requirements.txt +10 -0

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
----
-title: Convx
-emoji: 🦀
-colorFrom: purple
-colorTo: red
-sdk: gradio
-sdk_version: 5.9.1
-app_file: app.py
-pinned: false
-license: apache-2.0
-short_description: Conversational smal mixture modal
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Convx
+emoji: 🦀
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.9.1
+app_file: app.py
+pinned: false
+license: apache-2.0
+short_description: Conversational small mixture modal
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import gradio as gr
+import torch
+from transformers import pipeline
+import time
+import logging
+# Configure logging: the root logger emits records at INFO level and above.
+logging.basicConfig(level=logging.INFO)
+# Define the models using pipeline. They are built once at import time and
+# reused by the functions below.
+# Speech-to-text pipeline (openai/whisper-small), configured with chunk_length_s=30.
+asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small", chunk_length_s=30)
+# Text-generation pipeline (HuggingFaceTB/SmolLM2-360M).
+# NOTE(review): temperature/top_p are sampling parameters; presumably they only
+# take effect when sampling is enabled (do_sample=True) — confirm against the
+# transformers generation documentation.
+text_pipe = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-360M", max_length=512, temperature=0.7, top_p=0.9)
+# Text-to-speech pipeline (mussacharles60/swahili-tts-female-voice).
+tts_pipe = pipeline("text-to-speech", model="mussacharles60/swahili-tts-female-voice")
+# Define conversation rules
+# NOTE(review): MAX_INPUT_SIZE is not referenced anywhere in the visible code;
+# its intended unit (characters, words, seconds) cannot be determined from here.
+MAX_INPUT_SIZE = 100
+# Keywords that handle_conversation searches for in the transcription.
+PREDEFINED_ATTRIBUTES = ["name", "age", "location"]
+# Module-level list of recent prompts; generate_text appends to it and joins
+# its entries to form the model input.
+CONTEXT_HISTORY = []
+# Define the function to recognize speech
+def recognize_speech(audio):
+    retries = 3
+    for _ in range(retries):
+        try:
+            result = asr_pipe(audio, return_timestamps=True)
+            return result['text']
+        except Exception as e:
+            logging.error(f"ASR failed: {e}")
+            time.sleep(1)
+    return ""
+# Define the function to generate text
+def generate_text(prompt):
+    global CONTEXT_HISTORY
+    CONTEXT_HISTORY.append(prompt)
+    if len(CONTEXT_HISTORY) > 5:
+        CONTEXT_HISTORY.pop(0)
+    context = " ".join(CONTEXT_HISTORY)
+    outputs = text_pipe(context, max_length=512, num_return_sequences=1)
+    generated_text = outputs[0]['generated_text']
+    return generated_text
+# Define the function to synthesize speech
+def synthesize_speech(text):
+    audio = tts_pipe(text, output_format="wav", sample_rate=16000)
+    return audio
+# Define the function to handle conversation
+def handle_conversation(audio):
+    recognized_text = recognize_speech(audio)
+    if any(attr in recognized_text.lower() for attr in PREDEFINED_ATTRIBUTES):
+        generated_text = generate_text(f"Please provide your {recognized_text}")
+    else:
+        generated_text = generate_text(recognized_text)
+    synthesized_audio = synthesize_speech(generated_text)
+    return synthesized_audio, generated_text
+# Define the Gradio app
+demo = gr.Blocks()
+# Define the input and output components
+input_audio = gr.Audio(label="Input Audio")
+output_audio = gr.Audio(label="Output Audio")
+output_text = gr.Textbox(label="Output Text")
+# Define the buttons
+conversation_button = gr.Button("Start Conversation")
+# Define the event listeners
+conversation_button.click(handle_conversation, inputs=input_audio, outputs=[output_audio, output_text])
+# Launch the app
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+datasets[audio]
+transformers==4.40.1
+torchaudio
+accelerate
+evaluate
+jiwer
+tensorboard
+gradio
+spaces
+# logging is part of the Python standard library and must not be installed from PyPI