Spaces:

WillHeld
/

diva-audio-chat

Paused

Helw150 committed on Oct 15, 2024

Commit

5d37f08

1 Parent(s): 4898006

No CUDA?

Files changed (2) hide show

README.md CHANGED Viewed

@@ -4,10 +4,10 @@ emoji: 💬
 colorFrom: gray
 colorTo: red
 sdk: gradio
-sdk_version: 5.0.2
 app_file: app.py
 pinned: false
 license: mpl-2.0
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 colorFrom: gray
 colorTo: red
 sdk: gradio
+sdk_version: 5.1.0
 app_file: app.py
 pinned: false
 license: mpl-2.0
 ---
+An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

app.py CHANGED Viewed

@@ -72,20 +72,36 @@ def response(state: AppState, audio: tuple):
     state.conversation.append(
         {"role": "user", "content": {"path": file_name, "mime_type": "audio/wav"}}
     )
     start = False
     for resp, outs in diva_audio(
-        (state.sampling_rate, state.stream), prev_outs=state.model_outs
     ):
-        print(resp)
         if not start:
             state.conversation.append({"role": "assistant", "content": resp})
             start = True
         else:
             state.conversation[-1]["content"] = resp
-        yield state, state.conversation
-    yield AppState(conversation=state.conversation, model_outs=outs), state.conversation
 def start_recording_user(state: AppState):

     state.conversation.append(
         {"role": "user", "content": {"path": file_name, "mime_type": "audio/wav"}}
     )
+    if spaces.config.Config.zero_gpu:
+        if state.model_outs is not None:
+            state.model_outs.past_key_values = tuple(
+                tuple(vec.cuda() for vec in tup)
+                for tup in state.model_outs.past_key_values
+            )
+    prev_outs = state.model_outs
     start = False
     for resp, outs in diva_audio(
+        (state.sampling_rate, state.stream),
+        prev_outs=(prev_out if state.model_outs is not None is not None else None),
     ):
         if not start:
             state.conversation.append({"role": "assistant", "content": resp})
             start = True
         else:
             state.conversation[-1]["content"] = resp
+        # yield state, state.conversation
+    del outs.logits
+    del outs.hidden_states
+    if spaces.config.Config.zero_gpu:
+        outs.past_key_values = tuple(
+            tuple(vec.cpu() for vec in tup) for tup in outs.past_key_values
+        )
+        print(outs)
+    return (
+        AppState(conversation=state.conversation, model_outs=outs),
+        state.conversation,
+    )
 def start_recording_user(state: AppState):