yaya-sy committed · verified
Commit b34bbf6 · 1 Parent(s): 01b6957

Update app.py

Files changed (1): app.py (+62 -27)
app.py CHANGED
@@ -189,20 +189,66 @@ def model_inference(input_dict, history):
     audio_path = tts(buffer)
     return audio_path  # Return the audio file path
 
-# Option 1: Use regular Interface with streaming (recommended)
+# Main interface with image preview
 with gr.Blocks() as demo:
     gr.Markdown("# oolel-vision-experimental `@video-infer for video understanding`")
 
-    chatbot = gr.Chatbot()
-    msg = gr.MultimodalTextbox(
-        label="Query Input",
-        file_types=["image", "video"],
-        file_count="multiple"
-    )
-    audio_output = gr.Audio(label="Generated Speech")
-    clear = gr.Button("Clear")
+    with gr.Row():
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(type="messages")
+            msg = gr.MultimodalTextbox(
+                label="Query Input",
+                file_types=["image", "video"],
+                file_count="multiple"
+            )
+            clear = gr.Button("Clear")
+
+        with gr.Column(scale=1):
+            uploaded_files = gr.Gallery(
+                label="Uploaded Files",
+                show_label=True,
+                elem_id="gallery",
+                columns=2,
+                rows=2,
+                object_fit="contain",
+                height="auto"
+            )
+            audio_output = gr.Audio(label="Generated Speech")
+
+    def update_gallery(message):
+        """Update gallery with uploaded files"""
+        if message and "files" in message and message["files"]:
+            # Filter for image files only (videos won't display properly in gallery)
+            image_files = []
+            for file_path in message["files"]:
+                try:
+                    # Check if it's an image by trying to open it
+                    with Image.open(file_path) as img:
+                        image_files.append(file_path)
+                except:
+                    # If it fails, it's probably a video or other file type
+                    # Generate video thumbnail for videos
+                    try:
+                        vidcap = cv2.VideoCapture(file_path)
+                        success, frame = vidcap.read()
+                        if success:
+                            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                            thumbnail = Image.fromarray(frame)
+                            # Save thumbnail temporarily
+                            import tempfile
+                            temp_thumb = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
+                            thumbnail.save(temp_thumb.name)
+                            image_files.append(temp_thumb.name)
+                        vidcap.release()
+                    except:
+                        pass
+            return image_files
+        return []
 
     def respond(message, chat_history):
+        # Update gallery first
+        gallery_files = update_gallery(message)
+
         # Add user message to chat history
         bot_message = ""
         chat_history.append([message["text"], ""])
@@ -211,36 +257,25 @@ with gr.Blocks() as demo:
         for response in model_inference(message, chat_history):
             bot_message = response
             chat_history[-1][1] = bot_message
-            yield "", chat_history, None
+            yield "", chat_history, None, gallery_files
 
         # Generate audio after streaming is complete
        try:
             if bot_message.strip():  # Only generate TTS if there's actual text
                 audio_path = tts(bot_message)
                 if audio_path:
-                    yield "", chat_history, audio_path
+                    yield "", chat_history, audio_path, gallery_files
                 else:
                     print("TTS returned None or empty result")
-                    yield "", chat_history, None
+                    yield "", chat_history, None, gallery_files
             else:
-                yield "", chat_history, None
+                yield "", chat_history, None, gallery_files
         except Exception as e:
             print(f"TTS Error: {e}")
-            yield "", chat_history, None
+            yield "", chat_history, None, gallery_files
 
-    msg.submit(respond, [msg, chatbot], [msg, chatbot, audio_output])
-    clear.click(lambda: ([], None), outputs=[chatbot, audio_output])
-
-    # Option 2: Use ChatInterface without outputs parameter (simpler but no audio)
-    # demo = gr.ChatInterface(
-    #     fn=model_inference,
-    #     description="# oolel-vision-experimental `@video-infer for video understanding`**",
-    #     fill_height=True,
-    #     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),
-    #     stop_btn="Stop Generation",
-    #     multimodal=True,
-    #     cache_examples=False,
-    # )
+    msg.submit(respond, [msg, chatbot], [msg, chatbot, audio_output, uploaded_files])
+    clear.click(lambda: ([], None, []), outputs=[chatbot, audio_output, uploaded_files])
 
 if __name__ == "__main__":
     demo.launch(debug=True)
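
Note: the commit constructs the chatbot as `gr.Chatbot(type="messages")`, but `respond` still appends `[message["text"], ""]` pairs and writes `chat_history[-1][1]`, which is the older tuple-style history. With `type="messages"`, Gradio expects the history to be a list of `{"role": ..., "content": ...}` dicts. A minimal sketch of the same streaming loop in that format (`respond_messages` is a hypothetical name; `update_gallery` and `model_inference` as in the diff):

def respond_messages(message, chat_history):
    # Sketch only, not part of this commit: messages-format history entries
    # are dicts with "role" and "content" keys.
    gallery_files = update_gallery(message)
    chat_history.append({"role": "user", "content": message["text"]})
    chat_history.append({"role": "assistant", "content": ""})
    for response in model_inference(message, chat_history):
        chat_history[-1]["content"] = response  # update the assistant turn in place
        yield "", chat_history, None, gallery_files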