Spaces:

akhaliq
/

FastVLM-7B

Runtime error

App Files Files Community

akhaliq HF Staff committed on Sep 2

Commit

9b41a1c

verified ·

1 Parent(s): 9db4084

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -11

app.py CHANGED Viewed

@@ -100,7 +100,7 @@ def caption_frame(image: Image.Image, prompt: str) -> str:
             inputs=input_ids,
             attention_mask=attention_mask,
             images=px,
-            max_new_tokens=256,
             temperature=0.7,
             do_sample=True,
         )
@@ -211,27 +211,55 @@ with gr.Blocks() as demo:
         outputs=[video_display, chatbot]
     )
-    # Modified process function to update chatbot
-    def process_video_with_chat(video_path, num_frames, sampling_method, caption_mode, custom_prompt, chat_history):
         if not video_path:
             chat_history.append(["Assistant", "Please upload a video first."])
-            return chat_history, None
         chat_history.append(["User", "Analyzing video..."])
-        # Call the original process_video function
-        result, frames = process_video(video_path, num_frames, sampling_method, caption_mode, custom_prompt)
-        # Add result to chat
-        chat_history.append(["Assistant", result])
-        return chat_history, frames
-    # Process button
     process_btn.click(
         process_video_with_chat,
         inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt, chatbot],
-        outputs=[chatbot, frame_gallery]
     )
     demo.launch()

             inputs=input_ids,
             attention_mask=attention_mask,
             images=px,
+            max_new_tokens=15,
             temperature=0.7,
             do_sample=True,
         )
         outputs=[video_display, chatbot]
     )
+    # Modified process function to update chatbot with streaming
+    def process_video_with_chat(video_path, num_frames, sampling_method, caption_mode, custom_prompt, chat_history, progress=gr.Progress()):
         if not video_path:
             chat_history.append(["Assistant", "Please upload a video first."])
+            yield chat_history, None
+            return
         chat_history.append(["User", "Analyzing video..."])
+        yield chat_history, None
+        # Extract frames
+        progress(0, desc="Extracting frames...")
+        frames = extract_frames(video_path, num_frames, sampling_method)
+        if not frames:
+            chat_history.append(["Assistant", "Failed to extract frames from video."])
+            yield chat_history, None
+            return
+        # Start streaming response
+        chat_history.append(["Assistant", ""])
+        prompt = "Provide a brief one-sentence description of what's happening in this image."
+        captions = []
+        for i, frame in enumerate(frames):
+            progress((i + 1) / (len(frames) + 1), desc=f"Analyzing frame {i + 1}/{len(frames)}...")
+            caption = caption_frame(frame, prompt)
+            frame_caption = f"Frame {i + 1}: {caption}\n"
+            captions.append(frame_caption)
+            # Update the last message with accumulated captions
+            current_text = "".join(captions)
+            chat_history[-1] = ["Assistant", f"Analyzing {len(frames)} frames:\n\n{current_text}"]
+            yield chat_history, frames[:i+1]  # Also update frame gallery progressively
+        progress(1.0, desc="Analysis complete!")
+        # Final update with complete message
+        full_caption = "".join(captions)
+        final_message = f"Analyzed {len(frames)} frames:\n\n{full_caption}"
+        chat_history[-1] = ["Assistant", final_message]
+        yield chat_history, frames
+    # Process button with streaming
     process_btn.click(
         process_video_with_chat,
         inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt, chatbot],
+        outputs=[chatbot, frame_gallery],
+        show_progress=True
     )
     demo.launch()