Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -100,7 +100,7 @@ def caption_frame(image: Image.Image, prompt: str) -> str:
|
|
| 100 |
inputs=input_ids,
|
| 101 |
attention_mask=attention_mask,
|
| 102 |
images=px,
|
| 103 |
-
max_new_tokens=
|
| 104 |
temperature=0.7,
|
| 105 |
do_sample=True,
|
| 106 |
)
|
|
@@ -211,27 +211,55 @@ with gr.Blocks() as demo:
|
|
| 211 |
outputs=[video_display, chatbot]
|
| 212 |
)
|
| 213 |
|
| 214 |
-
# Modified process function to update chatbot
|
| 215 |
-
def process_video_with_chat(video_path, num_frames, sampling_method, caption_mode, custom_prompt, chat_history):
|
| 216 |
if not video_path:
|
| 217 |
chat_history.append(["Assistant", "Please upload a video first."])
|
| 218 |
-
|
|
|
|
| 219 |
|
| 220 |
chat_history.append(["User", "Analyzing video..."])
|
|
|
|
| 221 |
|
| 222 |
-
#
|
| 223 |
-
|
|
|
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
| 227 |
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
# Process button
|
| 231 |
process_btn.click(
|
| 232 |
process_video_with_chat,
|
| 233 |
inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt, chatbot],
|
| 234 |
-
outputs=[chatbot, frame_gallery]
|
|
|
|
| 235 |
)
|
| 236 |
|
| 237 |
demo.launch()
|
|
|
|
| 100 |
inputs=input_ids,
|
| 101 |
attention_mask=attention_mask,
|
| 102 |
images=px,
|
| 103 |
+
max_new_tokens=15,
|
| 104 |
temperature=0.7,
|
| 105 |
do_sample=True,
|
| 106 |
)
|
|
|
|
| 211 |
outputs=[video_display, chatbot]
|
| 212 |
)
|
| 213 |
|
| 214 |
+
# Modified process function to update chatbot with streaming
|
| 215 |
+
def process_video_with_chat(video_path, num_frames, sampling_method, caption_mode, custom_prompt, chat_history, progress=gr.Progress()):
    """Stream per-frame captions for a video into the chatbot and frame gallery.

    This is a generator event handler: every ``yield`` produces a
    ``(chat_history, gallery_frames)`` pair, so the UI is refreshed after
    each captioned frame instead of only once at the end.

    Args:
        video_path: Path of the video to analyze; a falsy value means no
            video was provided.
        num_frames: Forwarded unchanged to ``extract_frames``.
        sampling_method: Forwarded unchanged to ``extract_frames``.
        caption_mode: Accepted for interface compatibility; not read here.
        custom_prompt: Accepted for interface compatibility; not read here.
        chat_history: Existing chat entries as a list of two-item lists.
            May be ``None`` or empty. The caller's list is never mutated.
        progress: Gradio progress tracker used to report status.

    Yields:
        Tuples ``(chat_history, frames_so_far)``. The second item is ``None``
        until at least one frame has been captioned.
    """
    # Work on a private copy: this avoids mutating the caller's list in place
    # and also tolerates a ``None`` history (which would otherwise raise
    # AttributeError on the first ``append``).
    chat_history = list(chat_history) if chat_history else []

    if not video_path:
        chat_history.append(["Assistant", "Please upload a video first."])
        yield chat_history, None
        return

    chat_history.append(["User", "Analyzing video..."])
    yield chat_history, None

    # Extract frames
    progress(0, desc="Extracting frames...")
    frames = extract_frames(video_path, num_frames, sampling_method)

    if not frames:
        chat_history.append(["Assistant", "Failed to extract frames from video."])
        yield chat_history, None
        return

    total = len(frames)

    # Placeholder entry that is overwritten as captions stream in.
    chat_history.append(["Assistant", ""])
    # NOTE(review): caption_mode and custom_prompt are ignored; every frame is
    # captioned with this fixed prompt. Confirm that this is intentional.
    prompt = "Provide a brief one-sentence description of what's happening in this image."

    captions = []
    for index, frame in enumerate(frames, start=1):
        # The denominator is total + 1 so the bar only reaches 100% at the
        # explicit completion update after the loop.
        progress(index / (total + 1), desc=f"Analyzing frame {index}/{total}...")
        caption = caption_frame(frame, prompt)
        captions.append(f"Frame {index}: {caption}\n")

        # Replace the placeholder with everything captioned so far and grow
        # the gallery one frame at a time.
        current_text = "".join(captions)
        chat_history[-1] = ["Assistant", f"Analyzing {total} frames:\n\n{current_text}"]
        yield chat_history, frames[:index]

    progress(1.0, desc="Analysis complete!")

    # Final update: same captions, with the heading switched to past tense.
    full_caption = "".join(captions)
    chat_history[-1] = ["Assistant", f"Analyzed {total} frames:\n\n{full_caption}"]
    yield chat_history, frames
|
| 256 |
|
| 257 |
+
# Process button with streaming
|
| 258 |
# Wire the process button to the generator handler: it receives the current
# values of the listed input components, and each value it yields updates the
# chatbot and the frame gallery.
process_btn.click(
    fn=process_video_with_chat,
    inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt, chatbot],
    outputs=[chatbot, frame_gallery],
    show_progress=True,
)
|
| 264 |
|
| 265 |
demo.launch()
|