Spaces:

codelion
/

videoanalysis

Sleeping

App Files Files Community

codelion committed on Apr 2

Commit

d38e256

verified ·

1 Parent(s): b3e97a9

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -25

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import os
 import time
 import gradio as gr
 from google import genai
 from google.genai import types
@@ -11,22 +13,28 @@ if not GOOGLE_API_KEY:
 # Initialize the Gemini API client
 client = genai.Client(api_key=GOOGLE_API_KEY)
-MODEL_NAME = "gemini-2.5-pro-exp-03-25"  # Model from the notebook that supports video analysis
-def upload_and_process_video(video_file: str) -> types.File:
     """
     Upload a video file to the Gemini API and wait for processing.
     Args:
         video_file (str): Path to the video file
     Returns:
         types.File: Processed video file object
     """
     try:
         video_file_obj = client.files.upload(file=video_file)
         while video_file_obj.state == "PROCESSING":
-            print(f"Processing {video_file}...")
             time.sleep(10)
             video_file_obj = client.files.get(name=video_file_obj.name)
@@ -38,74 +46,152 @@ def upload_and_process_video(video_file: str) -> types.File:
     except Exception as e:
         raise Exception(f"Error uploading video: {str(e)}")
-def analyze_video(video_file: str, user_query: str) -> str:
     """
-    Analyze the video using the Gemini API and return a summary.
     Args:
         video_file (str): Path to the video file
         user_query (str): Optional query to guide the analysis
     Returns:
-        str: Markdown-formatted report
     """
     # Validate input
     if not video_file or not os.path.exists(video_file):
-        return "Please upload a valid video file."
     if not video_file.lower().endswith('.mp4'):
-        return "Please upload an MP4 video file."
     try:
         # Upload and process the video
         video_file_obj = upload_and_process_video(video_file)
-        # Prepare prompt
-        prompt = "Provide a detailed summary of this video."
         if user_query:
-            prompt += f" Focus on: {user_query}"
-        # Analyze video with Gemini API
-        response = client.models.generate_content(
             model=MODEL_NAME,
-            contents=[
-                video_file_obj,  # Pass the processed video file object
-                prompt
-            ]
         )
-        summary = response.text
         # Generate Markdown report
         markdown_report = (
             "## Video Analysis Report\n\n"
             f"**Summary:**\n{summary}\n"
         )
-        return markdown_report
     except Exception as e:
         error_msg = (
             "## Video Analysis Report\n\n"
             f"**Error:** Unable to analyze video.\n"
             f"Details: {str(e)}\n"
         )
-        return error_msg
 # Define the Gradio interface
 iface = gr.Interface(
     fn=analyze_video,
     inputs=[
-        gr.Video(label="Upload Video File (MP4)"),  # Removed type="filepath"
         gr.Textbox(label="Analysis Query (optional)",
                   placeholder="e.g., focus on main events or themes")
     ],
-    outputs=gr.Markdown(label="Video Analysis Report"),
     title="AI Video Analysis Agent with Gemini",
     description=(
-        "Upload an MP4 video to get a summary using Google's Gemini API. "
-        "This tool analyzes the video content directly without audio or frame extraction. "
         "Optionally, provide a query to guide the analysis."
     )
 )
 if __name__ == "__main__":
-    # Launch with share=True to create a public link
     iface.launch(share=True)

 import os
 import time
+import json
 import gradio as gr
+import cv2
 from google import genai
 from google.genai import types
 # Initialize the Gemini API client
 client = genai.Client(api_key=GOOGLE_API_KEY)
+MODEL_NAME = "gemini-2.5-pro-exp-03-25"  # Model supporting video analysis
+def upload_and_process_video(video_file: str, timeout: int = 300) -> types.File:
     """
     Upload a video file to the Gemini API and wait for processing.
     Args:
         video_file (str): Path to the video file
+        timeout (int): Maximum time to wait for processing in seconds (default: 5 minutes)
     Returns:
         types.File: Processed video file object
     """
     try:
         video_file_obj = client.files.upload(file=video_file)
+        start_time = time.time()
         while video_file_obj.state == "PROCESSING":
+            elapsed_time = time.time() - start_time
+            if elapsed_time > timeout:
+                raise TimeoutError(f"Video processing timed out after {timeout} seconds.")
+            print(f"Processing {video_file}... ({int(elapsed_time)}s elapsed)")
             time.sleep(10)
             video_file_obj = client.files.get(name=video_file_obj.name)
     except Exception as e:
         raise Exception(f"Error uploading video: {str(e)}")
def hhmmss_to_seconds(timestamp: str) -> float:
    """
    Convert a colon-separated timestamp to seconds.

    Accepts HH:MM:SS as well as the shorter MM:SS and SS forms. Each field is
    parsed with float(), so fractional values such as "00:00:01.5" are allowed.

    Args:
        timestamp (str): Time in HH:MM:SS, MM:SS or SS format
    Returns:
        float: Time in seconds
    Raises:
        ValueError: If the timestamp has more than three fields or a field is
            empty or not numeric
    """
    fields = timestamp.strip().split(":")
    if len(fields) > 3:
        raise ValueError(f"Invalid timestamp: {timestamp!r}")
    # Fold left to right so the same loop handles one, two or three fields:
    # every extra field on the left is worth 60 times the one after it.
    seconds = 0.0
    for field in fields:
        seconds = seconds * 60 + float(field)
    return seconds
def _strip_code_fence(text: str) -> str:
    """Return text without a surrounding Markdown code fence (``` or ```json)."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    # Drop the whole opening fence line, including any language tag on it.
    _, _, text = text.partition("\n")
    text = text.rstrip()
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def extract_key_frames(video_file: str, key_frames_json: str) -> list:
    """
    Extract key frames from the video based on JSON data.

    The JSON must decode to a list of objects. Each object supplies a time
    under "timecode" (or "timestamp") and a label under "title" (or
    "caption", defaulting to "Untitled"). Entries that are not objects, have
    no time, or have a time that cannot be parsed are skipped individually.
    A Markdown code fence around the JSON is tolerated.

    Any other failure (invalid JSON, non-list payload, unreadable video) is
    printed and results in an empty list; this function does not raise.

    Args:
        video_file (str): Path to the video file
        key_frames_json (str): JSON string with key frames data
    Returns:
        list: List of tuples (image, caption), where image is the RGB frame
            and caption is "<timestamp>: <title>"
    """
    try:
        key_frames = json.loads(_strip_code_fence(key_frames_json))
        if not isinstance(key_frames, list):
            raise ValueError("Key frames data must be a list of objects.")
        if not key_frames:
            # Nothing to extract, so there is no need to open the video.
            return []
        extracted_frames = []
        cap = cv2.VideoCapture(video_file)
        try:
            if not cap.isOpened():
                raise ValueError("Could not open video file.")
            for frame in key_frames:
                if not isinstance(frame, dict):
                    continue
                timestamp = frame.get("timecode", frame.get("timestamp", ""))
                title = frame.get("title", frame.get("caption", "Untitled"))
                if not timestamp:
                    continue
                try:
                    seconds = hhmmss_to_seconds(timestamp)
                except (ValueError, AttributeError):
                    # One bad timestamp must not discard the frames already
                    # collected or the ones still to come.
                    print(f"Skipping key frame with invalid timestamp: {timestamp!r}")
                    continue
                cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
                ret, frame_img = cap.read()
                if ret:
                    # OpenCV decodes to BGR; convert so the caller gets RGB.
                    frame_rgb = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
                    caption = f"{timestamp}: {title}"
                    extracted_frames.append((frame_rgb, caption))
        finally:
            # Release the capture handle even when opening or decoding fails.
            cap.release()
        return extracted_frames
    except Exception as e:
        print(f"Error extracting frames: {str(e)}")
        return []
def analyze_video(video_file: str, user_query: str) -> tuple[str, list]:
    """
    Analyze the video using the Gemini API and extract key frames.

    Uploads the video, asks the model for a summary and then for a JSON list
    of key frames, and pulls those frames out of the local file. Failures
    during upload or analysis are reported inside the returned Markdown
    rather than raised.

    Args:
        video_file (str): Path to the video file
        user_query (str): Optional query to guide the analysis; empty or
            whitespace-only values are ignored
    Returns:
        tuple: (Markdown report, list of key frames as (image, caption) tuples)
    """
    # Validate input
    if not video_file or not os.path.exists(video_file):
        return "Please upload a valid video file.", []
    if not video_file.lower().endswith('.mp4'):
        return "Please upload an MP4 video file.", []

    # Build the optional focus clause once; a blank query must not add an
    # empty "Focus on:" instruction to the prompts.
    query = (user_query or "").strip()
    focus_clause = f" Focus on: {query}" if query else ""

    try:
        # Upload and process the video
        video_file_obj = upload_and_process_video(video_file)

        # Step 1: Generate detailed summary
        summary_prompt = (
            "Provide a detailed summary of this video with timestamps for key sections."
            + focus_clause
        )
        summary_response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[video_file_obj, summary_prompt]
        )
        # Guard against a response without text so the report never shows "None".
        summary = summary_response.text or "*No summary returned.*"

        # Step 2: Ask the model for key frames as JSON
        key_frames_prompt = (
            "Identify key frames in this video and return them as a JSON array. "
            "Each object should have 'timecode' (in HH:MM:SS format) and 'title' describing the scene."
            + focus_clause
        )
        key_frames_response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[video_file_obj, key_frames_prompt]
        )
        key_frames_json = key_frames_response.text or ""

        # Parse and extract frames
        key_frames = extract_key_frames(video_file, key_frames_json)

        # Generate Markdown report; the blank line after the summary keeps the
        # URI line from being merged into the summary's last paragraph.
        markdown_report = (
            "## Video Analysis Report\n\n"
            f"**Summary:**\n{summary}\n\n"
            f"**Video URI:** {video_file_obj.uri}\n"
        )
        if key_frames:
            markdown_report += "\n**Key Frames Identified:**\n"
            for i, (_, caption) in enumerate(key_frames, 1):
                markdown_report += f"- Frame {i}: {caption}\n"
        else:
            markdown_report += "\n*No key frames extracted.*\n"
        return markdown_report, key_frames
    except Exception as e:
        error_msg = (
            "## Video Analysis Report\n\n"
            f"**Error:** Unable to analyze video.\n"
            f"Details: {str(e)}\n"
            "Please check your API key, ensure the video is valid, or try again later."
        )
        return error_msg, []
 # Define the Gradio interface
 iface = gr.Interface(
     fn=analyze_video,
     inputs=[
+        gr.Video(label="Upload Video File (MP4)"),
         gr.Textbox(label="Analysis Query (optional)",
                   placeholder="e.g., focus on main events or themes")
     ],
+    outputs=[
+        gr.Markdown(label="Video Analysis Report"),
+        gr.Gallery(label="Key Frames", columns=2)
+    ],
     title="AI Video Analysis Agent with Gemini",
     description=(
+        "Upload an MP4 video to get a detailed summary and key frames using Google's Gemini API. "
+        "This tool analyzes the video content directly and extracts key moments as images. "
         "Optionally, provide a query to guide the analysis."
     )
 )
 if __name__ == "__main__":
     iface.launch(share=True)