Spaces:

codelion
/

videoanalysis

Sleeping

App Files Community

codelion committed on Apr 2

Commit

c137e5c

verified ·

1 Parent(s): d638712

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -29

app.py CHANGED Viewed

@@ -1,28 +1,32 @@
 import os
-import json
 import gradio as gr
 import cv2
 from google import genai
 from google.genai.types import Part
 from tenacity import retry, stop_after_attempt, wait_random_exponential
-# Retrieve API key from environment variables.
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 if not GOOGLE_API_KEY:
     raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
-# Initialize the Gemini API client via AI Studio.
 client = genai.Client(api_key=GOOGLE_API_KEY)
-# Use the Gemini 2.0 Flash model.
-MODEL_NAME = "gemini-2.0-flash-001"
 @retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
 def call_gemini(video_file: str, prompt: str) -> str:
     """
-    Call the Gemini model with the provided video file and prompt.
-    The video file is read as bytes and passed with MIME type "video/mp4",
-    and the prompt is wrapped as a text part.
     """
     with open(video_file, "rb") as f:
         file_bytes = f.read()
@@ -37,17 +41,31 @@ def call_gemini(video_file: str, prompt: str) -> str:
 def safe_call_gemini(video_file: str, prompt: str) -> str:
     """
-    Wrapper for call_gemini that catches exceptions and returns a fallback string.
     """
     try:
         return call_gemini(video_file, prompt)
     except Exception as e:
-        print("Gemini call failed:", e)
-        return "No summary available."
 def hhmmss_to_seconds(time_str: str) -> float:
     """
     Convert a HH:MM:SS formatted string into seconds.
     """
     parts = time_str.strip().split(":")
     parts = [float(p) for p in parts]
@@ -60,23 +78,28 @@ def hhmmss_to_seconds(time_str: str) -> float:
 def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
     """
-    Ask Gemini to output key timestamps and descriptions as plain text.
-    The prompt instructs the model to output one line per event in the format:
-    HH:MM:SS - description
-    We then parse these lines and extract the corresponding frames using OpenCV.
-    Returns a list of tuples: (image_array, caption)
     """
     prompt = (
         "List the key timestamps in the video and a brief description of the event at that time. "
-        "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
     )
     prompt += f" Video Summary: {summary}"
     if user_query:
         prompt += f" Focus on: {user_query}"
-    # Use the safe call to get a response or fallback text.
     key_frames_response = safe_call_gemini(video_file, prompt)
     lines = key_frames_response.strip().split("\n")
     key_frames = []
     for line in lines:
@@ -110,13 +133,14 @@ def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
 def analyze_video(video_file: str, user_query: str) -> (str, list):
     """
-    Perform video analysis on the uploaded file.
-    First, call Gemini with a simple prompt to get a brief summary.
-    Then, call Gemini to list key timestamps and descriptions.
     Returns:
-      - A Markdown report summarizing the video.
-      - A gallery list of key frames (each as a tuple of (image, caption)).
     """
     summary_prompt = "Summarize this video."
     if user_query:
@@ -135,18 +159,28 @@ def analyze_video(video_file: str, user_query: str) -> (str, list):
 def gradio_interface(video_file, user_query: str) -> (str, list):
     """
-    Gradio interface function that accepts an uploaded video file and an optional query,
-    then returns a Markdown report and a gallery of key frame images with captions.
     """
-    if not video_file:
         return "Please upload a valid video file.", []
     return analyze_video(video_file, user_query)
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Video(label="Upload Video File"),
-        gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
     ],
     outputs=[
         gr.Markdown(label="Security & Surveillance Analysis Report"),
@@ -154,11 +188,11 @@ iface = gr.Interface(
     ],
     title="AI Video Analysis and Summariser Agent",
     description=(
-        "This tool uses Google's Gemini 2.0 Flash model via AI Studio to analyze an uploaded video. "
         "It returns a brief summary and extracts key frames based on that summary. "
         "Provide a video file and, optionally, a query to guide the analysis."
     )
 )
 if __name__ == "__main__":
-    iface.launch()

 import os
 import gradio as gr
 import cv2
 from google import genai
 from google.genai.types import Part
 from tenacity import retry, stop_after_attempt, wait_random_exponential
+# Retrieve API key from environment variables
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 if not GOOGLE_API_KEY:
     raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
+# Initialize the Gemini API client
 client = genai.Client(api_key=GOOGLE_API_KEY)
+# Define the model name
+MODEL_NAME = "gemini-2.0-flash"
 @retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
 def call_gemini(video_file: str, prompt: str) -> str:
     """
+    Call the Gemini model with a video file and prompt.
+    Args:
+        video_file (str): Path to the video file
+        prompt (str): Text prompt to guide the analysis
+    Returns:
+        str: Response text from the Gemini API
     """
     with open(video_file, "rb") as f:
         file_bytes = f.read()
 def safe_call_gemini(video_file: str, prompt: str) -> str:
     """
+    Wrapper for call_gemini that catches exceptions and returns error messages.
+    Args:
+        video_file (str): Path to the video file
+        prompt (str): Text prompt for the API
+    Returns:
+        str: API response or error message
     """
     try:
         return call_gemini(video_file, prompt)
     except Exception as e:
+        error_msg = f"Gemini call failed: {str(e)}"
+        print(error_msg)
+        return error_msg
 def hhmmss_to_seconds(time_str: str) -> float:
     """
     Convert a HH:MM:SS formatted string into seconds.
+    Args:
+        time_str (str): Time string in HH:MM:SS format
+    Returns:
+        float: Time in seconds
     """
     parts = time_str.strip().split(":")
     parts = [float(p) for p in parts]
 def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
     """
+    Extract key frames from the video based on timestamps provided by Gemini.
+    Args:
+        video_file (str): Path to the video file
+        summary (str): Video summary to provide context
+        user_query (str): Optional user query to focus the analysis
+    Returns:
+        list: List of tuples (image_array, caption)
     """
     prompt = (
         "List the key timestamps in the video and a brief description of the event at that time. "
+        "Output one line per event in the format: HH:MM:SS - description. Do not include any extra text."
     )
     prompt += f" Video Summary: {summary}"
     if user_query:
         prompt += f" Focus on: {user_query}"
     key_frames_response = safe_call_gemini(video_file, prompt)
+    if "Gemini call failed" in key_frames_response:
+        return []
     lines = key_frames_response.strip().split("\n")
     key_frames = []
     for line in lines:
 def analyze_video(video_file: str, user_query: str) -> (str, list):
     """
+    Analyze the video and generate a summary and key frames.
+    Args:
+        video_file (str): Path to the video file
+        user_query (str): Optional query to guide the analysis
     Returns:
+        tuple: (Markdown report, list of key frames)
     """
     summary_prompt = "Summarize this video."
     if user_query:
 def gradio_interface(video_file, user_query: str) -> (str, list):
     """
+    Gradio interface function to process video and return results.
+    Args:
+        video_file (str): Path to the uploaded video file
+        user_query (str): Optional query to guide analysis
+    Returns:
+        tuple: (Markdown report, gallery of key frames)
     """
+    if not video_file or not os.path.exists(video_file):
         return "Please upload a valid video file.", []
+    if not video_file.lower().endswith('.mp4'):
+        return "Please upload an MP4 video file.", []
     return analyze_video(video_file, user_query)
+# Define the Gradio interface
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Video(label="Upload Video File"),
+        gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis",
+                  placeholder="e.g., focus on unusual movements near the entrance")
     ],
     outputs=[
         gr.Markdown(label="Security & Surveillance Analysis Report"),
     ],
     title="AI Video Analysis and Summariser Agent",
     description=(
+        "This tool uses Google's Gemini 2.0 Flash model to analyze an uploaded video. "
         "It returns a brief summary and extracts key frames based on that summary. "
         "Provide a video file and, optionally, a query to guide the analysis."
     )
 )
 if __name__ == "__main__":
+    iface.launch()