Spaces:

codelion
/

videoanalysis

Sleeping

App Files Files Community

codelion committed on Apr 2

Commit

d638712

verified ·

1 Parent(s): 0425992

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -25

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ MODEL_NAME = "gemini-2.0-flash-001"
 def call_gemini(video_file: str, prompt: str) -> str:
     """
     Call the Gemini model with the provided video file and prompt.
-    The video is read as bytes and passed with MIME type "video/mp4",
     and the prompt is wrapped as a text part.
     """
     with open(video_file, "rb") as f:
@@ -35,6 +35,16 @@ def call_gemini(video_file: str, prompt: str) -> str:
     )
     return response.text
 def hhmmss_to_seconds(time_str: str) -> float:
     """
     Convert a HH:MM:SS formatted string into seconds.
@@ -50,7 +60,7 @@ def hhmmss_to_seconds(time_str: str) -> float:
 def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
     """
-    Ask Gemini to output key timestamps and descriptions in plain text.
     The prompt instructs the model to output one line per event in the format:
     HH:MM:SS - description
     We then parse these lines and extract the corresponding frames using OpenCV.
@@ -61,24 +71,20 @@ def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
         "List the key timestamps in the video and a brief description of the event at that time. "
         "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
     )
-    # Append the summary (and user query if provided) so the model has context.
     prompt += f" Video Summary: {summary}"
     if user_query:
         prompt += f" Focus on: {user_query}"
-    try:
-        key_frames_response = call_gemini(video_file, prompt)
-        lines = key_frames_response.strip().split("\n")
-        key_frames = []
-        for line in lines:
-            if " - " in line:
-                parts = line.split(" - ", 1)
-                timestamp = parts[0].strip()
-                description = parts[1].strip()
-                key_frames.append({"timestamp": timestamp, "description": description})
-    except Exception as e:
-        print("Error in key frame extraction:", e)
-        key_frames = []
     extracted_frames = []
     cap = cv2.VideoCapture(video_file)
@@ -104,24 +110,20 @@ def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
 def analyze_video(video_file: str, user_query: str) -> (str, list):
     """
-    Perform a single-step video analysis.
     First, call Gemini with a simple prompt to get a brief summary.
-    Then, call Gemini to list key timestamps with descriptions.
     Returns:
       - A Markdown report summarizing the video.
       - A gallery list of key frames (each as a tuple of (image, caption)).
     """
-    # Use a very simple prompt for summary.
     summary_prompt = "Summarize this video."
     if user_query:
         summary_prompt += f" Also focus on: {user_query}"
-    try:
-        summary = call_gemini(video_file, summary_prompt)
-    except Exception as e:
-        summary = f"[Error in summary extraction: {e}]"
-    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"
     key_frames_gallery = get_key_frames(video_file, summary, user_query)
     if not key_frames_gallery:
         markdown_report += "\n*No key frames were extracted.*\n"
@@ -134,7 +136,7 @@ def analyze_video(video_file: str, user_query: str) -> (str, list):
 def gradio_interface(video_file, user_query: str) -> (str, list):
     """
     Gradio interface function that accepts an uploaded video file and an optional query,
-    then returns a Markdown report and a gallery of extracted key frames with captions.
     """
     if not video_file:
         return "Please upload a valid video file.", []

 def call_gemini(video_file: str, prompt: str) -> str:
     """
     Call the Gemini model with the provided video file and prompt.
+    The video file is read as bytes and passed with MIME type "video/mp4",
     and the prompt is wrapped as a text part.
     """
     with open(video_file, "rb") as f:
     )
     return response.text
+def safe_call_gemini(video_file: str, prompt: str) -> str:
+    """
+    Wrapper for call_gemini that catches exceptions and returns a fallback string.
+    """
+    try:
+        return call_gemini(video_file, prompt)
+    except Exception as e:
+        print("Gemini call failed:", e)
+        return "No summary available."
 def hhmmss_to_seconds(time_str: str) -> float:
     """
     Convert a HH:MM:SS formatted string into seconds.
 def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
     """
+    Ask Gemini to output key timestamps and descriptions as plain text.
     The prompt instructs the model to output one line per event in the format:
     HH:MM:SS - description
     We then parse these lines and extract the corresponding frames using OpenCV.
         "List the key timestamps in the video and a brief description of the event at that time. "
         "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
     )
     prompt += f" Video Summary: {summary}"
     if user_query:
         prompt += f" Focus on: {user_query}"
+    # Use the safe call to get a response or fallback text.
+    key_frames_response = safe_call_gemini(video_file, prompt)
+    lines = key_frames_response.strip().split("\n")
+    key_frames = []
+    for line in lines:
+        if " - " in line:
+            parts = line.split(" - ", 1)
+            timestamp = parts[0].strip()
+            description = parts[1].strip()
+            key_frames.append({"timestamp": timestamp, "description": description})
     extracted_frames = []
     cap = cv2.VideoCapture(video_file)
 def analyze_video(video_file: str, user_query: str) -> (str, list):
     """
+    Perform video analysis on the uploaded file.
     First, call Gemini with a simple prompt to get a brief summary.
+    Then, call Gemini to list key timestamps and descriptions.
     Returns:
       - A Markdown report summarizing the video.
       - A gallery list of key frames (each as a tuple of (image, caption)).
     """
     summary_prompt = "Summarize this video."
     if user_query:
         summary_prompt += f" Also focus on: {user_query}"
+    summary = safe_call_gemini(video_file, summary_prompt)
+    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"
     key_frames_gallery = get_key_frames(video_file, summary, user_query)
     if not key_frames_gallery:
         markdown_report += "\n*No key frames were extracted.*\n"
 def gradio_interface(video_file, user_query: str) -> (str, list):
     """
     Gradio interface function that accepts an uploaded video file and an optional query,
+    then returns a Markdown report and a gallery of key frame images with captions.
     """
     if not video_file:
         return "Please upload a valid video file.", []