Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,26 +1,28 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
|
|
|
|
|
|
| 3 |
from google import genai
|
| 4 |
from google.genai import types
|
| 5 |
from google.genai.types import Part
|
| 6 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
| 7 |
|
| 8 |
-
# Retrieve API key from environment
|
| 9 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
| 10 |
if not GOOGLE_API_KEY:
|
| 11 |
raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
|
| 12 |
|
| 13 |
-
# Initialize the Gemini API client via AI Studio
|
| 14 |
client = genai.Client(api_key=GOOGLE_API_KEY)
|
| 15 |
|
| 16 |
-
# Use the Gemini 2.0 Flash model
|
| 17 |
MODEL_NAME = "gemini-2.0-flash-001"
|
| 18 |
|
| 19 |
@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
|
| 20 |
def call_gemini(video_url: str, prompt: str) -> str:
|
| 21 |
"""
|
| 22 |
Call the Gemini model with the provided video URL and prompt.
|
| 23 |
-
The video is
|
| 24 |
"""
|
| 25 |
response = client.models.generate_content(
|
| 26 |
model=MODEL_NAME,
|
|
@@ -31,51 +33,88 @@ def call_gemini(video_url: str, prompt: str) -> str:
|
|
| 31 |
)
|
| 32 |
return response.text
|
| 33 |
|
| 34 |
-
def
|
| 35 |
"""
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
"""
|
| 39 |
analysis = ""
|
| 40 |
num_iterations = 3
|
| 41 |
|
| 42 |
for i in range(num_iterations):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if i == 0:
|
| 44 |
-
prompt =
|
| 45 |
-
"You are a video analysis agent focusing on security and surveillance. "
|
| 46 |
-
"Provide a detailed summary of the video, highlighting any key events, suspicious activities, or anomalies."
|
| 47 |
-
)
|
| 48 |
else:
|
| 49 |
-
prompt = (
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
| 54 |
try:
|
| 55 |
analysis = call_gemini(video_url, prompt)
|
| 56 |
except Exception as e:
|
| 57 |
analysis += f"\n[Error during iteration {i+1}: {e}]"
|
| 58 |
-
break
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
def gradio_interface(video_url: str) -> str:
|
| 62 |
"""
|
| 63 |
-
Gradio interface function that takes a video URL and
|
|
|
|
| 64 |
"""
|
| 65 |
if not video_url:
|
| 66 |
-
return "Please provide a valid video URL."
|
| 67 |
-
return analyze_video(video_url)
|
| 68 |
|
| 69 |
-
# Define
|
| 70 |
iface = gr.Interface(
|
| 71 |
fn=gradio_interface,
|
| 72 |
-
inputs=
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
title="AI Video Analysis and Summariser Agent",
|
| 75 |
description=(
|
| 76 |
"This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
|
| 77 |
-
"to iteratively analyze a video for security and surveillance insights.
|
| 78 |
-
"
|
|
|
|
| 79 |
)
|
| 80 |
)
|
| 81 |
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
from collections import Counter
|
| 5 |
from google import genai
|
| 6 |
from google.genai import types
|
| 7 |
from google.genai.types import Part
|
| 8 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
| 9 |
|
| 10 |
+
# The Gemini API key is read from the environment; start-up aborts when it is
# missing or empty.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Module-level Gemini client (AI Studio), authenticated with the key above.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Model identifier passed to every generate_content call (Gemini 2.0 Flash).
MODEL_NAME = "gemini-2.0-flash-001"
|
| 20 |
|
| 21 |
@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
|
| 22 |
def call_gemini(video_url: str, prompt: str) -> str:
|
| 23 |
"""
|
| 24 |
Call the Gemini model with the provided video URL and prompt.
|
| 25 |
+
The video URL is passed as a URI part with MIME type "video/webm".
|
| 26 |
"""
|
| 27 |
response = client.models.generate_content(
|
| 28 |
model=MODEL_NAME,
|
|
|
|
| 33 |
)
|
| 34 |
return response.text
|
| 35 |
|
| 36 |
+
def generate_chart(analysis_text: str) -> plt.Figure:
    """
    Build a bar chart of how often selected keywords occur in the analysis.

    Matching is case-insensitive and ignores adjacent punctuation, so
    "Suspicious," and "(alert)" are both counted. Only exact word forms are
    matched; "anomalies" does not count towards "anomaly".

    Args:
        analysis_text: Free-form analysis text. None or an empty string
            produces a chart in which every count is zero.

    Returns:
        A matplotlib Figure with one bar per keyword, in fixed keyword order.
    """
    import re  # Local import: only this function needs it.

    # Keywords of interest, in the order they appear on the x-axis.
    keywords = ["suspicious", "anomaly", "incident", "alert", "object", "movement"]

    # Extract alphabetic runs rather than splitting on whitespace: a plain
    # split() leaves punctuation attached ("alert." != "alert") and silently
    # under-counts. `or ""` keeps a None analysis from raising.
    tokens = re.findall(r"[a-z]+", (analysis_text or "").lower())
    token_counts = Counter(tokens)
    frequencies = [token_counts[kw] for kw in keywords]

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(keywords, frequencies, color="skyblue")
    ax.set_title("Keyword Frequency in Analysis")
    ax.set_ylabel("Count")
    ax.set_xlabel("Keyword")
    # Lay out this specific figure instead of pyplot's implicit "current"
    # figure, which may be a different one if several requests overlap.
    fig.tight_layout()
    return fig
|
| 55 |
+
|
| 56 |
+
def analyze_video(video_url: str, user_query: str) -> tuple[str, plt.Figure]:
    """
    Perform iterative (agentic) video analysis.

    The model is queried up to three times: the first pass asks for a summary,
    and each later pass feeds the previous answer back and asks for refinement.
    The loop stops early if a call raises or returns no text; whatever was
    gathered so far is kept and a bracketed note is appended to it.

    Args:
        video_url: URL of the video, forwarded unchanged to call_gemini.
        user_query: Optional focus for the analysis; ignored when empty.

    Returns:
        A tuple of (Markdown report, matplotlib keyword-frequency chart).
    """
    num_iterations = 3
    analysis = ""

    # The first-pass prompt does not depend on the iteration, so build it once.
    base_prompt = "You are a video analysis agent focusing on security and surveillance. Provide a detailed summary of the video, highlighting key events, suspicious activities, or anomalies."
    if user_query:
        base_prompt += f" Also, focus on the following query: {user_query}"

    for i in range(num_iterations):
        if i == 0:
            prompt = base_prompt
        else:
            prompt = (f"Based on the previous analysis: \"{analysis}\". "
                      "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                      "and details that would help a security team understand the situation better. ")
            if user_query:
                prompt += f"Remember to focus on: {user_query}"

        try:
            result = call_gemini(video_url, prompt)
        except Exception as e:
            # Boundary handler: report the failure inside the output rather
            # than losing the analysis gathered in earlier iterations.
            analysis += f"\n[Error during iteration {i+1}: {e}]"
            break

        if not result:
            # A None/empty reply must not replace the previous analysis: it
            # would be quoted into the next prompt as "None" and later break
            # the keyword chart.
            analysis += f"\n[No text returned during iteration {i+1}]"
            break

        analysis = result

    # Wrap the final analysis in a minimal Markdown report.
    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"

    # Visualise keyword frequency for the same text that goes into the report.
    chart_fig = generate_chart(analysis)
    return markdown_report, chart_fig
|
| 91 |
|
| 92 |
+
def gradio_interface(video_url: str, user_query: str) -> tuple[str, object]:
    """
    Gradio callback: validate the inputs and run the video analysis.

    Args:
        video_url: URL entered by the user. None, empty or whitespace-only
            values are rejected without contacting the model.
        user_query: Optional text guiding the analysis; surrounding
            whitespace is removed and None is treated as empty.

    Returns:
        A tuple of (Markdown text, chart). The chart is None when the URL is
        missing; otherwise both values come from analyze_video.
    """
    cleaned_url = (video_url or "").strip()
    if not cleaned_url:
        return "Please provide a valid video URL.", None

    cleaned_query = (user_query or "").strip()
    return analyze_video(cleaned_url, cleaned_query)
|
| 100 |
|
| 101 |
+
# Define the Gradio interface with two inputs and two outputs.
|
| 102 |
iface = gr.Interface(
    # Page heading and explanatory text shown to the user.
    title="AI Video Analysis and Summariser Agent",
    description=(
        "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
        "to iteratively analyze a video for security and surveillance insights. Provide a video URL and, optionally, "
        "a query to guide the analysis. The tool returns a detailed Markdown report along with a bar chart visualization "
        "of keyword frequency."
    ),
    # Callback invoked with the two textbox values, in the order listed below.
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)"),
        gr.Textbox(
            label="Analysis Query (optional): guide the focus of the analysis",
            placeholder="e.g., focus on unusual movements near the entrance",
        ),
    ],
    # The callback's two return values map onto these components, in order.
    outputs=[
        gr.Markdown(label="Security & Surveillance Analysis Report"),
        gr.Plot(label="Visualization: Keyword Frequency"),
    ],
)
|
| 120 |
|