Fix: Correct and simplify Gradio frontend for MCP integration

backend/app.py ADDED (+187 -0)
@@ -0,0 +1,187 @@
import gradio as gr
import os
import httpx
from typing import Dict, Any

# --- Backend Client Functions ---
# These functions call the Modal/backend endpoints.
async def call_video_analysis_backend(video_url: str) -> Dict[str, Any]:
    """Calls the backend to analyze a single video."""
    # Default to a placeholder if the env var is not set, to avoid crashing.
    backend_url = os.getenv("BACKEND_VIDEO_URL", "https://your-backend-hf-space-for-video/process_video_analysis")
    if not video_url:
        return {"status": "error", "message": "Video URL cannot be empty."}

    print(f"Sending request to backend for video: {video_url}")
    payload = {"video_url": video_url}
    try:
        async with httpx.AsyncClient(timeout=1800.0) as client:
            response = await client.post(backend_url, json=payload)
            response.raise_for_status()
            return response.json()
    except httpx.HTTPStatusError as e:
        return {"status": "error", "message": f"Backend Error: {e.response.status_code}", "details": e.response.text}
    except Exception as e:
        return {"status": "error", "message": "Failed to connect to backend", "details": str(e)}
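# The UI treats whatever JSON the backend returns as opaque and simply displays
# it. For orientation only, a successful response of roughly this shape is
# assumed (field names are illustrative, not a documented contract):
#
#   {
#       "status": "success",
#       "analysis": {
#           "transcription": "...",
#           "caption": "...",
#           "actions": ["..."],
#           "objects": ["..."]
#       }
#   }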
async def call_topic_analysis_backend(topic: str, max_videos: int) -> Dict[str, Any]:
    """Calls the backend to analyze videos for a topic."""
    backend_url = os.getenv("BACKEND_TOPIC_URL", "https://your-backend-hf-space-for-topic/analyze_topic")
    if not topic:
        return {"status": "error", "message": "Topic cannot be empty."}

    print(f"Sending request to backend for topic: {topic} ({max_videos} videos)")
    payload = {"topic": topic, "max_videos": max_videos}
    try:
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(backend_url, json=payload)
            response.raise_for_status()
            return response.json()
    except httpx.HTTPStatusError as e:
        return {"status": "error", "message": f"Backend Error: {e.response.status_code}", "details": e.response.text}
    except Exception as e:
        return {"status": "error", "message": "Failed to connect to backend", "details": str(e)}
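# A small local smoke test for the client functions above; a sketch, not wired
# into the UI. It assumes BACKEND_VIDEO_URL points at a reachable backend and
# that SMOKE_TEST_VIDEO_URL (an illustrative, made-up variable) names a real,
# public video; run it manually with asyncio.run(_smoke_test()).
async def _smoke_test() -> None:
    """Print the raw backend response for one video, for quick manual checks."""
    video_url = os.getenv("SMOKE_TEST_VIDEO_URL", "https://example.com/sample.mp4")
    print(await call_video_analysis_backend(video_url))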
# --- Gradio Tool Functions (Wrappers for MCP) ---

async def analyze_video(video_url: str):
    """
    Triggers a comprehensive analysis of a single video from a URL.

    This tool calls a backend service to perform multiple analyses:
    - Transcribes audio to text.
    - Generates a descriptive caption for the video content.
    - Recognizes main actions in the video.
    - Detects objects in keyframes.

    :param video_url: The public URL of the video to be processed (e.g., a YouTube link).
    :return: A JSON object containing the full analysis results from the backend.
    """
    results = await call_video_analysis_backend(video_url)
    # The client functions above signal failures with {"status": "error", ...}.
    if isinstance(results, dict) and results.get("status") == "error":
        status_update = f"Error analyzing video: {results.get('message', 'Unknown error')}"
    else:
        status_update = "Video analysis complete."
    return status_update, results

async def analyze_topic(topic: str, max_videos: int):
    """
    Finds and analyzes multiple videos based on a given topic.

    This tool calls a backend service that searches for videos related to the topic,
    then runs a comprehensive analysis on each video found.

    :param topic: The topic to search for (e.g., 'latest AI advancements').
    :param max_videos: The maximum number of videos to find and analyze (1-5).
    :return: A JSON object with the aggregated analysis results for all videos.
    """
    results = await call_topic_analysis_backend(topic, max_videos)
    if isinstance(results, dict) and results.get("status") == "error":
        status_update = f"Error analyzing topic: {results.get('message', 'Unknown error')}"
    else:
        status_update = "Topic analysis complete."
    return status_update, results
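# Because the click handlers below register these wrappers with explicit
# api_name values, a separate process can drive them programmatically. A
# sketch using gradio_client, with a placeholder Space id:
#
#   from gradio_client import Client
#   client = Client("your-username/your-video-mcp-space")
#   status, analysis = client.predict(
#       "https://www.youtube.com/watch?v=3wLg_t_H2Xw",
#       api_name="/analyze_video",
#   )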
# --- Gradio UI ---

# Small Interface used by the manual-testing tab further down; it wraps the
# same analyze_video tool.
demo_interface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Textbox(label="Video URL or path"),
    outputs=[gr.Textbox(label="Status"), gr.JSON(label="Analysis Results")],
)

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# LLM Video Interpretation MCP")
    gr.Markdown("This Hugging Face Space provides tools for processing video context for AI agents. Use the tools below to analyze videos by URL or by topic.")

    with gr.Tab("Single Video Analysis"):
        gr.Markdown("## Analyze a single video from a URL")
        with gr.Row():
            video_url_input = gr.Textbox(label="Video URL", placeholder="Enter a YouTube or direct video URL...", scale=4)
            submit_button = gr.Button("Analyze Video", variant="primary")
        status_text = gr.Textbox(label="Status", interactive=False)
        json_output = gr.JSON(label="Analysis Results")

        submit_button.click(
            analyze_video,
            inputs=[video_url_input],
            outputs=[status_text, json_output],
            api_name="analyze_video"
        )
        gr.Examples(
            examples=[
                "https://www.youtube.com/watch?v=3wLg_t_H2Xw",
                "https://www.youtube.com/watch?v=h42dDpgE7g8",
                "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
                "https://sample-videos.com/zip/10/mp4/SampleVideo_1280x720_1mb.mp4"
            ],
            inputs=video_url_input
        )
        gr.Markdown("**Processing can take several minutes** depending on video length and model inference times. The cache on the Modal backend will speed up repeated requests for the same video.")
    with gr.Tab("Topic Video Analysis"):
        gr.Markdown("## Analyze multiple videos based on a topic")
        gr.Markdown("Enter a topic, and the system will search for relevant videos, analyze them, and provide an aggregated JSON output.")
        with gr.Row():
            topic_input = gr.Textbox(label="Enter a topic", placeholder="e.g., 'Apple Vision Pro review'", scale=3)
            max_videos_slider = gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Number of Videos to Analyze")  # Max 5 for the UI; the backend might support more.
        topic_status_text = gr.Textbox(label="Status", interactive=False)
        topic_json_output = gr.JSON(label="Analysis Results")

        with gr.Row():
            topic_submit_button = gr.Button("Analyze Topic", variant="primary")
            topic_clear_button = gr.Button("Clear")

        topic_submit_button.click(
            analyze_topic,
            inputs=[topic_input, max_videos_slider],
            outputs=[topic_status_text, topic_json_output],
            api_name="analyze_topic"
        )

        def clear_topic_outputs():
            # Reset topic_input, max_videos_slider (back to its default), and both outputs.
            return None, 2, "", None
        topic_clear_button.click(fn=clear_topic_outputs, inputs=[], outputs=[topic_input, max_videos_slider, topic_status_text, topic_json_output])

        gr.Examples(
            examples=[
                ["self-driving car technology", 2],
                ["open source large language models", 3],
                ["AI in healthcare", 2],
                ["sustainable energy solutions", 3],
                ["how to make sourdough bread", 1]
            ],
            inputs=[topic_input, max_videos_slider]
        )
        gr.Markdown("**Note:** This process involves searching for videos and then analyzing each one. It can take a significant amount of time, especially for multiple videos. The backend has a long timeout, but please be patient.")
    with gr.Tab("Demo (for Manual Testing)"):
        gr.Markdown("### Manually test video URLs or paths for interpretation and observe the JSON response.")
        demo_interface.render()

# Set environment variables in your Hugging Face Space settings, not here.
# BACKEND_VIDEO_URL = "https://your-modal-or-backend-url/process_video_analysis"
# BACKEND_TOPIC_URL = "https://your-modal-or-backend-url/analyze_topic"

# Launch the Gradio application
if __name__ == "__main__":
    demo.launch(debug=True, server_name="0.0.0.0")
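# To actually expose analyze_video and analyze_topic as MCP tools (the MCP
# integration this frontend targets), recent Gradio releases support launching
# a built-in MCP server. A hedged variant of the call above, assuming a
# sufficiently new gradio version:
#
#   demo.launch(debug=True, server_name="0.0.0.0", mcp_server=True)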