# demo-os56ddeg / app.py
# Author: Gertie01 — "Deploy Gradio app with multiple files" (commit bd63620, verified)
import gradio as gr
from models import generate_t2v, generate_i2v
from utils import frames_to_video
import tempfile
import os
# Stable Video Diffusion XT supports max 32 frames.
MAX_FRAMES = 32
# Upper bound for the FPS slider below; together with MAX_FRAMES it caps clip length.
MAX_FPS = 30
# Longest possible clip in seconds (frames rendered at the maximum FPS).
MAX_VIDEO_DURATION_SECONDS = MAX_FRAMES / MAX_FPS
def handle_t2v(prompt: str, motion: int, frames: int, fps: int):
    """Handle text-to-video generation.

    Args:
        prompt: Text description of the desired video.
        motion: Motion bucket id; higher values produce more movement.
        frames: Number of frames to generate.
        fps: Playback frames per second for the encoded video.

    Returns:
        Filesystem path to the rendered .mp4 file.

    Raises:
        gr.Error: If frame generation or video encoding fails.
    """
    # Reserve a temp .mp4 path. delete=False is required: the file must
    # outlive this handler so Gradio can serve it after we return.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        temp_path = tmp_file.name
    try:
        frames_list = generate_t2v(prompt, motion, frames, fps)
        return frames_to_video(frames_list, fps, temp_path)
    except Exception as e:
        # Remove the orphaned temp file so failed runs don't leak disk space.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        # Chain the original exception (`from e`) so server logs keep the
        # real traceback instead of only the user-facing message.
        raise gr.Error(f"Video generation failed: {e}") from e
def handle_i2v(input_image, motion: int, frames: int, fps: int):
    """Handle image-to-video generation.

    Args:
        input_image: PIL image to animate (None if the user uploaded nothing).
        motion: Motion bucket id; higher values produce more movement.
        frames: Number of frames to generate.
        fps: Playback frames per second for the encoded video.

    Returns:
        Filesystem path to the rendered .mp4 file.

    Raises:
        gr.Error: If no image was provided, or generation/encoding fails.
    """
    # Validate BEFORE allocating the temp file: the original created the
    # temp file first, so every "no image" early exit leaked an orphaned
    # .mp4 on disk that nothing ever deleted.
    if input_image is None:
        raise gr.Error("Please upload an image for Image-to-Video generation.")
    # delete=False is required: the file must outlive this handler so
    # Gradio can serve it after we return.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        temp_path = tmp_file.name
    try:
        frames_list = generate_i2v(input_image, motion, frames, fps)
        return frames_to_video(frames_list, fps, temp_path)
    except Exception as e:
        # Remove the orphaned temp file so failed runs don't leak disk space.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        # Chain the original exception so logs keep the real traceback.
        raise gr.Error(f"Video generation failed: {e}") from e
# Page heading, also used as the browser tab title for the Blocks app below.
title = "Sora-2 (Simulation) - Video Generation Demo"
# Markdown copy rendered at the top of the page. The f-string interpolates
# MAX_FRAMES / MAX_VIDEO_DURATION_SECONDS so the docs stay in sync with the
# limits defined above.
description = f"""
# {title}
This application simulates the capabilities of large-scale video models like OpenAI's Sora, supporting Text-to-Video (T2V) and Image-to-Video (I2V) generation. We use cutting-edge open models Stable Video Diffusion (SVD-XT) and SDXL.
**🚨 IMPORTANT LIMITATION:** Due to the extreme computational demands and time constraints (2-minute videos are not feasible in this environment), we use **Stable Video Diffusion (SVD-XT)** which currently supports videos up to {MAX_FRAMES} frames (approx. {MAX_VIDEO_DURATION_SECONDS:.1f} seconds at max FPS).
## Modalities Implemented:
1. **Text-to-Video (T2V):** Uses Stable Diffusion XL (SDXL) to create a high-quality initial image, followed by Stable Video Diffusion (SVD) to add realistic motion.
2. **Image-to-Video (I2V):** Uses Stable Video Diffusion (SVD) to animate a static image.
Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
"""
# UI definition. Statement order matters: components created inside the
# context managers are placed in the layout in creation order, and the
# sliders must exist before the tab buttons wire them as inputs.
with gr.Blocks(title=title) as demo:
    gr.Markdown(description)
    # Single shared output component: both tabs render into the same player.
    output_video = gr.Video(label="Generated Video", height=400, autoplay=True)
    # Global Controls for both tabs
    with gr.Accordion(f"Settings (Max {MAX_VIDEO_DURATION_SECONDS:.1f}s)", open=True):
        # SVD "motion bucket id" — full 1..255 range with a midpoint default.
        motion_slider = gr.Slider(
            minimum=1,
            maximum=255,
            value=127,
            step=1,
            label="Motion Intensity (Motion Bucket ID)",
            info="Higher values produce more dynamic movement."
        )
        # Frame count bounded by the SVD-XT limit defined at module top.
        frames_slider = gr.Slider(
            minimum=14,
            maximum=MAX_FRAMES,
            value=14,
            step=2,
            label="Number of Frames",
            info=f"SVD-XT supports up to {MAX_FRAMES} frames."
        )
        fps_slider = gr.Slider(
            minimum=10,
            maximum=MAX_FPS,
            value=24,
            step=1,
            label="Frames Per Second (FPS)"
        )
    with gr.Tabs():
        with gr.TabItem("Text-to-Video (T2V)"):
            t2v_prompt = gr.Textbox(
                label="Prompt",
                placeholder="A majestic golden retriever wearing a tiny crown running through a field of glowing lavender.",
                value="A cozy cabin nestled in a snowy forest, steam rising from the chimney."
            )
            t2v_button = gr.Button("Generate T2V Video (Sora-2 / SDXL + SVD)", variant="primary")
            # api_name exposes this handler as a named REST/API endpoint.
            t2v_button.click(
                handle_t2v,
                inputs=[t2v_prompt, motion_slider, frames_slider, fps_slider],
                outputs=output_video,
                api_name="t2v_generate"
            )
        with gr.TabItem("Image-to-Video (I2V)"):
            i2v_image = gr.Image(
                label="Input Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=300
            )
            gr.Markdown("Note: SVD works best with 16:9 or 9:16 aspect ratio images (e.g., 1024x576). The image will be resized.")
            i2v_button = gr.Button("Generate I2V Video (Sora-2 / SVD)", variant="primary")
            i2v_button.click(
                handle_i2v,
                inputs=[i2v_image, motion_slider, frames_slider, fps_slider],
                outputs=output_video,
                api_name="i2v_generate"
            )
# Entry point: queue caps pending jobs at 20; max_threads=1 limits worker
# threads (presumably to avoid concurrent model runs on one GPU — confirm).
if __name__ == "__main__":
    demo.queue(max_size=20).launch(max_threads=1, show_api=True)