import gradio as gr
from models import generate_t2v, generate_i2v
from utils import frames_to_video
import tempfile
import os

# Maximum number of frames we allow for SVD-XT generation in this demo.
MAX_FRAMES = 32
MAX_FPS = 30
MAX_VIDEO_DURATION_SECONDS = MAX_FRAMES / MAX_FPS


def handle_t2v(prompt: str, motion: int, frames: int, fps: int):
    """Handles text-to-video generation."""
    # Create a temporary file path for the video output.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        temp_path = tmp_file.name
    try:
        frames_list = generate_t2v(prompt, motion, frames, fps)
        output_path = frames_to_video(frames_list, fps, temp_path)
        return output_path
    except Exception as e:
        # Clean up the temp file on error.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise gr.Error(f"Video generation failed: {e}")


def handle_i2v(input_image, motion: int, frames: int, fps: int):
    """Handles image-to-video generation."""
    # Validate the input before creating any temporary files.
    if input_image is None:
        raise gr.Error("Please upload an image for Image-to-Video generation.")

    # Create a temporary file path for the video output.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        temp_path = tmp_file.name
    try:
        frames_list = generate_i2v(input_image, motion, frames, fps)
        output_path = frames_to_video(frames_list, fps, temp_path)
        return output_path
    except Exception as e:
        # Clean up the temp file on error.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise gr.Error(f"Video generation failed: {e}")


title = "Sora-2 (Simulation) - Video Generation Demo"

description = f"""
# {title}

This application simulates the capabilities of large-scale video models such as OpenAI's Sora, supporting Text-to-Video (T2V) and Image-to-Video (I2V) generation. It is built on the open models Stable Video Diffusion (SVD-XT) and Stable Diffusion XL (SDXL).

**🚨 IMPORTANT LIMITATION:** Due to the extreme computational demands of video diffusion (Sora-style 2-minute videos are not feasible in this environment), we use **Stable Video Diffusion (SVD-XT)** and cap generation at {MAX_FRAMES} frames per clip (approx. {MAX_VIDEO_DURATION_SECONDS:.1f} seconds at the maximum FPS).

## Modalities Implemented
1. **Text-to-Video (T2V):** Stable Diffusion XL (SDXL) creates a high-quality initial image, then Stable Video Diffusion (SVD) adds realistic motion.
2. **Image-to-Video (I2V):** Stable Video Diffusion (SVD) animates a static image you upload.

Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
"""
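
# For reference, a minimal sketch of what the imported I2V backend might look like
# with Hugging Face diffusers. This is an illustrative assumption, not the actual
# models.py / utils.py implementation, and nothing in the app below calls it.
def _reference_i2v_sketch(input_image, motion: int, frames: int, fps: int, out_path: str) -> str:
    import torch
    from diffusers import StableVideoDiffusionPipeline
    from diffusers.utils import export_to_video

    # Load SVD-XT (roughly what a generate_i2v() implementation could do).
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16
    ).to("cuda")

    # SVD works best around 1024x576 (16:9); resize the input accordingly.
    image = input_image.convert("RGB").resize((1024, 576))

    # motion_bucket_id corresponds to the "Motion Intensity" slider defined below.
    result = pipe(
        image,
        num_frames=frames,
        motion_bucket_id=motion,
        decode_chunk_size=8,
    )
    # result.frames holds one list of PIL frames per generated video.
    return export_to_video(result.frames[0], out_path, fps=fps)
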
with gr.Blocks(title=title) as demo:
    gr.Markdown(description)

    output_video = gr.Video(label="Generated Video", height=400, autoplay=True)

    # Global controls shared by both tabs.
    with gr.Accordion(f"Settings (up to {MAX_FRAMES} frames)", open=True):
        motion_slider = gr.Slider(
            minimum=1, maximum=255, value=127, step=1,
            label="Motion Intensity (Motion Bucket ID)",
            info="Higher values produce more dynamic movement."
        )
        frames_slider = gr.Slider(
            minimum=14, maximum=MAX_FRAMES, value=14, step=2,
            label="Number of Frames",
            info=f"This demo caps SVD-XT generation at {MAX_FRAMES} frames."
        )
        fps_slider = gr.Slider(
            minimum=10, maximum=MAX_FPS, value=24, step=1,
            label="Frames Per Second (FPS)"
        )

    with gr.Tabs():
        with gr.TabItem("Text-to-Video (T2V)"):
            t2v_prompt = gr.Textbox(
                label="Prompt",
                placeholder="A majestic golden retriever wearing a tiny crown running through a field of glowing lavender.",
                value="A cozy cabin nestled in a snowy forest, steam rising from the chimney."
            )
            t2v_button = gr.Button("Generate T2V Video (Sora-2 / SDXL + SVD)", variant="primary")
            t2v_button.click(
                handle_t2v,
                inputs=[t2v_prompt, motion_slider, frames_slider, fps_slider],
                outputs=output_video,
                api_name="t2v_generate"
            )

        with gr.TabItem("Image-to-Video (I2V)"):
            i2v_image = gr.Image(
                label="Input Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=300
            )
            gr.Markdown("Note: SVD works best with 16:9 or 9:16 aspect-ratio images (e.g., 1024x576). The image will be resized.")
            i2v_button = gr.Button("Generate I2V Video (Sora-2 / SVD)", variant="primary")
            i2v_button.click(
                handle_i2v,
                inputs=[i2v_image, motion_slider, frames_slider, fps_slider],
                outputs=output_video,
                api_name="i2v_generate"
            )

if __name__ == "__main__":
    demo.queue(max_size=20).launch(max_threads=1, show_api=True)
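
# Example: calling the named API endpoints from a separate process with
# gradio_client (the local URL below is an assumption for a default launch):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   video_path = client.predict(
#       "A cozy cabin nestled in a snowy forest.",  # prompt
#       127,                                        # motion intensity
#       14,                                         # number of frames
#       24,                                         # frames per second
#       api_name="/t2v_generate",
#   )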