Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from models import generate_t2v, generate_i2v | |
| from utils import frames_to_video | |
| import tempfile | |
| import os | |
# Upper bounds for the "Number of Frames" and "FPS" sliders in the UI.
# NOTE(review): the frame cap was originally attributed to Stable Video
# Diffusion XT ("supports max 32 frames") -- confirm against the model card.
MAX_FRAMES, MAX_FPS = 32, 30

# Clip length in seconds when MAX_FRAMES frames are played back at MAX_FPS.
# Because it is computed at the *highest* FPS, this is the shortest duration a
# full-length clip can have; lower FPS settings yield longer clips.
MAX_VIDEO_DURATION_SECONDS = MAX_FRAMES / MAX_FPS
def handle_t2v(prompt: str, motion: int, frames: int, fps: int):
    """Handle a text-to-video request from the UI.

    Args:
        prompt: Text prompt forwarded to ``generate_t2v``.
        motion: Motion setting forwarded to ``generate_t2v``.
        frames: Number of frames forwarded to ``generate_t2v``.
        fps: Frames per second; forwarded to ``generate_t2v`` and also used
            by ``frames_to_video`` when encoding.

    Returns:
        Whatever ``frames_to_video`` returns -- presumably the path of the
        written .mp4 file (confirm in ``utils``).

    Raises:
        gr.Error: If generation or encoding fails. The original exception is
            chained as ``__cause__`` so the full traceback reaches the logs.
    """
    # Reserve a unique .mp4 path. delete=False keeps the file on disk after
    # the handle is closed so the encoder can write to it by name.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        temp_path = tmp_file.name

    try:
        frames_list = generate_t2v(prompt, motion, frames, fps)
        return frames_to_video(frames_list, fps, temp_path)
    except Exception as e:
        # Best-effort cleanup: a failure to delete the temp file must never
        # replace the real generation error shown to the user.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise gr.Error(f"Video generation failed: {e}") from e
def handle_i2v(input_image, motion: int, frames: int, fps: int):
    """Handle an image-to-video request from the UI.

    Args:
        input_image: Image supplied by the UI, or ``None`` when nothing was
            uploaded. Forwarded unchanged to ``generate_i2v``.
        motion: Motion setting forwarded to ``generate_i2v``.
        frames: Number of frames forwarded to ``generate_i2v``.
        fps: Frames per second; forwarded to ``generate_i2v`` and also used
            by ``frames_to_video`` when encoding.

    Returns:
        Whatever ``frames_to_video`` returns -- presumably the path of the
        written .mp4 file (confirm in ``utils``).

    Raises:
        gr.Error: If no image was provided, or if generation or encoding
            fails (the original exception is chained as ``__cause__``).
    """
    # Validate before touching the filesystem: creating the temp file first
    # would leave an orphaned empty .mp4 behind on every image-less request.
    if input_image is None:
        raise gr.Error("Please upload an image for Image-to-Video generation.")

    # Reserve a unique .mp4 path. delete=False keeps the file on disk after
    # the handle is closed so the encoder can write to it by name.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        temp_path = tmp_file.name

    try:
        frames_list = generate_i2v(input_image, motion, frames, fps)
        return frames_to_video(frames_list, fps, temp_path)
    except Exception as e:
        # Best-effort cleanup: a failure to delete the temp file must never
        # replace the real generation error shown to the user.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise gr.Error(f"Video generation failed: {e}") from e
# Page title; also interpolated into the Markdown header below.
title = "Sora-2 (Simulation) - Video Generation Demo"

# Markdown rendered at the top of the page. Blank lines separate the Markdown
# blocks so the paragraphs, headings and list render as distinct elements
# (without them the two paragraphs merge and the "Built with" line is absorbed
# into the last list item).
description = f"""
# {title}

This application simulates the capabilities of large-scale video models like OpenAI's Sora, supporting Text-to-Video (T2V) and Image-to-Video (I2V) generation. We use cutting-edge open models Stable Video Diffusion (SVD-XT) and SDXL.

**🚨 IMPORTANT LIMITATION:** Due to the extreme computational demands and time constraints (2-minute videos are not feasible in this environment), we use **Stable Video Diffusion (SVD-XT)** which currently supports videos up to {MAX_FRAMES} frames (approx. {MAX_VIDEO_DURATION_SECONDS:.1f} seconds at max FPS).

## Modalities Implemented:

1. **Text-to-Video (T2V):** Uses Stable Diffusion XL (SDXL) to create a high-quality initial image, followed by Stable Video Diffusion (SVD) to add realistic motion.
2. **Image-to-Video (I2V):** Uses Stable Video Diffusion (SVD) to animate a static image.

Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
"""
# Build the UI: a header, one shared video output, one shared settings panel,
# and a tab per generation mode. Components are created in display order.
with gr.Blocks(title=title) as demo:
    gr.Markdown(description)

    # Single output player; both tabs write their result here.
    output_video = gr.Video(label="Generated Video", height=400, autoplay=True)

    # Generation settings shared by the T2V and I2V tabs.
    with gr.Accordion(f"Settings (Max {MAX_VIDEO_DURATION_SECONDS:.1f}s)", open=True):
        motion_slider = gr.Slider(
            label="Motion Intensity (Motion Bucket ID)",
            info="Higher values produce more dynamic movement.",
            minimum=1,
            maximum=255,
            step=1,
            value=127,
        )
        frames_slider = gr.Slider(
            label="Number of Frames",
            info=f"SVD-XT supports up to {MAX_FRAMES} frames.",
            minimum=14,
            maximum=MAX_FRAMES,
            step=2,
            value=14,
        )
        fps_slider = gr.Slider(
            label="Frames Per Second (FPS)",
            minimum=10,
            maximum=MAX_FPS,
            step=1,
            value=24,
        )

    # Both handlers take (<primary input>, motion, frames, fps); keep the
    # trailing three in one place so the two click bindings cannot drift.
    shared_controls = [motion_slider, frames_slider, fps_slider]

    with gr.Tabs():
        with gr.TabItem("Text-to-Video (T2V)"):
            t2v_prompt = gr.Textbox(
                label="Prompt",
                value="A cozy cabin nestled in a snowy forest, steam rising from the chimney.",
                placeholder="A majestic golden retriever wearing a tiny crown running through a field of glowing lavender.",
            )
            t2v_button = gr.Button(
                "Generate T2V Video (Sora-2 / SDXL + SVD)",
                variant="primary",
            )
            t2v_button.click(
                fn=handle_t2v,
                inputs=[t2v_prompt, *shared_controls],
                outputs=output_video,
                api_name="t2v_generate",
            )

        with gr.TabItem("Image-to-Video (I2V)"):
            i2v_image = gr.Image(
                label="Input Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=300,
            )
            gr.Markdown("Note: SVD works best with 16:9 or 9:16 aspect ratio images (e.g., 1024x576). The image will be resized.")
            i2v_button = gr.Button(
                "Generate I2V Video (Sora-2 / SVD)",
                variant="primary",
            )
            i2v_button.click(
                fn=handle_i2v,
                inputs=[i2v_image, *shared_controls],
                outputs=output_video,
                api_name="i2v_generate",
            )
if __name__ == "__main__":
    # Enable the request queue (at most 20 pending jobs), then start the
    # server with a single worker thread and the API page enabled.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch(max_threads=1, show_api=True)