# app.py
import gradio as gr
from PIL import Image
from typing import Union

# Import utility and model functions.
# MODEL_ID_T2V is interpolated into the header HTML below; it is assumed to
# live in config alongside the other constants.
from models import generate_video
from config import MAX_DURATION_SECONDS, DEFAULT_IMAGE_PATH, MODEL_ID_T2V
from utils import ensure_placeholder_image

# Prepare assets directory and placeholder image
ensure_placeholder_image()


# --- Unified Handler ---
def run_generation(
    prompt: str,
    input_image_path: Union[str, None],
    duration_slider: float,
    is_image_to_video: bool
):
    """Unified handler that loads the input image if needed and calls the model."""
    pil_image = None
    if input_image_path and is_image_to_video:
        try:
            # Load the PIL image from the file path provided by gr.Image
            pil_image = Image.open(input_image_path).convert("RGB")
        except Exception as e:
            # Warn in the UI and fall back to generating without the image
            gr.Warning(f"Could not load image: {e}")

    duration = int(duration_slider)
    return generate_video(
        prompt=prompt,
        input_image=pil_image,
        duration=duration,
        is_image_to_video=is_image_to_video
    )


# --- Wrapper Functions for Tabs ---
def t2v_wrapper(prompt: str, duration_slider: float):
    """Handler for the Text-to-Video tab."""
    return run_generation(prompt, None, duration_slider, False)


def i2v_wrapper(prompt: str, input_image_path: str, duration_slider: float):
    """Handler for the Image-to-Video tab."""
    if not input_image_path:
        raise gr.Error("Please upload an image for Image-to-Video generation.")
    return run_generation(prompt, input_image_path, duration_slider, True)


# --- UI Definition ---
with gr.Blocks(title="Sora 2 Video Generator (ZeroScope Proxy)", fill_width=True) as demo:
    gr.HTML(
        f"""

        <div style="text-align: center;">
            <h1>Sora 2 Inspired Video Generator (ZeroScope Proxy)</h1>
            <p>
                This demo uses a real, high-quality open-source AI model
                ({MODEL_ID_T2V}) to approximate Sora's functionality. Due to
                hardware and model limitations, videos are currently capped at
                {MAX_DURATION_SECONDS} seconds. The audio track is synthesized
                based on the complexity of the prompt.
            </p>
            <p>Built with anycoder</p>
        </div>

""" ) with gr.Tabs(): # ======================================================= # Tab 1: Text-to-Video (T2V) # ======================================================= with gr.TabItem("Text-to-Video (T2V)"): with gr.Row(): with gr.Column(scale=2): prompt_t2v = gr.Textbox( label="Text Prompt", value="A highly cinematic shot of a golden eagle flying over a medieval castle, volumetric lighting.", lines=3 ) duration_t2v = gr.Slider( minimum=4, maximum=MAX_DURATION_SECONDS, step=1, value=4, label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)" ) generate_btn_t2v = gr.Button("Generate Video", variant="primary") with gr.Column(scale=1): video_out_t2v = gr.Video(label="Generated Video") audio_out_t2v = gr.Audio(label="Generated Audio Track", type="numpy") # T2V Generation Event generate_btn_t2v.click( fn=t2v_wrapper, inputs=[prompt_t2v, duration_t2v], outputs=[video_out_t2v, audio_out_t2v] ) gr.Examples( examples=[ ["A puppy dancing ballet on the moon, high saturation, 4k.", 4], ["Neon lights reflecting off wet cobblestones in a cyberpunk alley, panning camera.", 4] ], inputs=[prompt_t2v, duration_t2v], outputs=[video_out_t2v, audio_out_t2v], fn=t2v_wrapper, cache_examples=False, run_on_click=True ) # ======================================================= # Tab 2: Image-to-Video (I2V) # ======================================================= with gr.TabItem("Image-to-Video (I2V)"): with gr.Row(): with gr.Column(scale=1): image_i2v = gr.Image( label="Input Image", type="filepath", sources=["upload"], interactive=True, value=DEFAULT_IMAGE_PATH ) with gr.Column(scale=2): prompt_i2v = gr.Textbox( label="Movement Prompt", value="The water ripples slightly as a breeze passes through the field.", placeholder="Describe the desired movement or animation.", lines=3 ) duration_i2v = gr.Slider( minimum=4, maximum=MAX_DURATION_SECONDS, step=1, value=4, label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)" ) generate_btn_i2v = gr.Button("Animate Image", variant="primary") with gr.Row(): video_out_i2v = gr.Video(label="Animated Video") audio_out_i2v = gr.Audio(label="Generated Audio Track", type="numpy") # I2V Generation Event generate_btn_i2v.click( fn=i2v_wrapper, inputs=[prompt_i2v, image_i2v, duration_i2v], outputs=[video_out_i2v, audio_out_i2v] ) gr.Examples( examples=[ [ "Heavy rain starts to fall, blurring the edges.", DEFAULT_IMAGE_PATH, 4 ] ], inputs=[prompt_i2v, image_i2v, duration_i2v], outputs=[video_out_i2v, audio_out_i2v], fn=i2v_wrapper, cache_examples=False, run_on_click=True ) if __name__ == "__main__": demo.queue(max_size=20).launch()