# app.py
import gradio as gr
from PIL import Image
from typing import Union

# Import utility and model functions.
# MODEL_ID_T2V is included in this import because the header HTML below
# interpolates it; without it the app fails at startup with a NameError.
from models import generate_video
from config import MAX_DURATION_SECONDS, DEFAULT_IMAGE_PATH, ASSETS_DIR, MODEL_ID_T2V
from utils import ensure_placeholder_image

# Prepare assets directory and placeholder image
ensure_placeholder_image()
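# For reference, config.py is expected to define at least the names imported
# above. A minimal sketch; the values here are hypothetical and the actual
# ones in this Space may differ:
#
#     MODEL_ID_T2V = "cerspense/zeroscope_v2_576w"  # hypothetical ZeroScope checkpoint id
#     MAX_DURATION_SECONDS = 8                      # hypothetical duration cap
#     ASSETS_DIR = "assets"
#     DEFAULT_IMAGE_PATH = f"{ASSETS_DIR}/placeholder.png"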
# --- Unified Handler ---
def run_generation(
    prompt: str,
    input_image_path: Union[str, None],
    duration_slider: float,
    is_image_to_video: bool,
):
    """Unified handler that loads the input image if necessary and calls the model."""
    pil_image = None
    if input_image_path and is_image_to_video:
        try:
            # Load the PIL image from the file path provided by gr.Image
            pil_image = Image.open(input_image_path).convert("RGB")
        except Exception as e:
            # Warn in the UI and fall back to generating without an image
            gr.Warning(f"Could not load image: {e}")
    duration = int(duration_slider)
    return generate_video(
        prompt=prompt,
        input_image=pil_image,
        duration=duration,
        is_image_to_video=is_image_to_video,
    )
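# Note: generate_video (defined in models.py) is expected to return a pair of
# (video_filepath, (sample_rate, audio_array)) so that it matches the
# gr.Video and gr.Audio(type="numpy") output components wired up below.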
# --- Wrapper Functions for Tabs ---
def t2v_wrapper(prompt: str, duration_slider: float):
    """Handler for Text-to-Video tab."""
    return run_generation(prompt, None, duration_slider, False)

def i2v_wrapper(prompt: str, input_image_path: str, duration_slider: float):
    """Handler for Image-to-Video tab."""
    if not input_image_path:
        raise gr.Error("Please upload an image for Image-to-Video generation.")
    return run_generation(prompt, input_image_path, duration_slider, True)
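# Raising gr.Error inside a handler aborts the event and shows a modal error
# in the UI, whereas gr.Warning (used in run_generation above) only displays
# a toast and lets execution continue.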
# --- UI Definition ---
with gr.Blocks(title="Sora 2 Video Generator (ZeroScope Proxy)", fill_width=True) as demo:
    gr.HTML(
        f"""
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1>Sora 2 Inspired Video Generator (ZeroScope Proxy)</h1>
            <p>
                This demo uses a real, open-source video model ({MODEL_ID_T2V}) to approximate Sora's functionality.
                Due to hardware and model limitations, videos are capped at {MAX_DURATION_SECONDS} seconds.
                The audio track is synthesized from the prompt.
            </p>
            <p>
                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a>
            </p>
        </div>
        """
    )
    with gr.Tabs():
        # =======================================================
        # Tab 1: Text-to-Video (T2V)
        # =======================================================
        with gr.TabItem("Text-to-Video (T2V)"):
            with gr.Row():
                with gr.Column(scale=2):
                    prompt_t2v = gr.Textbox(
                        label="Text Prompt",
                        value="A highly cinematic shot of a golden eagle flying over a medieval castle, volumetric lighting.",
                        lines=3
                    )
                    duration_t2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_t2v = gr.Button("Generate Video", variant="primary")
                with gr.Column(scale=1):
                    video_out_t2v = gr.Video(label="Generated Video")
                    audio_out_t2v = gr.Audio(label="Generated Audio Track", type="numpy")
            # T2V Generation Event
            generate_btn_t2v.click(
                fn=t2v_wrapper,
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v]
            )
            gr.Examples(
                examples=[
                    ["A puppy dancing ballet on the moon, high saturation, 4k.", 4],
                    ["Neon lights reflecting off wet cobblestones in a cyberpunk alley, panning camera.", 4]
                ],
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v],
                fn=t2v_wrapper,
                cache_examples=False,
                run_on_click=True
            )
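            # With cache_examples=False and run_on_click=True (Gradio 4.x
            # behavior), clicking an example runs t2v_wrapper live rather
            # than replaying a result precomputed at build time.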
        # =======================================================
        # Tab 2: Image-to-Video (I2V)
        # =======================================================
        with gr.TabItem("Image-to-Video (I2V)"):
            with gr.Row():
                with gr.Column(scale=1):
                    image_i2v = gr.Image(
                        label="Input Image",
                        type="filepath",
                        sources=["upload"],
                        interactive=True,
                        value=DEFAULT_IMAGE_PATH
                    )
                with gr.Column(scale=2):
                    prompt_i2v = gr.Textbox(
                        label="Movement Prompt",
                        value="The water ripples slightly as a breeze passes through the field.",
                        placeholder="Describe the desired movement or animation.",
                        lines=3
                    )
                    duration_i2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_i2v = gr.Button("Animate Image", variant="primary")
            with gr.Row():
                video_out_i2v = gr.Video(label="Animated Video")
                audio_out_i2v = gr.Audio(label="Generated Audio Track", type="numpy")
            # I2V Generation Event
            generate_btn_i2v.click(
                fn=i2v_wrapper,
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v]
            )
            gr.Examples(
                examples=[
                    [
                        "Heavy rain starts to fall, blurring the edges.",
                        DEFAULT_IMAGE_PATH,
                        4
                    ]
                ],
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v],
                fn=i2v_wrapper,
                cache_examples=False,
                run_on_click=True
            )
if __name__ == "__main__":
    demo.queue(max_size=20).launch()
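# To run locally, a sketch (the exact dependencies depend on models.py, which
# likely needs torch/diffusers for the ZeroScope pipeline):
#   pip install gradio pillow
#   python app.py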