Spaces:
Build error
Build error
File size: 6,721 Bytes
48ae3bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
# app.py
import os
from typing import Union

import gradio as gr
from PIL import Image

# Import config constants, model entry point, and utilities.
# NOTE(review): MODEL_ID_T2V is referenced in the UI header HTML below but was
# never imported, which raised a NameError at startup ("Build error").
# Importing it here fixes that — assumed to be defined in config alongside the
# other constants; confirm against config.py.
from config import ASSETS_DIR, DEFAULT_IMAGE_PATH, MAX_DURATION_SECONDS, MODEL_ID_T2V
from models import generate_video
from utils import ensure_placeholder_image
# Create the assets directory and placeholder image up front, so the UI can
# safely reference DEFAULT_IMAGE_PATH as the default gr.Image value below.
ensure_placeholder_image()
# --- Unified Handler ---
def run_generation(
    prompt: str,
    input_image_path: Union[str, None],
    duration_slider: float,
    is_image_to_video: bool
):
    """Unified handler: optionally load the conditioning image, then run the model.

    Args:
        prompt: Text description of the desired video.
        input_image_path: Filesystem path to the uploaded image (gr.Image is
            configured with type="filepath"), or None for text-to-video.
        duration_slider: Requested clip length in seconds (float from the slider).
        is_image_to_video: True for the I2V tab, False for T2V.

    Returns:
        Whatever ``generate_video`` returns — wired in the UI to a
        (video, audio) output pair.
    """
    pil_image = None
    if input_image_path and is_image_to_video:
        try:
            # gr.Image(type="filepath") hands us a path, not a PIL image,
            # so decode it here; normalize to RGB for the model.
            pil_image = Image.open(input_image_path).convert("RGB")
        except Exception as e:
            # Deliberate best-effort: surface a UI warning and continue
            # without image guidance rather than failing the whole request.
            # (Removed a redundant `pass` that followed the warning.)
            gr.Warning(f"Could not load image: {e}")
    return generate_video(
        prompt=prompt,
        input_image=pil_image,
        duration=int(duration_slider),
        is_image_to_video=is_image_to_video
    )
# --- Wrapper Functions for Tabs ---
def t2v_wrapper(prompt: str, duration_slider: float):
    """Text-to-Video tab handler: delegate to the unified generator with no image."""
    return run_generation(
        prompt=prompt,
        input_image_path=None,
        duration_slider=duration_slider,
        is_image_to_video=False,
    )
def i2v_wrapper(prompt: str, input_image_path: str, duration_slider: float):
    """Image-to-Video tab handler: require an uploaded image, then delegate."""
    # Guard clause: an I2V request without an image is a user error, not a
    # silent fallback to text-to-video.
    if not input_image_path:
        raise gr.Error("Please upload an image for Image-to-Video generation.")
    return run_generation(
        prompt=prompt,
        input_image_path=input_image_path,
        duration_slider=duration_slider,
        is_image_to_video=True,
    )
# --- UI Definition ---
# UI definition. NOTE(review): the header HTML below interpolates MODEL_ID_T2V,
# which previously was not imported and crashed the app with a NameError at
# import time; it is now brought in via the config import at the top of the file.
with gr.Blocks(title="Sora 2 Video Generator (ZeroScope Proxy)", fill_width=True) as demo:
    # Static header; f-string fills in the model id and the duration cap.
    gr.HTML(
        f"""
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1>Sora 2 Inspired Video Generator (ZeroScope Proxy)</h1>
            <p>
                This demo utilizes a real, high-quality open-source AI model ({MODEL_ID_T2V}) to simulate Sora's functionality.
                Due to hardware and model limitations, videos are currently capped at {MAX_DURATION_SECONDS} seconds.
                The audio track is synthesized based on the prompt complexity.
            </p>
            <p>
                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a>
            </p>
        </div>
        """
    )
    with gr.Tabs():
        # =======================================================
        # Tab 1: Text-to-Video (T2V)
        # =======================================================
        with gr.TabItem("Text-to-Video (T2V)"):
            with gr.Row():
                with gr.Column(scale=2):
                    prompt_t2v = gr.Textbox(
                        label="Text Prompt",
                        value="A highly cinematic shot of a golden eagle flying over a medieval castle, volumetric lighting.",
                        lines=3
                    )
                    duration_t2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_t2v = gr.Button("Generate Video", variant="primary")
                with gr.Column(scale=1):
                    video_out_t2v = gr.Video(label="Generated Video")
                    # type="numpy": handler returns a (sample_rate, array) tuple.
                    audio_out_t2v = gr.Audio(label="Generated Audio Track", type="numpy")
            # T2V generation event: prompt + duration -> (video, audio).
            generate_btn_t2v.click(
                fn=t2v_wrapper,
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v]
            )
            # Clickable examples run the real handler (not cached).
            gr.Examples(
                examples=[
                    ["A puppy dancing ballet on the moon, high saturation, 4k.", 4],
                    ["Neon lights reflecting off wet cobblestones in a cyberpunk alley, panning camera.", 4]
                ],
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v],
                fn=t2v_wrapper,
                cache_examples=False,
                run_on_click=True
            )
        # =======================================================
        # Tab 2: Image-to-Video (I2V)
        # =======================================================
        with gr.TabItem("Image-to-Video (I2V)"):
            with gr.Row():
                with gr.Column(scale=1):
                    # type="filepath": the handler receives a path string and
                    # does its own PIL decoding.
                    image_i2v = gr.Image(
                        label="Input Image",
                        type="filepath",
                        sources=["upload"],
                        interactive=True,
                        value=DEFAULT_IMAGE_PATH
                    )
                with gr.Column(scale=2):
                    prompt_i2v = gr.Textbox(
                        label="Movement Prompt",
                        value="The water ripples slightly as a breeze passes through the field.",
                        placeholder="Describe the desired movement or animation.",
                        lines=3
                    )
                    duration_i2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_i2v = gr.Button("Animate Image", variant="primary")
            with gr.Row():
                video_out_i2v = gr.Video(label="Animated Video")
                audio_out_i2v = gr.Audio(label="Generated Audio Track", type="numpy")
            # I2V generation event: prompt + image path + duration -> (video, audio).
            generate_btn_i2v.click(
                fn=i2v_wrapper,
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v]
            )
            gr.Examples(
                examples=[
                    [
                        "Heavy rain starts to fall, blurring the edges.",
                        DEFAULT_IMAGE_PATH,
                        4
                    ]
                ],
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v],
                fn=i2v_wrapper,
                cache_examples=False,
                run_on_click=True
            )
if __name__ == "__main__":
    # Queue requests (up to 20 waiting) so long generations don't block the app.
    queued = demo.queue(max_size=20)
    queued.launch()