# Provenance: Hugging Face Space upload ("Deploy Gradio app with multiple
# files", commit 48ae3bc, 6.72 kB). Hosting-page chrome removed so the
# module parses as Python.
# app.py
import os
from typing import Union

import gradio as gr
from PIL import Image

# Import utility and model functions.
# MODEL_ID_T2V is interpolated in the UI header f-string below; without this
# import the app raises NameError at startup. If the constant actually lives
# in models.py rather than config.py, move it to that import line.
from config import ASSETS_DIR, DEFAULT_IMAGE_PATH, MAX_DURATION_SECONDS, MODEL_ID_T2V
from models import generate_video
from utils import ensure_placeholder_image
# Prepare assets directory and placeholder image
# Import-time side effect: presumably creates ASSETS_DIR and the default
# placeholder image used by the I2V tab (DEFAULT_IMAGE_PATH) — see utils.
ensure_placeholder_image()
# --- Unified Handler ---
def run_generation(
    prompt: str,
    input_image_path: Union[str, None],
    duration_slider: float,
    is_image_to_video: bool
):
    """Unified handler: load the input image if needed, then call the model.

    Args:
        prompt: Text description of the video to generate.
        input_image_path: Filesystem path supplied by gr.Image(type="filepath"),
            or None in text-to-video mode.
        duration_slider: Requested duration in seconds (float from gr.Slider).
        is_image_to_video: True for the I2V tab, False for T2V.

    Returns:
        The result of models.generate_video (video output and audio track).

    Raises:
        gr.Error: If I2V mode is requested but the image cannot be loaded.
    """
    pil_image = None
    if input_image_path and is_image_to_video:
        try:
            # gr.Image(type="filepath") hands us a path; normalize to RGB so
            # the model never receives RGBA/palette/grayscale images.
            pil_image = Image.open(input_image_path).convert("RGB")
        except (OSError, ValueError) as e:
            # The original warned and silently fell through with pil_image=None
            # while is_image_to_video stayed True — an inconsistent state for
            # the model. Fail loudly instead, matching i2v_wrapper's handling
            # of a missing image.
            raise gr.Error(f"Could not load image: {e}")

    # Clamp to the supported range rather than trusting the UI value blindly.
    duration = max(1, min(int(duration_slider), MAX_DURATION_SECONDS))
    return generate_video(
        prompt=prompt,
        input_image=pil_image,
        duration=duration,
        is_image_to_video=is_image_to_video
    )
# --- Wrapper Functions for Tabs ---
def t2v_wrapper(prompt: str, duration_slider: float):
    """Text-to-Video tab handler: delegate with no image attached."""
    return run_generation(
        prompt,
        None,
        duration_slider,
        False,
    )
def i2v_wrapper(prompt: str, input_image_path: str, duration_slider: float):
    """Image-to-Video tab handler: require an uploaded image, then delegate."""
    if input_image_path:
        return run_generation(prompt, input_image_path, duration_slider, True)
    raise gr.Error("Please upload an image for Image-to-Video generation.")
# --- UI Definition ---
with gr.Blocks(title="Sora 2 Video Generator (ZeroScope Proxy)", fill_width=True) as demo:
    # Header card.
    # NOTE(review): MODEL_ID_T2V is interpolated in this f-string but is not
    # imported by the import block visible in this file — confirm it is
    # imported (presumably from config or models), otherwise the app raises
    # NameError at startup.
    gr.HTML(
        f"""
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1>Sora 2 Inspired Video Generator (ZeroScope Proxy)</h1>
            <p>
                This demo utilizes a real, high-quality open-source AI model ({MODEL_ID_T2V}) to simulate Sora's functionality.
                Due to hardware and model limitations, videos are currently capped at {MAX_DURATION_SECONDS} seconds.
                The audio track is synthesized based on the prompt complexity.
            </p>
            <p>
                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a>
            </p>
        </div>
        """
    )
    with gr.Tabs():
        # =======================================================
        # Tab 1: Text-to-Video (T2V)
        # =======================================================
        with gr.TabItem("Text-to-Video (T2V)"):
            with gr.Row():
                with gr.Column(scale=2):
                    # Inputs: prompt + duration (wider left column).
                    prompt_t2v = gr.Textbox(
                        label="Text Prompt",
                        value="A highly cinematic shot of a golden eagle flying over a medieval castle, volumetric lighting.",
                        lines=3
                    )
                    duration_t2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_t2v = gr.Button("Generate Video", variant="primary")
                with gr.Column(scale=1):
                    # Outputs: the handler returns (video, audio) in this order.
                    video_out_t2v = gr.Video(label="Generated Video")
                    audio_out_t2v = gr.Audio(label="Generated Audio Track", type="numpy")
            # T2V Generation Event
            generate_btn_t2v.click(
                fn=t2v_wrapper,
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v]
            )
            # Clicking an example row runs t2v_wrapper live (no cached results).
            gr.Examples(
                examples=[
                    ["A puppy dancing ballet on the moon, high saturation, 4k.", 4],
                    ["Neon lights reflecting off wet cobblestones in a cyberpunk alley, panning camera.", 4]
                ],
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v],
                fn=t2v_wrapper,
                cache_examples=False,
                run_on_click=True
            )
        # =======================================================
        # Tab 2: Image-to-Video (I2V)
        # =======================================================
        with gr.TabItem("Image-to-Video (I2V)"):
            with gr.Row():
                with gr.Column(scale=1):
                    # type="filepath" means handlers receive a path string, not
                    # a PIL image; the actual loading happens in run_generation.
                    image_i2v = gr.Image(
                        label="Input Image",
                        type="filepath",
                        sources=["upload"],
                        interactive=True,
                        value=DEFAULT_IMAGE_PATH
                    )
                with gr.Column(scale=2):
                    prompt_i2v = gr.Textbox(
                        label="Movement Prompt",
                        value="The water ripples slightly as a breeze passes through the field.",
                        placeholder="Describe the desired movement or animation.",
                        lines=3
                    )
                    duration_i2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_i2v = gr.Button("Animate Image", variant="primary")
            with gr.Row():
                video_out_i2v = gr.Video(label="Animated Video")
                audio_out_i2v = gr.Audio(label="Generated Audio Track", type="numpy")
            # I2V Generation Event — input order must match i2v_wrapper's
            # signature: (prompt, image path, duration).
            generate_btn_i2v.click(
                fn=i2v_wrapper,
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v]
            )
            gr.Examples(
                examples=[
                    [
                        "Heavy rain starts to fall, blurring the edges.",
                        DEFAULT_IMAGE_PATH,
                        4
                    ]
                ],
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v],
                fn=i2v_wrapper,
                cache_examples=False,
                run_on_click=True
            )
if __name__ == "__main__":
    # queue() bounds concurrent generation requests; launch() starts the server.
    demo.queue(max_size=20).launch()