# app.py
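"""Gradio front end for the Sora 2-inspired video generator (ZeroScope proxy).

Two tabs, Text-to-Video and Image-to-Video, feed a shared handler that
delegates the actual generation to models.generate_video.
"""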
import gradio as gr
from PIL import Image
from typing import Union

# Import utility and model functions
from models import generate_video
# MODEL_ID_T2V is referenced in the header HTML below; it is assumed to be
# defined in config alongside the other constants.
from config import MAX_DURATION_SECONDS, DEFAULT_IMAGE_PATH, MODEL_ID_T2V
from utils import ensure_placeholder_image
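
# Assumed contracts for the sibling modules (not shown in this file):
# - generate_video(prompt, input_image, duration, is_image_to_video) returns
#   a (video, audio) pair matching the outputs below: a video filepath plus a
#   (sample_rate, ndarray) tuple for gr.Audio(type="numpy").
# - ensure_placeholder_image() prepares the assets directory and writes
#   DEFAULT_IMAGE_PATH to disk if it is missing.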

# Prepare assets directory and placeholder image
ensure_placeholder_image()

# --- Unified Handler ---
def run_generation(
    prompt: str, 
    input_image_path: Union[str, None], 
    duration_slider: float, 
    is_image_to_video: bool
):
    """Unified handler that loads image if necessary and calls the model."""
    
    pil_image = None
    if input_image_path and is_image_to_video:
        try:
            # Load the PIL image from the file path provided by gr.Image
            pil_image = Image.open(input_image_path).convert("RGB")
        except Exception as e:
            # Warn the user but fall through with pil_image=None so the
            # generation call can still proceed.
            gr.Warning(f"Could not load image: {e}")
    
    # The slider reports a float; truncate it to whole seconds for the model.
    duration = int(duration_slider)
    
    return generate_video(
        prompt=prompt,
        input_image=pil_image, 
        duration=duration,
        is_image_to_video=is_image_to_video
    )

# --- Wrapper Functions for Tabs ---
def t2v_wrapper(prompt: str, duration_slider: float):
    """Handler for Text-to-Video tab."""
    return run_generation(prompt, None, duration_slider, False)

def i2v_wrapper(prompt: str, input_image_path: str, duration_slider: float):
    """Handler for Image-to-Video tab."""
    if not input_image_path:
        raise gr.Error("Please upload an image for Image-to-Video generation.")
    return run_generation(prompt, input_image_path, duration_slider, True)


# --- UI Definition ---
with gr.Blocks(title="Sora 2 Video Generator (ZeroScope Proxy)", fill_width=True) as demo:
    gr.HTML(
        f"""
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1>Sora 2 Inspired Video Generator (ZeroScope Proxy)</h1>
            <p>
                This demo uses a real open-source text-to-video model ({MODEL_ID_T2V}) to approximate Sora's functionality.
                Due to hardware and model limitations, videos are currently capped at {MAX_DURATION_SECONDS} seconds.
                The audio track is synthesized based on the prompt complexity.
            </p>
            <p>
                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a>
            </p>
        </div>
        """
    )
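
    # Two tabs wire their buttons to thin wrappers around run_generation and
    # render the same video + audio output pair.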

    with gr.Tabs():
        
        # =======================================================
        # Tab 1: Text-to-Video (T2V)
        # =======================================================
        with gr.TabItem("Text-to-Video (T2V)"):
            with gr.Row():
                with gr.Column(scale=2):
                    prompt_t2v = gr.Textbox(
                        label="Text Prompt",
                        value="A highly cinematic shot of a golden eagle flying over a medieval castle, volumetric lighting.",
                        lines=3
                    )
                    duration_t2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_t2v = gr.Button("Generate Video", variant="primary")
                    
                with gr.Column(scale=1):
                    video_out_t2v = gr.Video(label="Generated Video")
                    audio_out_t2v = gr.Audio(label="Generated Audio Track", type="numpy")

            # T2V Generation Event
            generate_btn_t2v.click(
                fn=t2v_wrapper,
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v]
            )
            
            gr.Examples(
                examples=[
                    ["A puppy dancing ballet on the moon, high saturation, 4k.", 4],
                    ["Neon lights reflecting off wet cobblestones in a cyberpunk alley, panning camera.", 4]
                ],
                inputs=[prompt_t2v, duration_t2v],
                outputs=[video_out_t2v, audio_out_t2v],
                fn=t2v_wrapper,
                cache_examples=False,
                # run_on_click triggers a live generation when an example row
                # is clicked; it only applies when cache_examples is False.
                run_on_click=True
            )

        # =======================================================
        # Tab 2: Image-to-Video (I2V)
        # =======================================================
        with gr.TabItem("Image-to-Video (I2V)"):
            with gr.Row():
                with gr.Column(scale=1):
                    # type="filepath" passes the handler a path string, which
                    # run_generation opens with PIL.
                    image_i2v = gr.Image(
                        label="Input Image",
                        type="filepath",
                        sources=["upload"],
                        interactive=True,
                        value=DEFAULT_IMAGE_PATH
                    )
                with gr.Column(scale=2):
                    prompt_i2v = gr.Textbox(
                        label="Movement Prompt",
                        value="The water ripples slightly as a breeze passes through the field.",
                        placeholder="Describe the desired movement or animation.",
                        lines=3
                    )
                    duration_i2v = gr.Slider(
                        minimum=4,
                        maximum=MAX_DURATION_SECONDS,
                        step=1,
                        value=4,
                        label=f"Video Duration (seconds, max {MAX_DURATION_SECONDS}s)"
                    )
                    generate_btn_i2v = gr.Button("Animate Image", variant="primary")
            
            with gr.Row():
                video_out_i2v = gr.Video(label="Animated Video")
                audio_out_i2v = gr.Audio(label="Generated Audio Track", type="numpy")
            
            # I2V Generation Event
            generate_btn_i2v.click(
                fn=i2v_wrapper,
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v]
            )

            gr.Examples(
                examples=[
                    [
                        "Heavy rain starts to fall, blurring the edges.",
                        DEFAULT_IMAGE_PATH,
                        4
                    ]
                ],
                inputs=[prompt_i2v, image_i2v, duration_i2v],
                outputs=[video_out_i2v, audio_out_i2v],
                fn=i2v_wrapper,
                cache_examples=False,
                run_on_click=True
            )


if __name__ == "__main__":
    # Queue generation jobs (at most 20 pending) so long-running requests
    # don't block the UI; launch() serves on Gradio's default host/port.
    demo.queue(max_size=20).launch()