File size: 10,306 Bytes
972cb1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8d0fbc
c00c62e
 
 
 
 
 
 
 
 
 
972cb1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# app.py (Versão Corrigida)

import gradio as gr
from PIL import Image
import os
import imageio
from api.ltx_server import video_generation_service
from huggingface_hub import logging

# Each set_verbosity_* call overwrites the previous level, so the original
# error/warning/info calls were dead code — only the final (debug) setting
# ever took effect. Keep just that one call.
logging.set_verbosity_debug()

# --- UI HELPER FUNCTIONS ---
# ... (calculate_new_dimensions and handle_media_upload_for_dims are unchanged) ...
TARGET_FIXED_SIDE = 768   # pixel length the fixed (shorter) side is pinned to
MIN_DIM_SLIDER = 256      # lower bound for the height/width sliders
MAX_IMAGE_SIZE = 1280     # upper bound for the height/width sliders

def calculate_new_dimensions(orig_w, orig_h):
    """Derive slider-friendly (height, width) from a media file's native size.

    The shorter side is pinned to TARGET_FIXED_SIDE, the longer side is
    scaled to preserve the aspect ratio and snapped to a multiple of 32,
    then both sides are clamped to [MIN_DIM_SLIDER, MAX_IMAGE_SIZE].

    Returns:
        tuple[int, int]: (height, width).
    """
    # Degenerate input: fall back to a square at the target size.
    if orig_w == 0 or orig_h == 0:
        side = int(TARGET_FIXED_SIDE)
        return side, side

    def _clamp(value):
        # Keep a dimension inside the slider's allowed range.
        return max(MIN_DIM_SLIDER, min(value, MAX_IMAGE_SIZE))

    if orig_w >= orig_h:
        # Landscape (or square): fix the height, scale the width.
        new_h = TARGET_FIXED_SIDE
        new_w = _clamp(round((new_h * (orig_w / orig_h)) / 32) * 32)
        new_h = _clamp(new_h)
    else:
        # Portrait: fix the width, scale the height.
        new_w = TARGET_FIXED_SIDE
        new_h = _clamp(round((new_w * (orig_h / orig_w)) / 32) * 32)
        new_w = _clamp(new_w)
    return int(new_h), int(new_w)

def handle_media_upload_for_dims(filepath, current_h, current_w):
    """Inspect an uploaded image/video and propose matching H/W slider values.

    Falls back to the current slider values when the file is missing or
    cannot be read. Returns a pair of gr.update objects (height, width).
    """
    if not filepath or not os.path.exists(str(filepath)):
        return gr.update(value=current_h), gr.update(value=current_w)
    try:
        if str(filepath).lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            # Still image: PIL reports (width, height) directly.
            with Image.open(filepath) as img:
                orig_w, orig_h = img.size
        else:
            # Anything else is assumed to be a video; read its metadata,
            # falling back to the current sliders if 'size' is absent.
            with imageio.get_reader(filepath) as reader:
                metadata = reader.get_meta_data()
                orig_w, orig_h = metadata.get('size', (current_w, current_h))
        new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
        return gr.update(value=new_h), gr.update(value=new_w)
    except Exception as e:
        print(f"Erro ao processar mídia para dimensões: {e}")
        return gr.update(value=current_h), gr.update(value=current_w)

def update_frame_slider(duration):
    """Resize the middle-keyframe slider to match the clip length (24 fps)."""
    frames_total = int(duration * 24.0)
    # Keep the usual default of 48 when it still fits inside the new range;
    # otherwise drop to the midpoint of the shorter clip.
    if frames_total >= 48:
        default = 48
    else:
        default = frames_total // 2
    return gr.update(maximum=frames_total, value=default)

# --- WRAPPER FUNCTION THAT CALLS THE SERVICE ---
def gradio_generate_wrapper(
    prompt, negative_prompt, mode,
    # Keyframe inputs
    start_image,
    middle_image, middle_frame, middle_weight,
    end_image, end_weight,
    # Other inputs
    input_video, height, width, duration,
    frames_to_use, seed, randomize_seed,
    guidance_scale, improve_texture,
    progress=gr.Progress(track_tqdm=True)
):
    """Bridge between the Gradio UI and the video generation service.

    Coerces UI values to the types the service expects, relays backend
    progress to the Gradio progress bar, and converts service failures
    into gr.Error so they surface in the UI.

    Returns:
        tuple: (output_path, used_seed) from the service.
    """
    try:
        def progress_handler(step, total_steps):
            # Forward backend step counts to the Gradio progress bar.
            progress(step / total_steps, desc="Salvando vídeo...")

        output_path, used_seed = video_generation_service.generate(
            prompt=prompt,
            negative_prompt=negative_prompt,
            mode=mode,
            start_image_filepath=start_image,
            middle_image_filepath=middle_image,
            middle_frame_number=middle_frame,
            middle_image_weight=middle_weight,
            end_image_filepath=end_image,
            end_image_weight=end_weight,
            input_video_filepath=input_video,
            height=int(height),
            width=int(width),
            duration=float(duration),
            frames_to_use=int(frames_to_use),
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
            guidance_scale=float(guidance_scale),
            improve_texture=bool(improve_texture),
            progress_callback=progress_handler,
        )
        return output_path, used_seed
    except ValueError as e:
        # Validation errors from the service are shown to the user verbatim.
        raise gr.Error(str(e))
    except Exception as e:
        # Unexpected failures: log the detail, show a generic message.
        print(f"Erro inesperado na geração: {e}")
        raise gr.Error("Ocorreu um erro inesperado. Verifique os logs.")

# --- GRADIO INTERFACE DEFINITION ---
css = "#col-container { margin: 0 auto; max-width: 900px; }"
with gr.Blocks(css=css) as demo:
    gr.Markdown("# LTX Video com Keyframes")
    gr.Markdown("Guie a geração de vídeo usando imagens de início, meio e fim.")

    with gr.Row():
        with gr.Column():
            # One tab per generation mode; the hidden `mode` dropdown defined
            # in the Advanced accordion below is kept in sync by the tab
            # .select() handlers further down.
            with gr.Tab("image-to-video (Keyframes)") as image_tab:
                i2v_prompt = gr.Textbox(label="Prompt", value="Uma bela transição entre as imagens", lines=2)
                
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("#### Início (Obrigatório)")
                        start_image_i2v = gr.Image(label="Imagem de Início", type="filepath", sources=["upload", "clipboard"])
                with gr.Row():
                    with gr.Accordion("Imagens condicionais opcional", open=False):    
                        with gr.Column(scale=1):
                            gr.Markdown("#### Meio (Opcional)")
                            middle_image_i2v = gr.Image(label="Imagem do Meio", type="filepath", sources=["upload", "clipboard"])
                            # Frame index targeted by the middle keyframe; its
                            # maximum is rescaled by update_frame_slider when
                            # the duration slider changes.
                            middle_frame_i2v = gr.Slider(label="Frame Alvo", minimum=0, maximum=200, step=1, value=48)
                            middle_weight_i2v = gr.Slider(label="Peso/Força", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                        with gr.Column(scale=1):
                            gr.Markdown("#### Fim (Opcional)")
                            end_image_i2v = gr.Image(label="Imagem de Fim", type="filepath", sources=["upload", "clipboard"])
                            end_weight_i2v = gr.Slider(label="Peso/Força", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                
                i2v_button = gr.Button("Generate Image-to-Video", variant="primary")

            with gr.Tab("text-to-video") as text_tab:
                t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
                t2v_button = gr.Button("Generate Text-to-Video", variant="primary")

            with gr.Tab("video-to-video") as video_tab:
                video_v2v = gr.Video(label="Input Video", sources=["upload", "webcam"])
                frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=257, value=9, step=8, info="Must be N*8+1.")
                v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
                v2v_button = gr.Button("Generate Video-to-Video", variant="primary")

            duration_input = gr.Slider(label="Video Duration (seconds)", minimum=1, maximum=30, value=8, step=0.5)
            improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True, visible=True)

        with gr.Column():
            output_video = gr.Video(label="Generated Video", interactive=False)

    with gr.Accordion("Advanced settings", open=False):
        # Hidden mode selector; its value is driven by which tab is selected.
        mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
        negative_prompt_input = gr.Textbox(label="Negative Prompt", value="worst quality, blurry, jittery", lines=2)
        with gr.Row():
            seed_input = gr.Number(label="Seed", value=42, precision=0)
            randomize_seed_input = gr.Checkbox(label="Randomize Seed", value=True)
        guidance_scale_input = gr.Slider(label="Guidance Scale (CFG)", minimum=1.0, maximum=10.0, value=3.0, step=0.1)
        with gr.Row():
            height_input = gr.Slider(label="Height", value=512, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE)
            width_input = gr.Slider(label="Width", value=704, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE)

    # --- UI EVENT WIRING ---
    
    # Uploading media auto-adjusts the H/W sliders to match its aspect ratio.
    start_image_i2v.upload(fn=handle_media_upload_for_dims, inputs=[start_image_i2v, height_input, width_input], outputs=[height_input, width_input])
    video_v2v.upload(fn=handle_media_upload_for_dims, inputs=[video_v2v, height_input, width_input], outputs=[height_input, width_input])
    duration_input.change(fn=update_frame_slider, inputs=duration_input, outputs=middle_frame_i2v)
    
    # Selecting a tab updates the hidden `mode` dropdown accordingly.
    image_tab.select(fn=lambda: "image-to-video", outputs=[mode])
    text_tab.select(fn=lambda: "text-to-video", outputs=[mode])
    video_tab.select(fn=lambda: "video-to-video", outputs=[mode])
    
    # Hidden placeholder components for buttons that do not use certain inputs.
    none_image = gr.Textbox(visible=False, value=None)
    none_video = gr.Textbox(visible=False, value=None)
    
    # Parameters shared by all three modes.
    shared_params = [
        height_input, width_input, duration_input, frames_to_use, 
        seed_input, randomize_seed_input, guidance_scale_input, improve_texture
    ]
    
    i2v_inputs = [
        i2v_prompt, negative_prompt_input, mode,
        start_image_i2v, middle_image_i2v, middle_frame_i2v, middle_weight_i2v,
        end_image_i2v, end_weight_i2v,
        none_video, # placeholder for input_video
        *shared_params
    ]

    # NOTE(review): `none_image` appears several times within a single inputs
    # list below, and extra hidden components are instantiated inline here —
    # confirm the installed Gradio version accepts duplicated components in
    # `inputs` lists.
    t2v_inputs = [
        t2v_prompt, negative_prompt_input, mode,
        none_image, none_image, gr.Number(value=-1, visible=False), gr.Slider(value=0, visible=False), # placeholders for keyframes
        none_image, gr.Slider(value=0, visible=False),
        none_video, # placeholder for input_video
        *shared_params
    ]
    
    v2v_inputs = [
        v2v_prompt, negative_prompt_input, mode,
        none_image, none_image, gr.Number(value=-1, visible=False), gr.Slider(value=0, visible=False), # placeholders for keyframes
        none_image, gr.Slider(value=0, visible=False),
        video_v2v, # the real video input
        *shared_params
    ]
    
    common_outputs = [output_video, seed_input]
    
    i2v_button.click(fn=gradio_generate_wrapper, inputs=i2v_inputs, outputs=common_outputs, api_name="image_to_video_keyframes")
    t2v_button.click(fn=gradio_generate_wrapper, inputs=t2v_inputs, outputs=common_outputs, api_name="text_to_video")
    v2v_button.click(fn=gradio_generate_wrapper, inputs=v2v_inputs, outputs=common_outputs, api_name="video_to_video")
    # --- <END OF FIX> ---


if __name__ == "__main__":
    # queue() enables request queuing so long generations don't time out.
    demo.queue().launch(debug=True, share=False)