eeuuia committed on
Commit fcf054d · verified · 1 Parent(s): e6712fd

Update app.py

Files changed (1)
  1. app.py +155 -207
app.py CHANGED
@@ -3,124 +3,141 @@ import torch
  import numpy as np
  import tempfile
  import os

  from diffusers import LTXLatentUpsamplePipeline
  from pipeline_ltx_condition_control import LTXConditionPipeline, LTXVideoCondition
- from diffusers.utils import export_to_video, load_video
- from torchvision import transforms
- import random
- import imageio
  from PIL import Image, ImageOps
- import cv2
- import shutil
- import glob
- from pathlib import Path

  import warnings
  import logging
- warnings.filterwarnings("ignore", category=UserWarning)
- warnings.filterwarnings("ignore", category=FutureWarning)
  warnings.filterwarnings("ignore", message=".*")
- from huggingface_hub import logging as ll
- ll.set_verbosity_error()
- ll.set_verbosity_warning()
- ll.set_verbosity_info()
- ll.set_verbosity_debug()
- logger = logging.getLogger("AducDebug")
- logging.basicConfig(level=logging.DEBUG)
- logger.setLevel(logging.DEBUG)
-
- FPS = 24
- dtype = torch.bfloat16
- device = "cuda" if torch.cuda.is_available() else "cpu"
-
- single_file_url = "https://huggingface.co/Lightricks/LTX-Video/resolve/main/ltxv-13b-0.9.8-distilled-fp8.safetensors"
-
- pipeline = LTXConditionPipeline.from_single_file(
-     single_file_url,
-     offload_state_dict=False,
-     dtype=torch.bfloat16,  # Use the appropriate dtype. For FP8, torch.float8_e4m3fn may be required.
-     cache_dir=os.getenv("HF_HOME_CACHE"),
-     token=os.getenv("HF_TOKEN"),
- )
-
- # Pipeline loading
- #pipeline = LTXConditionPipeline.from_pretrained(
- #    "Lightricks/LTX-Video-0.9.8-13B-distilled",
- #    offload_state_dict=False,
- #    torch_dtype=torch.bfloat16,
- #    cache_dir=os.getenv("HF_HOME_CACHE"),
- #    token=os.getenv("HF_TOKEN"),
- #)
-
- pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
-     "Lightricks/ltxv-spatial-upscaler-0.9.7",
-     cache_dir=os.getenv("HF_HOME_CACHE"),
-     vae=pipeline.vae, torch_dtype=dtype
- )
-
- pipeline.to(device)
- pipe_upsample.to(device)
- pipeline.vae.enable_tiling()
-
- current_dir = Path(__file__).parent
-
- def cleanup_session_files(request: gr.Request):
-     """Cleans up the session's temporary files when the user disconnects."""
-     try:
-         session_id = request.session_hash
-         session_dir = os.path.join("/tmp/gradio", session_id)
-         if os.path.exists(session_dir):
-             shutil.rmtree(session_dir)
-             print(f"Cleaned up session directory: {session_dir}")
-     except Exception as e:
-         print(f"Error during session cleanup: {e}")
-
- def read_video(video) -> torch.Tensor:
-     """Reads a video file and converts it into a torch tensor."""
-     to_tensor_transform = transforms.ToTensor()
-     if isinstance(video, str):
-         video_tensor = torch.stack([to_tensor_transform(img) for img in imageio.get_reader(video)])
-     else:
-         video_tensor = torch.stack([to_tensor_transform(img) for img in video])
-     return video_tensor

  def round_to_nearest_resolution_acceptable_by_vae(height, width, vae_temporal_compression_ratio):
-     """Rounds the resolution to values acceptable by the VAE."""
      height = height - (height % vae_temporal_compression_ratio)
      width = width - (width % vae_temporal_compression_ratio)
      return height, width

- # The function signature goes back to accepting individual arguments for Gradio compatibility
- def generate_video(
-     condition_image_1,
-     condition_strength_1,
-     condition_frame_index_1,
-     condition_image_2,
-     condition_strength_2,
-     condition_frame_index_2,
-     prompt,
-     duration=3.0,
-     negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
-     height=768,
-     width=1152,
-     num_inference_steps=7,
-     guidance_scale=1.0,
-     seed=0,
-     randomize_seed=False,
      progress=gr.Progress(track_tqdm=True)
  ):
      try:
-         # Logic to group the conditions *inside* the function
-         # Frame and resolution computation
          num_frames = int(duration * FPS) + 1
          temporal_compression = pipeline.vae_temporal_compression_ratio
          num_frames = ((num_frames - 1) // temporal_compression) * temporal_compression + 1

          downscale_factor = 2 / 3
          downscaled_height = int(height * downscale_factor)
          downscaled_width = int(width * downscale_factor)
@@ -128,85 +145,45 @@ def generate_video(
              downscaled_height, downscaled_width, pipeline.vae_temporal_compression_ratio
          )

-         conditions = []
-         if condition_image_1 is not None:
-             condition_image_1 = ImageOps.fit(condition_image_1, (downscaled_width, downscaled_height), Image.LANCZOS)
-             conditions.append(LTXVideoCondition(
-                 image=condition_image_1,
-                 strength=condition_strength_1,
-                 frame_index=int(condition_frame_index_1)
-             ))
-         if condition_image_2 is not None:
-             condition_image_2 = ImageOps.fit(condition_image_2, (downscaled_width, downscaled_height), Image.LANCZOS)
-             conditions.append(LTXVideoCondition(
-                 image=condition_image_2,
-                 strength=condition_strength_2,
-                 frame_index=int(condition_frame_index_2)
-             ))
-
-         pipeline_args = {}
-         if conditions:
-             pipeline_args["conditions"] = conditions
-
-         # Seed handling
-         if randomize_seed:
-             seed = random.randint(0, 2**32 - 1)
-
-         # STEP 1: Low-resolution video generation
          latents = pipeline(
-             prompt=prompt,
-             negative_prompt=negative_prompt,
-             width=downscaled_width,
-             height=downscaled_height,
-             num_frames=num_frames,
-             timesteps=[1000, 993, 987, 981, 975, 909, 725, 0.03],
-             decode_timestep=0.05,
-             decode_noise_scale=0.025,
-             image_cond_noise_scale=0.0,
-             guidance_scale=guidance_scale,
-             guidance_rescale=0.7,
-             generator=torch.Generator().manual_seed(seed),
-             output_type="latent",
-             **pipeline_args
          ).frames

-         # STEP 2: Latent upscaling
-         #upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
-         #upscaled_latents = pipe_upsample(
-         #    latents=latents,
-         #    output_type="latent"
-         #).frames
-
-         print(f"STEP 1 latents {latents.shape}")
-
-         # STEP 3: Final high-resolution denoise
          final_video_frames_np = pipeline(
-             prompt=prompt,
-             negative_prompt=negative_prompt,
-             width=downscaled_width,
-             height=downscaled_height,
-             num_frames=num_frames,
-             denoise_strength=0.999,
-             timesteps=[1000, 909, 725, 421, 0],
-             latents=latents,
-             decode_timestep=0.05,
-             decode_noise_scale=0.025,
-             image_cond_noise_scale=0.0,
-             guidance_scale=guidance_scale,
-             guidance_rescale=0.7,
              generator=torch.Generator(device="cuda").manual_seed(seed),
-             output_type="np",
-             **pipeline_args
          ).frames[0]

-         print(f"STEP 3 final_video_frames_np {final_video_frames_np.shape}")
-
-         # Export to an MP4 file
          video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]
          output_filename = "output.mp4"
          with imageio.get_writer(output_filename, fps=FPS, quality=8, macro_block_size=1) as writer:
@@ -218,53 +195,39 @@ def generate_video(

      except Exception as e:
          print(f"An error occurred: {e}")
          return None, seed

- # Gradio user interface
  with gr.Blocks(theme=gr.themes.Ocean(font=[gr.themes.GoogleFont("Lexend Deca"), "sans-serif"]), delete_cache=(60, 900)) as demo:
-     gr.Markdown(
-         """
-         # Video Generation with LTX
-         **Create videos from text and condition images using the LTX-Video model.**
-         """
-     )

      with gr.Row():
          with gr.Column(scale=1):
-             prompt = gr.Textbox(
-                 label="Prompt",
-                 placeholder="Describe the video you want to generate...",
-                 lines=3,
-                 value="The Joker in his iconic purple suit and green hair, dancing alone in a dark, decaying room. His movements are erratic and unpredictable, alternating between graceful and chaotic as he loses himself in the moment. The camera captures his theatrical gestures, his dance reflecting his unbalanced personality. Moody lighting with shadows dancing across the walls, creating an atmosphere of beautiful madness."
-             )

              with gr.Accordion("Condition Image 1", open=True):
-                 condition_image_1 = gr.Image(label="Condition Image 1", type="pil")
                  with gr.Row():
-                     condition_strength_1 = gr.Slider(label="Strength", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                      condition_frame_index_1 = gr.Number(label="Frame", value=0, precision=0)

              with gr.Accordion("Condition Image 2", open=False):
-                 condition_image_2 = gr.Image(label="Condition Image 2", type="pil")
                  with gr.Row():
-                     condition_strength_2 = gr.Slider(label="Strength", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                      condition_frame_index_2 = gr.Number(label="Frame", value=0, precision=0)

-             duration = gr.Slider(label="Duration (seconds)", minimum=1.0, maximum=10.0, step=0.5, value=2)

              with gr.Accordion("Advanced Settings", open=False):
-                 negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="What you don't want in the video...", lines=2, value="worst quality, inconsistent motion, blurry, jittery, distorted")
                  with gr.Row():
                      height = gr.Slider(label="Height", minimum=256, maximum=1536, step=32, value=768)
                      width = gr.Slider(label="Width", minimum=256, maximum=1536, step=32, value=1152)
-
-                 num_inference_steps = gr.Slider(label="Inference Steps", minimum=5, maximum=10, step=1, value=7, visible=False)
-
-                 with gr.Row():
-                     guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=5.0, step=0.1, value=1.0)
-
                  with gr.Row():
                      randomize_seed = gr.Checkbox(label="Random Seed", value=True)
                      seed = gr.Number(label="Seed", value=0, precision=0)
@@ -272,33 +235,18 @@ with gr.Blocks(theme=gr.themes.Ocean(font=[gr.themes.GoogleFont("Lexend Deca"),

          with gr.Column(scale=1):
              output_video = gr.Video(label="Generated Video", height=400)
-
-     # FIX: the inputs list is now "flat", containing only Gradio components
      generate_btn.click(
-         fn=generate_video,
          inputs=[
-             condition_image_1,
-             condition_strength_1,
-             condition_frame_index_1,
-             condition_image_2,
-             condition_strength_2,
-             condition_frame_index_2,
-             prompt,
-             duration,
-             negative_prompt,
-             height,
-             width,
-             num_inference_steps,
-             guidance_scale,
-             seed,
-             randomize_seed,
          ],
-         outputs=[output_video, seed],
-         show_progress=True
      )

- demo.unload(cleanup_session_files)
-
  if __name__ == "__main__":
      demo.queue().launch(server_name="0.0.0.0", server_port=7860, debug=True, show_error=True)
app.py (after change):
  import numpy as np
  import tempfile
  import os
+ import yaml
+ import json
+ import random
+ import threading
+ from pathlib import Path

+ # Hugging Face imports
+ from huggingface_hub import snapshot_download, HfFolder
+ from transformers import T5EncoderModel, T5TokenizerFast
  from diffusers import LTXLatentUpsamplePipeline
+ from diffusers.models import AutoencoderKLLTXVideo, LTXVideoTransformer3DModel
+ from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
+
+ # Our custom pipeline and utilities
  from pipeline_ltx_condition_control import LTXConditionPipeline, LTXVideoCondition
+ from diffusers.utils import export_to_video

  from PIL import Image, ImageOps
+ import imageio

+ # --- Logging and warnings configuration ---
  import warnings
  import logging
+ warnings.filterwarnings("ignore", category=UserWarning)
+ warnings.filterwarnings("ignore", category=FutureWarning)
  warnings.filterwarnings("ignore", message=".*")
+ from huggingface_hub import logging as hf_logging
+ hf_logging.set_verbosity_error()
+
+ # --- Service class for loading and managing the models ---
+
+ class VideoGenerationService:
+     """
+     Encapsulates loading and configuring the AI pipelines.
+     Loads the components explicitly and modularly from a configuration file.
+     """
+     def __init__(self, config_path: Path):
+         print("=== [Video Generation Service] Initializing... ===")
+         if not torch.cuda.is_available():
+             raise RuntimeError("CUDA is required to run this service.")
+
+         self.device = "cuda"
+         self.torch_dtype = torch.bfloat16
+         print(f"[Init] Device: {self.device}, DType: {self.torch_dtype}")
+
+         with open(config_path, "r") as f:
+             self.cfg = yaml.safe_load(f)
+         print(f"[Init] Configuration loaded from: {config_path}")
+         print(json.dumps(self.cfg, indent=2))
+
+         # Parameters from the YAML
+         self.base_repo = self.cfg.get("base_repo")
+         self.checkpoint_path = self.cfg.get("checkpoint_path")
+         self.upscaler_repo = self.cfg.get("spatial_upscaler_model_path")
+
+         self._initialize()
+         print("=== [Video Generation Service] Initialization complete. ===")
+
+     def _initialize(self):
+         print(f"=== [Init] Downloading snapshot of the base repository: {self.base_repo} ===")
+         local_repo_path = snapshot_download(
+             repo_id=self.base_repo,
+             token=os.getenv("HF_TOKEN") or HfFolder.get_token(),
+             resume_download=True
+         )

+         print("[Init] Loading pipeline components from local files...")
+         self.vae = AutoencoderKLLTXVideo.from_pretrained(local_repo_path, subfolder="vae", torch_dtype=self.torch_dtype)
+         self.text_encoder = T5EncoderModel.from_pretrained(local_repo_path, subfolder="text_encoder", torch_dtype=self.torch_dtype)
+         self.tokenizer = T5TokenizerFast.from_pretrained(local_repo_path, subfolder="tokenizer")
+         self.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(local_repo_path, subfolder="scheduler")
+
+         # Cause of the earlier error: explicitly disable dynamic shifting for compatibility
+         if hasattr(self.scheduler.config, 'use_dynamic_shifting') and self.scheduler.config.use_dynamic_shifting:
+             print("[Init] Disabling 'use_dynamic_shifting' on the scheduler.")
+             self.scheduler.config.use_dynamic_shifting = False
+
+         print(f"[Init] Loading Transformer weights from: {self.checkpoint_path}")
+         self.transformer = LTXVideoTransformer3DModel.from_pretrained(
+             local_repo_path, subfolder="transformer", weight_name=self.checkpoint_path, torch_dtype=self.torch_dtype
+         )

+         print("[Init] Assembling the LTXConditionPipeline...")
+         self.pipeline = LTXConditionPipeline(
+             vae=self.vae, text_encoder=self.text_encoder, tokenizer=self.tokenizer,
+             scheduler=self.scheduler, transformer=self.transformer
+         )
+         self.pipeline.to(self.device)
+         self.pipeline.vae.enable_tiling()
+
+         print(f"[Init] Loading the spatial upsampler from: {self.upscaler_repo}")
+         self.upsampler = LTXLatentUpsamplePipeline.from_pretrained(
+             self.upscaler_repo, vae=self.vae, torch_dtype=self.torch_dtype
+         )
+         self.upsampler.to(self.device)

+ # --- Application initialization ---

+ CONFIG_PATH = Path("ltx_config.yaml")
+ if not CONFIG_PATH.exists():
+     raise FileNotFoundError(f"Configuration file '{CONFIG_PATH}' not found. Create it before running the application.")

+ # Instantiate the service that loads and holds the models
+ service = VideoGenerationService(config_path=CONFIG_PATH)
+ pipeline = service.pipeline
+ pipe_upsample = service.upsampler

+ FPS = 24
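For reference, the VideoGenerationService above reads three keys from ltx_config.yaml; the config file itself is not part of this commit. A minimal sketch, with values mirroring the repositories and checkpoint the previous revision pointed at (assumptions, not taken from this commit):

base_repo: Lightricks/LTX-Video-0.9.8-13B-distilled
checkpoint_path: ltxv-13b-0.9.8-distilled-fp8.safetensors
spatial_upscaler_model_path: Lightricks/ltxv-spatial-upscaler-0.9.7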
+ # --- Main video generation logic ---

  def round_to_nearest_resolution_acceptable_by_vae(height, width, vae_temporal_compression_ratio):
      height = height - (height % vae_temporal_compression_ratio)
      width = width - (width % vae_temporal_compression_ratio)
      return height, width

+ def prepare_and_generate_video(
+     condition_image_1, condition_strength_1, condition_frame_index_1,
+     condition_image_2, condition_strength_2, condition_frame_index_2,
+     prompt, duration, negative_prompt,
+     height, width, guidance_scale, seed, randomize_seed,
      progress=gr.Progress(track_tqdm=True)
  ):
      try:
+         conditions_data = [
+             (condition_image_1, condition_strength_1, condition_frame_index_1),
+             (condition_image_2, condition_strength_2, condition_frame_index_2)
+         ]
+
+         if randomize_seed:
+             seed = random.randint(0, 2**32 - 1)
+
          num_frames = int(duration * FPS) + 1
          temporal_compression = pipeline.vae_temporal_compression_ratio
          num_frames = ((num_frames - 1) // temporal_compression) * temporal_compression + 1

+         # Step 1: prepare conditions for low resolution
          downscale_factor = 2 / 3
          downscaled_height = int(height * downscale_factor)
          downscaled_width = int(width * downscale_factor)

              downscaled_height, downscaled_width, pipeline.vae_temporal_compression_ratio
          )

+         conditions_low_res = []
+         for image, strength, frame_index in conditions_data:
+             if image is not None:
+                 processed_image = ImageOps.fit(image, (downscaled_width, downscaled_height), Image.LANCZOS)
+                 conditions_low_res.append(LTXVideoCondition(
+                     image=processed_image, strength=strength, frame_index=int(frame_index)
+                 ))

+         pipeline_args_low_res = {"conditions": conditions_low_res} if conditions_low_res else {}

          latents = pipeline(
+             prompt=prompt, negative_prompt=negative_prompt, width=downscaled_width, height=downscaled_height,
+             num_frames=num_frames, generator=torch.Generator().manual_seed(seed),
+             output_type="latent", **pipeline_args_low_res
          ).frames

+         # Step 2: upscale
+         upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
+         upscaled_latents = pipe_upsample(latents=latents, output_type="latent").frames
+
+         # Step 3: prepare conditions for high resolution (to keep conditioned frames unchanged)
+         conditions_high_res = []
+         for image, strength, frame_index in conditions_data:
+             if image is not None:
+                 processed_image_high_res = ImageOps.fit(image, (upscaled_width, upscaled_height), Image.LANCZOS)
+                 conditions_high_res.append(LTXVideoCondition(
+                     image=processed_image_high_res, strength=strength, frame_index=int(frame_index)
+                 ))

+         pipeline_args_high_res = {"conditions": conditions_high_res} if conditions_high_res else {}

          final_video_frames_np = pipeline(
+             prompt=prompt, negative_prompt=negative_prompt, width=upscaled_width, height=upscaled_height,
+             num_frames=num_frames, denoise_strength=0.999, latents=upscaled_latents,
              generator=torch.Generator(device="cuda").manual_seed(seed),
+             output_type="np", **pipeline_args_high_res
          ).frames[0]

+         # Step 4: export
          video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]
          output_filename = "output.mp4"
          with imageio.get_writer(output_filename, fps=FPS, quality=8, macro_block_size=1) as writer:

      except Exception as e:
          print(f"An error occurred: {e}")
+         import traceback
+         traceback.print_exc()
          return None, seed
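The frame-count snapping in prepare_and_generate_video above keeps num_frames of the form k * temporal_compression + 1, which the LTX VAE requires. A standalone sketch of the arithmetic, assuming a temporal compression ratio of 8 (an assumption; the real value is read from pipeline.vae_temporal_compression_ratio):

FPS = 24
ratio = 8        # assumed here; app.py reads it from the pipeline
duration = 2.5   # seconds
num_frames = int(duration * FPS) + 1                  # 61
num_frames = ((num_frames - 1) // ratio) * ratio + 1  # 57 == 7 * 8 + 1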
 
+ # --- Gradio user interface ---
  with gr.Blocks(theme=gr.themes.Ocean(font=[gr.themes.GoogleFont("Lexend Deca"), "sans-serif"]), delete_cache=(60, 900)) as demo:
+     gr.Markdown("# Video Generation with LTX\n**Create videos from text and condition images.**")

      with gr.Row():
          with gr.Column(scale=1):
+             prompt = gr.Textbox(label="Prompt", placeholder="Describe the video you want to generate...", lines=3, value="The Joker dancing in a dark room, dramatic lighting.")

              with gr.Accordion("Condition Image 1", open=True):
+                 condition_image_1 = gr.Image(label="Image 1", type="pil")
                  with gr.Row():
+                     condition_strength_1 = gr.Slider(label="Strength", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                      condition_frame_index_1 = gr.Number(label="Frame", value=0, precision=0)

              with gr.Accordion("Condition Image 2", open=False):
+                 condition_image_2 = gr.Image(label="Image 2", type="pil")
                  with gr.Row():
+                     condition_strength_2 = gr.Slider(label="Strength", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                      condition_frame_index_2 = gr.Number(label="Frame", value=0, precision=0)

+             duration = gr.Slider(label="Duration (s)", minimum=1.0, maximum=10.0, step=0.5, value=2)

              with gr.Accordion("Advanced Settings", open=False):
+                 negative_prompt = gr.Textbox(label="Negative Prompt", lines=2, value="worst quality, blurry, jittery, distorted")
                  with gr.Row():
                      height = gr.Slider(label="Height", minimum=256, maximum=1536, step=32, value=768)
                      width = gr.Slider(label="Width", minimum=256, maximum=1536, step=32, value=1152)
                  with gr.Row():
+                     guidance_scale = gr.Slider(label="Guidance", minimum=1.0, maximum=5.0, step=0.1, value=1.0)
                      randomize_seed = gr.Checkbox(label="Random Seed", value=True)
                      seed = gr.Number(label="Seed", value=0, precision=0)

          with gr.Column(scale=1):
              output_video = gr.Video(label="Generated Video", height=400)
+             generated_seed = gr.Number(label="Seed Used", interactive=False)
+
      generate_btn.click(
+         fn=prepare_and_generate_video,
          inputs=[
+             condition_image_1, condition_strength_1, condition_frame_index_1,
+             condition_image_2, condition_strength_2, condition_frame_index_2,
+             prompt, duration, negative_prompt,
+             height, width, guidance_scale, seed, randomize_seed,
          ],
+         outputs=[output_video, generated_seed]
      )

  if __name__ == "__main__":
      demo.queue().launch(server_name="0.0.0.0", server_port=7860, debug=True, show_error=True)