Aduc_sdr

Paused

App Files Files Community

euiia committed on Sep 2

Commit

887690a

verified ·

1 Parent(s): 3b91b34

Update deformes4D_engine.py

Browse files

Files changed (1) hide show

deformes4D_engine.py +81 -135

deformes4D_engine.py CHANGED Viewed

@@ -26,7 +26,7 @@ from audio_specialist import audio_specialist_singleton
 from ltx_manager_helpers import ltx_manager_singleton
 from gemini_helpers import gemini_singleton
 from upscaler_specialist import upscaler_specialist_singleton
-from hd_specialist import hd_specialist_singleton # Importa o novo especialista
 from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
 from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
@@ -58,17 +58,28 @@ class Deformes4DEngine:
         self._vae.to(self.device); self._vae.eval()
         return self._vae
-    # MÉTODOS AUXILIARES
-    def save_latent_tensor(self, tensor: torch.Tensor, path: str):
-        torch.save(tensor.cpu(), path)
-    def load_latent_tensor(self, path: str) -> torch.Tensor:
-        return torch.load(path, map_location=self.device)
-    @torch.no_grad()
-    def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
-        tensor = tensor.to(self.device, dtype=self.vae.dtype)
-        return vae_encode(tensor, self.vae, vae_per_channel_normalize=True)
     @torch.no_grad()
     def latents_to_pixels(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
@@ -76,14 +87,6 @@ class Deformes4DEngine:
         timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
         return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
-    def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
-        if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
-        video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
-        video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
-        video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
-        with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
-            for frame in video_np: writer.append_data(frame)
     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
         if image.size != target_resolution:
             return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
@@ -95,70 +98,32 @@ class Deformes4DEngine:
         tensor = (tensor * 2.0) - 1.0
         return self.pixels_to_latents(tensor)
-    def _get_video_duration(self, video_path: str) -> float:
-        if not os.path.exists(video_path): return 0.0
-        try:
-            result = subprocess.run(
-                ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", video_path],
-                capture_output=True, text=True, check=True)
-            return float(result.stdout.strip())
-        except Exception:
-            return 0.0
-    def _combine_video_and_audio_ffmpeg(self, video_path: str, audio_path: str, output_path: str):
-        """Combina um arquivo de vídeo com um arquivo de áudio usando ffmpeg."""
-        cmd = [
-            'ffmpeg', '-y',
-            '-i', video_path,
-            '-i', audio_path,
-            '-c:v', 'copy',      # Copia o stream de vídeo sem re-codificar
-            '-c:a', 'aac',       # Re-codifica o áudio para o formato AAC, padrão para MP4
-            '-shortest',         # Termina a codificação quando o stream mais curto terminar
-            output_path
-        ]
-        try:
-            subprocess.run(cmd, check=True, capture_output=True, text=True, encoding='utf-8')
-            logger.info(f"Áudio e vídeo combinados com sucesso em {output_path}")
-        except subprocess.CalledProcessError as e:
-            logger.error(f"Falha ao combinar áudio e vídeo. Detalhes: {e.stderr}")
-            raise gr.Error(f"Falha ao combinar áudio e vídeo: {e.stderr}")
-    def _generate_standalone_audio(self, video_for_duration_path: str, audio_prompt: str) -> str:
-        """Gera um arquivo de áudio e retorna seu caminho."""
-        duration = self._get_video_duration(video_for_duration_path)
-        if duration == 0:
-            raise gr.Error("Não foi possível determinar a duração do vídeo para gerar o áudio.")
-        # Esta função agora deve retornar apenas o caminho do arquivo de áudio gerado
-        # (pode exigir uma pequena modificação no seu audio_specialist)
-        audio_path = audio_specialist_singleton.generate_audio(
-            prompt=audio_prompt,
-            duration_seconds=duration,
-            output_dir=self.workspace_dir
-        )
-        return audio_path
-    # NÚCLEO DA LÓGICA ADUC-SDR
     def generate_full_movie(self, keyframes: list, global_prompt: str, storyboard: list,
                             seconds_per_fragment: float, trim_percent: int,
                             handler_strength: float, destination_convergence_strength: float,
                             video_resolution: int, use_continuity_director: bool,
                             progress: gr.Progress = gr.Progress()):
-        TOTAL_STEPS = len(keyframes) - 1 + 5 # Fragmentos + 5 etapas de pós-produção
         current_step = 0
         FPS = 24
         FRAMES_PER_LATENT_CHUNK = 8
         ECO_LATENT_CHUNKS = 2
         total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
-        total_latents_brutos = total_frames_brutos // FRAMES_PER_LATENT_CHUNK
         frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
         latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
-        if total_latents_brutos <= latents_a_podar + 1:
-            raise gr.Error(f"A combinação de duração e poda é muito agressiva.")
         DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
         DESTINATION_FRAME_TARGET = total_frames_brutos - 1
@@ -169,16 +134,14 @@ class Deformes4DEngine:
         eco_latent_for_next_loop = None
         dejavu_latent_for_next_loop = None
-        num_transitions_to_generate = len(keyframe_paths) - 1
         processed_latent_fragments = []
         for i in range(num_transitions_to_generate):
             fragment_index = i + 1
             current_step += 1
             progress(current_step / TOTAL_STEPS, desc=f"Gerando Fragmento {fragment_index}/{num_transitions_to_generate}")
-            # ... (Lógica de decisão do Gemini e configuração de parâmetros - sem alterações)
             past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
             start_keyframe_path = keyframe_paths[i]
             destination_keyframe_path = keyframe_paths[i + 1]
@@ -189,7 +152,7 @@ class Deformes4DEngine:
             transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
             story_history += f"\n- Ato {fragment_index}: {motion_prompt}"
-            expected_height, expected_width = 768, 1152
             downscale_factor = 2 / 3
             downscaled_height = self._quantize_to_multiple(int(expected_height * downscale_factor), 8)
             downscaled_width = self._quantize_to_multiple(int(expected_width * downscale_factor), 8)
@@ -217,82 +180,76 @@ class Deformes4DEngine:
             if transition_type == "cut":
                 eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
-            # --- ATO I: PÓS-PRODUÇÃO LATENTE ---
             upscaled_latents = self.upscale_latents(latents_video)
             refined_latents = self.refine_latents(upscaled_latents, motion_prompt=f"refining scene: {motion_prompt}")
             processed_latent_fragments.append(refined_latents)
-        # --- FIM DO LOOP DE GERAÇÃO ---
-        current_step += 1
-        progress(current_step / TOTAL_STEPS, desc="Concatenando fragmentos...")
-        tensors_para_concatenar = [frag.to(self.device) for frag in processed_latent_fragments]
-        final_concatenated_latents = torch.cat(tensors_para_concatenar, dim=2)
-        base_name = f"movie_{int(time.time())}"
         current_step += 1
-        progress(current_step / TOTAL_STEPS, desc="Renderizando vídeo base...")
         refined_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_refined_silent.mp4")
-        final_pixel_tensor = self.latents_to_pixels(final_concatenated_latents)
-        self.save_video_from_tensor(final_pixel_tensor, refined_silent_video_path, fps=FPS)
-        # Limpeza de VRAM antes da próxima etapa pesada
-        del final_pixel_tensor, final_concatenated_latents, processed_latent_fragments, tensors_para_concatenar
         gc.collect()
         torch.cuda.empty_cache()
-        # --- ATO II: MASTERIZAÇÃO FINAL (APLICAÇÃO DE HD) ---
         current_step += 1
         progress(current_step / TOTAL_STEPS, desc="Aprimoramento final (HD)...")
         hq_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_hq_silent.mp4")
         try:
-            # O Especialista HD processa o vídeo silencioso refinado
             hd_specialist_singleton.process_video(
                 input_video_path=refined_silent_video_path,
                 output_video_path=hq_silent_video_path,
                 prompt=global_prompt
             )
         except Exception as e:
-            logger.error(f"Falha no processo de aprimoramento HD. Usando o vídeo refinado como fallback. Erro: {e}")
-            # Se o HD falhar, usamos o vídeo refinado (silencioso) como base para o final
             os.rename(refined_silent_video_path, hq_silent_video_path)
         current_step += 1
         progress(current_step / TOTAL_STEPS, desc="Finalizando montagem...")
         final_video_path = os.path.join(self.workspace_dir, f"{base_name}_FINAL.mp4")
-        #if audio_path and os.path.exists(audio_path):
-        #    # Se o áudio foi gerado, combina o vídeo de ALTA QUALIDADE com ele
-        #    self._combine_video_and_audio_ffmpeg(hq_silent_video_path, audio_path, final_video_path)
-        #else:
-        #    # Se não houver áudio, apenas renomeia o vídeo de alta qualidade
-        #    os.rename(hq_silent_video_path, final_video_path)
-        logger.info(f"Processo concluído! Vídeo final salvo em: {hq_silent_video_path}")
-        yield {"final_path": hq_silent_video_path}
-    def refine_latents1(self, latents: torch.Tensor,
-                       fps: int = 24,
-                       denoise_strength: float = 0.35,
-                       refine_steps: int = 12,
-                       motion_prompt: str = "refining video, improving details, cinematic quality") -> torch.Tensor:
-        """Aplica um passe de refinamento (denoise) em um tensor latente."""
-        logger.info(f"Refinando tensor latente com shape {latents.shape}.")
-        _, _, num_frames, latent_h, latent_w = latents.shape
-        vae_scale_factor = self.vae.config.scaling_factor if hasattr(self.vae.config, 'scaling_factor') else 8
-        pixel_height, pixel_width = latent_h * vae_scale_factor, latent_w * vae_scale_factor
-        refined_latents_tensor, _ = self.ltx_manager.refine_latents(
-            latents, height=pixel_height, width=pixel_width, video_total_frames=num_frames,
-            video_fps=fps, motion_prompt=motion_prompt, current_fragment_index=int(time.time()),
-            denoise_strength=denoise_strength, refine_steps=refine_steps)
-        return refined_latents_tensor
     def refine_latents(self, latents: torch.Tensor,
                        fps: int = 24,
@@ -305,26 +262,17 @@ class Deformes4DEngine:
         """
         logger.info(f"Refinando tensor latente com shape {latents.shape} para refinamento.")
-        # Extrai as dimensões do tensor latente de ENTRADA.
         _, _, num_latent_frames, latent_h, latent_w = latents.shape
-        # Busca os fatores de escala do VAE. Assumimos que o fator temporal e espacial são iguais.
-        # Esta é uma suposição segura para o LTX-Video.
-        video_scale_factor = getattr(self.vae, 'temporal_downscale_factor', 8)
-        vae_scale_factor = getattr(self.vae, 'spatial_downscale_factor', 8)
-        # Converte as dimensões latentes para as dimensões de pixel correspondentes.
         pixel_height = latent_h * vae_scale_factor
         pixel_width = latent_w * vae_scale_factor
-        # --- [A CORREÇÃO PRINCIPAL ESTÁ AQUI] ---
-        # Para que a pipeline espere um latente com 'num_latent_frames', precisamos
-        # fornecer um número de frames de pixel que, após a divisão e a adição de 1
-        # (devido ao VAE causal), resulte no número original de frames latentes.
-        # A fórmula inversa é: (num_latent_frames - 1) * video_scale_factor
         pixel_frames = (num_latent_frames - 1) * video_scale_factor
-        # Chama o ltx_manager com os parâmetros corretos.
         refined_latents_tensor, _ = self.ltx_manager.refine_latents(
             latents,
             height=pixel_height,
@@ -339,9 +287,7 @@ class Deformes4DEngine:
         logger.info(f"Retornando tensor latente refinado com shape: {refined_latents_tensor.shape}")
         return refined_latents_tensor
     def upscale_latents(self, latents: torch.Tensor) -> torch.Tensor:
         """Interface para o UpscalerSpecialist."""
         logger.info(f"Realizando upscale em tensor latente com shape {latents.shape}.")
@@ -349,7 +295,7 @@ class Deformes4DEngine:
     def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
         kwargs = {
-            **ltx_params, 'width': target_resolution[0], 'height': target_resolution[1],
             'video_total_frames': total_frames_to_generate, 'video_fps': 24,
             'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items
         }

 from ltx_manager_helpers import ltx_manager_singleton
 from gemini_helpers import gemini_singleton
 from upscaler_specialist import upscaler_specialist_singleton
+from hd_specialist import hd_specialist_singleton
 from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
 from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
         self._vae.to(self.device); self._vae.eval()
         return self._vae
+    # --- MÉTODOS AUXILIARES ---
+    def _extract_audio_ffmpeg(self, video_path: str, output_audio_path: str) -> str | None:
+        """Extract the audio track of a video into its own file.
+
+        Runs ``ffmpeg`` with ``-vn -acodec copy``: the video stream is dropped
+        and the audio stream is copied without re-encoding.
+
+        Args:
+            video_path: Video file to read the audio from.
+            output_audio_path: Destination file; overwritten if present (``-y``).
+
+        Returns:
+            ``output_audio_path`` on success, or ``None`` when ``video_path``
+            does not exist or ffmpeg exits with a non-zero status.
+        """
+        if not os.path.exists(video_path):
+            return None
+        cmd = ['ffmpeg', '-y', '-i', video_path, '-vn', '-acodec', 'copy', output_audio_path]
+        try:
+            subprocess.run(cmd, check=True, capture_output=True, text=True, encoding='utf-8')
+        except subprocess.CalledProcessError as e:
+            logger.warning(f"Não foi possível extrair o áudio de {os.path.basename(video_path)}. O vídeo pode ser silencioso.")
+            # Keep ffmpeg's own diagnostics: a silent input is only one of the
+            # reasons a stream copy can fail, and the guess above hides the others.
+            logger.warning(f"ffmpeg stderr: {e.stderr}")
+            return None
+        logger.info(f"Áudio extraído com sucesso para {output_audio_path}")
+        return output_audio_path
+    def _combine_video_and_audio_ffmpeg(self, video_path: str, audio_path: str, output_path: str):
+        """Mux a (silent) video file and an audio file into ``output_path``.
+
+        The ffmpeg command copies the video stream as-is (``-c:v copy``),
+        encodes the audio to AAC (``-c:a aac``) and passes ``-shortest`` so
+        encoding stops when the shortest stream ends.
+
+        Args:
+            video_path: Input video file.
+            audio_path: Input audio file.
+            output_path: Destination file; overwritten if present (``-y``).
+
+        Raises:
+            gr.Error: If ffmpeg exits with a non-zero status. The message
+                carries ffmpeg's stderr and the original
+                ``CalledProcessError`` is chained as the cause.
+        """
+        cmd = ['ffmpeg', '-y', '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-shortest', output_path]
+        try:
+            subprocess.run(cmd, check=True, capture_output=True, text=True, encoding='utf-8')
+        except subprocess.CalledProcessError as e:
+            # Log before raising so the failure is in the server log even when
+            # the UI error is dismissed; chain the cause to keep the traceback.
+            logger.error(f"Falha ao combinar áudio e vídeo. Detalhes: {e.stderr}")
+            raise gr.Error(f"Falha ao combinar áudio e vídeo: {e.stderr}") from e
+        logger.info(f"Áudio e vídeo combinados com sucesso em {output_path}")
     @torch.no_grad()
     def latents_to_pixels(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
         timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
         return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
         if image.size != target_resolution:
             return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
         tensor = (tensor * 2.0) - 1.0
         return self.pixels_to_latents(tensor)
+    @torch.no_grad()
+    def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
+        """Encode a pixel tensor with the engine's VAE and return the result.
+
+        The input is first moved to ``self.device`` and cast to the VAE's
+        dtype, then passed to ``vae_encode`` with per-channel normalization
+        enabled. Gradients are disabled for the whole call.
+        """
+        pixels = tensor.to(device=self.device, dtype=self.vae.dtype)
+        latents = vae_encode(pixels, self.vae, vae_per_channel_normalize=True)
+        return latents
+    # --- NÚCLEO DA LÓGICA ADUC-SDR ---
     def generate_full_movie(self, keyframes: list, global_prompt: str, storyboard: list,
                             seconds_per_fragment: float, trim_percent: int,
                             handler_strength: float, destination_convergence_strength: float,
                             video_resolution: int, use_continuity_director: bool,
                             progress: gr.Progress = gr.Progress()):
+        num_transitions_to_generate = len(keyframes) - 1
+        TOTAL_STEPS = num_transitions_to_generate + 4 # Fragmentos + etapas de pós-produção
         current_step = 0
         FPS = 24
         FRAMES_PER_LATENT_CHUNK = 8
         ECO_LATENT_CHUNKS = 2
         total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
         frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
         latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
+        if total_frames_brutos // FRAMES_PER_LATENT_CHUNK <= latents_a_podar + 1:
+            raise gr.Error("A combinação de duração e poda é muito agressiva.")
         DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
         DESTINATION_FRAME_TARGET = total_frames_brutos - 1
         eco_latent_for_next_loop = None
         dejavu_latent_for_next_loop = None
         processed_latent_fragments = []
+        # --- ATO I: GERAÇÃO LATENTE (LOOP DE FRAGMENTOS) ---
         for i in range(num_transitions_to_generate):
             fragment_index = i + 1
             current_step += 1
             progress(current_step / TOTAL_STEPS, desc=f"Gerando Fragmento {fragment_index}/{num_transitions_to_generate}")
             past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
             start_keyframe_path = keyframe_paths[i]
             destination_keyframe_path = keyframe_paths[i + 1]
             transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
             story_history += f"\n- Ato {fragment_index}: {motion_prompt}"
+            expected_height, expected_width = video_resolution, video_resolution
             downscale_factor = 2 / 3
             downscaled_height = self._quantize_to_multiple(int(expected_height * downscale_factor), 8)
             downscaled_width = self._quantize_to_multiple(int(expected_width * downscale_factor), 8)
             if transition_type == "cut":
                 eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
             upscaled_latents = self.upscale_latents(latents_video)
             refined_latents = self.refine_latents(upscaled_latents, motion_prompt=f"refining scene: {motion_prompt}")
             processed_latent_fragments.append(refined_latents)
+        # --- ATO II: RENDERIZAÇÃO PRIMÁRIA (COM CORREÇÃO DE OOM) ---
+        base_name = f"movie_{int(time.time())}"
         current_step += 1
+        progress(current_step / TOTAL_STEPS, desc="Renderizando vídeo (em lotes)...")
         refined_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_refined_silent.mp4")
+        with imageio.get_writer(refined_silent_video_path, fps=FPS, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
+            for i, latent_fragment in enumerate(processed_latent_fragments):
+                logger.info(f"Decodificando fragmento {i+1}/{len(processed_latent_fragments)} para pixels...")
+                pixel_tensor_fragment = self.latents_to_pixels(latent_fragment)
+                pixel_tensor_fragment = pixel_tensor_fragment.squeeze(0).permute(1, 2, 3, 0)
+                pixel_tensor_fragment = (pixel_tensor_fragment.clamp(-1, 1) + 1) / 2.0
+                video_np_fragment = (pixel_tensor_fragment.detach().cpu().float().numpy() * 255).astype(np.uint8)
+                for frame in video_np_fragment:
+                    writer.append_data(frame)
+                del pixel_tensor_fragment, video_np_fragment
+                gc.collect()
+                torch.cuda.empty_cache()
+        logger.info(f"Vídeo base renderizado com sucesso em: {refined_silent_video_path}")
+        del processed_latent_fragments
         gc.collect()
         torch.cuda.empty_cache()
+        # --- ATO III: MASTERIZAÇÃO FINAL (ÁUDIO E HD) ---
+        current_step += 1
+        progress(current_step / TOTAL_STEPS, desc="Gerando trilha sonora...")
+        try:
+            video_with_audio_path = audio_specialist_singleton.generate_audio_for_video(
+                video_path=refined_silent_video_path,
+                prompt=global_prompt,
+                duration_seconds=self._get_video_duration(refined_silent_video_path)
+            )
+            temp_audio_path = os.path.join(self.workspace_dir, f"{base_name}_extracted_audio.aac")
+            extracted_audio_path = self._extract_audio_ffmpeg(video_with_audio_path, temp_audio_path)
+        except Exception as e:
+            logger.error(f"Falha na geração de áudio: {e}. O vídeo final será silencioso.")
+            extracted_audio_path = None
         current_step += 1
         progress(current_step / TOTAL_STEPS, desc="Aprimoramento final (HD)...")
         hq_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_hq_silent.mp4")
         try:
             hd_specialist_singleton.process_video(
                 input_video_path=refined_silent_video_path,
                 output_video_path=hq_silent_video_path,
                 prompt=global_prompt
             )
         except Exception as e:
+            logger.error(f"Falha no aprimoramento HD: {e}. Usando vídeo de qualidade padrão.")
             os.rename(refined_silent_video_path, hq_silent_video_path)
         current_step += 1
         progress(current_step / TOTAL_STEPS, desc="Finalizando montagem...")
         final_video_path = os.path.join(self.workspace_dir, f"{base_name}_FINAL.mp4")
+        if extracted_audio_path and os.path.exists(hq_silent_video_path):
+            self._combine_video_and_audio_ffmpeg(hq_silent_video_path, extracted_audio_path, final_video_path)
+        else:
+            os.rename(hq_silent_video_path, final_video_path)
+        logger.info(f"Processo concluído! Vídeo final salvo em: {final_video_path}")
+        yield {"final_path": final_video_path}
     def refine_latents(self, latents: torch.Tensor,
                        fps: int = 24,
         """
         logger.info(f"Refinando tensor latente com shape {latents.shape} para refinamento.")
         _, _, num_latent_frames, latent_h, latent_w = latents.shape
+        video_scale_factor = getattr(self.vae.config, 'temporal_scale_factor', 8)
+        vae_scale_factor = getattr(self.vae.config, 'spatial_downscale_factor', 8)
         pixel_height = latent_h * vae_scale_factor
         pixel_width = latent_w * vae_scale_factor
+        # A fórmula inversa para o VAE causal: (N_latente - 1) * FatorDeEscala
         pixel_frames = (num_latent_frames - 1) * video_scale_factor
         refined_latents_tensor, _ = self.ltx_manager.refine_latents(
             latents,
             height=pixel_height,
         logger.info(f"Retornando tensor latente refinado com shape: {refined_latents_tensor.shape}")
         return refined_latents_tensor
     def upscale_latents(self, latents: torch.Tensor) -> torch.Tensor:
         """Interface para o UpscalerSpecialist."""
         logger.info(f"Realizando upscale em tensor latente com shape {latents.shape}.")
     def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
         kwargs = {
+            **ltx_params, 'width': target_resolution[1], 'height': target_resolution[0],
             'video_total_frames': total_frames_to_generate, 'video_fps': 24,
             'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items
         }