Update deformes4D_engine.py
deformes4D_engine.py (+32 -24) CHANGED
@@ -52,6 +52,7 @@ class Deformes4DEngine:
         self._vae.to(self.device); self._vae.eval()
         return self._vae
 
+    # ... (helper methods such as save/load/pixels_to_latents remain unchanged) ...
     def save_latent_tensor(self, tensor: torch.Tensor, path: str):
         torch.save(tensor.cpu(), path)
         logger.info(f"Tensor latente salvo em: {path}")

@@ -158,10 +159,8 @@ class Deformes4DEngine:
                          progress: gr.Progress = gr.Progress()):
 
         base_ltx_params = {
-            "guidance_scale": 1.0,
-            "stg_scale": 0.0,
-            "rescaling_scale": 0.15,
-            "num_inference_steps": 7,
+            "guidance_scale": 1.0, "stg_scale": 0.0,
+            "rescaling_scale": 0.15, "num_inference_steps": 7,
         }
 
         keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]

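Note: the consolidated defaults above are later extended per fragment via dict unpacking (see the `current_ltx_params` context line further down in this diff). A small sketch of that merge; the override values here are placeholders, not the engine's:

```python
# Sketch of how the consolidated defaults combine with per-fragment overrides,
# mirroring the call site later in this diff. Override values are placeholders.
base_ltx_params = {
    "guidance_scale": 1.0, "stg_scale": 0.0,
    "rescaling_scale": 0.15, "num_inference_steps": 7,
}
current_ltx_params = {
    **base_ltx_params,
    "handler_strength": 0.75,           # placeholder
    "motion_prompt": "slow dolly-in",   # placeholder
}
```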
@@ -169,7 +168,10 @@ class Deformes4DEngine:
         target_resolution_tuple = (video_resolution, video_resolution)
         n_trim_latents = self._quantize_to_multiple(int(seconds_per_fragment * 24 * (overlap_percent / 100.0)), 8)
 
-
+        # --- NEW LOGIC: variables that hold the continuity tensors ---
+        prepared_echo_latent = None
+        prepared_handler_latent = None
+
         num_transitions_to_generate = len(keyframe_paths) - 1
 
         for i in range(num_transitions_to_generate):

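`_quantize_to_multiple` is defined elsewhere in the file and is not touched by this diff. A plausible stand-in, with the overlap arithmetic worked through for illustrative inputs (the rounding mode is an assumption):

```python
# Plausible stand-in for the helper used above; the real _quantize_to_multiple
# is not shown in this diff, so round-to-nearest is an assumption.
def quantize_to_multiple(value: int, multiple: int) -> int:
    return round(value / multiple) * multiple

seconds_per_fragment, overlap_percent = 4.0, 20.0   # illustrative inputs
n_trim_latents = quantize_to_multiple(int(seconds_per_fragment * 24 * (overlap_percent / 100.0)), 8)
total_frames_to_generate = quantize_to_multiple(int(seconds_per_fragment * 24), 8) + 1
print(n_trim_latents, total_frames_to_generate)     # 16 97
```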
@@ -179,8 +181,9 @@ class Deformes4DEngine:
             destination_keyframe_path = keyframe_paths[i+1]
             present_scene_desc = storyboard[i]
 
-            is_first_fragment =
+            is_first_fragment = (prepared_handler_latent is None)
 
+            # ... (Gemini decision logic and the sound-director logic remain the same) ...
             if is_first_fragment:
                 transition_type = "start"
                 motion_prompt = gemini_singleton.get_initial_motion_prompt(

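Because the flag is now derived from state rather than from the loop index, clearing the prepared tensors after a "cut" (see the last hunk) makes the following fragment behave like a fresh start. A tiny illustration with a hypothetical transition sequence:

```python
# Hypothetical transition sequence; the real transition_type comes from the
# Gemini decision logic elided above.
prepared_handler_latent = None
for i, transition_type in enumerate(["start", "continuous", "cut", "continuous"]):
    is_first_fragment = (prepared_handler_latent is None)
    print(i, is_first_fragment)           # 0 True, 1 False, 2 False, 3 True
    prepared_handler_latent = object()    # stand-in for the tensor prepared later in the loop
    if transition_type == "cut":
        prepared_handler_latent = None
```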
@@ -224,6 +227,7 @@ class Deformes4DEngine:
             current_ltx_params = {**base_ltx_params, "handler_strength": handler_strength, "motion_prompt": motion_prompt}
             total_frames_to_generate = self._quantize_to_multiple(int(seconds_per_fragment * 24), 8) + 1
 
+            # --- NEW LOGIC: preparing the conditioning instructions ---
             if is_first_fragment:
                 img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
                 start_latent = self.pil_to_latent(img_start)

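The conditioning items built here and in the next hunk are constructed positionally as (latent, frame position, strength). The real `LatentConditioningItem` is defined elsewhere in the repo; the field names below are illustrative assumptions based only on these call sites:

```python
from dataclasses import dataclass
import torch

@dataclass
class LatentConditioningItem:
    latent: torch.Tensor   # latent block to pin, e.g. shape (B, C, T, H, W); assumed layout
    frame_position: int    # index on the fragment's latent timeline where it is anchored
    strength: float        # how strongly the sampler should honor it
```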
@@ -233,15 +237,9 @@ class Deformes4DEngine:
                 destination_latent = self.pil_to_latent(img_dest)
                 conditioning_items.append(LatentConditioningItem(destination_latent, total_frames_to_generate - 1, destination_convergence_strength))
             else:
-
-
-
-                echo_latents = trimmed_for_echo[:, :, -echo_frames:, :, :]
-                handler_frame_position = n_trim_latents + echo_frames
-
-                conditioning_items.append(LatentConditioningItem(echo_latents, 0, 1.0))
-                conditioning_items.append(LatentConditioningItem(handler_latent, handler_frame_position, handler_strength))
-                del previous_latents, handler_latent, trimmed_for_echo, echo_latents; gc.collect()
+                # Use the tensors prepared in the previous iteration
+                conditioning_items.append(LatentConditioningItem(prepared_echo_latent, 0, 1.0))
+                conditioning_items.append(LatentConditioningItem(prepared_handler_latent, echo_frames, handler_strength))
             if transition_type == "continuous":
                 img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
                 destination_latent = self.pil_to_latent(img_dest)

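Two things change for continuation fragments: the echo and handler tensors are no longer re-derived from a separately loaded previous-latents tensor (hence the dropped `del ...; gc.collect()`), and the handler anchor moves from `n_trim_latents + echo_frames` to `echo_frames`, placing it immediately after the echo block. A minimal sketch of the new conditioning list, reusing the dataclass sketch above; tensor shapes and values are illustrative:

```python
import torch

echo_frames, handler_strength = 2, 0.75
prepared_echo_latent = torch.randn(1, 4, echo_frames, 32, 32)   # tail of the previous fragment (stand-in)
prepared_handler_latent = torch.randn(1, 4, 1, 32, 32)          # its single final latent (stand-in)

conditioning_items = [
    LatentConditioningItem(prepared_echo_latent, 0, 1.0),                            # pinned at t = 0, full strength
    LatentConditioningItem(prepared_handler_latent, echo_frames, handler_strength),  # right after the echo block
]
```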
@@ -249,21 +247,31 @@ class Deformes4DEngine:
 
             new_full_latents = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_to_generate)
 
-
-
-            self.save_latent_tensor(new_full_latents, new_full_latents_path)
+            # --- NEW LOGIC: preparation moved to the end of the loop ---
+            is_last_fragment = (i == num_transitions_to_generate - 1)
 
-
+            if not is_last_fragment:
+                # ANTICIPATION: prepare the tensors for the NEXT iteration
+                prepared_handler_latent = new_full_latents[:, :, -1:, :, :].clone()
+                prepared_echo_latent = new_full_latents[:, :, -echo_frames:, :, :].clone()
+
+                # TRIM AT THE END: set the latents for the CURRENT video, removing the overlap
+                if n_trim_latents > 0 and new_full_latents.shape[2] > n_trim_latents:
+                    latents_for_video = new_full_latents[:, :, :-n_trim_latents, :, :]
+                else:
+                    latents_for_video = new_full_latents
+            else:
+                # The last fragment has nothing to prepare for the future, so it is rendered in full.
+                latents_for_video = new_full_latents
 
-
-
+            base_name = f"fragment_{i}_{int(time.time())}"
             video_with_audio_path = self._generate_video_and_audio_from_latents(latents_for_video, audio_prompt, base_name)
             video_clips_paths.append(video_with_audio_path)
 
-
             if transition_type == "cut":
-
-
+                # On a cut, clear the memory so the next iteration starts from scratch.
+                prepared_echo_latent = None
+                prepared_handler_latent = None
 
             yield {"fragment_path": video_with_audio_path}
 
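Putting the pieces together, a self-contained sketch of the new carry-over scheme with dummy latents of shape (B, C, T, H, W); all sizes are illustrative, the real values come from the quantization shown earlier:

```python
import torch

echo_frames, n_trim_latents = 2, 8       # illustrative; the engine derives these from the overlap settings
num_fragments, latent_frames = 3, 25     # illustrative fragment count and per-fragment latent length

prepared_echo_latent = None
prepared_handler_latent = None

for i in range(num_fragments):
    # Stand-in for the output of _generate_latent_tensor_internal.
    new_full_latents = torch.randn(1, 4, latent_frames, 32, 32)

    if i < num_fragments - 1:
        # Anticipation: keep the continuity tensors for the NEXT fragment.
        prepared_handler_latent = new_full_latents[:, :, -1:, :, :].clone()
        prepared_echo_latent = new_full_latents[:, :, -echo_frames:, :, :].clone()
        # Trim the overlap from the CURRENT fragment before rendering it.
        latents_for_video = new_full_latents[:, :, :-n_trim_latents, :, :]
    else:
        # The last fragment is rendered in full.
        latents_for_video = new_full_latents

    print(i, latents_for_video.shape[2])   # 17, 17, 25
```

Note that the echo is cut from the untrimmed tensor, so when `n_trim_latents >= echo_frames` the next fragment is seeded from material that the current clip's rendered portion does not include.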