Update deformes4D_engine.py
deformes4D_engine.py (CHANGED): +11 −12
```diff
@@ -102,7 +102,6 @@ class Deformes4DEngine:
             current_step += 1
             progress(current_step / TOTAL_STEPS, desc=f"Generating Raw Fragment {fragment_index}/{num_transitions_to_generate}")
 
-            # ... (Gemini logic for obtaining the motion_prompt - unchanged) ...
             past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
             start_keyframe_path = keyframe_paths[i]
             destination_keyframe_path = keyframe_paths[i + 1]
```
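For context on the indexing in this hunk: each fragment conditions on a past, a start, and a destination keyframe, and the very first fragment has no predecessor, so its past keyframe is clamped to the current one. A minimal sketch of that selection with illustrative paths:

```python
keyframe_paths = ["kf_0.png", "kf_1.png", "kf_2.png"]  # illustrative paths

for i in range(len(keyframe_paths) - 1):
    # At i == 0 there is no previous keyframe, so the current one stands in for it.
    past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
    start_keyframe_path = keyframe_paths[i]
    destination_keyframe_path = keyframe_paths[i + 1]
    print(past_keyframe_path, start_keyframe_path, destination_keyframe_path)
```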
```diff
@@ -119,7 +118,6 @@ class Deformes4DEngine:
             downscaled_width = self._quantize_to_multiple(int(expected_width * downscale_factor), 8)
             target_resolution_tuple = (downscaled_height, downscaled_width)
 
-            # ... (Conditioning logic - unchanged) ...
             conditioning_items = []
             if eco_latent_for_next_loop is None:
                 img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
```
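The downscaled dimensions are snapped to multiples of 8 before being used as the target resolution, since VAE-based video pipelines typically require spatial sizes divisible by their compression factor. `_quantize_to_multiple` is not shown in this diff; a plausible sketch, assuming it rounds down (the real helper may round to nearest or up instead):

```python
def _quantize_to_multiple(value: int, multiple: int) -> int:
    # Hypothetical reconstruction: snap down to the nearest multiple,
    # never returning less than one full multiple.
    return max(multiple, (value // multiple) * multiple)

print(_quantize_to_multiple(int(720 * 0.5), 8))  # 360 (already a multiple of 8)
print(_quantize_to_multiple(471, 8))             # 464
```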
```diff
@@ -133,7 +131,6 @@ class Deformes4DEngine:
             current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
             latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
 
-            # ... (Pruning and Eco/Déjà-Vu extraction logic - unchanged) ...
             last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
             eco_latent_for_next_loop = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :].clone()
             dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
```
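All three slices in this hunk act on dim 2, the latent-frame axis of a (batch, channels, frames, height, width) tensor: a tail of `latents_a_podar + 1` frames is cloned off, the "echo" keeps the first `ECO_LATENT_CHUNKS` frames of that tail to condition the next loop, and the "déjà-vu" keeps only its final frame. A standalone sketch of the same slicing, with illustrative shapes and constants:

```python
import torch

latents_brutos = torch.randn(1, 8, 24, 16, 16)  # (B, C, F, H, W), illustrative
latents_a_podar = 4                             # illustrative prune count
ECO_LATENT_CHUNKS = 2                           # illustrative

last_trim = latents_brutos[:, :, -(latents_a_podar + 1):, :, :].clone()
eco_latent = last_trim[:, :, :ECO_LATENT_CHUNKS, :, :].clone()   # first frames of the tail
dejavu_latent = last_trim[:, :, -1:, :, :].clone()               # the single last frame

print(last_trim.shape[2], eco_latent.shape[2], dejavu_latent.shape[2])  # 5 2 1
```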
```diff
@@ -148,13 +145,18 @@ class Deformes4DEngine:
         # --- ACT II: GLOBAL LATENT POST-PRODUCTION ---
         current_step += 1
         progress(current_step / TOTAL_STEPS, desc="Causal Unification (Concatenation)...")
-
-
+
+        # --- [START OF MULTI-GPU FIX] ---
+        logger.info(f"Moving {len(raw_latent_fragments)} latent fragments to the main device ({self.device}) for unification.")
+        tensors_on_main_device = [frag.to(self.device) for frag in raw_latent_fragments]
+        concatenated_raw_latents = torch.cat(tensors_on_main_device, dim=2)
+        # --- [END OF MULTI-GPU FIX] ---
+
+        del raw_latent_fragments, tensors_on_main_device; gc.collect(); torch.cuda.empty_cache()
         logger.info(f"Raw latents unified. Shape: {concatenated_raw_latents.shape}")
 
         current_step += 1
         progress(current_step / TOTAL_STEPS, desc="Global Polishing (Denoise)...")
-        # [CRITICAL CHANGE] We use an empty prompt and guidance_scale=1.0 for unconditional refinement.
         denoised_latents = self.refine_latents(
             concatenated_raw_latents,
             motion_prompt="",
```
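This hunk is the heart of the commit: `torch.cat` refuses tensors that live on different CUDA devices, so when worker GPUs produce the raw fragments, concatenating them directly raises a RuntimeError. Moving every fragment onto one main device first makes the unification valid; the follow-up `del`/`gc.collect()`/`torch.cuda.empty_cache()` then releases the per-fragment copies so the concatenated tensor is the only resident copy. A minimal reproduction of both the failure and the fix, assuming two visible GPUs and illustrative shapes:

```python
import torch

# Fragments as they might arrive from two worker GPUs.
frag_a = torch.randn(1, 8, 4, 16, 16, device="cuda:0")
frag_b = torch.randn(1, 8, 4, 16, 16, device="cuda:1")

try:
    torch.cat([frag_a, frag_b], dim=2)  # the pre-fix behavior
except RuntimeError as e:
    print(f"Direct cat fails: {e}")

# The fix: gather everything on one device, then concatenate along the frame axis.
main_device = torch.device("cuda:0")
merged = torch.cat([f.to(main_device) for f in (frag_a, frag_b)], dim=2)
print(merged.shape, merged.device)  # torch.Size([1, 8, 8, 16, 16]) cuda:0
```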
```diff
@@ -170,17 +172,15 @@ class Deformes4DEngine:
         refined_silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_refined_silent.mp4")
 
         with imageio.get_writer(refined_silent_video_path, fps=FPS, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
-            chunk_size = 7
+            chunk_size = 7
             latent_chunks = torch.split(denoised_latents, chunk_size, dim=2)
 
             for i, latent_chunk in enumerate(latent_chunks):
                 logger.info(f"Processing and rendering batch {i+1}/{len(latent_chunks)}...")
 
-                # Batch upscale and decode
                 upscaled_chunk = self.upscale_latents(latent_chunk)
                 pixel_tensor_chunk = self.latents_to_pixels(upscaled_chunk)
 
-                # Convert and save the frames
                 pixel_tensor_chunk = pixel_tensor_chunk.squeeze(0).permute(1, 2, 3, 0)
                 pixel_tensor_chunk = (pixel_tensor_chunk.clamp(-1, 1) + 1) / 2.0
                 video_np_chunk = (pixel_tensor_chunk.detach().cpu().float().numpy() * 255).astype(np.uint8)
```
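Decoding the whole denoised sequence at once would materialize every pixel frame in memory simultaneously; splitting on dim 2 with `torch.split` caps peak usage at one chunk of 7 latent frames. After decoding, each chunk is reordered from (C, F, H, W) to the (F, H, W, C) layout imageio expects and rescaled from the VAE's [-1, 1] range to uint8. A self-contained sketch of that conversion step (decoder calls omitted; shapes illustrative):

```python
import numpy as np
import torch

# Stand-in for one decoded chunk: (batch, channels, frames, height, width) in [-1, 1].
pixel_tensor_chunk = torch.tanh(torch.randn(1, 3, 7, 64, 64))

chunk = pixel_tensor_chunk.squeeze(0).permute(1, 2, 3, 0)   # -> (F, H, W, C)
chunk = (chunk.clamp(-1, 1) + 1) / 2.0                      # [-1, 1] -> [0, 1]
frames = (chunk.detach().cpu().float().numpy() * 255).astype(np.uint8)

print(frames.shape, frames.dtype)  # (7, 64, 64, 3) uint8, one frame per writer append
```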
```diff
@@ -207,7 +207,7 @@ class Deformes4DEngine:
                 prompt=global_prompt
             )
         except Exception as e:
-            logger.error(f"HD enhancement failed: {e}. Using standard-quality video.")
+            logger.error(f"HD enhancement failed: {e}. Using the standard-quality video.")
             os.rename(refined_silent_video_path, final_video_path)
 
         logger.info(f"Process complete! Final video saved at: {final_video_path}")
```
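The pattern around this hunk: HD enhancement is best-effort, and on any exception the already-written standard-quality file is promoted to the final path instead of failing the whole run. A sketch of that fallback shape, where `enhance_to_hd` is a hypothetical stand-in for the enhancement call:

```python
import logging
import os

logger = logging.getLogger(__name__)

def finalize_video(refined_silent_video_path: str, final_video_path: str, global_prompt: str) -> str:
    try:
        # Hypothetical enhancer; any exception it raises triggers the fallback.
        enhance_to_hd(refined_silent_video_path, final_video_path, prompt=global_prompt)
    except Exception as e:
        logger.error(f"HD enhancement failed: {e}. Using the standard-quality video.")
        os.rename(refined_silent_video_path, final_video_path)
    return final_video_path
```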
```diff
@@ -223,12 +223,11 @@ class Deformes4DEngine:
         pixel_width = latent_w * vae_scale_factor
         pixel_frames = (num_latent_frames - 1) * video_scale_factor
 
-        # [CHANGE] Allows guidance_scale to be passed as an argument
        final_ltx_params = {
             "height": pixel_height, "width": pixel_width, "video_total_frames": pixel_frames,
             "video_fps": fps, "motion_prompt": motion_prompt, "current_fragment_index": int(time.time()),
             "denoise_strength": denoise_strength, "refine_steps": refine_steps,
-            "guidance_scale": kwargs.get('guidance_scale', 2.0)
+            "guidance_scale": kwargs.get('guidance_scale', 2.0)
         }
 
         refined_latents_tensor, _ = self.ltx_manager.refine_latents(latents, **final_ltx_params)
```
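The point of this final hunk survives in the kept line: `guidance_scale` is read with `kwargs.get('guidance_scale', 2.0)`, so callers may override it per call while untouched call sites keep the 2.0 default. The pattern in isolation (the signature below is illustrative, not the engine's real one):

```python
def build_ltx_params(motion_prompt: str, **kwargs) -> dict:
    # kwargs.get returns the caller's value when present, else the default.
    return {
        "motion_prompt": motion_prompt,
        "guidance_scale": kwargs.get("guidance_scale", 2.0),
    }

print(build_ltx_params("pan left")["guidance_scale"])                      # 2.0
print(build_ltx_params("pan left", guidance_scale=1.0)["guidance_scale"])  # 1.0
```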