EU-IA committed
Commit 8d75895 · verified · 1 parent: b9f745b

Update deformes4D_engine.py

Files changed (1): deformes4D_engine.py (+32 -24)
deformes4D_engine.py CHANGED
@@ -52,6 +52,7 @@ class Deformes4DEngine:
         self._vae.to(self.device); self._vae.eval()
         return self._vae
 
+    # ... (helper methods such as save/load/pixels_to_latents remain unchanged) ...
     def save_latent_tensor(self, tensor: torch.Tensor, path: str):
         torch.save(tensor.cpu(), path)
         logger.info(f"Latent tensor saved to: {path}")
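The removed branch further down still calls load_latent_tensor, the counterpart of save_latent_tensor above. That method is not part of this diff; a minimal sketch of it as a Deformes4DEngine method, assuming it simply reverses the torch.save round trip and mirrors the device handling:

    import torch

    def load_latent_tensor(self, path: str) -> torch.Tensor:
        # Sketch only; the real method is not shown in this commit.
        # save_latent_tensor always persists a CPU copy, so load on CPU
        # first and move to the engine's device afterwards.
        tensor = torch.load(path, map_location="cpu")
        return tensor.to(self.device)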
@@ -158,10 +159,8 @@ class Deformes4DEngine:
                              progress: gr.Progress = gr.Progress()):
 
         base_ltx_params = {
-            "guidance_scale": 1.0,
-            "stg_scale": 0.0,
-            "rescaling_scale": 0.15,
-            "num_inference_steps": 7,
+            "guidance_scale": 1.0, "stg_scale": 0.0,
+            "rescaling_scale": 0.15, "num_inference_steps": 7,
         }
 
         keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]
@@ -169,7 +168,10 @@ class Deformes4DEngine:
         target_resolution_tuple = (video_resolution, video_resolution)
         n_trim_latents = self._quantize_to_multiple(int(seconds_per_fragment * 24 * (overlap_percent / 100.0)), 8)
 
-        previous_latents_path = None
+        # --- NEW LOGIC: variables that carry the continuity tensors between fragments ---
+        prepared_echo_latent = None
+        prepared_handler_latent = None
+
         num_transitions_to_generate = len(keyframe_paths) - 1
 
         for i in range(num_transitions_to_generate):
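n_trim_latents converts the overlap percentage into a frame count at 24 fps and snaps it to a multiple of 8. _quantize_to_multiple itself is not shown in this diff; a plausible sketch, assuming it rounds down to the nearest multiple so frame counts stay aligned with the model's temporal block size:

    def _quantize_to_multiple(value: int, multiple: int) -> int:
        # Hypothetical reimplementation, inferred from the call sites in this
        # file; the engine's actual helper is not part of this diff.
        # Example: value=23, multiple=8 -> 16.
        if multiple <= 0:
            return value
        return (value // multiple) * multiple

Note that total_frames_to_generate later adds 1 after quantizing, the "multiple of 8 plus one" frame count commonly required by LTX-style video models.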
@@ -179,8 +181,9 @@ class Deformes4DEngine:
             destination_keyframe_path = keyframe_paths[i+1]
             present_scene_desc = storyboard[i]
 
-            is_first_fragment = previous_latents_path is None
+            is_first_fragment = (prepared_handler_latent is None)
 
+            # ... (Gemini decision logic and the sound director remain the same) ...
             if is_first_fragment:
                 transition_type = "start"
                 motion_prompt = gemini_singleton.get_initial_motion_prompt(
@@ -224,6 +227,7 @@ class Deformes4DEngine:
             current_ltx_params = {**base_ltx_params, "handler_strength": handler_strength, "motion_prompt": motion_prompt}
             total_frames_to_generate = self._quantize_to_multiple(int(seconds_per_fragment * 24), 8) + 1
 
+            # --- NEW LOGIC: assemble the conditioning instructions ---
             if is_first_fragment:
                 img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
                 start_latent = self.pil_to_latent(img_start)
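Every conditioning instruction built here is a LatentConditioningItem(latent, frame_position, strength) triple. The class is imported from elsewhere and its real definition may carry more fields; a minimal sketch matching only how it is used in this file, with hypothetical field names:

    from dataclasses import dataclass
    import torch

    @dataclass
    class LatentConditioningItem:
        # Hypothetical field names, inferred from the call sites in this diff.
        latent_tensor: torch.Tensor   # latents to inject, shape (B, C, T, H, W)
        media_frame_number: int       # frame index the item anchors to
        conditioning_strength: float  # 1.0 = hard anchor, lower = softer guidance

The first fragment anchors a start keyframe at frame 0 and, for "start" transitions, a destination keyframe at the last frame (total_frames_to_generate - 1).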
@@ -233,15 +237,9 @@ class Deformes4DEngine:
                 destination_latent = self.pil_to_latent(img_dest)
                 conditioning_items.append(LatentConditioningItem(destination_latent, total_frames_to_generate - 1, destination_convergence_strength))
             else:
-                previous_latents = self.load_latent_tensor(previous_latents_path)
-                handler_latent = previous_latents[:, :, -1:, :, :]
-                trimmed_for_echo = previous_latents[:, :, :-n_trim_latents, :, :] if n_trim_latents > 0 and previous_latents.shape[2] > n_trim_latents else previous_latents
-                echo_latents = trimmed_for_echo[:, :, -echo_frames:, :, :]
-                handler_frame_position = n_trim_latents + echo_frames
-
-                conditioning_items.append(LatentConditioningItem(echo_latents, 0, 1.0))
-                conditioning_items.append(LatentConditioningItem(handler_latent, handler_frame_position, handler_strength))
-                del previous_latents, handler_latent, trimmed_for_echo, echo_latents; gc.collect()
+                # Reuse the tensors prepared at the end of the previous iteration
+                conditioning_items.append(LatentConditioningItem(prepared_echo_latent, 0, 1.0))
+                conditioning_items.append(LatentConditioningItem(prepared_handler_latent, echo_frames, handler_strength))
             if transition_type == "continuous":
                 img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
                 destination_latent = self.pil_to_latent(img_dest)
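The slicing is easier to follow with concrete shapes. A toy example (sizes are illustrative only, not taken from the engine):

    import torch

    echo_frames, n_trim_latents, handler_strength = 3, 8, 0.5
    # Toy latent tensor: (batch, channels, time, height, width)
    new_full_latents = torch.randn(1, 4, 25, 16, 16)

    # Prepared at the end of one iteration for the NEXT fragment:
    prepared_handler_latent = new_full_latents[:, :, -1:, :, :].clone()         # time dim = 1
    prepared_echo_latent = new_full_latents[:, :, -echo_frames:, :, :].clone()  # time dim = 3

    # In the next iteration, as in the new branch above:
    #   echo anchored at frame 0 with strength 1.0,
    #   handler anchored at frame echo_frames (3) with handler_strength.

Note the handler anchor moved from n_trim_latents + echo_frames in the removed code to just echo_frames, consistent with the overlap now being trimmed from the tail of the current fragment rather than recomputed from the previous one.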
@@ -249,21 +247,31 @@ class Deformes4DEngine:
 
             new_full_latents = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_to_generate)
 
-            base_name = f"fragment_{i}_{int(time.time())}"
-            new_full_latents_path = os.path.join(self.workspace_dir, f"{base_name}_full.pt")
-            self.save_latent_tensor(new_full_latents, new_full_latents_path)
+            # --- NEW LOGIC: preparation moved to the end of the loop ---
+            is_last_fragment = (i == num_transitions_to_generate - 1)
 
-            previous_latents_path = new_full_latents_path
+            if not is_last_fragment:
+                # LOOK-AHEAD: prepare the tensors for the NEXT iteration
+                prepared_handler_latent = new_full_latents[:, :, -1:, :, :].clone()
+                prepared_echo_latent = new_full_latents[:, :, -echo_frames:, :, :].clone()
+
+                # TRIM AT THE END: set the latents for the CURRENT video, dropping the overlap
+                if n_trim_latents > 0 and new_full_latents.shape[2] > n_trim_latents:
+                    latents_for_video = new_full_latents[:, :, :-n_trim_latents, :, :]
+                else:
+                    latents_for_video = new_full_latents
+            else:
+                # The last fragment has nothing to prepare for the future, so it is rendered in full.
+                latents_for_video = new_full_latents
 
-            latents_for_video = new_full_latents
-
+            base_name = f"fragment_{i}_{int(time.time())}"
             video_with_audio_path = self._generate_video_and_audio_from_latents(latents_for_video, audio_prompt, base_name)
             video_clips_paths.append(video_with_audio_path)
 
-
             if transition_type == "cut":
-                previous_latents_path = None
-
+                # On a cut, clear the memory so the next iteration starts from scratch.
+                prepared_echo_latent = None
+                prepared_handler_latent = None
 
             yield {"fragment_path": video_with_audio_path}
 
 
277