euiia committed on
Commit
35d9fd0
·
verified ·
1 Parent(s): 0a725b5

Update deformes4D_engine.py

Browse files
Files changed (1) hide show
  1. deformes4D_engine.py +174 -176
deformes4D_engine.py CHANGED
@@ -1,13 +1,15 @@
1
  # deformes4D_engine.py
2
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
  #
4
- # MODIFICATIONS FOR ADUC-SDR:
5
- # Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
6
  #
7
- # This file is part of the ADUC-SDR project. It contains the core logic for
8
- # video fragment generation, latent manipulation, and dynamic editing,
9
- # governed by the ADUC orchestrator.
10
- # This component is licensed under the GNU Affero General Public License v3.0.
 
 
 
 
11
 
12
  import os
13
  import time
@@ -21,10 +23,10 @@ import gradio as gr
21
  import subprocess
22
  import gc
23
  import shutil
 
24
 
25
  from ltx_manager_helpers import ltx_manager_singleton
26
- from gemini_helpers import gemini_singleton
27
- # [REATORADO] Importa o novo especialista
28
  from latent_enhancer_specialist import latent_enhancer_specialist_singleton
29
  from hd_specialist import hd_specialist_singleton
30
  from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
@@ -34,23 +36,22 @@ logger = logging.getLogger(__name__)
34
 
35
  @dataclass
36
  class LatentConditioningItem:
37
- """Representa uma âncora de condicionamento no espaço latente para a Câmera (Ψ)."""
38
  latent_tensor: torch.Tensor
39
  media_frame_number: int
40
  conditioning_strength: float
41
 
42
  class Deformes4DEngine:
43
  """
44
- Implementa a Câmera (Ψ) e o Destilador (Δ) da arquitetura ADUC-SDR.
45
- Orquestra a geração, pós-produção latente e renderização final dos fragmentos de vídeo.
46
  """
47
  def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
48
  self.ltx_manager = ltx_manager
49
  self.workspace_dir = workspace_dir
50
  self._vae = None
51
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
52
- logger.info("Especialista Deformes4D (Executor ADUC-SDR) inicializado.")
53
- # Cria o diretório de workspace se não existir
54
  os.makedirs(self.workspace_dir, exist_ok=True)
55
 
56
 
@@ -61,7 +62,8 @@ class Deformes4DEngine:
61
  self._vae.to(self.device); self._vae.eval()
62
  return self._vae
63
 
64
- # --- MÉTODOS AUXILIARES ---
 
65
  @torch.no_grad()
66
  def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
67
  tensor = tensor.to(self.device, dtype=self.vae.dtype)
@@ -91,43 +93,43 @@ class Deformes4DEngine:
91
  tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
92
  tensor = (tensor * 2.0) - 1.0
93
  return self.pixels_to_latents(tensor)
94
-
95
  def concatenate_videos_ffmpeg(self, video_paths: list[str], output_path: str):
96
- if not video_paths: raise gr.Error("Nenhum fragmento de vídeo para montar.")
97
  list_file_path = os.path.join(self.workspace_dir, "concat_list.txt")
98
  with open(list_file_path, 'w', encoding='utf-8') as f:
99
  for path in video_paths: f.write(f"file '{os.path.abspath(path)}'\n")
100
-
101
- # Tenta usar aceleração de hardware (GPU) para a concatenação, se disponível
102
  cmd_list = ['ffmpeg', '-y', '-hwaccel', 'auto', '-f', 'concat', '-safe', '0', '-i', list_file_path, '-c', 'copy', output_path]
103
- logger.info(f"Concatenando {len(video_paths)} clipes de vídeo em {output_path}...")
104
  try:
105
  subprocess.run(cmd_list, check=True, capture_output=True, text=True)
106
  except subprocess.CalledProcessError as e:
107
- logger.error(f"Erro no FFmpeg: {e.stderr}")
108
- # Tenta novamente sem aceleração de hardware como fallback
109
- logger.info("Tentando concatenar novamente sem aceleração de hardware...")
110
  cmd_list = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', list_file_path, '-c', 'copy', output_path]
111
  try:
112
  subprocess.run(cmd_list, check=True, capture_output=True, text=True)
113
  except subprocess.CalledProcessError as e_fallback:
114
- logger.error(f"Erro no FFmpeg (fallback): {e_fallback.stderr}")
115
- raise gr.Error(f"Falha na montagem final do vídeo. Detalhes: {e_fallback.stderr}")
116
-
117
- # --- NÚCLEO DA LÓGICA ADUC-SDR ---
118
- def generate_full_movie(self, keyframes: list, global_prompt: str, storyboard: list,
119
- seconds_per_fragment: float, trim_percent: int,
120
- handler_strength: float, destination_convergence_strength: float,
121
- use_upscaler: bool, use_refiner: bool, use_hd: bool, use_audio: bool,
122
- video_resolution: int, use_continuity_director: bool,
123
- progress: gr.Progress = gr.Progress()):
124
-
125
- # --- ETAPA 0: SETUP ---
 
 
 
126
  FPS = 24
127
  FRAMES_PER_LATENT_CHUNK = 8
128
- ECO_LATENT_CHUNKS = 2
129
- LATENT_PROCESSING_CHUNK_SIZE = 10 # Processa 10 fragmentos latentes por vez para economizar memória
130
-
131
  run_timestamp = int(time.time())
132
  temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}")
133
  temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
@@ -140,35 +142,29 @@ class Deformes4DEngine:
140
 
141
  DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
142
  DESTINATION_FRAME_TARGET = total_frames_brutos - 1
143
-
144
- base_ltx_params = {"guidance_scale": 2.0, "stg_scale": 0.025, "rescaling_scale": 0.15, "num_inference_steps": 20, "image_cond_noise_scale": 0.00}
145
- refine_ltx_params = {"motion_prompt": "", "guidance_scale": 1.0, "denoise_strength": 0.35, "refine_steps": 12}
146
-
147
  keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]
148
  story_history = ""
149
- target_resolution_tuple = (video_resolution, video_resolution)
150
-
151
  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
152
- latent_fragment_paths = [] # Lista para armazenar caminhos dos latentes salvos no disco
153
-
154
- if len(keyframe_paths) < 2: raise gr.Error(f"A geração requer no mínimo 2 keyframes. Você forneceu {len(keyframe_paths)}.")
155
-
156
  num_transitions_to_generate = len(keyframe_paths) - 1
157
-
158
- # --- ETAPA 1: GERAR FRAGMENTOS LATENTES E SALVAR EM DISCO ---
159
- logger.info("--- INICIANDO ETAPA 1: Geração de Fragmentos Latentes ---")
160
  for i in range(num_transitions_to_generate):
161
  fragment_index = i + 1
162
- progress(i / num_transitions_to_generate, desc=f"Gerando Latente {fragment_index}/{num_transitions_to_generate}")
163
-
164
- # (Lógica de decisão do Gemini e preparação de âncoras - inalterada)
165
  past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
166
  start_keyframe_path = keyframe_paths[i]
167
  destination_keyframe_path = keyframe_paths[i + 1]
168
- future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "A cena final."
169
- decision = gemini_singleton.get_cinematic_decision(global_prompt, story_history, past_keyframe_path, start_keyframe_path, destination_keyframe_path, storyboard[i - 1] if i > 0 else "O início.", storyboard[i], future_story_prompt)
 
170
  transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
171
- story_history += f"\n- Ato {fragment_index}: {motion_prompt}"
172
  conditioning_items = []
173
  if eco_latent_for_next_loop is None:
174
  img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
@@ -178,165 +174,167 @@ class Deformes4DEngine:
178
  conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
179
  img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
180
  conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
181
-
182
  current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
183
- latents_brutos = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
184
-
 
 
185
  last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
186
- eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
187
  dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
188
-
189
  latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
190
  latents_video = latents_video[:, :, 1:, :, :]
191
-
192
- del last_trim, latents_brutos
193
- gc.collect(); torch.cuda.empty_cache()
194
-
195
  if transition_type == "cut":
196
  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
197
-
198
- # [REATORADO] Mover latente para CPU e salvar no disco para liberar VRAM
199
  cpu_latent = latents_video.cpu()
200
  latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
201
  torch.save(cpu_latent, latent_path)
202
  latent_fragment_paths.append(latent_path)
 
 
203
 
204
- del latents_video, cpu_latent
205
- gc.collect()
206
-
207
- del eco_latent_for_next_loop, dejavu_latent_for_next_loop
208
- gc.collect(); torch.cuda.empty_cache()
209
-
210
- # --- ETAPA 2: PROCESSAR LATENTES EM LOTES (CHUNKS) ---
211
- logger.info(f"--- INICIANDO ETAPA 2: Processamento de {len(latent_fragment_paths)} latentes em lotes de {LATENT_PROCESSING_CHUNK_SIZE} ---")
212
  final_video_clip_paths = []
213
- num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE) # Ceiling division
214
-
215
  for i in range(num_chunks):
216
  chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE
217
  chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE
218
  chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index]
219
-
220
- progress(i / num_chunks, desc=f"Processando Lote {i+1}/{num_chunks}")
221
-
222
- # Carrega os tensores do lote atual do disco para a GPU
223
  tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
224
-
225
- # Concatena os tensores do lote, removendo o latente de sobreposição
226
- tensors_para_concatenar = [
227
- frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag
228
- for j, frag in enumerate(tensors_in_chunk)
229
- ]
230
  sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
231
- del tensors_in_chunk, tensors_para_concatenar
232
- gc.collect(); torch.cuda.empty_cache()
233
-
234
- logger.info(f"Lote {i+1} concatenado. Shape do sub-latente: {sub_group_latent.shape}")
235
-
236
- # 1. (Opcional) Upscaler Latente
237
- if use_upscaler:
238
- logger.info(f"Aplicando Upscaler no lote {i+1}...")
239
- sub_group_latent = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
240
- gc.collect(); torch.cuda.empty_cache()
241
-
242
- # 2. Decodificar Latente para Vídeo (com ou sem áudio)
243
  base_name = f"clip_{i:04d}_{run_timestamp}"
244
- current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}_temp.mp4")
245
-
246
- if use_audio:
247
- # O áudio é gerado para o prompt global por enquanto. Pode ser adaptado.
248
- current_clip_path = self._generate_video_and_audio_from_latents(sub_group_latent, global_prompt, base_name)
249
- else:
250
- pixel_tensor = self.latents_to_pixels(sub_group_latent)
251
- self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
252
- del pixel_tensor
253
-
254
- del sub_group_latent
255
- gc.collect(); torch.cuda.empty_cache()
256
-
257
- # 3. (Opcional) Masterização HD
258
- if use_hd:
259
- logger.info(f"Aplicando masterização HD no clipe {i+1}...")
260
- hd_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}_hd.mp4")
261
- try:
262
- hd_specialist_singleton.process_video(input_video_path=current_clip_path, output_video_path=hd_clip_path, prompt=global_prompt)
263
- # Apaga o clipe não-HD para economizar espaço
264
- if os.path.exists(current_clip_path) and current_clip_path != hd_clip_path:
265
- os.remove(current_clip_path)
266
- current_clip_path = hd_clip_path
267
- except Exception as e:
268
- logger.error(f"Falha na masterização HD do clipe {i+1}: {e}. Usando versão padrão.")
269
-
270
- # 4. Adicionar caminho do clipe final à lista
271
  final_video_clip_paths.append(current_clip_path)
272
 
273
- #if use_refiner:
274
- # progress(0.8, desc="Refinando continuidade visual...")
275
- # # [REATORADO] Chamada para o novo especialista
276
- # # OBS: Refinamento foi desativado conforme solicitado por degradar a lógica das keyframes.
277
-
278
- # --- ETAPA 3: MONTAGEM FINAL ---
279
- progress(0.98, desc="Montagem final dos clipes...")
280
- final_video_path = os.path.join(self.workspace_dir, f"filme_final_{run_timestamp}.mp4")
281
  self.concatenate_videos_ffmpeg(final_video_clip_paths, final_video_path)
282
-
283
- # --- ETAPA 4: LIMPEZA ---
284
- logger.info("Limpando arquivos temporários...")
285
  try:
286
- shutil.rmtree(temp_latent_dir)
287
  shutil.rmtree(temp_video_clips_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  concat_list_path = os.path.join(self.workspace_dir, "concat_list.txt")
289
- if os.path.exists(concat_list_path):
290
- os.remove(concat_list_path)
291
  except OSError as e:
292
- logger.warning(f"Não foi possível remover os diretórios temporários: {e}")
293
-
294
- logger.info(f"Processo concluído! Vídeo final salvo em: {final_video_path}")
295
  yield {"final_path": final_video_path}
296
 
297
- def _generate_video_and_audio_from_latents(self, latent_tensor, audio_prompt, base_name):
298
- # Este método agora opera em um diretório temporário para os clipes
299
- temp_video_clips_dir = os.path.dirname(os.path.join(self.workspace_dir, base_name)) # Hack para obter o diretório correto
300
- silent_video_path = os.path.join(temp_video_clips_dir, f"{base_name}_silent.mp4")
301
-
302
- pixel_tensor = self.latents_to_pixels(latent_tensor)
303
- self.save_video_from_tensor(pixel_tensor, silent_video_path, fps=24)
304
- del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
  try:
 
307
  result = subprocess.run(
308
- ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", silent_video_path],
309
  capture_output=True, text=True, check=True)
310
- frag_duration = float(result.stdout.strip())
311
- except (subprocess.CalledProcessError, ValueError, FileNotFoundError):
312
- logger.warning(f"ffprobe falhou. Calculando duração manualmente a partir dos latentes.")
313
- # O VAE interpola, então o número de frames é (num_latentes - 1) * 8 + 1 (aproximadamente)
314
- num_pixel_frames = (latent_tensor.shape[2] - 1) * 8 + 1
315
- frag_duration = num_pixel_frames / 24.0
316
-
317
- # Salva o vídeo com áudio no mesmo diretório temporário
318
- video_with_audio_path = audio_specialist_singleton.generate_audio_for_video(
319
- video_path=silent_video_path, prompt=audio_prompt,
320
- duration_seconds=frag_duration,
321
- output_path_override=os.path.join(temp_video_clips_dir, f"{base_name}_with_audio.mp4")
322
- )
323
-
324
- if os.path.exists(silent_video_path):
325
- os.remove(silent_video_path)
326
- return video_with_audio_path
327
-
 
328
  def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
 
329
  final_ltx_params = {
330
  **ltx_params, 'width': target_resolution[0], 'height': target_resolution[1],
331
  'video_total_frames': total_frames_to_generate, 'video_fps': 24,
332
  'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items
333
  }
334
- new_full_latents, _ = self.ltx_manager.generate_latent_fragment(**final_ltx_params)
335
- gc.collect()
336
- torch.cuda.empty_cache()
337
- return new_full_latents
338
-
339
  def _quantize_to_multiple(self, n, m):
 
340
  if m == 0: return n
341
  quantized = int(round(n / m) * m)
342
  return m if n > 0 and quantized == 0 else quantized
 
1
  # deformes4D_engine.py
 
2
  #
3
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 
4
  #
5
+ # Version: 2.0.0
6
+ #
7
+ # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
8
+ # "Film Crew" specialist within the ADUC-SDR architecture. It implements the Camera (Ψ)
9
+ # and Distiller (Δ) concepts. Its core responsibilities include the low-level orchestration
10
+ # of video fragment generation (calling the LTX specialist), latent manipulation (calling
11
+ # the enhancer specialist), and final rendering/post-production tasks like HD mastering
12
+ # and audio generation. It executes the specific commands delegated by the AducOrchestrator.
13
 
14
  import os
15
  import time
 
23
  import subprocess
24
  import gc
25
  import shutil
26
+ from pathlib import Path
27
 
28
  from ltx_manager_helpers import ltx_manager_singleton
29
+ from gemini_helpers import gemini_singleton
 
30
  from latent_enhancer_specialist import latent_enhancer_specialist_singleton
31
  from hd_specialist import hd_specialist_singleton
32
  from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
 
36
 
37
  @dataclass
  class LatentConditioningItem:
      """Represents a conditioning anchor in the latent space for the Camera (Ψ).

      The engine builds a list of these per fragment and forwards it to the
      LTX manager under the ``conditioning_items_data`` keyword.
      """
      # Latent tensor used as the anchor (an encoded keyframe or a slice of a
      # previously generated fragment).
      latent_tensor: torch.Tensor
      # Frame position the anchor is pinned to — presumably counted in pixel
      # frames of the fragment being generated; TODO confirm against the LTX manager.
      media_frame_number: int
      # Weight given to this anchor during generation — valid range is not
      # visible here; verify against the LTX manager.
      conditioning_strength: float
43
 
44
  class Deformes4DEngine:
45
  """
46
+ Implements the Camera (Ψ) and Distiller (Δ) of the ADUC-SDR architecture.
47
+ Orchestrates the generation, latent post-production, and final rendering of video fragments.
48
  """
49
  def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
50
  self.ltx_manager = ltx_manager
51
  self.workspace_dir = workspace_dir
52
  self._vae = None
53
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
54
+ logger.info("Deformes4D Specialist (ADUC-SDR Executor) initialized.")
 
55
  os.makedirs(self.workspace_dir, exist_ok=True)
56
 
57
 
 
62
  self._vae.to(self.device); self._vae.eval()
63
  return self._vae
64
 
65
+ # --- HELPER METHODS ---
66
+
67
  @torch.no_grad()
68
  def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
69
  tensor = tensor.to(self.device, dtype=self.vae.dtype)
 
93
  tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
94
  tensor = (tensor * 2.0) - 1.0
95
  return self.pixels_to_latents(tensor)
96
+
97
  def concatenate_videos_ffmpeg(self, video_paths: list[str], output_path: str):
98
+ if not video_paths: raise gr.Error("No video fragments to assemble.")
99
  list_file_path = os.path.join(self.workspace_dir, "concat_list.txt")
100
  with open(list_file_path, 'w', encoding='utf-8') as f:
101
  for path in video_paths: f.write(f"file '{os.path.abspath(path)}'\n")
102
+
 
103
  cmd_list = ['ffmpeg', '-y', '-hwaccel', 'auto', '-f', 'concat', '-safe', '0', '-i', list_file_path, '-c', 'copy', output_path]
104
+ logger.info(f"Concatenating {len(video_paths)} video clips into {output_path}...")
105
  try:
106
  subprocess.run(cmd_list, check=True, capture_output=True, text=True)
107
  except subprocess.CalledProcessError as e:
108
+ logger.error(f"FFmpeg error: {e.stderr}")
109
+ logger.info("Attempting concatenation again without hardware acceleration...")
 
110
  cmd_list = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', list_file_path, '-c', 'copy', output_path]
111
  try:
112
  subprocess.run(cmd_list, check=True, capture_output=True, text=True)
113
  except subprocess.CalledProcessError as e_fallback:
114
+ logger.error(f"FFmpeg error (fallback): {e_fallback.stderr}")
115
+ raise gr.Error(f"Failed to assemble the final video. Details: {e_fallback.stderr}")
116
+
117
+ # --- CORE ADUC-SDR LOGIC ---
118
+
119
+ def generate_original_movie(self, keyframes: list, global_prompt: str, storyboard: list,
120
+ seconds_per_fragment: float, trim_percent: int,
121
+ handler_strength: float, destination_convergence_strength: float,
122
+ video_resolution: int, use_continuity_director: bool,
123
+ guidance_scale: float, stg_scale: float, num_inference_steps: int,
124
+ progress: gr.Progress = gr.Progress()):
125
+ """
126
+ Step 3: Production. Generates the original master video from keyframes.
127
+ This involves generating latent tensors for each segment and then decoding them into a video file.
128
+ """
129
  FPS = 24
130
  FRAMES_PER_LATENT_CHUNK = 8
131
+ LATENT_PROCESSING_CHUNK_SIZE = 4
132
+
 
133
  run_timestamp = int(time.time())
134
  temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}")
135
  temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
 
142
 
143
  DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
144
  DESTINATION_FRAME_TARGET = total_frames_brutos - 1
145
+
146
+ base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps, "rescaling_scale": 0.15, "image_cond_noise_scale": 0.00}
 
 
147
  keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]
148
  story_history = ""
149
+ target_resolution_tuple = (video_resolution, video_resolution)
 
150
  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
151
+ latent_fragment_paths = []
152
+
153
+ if len(keyframe_paths) < 2: raise gr.Error(f"Generation requires at least 2 keyframes. You provided {len(keyframe_paths)}.")
 
154
  num_transitions_to_generate = len(keyframe_paths) - 1
155
+
156
+ logger.info("--- STARTING STAGE 1: Latent Fragment Generation ---")
 
157
  for i in range(num_transitions_to_generate):
158
  fragment_index = i + 1
159
+ progress(i / num_transitions_to_generate, desc=f"Generating Latent {fragment_index}/{num_transitions_to_generate}")
 
 
160
  past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
161
  start_keyframe_path = keyframe_paths[i]
162
  destination_keyframe_path = keyframe_paths[i + 1]
163
+ future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene."
164
+ logger.info(f"Calling Gemini to generate cinematic decision for fragment {fragment_index}...")
165
+ decision = gemini_singleton.get_cinematic_decision(global_prompt, story_history, past_keyframe_path, start_keyframe_path, destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.", storyboard[i], future_story_prompt)
166
  transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
167
+ story_history += f"\n- Act {fragment_index}: {motion_prompt}"
168
  conditioning_items = []
169
  if eco_latent_for_next_loop is None:
170
  img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
 
174
  conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
175
  img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
176
  conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
 
177
  current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
178
+ logger.info(f"Calling LTX to generate video latents for fragment {fragment_index} ({total_frames_brutos} frames)...")
179
+ latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
180
+ num_latent_frames = latents_brutos.shape[2]
181
+ logger.info(f"LTX responded with a latent tensor of shape {latents_brutos.shape}, representing ~{num_latent_frames * 8 + 1} video frames at {FPS} FPS.")
182
  last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
183
+ eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
184
  dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
 
185
  latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
186
  latents_video = latents_video[:, :, 1:, :, :]
187
+ del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
 
 
 
188
  if transition_type == "cut":
189
  eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
 
 
190
  cpu_latent = latents_video.cpu()
191
  latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
192
  torch.save(cpu_latent, latent_path)
193
  latent_fragment_paths.append(latent_path)
194
+ del latents_video, cpu_latent; gc.collect()
195
+ del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache()
196
 
197
+ logger.info(f"--- STARTING STAGE 2: Processing {len(latent_fragment_paths)} latents in chunks of {LATENT_PROCESSING_CHUNK_SIZE} ---")
 
 
 
 
 
 
 
198
  final_video_clip_paths = []
199
+ num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE)
 
200
  for i in range(num_chunks):
201
  chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE
202
  chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE
203
  chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index]
204
+ progress(i / num_chunks, desc=f"Processing & Decoding Batch {i+1}/{num_chunks}")
 
 
 
205
  tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
206
+ tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
 
 
 
 
 
207
  sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
208
+ del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
209
+ logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
 
 
 
 
 
 
 
 
 
 
210
  base_name = f"clip_{i:04d}_{run_timestamp}"
211
+ current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
212
+ pixel_tensor = self.latents_to_pixels(sub_group_latent)
213
+ self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
214
+ del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  final_video_clip_paths.append(current_clip_path)
216
 
217
+ progress(0.98, desc="Final assembly of clips...")
218
+ final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4")
 
 
 
 
 
 
219
  self.concatenate_videos_ffmpeg(final_video_clip_paths, final_video_path)
220
+ logger.info("Cleaning up temporary clip files...")
 
 
221
  try:
 
222
  shutil.rmtree(temp_video_clips_dir)
223
+ except OSError as e:
224
+ logger.warning(f"Could not remove temporary clip directory: {e}")
225
+ logger.info(f"Process complete! Original video saved to: {final_video_path}")
226
+ return {"final_path": final_video_path, "latent_paths": latent_fragment_paths}
227
+
228
+ def upscale_latents_and_create_video(self, latent_paths: list, chunk_size: int, progress: gr.Progress):
229
+ if not latent_paths:
230
+ raise gr.Error("Cannot perform upscaling: no latent paths were provided.")
231
+ logger.info("--- STARTING POST-PRODUCTION: Latent Upscaling ---")
232
+ run_timestamp = int(time.time())
233
+ temp_upscaled_clips_dir = os.path.join(self.workspace_dir, f"temp_upscaled_clips_{run_timestamp}")
234
+ os.makedirs(temp_upscaled_clips_dir, exist_ok=True)
235
+ final_upscaled_clip_paths = []
236
+ num_chunks = -(-len(latent_paths) // chunk_size)
237
+ for i in range(num_chunks):
238
+ chunk_start_index = i * chunk_size
239
+ chunk_end_index = chunk_start_index + chunk_size
240
+ chunk_paths = latent_paths[chunk_start_index:chunk_end_index]
241
+ progress(i / num_chunks, desc=f"Upscaling & Decoding Batch {i+1}/{num_chunks}")
242
+ tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
243
+ tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
244
+ sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
245
+ del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
246
+ logger.info(f"Batch {i+1} loaded. Original latent shape: {sub_group_latent.shape}")
247
+ upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
248
+ del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
249
+ logger.info(f"Batch {i+1} upscaled. New latent shape: {upscaled_latent_chunk.shape}")
250
+ pixel_tensor = self.latents_to_pixels(upscaled_latent_chunk)
251
+ del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
252
+ base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
253
+ current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
254
+ self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24)
255
+ final_upscaled_clip_paths.append(current_clip_path)
256
+ del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
257
+ logger.info(f"Saved upscaled clip: {Path(current_clip_path).name}")
258
+ progress(0.98, desc="Assembling upscaled clips...")
259
+ final_video_path = os.path.join(self.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4")
260
+ self.concatenate_videos_ffmpeg(final_upscaled_clip_paths, final_video_path)
261
+ logger.info("Cleaning up temporary upscaled clip files...")
262
+ try:
263
+ shutil.rmtree(temp_upscaled_clips_dir)
264
  concat_list_path = os.path.join(self.workspace_dir, "concat_list.txt")
265
+ if os.path.exists(concat_list_path): os.remove(concat_list_path)
 
266
  except OSError as e:
267
+ logger.warning(f"Could not remove temporary upscaled clip directory: {e}")
268
+ logger.info(f"Latent upscaling complete! Final video at: {final_video_path}")
 
269
  yield {"final_path": final_video_path}
270
 
271
+ def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress):
272
+ """
273
+ Post-Production Step 4B: Applies SeedVR super-resolution to an existing video file.
274
+ """
275
+ logger.info(f"--- STARTING POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
276
+ progress(0.1, desc=f"Preparing for HD Mastering with SeedVR {model_version}...")
277
+
278
+ run_timestamp = int(time.time())
279
+ output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{run_timestamp}.mp4")
280
+
281
+ try:
282
+ final_path = hd_specialist_singleton.process_video(
283
+ input_video_path=source_video_path,
284
+ output_video_path=output_path,
285
+ prompt=prompt,
286
+ model_version=model_version,
287
+ steps=steps,
288
+ progress=progress
289
+ )
290
+ logger.info(f"HD Mastering complete! Final video at: {final_path}")
291
+ yield {"final_path": final_path}
292
+ except Exception as e:
293
+ logger.error(f"HD Mastering failed: {e}", exc_info=True)
294
+ raise gr.Error(f"HD Mastering failed. Details: {e}")
295
+
296
+ def generate_audio_for_final_video(self, source_video_path: str, audio_prompt: str, progress: gr.Progress):
297
+ """
298
+ Post-Production Step 4C: Generates audio for a final video file and muxes it in.
299
+ """
300
+ logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
301
+ progress(0.1, desc="Preparing for audio generation...")
302
 
303
  try:
304
+ # Get video duration using ffprobe
305
  result = subprocess.run(
306
+ ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
307
  capture_output=True, text=True, check=True)
308
+ duration = float(result.stdout.strip())
309
+ logger.info(f"Source video duration: {duration:.2f} seconds.")
310
+
311
+ progress(0.5, desc="Generating audio track...")
312
+ # The audio specialist handles file naming and muxing internally
313
+ final_path = audio_specialist_singleton.generate_audio_for_video(
314
+ video_path=source_video_path,
315
+ prompt=audio_prompt,
316
+ duration_seconds=duration,
317
+ )
318
+
319
+ logger.info(f"Audio generation complete! Final video with audio at: {final_path}")
320
+ progress(1.0, desc="Audio generation complete!")
321
+ yield {"final_path": final_path}
322
+
323
+ except Exception as e:
324
+ logger.error(f"Audio generation failed: {e}", exc_info=True)
325
+ raise gr.Error(f"Audio generation failed. Details: {e}")
326
+
327
  def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
328
+ """Internal helper to call the LTX manager."""
329
  final_ltx_params = {
330
  **ltx_params, 'width': target_resolution[0], 'height': target_resolution[1],
331
  'video_total_frames': total_frames_to_generate, 'video_fps': 24,
332
  'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items
333
  }
334
+ return self.ltx_manager.generate_latent_fragment(**final_ltx_params)
335
+
 
 
 
336
  def _quantize_to_multiple(self, n, m):
337
+ """Helper to round n to the nearest multiple of m."""
338
  if m == 0: return n
339
  quantized = int(round(n / m) * m)
340
  return m if n > 0 and quantized == 0 else quantized