Carlexxx committed on
Commit 9c3367c · 1 Parent(s): 8cc88ac

feat(arch): Launch the 'Staged Rocket' - Modular Deformes Engine Architecture

aduc_orchestrator.py CHANGED
@@ -104,9 +104,9 @@ class AducOrchestrator:
         logger.info("Maestro: Especialista de Imagem concluiu a geração dos keyframes.")
         return final_keyframes
 
-    # --- FUNCTION SIGNATURE CORRECTED ---
+    # --- FUNCTION SIGNATURE UPDATED ---
     def task_produce_final_movie_with_feedback(self, keyframes, global_prompt, seconds_per_fragment,
-                                               overlap_percent, echo_frames,
+                                               trim_chunks: int, echo_chunks: int,  # <-- UPDATED PARAMETERS
                                                handler_strength,
                                                destination_convergence_strength,
                                                video_resolution, use_continuity_director,
@@ -115,14 +115,14 @@ class AducOrchestrator:
         logger.info("AducOrchestrator: Delegando a produção do filme completo ao Deformes4DEngine.")
         storyboard = self.director.get_state("storyboard", [])
 
-        # --- CALL CORRECTED ---
+        # --- FUNCTION CALL UPDATED ---
         for update in self.editor.generate_full_movie(
             keyframes=keyframes,
             global_prompt=global_prompt,
             storyboard=storyboard,
             seconds_per_fragment=seconds_per_fragment,
-            overlap_percent=overlap_percent,
-            echo_frames=echo_frames,
+            trim_chunks=trim_chunks,  # <-- NEW PARAMETER
+            echo_chunks=echo_chunks,  # <-- NEW PARAMETER
             handler_strength=handler_strength,
             destination_convergence_strength=destination_convergence_strength,
             video_resolution=video_resolution,
app.py CHANGED
@@ -116,7 +116,9 @@ def run_mode_b_wrapper(prompt, num_keyframes, ref_files, progress=gr.Progress())
 
     return gr.update(value=storyboard), gr.update(value=selected_keyframes), gr.update(visible=True, open=True)
 
-def run_video_production_wrapper(keyframes, prompt, duration, overlap_percent, echo_frames,
+# --- WRAPPER FUNCTION SIGNATURE UPDATED ---
+def run_video_production_wrapper(keyframes, prompt, duration,
+                                 trim_chunks, echo_chunks,  # <-- UPDATED PARAMETERS
                                  handler_strength, destination_convergence_strength,
                                  video_resolution, use_cont, use_cine,
                                  progress=gr.Progress()):
@@ -130,8 +132,10 @@ def run_video_production_wrapper(keyframes, prompt, duration, ...):
     video_fragments_so_far = []
     final_movie_path = None
 
+    # --- CALL TO THE ORCHESTRATOR UPDATED ---
     for update in aduc.task_produce_final_movie_with_feedback(
-        keyframes, prompt, duration, overlap_percent, echo_frames,
+        keyframes, prompt, duration,
+        int(trim_chunks), int(echo_chunks),  # <-- passing the new values (converted to int)
         handler_strength, destination_convergence_strength,
         resolution, use_cont, use_cine, progress
     ):
@@ -155,8 +159,15 @@ def get_log_content():
     except FileNotFoundError:
         return "Arquivo de log ainda não criado. Inicie uma geração."
 
+# --- TRANSLATION FUNCTION UPDATED (NEW COMPONENTS) ---
 def update_ui_language(lang_code):
     lang_map = i18n.get(lang_code, i18n.get('en', {}))
+    # Adding translations for the new sliders (assuming they exist in i18n.json)
+    trim_chunks_label = lang_map.get('trim_chunks_label', 'Trim Chunks')
+    trim_chunks_info = lang_map.get('trim_chunks_info', 'How many chunks (8 frames each) to "trim" from the end before extracting the continuity guide (Echo).')
+    echo_chunks_label = lang_map.get('echo_chunks_label', 'Echo Chunks (Memory)')
+    echo_chunks_info = lang_map.get('echo_chunks_info', 'The size of the continuity guide (Echo) in chunks. This is the "memory" passed to the next scene.')
+
     return {
         title_md: gr.update(value=f"# {lang_map.get('app_title')}"),
         subtitle_md: gr.update(value=lang_map.get('app_subtitle')),
@@ -175,8 +186,10 @@ def update_ui_language(lang_code):
         continuity_director_checkbox: gr.update(label=lang_map.get('continuity_director_label')),
         cinematographer_checkbox: gr.update(label=lang_map.get('cinematographer_label')),
 
-        memoria_cinetica_radio: gr.update(label=lang_map.get('memoria_cinetica_label'), info=lang_map.get('memoria_cinetica_info')),
-        sobreposicao_video_slider: gr.update(label=lang_map.get('sobreposicao_label'), info=lang_map.get('sobreposicao_info')),
+        # --- UPDATING THE NEW SLIDERS ---
+        trim_chunks_slider: gr.update(label=trim_chunks_label, info=trim_chunks_info),
+        echo_chunks_slider: gr.update(label=echo_chunks_label, info=echo_chunks_info),
+
         forca_guia_slider: gr.update(label=lang_map.get('forca_guia_label'), info=lang_map.get('forca_guia_info')),
         convergencia_destino_slider: gr.update(label=lang_map.get('convergencia_final_label'), info=lang_map.get('convergencia_final_info')),
 
@@ -218,12 +231,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             continuity_director_checkbox = gr.Checkbox(label=default_lang.get('continuity_director_label'), value=True)
             cinematographer_checkbox = gr.Checkbox(label=default_lang.get('cinematographer_label'), value=True, visible=False)
 
-            gr.Markdown("--- \n**Controles de Continuidade e Edição (Novos):**")
+            # --- START OF UI UPDATE (CHUNK CONTROLS) ---
+            gr.Markdown("--- \n**Controles de Continuidade e Edição (Baseado em Chunks):**")
             with gr.Row():
-                memoria_cinetica_radio = gr.Radio(choices=[8, 16, 24], value=8, label=default_lang.get('memoria_cinetica_label'), info=default_lang.get('memoria_cinetica_info'))
-                sobreposicao_video_slider = gr.Slider(label=default_lang.get('sobreposicao_label'), minimum=0, maximum=50, value=15, step=1, info=default_lang.get('sobreposicao_info'))
+                trim_chunks_slider = gr.Slider(minimum=0, maximum=10, value=2, step=1,
+                                               label="Chunks de Corte",
+                                               info="Quantos chunks (de 8 frames) 'aparar' do final antes de extrair a guia de continuidade (Eco).")
+                echo_chunks_slider = gr.Slider(minimum=1, maximum=3, value=1, step=1,
+                                               label="Chunks de Eco (Memória)",
+                                               info="O tamanho da guia de continuidade (Eco) em chunks. Esta é a 'memória' que passa para a próxima cena.")
+            # --- END OF UI UPDATE ---
 
-            gr.Markdown("**Controle de Influência (Novos):**")
+            gr.Markdown("**Controle de Influência:**")
             with gr.Row():
                 forca_guia_slider = gr.Slider(label=default_lang.get('forca_guia_label'), minimum=0.0, maximum=1.0, value=0.5, step=0.05, info=default_lang.get('forca_guia_info'))
                 convergencia_destino_slider = gr.Slider(label=default_lang.get('convergencia_final_label'), minimum=0.0, maximum=1.0, value=0.75, step=0.05, info=default_lang.get('convergencia_final_info'))
@@ -238,7 +257,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     update_log_button = gr.Button("Atualizar Log")
 
     # --- 5. UI CONNECTIONS ---
-    all_ui_components = list(update_ui_language('pt').keys())
+    # Removing the old components and adding the new ones for translation
+    all_ui_components_dict = update_ui_language('pt')
+    # Manually adding the new components to the dictionary to guarantee they are included
+    all_ui_components_dict[trim_chunks_slider] = None
+    all_ui_components_dict[echo_chunks_slider] = None
+    all_ui_components = list(all_ui_components_dict.keys())
+
     lang_selector.change(fn=update_ui_language, inputs=lang_selector, outputs=all_ui_components)
 
     ref_image_input.upload(fn=preprocess_base_images_wrapper, inputs=ref_image_input, outputs=ref_image_input)
@@ -255,12 +280,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         outputs=[storyboard_output, keyframe_gallery, step3_accordion]
     )
 
+    # --- BUTTON CALL UPDATED ---
    produce_button.click(
        fn=run_video_production_wrapper,
        inputs=[
            keyframe_gallery, prompt_input, duration_per_fragment_slider,
-            sobreposicao_video_slider,
-            memoria_cinetica_radio,
+            trim_chunks_slider,  # <-- NEW INPUT
+            echo_chunks_slider,  # <-- NEW INPUT
            forca_guia_slider,
            convergencia_destino_slider,
            resolution_selector, continuity_director_checkbox, cinematographer_checkbox
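
Note: for reference, the chunk arithmetic behind the two new sliders, as a minimal self-contained Python sketch. The 8-frames-per-chunk convention and the handler_frame formula come from the deformes4D_engine.py diff below; the values are the slider defaults.

    FRAMES_PER_CHUNK = 8  # engine convention: one latent chunk decodes to 8 pixel frames

    trim_chunks = 2  # slider default: chunks trimmed from the end of each fragment
    echo_chunks = 1  # slider default: size of the continuity guide (Echo)

    # Frame index at which the previous fragment's handler (its final chunk) is
    # re-anchored when conditioning the next fragment, mirroring deformes4D_engine.py:
    handler_frame = (echo_chunks + trim_chunks) * FRAMES_PER_CHUNK
    print(handler_frame)  # 24 -> the handler lands three chunks into the new fragment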
deformes4D_engine.py CHANGED
@@ -1,7 +1,6 @@
1
  # deformes4D_engine.py
2
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
  #
4
- #
5
  # MODIFICATIONS FOR ADUC-SDR:
6
  # Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
7
  #
@@ -23,9 +22,7 @@ import subprocess
23
  import random
24
  import gc
25
 
26
- from audio_specialist import audio_specialist_singleton
27
  from ltx_manager_helpers import ltx_manager_singleton
28
- from flux_kontext_helpers import flux_kontext_singleton
29
  from gemini_helpers import gemini_singleton
30
  from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
31
 
@@ -52,7 +49,6 @@ class Deformes4DEngine:
52
  self._vae.to(self.device); self._vae.eval()
53
  return self._vae
54
 
55
- # ... (métodos auxiliares como save/load/pixels_to_latents permanecem iguais) ...
56
  def save_latent_tensor(self, tensor: torch.Tensor, path: str):
57
  torch.save(tensor.cpu(), path)
58
  logger.info(f"Tensor latente salvo em: {path}")
@@ -74,17 +70,19 @@ class Deformes4DEngine:
74
  return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
75
 
76
  def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
77
- if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
 
 
78
  video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
79
  video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
80
  video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
81
  with imageio.get_writer(path, fps=fps, codec='libx264', quality=8) as writer:
82
  for frame in video_np: writer.append_data(frame)
83
- logger.info(f"Vídeo salvo em: {path}")
84
 
85
  def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
86
  if image.size != target_resolution:
87
- logger.info(f" - AÇÃO: Redimensionando imagem de {image.size} para {target_resolution} antes da conversão para latente.")
88
  return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
89
  return image
90
 
@@ -94,245 +92,175 @@ class Deformes4DEngine:
94
  tensor = (tensor * 2.0) - 1.0
95
  return self.pixels_to_latents(tensor)
96
 
97
- def _generate_video_and_audio_from_latents(self, latent_tensor, audio_prompt, base_name):
98
  silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_silent.mp4")
99
  pixel_tensor = self.latents_to_pixels(latent_tensor)
100
  self.save_video_from_tensor(pixel_tensor, silent_video_path, fps=24)
101
  del pixel_tensor; gc.collect()
102
-
103
- #try:
104
- # result = subprocess.run(
105
- # ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", silent_video_path],
106
- # capture_output=True, text=True, check=True)
107
- # frag_duration = float(result.stdout.strip())
108
- #except (subprocess.CalledProcessError, ValueError, FileNotFoundError):
109
- # logger.warning(f"ffprobe falhou em {os.path.basename(silent_video_path)}. Calculando duração manualmente.")
110
- # num_pixel_frames = latent_tensor.shape[2] * 8
111
- # frag_duration = num_pixel_frames / 24.0
112
- #
113
- #video_with_audio_path = audio_specialist_singleton.generate_audio_for_video(
114
- # video_path=silent_video_path, prompt=audio_prompt,
115
- # duration_seconds=frag_duration)
116
- #
117
- #if os.path.exists(silent_video_path):
118
- # os.remove(silent_video_path)
119
  return silent_video_path
120
 
121
  def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
122
- final_ltx_params = {
123
- **ltx_params,
124
- 'width': target_resolution[0], 'height': target_resolution[1],
125
- 'video_total_frames': total_frames_to_generate, 'video_fps': 24,
126
- 'current_fragment_index': int(time.time()),
127
- 'conditioning_items_data': conditioning_items
128
- }
129
  new_full_latents, _ = self.ltx_manager.generate_latent_fragment(**final_ltx_params)
130
  return new_full_latents
131
 
132
  def concatenate_videos_ffmpeg(self, video_paths: list[str], output_path: str) -> str:
133
- if not video_paths:
134
- raise gr.Error("Nenhum fragmento de vídeo para montar.")
135
  list_file_path = os.path.join(self.workspace_dir, "concat_list.txt")
136
  with open(list_file_path, 'w', encoding='utf-8') as f:
137
- for path in video_paths:
138
- f.write(f"file '{os.path.abspath(path)}'\n")
139
  cmd_list = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', list_file_path, '-c', 'copy', output_path]
140
- logger.info("Executando concatenação FFmpeg...")
141
  try:
142
  subprocess.run(cmd_list, check=True, capture_output=True, text=True)
143
  except subprocess.CalledProcessError as e:
144
  logger.error(f"Erro no FFmpeg: {e.stderr}")
145
- raise gr.Error(f"Falha na montagem final do vídeo. Detalhes: {e.stderr}")
146
  return output_path
147
 
148
- def generate_full_movie(self,
149
- keyframes: list,
150
- global_prompt: str,
151
- storyboard: list,
152
- seconds_per_fragment: float,
153
- overlap_percent: int,
154
- echo_frames: int,
155
- handler_strength: float,
156
- destination_convergence_strength: float,
157
- video_resolution: int,
158
- use_continuity_director: bool,
159
- progress: gr.Progress = gr.Progress()):
160
 
161
- base_ltx_params = {
162
- "guidance_scale": 1.0, "stg_scale": 0.0,
163
- "rescaling_scale": 0.15, "num_inference_steps": 7,
164
- }
165
-
166
  keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]
167
- video_clips_paths, story_history, audio_history = [], "", "This is the beginning of the film."
168
  target_resolution_tuple = (video_resolution, video_resolution)
169
- n_trim_latents = self._quantize_to_multiple(int(seconds_per_fragment * 24 * (overlap_percent / 100.0)), 8)
170
 
171
- # --- NOVA LÓGICA: Variáveis para guardar os tensores de continuidade ---
172
- prepared_echo_latent = None
173
- prepared_handler_latent = None
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- num_transitions_to_generate = len(keyframe_paths) - 1
 
176
 
177
  for i in range(num_transitions_to_generate):
178
- progress((i + 1) / num_transitions_to_generate, desc=f"Produzindo Transição {i+1}/{num_transitions_to_generate}")
179
-
180
- start_keyframe_path = keyframe_paths[i]
181
- destination_keyframe_path = keyframe_paths[i+1]
182
- present_scene_desc = storyboard[i]
 
 
 
 
183
 
184
- is_first_fragment = (prepared_handler_latent is None)
185
 
186
- # ... (Lógica de decisão do Gemini e do diretor de som permanece a mesma) ...
187
- if is_first_fragment:
188
- transition_type = "start"
189
- motion_prompt = gemini_singleton.get_initial_motion_prompt(
190
- global_prompt, start_keyframe_path, destination_keyframe_path, present_scene_desc
191
- )
192
- else:
193
- past_keyframe_path = keyframe_paths[i-1]
194
- past_scene_desc = storyboard[i-1]
195
- future_scene_desc = storyboard[i+1] if (i+1) < len(storyboard) else "A cena final."
196
- decision = gemini_singleton.get_cinematic_decision(
197
- global_prompt=global_prompt, story_history=story_history,
198
- past_keyframe_path=past_keyframe_path, present_keyframe_path=start_keyframe_path,
199
- future_keyframe_path=destination_keyframe_path, past_scene_desc=past_scene_desc,
200
- present_scene_desc=present_scene_desc, future_scene_desc=future_scene_desc
201
- )
202
- transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
203
 
204
  story_history += f"\n- Ato {i+1} ({transition_type}): {motion_prompt}"
205
 
206
- if use_continuity_director:
207
- if is_first_fragment:
208
- audio_prompt = gemini_singleton.get_sound_director_prompt(
209
- audio_history=audio_history,
210
- past_keyframe_path=start_keyframe_path, present_keyframe_path=start_keyframe_path,
211
- future_keyframe_path=destination_keyframe_path, present_scene_desc=present_scene_desc,
212
- motion_prompt=motion_prompt, future_scene_desc=storyboard[i+1] if (i+1) < len(storyboard) else "The final scene."
213
- )
214
- else:
215
- audio_prompt = gemini_singleton.get_sound_director_prompt(
216
- audio_history=audio_history, past_keyframe_path=keyframe_paths[i-1],
217
- present_keyframe_path=start_keyframe_path, future_keyframe_path=destination_keyframe_path,
218
- present_scene_desc=present_scene_desc, motion_prompt=motion_prompt,
219
- future_scene_desc=storyboard[i+1] if (i+1) < len(storyboard) else "The final scene."
220
- )
221
- else:
222
- audio_prompt = present_scene_desc
223
 
224
- audio_history = audio_prompt
225
-
226
  conditioning_items = []
227
- current_ltx_params = {**base_ltx_params, "handler_strength": handler_strength, "motion_prompt": motion_prompt}
228
- total_frames_to_generate = self._quantize_to_multiple(int(seconds_per_fragment * 24), 8) + 1
229
-
230
-
231
 
232
- # --- NOVA LÓGICA: Preparação das instruções de condicionamento ---
233
- if is_first_fragment:
234
- img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
235
- start_latent = self.pil_to_latent(img_start)
236
- conditioning_items.append(LatentConditioningItem(start_latent, 0, 1.0))
237
- if transition_type != "cut":
238
- img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
239
- destination_latent = self.pil_to_latent(img_dest)
240
- conditioning_items.append(LatentConditioningItem(destination_latent, total_frames_to_generate - 1, destination_convergence_strength))
241
  else:
242
- # Usa os tensores pré-preparados da iteração anterior
243
- conditioning_items.append(LatentConditioningItem(prepared_echo_latent, 0, 1.0))
244
- conditioning_items.append(LatentConditioningItem(prepared_handler_latent, echo_frames, handler_strength))
245
- if transition_type == "continuous":
246
- img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
247
- destination_latent = self.pil_to_latent(img_dest)
248
- conditioning_items.append(LatentConditioningItem(destination_latent, total_frames_to_generate - 1, destination_convergence_strength))
249
 
250
- new_full_latents = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_to_generate)
 
251
 
 
 
252
 
253
-
254
- # --- CÓDIGO ADICIONADO PARA ITERAR E CONTAR FRAMES ---
255
- logger.info("Iniciando a contagem de frames para cada tensor latente individual.")
256
- total_frames_from_latents = 0
257
- for i in range(new_full_latents.shape[2]):
258
- single_latent_tensor = new_full_latents[:, :, i:i+1, :, :]
259
- temp_video_path = os.path.join(self.workspace_dir, f"temp_latent_video_{i}.mp4")
260
-
261
- # Gerar um vídeo temporário para o tensor latente individual
262
- pixel_tensor_single = self.latents_to_pixels(single_latent_tensor)
263
- self.save_video_from_tensor(pixel_tensor_single, temp_video_path, fps=24)
264
- del pixel_tensor_single
265
- gc.collect()
266
 
267
- # Contar os frames do vídeo temporário com ffprobe
 
 
 
268
  try:
269
- frame_count_result = subprocess.run(
270
- ["ffprobe", "-v", "error", "-select_streams", "v:0", "-count_frames", "-show_entries", "stream=nb_read_frames", "-of", "default=noprint_wrappers=1:nokey=1", temp_video_path],
271
- capture_output=True, text=True, check=True
272
- )
273
- frames = int(frame_count_result.stdout.strip())
274
- logger.info(f"Latente {i}: Vídeo temporário gerado com {frames} frames.")
275
- total_frames_from_latents += frames
276
- except (subprocess.CalledProcessError, ValueError, FileNotFoundError) as e:
277
- logger.error(f"Falha ao contar frames para o latente {i} com ffprobe: {e}")
278
- finally:
279
  if os.path.exists(temp_video_path):
 
 
 
 
280
  os.remove(temp_video_path)
 
 
 
 
 
 
 
281
 
282
- logger.info(f"Contagem total de frames a partir dos latentes individuais: {total_frames_from_latents}")
283
-
284
- # Gerar e contar frames para o vídeo completo a partir do tensor latente inteiro
285
- full_latent_video_path = os.path.join(self.workspace_dir, "temp_full_latent_video.mp4")
286
- full_pixel_tensor = self.latents_to_pixels(new_full_latents)
287
- self.save_video_from_tensor(full_pixel_tensor, full_latent_video_path, fps=24)
288
- del full_pixel_tensor
289
- gc.collect()
290
-
291
- try:
292
- full_frame_count_result = subprocess.run(
293
- ["ffprobe", "-v", "error", "-select_streams", "v:0", "-count_frames", "-show_entries", "stream=nb_read_frames", "-of", "default=noprint_wrappers=1:nokey=1", full_latent_video_path],
294
- capture_output=True, text=True, check=True
295
- )
296
- total_frames_full_video = int(full_frame_count_result.stdout.strip())
297
- logger.info(f"Vídeo completo gerado a partir do tensor latente inteiro possui {total_frames_full_video} frames.")
298
- except (subprocess.CalledProcessError, ValueError, FileNotFoundError) as e:
299
- logger.error(f"Falha ao contar frames do vídeo completo com ffprobe: {e}")
300
- finally:
301
- if os.path.exists(full_latent_video_path):
302
- os.remove(full_latent_video_path)
303
- # --- FIM DO CÓDIGO ADICIONADO ---```
304
-
305
-
306
 
307
- # --- NOVA LÓGICA: Preparação movida para o final do loop ---
308
- is_last_fragment = (i == num_transitions_to_generate - 1)
309
 
310
- if not is_last_fragment:
311
- # ANTECIPAÇÃO: Prepara os tensores para a PRÓXIMA iteração
312
- prepared_handler_latent = new_full_latents[:, :, -1:, :, :].clone()
313
- prepared_echo_latent = new_full_latents[:, :, -echo_frames:, :, :].clone()
314
 
315
- # CORTE NO FIM: Define os latentes para o VÍDEO ATUAL, removendo a sobreposição
316
- if n_trim_latents > 0 and new_full_latents.shape[2] > n_trim_latents:
317
- latents_for_video = new_full_latents[:, :, :-n_trim_latents, :, :]
318
- else:
319
- latents_for_video = new_full_latents
320
- else:
321
- # O último fragmento não precisa preparar nada para o futuro, então renderiza-se por completo.
322
- latents_for_video = new_full_latents
 
 
 
 
323
 
 
 
 
 
324
  base_name = f"fragment_{i}_{int(time.time())}"
325
- video_with_audio_path = self._generate_video_and_audio_from_latents(latents_for_video, audio_prompt, base_name)
326
- video_clips_paths.append(video_with_audio_path)
327
 
328
- if transition_type == "cut":
329
- # Se for um corte, limpa a memória para a próxima iteração começar do zero.
330
- prepared_echo_latent = None
331
- prepared_handler_latent = None
332
-
333
- yield {"fragment_path": video_with_audio_path}
334
-
335
- final_movie_path = os.path.join(self.workspace_dir, f"final_movie_{int(time.time())}.mp4")
336
  self.concatenate_videos_ffmpeg(video_clips_paths, final_movie_path)
337
 
338
  logger.info(f"Filme completo salvo em: {final_movie_path}")
 
1
  # deformes4D_engine.py
2
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
  #
 
4
  # MODIFICATIONS FOR ADUC-SDR:
5
  # Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
6
  #
 
22
  import random
23
  import gc
24
 
 
25
  from ltx_manager_helpers import ltx_manager_singleton
 
26
  from gemini_helpers import gemini_singleton
27
  from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
28
 
 
49
  self._vae.to(self.device); self._vae.eval()
50
  return self._vae
51
 
 
52
  def save_latent_tensor(self, tensor: torch.Tensor, path: str):
53
  torch.save(tensor.cpu(), path)
54
  logger.info(f"Tensor latente salvo em: {path}")
 
70
  return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
71
 
72
  def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
73
+ if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0:
74
+ logger.warning("Tentativa de salvar um tensor de vídeo inválido. Abortando.")
75
+ return
76
  video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
77
  video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
78
  video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
79
  with imageio.get_writer(path, fps=fps, codec='libx264', quality=8) as writer:
80
  for frame in video_np: writer.append_data(frame)
81
+ logger.info(f"Vídeo salvo em: {path}")
82
 
83
  def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
84
  if image.size != target_resolution:
85
+ logger.info(f" - AÇÃO: Redimensionando imagem de {image.size} para {target_resolution} antes da conversão para latente.")
86
  return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
87
  return image
88
 
 
92
  tensor = (tensor * 2.0) - 1.0
93
  return self.pixels_to_latents(tensor)
94
 
95
+ def _generate_video_from_latents(self, latent_tensor, base_name):
96
  silent_video_path = os.path.join(self.workspace_dir, f"{base_name}_silent.mp4")
97
  pixel_tensor = self.latents_to_pixels(latent_tensor)
98
  self.save_video_from_tensor(pixel_tensor, silent_video_path, fps=24)
99
  del pixel_tensor; gc.collect()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  return silent_video_path
101
 
102
  def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
103
+ final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items}
 
 
 
 
 
 
104
  new_full_latents, _ = self.ltx_manager.generate_latent_fragment(**final_ltx_params)
105
  return new_full_latents
106
 
107
  def concatenate_videos_ffmpeg(self, video_paths: list[str], output_path: str) -> str:
108
+ if not video_paths: raise gr.Error("Nenhum fragmento de vídeo para montar.")
 
109
  list_file_path = os.path.join(self.workspace_dir, "concat_list.txt")
110
  with open(list_file_path, 'w', encoding='utf-8') as f:
111
+ for path in video_paths: f.write(f"file '{os.path.abspath(path)}'\n")
 
112
  cmd_list = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', list_file_path, '-c', 'copy', output_path]
113
+ logger.info("Executando concatenação FFmpeg...")
114
  try:
115
  subprocess.run(cmd_list, check=True, capture_output=True, text=True)
116
  except subprocess.CalledProcessError as e:
117
  logger.error(f"Erro no FFmpeg: {e.stderr}")
118
+ raise gr.Error(f"Falha na montagem final do vídeo. Detalhes: {e.stderr}")
119
  return output_path
120
 
121
+ def generate_full_movie(self, keyframes: list, global_prompt: str, storyboard: list, seconds_per_fragment: float,
122
+ trim_chunks: int, echo_chunks: int,
123
+ handler_strength: float, destination_convergence_strength: float, video_resolution: int,
124
+ use_continuity_director: bool, progress: gr.Progress = gr.Progress()):
 
 
 
 
 
 
 
 
125
 
126
+ base_ltx_params = {"guidance_scale": 1.0, "stg_scale": 0.0, "rescaling_scale": 0.15, "num_inference_steps": 20}
 
 
 
 
127
  keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]
128
+ video_clips_paths, story_history = [], ""
129
  target_resolution_tuple = (video_resolution, video_resolution)
 
130
 
131
+ total_frames_base = self._quantize_to_multiple(round(seconds_per_fragment * 24), 8)
132
+ if total_frames_base == 0: total_frames_base = 8
133
+
134
+ logger.info("="*50)
135
+ logger.info("CÁLCULOS DE GERAÇÃO E GUIAS (BASEADO EM CHUNKS):")
136
+ logger.info(f" - Duração Base Solicitada: {total_frames_base} frames ({total_frames_base // 8} chunks)")
137
+ logger.info(f" - N_Corte para guias: {trim_chunks} chunks")
138
+ logger.info(f" - N_Eco (Dejavu) para guias: {echo_chunks} chunks")
139
+ logger.info("="*50)
140
+
141
+ dejavu_latent, evo_latent, last_eco_chunk = None, None, None
142
+
143
+ if len(keyframe_paths) < 3:
144
+ raise gr.Error(f"O modelo de geração requer no mínimo 3 keyframes (Passado, Presente, Futuro). Você forneceu {len(keyframe_paths)}.")
145
 
146
+ num_transitions_to_generate = len(keyframe_paths) - 2
147
+ logger.info(f"Modelo 'K-2' ativado: {len(keyframe_paths)} keyframes resultarão em {num_transitions_to_generate} fragmentos de vídeo.")
148
 
149
  for i in range(num_transitions_to_generate):
150
+ start_keyframe_index = i + 1
151
+
152
+ logger.info(f"--- INICIANDO FRAGMENTO {i+1}/{num_transitions_to_generate} (índice de loop i={i}) ---")
153
+ progress((i + 1) / num_transitions_to_generate, desc=f"Produzindo Transição {i+1}/{num_transitions_to_generate}")
154
+
155
+ past_keyframe_path = keyframe_paths[start_keyframe_index - 1]
156
+ start_keyframe_path = keyframe_paths[start_keyframe_index]
157
+ destination_keyframe_path = keyframe_paths[start_keyframe_index + 1]
158
+ future_story_prompt = storyboard[start_keyframe_index + 1] if (start_keyframe_index + 1) < len(storyboard) else "A cena final."
159
 
 
160
 
161
+ decision = gemini_singleton.get_cinematic_decision(
162
+ global_prompt,
163
+ story_history,
164
+ past_keyframe_path,
165
+ start_keyframe_path,
166
+ destination_keyframe_path,
167
+ storyboard[start_keyframe_index - 1], # Story para o Keyframe Passado
168
+ storyboard[start_keyframe_index], # Story para o Keyframe de Início (Presente)
169
+ future_story_prompt
170
+ )
171
+ transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
 
 
 
 
 
 
172
 
173
  story_history += f"\n- Ato {i+1} ({transition_type}): {motion_prompt}"
174
 
175
+ total_frames_to_generate = total_frames_base
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
 
 
177
  conditioning_items = []
178
+ logger.info(f" [0. PREPARAÇÃO] Montando itens de condicionamento para K{start_keyframe_index} -> K{start_keyframe_index+1}.")
179
+
 
 
180
 
181
+ if last_eco_chunk is None:
182
+ # Nenhum eco → sempre trata como "primeiro fragmento"
183
+ img_start = self._preprocess_image_for_latent_conversion(
184
+ Image.open(start_keyframe_path).convert("RGB"),
185
+ target_resolution_tuple
186
+ )
187
+ conditioning_items.append(
188
+ LatentConditioningItem(self.pil_to_latent(img_start), 0, 1.0)
189
+ )
190
  else:
191
+ # Usa eco + handler do fragmeto anterior
192
+ conditioning_items.append(LatentConditioningItem(last_eco_chunk, 0, 1.0))
193
+ handler_frame = (echo_chunks + trim_chunks) * 8
194
+ conditioning_items.append(LatentConditioningItem(handler_Chunk, handler_frame, handler_strength))
195
+
 
 
196
 
197
+ img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
198
+ conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), total_frames_base, destination_convergence_strength))
199
 
200
+ current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
201
+ new_full_latents = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_to_generate)
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
+ # --- [INÍCIO] Bloco de Verificação de Frames por Chunk ---
205
+ logger.info("--- [VERIFICAÇÃO DE CHUNKS INDIVIDUAIS] ---")
206
+ total_chunks_verificados = new_full_latents.shape[2]
207
+ for chunk_idx in range(total_chunks_verificados):
208
  try:
209
+ # Isola o chunk atual
210
+ single_chunk_latent = new_full_latents[:, :, chunk_idx:chunk_idx+1, :, :]
211
+
212
+ # Gera um nome de arquivo temporário para o vídeo do chunk
213
+ temp_video_base_name = f"debug_chunk_{chunk_idx}"
214
+
215
+ # Converte o latente do chunk em um vídeo MP4
216
+ temp_video_path = self._generate_video_from_latents(single_chunk_latent, temp_video_base_name)
217
+
218
+ # Conta os frames no vídeo gerado
219
  if os.path.exists(temp_video_path):
220
+ with imageio.get_reader(temp_video_path) as reader:
221
+ frame_count = reader.count_frames()
222
+ logger.info(f" - VERIFICADO: Chunk {chunk_idx} gerou um vídeo com {frame_count} frames.")
223
+ # Apaga o vídeo de debug
224
  os.remove(temp_video_path)
225
+ else:
226
+ logger.warning(f" - FALHA: Não foi possível gerar o vídeo para o Chunk {chunk_idx}.")
227
+
228
+ except Exception as e:
229
+ logger.error(f" - ERRO ao verificar Chunk {chunk_idx}: {e}")
230
+ logger.info("--- [FIM DA VERIFICAÇÃO] ---")
231
+ # --- [FIM] Bloco de Verificação ---
232
 
233
+ logger.info(f" [1. GERAÇÃO] Tensor latente bruto gerado com shape: {new_full_latents.shape}.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
 
 
235
 
236
+ total_chunks = new_full_latents.shape[2]
237
+ logger.info(f" [GUIAS] Extraindo guias de continuidade para a PRÓXIMA iteração (Total: {total_chunks} chunks).")
 
 
238
 
239
+ handler_Chunk = new_full_latents[:, :, -1:, :, :].clone()
240
+ logger.info(f" - 'handler_Chunk' (guia de evolução) extraído do chunk final (índice {total_chunks-1}).")
241
+
242
+ index_of_last_usable_chunk = total_chunks
243
+ end_chunk_index = index_of_last_usable_chunk - trim_chunks
244
+ start_chunk_index = end_chunk_index - echo_chunks
245
+
246
+ logger.info(f" - Fatia de chunks para 'Dejavu' (guia de memória) será: [{start_chunk_index}:{end_chunk_index}].")
247
+ last_eco_chunk = new_full_latents[:, :, start_chunk_index:end_chunk_index, :, :].clone()
248
+
249
+ logger.info(f" [2. EDIÇÃO] Realizando a montagem do clipe de vídeo a partir do tensor bruto.")
250
+ latents_for_video = new_full_latents
251
 
252
+ latents_for_video = latents_for_video[:, :, :1, :, :]
253
+
254
+ latents_for_video = latents_for_video[:, :, echo_chunks:-(trim_chunks), :, :]
255
+
256
  base_name = f"fragment_{i}_{int(time.time())}"
 
 
257
 
258
+ logger.info(f" [3. DECODIFICAÇÃO] Tensor final para o clipe tem {latents_for_video.shape[2]} chunks. Enviando para gerar vídeo.")
259
+ video_path = self._generate_video_from_latents(latents_for_video, base_name)
260
+ video_clips_paths.append(video_path)
261
+ yield {"fragment_path": video_path}
262
+
263
+ final_movie_path = os.path.join(self.workspace_dir, f"final_movie_silent_{int(time.time())}.mp4")
 
 
264
  self.concatenate_videos_ffmpeg(video_clips_paths, final_movie_path)
265
 
266
  logger.info(f"Filme completo salvo em: {final_movie_path}")
gemini_helpers.py CHANGED
@@ -47,7 +47,7 @@ class GeminiSingleton:
         if self.api_key:
             genai.configure(api_key=self.api_key)
             # Most recent, capable model for complex vision and reasoning tasks.
-            self.model = genai.GenerativeModel('gemini-2.5-flash')
+            self.model = genai.GenerativeModel('gemini-2.0-flash')
             logger.info("Especialista Gemini (1.5 Pro) inicializado com sucesso.")
         else:
             self.model = None
prompts/cinematic_director_prompt.txt CHANGED
@@ -24,12 +24,7 @@ You will receive a complete picture of the narrative timeline.
 - Textual Future (Ato_n+1): "{future_scene_desc}"
 - Visual Future (Keyframe k_n+1): [FUTURE_IMAGE]
 
-# --- TASK 1: THE EDITING DECISION ---
-Analyze the transition from the PRESENT (`k_n`) to the FUTURE (`k_n+1`).
-- If there is a major, non-continuous jump (e.g., scene changes from day to night, character teleports, location is completely different), you MUST decide this is a "cut". This is a critical break in the action.
-- Otherwise, if the action can flow logically from the present to the future, decide it is "continuous".
-
-# --- TASK 2: THE CINEMATIC MOTION PROMPT ---
+# --- TASK: THE CINEMATIC MOTION PROMPT ---
 Based on your decision, write the `motion_prompt`. The prompt MUST describe the action that moves the story from the PRESENT visual (`k_n`) towards the FUTURE visual (`k_n+1`).
 
 **CRITICAL PROMPT DIRECTIVES:**
@@ -42,4 +37,4 @@ Based on your decision, write the `motion_prompt`. The prompt MUST describe the
     d. **Scenery/Environment:** Describe environmental details that add to the motion and mood (e.g., "wind rustling the leaves", "rain streaks down the window").
 
 # RESPONSE FORMAT:
-You MUST respond with a single, clean JSON object with two keys: "transition_type" and "motion_prompt".
+You MUST respond with a single, clean JSON object with two keys: "transition_type" (always set to "continuous") and "motion_prompt".
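
Note: for illustration, a well-formed response under the updated format might look like the following (the motion_prompt text is hypothetical):

    {
      "transition_type": "continuous",
      "motion_prompt": "The camera dollies forward as the pilot grips the ignition lever; sparks scatter across the hangar floor while the engines flare to life."
    }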