# aduc_framework/engineers/deformes4D.py
#
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
#
# Version 3.0.2 (framework-compliant, with the generation logic restored)
#
# This engineer implements the Camera (Ψ) and the Distiller (Δ) of the
# ADUC-SDR architecture. It orchestrates the sequential generation of video
# fragments from a predefined set of keyframes.
import os
import time
import imageio
import numpy as np
import torch
import logging
from PIL import Image, ImageOps
import subprocess
import gc
import shutil
from pathlib import Path
from typing import List, Tuple, Dict, Any, Callable, Optional
# --- Corrected relative imports ---
from ..types import LatentConditioningItem
from ..managers.ltx_manager import ltx_manager_singleton
from ..managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
from ..managers.vae_manager import vae_manager_singleton
from .deformes2D_thinker import deformes2d_thinker_singleton
from ..managers.seedvr_manager import seedvr_manager_singleton
from ..managers.mmaudio_manager import mmaudio_manager_singleton
from ..tools.video_encode_tool import video_encode_tool_singleton
logger = logging.getLogger(__name__)
ProgressCallback = Optional[Callable[[float, str], None]]
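# A ProgressCallback receives (progress_fraction in [0, 1], status_message).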
class Deformes4DEngine:
"""
Orquestra a geração, pós-produção latente e renderização final de fragmentos de vídeo.
"""
def __init__(self):
"""O construtor é leve e não recebe argumentos."""
self.workspace_dir: Optional[str] = None
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
logger.info("Deformes4DEngine instanciado (não inicializado).")
def initialize(self, workspace_dir: str):
"""Inicializa o engenheiro com as configurações necessárias."""
if self.workspace_dir is not None:
            return  # Avoid re-initialization
self.workspace_dir = workspace_dir
os.makedirs(self.workspace_dir, exist_ok=True)
logger.info(f"Deformes4D Specialist (ADUC-SDR Executor) inicializado com workspace: {self.workspace_dir}.")
def generate_original_movie(
self,
full_generation_state: Dict[str, Any],
progress_callback: ProgressCallback = None
) -> Dict[str, Any]:
"""
Gera o filme principal lendo todos os parâmetros do estado de geração.
"""
if not self.workspace_dir:
raise RuntimeError("Deformes4DEngine não foi inicializado. Chame o método initialize() antes de usar.")
        # 1. Extract every parameter from the generation state
pre_prod_params = full_generation_state.get("parametros_geracao", {}).get("pre_producao", {})
prod_params = full_generation_state.get("parametros_geracao", {}).get("producao", {})
keyframes_data = full_generation_state.get("Keyframe_atos", [])
global_prompt = full_generation_state.get("Promt_geral", "")
storyboard = [ato["resumo_ato"] for ato in full_generation_state.get("Atos", [])]
keyframe_paths = [kf["caminho_pixel"] for kf in keyframes_data]
seconds_per_fragment = pre_prod_params.get('duration_per_fragment', 4.0)
video_resolution = pre_prod_params.get('resolution', 480)
trim_percent = prod_params.get('trim_percent', 50)
handler_strength = prod_params.get('handler_strength', 0.5)
destination_convergence_strength = prod_params.get('destination_convergence_strength', 0.75)
guidance_scale = prod_params.get('guidance_scale', 2.0)
stg_scale = prod_params.get('stg_scale', 0.025)
num_inference_steps = prod_params.get('inference_steps', 20)
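        # A minimal sketch of the expected `full_generation_state` layout, inferred
        # from the keys read above (the values shown are illustrative, not canonical):
        #
        # {
        #     "Promt_geral": "A lone astronaut explores a ruined station.",
        #     "Atos": [{"resumo_ato": "..."}, ...],
        #     "Keyframe_atos": [{"caminho_pixel": "/path/to/kf_000.png"}, ...],
        #     "parametros_geracao": {
        #         "pre_producao": {"duration_per_fragment": 4.0, "resolution": 480},
        #         "producao": {"trim_percent": 50, "handler_strength": 0.5,
        #                      "destination_convergence_strength": 0.75,
        #                      "guidance_scale": 2.0, "stg_scale": 0.025,
        #                      "inference_steps": 20},
        #     },
        # }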
        # 2. Start the generation process
FPS = 24
FRAMES_PER_LATENT_CHUNK = 8
LATENT_PROCESSING_CHUNK_SIZE = 4
run_timestamp = int(time.time())
temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}")
temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
os.makedirs(temp_latent_dir, exist_ok=True)
os.makedirs(temp_video_clips_dir, exist_ok=True)
total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
DESTINATION_FRAME_TARGET = total_frames_brutos - 1
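        # Worked example with the defaults above: 4.0 s × 24 fps = 96 frames,
        # already a multiple of 8, so total_frames_brutos = 96. A 50% trim gives
        # frames_a_podar = 48, i.e. latents_a_podar = 6 latent chunks; the déjà-vu
        # anchor then lands on frame 47 and the destination anchor on frame 95.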
base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps}
story_history = ""
target_resolution_tuple = (video_resolution, video_resolution)
eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
latent_fragment_paths = []
video_fragments_data = []
if len(keyframe_paths) < 2:
raise ValueError(f"A geração requer pelo menos 2 keyframes. Fornecidos: {len(keyframe_paths)}.")
num_transitions_to_generate = len(keyframe_paths) - 1
logger.info("--- INICIANDO ESTÁGIO 1: Geração de Fragmentos Latentes ---")
for i in range(num_transitions_to_generate):
fragment_index = i + 1
if progress_callback:
                progress_fraction = (i / num_transitions_to_generate) * 0.7  # Latent generation accounts for 70% of the run
                progress_callback(progress_fraction, f"Generating latent {fragment_index}/{num_transitions_to_generate}")
past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
start_keyframe_path = keyframe_paths[i]
destination_keyframe_path = keyframe_paths[i + 1]
            future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene."
decision = deformes2d_thinker_singleton.get_cinematic_decision(
global_prompt, story_history, past_keyframe_path, start_keyframe_path,
                destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.",
storyboard[i], future_story_prompt
)
motion_prompt = decision["motion_prompt"]
            story_history += f"\n- Act {fragment_index}: {motion_prompt}"
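            # Conditioning scheme: on the first pass the fragment is anchored only
            # on the start keyframe (frame 0). On later passes the "eco" latents
            # (the first two latent chunks of the previous fragment's trimmed tail)
            # re-anchor frame 0, and the "déjà-vu" latent (the last chunk of that
            # tail) softly steers the frame at DEJAVU_FRAME_TARGET, so motion flows
            # across fragment boundaries. The destination keyframe always pulls the
            # final frame toward the next act.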
conditioning_items = []
if eco_latent_for_next_loop is None:
img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_start), 0, 1.0))
else:
conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
latents_brutos, _ = ltx_manager_singleton.generate_latent_fragment(
height=video_resolution, width=video_resolution,
conditioning_items_data=conditioning_items, motion_prompt=motion_prompt,
video_total_frames=total_frames_brutos, video_fps=FPS,
**base_ltx_params
)
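            # Trimmed-tail bookkeeping: keep the last (latents_a_podar + 1) latent
            # chunks; "eco" is the first two of them and "déjà-vu" the very last.
            # The fragment itself drops (latents_a_podar - 1) chunks, keeping one
            # chunk of overlap that Stage 2 removes at concatenation time. Note that
            # this slicing assumes latents_a_podar >= 2 (a non-trivial trim_percent).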
last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
cpu_latent = latents_video.cpu()
latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
torch.save(cpu_latent, latent_path)
latent_fragment_paths.append(latent_path)
video_fragments_data.append({"id": i, "prompt_video": motion_prompt})
del latents_video, cpu_latent; gc.collect()
del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache()
logger.info(f"--- INICIANDO ESTÁGIO 2: Processando {len(latent_fragment_paths)} latentes ---")
final_video_clip_paths = []
        num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE) if LATENT_PROCESSING_CHUNK_SIZE > 0 else 0  # ceiling division
for i in range(num_chunks):
chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE
chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE
chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index]
if progress_callback:
                progress_fraction = 0.7 + (i / num_chunks * 0.28)  # Decoding accounts for 28% of the run
                progress_callback(progress_fraction, f"Processing & decoding batch {i+1}/{num_chunks}")
tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
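            # Every fragment except the last in the chunk still carries the
            # one-latent overlap left by Stage 1; drop it before concatenating
            # along the temporal axis (dim=2).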
tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
base_name = f"clip_{i:04d}_{run_timestamp}"
current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
final_video_clip_paths.append(current_clip_path)
del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
        if progress_callback:
            progress_callback(0.98, "Assembling the final movie...")
final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4")
video_encode_tool_singleton.concatenate_videos(final_video_clip_paths, final_video_path, self.workspace_dir)
try:
shutil.rmtree(temp_video_clips_dir)
shutil.rmtree(temp_latent_dir)
except OSError as e:
logger.warning(f"Não foi possível remover diretórios temporários: {e}")
logger.info(f"Processo completo! Vídeo original salvo em: {final_video_path}")
# 3. Empacota os resultados para o Orchestrator
final_video_data_for_state = {
"id": 0,
"caminho_pixel": final_video_path,
"caminhos_latentes_fragmentos": latent_fragment_paths,
"fragmentos_componentes": video_fragments_data
}
return {
"final_path": final_video_path,
"latent_paths": latent_fragment_paths,
"video_data": final_video_data_for_state
}
    def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
        # Expects a pixel tensor shaped (B, C, F, H, W) with values in [-1, 1].
        if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0:
            return
        video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)  # -> (F, H, W, C)
        video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0        # [-1, 1] -> [0, 1]
        video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
        with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
            for frame in video_np:
                writer.append_data(frame)
def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
if image.size != target_resolution:
return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
return image
    def _pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
        # (H, W, C) uint8 -> (1, C, 1, H, W) float in [-1, 1], then VAE-encode.
        image_np = np.array(pil_image).astype(np.float32) / 255.0
        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
        tensor = (tensor * 2.0) - 1.0
        return vae_manager_singleton.encode(tensor)
    def _quantize_to_multiple(self, n: int, m: int) -> int:
        """Rounds n to the nearest multiple of m (never below m for positive n)."""
        if m == 0:
            return n
        quantized = int(round(n / m) * m)
        return m if n > 0 and quantized == 0 else quantized
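# ---------------------------------------------------------------------------
# Minimal usage sketch. The keyframe paths and prompts below are hypothetical
# placeholders; running this for real requires the LTX/VAE manager singletons
# and a CUDA device, so treat it as an illustration of the expected call shape.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    engine = Deformes4DEngine()
    engine.initialize(workspace_dir="workspace_demo")
    demo_state = {
        "Promt_geral": "A lone astronaut drifts through a derelict station.",
        "Atos": [
            {"resumo_ato": "The astronaut wakes in the airlock."},
            {"resumo_ato": "She reaches the silent command deck."},
        ],
        "Keyframe_atos": [
            {"caminho_pixel": "keyframes/act_000.png"},  # hypothetical paths
            {"caminho_pixel": "keyframes/act_001.png"},
        ],
        "parametros_geracao": {
            "pre_producao": {"duration_per_fragment": 4.0, "resolution": 480},
            "producao": {"trim_percent": 50, "inference_steps": 20},
        },
    }
    result = engine.generate_original_movie(
        demo_state,
        progress_callback=lambda frac, msg: print(f"[{frac:>5.0%}] {msg}"),
    )
    print(f"Final movie: {result['final_path']}")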