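"""Deformes3DEngine: ADUC specialist for generating static keyframe images."""
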
import os
import logging

import torch
import numpy as np
from PIL import Image, ImageOps

from typing import List, Dict, Any, Callable, Optional

from .deformes2D_thinker import deformes2d_thinker_singleton
from ..types import LatentConditioningItem
from ..managers.ltx_manager import ltx_manager_singleton
from ..managers.vae_manager import vae_manager_singleton
from ..managers.latent_enhancer_manager import latent_enhancer_specialist_singleton

logger = logging.getLogger(__name__)
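
# Progress callback: receives (fraction_complete in [0.0, 1.0], status_message).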
ProgressCallback = Optional[Callable[[float, str], None]]


class Deformes3DEngine:
    """
    ADUC specialist for generating static images (keyframes).
    """
    def __init__(self):
        """The constructor is lightweight and takes no arguments."""
        self.workspace_dir: Optional[str] = None
        logger.info("Deformes3DEngine instantiated (not initialized).")

    def initialize(self, workspace_dir: str):
        """Initializes the engine with the required configuration."""
        if self.workspace_dir is not None:
            return
        self.workspace_dir = workspace_dir
        logger.info(f"3D Engine (Image Specialist) initialized with workspace: {self.workspace_dir}.")

    def generate_keyframes_from_storyboard(
        self,
        generation_state: Dict[str, Any],
        progress_callback: ProgressCallback = None
    ) -> List[Dict[str, Any]]:
        """
        Orchestrates the generation of all keyframes from the complete generation state.
        Returns a list of dictionaries with detailed data for each keyframe.
        """
        if not self.workspace_dir:
            raise RuntimeError("Deformes3DEngine has not been initialized. Call initialize() before use.")

        params = generation_state.get("parametros_geracao", {}).get("pre_producao", {})
        storyboard = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
        global_prompt = generation_state.get("Promt_geral", "")
        general_ref_paths = [media["caminho"] for media in generation_state.get("midias_referencia", [])]

        keyframe_resolution = params.get('resolution', 480)
        initial_ref_path = general_ref_paths[0] if general_ref_paths else None

        if not initial_ref_path:
            raise ValueError("No initial reference image available to start keyframe generation.")

        current_base_image_path = initial_ref_path
        previous_prompt = "N/A (initial reference image)"
        all_keyframes_data: List[Dict[str, Any]] = []
        width, height = keyframe_resolution, keyframe_resolution
        target_resolution_tuple = (width, height)

        num_keyframes_to_generate = len(storyboard)
        if num_keyframes_to_generate <= 0:
            logger.warning("Empty storyboard. No keyframes to generate.")
            return []

        logger.info(f"IMAGE SPECIALIST: Order to generate {num_keyframes_to_generate} keyframes (LTX versions).")
        ltx_conditioning_items0 = []

        img_pil0 = Image.open(initial_ref_path).convert("RGB")
        img_processed0 = self._preprocess_image_for_latent_conversion(img_pil0, target_resolution_tuple)
        pixel_tensor0 = self._pil_to_pixel_tensor(img_processed0)

        ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 0, 0.05))
        ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 24, 0.05))
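
        # Y tracks the tensor of the most recent keyframe and X the one
        # before it; both start from the reference image's pixel tensor.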
        latent_tensorY = pixel_tensor0
        latent_tensorX = latent_tensorY
        past_base_image_path = initial_ref_path

        for i in range(num_keyframes_to_generate):
            scene_index = i + 1
            current_scene = storyboard[i]
            future_scene = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene."
            logger.info(f"--> Generating keyframe {scene_index}/{num_keyframes_to_generate}...")
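
            # Ask the 2D thinker for an anticipatory prompt that looks one
            # act ahead, so this keyframe composes toward the next scene.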
            img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
                global_prompt=global_prompt,
                scene_history=previous_prompt,
                current_scene_desc=current_scene,
                future_scene_desc=future_scene,
                last_image_path=past_base_image_path,
                fixed_ref_paths=current_base_image_path
            )

            past_base_image_path = current_base_image_path

            # Copy the shared base items so anchors do not accumulate on the
            # list across iterations, then add the two moving anchors taken
            # from the previous keyframes.
            ltx_conditioning_items = list(ltx_conditioning_items0)
            ltx_conditioning_items.append(LatentConditioningItem(latent_tensorX, 0, 0.4))
            ltx_conditioning_items.append(LatentConditioningItem(latent_tensorY, 8, 0.6))

            latent_tensorX = latent_tensorY

            ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
            generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
                height=height, width=width,
                conditioning_items_data=ltx_conditioning_items,
                motion_prompt=img_prompt,
                video_total_frames=24, video_fps=24,
                **ltx_base_params
            )
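
            # Keep only the last temporal frame of the generated fragment;
            # that single-frame latent becomes this act's keyframe.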
            final_latent = generated_latents[:, :, -1:, :, :]

            enriched_pixel_tensor = vae_manager_singleton.decode(final_latent)

            pixel_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_pixel.png")
            latent_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_latent.pt")
            self.save_image_from_tensor(enriched_pixel_tensor, pixel_path)
            torch.save(final_latent.cpu(), latent_path)

            # Carry the latent tensor itself forward as the next conditioning
            # anchor, not the path of the file it was saved to.
            latent_tensorY = final_latent

            keyframe_data = {
                "id": scene_index,
                "caminho_pixel": pixel_path,
                "caminho_latent": latent_path,
                "prompt_keyframe": img_prompt
            }

            all_keyframes_data.append(keyframe_data)
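
            # Keyframe generation maps onto the 0.2..1.0 band of the overall
            # progress bar.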
            if progress_callback:
                progress_fraction = 0.2 + ((scene_index / num_keyframes_to_generate) * 0.8)
                progress_callback(progress_fraction, f"Keyframe {scene_index}/{num_keyframes_to_generate} generated.")

            current_base_image_path = pixel_path
            previous_prompt = img_prompt

        logger.info("IMAGE SPECIALIST: Generation of all keyframe data complete.")
        return all_keyframes_data

    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
        """Center-crops and resizes the image to exactly the target resolution."""
        if image.size != target_resolution:
            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
        return image

    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
        """Converts an RGB PIL image to a (1, C, 1, H, W) float tensor in [-1, 1]."""
        image_np = np.array(pil_image).astype(np.float32) / 255.0
        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
        return (tensor * 2.0) - 1.0

    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """Saves a (1, C, 1, H, W) tensor in [-1, 1] as an 8-bit image file."""
        tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
        tensor_hwc = tensor_chw.permute(1, 2, 0)
        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
        image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
        Image.fromarray(image_np).save(path)


deformes3d_engine_singleton = Deformes3DEngine()
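
# Minimal usage sketch (hypothetical paths; assumes the thinker, LTX, and VAE
# manager singletons are already configured):
#
#   deformes3d_engine_singleton.initialize(workspace_dir="/tmp/aduc_workspace")
#   state = {
#       "Promt_geral": "A lighthouse keeper endures one stormy night.",
#       "parametros_geracao": {"pre_producao": {"resolution": 480}},
#       "Atos": [{"resumo_ato": "The keeper climbs the tower at dusk."}],
#       "midias_referencia": [{"caminho": "/tmp/ref.png"}],
#   }
#   keyframes = deformes3d_engine_singleton.generate_keyframes_from_storyboard(state)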