from PIL import Image, ImageOps
import os
import time
import logging
import yaml
import torch
import numpy as np

from managers.flux_kontext_manager import flux_kontext_singleton
from engineers.deformes2D_thinker import deformes2d_thinker_singleton
from aduc_types import LatentConditioningItem
from managers.ltx_manager import ltx_manager_singleton
from managers.vae_manager import vae_manager_singleton
from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton

logger = logging.getLogger(__name__)


class Deformes3DEngine:
    """
    ADUC Specialist for static image (keyframe) generation.
    """

    def __init__(self, workspace_dir):
        self.workspace_dir = workspace_dir
        self.image_generation_helper = flux_kontext_singleton
        logger.info("3D Engine (Image Specialist) ready to receive orders from the Maestro.")

    def _generate_single_keyframe(self, prompt: str, reference_images: list[Image.Image], output_filename: str, width: int, height: int, callback: callable = None) -> str:
        """
        Low-level helper that generates a single image with the FLUX Kontext specialist.
        """
        logger.info(f"Generating keyframe '{output_filename}' with prompt: '{prompt}'")
        generated_image = self.image_generation_helper.generate_image(
            reference_images=reference_images, prompt=prompt, width=width,
            height=height, seed=int(time.time()), callback=callback
        )
        final_path = os.path.join(self.workspace_dir, output_filename)
        generated_image.save(final_path)
        logger.info(f"Keyframe successfully saved to: {final_path}")
        return final_path
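    # NOTE: this FLUX-based helper is not invoked by the storyboard pipeline
    # below, which renders keyframes through LTX instead.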
    def generate_keyframes_from_storyboard(self, storyboard: list, initial_ref_path: str, global_prompt: str, keyframe_resolution: int, general_ref_paths: list, progress_callback_factory: callable = None):
        """
        Orchestrates the generation of all keyframes described by the storyboard.
        """
        current_base_image_path = initial_ref_path
        previous_prompt = "N/A (initial reference image)"
        final_keyframes_gallery = []
        width, height = keyframe_resolution, keyframe_resolution
        target_resolution_tuple = (width, height)

        num_keyframes_to_generate = len(storyboard) - 1
        logger.info(f"IMAGE SPECIALIST: Received order to generate {num_keyframes_to_generate} keyframes (LTX versions).")

        for i in range(num_keyframes_to_generate):
            scene_index = i + 1
            current_scene = storyboard[i]
            future_scene = storyboard[i + 1]
            # Built for UI progress reporting; currently unused by the LTX path below.
            progress_callback = progress_callback_factory(scene_index, num_keyframes_to_generate) if progress_callback_factory else None

            logger.info(f"--> Generating Keyframe {scene_index}/{num_keyframes_to_generate}...")

            # Step A: compose an anticipatory prompt that bridges the current
            # scene and hints at the next one.
            logger.info("  - Step A: Composing the anticipatory keyframe prompt...")
            img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
                global_prompt=global_prompt, scene_history=previous_prompt,
                current_scene_desc=current_scene, future_scene_desc=future_scene,
                last_image_path=current_base_image_path, fixed_ref_paths=general_ref_paths
            )
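
            # Step B: build the LTX conditioning context from the current base
            # image plus up to three fixed references.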
            context_paths = [current_base_image_path] + [p for p in general_ref_paths if p != current_base_image_path][:3]
            ltx_context_paths = list(reversed(context_paths))
            logger.info(f"  - LTX Context Order (Reversed): {[os.path.basename(p) for p in ltx_context_paths]}")
            ltx_conditioning_items = []
            weight = 0.6
            for path in ltx_context_paths:
                img_pil = Image.open(path).convert("RGB")
                img_processed = self._preprocess_image_for_latent_conversion(img_pil, target_resolution_tuple)
                pixel_tensor = self._pil_to_pixel_tensor(img_processed)
                latent_tensor = vae_manager_singleton.encode(pixel_tensor)

                # Every item conditions frame 0; the weight decays by 0.1 per
                # item (0.6, 0.5, 0.4, ...).
                ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
                weight -= 0.1

            ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
            generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
                height=height, width=width,
                conditioning_items_data=ltx_conditioning_items,
                motion_prompt=img_prompt,
                video_total_frames=48,
                video_fps=24,
                **ltx_base_params
            )
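            # 48 frames at 24 fps is a ~2 s fragment. guidance_scale=1.0 keeps
            # classifier-free guidance neutral, and the near-zero stg_scale
            # presumably leaves spatiotemporal guidance effectively disabled.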

            # The latent batch is assumed to be (batch, channels, frames, height,
            # width); keep only the final frame, enhance it in latent space,
            # then decode to pixels.
            final_latent = generated_latents[:, :, -1:, :, :]
            upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
            enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent)

            ltx_keyframe_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index}_ltx.png")
            self.save_image_from_tensor(enriched_pixel_tensor, ltx_keyframe_path)
            final_keyframes_gallery.append(ltx_keyframe_path)

            # Feed the result forward: the new keyframe becomes the base image,
            # and its prompt becomes the scene history, for the next iteration.
            current_base_image_path = ltx_keyframe_path
            previous_prompt = img_prompt

        logger.info("IMAGE SPECIALIST: Generation of all keyframe versions (LTX) complete.")
        return final_keyframes_gallery

    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
        """Resizes and center-crops an image to the target resolution for VAE encoding."""
        if image.size != target_resolution:
            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
        return image

    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
        """Converts a PIL image to the 5D pixel tensor the VAE expects."""
        # (H, W, C) in [0, 1] -> (1, C, 1, H, W) in [-1, 1]; the extra axes are
        # the batch and singleton frame dimensions of the video VAE.
        image_np = np.array(pil_image).astype(np.float32) / 255.0
        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
        return (tensor * 2.0) - 1.0

    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """Saves a single-frame pixel tensor in [-1, 1] as an image file."""
        # (1, C, 1, H, W) -> (C, H, W) -> (H, W, C), then map [-1, 1] to [0, 255].
        tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
        tensor_hwc = tensor_chw.permute(1, 2, 0)
        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
        image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
        Image.fromarray(image_np).save(path)

try:
    with open("config.yaml", 'r') as f:
        config = yaml.safe_load(f)
    WORKSPACE_DIR = config['application']['workspace_dir']
    deformes3d_engine_singleton = Deformes3DEngine(workspace_dir=WORKSPACE_DIR)
except Exception as e:
    logger.error(f"Could not initialize Deformes3DEngine: {e}", exc_info=True)
    deformes3d_engine_singleton = None
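
# Minimal usage sketch (illustrative only; the storyboard text and file paths
# below are hypothetical):
#
#     if deformes3d_engine_singleton is not None:
#         gallery = deformes3d_engine_singleton.generate_keyframes_from_storyboard(
#             storyboard=["opening alley shot", "the chase begins", "rooftop standoff"],
#             initial_ref_path="refs/start.png",
#             global_prompt="A neon-lit heist short film",
#             keyframe_resolution=512,
#             general_ref_paths=["refs/start.png", "refs/style.png"],
#         )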