Aduc-sdr committed · verified
Commit 70c5d9b · 1 Parent(s): 1444940

Create deformes7D.py

Files changed (1):
  1. engineers/deformes7D.py +300 -0
engineers/deformes7D.py ADDED
@@ -0,0 +1,300 @@
# engineers/deformes7D.py
#
# AducSdr: An open, functional implementation of the ADUC-SDR architecture
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
#
# Contact:
# Carlos Rodrigues dos Santos
# carlex22@gmail.com
# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
#
# Related repositories and projects:
# GitHub: https://github.com/carlex22/Aduc-sdr
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# PENDING PATENT NOTICE: Please see NOTICE.md.
#
# Version: 3.0.0
#
# This file defines the Deformes7DEngine, the unified production specialist
# of the ADUC-SDR framework. It merges the capabilities of 3D (causal keyframing)
# and 4D (video fragment generation) into a single, continuous, and interleaved
# rendering pipeline. It is the definitive implementation of the ADUC-SDR philosophy.

import os
import time
import imageio
import numpy as np
import torch
import logging
from PIL import Image, ImageOps
import gradio as gr
import subprocess
import gc
import shutil
from pathlib import Path
from typing import List, Tuple, Generator, Dict, Any

from aduc_types import LatentConditioningItem
from managers.ltx_manager import ltx_manager_singleton
from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
from managers.vae_manager import vae_manager_singleton
from engineers.deformes2D_thinker import deformes2d_thinker_singleton
from managers.seedvr_manager import seedvr_manager_singleton
from managers.mmaudio_manager import mmaudio_manager_singleton
from tools.video_encode_tool import video_encode_tool_singleton

logger = logging.getLogger(__name__)

class Deformes7DEngine:
    """
    Unified 3D/4D engine for continuous, interleaved generation of keyframes and video fragments.
    """
    def __init__(self, workspace_dir="deformes_workspace"):
        self.workspace_dir = workspace_dir
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        logger.info("Deformes7D Unified Engine initialized.")
        os.makedirs(self.workspace_dir, exist_ok=True)

    # --- HELPER METHODS (from 3D and 4D engines) ---

    def _preprocess_image(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
        """Resizes and fits an image to the target resolution."""
        if image.size != target_resolution:
            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
        return image

    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
        """Converts PIL to the 5D pixel tensor for VAE encoding."""
        image_np = np.array(pil_image).astype(np.float32) / 255.0
        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
        return (tensor * 2.0) - 1.0

    def _save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """Saves a 1-frame pixel tensor as a PNG image."""
        tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
        tensor_hwc = tensor_chw.permute(1, 2, 0)
        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
        image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
        Image.fromarray(image_np).save(path)

    def _quantize_to_multiple(self, n, m):
        """Helper to round n to the nearest multiple of m."""
        if m == 0: return n
        quantized = int(round(n / m) * m)
        return m if n > 0 and quantized == 0 else quantized

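    # NOTE: the main loop below calls `self.save_video_from_tensor(...)`, but the commit does
    # not define that method in this file. The helper here is a minimal, hedged sketch of what
    # it could look like, written against the already-imported `imageio` and assuming the
    # decoder returns a (B, C, F, H, W) pixel tensor in [-1, 1], like `_pil_to_pixel_tensor`
    # produces. It is an assumption, not the author's original implementation.
    def save_video_from_tensor(self, pixel_tensor: torch.Tensor, path: str, fps: int = 24):
        """Writes a 5D pixel tensor (B, C, F, H, W) in [-1, 1] to an MP4 file."""
        # Drop the batch dim and move frames first: (F, H, W, C)
        frames = pixel_tensor.squeeze(0).permute(1, 2, 3, 0)
        frames = ((frames.clamp(-1, 1) + 1) / 2.0 * 255).cpu().float().numpy().astype(np.uint8)
        with imageio.get_writer(path, fps=fps, codec="libx264", quality=8) as writer:
            for frame in frames:
                writer.append_data(frame)
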
    # --- CORE GENERATION LOGIC ---

    def _generate_next_causal_keyframe(self, base_keyframe_path: str, all_ref_paths: list,
                                       prompt: str, resolution_tuple: tuple) -> Tuple[str, torch.Tensor]:
        """
        Generates the next keyframe in a sequence using the LTX latent evolution method.
        Returns the path to the saved PNG and its corresponding latent tensor.
        """
        ltx_context_paths = [base_keyframe_path] + [p for p in all_ref_paths if p != base_keyframe_path][:3]

        ltx_conditioning_items = []
        weight = 1.0
        for path in ltx_context_paths:
            img_pil = Image.open(path).convert("RGB")
            img_processed = self._preprocess_image(img_pil, resolution_tuple)
            pixel_tensor = self._pil_to_pixel_tensor(img_processed)
            latent_tensor = vae_manager_singleton.encode(pixel_tensor)
            ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
            if weight == 1.0: weight = -0.2
            else: weight -= 0.2

        ltx_base_params = {"guidance_scale": 3.0, "stg_scale": 0.1, "num_inference_steps": 25}
        generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
            height=resolution_tuple[0], width=resolution_tuple[1],
            conditioning_items_data=ltx_conditioning_items, motion_prompt=prompt,
            video_total_frames=48, video_fps=24, **ltx_base_params
        )

        final_latent = generated_latents[:, :, -1:, :, :]
        upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
        pixel_tensor_out = vae_manager_singleton.decode(upscaled_latent)

        # Save the new keyframe image
        timestamp = int(time.time() * 1000)
        output_path = os.path.join(self.workspace_dir, f"keyframe_{timestamp}.png")
        self._save_image_from_tensor(pixel_tensor_out, output_path)

        return output_path, final_latent

    def generate_full_movie_interleaved(self, initial_ref_paths: list, storyboard: list, global_prompt: str,
                                        video_resolution: int, seconds_per_fragment: float, trim_percent: int,
                                        handler_strength: float, dest_strength: float, ltx_params: dict,
                                        progress=gr.Progress()):
        """
        The main interleaved rendering pipeline for Deformes7D.
        """
        # --- INITIALIZATION ---
        logger.info("--- DEFORMES 7D: INITIATING INTERLEAVED RENDERING PIPELINE ---")
        run_timestamp = int(time.time())
        temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
        os.makedirs(temp_video_clips_dir, exist_ok=True)

        resolution_tuple = (video_resolution, video_resolution)

        # Lists to store the full sequence of generated artifacts
        generated_keyframe_paths = []
        generated_keyframe_latents = []
        generated_video_fragment_paths = []

        # --- BOOTSTRAP: Generate first two keyframes to start the pipeline ---
        progress(0, desc="Bootstrap: Generating K0...")
        # Keyframe 0 is just the processed initial reference
        k0_path = initial_ref_paths[0]
        k0_pil = Image.open(k0_path).convert("RGB")
        k0_processed_pil = self._preprocess_image(k0_pil, resolution_tuple)
        k0_pixel_tensor = self._pil_to_pixel_tensor(k0_processed_pil)
        k0_latent = vae_manager_singleton.encode(k0_pixel_tensor)
        generated_keyframe_paths.append(k0_path)
        generated_keyframe_latents.append(k0_latent)

        progress(0, desc="Bootstrap: Generating K1...")
        # Generate Keyframe 1 from Keyframe 0
        prompt_k1 = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
            global_prompt, "Initial scene.", storyboard[0], storyboard[1], k0_path, initial_ref_paths
        )
        k1_path, k1_latent = self._generate_next_causal_keyframe(k0_path, initial_ref_paths, prompt_k1, resolution_tuple)
        generated_keyframe_paths.append(k1_path)
        generated_keyframe_latents.append(k1_latent)

        # --- MAIN RENDERING LOOP ---
        story_history = ""
        eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
        num_transitions = len(storyboard) - 1

        for i in range(1, num_transitions):
            progress(i / num_transitions, desc=f"Processing Act {i+1}/{num_transitions}...")

            # --- 1. Generate the NEXT Keyframe (Look-ahead) ---
            logger.info(f"--> Step 3D: Generating Keyframe K{i+1}")
            kx_path = generated_keyframe_paths[i]
            prompt_ky = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
                global_prompt, "Continuing sequence...", storyboard[i], storyboard[i+1], kx_path, initial_ref_paths
            )
            ky_path, ky_latent = self._generate_next_causal_keyframe(kx_path, initial_ref_paths, prompt_ky, resolution_tuple)
            generated_keyframe_paths.append(ky_path)
            generated_keyframe_latents.append(ky_latent)

            # --- 2. Generate the CURRENT Video Fragment ---
            logger.info(f"--> Step 4D: Generating Video Fragment V{i}")
            kb_path = generated_keyframe_paths[i-1]  # Past
            kx_path = generated_keyframe_paths[i]    # Present (Start)
            ky_path = generated_keyframe_paths[i+1]  # Future (End)

            decision = deformes2d_thinker_singleton.get_cinematic_decision(
                global_prompt, story_history, kb_path, kx_path, ky_path,
                storyboard[i-1], storyboard[i], storyboard[i+1]
            )
            transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
            story_history += f"\n- Act {i}: {motion_prompt}"

            # Prepare conditioning items for the video fragment
            conditioning_items = []
            if eco_latent_for_next_loop is None:
                conditioning_items.append(LatentConditioningItem(generated_keyframe_latents[i], 0, 1.0))
            else:
                # This part reuses the logic from the old Deformes4D
                # ... [Implementation of Eco/Deja-Vu conditioning here] ...
                # For simplicity in this first draft, we'll use the direct keyframe latent
                conditioning_items.append(LatentConditioningItem(generated_keyframe_latents[i], 0, 1.0))

            # Add the destination anchor
            conditioning_items.append(LatentConditioningItem(ky_latent, -1, dest_strength))  # Use -1 for last frame

            fragment_latents, _ = ltx_manager_singleton.generate_latent_fragment(
                height=video_resolution, width=video_resolution,
                conditioning_items_data=conditioning_items, motion_prompt=motion_prompt,
                video_total_frames=self._quantize_to_multiple(int(seconds_per_fragment * 24), 8),
                video_fps=24, **ltx_params
            )

            # Post-process and save the video fragment
            pixel_tensor = vae_manager_singleton.decode(fragment_latents)
            fragment_path = os.path.join(temp_video_clips_dir, f"fragment_{i}.mp4")
            self.save_video_from_tensor(pixel_tensor, fragment_path, fps=24)
            generated_video_fragment_paths.append(fragment_path)
            logger.info(f"Video Fragment V{i} saved to {fragment_path}")

            # Here you would also extract the Eco and Deja-Vu from `fragment_latents` for the next loop
            # ...
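            # HEDGED SKETCH (not in the original commit): one plausible way to carry a short
            # "memory" into the next iteration would be to slice the tail of the fragment's
            # latents, e.g. the last few latent frames as the Eco and the final frame as the
            # Deja-Vu anchor. The actual Deformes4D logic is not shown in this commit, so the
            # lines below are illustrative only and left disabled.
            # eco_latent_for_next_loop = fragment_latents[:, :, -3:, :, :].clone()
            # dejavu_latent_for_next_loop = fragment_latents[:, :, -1:, :, :].clone()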

        # --- FINAL ASSEMBLY ---
        logger.info("--- Final Assembly of Video Fragments ---")
        final_video_path = os.path.join(self.workspace_dir, f"movie_7D_{run_timestamp}.mp4")
        video_encode_tool_singleton.concatenate_videos(
            video_paths=generated_video_fragment_paths,
            output_path=final_video_path,
            workspace_dir=self.workspace_dir
        )
        shutil.rmtree(temp_video_clips_dir)

        logger.info(f"Full movie generated at: {final_video_path}")
        # This function would then return the path and other artifacts for post-production
        return {"final_path": final_video_path, "all_keyframes": generated_keyframe_paths}


    # --- POST-PRODUCTION METHODS (migrated from Deformes4D) ---

    def upscale_video(self, source_video_path: str, progress=gr.Progress()):
        # This would be a more complex function that loads the video in chunks,
        # encodes to latents, upscales, decodes, and reassembles.
        # For this example it remains a placeholder; a hedged sketch follows below.
        logger.info(f"Placeholder for upscaling video: {source_video_path}")
        return source_video_path

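    # HEDGED SKETCH (not in the original commit): one way the chunked latent-upscale pipeline
    # described in `upscale_video` might be implemented. It assumes `imageio` can stream the
    # source video, that `vae_manager_singleton.encode/decode` accept the same 5D
    # (B, C, F, H, W) pixel tensors in [-1, 1] used elsewhere in this file, and that
    # `latent_enhancer_specialist_singleton.upscale` handles arbitrary frame counts. The names
    # `_upscale_video_chunked`, `_upscale_chunk_and_write`, and `chunk_size` are illustrative only.
    def _upscale_video_chunked(self, source_video_path: str, chunk_size: int = 16, fps: int = 24) -> str:
        output_path = os.path.join(self.workspace_dir, f"{Path(source_video_path).stem}_upscaled.mp4")
        reader = imageio.get_reader(source_video_path)
        with imageio.get_writer(output_path, fps=fps, codec="libx264", quality=8) as writer:
            chunk = []
            for frame in reader:
                chunk.append(frame)
                if len(chunk) == chunk_size:
                    self._upscale_chunk_and_write(chunk, writer)
                    chunk = []
            if chunk:
                self._upscale_chunk_and_write(chunk, writer)
        reader.close()
        return output_path

    def _upscale_chunk_and_write(self, frames: list, writer):
        # (F, H, W, C) uint8 frames -> (1, C, F, H, W) float tensor in [-1, 1]
        pixels = torch.from_numpy(np.stack(frames)).float() / 255.0
        pixels = pixels.permute(3, 0, 1, 2).unsqueeze(0) * 2.0 - 1.0
        latents = vae_manager_singleton.encode(pixels)
        upscaled = latent_enhancer_specialist_singleton.upscale(latents)
        out = vae_manager_singleton.decode(upscaled)
        # Back to (F, H, W, C) uint8 for the video writer
        out = ((out.clamp(-1, 1) + 1) / 2.0 * 255).squeeze(0).permute(1, 2, 3, 0)
        for frame in out.cpu().numpy().astype(np.uint8):
            writer.append_data(frame)
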
    def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress=gr.Progress()):
        logger.info(f"--- POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
        progress(0.1, desc="Preparing for HD Mastering...")
        run_timestamp = int(time.time())
        output_path = os.path.join(self.workspace_dir, f"{Path(source_video_path).stem}_hd.mp4")
        try:
            final_path = seedvr_manager_singleton.process_video(
                input_video_path=source_video_path, output_video_path=output_path,
                prompt=prompt, model_version=model_version, steps=steps, progress=progress
            )
            logger.info(f"HD Mastering complete! Final video at: {final_path}")
            yield {"final_path": final_path}
        except Exception as e:
            logger.error(f"HD Mastering failed: {e}", exc_info=True)
            raise gr.Error(f"HD Mastering failed. Details: {e}")

    def generate_audio(self, source_video_path: str, audio_prompt: str, progress=gr.Progress()):
        logger.info("--- POST-PRODUCTION: Audio Generation ---")
        progress(0.1, desc="Preparing for audio generation...")
        run_timestamp = int(time.time())
        output_path = os.path.join(self.workspace_dir, f"{Path(source_video_path).stem}_audio.mp4")
        try:
            result = subprocess.run(
                ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
                capture_output=True, text=True, check=True)
            duration = float(result.stdout.strip())
            progress(0.5, desc="Generating audio track...")
            final_path = mmaudio_manager_singleton.generate_audio_for_video(
                video_path=source_video_path, prompt=audio_prompt,
                duration_seconds=duration, output_path_override=output_path
            )
            logger.info(f"Audio generation complete! Final video with audio at: {final_path}")
            yield {"final_path": final_path}
        except Exception as e:
            logger.error(f"Audio generation failed: {e}", exc_info=True)
            raise gr.Error(f"Audio generation failed. Details: {e}")
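
# --------------------------------------------------------------------------------------
# HEDGED USAGE SKETCH (not part of the original commit): a rough illustration of how the
# engine above might be driven. The reference image path, storyboard texts, and parameter
# values below are placeholders, and the `ltx_params` keys are assumed to be extra keyword
# arguments accepted by `ltx_manager_singleton.generate_latent_fragment`.
#
# engine = Deformes7DEngine(workspace_dir="deformes_workspace")
# result = engine.generate_full_movie_interleaved(
#     initial_ref_paths=["refs/scene_start.png"],
#     storyboard=["A quiet harbor at dawn", "Boats leave the harbor", "Open sea at noon"],
#     global_prompt="A short film about a fishing voyage",
#     video_resolution=512,
#     seconds_per_fragment=4.0,
#     trim_percent=10,
#     handler_strength=0.5,
#     dest_strength=0.7,
#     ltx_params={"guidance_scale": 3.0, "num_inference_steps": 25},
# )
# print(result["final_path"])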