Aduc_sdr

Paused

App Files Files Community

Aduc-sdr committed on Sep 4

Commit

c4396c3

verified ·

1 Parent(s): 3d3733f

Update engineers/deformes3D_thinker.py

Browse files

Files changed (1) hide show

engineers/deformes3D_thinker.py +101 -45

engineers/deformes3D_thinker.py CHANGED Viewed

@@ -1,77 +1,133 @@
-# engineers/deformes3D_thinker.py
 #
 # Copyright (C) 2025 Carlos Rodrigues dos Santos
 #
-# Version: 1.0.0
 #
-# This file defines the Deformes3DThinker, the tactical cinematic director
-# of the ADUC framework. Its sole responsibility is to analyze the immediate
-# temporal context (past, present, future keyframes) to generate the optimal
-# motion prompt for the video generation engine.
 import logging
 from pathlib import Path
 from PIL import Image
 import gradio as gr
-from managers.gemini_manager import gemini_manager_singleton
 logger = logging.getLogger(__name__)
 class Deformes3DThinker:
     """
-    The tactical specialist that handles cinematic decision-making.
-
-    Fills a text prompt template, attaches the past, present and future
-    keyframe images, and asks ``gemini_manager_singleton`` for a JSON decision.
     """
-    def _read_prompt_template(self, filename: str) -> str:
-        """Reads a prompt template file from the 'prompts' directory.
-
-        Args:
-            filename: Name of the template file inside the 'prompts' directory.
-
-        Returns:
-            The full text of the template, decoded as UTF-8.
-
-        Raises:
-            gr.Error: If the template file does not exist.
-        """
-        try:
-            # The 'prompts' directory is resolved two levels above this file.
-            prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
-            with open(prompts_dir / filename, "r", encoding="utf-8") as f:
-                return f.read()
-        except FileNotFoundError:
-            raise gr.Error(f"Prompt template file not found: prompts/{filename}")
-    def get_cinematic_decision(self, global_prompt: str, story_history: str,
-                               past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
-                               past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> dict:
         """
-        Acts as a Film Director to make editing decisions and generate motion prompts
-        by analyzing the past, present, and future visual and narrative context.
-
-        Returns:
-            A dict containing at least the keys 'transition_type' and
-            'motion_prompt'. If anything in the process raises, the error is
-            logged and a fallback dict with transition_type 'continuous' is
-            returned instead of propagating the exception.
         """
         try:
-            template = self._read_prompt_template("cinematic_director_prompt.txt")
-            # The template is expected to contain these five named placeholders.
-            prompt_text = template.format(
-                global_prompt=global_prompt,
-                story_history=story_history,
-                past_scene_desc=past_scene_desc,
-                present_scene_desc=present_scene_desc,
-                future_scene_desc=future_scene_desc
             )
-            # Text and images are interleaved: each image follows its label.
-            prompt_parts = [
-                prompt_text,
-                "[PAST_IMAGE]:", Image.open(past_keyframe_path),
-                "[PRESENT_IMAGE]:", Image.open(present_keyframe_path),
-                "[FUTURE_IMAGE]:", Image.open(future_keyframe_path)
             ]
-            decision_data = gemini_manager_singleton.get_json_object(prompt_parts)
-            # Reject responses that lack either required key; the except branch
-            # below turns this into the fallback decision.
-            if "transition_type" not in decision_data or "motion_prompt" not in decision_data:
-                raise ValueError("AI response (Cinematographer) is malformed. Missing 'transition_type' or 'motion_prompt'.")
-            # --- LOGGING ADDED ---
-            logger.info(f"Deformes3DThinker Decision -> Transition: '{decision_data['transition_type']}', Motion Prompt: '{decision_data['motion_prompt']}'")
-            return decision_data
         except Exception as e:
-            logger.error(f"The Film Director (Deformes3D Thinker) failed: {e}. Using fallback.", exc_info=True)
             fallback_prompt = f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
-            logger.info(f"Deformes3DThinker Fallback -> Transition: 'continuous', Motion Prompt: '{fallback_prompt}'")
-            return {
-                "transition_type": "continuous",
-                "motion_prompt": fallback_prompt
-            }
 # --- Singleton Instance ---
 # Created once, when this module is first imported.
 deformes3d_thinker_singleton = Deformes3DThinker()

+# engineers/deformes3d_thinker.py
 #
 # Copyright (C) 2025 Carlos Rodrigues dos Santos
 #
+# Version: 2.0.0
 #
+# This version is refactored to use the LTX pipeline's internal prompt enhancement
+# models instead of an external LLM (like Gemini). It acts as a direct interface
+# to the LTX's own "assistant director" for generating cinematic motion prompts.
 import logging
 from pathlib import Path
 from PIL import Image
 import gradio as gr
+import torch
+# Import the LTX manager singleton to get access to its pipeline
+from managers.ltx_manager import ltx_manager_singleton
+# Import the prompt enhancement logic directly from LTX
+# to ensure we use exactly the same process
+from ltx_video.utils.prompt_enhance_utils import (
+    _generate_i2v_prompt,
+    _get_first_frames_from_conditioning_item,
+)
+from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem
 logger = logging.getLogger(__name__)
 class Deformes3DThinker:
     """
+    The tactical specialist that handles cinematic decision-making by leveraging
+    the LTX pipeline's internal prompt enhancement capabilities.
+
+    The instance keeps a reference to the pipeline of the first LTX worker and
+    uses that pipeline's image-caption model and enhancement LLM to turn the
+    present keyframe plus a scene description into a motion prompt.
     """
+    def __init__(self):
+        """
+        Link the thinker to the pipeline of the first LTX worker.
+
+        Raises:
+            RuntimeError: If the LTX manager singleton is falsy or has no workers.
+        """
+        # We use the first worker's pipeline and assume that every worker
+        # shares the same enhancement-model configuration.
+        if not ltx_manager_singleton or not ltx_manager_singleton.workers:
+            raise RuntimeError("LTX Manager and its workers must be initialized before Deformes3DThinker.")
+        self.ltx_pipeline = ltx_manager_singleton.workers[0].pipeline
+        logger.info("Deformes3DThinker initialized and linked to LTX pipeline's enhancement models.")
+    def get_enhanced_motion_prompt(self, global_prompt: str, story_history: str,
+                                   past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
+                                   past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> str:
         """
+        Generates a refined, cinematic motion prompt using the LTX pipeline's own
+        image captioning and LLM enhancement models.
+
+        Only ``present_keyframe_path``, ``present_scene_desc`` and
+        ``future_scene_desc`` are read. The remaining parameters are accepted
+        for signature compatibility and are currently unused.
+
+        Args:
+            present_keyframe_path: Path of the image that gets captioned.
+            present_scene_desc: Used only in the fallback prompts.
+            future_scene_desc: Sent to the LLM as ``user_prompt`` and used in
+                the fallback prompts.
+
+        Returns:
+            The stripped text generated by the enhancement LLM. A fallback
+            sentence built from the present and future scene descriptions is
+            returned when the enhancement models are missing, when the LLM
+            returns empty text, or when any step raises.
         """
         try:
+            # 1. Check that the enhancement models are available in the pipeline.
+            if not all([
+                self.ltx_pipeline.prompt_enhancer_image_caption_model,
+                self.ltx_pipeline.prompt_enhancer_image_caption_processor,
+                self.ltx_pipeline.prompt_enhancer_llm_model,
+                self.ltx_pipeline.prompt_enhancer_llm_tokenizer
+            ]):
+                logger.warning("LTX prompt enhancement models not found in the pipeline. Using fallback.")
+                return f"A cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
+            # 2. The context is always image-to-video; the PRESENT image is the
+            #    main reference. The context manager closes the file handle;
+            #    convert() returns an independent in-memory copy.
+            with Image.open(present_keyframe_path) as source_image:
+                present_image = source_image.convert("RGB")
+            images_list = [present_image]
+            # 3. Generate the caption of the reference (present) image.
+            image_captions = self.ltx_pipeline.prompt_enhancer_image_caption_processor.batch_decode(
+                self.ltx_pipeline.prompt_enhancer_image_caption_model.generate(
+                    **self.ltx_pipeline.prompt_enhancer_image_caption_processor(
+                        ["<DETAILED_CAPTION>"] * len(images_list), images_list, return_tensors="pt"
+                    ).to(self.ltx_pipeline.device)
+                ),
+                skip_special_tokens=True,
             )
+            # 4. Build the chat messages for the enhancement LLM.
+            # NOTE(review): this system prompt is a shortened placeholder (the
+            # literal says so itself); the trailing comment points at
+            # I2V_CINEMATIC_PROMPT - confirm whether the full prompt belongs here.
+            system_prompt = "You are an expert cinematic director... (resumido para clareza)" # I2V_CINEMATIC_PROMPT
+            user_content = f"user_prompt: {future_scene_desc}\nimage_caption: {image_captions[0]}"
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_content}
             ]
+            # 5. Call the enhancement LLM.
+            text = self.ltx_pipeline.prompt_enhancer_llm_tokenizer.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+            model_inputs = self.ltx_pipeline.prompt_enhancer_llm_tokenizer(
+                [text], return_tensors="pt"
+            ).to(self.ltx_pipeline.device)
+            generated_ids = self.ltx_pipeline.prompt_enhancer_llm_model.generate(
+                **model_inputs, max_new_tokens=256
+            )
+            # Drop the echoed input tokens so only the newly generated text is decoded.
+            decoded_prompts = self.ltx_pipeline.prompt_enhancer_llm_tokenizer.batch_decode(
+                generated_ids[:, model_inputs.input_ids.shape[1]:], skip_special_tokens=True
+            )
+            enhanced_prompt = decoded_prompts[0].strip()
+            # An empty generation is useless as a motion prompt; route it
+            # through the fallback branch below.
+            if not enhanced_prompt:
+                raise ValueError("LTX enhancement LLM returned an empty prompt.")
+            # Log exactly the string that is returned.
+            logger.info(f"Deformes3DThinker (LTX) Decision -> Motion Prompt: '{enhanced_prompt}'")
+            return enhanced_prompt
         except Exception as e:
+            logger.error(f"The Film Director (Deformes3D Thinker) failed with LTX models: {e}. Using fallback.", exc_info=True)
             fallback_prompt = f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
+            logger.info(f"Deformes3DThinker Fallback -> Motion Prompt: '{fallback_prompt}'")
+            return fallback_prompt
+# Helper function for image conversion
+def _pil_to_numpy(img: "Image.Image"):
+    """
+    Convert an image to a float32 NumPy array scaled to the [0, 1] range.
+
+    Args:
+        img: A PIL image (any input accepted by ``numpy.array`` that holds
+            8-bit pixel values works the same way).
+
+    Returns:
+        A ``numpy.ndarray`` of dtype float32 with the layout ``numpy.array``
+        produces for ``img``; for an RGB image that is height x width x 3,
+        the 3-D layout the caller's ``permute(2, 0, 1)`` requires.
+    """
+    # Imported locally: NumPy is not among the module's visible top-level imports.
+    import numpy as np
+    return np.array(img).astype(np.float32) / 255.0
 # --- Singleton Instance ---
 # Created when this module is first imported, so Deformes3DThinker.__init__
 # runs at import time and raises RuntimeError if the LTX manager singleton
 # is falsy or has no workers yet.
 deformes3d_thinker_singleton = Deformes3DThinker()