Aduc-sdr committed on
Commit
c4396c3
·
verified ·
1 Parent(s): 3d3733f

Update engineers/deformes3D_thinker.py

Files changed (1)
  1. engineers/deformes3D_thinker.py +101 -45
engineers/deformes3D_thinker.py CHANGED
@@ -1,77 +1,133 @@
- # engineers/deformes3D_thinker.py
  #
  # Copyright (C) 2025 Carlos Rodrigues dos Santos
  #
- # Version: 1.0.0
  #
- # This file defines the Deformes3DThinker, the tactical cinematic director
- # of the ADUC framework. Its sole responsibility is to analyze the immediate
- # temporal context (past, present, future keyframes) to generate the optimal
- # motion prompt for the video generation engine.

  import logging
  from pathlib import Path
  from PIL import Image
  import gradio as gr

- from managers.gemini_manager import gemini_manager_singleton

  logger = logging.getLogger(__name__)

  class Deformes3DThinker:
      """
-     The tactical specialist that handles cinematic decision-making.
      """
-     def _read_prompt_template(self, filename: str) -> str:
-         """Reads a prompt template file from the 'prompts' directory."""
-         try:
-             prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
-             with open(prompts_dir / filename, "r", encoding="utf-8") as f:
-                 return f.read()
-         except FileNotFoundError:
-             raise gr.Error(f"Prompt template file not found: prompts/{filename}")

-     def get_cinematic_decision(self, global_prompt: str, story_history: str,
-                                past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
-                                past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> dict:
          """
-         Acts as a Film Director to make editing decisions and generate motion prompts
-         by analyzing the past, present, and future visual and narrative context.
          """
          try:
-             template = self._read_prompt_template("cinematic_director_prompt.txt")
-             prompt_text = template.format(
-                 global_prompt=global_prompt,
-                 story_history=story_history,
-                 past_scene_desc=past_scene_desc,
-                 present_scene_desc=present_scene_desc,
-                 future_scene_desc=future_scene_desc
              )

-             prompt_parts = [
-                 prompt_text,
-                 "[PAST_IMAGE]:", Image.open(past_keyframe_path),
-                 "[PRESENT_IMAGE]:", Image.open(present_keyframe_path),
-                 "[FUTURE_IMAGE]:", Image.open(future_keyframe_path)
              ]

-             decision_data = gemini_manager_singleton.get_json_object(prompt_parts)

-             if "transition_type" not in decision_data or "motion_prompt" not in decision_data:
-                 raise ValueError("AI response (Cinematographer) is malformed. Missing 'transition_type' or 'motion_prompt'.")

-             # --- LOGGING ADDED ---
-             logger.info(f"Deformes3DThinker Decision -> Transition: '{decision_data['transition_type']}', Motion Prompt: '{decision_data['motion_prompt']}'")

-             return decision_data
          except Exception as e:
-             logger.error(f"The Film Director (Deformes3D Thinker) failed: {e}. Using fallback.", exc_info=True)
              fallback_prompt = f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
-             logger.info(f"Deformes3DThinker Fallback -> Transition: 'continuous', Motion Prompt: '{fallback_prompt}'")
-             return {
-                 "transition_type": "continuous",
-                 "motion_prompt": fallback_prompt
-             }

  # --- Singleton Instance ---
  deformes3d_thinker_singleton = Deformes3DThinker()
 
+ # engineers/deformes3d_thinker.py
  #
  # Copyright (C) 2025 Carlos Rodrigues dos Santos
  #
+ # Version: 2.0.0
  #
+ # This version is refactored to use the LTX pipeline's internal prompt enhancement
+ # models instead of an external LLM (like Gemini). It acts as a direct interface
+ # to LTX's own "assistant director" for generating cinematic motion prompts.

  import logging
  from pathlib import Path
  from PIL import Image
  import gradio as gr
+ import torch
+ import numpy as np

+ # Import the LTX singleton so we have access to its pipeline.
+ from managers.ltx_manager import ltx_manager_singleton
+
+ # Import the prompt enhancement logic directly from LTX
+ # to guarantee we use exactly the same process.
+ from ltx_video.utils.prompt_enhance_utils import (
+     _generate_i2v_prompt,
+     _get_first_frames_from_conditioning_item,
+ )
+ from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem

  logger = logging.getLogger(__name__)

  class Deformes3DThinker:
      """
+     The tactical specialist that handles cinematic decision-making by leveraging
+     the LTX pipeline's internal prompt enhancement capabilities.
      """
+
+     def __init__(self):
+         # We access the first worker's pipeline and assume that all workers
+         # share the same enhancement model configuration.
+         if not ltx_manager_singleton or not ltx_manager_singleton.workers:
+             raise RuntimeError("LTX Manager and its workers must be initialized before Deformes3DThinker.")
+
+         self.ltx_pipeline = ltx_manager_singleton.workers[0].pipeline
+         logger.info("Deformes3DThinker initialized and linked to LTX pipeline's enhancement models.")

+     def get_enhanced_motion_prompt(self, global_prompt: str, story_history: str,
+                                    past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
+                                    past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> str:
          """
+         Generates a refined, cinematic motion prompt using the LTX pipeline's own
+         image captioning and LLM enhancement models.
          """
          try:
+             # <<< START OF THE REPLICATED PROMPT ENHANCEMENT LOGIC >>>
+
+             # 1. Check that the enhancement models are available in the pipeline.
+             if not all([
+                 self.ltx_pipeline.prompt_enhancer_image_caption_model,
+                 self.ltx_pipeline.prompt_enhancer_image_caption_processor,
+                 self.ltx_pipeline.prompt_enhancer_llm_model,
+                 self.ltx_pipeline.prompt_enhancer_llm_tokenizer
+             ]):
+                 logger.warning("LTX prompt enhancement models not found in the pipeline. Using fallback.")
+                 return f"A cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
+
+             # 2. Our context is always Image-to-Video. We use the PRESENT image as the main reference.
+             present_image = Image.open(present_keyframe_path).convert("RGB")
+
+             # `_generate_i2v_prompt` expects a list of prompts and a list of images.
+             prompts_list = [present_scene_desc]
+             images_list = [present_image]
+
+             # The LTX system uses a "ConditioningItem" object; we mimic it here for the
+             # caption function, which expects that format.
+             conditioning_item = ConditioningItem(
+                 media_item=torch.stack([torch.tensor(_pil_to_numpy(img)).permute(2, 0, 1) for img in images_list]),
+                 media_frame_number=0,
+                 conditioning_strength=1.0
+             )
+
+             # 3. Generate the caption of the reference (present) image.
+             image_captions = self.ltx_pipeline.prompt_enhancer_image_caption_processor.batch_decode(
+                 self.ltx_pipeline.prompt_enhancer_image_caption_model.generate(
+                     **self.ltx_pipeline.prompt_enhancer_image_caption_processor(
+                         ["<DETAILED_CAPTION>"] * len(images_list), images_list, return_tensors="pt"
+                     ).to(self.ltx_pipeline.device)
+                 ),
+                 skip_special_tokens=True,
              )

+             # 4. Build the prompt for the enhancement LLM.
+             system_prompt = "You are an expert cinematic director... (abridged for clarity)"  # I2V_CINEMATIC_PROMPT
+             user_content = f"user_prompt: {future_scene_desc}\nimage_caption: {image_captions[0]}"
+             messages = [
+                 {"role": "system", "content": system_prompt},
+                 {"role": "user", "content": user_content}
              ]

+             # 5. Call the enhancement LLM.
+             text = self.ltx_pipeline.prompt_enhancer_llm_tokenizer.apply_chat_template(
+                 messages, tokenize=False, add_generation_prompt=True
+             )
+             model_inputs = self.ltx_pipeline.prompt_enhancer_llm_tokenizer(
+                 [text], return_tensors="pt"
+             ).to(self.ltx_pipeline.device)

+             generated_ids = self.ltx_pipeline.prompt_enhancer_llm_model.generate(
+                 **model_inputs, max_new_tokens=256
+             )
+             decoded_prompts = self.ltx_pipeline.prompt_enhancer_llm_tokenizer.batch_decode(
+                 generated_ids[:, model_inputs.input_ids.shape[1]:], skip_special_tokens=True
+             )

+             enhanced_prompt = decoded_prompts[0]

+             logger.info(f"Deformes3DThinker (LTX) Decision -> Motion Prompt: '{enhanced_prompt}'")
+             return enhanced_prompt.strip()
+
          except Exception as e:
+             logger.error(f"The Film Director (Deformes3D Thinker) failed with LTX models: {e}. Using fallback.", exc_info=True)
              fallback_prompt = f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
+             logger.info(f"Deformes3DThinker Fallback -> Motion Prompt: '{fallback_prompt}'")
+             return fallback_prompt
+
+ # Module-level helper: converts a PIL image into a normalized float32 numpy array (H, W, C),
+ # which the conditioning tensor construction above turns into a (C, H, W) torch tensor.
+ def _pil_to_numpy(img: Image.Image):
+     return np.array(img).astype(np.float32) / 255.0

  # --- Singleton Instance ---
  deformes3d_thinker_singleton = Deformes3DThinker()
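
For reference, a minimal usage sketch of the refactored interface, assuming the engineers package is importable and that ltx_manager_singleton and its workers are initialized before this module is loaded; the keyframe paths and scene descriptions below are hypothetical placeholders:

# Hypothetical caller, e.g. the orchestrator requesting a motion prompt for one scene transition.
from engineers.deformes3D_thinker import deformes3d_thinker_singleton

motion_prompt = deformes3d_thinker_singleton.get_enhanced_motion_prompt(
    global_prompt="A short film about a lighthouse keeper during a storm",
    story_history="Scene 1 established the storm approaching the coast.",
    past_keyframe_path="keyframes/scene_01.png",
    present_keyframe_path="keyframes/scene_02.png",
    future_keyframe_path="keyframes/scene_03.png",
    past_scene_desc="The keeper watches the horizon",
    present_scene_desc="Waves crash against the lighthouse",
    future_scene_desc="The beam of light cuts through the rain",
)
# Returns either the LTX-enhanced cinematic motion prompt or the fallback transition string.
print(motion_prompt)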