Carlexxx committed
Commit ac82132 · 1 Parent(s): 1267261

feat: Implement self-contained specialist managers

This view is limited to 50 files because it contains too many changes.

Files changed (50)
  1. aduc_framework/managers/flux_kontext_manager.py +1 -1
  2. aduc_framework/managers/latent_enhancer_manager.py +1 -1
  3. aduc_framework/managers/ltx_manager.py +99 -116
  4. aduc_framework/managers/ltx_pipeline_utils.py +8 -8
  5. aduc_framework/managers/upscaler_specialist.py +1 -1
  6. aduc_framework/managers/vae_manager.py +2 -2
  7. aduc_framework/types.py +23 -14
  8. app.py +35 -29
  9. engineers/LICENSE +0 -23
  10. engineers/NOTICE.md +0 -76
  11. engineers/README.md +0 -211
  12. engineers/__init__.py +0 -0
  13. engineers/deformes2D_thinker.py +0 -171
  14. engineers/deformes3D.py +0 -193
  15. engineers/deformes3D_thinker.py +0 -136
  16. engineers/deformes4D.py +0 -338
  17. engineers/deformes7D.py +0 -316
  18. managers/LICENSE +0 -25
  19. managers/LICENSE.txt +0 -201
  20. managers/NOTICE.md +0 -60
  21. managers/README.md +0 -156
  22. managers/__init__.py +0 -0
  23. managers/config.yaml +0 -24
  24. managers/flux_kontext_manager.py +0 -165
  25. managers/gemini_manager.py +0 -119
  26. managers/latent_enhancer_manager.py +0 -109
  27. managers/ltx_manager.py +0 -320
  28. managers/ltx_pipeline_utils.py +0 -774
  29. managers/mmaudio_manager.py +0 -208
  30. managers/seedvr_manager.py +0 -233
  31. managers/upscaler_specialist.py +0 -91
  32. managers/vae_manager.py +0 -99
  33. prompts/LICENSE +0 -25
  34. prompts/NOTICE.md +0 -76
  35. prompts/README.md +0 -211
  36. prompts/anticipatory_keyframe_prompt.txt +0 -29
  37. prompts/audio_director_prompt.txt +0 -18
  38. prompts/cinematic_director_prompt.txt +0 -27
  39. prompts/director_composition_prompt.txt +0 -27
  40. prompts/flux_composition_wrapper_prompt.txt +0 -1
  41. prompts/initial_motion_prompt.txt +0 -20
  42. prompts/keyframe_selection_prompt.txt +0 -20
  43. prompts/sound_director_prompt.txt +0 -27
  44. prompts/sound_director_prompt.txt.txt +0 -27
  45. prompts/transition_decision_prompt.txt +0 -27
  46. prompts/unified_cinematographer_prompt.txt +0 -47
  47. prompts/unified_storyboard_prompt.txt +0 -19
  48. tools/LICENSE +0 -25
  49. tools/NOTICE.md +0 -76
  50. tools/README.md +0 -211
aduc_framework/managers/flux_kontext_manager.py CHANGED
@@ -25,7 +25,7 @@ import threading
 import yaml
 import logging
 
-from tools.hardware_manager import hardware_manager
+from ..tools.hardware_manager import hardware_manager
 
 logger = logging.getLogger(__name__)
 
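Note: the same one-line change (an absolute managers.*/tools.* import replaced by a package-relative import) repeats in the other specialist managers below. A minimal sketch of the package layout this assumes; only the paths that appear in this commit are taken as given, the rest is illustrative:

    aduc_framework/
        __init__.py
        types.py                     (LatentConditioningItem and the Pydantic models)
        managers/
            __init__.py
            flux_kontext_manager.py
        tools/
            __init__.py
            hardware_manager.py

    # Inside aduc_framework/managers/flux_kontext_manager.py, ".." resolves to the
    # aduc_framework package, so the sibling tools subpackage is importable without
    # relying on the repository root being on sys.path:
    from ..tools.hardware_manager import hardware_manager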
aduc_framework/managers/latent_enhancer_manager.py CHANGED
@@ -19,7 +19,7 @@ import torch
 import logging
 import time
 from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
+from ..managers.ltx_manager import ltx_manager_singleton
 
 logger = logging.getLogger(__name__)
 
aduc_framework/managers/ltx_manager.py CHANGED
@@ -1,20 +1,13 @@
-# managers/ltx_manager.py
-# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
+# aduc_framework/managers/ltx_manager.py
 #
-# Contato:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# PENDING PATENT NOTICE: Please see NOTICE.md.
+# Versão 2.3.1 (Framework-Compliant)
 #
-# Version: 2.3.0
-#
-# This version adds a public property `prompt_enhancement_pipeline` to the manager.
-# This allows other specialists, specifically the Deformes3DThinker, to access
-# the internal prompt refinement models (captioning and LLM) used by the LTX pipeline,
-# ensuring stylistic and logical consistency.
+# Este manager é responsável por controlar a pipeline LTX-Video. Ele gerencia
+# um pool de workers para otimizar o uso de múltiplas GPUs, lida com a inicialização
+# e o setup de dependências complexas, e expõe uma interface de alto nível para a
+# geração de fragmentos de vídeo no espaço latente.
 
 import torch
 import gc
@@ -29,18 +22,21 @@ import subprocess
 from pathlib import Path
 from typing import Optional, List, Tuple, Union
 
-from tools.optimization import optimize_ltx_worker, can_optimize_fp8
-from tools.hardware_manager import hardware_manager
-from aduc_types import LatentConditioningItem
+# --- CORREÇÃO DE IMPORTAÇÃO ---
+# O manager agora importa os tipos de seu próprio pacote "pai" e
+# as ferramentas de um pacote "irmão".
+from ..types import LatentConditioningItem
+from ..tools.optimization import optimize_ltx_worker, can_optimize_fp8
+from ..tools.hardware_manager import hardware_manager
 
 logger = logging.getLogger(__name__)
 
-# --- Dependency Management ---
+# --- Gerenciamento de Dependências ---
 DEPS_DIR = Path("./deps")
 LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
 LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
 
-# --- Placeholders for lazy-loaded modules ---
+# --- Placeholders para módulos importados tardiamente (lazy-loaded) ---
 create_ltx_video_pipeline = None
 calculate_padding = None
 LTXVideoPipeline = None
@@ -54,8 +50,8 @@ class LtxPoolManager:
     """
     Manages a pool of LtxWorkers and exposes the enhancement pipeline for other specialists.
     """
-    def __init__(self, device_ids, ltx_config_file_name):
-        logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
+    def __init__(self, device_ids: List[str], ltx_config_file_name: str):
+        logger.info(f"LTX POOL MANAGER: Criando workers para os dispositivos: {device_ids}")
         self._ltx_modules_loaded = False
         self._setup_dependencies()
         self._lazy_load_ltx_modules()
@@ -66,47 +62,43 @@ class LtxPoolManager:
         self.current_worker_index = 0
         self.lock = threading.Lock()
 
-        # <--- NOVA PROPRIEDADE PARA O DEFORMES3DTHINKER USAR --->
-        # Expõe a pipeline do primeiro worker. Assumimos que todas são configuradas
-        # da mesma forma e contêm os mesmos modelos de enhancement.
+        # Expõe a pipeline do primeiro worker para que outros especialistas (como o Deformes3DThinker)
+        # possam acessar os modelos de aprimoramento de prompt.
        self.prompt_enhancement_pipeline = self.workers[0].pipeline if self.workers else None
         if self.prompt_enhancement_pipeline:
-            logger.info("LTX POOL MANAGER: Prompt enhancement pipeline exposed for other specialists.")
-        # <--- FIM DA NOVA PROPRIEDADE --->
+            logger.info("LTX POOL MANAGER: Pipeline de aprimoramento de prompt exposta para outros especialistas.")
 
         self._apply_ltx_pipeline_patches()
 
         if all(w.device.type == 'cuda' for w in self.workers):
-            logger.info("LTX POOL MANAGER: HOT START MODE ENABLED. Pre-warming all GPUs...")
+            logger.info("LTX POOL MANAGER: MODO HOT START ATIVADO. Pré-aquecendo todas as GPUs...")
             for worker in self.workers:
                 worker.to_gpu()
-            logger.info("LTX POOL MANAGER: All GPUs are hot and ready.")
+            logger.info("LTX POOL MANAGER: Todas as GPUs estão prontas.")
         else:
-            logger.info("LTX POOL MANAGER: Operating in CPU or mixed mode. GPU pre-warming skipped.")
-
-    # ... (O resto da classe LtxPoolManager, como _setup_dependencies, generate_latent_fragment, etc., permanece exatamente o mesmo) ...
+            logger.info("LTX POOL MANAGER: Operando em modo CPU ou misto. Pré-aquecimento de GPU pulado.")
 
     def _setup_dependencies(self):
         """Clones the LTX-Video repo if not found and adds it to the system path."""
         if not LTX_VIDEO_REPO_DIR.exists():
-            logger.info(f"LTX-Video repository not found at '{LTX_VIDEO_REPO_DIR}'. Cloning from GitHub...")
+            logger.info(f"Repositório LTX-Video não encontrado em '{LTX_VIDEO_REPO_DIR}'. Clonando do GitHub...")
             try:
                 DEPS_DIR.mkdir(exist_ok=True)
                 subprocess.run(
-                    ["git", "clone", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
+                    ["git", "clone", "--depth", "1", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
                     check=True, capture_output=True, text=True
                 )
-                logger.info("LTX-Video repository cloned successfully.")
+                logger.info("Repositório LTX-Video clonado com sucesso.")
             except subprocess.CalledProcessError as e:
-                logger.error(f"Failed to clone LTX-Video repository. Git stderr: {e.stderr}")
-                raise RuntimeError("Could not clone the required LTX-Video dependency from GitHub.")
+                logger.error(f"Falha ao clonar o repositório LTX-Video. Git stderr: {e.stderr}")
+                raise RuntimeError("Não foi possível clonar a dependência LTX-Video do GitHub.")
         else:
-            logger.info("Found local LTX-Video repository.")
+            logger.info("Repositório LTX-Video local encontrado.")
 
         if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
             sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
-            logger.info(f"Added '{LTX_VIDEO_REPO_DIR.resolve()}' to sys.path.")
-
+            logger.info(f"Adicionado '{LTX_VIDEO_REPO_DIR.resolve()}' ao sys.path.")
+
     def _lazy_load_ltx_modules(self):
         """Dynamically imports LTX-Video modules after ensuring the repo exists."""
         if self._ltx_modules_loaded:
@@ -115,22 +107,22 @@ class LtxPoolManager:
         global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
         global vae_encode, latent_to_pixel_coords, randn_tensor
 
-        from managers.ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
+        from .ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
         from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
         from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
         from diffusers.utils.torch_utils import randn_tensor
 
         self._ltx_modules_loaded = True
-        logger.info("LTX-Video modules have been dynamically loaded.")
+        logger.info("Módulos do LTX-Video foram carregados dinamicamente.")
 
     def _apply_ltx_pipeline_patches(self):
         """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility."""
-        logger.info("LTX POOL MANAGER: Applying ADUC-SDR patches to LTX pipeline...")
+        logger.info("LTX POOL MANAGER: Aplicando patches ADUC-SDR na pipeline LTX...")
         for worker in self.workers:
             worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline)
-        logger.info("LTX POOL MANAGER: All pipeline instances have been patched successfully.")
+        logger.info("LTX POOL MANAGER: Todas as instâncias da pipeline foram corrigidas com sucesso.")
 
-    def _get_next_worker(self):
+    def _get_next_worker(self) -> 'LtxWorker':
         with self.lock:
             worker = self.workers[self.current_worker_index]
             self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
@@ -152,62 +144,66 @@
         if 'strength' in kwargs:
             pipeline_params["strength"] = kwargs['strength']
         if 'conditioning_items_data' in kwargs:
-            final_conditioning_items = []
-            for item in kwargs['conditioning_items_data']:
-                item.latent_tensor = item.latent_tensor.to(worker.device)
-                final_conditioning_items.append(item)
-            pipeline_params["conditioning_items"] = final_conditioning_items
+            pipeline_params["conditioning_items"] = [
+                item._replace(latent_tensor=item.latent_tensor.to(worker.device))
+                for item in kwargs['conditioning_items_data']
+            ]
         if worker.is_distilled:
-            logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.")
             fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
-            pipeline_params["timesteps"] = fixed_timesteps
             if fixed_timesteps:
+                pipeline_params["timesteps"] = fixed_timesteps
                 pipeline_params["num_inference_steps"] = len(fixed_timesteps)
+
+        callback = kwargs.get('callback')
+        if callback:
+            pipeline_params["callback_on_step_end"] = callback
+            pipeline_params["callback_on_step_end_tensor_inputs"] = ["latents"]
+
         return pipeline_params
 
-    def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple):
+    def generate_latent_fragment(self, **kwargs) -> Tuple[torch.Tensor, tuple]:
         worker_to_use = self._get_next_worker()
         try:
             height, width = kwargs['height'], kwargs['width']
             padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
             padding_vals = calculate_padding(height, width, padded_h, padded_w)
             kwargs['height'], kwargs['width'] = padded_h, padded_w
+
             pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)
-            logger.info(f"Initiating GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}")
+
+            logger.info(f"Iniciando GERAÇÃO em {worker_to_use.device} com shape {padded_w}x{padded_h}")
+
             if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
                 result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
             else:
                 result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
             return result, padding_vals
         except Exception as e:
-            logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True)
+            logger.error(f"LTX POOL MANAGER: Erro durante a geração em {worker_to_use.device}: {e}", exc_info=True)
             raise e
         finally:
             if worker_to_use and worker_to_use.device.type == 'cuda':
                 with torch.cuda.device(worker_to_use.device):
-                    gc.collect(); torch.cuda.empty_cache()
+                    gc.collect()
+                    torch.cuda.empty_cache()
 
-    def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> (torch.Tensor, tuple):
-        pass
+    def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, tuple]:
+        pass # Placeholder
 
-    # ... (O resto do arquivo: LtxWorker, _aduc_prepare_conditioning_patch, Singleton Instantiation, etc. permanece idêntico) ...
 class LtxWorker:
-    """
-    Represents a single instance of the LTX-Video pipeline on a specific device.
-    """
+    """Represents a single instance of the LTX-Video pipeline on a specific device."""
     def __init__(self, device_id, ltx_config_file):
         self.cpu_device = torch.device('cpu')
         self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
-        logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...")
+        logger.info(f"LTX Worker ({self.device}): Inicializando com config '{ltx_config_file}'...")
 
         with open(ltx_config_file, "r") as file:
            self.config = yaml.safe_load(file)
 
         self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
-
         models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded"
 
-        logger.info(f"LTX Worker ({self.device}): Preparing to load model...")
+        logger.info(f"LTX Worker ({self.device}): Preparando para carregar modelo...")
         model_filename = self.config["checkpoint_path"]
         model_path = huggingface_hub.hf_hub_download(
             repo_id="Lightricks/LTX-Video", filename=model_filename,
@@ -219,24 +215,22 @@ class LtxWorker:
             precision=self.config["precision"],
             text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
             sampler=self.config["sampler"],
-            device='cpu'
+            device='cpu' # Sempre carrega na CPU primeiro
         )
-        logger.info(f"LTX Worker ({self.device}): Model ready on CPU. Is distilled model? {self.is_distilled}")
+        logger.info(f"LTX Worker ({self.device}): Modelo pronto na CPU. É um modelo distilled? {self.is_distilled}")
 
     def to_gpu(self):
         if self.device.type == 'cpu': return
-        logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
+        logger.info(f"LTX Worker: Movendo pipeline para a GPU {self.device}...")
         self.pipeline.to(self.device)
         if self.device.type == 'cuda' and can_optimize_fp8():
-            logger.info(f"LTX Worker ({self.device}): FP8 supported GPU detected. Optimizing...")
+            logger.info(f"LTX Worker ({self.device}): GPU com suporte a FP8 detectada. Otimizando...")
             optimize_ltx_worker(self)
-            logger.info(f"LTX Worker ({self.device}): Optimization complete.")
-        elif self.device.type == 'cuda':
-            logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.")
-
+            logger.info(f"LTX Worker ({self.device}): Otimização completa.")
+
     def to_cpu(self):
         if self.device.type == 'cpu': return
-        logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
+        logger.info(f"LTX Worker: Descarregando pipeline da GPU {self.device}...")
         self.pipeline.to('cpu')
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
@@ -244,10 +238,9 @@ class LtxWorker:
     def generate_video_fragment_internal(self, **kwargs):
         return self.pipeline(**kwargs).images
 
-
 def _aduc_prepare_conditioning_patch(
-    self: LTXVideoPipeline,
-    conditioning_items: Optional[List[Union[ConditioningItem, "LatentConditioningItem"]]],
+    self: "LTXVideoPipeline",
+    conditioning_items: Optional[List[Union["ConditioningItem", "LatentConditioningItem"]]],
     init_latents: torch.Tensor,
     num_frames: int,
    height: int,
@@ -259,62 +252,52 @@ def _aduc_prepare_conditioning_patch(
         init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
         init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
         return init_latents, init_pixel_coords, None, 0
-    init_conditioning_mask = torch.zeros(init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device)
+
+    init_conditioning_mask = torch.zeros_like(init_latents[:, 0, ...], dtype=torch.float32, device=init_latents.device)
     extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
     extra_conditioning_num_latents = 0
-    is_latent_mode = hasattr(conditioning_items[0], 'latent_tensor')
-    if is_latent_mode:
-        for item in conditioning_items:
-            media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
-            media_frame_number, strength = item.media_frame_number, item.conditioning_strength
-            if media_frame_number == 0:
-                f_l, h_l, w_l = media_item_latents.shape[-3:]
-                init_latents[:, :, :f_l, :h_l, :w_l] = torch.lerp(init_latents[:, :, :f_l, :h_l, :w_l], media_item_latents, strength)
-                init_conditioning_mask[:, :f_l, :h_l, :w_l] = strength
-            else:
-                noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
-                media_item_latents = torch.lerp(noise, media_item_latents, strength)
-                patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
-                pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-                pixel_coords[:, 0] += media_frame_number
-                extra_conditioning_num_latents += patched_latents.shape[1]
-                new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
-                extra_conditioning_latents.append(patched_latents)
-                extra_conditioning_pixel_coords.append(pixel_coords)
-                extra_conditioning_mask.append(new_mask)
-    else:
-        for item in conditioning_items:
-            if not isinstance(item, ConditioningItem): continue
-            item = self._resize_conditioning_item(item, height, width)
-            media_item_latents = vae_encode(item.media_item.to(dtype=self.vae.dtype, device=self.vae.device), self.vae, vae_per_channel_normalize=vae_per_channel_normalize).to(dtype=init_latents.dtype)
-            if item.media_frame_number == 0:
-                media_item_latents, l_x, l_y = self._get_latent_spatial_position(media_item_latents, item, height, width, strip_latent_border=True)
-                f_l, h_l, w_l = media_item_latents.shape[-3:]
-                init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = torch.lerp(init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l], media_item_latents, item.conditioning_strength)
-                init_conditioning_mask[:, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = item.conditioning_strength
-            else:
-                logger.warning("Pixel-based conditioning for non-zero frames is not fully implemented in this patch.")
+
+    for item in conditioning_items:
+        if not isinstance(item, LatentConditioningItem):
+            logger.warning("Patch ADUC: Item de condicionamento não é um LatentConditioningItem e será ignorado.")
+            continue
+
+        media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
+        media_frame_number, strength = item.media_frame_number, item.conditioning_strength
+
+        if media_frame_number == 0:
+            f_l, h_l, w_l = media_item_latents.shape[-3:]
+            init_latents[..., :f_l, :h_l, :w_l] = torch.lerp(init_latents[..., :f_l, :h_l, :w_l], media_item_latents, strength)
+            init_conditioning_mask[..., :f_l, :h_l, :w_l] = strength
+        else:
+            noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
+            media_item_latents = torch.lerp(noise, media_item_latents, strength)
+            patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
+            pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
+            pixel_coords[:, 0] += media_frame_number
+            extra_conditioning_num_latents += patched_latents.shape[1]
+            new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
+            extra_conditioning_latents.append(patched_latents)
+            extra_conditioning_pixel_coords.append(pixel_coords)
+            extra_conditioning_mask.append(new_mask)
+
     init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
     init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
     init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
     init_conditioning_mask = init_conditioning_mask.squeeze(-1)
+
     if extra_conditioning_latents:
         init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
         init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
         init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
-        if self.transformer.use_tpu_flash_attention:
-            init_latents = init_latents[:, :-extra_conditioning_num_latents]
-            init_pixel_coords = init_pixel_coords[:, :, :-extra_conditioning_num_latents]
-            init_conditioning_mask = init_conditioning_mask[:, :-extra_conditioning_num_latents]
+
     return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
 
-
-# --- Singleton Instantiation ---
+# --- Instanciação Singleton ---
 with open("config.yaml", 'r') as f:
     config = yaml.safe_load(f)
 ltx_gpus_required = config['specialists']['ltx']['gpus_required']
 ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
 ltx_config_filename = config['specialists']['ltx']['config_file']
 ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename)
-logger.info("Video Specialist (LTX) ready.")
+logger.info("Especialista de Vídeo (LTX) pronto.")
aduc_framework/managers/ltx_pipeline_utils.py CHANGED
@@ -23,20 +23,20 @@ from transformers import (
23
  )
24
  from huggingface_hub import hf_hub_download
25
 
26
- from ltx_video.models.autoencoders.causal_video_autoencoder import (
27
  CausalVideoAutoencoder,
28
  )
29
- from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
30
- from ltx_video.models.transformers.transformer3d import Transformer3DModel
31
- from ltx_video.pipelines.pipeline_ltx_video import (
32
  ConditioningItem,
33
  LTXVideoPipeline,
34
  LTXMultiScalePipeline,
35
  )
36
- from ltx_video.schedulers.rf import RectifiedFlowScheduler
37
- from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
38
- from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
39
- import ltx_video.pipelines.crf_compressor as crf_compressor
40
 
41
  MAX_HEIGHT = 720
42
  MAX_WIDTH = 1280
 
23
  )
24
  from huggingface_hub import hf_hub_download
25
 
26
+ from ..ltx_video.models.autoencoders.causal_video_autoencoder import (
27
  CausalVideoAutoencoder,
28
  )
29
+ from ..ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
30
+ from ..ltx_video.models.transformers.transformer3d import Transformer3DModel
31
+ from ..ltx_video.pipelines.pipeline_ltx_video import (
32
  ConditioningItem,
33
  LTXVideoPipeline,
34
  LTXMultiScalePipeline,
35
  )
36
+ from ..ltx_video.schedulers.rf import RectifiedFlowScheduler
37
+ from ..ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
38
+ from ..ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
39
+ import ..ltx_video.pipelines.crf_compressor as crf_compressor
40
 
41
  MAX_HEIGHT = 720
42
  MAX_WIDTH = 1280
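One caveat on the new import block above: Python's grammar only allows leading dots in the "from ... import ..." form, so a statement of the form "import ..package.module as name" raises a SyntaxError at import time. A sketch of the equivalent relative form, assuming ltx_video really is reachable as a subpackage from this module:

    from ..ltx_video.pipelines import crf_compressor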
aduc_framework/managers/upscaler_specialist.py CHANGED
@@ -5,7 +5,7 @@
 import torch
 import logging
 from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
+from ..managers.ltx_manager import ltx_manager_singleton
 
 logger = logging.getLogger(__name__)
 
aduc_framework/managers/vae_manager.py CHANGED
@@ -28,8 +28,8 @@ import gc
 from typing import Generator
 
 # Import the source of the VAE model and the low-level functions
-from managers.ltx_manager import ltx_manager_singleton
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
+from ..managers.ltx_manager import ltx_manager_singleton
+from ..ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
 
 logger = logging.getLogger(__name__)
 
aduc_framework/types.py CHANGED
@@ -2,20 +2,19 @@
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Versão 3.0.0 (Framework Data Models)
+# Versão 3.1.0 (Framework Data Models with Core Types)
 #
 # Este arquivo define as estruturas de dados centrais para o Aduc Framework
-# usando Pydantic. Estes modelos servem como o "contrato" de dados entre as
-# diferentes camadas da aplicação (UI, API, Orchestrator, Engineers).
-#
-# O uso de Pydantic garante validação automática de tipos, serialização/desserialização
-# fácil para JSON e uma fonte única da verdade para a estrutura de dados.
+# usando Pydantic. Ele também inclui tipos de dados de baixo nível, como dataclasses,
+# que são usados internamente pelos managers e engineers.
 
 from pydantic import BaseModel, Field
 from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+import torch
 
-# --- Modelos de Parâmetros de Entrada ---
-# Estes modelos representam os dados que o usuário fornece através de uma interface.
+# --- Modelos de Parâmetros de Entrada (Pydantic) ---
+# Representam os dados que o usuário fornece através de uma interface.
 
 class PreProductionParams(BaseModel):
     """Parâmetros para a etapa de Roteiro e Keyframes."""
@@ -41,8 +40,8 @@ class GenerationParameters(BaseModel):
     pos_producao: Optional[Dict[str, Any]] = None
 
 
-# --- Modelos de Artefatos Gerados ---
-# Estes modelos representam os dados e metadados dos resultados criados pelo framework.
+# --- Modelos de Artefatos Gerados (Pydantic) ---
+# Representam os dados e metadados dos resultados criados pelo framework.
 
 class MediaRef(BaseModel):
     """Representa uma mídia de referência fornecida pelo usuário."""
@@ -60,13 +59,11 @@ class KeyframeData(BaseModel):
     caminho_pixel: str
     caminho_latent: str
     prompt_keyframe: str
-    # Futuramente: midias_contexto: List[Dict[str, Any]]
 
 class VideoFragmentData(BaseModel):
     """Metadados sobre a geração de um único fragmento de vídeo entre dois keyframes."""
     id: int
     prompt_video: str
-    # Futuramente: midias_inicio, midias_caminho, midias_fim
 
 class VideoData(BaseModel):
     """Estrutura de dados completa para o vídeo final (ou um grande clipe)."""
@@ -76,7 +73,7 @@ class VideoData(BaseModel):
     fragmentos_componentes: List[VideoFragmentData]
 
 
-# --- O Modelo de Estado Principal ---
+# --- O Modelo de Estado Principal (Pydantic) ---
 
 class GenerationState(BaseModel):
     """
@@ -88,4 +85,16 @@
     midias_referencia: List[MediaRef] = Field(default_factory=list)
     Atos: List[Ato] = Field(default_factory=list)
     Keyframe_atos: List[KeyframeData] = Field(default_factory=list)
-    videos_atos: List[VideoData] = Field(default_factory=list)
+    videos_atos: List[VideoData] = Field(default_factory=list)
+
+
+# --- Tipos de Dados Internos (Dataclass) ---
+# Usado para passar dados complexos (como tensores) que não são facilmente
+# serializáveis em JSON, entre os componentes internos do framework.
+
+@dataclass
+class LatentConditioningItem:
+    """Representa uma âncora de condicionamento no espaço latente para o LTX."""
+    latent_tensor: torch.Tensor
+    media_frame_number: int
+    conditioning_strength: float
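A small sketch of how the new dataclass is meant to be used alongside the Pydantic state, assuming a latent tensor produced elsewhere in the framework; every name except LatentConditioningItem is illustrative:

    import torch
    from dataclasses import replace
    from aduc_framework.types import LatentConditioningItem

    item = LatentConditioningItem(
        latent_tensor=torch.zeros(1, 128, 8, 16, 16),  # illustrative latent shape
        media_frame_number=24,
        conditioning_strength=0.5,
    )

    # Dataclasses holding tensors are not JSON-serializable, which is why this type
    # stays internal while the Pydantic models above cross the UI/API boundary.
    # dataclasses.replace() builds a copy with one field swapped, e.g. a device move:
    item_on_gpu = replace(item, latent_tensor=item.latent_tensor.to("cuda"))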
app.py CHANGED
@@ -1,39 +1,18 @@
-
 # app.py
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Version: 2.3.0
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-# YouTube (Results): https://m.youtube.com/channel/UC3EgoJi_Fv7yuDpvfYNtoIQ
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
+# Versão 3.0.0 (UI Head for Aduc Framework)
 #
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# PENDING PATENT NOTICE: The ADUC method and system implemented in this
-# software is in the process of being patented. Please see NOTICE.md for details.
-
+# Este arquivo implementa a interface de usuário com Gradio para o Aduc-Sdr.
+# Ele atua como um cliente para o 'aduc_framework', que contém toda a
+# lógica de negócio e orquestração.
 
 import gradio as gr
 import yaml
 import logging
 import os
+import sys
 import shutil
 import time
 import json
@@ -70,7 +49,15 @@ if os.path.exists(LOG_FILE_PATH):
 log_format = '%(asctime)s - %(levelname)s - [%(name)s:%(funcName)s] - %(message)s'
 root_logger = logging.getLogger()
 root_logger.setLevel(logging.INFO)
-# ... (código completo de configuração de logging) ...
+root_logger.handlers.clear()
+stream_handler = logging.StreamHandler(sys.stdout)
+stream_handler.setLevel(logging.INFO)
+stream_handler.setFormatter(logging.Formatter(log_format))
+root_logger.addHandler(stream_handler)
+file_handler = logging.FileHandler(LOG_FILE_PATH, mode='w', encoding='utf-8')
+file_handler.setLevel(logging.INFO)
+file_handler.setFormatter(logging.Formatter(log_format))
+root_logger.addHandler(file_handler)
 logger = logging.getLogger(__name__)
 
 # Carrega a configuração e inicializa o framework
@@ -89,11 +76,16 @@ except Exception as e:
 # --- 2. FUNÇÕES WRAPPER (CAMADA DE TRADUÇÃO UI <-> FRAMEWORK) ---
 
 def run_pre_production_wrapper(prompt, num_keyframes, ref_files, resolution_str, duration_per_fragment, progress=gr.Progress()):
+    """
+    Coleta dados da UI, os empacota em um objeto Pydantic e chama a tarefa de pré-produção do framework.
+    """
     if not ref_files:
         raise gr.Error("Por favor, forneça pelo menos uma imagem de referência.")
 
+    # Etapa de UI: Processar e salvar os arquivos de referência
     ref_paths = [aduc.process_image_for_story(f.name, 480, f"ref_processed_{i}.png") for i, f in enumerate(ref_files)]
 
+    # 1. Empacota os parâmetros da UI no modelo Pydantic que o framework espera
     params = PreProductionParams(
         prompt=prompt,
         num_keyframes=int(num_keyframes),
@@ -102,17 +94,26 @@ def run_pre_production_wrapper(prompt, num_keyframes, ref_files, resolution_str, duration_per_fragment, progress=gr.Progress()):
         duration_per_fragment=duration_per_fragment
     )
 
-    storyboard, final_keyframes, updated_state = aduc.task_pre_production(params, progress)
+    # 2. Define a função de callback para o progresso
+    progress_callback = progress
+
+    # 3. Chama o framework
+    storyboard, final_keyframes, updated_state = aduc.task_pre_production(params, progress_callback)
 
+    # 4. Retorna os resultados desempacotados para os componentes corretos da UI
     return updated_state.model_dump(), storyboard, final_keyframes, gr.update(visible=True, open=True)
 
 def run_original_production_wrapper(current_state_dict, trim_percent, handler_strength, dest_strength, guidance_scale, stg_scale, steps, progress=gr.Progress()):
+    """
+    Coleta os parâmetros da etapa de produção e o estado atual, e chama a tarefa de produção do framework.
+    """
     yield {
         original_video_output: gr.update(value=None, visible=True, label="🎬 Produzindo seu filme..."),
         final_video_output: gr.update(value=None, visible=True, label="🎬 Produção em progresso..."),
         step4_accordion: gr.update(visible=False)
     }
 
+    # 1. Empacota os parâmetros dos sliders no modelo Pydantic
     production_params = ProductionParams(
         trim_percent=int(trim_percent),
         handler_strength=handler_strength,
@@ -122,6 +123,7 @@ def run_original_production_wrapper(current_state_dict, trim_percent, handler_strength, dest_strength, guidance_scale, stg_scale, steps, progress=gr.Progress()):
         inference_steps=int(steps)
     )
 
+    # 2. Chama a tarefa de produção no framework.
     final_video_path, latent_paths, updated_state = aduc.task_produce_original_movie(
         params=production_params,
         progress_callback=progress
@@ -129,6 +131,7 @@ def run_original_production_wrapper(current_state_dict, trim_percent, handler_strength, dest_strength, guidance_scale, stg_scale, steps, progress=gr.Progress()):
 
     updated_state_dict = updated_state.model_dump()
 
+    # 3. Desempacota e retorna o resultado final para a UI
     yield {
         original_video_output: gr.update(value=final_video_path, label="✅ Filme Original Master"),
         final_video_output: gr.update(value=final_video_path),
@@ -150,11 +153,15 @@ def get_log_content():
 # --- 3. DEFINIÇÃO DA UI GRADIO ---
 with gr.Blocks(theme=cinematic_theme, css="style.css") as demo:
 
+    # O gr.State é a "memória" da nossa UI. Ele armazena o JSON de estado entre os cliques.
     generation_state_holder = gr.State(value={})
 
+    # Outros states para gerenciar caminhos de arquivos
     original_latents_paths_state = gr.State(value=None)
     original_video_path_state = gr.State(value=None)
     current_source_video_state = gr.State(value=None)
+    upscaled_video_path_state = gr.State(value=None)
+    hd_video_path_state = gr.State(value=None)
 
     gr.Markdown("<h1>ADUC-SDR 🎬 - O Diretor de Cinema IA</h1>")
     gr.Markdown("<p>Crie um filme completo com vídeo e áudio, orquestrado por uma equipe de IAs especialistas.</p>")
@@ -174,7 +181,6 @@ with gr.Blocks(theme=cinematic_theme, css="style.css") as demo:
         keyframe_gallery = gr.Gallery(label="Galeria de Cenas-Chave (Keyframes)", visible=True, object_fit="contain", height="auto", type="filepath")
 
     with gr.Accordion("Etapa 3: Produção do Vídeo Original", open=False, visible=False) as step3_accordion:
-        # Aqui omiti a definição detalhada dos sliders para brevidade, mas eles existem
         trim_percent_slider = gr.Slider(minimum=10, maximum=90, value=50, step=5, label="Poda Causal (%)")
         handler_strength = gr.Slider(label="Força do Déjà-Vu", minimum=0.0, maximum=1.0, value=0.5, step=0.05)
         dest_strength = gr.Slider(label="Força da Âncora Final", minimum=0.0, maximum=1.0, value=0.75, step=0.05)
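A minimal sketch of the state round-trip these wrappers rely on: gr.State stores the plain dict produced by model_dump(), and the Pydantic model from aduc_framework.types can rehydrate it on a later callback. GenerationState and model_dump() appear in this commit; model_validate() is the standard Pydantic v2 counterpart and is shown here as an assumption about how the framework consumes the dict:

    from aduc_framework.types import GenerationState

    state_dict = updated_state.model_dump()                 # stored in generation_state_holder
    restored = GenerationState.model_validate(state_dict)   # rebuilt from the dict on a later click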
engineers/LICENSE DELETED
@@ -1,23 +0,0 @@
-# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR para geração de vídeo coerente.
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
-#
-# Contato:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Repositórios e Projetos Relacionados:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
engineers/NOTICE.md DELETED
@@ -1,76 +0,0 @@
-# NOTICE
-
-Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
-
----
-
-## Aviso de Propriedade Intelectual e Licenciamento
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-### **Reconhecimento e Implicações (EM PORTUGUÊS):**
-
-Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
-
-1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
-2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
-3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-### **Acknowledgement and Implications (IN ENGLISH):**
-
-By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
-
-1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
-2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
-3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
-
----
-
-## Licença AGPLv3
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
-
----
-
-**Contato para Consultas:**
-
-Para mais informações sobre a arquitetura ADUC, o status do patenteamento, ou para discutir licenciamento para usos comerciais ou não conformes com a AGPLv3, por favor, entre em contato:
-
-Carlos Rodrigues dos Santos
-carlex22@gmail.com
-Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
engineers/README.md DELETED
@@ -1,211 +0,0 @@
1
- ---
2
- title: Euia-AducSdr
3
- emoji: 🎥
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: gradio
7
- app_file: app.py
8
- pinned: true
9
- license: agpl-3.0
10
- short_description: Uma implementação aberta e funcional da arquitetura ADUC-SDR
11
- ---
12
-
13
-
14
- ### 🇧🇷 Português
15
-
16
- Uma implementação aberta e funcional da arquitetura ADUC-SDR (Arquitetura de Unificação Compositiva - Escala Dinâmica e Resiliente), projetada para a geração de vídeo coerente de longa duração. Este projeto materializa os princípios de fragmentação, navegação geométrica e um mecanismo de "eco causal 4bits memoria" para garantir a continuidade física e narrativa em sequências de vídeo geradas por múltiplos modelos de IA.
17
-
18
- **Licença:** Este projeto é licenciado sob os termos da **GNU Affero General Public License v3.0**. Isto significa que se você usar este software (ou qualquer trabalho derivado) para fornecer um serviço através de uma rede, você é **obrigado a disponibilizar o código-fonte completo** da sua versão para os usuários desse serviço.
19
-
20
- - **Copyright (C) 4 de Agosto de 2025, Carlos Rodrigues dos Santos**
21
- - Uma cópia completa da licença pode ser encontrada no arquivo [LICENSE](LICENSE).
22
-
23
- ---
24
-
25
- ### 🇬🇧 English
26
-
27
- An open and functional implementation of the ADUC-SDR (Architecture for Compositive Unification - Dynamic and Resilient Scaling) architecture, designed for long-form coherent video generation. This project materializes the principles of fragmentation, geometric navigation, and a "causal echo 4-bit memory" mechanism to ensure physical and narrative continuity in video sequences generated by multiple AI models.
28
-
29
- **License:** This project is licensed under the terms of the **GNU Affero General Public License v3.0**. This means that if you use this software (or any derivative work) to provide a service over a network, you are **required to make the complete source code** of your version available to the users of that service.
30
-
31
- - **Copyright (C) August 4, 2025, Carlos Rodrigues dos Santos**
32
- - A full copy of the license can be found in the [LICENSE](LICENSE) file.
33
-
34
- ---
35
-
36
- ## **Aviso de Propriedade Intelectual e Patenteamento**
37
-
38
- ### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
39
-
40
- A arquitetura e o método **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste projeto e nas reivindicações associadas, estão **atualmente em processo de patenteamento**.
41
-
42
- O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, que incluem, mas não se limitam a:
43
-
44
- * Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
45
- * Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
46
- * Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
47
- * Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
48
- * O uso de "tokens universais" para comunicação agnóstica a modelos.
49
-
50
- Ao utilizar este software e a arquitetura ADUC aqui implementada, você reconhece a natureza inovadora desta arquitetura e que a **reprodução ou exploração da lógica central da ADUC em sistemas independentes pode infringir direitos de patente pendente.**
51
-
52
- ---
53
-
54
- ### **Patent Pending (IN ENGLISH):**
55
-
56
- The **ADUC (Automated Discovery and Orchestration of Complex tasks)** architecture and method, as described in this project and its associated claims, are **currently in the process of being patented.**
57
-
58
- The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
59
-
60
- * Fragmentation and scaling of requests exceeding AI model context limits.
61
- * Intelligent distribution of sub-tasks to heterogeneous specialists.
62
- * Persistent state management with iterative evaluation and feedback for planning subsequent steps.
63
- * Cost, latency, and quality-aware planning and routing.
64
- * The use of "universal tokens" for model-agnostic communication.
65
-
66
- By using this software and the ADUC architecture implemented herein, you acknowledge the innovative nature of this architecture and that **the reproduction or exploitation of ADUC's core logic in independent systems may infringe upon pending patent rights.**
67
-
68
- ---
69
-
70
- ### Detalhes Técnicos e Reivindicações da ADUC
71
-
72
- #### 🇧🇷 Definição Curta (para Tese e Patente)
73
-
74
- **ADUC** é um *framework pré-input* e *intermediário* de **gerenciamento de prompts** que:
75
-
76
- 1. **fragmenta** solicitações acima do limite de contexto de qualquer modelo,
77
- 2. **escala linearmente** (processo sequencial com memória persistida),
78
- 3. **distribui** sub-tarefas a **especialistas** (modelos/ferramentas heterogêneos), e
79
- 4. **realimenta** a próxima etapa com avaliação do que foi feito/esperado (LLM diretor).
80
-
81
- Não é um modelo; é uma **camada orquestradora** plugável antes do input de modelos existentes (texto, imagem, áudio, vídeo), usando *tokens universais* e a tecnologia atual.
82
-
83
- #### 🇬🇧 Short Definition (for Thesis and Patent)
84
-
85
- **ADUC** is a *pre-input* and *intermediate* **prompt management framework** that:
86
-
87
- 1. **fragments** requests exceeding any model's context limit,
88
- 2. **scales linearly** (sequential process with persisted memory),
89
- 3. **distributes** sub-tasks to **specialists** (heterogeneous models/tools), and
90
- 4. **feeds back** to the next step with an evaluation of what was done/expected (director LLM).
91
-
92
- It is not a model; it is a pluggable **orchestration layer** before the input of existing models (text, image, audio, video), using *universal tokens* and current technology.
93
-
94
- ---
95
-
96
- #### 🇧🇷 Elementos Essenciais (Telegráfico)
97
-
98
- * **Agnóstico a modelos:** opera com qualquer LLM/difusor/API.
99
- * **Pré-input manager:** recebe pedido do usuário, **divide** em blocos ≤ limite de tokens, **prioriza**, **agenda** e **roteia**.
100
- * **Memória persistida:** resultados/latentes/“eco” viram **estado compartilhado** para o próximo bloco (nada é ignorado).
101
- * **Especialistas:** *routers* decidem quem faz o quê (ex.: “descrição → LLM-A”, “keyframe → Img-B”, “vídeo → Vid-C”).
102
- * **Controle de qualidade:** LLM diretor compara *o que fez* × *o que deveria* × *o que falta* e **regenera objetivos** do próximo fragmento.
103
- * **Custo/latência-aware:** planeja pela **VRAM/tempo/custo**, não tenta “abraçar tudo de uma vez”.
104
-
105
- #### 🇬🇧 Essential Elements (Telegraphic)
106
-
107
- * **Model-agnostic:** operates with any LLM/diffuser/API.
108
- * **Pre-input manager:** receives user request, **divides** into blocks ≤ token limit, **prioritizes**, **schedules**, and **routes**.
109
- * **Persisted memory:** results/latents/“echo” become **shared state** for the next block (nothing is ignored).
110
- * **Specialists:** *routers* decide who does what (e.g., “description → LLM-A”, “keyframe → Img-B”, “video → Vid-C”).
111
- * **Quality control:** director LLM compares *what was done* × *what should be done* × *what is missing* and **regenerates objectives** for the next fragment.
112
- * **Cost/latency-aware:** plans by **VRAM/time/cost**, does not try to “embrace everything at once”.
113
-
114
- ---
115
-
116
- #### 🇧🇷 Reivindicações Independentes (Método e Sistema)
117
-
118
- **Reivindicação Independente (Método) — Versão Enxuta:**
119
-
120
- 1. **Método** de **orquestração de prompts** para execução de tarefas acima do limite de contexto de modelos de IA, compreendendo:
121
- (a) **receber** uma solicitação que excede um limite de tokens;
122
- (b) **analisar** a solicitação por um **LLM diretor** e **fragmentá-la** em sub-tarefas ≤ limite;
123
- (c) **selecionar** especialistas de execução para cada sub-tarefa com base em capacidades declaradas;
124
- (d) **gerar** prompts específicos por sub-tarefa em **tokens universais**, incluindo referências ao **estado persistido** de execuções anteriores;
125
- (e) **executar sequencialmente** as sub-tarefas e **persistir** suas saídas como memória (incluindo latentes/eco/artefatos);
126
- (f) **avaliar** automaticamente a saída versus metas declaradas e **regenerar objetivos** do próximo fragmento;
127
- (g) **iterar** (b)–(f) até que os critérios de completude sejam atendidos, produzindo o resultado agregado;
128
- em que o framework **escala linearmente** no tempo e armazenamento físico, **independente** da janela de contexto dos modelos subjacentes.
129
-
130
- **Reivindicação Independente (Sistema):**
131
-
132
- 2. **Sistema** de orquestração de prompts, compreendendo: um **planejador LLM diretor**; um **roteador de especialistas**; um **banco de estado persistido** (incl. memória cinética para vídeo); um **gerador de prompts universais**; e um **módulo de avaliação/realimentação**, acoplados por uma **API pré-input** a modelos heterogêneos.
133
-
134
- #### 🇬🇧 Independent Claims (Method and System)
135
-
136
- **Independent Claim (Method) — Concise Version:**
137
-
138
- 1. A **method** for **prompt orchestration** for executing tasks exceeding AI model context limits, comprising:
139
- (a) **receiving** a request that exceeds a token limit;
140
- (b) **analyzing** the request by a **director LLM** and **fragmenting it** into sub-tasks ≤ the limit;
141
- (c) **selecting** execution specialists for each sub-task based on declared capabilities;
142
- (d) **generating** specific prompts per sub-task in **universal tokens**, including references to the **persisted state** of previous executions;
143
- (e) **sequentially executing** the sub-tasks and **persisting** their outputs as memory (including latents/echo/artifacts);
144
- (f) **automatically evaluating** the output against declared goals and **regenerating objectives** for the next fragment;
145
- (g) **iterating** (b)–(f) until completion criteria are met, producing the aggregated result;
146
- wherein the framework **scales linearly** in time and physical storage, **independent** of the context window of the underlying models.
147
-
148
- **Independent Claim (System):**
149
-
150
- 2. A prompt orchestration **system**, comprising: a **director LLM planner**; a **specialist router**; a **persisted state bank** (incl. kinetic memory for video); a **universal prompt generator**; and an **evaluation/feedback module**, coupled via a **pre-input API** to heterogeneous models.
151
-
152
- ---
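
To make steps (a)–(g) easier to picture, here is a minimal, illustrative sketch of such an orchestration loop. It is not the implementation shipped in this repository; `StateBank`, the `director`, `router`, and `specialist` objects, and their method names are hypothetical placeholders for the components named in the claim.

```python
# Illustrative sketch only -- not the ADUC code from this repository.
# All names below (StateBank, director, router, specialist) are hypothetical.
from dataclasses import dataclass, field
from typing import Any, List


@dataclass
class StateBank:
    """Persisted memory shared across sub-tasks (results, latents, 'echo', artifacts)."""
    records: List[Any] = field(default_factory=list)

    def persist(self, output: Any) -> None:
        self.records.append(output)


def orchestrate(request: str, director, router, token_limit: int) -> list:
    state = StateBank()
    # (a)+(b): the director LLM fragments the oversized request into sub-tasks <= token_limit.
    subtasks = director.fragment(request, token_limit)
    while subtasks:
        task = subtasks.pop(0)
        # (c): pick an execution specialist based on its declared capabilities.
        specialist = router.select(task)
        # (d): build a task-specific prompt in "universal tokens", referencing persisted state.
        prompt = director.build_prompt(task, state)
        # (e): execute sequentially and persist the output as shared memory.
        state.persist(specialist.run(prompt))
        # (f): evaluate what was done vs. what was expected and regenerate the next objectives.
        subtasks = director.evaluate_and_replan(request, state, subtasks)
    # (g): iteration stops when the completion criteria are met; return the aggregate.
    return state.records
```

Time and storage grow with the number of sub-tasks, not with the context window of any single underlying model, which is the linear-scaling property claimed above.
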
153
-
154
- #### 🇧🇷 Dependentes Úteis
155
-
156
- * (3) Onde o roteamento considera **custo/latência/VRAM** e metas de qualidade.
157
- * (4) Onde o banco de estado inclui **eco cinético** para vídeo (últimos *n* frames/latentes/fluxo).
158
- * (5) Onde a avaliação usa métricas específicas por domínio (Lflow, consistência semântica, etc.).
159
- * (6) Onde *tokens universais* padronizam instruções entre especialistas.
160
- * (7) Onde a orquestração decide **cut vs continuous** e **corte regenerativo** (Déjà-Vu) ao editar vídeo.
161
- * (8) Onde o sistema **nunca descarta** conteúdo excedente: **reagenda** em novos fragmentos.
162
-
163
- #### 🇬🇧 Useful Dependents
164
-
165
- * (3) Wherein routing considers **cost/latency/VRAM** and quality goals.
166
- * (4) Wherein the state bank includes **kinetic echo** for video (last *n* frames/latents/flow).
167
- * (5) Wherein evaluation uses domain-specific metrics (Lflow, semantic consistency, etc.).
168
- * (6) Wherein *universal tokens* standardize instructions between specialists.
169
- * (7) Wherein orchestration decides **cut vs continuous** and **regenerative cut** (Déjà-Vu) when editing video.
170
- * (8) Wherein the system **never discards** excess content: it **reschedules** it in new fragments.
171
-
172
- ---
173
-
174
- #### 🇧🇷 Como isso conversa com SDR (Vídeo)
175
-
176
- * **Eco Cinético**: é um **tipo de estado persistido** consumido pelo próximo passo.
177
- * **Déjà-Vu (Corte Regenerativo)**: é **uma política de orquestração** aplicada quando há edição; ADUC decide, monta os prompts certos e chama o especialista de vídeo.
178
- * **Cut vs Continuous**: decisão do **diretor** com base em estado + metas; ADUC roteia e garante a sobreposição/remoção final.
179
-
180
- #### 🇬🇧 How this Converses with SDR (Video)
181
-
182
- * **Kinetic Echo**: is a **type of persisted state** consumed by the next step.
183
- * **Déjà-Vu (Regenerative Cut)**: is an **orchestration policy** applied during editing; ADUC decides, crafts the right prompts, and calls the video specialist.
184
- * **Cut vs Continuous**: decision made by the **director** based on state + goals; ADUC routes and ensures the final overlap/removal.
185
-
186
- ---
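
In code, the kinetic echo amounts to a slice of the previous fragment's latents that is re-injected as conditioning for the next fragment. The sketch below is a hedged illustration only: the `(B, C, F, H, W)` latent layout and the caller-supplied `generate_fragment` function are assumptions, not this project's API (the actual logic lives in `engineers/deformes4D.py`, shown further down in this diff).

```python
# Hedged sketch of a "kinetic echo" carried as persisted state between fragments.
import torch


def roll_fragments(num_fragments: int, frames_per_fragment: int, generate_fragment):
    echo = None  # last latent frames of the previous fragment (motion memory)
    fragments = []
    for _ in range(num_fragments):
        conditioning = []
        if echo is not None:
            # Feed the echo back so motion stays continuous across the boundary.
            conditioning.append(("echo", echo, 0))  # (name, latent, target frame index)
        latents = generate_fragment(conditioning, frames_per_fragment)  # (B, C, F, H, W)
        # Keep the last two latent frames as the echo for the next iteration.
        echo = latents[:, :, -2:, :, :].clone()
        fragments.append(latents[:, :, :-2, :, :])
    return torch.cat(fragments, dim=2) if fragments else None
```

In this sketch, a cut would simply reset `echo` to `None`, so the next fragment is conditioned on fresh keyframes instead of the previous motion.
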
187
-
188
- #### 🇧🇷 Mensagem Clara ao Usuário (Experiência)
189
-
190
- > “Seu pedido excede o limite X do modelo Y. Em vez de truncar silenciosamente, o **ADUC** dividirá e **entregará 100%** do conteúdo por etapas coordenadas.”
191
-
192
- Isso é diferencial prático e jurídico: **não-obviedade** por transformar limite de contexto em **pipeline controlado**, com **persistência de estado** e **avaliação iterativa**.
193
-
194
- #### 🇬🇧 Clear User Message (Experience)
195
-
196
- > "Your request exceeds model Y's limit X. Instead of silently truncating, **ADUC** will divide and **deliver 100%** of the content through coordinated steps."
197
-
198
- This is a practical and legal differentiator: **non-obviousness** by transforming context limits into a **controlled pipeline**, with **state persistence** and **iterative evaluation**.
199
-
200
- ---
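
The "divide instead of truncate" behaviour itself reduces to a few lines. In this toy sketch the whitespace split stands in for a real tokenizer, an assumption made purely for illustration; a deployment would count tokens with the target model's own tokenizer.

```python
# Toy sketch: split an oversized request into blocks of at most token_limit "tokens"
# instead of silently truncating it. Whitespace splitting is a placeholder tokenizer.
def fragment_request(request: str, token_limit: int) -> list[str]:
    words = request.split()
    blocks, current = [], []
    for word in words:
        if len(current) + 1 > token_limit:
            blocks.append(" ".join(current))
            current = []
        current.append(word)
    if current:
        blocks.append(" ".join(current))
    return blocks  # every word is scheduled into some block; nothing is dropped
```
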
201
-
202
- ### Contact / Contato / Contacto
203
-
204
- - **Author / Autor:** Carlos Rodrigues dos Santos
205
- - **Email:** carlex22@gmail.com
206
- - **GitHub:** [https://github.com/carlex22/Aduc-sdr](https://github.com/carlex22/Aduc-sdr)
207
- - **Hugging Face Spaces:**
208
- - [Ltx-SuperTime-60Secondos](https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/)
209
- - [Novinho](https://huggingface.co/spaces/Carlexxx/Novinho/)
210
-
211
- ---
engineers/__init__.py DELETED
File without changes
engineers/deformes2D_thinker.py DELETED
@@ -1,171 +0,0 @@
1
- # engineers/deformes2D_thinker.py
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
- # carlex22@gmail.com
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
- #
10
- # Repositórios e Projetos Relacionados:
11
- # GitHub: https://github.com/carlex22/Aduc-sdr
12
- #
13
- # This program is free software: you can redistribute it and/or modify
14
- # it under the terms of the GNU Affero General Public License as published by
15
- # the Free Software Foundation, either version 3 of the License, or
16
- # (at your option) any later version.
17
- #
18
- # This program is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
- # GNU Affero General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Affero General Public License
24
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
25
- #
26
- # This program is free software: you can redistribute it and/or modify
27
- # it under the terms of the GNU Affero General Public License...
28
- # PENDING PATENT NOTICE: Please see NOTICE.md.
29
- #
30
- # Version 1.0.1
31
-
32
- import logging
33
- from pathlib import Path
34
- from PIL import Image
35
- import gradio as gr
36
- from typing import List
37
-
38
- # It imports the communication layer, not the API directly
39
- from managers.gemini_manager import gemini_manager_singleton
40
-
41
- logger = logging.getLogger(__name__)
42
-
43
- class Deformes2DThinker:
44
- """
45
- The cognitive specialist that handles prompt engineering and creative logic.
46
- """
47
- def _read_prompt_template(self, filename: str) -> str:
48
- """Reads a prompt template file from the 'prompts' directory."""
49
- try:
50
- prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
51
- with open(prompts_dir / filename, "r", encoding="utf-8") as f:
52
- return f.read()
53
- except FileNotFoundError:
54
- raise gr.Error(f"Prompt template file not found: prompts/{filename}")
55
-
56
- def generate_storyboard(self, prompt: str, num_keyframes: int, ref_image_paths: List[str]) -> List[str]:
57
- """Acts as a Scriptwriter to generate a storyboard."""
58
- try:
59
- template = self._read_prompt_template("unified_storyboard_prompt.txt")
60
- storyboard_prompt = template.format(user_prompt=prompt, num_fragments=num_keyframes)
61
- images = [Image.open(p) for p in ref_image_paths]
62
-
63
- # Assemble all parts into a single list for the manager
64
- prompt_parts = [storyboard_prompt] + images
65
- storyboard_data = gemini_manager_singleton.get_json_object(prompt_parts)
66
-
67
- storyboard = storyboard_data.get("scene_storyboard", [])
68
- if not storyboard or len(storyboard) != num_keyframes:
69
- raise ValueError(f"Incorrect number of scenes generated. Expected {num_keyframes}, got {len(storyboard)}.")
70
- return storyboard
71
- except Exception as e:
72
- raise gr.Error(f"The Scriptwriter (Deformes2D Thinker) failed: {e}")
73
-
74
- def select_keyframes_from_pool(self, storyboard: list, base_image_paths: list[str], pool_image_paths: list[str]) -> list[str]:
75
- """Acts as a Photographer/Editor to select keyframes."""
76
- if not pool_image_paths:
77
- raise gr.Error("The 'image pool' (Additional Images) is empty.")
78
-
79
- try:
80
- template = self._read_prompt_template("keyframe_selection_prompt.txt")
81
-
82
- image_map = {f"IMG-{i+1}": path for i, path in enumerate(pool_image_paths)}
83
-
84
- prompt_parts = ["# Reference Images (Story Base)"]
85
- prompt_parts.extend([Image.open(p) for p in base_image_paths])
86
- prompt_parts.append("\n# Image Pool (Scene Bank)")
87
- prompt_parts.extend([Image.open(p) for p in pool_image_paths])
88
-
89
- storyboard_str = "\n".join([f"- Scene {i+1}: {s}" for i, s in enumerate(storyboard)])
90
- selection_prompt = template.format(storyboard_str=storyboard_str, image_identifiers=list(image_map.keys()))
91
- prompt_parts.append(selection_prompt)
92
-
93
- selection_data = gemini_manager_singleton.get_json_object(prompt_parts)
94
-
95
- selected_identifiers = selection_data.get("selected_image_identifiers", [])
96
-
97
- if len(selected_identifiers) != len(storyboard):
98
- raise ValueError("The AI did not select the correct number of images for the scenes.")
99
-
100
- selected_paths = [image_map[identifier] for identifier in selected_identifiers]
101
- return selected_paths
102
-
103
- except Exception as e:
104
- raise gr.Error(f"The Photographer (Deformes2D Thinker) failed to select images: {e}")
105
-
106
- def get_anticipatory_keyframe_prompt(self, global_prompt: str, scene_history: str, current_scene_desc: str, future_scene_desc: str, last_image_path: str, fixed_ref_paths: list[str]) -> str:
107
- """Acts as an Art Director to generate an image prompt."""
108
- try:
109
- template = self._read_prompt_template("anticipatory_keyframe_prompt.txt")
110
-
111
- director_prompt = template.format(
112
- historico_prompt=scene_history,
113
- cena_atual=current_scene_desc,
114
- cena_futura=future_scene_desc
115
- )
116
-
117
- prompt_parts = [
118
- f"# CONTEXT:\n- Global Story Goal: {global_prompt}\n# VISUAL ASSETS:",
119
- "Current Base Image [IMG-BASE]:",
120
- Image.open(last_image_path)
121
- ]
122
-
123
- ref_counter = 1
124
- for path in fixed_ref_paths:
125
- if path != last_image_path:
126
- prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)])
127
- ref_counter += 1
128
-
129
- prompt_parts.append(director_prompt)
130
-
131
- final_flux_prompt = gemini_manager_singleton.get_raw_text(prompt_parts)
132
-
133
- return final_flux_prompt.strip().replace("`", "").replace("\"", "")
134
- except Exception as e:
135
- raise gr.Error(f"The Art Director (Deformes2D Thinker) failed: {e}")
136
-
137
- def get_cinematic_decision(self, global_prompt: str, story_history: str,
138
- past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
139
- past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> dict:
140
- """Acts as a Film Director to make editing decisions and generate motion prompts."""
141
- try:
142
- template = self._read_prompt_template("cinematic_director_prompt.txt")
143
- prompt_text = template.format(
144
- global_prompt=global_prompt,
145
- story_history=story_history,
146
- past_scene_desc=past_scene_desc,
147
- present_scene_desc=present_scene_desc,
148
- future_scene_desc=future_scene_desc
149
- )
150
-
151
- prompt_parts = [
152
- prompt_text,
153
- "[PAST_IMAGE]:", Image.open(past_keyframe_path),
154
- "[PRESENT_IMAGE]:", Image.open(present_keyframe_path),
155
- "[FUTURE_IMAGE]:", Image.open(future_keyframe_path)
156
- ]
157
-
158
- decision_data = gemini_manager_singleton.get_json_object(prompt_parts)
159
-
160
- if "transition_type" not in decision_data or "motion_prompt" not in decision_data:
161
- raise ValueError("AI response (Cinematographer) is malformed. Missing 'transition_type' or 'motion_prompt'.")
162
- return decision_data
163
- except Exception as e:
164
- logger.error(f"The Film Director (Deformes2D Thinker) failed: {e}. Using fallback to 'continuous'.", exc_info=True)
165
- return {
166
- "transition_type": "continuous",
167
- "motion_prompt": f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
168
- }
169
-
170
- # --- Singleton Instance ---
171
- deformes2d_thinker_singleton = Deformes2DThinker()
engineers/deformes3D.py DELETED
@@ -1,193 +0,0 @@
1
- # engineers/deformes3D.py
2
- #
3
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
4
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
5
- #
6
- # Contato:
7
- # Carlos Rodrigues dos Santos
8
- # carlex22@gmail.com
9
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
10
- #
11
- # Repositórios e Projetos Relacionados:
12
- # GitHub: https://github.com/carlex22/Aduc-sdr
13
- #
14
- # This program is free software: you can redistribute it and/or modify
15
- # it under the terms of the GNU Affero General Public License as published by
16
- # the Free Software Foundation, either version 3 of the License, or
17
- # (at your option) any later version.
18
- #
19
- # This program is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
- # GNU Affero General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Affero General Public License
25
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
26
- #
27
- # This program is free software: you can redistribute it and/or modify
28
- # it under the terms of the GNU Affero General Public License...
29
- # PENDING PATENT NOTICE: Please see NOTICE.md.
30
- #
31
- # Version 2.0.1
32
-
33
- from PIL import Image, ImageOps
34
- import os
35
- import time
36
- import logging
37
- import gradio as gr
38
- import yaml
39
- import torch
40
- import numpy as np
41
-
42
- from managers.flux_kontext_manager import flux_kontext_singleton
43
- from engineers.deformes2D_thinker import deformes2d_thinker_singleton
44
- from aduc_types import LatentConditioningItem
45
- from managers.ltx_manager import ltx_manager_singleton
46
- from managers.vae_manager import vae_manager_singleton
47
- from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
48
-
49
- logger = logging.getLogger(__name__)
50
-
51
- class Deformes3DEngine:
52
- """
53
- ADUC Specialist for static image (keyframe) generation.
54
- """
55
- def __init__(self, workspace_dir):
56
- self.workspace_dir = workspace_dir
57
- self.image_generation_helper = flux_kontext_singleton
58
- logger.info("3D Engine (Image Specialist) ready to receive orders from the Maestro.")
59
-
60
- def _generate_single_keyframe(self, prompt: str, reference_images: list[Image.Image], output_filename: str, width: int, height: int, callback: callable = None) -> str:
61
- """
62
- Low-level function that generates a single image using the LTX helper.
63
- """
64
- logger.info(f"Generating keyframe '{output_filename}' with prompt: '{prompt}'")
65
- generated_image = self.image_generation_helper.generate_image(
66
- reference_images=reference_images, prompt=prompt, width=width,
67
- height=height, seed=int(time.time()), callback=callback
68
- )
69
- final_path = os.path.join(self.workspace_dir, output_filename)
70
- generated_image.save(final_path)
71
- logger.info(f"Keyframe successfully saved to: {final_path}")
72
- return final_path
73
-
74
- def generate_keyframes_from_storyboard(self, storyboard: list, initial_ref_path: str, global_prompt: str, keyframe_resolution: int, general_ref_paths: list, progress_callback_factory: callable = None):
75
- """
76
- Orchestrates the generation of all keyframes.
77
- """
78
- current_base_image_path = initial_ref_path
79
- previous_prompt = "N/A (initial reference image)"
80
- final_keyframes_gallery = [] #[current_base_image_path]
81
- width, height = keyframe_resolution, keyframe_resolution
82
- target_resolution_tuple = (width, height)
83
-
84
- num_keyframes_to_generate = len(storyboard) - 1
85
- logger.info(f"IMAGE SPECIALIST: Received order to generate {num_keyframes_to_generate} keyframes (LTX versions).")
86
-
87
- for i in range(num_keyframes_to_generate):
88
- scene_index = i + 1
89
- current_scene = storyboard[i]
90
- future_scene = storyboard[i+1]
91
- progress_callback_flux = progress_callback_factory(scene_index, num_keyframes_to_generate) if progress_callback_factory else None
92
-
93
- logger.info(f"--> Generating Keyframe {scene_index}/{num_keyframes_to_generate}...")
94
-
95
- # --- STEP A: Generate with FLUX (Primary Method) ---
96
- logger.info(f" - Step A: Generating with keyframe...")
97
-
98
- img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
99
- global_prompt=global_prompt, scene_history=previous_prompt,
100
- current_scene_desc=current_scene, future_scene_desc=future_scene,
101
- last_image_path=current_base_image_path, fixed_ref_paths=general_ref_paths
102
- )
103
-
104
- #flux_ref_paths = list(set([current_base_image_path] + general_ref_paths))
105
- #flux_ref_images = [Image.open(p) for p in flux_ref_paths]
106
-
107
- #flux_keyframe_path = self._generate_single_keyframe(
108
- # prompt=img_prompt, reference_images=flux_ref_images,
109
- # output_filename=f"keyframe_{scene_index}_flux.png", width=width, height=height,
110
- # callback=progress_callback_flux
111
- #)
112
- #final_keyframes_gallery.append(flux_keyframe_path)
113
-
114
- # --- STEP B: LTX Enrichment Experiment ---
115
- #logger.info(f" - Step B: Generating enrichment with LTX...")
116
-
117
- ltx_context_paths = []
118
- context_paths = []
119
- context_paths = [current_base_image_path] + [p for p in general_ref_paths if p != current_base_image_path][:3]
120
-
121
- ltx_context_paths = list(reversed(context_paths))
122
- logger.info(f" - LTX Context Order (Reversed): {[os.path.basename(p) for p in ltx_context_paths]}")
123
-
124
- ltx_conditioning_items = []
125
-
126
- weight = 0.6
127
- for idx, path in enumerate(ltx_context_paths):
128
- img_pil = Image.open(path).convert("RGB")
129
- img_processed = self._preprocess_image_for_latent_conversion(img_pil, target_resolution_tuple)
130
- pixel_tensor = self._pil_to_pixel_tensor(img_processed)
131
- latent_tensor = vae_manager_singleton.encode(pixel_tensor)
132
-
133
- ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
134
-
135
- if idx >= 0:
136
- weight -= 0.1
137
-
138
- ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
139
- generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
140
- height=height, width=width,
141
- conditioning_items_data=ltx_conditioning_items,
142
- motion_prompt=img_prompt,
143
- video_total_frames=48,
144
- video_fps=24,
145
- **ltx_base_params
146
- )
147
-
148
- final_latent = generated_latents[:, :, -1:, :, :]
149
- upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
150
- enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent)
151
-
152
- ltx_keyframe_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index}_ltx.png")
153
- self.save_image_from_tensor(enriched_pixel_tensor, ltx_keyframe_path)
154
- final_keyframes_gallery.append(ltx_keyframe_path)
155
-
156
- # Use the FLUX keyframe as the base for the next iteration to maintain the primary narrative path
157
- current_base_image_path = ltx_keyframe_path #flux_keyframe_path
158
- previous_prompt = img_prompt
159
-
160
- logger.info(f"IMAGE SPECIALIST: Generation of all keyframe versions (LTX) complete.")
161
- return final_keyframes_gallery
162
-
163
- # --- HELPER FUNCTIONS ---
164
-
165
- def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
166
- """Resizes and fits an image to the target resolution for VAE encoding."""
167
- if image.size != target_resolution:
168
- return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
169
- return image
170
-
171
- def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
172
- """Helper to convert PIL to the 5D pixel tensor the VAE expects."""
173
- image_np = np.array(pil_image).astype(np.float32) / 255.0
174
- tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
175
- return (tensor * 2.0) - 1.0
176
-
177
- def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
178
- """Helper to save a 1-frame pixel tensor as an image."""
179
- tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
180
- tensor_hwc = tensor_chw.permute(1, 2, 0)
181
- tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
182
- image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
183
- Image.fromarray(image_np).save(path)
184
-
185
- # --- Singleton Instantiation ---
186
- try:
187
- with open("config.yaml", 'r') as f:
188
- config = yaml.safe_load(f)
189
- WORKSPACE_DIR = config['application']['workspace_dir']
190
- deformes3d_engine_singleton = Deformes3DEngine(workspace_dir=WORKSPACE_DIR)
191
- except Exception as e:
192
- logger.error(f"Could not initialize Deformes3DEngine: {e}", exc_info=True)
193
- deformes3d_engine_singleton = None
engineers/deformes3D_thinker.py DELETED
@@ -1,136 +0,0 @@
1
- # engineers/deformes3D_thinker.py
2
- #
3
- # Copyright (C) 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Version: 4.0.0 (Definitive)
6
- #
7
- # This is the definitive, robust implementation. It directly contains the prompt
8
- # enhancement logic copied from the LTX pipeline's utils. It accesses the
9
- # enhancement models loaded by the LTX Manager and performs the captioning
10
- # and LLM generation steps locally, ensuring full control and compatibility.
11
-
12
- import logging
13
- from PIL import Image
14
- import torch
15
-
16
- # Imports the LTX singleton to access its pipeline and the models inside it
17
- from managers.ltx_manager import ltx_manager_singleton
18
-
19
- # Imports the LTX system prompt to ensure consistency
20
- from ltx_video.utils.prompt_enhance_utils import I2V_CINEMATIC_PROMPT
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
- class Deformes3DThinker:
25
- """
26
- The tactical specialist that now directly implements the prompt enhancement
27
- logic, using the models provided by the LTX pipeline.
28
- """
29
-
30
- def __init__(self):
31
- # Accesses the exposed pipeline to obtain the required models
32
- pipeline = ltx_manager_singleton.prompt_enhancement_pipeline
33
- if not pipeline:
34
- raise RuntimeError("Deformes3DThinker could not access the LTX pipeline.")
35
-
36
- # Stores the models and processors as direct attributes
37
- self.caption_model = pipeline.prompt_enhancer_image_caption_model
38
- self.caption_processor = pipeline.prompt_enhancer_image_caption_processor
39
- self.llm_model = pipeline.prompt_enhancer_llm_model
40
- self.llm_tokenizer = pipeline.prompt_enhancer_llm_tokenizer
41
-
42
- # Checks whether the models were actually loaded
43
- if not all([self.caption_model, self.caption_processor, self.llm_model, self.llm_tokenizer]):
44
- logger.warning("Deformes3DThinker initialized, but one or more enhancement models were not loaded by the LTX pipeline. Fallback will be used.")
45
- else:
46
- logger.info("Deformes3DThinker initialized and successfully linked to LTX enhancement models.")
47
-
48
- @torch.no_grad()
49
- def get_enhanced_motion_prompt(self, global_prompt: str, story_history: str,
50
- past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
51
- past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> str:
52
- """
53
- Generates a refined motion prompt by directly executing the enhancement pipeline logic.
54
- """
55
- # Checks that the models are available before trying to use them
56
- if not all([self.caption_model, self.caption_processor, self.llm_model, self.llm_tokenizer]):
57
- logger.warning("Enhancement models not available. Using fallback prompt.")
58
- return f"A cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
59
-
60
- try:
61
- present_image = Image.open(present_keyframe_path).convert("RGB")
62
-
63
- # --- START OF LOGIC COPIED AND ADAPTED FROM LTX ---
64
-
65
- # 1. Generate the caption for the (present) reference image
66
- image_captions = self._generate_image_captions([present_image])
67
-
68
- # 2. Build the prompt for the LLM
69
- # We use the future scene as the "user prompt"
70
- messages = [
71
- {"role": "system", "content": I2V_CINEMATIC_PROMPT},
72
- {"role": "user", "content": f"user_prompt: {future_scene_desc}\nimage_caption: {image_captions[0]}"},
73
- ]
74
-
75
- # 3. Generate and decode the final prompt with the LLM
76
- enhanced_prompt = self._generate_and_decode_prompts(messages)
77
-
78
- # --- END OF COPIED AND ADAPTED LOGIC ---
79
-
80
- logger.info(f"Deformes3DThinker received enhanced prompt: '{enhanced_prompt}'")
81
- return enhanced_prompt
82
-
83
- except Exception as e:
84
- logger.error(f"The Film Director (Deformes3D Thinker) failed during enhancement: {e}. Using fallback.", exc_info=True)
85
- return f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
86
-
87
- def _generate_image_captions(self, images: list[Image.Image]) -> list[str]:
88
- """
89
- Internal logic for generating captions, copied from the LTX utils.
90
- """
91
- # The LTX Florence-2 model does not use a system_prompt here, but a task_prompt
92
- task_prompt = "<MORE_DETAILED_CAPTION>"
93
- inputs = self.caption_processor(
94
- text=[task_prompt] * len(images), images=images, return_tensors="pt"
95
- ).to(self.caption_model.device)
96
-
97
- generated_ids = self.caption_model.generate(
98
- input_ids=inputs["input_ids"],
99
- pixel_values=inputs["pixel_values"],
100
- max_new_tokens=1024,
101
- num_beams=3,
102
- )
103
-
104
- # Uses post_process_generation to extract the clean response
105
- generated_text = self.caption_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
106
- processed_result = self.caption_processor.post_process_generation(
107
- generated_text,
108
- task=task_prompt,
109
- image_size=(images[0].width, images[0].height)
110
- )
111
- return [processed_result[task_prompt]]
112
-
113
- def _generate_and_decode_prompts(self, messages: list[dict]) -> str:
114
- """
115
- Internal logic for generating the prompt with the LLM, copied from the LTX utils.
116
- """
117
- text = self.llm_tokenizer.apply_chat_template(
118
- messages, tokenize=False, add_generation_prompt=True
119
- )
120
- model_inputs = self.llm_tokenizer([text], return_tensors="pt").to(self.llm_model.device)
121
-
122
- output_ids = self.llm_model.generate(**model_inputs, max_new_tokens=256)
123
-
124
- input_ids_len = model_inputs.input_ids.shape[1]
125
- decoded_prompts = self.llm_tokenizer.batch_decode(
126
- output_ids[:, input_ids_len:], skip_special_tokens=True
127
- )
128
- return decoded_prompts[0].strip()
129
-
130
- # --- Singleton Instantiation ---
131
- try:
132
- deformes3d_thinker_singleton = Deformes3DThinker()
133
- except Exception as e:
134
- # The failure will already have been logged inside __init__
135
- deformes3d_thinker_singleton = None
136
- raise e
engineers/deformes4D.py DELETED
@@ -1,338 +0,0 @@
1
- # engineers/deformes4D.py
2
- #
3
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
4
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
5
- #
6
- # Contato:
7
- # Carlos Rodrigues dos Santos
8
- # carlex22@gmail.com
9
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
10
- #
11
- # Repositórios e Projetos Relacionados:
12
- # GitHub: https://github.com/carlex22/Aduc-sdr
13
- #
14
- # This program is free software: you can redistribute it and/or modify
15
- # it under the terms of the GNU Affero General Public License as published by
16
- # the Free Software Foundation, either version 3 of the License, or
17
- # (at your option) any later version.
18
- #
19
- # This program is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
- # GNU Affero General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Affero General Public License
25
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
26
- #
27
- # This program is free software: you can redistribute it and/or modify
28
- # it under the terms of the GNU Affero General Public License...
29
- # PENDING PATENT NOTICE: Please see NOTICE.md.
30
- #
31
- # Version 2.0.1
32
-
33
- import os
34
- import time
35
- import imageio
36
- import numpy as np
37
- import torch
38
- import logging
39
- from PIL import Image, ImageOps
40
- from dataclasses import dataclass
41
- import gradio as gr
42
- import subprocess
43
- import gc
44
- import shutil
45
- from pathlib import Path
46
- from typing import List, Tuple, Generator, Dict, Any
47
-
48
- from aduc_types import LatentConditioningItem
49
- from managers.ltx_manager import ltx_manager_singleton
50
- from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
51
- from managers.vae_manager import vae_manager_singleton
52
- from engineers.deformes2D_thinker import deformes2d_thinker_singleton
53
- from managers.seedvr_manager import seedvr_manager_singleton
54
- from managers.mmaudio_manager import mmaudio_manager_singleton
55
- from tools.video_encode_tool import video_encode_tool_singleton
56
-
57
- logger = logging.getLogger(__name__)
58
-
59
- class Deformes4DEngine:
60
- """
61
- Implements the Camera (Ψ) and Distiller (Δ) of the ADUC-SDR architecture.
62
- Orchestrates the generation, latent post-production, and final rendering of video fragments.
63
- """
64
- def __init__(self, workspace_dir="deformes_workspace"):
65
- self.workspace_dir = workspace_dir
66
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
67
- logger.info("Deformes4D Specialist (ADUC-SDR Executor) initialized.")
68
- os.makedirs(self.workspace_dir, exist_ok=True)
69
-
70
- # --- HELPER METHODS ---
71
-
72
- def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
73
- """Saves a pixel-space tensor as an MP4 video file."""
74
- if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
75
- video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
76
- video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
77
- video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
78
- with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
79
- for frame in video_np: writer.append_data(frame)
80
-
81
- def read_video_to_tensor(self, video_path: str) -> torch.Tensor:
82
- """Reads a video file and converts it into a pixel-space tensor."""
83
- with imageio.get_reader(video_path, 'ffmpeg') as reader:
84
- frames = [frame for frame in reader]
85
-
86
- frames_np = np.stack(frames, axis=0).astype(np.float32) / 255.0
87
- # (F, H, W, C) -> (C, F, H, W)
88
- tensor = torch.from_numpy(frames_np).permute(3, 0, 1, 2)
89
- tensor = tensor.unsqueeze(0) # (B, C, F, H, W)
90
- tensor = (tensor * 2.0) - 1.0 # Normalize to [-1, 1]
91
- return tensor.to(self.device)
92
-
93
- def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
94
- """Resizes and fits an image to the target resolution for VAE encoding."""
95
- if image.size != target_resolution:
96
- return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
97
- return image
98
-
99
- def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
100
- """Converts a PIL Image to a latent tensor by calling the VaeManager."""
101
- image_np = np.array(pil_image).astype(np.float32) / 255.0
102
- tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
103
- tensor = (tensor * 2.0) - 1.0
104
- return vae_manager_singleton.encode(tensor)
105
-
106
- # --- CORE ADUC-SDR LOGIC ---
107
-
108
- def generate_original_movie(self, keyframes: list, global_prompt: str, storyboard: list,
109
- seconds_per_fragment: float, trim_percent: int,
110
- handler_strength: float, destination_convergence_strength: float,
111
- video_resolution: int, use_continuity_director: bool,
112
- guidance_scale: float, stg_scale: float, num_inference_steps: int,
113
- progress: gr.Progress = gr.Progress()):
114
- FPS = 24
115
- FRAMES_PER_LATENT_CHUNK = 8
116
- LATENT_PROCESSING_CHUNK_SIZE = 4
117
-
118
- run_timestamp = int(time.time())
119
- temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}")
120
- temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
121
- os.makedirs(temp_latent_dir, exist_ok=True)
122
- os.makedirs(temp_video_clips_dir, exist_ok=True)
123
-
124
- total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
125
- frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
126
- latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
127
-
128
- #if frames_a_podar % 2 == 0:
129
- # frames_a_podar = frames_a_podar-1
130
-
131
- total_latent_frames = total_frames_brutos // FRAMES_PER_LATENT_CHUNK
132
-
133
- DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
134
- DESTINATION_FRAME_TARGET = total_frames_brutos - 1
135
-
136
- base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps, "rescaling_scale": 0.15, "image_cond_noise_scale": 0.00}
137
- keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]
138
- story_history = ""
139
- target_resolution_tuple = (video_resolution, video_resolution)
140
- eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
141
- latent_fragment_paths = []
142
-
143
- if len(keyframe_paths) < 2: raise gr.Error(f"Generation requires at least 2 keyframes. You provided {len(keyframe_paths)}.")
144
- num_transitions_to_generate = len(keyframe_paths) - 1
145
-
146
- logger.info("--- STARTING STAGE 1: Latent Fragment Generation ---")
147
- for i in range(num_transitions_to_generate):
148
- fragment_index = i + 1
149
- progress(i / num_transitions_to_generate, desc=f"Generating Latent {fragment_index}/{num_transitions_to_generate}")
150
- past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
151
- start_keyframe_path = keyframe_paths[i]
152
- destination_keyframe_path = keyframe_paths[i + 1]
153
- future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene."
154
- logger.info(f"Calling deformes2D_thinker to generate cinematic decision for fragment {fragment_index}...")
155
- decision = deformes2d_thinker_singleton.get_cinematic_decision(global_prompt, story_history, past_keyframe_path, start_keyframe_path, destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.", storyboard[i], future_story_prompt)
156
- transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
157
- story_history += f"\n- Act {fragment_index}: {motion_prompt}"
158
-
159
- conditioning_items = []
160
- if eco_latent_for_next_loop is None:
161
- img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
162
- conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_start), 0, 1.0))
163
- else:
164
- conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
165
- conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
166
-
167
- if transition_type == "cutx":
168
- logger.info(f"Cinematic Director chose a 'cut'. Creating FFmpeg transition bridge...")
169
- bridge_duration_seconds = FRAMES_PER_LATENT_CHUNK / FPS
170
- bridge_video_path = video_encode_tool_singleton.create_transition_bridge(
171
- start_image_path=start_keyframe_path, end_image_path=destination_keyframe_path,
172
- duration=bridge_duration_seconds, fps=FPS, target_resolution=target_resolution_tuple,
173
- workspace_dir=self.workspace_dir
174
- )
175
- bridge_pixel_tensor = self.read_video_to_tensor(bridge_video_path)
176
- bridge_latent_tensor = vae_manager_singleton.encode(bridge_pixel_tensor)
177
- final_fade_latent = bridge_latent_tensor[:, :, -2:, :, :]
178
- conditioning_items.append(LatentConditioningItem(final_fade_latent, total_latent_frames - 16, 0.95))
179
- #img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
180
- #conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength * 0.5))
181
- del bridge_pixel_tensor, bridge_latent_tensor, final_fade_latent
182
- if os.path.exists(bridge_video_path): os.remove(bridge_video_path)
183
- else:
184
- img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
185
- conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
186
-
187
- current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
188
- logger.info(f"Calling LTX to generate video latents for fragment {fragment_index} ({total_frames_brutos} frames)...")
189
- latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
190
- num_latent_frames = latents_brutos.shape[2]
191
- logger.info(f"LTX responded with a latent tensor of shape {latents_brutos.shape}, representing ~{num_latent_frames * 8 + 1} video frames at {FPS} FPS.")
192
-
193
- last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
194
- eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
195
- dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
196
- latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
197
- latents_video = latents_video[:, :, 1:, :, :]
198
- del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
199
-
200
- if transition_type == "cutx":
201
- eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
202
-
203
-
204
- cpu_latent = latents_video.cpu()
205
- latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
206
- torch.save(cpu_latent, latent_path)
207
- latent_fragment_paths.append(latent_path)
208
- del latents_video, cpu_latent; gc.collect()
209
- del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache()
210
-
211
- logger.info(f"--- STARTING STAGE 2: Processing {len(latent_fragment_paths)} latents in chunks of {LATENT_PROCESSING_CHUNK_SIZE} ---")
212
- final_video_clip_paths = []
213
- num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE)
214
- for i in range(num_chunks):
215
- chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE
216
- chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE
217
- chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index]
218
- progress(i / num_chunks, desc=f"Processing & Decoding Batch {i+1}/{num_chunks}")
219
- tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
220
- tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
221
- sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
222
- del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
223
- logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
224
- base_name = f"clip_{i:04d}_{run_timestamp}"
225
- current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
226
- pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
227
- self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
228
- del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
229
- final_video_clip_paths.append(current_clip_path)
230
-
231
- progress(0.98, desc="Final assembly of clips...")
232
- final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4")
233
- video_encode_tool_singleton.concatenate_videos(video_paths=final_video_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir)
234
- logger.info("Cleaning up temporary clip files...")
235
- try:
236
- shutil.rmtree(temp_video_clips_dir)
237
- except OSError as e:
238
- logger.warning(f"Could not remove temporary clip directory: {e}")
239
- logger.info(f"Process complete! Original video saved to: {final_video_path}")
240
- return {"final_path": final_video_path, "latent_paths": latent_fragment_paths}
241
-
242
- def upscale_latents_and_create_video(self, latent_paths: list, chunk_size: int, progress: gr.Progress):
243
- if not latent_paths:
244
- raise gr.Error("Cannot perform upscaling: no latent paths were provided.")
245
- logger.info("--- STARTING POST-PRODUCTION: Latent Upscaling ---")
246
- run_timestamp = int(time.time())
247
- temp_upscaled_clips_dir = os.path.join(self.workspace_dir, f"temp_upscaled_clips_{run_timestamp}")
248
- os.makedirs(temp_upscaled_clips_dir, exist_ok=True)
249
- final_upscaled_clip_paths = []
250
- num_chunks = -(-len(latent_paths) // chunk_size)
251
- for i in range(num_chunks):
252
- chunk_start_index = i * chunk_size
253
- chunk_end_index = chunk_start_index + chunk_size
254
- chunk_paths = latent_paths[chunk_start_index:chunk_end_index]
255
- progress(i / num_chunks, desc=f"Upscaling & Decoding Batch {i+1}/{num_chunks}")
256
- tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
257
- tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
258
- sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
259
- del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
260
- logger.info(f"Batch {i+1} loaded. Original latent shape: {sub_group_latent.shape}")
261
- upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
262
- del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
263
- logger.info(f"Batch {i+1} upscaled. New latent shape: {upscaled_latent_chunk.shape}")
264
- pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
265
- del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
266
- base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
267
- current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
268
- self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24)
269
- final_upscaled_clip_paths.append(current_clip_path)
270
- del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
271
- logger.info(f"Saved upscaled clip: {Path(current_clip_path).name}")
272
- progress(0.98, desc="Assembling upscaled clips...")
273
- final_video_path = os.path.join(self.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4")
274
- video_encode_tool_singleton.concatenate_videos(video_paths=final_upscaled_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir)
275
- logger.info("Cleaning up temporary upscaled clip files...")
276
- try:
277
- shutil.rmtree(temp_upscaled_clips_dir)
278
- except OSError as e:
279
- logger.warning(f"Could not remove temporary upscaled clip directory: {e}")
280
- logger.info(f"Latent upscaling complete! Final video at: {final_video_path}")
281
- yield {"final_path": final_video_path}
282
-
283
- def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress):
284
- logger.info(f"--- STARTING POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
285
- progress(0.1, desc=f"Preparing for HD Mastering with SeedVR {model_version}...")
286
- run_timestamp = int(time.time())
287
- output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{model_version}_{run_timestamp}.mp4")
288
- try:
289
- final_path = seedvr_manager_singleton.process_video(
290
- input_video_path=source_video_path,
291
- output_video_path=output_path,
292
- prompt=prompt,
293
- model_version=model_version,
294
- steps=steps,
295
- progress=progress
296
- )
297
- logger.info(f"HD Mastering complete! Final video at: {final_path}")
298
- yield {"final_path": final_path}
299
- except Exception as e:
300
- logger.error(f"HD Mastering failed: {e}", exc_info=True)
301
- raise gr.Error(f"HD Mastering failed. Details: {e}")
302
-
303
- def generate_audio_for_final_video(self, source_video_path: str, audio_prompt: str, progress: gr.Progress):
304
- logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
305
- progress(0.1, desc="Preparing for audio generation...")
306
- run_timestamp = int(time.time())
307
- source_name = Path(source_video_path).stem
308
- output_path = os.path.join(self.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4")
309
- try:
310
- result = subprocess.run(
311
- ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
312
- capture_output=True, text=True, check=True)
313
- duration = float(result.stdout.strip())
314
- logger.info(f"Source video duration: {duration:.2f} seconds.")
315
- progress(0.5, desc="Generating audio track...")
316
- final_path = mmaudio_manager_singleton.generate_audio_for_video(
317
- video_path=source_video_path,
318
- prompt=audio_prompt,
319
- duration_seconds=duration,
320
- output_path_override=output_path
321
- )
322
- logger.info(f"Audio generation complete! Final video with audio at: {final_path}")
323
- progress(1.0, desc="Audio generation complete!")
324
- yield {"final_path": final_path}
325
- except Exception as e:
326
- logger.error(f"Audio generation failed: {e}", exc_info=True)
327
- raise gr.Error(f"Audio generation failed. Details: {e}")
328
-
329
- def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
330
- """Internal helper to call the LTX manager."""
331
- final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items}
332
- return ltx_manager_singleton.generate_latent_fragment(**final_ltx_params)
333
-
334
- def _quantize_to_multiple(self, n, m):
335
- """Helper to round n to the nearest multiple of m."""
336
- if m == 0: return n
337
- quantized = int(round(n / m) * m)
338
- return m if n > 0 and quantized == 0 else quantized
engineers/deformes7D.py DELETED
@@ -1,316 +0,0 @@
1
- # engineers/deformes7D.py
2
- #
3
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
4
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
5
- #
6
- # Contato:
7
- # Carlos Rodrigues dos Santos
8
- # carlex22@gmail.com
9
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
10
- #
11
- # Repositórios e Projetos Relacionados:
12
- # GitHub: https://github.com/carlex22/Aduc-sdr
13
- #
14
- # This program is free software: you can redistribute it and/or modify
15
- # it under the terms of the GNU Affero General Public License as published by
16
- # the Free Software Foundation, either version 3 of the License, or
17
- # (at your option) any later version.
18
- #
19
- # This program is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
- # GNU Affero General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Affero General Public License
25
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
26
- #
27
- # This program is free software: you can redistribute it and/or modify
28
- # it under the terms of the GNU Affero General Public License...
29
- # PENDING PATENT NOTICE: Please see NOTICE.md.
30
- #
31
- # Version 3.2.1
32
-
33
- import os
34
- import time
35
- import imageio
36
- import numpy as np
37
- import torch
38
- import logging
39
- from PIL import Image, ImageOps
40
- import gradio as gr
41
- import subprocess
42
- import gc
43
- import yaml
44
- import shutil
45
- from pathlib import Path
46
- from typing import List, Tuple, Dict, Generator
47
-
48
- from aduc_types import LatentConditioningItem
49
- from managers.ltx_manager import ltx_manager_singleton
50
- from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
51
- from managers.vae_manager import vae_manager_singleton
52
- from engineers.deformes2D_thinker import deformes2d_thinker_singleton
53
- from engineers.deformes3D_thinker import deformes3d_thinker_singleton
54
- from managers.seedvr_manager import seedvr_manager_singleton
55
- from managers.mmaudio_manager import mmaudio_manager_singleton
56
- from tools.video_encode_tool import video_encode_tool_singleton
57
-
58
- logger = logging.getLogger(__name__)
59
-
60
- class Deformes7DEngine:
61
- # ... (the entire class body remains exactly the same as in our last version) ...
62
- """
63
- Unified 3D/4D engine for continuous, interleaved generation of keyframes and video fragments.
64
- """
65
- def __init__(self, workspace_dir="deformes_workspace"):
66
- self.workspace_dir = workspace_dir
67
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
68
- logger.info("Deformes7D Unified Engine initialized.")
69
- os.makedirs(self.workspace_dir, exist_ok=True)
70
-
71
- # --- HELPER METHODS ---
72
- def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
73
- """Saves a pixel-space tensor as an MP4 video file."""
74
- if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
75
- video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
76
- video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
77
- video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
78
- with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
79
- for frame in video_np: writer.append_data(frame)
80
-
81
- def read_video_to_tensor(self, video_path: str) -> torch.Tensor:
82
- """Reads a video file and converts it into a pixel-space tensor."""
83
- with imageio.get_reader(video_path, 'ffmpeg') as reader:
84
- frames = [frame for frame in reader]
85
- frames_np = np.stack(frames, axis=0).astype(np.float32) / 255.0
86
- tensor = torch.from_numpy(frames_np).permute(3, 0, 1, 2)
87
- tensor = tensor.unsqueeze(0)
88
- tensor = (tensor * 2.0) - 1.0
89
- return tensor.to(self.device)
90
-
91
- def _preprocess_image(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
92
- if image.size != target_resolution:
93
- return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
94
- return image
95
-
96
- def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
97
- image_np = np.array(pil_image).astype(np.float32) / 255.0
98
- tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
99
- return (tensor * 2.0) - 1.0
100
-
101
- def _save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
102
- tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
103
- tensor_hwc = tensor_chw.permute(1, 2, 0)
104
- tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
105
- image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
106
- Image.fromarray(image_np).save(path)
107
-
108
- def _quantize_to_multiple(self, n, m):
109
- if m == 0: return n
110
- quantized = int(round(n / m) * m)
111
- return m if n > 0 and quantized == 0 else quantized
112
-
113
- # --- CORE GENERATION LOGIC ---
114
- def _generate_next_causal_keyframe(self, base_keyframe_path: str, all_ref_paths: list,
115
- prompt: str, resolution_tuple: tuple) -> Tuple[str, torch.Tensor]:
116
- # (internal code of this method is unchanged)
117
- ltx_context_paths = [base_keyframe_path] + [p for p in all_ref_paths if p != base_keyframe_path][:3]
118
- ltx_conditioning_items = []
119
- weight = 1.0
120
- for path in ltx_context_paths:
121
- img_pil = Image.open(path).convert("RGB")
122
- img_processed = self._preprocess_image(img_pil, resolution_tuple)
123
- pixel_tensor = self._pil_to_pixel_tensor(img_processed)
124
- latent_tensor = vae_manager_singleton.encode(pixel_tensor)
125
- ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
126
- if weight == 1.0: weight = -0.2
127
- else: weight -= 0.2
128
- ltx_base_params = {"guidance_scale": 3.0, "stg_scale": 0.1, "num_inference_steps": 25}
129
- generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
130
- height=resolution_tuple[0], width=resolution_tuple[1],
131
- conditioning_items_data=ltx_conditioning_items, motion_prompt=prompt,
132
- video_total_frames=48, video_fps=24, **ltx_base_params
133
- )
134
- final_latent = generated_latents[:, :, -1:, :, :]
135
- upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
136
- pixel_tensor_out = vae_manager_singleton.decode(upscaled_latent)
137
- timestamp = int(time.time() * 1000)
138
- output_path = os.path.join(self.workspace_dir, f"keyframe_{timestamp}.png")
139
- self._save_image_from_tensor(pixel_tensor_out, output_path)
140
- return output_path, final_latent
141
-
142
- def generate_full_movie_interleaved(self, initial_ref_paths: list, storyboard: list, global_prompt: str,
143
- video_resolution: int, seconds_per_fragment: float, trim_percent: int,
144
- handler_strength: float, dest_strength: float, ltx_params: dict,
145
- progress=gr.Progress()):
146
- # (internal code of this method is unchanged)
147
- logger.info("--- DEFORMES 7D: INITIATING INTERLEAVED RENDERING PIPELINE ---")
148
- run_timestamp = int(time.time())
149
- temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
150
- os.makedirs(temp_video_clips_dir, exist_ok=True)
151
- FPS = 24
152
- FRAMES_PER_LATENT_CHUNK = 8
153
- resolution_tuple = (video_resolution, video_resolution)
154
- generated_keyframe_paths, generated_keyframe_latents, generated_video_fragment_paths = [], [], []
155
- progress(0, desc="Bootstrap: Processing K0...")
156
- k0_path = initial_ref_paths[0]
157
- k0_pil = Image.open(k0_path).convert("RGB")
158
- k0_processed_pil = self._preprocess_image(k0_pil, resolution_tuple)
159
- k0_pixel_tensor = self._pil_to_pixel_tensor(k0_processed_pil)
160
- k0_latent = vae_manager_singleton.encode(k0_pixel_tensor)
161
- generated_keyframe_paths.append(k0_path)
162
- generated_keyframe_latents.append(k0_latent)
163
- progress(0.01, desc="Bootstrap: Generating K1...")
164
- prompt_k1 = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
165
- global_prompt, "Initial scene.", storyboard[0], storyboard[1], k0_path, initial_ref_paths
166
- )
167
- k1_path, k1_latent = self._generate_next_causal_keyframe(k0_path, initial_ref_paths, prompt_k1, resolution_tuple)
168
- generated_keyframe_paths.append(k1_path)
169
- generated_keyframe_latents.append(k1_latent)
170
- story_history = ""
171
- eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
172
- num_transitions = len(storyboard) - 1
173
- base_4d_ltx_params = {"rescaling_scale": 0.15, "image_cond_noise_scale": 0.00, **ltx_params}
174
-
175
- for i in range(1, num_transitions):
176
- act_progress = i / num_transitions
177
- progress(act_progress, desc=f"Processing Act {i+1}/{num_transitions} (Keyframe Gen)...")
178
- logger.info(f"--> Step 3D: Generating Keyframe K{i+1}")
179
- kx_path = generated_keyframe_paths[i]
180
- prompt_ky = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
181
- global_prompt, story_history, storyboard[i], storyboard[i+1], kx_path, initial_ref_paths
182
- )
183
- ky_path, ky_latent = self._generate_next_causal_keyframe(kx_path, initial_ref_paths, prompt_ky, resolution_tuple)
184
- generated_keyframe_paths.append(ky_path)
185
- generated_keyframe_latents.append(ky_latent)
186
- progress(act_progress, desc=f"Processing Act {i+1}/{num_transitions} (Video Gen)...")
187
- logger.info(f"--> Step 4D: Generating Video Fragment V{i-1}")
188
- kb_path, kx_path, ky_path = generated_keyframe_paths[i-1], generated_keyframe_paths[i], generated_keyframe_paths[i+1]
189
- motion_prompt = deformes3d_thinker_singleton.get_enhanced_motion_prompt(
190
- global_prompt, story_history, kb_path, kx_path, ky_path,
191
- storyboard[i-1], storyboard[i], storyboard[i+1]
192
- )
193
- transition_type = "continuous"
194
- story_history += f"\n- Act {i}: {motion_prompt}"
195
- total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
196
- frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
197
- latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
198
- DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
199
- DESTINATION_FRAME_TARGET = total_frames_brutos - 1
200
- conditioning_items = []
201
- if eco_latent_for_next_loop is None:
202
- conditioning_items.append(LatentConditioningItem(generated_keyframe_latents[i], 0, 1.0))
203
- else:
204
- conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
205
- conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
206
- if transition_type != "cut":
207
- conditioning_items.append(LatentConditioningItem(ky_latent, DESTINATION_FRAME_TARGET, dest_strength))
208
- fragment_latents_brutos, _ = ltx_manager_singleton.generate_latent_fragment(
209
- height=video_resolution, width=video_resolution,
210
- conditioning_items_data=conditioning_items, motion_prompt=motion_prompt,
211
- video_total_frames=total_frames_brutos, video_fps=FPS, **base_4d_ltx_params
212
- )
213
- last_trim = fragment_latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
214
- eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
215
- dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
216
- final_fragment_latents = fragment_latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
217
- final_fragment_latents = final_fragment_latents[:, :, 1:, :, :]
218
- pixel_tensor = vae_manager_singleton.decode(final_fragment_latents)
219
- fragment_path = os.path.join(temp_video_clips_dir, f"fragment_{i-1}.mp4")
220
- self.save_video_from_tensor(pixel_tensor, fragment_path, fps=FPS)
221
- generated_video_fragment_paths.append(fragment_path)
222
- logger.info(f"Video Fragment V{i-1} saved to {fragment_path}")
223
-
224
- logger.info("--- Final Assembly of Video Fragments ---")
225
- final_video_path = os.path.join(self.workspace_dir, f"movie_7D_{run_timestamp}.mp4")
226
- video_encode_tool_singleton.concatenate_videos(generated_video_fragment_paths, final_video_path, self.workspace_dir)
227
- shutil.rmtree(temp_video_clips_dir)
228
- logger.info(f"Full movie generated at: {final_video_path}")
229
- return {"final_path": final_video_path, "all_keyframes": generated_keyframe_paths, "latent_paths": "NOT_IMPLEMENTED_YET"}
230
-
231
- # --- POST-PRODUCTION METHODS ---
232
- def task_run_latent_upscaling(self, latent_paths: list, chunk_size: int, progress: gr.Progress) -> Generator[Dict[str, any], None, None]:
233
- # (internal code of this method is unchanged)
234
- if not latent_paths:
235
- raise gr.Error("Cannot perform upscaling: no latent paths were provided from the main generation.")
236
- logger.info("--- POST-PRODUCTION: Latent Upscaling ---")
237
- run_timestamp = int(time.time())
238
- temp_upscaled_clips_dir = os.path.join(self.workspace_dir, f"temp_upscaled_clips_{run_timestamp}")
239
- os.makedirs(temp_upscaled_clips_dir, exist_ok=True)
240
- final_upscaled_clip_paths = []
241
- num_chunks = -(-len(latent_paths) // chunk_size)
242
- for i in range(num_chunks):
243
- chunk_start_index = i * chunk_size
244
- chunk_end_index = chunk_start_index + chunk_size
245
- chunk_paths = latent_paths[chunk_start_index:chunk_end_index]
246
- progress(i / num_chunks, desc=f"Upscaling & Decoding Batch {i+1}/{num_chunks}")
247
- tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
248
- tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
249
- sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
250
- del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
251
- upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
252
- del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
253
- pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
254
- del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
255
- base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
256
- current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
257
- self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24)
258
- final_upscaled_clip_paths.append(current_clip_path)
259
- del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
260
- progress(0.98, desc="Assembling upscaled clips...")
261
- final_video_path = os.path.join(self.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4")
262
- video_encode_tool_singleton.concatenate_videos(video_paths=final_upscaled_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir)
263
- shutil.rmtree(temp_upscaled_clips_dir)
264
- logger.info(f"Latent upscaling complete! Final video at: {final_video_path}")
265
- yield {"final_path": final_video_path}
266
-
267
- def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress):
268
- # (internal code of this method is unchanged)
269
- logger.info(f"--- POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
270
- run_timestamp = int(time.time())
271
- output_path = os.path.join(self.workspace_dir, f"{Path(source_video_path).stem}_hd.mp4")
272
- try:
273
- final_path = seedvr_manager_singleton.process_video(
274
- input_video_path=source_video_path, output_video_path=output_path,
275
- prompt=prompt, model_version=model_version, steps=steps, progress=progress
276
- )
277
- yield {"final_path": final_path}
278
- except Exception as e:
279
- logger.error(f"HD Mastering failed: {e}", exc_info=True)
280
- raise gr.Error(f"HD Mastering failed. Details: {e}")
281
-
282
- def generate_audio(self, source_video_path: str, audio_prompt: str, progress: gr.Progress):
283
- # (internal code of this method is unchanged)
284
- logger.info(f"--- POST-PRODUCTION: Audio Generation ---")
285
- run_timestamp = int(time.time())
286
- output_path = os.path.join(self.workspace_dir, f"{Path(source_video_path).stem}_audio.mp4")
287
- try:
288
- result = subprocess.run(
289
- ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
290
- capture_output=True, text=True, check=True)
291
- duration = float(result.stdout.strip())
292
- progress(0.5, desc="Generating audio track...")
293
- final_path = mmaudio_manager_singleton.generate_audio_for_video(
294
- video_path=source_video_path, prompt=audio_prompt,
295
- duration_seconds=duration, output_path_override=output_path
296
- )
297
- yield {"final_path": final_path}
298
- except Exception as e:
299
- logger.error(f"Audio generation failed: {e}", exc_info=True)
300
- raise gr.Error(f"Audio generation failed. Details: {e}")
301
-
302
- # --- Singleton Instantiation ---
303
- try:
304
- config_path = Path(__file__).resolve().parent.parent / "config.yaml"
305
- with open(config_path, 'r') as f:
306
- config = yaml.safe_load(f)
307
- WORKSPACE_DIR = config['application']['workspace_dir']
308
- deformes7d_engine_singleton = Deformes7DEngine(workspace_dir=WORKSPACE_DIR)
309
- # <--- START OF FIX --->
310
- except Exception as e:
311
- # Log the error as CRITICAL, since the application cannot run without this engine.
312
- logger.critical(f"CRITICAL: Failed to initialize the Deformes7DEngine singleton from {config_path}: {e}", exc_info=True)
313
- # Re-raise the exception to stop the application immediately.
314
- # This avoids the later 'NoneType' error and provides a clear point of failure.
315
- raise
316
- # <--- END OF FIX --->
managers/LICENSE DELETED
@@ -1,25 +0,0 @@
1
- # Euia-AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR para geração de vídeo coerente.
2
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
- #
4
- # Contato:
5
- # Carlos Rodrigues dos Santos
6
- # carlex22@gmail.com
7
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
8
- #
9
- # Repositórios e Projetos Relacionados:
10
- # GitHub: https://github.com/carlex22/Aduc-sdr
11
- # Hugging Face (Ltx-SuperTime-60Secondos): https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/
12
- # Hugging Face (Novinho): https://huggingface.co/spaces/Carlexxx/Novinho/
13
- #
14
- # This program is free software: you can redistribute it and/or modify
15
- # it under the terms of the GNU Affero General Public License as published by
16
- # the Free Software Foundation, either version 3 of the License, or
17
- # (at your option) any later version.
18
- #
19
- # This program is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
- # GNU Affero General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Affero General Public License
25
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
managers/LICENSE.txt DELETED
@@ -1,201 +0,0 @@
1
- Apache License
2
- Version 2.0, January 2004
3
- http://www.apache.org/licenses/
4
-
5
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
-
7
- 1. Definitions.
8
-
9
- "License" shall mean the terms and conditions for use, reproduction,
10
- and distribution as defined by Sections 1 through 9 of this document.
11
-
12
- "Licensor" shall mean the copyright owner or entity authorized by
13
- the copyright owner that is granting the License.
14
-
15
- "Legal Entity" shall mean the union of the acting entity and all
16
- other entities that control, are controlled by, or are under common
17
- control with that entity. For the purposes of this definition,
18
- "control" means (i) the power, direct or indirect, to cause the
19
- direction or management of such entity, whether by contract or
20
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
- outstanding shares, or (iii) beneficial ownership of such entity.
22
-
23
- "You" (or "Your") shall mean an individual or Legal Entity
24
- exercising permissions granted by this License.
25
-
26
- "Source" form shall mean the preferred form for making modifications,
27
- including but not limited to software source code, documentation
28
- source, and configuration files.
29
-
30
- "Object" form shall mean any form resulting from mechanical
31
- transformation or translation of a Source form, including but
32
- not limited to compiled object code, generated documentation,
33
- and conversions to other media types.
34
-
35
- "Work" shall mean the work of authorship, whether in Source or
36
- Object form, made available under the License, as indicated by a
37
- copyright notice that is included in or attached to the work
38
- (an example is provided in the Appendix below).
39
-
40
- "Derivative Works" shall mean any work, whether in Source or Object
41
- form, that is based on (or derived from) the Work and for which the
42
- editorial revisions, annotations, elaborations, or other modifications
43
- represent, as a whole, an original work of authorship. For the purposes
44
- of this License, Derivative Works shall not include works that remain
45
- separable from, or merely link (or bind by name) to the interfaces of,
46
- the Work and Derivative Works thereof.
47
-
48
- "Contribution" shall mean any work of authorship, including
49
- the original version of the Work and any modifications or additions
50
- to that Work or Derivative Works thereof, that is intentionally
51
- submitted to Licensor for inclusion in the Work by the copyright owner
52
- or by an individual or Legal Entity authorized to submit on behalf of
53
- the copyright owner. For the purposes of this definition, "submitted"
54
- means any form of electronic, verbal, or written communication sent
55
- to the Licensor or its representatives, including but not limited to
56
- communication on electronic mailing lists, source code control systems,
57
- and issue tracking systems that are managed by, or on behalf of, the
58
- Licensor for the purpose of discussing and improving the Work, but
59
- excluding communication that is conspicuously marked or otherwise
60
- designated in writing by the copyright owner as "Not a Contribution."
61
-
62
- "Contributor" shall mean Licensor and any individual or Legal Entity
63
- on behalf of whom a Contribution has been received by Licensor and
64
- subsequently incorporated within the Work.
65
-
66
- 2. Grant of Copyright License. Subject to the terms and conditions of
67
- this License, each Contributor hereby grants to You a perpetual,
68
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
- copyright license to reproduce, prepare Derivative Works of,
70
- publicly display, publicly perform, sublicense, and distribute the
71
- Work and such Derivative Works in Source or Object form.
72
-
73
- 3. Grant of Patent License. Subject to the terms and conditions of
74
- this License, each Contributor hereby grants to You a perpetual,
75
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
- (except as stated in this section) patent license to make, have made,
77
- use, offer to sell, sell, import, and otherwise transfer the Work,
78
- where such license applies only to those patent claims licensable
79
- by such Contributor that are necessarily infringed by their
80
- Contribution(s) alone or by combination of their Contribution(s)
81
- with the Work to which such Contribution(s) was submitted. If You
82
- institute patent litigation against any entity (including a
83
- cross-claim or counterclaim in a lawsuit) alleging that the Work
84
- or a Contribution incorporated within the Work constitutes direct
85
- or contributory patent infringement, then any patent licenses
86
- granted to You under this License for that Work shall terminate
87
- as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or
95
- Derivative Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, then any Derivative Works that You distribute must
108
- include a readable copy of the attribution notices contained
109
- within such NOTICE file, excluding those notices that do not
110
- pertain to any part of the Derivative Works, in at least one
111
- of the following places: within a NOTICE text file distributed
112
- as part of the Derivative Works; within the Source form or
113
- documentation, if provided along with the Derivative Works; or,
114
- within a display generated by the Derivative Works, if and
115
- wherever such third-party notices normally appear. The contents
116
- of the NOTICE file are for informational purposes only and
117
- do not modify the License. You may add Your own attribution
118
- notices within Derivative Works that You distribute, alongside
119
- or as an addendum to the NOTICE text from the Work, provided
120
- that such additional attribution notices cannot be construed
121
- as modifying the License.
122
-
123
- You may add Your own copyright statement to Your modifications and
124
- may provide additional or different license terms and conditions
125
- for use, reproduction, or distribution of Your modifications, or
126
- for any such Derivative Works as a whole, provided Your use,
127
- reproduction, and distribution of the Work otherwise complies with
128
- the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or redistributing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or consequential damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or any and all
162
- other commercial damages or losses), even if such Contributor
163
- has been advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Additional Liability. While redistributing
166
- the Work or Derivative Works thereof, You may choose to offer,
167
- and charge a fee for, acceptance of support, warranty, indemnity,
168
- or other liability obligations and/or rights consistent with this
169
- License. However, in accepting such obligations, You may act only
170
- on Your own behalf and on Your sole responsibility, not on behalf
171
- of any other Contributor, and only if You agree to indemnify,
172
- defend, and hold each Contributor harmless for any liability
173
- incurred by, or claims asserted against, such Contributor by reason
174
- of your accepting any such warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
managers/NOTICE.md DELETED
@@ -1,60 +0,0 @@
1
- # NOTICE
2
-
3
- Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
4
-
5
- ---
6
-
7
- ## Aviso de Propriedade Intelectual e Licenciamento
8
-
9
- ### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
10
-
11
- O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
12
-
13
- O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
14
-
15
- * Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
16
- * Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
17
- * Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
18
- * Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
19
- * O uso de "tokens universais" para comunicação agnóstica a modelos.
20
-
21
- ### **Reconhecimento e Implicações (EM PORTUGUÊS):**
22
-
23
- Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
24
-
25
- 1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
26
- 2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
27
- 3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
28
-
29
- ---
30
-
31
- ### **Patent Pending (IN ENGLISH):**
32
-
33
- The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
34
-
35
- The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
36
-
37
- * Fragmentation and scaling of requests exceeding AI model context limits.
38
- * Intelligent distribution of sub-tasks to heterogeneous specialists.
39
- * Persistent state management with iterative evaluation and feedback for planning subsequent steps.
40
- * Cost, latency, and quality-aware planning and routing.
41
- * The use of "universal tokens" for model-agnostic communication.
42
-
43
- ### **Acknowledgement and Implications (IN ENGLISH):**
44
-
45
- By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
46
-
47
- 1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
48
- 2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
49
- 3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
50
-
51
- ---
52
-
53
-
54
- **Contato para Consultas:**
55
-
56
- Para mais informações sobre a arquitetura ADUC, o status do patenteamento, ou para discutir licenciamento para usos comerciais ou não conformes com a AGPLv3, por favor, entre em contato:
57
-
58
- Carlos Rodrigues dos Santos
59
- carlex22@gmail.com
60
- Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
managers/README.md DELETED
@@ -1,156 +0,0 @@
1
- # 🛠️ managers/ - Ferramentas de IA de Terceiros para orquestração ADUC-SDR
2
-
3
- Esta pasta contém implementações adaptadas de modelos e utilitários de IA de terceiros, que servem como "especialistas" ou "ferramentas" de baixo nível para a arquitetura ADUC-SDR.
4
-
5
- **IMPORTANTE:** O conteúdo desta pasta é de autoria de seus respectivos idealizadores e desenvolvedores originais. Esta pasta **NÃO FAZ PARTE** do projeto principal ADUC-SDR em termos de sua arquitetura inovadora. Ela serve como um repositório para as **dependências diretas e modificadas** que os `Deformes enginers` (os estágios do "foguete" ADUC-SDR) invocam para realizar tarefas específicas (geração de imagem, vídeo, áudio).
6
-
7
- As modificações realizadas nos arquivos aqui presentes visam principalmente:
8
- 1. **Adaptação de Interfaces:** Padronizar as interfaces para que se encaixem no fluxo de orquestração do ADUC-SDR.
9
- 2. **Gerenciamento de Recursos:** Integrar lógicas de carregamento/descarregamento de modelos (GPU management) e configurações via arquivos YAML.
10
- 3. **Otimização de Fluxo:** Ajustar as pipelines para aceitar formatos de entrada mais eficientes (ex: tensores pré-codificados em vez de caminhos de mídia, pulando etapas de codificação/decodificação redundantes).
11
-
12
- ---
13
-
14
- ## 📄 Licenciamento
15
-
16
- O conteúdo original dos projetos listados abaixo é licenciado sob a **Licença Apache 2.0**, ou outra licença especificada pelos autores originais. Todas as modificações e o uso desses arquivos dentro da estrutura `helpers/` do projeto ADUC-SDR estão em conformidade com os termos da **Licença Apache 2.0**.
17
-
18
- As licenças originais dos projetos podem ser encontradas nas suas respectivas fontes ou nos subdiretórios `incl_licenses/` dentro de cada módulo adaptado.
19
-
20
- ---
21
-
22
- ## 🛠️ API dos Helpers e Guia de Uso
23
-
24
- Esta seção detalha como cada helper (agente especialista) deve ser utilizado dentro do ecossistema ADUC-SDR. Todos os agentes são instanciados como **singletons** no `hardware_manager.py` para garantir o gerenciamento centralizado de recursos de GPU.
25
-
26
- ### **gemini_helpers.py (GeminiAgent)**
27
-
28
- * **Propósito:** Atua como o "Oráculo de Síntese Adaptativo", responsável por todas as tarefas de processamento de linguagem natural, como criação de storyboards, geração de prompts, e tomada de decisões narrativas.
29
- * **Singleton Instance:** `gemini_agent_singleton`
30
- * **Construtor:** `GeminiAgent()`
31
- * Lê `configs/gemini_config.yaml` para obter o nome do modelo, parâmetros de inferência e caminhos de templates de prompt. A chave da API é lida da variável de ambiente `GEMINI_API_KEY`.
32
- * **Métodos Públicos:**
33
- * `generate_storyboard(prompt: str, num_keyframes: int, ref_image_paths: list[str])`
34
- * **Inputs:**
35
- * `prompt`: A ideia geral do filme (string).
36
- * `num_keyframes`: O número de cenas a serem geradas (int).
37
- * `ref_image_paths`: Lista de caminhos para as imagens de referência (list[str]).
38
- * **Output:** `tuple[list[str], str]` (Uma tupla contendo a lista de strings do storyboard e um relatório textual da operação).
39
- * `select_keyframes_from_pool(storyboard: list, base_image_paths: list[str], pool_image_paths: list[str])`
40
- * **Inputs:**
41
- * `storyboard`: A lista de strings do storyboard gerado.
42
- * `base_image_paths`: Imagens de referência base (list[str]).
43
- * `pool_image_paths`: O "banco de imagens" de onde selecionar (list[str]).
44
- * **Output:** `tuple[list[str], str]` (Uma tupla contendo a lista de caminhos de imagens selecionadas e um relatório textual).
45
- * `get_anticipatory_keyframe_prompt(...)`
46
- * **Inputs:** Contexto narrativo e visual para gerar um prompt de imagem.
47
- * **Output:** `tuple[str, str]` (Uma tupla contendo o prompt gerado para o modelo de imagem e um relatório textual).
48
- * `get_initial_motion_prompt(...)`
49
- * **Inputs:** Contexto narrativo e visual para a primeira transição de vídeo.
50
- * **Output:** `tuple[str, str]` (Uma tupla contendo o prompt de movimento gerado e um relatório textual).
51
- * `get_transition_decision(...)`
52
- * **Inputs:** Contexto narrativo e visual para uma transição de vídeo intermediária.
53
- * **Output:** `tuple[dict, str]` (Uma tupla contendo um dicionário `{"transition_type": "...", "motion_prompt": "..."}` e um relatório textual).
54
- * `generate_audio_prompts(...)`
55
- * **Inputs:** Contexto narrativo global.
56
- * **Output:** `tuple[dict, str]` (Uma tupla contendo um dicionário `{"music_prompt": "...", "sfx_prompt": "..."}` e um relatório textual).
57
-
58
- ### **flux_kontext_helpers.py (FluxPoolManager)**
59
-
60
- * **Propósito:** Especialista em geração de imagens de alta qualidade (keyframes) usando a pipeline FluxKontext. Gerencia um pool de workers para otimizar o uso de múltiplas GPUs.
61
- * **Singleton Instance:** `flux_kontext_singleton`
62
- * **Construtor:** `FluxPoolManager(device_ids: list[str], flux_config_file: str)`
63
- * Lê `configs/flux_config.yaml`.
64
- * **Método Público:**
65
- * `generate_image(prompt: str, reference_images: list[Image.Image], width: int, height: int, seed: int = 42, callback: callable = None)`
66
- * **Inputs:**
67
- * `prompt`: Prompt textual para guiar a geração (string).
68
- * `reference_images`: Lista de objetos `PIL.Image` como referência visual.
69
- * `width`, `height`: Dimensões da imagem de saída (int).
70
- * `seed`: Semente para reprodutibilidade (int).
71
- * `callback`: Função de callback opcional para monitorar o progresso.
72
- * **Output:** `PIL.Image.Image` (O objeto da imagem gerada).
73
-
74
- ### **dreamo_helpers.py (DreamOAgent)**
75
-
76
- * **Propósito:** Especialista em geração de imagens de alta qualidade (keyframes) usando a pipeline DreamO, com capacidades avançadas de edição e estilo a partir de referências.
77
- * **Singleton Instance:** `dreamo_agent_singleton`
78
- * **Construtor:** `DreamOAgent(device_id: str = None)`
79
- * Lê `configs/dreamo_config.yaml`.
80
- * **Método Público:**
81
- * `generate_image(prompt: str, reference_images: list[Image.Image], width: int, height: int)`
82
- * **Inputs:**
83
- * `prompt`: Prompt textual para guiar a geração (string).
84
- * `reference_images`: Lista de objetos `PIL.Image` como referência visual. A lógica interna atribui a primeira imagem como `style` e as demais como `ip`.
85
- * `width`, `height`: Dimensões da imagem de saída (int).
86
- * **Output:** `PIL.Image.Image` (O objeto da imagem gerada).
87
-
88
- ### **ltx_manager_helpers.py (LtxPoolManager)**
89
-
90
- * **Propósito:** Especialista na geração de fragmentos de vídeo no espaço latente usando a pipeline LTX-Video. Gerencia um pool de workers para otimizar o uso de múltiplas GPUs.
91
- * **Singleton Instance:** `ltx_manager_singleton`
92
- * **Construtor:** `LtxPoolManager(device_ids: list[str], ltx_model_config_file: str, ltx_global_config_file: str)`
93
- * Lê o `ltx_global_config_file` e o `ltx_model_config_file` para configurar a pipeline.
94
- * **Método Público:**
95
- * `generate_latent_fragment(**kwargs)`
96
- * **Inputs:** Dicionário de keyword arguments (`kwargs`) contendo todos os parâmetros da pipeline LTX, incluindo:
97
- * `height`, `width`: Dimensões do vídeo (int).
98
- * `video_total_frames`: Número total de frames a serem gerados (int).
99
- * `video_fps`: Frames por segundo (int).
100
- * `motion_prompt`: Prompt de movimento (string).
101
- * `conditioning_items_data`: Lista de objetos `LatentConditioningItem` contendo os tensores latentes de condição.
102
- * `guidance_scale`, `stg_scale`, `num_inference_steps`, etc.
103
- * **Output:** `tuple[torch.Tensor, tuple]` (Uma tupla contendo o tensor latente gerado e os valores de padding utilizados).
104
-
105
- ### **mmaudio_helper.py (MMAudioAgent)**
106
-
107
- * **Propósito:** Especialista em geração de áudio para um determinado fragmento de vídeo.
108
- * **Singleton Instance:** `mmaudio_agent_singleton`
109
- * **Construtor:** `MMAudioAgent(workspace_dir: str, device_id: str = None, mmaudio_config_file: str)`
110
- * Lê `configs/mmaudio_config.yaml`.
111
- * **Método Público:**
112
- * `generate_audio_for_video(video_path: str, prompt: str, negative_prompt: str, duration_seconds: float)`
113
- * **Inputs:**
114
- * `video_path`: Caminho para o arquivo de vídeo silencioso (string).
115
- * `prompt`: Prompt textual para guiar a geração de áudio (string).
116
- * `negative_prompt`: Prompt negativo para áudio (string).
117
- * `duration_seconds`: Duração exata do vídeo (float).
118
- * **Output:** `str` (O caminho para o novo arquivo de vídeo com a faixa de áudio integrada).
119
-
120
-
121
- ### **seedvr_helpers.py (SeedVrManager)**
122
-
123
- * **Propósito:** Especialista em pós-produção de vídeo, aplicando super-resolução com IA (`Video Super-Resolution`) para adicionar detalhes finos, nitidez e texturas realistas a um vídeo já renderizado.
124
- * **Singleton Instance:** `seedvr_manager_singleton`
125
- * **Construtor:** `SeedVrManager(workspace_dir: str, device_id: str = None)`
126
- * Lê `configs/seedvr_config.yaml`.
127
- * **Método Público:**
128
- * `process_video(input_video_path: str, output_video_path: str, prompt: str, model_version: str = '7B', steps: int = 100, seed: int = 666)`
129
- * **Inputs:**
130
- * `input_video_path`: Caminho para o vídeo de entrada a ser aprimorado (string).
131
- * `output_video_path`: Caminho onde o vídeo finalizado será salvo (string).
132
- * `prompt`: Um prompt de estilo geral para guiar o aprimoramento (string).
133
- * `model_version`: A versão do modelo a ser usada, '3B' ou '7B' (string).
134
- * `steps`: Número de passos de inferência para o processo de aprimoramento (int).
135
- * `seed`: Semente para reprodutibilidade (int).
136
- * **Output:** `str` (O caminho para o vídeo finalizado em alta definição).
137
-
138
- ---
139
-
140
- ## 🔗 Projetos Originais e Atribuições
141
- (A seção de atribuições e licenças permanece a mesma que definimos anteriormente)
142
-
143
- ### DreamO
144
- * **Repositório Original:** [https://github.com/bytedance/DreamO](https://github.com/bytedance/DreamO)
145
- ...
146
-
147
- ### LTX-Video
148
- * **Repositório Original:** [https://github.com/Lightricks/LTX-Video](https://github.com/Lightricks/LTX-Video)
149
- ...
150
-
151
- ### MMAudio
152
- * **Repositório Original:** [https://github.com/hkchengrex/MMAudio](https://github.com/hkchengrex/MMAudio)
153
- ...
154
-
155
- ### SeedVr
156
- * **Repositório Original:** [https://github.com/ByteDance-Seed/SeedVR](https://github.com/ByteDance-Seed/SeedVR)
managers/__init__.py DELETED
File without changes
managers/config.yaml DELETED
@@ -1,24 +0,0 @@
1
- # config.yaml
2
- # Configuração central para a aplicação Deformes4D e seus especialistas.
3
-
4
- application:
5
- workspace_dir: "deformes_workspace"
6
-
7
- # Configuração para Hugging Face Spaces
8
- sdk: gradio
9
- app_file: app.py
10
-
11
- specialists:
12
- flux:
13
- # Define quantas GPUs o pool do Flux deve tentar alocar.
14
- # Se não houver GPUs suficientes, o hardware_manager lançará um erro.
15
- # Se 0, usará a CPU.
16
- gpus_required: 4
17
-
18
- ltx:
19
- # Define quantas GPUs o pool do LTX deve tentar alocar.
20
- gpus_required: 4
21
-
22
- # Aponta para o arquivo de configuração específico do modelo LTX.
23
- # Alterado para usar o modelo 0.9.8-dev.
24
- config_file: "ltxv-13b-0.9.8-distilled.yaml"
managers/flux_kontext_manager.py DELETED
@@ -1,165 +0,0 @@
1
- # flux_kontext_helpers.py (ADUC: The Painter Specialist - with callback support)
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
- # carlex22@gmail.com
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
- #
10
- # Repositórios e Projetos Relacionados:
11
- # GitHub: https://github.com/carlex22/Aduc-sdr
12
- #
13
- #
14
- # PENDING PATENT NOTICE: Please see NOTICE.md.
15
- #
16
- # Version 1.0.1
17
-
18
- import torch
19
- from PIL import Image, ImageOps
20
- import gc
21
- from diffusers import FluxKontextPipeline
22
- import huggingface_hub
23
- import os
24
- import threading
25
- import yaml
26
- import logging
27
-
28
- from tools.hardware_manager import hardware_manager
29
-
30
- logger = logging.getLogger(__name__)
31
-
32
- class FluxWorker:
33
- """Representa uma única instância do pipeline FluxKontext em um dispositivo."""
34
- def __init__(self, device_id='cuda:0'):
35
- self.cpu_device = torch.device('cpu')
36
- self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
37
- self.pipe = None
38
- self._load_pipe_to_cpu()
39
-
40
- def _load_pipe_to_cpu(self):
41
- if self.pipe is None:
42
- logger.info(f"FLUX Worker ({self.device}): Carregando modelo para a CPU...")
43
- self.pipe = FluxKontextPipeline.from_pretrained(
44
- "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
45
- ).to(self.cpu_device)
46
- logger.info(f"FLUX Worker ({self.device}): Modelo pronto na CPU.")
47
-
48
- def to_gpu(self):
49
- if self.device.type == 'cpu': return
50
- logger.info(f"FLUX Worker: Movendo modelo para a GPU {self.device}...")
51
- self.pipe.to(self.device)
52
-
53
- def to_cpu(self):
54
- if self.device.type == 'cpu': return
55
- logger.info(f"FLUX Worker: Descarregando modelo da GPU {self.device}...")
56
- self.pipe.to(self.cpu_device)
57
- gc.collect()
58
- if torch.cuda.is_available(): torch.cuda.empty_cache()
59
-
60
- def _create_composite_reference(self, images: list[Image.Image], target_width: int, target_height: int) -> Image.Image:
61
- if not images: return None
62
- valid_images = [img.convert("RGB") for img in images if img is not None]
63
- if not valid_images: return None
64
- if len(valid_images) == 1:
65
- if valid_images[0].size != (target_width, target_height):
66
- return ImageOps.fit(valid_images[0], (target_width, target_height), Image.Resampling.LANCZOS)
67
- return valid_images[0]
68
-
69
- base_height = valid_images[0].height
70
- resized_for_concat = []
71
- for img in valid_images:
72
- if img.height != base_height:
73
- aspect_ratio = img.width / img.height
74
- new_width = int(base_height * aspect_ratio)
75
- resized_for_concat.append(img.resize((new_width, base_height), Image.Resampling.LANCZOS))
76
- else:
77
- resized_for_concat.append(img)
78
-
79
- total_width = sum(img.width for img in resized_for_concat)
80
- concatenated = Image.new('RGB', (total_width, base_height))
81
- x_offset = 0
82
- for img in resized_for_concat:
83
- concatenated.paste(img, (x_offset, 0))
84
- x_offset += img.width
85
-
86
- #final_reference = ImageOps.fit(concatenated, (target_width, target_height), Image.Resampling.LANCZOS)
87
- return concatenated
88
-
89
- @torch.inference_mode()
90
- def generate_image_internal(self, reference_images: list[Image.Image], prompt: str, target_width: int, target_height: int, seed: int, callback: callable = None):
91
- composite_reference = self._create_composite_reference(reference_images, target_width, target_height)
92
-
93
- num_steps = 12 # Fixed, optimized value
94
-
95
- logger.info(f"\n===== [CHAMADA AO PIPELINE FLUX em {self.device}] =====\n"
96
- f" - Prompt: '{prompt}'\n"
97
- f" - Resolução: {target_width}x{target_height}, Seed: {seed}, Passos: {num_steps}\n"
98
- f" - Nº de Imagens na Composição: {len(reference_images)}\n"
99
- f"==========================================")
100
-
101
- generated_image = self.pipe(
102
- image=composite_reference,
103
- prompt=prompt,
104
- guidance_scale=2.5,
105
- width=target_width,
106
- height=target_height,
107
- num_inference_steps=num_steps,
108
- generator=torch.Generator(device="cpu").manual_seed(seed),
109
- callback_on_step_end=callback,
110
- callback_on_step_end_tensor_inputs=["latents"] if callback else None
111
- ).images[0]
112
-
113
- return generated_image
114
-
115
- class FluxPoolManager:
116
- def __init__(self, device_ids):
117
- logger.info(f"FLUX POOL MANAGER: Criando workers para os dispositivos: {device_ids}")
118
- self.workers = [FluxWorker(device_id) for device_id in device_ids]
119
- self.current_worker_index = 0
120
- self.lock = threading.Lock()
121
- self.last_cleanup_thread = None
122
-
123
- def _cleanup_worker_thread(self, worker):
124
- logger.info(f"FLUX CLEANUP THREAD: Iniciando limpeza de {worker.device} em background...")
125
- worker.to_cpu()
126
-
127
- def generate_image(self, reference_images, prompt, width, height, seed=42, callback=None):
128
- worker_to_use = None
129
- try:
130
- with self.lock:
131
- if self.last_cleanup_thread and self.last_cleanup_thread.is_alive():
132
- self.last_cleanup_thread.join()
133
- worker_to_use = self.workers[self.current_worker_index]
134
- previous_worker_index = (self.current_worker_index - 1 + len(self.workers)) % len(self.workers)
135
- worker_to_cleanup = self.workers[previous_worker_index]
136
- cleanup_thread = threading.Thread(target=self._cleanup_worker_thread, args=(worker_to_cleanup,))
137
- cleanup_thread.start()
138
- self.last_cleanup_thread = cleanup_thread
139
- worker_to_use.to_gpu()
140
- self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
141
-
142
- logger.info(f"FLUX POOL MANAGER: Gerando imagem em {worker_to_use.device}...")
143
- return worker_to_use.generate_image_internal(
144
- reference_images=reference_images,
145
- prompt=prompt,
146
- target_width=width,
147
- target_height=height,
148
- seed=seed,
149
- callback=callback
150
- )
151
- except Exception as e:
152
- logger.error(f"FLUX POOL MANAGER: Erro durante a geração: {e}", exc_info=True)
153
- raise e
154
- finally:
155
- pass
156
-
157
- # --- Dynamic Singleton Instantiation ---
158
- logger.info("Lendo config.yaml para inicializar o FluxKontext Pool Manager...")
159
- with open("config.yaml", 'r') as f: config = yaml.safe_load(f)
160
- hf_token = os.getenv('HF_TOKEN');
161
- if hf_token: huggingface_hub.login(token=hf_token)
162
- flux_gpus_required = config['specialists']['flux']['gpus_required']
163
- flux_device_ids = hardware_manager.allocate_gpus('Flux', flux_gpus_required)
164
- flux_kontext_singleton = FluxPoolManager(device_ids=flux_device_ids)
165
- logger.info("Especialista de Imagem (Flux) pronto.")
managers/gemini_manager.py DELETED
@@ -1,119 +0,0 @@
1
- # managers/gemini_manager.py
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
- # carlex22@gmail.com
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
- #
10
- # Repositórios e Projetos Relacionados:
11
- # GitHub: https://github.com/carlex22/Aduc-sdr
12
- #
13
- # PENDING PATENT NOTICE: Please see NOTICE.md.
14
- #
15
- # Version: 1.1.1
16
- #
17
- # This file defines the GeminiManager, a specialist responsible for raw communication
18
- # with the Google Gemini API. It acts as a lean API client, handling requests,
19
- # parsing responses, and managing API-level errors. It does not contain any
20
- # high-level prompt engineering or creative logic.
21
-
22
- import os
23
- import logging
24
- import json
25
- from pathlib import Path
26
- import gradio as gr
27
- from PIL import Image
28
- import google.generativeai as genai
29
- import re
30
- from typing import List, Union, Any
31
-
32
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
33
- logger = logging.getLogger(__name__)
34
-
35
- def robust_json_parser(raw_text: str) -> dict:
36
- """
37
- Parses a JSON object from a string that might contain extra text,
38
- such as Markdown code blocks from an LLM's response.
39
- """
40
- clean_text = raw_text.strip()
41
- try:
42
- match = re.search(r'```json\s*(\{.*?\})\s*```', clean_text, re.DOTALL)
43
- if match:
44
- json_str = match.group(1)
45
- return json.loads(json_str)
46
-
47
- start_index = clean_text.find('{')
48
- end_index = clean_text.rfind('}')
49
- if start_index != -1 and end_index != -1 and end_index > start_index:
50
- json_str = clean_text[start_index : end_index + 1]
51
- return json.loads(json_str)
52
- else:
53
- raise ValueError("No valid JSON object could be found in the AI's response.")
54
- except json.JSONDecodeError as e:
55
- logger.error(f"Failed to decode JSON. The AI returned the following text:\n---\n{raw_text}\n---")
56
- raise ValueError(f"The AI returned an invalid JSON format: {e}")
57
-
58
- class GeminiManager:
59
- """
60
- Manages raw interactions with the Google Gemini API.
61
- """
62
- def __init__(self):
63
- self.api_key = os.environ.get("GEMINI_API_KEY")
64
- if self.api_key:
65
- genai.configure(api_key=self.api_key)
66
- self.model = genai.GenerativeModel('gemini-2.5-flash')
67
- logger.info("GeminiManager (Communication Layer) initialized successfully.")
68
- else:
69
- self.model = None
70
- logger.warning("Gemini API key not found. GeminiManager disabled.")
71
-
72
- def _check_model(self):
73
- """Raises an error if the Gemini API is not configured."""
74
- if not self.model:
75
- raise gr.Error("The Google Gemini API key is not configured (GEMINI_API_KEY).")
76
-
77
- def _generate_content(self, prompt_parts: List[Any]) -> str:
78
- """Internal method to make the API call."""
79
- self._check_model()
80
- logger.info("Calling Gemini API...")
81
- response = self.model.generate_content(prompt_parts)
82
- logger.info(f"Gemini responded with raw text: {response.text}")
83
- return response.text
84
-
85
- def get_raw_text(self, prompt_parts: List[Any]) -> str:
86
- """
87
- Sends a prompt to the Gemini API and returns the raw text response.
88
-
89
- Args:
90
- prompt_parts (List[Any]): A list containing strings and/or PIL.Image objects.
91
-
92
- Returns:
93
- str: The raw string response from the API.
94
- """
95
- try:
96
- return self._generate_content(prompt_parts)
97
- except Exception as e:
98
- logger.error(f"Gemini API call failed: {e}", exc_info=True)
99
- raise gr.Error(f"Gemini API communication failed: {e}")
100
-
101
- def get_json_object(self, prompt_parts: List[Any]) -> dict:
102
- """
103
- Sends a prompt to the Gemini API, expects a JSON response, parses it, and returns a dictionary.
104
-
105
- Args:
106
- prompt_parts (List[Any]): A list containing strings and/or PIL.Image objects.
107
-
108
- Returns:
109
- dict: The parsed JSON object from the API response.
110
- """
111
- try:
112
- raw_response = self._generate_content(prompt_parts)
113
- return robust_json_parser(raw_response)
114
- except Exception as e:
115
- logger.error(f"Gemini API call or JSON parsing failed: {e}", exc_info=True)
116
- raise gr.Error(f"Gemini API communication or response parsing failed: {e}")
117
-
118
- # --- Singleton Instance ---
119
- gemini_manager_singleton = GeminiManager()
 
managers/latent_enhancer_manager.py DELETED
@@ -1,109 +0,0 @@
1
- # latent_enhancer_specialist.py
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
- # carlex22@gmail.com
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
- #
10
- # Repositórios e Projetos Relacionados:
11
- # GitHub: https://github.com/carlex22/Aduc-sdr
12
- #
13
- #
14
- # PENDING PATENT NOTICE: Please see NOTICE.md.
15
- #
16
- # Version 1.0.1
17
-
18
- import torch
19
- import logging
20
- import time
21
- from diffusers import LTXLatentUpsamplePipeline
22
- from managers.ltx_manager import ltx_manager_singleton
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
- class LatentEnhancerSpecialist:
27
- """
28
- Especialista responsável por melhorar a qualidade de tensores latentes,
29
- incluindo upscaling espacial e refinamento por denoise.
30
- """
31
- def __init__(self):
32
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
33
- self.pipe_upsample = None
34
- self.base_vae = None # VAE para o upscaler
35
-
36
- def _lazy_init_upscaler(self):
37
- """Inicializa a pipeline de upscaling apenas quando for usada."""
38
- if self.pipe_upsample is not None:
39
- return
40
- try:
41
- from diffusers.models.autoencoders import AutoencoderKLLTXVideo
42
- self.base_vae = AutoencoderKLLTXVideo.from_pretrained(
43
- "linoyts/LTX-Video-spatial-upscaler-0.9.8",
44
- subfolder="vae",
45
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
46
- ).to(self.device)
47
-
48
- self.pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
49
- "linoyts/LTX-Video-spatial-upscaler-0.9.8",
50
- vae=self.base_vae,
51
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
52
- ).to(self.device)
53
- logger.info("[Enhancer] Pipeline de Upscale carregada com sucesso.")
54
- except Exception as e:
55
- logger.error(f"[Enhancer] Falha ao carregar pipeline de Upscale: {e}")
56
- self.pipe_upsample = None
57
-
58
- @torch.no_grad()
59
- def upscale(self, latents: torch.Tensor) -> torch.Tensor:
60
- """Aplica o upscaling 2x nos tensores latentes fornecidos."""
61
- self._lazy_init_upscaler()
62
- if self.pipe_upsample is None:
63
- logger.warning("[Enhancer] Pipeline de Upscale indisponível. Retornando latentes originais.")
64
- return latents
65
- try:
66
- logger.info(f"[Enhancer] Recebido shape {latents.shape} para Upscale.")
67
- result = self.pipe_upsample(latents=latents, output_type="latent")
68
- output_tensor = result.frames
69
- logger.info(f"[Enhancer] Upscale concluído. Novo shape: {output_tensor.shape}")
70
- return output_tensor
71
- except Exception as e:
72
- logger.error(f"[Enhancer] Erro durante upscale: {e}", exc_info=True)
73
- return latents
74
-
75
- @torch.no_grad()
76
- def refine(self, latents: torch.Tensor, fps: int = 24, **kwargs) -> torch.Tensor:
77
- """
78
- Invoca o LTX Pool Manager para refinar um tensor latente existente.
79
- """
80
- logger.info(f"[Enhancer] Refinando tensor latente com shape {latents.shape}.")
81
-
82
- main_pipeline_vae = ltx_manager_singleton.workers[0].pipeline.vae
83
- video_scale_factor = getattr(main_pipeline_vae.config, 'temporal_scale_factor', 8)
84
-
85
- _, _, num_latent_frames, _, _ = latents.shape
86
-
87
- # --- [CORREÇÃO FINAL E CRÍTICA] ---
88
- # A pipeline de refinamento (vid2vid) espera o número de frames de pixels que CORRESPONDE
89
- # ao latente existente, SEM a lógica do +1 que ela aplicará internamente.
90
- pixel_frames = (num_latent_frames - 1) * video_scale_factor
91
-
92
- final_ltx_params = {
93
- "video_total_frames": pixel_frames,
94
- "video_fps": fps,
95
- "current_fragment_index": int(time.time()),
96
- **kwargs
97
- }
98
-
99
- refined_latents_tensor, _ = ltx_manager_singleton.refine_latents(latents, **final_ltx_params)
100
-
101
- if refined_latents_tensor is None:
102
- logger.warning("[Enhancer] O refinamento falhou. Retornando tensor original não refinado.")
103
- return latents
104
-
105
- logger.info(f"[Enhancer] Retornando tensor latente refinado com shape: {refined_latents_tensor.shape}")
106
- return refined_latents_tensor
107
-
108
- # --- Singleton Global ---
109
- latent_enhancer_specialist_singleton = LatentEnhancerSpecialist()
 
managers/ltx_manager.py DELETED
@@ -1,320 +0,0 @@
1
- # managers/ltx_manager.py
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
- # carlex22@gmail.com
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
- #
10
- # PENDING PATENT NOTICE: Please see NOTICE.md.
11
- #
12
- # Version: 2.3.0
13
- #
14
- # This version adds a public property `prompt_enhancement_pipeline` to the manager.
15
- # This allows other specialists, specifically the Deformes3DThinker, to access
16
- # the internal prompt refinement models (captioning and LLM) used by the LTX pipeline,
17
- # ensuring stylistic and logical consistency.
18
-
19
- import torch
20
- import gc
21
- import os
22
- import sys
23
- import yaml
24
- import logging
25
- import huggingface_hub
26
- import time
27
- import threading
28
- import subprocess
29
- from pathlib import Path
30
- from typing import Optional, List, Tuple, Union
31
-
32
- from tools.optimization import optimize_ltx_worker, can_optimize_fp8
33
- from tools.hardware_manager import hardware_manager
34
- from aduc_types import LatentConditioningItem
35
-
36
- logger = logging.getLogger(__name__)
37
-
38
- # --- Dependency Management ---
39
- DEPS_DIR = Path("./deps")
40
- LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
41
- LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
42
-
43
- # --- Placeholders for lazy-loaded modules ---
44
- create_ltx_video_pipeline = None
45
- calculate_padding = None
46
- LTXVideoPipeline = None
47
- ConditioningItem = None
48
- LTXMultiScalePipeline = None
49
- vae_encode = None
50
- latent_to_pixel_coords = None
51
- randn_tensor = None
52
-
53
- class LtxPoolManager:
54
- """
55
- Manages a pool of LtxWorkers and exposes the enhancement pipeline for other specialists.
56
- """
57
- def __init__(self, device_ids, ltx_config_file_name):
58
- logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
59
- self._ltx_modules_loaded = False
60
- self._setup_dependencies()
61
- self._lazy_load_ltx_modules()
62
-
63
- self.ltx_config_file = LTX_VIDEO_REPO_DIR / "configs" / ltx_config_file_name
64
-
65
- self.workers = [LtxWorker(dev_id, self.ltx_config_file) for dev_id in device_ids]
66
- self.current_worker_index = 0
67
- self.lock = threading.Lock()
68
-
69
- # <--- NOVA PROPRIEDADE PARA O DEFORMES3DTHINKER USAR --->
70
- # Expõe a pipeline do primeiro worker. Assumimos que todas são configuradas
71
- # da mesma forma e contêm os mesmos modelos de enhancement.
72
- self.prompt_enhancement_pipeline = self.workers[0].pipeline if self.workers else None
73
- if self.prompt_enhancement_pipeline:
74
- logger.info("LTX POOL MANAGER: Prompt enhancement pipeline exposed for other specialists.")
75
- # <--- FIM DA NOVA PROPRIEDADE --->
76
-
77
- self._apply_ltx_pipeline_patches()
78
-
79
- if all(w.device.type == 'cuda' for w in self.workers):
80
- logger.info("LTX POOL MANAGER: HOT START MODE ENABLED. Pre-warming all GPUs...")
81
- for worker in self.workers:
82
- worker.to_gpu()
83
- logger.info("LTX POOL MANAGER: All GPUs are hot and ready.")
84
- else:
85
- logger.info("LTX POOL MANAGER: Operating in CPU or mixed mode. GPU pre-warming skipped.")
86
-
87
- # ... (O resto da classe LtxPoolManager, como _setup_dependencies, generate_latent_fragment, etc., permanece exatamente o mesmo) ...
88
-
89
- def _setup_dependencies(self):
90
- """Clones the LTX-Video repo if not found and adds it to the system path."""
91
- if not LTX_VIDEO_REPO_DIR.exists():
92
- logger.info(f"LTX-Video repository not found at '{LTX_VIDEO_REPO_DIR}'. Cloning from GitHub...")
93
- try:
94
- DEPS_DIR.mkdir(exist_ok=True)
95
- subprocess.run(
96
- ["git", "clone", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
97
- check=True, capture_output=True, text=True
98
- )
99
- logger.info("LTX-Video repository cloned successfully.")
100
- except subprocess.CalledProcessError as e:
101
- logger.error(f"Failed to clone LTX-Video repository. Git stderr: {e.stderr}")
102
- raise RuntimeError("Could not clone the required LTX-Video dependency from GitHub.")
103
- else:
104
- logger.info("Found local LTX-Video repository.")
105
-
106
- if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
107
- sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
108
- logger.info(f"Added '{LTX_VIDEO_REPO_DIR.resolve()}' to sys.path.")
109
-
110
- def _lazy_load_ltx_modules(self):
111
- """Dynamically imports LTX-Video modules after ensuring the repo exists."""
112
- if self._ltx_modules_loaded:
113
- return
114
-
115
- global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
116
- global vae_encode, latent_to_pixel_coords, randn_tensor
117
-
118
- from managers.ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
119
- from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
120
- from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
121
- from diffusers.utils.torch_utils import randn_tensor
122
-
123
- self._ltx_modules_loaded = True
124
- logger.info("LTX-Video modules have been dynamically loaded.")
125
-
126
- def _apply_ltx_pipeline_patches(self):
127
- """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility."""
128
- logger.info("LTX POOL MANAGER: Applying ADUC-SDR patches to LTX pipeline...")
129
- for worker in self.workers:
130
- worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline)
131
- logger.info("LTX POOL MANAGER: All pipeline instances have been patched successfully.")
132
-
133
- def _get_next_worker(self):
134
- with self.lock:
135
- worker = self.workers[self.current_worker_index]
136
- self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
137
- return worker
138
-
139
- def _prepare_pipeline_params(self, worker: 'LtxWorker', **kwargs) -> dict:
140
- pipeline_params = {
141
- "height": kwargs['height'], "width": kwargs['width'], "num_frames": kwargs['video_total_frames'],
142
- "frame_rate": kwargs.get('video_fps', 24),
143
- "generator": torch.Generator(device=worker.device).manual_seed(int(time.time()) + kwargs.get('current_fragment_index', 0)),
144
- "is_video": True, "vae_per_channel_normalize": True,
145
- "prompt": kwargs.get('motion_prompt', ""), "negative_prompt": kwargs.get('negative_prompt', "blurry, distorted, static, bad quality"),
146
- "guidance_scale": kwargs.get('guidance_scale', 1.0), "stg_scale": kwargs.get('stg_scale', 0.0),
147
- "rescaling_scale": kwargs.get('rescaling_scale', 0.15), "num_inference_steps": kwargs.get('num_inference_steps', 20),
148
- "output_type": "latent"
149
- }
150
- if 'latents' in kwargs:
151
- pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype)
152
- if 'strength' in kwargs:
153
- pipeline_params["strength"] = kwargs['strength']
154
- if 'conditioning_items_data' in kwargs:
155
- final_conditioning_items = []
156
- for item in kwargs['conditioning_items_data']:
157
- item.latent_tensor = item.latent_tensor.to(worker.device)
158
- final_conditioning_items.append(item)
159
- pipeline_params["conditioning_items"] = final_conditioning_items
160
- if worker.is_distilled:
161
- logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.")
162
- fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
163
- pipeline_params["timesteps"] = fixed_timesteps
164
- if fixed_timesteps:
165
- pipeline_params["num_inference_steps"] = len(fixed_timesteps)
166
- return pipeline_params
167
-
168
- def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple):
169
- worker_to_use = self._get_next_worker()
170
- try:
171
- height, width = kwargs['height'], kwargs['width']
172
- padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
173
- padding_vals = calculate_padding(height, width, padded_h, padded_w)
174
- kwargs['height'], kwargs['width'] = padded_h, padded_w
175
- pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)
176
- logger.info(f"Initiating GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}")
177
- if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
178
- result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
179
- else:
180
- result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
181
- return result, padding_vals
182
- except Exception as e:
183
- logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True)
184
- raise e
185
- finally:
186
- if worker_to_use and worker_to_use.device.type == 'cuda':
187
- with torch.cuda.device(worker_to_use.device):
188
- gc.collect(); torch.cuda.empty_cache()
189
-
190
- def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> (torch.Tensor, tuple):
191
- pass
192
-
193
- # ... (O resto do arquivo: LtxWorker, _aduc_prepare_conditioning_patch, Singleton Instantiation, etc. permanece idêntico) ...
194
- class LtxWorker:
195
- """
196
- Represents a single instance of the LTX-Video pipeline on a specific device.
197
- """
198
- def __init__(self, device_id, ltx_config_file):
199
- self.cpu_device = torch.device('cpu')
200
- self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
201
- logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...")
202
-
203
- with open(ltx_config_file, "r") as file:
204
- self.config = yaml.safe_load(file)
205
-
206
- self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
207
-
208
- models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded"
209
-
210
- logger.info(f"LTX Worker ({self.device}): Preparing to load model...")
211
- model_filename = self.config["checkpoint_path"]
212
- model_path = huggingface_hub.hf_hub_download(
213
- repo_id="Lightricks/LTX-Video", filename=model_filename,
214
- local_dir=str(models_dir), local_dir_use_symlinks=False
215
- )
216
-
217
- self.pipeline = create_ltx_video_pipeline(
218
- ckpt_path=model_path,
219
- precision=self.config["precision"],
220
- text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
221
- sampler=self.config["sampler"],
222
- device='cpu'
223
- )
224
- logger.info(f"LTX Worker ({self.device}): Model ready on CPU. Is distilled model? {self.is_distilled}")
225
-
226
- def to_gpu(self):
227
- if self.device.type == 'cpu': return
228
- logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
229
- self.pipeline.to(self.device)
230
- if self.device.type == 'cuda' and can_optimize_fp8():
231
- logger.info(f"LTX Worker ({self.device}): FP8 supported GPU detected. Optimizing...")
232
- optimize_ltx_worker(self)
233
- logger.info(f"LTX Worker ({self.device}): Optimization complete.")
234
- elif self.device.type == 'cuda':
235
- logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.")
236
-
237
- def to_cpu(self):
238
- if self.device.type == 'cpu': return
239
- logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
240
- self.pipeline.to('cpu')
241
- gc.collect()
242
- if torch.cuda.is_available(): torch.cuda.empty_cache()
243
-
244
- def generate_video_fragment_internal(self, **kwargs):
245
- return self.pipeline(**kwargs).images
246
-
247
-
248
- def _aduc_prepare_conditioning_patch(
249
- self: LTXVideoPipeline,
250
- conditioning_items: Optional[List[Union[ConditioningItem, "LatentConditioningItem"]]],
251
- init_latents: torch.Tensor,
252
- num_frames: int,
253
- height: int,
254
- width: int,
255
- vae_per_channel_normalize: bool = False,
256
- generator=None,
257
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, int]:
258
- if not conditioning_items:
259
- init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
260
- init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
261
- return init_latents, init_pixel_coords, None, 0
262
- init_conditioning_mask = torch.zeros(init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device)
263
- extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
264
- extra_conditioning_num_latents = 0
265
- is_latent_mode = hasattr(conditioning_items[0], 'latent_tensor')
266
- if is_latent_mode:
267
- for item in conditioning_items:
268
- media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
269
- media_frame_number, strength = item.media_frame_number, item.conditioning_strength
270
- if media_frame_number == 0:
271
- f_l, h_l, w_l = media_item_latents.shape[-3:]
272
- init_latents[:, :, :f_l, :h_l, :w_l] = torch.lerp(init_latents[:, :, :f_l, :h_l, :w_l], media_item_latents, strength)
273
- init_conditioning_mask[:, :f_l, :h_l, :w_l] = strength
274
- else:
275
- noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
276
- media_item_latents = torch.lerp(noise, media_item_latents, strength)
277
- patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
278
- pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
279
- pixel_coords[:, 0] += media_frame_number
280
- extra_conditioning_num_latents += patched_latents.shape[1]
281
- new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
282
- extra_conditioning_latents.append(patched_latents)
283
- extra_conditioning_pixel_coords.append(pixel_coords)
284
- extra_conditioning_mask.append(new_mask)
285
- else:
286
- for item in conditioning_items:
287
- if not isinstance(item, ConditioningItem): continue
288
- item = self._resize_conditioning_item(item, height, width)
289
- media_item_latents = vae_encode(item.media_item.to(dtype=self.vae.dtype, device=self.vae.device), self.vae, vae_per_channel_normalize=vae_per_channel_normalize).to(dtype=init_latents.dtype)
290
- if item.media_frame_number == 0:
291
- media_item_latents, l_x, l_y = self._get_latent_spatial_position(media_item_latents, item, height, width, strip_latent_border=True)
292
- f_l, h_l, w_l = media_item_latents.shape[-3:]
293
- init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = torch.lerp(init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l], media_item_latents, item.conditioning_strength)
294
- init_conditioning_mask[:, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = item.conditioning_strength
295
- else:
296
- logger.warning("Pixel-based conditioning for non-zero frames is not fully implemented in this patch.")
297
-
298
- init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
299
- init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
300
- init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
301
- init_conditioning_mask = init_conditioning_mask.squeeze(-1)
302
- if extra_conditioning_latents:
303
- init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
304
- init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
305
- init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
306
- if self.transformer.use_tpu_flash_attention:
307
- init_latents = init_latents[:, :-extra_conditioning_num_latents]
308
- init_pixel_coords = init_pixel_coords[:, :, :-extra_conditioning_num_latents]
309
- init_conditioning_mask = init_conditioning_mask[:, :-extra_conditioning_num_latents]
310
- return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
311
-
312
-
313
- # --- Singleton Instantiation ---
314
- with open("config.yaml", 'r') as f:
315
- config = yaml.safe_load(f)
316
- ltx_gpus_required = config['specialists']['ltx']['gpus_required']
317
- ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
318
- ltx_config_filename = config['specialists']['ltx']['config_file']
319
- ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename)
320
- logger.info("Video Specialist (LTX) ready.")
 
managers/ltx_pipeline_utils.py DELETED
@@ -1,774 +0,0 @@
1
- import argparse
2
- import os
3
- import random
4
- from datetime import datetime
5
- from pathlib import Path
6
- from diffusers.utils import logging
7
- from typing import Optional, List, Union
8
- import yaml
9
-
10
- import imageio
11
- import json
12
- import numpy as np
13
- import torch
14
- import cv2
15
- from safetensors import safe_open
16
- from PIL import Image
17
- from transformers import (
18
- T5EncoderModel,
19
- T5Tokenizer,
20
- AutoModelForCausalLM,
21
- AutoProcessor,
22
- AutoTokenizer,
23
- )
24
- from huggingface_hub import hf_hub_download
25
-
26
- from ltx_video.models.autoencoders.causal_video_autoencoder import (
27
- CausalVideoAutoencoder,
28
- )
29
- from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
30
- from ltx_video.models.transformers.transformer3d import Transformer3DModel
31
- from ltx_video.pipelines.pipeline_ltx_video import (
32
- ConditioningItem,
33
- LTXVideoPipeline,
34
- LTXMultiScalePipeline,
35
- )
36
- from ltx_video.schedulers.rf import RectifiedFlowScheduler
37
- from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
38
- from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
39
- import ltx_video.pipelines.crf_compressor as crf_compressor
40
-
41
- MAX_HEIGHT = 720
42
- MAX_WIDTH = 1280
43
- MAX_NUM_FRAMES = 257
44
-
45
- logger = logging.get_logger("LTX-Video")
46
-
47
-
48
- def get_total_gpu_memory():
49
- if torch.cuda.is_available():
50
- total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
51
- return total_memory
52
- return 44
53
-
54
-
55
- def get_device():
56
- if torch.cuda.is_available():
57
- return "cuda"
58
- elif torch.backends.mps.is_available():
59
- return "mps"
60
- return "cuda"
61
-
62
-
63
- def load_image_to_tensor_with_resize_and_crop(
64
- image_input: Union[str, Image.Image],
65
- target_height: int = 512,
66
- target_width: int = 768,
67
- just_crop: bool = False,
68
- ) -> torch.Tensor:
69
- """Load and process an image into a tensor.
70
-
71
- Args:
72
- image_input: Either a file path (str) or a PIL Image object
73
- target_height: Desired height of output tensor
74
- target_width: Desired width of output tensor
75
- just_crop: If True, only crop the image to the target size without resizing
76
- """
77
- if isinstance(image_input, str):
78
- image = Image.open(image_input).convert("RGB")
79
- elif isinstance(image_input, Image.Image):
80
- image = image_input
81
- else:
82
- raise ValueError("image_input must be either a file path or a PIL Image object")
83
-
84
- input_width, input_height = image.size
85
- aspect_ratio_target = target_width / target_height
86
- aspect_ratio_frame = input_width / input_height
87
- if aspect_ratio_frame > aspect_ratio_target:
88
- new_width = int(input_height * aspect_ratio_target)
89
- new_height = input_height
90
- x_start = (input_width - new_width) // 2
91
- y_start = 0
92
- else:
93
- new_width = input_width
94
- new_height = int(input_width / aspect_ratio_target)
95
- x_start = 0
96
- y_start = (input_height - new_height) // 2
97
-
98
- image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
99
- if not just_crop:
100
- image = image.resize((target_width, target_height))
101
-
102
- image = np.array(image)
103
- image = cv2.GaussianBlur(image, (3, 3), 0)
104
- frame_tensor = torch.from_numpy(image).float()
105
- frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
106
- frame_tensor = frame_tensor.permute(2, 0, 1)
107
- frame_tensor = (frame_tensor / 127.5) - 1.0
108
- # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
109
- return frame_tensor.unsqueeze(0).unsqueeze(2)
110
-
111
-
112
- def calculate_padding(
113
- source_height: int, source_width: int, target_height: int, target_width: int
114
- ) -> tuple[int, int, int, int]:
115
-
116
- # Calculate total padding needed
117
- pad_height = target_height - source_height
118
- pad_width = target_width - source_width
119
-
120
- # Calculate padding for each side
121
- pad_top = pad_height // 2
122
- pad_bottom = pad_height - pad_top # Handles odd padding
123
- pad_left = pad_width // 2
124
- pad_right = pad_width - pad_left # Handles odd padding
125
-
126
- # Return padded tensor
127
- # Padding format is (left, right, top, bottom)
128
- padding = (pad_left, pad_right, pad_top, pad_bottom)
129
- return padding
130
-
131
-
132
- def convert_prompt_to_filename(text: str, max_len: int = 20) -> str:
133
- # Remove non-letters and convert to lowercase
134
- clean_text = "".join(
135
- char.lower() for char in text if char.isalpha() or char.isspace()
136
- )
137
-
138
- # Split into words
139
- words = clean_text.split()
140
-
141
- # Build result string keeping track of length
142
- result = []
143
- current_length = 0
144
-
145
- for word in words:
146
- # Add word length plus 1 for underscore (except for first word)
147
- new_length = current_length + len(word)
148
-
149
- if new_length <= max_len:
150
- result.append(word)
151
- current_length += len(word)
152
- else:
153
- break
154
-
155
- return "-".join(result)
156
-
157
-
158
- # Generate output video name
159
- def get_unique_filename(
160
- base: str,
161
- ext: str,
162
- prompt: str,
163
- seed: int,
164
- resolution: tuple[int, int, int],
165
- dir: Path,
166
- endswith=None,
167
- index_range=1000,
168
- ) -> Path:
169
- base_filename = f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{seed}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
170
- for i in range(index_range):
171
- filename = dir / f"{base_filename}_{i}{endswith if endswith else ''}{ext}"
172
- if not os.path.exists(filename):
173
- return filename
174
- raise FileExistsError(
175
- f"Could not find a unique filename after {index_range} attempts."
176
- )
177
-
178
-
179
- def seed_everething(seed: int):
180
- random.seed(seed)
181
- np.random.seed(seed)
182
- torch.manual_seed(seed)
183
- if torch.cuda.is_available():
184
- torch.cuda.manual_seed(seed)
185
- if torch.backends.mps.is_available():
186
- torch.mps.manual_seed(seed)
187
-
188
-
189
- def main():
190
- parser = argparse.ArgumentParser(
191
- description="Load models from separate directories and run the pipeline."
192
- )
193
-
194
- # Directories
195
- parser.add_argument(
196
- "--output_path",
197
- type=str,
198
- default=None,
199
- help="Path to the folder to save output video, if None will save in outputs/ directory.",
200
- )
201
- parser.add_argument("--seed", type=int, default="171198")
202
-
203
- # Pipeline parameters
204
- parser.add_argument(
205
- "--num_images_per_prompt",
206
- type=int,
207
- default=1,
208
- help="Number of images per prompt",
209
- )
210
- parser.add_argument(
211
- "--image_cond_noise_scale",
212
- type=float,
213
- default=0.15,
214
- help="Amount of noise to add to the conditioned image",
215
- )
216
- parser.add_argument(
217
- "--height",
218
- type=int,
219
- default=704,
220
- help="Height of the output video frames. Optional if an input image provided.",
221
- )
222
- parser.add_argument(
223
- "--width",
224
- type=int,
225
- default=1216,
226
- help="Width of the output video frames. If None will infer from input image.",
227
- )
228
- parser.add_argument(
229
- "--num_frames",
230
- type=int,
231
- default=121,
232
- help="Number of frames to generate in the output video",
233
- )
234
- parser.add_argument(
235
- "--frame_rate", type=int, default=30, help="Frame rate for the output video"
236
- )
237
- parser.add_argument(
238
- "--device",
239
- default=None,
240
- help="Device to run inference on. If not specified, will automatically detect and use CUDA or MPS if available, else CPU.",
241
- )
242
- parser.add_argument(
243
- "--pipeline_config",
244
- type=str,
245
- default="configs/ltxv-13b-0.9.7-dev.yaml",
246
- help="The path to the config file for the pipeline, which contains the parameters for the pipeline",
247
- )
248
-
249
- # Prompts
250
- parser.add_argument(
251
- "--prompt",
252
- type=str,
253
- help="Text prompt to guide generation",
254
- )
255
- parser.add_argument(
256
- "--negative_prompt",
257
- type=str,
258
- default="worst quality, inconsistent motion, blurry, jittery, distorted",
259
- help="Negative prompt for undesired features",
260
- )
261
-
262
- parser.add_argument(
263
- "--offload_to_cpu",
264
- action="store_true",
265
- help="Offloading unnecessary computations to CPU.",
266
- )
267
-
268
- # video-to-video arguments:
269
- parser.add_argument(
270
- "--input_media_path",
271
- type=str,
272
- default=None,
273
- help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
274
- )
275
-
276
- # Conditioning arguments
277
- parser.add_argument(
278
- "--conditioning_media_paths",
279
- type=str,
280
- nargs="*",
281
- help="List of paths to conditioning media (images or videos). Each path will be used as a conditioning item.",
282
- )
283
- parser.add_argument(
284
- "--conditioning_strengths",
285
- type=float,
286
- nargs="*",
287
- help="List of conditioning strengths (between 0 and 1) for each conditioning item. Must match the number of conditioning items.",
288
- )
289
- parser.add_argument(
290
- "--conditioning_start_frames",
291
- type=int,
292
- nargs="*",
293
- help="List of frame indices where each conditioning item should be applied. Must match the number of conditioning items.",
294
- )
295
-
296
- args = parser.parse_args()
297
- logger.warning(f"Running generation with arguments: {args}")
298
- infer(**vars(args))
299
-
300
-
301
- def create_ltx_video_pipeline(
302
- ckpt_path: str,
303
- precision: str,
304
- text_encoder_model_name_or_path: str,
305
- sampler: Optional[str] = None,
306
- device: Optional[str] = None,
307
- enhance_prompt: bool = False,
308
- prompt_enhancer_image_caption_model_name_or_path: Optional[str] = None,
309
- prompt_enhancer_llm_model_name_or_path: Optional[str] = None,
310
- ) -> LTXVideoPipeline:
311
- ckpt_path = Path(ckpt_path)
312
- assert os.path.exists(
313
- ckpt_path
314
- ), f"Ckpt path provided (--ckpt_path) {ckpt_path} does not exist"
315
-
316
- with safe_open(ckpt_path, framework="pt") as f:
317
- metadata = f.metadata()
318
- config_str = metadata.get("config")
319
- configs = json.loads(config_str)
320
- allowed_inference_steps = configs.get("allowed_inference_steps", None)
321
-
322
- vae = CausalVideoAutoencoder.from_pretrained(ckpt_path)
323
- transformer = Transformer3DModel.from_pretrained(ckpt_path)
324
-
325
- # Use constructor if sampler is specified, otherwise use from_pretrained
326
- if sampler == "from_checkpoint" or not sampler:
327
- scheduler = RectifiedFlowScheduler.from_pretrained(ckpt_path)
328
- else:
329
- scheduler = RectifiedFlowScheduler(
330
- sampler=("Uniform" if sampler.lower() == "uniform" else "LinearQuadratic")
331
- )
332
-
333
- text_encoder = T5EncoderModel.from_pretrained(
334
- text_encoder_model_name_or_path, subfolder="text_encoder"
335
- )
336
- patchifier = SymmetricPatchifier(patch_size=1)
337
- tokenizer = T5Tokenizer.from_pretrained(
338
- text_encoder_model_name_or_path, subfolder="tokenizer"
339
- )
340
-
341
- transformer = transformer.to(device)
342
- vae = vae.to(device)
343
- text_encoder = text_encoder.to(device)
344
-
345
- if enhance_prompt:
346
- prompt_enhancer_image_caption_model = AutoModelForCausalLM.from_pretrained(
347
- prompt_enhancer_image_caption_model_name_or_path, trust_remote_code=True
348
- )
349
- prompt_enhancer_image_caption_processor = AutoProcessor.from_pretrained(
350
- prompt_enhancer_image_caption_model_name_or_path, trust_remote_code=True
351
- )
352
- prompt_enhancer_llm_model = AutoModelForCausalLM.from_pretrained(
353
- prompt_enhancer_llm_model_name_or_path,
354
- torch_dtype="bfloat16",
355
- )
356
- prompt_enhancer_llm_tokenizer = AutoTokenizer.from_pretrained(
357
- prompt_enhancer_llm_model_name_or_path,
358
- )
359
- else:
360
- prompt_enhancer_image_caption_model = None
361
- prompt_enhancer_image_caption_processor = None
362
- prompt_enhancer_llm_model = None
363
- prompt_enhancer_llm_tokenizer = None
364
-
365
- vae = vae.to(torch.bfloat16)
366
- if precision == "bfloat16" and transformer.dtype != torch.bfloat16:
367
- transformer = transformer.to(torch.bfloat16)
368
- text_encoder = text_encoder.to(torch.bfloat16)
369
-
370
- # Use submodels for the pipeline
371
- submodel_dict = {
372
- "transformer": transformer,
373
- "patchifier": patchifier,
374
- "text_encoder": text_encoder,
375
- "tokenizer": tokenizer,
376
- "scheduler": scheduler,
377
- "vae": vae,
378
- "prompt_enhancer_image_caption_model": prompt_enhancer_image_caption_model,
379
- "prompt_enhancer_image_caption_processor": prompt_enhancer_image_caption_processor,
380
- "prompt_enhancer_llm_model": prompt_enhancer_llm_model,
381
- "prompt_enhancer_llm_tokenizer": prompt_enhancer_llm_tokenizer,
382
- "allowed_inference_steps": allowed_inference_steps,
383
- }
384
-
385
- pipeline = LTXVideoPipeline(**submodel_dict)
386
- pipeline = pipeline.to(device)
387
- return pipeline
388
-
389
-
390
- def create_latent_upsampler(latent_upsampler_model_path: str, device: str):
391
- latent_upsampler = LatentUpsampler.from_pretrained(latent_upsampler_model_path)
392
- latent_upsampler.to(device)
393
- latent_upsampler.eval()
394
- return latent_upsampler
395
-
396
-
397
- def infer(
398
- output_path: Optional[str],
399
- seed: int,
400
- pipeline_config: str,
401
- image_cond_noise_scale: float,
402
- height: Optional[int],
403
- width: Optional[int],
404
- num_frames: int,
405
- frame_rate: int,
406
- prompt: str,
407
- negative_prompt: str,
408
- offload_to_cpu: bool,
409
- input_media_path: Optional[str] = None,
410
- conditioning_media_paths: Optional[List[str]] = None,
411
- conditioning_strengths: Optional[List[float]] = None,
412
- conditioning_start_frames: Optional[List[int]] = None,
413
- device: Optional[str] = None,
414
- **kwargs,
415
- ):
416
- # check if pipeline_config is a file
417
- if not os.path.isfile(pipeline_config):
418
- raise ValueError(f"Pipeline config file {pipeline_config} does not exist")
419
- with open(pipeline_config, "r") as f:
420
- pipeline_config = yaml.safe_load(f)
421
-
422
- models_dir = "MODEL_DIR"
423
-
424
- ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
425
- if not os.path.isfile(ltxv_model_name_or_path):
426
- ltxv_model_path = hf_hub_download(
427
- repo_id="Lightricks/LTX-Video",
428
- filename=ltxv_model_name_or_path,
429
- local_dir=models_dir,
430
- repo_type="model",
431
- )
432
- else:
433
- ltxv_model_path = ltxv_model_name_or_path
434
-
435
- spatial_upscaler_model_name_or_path = pipeline_config.get(
436
- "spatial_upscaler_model_path"
437
- )
438
- if spatial_upscaler_model_name_or_path and not os.path.isfile(
439
- spatial_upscaler_model_name_or_path
440
- ):
441
- spatial_upscaler_model_path = hf_hub_download(
442
- repo_id="Lightricks/LTX-Video",
443
- filename=spatial_upscaler_model_name_or_path,
444
- local_dir=models_dir,
445
- repo_type="model",
446
- )
447
- else:
448
- spatial_upscaler_model_path = spatial_upscaler_model_name_or_path
449
-
450
- if kwargs.get("input_image_path", None):
451
- logger.warning(
452
- "Please use conditioning_media_paths instead of input_image_path."
453
- )
454
- assert not conditioning_media_paths and not conditioning_start_frames
455
- conditioning_media_paths = [kwargs["input_image_path"]]
456
- conditioning_start_frames = [0]
457
-
458
- # Validate conditioning arguments
459
- if conditioning_media_paths:
460
- # Use default strengths of 1.0
461
- if not conditioning_strengths:
462
- conditioning_strengths = [1.0] * len(conditioning_media_paths)
463
- if not conditioning_start_frames:
464
- raise ValueError(
465
- "If `conditioning_media_paths` is provided, "
466
- "`conditioning_start_frames` must also be provided"
467
- )
468
- if len(conditioning_media_paths) != len(conditioning_strengths) or len(
469
- conditioning_media_paths
470
- ) != len(conditioning_start_frames):
471
- raise ValueError(
472
- "`conditioning_media_paths`, `conditioning_strengths`, "
473
- "and `conditioning_start_frames` must have the same length"
474
- )
475
- if any(s < 0 or s > 1 for s in conditioning_strengths):
476
- raise ValueError("All conditioning strengths must be between 0 and 1")
477
- if any(f < 0 or f >= num_frames for f in conditioning_start_frames):
478
- raise ValueError(
479
- f"All conditioning start frames must be between 0 and {num_frames-1}"
480
- )
481
-
482
- seed_everething(seed)
483
- if offload_to_cpu and not torch.cuda.is_available():
484
- logger.warning(
485
- "offload_to_cpu is set to True, but offloading will not occur since the model is already running on CPU."
486
- )
487
- offload_to_cpu = False
488
- else:
489
- offload_to_cpu = offload_to_cpu and get_total_gpu_memory() < 30
490
-
491
- output_dir = (
492
- Path(output_path)
493
- if output_path
494
- else Path(f"outputs/{datetime.today().strftime('%Y-%m-%d')}")
495
- )
496
- output_dir.mkdir(parents=True, exist_ok=True)
497
-
498
- # Adjust dimensions to be divisible by 32 and num_frames to be (N * 8 + 1)
499
- height_padded = ((height - 1) // 32 + 1) * 32
500
- width_padded = ((width - 1) // 32 + 1) * 32
501
- num_frames_padded = ((num_frames - 2) // 8 + 1) * 8 + 1
502
-
503
- padding = calculate_padding(height, width, height_padded, width_padded)
504
-
505
- logger.warning(
506
- f"Padded dimensions: {height_padded}x{width_padded}x{num_frames_padded}"
507
- )
508
-
509
- prompt_enhancement_words_threshold = pipeline_config[
510
- "prompt_enhancement_words_threshold"
511
- ]
512
-
513
- prompt_word_count = len(prompt.split())
514
- enhance_prompt = (
515
- prompt_enhancement_words_threshold > 0
516
- and prompt_word_count < prompt_enhancement_words_threshold
517
- )
518
-
519
- if prompt_enhancement_words_threshold > 0 and not enhance_prompt:
520
- logger.info(
521
- f"Prompt has {prompt_word_count} words, which exceeds the threshold of {prompt_enhancement_words_threshold}. Prompt enhancement disabled."
522
- )
523
-
524
- precision = pipeline_config["precision"]
525
- text_encoder_model_name_or_path = pipeline_config["text_encoder_model_name_or_path"]
526
- sampler = pipeline_config["sampler"]
527
- prompt_enhancer_image_caption_model_name_or_path = pipeline_config[
528
- "prompt_enhancer_image_caption_model_name_or_path"
529
- ]
530
- prompt_enhancer_llm_model_name_or_path = pipeline_config[
531
- "prompt_enhancer_llm_model_name_or_path"
532
- ]
533
-
534
- pipeline = create_ltx_video_pipeline(
535
- ckpt_path=ltxv_model_path,
536
- precision=precision,
537
- text_encoder_model_name_or_path=text_encoder_model_name_or_path,
538
- sampler=sampler,
539
- device=kwargs.get("device", get_device()),
540
- enhance_prompt=enhance_prompt,
541
- prompt_enhancer_image_caption_model_name_or_path=prompt_enhancer_image_caption_model_name_or_path,
542
- prompt_enhancer_llm_model_name_or_path=prompt_enhancer_llm_model_name_or_path,
543
- )
544
-
545
- if pipeline_config.get("pipeline_type", None) == "multi-scale":
546
- if not spatial_upscaler_model_path:
547
- raise ValueError(
548
- "spatial upscaler model path is missing from pipeline config file and is required for multi-scale rendering"
549
- )
550
- latent_upsampler = create_latent_upsampler(
551
- spatial_upscaler_model_path, pipeline.device
552
- )
553
- pipeline = LTXMultiScalePipeline(pipeline, latent_upsampler=latent_upsampler)
554
-
555
- media_item = None
556
- if input_media_path:
557
- media_item = load_media_file(
558
- media_path=input_media_path,
559
- height=height,
560
- width=width,
561
- max_frames=num_frames_padded,
562
- padding=padding,
563
- )
564
-
565
- conditioning_items = (
566
- prepare_conditioning(
567
- conditioning_media_paths=conditioning_media_paths,
568
- conditioning_strengths=conditioning_strengths,
569
- conditioning_start_frames=conditioning_start_frames,
570
- height=height,
571
- width=width,
572
- num_frames=num_frames,
573
- padding=padding,
574
- pipeline=pipeline,
575
- )
576
- if conditioning_media_paths
577
- else None
578
- )
579
-
580
- stg_mode = pipeline_config.get("stg_mode", "attention_values")
581
- del pipeline_config["stg_mode"]
582
- if stg_mode.lower() == "stg_av" or stg_mode.lower() == "attention_values":
583
- skip_layer_strategy = SkipLayerStrategy.AttentionValues
584
- elif stg_mode.lower() == "stg_as" or stg_mode.lower() == "attention_skip":
585
- skip_layer_strategy = SkipLayerStrategy.AttentionSkip
586
- elif stg_mode.lower() == "stg_r" or stg_mode.lower() == "residual":
587
- skip_layer_strategy = SkipLayerStrategy.Residual
588
- elif stg_mode.lower() == "stg_t" or stg_mode.lower() == "transformer_block":
589
- skip_layer_strategy = SkipLayerStrategy.TransformerBlock
590
- else:
591
- raise ValueError(f"Invalid spatiotemporal guidance mode: {stg_mode}")
592
-
593
- # Prepare input for the pipeline
594
- sample = {
595
- "prompt": prompt,
596
- "prompt_attention_mask": None,
597
- "negative_prompt": negative_prompt,
598
- "negative_prompt_attention_mask": None,
599
- }
600
-
601
- device = device or get_device()
602
- generator = torch.Generator(device=device).manual_seed(seed)
603
-
604
- images = pipeline(
605
- **pipeline_config,
606
- skip_layer_strategy=skip_layer_strategy,
607
- generator=generator,
608
- output_type="pt",
609
- callback_on_step_end=None,
610
- height=height_padded,
611
- width=width_padded,
612
- num_frames=num_frames_padded,
613
- frame_rate=frame_rate,
614
- **sample,
615
- media_items=media_item,
616
- conditioning_items=conditioning_items,
617
- is_video=True,
618
- vae_per_channel_normalize=True,
619
- image_cond_noise_scale=image_cond_noise_scale,
620
- mixed_precision=(precision == "mixed_precision"),
621
- offload_to_cpu=offload_to_cpu,
622
- device=device,
623
- enhance_prompt=enhance_prompt,
624
- ).images
625
-
626
- # Crop the padded images to the desired resolution and number of frames
627
- (pad_left, pad_right, pad_top, pad_bottom) = padding
628
- pad_bottom = -pad_bottom
629
- pad_right = -pad_right
630
- if pad_bottom == 0:
631
- pad_bottom = images.shape[3]
632
- if pad_right == 0:
633
- pad_right = images.shape[4]
634
- images = images[:, :, :num_frames, pad_top:pad_bottom, pad_left:pad_right]
635
-
636
- for i in range(images.shape[0]):
637
- # Gathering from B, C, F, H, W to C, F, H, W and then permuting to F, H, W, C
638
- video_np = images[i].permute(1, 2, 3, 0).cpu().float().numpy()
639
- # Unnormalizing images to [0, 255] range
640
- video_np = (video_np * 255).astype(np.uint8)
641
- fps = frame_rate
642
- height, width = video_np.shape[1:3]
643
- # In case a single image is generated
644
- if video_np.shape[0] == 1:
645
- output_filename = get_unique_filename(
646
- f"image_output_{i}",
647
- ".png",
648
- prompt=prompt,
649
- seed=seed,
650
- resolution=(height, width, num_frames),
651
- dir=output_dir,
652
- )
653
- imageio.imwrite(output_filename, video_np[0])
654
- else:
655
- output_filename = get_unique_filename(
656
- f"video_output_{i}",
657
- ".mp4",
658
- prompt=prompt,
659
- seed=seed,
660
- resolution=(height, width, num_frames),
661
- dir=output_dir,
662
- )
663
-
664
- # Write video
665
- with imageio.get_writer(output_filename, fps=fps) as video:
666
- for frame in video_np:
667
- video.append_data(frame)
668
-
669
- logger.warning(f"Output saved to {output_filename}")
670
-
671
-
672
- def prepare_conditioning(
673
- conditioning_media_paths: List[str],
674
- conditioning_strengths: List[float],
675
- conditioning_start_frames: List[int],
676
- height: int,
677
- width: int,
678
- num_frames: int,
679
- padding: tuple[int, int, int, int],
680
- pipeline: LTXVideoPipeline,
681
- ) -> Optional[List[ConditioningItem]]:
682
- """Prepare conditioning items based on input media paths and their parameters.
683
-
684
- Args:
685
- conditioning_media_paths: List of paths to conditioning media (images or videos)
686
- conditioning_strengths: List of conditioning strengths for each media item
687
- conditioning_start_frames: List of frame indices where each item should be applied
688
- height: Height of the output frames
689
- width: Width of the output frames
690
- num_frames: Number of frames in the output video
691
- padding: Padding to apply to the frames
692
- pipeline: LTXVideoPipeline object used for condition video trimming
693
-
694
- Returns:
695
- A list of ConditioningItem objects.
696
- """
697
- conditioning_items = []
698
- for path, strength, start_frame in zip(
699
- conditioning_media_paths, conditioning_strengths, conditioning_start_frames
700
- ):
701
- num_input_frames = orig_num_input_frames = get_media_num_frames(path)
702
- if hasattr(pipeline, "trim_conditioning_sequence") and callable(
703
- getattr(pipeline, "trim_conditioning_sequence")
704
- ):
705
- num_input_frames = pipeline.trim_conditioning_sequence(
706
- start_frame, orig_num_input_frames, num_frames
707
- )
708
- if num_input_frames < orig_num_input_frames:
709
- logger.warning(
710
- f"Trimming conditioning video {path} from {orig_num_input_frames} to {num_input_frames} frames."
711
- )
712
-
713
- media_tensor = load_media_file(
714
- media_path=path,
715
- height=height,
716
- width=width,
717
- max_frames=num_input_frames,
718
- padding=padding,
719
- just_crop=True,
720
- )
721
- conditioning_items.append(ConditioningItem(media_tensor, start_frame, strength))
722
- return conditioning_items
723
-
724
-
725
- def get_media_num_frames(media_path: str) -> int:
726
- is_video = any(
727
- media_path.lower().endswith(ext) for ext in [".mp4", ".avi", ".mov", ".mkv"]
728
- )
729
- num_frames = 1
730
- if is_video:
731
- reader = imageio.get_reader(media_path)
732
- num_frames = reader.count_frames()
733
- reader.close()
734
- return num_frames
735
-
736
-
737
- def load_media_file(
738
- media_path: str,
739
- height: int,
740
- width: int,
741
- max_frames: int,
742
- padding: tuple[int, int, int, int],
743
- just_crop: bool = False,
744
- ) -> torch.Tensor:
745
- is_video = any(
746
- media_path.lower().endswith(ext) for ext in [".mp4", ".avi", ".mov", ".mkv"]
747
- )
748
- if is_video:
749
- reader = imageio.get_reader(media_path)
750
- num_input_frames = min(reader.count_frames(), max_frames)
751
-
752
- # Read and preprocess the relevant frames from the video file.
753
- frames = []
754
- for i in range(num_input_frames):
755
- frame = Image.fromarray(reader.get_data(i))
756
- frame_tensor = load_image_to_tensor_with_resize_and_crop(
757
- frame, height, width, just_crop=just_crop
758
- )
759
- frame_tensor = torch.nn.functional.pad(frame_tensor, padding)
760
- frames.append(frame_tensor)
761
- reader.close()
762
-
763
- # Stack frames along the temporal dimension
764
- media_tensor = torch.cat(frames, dim=2)
765
- else: # Input image
766
- media_tensor = load_image_to_tensor_with_resize_and_crop(
767
- media_path, height, width, just_crop=just_crop
768
- )
769
- media_tensor = torch.nn.functional.pad(media_tensor, padding)
770
- return media_tensor
771
-
772
-
773
- if __name__ == "__main__":
774
- main()
 
managers/mmaudio_manager.py DELETED
@@ -1,208 +0,0 @@
1
- # managers/mmaudio_manager.py
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
- # carlex22@gmail.com
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
- #
10
- # Repositórios e Projetos Relacionados:
11
- # GitHub: https://github.com/carlex22/Aduc-sdr
12
- #
13
- # PENDING PATENT NOTICE: Please see NOTICE.md.
14
- #
15
- # Version: 2.3.0
16
- #
17
- # This file defines the MMAudioManager for the ADUC-SDR framework. It is responsible
18
- # for generating audio synchronized with video clips. This version has been refactored
19
- # to be self-contained by automatically cloning the MMAudio dependency from its
20
- # official repository, making the framework more portable and easier to set up.
21
-
22
- import torch
23
- import logging
24
- import subprocess
25
- import os
26
- import time
27
- import yaml
28
- import gc
29
- from pathlib import Path
30
- import gradio as gr
31
- import sys
32
-
33
- logger = logging.getLogger(__name__)
34
-
35
- # --- Dependency Management ---
36
- DEPS_DIR = Path("./deps")
37
- MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
38
- MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
39
-
40
- def setup_mmaudio_dependencies():
41
- """
42
- Ensures the MMAudio repository is cloned and available in the sys.path.
43
- This function is run once when the module is first imported.
44
- """
45
- if not MMAUDIO_REPO_DIR.exists():
46
- logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
47
- try:
48
- DEPS_DIR.mkdir(exist_ok=True)
49
- subprocess.run(
50
- ["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
51
- check=True, capture_output=True, text=True
52
- )
53
- logger.info("MMAudio repository cloned successfully.")
54
- except subprocess.CalledProcessError as e:
55
- logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
56
- raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.")
57
- else:
58
- logger.info("Found local MMAudio repository.")
59
-
60
- if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
61
- sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
62
- logger.info(f"Added '{MMAUDIO_REPO_DIR.resolve()}' to sys.path.")
63
-
64
- setup_mmaudio_dependencies()
65
-
66
- from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
67
- from mmaudio.model.flow_matching import FlowMatching
68
- from mmaudio.model.networks import MMAudio, get_my_mmaudio
69
- from mmaudio.model.utils.features_utils import FeaturesUtils
70
- from mmaudio.model.sequence_config import SequenceConfig
71
-
72
-
73
- class MMAudioManager:
74
- """
75
- Manages the MMAudio model for audio generation tasks.
76
- """
77
- def __init__(self, workspace_dir):
78
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
79
- self.cpu_device = torch.device("cpu")
80
- self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
81
- self.workspace_dir = workspace_dir
82
-
83
- self.all_model_cfg = all_model_cfg
84
- self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
85
- self.net: 'MMAudio' = None
86
- self.feature_utils: 'FeaturesUtils' = None
87
- self.seq_cfg: 'SequenceConfig' = None
88
-
89
- self._load_models_to_cpu()
90
-
91
- def _adjust_paths_for_repo(self):
92
- """Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
93
- for cfg_key in self.all_model_cfg:
94
- cfg = self.all_model_cfg[cfg_key]
95
- # The paths in the original config are relative, so we join them with our repo path
96
- cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
97
- cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
98
- if cfg.bigvgan_16k_path is not None:
99
- cfg.bigvgan_16k_path = MMAUDIO_REPO_DIR / cfg.bigvgan_16k_path
100
- cfg.synchformer_ckpt = MMAUDIO_REPO_DIR / cfg.synchformer_ckpt
101
-
102
- def _load_models_to_cpu(self):
103
- """Loads the MMAudio models to CPU memory on initialization."""
104
- try:
105
- self._adjust_paths_for_repo()
106
- logger.info("Verifying and downloading MMAudio models, if necessary...")
107
- self.model_config.download_if_needed()
108
-
109
- self.seq_cfg = self.model_config.seq_cfg
110
-
111
- logger.info(f"Loading MMAudio model: {self.model_config.model_name} to CPU...")
112
- self.net = get_my_mmaudio(self.model_config.model_name).eval()
113
- self.net.load_weights(torch.load(self.model_config.model_path, map_location=self.cpu_device, weights_only=True))
114
-
115
- logger.info("Loading MMAudio feature utils to CPU...")
116
- self.feature_utils = FeaturesUtils(
117
- tod_vae_ckpt=self.model_config.vae_path,
118
- synchformer_ckpt=self.model_config.synchformer_ckpt,
119
- enable_conditions=True,
120
- mode=self.model_config.mode,
121
- bigvgan_vocoder_ckpt=self.model_config.bigvgan_16k_path,
122
- need_vae_encoder=False
123
- )
124
- self.feature_utils = self.feature_utils.eval()
125
- self.net.to(self.cpu_device)
126
- self.feature_utils.to(self.cpu_device)
127
- logger.info("MMAudioManager ready on CPU.")
128
- except Exception as e:
129
- logger.error(f"Failed to load audio models: {e}", exc_info=True)
130
- self.net = None
131
-
132
- def to_gpu(self):
133
- """Moves the models and utilities to the GPU before inference."""
134
- if self.device == 'cpu': return
135
- logger.info(f"Moving MMAudioManager to GPU ({self.device})...")
136
- self.net.to(self.device, self.dtype)
137
- self.feature_utils.to(self.device, self.dtype)
138
-
139
- def to_cpu(self):
140
- """Moves the models back to CPU and clears VRAM after inference."""
141
- if self.device == 'cpu': return
142
- logger.info("Unloading MMAudioManager from GPU...")
143
- self.net.to(self.cpu_device)
144
- self.feature_utils.to(self.cpu_device)
145
- gc.collect()
146
- if torch.cuda.is_available(): torch.cuda.empty_cache()
147
-
148
- def generate_audio_for_video(self, video_path: str, prompt: str, duration_seconds: float, output_path_override: str = None) -> str:
149
- """
150
- Generates audio for a video file, applying a negative prompt to avoid speech.
151
- """
152
- if self.net is None:
153
- raise gr.Error("MMAudio model is not loaded. Cannot generate audio.")
154
-
155
- logger.info("--- Generating Audio for Video Fragment ---")
156
- logger.info(f"--- Video: {os.path.basename(video_path)}")
157
- logger.info(f"--- Duration: {duration_seconds:.2f}s")
158
-
159
- negative_prompt = "human voice, speech, talking, singing, narration"
160
- logger.info(f"--- Prompt: '{prompt}' | Negative Prompt: '{negative_prompt}'")
161
-
162
- if duration_seconds < 1:
163
- logger.warning("Fragment too short (<1s). Returning original video.")
164
- return video_path
165
-
166
- if self.device == 'cpu':
167
- logger.warning("Generating audio on CPU. This may be very slow.")
168
-
169
- try:
170
- self.to_gpu()
171
- with torch.no_grad():
172
- rng = torch.Generator(device=self.device).manual_seed(int(time.time()))
173
- fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)
174
-
175
- video_info = load_video(Path(video_path), duration_seconds)
176
- self.seq_cfg.duration = video_info.duration_sec
177
- self.net.update_seq_lengths(self.seq_cfg.latent_seq_len, self.seq_cfg.clip_seq_len, self.seq_cfg.sync_seq_len)
178
-
179
- audios = mmaudio_generate(
180
- clip_video=video_info.clip_frames.unsqueeze(0),
181
- sync_video=video_info.sync_frames.unsqueeze(0),
182
- text=[prompt],
183
- negative_text=[negative_prompt],
184
- feature_utils=self.feature_utils,
185
- net=self.net,
186
- fm=fm,
187
- rng=rng,
188
- cfg_strength=4.5
189
- )
190
- audio_waveform = audios.float().cpu()[0]
191
-
192
- output_video_path = output_path_override if output_path_override else os.path.join(self.workspace_dir, f"{Path(video_path).stem}_with_audio.mp4")
193
-
194
- make_video(video_info, Path(output_video_path), audio_waveform, sampling_rate=self.seq_cfg.sampling_rate)
195
- logger.info(f"--- Fragment with audio saved to: {os.path.basename(output_video_path)}")
196
- return output_video_path
197
- finally:
198
- self.to_cpu()
199
-
200
- # --- Singleton Instantiation ---
201
- try:
202
- with open("config.yaml", 'r') as f:
203
- config = yaml.safe_load(f)
204
- WORKSPACE_DIR = config['application']['workspace_dir']
205
- mmaudio_manager_singleton = MMAudioManager(workspace_dir=WORKSPACE_DIR)
206
- except Exception as e:
207
- logger.error(f"Could not initialize MMAudioManager: {e}", exc_info=True)
208
- mmaudio_manager_singleton = None
managers/seedvr_manager.py DELETED
@@ -1,233 +0,0 @@
1
- # managers/seedvr_manager.py
2
- #
3
- # Copyright (C) 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Version: 4.0.0 (Root Installer & Executor)
6
- #
7
- # This version fully adopts the logic from the functional hd_specialist.py example.
8
- # It acts as a setup manager: it clones the SeedVR repo and then copies all
9
- # necessary directories (projects, common, models, configs, ckpts) to the
10
- # application root. It also handles the pip installation of the Apex dependency.
11
- # This ensures that the SeedVR code runs in the exact file structure it expects.
12
-
13
- import torch
14
- import torch.distributed as dist
15
- import os
16
- import gc
17
- import logging
18
- import sys
19
- import subprocess
20
- from pathlib import Path
21
- from urllib.parse import urlparse
22
- from torch.hub import download_url_to_file
23
- import gradio as gr
24
- import mediapy
25
- from einops import rearrange
26
- import shutil
27
- from omegaconf import OmegaConf
28
-
29
- logger = logging.getLogger(__name__)
30
-
31
- # --- Caminhos Globais ---
32
- APP_ROOT = Path("/home/user/app")
33
- DEPS_DIR = APP_ROOT / "deps"
34
- SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space"
35
- SEEDVR_SPACE_URL = "https://huggingface.co/spaces/ByteDance-Seed/SeedVR2-3B"
36
-
37
- class SeedVrManager:
38
- def __init__(self, workspace_dir="deformes_workspace"):
39
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
40
- self.runner = None
41
- self.workspace_dir = workspace_dir
42
- self.is_initialized = False
43
- self._original_barrier = None
44
- self.setup_complete = False # Flag para rodar o setup apenas uma vez
45
- logger.info("SeedVrManager initialized. Setup will run on first use.")
46
-
47
- def _full_setup(self):
48
- """
49
- Executa todo o processo de setup uma única vez.
50
- """
51
- if self.setup_complete:
52
- return
53
-
54
- logger.info("--- Starting Full SeedVR Setup ---")
55
-
56
- # 1. Clonar o repositório se não existir
57
- if not SEEDVR_SPACE_DIR.exists():
58
- logger.info(f"Cloning SeedVR Space repo to {SEEDVR_SPACE_DIR}...")
59
- DEPS_DIR.mkdir(exist_ok=True, parents=True)
60
- subprocess.run(
61
- ["git", "clone", "--depth", "1", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)],
62
- check=True, capture_output=True, text=True
63
- )
64
-
65
- # 2. Copiar as pastas necessárias para a raiz da aplicação
66
- required_dirs = ["projects", "common", "models", "configs_3b", "configs_7b"]
67
- for dirname in required_dirs:
68
- source = SEEDVR_SPACE_DIR / dirname
69
- target = APP_ROOT / dirname
70
- if not target.exists():
71
- logger.info(f"Copying '{dirname}' to application root...")
72
- shutil.copytree(source, target)
73
-
74
- # 3. Adicionar a raiz ao sys.path para garantir que os imports funcionem
75
- if str(APP_ROOT) not in sys.path:
76
- sys.path.insert(0, str(APP_ROOT))
77
- logger.info(f"Added '{APP_ROOT}' to sys.path.")
78
-
79
- # 4. Instalar dependências complexas como Apex
80
- try:
81
- import apex
82
- logger.info("Apex is already installed.")
83
- except ImportError:
84
- logger.info("Installing Apex dependency...")
85
- apex_url = 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
86
- apex_wheel_path = _load_file_from_url(url=apex_url, model_dir=str(DEPS_DIR))
87
- subprocess.run(f"pip install {apex_wheel_path}", check=True, shell=True)
88
- logger.info("Apex installed successfully.")
89
-
90
- # 5. Baixar os modelos para a pasta ./ckpts na raiz
91
- ckpt_dir = APP_ROOT / 'ckpts'
92
- ckpt_dir.mkdir(exist_ok=True)
93
- pretrain_model_urls = {
94
- 'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
95
- 'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
96
- 'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth',
97
- 'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
98
- 'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt'
99
- }
100
- for name, url in pretrain_model_urls.items():
101
- _load_file_from_url(url=url, model_dir=str(ckpt_dir))
102
-
103
- self.setup_complete = True
104
- logger.info("--- Full SeedVR Setup Complete ---")
105
-
106
- def _initialize_runner(self, model_version: str):
107
- if self.runner is not None: return
108
-
109
- # Garante que todo o ambiente está configurado antes de prosseguir
110
- self._full_setup()
111
-
112
- # Agora que o setup está feito, podemos importar os módulos
113
- from projects.video_diffusion_sr.infer import VideoDiffusionInfer
114
- from common.config import load_config
115
- from common.seed import set_seed
116
-
117
- if dist.is_available() and not dist.is_initialized():
118
- os.environ["MASTER_ADDR"] = "127.0.0.1"
119
- os.environ["MASTER_PORT"] = "12355"
120
- os.environ["RANK"] = str(0)
121
- os.environ["WORLD_SIZE"] = str(1)
122
- dist.init_process_group(backend='gloo')
123
- logger.info("Initialized torch.distributed process group.")
124
-
125
- logger.info(f"Initializing SeedVR2 {model_version} runner...")
126
- if model_version == '3B':
127
- config_path = APP_ROOT / 'configs_3b' / 'main.yaml'
128
- checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_3b.pth'
129
- else: # Assumimos 7B
130
- config_path = APP_ROOT / 'configs_7b' / 'main.yaml'
131
- checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_7b.pth'
132
-
133
- config = load_config(str(config_path))
134
-
135
- self.runner = VideoDiffusionInfer(config)
136
- OmegaConf.set_readonly(self.runner.config, False)
137
-
138
- self.runner.configure_dit_model(device=self.device, checkpoint=str(checkpoint_path))
139
- self.runner.configure_vae_model()
140
-
141
- if hasattr(self.runner.vae, "set_memory_limit"):
142
- self.runner.vae.set_memory_limit(**self.runner.config.vae.memory_limit)
143
-
144
- self.is_initialized = True
145
- logger.info(f"Runner for SeedVR2 {model_version} initialized and ready.")
146
-
147
- def _unload_runner(self):
148
- if self.runner is not None:
149
- del self.runner
150
- self.runner = None
151
- gc.collect()
152
- torch.cuda.empty_cache()
153
- self.is_initialized = False
154
- logger.info("Runner do SeedVR2 descarregado da VRAM.")
155
- if dist.is_initialized():
156
- dist.destroy_process_group()
157
- logger.info("Destroyed torch.distributed process group.")
158
-
159
- def process_video(self, input_video_path: str, output_video_path: str, prompt: str,
160
- model_version: str = '7B', steps: int = 100, seed: int = 666,
161
- progress: gr.Progress = None) -> str:
162
- try:
163
- self._initialize_runner(model_version)
164
-
165
- # Precisamos importar aqui, pois o sys.path é modificado no setup
166
- from common.seed import set_seed
167
- from data.image.transforms.divisible_crop import DivisibleCrop
168
- from data.image.transforms.na_resize import NaResize
169
- from data.video.transforms.rearrange import Rearrange
170
- from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
171
- from torchvision.transforms import Compose, Lambda, Normalize
172
- from torchvision.io.video import read_video
173
-
174
- set_seed(seed, same_across_ranks=True)
175
- self.runner.config.diffusion.timesteps.sampling.steps = steps
176
- self.runner.configure_diffusion()
177
-
178
- video_tensor = read_video(input_video_path, output_format="TCHW")[0] / 255.0
179
- res_h, res_w = video_tensor.shape[-2:]
180
- video_transform = Compose([
181
- NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
182
- Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
183
- DivisibleCrop((16, 16)),
184
- Normalize(0.5, 0.5),
185
- Rearrange("t c h w -> c t h w"),
186
- ])
187
- cond_latents = [video_transform(video_tensor.to(self.device))]
188
- input_videos = cond_latents
189
- self.runner.dit.to("cpu")
190
- self.runner.vae.to(self.device)
191
- cond_latents = self.runner.vae_encode(cond_latents)
192
- self.runner.vae.to("cpu"); gc.collect(); torch.cuda.empty_cache()
193
- self.runner.dit.to(self.device)
194
-
195
- pos_emb = torch.load(APP_ROOT / 'pos_emb.pt').to(self.device)
196
- neg_emb = torch.load(APP_ROOT / 'neg_emb.pt').to(self.device)
197
- text_embeds_dict = {"texts_pos": [pos_emb], "texts_neg": [neg_emb]}
198
-
199
- noises = [torch.randn_like(latent) for latent in cond_latents]
200
- conditions = [self.runner.get_condition(noise, latent_blur=latent, task="sr") for noise, latent in zip(noises, cond_latents)]
201
-
202
- with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
203
- video_tensors = self.runner.inference(noises=noises, conditions=conditions, dit_offload=True, **text_embeds_dict)
204
-
205
- self.runner.dit.to("cpu"); gc.collect(); torch.cuda.empty_cache()
206
- self.runner.vae.to(self.device)
207
- samples = self.runner.vae_decode(video_tensors)
208
- final_sample = samples[0]
209
- input_video_sample = input_videos[0]
210
- if final_sample.shape[1] < input_video_sample.shape[1]:
211
- input_video_sample = input_video_sample[:, :final_sample.shape[1]]
212
-
213
- final_sample = wavelet_reconstruction(rearrange(final_sample, "c t h w -> t c h w"), rearrange(input_video_sample, "c t h w -> t c h w"))
214
- final_sample = rearrange(final_sample, "t c h w -> t h w c")
215
- final_sample = final_sample.clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round()
216
- final_sample_np = final_sample.to(torch.uint8).cpu().numpy()
217
-
218
- mediapy.write_video(output_video_path, final_sample_np, fps=24)
219
- logger.info(f"HD Mastered video saved to: {output_video_path}")
220
- return output_video_path
221
- finally:
222
- self._unload_runner()
223
-
224
- def _load_file_from_url(url, model_dir='./', file_name=None):
225
- os.makedirs(model_dir, exist_ok=True)
226
- filename = file_name or os.path.basename(urlparse(url).path)
227
- cached_file = os.path.abspath(os.path.join(model_dir, filename))
228
- if not os.path.exists(cached_file):
229
- logger.info(f'Downloading: "{url}" to {cached_file}')
230
- download_url_to_file(url, cached_file, hash_prefix=None, progress=True)
231
- return cached_file
232
-
233
- seedvr_manager_singleton = SeedVrManager()
managers/upscaler_specialist.py DELETED
@@ -1,91 +0,0 @@
1
- # upscaler_specialist.py
2
- # Copyright (C) 2025 Carlos Rodrigues
3
- # Especialista ADUC para upscaling espacial de tensores latentes.
4
-
5
- import torch
6
- import logging
7
- from diffusers import LTXLatentUpsamplePipeline
8
- from managers.ltx_manager import ltx_manager_singleton
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
- class UpscalerSpecialist:
13
- """
14
- Especialista responsável por aumentar a resolução espacial de tensores latentes
15
- usando o LTX Video Spatial Upscaler.
16
- """
17
- def __init__(self):
18
- # Força uso de CUDA se disponível
19
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
20
- self.base_vae = None
21
- self.pipe_upsample = None
22
-
23
-
24
- def _lazy_init(self):
25
- try:
26
- # Tenta usar o VAE do ltx_manager
27
- if ltx_manager_singleton.workers:
28
- candidate_vae = ltx_manager_singleton.workers[0].pipeline.vae
29
- if candidate_vae.__class__.__name__ == "AutoencoderKLLTXVideo":
30
- self.base_vae = candidate_vae
31
- logger.info("[Upscaler] Usando VAE do ltx_manager (AutoencoderKLLTXVideo).")
32
- else:
33
- logger.warning(f"[Upscaler] VAE incompatível: {type(candidate_vae)}. "
34
- "Carregando AutoencoderKLLTXVideo manualmente...")
35
- from diffusers.models.autoencoders import AutoencoderKLLTXVideo
36
- self.base_vae = AutoencoderKLLTXVideo.from_pretrained(
37
- "linoyts/LTX-Video-spatial-upscaler-0.9.8",
38
- subfolder="vae",
39
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
40
- ).to(self.device)
41
- else:
42
- logger.warning("[Upscaler] Nenhum worker disponível, carregando VAE manualmente...")
43
- from diffusers.models.autoencoders import AutoencoderKLLTXVideo
44
- self.base_vae = AutoencoderKLLTXVideo.from_pretrained(
45
- "linoyts/LTX-Video-spatial-upscaler-0.9.8",
46
- subfolder="vae",
47
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
48
- ).to(self.device)
49
-
50
- # Carregar pipeline
51
- self.pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
52
- "linoyts/LTX-Video-spatial-upscaler-0.9.8",
53
- vae=self.base_vae,
54
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
55
- ).to(self.device)
56
-
57
- logger.info("[Upscaler] Pipeline carregado com sucesso.")
58
-
59
- except Exception as e:
60
- logger.error(f"[Upscaler] Falha ao carregar pipeline: {e}")
61
- self.pipe_upsample = None
62
-
63
-
64
-
65
- @torch.no_grad()
66
- def upscale(self, latents: torch.Tensor) -> torch.Tensor:
67
- """Aplica o upscaling 2x nos tensores latentes fornecidos."""
68
- self._lazy_init()
69
- if self.pipe_upsample is None:
70
- logger.warning("[Upscaler] Pipeline indisponível. Retornando latentes originais.")
71
- return latents
72
-
73
- try:
74
- logger.info(f"[Upscaler] Recebido shape {latents.shape}. Executando upscale em {self.device}...")
75
-
76
- # [CORREÇÃO FINAL] Conforme a documentação oficial, o resultado está em .frames
77
- result = self.pipe_upsample(latents=latents, output_type="latent")
78
- output_tensor = result.frames
79
-
80
- logger.info(f"[Upscaler] Upscale concluído. Novo shape: {output_tensor.shape}")
81
- return output_tensor
82
-
83
- except Exception as e:
84
- logger.error(f"[Upscaler] Erro durante upscale: {e}", exc_info=True)
85
- return latents
86
-
87
-
88
- # ---------------------------
89
- # Singleton global
90
- # ---------------------------
91
- upscaler_specialist_singleton = UpscalerSpecialist()
managers/vae_manager.py DELETED
@@ -1,99 +0,0 @@
1
- # managers/vae_manager.py
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
- #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
- # carlex22@gmail.com
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
- #
10
- # Repositórios e Projetos Relacionados:
11
- # GitHub: https://github.com/carlex22/Aduc-sdr
12
- #
13
- # PENDING PATENT NOTICE: Please see NOTICE.md.
14
- #
15
- #
16
- # This file defines the VaeManager specialist. Its purpose is to abstract all
17
- # direct interactions with the Variational Autoencoder (VAE) model. It handles
18
- # the model's state (CPU/GPU memory), provides clean interfaces for encoding and
19
- # decoding, and ensures that the heavy VAE model only occupies VRAM when actively
20
- # performing a task, freeing up resources for other specialists.
21
- #
22
- # Version 1.0.1
23
-
24
-
25
- import torch
26
- import logging
27
- import gc
28
- from typing import Generator
29
-
30
- # Import the source of the VAE model and the low-level functions
31
- from managers.ltx_manager import ltx_manager_singleton
32
- from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
33
-
34
- logger = logging.getLogger(__name__)
35
-
36
- class VaeManager:
37
- """
38
- A specialist for managing the LTX VAE model. It provides high-level methods
39
- for encoding pixels to latents and decoding latents to pixels, while managing
40
- the model's presence on the GPU to conserve VRAM.
41
- """
42
- def __init__(self, vae_model):
43
- self.vae = vae_model
44
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
45
- self.cpu_device = torch.device('cpu')
46
-
47
- # Initialize the VAE on the CPU to keep VRAM free at startup
48
- self.vae.to(self.cpu_device)
49
- logger.info(f"VaeManager initialized. VAE model is on CPU.")
50
-
51
- def to_gpu(self):
52
- """Moves the VAE model to the active GPU."""
53
- if self.device == 'cpu': return
54
- logger.info("VaeManager: Moving VAE to GPU...")
55
- self.vae.to(self.device)
56
-
57
- def to_cpu(self):
58
- """Moves the VAE model to the CPU and clears VRAM cache."""
59
- if self.device == 'cpu': return
60
- logger.info("VaeManager: Unloading VAE from GPU...")
61
- self.vae.to(self.cpu_device)
62
- gc.collect()
63
- if torch.cuda.is_available():
64
- torch.cuda.empty_cache()
65
-
66
- @torch.no_grad()
67
- def encode(self, pixel_tensor: torch.Tensor) -> torch.Tensor:
68
- """
69
- Encodes a pixel-space tensor to the latent space.
70
- Manages moving the VAE to and from the GPU.
71
- """
72
- try:
73
- self.to_gpu()
74
- pixel_tensor = pixel_tensor.to(self.device, dtype=self.vae.dtype)
75
- latents = vae_encode(pixel_tensor, self.vae, vae_per_channel_normalize=True)
76
- return latents.to(self.cpu_device) # Return to CPU to free VRAM
77
- finally:
78
- self.to_cpu()
79
-
80
- @torch.no_grad()
81
- def decode(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
82
- """
83
- Decodes a latent-space tensor to pixels.
84
- Manages moving the VAE to and from the GPU.
85
- """
86
- try:
87
- self.to_gpu()
88
- latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
89
- timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
90
- pixels = vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
91
- return pixels.to(self.cpu_device) # Return to CPU to free VRAM
92
- finally:
93
- self.to_cpu()
94
-
95
- # --- Singleton Instance ---
96
- # The VaeManager must use the exact same VAE instance as the LTX pipeline to ensure
97
- # latent space compatibility. We source it directly from the already-initialized ltx_manager.
98
- source_vae_model = ltx_manager_singleton.workers[0].pipeline.vae
99
- vae_manager_singleton = VaeManager(source_vae_model)
prompts/LICENSE DELETED
@@ -1,25 +0,0 @@
1
- # Euia-AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR para geração de vídeo coerente.
2
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
- #
4
- # Contato:
5
- # Carlos Rodrigues dos Santos
6
- # carlex22@gmail.com
7
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
8
- #
9
- # Repositórios e Projetos Relacionados:
10
- # GitHub: https://github.com/carlex22/Aduc-sdr
11
- # Hugging Face (Ltx-SuperTime-60Secondos): https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/
12
- # Hugging Face (Novinho): https://huggingface.co/spaces/Carlexxx/Novinho/
13
- #
14
- # This program is free software: you can redistribute it and/or modify
15
- # it under the terms of the GNU Affero General Public License as published by
16
- # the Free Software Foundation, either version 3 of the License, or
17
- # (at your option) any later version.
18
- #
19
- # This program is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
- # GNU Affero General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Affero General Public License
25
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
prompts/NOTICE.md DELETED
@@ -1,76 +0,0 @@
1
- # NOTICE
2
-
3
- Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
4
-
5
- ---
6
-
7
- ## Aviso de Propriedade Intelectual e Licenciamento
8
-
9
- ### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
10
-
11
- O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
12
-
13
- O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
14
-
15
- * Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
16
- * Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
17
- * Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
18
- * Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
19
- * O uso de "tokens universais" para comunicação agnóstica a modelos.
20
-
21
- ### **Reconhecimento e Implicações (EM PORTUGUÊS):**
22
-
23
- Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
24
-
25
- 1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
26
- 2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
27
- 3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
28
-
29
- ---
30
-
31
- ### **Patent Pending (IN ENGLISH):**
32
-
33
- The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
34
-
35
- The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
36
-
37
- * Fragmentation and scaling of requests exceeding AI model context limits.
38
- * Intelligent distribution of sub-tasks to heterogeneous specialists.
39
- * Persistent state management with iterative evaluation and feedback for planning subsequent steps.
40
- * Cost, latency, and quality-aware planning and routing.
41
- * The use of "universal tokens" for model-agnostic communication.
42
-
43
- ### **Acknowledgement and Implications (IN ENGLISH):**
44
-
45
- By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
46
-
47
- 1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
48
- 2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
49
- 3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
50
-
51
- ---
52
-
53
- ## Licença AGPLv3
54
-
55
- This program is free software: you can redistribute it and/or modify
56
- it under the terms of the GNU Affero General Public License as published by
57
- the Free Software Foundation, either version 3 of the License, or
58
- (at your option) any later version.
59
-
60
- This program is distributed in the hope that it will be useful,
61
- but WITHOUT ANY WARRANTY; without even the implied warranty of
62
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
63
- GNU Affero General Public License for more details.
64
-
65
- You should have received a copy of the GNU Affero General Public License
66
- along with this program. If not, see <https://www.gnu.org/licenses/>.
67
-
68
- ---
69
-
70
- **Contato para Consultas:**
71
-
72
- Para mais informações sobre a arquitetura ADUC, o status do patenteamento, ou para discutir licenciamento para usos comerciais ou não conformes com a AGPLv3, por favor, entre em contato:
73
-
74
- Carlos Rodrigues dos Santos
75
- carlex22@gmail.com
76
- Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
prompts/README.md DELETED
@@ -1,211 +0,0 @@
1
- ---
2
- title: Euia-AducSdr
3
- emoji: 🎥
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: gradio
7
- app_file: app.py
8
- pinned: true
9
- license: agpl-3.0
10
- short_description: Uma implementação aberta e funcional da arquitetura ADUC-SDR
11
- ---
12
-
13
-
14
- ### 🇧🇷 Português
15
-
16
- Uma implementação aberta e funcional da arquitetura ADUC-SDR (Arquitetura de Unificação Compositiva - Escala Dinâmica e Resiliente), projetada para a geração de vídeo coerente de longa duração. Este projeto materializa os princípios de fragmentação, navegação geométrica e um mecanismo de "eco causal 4bits memoria" para garantir a continuidade física e narrativa em sequências de vídeo geradas por múltiplos modelos de IA.
17
-
18
- **Licença:** Este projeto é licenciado sob os termos da **GNU Affero General Public License v3.0**. Isto significa que se você usar este software (ou qualquer trabalho derivado) para fornecer um serviço através de uma rede, você é **obrigado a disponibilizar o código-fonte completo** da sua versão para os usuários desse serviço.
19
-
20
- - **Copyright (C) 4 de Agosto de 2025, Carlos Rodrigues dos Santos**
21
- - Uma cópia completa da licença pode ser encontrada no arquivo [LICENSE](LICENSE).
22
-
23
- ---
24
-
25
- ### 🇬🇧 English
26
-
27
- An open and functional implementation of the ADUC-SDR (Architecture for Compositive Unification - Dynamic and Resilient Scaling) architecture, designed for long-form coherent video generation. This project materializes the principles of fragmentation, geometric navigation, and a "causal echo 4bits memori" mechanism to ensure physical and narrative continuity in video sequences generated by multiple AI models.
28
-
29
- **License:** This project is licensed under the terms of the **GNU Affero General Public License v3.0**. This means that if you use this software (or any derivative work) to provide a service over a network, you are **required to make the complete source code** of your version available to the users of that service.
30
-
31
- - **Copyright (C) August 4, 2025, Carlos Rodrigues dos Santos**
32
- - A full copy of the license can be found in the [LICENSE](LICENSE) file.
33
-
34
- ---
35
-
36
- ## **Aviso de Propriedade Intelectual e Patenteamento**
37
-
38
- ### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
39
-
40
- A arquitetura e o método **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste projeto e nas reivindicações associadas, estão **atualmente em processo de patenteamento**.
41
-
42
- O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, que incluem, mas não se limitam a:
43
-
44
- * Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
45
- * Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
46
- * Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
47
- * Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
48
- * O uso de "tokens universais" para comunicação agnóstica a modelos.
49
-
50
- Ao utilizar este software e a arquitetura ADUC aqui implementada, você reconhece a natureza inovadora desta arquitetura e que a **reprodução ou exploração da lógica central da ADUC em sistemas independentes pode infringir direitos de patente pendente.**
51
-
52
- ---
53
-
54
- ### **Patent Pending (IN ENGLISH):**
55
-
56
- The **ADUC (Automated Discovery and Orchestration of Complex tasks)** architecture and method, as described in this project and its associated claims, are **currently in the process of being patented.**
57
-
58
- The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
59
-
60
- * Fragmentation and scaling of requests exceeding AI model context limits.
61
- * Intelligent distribution of sub-tasks to heterogeneous specialists.
62
- * Persistent state management with iterative evaluation and feedback for planning subsequent steps.
63
- * Cost, latency, and quality-aware planning and routing.
64
- * The use of "universal tokens" for model-agnostic communication.
65
-
66
- By using this software and the ADUC architecture implemented herein, you acknowledge the innovative nature of this architecture and that **the reproduction or exploitation of ADUC's core logic in independent systems may infringe upon pending patent rights.**
67
-
68
- ---
69
-
70
- ### Detalhes Técnicos e Reivindicações da ADUC
71
-
72
- #### 🇧🇷 Definição Curta (para Tese e Patente)
73
-
74
- **ADUC** é um *framework pré-input* e *intermediário* de **gerenciamento de prompts** que:
75
-
76
- 1. **fragmenta** solicitações acima do limite de contexto de qualquer modelo,
77
- 2. **escala linearmente** (processo sequencial com memória persistida),
78
- 3. **distribui** sub-tarefas a **especialistas** (modelos/ferramentas heterogêneos), e
79
- 4. **realimenta** a próxima etapa com avaliação do que foi feito/esperado (LLM diretor).
80
-
81
- Não é um modelo; é uma **camada orquestradora** plugável antes do input de modelos existentes (texto, imagem, áudio, vídeo), usando *tokens universais* e a tecnologia atual.
82
-
83
- #### 🇬🇧 Short Definition (for Thesis and Patent)
84
-
85
- **ADUC** is a *pre-input* and *intermediate* **prompt management framework** that:
86
-
87
- 1. **fragments** requests exceeding any model's context limit,
88
- 2. **scales linearly** (sequential process with persisted memory),
89
- 3. **distributes** sub-tasks to **specialists** (heterogeneous models/tools), and
90
- 4. **feeds back** to the next step with an evaluation of what was done/expected (director LLM).
91
-
92
- It is not a model; it is a pluggable **orchestration layer** before the input of existing models (text, image, audio, video), using *universal tokens* and current technology.
93
-
94
- ---
95
-
96
- #### 🇧🇷 Elementos Essenciais (Telegráfico)
97
-
98
- * **Agnóstico a modelos:** opera com qualquer LLM/difusor/API.
99
- * **Pré-input manager:** recebe pedido do usuário, **divide** em blocos ≤ limite de tokens, **prioriza**, **agenda** e **roteia**.
100
- * **Memória persistida:** resultados/latentes/“eco” viram **estado compartilhado** para o próximo bloco (nada é ignorado).
101
- * **Especialistas:** *routers* decidem quem faz o quê (ex.: “descrição → LLM-A”, “keyframe → Img-B”, “vídeo → Vid-C”).
102
- * **Controle de qualidade:** LLM diretor compara *o que fez* × *o que deveria* × *o que falta* e **regenera objetivos** do próximo fragmento.
103
- * **Custo/latência-aware:** planeja pela **VRAM/tempo/custo**, não tenta “abraçar tudo de uma vez”.
104
-
105
- #### 🇬🇧 Essential Elements (Telegraphic)
106
-
107
- * **Model-agnostic:** operates with any LLM/diffuser/API.
108
- * **Pre-input manager:** receives user request, **divides** into blocks ≤ token limit, **prioritizes**, **schedules**, and **routes**.
109
- * **Persisted memory:** results/latents/“echo” become **shared state** for the next block (nothing is ignored).
110
- * **Specialists:** *routers* decide who does what (e.g., “description → LLM-A”, “keyframe → Img-B”, “video → Vid-C”).
111
- * **Quality control:** director LLM compares *what was done* × *what should be done* × *what is missing* and **regenerates objectives** for the next fragment.
112
- * **Cost/latency-aware:** plans by **VRAM/time/cost**, does not try to “embrace everything at once”.
113
-
114
- ---
115
-
116
- #### 🇧🇷 Reivindicações Independentes (Método e Sistema)
117
-
118
- **Reivindicação Independente (Método) — Versão Enxuta:**
119
-
120
- 1. **Método** de **orquestração de prompts** para execução de tarefas acima do limite de contexto de modelos de IA, compreendendo:
121
- (a) **receber** uma solicitação que excede um limite de tokens;
122
- (b) **analisar** a solicitação por um **LLM diretor** e **fragmentá-la** em sub-tarefas ≤ limite;
123
- (c) **selecionar** especialistas de execução para cada sub-tarefa com base em capacidades declaradas;
124
- (d) **gerar** prompts específicos por sub-tarefa em **tokens universais**, incluindo referências ao **estado persistido** de execuções anteriores;
125
- (e) **executar sequencialmente** as sub-tarefas e **persistir** suas saídas como memória (incluindo latentes/eco/artefatos);
126
- (f) **avaliar** automaticamente a saída versus metas declaradas e **regenerar objetivos** do próximo fragmento;
127
- (g) **iterar** (b)–(f) até que os critérios de completude sejam atendidos, produzindo o resultado agregado;
128
- em que o framework **escala linearmente** no tempo e armazenamento físico, **independente** da janela de contexto dos modelos subjacentes.
129
-
130
- **Reivindicação Independente (Sistema):**
131
-
132
- 2. **Sistema** de orquestração de prompts, compreendendo: um **planejador LLM diretor**; um **roteador de especialistas**; um **banco de estado persistido** (incl. memória cinética para vídeo); um **gerador de prompts universais**; e um **módulo de avaliação/realimentação**, acoplados por uma **API pré-input** a modelos heterogêneos.
133
-
134
- #### 🇬🇧 Independent Claims (Method and System)
135
-
136
- **Independent Claim (Method) — Concise Version:**
137
-
138
- 1. A **method** for **prompt orchestration** for executing tasks exceeding AI model context limits, comprising:
139
- (a) **receiving** a request that exceeds a token limit;
140
- (b) **analyzing** the request by a **director LLM** and **fragmenting it** into sub-tasks ≤ the limit;
141
- (c) **selecting** execution specialists for each sub-task based on declared capabilities;
142
- (d) **generating** specific prompts per sub-task in **universal tokens**, including references to the **persisted state** of previous executions;
143
- (e) **sequentially executing** the sub-tasks and **persisting** their outputs as memory (including latents/echo/artifacts);
144
- (f) **automatically evaluating** the output against declared goals and **regenerating objectives** for the next fragment;
145
- (g) **iterating** (b)–(f) until completion criteria are met, producing the aggregated result;
146
- wherein the framework **scales linearly** in time and physical storage, **independent** of the context window of the underlying models.
147
-
148
- **Independent Claim (System):**
149
-
150
- 2. A prompt orchestration **system**, comprising: a **director LLM planner**; a **specialist router**; a **persisted state bank** (incl. kinetic memory for video); a **universal prompt generator**; and an **evaluation/feedback module**, coupled via a **pre-input API** to heterogeneous models.
151
-
152
- ---
153
-
154
- #### 🇧🇷 Dependentes Úteis
155
-
156
- * (3) Onde o roteamento considera **custo/latência/VRAM** e metas de qualidade.
157
- * (4) Onde o banco de estado inclui **eco cinético** para vídeo (últimos *n* frames/latentes/fluxo).
158
- * (5) Onde a avaliação usa métricas específicas por domínio (Lflow, consistência semântica, etc.).
159
- * (6) Onde *tokens universais* padronizam instruções entre especialistas.
160
- * (7) Onde a orquestração decide **cut vs continuous** e **corte regenerativo** (Déjà-Vu) ao editar vídeo.
161
- * (8) Onde o sistema **nunca descarta** conteúdo excedente: **reagenda** em novos fragmentos.
162
-
163
- #### 🇬🇧 Useful Dependents
164
-
165
- * (3) Wherein routing considers **cost/latency/VRAM** and quality goals.
166
- * (4) Wherein the state bank includes **kinetic echo** for video (last *n* frames/latents/flow).
167
- * (5) Wherein evaluation uses domain-specific metrics (Lflow, semantic consistency, etc.).
168
- * (6) Wherein *universal tokens* standardize instructions between specialists.
169
- * (7) Wherein orchestration decides **cut vs continuous** and **regenerative cut** (Déjà-Vu) when editing video.
170
- * (8) Wherein the system **never discards** excess content: it **reschedules** it in new fragments.
171
-
172
- ---
173
-
174
- #### 🇧🇷 Como isso conversa com SDR (Vídeo)
175
-
176
- * **Eco Cinético**: é um **tipo de estado persistido** consumido pelo próximo passo.
177
- * **Déjà-Vu (Corte Regenerativo)**: é **uma política de orquestração** aplicada quando há edição; ADUC decide, monta os prompts certos e chama o especialista de vídeo.
178
- * **Cut vs Continuous**: decisão do **diretor** com base em estado + metas; ADUC roteia e garante a sobreposição/remoção final.
179
-
180
- #### 🇬🇧 How this Converses with SDR (Video)
181
-
182
- * **Kinetic Echo**: is a **type of persisted state** consumed by the next step.
183
- * **Déjà-Vu (Regenerative Cut)**: is an **orchestration policy** applied during editing; ADUC decides, crafts the right prompts, and calls the video specialist.
184
- * **Cut vs Continuous**: decision made by the **director** based on state + goals; ADUC routes and ensures the final overlap/removal.
185
-
186
- ---
187
-
188
- #### 🇧🇷 Mensagem Clara ao Usuário (Experiência)
189
-
190
- > “Seu pedido excede o limite X do modelo Y. Em vez de truncar silenciosamente, o **ADUC** dividirá e **entregará 100%** do conteúdo por etapas coordenadas.”
191
-
192
- Isso é diferencial prático e jurídico: **não-obviedade** por transformar limite de contexto em **pipeline controlado**, com **persistência de estado** e **avaliação iterativa**.
193
-
194
- #### 🇬🇧 Clear User Message (Experience)
195
-
196
- > "Your request exceeds model Y's limit X. Instead of silently truncating, **ADUC** will divide and **deliver 100%** of the content through coordinated steps."
197
-
198
- This is a practical and legal differentiator: **non-obviousness** by transforming context limits into a **controlled pipeline**, with **state persistence** and **iterative evaluation**.
199
-
200
- ---
201
-
202
- ### Contact / Contato / Contacto
203
-
204
- - **Author / Autor:** Carlos Rodrigues dos Santos
205
- - **Email:** carlex22@gmail.com
206
- - **GitHub:** [https://github.com/carlex22/Aduc-sdr](https://github.com/carlex22/Aduc-sdr)
207
- - **Hugging Face Spaces:**
208
- - [Ltx-SuperTime-60Secondos](https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/)
209
- - [Novinho](https://huggingface.co/spaces/Carlexxx/Novinho/)
210
-
211
- ---
prompts/anticipatory_keyframe_prompt.txt DELETED
@@ -1,29 +0,0 @@
1
- # ROLE: AI Cinematographer and Storyboard Artist
2
-
3
- # GOAL:
4
- Your task is to generate a single, descriptive prompt for an image generation model (Flux). This prompt must describe a keyframe that serves as a perfect visual transition BETWEEN a current scene and a future scene. You must see what you just did, where you are, and where you are preparing to go.
5
-
6
- # CRITICAL DIRECTIVES:
7
- 1. **SYNTHESIZE, DON'T DESCRIBE:** Do not simply describe the "Current Scene" or the "Future Scene". Your prompt must create a visual concept that exists *in the moment between them*. It's the "in-between" frame.
8
-
9
- 2. **VISUAL ANCHORING:** The primary visual canvas is the "Current Base Image" (`[IMG-BASE]`). Your generated prompt should describe an evolution FROM this image. Maintain its environment and characters unless the narrative arc demands a change.
10
-
11
- 3. **NARRATIVE FORESHADOWING:** The prompt must contain visual elements that hint at or prepare for the "Future Scene". If the future scene is "the chicken climbs the sidewalk", your prompt for the current scene ("the chicken crosses the road") might be "the chicken, halfway across the road, lifts its head and looks towards the curb of the sidewalk".
12
-
13
- 4. **LEARN FROM THE PAST:** Analyze the "Previous Prompt" to understand the creative choices that led to the "Current Base Image". Maintain that stylistic and narrative trajectory.
14
-
15
- 5. **REFERENCE POOL:** Use the "General Reference Images" (`[IMG-REF-X]`) to maintain the identity and style of key subjects throughout the sequence.
16
-
17
- # CONTEXT FOR YOUR DECISION:
18
- - **Previous Prompt (What I thought to create the current image):**
19
- {historico_prompt}
20
-
21
- - **Current Scene (Where I am now):** "{cena_atual}"
22
- - **Future Scene (Where I am going next):** "{cena_futura}"
23
-
24
- # VISUAL ASSETS:
25
- # [The "Current Base Image" will be tagged as [IMG-BASE].]
26
- # [The "General Reference Images" will be tagged as [IMG-REF-1], [IMG-REF-2], etc.]
27
-
28
- # RESPONSE FORMAT:
29
- Respond with ONLY the final, single-line prompt string for the image generator.
prompts/audio_director_prompt.txt DELETED
@@ -1,18 +0,0 @@
1
- # ROLE: AI Audio Director and Sound Designer
2
-
3
- # GOAL:
4
- Analyze the provided film script/storyboard. Based on the overall narrative and mood, generate two distinct prompts for audio generation: one for a background music score and one for ambient sound effects (SFX).
5
-
6
- # INSTRUCTIONS:
7
- 1. **Analyze the Story:** Read the "Global Idea" and the "Scene Storyboard" to understand the plot, pacing, and emotional tone of the film.
8
- 2. **Create Music Prompt:** Write a concise, descriptive prompt for a music generation model (like MusicGen). Focus on genre, mood, instruments
9
- 3. **Create SFX Prompt:** Write a concise, descriptive prompt for an audio generation model (like AudioLDM2). Focus on ambient sounds and key effects that match the scenes.
10
- 4. **Output Format:** You MUST respond with a single, clean JSON object with exactly two keys: "music_prompt" and "sfx_prompt".
11
-
12
- # == PROVIDED CONTEXT ==
13
- - **Global Idea:** "{global_prompt}"
14
- - **Scene Storyboard:**
15
- {storyboard_str}
16
-
17
- # == YOUR TASK ==
18
- # Generate the JSON response with the two audio prompts.
prompts/cinematic_director_prompt.txt DELETED
@@ -1,27 +0,0 @@
1
- # ROLE: AI Cinematic Scenarist
2
-
3
- # GOAL:
4
- # Your single, crucial task is to write a rich, cinematic motion prompt.
5
- # This prompt must describe the most logical and compelling action that
6
- # connects the PRESENT visual state to the FUTURE visual state, considering
7
- # the context of the PAST.
8
-
9
- # CONTEXT FOR YOUR PROMPT:
10
- - Global Story Goal: {global_prompt}
11
- - Creative History: {story_history}
12
- - The Past: "{past_scene_desc}" -> [PAST_IMAGE]
13
- - The Present: "{present_scene_desc}" -> [PRESENT_IMAGE]
14
- - The Future: "{future_scene_desc}" -> [FUTURE_IMAGE]
15
-
16
- # CRITICAL PROMPT DIRECTIVES:
17
- # 1. ALWAYS DESCRIBE MOTION: The scene must not be static.
18
- # 2. STYLE: Be descriptive, cinematic, and direct.
19
- # 3. STRUCTURE: In a single paragraph (under 150 words), describe the scene's
20
- # motion, prioritizing in this EXACT order:
21
- # a. Actors/Animals: What are they doing?
22
- # b. Objects: How do they interact?
23
- # c. Camera: How is it moving?
24
- # d. Scenery/Environment: What details add to the mood?
25
-
26
- # RESPONSE FORMAT:
27
- # You MUST respond with ONLY the raw, single-line string for the motion prompt.
prompts/director_composition_prompt.txt DELETED
@@ -1,27 +0,0 @@
1
- # ROLE: AI Animation Director (Key Pose)
2
-
3
- # GOAL:
4
- Generate a single, powerful, CLIP-style prompt to create the NEXT keyframe in a sequence. Your goal is to describe a logical and visually coherent evolution FROM the last generated image.
5
-
6
- # CRITICAL DIRECTIVES:
7
- 1. **ANCHOR TO THE PREVIOUS SCENE:** The last generated image, tagged as `[IMG-1]`, represents the END of the previous scene. Your new prompt MUST describe what happens IMMEDIATELY AFTER. Treat `[IMG-1]` as your primary visual and environmental canvas.
8
-
9
- 2. **EVOLVE, DO NOT REPLACE:** Unless the "Current Scene Description" explicitly describes a major change in location or character (e.g., "cut to a new scene"), you must maintain the environment, lighting, and core subjects from `[IMG-1]`. Your prompt should describe how the characters and objects *within* that scene evolve.
10
-
11
- 3. **POSE, NOT PANNING:** Describe the state of the subject at a specific instant, not camera movement. Focus on body language, expression, and interaction that logically follows the previous state.
12
-
13
- 4. **USE REFERENCES FOR CONTINUITY:** Use the `[IMG-X]` tags provided to maintain the identity of characters and objects across frames. Prioritize `[IMG-1]` for environmental context.
14
-
15
- 5. **BE A DIRECTOR:** Use strong, active verbs. Instead of "the lion is now sitting", prefer "the lion lowers its body, muscles tensing as it settles onto the dry grass".
16
-
17
- # CONTEXT:
18
- - Global Story Goal: "{global_prompt}"
19
- - Current Scene Description: "{current_scene_desc}"
20
- - Scene History (what happened before):
21
- {history_scene}
22
-
23
- # VISUAL ASSETS FOR ANALYSIS:
24
- # [Images will be provided and tagged as [IMG-1] (Last Image/Environment), [IMG-2] (Character Ref), etc.]
25
-
26
- # RESPONSE FORMAT:
27
- Respond with ONLY the final, single-line prompt string.
prompts/flux_composition_wrapper_prompt.txt DELETED
@@ -1 +0,0 @@
1
- From the provided reference images, create a single, natural, and cohesive scene where: {target_prompt}
prompts/initial_motion_prompt.txt DELETED
@@ -1,20 +0,0 @@
1
- # ROLE: AI Cinematographer (Initial Scene)
2
-
3
- # GOAL:
4
- Create a single, concise, CLIP-style motion prompt. The prompt must describe a coherent video sequence that transitions from a STARTING image to a DESTINATION image.
5
-
6
- # INSTRUCTIONS:
7
- 1. **Analyze the Arc:** Understand the visual and narrative journey from the START to the DESTINATION image.
8
- 2. **Describe the Motion:** Focus on DYNAMICS (camera and subject movement).
9
- 3. **Style Guide:** Use dense, descriptive, cinematic keywords. Omit fluff like "The video shows...". Be direct.
10
-
11
- # CONTEXT:
12
- - Overall Story Goal: "{user_prompt}"
13
- - Destination Scene Description: "{destination_scene_description}"
14
-
15
- # SCENE ANALYSIS:
16
- # START Image: [Image 1]
17
- # DESTINATION Image: [Image 2]
18
-
19
- # RESPONSE FORMAT:
20
- Respond with ONLY the raw prompt string.
prompts/keyframe_selection_prompt.txt DELETED
@@ -1,20 +0,0 @@
1
- # ROLE: AI Film Editor / Photographer
2
-
3
- # GOAL:
4
- You are tasked with selecting the best keyframe for each scene of a storyboard to create a coherent visual narrative. You have a "scene bank" of available images. Your selections must create a smooth and logical progression.
5
-
6
- # INSTRUCTIONS:
7
- 1. **Analyze the Storyboard:** Read each scene description carefully to understand the intended action and emotion.
8
- 2. **Prioritize Continuity:** For each scene, your primary goal is to find an image from the "Image Pool" that represents a logical **next step** from the previously selected scene. Avoid jarring jumps in location, lighting, or character appearance unless the storyboard explicitly calls for a "cut".
9
- 3. **Maintain Consistency:** Your choices must be consistent with the characters and style established in the "Reference Images (Story Base)".
10
- 4. **Select the Best Fit:** If multiple images could work, choose the one that best captures the specific action or mood of the current scene description.
11
- 5. **Output Format:** You MUST respond with a single, clean JSON object with one key: "selected_image_identifiers". The value should be an array of strings, where each string is the identifier of the chosen image (e.g., "IMG-3"). The order of the array must match the order of the scenes in the storyboard. The length of the array must be exactly the same as the number of scenes.
12
-
13
- # == PROVIDED CONTEXT ==
14
- - **Storyboard:**
15
- {storyboard_str}
16
-
17
- - **Available Image Identifiers in Pool:** {image_identifiers}
18
-
19
- # == YOUR TASK ==
20
- # Generate the JSON response with the selected image identifiers, prioritizing a smooth visual and narrative flow from one selection to the next.
prompts/sound_director_prompt.txt DELETED
@@ -1,27 +0,0 @@
1
- # ROLE: AI Sound Director & Foley Artist
2
-
3
- # GOAL:
4
- You are the sound director for a film. Your task is to create a single, rich, and descriptive prompt for an audio generation model (like MMAudio). This prompt must describe the complete soundscape for the CURRENT scene, considering what happened before and what will happen next to ensure audio continuity.
5
-
6
- # CRITICAL RULES (MUST FOLLOW):
7
- 1. **NO SPEECH OR VOICES:** The final prompt must NOT include any terms related to human speech, dialogue, talking, voices, singing, or narration. The goal is to create a world of ambient sounds and specific sound effects (SFX).
8
- 2. **FOCUS ON THE PRESENT:** The audio must primarily match the CURRENT visual scene (Keyframe Kn) and its textual description (Ato_n).
9
- 3. **USE THE PAST FOR CONTINUITY:** Analyze the "Previous Audio Prompt" to understand the established soundscape. If a sound should logically continue from the previous scene, include it (e.g., "the continued sound of a gentle breeze...").
10
- 4. **USE THE FUTURE FOR FORESHADOWING:** Analyze the FUTURE keyframe and scene description. If appropriate, introduce subtle sounds that hint at what's to come. (e.g., if the next scene is a storm, you could add "...with the faint, distant rumble of thunder in the background.").
11
- 5. **BE DESCRIPTIVE:** Use evocative language. Instead of "dog bark", use "the sharp, excited yapping of a small dog". Combine multiple elements into a cohesive soundscape.
12
-
13
- # CONTEXT FOR YOUR DECISION:
14
-
15
- - **Previous Audio Prompt (what was just heard):**
16
- {audio_history}
17
-
18
- - **VISUAL PAST (Keyframe Kn-1):** [PAST_IMAGE]
19
- - **VISUAL PRESENT (Keyframe Kn):** [PRESENT_IMAGE]
20
- - **VISUAL FUTURE (Keyframe Kn+1):** [FUTURE_IMAGE]
21
-
22
- - **CURRENT Scene Description (Ato_n):** "{present_scene_desc}"
23
- - **CURRENT Motion Prompt (what the camera is doing):** "{motion_prompt}"
24
- - **FUTURE Scene Description (Ato_n+1):** "{future_scene_desc}"
25
-
26
- # RESPONSE FORMAT:
27
- Respond with ONLY the final, single-line prompt string for the audio generator.
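These templates use Python-style placeholders; below is a minimal sketch of how a caller might fill an excerpt of this template with `str.format` before dispatching it to the LLM. The template excerpt mirrors the lines above, but the context values are invented for illustration and do not come from the framework.

```python
# Excerpt of the template above; placeholders are filled by the orchestrator.
template = (
    "- **Previous Audio Prompt (what was just heard):**\n{audio_history}\n"
    '- **CURRENT Scene Description (Ato_n):** "{present_scene_desc}"\n'
    '- **CURRENT Motion Prompt (what the camera is doing):** "{motion_prompt}"\n'
    '- **FUTURE Scene Description (Ato_n+1):** "{future_scene_desc}"\n'
)

# Hypothetical context values, for illustration only.
filled = template.format(
    audio_history="the continued sound of a gentle breeze through tall grass",
    present_scene_desc="The knight crosses a wooden bridge over a stream",
    motion_prompt="Camera dollies alongside the knight at walking pace",
    future_scene_desc="Storm clouds gather over the distant castle",
)
print(filled)
```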
 
prompts/sound_director_prompt.txt.txt DELETED
@@ -1,27 +0,0 @@
1
- # ROLE: AI Sound Director & Foley Artist
2
-
3
- # GOAL:
4
- You are the sound director for a film. Your task is to create a single, rich, and descriptive prompt for an audio generation model (like MMAudio). This prompt must describe the complete soundscape for the CURRENT scene, considering what happened before and what will happen next to ensure audio continuity.
5
-
6
- # CRITICAL RULES (MUST FOLLOW):
7
- 1. **NO SPEECH OR VOICES:** The final prompt must NOT include any terms related to human speech, dialogue, talking, voices, singing, or narration. The goal is to create a world of ambient sounds and specific sound effects (SFX).
8
- 2. **FOCUS ON THE PRESENT:** The audio must primarily match the CURRENT visual scene (Keyframe Kn) and its textual description (Ato_n).
9
- 3. **USE THE PAST FOR CONTINUITY:** Analyze the "Previous Audio Prompt" to understand the established soundscape. If a sound should logically continue from the previous scene, include it (e.g., "the continued sound of a gentle breeze...").
10
- 4. **USE THE FUTURE FOR FORESHADOWING:** Analyze the FUTURE keyframe and scene description. If appropriate, introduce subtle sounds that hint at what's to come. (e.g., if the next scene is a storm, you could add "...with the faint, distant rumble of thunder in the background.").
11
- 5. **BE DESCRIPTIVE:** Use evocative language. Instead of "dog bark", use "the sharp, excited yapping of a small dog". Combine multiple elements into a cohesive soundscape.
12
-
13
- # CONTEXT FOR YOUR DECISION:
14
-
15
- - **Previous Audio Prompt (what was just heard):**
16
- {audio_history}
17
-
18
- - **VISUAL PAST (Keyframe Kn-1):** [PAST_IMAGE]
19
- - **VISUAL PRESENT (Keyframe Kn):** [PRESENT_IMAGE]
20
- - **VISUAL FUTURE (Keyframe Kn+1):** [FUTURE_IMAGE]
21
-
22
- - **CURRENT Scene Description (Ato_n):** "{present_scene_desc}"
23
- - **CURRENT Motion Prompt (what the camera is doing):** "{motion_prompt}"
24
- - **FUTURE Scene Description (Ato_n+1):** "{future_scene_desc}"
25
-
26
- # RESPONSE FORMAT:
27
- Respond with ONLY the final, single-line prompt string for the audio generator.
 
prompts/transition_decision_prompt.txt DELETED
@@ -1,27 +0,0 @@
1
- # ROLE: AI Director of Continuity & Cinematographer
2
-
3
- # GOAL:
4
- Analyze the visual continuity between a START, MIDPOINT, and DESTINATION image. Make a directorial decision: is the transition a "continuous" action or does it require a "cut"? Then, write the appropriate motion prompt.
5
-
6
- # INSTRUCTIONS:
7
- 1. **Analyze Continuity:** Can a subject logically and physically move from START, through MIDPOINT, to DESTINATION in a few seconds of continuous screen time? Consider changes in location, pose, and time of day.
8
- * **Continuous Example:** Man walks to door (START) -> Hand on doorknob (MIDPOINT) -> Man walks through door (DESTINATION).
9
- * **Cut Example:** Woman outside house (START) -> Close up on face (MIDPOINT) -> Woman now inside house (DESTINATION).
10
- 2. **Make a Decision:**
11
- * If the action is unbroken, decide `"transition_type": "continuous"`.
12
- * If there is a jump in time, space, or logic, decide `"transition_type": "cut"`.
13
- 3. **Write Motion Prompt:**
14
- * **For "continuous":** Describe the physical action and camera movement. Example: "Camera follows the man as he opens the door and steps inside."
15
- * **For "cut":** Describe a cinematic transition effect. DO NOT describe character actions. Example: "A smooth cross-dissolve transition to the new scene."
16
-
17
- # CONTEXT:
18
- - Overall Story Goal: "{user_prompt}"
19
- - Story So Far: {story_history}
20
-
21
- # SCENE ANALYSIS:
22
- # START Image (Memory from last fragment): [Image 1]
23
- # MIDPOINT Image (Path): [Image 2]
24
- # DESTINATION Image (Destination): [Image 3]
25
-
26
- # RESPONSE FORMAT:
27
- You MUST respond with a single, clean JSON object with two keys: "transition_type" and "motion_prompt".
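For reference, a minimal sketch of the two reply shapes this prompt allows and how a caller might branch on them; the example strings and the dispatch helper are illustrative, not part of the repository.

```python
import json

# The two reply shapes the prompt allows (example strings are illustrative).
continuous_reply = json.dumps({
    "transition_type": "continuous",
    "motion_prompt": "Camera follows the man as he opens the door and steps inside.",
})
cut_reply = json.dumps({
    "transition_type": "cut",
    "motion_prompt": "A smooth cross-dissolve transition to the new scene.",
})

def route_transition(reply: str) -> str:
    """Validate the director's decision and return the motion prompt to execute."""
    decision = json.loads(reply)
    if decision["transition_type"] not in ("continuous", "cut"):
        raise ValueError(f"Unexpected transition_type: {decision['transition_type']}")
    return decision["motion_prompt"]

print(route_transition(continuous_reply))
print(route_transition(cut_reply))
```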
 
prompts/unified_cinematographer_prompt.txt DELETED
@@ -1,47 +0,0 @@
1
- # ROLE: AI Continuity Director & Cinematographer
2
-
3
- # GOAL:
4
- Your task is twofold. First, analyze two keyframe images (current and future) and their context to generate a precise, cinematic motion prompt describing the transition between them. Second, calculate a "Similarity Score" between the two images based on a strict set of criteria.
5
-
6
- # --- TASK 1: Generate Cinematic Motion Prompt ---
7
-
8
- # CONTEXT:
9
- - Previous Motion Prompt (what I thought before):
10
- {historico_prompt}
11
-
12
- - Current Scene Description (where we are now): "{cena_atual}"
13
- - Future Scene Description (where we are going next): "{cena_futura}"
14
-
15
- # INSTRUCTIONS for Motion Prompt:
16
- You must generate a single, concise, CLIP-style motion prompt describing the action that connects the CURRENT image to the FUTURE image. The prompt must be dense and descriptive, following this priority order:
17
- 1. **People/Animals:** Focus on expression, emotion, and specific actions.
18
- 2. **Objects:** Describe their location and any interaction or movement.
19
- 3. **Camera:** Specify focus, zoom, and movement (pan, tilt, dolly, etc.).
20
-
21
- Your prompt should describe the moment unfolding BETWEEN the current and future state.
22
-
23
- # --- TASK 2: Calculate Similarity Score ---
24
-
25
- # INSTRUCTIONS for Similarity Score:
26
- Calculate a similarity score between the CURRENT and FUTURE images, ranging from 0.0 (completely different) to 1.0 (very similar).
27
-
28
- **Consider ONLY the following criteria for similarity:**
29
- - **Objects:** Consistency in colors, textures, and relative sizes.
30
- - **People/Animals:** Consistency in morphology (body shape), clothing, and accessories.
31
- - **Environment:** Consistency in location, time of day (lighting), colors, and background/horizon.
32
-
33
- **Disregard the following for similarity:**
34
- - Repositioning or movement of subjects or the camera.
35
-
36
- **Negative Factors (Penalties):**
37
- - If the horizontal positions of two or more people are inverted (e.g., person A was on the left and is now on the right), REDUCE THE FINAL SCORE BY HALF (multiply by 0.5).
38
- - If the entire image appears horizontally flipped (mirrored), REDUCE THE FINAL SCORE BY HALF (multiply by 0.5).
39
-
40
- # VISUAL ASSETS:
41
- # [The CURRENT keyframe image will be provided here.]
42
- # [The FUTURE keyframe image will be provided here.]
43
-
44
- # --- RESPONSE FORMAT ---
45
- You MUST respond with a single, clean JSON object with exactly two keys:
46
- 1. "motion_prompt": A string containing the generated cinematic prompt.
47
- 2. "similarity_score": A floating-point number between 0.0 and 1.0.
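For reference, a minimal sketch of the requested reply format together with the halving penalties the scoring rules describe; the function name, its boolean flags, and the example values are illustrative, not part of the repository.

```python
import json

def apply_penalties(base_score: float, people_swapped_sides: bool, image_mirrored: bool) -> float:
    """Apply the 0.5x penalties defined by the prompt, clamped to [0.0, 1.0]."""
    score = base_score
    if people_swapped_sides:
        score *= 0.5
    if image_mirrored:
        score *= 0.5
    return max(0.0, min(1.0, score))

# Example reply in the requested format (values are illustrative).
reply = json.dumps({
    "motion_prompt": "The knight lowers his shield while the camera slowly dollies in on his face.",
    "similarity_score": apply_penalties(0.9, people_swapped_sides=True, image_mirrored=False),
})
print(reply)  # similarity_score becomes 0.45
```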
 
prompts/unified_storyboard_prompt.txt DELETED
@@ -1,19 +0,0 @@
1
- # ROLE: AI Storyboard Writer
2
-
3
- # GOAL:
4
- You are a scriptwriter tasked with breaking down a general idea into a sequence of exactly {num_fragments} distinct scenes or "acts". Each scene should represent a clear, single moment in a linear narrative.
5
-
6
- # CRITICAL RULES (MUST FOLLOW):
7
- 1. **ANCHOR TO THE REFERENCE IMAGES:** The narrative, characters, and style MUST be directly inspired by the provided reference images. The story should feel like it belongs in the same world as these images.
8
- 2. **SIMPLE, LINEAR ACTION:** Do not create a complex plot. The entire sequence should represent a single, simple story arc unfolding over a few moments (e.g., a character notices something, approaches it, and reacts).
9
- 3. **FOCUS ON "WHAT", NOT "HOW":** Each description is a scene, not a camera direction. Describe the core action or emotional beat of the moment. Example: "The knight raises his shield" instead of "Close-up on the knight raising his shield".
10
-
11
- # CONTEXT:
12
- - General Idea (User Prompt): "{user_prompt}"
13
- - Number of Scenes to Create: {num_fragments}
14
-
15
- # YOUR TASK:
16
- Based on the user's idea and the reference images, create a storyboard that tells a simple, continuous story across {num_fragments} scenes.
17
-
18
- # RESPONSE FORMAT:
19
- Return a single JSON object with the key "scene_storyboard", containing an array of strings (the scene descriptions).
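For reference, a minimal sketch of the expected reply and the length check implied by `{num_fragments}`; the scene texts and variable names are invented for illustration.

```python
import json

num_fragments = 3  # illustrative value standing in for {num_fragments}

# Example reply in the requested format (scene texts are invented).
reply = json.dumps({
    "scene_storyboard": [
        "The knight notices a faint light deep in the forest.",
        "He walks toward the light, pushing branches aside.",
        "He stops at a clearing and lowers his shield in relief.",
    ]
})

scenes = json.loads(reply)["scene_storyboard"]
assert len(scenes) == num_fragments, "Array length must match the number of scenes requested."
print(scenes)
```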
 
tools/LICENSE DELETED
@@ -1,25 +0,0 @@
1
- # Euia-AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR para geração de vídeo coerente.
2
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
- #
4
- # Contato:
5
- # Carlos Rodrigues dos Santos
6
- # carlex22@gmail.com
7
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
8
- #
9
- # Repositórios e Projetos Relacionados:
10
- # GitHub: https://github.com/carlex22/Aduc-sdr
11
- # Hugging Face (Ltx-SuperTime-60Secondos): https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/
12
- # Hugging Face (Novinho): https://huggingface.co/spaces/Carlexxx/Novinho/
13
- #
14
- # This program is free software: you can redistribute it and/or modify
15
- # it under the terms of the GNU Affero General Public License as published by
16
- # the Free Software Foundation, either version 3 of the License, or
17
- # (at your option) any later version.
18
- #
19
- # This program is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
- # GNU Affero General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Affero General Public License
25
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
tools/NOTICE.md DELETED
@@ -1,76 +0,0 @@
1
- # NOTICE
2
-
3
- Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
4
-
5
- ---
6
-
7
- ## Aviso de Propriedade Intelectual e Licenciamento
8
-
9
- ### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
10
-
11
- O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
12
-
13
- O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
14
-
15
- * Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
16
- * Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
17
- * Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
18
- * Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
19
- * O uso de "tokens universais" para comunicação agnóstica a modelos.
20
-
21
- ### **Reconhecimento e Implicações (EM PORTUGUÊS):**
22
-
23
- Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
24
-
25
- 1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
26
- 2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
27
- 3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
28
-
29
- ---
30
-
31
- ### **Patent Pending (IN ENGLISH):**
32
-
33
- The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
34
-
35
- The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
36
-
37
- * Fragmentation and scaling of requests exceeding AI model context limits.
38
- * Intelligent distribution of sub-tasks to heterogeneous specialists.
39
- * Persistent state management with iterative evaluation and feedback for planning subsequent steps.
40
- * Cost, latency, and quality-aware planning and routing.
41
- * The use of "universal tokens" for model-agnostic communication.
42
-
43
- ### **Acknowledgement and Implications (IN ENGLISH):**
44
-
45
- By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
46
-
47
- 1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
48
- 2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
49
- 3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
50
-
51
- ---
52
-
53
- ## Licença AGPLv3
54
-
55
- This program is free software: you can redistribute it and/or modify
56
- it under the terms of the GNU Affero General Public License as published by
57
- the Free Software Foundation, either version 3 of the License, or
58
- (at your option) any later version.
59
-
60
- This program is distributed in the hope that it will be useful,
61
- but WITHOUT ANY WARRANTY; without even the implied warranty of
62
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
63
- GNU Affero General Public License for more details.
64
-
65
- You should have received a copy of the GNU Affero General Public License
66
- along with this program. If not, see <https://www.gnu.org/licenses/>.
67
-
68
- ---
69
-
70
- **Contato para Consultas:**
71
-
72
- Para mais informações sobre a arquitetura ADUC, o status do patenteamento, ou para discutir licenciamento para usos comerciais ou não conformes com a AGPLv3, por favor, entre em contato:
73
-
74
- Carlos Rodrigues dos Santos
75
- carlex22@gmail.com
76
- Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
 
tools/README.md DELETED
@@ -1,211 +0,0 @@
1
- ---
2
- title: Euia-AducSdr
3
- emoji: 🎥
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: gradio
7
- app_file: app.py
8
- pinned: true
9
- license: agpl-3.0
10
- short_description: Uma implementação aberta e funcional da arquitetura ADUC-SDR
11
- ---
12
-
13
-
14
- ### 🇧🇷 Português
15
-
16
- Uma implementação aberta e funcional da arquitetura ADUC-SDR (Arquitetura de Unificação Compositiva - Escala Dinâmica e Resiliente), projetada para a geração de vídeo coerente de longa duração. Este projeto materializa os princípios de fragmentação, navegação geométrica e um mecanismo de "eco causal 4bits memoria" para garantir a continuidade física e narrativa em sequências de vídeo geradas por múltiplos modelos de IA.
17
-
18
- **Licença:** Este projeto é licenciado sob os termos da **GNU Affero General Public License v3.0**. Isto significa que se você usar este software (ou qualquer trabalho derivado) para fornecer um serviço através de uma rede, você é **obrigado a disponibilizar o código-fonte completo** da sua versão para os usuários desse serviço.
19
-
20
- - **Copyright (C) 4 de Agosto de 2025, Carlos Rodrigues dos Santos**
21
- - Uma cópia completa da licença pode ser encontrada no arquivo [LICENSE](LICENSE).
22
-
23
- ---
24
-
25
- ### 🇬🇧 English
26
-
27
- An open and functional implementation of ADUC-SDR (Architecture for Compositive Unification - Dynamic and Resilient Scaling), designed for long-form coherent video generation. This project materializes the principles of fragmentation, geometric navigation, and a "causal echo 4-bit memory" mechanism to ensure physical and narrative continuity in video sequences generated by multiple AI models.
28
-
29
- **License:** This project is licensed under the terms of the **GNU Affero General Public License v3.0**. This means that if you use this software (or any derivative work) to provide a service over a network, you are **required to make the complete source code** of your version available to the users of that service.
30
-
31
- - **Copyright (C) August 4, 2025, Carlos Rodrigues dos Santos**
32
- - A full copy of the license can be found in the [LICENSE](LICENSE) file.
33
-
34
- ---
35
-
36
- ## **Aviso de Propriedade Intelectual e Patenteamento**
37
-
38
- ### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
39
-
40
- A arquitetura e o método **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste projeto e nas reivindicações associadas, estão **atualmente em processo de patenteamento**.
41
-
42
- O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, que incluem, mas não se limitam a:
43
-
44
- * Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
45
- * Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
46
- * Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
47
- * Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
48
- * O uso de "tokens universais" para comunicação agnóstica a modelos.
49
-
50
- Ao utilizar este software e a arquitetura ADUC aqui implementada, você reconhece a natureza inovadora desta arquitetura e que a **reprodução ou exploração da lógica central da ADUC em sistemas independentes pode infringir direitos de patente pendente.**
51
-
52
- ---
53
-
54
- ### **Patent Pending (IN ENGLISH):**
55
-
56
- The **ADUC (Automated Discovery and Orchestration of Complex tasks)** architecture and method, as described in this project and its associated claims, are **currently in the process of being patented.**
57
-
58
- The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
59
-
60
- * Fragmentation and scaling of requests exceeding AI model context limits.
61
- * Intelligent distribution of sub-tasks to heterogeneous specialists.
62
- * Persistent state management with iterative evaluation and feedback for planning subsequent steps.
63
- * Cost, latency, and quality-aware planning and routing.
64
- * The use of "universal tokens" for model-agnostic communication.
65
-
66
- By using this software and the ADUC architecture implemented herein, you acknowledge the innovative nature of this architecture and that **the reproduction or exploitation of ADUC's core logic in independent systems may infringe upon pending patent rights.**
67
-
68
- ---
69
-
70
- ### Detalhes Técnicos e Reivindicações da ADUC
71
-
72
- #### 🇧🇷 Definição Curta (para Tese e Patente)
73
-
74
- **ADUC** é um *framework pré-input* e *intermediário* de **gerenciamento de prompts** que:
75
-
76
- 1. **fragmenta** solicitações acima do limite de contexto de qualquer modelo,
77
- 2. **escala linearmente** (processo sequencial com memória persistida),
78
- 3. **distribui** sub-tarefas a **especialistas** (modelos/ferramentas heterogêneos), e
79
- 4. **realimenta** a próxima etapa com avaliação do que foi feito/esperado (LLM diretor).
80
-
81
- Não é um modelo; é uma **camada orquestradora** plugável antes do input de modelos existentes (texto, imagem, áudio, vídeo), usando *tokens universais* e a tecnologia atual.
82
-
83
- #### 🇬🇧 Short Definition (for Thesis and Patent)
84
-
85
- **ADUC** is a *pre-input* and *intermediate* **prompt management framework** that:
86
-
87
- 1. **fragments** requests exceeding any model's context limit,
88
- 2. **scales linearly** (sequential process with persisted memory),
89
- 3. **distributes** sub-tasks to **specialists** (heterogeneous models/tools), and
90
- 4. **feeds back** to the next step with an evaluation of what was done/expected (director LLM).
91
-
92
- It is not a model; it is a pluggable **orchestration layer** before the input of existing models (text, image, audio, video), using *universal tokens* and current technology.
93
-
94
- ---
95
-
96
- #### 🇧🇷 Elementos Essenciais (Telegráfico)
97
-
98
- * **Agnóstico a modelos:** opera com qualquer LLM/difusor/API.
99
- * **Pré-input manager:** recebe pedido do usuário, **divide** em blocos ≤ limite de tokens, **prioriza**, **agenda** e **roteia**.
100
- * **Memória persistida:** resultados/latentes/“eco” viram **estado compartilhado** para o próximo bloco (nada é ignorado).
101
- * **Especialistas:** *routers* decidem quem faz o quê (ex.: “descrição → LLM-A”, “keyframe → Img-B”, “vídeo → Vid-C”).
102
- * **Controle de qualidade:** LLM diretor compara *o que fez* × *o que deveria* × *o que falta* e **regenera objetivos** do próximo fragmento.
103
- * **Custo/latência-aware:** planeja pela **VRAM/tempo/custo**, não tenta “abraçar tudo de uma vez”.
104
-
105
- #### 🇬🇧 Essential Elements (Telegraphic)
106
-
107
- * **Model-agnostic:** operates with any LLM/diffuser/API.
108
- * **Pre-input manager:** receives user request, **divides** into blocks ≤ token limit, **prioritizes**, **schedules**, and **routes**.
109
- * **Persisted memory:** results/latents/“echo” become **shared state** for the next block (nothing is ignored).
110
- * **Specialists:** *routers* decide who does what (e.g., “description → LLM-A”, “keyframe → Img-B”, “video → Vid-C”).
111
- * **Quality control:** director LLM compares *what was done* × *what should be done* × *what is missing* and **regenerates objectives** for the next fragment.
112
- * **Cost/latency-aware:** plans by **VRAM/time/cost**, does not try to “embrace everything at once”.
113
-
114
- ---
115
-
116
- #### 🇧🇷 Reivindicações Independentes (Método e Sistema)
117
-
118
- **Reivindicação Independente (Método) — Versão Enxuta:**
119
-
120
- 1. **Método** de **orquestração de prompts** para execução de tarefas acima do limite de contexto de modelos de IA, compreendendo:
121
- (a) **receber** uma solicitação que excede um limite de tokens;
122
- (b) **analisar** a solicitação por um **LLM diretor** e **fragmentá-la** em sub-tarefas ≤ limite;
123
- (c) **selecionar** especialistas de execução para cada sub-tarefa com base em capacidades declaradas;
124
- (d) **gerar** prompts específicos por sub-tarefa em **tokens universais**, incluindo referências ao **estado persistido** de execuções anteriores;
125
- (e) **executar sequencialmente** as sub-tarefas e **persistir** suas saídas como memória (incluindo latentes/eco/artefatos);
126
- (f) **avaliar** automaticamente a saída versus metas declaradas e **regenerar objetivos** do próximo fragmento;
127
- (g) **iterar** (b)–(f) até que os critérios de completude sejam atendidos, produzindo o resultado agregado;
128
- em que o framework **escala linearmente** no tempo e armazenamento físico, **independente** da janela de contexto dos modelos subjacentes.
129
-
130
- **Reivindicação Independente (Sistema):**
131
-
132
- 2. **Sistema** de orquestração de prompts, compreendendo: um **planejador LLM diretor**; um **roteador de especialistas**; um **banco de estado persistido** (incl. memória cinética para vídeo); um **gerador de prompts universais**; e um **módulo de avaliação/realimentação**, acoplados por uma **API pré-input** a modelos heterogêneos.
133
-
134
- #### 🇬🇧 Independent Claims (Method and System)
135
-
136
- **Independent Claim (Method) — Concise Version:**
137
-
138
- 1. A **method** for **prompt orchestration** for executing tasks exceeding AI model context limits, comprising:
139
- (a) **receiving** a request that exceeds a token limit;
140
- (b) **analyzing** the request by a **director LLM** and **fragmenting it** into sub-tasks ≤ the limit;
141
- (c) **selecting** execution specialists for each sub-task based on declared capabilities;
142
- (d) **generating** specific prompts per sub-task in **universal tokens**, including references to the **persisted state** of previous executions;
143
- (e) **sequentially executing** the sub-tasks and **persisting** their outputs as memory (including latents/echo/artifacts);
144
- (f) **automatically evaluating** the output against declared goals and **regenerating objectives** for the next fragment;
145
- (g) **iterating** (b)–(f) until completion criteria are met, producing the aggregated result;
146
- wherein the framework **scales linearly** in time and physical storage, **independent** of the context window of the underlying models.
147
-
148
- **Independent Claim (System):**
149
-
150
- 2. A prompt orchestration **system**, comprising: a **director LLM planner**; a **specialist router**; a **persisted state bank** (incl. kinetic memory for video); a **universal prompt generator**; and an **evaluation/feedback module**, coupled via a **pre-input API** to heterogeneous models.
151
-
152
- ---
153
-
154
- #### 🇧🇷 Dependentes Úteis
155
-
156
- * (3) Onde o roteamento considera **custo/latência/VRAM** e metas de qualidade.
157
- * (4) Onde o banco de estado inclui **eco cinético** para vídeo (últimos *n* frames/latentes/fluxo).
158
- * (5) Onde a avaliação usa métricas específicas por domínio (Lflow, consistência semântica, etc.).
159
- * (6) Onde *tokens universais* padronizam instruções entre especialistas.
160
- * (7) Onde a orquestração decide **cut vs continuous** e **corte regenerativo** (Déjà-Vu) ao editar vídeo.
161
- * (8) Onde o sistema **nunca descarta** conteúdo excedente: **reagenda** em novos fragmentos.
162
-
163
- #### 🇬🇧 Useful Dependents
164
-
165
- * (3) Wherein routing considers **cost/latency/VRAM** and quality goals.
166
- * (4) Wherein the state bank includes **kinetic echo** for video (last *n* frames/latents/flow).
167
- * (5) Wherein evaluation uses domain-specific metrics (Lflow, semantic consistency, etc.).
168
- * (6) Wherein *universal tokens* standardize instructions between specialists.
169
- * (7) Wherein orchestration decides **cut vs continuous** and **regenerative cut** (Déjà-Vu) when editing video.
170
- * (8) Wherein the system **never discards** excess content: it **reschedules** it in new fragments.
171
-
172
- ---
173
-
174
- #### 🇧🇷 Como isso conversa com SDR (Vídeo)
175
-
176
- * **Eco Cinético**: é um **tipo de estado persistido** consumido pelo próximo passo.
177
- * **Déjà-Vu (Corte Regenerativo)**: é **uma política de orquestração** aplicada quando há edição; ADUC decide, monta os prompts certos e chama o especialista de vídeo.
178
- * **Cut vs Continuous**: decisão do **diretor** com base em estado + metas; ADUC roteia e garante a sobreposição/remoção final.
179
-
180
- #### 🇬🇧 How this Converses with SDR (Video)
181
-
182
- * **Kinetic Echo**: is a **type of persisted state** consumed by the next step.
183
- * **Déjà-Vu (Regenerative Cut)**: is an **orchestration policy** applied during editing; ADUC decides, crafts the right prompts, and calls the video specialist.
184
- * **Cut vs Continuous**: decision made by the **director** based on state + goals; ADUC routes and ensures the final overlap/removal.
185
-
186
- ---
187
-
188
- #### 🇧🇷 Mensagem Clara ao Usuário (Experiência)
189
-
190
- > “Seu pedido excede o limite X do modelo Y. Em vez de truncar silenciosamente, o **ADUC** dividirá e **entregará 100%** do conteúdo por etapas coordenadas.”
191
-
192
- Isso é diferencial prático e jurídico: **não-obviedade** por transformar limite de contexto em **pipeline controlado**, com **persistência de estado** e **avaliação iterativa**.
193
-
194
- #### 🇬🇧 Clear User Message (Experience)
195
-
196
- > "Your request exceeds model Y's limit X. Instead of silently truncating, **ADUC** will divide and **deliver 100%** of the content through coordinated steps."
197
-
198
- This is a practical and legal differentiator: **non-obviousness** by transforming context limits into a **controlled pipeline**, with **state persistence** and **iterative evaluation**.
199
-
200
- ---
201
-
202
- ### Contact / Contato / Contacto
203
-
204
- - **Author / Autor:** Carlos Rodrigues dos Santos
205
- - **Email:** carlex22@gmail.com
206
- - **GitHub:** [https://github.com/carlex22/Aduc-sdr](https://github.com/carlex22/Aduc-sdr)
207
- - **Hugging Face Spaces:**
208
- - [Ltx-SuperTime-60Secondos](https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/)
209
- - [Novinho](https://huggingface.co/spaces/Carlexxx/Novinho/)
210
-
211
- ---
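To make the method claim steps (a)-(g) above concrete, here is a minimal, hypothetical sketch of the orchestration loop they describe: fragment, route, execute, persist, evaluate, iterate. None of these names exist in the framework; they only illustrate the control flow under the stated assumptions.

```python
from dataclasses import dataclass, field
from typing import Callable

@dataclass
class State:
    """Persisted memory shared between fragments (claim step (e))."""
    artifacts: list = field(default_factory=list)

def orchestrate(request: str,
                fragment: Callable[[str, State], list[str]],
                route: Callable[[str], Callable[[str, State], str]],
                evaluate: Callable[[str, State], bool],
                max_rounds: int = 10) -> State:
    """Sequentially execute sub-tasks until the evaluator reports completion."""
    state = State()
    for _ in range(max_rounds):
        sub_tasks = fragment(request, state)                  # (b) fragment within context limits
        for task in sub_tasks:
            specialist = route(task)                          # (c) pick a specialist
            state.artifacts.append(specialist(task, state))   # (d)-(e) execute and persist
        if evaluate(request, state):                          # (f) compare done vs. expected
            break                                             # (g) stop when criteria are met
    return state

# Toy run with trivial callables, just to show the control flow.
result = orchestrate(
    "tell a three-beat story",
    fragment=lambda req, st: [] if st.artifacts else ["beat 1", "beat 2", "beat 3"],
    route=lambda task: (lambda t, st: f"generated {t}"),
    evaluate=lambda req, st: len(st.artifacts) >= 3,
)
print(result.artifacts)
```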