Commit ac82132
Carlexxx committed
Parent(s): 1267261

feat: Implement self-contained specialist managers

This view is limited to 50 files because it contains too many changes. See raw diff.
- aduc_framework/managers/flux_kontext_manager.py +1 -1
- aduc_framework/managers/latent_enhancer_manager.py +1 -1
- aduc_framework/managers/ltx_manager.py +99 -116
- aduc_framework/managers/ltx_pipeline_utils.py +8 -8
- aduc_framework/managers/upscaler_specialist.py +1 -1
- aduc_framework/managers/vae_manager.py +2 -2
- aduc_framework/types.py +23 -14
- app.py +35 -29
- engineers/LICENSE +0 -23
- engineers/NOTICE.md +0 -76
- engineers/README.md +0 -211
- engineers/__init__.py +0 -0
- engineers/deformes2D_thinker.py +0 -171
- engineers/deformes3D.py +0 -193
- engineers/deformes3D_thinker.py +0 -136
- engineers/deformes4D.py +0 -338
- engineers/deformes7D.py +0 -316
- managers/LICENSE +0 -25
- managers/LICENSE.txt +0 -201
- managers/NOTICE.md +0 -60
- managers/README.md +0 -156
- managers/__init__.py +0 -0
- managers/config.yaml +0 -24
- managers/flux_kontext_manager.py +0 -165
- managers/gemini_manager.py +0 -119
- managers/latent_enhancer_manager.py +0 -109
- managers/ltx_manager.py +0 -320
- managers/ltx_pipeline_utils.py +0 -774
- managers/mmaudio_manager.py +0 -208
- managers/seedvr_manager.py +0 -233
- managers/upscaler_specialist.py +0 -91
- managers/vae_manager.py +0 -99
- prompts/LICENSE +0 -25
- prompts/NOTICE.md +0 -76
- prompts/README.md +0 -211
- prompts/anticipatory_keyframe_prompt.txt +0 -29
- prompts/audio_director_prompt.txt +0 -18
- prompts/cinematic_director_prompt.txt +0 -27
- prompts/director_composition_prompt.txt +0 -27
- prompts/flux_composition_wrapper_prompt.txt +0 -1
- prompts/initial_motion_prompt.txt +0 -20
- prompts/keyframe_selection_prompt.txt +0 -20
- prompts/sound_director_prompt.txt +0 -27
- prompts/sound_director_prompt.txt.txt +0 -27
- prompts/transition_decision_prompt.txt +0 -27
- prompts/unified_cinematographer_prompt.txt +0 -47
- prompts/unified_storyboard_prompt.txt +0 -19
- tools/LICENSE +0 -25
- tools/NOTICE.md +0 -76
- tools/README.md +0 -211
aduc_framework/managers/flux_kontext_manager.py
CHANGED
@@ -25,7 +25,7 @@ import threading
 import yaml
 import logging
 
-from tools.hardware_manager import hardware_manager
+from ..tools.hardware_manager import hardware_manager
 
 logger = logging.getLogger(__name__)
 
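The one-line change above converts an absolute import into a package-relative one. A minimal sketch of the layout the new form assumes (directory names taken from the paths in this commit; file contents are illustrative):

    # aduc_framework/
    #     __init__.py
    #     tools/
    #         __init__.py
    #         hardware_manager.py      # defines the module-level hardware_manager
    #     managers/
    #         __init__.py
    #         flux_kontext_manager.py  # this file
    #
    # Inside flux_kontext_manager.py, one leading dot means "this package"
    # (managers) and two dots mean its parent (aduc_framework), so a sibling
    # subpackage is reached with:
    from ..tools.hardware_manager import hardware_manager

Relative imports only resolve when the module is loaded as part of its package (e.g. via `import aduc_framework.managers.flux_kontext_manager`), not when the file is executed directly as a script.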
aduc_framework/managers/latent_enhancer_manager.py
CHANGED
@@ -19,7 +19,7 @@ import torch
 import logging
 import time
 from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
+from ..managers.ltx_manager import ltx_manager_singleton
 
 logger = logging.getLogger(__name__)
 
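The same relative-import change also protects the module-level singletons these managers export (`ltx_manager_singleton` is constructed at import time, at the bottom of ltx_manager.py). If one file is importable under two names, Python runs its body twice and two "singletons" exist. A self-contained demonstration of that trap (all names here are illustrative, not part of the Aduc codebase):

    import importlib, os, sys, tempfile

    # Build a throwaway package whose module constructs a singleton on import.
    root = tempfile.mkdtemp()
    pkg = os.path.join(root, "pkg")
    os.makedirs(pkg)
    open(os.path.join(pkg, "__init__.py"), "w").close()
    with open(os.path.join(pkg, "mod.py"), "w") as f:
        f.write("class _S: pass\nsingleton = _S()\n")

    sys.path[:0] = [root, pkg]              # both the parent dir and the package dir

    a = importlib.import_module("pkg.mod")  # loaded as part of the package...
    b = importlib.import_module("mod")      # ...and again as a top-level module
    assert a is not b                       # two module objects for one file
    assert a.singleton is not b.singleton   # hence two "singletons"

Importing every manager through one canonical package path keeps a single entry in `sys.modules`, so the singleton really is single.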
aduc_framework/managers/ltx_manager.py
CHANGED
@@ -1,20 +1,13 @@
-# managers/ltx_manager.py
-# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
+# aduc_framework/managers/ltx_manager.py
 #
-#
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-#
+# Versão 2.3.1 (Framework-Compliant)
 #
-#
-#
-#
-#
-# the internal prompt refinement models (captioning and LLM) used by the LTX pipeline,
-# ensuring stylistic and logical consistency.
+# Este manager é responsável por controlar a pipeline LTX-Video. Ele gerencia
+# um pool de workers para otimizar o uso de múltiplas GPUs, lida com a inicialização
+# e o setup de dependências complexas, e expõe uma interface de alto nível para a
+# geração de fragmentos de vídeo no espaço latente.
 
 import torch
 import gc
@@ -29,18 +22,21 @@ import subprocess
 from pathlib import Path
 from typing import Optional, List, Tuple, Union
 
-
-
-
+# --- CORREÇÃO DE IMPORTAÇÃO ---
+# O manager agora importa os tipos de seu próprio pacote "pai" e
+# as ferramentas de um pacote "irmão".
+from ..types import LatentConditioningItem
+from ..tools.optimization import optimize_ltx_worker, can_optimize_fp8
+from ..tools.hardware_manager import hardware_manager
 
 logger = logging.getLogger(__name__)
 
-# ---
+# --- Gerenciamento de Dependências ---
 DEPS_DIR = Path("./deps")
 LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
 LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
 
-# --- Placeholders
+# --- Placeholders para módulos importados tardiamente (lazy-loaded) ---
 create_ltx_video_pipeline = None
 calculate_padding = None
 LTXVideoPipeline = None
@@ -54,8 +50,8 @@ class LtxPoolManager:
     """
     Manages a pool of LtxWorkers and exposes the enhancement pipeline for other specialists.
     """
-    def __init__(self, device_ids, ltx_config_file_name):
-        logger.info(f"LTX POOL MANAGER:
+    def __init__(self, device_ids: List[str], ltx_config_file_name: str):
+        logger.info(f"LTX POOL MANAGER: Criando workers para os dispositivos: {device_ids}")
         self._ltx_modules_loaded = False
         self._setup_dependencies()
         self._lazy_load_ltx_modules()
@@ -66,47 +62,43 @@ class LtxPoolManager:
         self.current_worker_index = 0
         self.lock = threading.Lock()
 
-        #
-        #
-        # da mesma forma e contêm os mesmos modelos de enhancement.
+        # Expõe a pipeline do primeiro worker para que outros especialistas (como o Deformes3DThinker)
+        # possam acessar os modelos de aprimoramento de prompt.
         self.prompt_enhancement_pipeline = self.workers[0].pipeline if self.workers else None
         if self.prompt_enhancement_pipeline:
-            logger.info("LTX POOL MANAGER:
-        # <--- FIM DA NOVA PROPRIEDADE --->
+            logger.info("LTX POOL MANAGER: Pipeline de aprimoramento de prompt exposta para outros especialistas.")
 
         self._apply_ltx_pipeline_patches()
 
         if all(w.device.type == 'cuda' for w in self.workers):
-            logger.info("LTX POOL MANAGER: HOT START
+            logger.info("LTX POOL MANAGER: MODO HOT START ATIVADO. Pré-aquecendo todas as GPUs...")
             for worker in self.workers:
                 worker.to_gpu()
-            logger.info("LTX POOL MANAGER:
+            logger.info("LTX POOL MANAGER: Todas as GPUs estão prontas.")
         else:
-            logger.info("LTX POOL MANAGER:
-
-    # ... (O resto da classe LtxPoolManager, como _setup_dependencies, generate_latent_fragment, etc., permanece exatamente o mesmo) ...
+            logger.info("LTX POOL MANAGER: Operando em modo CPU ou misto. Pré-aquecimento de GPU pulado.")
 
     def _setup_dependencies(self):
         """Clones the LTX-Video repo if not found and adds it to the system path."""
         if not LTX_VIDEO_REPO_DIR.exists():
-            logger.info(f"LTX-Video
+            logger.info(f"Repositório LTX-Video não encontrado em '{LTX_VIDEO_REPO_DIR}'. Clonando do GitHub...")
             try:
                 DEPS_DIR.mkdir(exist_ok=True)
                 subprocess.run(
-                    ["git", "clone", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
+                    ["git", "clone", "--depth", "1", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
                     check=True, capture_output=True, text=True
                 )
-                logger.info("LTX-Video
+                logger.info("Repositório LTX-Video clonado com sucesso.")
             except subprocess.CalledProcessError as e:
-                logger.error(f"
-                raise RuntimeError("
+                logger.error(f"Falha ao clonar o repositório LTX-Video. Git stderr: {e.stderr}")
+                raise RuntimeError("Não foi possível clonar a dependência LTX-Video do GitHub.")
         else:
-            logger.info("
+            logger.info("Repositório LTX-Video local encontrado.")
 
         if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
             sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
-            logger.info(f"
-
+            logger.info(f"Adicionado '{LTX_VIDEO_REPO_DIR.resolve()}' ao sys.path.")
+
     def _lazy_load_ltx_modules(self):
         """Dynamically imports LTX-Video modules after ensuring the repo exists."""
         if self._ltx_modules_loaded:
@@ -115,22 +107,22 @@ class LtxPoolManager:
         global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
         global vae_encode, latent_to_pixel_coords, randn_tensor
 
-        from
+        from .ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
         from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
         from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
         from diffusers.utils.torch_utils import randn_tensor
 
         self._ltx_modules_loaded = True
-        logger.info("LTX-Video
+        logger.info("Módulos do LTX-Video foram carregados dinamicamente.")
 
     def _apply_ltx_pipeline_patches(self):
         """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility."""
-        logger.info("LTX POOL MANAGER:
+        logger.info("LTX POOL MANAGER: Aplicando patches ADUC-SDR na pipeline LTX...")
         for worker in self.workers:
             worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline)
-        logger.info("LTX POOL MANAGER:
+        logger.info("LTX POOL MANAGER: Todas as instâncias da pipeline foram corrigidas com sucesso.")
 
-    def _get_next_worker(self):
+    def _get_next_worker(self) -> 'LtxWorker':
         with self.lock:
             worker = self.workers[self.current_worker_index]
             self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
@@ -152,62 +144,66 @@ class LtxPoolManager:
         if 'strength' in kwargs:
             pipeline_params["strength"] = kwargs['strength']
         if 'conditioning_items_data' in kwargs:
-
-
-            item
-
-            pipeline_params["conditioning_items"] = final_conditioning_items
+            pipeline_params["conditioning_items"] = [
+                item._replace(latent_tensor=item.latent_tensor.to(worker.device))
+                for item in kwargs['conditioning_items_data']
+            ]
         if worker.is_distilled:
-            logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.")
             fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
-            pipeline_params["timesteps"] = fixed_timesteps
             if fixed_timesteps:
+                pipeline_params["timesteps"] = fixed_timesteps
                 pipeline_params["num_inference_steps"] = len(fixed_timesteps)
+
+        callback = kwargs.get('callback')
+        if callback:
+            pipeline_params["callback_on_step_end"] = callback
+            pipeline_params["callback_on_step_end_tensor_inputs"] = ["latents"]
+
         return pipeline_params
 
-    def generate_latent_fragment(self, **kwargs) ->
+    def generate_latent_fragment(self, **kwargs) -> Tuple[torch.Tensor, tuple]:
         worker_to_use = self._get_next_worker()
         try:
            height, width = kwargs['height'], kwargs['width']
            padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
            padding_vals = calculate_padding(height, width, padded_h, padded_w)
            kwargs['height'], kwargs['width'] = padded_h, padded_w
+
            pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)
-
+
+            logger.info(f"Iniciando GERAÇÃO em {worker_to_use.device} com shape {padded_w}x{padded_h}")
+
            if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
                result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
            else:
                result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
            return result, padding_vals
        except Exception as e:
-            logger.error(f"LTX POOL MANAGER:
+            logger.error(f"LTX POOL MANAGER: Erro durante a geração em {worker_to_use.device}: {e}", exc_info=True)
            raise e
        finally:
            if worker_to_use and worker_to_use.device.type == 'cuda':
                with torch.cuda.device(worker_to_use.device):
-                    gc.collect()
+                    gc.collect()
+                    torch.cuda.empty_cache()
 
-    def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) ->
-        pass
+    def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, tuple]:
+        pass # Placeholder
 
-# ... (O resto do arquivo: LtxWorker, _aduc_prepare_conditioning_patch, Singleton Instantiation, etc. permanece idêntico) ...
 class LtxWorker:
-    """
-    Represents a single instance of the LTX-Video pipeline on a specific device.
-    """
+    """Represents a single instance of the LTX-Video pipeline on a specific device."""
    def __init__(self, device_id, ltx_config_file):
        self.cpu_device = torch.device('cpu')
        self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
-        logger.info(f"LTX Worker ({self.device}):
+        logger.info(f"LTX Worker ({self.device}): Inicializando com config '{ltx_config_file}'...")
 
        with open(ltx_config_file, "r") as file:
            self.config = yaml.safe_load(file)
 
        self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
-
        models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded"
 
-        logger.info(f"LTX Worker ({self.device}):
+        logger.info(f"LTX Worker ({self.device}): Preparando para carregar modelo...")
        model_filename = self.config["checkpoint_path"]
        model_path = huggingface_hub.hf_hub_download(
            repo_id="Lightricks/LTX-Video", filename=model_filename,
@@ -219,24 +215,22 @@ class LtxWorker:
            precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"],
-            device='cpu'
+            device='cpu' # Sempre carrega na CPU primeiro
        )
-        logger.info(f"LTX Worker ({self.device}):
+        logger.info(f"LTX Worker ({self.device}): Modelo pronto na CPU. É um modelo distilled? {self.is_distilled}")
 
    def to_gpu(self):
        if self.device.type == 'cpu': return
-        logger.info(f"LTX Worker:
+        logger.info(f"LTX Worker: Movendo pipeline para a GPU {self.device}...")
        self.pipeline.to(self.device)
        if self.device.type == 'cuda' and can_optimize_fp8():
-            logger.info(f"LTX Worker ({self.device}):
+            logger.info(f"LTX Worker ({self.device}): GPU com suporte a FP8 detectada. Otimizando...")
            optimize_ltx_worker(self)
-            logger.info(f"LTX Worker ({self.device}):
-
-            logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.")
-
+            logger.info(f"LTX Worker ({self.device}): Otimização completa.")
+
    def to_cpu(self):
        if self.device.type == 'cpu': return
-        logger.info(f"LTX Worker:
+        logger.info(f"LTX Worker: Descarregando pipeline da GPU {self.device}...")
        self.pipeline.to('cpu')
        gc.collect()
        if torch.cuda.is_available(): torch.cuda.empty_cache()
@@ -244,10 +238,9 @@ class LtxWorker:
    def generate_video_fragment_internal(self, **kwargs):
        return self.pipeline(**kwargs).images
 
-
 def _aduc_prepare_conditioning_patch(
-    self: LTXVideoPipeline,
-    conditioning_items: Optional[List[Union[ConditioningItem, "LatentConditioningItem"]]],
+    self: "LTXVideoPipeline",
+    conditioning_items: Optional[List[Union["ConditioningItem", "LatentConditioningItem"]]],
    init_latents: torch.Tensor,
    num_frames: int,
    height: int,
@@ -259,62 +252,52 @@ def _aduc_prepare_conditioning_patch(
        init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
        init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
        return init_latents, init_pixel_coords, None, 0
-
+
+    init_conditioning_mask = torch.zeros_like(init_latents[:, 0, ...], dtype=torch.float32, device=init_latents.device)
    extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
    extra_conditioning_num_latents = 0
-    is_latent_mode = hasattr(conditioning_items[0], 'latent_tensor')
-    if is_latent_mode:
-        for item in conditioning_items:
-            media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
-            media_frame_number, strength = item.media_frame_number, item.conditioning_strength
-            if media_frame_number == 0:
-                f_l, h_l, w_l = media_item_latents.shape[-3:]
-                init_latents[:, :, :f_l, :h_l, :w_l] = torch.lerp(init_latents[:, :, :f_l, :h_l, :w_l], media_item_latents, strength)
-                init_conditioning_mask[:, :f_l, :h_l, :w_l] = strength
-            else:
-                noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
-                media_item_latents = torch.lerp(noise, media_item_latents, strength)
-                patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
-                pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-                pixel_coords[:, 0] += media_frame_number
-                extra_conditioning_num_latents += patched_latents.shape[1]
-                new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
-                extra_conditioning_latents.append(patched_latents)
-                extra_conditioning_pixel_coords.append(pixel_coords)
-                extra_conditioning_mask.append(new_mask)
-    else:
-        for item in conditioning_items:
-            if not isinstance(item, ConditioningItem): continue
-            item = self._resize_conditioning_item(item, height, width)
-            media_item_latents = vae_encode(item.media_item.to(dtype=self.vae.dtype, device=self.vae.device), self.vae, vae_per_channel_normalize=vae_per_channel_normalize).to(dtype=init_latents.dtype)
-            if item.media_frame_number == 0:
-                media_item_latents, l_x, l_y = self._get_latent_spatial_position(media_item_latents, item, height, width, strip_latent_border=True)
-                f_l, h_l, w_l = media_item_latents.shape[-3:]
-                init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = torch.lerp(init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l], media_item_latents, item.conditioning_strength)
-                init_conditioning_mask[:, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = item.conditioning_strength
-            else:
-                logger.warning("Pixel-based conditioning for non-zero frames is not fully implemented in this patch.")
+
+    for item in conditioning_items:
+        if not isinstance(item, LatentConditioningItem):
+            logger.warning("Patch ADUC: Item de condicionamento não é um LatentConditioningItem e será ignorado.")
+            continue
+
+        media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
+        media_frame_number, strength = item.media_frame_number, item.conditioning_strength
+
+        if media_frame_number == 0:
+            f_l, h_l, w_l = media_item_latents.shape[-3:]
+            init_latents[..., :f_l, :h_l, :w_l] = torch.lerp(init_latents[..., :f_l, :h_l, :w_l], media_item_latents, strength)
+            init_conditioning_mask[..., :f_l, :h_l, :w_l] = strength
+        else:
+            noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
+            media_item_latents = torch.lerp(noise, media_item_latents, strength)
+            patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
+            pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
+            pixel_coords[:, 0] += media_frame_number
+            extra_conditioning_num_latents += patched_latents.shape[1]
+            new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
+            extra_conditioning_latents.append(patched_latents)
+            extra_conditioning_pixel_coords.append(pixel_coords)
+            extra_conditioning_mask.append(new_mask)
+
    init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
    init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
    init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
    init_conditioning_mask = init_conditioning_mask.squeeze(-1)
+
    if extra_conditioning_latents:
        init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
        init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
        init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
-
-        init_latents = init_latents[:, :-extra_conditioning_num_latents]
-        init_pixel_coords = init_pixel_coords[:, :, :-extra_conditioning_num_latents]
-        init_conditioning_mask = init_conditioning_mask[:, :-extra_conditioning_num_latents]
+
    return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
 
-
-# --- Singleton Instantiation ---
+# --- Instanciação Singleton ---
 with open("config.yaml", 'r') as f:
    config = yaml.safe_load(f)
 ltx_gpus_required = config['specialists']['ltx']['gpus_required']
 ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
 ltx_config_filename = config['specialists']['ltx']['config_file']
 ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename)
-logger.info("
+logger.info("Especialista de Vídeo (LTX) pronto.")
aduc_framework/managers/ltx_pipeline_utils.py
CHANGED
@@ -23,20 +23,20 @@ from transformers import (
 )
 from huggingface_hub import hf_hub_download
 
-from ltx_video.models.autoencoders.causal_video_autoencoder import (
+from ..ltx_video.models.autoencoders.causal_video_autoencoder import (
     CausalVideoAutoencoder,
 )
-from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
-from ltx_video.models.transformers.transformer3d import Transformer3DModel
-from ltx_video.pipelines.pipeline_ltx_video import (
+from ..ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
+from ..ltx_video.models.transformers.transformer3d import Transformer3DModel
+from ..ltx_video.pipelines.pipeline_ltx_video import (
     ConditioningItem,
     LTXVideoPipeline,
     LTXMultiScalePipeline,
 )
-from ltx_video.schedulers.rf import RectifiedFlowScheduler
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
-import ltx_video.pipelines.crf_compressor as crf_compressor
+from ..ltx_video.schedulers.rf import RectifiedFlowScheduler
+from ..ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
+from ..ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
+import ..ltx_video.pipelines.crf_compressor as crf_compressor
 
 MAX_HEIGHT = 720
 MAX_WIDTH = 1280
aduc_framework/managers/upscaler_specialist.py
CHANGED
@@ -5,7 +5,7 @@
 import torch
 import logging
 from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
+from ..managers.ltx_manager import ltx_manager_singleton
 
 logger = logging.getLogger(__name__)
 
aduc_framework/managers/vae_manager.py
CHANGED
@@ -28,8 +28,8 @@ import gc
 from typing import Generator
 
 # Import the source of the VAE model and the low-level functions
-from managers.ltx_manager import ltx_manager_singleton
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
+from ..managers.ltx_manager import ltx_manager_singleton
+from ..ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
 
 logger = logging.getLogger(__name__)
 
aduc_framework/types.py
CHANGED
@@ -2,20 +2,19 @@
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Versão 3.
+# Versão 3.1.0 (Framework Data Models with Core Types)
 #
 # Este arquivo define as estruturas de dados centrais para o Aduc Framework
-# usando Pydantic.
-#
-#
-# O uso de Pydantic garante validação automática de tipos, serialização/desserialização
-# fácil para JSON e uma fonte única da verdade para a estrutura de dados.
+# usando Pydantic. Ele também inclui tipos de dados de baixo nível, como dataclasses,
+# que são usados internamente pelos managers e engineers.
 
 from pydantic import BaseModel, Field
 from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+import torch
 
-# --- Modelos de Parâmetros de Entrada ---
-#
+# --- Modelos de Parâmetros de Entrada (Pydantic) ---
+# Representam os dados que o usuário fornece através de uma interface.
 
 class PreProductionParams(BaseModel):
     """Parâmetros para a etapa de Roteiro e Keyframes."""
@@ -41,8 +40,8 @@ class GenerationParameters:
     pos_producao: Optional[Dict[str, Any]] = None
 
 
-# --- Modelos de Artefatos Gerados ---
-#
+# --- Modelos de Artefatos Gerados (Pydantic) ---
+# Representam os dados e metadados dos resultados criados pelo framework.
 
 class MediaRef(BaseModel):
     """Representa uma mídia de referência fornecida pelo usuário."""
@@ -60,13 +59,11 @@ class KeyframeData:
     caminho_pixel: str
     caminho_latent: str
     prompt_keyframe: str
-    # Futuramente: midias_contexto: List[Dict[str, Any]]
 
 class VideoFragmentData(BaseModel):
     """Metadados sobre a geração de um único fragmento de vídeo entre dois keyframes."""
     id: int
     prompt_video: str
-    # Futuramente: midias_inicio, midias_caminho, midias_fim
 
 class VideoData(BaseModel):
     """Estrutura de dados completa para o vídeo final (ou um grande clipe)."""
@@ -76,7 +73,7 @@ class VideoData:
     fragmentos_componentes: List[VideoFragmentData]
 
 
-# --- O Modelo de Estado Principal ---
+# --- O Modelo de Estado Principal (Pydantic) ---
 
 class GenerationState(BaseModel):
     """
@@ -88,4 +85,16 @@ class GenerationState(BaseModel):
     midias_referencia: List[MediaRef] = Field(default_factory=list)
     Atos: List[Ato] = Field(default_factory=list)
     Keyframe_atos: List[KeyframeData] = Field(default_factory=list)
-    videos_atos: List[VideoData] = Field(default_factory=list)
+    videos_atos: List[VideoData] = Field(default_factory=list)
+
+
+# --- Tipos de Dados Internos (Dataclass) ---
+# Usado para passar dados complexos (como tensores) que não são facilmente
+# serializáveis em JSON, entre os componentes internos do framework.
+
+@dataclass
+class LatentConditioningItem:
+    """Representa uma âncora de condicionamento no espaço latente para o LTX."""
+    latent_tensor: torch.Tensor
+    media_frame_number: int
+    conditioning_strength: float
app.py
CHANGED
@@ -1,39 +1,18 @@
-
 # app.py
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-#
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-# YouTube (Results): https://m.youtube.com/channel/UC3EgoJi_Fv7yuDpvfYNtoIQ
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
+# Versão 3.0.0 (UI Head for Aduc Framework)
 #
-#
-#
-#
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# PENDING PATENT NOTICE: The ADUC method and system implemented in this
-# software is in the process of being patented. Please see NOTICE.md for details.
-
+# Este arquivo implementa a interface de usuário com Gradio para o Aduc-Sdr.
+# Ele atua como um cliente para o 'aduc_framework', que contém toda a
+# lógica de negócio e orquestração.
 
 import gradio as gr
 import yaml
 import logging
 import os
+import sys
 import shutil
 import time
 import json
@@ -70,7 +49,15 @@ if os.path.exists(LOG_FILE_PATH):
 log_format = '%(asctime)s - %(levelname)s - [%(name)s:%(funcName)s] - %(message)s'
 root_logger = logging.getLogger()
 root_logger.setLevel(logging.INFO)
-
+root_logger.handlers.clear()
+stream_handler = logging.StreamHandler(sys.stdout)
+stream_handler.setLevel(logging.INFO)
+stream_handler.setFormatter(logging.Formatter(log_format))
+root_logger.addHandler(stream_handler)
+file_handler = logging.FileHandler(LOG_FILE_PATH, mode='w', encoding='utf-8')
+file_handler.setLevel(logging.INFO)
+file_handler.setFormatter(logging.Formatter(log_format))
+root_logger.addHandler(file_handler)
 logger = logging.getLogger(__name__)
 
 # Carrega a configuração e inicializa o framework
@@ -89,11 +76,16 @@
 # --- 2. FUNÇÕES WRAPPER (CAMADA DE TRADUÇÃO UI <-> FRAMEWORK) ---
 
 def run_pre_production_wrapper(prompt, num_keyframes, ref_files, resolution_str, duration_per_fragment, progress=gr.Progress()):
+    """
+    Coleta dados da UI, os empacota em um objeto Pydantic e chama a tarefa de pré-produção do framework.
+    """
     if not ref_files:
         raise gr.Error("Por favor, forneça pelo menos uma imagem de referência.")
 
+    # Etapa de UI: Processar e salvar os arquivos de referência
     ref_paths = [aduc.process_image_for_story(f.name, 480, f"ref_processed_{i}.png") for i, f in enumerate(ref_files)]
 
+    # 1. Empacota os parâmetros da UI no modelo Pydantic que o framework espera
     params = PreProductionParams(
         prompt=prompt,
         num_keyframes=int(num_keyframes),
@@ -102,17 +94,26 @@ def run_pre_production_wrapper(prompt, num_keyframes, ref_files, resolution_str,
         duration_per_fragment=duration_per_fragment
     )
 
-
+    # 2. Define a função de callback para o progresso
+    progress_callback = progress
+
+    # 3. Chama o framework
+    storyboard, final_keyframes, updated_state = aduc.task_pre_production(params, progress_callback)
 
+    # 4. Retorna os resultados desempacotados para os componentes corretos da UI
     return updated_state.model_dump(), storyboard, final_keyframes, gr.update(visible=True, open=True)
 
 def run_original_production_wrapper(current_state_dict, trim_percent, handler_strength, dest_strength, guidance_scale, stg_scale, steps, progress=gr.Progress()):
+    """
+    Coleta os parâmetros da etapa de produção e o estado atual, e chama a tarefa de produção do framework.
+    """
     yield {
         original_video_output: gr.update(value=None, visible=True, label="🎬 Produzindo seu filme..."),
         final_video_output: gr.update(value=None, visible=True, label="🎬 Produção em progresso..."),
         step4_accordion: gr.update(visible=False)
     }
 
+    # 1. Empacota os parâmetros dos sliders no modelo Pydantic
     production_params = ProductionParams(
         trim_percent=int(trim_percent),
         handler_strength=handler_strength,
@@ -122,6 +123,7 @@ def run_original_production_wrapper(current_state_dict, trim_percent, handler_st
         inference_steps=int(steps)
     )
 
+    # 2. Chama a tarefa de produção no framework.
     final_video_path, latent_paths, updated_state = aduc.task_produce_original_movie(
         params=production_params,
         progress_callback=progress
@@ -129,6 +131,7 @@
 
     updated_state_dict = updated_state.model_dump()
 
+    # 3. Desempacota e retorna o resultado final para a UI
     yield {
         original_video_output: gr.update(value=final_video_path, label="✅ Filme Original Master"),
         final_video_output: gr.update(value=final_video_path),
@@ -150,11 +153,15 @@ def get_log_content():
 # --- 3. DEFINIÇÃO DA UI GRADIO ---
 with gr.Blocks(theme=cinematic_theme, css="style.css") as demo:
 
+    # O gr.State é a "memória" da nossa UI. Ele armazena o JSON de estado entre os cliques.
     generation_state_holder = gr.State(value={})
 
+    # Outros states para gerenciar caminhos de arquivos
     original_latents_paths_state = gr.State(value=None)
     original_video_path_state = gr.State(value=None)
     current_source_video_state = gr.State(value=None)
+    upscaled_video_path_state = gr.State(value=None)
+    hd_video_path_state = gr.State(value=None)
 
     gr.Markdown("<h1>ADUC-SDR 🎬 - O Diretor de Cinema IA</h1>")
     gr.Markdown("<p>Crie um filme completo com vídeo e áudio, orquestrado por uma equipe de IAs especialistas.</p>")
@@ -174,7 +181,6 @@
     keyframe_gallery = gr.Gallery(label="Galeria de Cenas-Chave (Keyframes)", visible=True, object_fit="contain", height="auto", type="filepath")
 
     with gr.Accordion("Etapa 3: Produção do Vídeo Original", open=False, visible=False) as step3_accordion:
-        # Aqui omiti a definição detalhada dos sliders para brevidade, mas eles existem
         trim_percent_slider = gr.Slider(minimum=10, maximum=90, value=50, step=5, label="Poda Causal (%)")
         handler_strength = gr.Slider(label="Força do Déjà-Vu", minimum=0.0, maximum=1.0, value=0.5, step=0.05)
         dest_strength = gr.Slider(label="Força da Âncora Final", minimum=0.0, maximum=1.0, value=0.75, step=0.05)
engineers/LICENSE
DELETED
@@ -1,23 +0,0 @@
-# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR para geração de vídeo coerente.
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
-#
-# Contato:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Repositórios e Projetos Relacionados:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
engineers/NOTICE.md
DELETED
@@ -1,76 +0,0 @@
-# NOTICE
-
-Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
-
----
-
-## Intellectual Property and Licensing Notice
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-### **Reconhecimento e Implicações (EM PORTUGUÊS):**
-
-Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
-
-1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
-2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
-3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-### **Acknowledgement and Implications (IN ENGLISH):**
-
-By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
-
-1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
-2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
-3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
-
----
-
-## AGPLv3 License
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
-
----
-
-**Contact for Inquiries:**
-
-For more information about the ADUC architecture, the status of the patent process, or to discuss licensing for commercial or non-AGPLv3-compliant uses, please contact:
-
-Carlos Rodrigues dos Santos
-carlex22@gmail.com
-Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
engineers/README.md
DELETED
@@ -1,211 +0,0 @@
----
-title: Euia-AducSdr
-emoji: 🎥
-colorFrom: indigo
-colorTo: purple
-sdk: gradio
-app_file: app.py
-pinned: true
-license: agpl-3.0
-short_description: Uma implementação aberta e funcional da arquitetura ADUC-SDR
----
-
-
-### 🇧🇷 Português
-
-Uma implementação aberta e funcional da arquitetura ADUC-SDR (Arquitetura de Unificação Compositiva - Escala Dinâmica e Resiliente), projetada para a geração de vídeo coerente de longa duração. Este projeto materializa os princípios de fragmentação, navegação geométrica e um mecanismo de "eco causal 4bits memoria" para garantir a continuidade física e narrativa em sequências de vídeo geradas por múltiplos modelos de IA.
-
-**Licença:** Este projeto é licenciado sob os termos da **GNU Affero General Public License v3.0**. Isto significa que se você usar este software (ou qualquer trabalho derivado) para fornecer um serviço através de uma rede, você é **obrigado a disponibilizar o código-fonte completo** da sua versão para os usuários desse serviço.
-
-- **Copyright (C) 4 de Agosto de 2025, Carlos Rodrigues dos Santos**
-- Uma cópia completa da licença pode ser encontrada no arquivo [LICENSE](LICENSE).
-
----
-
-### 🇬🇧 English
-
-An open and functional implementation of the ADUC-SDR (Architecture for Compositive Unification - Dynamic and Resilient Scaling) architecture, designed for long-form coherent video generation. This project materializes the principles of fragmentation, geometric navigation, and a "causal echo 4-bit memory" mechanism to ensure physical and narrative continuity in video sequences generated by multiple AI models.
-
-**License:** This project is licensed under the terms of the **GNU Affero General Public License v3.0**. This means that if you use this software (or any derivative work) to provide a service over a network, you are **required to make the complete source code** of your version available to the users of that service.
-
-- **Copyright (C) August 4, 2025, Carlos Rodrigues dos Santos**
-- A full copy of the license can be found in the [LICENSE](LICENSE) file.
-
----
-
-## **Intellectual Property and Patent Notice**
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-A arquitetura e o método **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste projeto e nas reivindicações associadas, estão **atualmente em processo de patenteamento**.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, que incluem, mas não se limitam a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-Ao utilizar este software e a arquitetura ADUC aqui implementada, você reconhece a natureza inovadora desta arquitetura e que a **reprodução ou exploração da lógica central da ADUC em sistemas independentes pode infringir direitos de patente pendente.**
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The **ADUC (Automated Discovery and Orchestration of Complex tasks)** architecture and method, as described in this project and its associated claims, are **currently in the process of being patented.**
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-By using this software and the ADUC architecture implemented herein, you acknowledge the innovative nature of this architecture and that **the reproduction or exploitation of ADUC's core logic in independent systems may infringe upon pending patent rights.**
-
----
-
-### ADUC Technical Details and Claims
-
-#### 🇧🇷 Definição Curta (para Tese e Patente)
-
-**ADUC** é um *framework pré-input* e *intermediário* de **gerenciamento de prompts** que:
-
-1. **fragmenta** solicitações acima do limite de contexto de qualquer modelo,
-2. **escala linearmente** (processo sequencial com memória persistida),
-3. **distribui** sub-tarefas a **especialistas** (modelos/ferramentas heterogêneos), e
-4. **realimenta** a próxima etapa com avaliação do que foi feito/esperado (LLM diretor).
-
-Não é um modelo; é uma **camada orquestradora** plugável antes do input de modelos existentes (texto, imagem, áudio, vídeo), usando *tokens universais* e a tecnologia atual.
-
-#### 🇬🇧 Short Definition (for Thesis and Patent)
-
-**ADUC** is a *pre-input* and *intermediate* **prompt management framework** that:
-
-1. **fragments** requests exceeding any model's context limit,
-2. **scales linearly** (sequential process with persisted memory),
-3. **distributes** sub-tasks to **specialists** (heterogeneous models/tools), and
-4. **feeds back** to the next step with an evaluation of what was done/expected (director LLM).
-
-It is not a model; it is a pluggable **orchestration layer** before the input of existing models (text, image, audio, video), using *universal tokens* and current technology.
-
----
-
-#### 🇧🇷 Elementos Essenciais (Telegráfico)
-
-* **Agnóstico a modelos:** opera com qualquer LLM/difusor/API.
-* **Pré-input manager:** recebe pedido do usuário, **divide** em blocos ≤ limite de tokens, **prioriza**, **agenda** e **roteia**.
-* **Memória persistida:** resultados/latentes/“eco” viram **estado compartilhado** para o próximo bloco (nada é ignorado).
-* **Especialistas:** *routers* decidem quem faz o quê (ex.: “descrição → LLM-A”, “keyframe → Img-B”, “vídeo → Vid-C”).
-* **Controle de qualidade:** LLM diretor compara *o que fez* × *o que deveria* × *o que falta* e **regenera objetivos** do próximo fragmento.
-* **Custo/latência-aware:** planeja pela **VRAM/tempo/custo**, não tenta “abraçar tudo de uma vez”.
-
-#### 🇬🇧 Essential Elements (Telegraphic)
-
-* **Model-agnostic:** operates with any LLM/diffuser/API.
-* **Pre-input manager:** receives user request, **divides** into blocks ≤ token limit, **prioritizes**, **schedules**, and **routes**.
-* **Persisted memory:** results/latents/“echo” become **shared state** for the next block (nothing is ignored).
-* **Specialists:** *routers* decide who does what (e.g., “description → LLM-A”, “keyframe → Img-B”, “video → Vid-C”).
-* **Quality control:** director LLM compares *what was done* × *what should be done* × *what is missing* and **regenerates objectives** for the next fragment.
-* **Cost/latency-aware:** plans by **VRAM/time/cost**, does not try to “embrace everything at once”.
-
----
-
-#### 🇧🇷 Reivindicações Independentes (Método e Sistema)
-
-**Reivindicação Independente (Método) — Versão Enxuta:**
-
-1. **Método** de **orquestração de prompts** para execução de tarefas acima do limite de contexto de modelos de IA, compreendendo:
-(a) **receber** uma solicitação que excede um limite de tokens;
-(b) **analisar** a solicitação por um **LLM diretor** e **fragmentá-la** em sub-tarefas ≤ limite;
-(c) **selecionar** especialistas de execução para cada sub-tarefa com base em capacidades declaradas;
-(d) **gerar** prompts específicos por sub-tarefa em **tokens universais**, incluindo referências ao **estado persistido** de execuções anteriores;
-(e) **executar sequencialmente** as sub-tarefas e **persistir** suas saídas como memória (incluindo latentes/eco/artefatos);
-(f) **avaliar** automaticamente a saída versus metas declaradas e **regenerar objetivos** do próximo fragmento;
-(g) **iterar** (b)–(f) até que os critérios de completude sejam atendidos, produzindo o resultado agregado;
-em que o framework **escala linearmente** no tempo e armazenamento físico, **independente** da janela de contexto dos modelos subjacentes.
-
-**Reivindicação Independente (Sistema):**
-
-2. **Sistema** de orquestração de prompts, compreendendo: um **planejador LLM diretor**; um **roteador de especialistas**; um **banco de estado persistido** (incl. memória cinética para vídeo); um **gerador de prompts universais**; e um **módulo de avaliação/realimentação**, acoplados por uma **API pré-input** a modelos heterogêneos.
-
-#### 🇬🇧 Independent Claims (Method and System)
-
-**Independent Claim (Method) — Concise Version:**
-
-1. A **method** for **prompt orchestration** for executing tasks exceeding AI model context limits, comprising:
-(a) **receiving** a request that exceeds a token limit;
-(b) **analyzing** the request by a **director LLM** and **fragmenting it** into sub-tasks ≤ the limit;
-(c) **selecting** execution specialists for each sub-task based on declared capabilities;
-(d) **generating** specific prompts per sub-task in **universal tokens**, including references to the **persisted state** of previous executions;
-(e) **sequentially executing** the sub-tasks and **persisting** their outputs as memory (including latents/echo/artifacts);
-(f) **automatically evaluating** the output against declared goals and **regenerating objectives** for the next fragment;
-(g) **iterating** (b)–(f) until completion criteria are met, producing the aggregated result;
-wherein the framework **scales linearly** in time and physical storage, **independent** of the context window of the underlying models.
-
-**Independent Claim (System):**
-
-2. A prompt orchestration **system**, comprising: a **director LLM planner**; a **specialist router**; a **persisted state bank** (incl. kinetic memory for video); a **universal prompt generator**; and an **evaluation/feedback module**, coupled via a **pre-input API** to heterogeneous models.
-
----
-
-#### 🇧🇷 Dependentes Úteis
-
-* (3) Onde o roteamento considera **custo/latência/VRAM** e metas de qualidade.
-* (4) Onde o banco de estado inclui **eco cinético** para vídeo (últimos *n* frames/latentes/fluxo).
-* (5) Onde a avaliação usa métricas específicas por domínio (Lflow, consistência semântica, etc.).
-* (6) Onde *tokens universais* padronizam instruções entre especialistas.
-* (7) Onde a orquestração decide **cut vs continuous** e **corte regenerativo** (Déjà-Vu) ao editar vídeo.
-* (8) Onde o sistema **nunca descarta** conteúdo excedente: **reagenda** em novos fragmentos.
-
-#### 🇬🇧 Useful Dependents
-
-* (3) Wherein routing considers **cost/latency/VRAM** and quality goals.
-* (4) Wherein the state bank includes **kinetic echo** for video (last *n* frames/latents/flow).
-* (5) Wherein evaluation uses domain-specific metrics (Lflow, semantic consistency, etc.).
-* (6) Wherein *universal tokens* standardize instructions between specialists.
-* (7) Wherein orchestration decides **cut vs continuous** and **regenerative cut** (Déjà-Vu) when editing video.
-* (8) Wherein the system **never discards** excess content: it **reschedules** it in new fragments.
-
----
-
-#### 🇧🇷 Como isso conversa com SDR (Vídeo)
-
-* **Eco Cinético**: é um **tipo de estado persistido** consumido pelo próximo passo.
-* **Déjà-Vu (Corte Regenerativo)**: é **uma política de orquestração** aplicada quando há edição; ADUC decide, monta os prompts certos e chama o especialista de vídeo.
-* **Cut vs Continuous**: decisão do **diretor** com base em estado + metas; ADUC roteia e garante a sobreposição/remoção final.
-
-#### 🇬🇧 How this Converses with SDR (Video)
-
-* **Kinetic Echo**: is a **type of persisted state** consumed by the next step.
-* **Déjà-Vu (Regenerative Cut)**: is an **orchestration policy** applied during editing; ADUC decides, crafts the right prompts, and calls the video specialist.
-* **Cut vs Continuous**: decision made by the **director** based on state + goals; ADUC routes and ensures the final overlap/removal.
-
----
-
-#### 🇧🇷 Mensagem Clara ao Usuário (Experiência)
-
-> “Seu pedido excede o limite X do modelo Y. Em vez de truncar silenciosamente, o **ADUC** dividirá e **entregará 100%** do conteúdo por etapas coordenadas.”
-
-Isso é diferencial prático e jurídico: **não-obviedade** por transformar limite de contexto em **pipeline controlado**, com **persistência de estado** e **avaliação iterativa**.
-
-#### 🇬🇧 Clear User Message (Experience)
-
-> "Your request exceeds model Y's limit X. Instead of silently truncating, **ADUC** will divide and **deliver 100%** of the content through coordinated steps."
-
-This is a practical and legal differentiator: **non-obviousness** by transforming context limits into a **controlled pipeline**, with **state persistence** and **iterative evaluation**.
-
----
-
-### Contact / Contato / Contacto
-
-- **Author / Autor:** Carlos Rodrigues dos Santos
-- **Email:** carlex22@gmail.com
-- **GitHub:** [https://github.com/carlex22/Aduc-sdr](https://github.com/carlex22/Aduc-sdr)
-- **Hugging Face Spaces:**
-  - [Ltx-SuperTime-60Secondos](https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/)
-  - [Novinho](https://huggingface.co/spaces/Carlexxx/Novinho/)
-
----
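
The method claims (a)–(g) in the deleted README describe a concrete control loop. As a reading aid, here is a minimal sketch of that loop; it is not code from this repository, and every name in it (`SubTask`, `StateBank`, `orchestrate`) is a hypothetical stand-in for the components the claims enumerate.

```python
# Illustrative sketch of the orchestration loop from method claims (a)-(g).
# All classes and functions here are hypothetical stand-ins, not repository APIs.
from dataclasses import dataclass, field
from typing import Callable, Dict, List


@dataclass
class SubTask:
    prompt: str       # sub-task instruction expressed in "universal tokens"
    specialist: str   # routing key, e.g. "text", "image", "video"


@dataclass
class StateBank:
    """Persisted state: every output (latents, echo, artifacts) is kept."""
    artifacts: List[object] = field(default_factory=list)

    def persist(self, output: object) -> None:
        self.artifacts.append(output)


def orchestrate(
    request: str,                                      # (a) request over the token limit
    director: Callable[[str, StateBank], List[SubTask]],
    specialists: Dict[str, Callable[[str], object]],
    is_complete: Callable[[StateBank], bool],
    max_rounds: int = 16,
) -> StateBank:
    state = StateBank()
    for _ in range(max_rounds):                        # (g) iterate until complete
        for task in director(request, state):          # (b) fragment into sub-tasks <= limit
            run = specialists[task.specialist]         # (c) route by declared capability
            state.persist(run(task.prompt))            # (d)/(e) execute and persist output
        if is_complete(state):                         # (f) evaluate against declared goals
            break                                      # otherwise director regenerates objectives
    return state                                       # aggregated result
```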
engineers/__init__.py
DELETED
File without changes
engineers/deformes2D_thinker.py
DELETED
@@ -1,171 +0,0 @@
-# engineers/deformes2D_thinker.py
-# AducSdr: An open and functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License...
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version 1.0.1
-
-import logging
-from pathlib import Path
-from PIL import Image
-import gradio as gr
-from typing import List
-
-# It imports the communication layer, not the API directly
-from managers.gemini_manager import gemini_manager_singleton
-
-logger = logging.getLogger(__name__)
-
-class Deformes2DThinker:
-    """
-    The cognitive specialist that handles prompt engineering and creative logic.
-    """
-    def _read_prompt_template(self, filename: str) -> str:
-        """Reads a prompt template file from the 'prompts' directory."""
-        try:
-            prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
-            with open(prompts_dir / filename, "r", encoding="utf-8") as f:
-                return f.read()
-        except FileNotFoundError:
-            raise gr.Error(f"Prompt template file not found: prompts/{filename}")
-
-    def generate_storyboard(self, prompt: str, num_keyframes: int, ref_image_paths: List[str]) -> List[str]:
-        """Acts as a Scriptwriter to generate a storyboard."""
-        try:
-            template = self._read_prompt_template("unified_storyboard_prompt.txt")
-            storyboard_prompt = template.format(user_prompt=prompt, num_fragments=num_keyframes)
-            images = [Image.open(p) for p in ref_image_paths]
-
-            # Assemble all parts into a single list for the manager
-            prompt_parts = [storyboard_prompt] + images
-            storyboard_data = gemini_manager_singleton.get_json_object(prompt_parts)
-
-            storyboard = storyboard_data.get("scene_storyboard", [])
-            if not storyboard or len(storyboard) != num_keyframes:
-                raise ValueError(f"Incorrect number of scenes generated. Expected {num_keyframes}, got {len(storyboard)}.")
-            return storyboard
-        except Exception as e:
-            raise gr.Error(f"The Scriptwriter (Deformes2D Thinker) failed: {e}")
-
-    def select_keyframes_from_pool(self, storyboard: list, base_image_paths: list[str], pool_image_paths: list[str]) -> list[str]:
-        """Acts as a Photographer/Editor to select keyframes."""
-        if not pool_image_paths:
-            raise gr.Error("The 'image pool' (Additional Images) is empty.")
-
-        try:
-            template = self._read_prompt_template("keyframe_selection_prompt.txt")
-
-            image_map = {f"IMG-{i+1}": path for i, path in enumerate(pool_image_paths)}
-
-            prompt_parts = ["# Reference Images (Story Base)"]
-            prompt_parts.extend([Image.open(p) for p in base_image_paths])
-            prompt_parts.append("\n# Image Pool (Scene Bank)")
-            prompt_parts.extend([Image.open(p) for p in pool_image_paths])
-
-            storyboard_str = "\n".join([f"- Scene {i+1}: {s}" for i, s in enumerate(storyboard)])
-            selection_prompt = template.format(storyboard_str=storyboard_str, image_identifiers=list(image_map.keys()))
-            prompt_parts.append(selection_prompt)
-
-            selection_data = gemini_manager_singleton.get_json_object(prompt_parts)
-
-            selected_identifiers = selection_data.get("selected_image_identifiers", [])
-
-            if len(selected_identifiers) != len(storyboard):
-                raise ValueError("The AI did not select the correct number of images for the scenes.")
-
-            selected_paths = [image_map[identifier] for identifier in selected_identifiers]
-            return selected_paths
-
-        except Exception as e:
-            raise gr.Error(f"The Photographer (Deformes2D Thinker) failed to select images: {e}")
-
-    def get_anticipatory_keyframe_prompt(self, global_prompt: str, scene_history: str, current_scene_desc: str, future_scene_desc: str, last_image_path: str, fixed_ref_paths: list[str]) -> str:
-        """Acts as an Art Director to generate an image prompt."""
-        try:
-            template = self._read_prompt_template("anticipatory_keyframe_prompt.txt")
-
-            director_prompt = template.format(
-                historico_prompt=scene_history,
-                cena_atual=current_scene_desc,
-                cena_futura=future_scene_desc
-            )
-
-            prompt_parts = [
-                f"# CONTEXT:\n- Global Story Goal: {global_prompt}\n# VISUAL ASSETS:",
-                "Current Base Image [IMG-BASE]:",
-                Image.open(last_image_path)
-            ]
-
-            ref_counter = 1
-            for path in fixed_ref_paths:
-                if path != last_image_path:
-                    prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)])
-                    ref_counter += 1
-
-            prompt_parts.append(director_prompt)
-
-            final_flux_prompt = gemini_manager_singleton.get_raw_text(prompt_parts)
-
-            return final_flux_prompt.strip().replace("`", "").replace("\"", "")
-        except Exception as e:
-            raise gr.Error(f"The Art Director (Deformes2D Thinker) failed: {e}")
-
-    def get_cinematic_decision(self, global_prompt: str, story_history: str,
-                               past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
-                               past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> dict:
-        """Acts as a Film Director to make editing decisions and generate motion prompts."""
-        try:
-            template = self._read_prompt_template("cinematic_director_prompt.txt")
-            prompt_text = template.format(
-                global_prompt=global_prompt,
-                story_history=story_history,
-                past_scene_desc=past_scene_desc,
-                present_scene_desc=present_scene_desc,
-                future_scene_desc=future_scene_desc
-            )
-
-            prompt_parts = [
-                prompt_text,
-                "[PAST_IMAGE]:", Image.open(past_keyframe_path),
-                "[PRESENT_IMAGE]:", Image.open(present_keyframe_path),
-                "[FUTURE_IMAGE]:", Image.open(future_keyframe_path)
-            ]
-
-            decision_data = gemini_manager_singleton.get_json_object(prompt_parts)
-
-            if "transition_type" not in decision_data or "motion_prompt" not in decision_data:
-                raise ValueError("AI response (Cinematographer) is malformed. Missing 'transition_type' or 'motion_prompt'.")
-            return decision_data
-        except Exception as e:
-            logger.error(f"The Film Director (Deformes2D Thinker) failed: {e}. Using fallback to 'continuous'.", exc_info=True)
-            return {
-                "transition_type": "continuous",
-                "motion_prompt": f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
-            }
-
-# --- Singleton Instance ---
-deformes2d_thinker_singleton = Deformes2DThinker()
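
For orientation, the deleted module's storyboard entry point was called as sketched below; the import path is the file's pre-commit location, and the prompt text and image paths are made-up placeholders.

```python
# Hypothetical usage of the deleted module at its pre-commit import path.
# The prompt and the reference image paths are illustrative placeholders.
from engineers.deformes2D_thinker import deformes2d_thinker_singleton

scenes = deformes2d_thinker_singleton.generate_storyboard(
    prompt="A lighthouse keeper finds a message in a bottle.",
    num_keyframes=4,
    ref_image_paths=["refs/lighthouse.png", "refs/keeper.png"],
)
for i, scene in enumerate(scenes, start=1):
    print(f"Scene {i}: {scene}")
```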
engineers/deformes3D.py
DELETED
@@ -1,193 +0,0 @@
-# engineers/deformes3D.py
-#
-# AducSdr: An open and functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License...
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version 2.0.1
-
-from PIL import Image, ImageOps
-import os
-import time
-import logging
-import gradio as gr
-import yaml
-import torch
-import numpy as np
-
-from managers.flux_kontext_manager import flux_kontext_singleton
-from engineers.deformes2D_thinker import deformes2d_thinker_singleton
-from aduc_types import LatentConditioningItem
-from managers.ltx_manager import ltx_manager_singleton
-from managers.vae_manager import vae_manager_singleton
-from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
-
-logger = logging.getLogger(__name__)
-
-class Deformes3DEngine:
-    """
-    ADUC Specialist for static image (keyframe) generation.
-    """
-    def __init__(self, workspace_dir):
-        self.workspace_dir = workspace_dir
-        self.image_generation_helper = flux_kontext_singleton
-        logger.info("3D Engine (Image Specialist) ready to receive orders from the Maestro.")
-
-    def _generate_single_keyframe(self, prompt: str, reference_images: list[Image.Image], output_filename: str, width: int, height: int, callback: callable = None) -> str:
-        """
-        Low-level function that generates a single image using the LTX helper.
-        """
-        logger.info(f"Generating keyframe '{output_filename}' with prompt: '{prompt}'")
-        generated_image = self.image_generation_helper.generate_image(
-            reference_images=reference_images, prompt=prompt, width=width,
-            height=height, seed=int(time.time()), callback=callback
-        )
-        final_path = os.path.join(self.workspace_dir, output_filename)
-        generated_image.save(final_path)
-        logger.info(f"Keyframe successfully saved to: {final_path}")
-        return final_path
-
-    def generate_keyframes_from_storyboard(self, storyboard: list, initial_ref_path: str, global_prompt: str, keyframe_resolution: int, general_ref_paths: list, progress_callback_factory: callable = None):
-        """
-        Orchestrates the generation of all keyframes.
-        """
-        current_base_image_path = initial_ref_path
-        previous_prompt = "N/A (initial reference image)"
-        final_keyframes_gallery = []  # [current_base_image_path]
-        width, height = keyframe_resolution, keyframe_resolution
-        target_resolution_tuple = (width, height)
-
-        num_keyframes_to_generate = len(storyboard) - 1
-        logger.info(f"IMAGE SPECIALIST: Received order to generate {num_keyframes_to_generate} keyframes (LTX versions).")
-
-        for i in range(num_keyframes_to_generate):
-            scene_index = i + 1
-            current_scene = storyboard[i]
-            future_scene = storyboard[i+1]
-            progress_callback_flux = progress_callback_factory(scene_index, num_keyframes_to_generate) if progress_callback_factory else None
-
-            logger.info(f"--> Generating Keyframe {scene_index}/{num_keyframes_to_generate}...")
-
-            # --- STEP A: Generate with FLUX (Primary Method) ---
-            logger.info(f"  - Step A: Generating with keyframe...")
-
-            img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
-                global_prompt=global_prompt, scene_history=previous_prompt,
-                current_scene_desc=current_scene, future_scene_desc=future_scene,
-                last_image_path=current_base_image_path, fixed_ref_paths=general_ref_paths
-            )
-
-            #flux_ref_paths = list(set([current_base_image_path] + general_ref_paths))
-            #flux_ref_images = [Image.open(p) for p in flux_ref_paths]
-
-            #flux_keyframe_path = self._generate_single_keyframe(
-            #    prompt=img_prompt, reference_images=flux_ref_images,
-            #    output_filename=f"keyframe_{scene_index}_flux.png", width=width, height=height,
-            #    callback=progress_callback_flux
-            #)
-            #final_keyframes_gallery.append(flux_keyframe_path)
-
-            # --- STEP B: LTX Enrichment Experiment ---
-            #logger.info(f"  - Step B: Generating enrichment with LTX...")
-
-            ltx_context_paths = []
-            context_paths = []
-            context_paths = [current_base_image_path] + [p for p in general_ref_paths if p != current_base_image_path][:3]
-
-            ltx_context_paths = list(reversed(context_paths))
-            logger.info(f"  - LTX Context Order (Reversed): {[os.path.basename(p) for p in ltx_context_paths]}")
-
-            ltx_conditioning_items = []
-
-            weight = 0.6
-            for idx, path in enumerate(ltx_context_paths):
-                img_pil = Image.open(path).convert("RGB")
-                img_processed = self._preprocess_image_for_latent_conversion(img_pil, target_resolution_tuple)
-                pixel_tensor = self._pil_to_pixel_tensor(img_processed)
-                latent_tensor = vae_manager_singleton.encode(pixel_tensor)
-
-                ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
-
-                if idx >= 0:
-                    weight -= 0.1
-
-            ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
-            generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
-                height=height, width=width,
-                conditioning_items_data=ltx_conditioning_items,
-                motion_prompt=img_prompt,
-                video_total_frames=48,
-                video_fps=24,
-                **ltx_base_params
-            )
-
-            final_latent = generated_latents[:, :, -1:, :, :]
-            upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
-            enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent)
-
-            ltx_keyframe_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index}_ltx.png")
-            self.save_image_from_tensor(enriched_pixel_tensor, ltx_keyframe_path)
-            final_keyframes_gallery.append(ltx_keyframe_path)
-
-            # Use the FLUX keyframe as the base for the next iteration to maintain the primary narrative path
-            current_base_image_path = ltx_keyframe_path  #flux_keyframe_path
-            previous_prompt = img_prompt
-
-        logger.info(f"IMAGE SPECIALIST: Generation of all keyframe versions (LTX) complete.")
-        return final_keyframes_gallery
-
-    # --- HELPER FUNCTIONS ---
-
-    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
-        """Resizes and fits an image to the target resolution for VAE encoding."""
-        if image.size != target_resolution:
-            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
-        return image
-
-    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
-        """Helper to convert PIL to the 5D pixel tensor the VAE expects."""
-        image_np = np.array(pil_image).astype(np.float32) / 255.0
-        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
-        return (tensor * 2.0) - 1.0
-
-    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
-        """Helper to save a 1-frame pixel tensor as an image."""
-        tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
-        tensor_hwc = tensor_chw.permute(1, 2, 0)
-        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
-        image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
-        Image.fromarray(image_np).save(path)
-
-# --- Singleton Instantiation ---
-try:
-    with open("config.yaml", 'r') as f:
-        config = yaml.safe_load(f)
-    WORKSPACE_DIR = config['application']['workspace_dir']
-    deformes3d_engine_singleton = Deformes3DEngine(workspace_dir=WORKSPACE_DIR)
-except Exception as e:
-    logger.error(f"Could not initialize Deformes3DEngine: {e}", exc_info=True)
-    deformes3d_engine_singleton = None
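
The conditioning loop in the deleted engine assigns a linearly decaying weight to the reversed context images: the oldest general reference receives 0.6 and each later image 0.1 less, so the current base image conditions most weakly. A standalone sketch of that schedule, with hypothetical file names:

```python
# Standalone sketch of the decaying conditioning-weight schedule used above.
# File names are hypothetical; weights start at 0.6 and drop by 0.1 per image.
context_paths = ["kf_current.png", "ref_a.png", "ref_b.png", "ref_c.png"]
ltx_context_paths = list(reversed(context_paths))  # general references first

weight = 0.6
schedule = []
for path in ltx_context_paths:
    schedule.append((path, round(weight, 1)))
    weight -= 0.1

print(schedule)
# [('ref_c.png', 0.6), ('ref_b.png', 0.5), ('ref_a.png', 0.4), ('kf_current.png', 0.3)]
```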
engineers/deformes3D_thinker.py
DELETED
@@ -1,136 +0,0 @@
-# engineers/deformes3D_thinker.py
-#
-# Copyright (C) 2025 Carlos Rodrigues dos Santos
-#
-# Version: 4.0.0 (Definitive)
-#
-# This is the definitive, robust implementation. It directly contains the prompt
-# enhancement logic copied from the LTX pipeline's utils. It accesses the
-# enhancement models loaded by the LTX Manager and performs the captioning
-# and LLM generation steps locally, ensuring full control and compatibility.
-
-import logging
-from PIL import Image
-import torch
-
-# Imports the LTX singleton to get access to its pipeline and the models in it
-from managers.ltx_manager import ltx_manager_singleton
-
-# Imports the LTX system prompt to ensure consistency
-from ltx_video.utils.prompt_enhance_utils import I2V_CINEMATIC_PROMPT
-
-logger = logging.getLogger(__name__)
-
-class Deformes3DThinker:
-    """
-    The tactical specialist that now directly implements the prompt enhancement
-    logic, using the models provided by the LTX pipeline.
-    """
-
-    def __init__(self):
-        # Accesses the exposed pipeline to obtain the required models
-        pipeline = ltx_manager_singleton.prompt_enhancement_pipeline
-        if not pipeline:
-            raise RuntimeError("Deformes3DThinker could not access the LTX pipeline.")
-
-        # Stores the models and processors as direct attributes
-        self.caption_model = pipeline.prompt_enhancer_image_caption_model
-        self.caption_processor = pipeline.prompt_enhancer_image_caption_processor
-        self.llm_model = pipeline.prompt_enhancer_llm_model
-        self.llm_tokenizer = pipeline.prompt_enhancer_llm_tokenizer
-
-        # Checks whether the models were actually loaded
-        if not all([self.caption_model, self.caption_processor, self.llm_model, self.llm_tokenizer]):
-            logger.warning("Deformes3DThinker initialized, but one or more enhancement models were not loaded by the LTX pipeline. Fallback will be used.")
-        else:
-            logger.info("Deformes3DThinker initialized and successfully linked to LTX enhancement models.")
-
-    @torch.no_grad()
-    def get_enhanced_motion_prompt(self, global_prompt: str, story_history: str,
-                                   past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
-                                   past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> str:
-        """
-        Generates a refined motion prompt by directly executing the enhancement pipeline logic.
-        """
-        # Checks that the models are available before trying to use them
-        if not all([self.caption_model, self.caption_processor, self.llm_model, self.llm_tokenizer]):
-            logger.warning("Enhancement models not available. Using fallback prompt.")
-            return f"A cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
-
-        try:
-            present_image = Image.open(present_keyframe_path).convert("RGB")
-
-            # --- START OF LOGIC COPIED AND ADAPTED FROM LTX ---
-
-            # 1. Generate the caption for the (present) reference image
-            image_captions = self._generate_image_captions([present_image])
-
-            # 2. Build the prompt for the LLM
-            # We use the future scene as the "user prompt"
-            messages = [
-                {"role": "system", "content": I2V_CINEMATIC_PROMPT},
-                {"role": "user", "content": f"user_prompt: {future_scene_desc}\nimage_caption: {image_captions[0]}"},
-            ]
-
-            # 3. Generate and decode the final prompt with the LLM
-            enhanced_prompt = self._generate_and_decode_prompts(messages)
-
-            # --- END OF COPIED AND ADAPTED LOGIC ---
-
-            logger.info(f"Deformes3DThinker received enhanced prompt: '{enhanced_prompt}'")
-            return enhanced_prompt
-
-        except Exception as e:
-            logger.error(f"The Film Director (Deformes3D Thinker) failed during enhancement: {e}. Using fallback.", exc_info=True)
-            return f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
-
-    def _generate_image_captions(self, images: list[Image.Image]) -> list[str]:
-        """
-        Internal logic for generating captions, copied from the LTX utils.
-        """
-        # LTX's Florence-2 model does not use a system_prompt here, but a task_prompt
-        task_prompt = "<MORE_DETAILED_CAPTION>"
-        inputs = self.caption_processor(
-            text=[task_prompt] * len(images), images=images, return_tensors="pt"
-        ).to(self.caption_model.device)
-
-        generated_ids = self.caption_model.generate(
-            input_ids=inputs["input_ids"],
-            pixel_values=inputs["pixel_values"],
-            max_new_tokens=1024,
-            num_beams=3,
-        )
-
-        # Uses post_process_generation to extract the clean response
-        generated_text = self.caption_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-        processed_result = self.caption_processor.post_process_generation(
-            generated_text,
-            task=task_prompt,
-            image_size=(images[0].width, images[0].height)
-        )
-        return [processed_result[task_prompt]]
-
-    def _generate_and_decode_prompts(self, messages: list[dict]) -> str:
-        """
-        Internal logic for generating a prompt with the LLM, copied from the LTX utils.
-        """
-        text = self.llm_tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-        model_inputs = self.llm_tokenizer([text], return_tensors="pt").to(self.llm_model.device)
-
-        output_ids = self.llm_model.generate(**model_inputs, max_new_tokens=256)
-
-        input_ids_len = model_inputs.input_ids.shape[1]
-        decoded_prompts = self.llm_tokenizer.batch_decode(
-            output_ids[:, input_ids_len:], skip_special_tokens=True
-        )
-        return decoded_prompts[0].strip()
-
-# --- Singleton Instantiation ---
-try:
-    deformes3d_thinker_singleton = Deformes3DThinker()
-except Exception as e:
-    # The failure will already have been logged inside __init__
-    deformes3d_thinker_singleton = None
-    raise e
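
Based on the signature above, invoking the deleted thinker looked like the following; all prompts, scene descriptions, and file paths are illustrative placeholders.

```python
# Hypothetical invocation of the deleted thinker at its pre-commit import path.
# Every string and file path below is an illustrative placeholder.
from engineers.deformes3D_thinker import deformes3d_thinker_singleton

motion_prompt = deformes3d_thinker_singleton.get_enhanced_motion_prompt(
    global_prompt="A storm gathers over a small harbor town.",
    story_history="Act 1: the fishing fleet sets out at dawn.",
    past_keyframe_path="workspace/keyframe_1_ltx.png",
    present_keyframe_path="workspace/keyframe_2_ltx.png",
    future_keyframe_path="workspace/keyframe_3_ltx.png",
    past_scene_desc="The fleet departs under a pale sky.",
    present_scene_desc="Clouds mass on the horizon.",
    future_scene_desc="Rain lashes the empty pier.",
)
print(motion_prompt)
```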
engineers/deformes4D.py
DELETED
@@ -1,338 +0,0 @@
-# engineers/deformes4D.py
-#
-# AducSdr: An open and functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License...
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version 2.0.1
-
-import os
-import time
-import imageio
-import numpy as np
-import torch
-import logging
-from PIL import Image, ImageOps
-from dataclasses import dataclass
-import gradio as gr
-import subprocess
-import gc
-import shutil
-from pathlib import Path
-from typing import List, Tuple, Generator, Dict, Any
-
-from aduc_types import LatentConditioningItem
-from managers.ltx_manager import ltx_manager_singleton
-from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
-from managers.vae_manager import vae_manager_singleton
-from engineers.deformes2D_thinker import deformes2d_thinker_singleton
-from managers.seedvr_manager import seedvr_manager_singleton
-from managers.mmaudio_manager import mmaudio_manager_singleton
-from tools.video_encode_tool import video_encode_tool_singleton
-
-logger = logging.getLogger(__name__)
-
-class Deformes4DEngine:
-    """
-    Implements the Camera (Ψ) and Distiller (Δ) of the ADUC-SDR architecture.
-    Orchestrates the generation, latent post-production, and final rendering of video fragments.
-    """
-    def __init__(self, workspace_dir="deformes_workspace"):
-        self.workspace_dir = workspace_dir
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        logger.info("Deformes4D Specialist (ADUC-SDR Executor) initialized.")
-        os.makedirs(self.workspace_dir, exist_ok=True)
-
-    # --- HELPER METHODS ---
-
-    def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
-        """Saves a pixel-space tensor as an MP4 video file."""
-        if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
-        video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
-        video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
-        video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
-        with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
-            for frame in video_np: writer.append_data(frame)
-
-    def read_video_to_tensor(self, video_path: str) -> torch.Tensor:
-        """Reads a video file and converts it into a pixel-space tensor."""
-        with imageio.get_reader(video_path, 'ffmpeg') as reader:
-            frames = [frame for frame in reader]
-
-        frames_np = np.stack(frames, axis=0).astype(np.float32) / 255.0
-        # (F, H, W, C) -> (C, F, H, W)
-        tensor = torch.from_numpy(frames_np).permute(3, 0, 1, 2)
-        tensor = tensor.unsqueeze(0)  # (B, C, F, H, W)
-        tensor = (tensor * 2.0) - 1.0  # Normalize to [-1, 1]
-        return tensor.to(self.device)
-
-    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
-        """Resizes and fits an image to the target resolution for VAE encoding."""
-        if image.size != target_resolution:
-            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
-        return image
-
-    def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
-        """Converts a PIL Image to a latent tensor by calling the VaeManager."""
-        image_np = np.array(pil_image).astype(np.float32) / 255.0
-        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
-        tensor = (tensor * 2.0) - 1.0
-        return vae_manager_singleton.encode(tensor)
-
-    # --- CORE ADUC-SDR LOGIC ---
-
-    def generate_original_movie(self, keyframes: list, global_prompt: str, storyboard: list,
-                                seconds_per_fragment: float, trim_percent: int,
-                                handler_strength: float, destination_convergence_strength: float,
-                                video_resolution: int, use_continuity_director: bool,
-                                guidance_scale: float, stg_scale: float, num_inference_steps: int,
-                                progress: gr.Progress = gr.Progress()):
-        FPS = 24
-        FRAMES_PER_LATENT_CHUNK = 8
-        LATENT_PROCESSING_CHUNK_SIZE = 4
-
-        run_timestamp = int(time.time())
-        temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}")
-        temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
-        os.makedirs(temp_latent_dir, exist_ok=True)
-        os.makedirs(temp_video_clips_dir, exist_ok=True)
-
-        total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
-        frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
-        latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
-
-        #if frames_a_podar % 2 == 0:
-        #    frames_a_podar = frames_a_podar-1
-
-        total_latent_frames = total_frames_brutos // FRAMES_PER_LATENT_CHUNK
-
-        DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
-        DESTINATION_FRAME_TARGET = total_frames_brutos - 1
-
-        base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps, "rescaling_scale": 0.15, "image_cond_noise_scale": 0.00}
-        keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes]
-        story_history = ""
-        target_resolution_tuple = (video_resolution, video_resolution)
-        eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
-        latent_fragment_paths = []
-
-        if len(keyframe_paths) < 2: raise gr.Error(f"Generation requires at least 2 keyframes. You provided {len(keyframe_paths)}.")
-        num_transitions_to_generate = len(keyframe_paths) - 1
-
-        logger.info("--- STARTING STAGE 1: Latent Fragment Generation ---")
-        for i in range(num_transitions_to_generate):
-            fragment_index = i + 1
-            progress(i / num_transitions_to_generate, desc=f"Generating Latent {fragment_index}/{num_transitions_to_generate}")
-            past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
-            start_keyframe_path = keyframe_paths[i]
-            destination_keyframe_path = keyframe_paths[i + 1]
-            future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene."
-            logger.info(f"Calling deformes2D_thinker to generate cinematic decision for fragment {fragment_index}...")
-            decision = deformes2d_thinker_singleton.get_cinematic_decision(global_prompt, story_history, past_keyframe_path, start_keyframe_path, destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.", storyboard[i], future_story_prompt)
-            transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
-            story_history += f"\n- Act {fragment_index}: {motion_prompt}"
-
-            conditioning_items = []
-            if eco_latent_for_next_loop is None:
-                img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
-                conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_start), 0, 1.0))
-            else:
-                conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
-                conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
-
-            if transition_type == "cutx":
-                logger.info(f"Cinematic Director chose a 'cut'. Creating FFmpeg transition bridge...")
-                bridge_duration_seconds = FRAMES_PER_LATENT_CHUNK / FPS
-                bridge_video_path = video_encode_tool_singleton.create_transition_bridge(
-                    start_image_path=start_keyframe_path, end_image_path=destination_keyframe_path,
-                    duration=bridge_duration_seconds, fps=FPS, target_resolution=target_resolution_tuple,
-                    workspace_dir=self.workspace_dir
-                )
-                bridge_pixel_tensor = self.read_video_to_tensor(bridge_video_path)
-                bridge_latent_tensor = vae_manager_singleton.encode(bridge_pixel_tensor)
-                final_fade_latent = bridge_latent_tensor[:, :, -2:, :, :]
-                conditioning_items.append(LatentConditioningItem(final_fade_latent, total_latent_frames - 16, 0.95))
-                #img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
-                #conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength * 0.5))
-                del bridge_pixel_tensor, bridge_latent_tensor, final_fade_latent
-                if os.path.exists(bridge_video_path): os.remove(bridge_video_path)
-            else:
-                img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
-                conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
-
-            current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
-            logger.info(f"Calling LTX to generate video latents for fragment {fragment_index} ({total_frames_brutos} frames)...")
-            latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
-            num_latent_frames = latents_brutos.shape[2]
-            logger.info(f"LTX responded with a latent tensor of shape {latents_brutos.shape}, representing ~{num_latent_frames * 8 + 1} video frames at {FPS} FPS.")
-
-            last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
-            eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
-            dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
-            latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
-            latents_video = latents_video[:, :, 1:, :, :]
-            del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
-
-            if transition_type == "cutx":
-                eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
-
-
-            cpu_latent = latents_video.cpu()
-            latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
|
| 206 |
-
torch.save(cpu_latent, latent_path)
|
| 207 |
-
latent_fragment_paths.append(latent_path)
|
| 208 |
-
del latents_video, cpu_latent; gc.collect()
|
| 209 |
-
del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache()
|
| 210 |
-
|
| 211 |
-
logger.info(f"--- STARTING STAGE 2: Processing {len(latent_fragment_paths)} latents in chunks of {LATENT_PROCESSING_CHUNK_SIZE} ---")
|
| 212 |
-
final_video_clip_paths = []
|
| 213 |
-
num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE)
|
| 214 |
-
for i in range(num_chunks):
|
| 215 |
-
chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE
|
| 216 |
-
chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE
|
| 217 |
-
chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index]
|
| 218 |
-
progress(i / num_chunks, desc=f"Processing & Decoding Batch {i+1}/{num_chunks}")
|
| 219 |
-
tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
|
| 220 |
-
tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
|
| 221 |
-
sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
|
| 222 |
-
del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
|
| 223 |
-
logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
|
| 224 |
-
base_name = f"clip_{i:04d}_{run_timestamp}"
|
| 225 |
-
current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
|
| 226 |
-
pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
|
| 227 |
-
self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
|
| 228 |
-
del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
|
| 229 |
-
final_video_clip_paths.append(current_clip_path)
|
| 230 |
-
|
| 231 |
-
progress(0.98, desc="Final assembly of clips...")
|
| 232 |
-
final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4")
|
| 233 |
-
video_encode_tool_singleton.concatenate_videos(video_paths=final_video_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir)
|
| 234 |
-
logger.info("Cleaning up temporary clip files...")
|
| 235 |
-
try:
|
| 236 |
-
shutil.rmtree(temp_video_clips_dir)
|
| 237 |
-
except OSError as e:
|
| 238 |
-
logger.warning(f"Could not remove temporary clip directory: {e}")
|
| 239 |
-
logger.info(f"Process complete! Original video saved to: {final_video_path}")
|
| 240 |
-
return {"final_path": final_video_path, "latent_paths": latent_fragment_paths}
|
| 241 |
-
|
| 242 |
-
def upscale_latents_and_create_video(self, latent_paths: list, chunk_size: int, progress: gr.Progress):
|
| 243 |
-
if not latent_paths:
|
| 244 |
-
raise gr.Error("Cannot perform upscaling: no latent paths were provided.")
|
| 245 |
-
logger.info("--- STARTING POST-PRODUCTION: Latent Upscaling ---")
|
| 246 |
-
run_timestamp = int(time.time())
|
| 247 |
-
temp_upscaled_clips_dir = os.path.join(self.workspace_dir, f"temp_upscaled_clips_{run_timestamp}")
|
| 248 |
-
os.makedirs(temp_upscaled_clips_dir, exist_ok=True)
|
| 249 |
-
final_upscaled_clip_paths = []
|
| 250 |
-
num_chunks = -(-len(latent_paths) // chunk_size)
|
| 251 |
-
for i in range(num_chunks):
|
| 252 |
-
chunk_start_index = i * chunk_size
|
| 253 |
-
chunk_end_index = chunk_start_index + chunk_size
|
| 254 |
-
chunk_paths = latent_paths[chunk_start_index:chunk_end_index]
|
| 255 |
-
progress(i / num_chunks, desc=f"Upscaling & Decoding Batch {i+1}/{num_chunks}")
|
| 256 |
-
tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
|
| 257 |
-
tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
|
| 258 |
-
sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
|
| 259 |
-
del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
|
| 260 |
-
logger.info(f"Batch {i+1} loaded. Original latent shape: {sub_group_latent.shape}")
|
| 261 |
-
upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
|
| 262 |
-
del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
|
| 263 |
-
logger.info(f"Batch {i+1} upscaled. New latent shape: {upscaled_latent_chunk.shape}")
|
| 264 |
-
pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
|
| 265 |
-
del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
|
| 266 |
-
base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
|
| 267 |
-
current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
|
| 268 |
-
self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24)
|
| 269 |
-
final_upscaled_clip_paths.append(current_clip_path)
|
| 270 |
-
del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
|
| 271 |
-
logger.info(f"Saved upscaled clip: {Path(current_clip_path).name}")
|
| 272 |
-
progress(0.98, desc="Assembling upscaled clips...")
|
| 273 |
-
final_video_path = os.path.join(self.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4")
|
| 274 |
-
video_encode_tool_singleton.concatenate_videos(video_paths=final_upscaled_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir)
|
| 275 |
-
logger.info("Cleaning up temporary upscaled clip files...")
|
| 276 |
-
try:
|
| 277 |
-
shutil.rmtree(temp_upscaled_clips_dir)
|
| 278 |
-
except OSError as e:
|
| 279 |
-
logger.warning(f"Could not remove temporary upscaled clip directory: {e}")
|
| 280 |
-
logger.info(f"Latent upscaling complete! Final video at: {final_video_path}")
|
| 281 |
-
yield {"final_path": final_video_path}
|
| 282 |
-
|
| 283 |
-
def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress):
|
| 284 |
-
logger.info(f"--- STARTING POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
|
| 285 |
-
progress(0.1, desc=f"Preparing for HD Mastering with SeedVR {model_version}...")
|
| 286 |
-
run_timestamp = int(time.time())
|
| 287 |
-
output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{model_version}_{run_timestamp}.mp4")
|
| 288 |
-
try:
|
| 289 |
-
final_path = seedvr_manager_singleton.process_video(
|
| 290 |
-
input_video_path=source_video_path,
|
| 291 |
-
output_video_path=output_path,
|
| 292 |
-
prompt=prompt,
|
| 293 |
-
model_version=model_version,
|
| 294 |
-
steps=steps,
|
| 295 |
-
progress=progress
|
| 296 |
-
)
|
| 297 |
-
logger.info(f"HD Mastering complete! Final video at: {final_path}")
|
| 298 |
-
yield {"final_path": final_path}
|
| 299 |
-
except Exception as e:
|
| 300 |
-
logger.error(f"HD Mastering failed: {e}", exc_info=True)
|
| 301 |
-
raise gr.Error(f"HD Mastering failed. Details: {e}")
|
| 302 |
-
|
| 303 |
-
def generate_audio_for_final_video(self, source_video_path: str, audio_prompt: str, progress: gr.Progress):
|
| 304 |
-
logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
|
| 305 |
-
progress(0.1, desc="Preparing for audio generation...")
|
| 306 |
-
run_timestamp = int(time.time())
|
| 307 |
-
source_name = Path(source_video_path).stem
|
| 308 |
-
output_path = os.path.join(self.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4")
|
| 309 |
-
try:
|
| 310 |
-
result = subprocess.run(
|
| 311 |
-
["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
|
| 312 |
-
capture_output=True, text=True, check=True)
|
| 313 |
-
duration = float(result.stdout.strip())
|
| 314 |
-
logger.info(f"Source video duration: {duration:.2f} seconds.")
|
| 315 |
-
progress(0.5, desc="Generating audio track...")
|
| 316 |
-
final_path = mmaudio_manager_singleton.generate_audio_for_video(
|
| 317 |
-
video_path=source_video_path,
|
| 318 |
-
prompt=audio_prompt,
|
| 319 |
-
duration_seconds=duration,
|
| 320 |
-
output_path_override=output_path
|
| 321 |
-
)
|
| 322 |
-
logger.info(f"Audio generation complete! Final video with audio at: {final_path}")
|
| 323 |
-
progress(1.0, desc="Audio generation complete!")
|
| 324 |
-
yield {"final_path": final_path}
|
| 325 |
-
except Exception as e:
|
| 326 |
-
logger.error(f"Audio generation failed: {e}", exc_info=True)
|
| 327 |
-
raise gr.Error(f"Audio generation failed. Details: {e}")
|
| 328 |
-
|
| 329 |
-
def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
|
| 330 |
-
"""Internal helper to call the LTX manager."""
|
| 331 |
-
final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items}
|
| 332 |
-
return ltx_manager_singleton.generate_latent_fragment(**final_ltx_params)
|
| 333 |
-
|
| 334 |
-
def _quantize_to_multiple(self, n, m):
|
| 335 |
-
"""Helper to round n to the nearest multiple of m."""
|
| 336 |
-
if m == 0: return n
|
| 337 |
-
quantized = int(round(n / m) * m)
|
| 338 |
-
return m if n > 0 and quantized == 0 else quantized
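The frame-accounting arithmetic above (`_quantize_to_multiple` plus the trim constants at the top of `generate_original_movie`) is the crux of the handoff between fragments, so a minimal self-contained sketch of just that math follows. The input values for `seconds_per_fragment` and `trim_percent` are illustrative examples, not values taken from the project's config.

```python
# Minimal sketch of the frame-accounting math used by generate_original_movie above.
# Example inputs are illustrative; the constants mirror FPS and FRAMES_PER_LATENT_CHUNK.

def quantize_to_multiple(n: int, m: int) -> int:
    """Round n to the nearest multiple of m, never returning 0 for positive n."""
    if m == 0:
        return n
    quantized = int(round(n / m) * m)
    return m if n > 0 and quantized == 0 else quantized

FPS = 24
FRAMES_PER_LATENT_CHUNK = 8  # one latent frame spans 8 video frames

seconds_per_fragment = 4.2  # illustrative value
trim_percent = 25           # illustrative value

total_frames = quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
frames_to_trim = quantize_to_multiple(int(total_frames * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
latents_to_trim = frames_to_trim // FRAMES_PER_LATENT_CHUNK
dejavu_frame_target = frames_to_trim - 1 if frames_to_trim > 0 else 0

print(total_frames, frames_to_trim, latents_to_trim, dejavu_frame_target)  # 96 24 3 23
```

Rounding everything to multiples of the latent chunk size keeps the trimmed tail aligned with whole latent frames, which is what lets the "eco" and "déjà vu" latents be sliced off cleanly for the next fragment.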
engineers/deformes7D.py
DELETED
@@ -1,316 +0,0 @@
-# engineers/deformes7D.py
-#
-# AducSdr: An open and functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License...
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version 3.2.1
-
-import os
-import time
-import imageio
-import numpy as np
-import torch
-import logging
-from PIL import Image, ImageOps
-import gradio as gr
-import subprocess
-import gc
-import yaml
-import shutil
-from pathlib import Path
-from typing import List, Tuple, Dict, Generator
-
-from aduc_types import LatentConditioningItem
-from managers.ltx_manager import ltx_manager_singleton
-from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
-from managers.vae_manager import vae_manager_singleton
-from engineers.deformes2D_thinker import deformes2d_thinker_singleton
-from engineers.deformes3D_thinker import deformes3d_thinker_singleton
-from managers.seedvr_manager import seedvr_manager_singleton
-from managers.mmaudio_manager import mmaudio_manager_singleton
-from tools.video_encode_tool import video_encode_tool_singleton
-
-logger = logging.getLogger(__name__)
-
-class Deformes7DEngine:
-    # ... (the entire class body remains exactly the same as in our last version) ...
-    """
-    Unified 3D/4D engine for continuous, interleaved generation of keyframes and video fragments.
-    """
-    def __init__(self, workspace_dir="deformes_workspace"):
-        self.workspace_dir = workspace_dir
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        logger.info("Deformes7D Unified Engine initialized.")
-        os.makedirs(self.workspace_dir, exist_ok=True)
-
-    # --- HELPER METHODS ---
-    def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
-        """Saves a pixel-space tensor as an MP4 video file."""
-        if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
-        video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
-        video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
-        video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
-        with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
-            for frame in video_np: writer.append_data(frame)
-
-    def read_video_to_tensor(self, video_path: str) -> torch.Tensor:
-        """Reads a video file and converts it into a pixel-space tensor."""
-        with imageio.get_reader(video_path, 'ffmpeg') as reader:
-            frames = [frame for frame in reader]
-        frames_np = np.stack(frames, axis=0).astype(np.float32) / 255.0
-        tensor = torch.from_numpy(frames_np).permute(3, 0, 1, 2)
-        tensor = tensor.unsqueeze(0)
-        tensor = (tensor * 2.0) - 1.0
-        return tensor.to(self.device)
-
-    def _preprocess_image(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
-        if image.size != target_resolution:
-            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
-        return image
-
-    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
-        image_np = np.array(pil_image).astype(np.float32) / 255.0
-        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
-        return (tensor * 2.0) - 1.0
-
-    def _save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
-        tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
-        tensor_hwc = tensor_chw.permute(1, 2, 0)
-        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
-        image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
-        Image.fromarray(image_np).save(path)
-
-    def _quantize_to_multiple(self, n, m):
-        if m == 0: return n
-        quantized = int(round(n / m) * m)
-        return m if n > 0 and quantized == 0 else quantized
-
-    # --- CORE GENERATION LOGIC ---
-    def _generate_next_causal_keyframe(self, base_keyframe_path: str, all_ref_paths: list,
-                                       prompt: str, resolution_tuple: tuple) -> Tuple[str, torch.Tensor]:
-        # (the internal code of this method remains the same)
-        ltx_context_paths = [base_keyframe_path] + [p for p in all_ref_paths if p != base_keyframe_path][:3]
-        ltx_conditioning_items = []
-        weight = 1.0
-        for path in ltx_context_paths:
-            img_pil = Image.open(path).convert("RGB")
-            img_processed = self._preprocess_image(img_pil, resolution_tuple)
-            pixel_tensor = self._pil_to_pixel_tensor(img_processed)
-            latent_tensor = vae_manager_singleton.encode(pixel_tensor)
-            ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
-            if weight == 1.0: weight = -0.2
-            else: weight -= 0.2
-        ltx_base_params = {"guidance_scale": 3.0, "stg_scale": 0.1, "num_inference_steps": 25}
-        generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
-            height=resolution_tuple[0], width=resolution_tuple[1],
-            conditioning_items_data=ltx_conditioning_items, motion_prompt=prompt,
-            video_total_frames=48, video_fps=24, **ltx_base_params
-        )
-        final_latent = generated_latents[:, :, -1:, :, :]
-        upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
-        pixel_tensor_out = vae_manager_singleton.decode(upscaled_latent)
-        timestamp = int(time.time() * 1000)
-        output_path = os.path.join(self.workspace_dir, f"keyframe_{timestamp}.png")
-        self._save_image_from_tensor(pixel_tensor_out, output_path)
-        return output_path, final_latent
-
-    def generate_full_movie_interleaved(self, initial_ref_paths: list, storyboard: list, global_prompt: str,
-                                        video_resolution: int, seconds_per_fragment: float, trim_percent: int,
-                                        handler_strength: float, dest_strength: float, ltx_params: dict,
-                                        progress=gr.Progress()):
-        # (the internal code of this method remains the same)
-        logger.info("--- DEFORMES 7D: INITIATING INTERLEAVED RENDERING PIPELINE ---")
-        run_timestamp = int(time.time())
-        temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
-        os.makedirs(temp_video_clips_dir, exist_ok=True)
-        FPS = 24
-        FRAMES_PER_LATENT_CHUNK = 8
-        resolution_tuple = (video_resolution, video_resolution)
-        generated_keyframe_paths, generated_keyframe_latents, generated_video_fragment_paths = [], [], []
-        progress(0, desc="Bootstrap: Processing K0...")
-        k0_path = initial_ref_paths[0]
-        k0_pil = Image.open(k0_path).convert("RGB")
-        k0_processed_pil = self._preprocess_image(k0_pil, resolution_tuple)
-        k0_pixel_tensor = self._pil_to_pixel_tensor(k0_processed_pil)
-        k0_latent = vae_manager_singleton.encode(k0_pixel_tensor)
-        generated_keyframe_paths.append(k0_path)
-        generated_keyframe_latents.append(k0_latent)
-        progress(0.01, desc="Bootstrap: Generating K1...")
-        prompt_k1 = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
-            global_prompt, "Initial scene.", storyboard[0], storyboard[1], k0_path, initial_ref_paths
-        )
-        k1_path, k1_latent = self._generate_next_causal_keyframe(k0_path, initial_ref_paths, prompt_k1, resolution_tuple)
-        generated_keyframe_paths.append(k1_path)
-        generated_keyframe_latents.append(k1_latent)
-        story_history = ""
-        eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
-        num_transitions = len(storyboard) - 1
-        base_4d_ltx_params = {"rescaling_scale": 0.15, "image_cond_noise_scale": 0.00, **ltx_params}
-
-        for i in range(1, num_transitions):
-            act_progress = i / num_transitions
-            progress(act_progress, desc=f"Processing Act {i+1}/{num_transitions} (Keyframe Gen)...")
-            logger.info(f"--> Step 3D: Generating Keyframe K{i+1}")
-            kx_path = generated_keyframe_paths[i]
-            prompt_ky = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
-                global_prompt, story_history, storyboard[i], storyboard[i+1], kx_path, initial_ref_paths
-            )
-            ky_path, ky_latent = self._generate_next_causal_keyframe(kx_path, initial_ref_paths, prompt_ky, resolution_tuple)
-            generated_keyframe_paths.append(ky_path)
-            generated_keyframe_latents.append(ky_latent)
-            progress(act_progress, desc=f"Processing Act {i+1}/{num_transitions} (Video Gen)...")
-            logger.info(f"--> Step 4D: Generating Video Fragment V{i-1}")
-            kb_path, kx_path, ky_path = generated_keyframe_paths[i-1], generated_keyframe_paths[i], generated_keyframe_paths[i+1]
-            motion_prompt = deformes3d_thinker_singleton.get_enhanced_motion_prompt(
-                global_prompt, story_history, kb_path, kx_path, ky_path,
-                storyboard[i-1], storyboard[i], storyboard[i+1]
-            )
-            transition_type = "continuous"
-            story_history += f"\n- Act {i}: {motion_prompt}"
-            total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
-            frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
-            latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
-            DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
-            DESTINATION_FRAME_TARGET = total_frames_brutos - 1
-            conditioning_items = []
-            if eco_latent_for_next_loop is None:
-                conditioning_items.append(LatentConditioningItem(generated_keyframe_latents[i], 0, 1.0))
-            else:
-                conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
-                conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
-            if transition_type != "cut":
-                conditioning_items.append(LatentConditioningItem(ky_latent, DESTINATION_FRAME_TARGET, dest_strength))
-            fragment_latents_brutos, _ = ltx_manager_singleton.generate_latent_fragment(
-                height=video_resolution, width=video_resolution,
-                conditioning_items_data=conditioning_items, motion_prompt=motion_prompt,
-                video_total_frames=total_frames_brutos, video_fps=FPS, **base_4d_ltx_params
-            )
-            last_trim = fragment_latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
-            eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
-            dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
-            final_fragment_latents = fragment_latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
-            final_fragment_latents = final_fragment_latents[:, :, 1:, :, :]
-            pixel_tensor = vae_manager_singleton.decode(final_fragment_latents)
-            fragment_path = os.path.join(temp_video_clips_dir, f"fragment_{i-1}.mp4")
-            self.save_video_from_tensor(pixel_tensor, fragment_path, fps=FPS)
-            generated_video_fragment_paths.append(fragment_path)
-            logger.info(f"Video Fragment V{i-1} saved to {fragment_path}")
-
-        logger.info("--- Final Assembly of Video Fragments ---")
-        final_video_path = os.path.join(self.workspace_dir, f"movie_7D_{run_timestamp}.mp4")
-        video_encode_tool_singleton.concatenate_videos(generated_video_fragment_paths, final_video_path, self.workspace_dir)
-        shutil.rmtree(temp_video_clips_dir)
-        logger.info(f"Full movie generated at: {final_video_path}")
-        return {"final_path": final_video_path, "all_keyframes": generated_keyframe_paths, "latent_paths": "NOT_IMPLEMENTED_YET"}
-
-    # --- POST-PRODUCTION METHODS ---
-    def task_run_latent_upscaling(self, latent_paths: list, chunk_size: int, progress: gr.Progress) -> Generator[Dict[str, any], None, None]:
-        # (the internal code of this method remains the same)
-        if not latent_paths:
-            raise gr.Error("Cannot perform upscaling: no latent paths were provided from the main generation.")
-        logger.info("--- POST-PRODUCTION: Latent Upscaling ---")
-        run_timestamp = int(time.time())
-        temp_upscaled_clips_dir = os.path.join(self.workspace_dir, f"temp_upscaled_clips_{run_timestamp}")
-        os.makedirs(temp_upscaled_clips_dir, exist_ok=True)
-        final_upscaled_clip_paths = []
-        num_chunks = -(-len(latent_paths) // chunk_size)
-        for i in range(num_chunks):
-            chunk_start_index = i * chunk_size
-            chunk_end_index = chunk_start_index + chunk_size
-            chunk_paths = latent_paths[chunk_start_index:chunk_end_index]
-            progress(i / num_chunks, desc=f"Upscaling & Decoding Batch {i+1}/{num_chunks}")
-            tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
-            tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)]
-            sub_group_latent = torch.cat(tensors_para_concatenar, dim=2)
-            del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache()
-            upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
-            del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
-            pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
-            del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
-            base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
-            current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
-            self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24)
-            final_upscaled_clip_paths.append(current_clip_path)
-            del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
-        progress(0.98, desc="Assembling upscaled clips...")
-        final_video_path = os.path.join(self.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4")
-        video_encode_tool_singleton.concatenate_videos(video_paths=final_upscaled_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir)
-        shutil.rmtree(temp_upscaled_clips_dir)
-        logger.info(f"Latent upscaling complete! Final video at: {final_video_path}")
-        yield {"final_path": final_video_path}
-
-    def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress):
-        # (the internal code of this method remains the same)
-        logger.info(f"--- POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---")
-        run_timestamp = int(time.time())
-        output_path = os.path.join(self.workspace_dir, f"{Path(source_video_path).stem}_hd.mp4")
-        try:
-            final_path = seedvr_manager_singleton.process_video(
-                input_video_path=source_video_path, output_video_path=output_path,
-                prompt=prompt, model_version=model_version, steps=steps, progress=progress
-            )
-            yield {"final_path": final_path}
-        except Exception as e:
-            logger.error(f"HD Mastering failed: {e}", exc_info=True)
-            raise gr.Error(f"HD Mastering failed. Details: {e}")
-
-    def generate_audio(self, source_video_path: str, audio_prompt: str, progress: gr.Progress):
-        # (the internal code of this method remains the same)
-        logger.info(f"--- POST-PRODUCTION: Audio Generation ---")
-        run_timestamp = int(time.time())
-        output_path = os.path.join(self.workspace_dir, f"{Path(source_video_path).stem}_audio.mp4")
-        try:
-            result = subprocess.run(
-                ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
-                capture_output=True, text=True, check=True)
-            duration = float(result.stdout.strip())
-            progress(0.5, desc="Generating audio track...")
-            final_path = mmaudio_manager_singleton.generate_audio_for_video(
-                video_path=source_video_path, prompt=audio_prompt,
-                duration_seconds=duration, output_path_override=output_path
-            )
-            yield {"final_path": final_path}
-        except Exception as e:
-            logger.error(f"Audio generation failed: {e}", exc_info=True)
-            raise gr.Error(f"Audio generation failed. Details: {e}")
-
-# --- Singleton Instantiation ---
-try:
-    config_path = Path(__file__).resolve().parent.parent / "config.yaml"
-    with open(config_path, 'r') as f:
-        config = yaml.safe_load(f)
-    WORKSPACE_DIR = config['application']['workspace_dir']
-    deformes7d_engine_singleton = Deformes7DEngine(workspace_dir=WORKSPACE_DIR)
-    # <--- START OF FIX --->
-except Exception as e:
-    # Log the error as CRITICAL, since the application cannot run without this engine.
-    logger.critical(f"CRITICAL: Failed to initialize the Deformes7DEngine singleton from {config_path}: {e}", exc_info=True)
-    # Re-raise the exception to stop the application immediately.
-    # This avoids a 'NoneType' error later and provides a clear point of failure.
-    raise
-# <--- END OF FIX --->
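Both engines deleted above share the same chunked-decode idiom: ceiling division of the fragment list into batches, then `torch.cat` along the frame axis (dim=2) with the overlapping last latent frame dropped from every fragment except the final one in each batch. A minimal sketch with dummy tensors follows; the shapes are illustrative, not the real latent dimensions.

```python
# Sketch of the shared chunking idiom from the deleted engines; dummy shapes only.
import torch

fragments = [torch.randn(1, 8, 5, 16, 16) for _ in range(7)]  # (B, C, frames, H, W)
chunk_size = 4
num_chunks = -(-len(fragments) // chunk_size)  # ceiling division -> 2

for i in range(num_chunks):
    chunk = fragments[i * chunk_size:(i + 1) * chunk_size]
    # Drop the last latent frame of every fragment but the final one in the chunk,
    # since it duplicates the first frame of the following fragment.
    trimmed = [f[:, :, :-1, :, :] if j < len(chunk) - 1 else f
               for j, f in enumerate(chunk)]
    merged = torch.cat(trimmed, dim=2)
    print(i, merged.shape[2])  # chunk 0: 3*4 + 5 = 17 frames; chunk 1: 2*4 + 5 = 13
```

The `-(-n // k)` form is plain-integer ceiling division, so a trailing partial batch still gets its own decode pass.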
managers/LICENSE
DELETED
@@ -1,25 +0,0 @@
-# Euia-AducSdr: An open and functional implementation of the ADUC-SDR architecture for coherent video generation.
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-# Hugging Face (Ltx-SuperTime-60Secondos): https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/
-# Hugging Face (Novinho): https://huggingface.co/spaces/Carlexxx/Novinho/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
managers/LICENSE.txt
DELETED
@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!) The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
managers/NOTICE.md
DELETED
@@ -1,60 +0,0 @@
-# NOTICE
-
-Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
-
----
-
-## Intellectual Property and Licensing Notice
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-### **Reconhecimento e Implicações (EM PORTUGUÊS):**
-
-Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
-
-1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
-2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
-3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-### **Acknowledgement and Implications (IN ENGLISH):**
-
-By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
-
-1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
-2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
-3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
-
----
-
-
-**Contact for Inquiries:**
-
-For more information about the ADUC architecture, the status of the patent process, or to discuss licensing for commercial uses or uses not compliant with the AGPLv3, please contact:
-
-Carlos Rodrigues dos Santos
-carlex22@gmail.com
-Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
managers/README.md
DELETED
@@ -1,156 +0,0 @@
-# 🛠️ managers/ - Third-Party AI Tools for ADUC-SDR Orchestration
-
-This folder contains adapted implementations of third-party AI models and utilities, which serve as low-level "specialists" or "tools" for the ADUC-SDR architecture.
-
-**IMPORTANT:** The contents of this folder were authored by their respective original creators and developers. This folder is **NOT PART** of the main ADUC-SDR project in terms of its innovative architecture. It serves as a repository for the **direct, modified dependencies** that the `Deformes engineers` (the stages of the ADUC-SDR "rocket") invoke to perform specific tasks (image, video, and audio generation).
-
-The modifications made to the files here mainly aim to:
-1. **Interface Adaptation:** Standardize the interfaces so that they fit the ADUC-SDR orchestration flow.
-2. **Resource Management:** Integrate model loading/unloading logic (GPU management) and configuration via YAML files.
-3. **Flow Optimization:** Adjust the pipelines to accept more efficient input formats (e.g., pre-encoded tensors instead of media paths, skipping redundant encode/decode steps).
-
----
-
-## 📄 Licensing
-
-The original content of the projects listed below is licensed under the **Apache License 2.0**, or another license specified by the original authors. All modifications and the use of these files within the `helpers/` structure of the ADUC-SDR project comply with the terms of the **Apache License 2.0**.
-
-The original licenses of the projects can be found at their respective sources or in the `incl_licenses/` subdirectories within each adapted module.
-
----
-
-## 🛠️ Helper API and Usage Guide
-
-This section details how each helper (specialist agent) should be used within the ADUC-SDR ecosystem. All agents are instantiated as **singletons** in `hardware_manager.py` to guarantee centralized management of GPU resources.
-
-### **gemini_helpers.py (GeminiAgent)**
-
-* **Purpose:** Acts as the "Adaptive Synthesis Oracle", responsible for all natural-language processing tasks, such as storyboard creation, prompt generation, and narrative decision-making.
-* **Singleton Instance:** `gemini_agent_singleton`
-* **Constructor:** `GeminiAgent()`
-    * Reads `configs/gemini_config.yaml` for the model name, inference parameters, and prompt template paths. The API key is read from the `GEMINI_API_KEY` environment variable.
-* **Public Methods:**
-    * `generate_storyboard(prompt: str, num_keyframes: int, ref_image_paths: list[str])`
-        * **Inputs:**
-            * `prompt`: The overall idea of the film (string).
-            * `num_keyframes`: The number of scenes to be generated (int).
-            * `ref_image_paths`: List of paths to the reference images (list[str]).
-        * **Output:** `tuple[list[str], str]` (A tuple containing the list of storyboard strings and a textual report of the operation).
-    * `select_keyframes_from_pool(storyboard: list, base_image_paths: list[str], pool_image_paths: list[str])`
-        * **Inputs:**
-            * `storyboard`: The list of generated storyboard strings.
-            * `base_image_paths`: Base reference images (list[str]).
-            * `pool_image_paths`: The "image pool" to select from (list[str]).
-        * **Output:** `tuple[list[str], str]` (A tuple containing the list of selected image paths and a textual report).
-    * `get_anticipatory_keyframe_prompt(...)`
-        * **Inputs:** Narrative and visual context for generating an image prompt.
-        * **Output:** `tuple[str, str]` (A tuple containing the generated prompt for the image model and a textual report).
-    * `get_initial_motion_prompt(...)`
-        * **Inputs:** Narrative and visual context for the first video transition.
|
| 50 |
-
* **Output:** `tuple[str, str]` (Uma tupla contendo o prompt de movimento gerado e um relatório textual).
|
| 51 |
-
* `get_transition_decision(...)`
|
| 52 |
-
* **Inputs:** Contexto narrativo e visual para uma transição de vídeo intermediária.
|
| 53 |
-
* **Output:** `tuple[dict, str]` (Uma tupla contendo um dicionário `{"transition_type": "...", "motion_prompt": "..."}` e um relatório textual).
|
| 54 |
-
* `generate_audio_prompts(...)`
|
| 55 |
-
* **Inputs:** Contexto narrativo global.
|
| 56 |
-
* **Output:** `tuple[dict, str]` (Uma tupla contendo um dicionário `{"music_prompt": "...", "sfx_prompt": "..."}` e um relatório textual).
|
| 57 |
-
|
| 58 |
-
### **flux_kontext_helpers.py (FluxPoolManager)**
|
| 59 |
-
|
| 60 |
-
* **Propósito:** Especialista em geração de imagens de alta qualidade (keyframes) usando a pipeline FluxKontext. Gerencia um pool de workers para otimizar o uso de múltiplas GPUs.
|
| 61 |
-
* **Singleton Instance:** `flux_kontext_singleton`
|
| 62 |
-
* **Construtor:** `FluxPoolManager(device_ids: list[str], flux_config_file: str)`
|
| 63 |
-
* Lê `configs/flux_config.yaml`.
|
| 64 |
-
* **Método Público:**
|
| 65 |
-
* `generate_image(prompt: str, reference_images: list[Image.Image], width: int, height: int, seed: int = 42, callback: callable = None)`
|
| 66 |
-
* **Inputs:**
|
| 67 |
-
* `prompt`: Prompt textual para guiar a geração (string).
|
| 68 |
-
* `reference_images`: Lista de objetos `PIL.Image` como referência visual.
|
| 69 |
-
* `width`, `height`: Dimensões da imagem de saída (int).
|
| 70 |
-
* `seed`: Semente para reprodutibilidade (int).
|
| 71 |
-
* `callback`: Função de callback opcional para monitorar o progresso.
|
| 72 |
-
* **Output:** `PIL.Image.Image` (O objeto da imagem gerada).
|
| 73 |
-
|
| 74 |
-
### **dreamo_helpers.py (DreamOAgent)**
|
| 75 |
-
|
| 76 |
-
* **Propósito:** Especialista em geração de imagens de alta qualidade (keyframes) usando a pipeline DreamO, com capacidades avançadas de edição e estilo a partir de referências.
|
| 77 |
-
* **Singleton Instance:** `dreamo_agent_singleton`
|
| 78 |
-
* **Construtor:** `DreamOAgent(device_id: str = None)`
|
| 79 |
-
* Lê `configs/dreamo_config.yaml`.
|
| 80 |
-
* **Método Público:**
|
| 81 |
-
* `generate_image(prompt: str, reference_images: list[Image.Image], width: int, height: int)`
|
| 82 |
-
* **Inputs:**
|
| 83 |
-
* `prompt`: Prompt textual para guiar a geração (string).
|
| 84 |
-
* `reference_images`: Lista de objetos `PIL.Image` como referência visual. A lógica interna atribui a primeira imagem como `style` e as demais como `ip`.
|
| 85 |
-
* `width`, `height`: Dimensões da imagem de saída (int).
|
| 86 |
-
* **Output:** `PIL.Image.Image` (O objeto da imagem gerada).
|
| 87 |
-
|
| 88 |
-
### **ltx_manager_helpers.py (LtxPoolManager)**
|
| 89 |
-
|
| 90 |
-
* **Propósito:** Especialista na geração de fragmentos de vídeo no espaço latente usando a pipeline LTX-Video. Gerencia um pool de workers para otimizar o uso de múltiplas GPUs.
|
| 91 |
-
* **Singleton Instance:** `ltx_manager_singleton`
|
| 92 |
-
* **Construtor:** `LtxPoolManager(device_ids: list[str], ltx_model_config_file: str, ltx_global_config_file: str)`
|
| 93 |
-
* Lê o `ltx_global_config_file` e o `ltx_model_config_file` para configurar a pipeline.
|
| 94 |
-
* **Método Público:**
|
| 95 |
-
* `generate_latent_fragment(**kwargs)`
|
| 96 |
-
* **Inputs:** Dicionário de keyword arguments (`kwargs`) contendo todos os parâmetros da pipeline LTX, incluindo:
|
| 97 |
-
* `height`, `width`: Dimensões do vídeo (int).
|
| 98 |
-
* `video_total_frames`: Número total de frames a serem gerados (int).
|
| 99 |
-
* `video_fps`: Frames por segundo (int).
|
| 100 |
-
* `motion_prompt`: Prompt de movimento (string).
|
| 101 |
-
* `conditioning_items_data`: Lista de objetos `LatentConditioningItem` contendo os tensores latentes de condição.
|
| 102 |
-
* `guidance_scale`, `stg_scale`, `num_inference_steps`, etc.
|
| 103 |
-
* **Output:** `tuple[torch.Tensor, tuple]` (Uma tupla contendo o tensor latente gerado e os valores de padding utilizados).
|
| 104 |
-
|
| 105 |
-
### **mmaudio_helper.py (MMAudioAgent)**
|
| 106 |
-
|
| 107 |
-
* **Propósito:** Especialista em geração de áudio para um determinado fragmento de vídeo.
|
| 108 |
-
* **Singleton Instance:** `mmaudio_agent_singleton`
|
| 109 |
-
* **Construtor:** `MMAudioAgent(workspace_dir: str, device_id: str = None, mmaudio_config_file: str)`
|
| 110 |
-
* Lê `configs/mmaudio_config.yaml`.
|
| 111 |
-
* **Método Público:**
|
| 112 |
-
* `generate_audio_for_video(video_path: str, prompt: str, negative_prompt: str, duration_seconds: float)`
|
| 113 |
-
* **Inputs:**
|
| 114 |
-
* `video_path`: Caminho para o arquivo de vídeo silencioso (string).
|
| 115 |
-
* `prompt`: Prompt textual para guiar a geração de áudio (string).
|
| 116 |
-
* `negative_prompt`: Prompt negativo para áudio (string).
|
| 117 |
-
* `duration_seconds`: Duração exata do vídeo (float).
|
| 118 |
-
* **Output:** `str` (O caminho para o novo arquivo de vídeo com a faixa de áudio integrada).
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
### **seedvr_helpers.py (SeedVrManager)**
|
| 122 |
-
|
| 123 |
-
* **Propósito:** Especialista em pós-produção de vídeo, aplicando super-resolução com IA (`Video Super-Resolution`) para adicionar detalhes finos, nitidez e texturas realistas a um vídeo já renderizado.
|
| 124 |
-
* **Singleton Instance:** `seedvr_manager_singleton`
|
| 125 |
-
* **Construtor:** `SeedVrManager(workspace_dir: str, device_id: str = None)`
|
| 126 |
-
* Lê `configs/seedvr_config.yaml`.
|
| 127 |
-
* **Método Público:**
|
| 128 |
-
* `process_video(input_video_path: str, output_video_path: str, prompt: str, model_version: str = '7B', steps: int = 100, seed: int = 666)`
|
| 129 |
-
* **Inputs:**
|
| 130 |
-
* `input_video_path`: Caminho para o vídeo de entrada a ser aprimorado (string).
|
| 131 |
-
* `output_video_path`: Caminho onde o vídeo finalizado será salvo (string).
|
| 132 |
-
* `prompt`: Um prompt de estilo geral para guiar o aprimoramento (string).
|
| 133 |
-
* `model_version`: A versão do modelo a ser usada, '3B' ou '7B' (string).
|
| 134 |
-
* `steps`: Número de passos de inferência para o processo de aprimoramento (int).
|
| 135 |
-
* `seed`: Semente para reprodutibilidade (int).
|
| 136 |
-
* **Output:** `str` (O caminho para o vídeo finalizado em alta definição).
|
| 137 |
-
|
| 138 |
-
---
|
| 139 |
-
|
| 140 |
-
## 🔗 Projetos Originais e Atribuições
|
| 141 |
-
(A seção de atribuições e licenças permanece a mesma que definimos anteriormente)
|
| 142 |
-
|
| 143 |
-
### DreamO
|
| 144 |
-
* **Repositório Original:** [https://github.com/bytedance/DreamO](https://github.com/bytedance/DreamO)
|
| 145 |
-
...
|
| 146 |
-
|
| 147 |
-
### LTX-Video
|
| 148 |
-
* **Repositório Original:** [https://github.com/Lightricks/LTX-Video](https://github.com/Lightricks/LTX-Video)
|
| 149 |
-
...
|
| 150 |
-
|
| 151 |
-
### MMAudio
|
| 152 |
-
* **Repositório Original:** [https://github.com/hkchengrex/MMAudio](https://github.com/hkchengrex/MMAudio)
|
| 153 |
-
...
|
| 154 |
-
|
| 155 |
-
### SeedVr
|
| 156 |
-
* **Repositório Original:** [https://github.com/ByteDance-Seed/SeedVR](https://github.com/ByteDance-Seed/SeedVR)
|
managers/__init__.py
DELETED
File without changes
managers/config.yaml
DELETED
@@ -1,24 +0,0 @@
-# config.yaml
-# Central configuration for the Deformes4D application and its specialists.
-
-application:
-  workspace_dir: "deformes_workspace"
-
-# Configuration for Hugging Face Spaces
-sdk: gradio
-app_file: app.py
-
-specialists:
-  flux:
-    # Defines how many GPUs the Flux pool should try to allocate.
-    # If there are not enough GPUs, the hardware_manager will raise an error.
-    # If 0, the CPU is used.
-    gpus_required: 4
-
-  ltx:
-    # Defines how many GPUs the LTX pool should try to allocate.
-    gpus_required: 4
-
-    # Points to the model-specific LTX configuration file.
-    # Changed to use the 0.9.8 distilled model.
-    config_file: "ltxv-13b-0.9.8-distilled.yaml"
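
As a reference for how this file is consumed, the manager modules read it at import time with plain `yaml.safe_load` and then ask the hardware manager for their GPU quota; the pattern below mirrors the singleton-instantiation code at the bottom of ltx_manager.py and flux_kontext_manager.py.

    import yaml

    # Each specialist reads the shared config at import time (pattern from the managers).
    with open("config.yaml", "r") as f:
        config = yaml.safe_load(f)

    ltx_gpus = config["specialists"]["ltx"]["gpus_required"]       # -> 4
    ltx_cfg_file = config["specialists"]["ltx"]["config_file"]     # -> "ltxv-13b-0.9.8-distilled.yaml"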
managers/flux_kontext_manager.py
DELETED
@@ -1,165 +0,0 @@
-# flux_kontext_helpers.py (ADUC: The Painter Specialist - with callback support)
-# AducSdr: An open, functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-#
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version 1.0.1
-
-import torch
-from PIL import Image, ImageOps
-import gc
-from diffusers import FluxKontextPipeline
-import huggingface_hub
-import os
-import threading
-import yaml
-import logging
-
-from tools.hardware_manager import hardware_manager
-
-logger = logging.getLogger(__name__)
-
-class FluxWorker:
-    """Represents a single instance of the FluxKontext pipeline on a device."""
-    def __init__(self, device_id='cuda:0'):
-        self.cpu_device = torch.device('cpu')
-        self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
-        self.pipe = None
-        self._load_pipe_to_cpu()
-
-    def _load_pipe_to_cpu(self):
-        if self.pipe is None:
-            logger.info(f"FLUX Worker ({self.device}): Loading model to CPU...")
-            self.pipe = FluxKontextPipeline.from_pretrained(
-                "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
-            ).to(self.cpu_device)
-            logger.info(f"FLUX Worker ({self.device}): Model ready on CPU.")
-
-    def to_gpu(self):
-        if self.device.type == 'cpu': return
-        logger.info(f"FLUX Worker: Moving model to GPU {self.device}...")
-        self.pipe.to(self.device)
-
-    def to_cpu(self):
-        if self.device.type == 'cpu': return
-        logger.info(f"FLUX Worker: Unloading model from GPU {self.device}...")
-        self.pipe.to(self.cpu_device)
-        gc.collect()
-        if torch.cuda.is_available(): torch.cuda.empty_cache()
-
-    def _create_composite_reference(self, images: list[Image.Image], target_width: int, target_height: int) -> Image.Image:
-        if not images: return None
-        valid_images = [img.convert("RGB") for img in images if img is not None]
-        if not valid_images: return None
-        if len(valid_images) == 1:
-            if valid_images[0].size != (target_width, target_height):
-                return ImageOps.fit(valid_images[0], (target_width, target_height), Image.Resampling.LANCZOS)
-            return valid_images[0]
-
-        base_height = valid_images[0].height
-        resized_for_concat = []
-        for img in valid_images:
-            if img.height != base_height:
-                aspect_ratio = img.width / img.height
-                new_width = int(base_height * aspect_ratio)
-                resized_for_concat.append(img.resize((new_width, base_height), Image.Resampling.LANCZOS))
-            else:
-                resized_for_concat.append(img)
-
-        total_width = sum(img.width for img in resized_for_concat)
-        concatenated = Image.new('RGB', (total_width, base_height))
-        x_offset = 0
-        for img in resized_for_concat:
-            concatenated.paste(img, (x_offset, 0))
-            x_offset += img.width
-
-        #final_reference = ImageOps.fit(concatenated, (target_width, target_height), Image.Resampling.LANCZOS)
-        return concatenated
-
-    @torch.inference_mode()
-    def generate_image_internal(self, reference_images: list[Image.Image], prompt: str, target_width: int, target_height: int, seed: int, callback: callable = None):
-        composite_reference = self._create_composite_reference(reference_images, target_width, target_height)
-
-        num_steps = 12  # Fixed, optimized value
-
-        logger.info(f"\n===== [FLUX PIPELINE CALL on {self.device}] =====\n"
-                    f"  - Prompt: '{prompt}'\n"
-                    f"  - Resolution: {target_width}x{target_height}, Seed: {seed}, Steps: {num_steps}\n"
-                    f"  - Number of images in the composition: {len(reference_images)}\n"
-                    f"==========================================")
-
-        generated_image = self.pipe(
-            image=composite_reference,
-            prompt=prompt,
-            guidance_scale=2.5,
-            width=target_width,
-            height=target_height,
-            num_inference_steps=num_steps,
-            generator=torch.Generator(device="cpu").manual_seed(seed),
-            callback_on_step_end=callback,
-            callback_on_step_end_tensor_inputs=["latents"] if callback else None
-        ).images[0]
-
-        return generated_image
-
-class FluxPoolManager:
-    def __init__(self, device_ids):
-        logger.info(f"FLUX POOL MANAGER: Creating workers for devices: {device_ids}")
-        self.workers = [FluxWorker(device_id) for device_id in device_ids]
-        self.current_worker_index = 0
-        self.lock = threading.Lock()
-        self.last_cleanup_thread = None
-
-    def _cleanup_worker_thread(self, worker):
-        logger.info(f"FLUX CLEANUP THREAD: Starting cleanup of {worker.device} in the background...")
-        worker.to_cpu()
-
-    def generate_image(self, reference_images, prompt, width, height, seed=42, callback=None):
-        worker_to_use = None
-        try:
-            with self.lock:
-                if self.last_cleanup_thread and self.last_cleanup_thread.is_alive():
-                    self.last_cleanup_thread.join()
-                worker_to_use = self.workers[self.current_worker_index]
-                previous_worker_index = (self.current_worker_index - 1 + len(self.workers)) % len(self.workers)
-                worker_to_cleanup = self.workers[previous_worker_index]
-                cleanup_thread = threading.Thread(target=self._cleanup_worker_thread, args=(worker_to_cleanup,))
-                cleanup_thread.start()
-                self.last_cleanup_thread = cleanup_thread
-                worker_to_use.to_gpu()
-                self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
-
-            logger.info(f"FLUX POOL MANAGER: Generating image on {worker_to_use.device}...")
-            return worker_to_use.generate_image_internal(
-                reference_images=reference_images,
-                prompt=prompt,
-                target_width=width,
-                target_height=height,
-                seed=seed,
-                callback=callback
-            )
-        except Exception as e:
-            logger.error(f"FLUX POOL MANAGER: Error during generation: {e}", exc_info=True)
-            raise e
-        finally:
-            pass
-
-# --- Dynamic Singleton Instantiation ---
-logger.info("Reading config.yaml to initialize the FluxKontext Pool Manager...")
-with open("config.yaml", 'r') as f: config = yaml.safe_load(f)
-hf_token = os.getenv('HF_TOKEN')
-if hf_token: huggingface_hub.login(token=hf_token)
-flux_gpus_required = config['specialists']['flux']['gpus_required']
-flux_device_ids = hardware_manager.allocate_gpus('Flux', flux_gpus_required)
-flux_kontext_singleton = FluxPoolManager(device_ids=flux_device_ids)
-logger.info("Image Specialist (Flux) ready.")
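
A minimal usage sketch for the pool above. Note that the lock is held only for worker rotation, so cleanup of the previous GPU overlaps with generation on the current one; the image paths here are illustrative.

    from PIL import Image
    from managers.flux_kontext_manager import flux_kontext_singleton

    refs = [Image.open(p) for p in ["ref_a.png", "ref_b.png"]]  # illustrative paths
    image = flux_kontext_singleton.generate_image(
        reference_images=refs,   # stitched into one composite reference internally
        prompt="the character from the first image in the setting of the second",
        width=1024, height=576,
        seed=42,
    )
    image.save("keyframe.png")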
managers/gemini_manager.py
DELETED
@@ -1,119 +0,0 @@
-# managers/gemini_manager.py
-# AducSdr: An open, functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version: 1.1.1
-#
-# This file defines the GeminiManager, a specialist responsible for raw communication
-# with the Google Gemini API. It acts as a lean API client, handling requests,
-# parsing responses, and managing API-level errors. It does not contain any
-# high-level prompt engineering or creative logic.
-
-import os
-import logging
-import json
-from pathlib import Path
-import gradio as gr
-from PIL import Image
-import google.generativeai as genai
-import re
-from typing import List, Union, Any
-
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-
-def robust_json_parser(raw_text: str) -> dict:
-    """
-    Parses a JSON object from a string that might contain extra text,
-    such as Markdown code blocks from an LLM's response.
-    """
-    clean_text = raw_text.strip()
-    try:
-        match = re.search(r'```json\s*(\{.*?\})\s*```', clean_text, re.DOTALL)
-        if match:
-            json_str = match.group(1)
-            return json.loads(json_str)
-
-        start_index = clean_text.find('{')
-        end_index = clean_text.rfind('}')
-        if start_index != -1 and end_index != -1 and end_index > start_index:
-            json_str = clean_text[start_index : end_index + 1]
-            return json.loads(json_str)
-        else:
-            raise ValueError("No valid JSON object could be found in the AI's response.")
-    except json.JSONDecodeError as e:
-        logger.error(f"Failed to decode JSON. The AI returned the following text:\n---\n{raw_text}\n---")
-        raise ValueError(f"The AI returned an invalid JSON format: {e}")
-
-class GeminiManager:
-    """
-    Manages raw interactions with the Google Gemini API.
-    """
-    def __init__(self):
-        self.api_key = os.environ.get("GEMINI_API_KEY")
-        if self.api_key:
-            genai.configure(api_key=self.api_key)
-            self.model = genai.GenerativeModel('gemini-2.5-flash')
-            logger.info("GeminiManager (Communication Layer) initialized successfully.")
-        else:
-            self.model = None
-            logger.warning("Gemini API key not found. GeminiManager disabled.")
-
-    def _check_model(self):
-        """Raises an error if the Gemini API is not configured."""
-        if not self.model:
-            raise gr.Error("The Google Gemini API key is not configured (GEMINI_API_KEY).")
-
-    def _generate_content(self, prompt_parts: List[Any]) -> str:
-        """Internal method to make the API call."""
-        self._check_model()
-        logger.info("Calling Gemini API...")
-        response = self.model.generate_content(prompt_parts)
-        logger.info(f"Gemini responded with raw text: {response.text}")
-        return response.text
-
-    def get_raw_text(self, prompt_parts: List[Any]) -> str:
-        """
-        Sends a prompt to the Gemini API and returns the raw text response.
-
-        Args:
-            prompt_parts (List[Any]): A list containing strings and/or PIL.Image objects.
-
-        Returns:
-            str: The raw string response from the API.
-        """
-        try:
-            return self._generate_content(prompt_parts)
-        except Exception as e:
-            logger.error(f"Gemini API call failed: {e}", exc_info=True)
-            raise gr.Error(f"Gemini API communication failed: {e}")
-
-    def get_json_object(self, prompt_parts: List[Any]) -> dict:
-        """
-        Sends a prompt to the Gemini API, expects a JSON response, parses it, and returns a dictionary.
-
-        Args:
-            prompt_parts (List[Any]): A list containing strings and/or PIL.Image objects.
-
-        Returns:
-            dict: The parsed JSON object from the API response.
-        """
-        try:
-            raw_response = self._generate_content(prompt_parts)
-            return robust_json_parser(raw_response)
-        except Exception as e:
-            logger.error(f"Gemini API call or JSON parsing failed: {e}", exc_info=True)
-            raise gr.Error(f"Gemini API communication or response parsing failed: {e}")
-
-# --- Singleton Instance ---
-gemini_manager_singleton = GeminiManager()
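
The parser above is easy to exercise in isolation; a small sanity check with hypothetical input text shows both extraction paths, the fenced-block match and the outermost-braces fallback.

    from managers.gemini_manager import robust_json_parser

    # A typical LLM reply: prose wrapped around a fenced JSON block.
    raw = 'Sure! Here is the decision:\n```json\n{"transition_type": "cut", "motion_prompt": "slow dolly-in"}\n```'
    decision = robust_json_parser(raw)
    assert decision["transition_type"] == "cut"

    # Falls back to the outermost {...} span when no fenced block is present.
    assert robust_json_parser('noise {"x": 1} noise')["x"] == 1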
managers/latent_enhancer_manager.py
DELETED
@@ -1,109 +0,0 @@
-# latent_enhancer_specialist.py
-# AducSdr: An open, functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-#
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version 1.0.1
-
-import torch
-import logging
-import time
-from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
-
-logger = logging.getLogger(__name__)
-
-class LatentEnhancerSpecialist:
-    """
-    Specialist responsible for improving the quality of latent tensors,
-    including spatial upscaling and denoise-based refinement.
-    """
-    def __init__(self):
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.pipe_upsample = None
-        self.base_vae = None  # VAE for the upscaler
-
-    def _lazy_init_upscaler(self):
-        """Initializes the upscaling pipeline only when it is first used."""
-        if self.pipe_upsample is not None:
-            return
-        try:
-            from diffusers.models.autoencoders import AutoencoderKLLTXVideo
-            self.base_vae = AutoencoderKLLTXVideo.from_pretrained(
-                "linoyts/LTX-Video-spatial-upscaler-0.9.8",
-                subfolder="vae",
-                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-            ).to(self.device)
-
-            self.pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
-                "linoyts/LTX-Video-spatial-upscaler-0.9.8",
-                vae=self.base_vae,
-                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-            ).to(self.device)
-            logger.info("[Enhancer] Upscale pipeline loaded successfully.")
-        except Exception as e:
-            logger.error(f"[Enhancer] Failed to load the Upscale pipeline: {e}")
-            self.pipe_upsample = None
-
-    @torch.no_grad()
-    def upscale(self, latents: torch.Tensor) -> torch.Tensor:
-        """Applies 2x upscaling to the given latent tensors."""
-        self._lazy_init_upscaler()
-        if self.pipe_upsample is None:
-            logger.warning("[Enhancer] Upscale pipeline unavailable. Returning original latents.")
-            return latents
-        try:
-            logger.info(f"[Enhancer] Received shape {latents.shape} for Upscale.")
-            result = self.pipe_upsample(latents=latents, output_type="latent")
-            output_tensor = result.frames
-            logger.info(f"[Enhancer] Upscale finished. New shape: {output_tensor.shape}")
-            return output_tensor
-        except Exception as e:
-            logger.error(f"[Enhancer] Error during upscale: {e}", exc_info=True)
-            return latents
-
-    @torch.no_grad()
-    def refine(self, latents: torch.Tensor, fps: int = 24, **kwargs) -> torch.Tensor:
-        """
-        Invokes the LTX Pool Manager to refine an existing latent tensor.
-        """
-        logger.info(f"[Enhancer] Refining latent tensor with shape {latents.shape}.")
-
-        main_pipeline_vae = ltx_manager_singleton.workers[0].pipeline.vae
-        video_scale_factor = getattr(main_pipeline_vae.config, 'temporal_scale_factor', 8)
-
-        _, _, num_latent_frames, _, _ = latents.shape
-
-        # --- [FINAL, CRITICAL FIX] ---
-        # The refinement (vid2vid) pipeline expects the number of pixel frames that CORRESPONDS
-        # to the existing latent, WITHOUT the +1 logic it will apply internally.
-        pixel_frames = (num_latent_frames - 1) * video_scale_factor
-
-        final_ltx_params = {
-            "video_total_frames": pixel_frames,
-            "video_fps": fps,
-            "current_fragment_index": int(time.time()),
-            **kwargs
-        }
-
-        refined_latents_tensor, _ = ltx_manager_singleton.refine_latents(latents, **final_ltx_params)
-
-        if refined_latents_tensor is None:
-            logger.warning("[Enhancer] Refinement failed. Returning the original, unrefined tensor.")
-            return latents
-
-        logger.info(f"[Enhancer] Returning refined latent tensor with shape: {refined_latents_tensor.shape}")
-        return refined_latents_tensor
-
-# --- Global Singleton ---
-latent_enhancer_specialist_singleton = LatentEnhancerSpecialist()
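
The frame-count correction in refine() is worth spelling out: with the default temporal_scale_factor of 8, a latent with 25 temporal steps maps to (25 - 1) * 8 = 192 pixel frames, and the vid2vid pipeline re-adds its own +1 internally. A quick check of the arithmetic, using the same names as the method above:

    # Arithmetic check for the latent-to-pixel frame mapping used in refine().
    video_scale_factor = 8   # default temporal_scale_factor
    num_latent_frames = 25   # example latent length

    pixel_frames = (num_latent_frames - 1) * video_scale_factor
    assert pixel_frames == 192   # value passed as video_total_frames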
managers/ltx_manager.py
DELETED
@@ -1,320 +0,0 @@
-# managers/ltx_manager.py
-# AducSdr: An open, functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version: 2.3.0
-#
-# This version adds a public property `prompt_enhancement_pipeline` to the manager.
-# This allows other specialists, specifically the Deformes3DThinker, to access
-# the internal prompt refinement models (captioning and LLM) used by the LTX pipeline,
-# ensuring stylistic and logical consistency.
-
-import torch
-import gc
-import os
-import sys
-import yaml
-import logging
-import huggingface_hub
-import time
-import threading
-import subprocess
-from pathlib import Path
-from typing import Optional, List, Tuple, Union
-
-from tools.optimization import optimize_ltx_worker, can_optimize_fp8
-from tools.hardware_manager import hardware_manager
-from aduc_types import LatentConditioningItem
-
-logger = logging.getLogger(__name__)
-
-# --- Dependency Management ---
-DEPS_DIR = Path("./deps")
-LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
-
-# --- Placeholders for lazy-loaded modules ---
-create_ltx_video_pipeline = None
-calculate_padding = None
-LTXVideoPipeline = None
-ConditioningItem = None
-LTXMultiScalePipeline = None
-vae_encode = None
-latent_to_pixel_coords = None
-randn_tensor = None
-
-class LtxPoolManager:
-    """
-    Manages a pool of LtxWorkers and exposes the enhancement pipeline for other specialists.
-    """
-    def __init__(self, device_ids, ltx_config_file_name):
-        logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
-        self._ltx_modules_loaded = False
-        self._setup_dependencies()
-        self._lazy_load_ltx_modules()
-
-        self.ltx_config_file = LTX_VIDEO_REPO_DIR / "configs" / ltx_config_file_name
-
-        self.workers = [LtxWorker(dev_id, self.ltx_config_file) for dev_id in device_ids]
-        self.current_worker_index = 0
-        self.lock = threading.Lock()
-
-        # <--- NEW PROPERTY FOR THE DEFORMES3DTHINKER TO USE --->
-        # Exposes the first worker's pipeline. We assume all of them are configured
-        # the same way and contain the same enhancement models.
-        self.prompt_enhancement_pipeline = self.workers[0].pipeline if self.workers else None
-        if self.prompt_enhancement_pipeline:
-            logger.info("LTX POOL MANAGER: Prompt enhancement pipeline exposed for other specialists.")
-        # <--- END OF NEW PROPERTY --->
-
-        self._apply_ltx_pipeline_patches()
-
-        if all(w.device.type == 'cuda' for w in self.workers):
-            logger.info("LTX POOL MANAGER: HOT START MODE ENABLED. Pre-warming all GPUs...")
-            for worker in self.workers:
-                worker.to_gpu()
-            logger.info("LTX POOL MANAGER: All GPUs are hot and ready.")
-        else:
-            logger.info("LTX POOL MANAGER: Operating in CPU or mixed mode. GPU pre-warming skipped.")
-
-    # ... (the rest of the LtxPoolManager class, such as _setup_dependencies, generate_latent_fragment, etc., remains exactly the same) ...
-
-    def _setup_dependencies(self):
-        """Clones the LTX-Video repo if not found and adds it to the system path."""
-        if not LTX_VIDEO_REPO_DIR.exists():
-            logger.info(f"LTX-Video repository not found at '{LTX_VIDEO_REPO_DIR}'. Cloning from GitHub...")
-            try:
-                DEPS_DIR.mkdir(exist_ok=True)
-                subprocess.run(
-                    ["git", "clone", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
-                    check=True, capture_output=True, text=True
-                )
-                logger.info("LTX-Video repository cloned successfully.")
-            except subprocess.CalledProcessError as e:
-                logger.error(f"Failed to clone LTX-Video repository. Git stderr: {e.stderr}")
-                raise RuntimeError("Could not clone the required LTX-Video dependency from GitHub.")
-        else:
-            logger.info("Found local LTX-Video repository.")
-
-        if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
-            sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
-            logger.info(f"Added '{LTX_VIDEO_REPO_DIR.resolve()}' to sys.path.")
-
-    def _lazy_load_ltx_modules(self):
-        """Dynamically imports LTX-Video modules after ensuring the repo exists."""
-        if self._ltx_modules_loaded:
-            return
-
-        global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
-        global vae_encode, latent_to_pixel_coords, randn_tensor
-
-        from managers.ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
-        from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
-        from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
-        from diffusers.utils.torch_utils import randn_tensor
-
-        self._ltx_modules_loaded = True
-        logger.info("LTX-Video modules have been dynamically loaded.")
-
-    def _apply_ltx_pipeline_patches(self):
-        """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility."""
-        logger.info("LTX POOL MANAGER: Applying ADUC-SDR patches to LTX pipeline...")
-        for worker in self.workers:
-            worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline)
-        logger.info("LTX POOL MANAGER: All pipeline instances have been patched successfully.")
-
-    def _get_next_worker(self):
-        with self.lock:
-            worker = self.workers[self.current_worker_index]
-            self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
-            return worker
-
-    def _prepare_pipeline_params(self, worker: 'LtxWorker', **kwargs) -> dict:
-        pipeline_params = {
-            "height": kwargs['height'], "width": kwargs['width'], "num_frames": kwargs['video_total_frames'],
-            "frame_rate": kwargs.get('video_fps', 24),
-            "generator": torch.Generator(device=worker.device).manual_seed(int(time.time()) + kwargs.get('current_fragment_index', 0)),
-            "is_video": True, "vae_per_channel_normalize": True,
-            "prompt": kwargs.get('motion_prompt', ""), "negative_prompt": kwargs.get('negative_prompt', "blurry, distorted, static, bad quality"),
-            "guidance_scale": kwargs.get('guidance_scale', 1.0), "stg_scale": kwargs.get('stg_scale', 0.0),
-            "rescaling_scale": kwargs.get('rescaling_scale', 0.15), "num_inference_steps": kwargs.get('num_inference_steps', 20),
-            "output_type": "latent"
-        }
-        if 'latents' in kwargs:
-            pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype)
-        if 'strength' in kwargs:
-            pipeline_params["strength"] = kwargs['strength']
-        if 'conditioning_items_data' in kwargs:
-            final_conditioning_items = []
-            for item in kwargs['conditioning_items_data']:
-                item.latent_tensor = item.latent_tensor.to(worker.device)
-                final_conditioning_items.append(item)
-            pipeline_params["conditioning_items"] = final_conditioning_items
-        if worker.is_distilled:
-            logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.")
-            fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
-            pipeline_params["timesteps"] = fixed_timesteps
-            if fixed_timesteps:
-                pipeline_params["num_inference_steps"] = len(fixed_timesteps)
-        return pipeline_params
-
-    def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple):
-        worker_to_use = self._get_next_worker()
-        try:
-            height, width = kwargs['height'], kwargs['width']
-            padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
-            padding_vals = calculate_padding(height, width, padded_h, padded_w)
-            kwargs['height'], kwargs['width'] = padded_h, padded_w
-            pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)
-            logger.info(f"Initiating GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}")
-            if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
-                result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
-            else:
-                result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
-            return result, padding_vals
-        except Exception as e:
-            logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True)
-            raise e
-        finally:
-            if worker_to_use and worker_to_use.device.type == 'cuda':
-                with torch.cuda.device(worker_to_use.device):
-                    gc.collect(); torch.cuda.empty_cache()
-
-    def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> (torch.Tensor, tuple):
-        pass
-
-# ... (the rest of the file: LtxWorker, _aduc_prepare_conditioning_patch, Singleton Instantiation, etc. remains identical) ...
-class LtxWorker:
-    """
-    Represents a single instance of the LTX-Video pipeline on a specific device.
-    """
-    def __init__(self, device_id, ltx_config_file):
-        self.cpu_device = torch.device('cpu')
-        self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
-        logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...")
-
-        with open(ltx_config_file, "r") as file:
-            self.config = yaml.safe_load(file)
-
-        self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
-
-        models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded"
-
-        logger.info(f"LTX Worker ({self.device}): Preparing to load model...")
-        model_filename = self.config["checkpoint_path"]
-        model_path = huggingface_hub.hf_hub_download(
-            repo_id="Lightricks/LTX-Video", filename=model_filename,
-            local_dir=str(models_dir), local_dir_use_symlinks=False
-        )
-
-        self.pipeline = create_ltx_video_pipeline(
-            ckpt_path=model_path,
-            precision=self.config["precision"],
-            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
-            sampler=self.config["sampler"],
-            device='cpu'
-        )
-        logger.info(f"LTX Worker ({self.device}): Model ready on CPU. Is distilled model? {self.is_distilled}")
-
-    def to_gpu(self):
-        if self.device.type == 'cpu': return
-        logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
-        self.pipeline.to(self.device)
-        if self.device.type == 'cuda' and can_optimize_fp8():
-            logger.info(f"LTX Worker ({self.device}): FP8 supported GPU detected. Optimizing...")
-            optimize_ltx_worker(self)
-            logger.info(f"LTX Worker ({self.device}): Optimization complete.")
-        elif self.device.type == 'cuda':
-            logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.")
-
-    def to_cpu(self):
-        if self.device.type == 'cpu': return
-        logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
-        self.pipeline.to('cpu')
-        gc.collect()
-        if torch.cuda.is_available(): torch.cuda.empty_cache()
-
-    def generate_video_fragment_internal(self, **kwargs):
-        return self.pipeline(**kwargs).images
-
-
-def _aduc_prepare_conditioning_patch(
-    self: LTXVideoPipeline,
-    conditioning_items: Optional[List[Union[ConditioningItem, "LatentConditioningItem"]]],
-    init_latents: torch.Tensor,
-    num_frames: int,
-    height: int,
-    width: int,
-    vae_per_channel_normalize: bool = False,
-    generator=None,
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, int]:
-    if not conditioning_items:
-        init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
-        init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-        return init_latents, init_pixel_coords, None, 0
-    init_conditioning_mask = torch.zeros(init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device)
-    extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
-    extra_conditioning_num_latents = 0
-    is_latent_mode = hasattr(conditioning_items[0], 'latent_tensor')
-    if is_latent_mode:
-        for item in conditioning_items:
-            media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
-            media_frame_number, strength = item.media_frame_number, item.conditioning_strength
-            if media_frame_number == 0:
-                f_l, h_l, w_l = media_item_latents.shape[-3:]
-                init_latents[:, :, :f_l, :h_l, :w_l] = torch.lerp(init_latents[:, :, :f_l, :h_l, :w_l], media_item_latents, strength)
-                init_conditioning_mask[:, :f_l, :h_l, :w_l] = strength
-            else:
-                noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
-                media_item_latents = torch.lerp(noise, media_item_latents, strength)
-                patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
-                pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-                pixel_coords[:, 0] += media_frame_number
-                extra_conditioning_num_latents += patched_latents.shape[1]
-                new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
-                extra_conditioning_latents.append(patched_latents)
-                extra_conditioning_pixel_coords.append(pixel_coords)
-                extra_conditioning_mask.append(new_mask)
-    else:
-        for item in conditioning_items:
-            if not isinstance(item, ConditioningItem): continue
-            item = self._resize_conditioning_item(item, height, width)
-            media_item_latents = vae_encode(item.media_item.to(dtype=self.vae.dtype, device=self.vae.device), self.vae, vae_per_channel_normalize=vae_per_channel_normalize).to(dtype=init_latents.dtype)
-            if item.media_frame_number == 0:
-                media_item_latents, l_x, l_y = self._get_latent_spatial_position(media_item_latents, item, height, width, strip_latent_border=True)
-                f_l, h_l, w_l = media_item_latents.shape[-3:]
-                init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = torch.lerp(init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l], media_item_latents, item.conditioning_strength)
-                init_conditioning_mask[:, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = item.conditioning_strength
-            else:
-                logger.warning("Pixel-based conditioning for non-zero frames is not fully implemented in this patch.")
-
-    init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
-    init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-    init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
-    init_conditioning_mask = init_conditioning_mask.squeeze(-1)
-    if extra_conditioning_latents:
-        init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
-        init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
-        init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
-        if self.transformer.use_tpu_flash_attention:
-            init_latents = init_latents[:, :-extra_conditioning_num_latents]
-            init_pixel_coords = init_pixel_coords[:, :, :-extra_conditioning_num_latents]
-            init_conditioning_mask = init_conditioning_mask[:, :-extra_conditioning_num_latents]
-    return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
-
-
-# --- Singleton Instantiation ---
-with open("config.yaml", 'r') as f:
    config = yaml.safe_load(f)
-ltx_gpus_required = config['specialists']['ltx']['gpus_required']
-ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
-ltx_config_filename = config['specialists']['ltx']['config_file']
-ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename)
-logger.info("Video Specialist (LTX) ready.")
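
generate_latent_fragment() rounds the requested resolution up to the transformer's 32-pixel grid before dispatching, then reports the padding it introduced (for example, 720x1280 becomes 736x1280 with (left, right, top, bottom) padding of (0, 0, 8, 8)). A hedged call sketch follows; the conditioning tensor shape is illustrative, and the LatentConditioningItem constructor signature is assumed from the fields the patch above reads.

    import torch
    from managers.ltx_manager import ltx_manager_singleton
    from aduc_types import LatentConditioningItem

    # Same rounding the manager applies internally: 720 -> 736, 1280 -> 1280.
    height, width = 720, 1280
    padded_h = ((height - 1) // 32 + 1) * 32   # 736
    padded_w = ((width - 1) // 32 + 1) * 32    # 1280

    # Illustrative conditioning latent; real tensors come from the VAE encoder.
    cond = LatentConditioningItem(
        latent_tensor=torch.randn(1, 128, 1, 23, 40),  # assumed shape for 736x1280
        media_frame_number=0,
        conditioning_strength=1.0,
    )
    latents, padding_vals = ltx_manager_singleton.generate_latent_fragment(
        height=height, width=width,
        video_total_frames=48, video_fps=24,
        motion_prompt="slow dolly-in across the scene",
        conditioning_items_data=[cond],
    )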
managers/ltx_pipeline_utils.py
DELETED
@@ -1,774 +0,0 @@
-import argparse
-import os
-import random
-from datetime import datetime
-from pathlib import Path
-from diffusers.utils import logging
-from typing import Optional, List, Union
-import yaml
-
-import imageio
-import json
-import numpy as np
-import torch
-import cv2
-from safetensors import safe_open
-from PIL import Image
-from transformers import (
-    T5EncoderModel,
-    T5Tokenizer,
-    AutoModelForCausalLM,
-    AutoProcessor,
-    AutoTokenizer,
-)
-from huggingface_hub import hf_hub_download
-
-from ltx_video.models.autoencoders.causal_video_autoencoder import (
-    CausalVideoAutoencoder,
-)
-from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
-from ltx_video.models.transformers.transformer3d import Transformer3DModel
-from ltx_video.pipelines.pipeline_ltx_video import (
-    ConditioningItem,
-    LTXVideoPipeline,
-    LTXMultiScalePipeline,
-)
-from ltx_video.schedulers.rf import RectifiedFlowScheduler
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
-import ltx_video.pipelines.crf_compressor as crf_compressor
-
-MAX_HEIGHT = 720
-MAX_WIDTH = 1280
-MAX_NUM_FRAMES = 257
-
-logger = logging.get_logger("LTX-Video")
-
-
-def get_total_gpu_memory():
-    if torch.cuda.is_available():
-        total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
-        return total_memory
-    return 44
-
-
-def get_device():
-    if torch.cuda.is_available():
-        return "cuda"
-    elif torch.backends.mps.is_available():
-        return "mps"
-    return "cuda"
-
-
-def load_image_to_tensor_with_resize_and_crop(
-    image_input: Union[str, Image.Image],
-    target_height: int = 512,
-    target_width: int = 768,
-    just_crop: bool = False,
-) -> torch.Tensor:
-    """Load and process an image into a tensor.
-
-    Args:
-        image_input: Either a file path (str) or a PIL Image object
-        target_height: Desired height of output tensor
-        target_width: Desired width of output tensor
-        just_crop: If True, only crop the image to the target size without resizing
-    """
-    if isinstance(image_input, str):
-        image = Image.open(image_input).convert("RGB")
-    elif isinstance(image_input, Image.Image):
-        image = image_input
-    else:
-        raise ValueError("image_input must be either a file path or a PIL Image object")
-
-    input_width, input_height = image.size
-    aspect_ratio_target = target_width / target_height
-    aspect_ratio_frame = input_width / input_height
-    if aspect_ratio_frame > aspect_ratio_target:
-        new_width = int(input_height * aspect_ratio_target)
-        new_height = input_height
-        x_start = (input_width - new_width) // 2
-        y_start = 0
-    else:
-        new_width = input_width
-        new_height = int(input_width / aspect_ratio_target)
-        x_start = 0
-        y_start = (input_height - new_height) // 2
-
-    image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
-    if not just_crop:
-        image = image.resize((target_width, target_height))
-
-    image = np.array(image)
-    image = cv2.GaussianBlur(image, (3, 3), 0)
-    frame_tensor = torch.from_numpy(image).float()
-    frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
-    frame_tensor = frame_tensor.permute(2, 0, 1)
-    frame_tensor = (frame_tensor / 127.5) - 1.0
-    # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
-    return frame_tensor.unsqueeze(0).unsqueeze(2)
-
-
-def calculate_padding(
-    source_height: int, source_width: int, target_height: int, target_width: int
-) -> tuple[int, int, int, int]:
-
-    # Calculate total padding needed
-    pad_height = target_height - source_height
-    pad_width = target_width - source_width
-
-    # Calculate padding for each side
-    pad_top = pad_height // 2
-    pad_bottom = pad_height - pad_top  # Handles odd padding
-    pad_left = pad_width // 2
-    pad_right = pad_width - pad_left  # Handles odd padding
-
-    # Return padded tensor
-    # Padding format is (left, right, top, bottom)
-    padding = (pad_left, pad_right, pad_top, pad_bottom)
-    return padding
-
-
-def convert_prompt_to_filename(text: str, max_len: int = 20) -> str:
-    # Remove non-letters and convert to lowercase
-    clean_text = "".join(
-        char.lower() for char in text if char.isalpha() or char.isspace()
-    )
-
-    # Split into words
-    words = clean_text.split()
-
-    # Build result string keeping track of length
-    result = []
-    current_length = 0
-
-    for word in words:
-        # Add word length plus 1 for underscore (except for first word)
-        new_length = current_length + len(word)
-
-        if new_length <= max_len:
-            result.append(word)
-            current_length += len(word)
-        else:
-            break
-
-    return "-".join(result)
-
-
-# Generate output video name
-def get_unique_filename(
-    base: str,
-    ext: str,
-    prompt: str,
-    seed: int,
-    resolution: tuple[int, int, int],
-    dir: Path,
-    endswith=None,
-    index_range=1000,
-) -> Path:
-    base_filename = f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{seed}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
-    for i in range(index_range):
-        filename = dir / f"{base_filename}_{i}{endswith if endswith else ''}{ext}"
-        if not os.path.exists(filename):
-            return filename
-    raise FileExistsError(
-        f"Could not find a unique filename after {index_range} attempts."
-    )
-
-
-def seed_everething(seed: int):
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
-    if torch.backends.mps.is_available():
-        torch.mps.manual_seed(seed)
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Load models from separate directories and run the pipeline."
-    )
-
-    # Directories
-    parser.add_argument(
-        "--output_path",
-        type=str,
-        default=None,
-        help="Path to the folder to save output video, if None will save in outputs/ directory.",
-    )
-    parser.add_argument("--seed", type=int, default="171198")
-
-    # Pipeline parameters
-    parser.add_argument(
-        "--num_images_per_prompt",
-        type=int,
-        default=1,
-        help="Number of images per prompt",
-    )
-    parser.add_argument(
-        "--image_cond_noise_scale",
-        type=float,
-        default=0.15,
-        help="Amount of noise to add to the conditioned image",
-    )
-    parser.add_argument(
-        "--height",
-        type=int,
-        default=704,
-        help="Height of the output video frames. Optional if an input image provided.",
-    )
-    parser.add_argument(
-        "--width",
-        type=int,
-        default=1216,
-        help="Width of the output video frames. If None will infer from input image.",
-    )
-    parser.add_argument(
-        "--num_frames",
-        type=int,
-        default=121,
-        help="Number of frames to generate in the output video",
-    )
-    parser.add_argument(
-        "--frame_rate", type=int, default=30, help="Frame rate for the output video"
-    )
-    parser.add_argument(
-        "--device",
-        default=None,
-        help="Device to run inference on. If not specified, will automatically detect and use CUDA or MPS if available, else CPU.",
-    )
-    parser.add_argument(
-        "--pipeline_config",
-        type=str,
-        default="configs/ltxv-13b-0.9.7-dev.yaml",
-        help="The path to the config file for the pipeline, which contains the parameters for the pipeline",
-    )
-
-    # Prompts
-    parser.add_argument(
-        "--prompt",
-        type=str,
-        help="Text prompt to guide generation",
-    )
-    parser.add_argument(
-        "--negative_prompt",
-        type=str,
-        default="worst quality, inconsistent motion, blurry, jittery, distorted",
-        help="Negative prompt for undesired features",
-    )
-
-    parser.add_argument(
-        "--offload_to_cpu",
-        action="store_true",
-        help="Offloading unnecessary computations to CPU.",
-    )
-
-    # video-to-video arguments:
-    parser.add_argument(
-        "--input_media_path",
-        type=str,
-        default=None,
-        help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
-    )
-
-    # Conditioning arguments
-    parser.add_argument(
-        "--conditioning_media_paths",
-        type=str,
-        nargs="*",
-        help="List of paths to conditioning media (images or videos). Each path will be used as a conditioning item.",
-    )
-    parser.add_argument(
-        "--conditioning_strengths",
-        type=float,
-        nargs="*",
-        help="List of conditioning strengths (between 0 and 1) for each conditioning item. Must match the number of conditioning items.",
-    )
-    parser.add_argument(
-        "--conditioning_start_frames",
-        type=int,
-        nargs="*",
-        help="List of frame indices where each conditioning item should be applied. Must match the number of conditioning items.",
-    )
-
-    args = parser.parse_args()
-    logger.warning(f"Running generation with arguments: {args}")
-    infer(**vars(args))
-
-
-def create_ltx_video_pipeline(
-    ckpt_path: str,
-    precision: str,
-    text_encoder_model_name_or_path: str,
-    sampler: Optional[str] = None,
-    device: Optional[str] = None,
-    enhance_prompt: bool = False,
-    prompt_enhancer_image_caption_model_name_or_path: Optional[str] = None,
-    prompt_enhancer_llm_model_name_or_path: Optional[str] = None,
-) -> LTXVideoPipeline:
-    ckpt_path = Path(ckpt_path)
-    assert os.path.exists(
-        ckpt_path
-    ), f"Ckpt path provided (--ckpt_path) {ckpt_path} does not exist"
-
-    with safe_open(ckpt_path, framework="pt") as f:
-        metadata = f.metadata()
-        config_str = metadata.get("config")
-        configs = json.loads(config_str)
-        allowed_inference_steps = configs.get("allowed_inference_steps", None)
-
-    vae = CausalVideoAutoencoder.from_pretrained(ckpt_path)
-    transformer = Transformer3DModel.from_pretrained(ckpt_path)
-
-    # Use constructor if sampler is specified, otherwise use from_pretrained
-    if sampler == "from_checkpoint" or not sampler:
-        scheduler = RectifiedFlowScheduler.from_pretrained(ckpt_path)
-    else:
-        scheduler = RectifiedFlowScheduler(
-            sampler=("Uniform" if sampler.lower() == "uniform" else "LinearQuadratic")
-        )
-
-    text_encoder = T5EncoderModel.from_pretrained(
-        text_encoder_model_name_or_path, subfolder="text_encoder"
-    )
-    patchifier = SymmetricPatchifier(patch_size=1)
-    tokenizer = T5Tokenizer.from_pretrained(
-        text_encoder_model_name_or_path, subfolder="tokenizer"
-    )
-
-    transformer = transformer.to(device)
-    vae = vae.to(device)
-    text_encoder = text_encoder.to(device)
-
-    if enhance_prompt:
-        prompt_enhancer_image_caption_model = AutoModelForCausalLM.from_pretrained(
-            prompt_enhancer_image_caption_model_name_or_path, trust_remote_code=True
-        )
-        prompt_enhancer_image_caption_processor = AutoProcessor.from_pretrained(
-            prompt_enhancer_image_caption_model_name_or_path, trust_remote_code=True
-        )
-        prompt_enhancer_llm_model = AutoModelForCausalLM.from_pretrained(
-            prompt_enhancer_llm_model_name_or_path,
-            torch_dtype="bfloat16",
-        )
-        prompt_enhancer_llm_tokenizer = AutoTokenizer.from_pretrained(
-            prompt_enhancer_llm_model_name_or_path,
-        )
-    else:
-        prompt_enhancer_image_caption_model = None
-        prompt_enhancer_image_caption_processor = None
-        prompt_enhancer_llm_model = None
-        prompt_enhancer_llm_tokenizer = None
-
-    vae = vae.to(torch.bfloat16)
-    if precision == "bfloat16" and transformer.dtype != torch.bfloat16:
-        transformer = transformer.to(torch.bfloat16)
-    text_encoder = text_encoder.to(torch.bfloat16)
-
-    # Use submodels for the pipeline
-    submodel_dict = {
-        "transformer": transformer,
-        "patchifier": patchifier,
-        "text_encoder": text_encoder,
-        "tokenizer": tokenizer,
-        "scheduler": scheduler,
-        "vae": vae,
-        "prompt_enhancer_image_caption_model": prompt_enhancer_image_caption_model,
-        "prompt_enhancer_image_caption_processor": prompt_enhancer_image_caption_processor,
-        "prompt_enhancer_llm_model": prompt_enhancer_llm_model,
-        "prompt_enhancer_llm_tokenizer": prompt_enhancer_llm_tokenizer,
-        "allowed_inference_steps": allowed_inference_steps,
-    }
-
-    pipeline = LTXVideoPipeline(**submodel_dict)
-    pipeline = pipeline.to(device)
-    return pipeline
-
-
-def create_latent_upsampler(latent_upsampler_model_path: str, device: str):
-    latent_upsampler = LatentUpsampler.from_pretrained(latent_upsampler_model_path)
-    latent_upsampler.to(device)
-    latent_upsampler.eval()
-    return latent_upsampler
-
-
-def infer(
-    output_path: Optional[str],
-    seed: int,
-    pipeline_config: str,
-    image_cond_noise_scale: float,
-    height: Optional[int],
-    width: Optional[int],
-    num_frames: int,
-    frame_rate: int,
-    prompt: str,
-    negative_prompt: str,
-    offload_to_cpu: bool,
-    input_media_path: Optional[str] = None,
-    conditioning_media_paths: Optional[List[str]] = None,
-    conditioning_strengths: Optional[List[float]] = None,
-    conditioning_start_frames: Optional[List[int]] = None,
-    device: Optional[str] = None,
-    **kwargs,
-):
-    # check if pipeline_config is a file
-    if not os.path.isfile(pipeline_config):
-        raise ValueError(f"Pipeline config file {pipeline_config} does not exist")
-    with open(pipeline_config, "r") as f:
-        pipeline_config = yaml.safe_load(f)
-
-    models_dir = "MODEL_DIR"
-
-    ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
-    if not os.path.isfile(ltxv_model_name_or_path):
-        ltxv_model_path = hf_hub_download(
-            repo_id="Lightricks/LTX-Video",
-            filename=ltxv_model_name_or_path,
-            local_dir=models_dir,
-            repo_type="model",
-        )
-    else:
-        ltxv_model_path = ltxv_model_name_or_path
-
-    spatial_upscaler_model_name_or_path = pipeline_config.get(
-        "spatial_upscaler_model_path"
-    )
-    if spatial_upscaler_model_name_or_path and not os.path.isfile(
-        spatial_upscaler_model_name_or_path
-    ):
-        spatial_upscaler_model_path = hf_hub_download(
-            repo_id="Lightricks/LTX-Video",
-            filename=spatial_upscaler_model_name_or_path,
-            local_dir=models_dir,
-            repo_type="model",
-        )
-    else:
-        spatial_upscaler_model_path = spatial_upscaler_model_name_or_path
-
-    if kwargs.get("input_image_path", None):
-        logger.warning(
-            "Please use conditioning_media_paths instead of input_image_path."
-        )
-        assert not conditioning_media_paths and not conditioning_start_frames
-        conditioning_media_paths = [kwargs["input_image_path"]]
-        conditioning_start_frames = [0]
-
-    # Validate conditioning arguments
-    if conditioning_media_paths:
-        # Use default strengths of 1.0
-        if not conditioning_strengths:
-            conditioning_strengths = [1.0] * len(conditioning_media_paths)
-        if not conditioning_start_frames:
-            raise ValueError(
-                "If `conditioning_media_paths` is provided, "
-                "`conditioning_start_frames` must also be provided"
-            )
-        if len(conditioning_media_paths) != len(conditioning_strengths) or len(
-            conditioning_media_paths
-        ) != len(conditioning_start_frames):
-            raise ValueError(
-                "`conditioning_media_paths`, `conditioning_strengths`, "
-                "and `conditioning_start_frames` must have the same length"
-            )
-        if any(s < 0 or s > 1 for s in conditioning_strengths):
-            raise ValueError("All conditioning strengths must be between 0 and 1")
-        if any(f < 0 or f >= num_frames for f in conditioning_start_frames):
-            raise ValueError(
-                f"All conditioning start frames must be between 0 and {num_frames-1}"
-            )
-
-    seed_everething(seed)
-    if offload_to_cpu and not torch.cuda.is_available():
-        logger.warning(
-            "offload_to_cpu is set to True, but offloading will not occur since the model is already running on CPU."
-        )
-        offload_to_cpu = False
-    else:
-        offload_to_cpu = offload_to_cpu and get_total_gpu_memory() < 30
-
-    output_dir = (
-        Path(output_path)
-        if output_path
-        else Path(f"outputs/{datetime.today().strftime('%Y-%m-%d')}")
-    )
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    # Adjust dimensions to be divisible by 32 and num_frames to be (N * 8 + 1)
-    height_padded = ((height - 1) // 32 + 1) * 32
-    width_padded = ((width - 1) // 32 + 1) * 32
-    num_frames_padded = ((num_frames - 2) // 8 + 1) * 8 + 1
-
-    padding = calculate_padding(height, width, height_padded, width_padded)
-
-    logger.warning(
-        f"Padded dimensions: {height_padded}x{width_padded}x{num_frames_padded}"
-    )
-
-    prompt_enhancement_words_threshold = pipeline_config[
-        "prompt_enhancement_words_threshold"
-    ]
-
-    prompt_word_count = len(prompt.split())
-    enhance_prompt = (
-        prompt_enhancement_words_threshold > 0
-        and prompt_word_count < prompt_enhancement_words_threshold
-    )
-
-    if prompt_enhancement_words_threshold > 0 and not enhance_prompt:
-        logger.info(
-            f"Prompt has {prompt_word_count} words, which exceeds the threshold of {prompt_enhancement_words_threshold}. Prompt enhancement disabled."
-        )
-
-    precision = pipeline_config["precision"]
-    text_encoder_model_name_or_path = pipeline_config["text_encoder_model_name_or_path"]
-    sampler = pipeline_config["sampler"]
-    prompt_enhancer_image_caption_model_name_or_path = pipeline_config[
-        "prompt_enhancer_image_caption_model_name_or_path"
-    ]
-    prompt_enhancer_llm_model_name_or_path = pipeline_config[
-        "prompt_enhancer_llm_model_name_or_path"
-    ]
-
-    pipeline = create_ltx_video_pipeline(
-        ckpt_path=ltxv_model_path,
-        precision=precision,
-        text_encoder_model_name_or_path=text_encoder_model_name_or_path,
-        sampler=sampler,
-        device=kwargs.get("device", get_device()),
-        enhance_prompt=enhance_prompt,
-        prompt_enhancer_image_caption_model_name_or_path=prompt_enhancer_image_caption_model_name_or_path,
-        prompt_enhancer_llm_model_name_or_path=prompt_enhancer_llm_model_name_or_path,
-    )
-
-    if pipeline_config.get("pipeline_type", None) == "multi-scale":
-        if not spatial_upscaler_model_path:
-            raise ValueError(
-                "spatial upscaler model path is missing from pipeline config file and is required for multi-scale rendering"
-            )
-        latent_upsampler = create_latent_upsampler(
-            spatial_upscaler_model_path, pipeline.device
-        )
-        pipeline = LTXMultiScalePipeline(pipeline, latent_upsampler=latent_upsampler)
-
-    media_item = None
-    if input_media_path:
-        media_item = load_media_file(
-            media_path=input_media_path,
-            height=height,
-            width=width,
-            max_frames=num_frames_padded,
-            padding=padding,
-        )
-
-    conditioning_items = (
-        prepare_conditioning(
-            conditioning_media_paths=conditioning_media_paths,
-            conditioning_strengths=conditioning_strengths,
-            conditioning_start_frames=conditioning_start_frames,
-            height=height,
-            width=width,
-            num_frames=num_frames,
-            padding=padding,
-            pipeline=pipeline,
-        )
-        if conditioning_media_paths
-        else None
-    )
-
-    stg_mode = pipeline_config.get("stg_mode", "attention_values")
-    del pipeline_config["stg_mode"]
-    if stg_mode.lower() == "stg_av" or stg_mode.lower() == "attention_values":
-        skip_layer_strategy = SkipLayerStrategy.AttentionValues
-    elif stg_mode.lower() == "stg_as" or stg_mode.lower() == "attention_skip":
-        skip_layer_strategy = SkipLayerStrategy.AttentionSkip
-    elif stg_mode.lower() == "stg_r" or stg_mode.lower() == "residual":
-        skip_layer_strategy = SkipLayerStrategy.Residual
-    elif stg_mode.lower() == "stg_t" or stg_mode.lower() == "transformer_block":
-        skip_layer_strategy = SkipLayerStrategy.TransformerBlock
-    else:
-        raise ValueError(f"Invalid spatiotemporal guidance mode: {stg_mode}")
-
-    # Prepare input for the pipeline
-    sample = {
-        "prompt": prompt,
-        "prompt_attention_mask": None,
-        "negative_prompt": negative_prompt,
-        "negative_prompt_attention_mask": None,
-    }
-
-    device = device or get_device()
-    generator = torch.Generator(device=device).manual_seed(seed)
-
-    images = pipeline(
-        **pipeline_config,
-        skip_layer_strategy=skip_layer_strategy,
-        generator=generator,
-        output_type="pt",
-        callback_on_step_end=None,
-        height=height_padded,
-        width=width_padded,
-        num_frames=num_frames_padded,
-        frame_rate=frame_rate,
-        **sample,
-        media_items=media_item,
-        conditioning_items=conditioning_items,
-        is_video=True,
-        vae_per_channel_normalize=True,
-        image_cond_noise_scale=image_cond_noise_scale,
-        mixed_precision=(precision == "mixed_precision"),
-        offload_to_cpu=offload_to_cpu,
-        device=device,
-        enhance_prompt=enhance_prompt,
-    ).images
-
-    # Crop the padded images to the desired resolution and number of frames
-    (pad_left, pad_right, pad_top, pad_bottom) = padding
-    pad_bottom = -pad_bottom
-    pad_right = -pad_right
-    if pad_bottom == 0:
-        pad_bottom = images.shape[3]
-    if pad_right == 0:
-        pad_right = images.shape[4]
-    images = images[:, :, :num_frames, pad_top:pad_bottom, pad_left:pad_right]
-
-    for i in range(images.shape[0]):
-        # Gathering from B, C, F, H, W to C, F, H, W and then permuting to F, H, W, C
-        video_np = images[i].permute(1, 2, 3, 0).cpu().float().numpy()
-        # Unnormalizing images to [0, 255] range
-        video_np = (video_np * 255).astype(np.uint8)
-        fps = frame_rate
-        height, width = video_np.shape[1:3]
-        # In case a single image is generated
-        if video_np.shape[0] == 1:
-            output_filename = get_unique_filename(
-                f"image_output_{i}",
-                ".png",
-                prompt=prompt,
-                seed=seed,
-                resolution=(height, width, num_frames),
-                dir=output_dir,
-            )
-            imageio.imwrite(output_filename, video_np[0])
-        else:
-            output_filename = get_unique_filename(
-                f"video_output_{i}",
-                ".mp4",
-                prompt=prompt,
-                seed=seed,
-                resolution=(height, width, num_frames),
-                dir=output_dir,
-            )
-
-            # Write video
-            with imageio.get_writer(output_filename, fps=fps) as video:
-                for frame in video_np:
-                    video.append_data(frame)
-
-        logger.warning(f"Output saved to {output_filename}")
-
-
-def prepare_conditioning(
-    conditioning_media_paths: List[str],
-    conditioning_strengths: List[float],
-    conditioning_start_frames: List[int],
-    height: int,
-    width: int,
-    num_frames: int,
-    padding: tuple[int, int, int, int],
-    pipeline: LTXVideoPipeline,
-) -> Optional[List[ConditioningItem]]:
-    """Prepare conditioning items based on input media paths and their parameters.
-
-    Args:
-        conditioning_media_paths: List of paths to conditioning media (images or videos)
-        conditioning_strengths: List of conditioning strengths for each media item
-        conditioning_start_frames: List of frame indices where each item should be applied
-        height: Height of the output frames
-        width: Width of the output frames
-        num_frames: Number of frames in the output video
-        padding: Padding to apply to the frames
-        pipeline: LTXVideoPipeline object used for condition video trimming
-
-    Returns:
-        A list of ConditioningItem objects.
-    """
-    conditioning_items = []
-    for path, strength, start_frame in zip(
-        conditioning_media_paths, conditioning_strengths, conditioning_start_frames
-    ):
-        num_input_frames = orig_num_input_frames = get_media_num_frames(path)
-        if hasattr(pipeline, "trim_conditioning_sequence") and callable(
-            getattr(pipeline, "trim_conditioning_sequence")
-        ):
-            num_input_frames = pipeline.trim_conditioning_sequence(
-                start_frame, orig_num_input_frames, num_frames
-            )
-        if num_input_frames < orig_num_input_frames:
-            logger.warning(
-                f"Trimming conditioning video {path} from {orig_num_input_frames} to {num_input_frames} frames."
-            )
-
-        media_tensor = load_media_file(
-            media_path=path,
-            height=height,
-            width=width,
-            max_frames=num_input_frames,
-            padding=padding,
-            just_crop=True,
-        )
-        conditioning_items.append(ConditioningItem(media_tensor, start_frame, strength))
-    return conditioning_items
-
-
-def get_media_num_frames(media_path: str) -> int:
-    is_video = any(
-        media_path.lower().endswith(ext) for ext in [".mp4", ".avi", ".mov", ".mkv"]
-    )
-    num_frames = 1
-    if is_video:
-        reader = imageio.get_reader(media_path)
-        num_frames = reader.count_frames()
-        reader.close()
-    return num_frames
-
-
-def load_media_file(
-    media_path: str,
-    height: int,
-    width: int,
-    max_frames: int,
-    padding: tuple[int, int, int, int],
-    just_crop: bool = False,
-) -> torch.Tensor:
-    is_video = any(
-        media_path.lower().endswith(ext) for ext in [".mp4", ".avi", ".mov", ".mkv"]
-    )
-    if is_video:
-        reader = imageio.get_reader(media_path)
-        num_input_frames = min(reader.count_frames(), max_frames)
-
-        # Read and preprocess the relevant frames from the video file.
-        frames = []
-        for i in range(num_input_frames):
-            frame = Image.fromarray(reader.get_data(i))
-            frame_tensor = load_image_to_tensor_with_resize_and_crop(
-                frame, height, width, just_crop=just_crop
-            )
-            frame_tensor = torch.nn.functional.pad(frame_tensor, padding)
-            frames.append(frame_tensor)
-        reader.close()
-
-        # Stack frames along the temporal dimension
-        media_tensor = torch.cat(frames, dim=2)
-    else:  # Input image
-        media_tensor = load_image_to_tensor_with_resize_and_crop(
-            media_path, height, width, just_crop=just_crop
-        )
-        media_tensor = torch.nn.functional.pad(media_tensor, padding)
-    return media_tensor
-
-
-if __name__ == "__main__":
-    main()
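
Reviewer note: the geometry rules in the deleted infer() (spatial dimensions rounded up to multiples of 32, frame counts to N * 8 + 1, with the excess split by calculate_padding) are the pieces most likely to be re-implemented elsewhere. Below is a minimal, self-contained sketch restating only that arithmetic from the file above; pad_dims and split_padding are illustrative names, not identifiers from this repository.

    def pad_dims(height: int, width: int, num_frames: int):
        # Same rounding as the deleted infer(): spatial dims up to a
        # multiple of 32, frame count up to the nearest (N * 8 + 1).
        height_padded = ((height - 1) // 32 + 1) * 32
        width_padded = ((width - 1) // 32 + 1) * 32
        num_frames_padded = ((num_frames - 2) // 8 + 1) * 8 + 1
        return height_padded, width_padded, num_frames_padded

    def split_padding(sh: int, sw: int, th: int, tw: int):
        # Same split as the deleted calculate_padding(): excess divided
        # evenly, odd pixel to bottom/right, returned in F.pad order
        # (left, right, top, bottom).
        ph, pw = th - sh, tw - sw
        return (pw // 2, pw - pw // 2, ph // 2, ph - ph // 2)

    assert pad_dims(704, 1216, 121) == (704, 1216, 121)  # the CLI defaults are already aligned
    assert pad_dims(720, 1280, 97) == (736, 1280, 97)    # height rounds 720 -> 736
    assert split_padding(720, 1280, 736, 1280) == (0, 0, 8, 8)

The same numbers are undone at the end of infer() by negating the right/bottom pads before slicing, with the zero-pad special case guarding against empty slices.
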
managers/mmaudio_manager.py
DELETED
@@ -1,208 +0,0 @@
-# managers/mmaudio_manager.py
-# AducSdr: An open and functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related Repositories and Projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version: 2.3.0
-#
-# This file defines the MMAudioManager for the ADUC-SDR framework. It is responsible
-# for generating audio synchronized with video clips. This version has been refactored
-# to be self-contained by automatically cloning the MMAudio dependency from its
-# official repository, making the framework more portable and easier to set up.
-
-import torch
-import logging
-import subprocess
-import os
-import time
-import yaml
-import gc
-from pathlib import Path
-import gradio as gr
-import sys
-
-logger = logging.getLogger(__name__)
-
-# --- Dependency Management ---
-DEPS_DIR = Path("./deps")
-MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
-MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
-
-def setup_mmaudio_dependencies():
-    """
-    Ensures the MMAudio repository is cloned and available in the sys.path.
-    This function is run once when the module is first imported.
-    """
-    if not MMAUDIO_REPO_DIR.exists():
-        logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
-        try:
-            DEPS_DIR.mkdir(exist_ok=True)
-            subprocess.run(
-                ["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
-                check=True, capture_output=True, text=True
-            )
-            logger.info("MMAudio repository cloned successfully.")
-        except subprocess.CalledProcessError as e:
-            logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
-            raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.")
-    else:
-        logger.info("Found local MMAudio repository.")
-
-    if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
-        sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
-        logger.info(f"Added '{MMAUDIO_REPO_DIR.resolve()}' to sys.path.")
-
-setup_mmaudio_dependencies()
-
-from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
-from mmaudio.model.flow_matching import FlowMatching
-from mmaudio.model.networks import MMAudio, get_my_mmaudio
-from mmaudio.model.utils.features_utils import FeaturesUtils
-from mmaudio.model.sequence_config import SequenceConfig
-
-
-class MMAudioManager:
-    """
-    Manages the MMAudio model for audio generation tasks.
-    """
-    def __init__(self, workspace_dir):
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.cpu_device = torch.device("cpu")
-        self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
-        self.workspace_dir = workspace_dir
-
-        self.all_model_cfg = all_model_cfg
-        self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
-        self.net: 'MMAudio' = None
-        self.feature_utils: 'FeaturesUtils' = None
-        self.seq_cfg: 'SequenceConfig' = None
-
-        self._load_models_to_cpu()
-
-    def _adjust_paths_for_repo(self):
-        """Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
-        for cfg_key in self.all_model_cfg:
-            cfg = self.all_model_cfg[cfg_key]
-            # The paths in the original config are relative, so we join them with our repo path
-            cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
-            cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
-            if cfg.bigvgan_16k_path is not None:
-                cfg.bigvgan_16k_path = MMAUDIO_REPO_DIR / cfg.bigvgan_16k_path
-            cfg.synchformer_ckpt = MMAUDIO_REPO_DIR / cfg.synchformer_ckpt
-
-    def _load_models_to_cpu(self):
-        """Loads the MMAudio models to CPU memory on initialization."""
-        try:
-            self._adjust_paths_for_repo()
-            logger.info("Verifying and downloading MMAudio models, if necessary...")
-            self.model_config.download_if_needed()
-
-            self.seq_cfg = self.model_config.seq_cfg
-
-            logger.info(f"Loading MMAudio model: {self.model_config.model_name} to CPU...")
-            self.net = get_my_mmaudio(self.model_config.model_name).eval()
-            self.net.load_weights(torch.load(self.model_config.model_path, map_location=self.cpu_device, weights_only=True))
-
-            logger.info("Loading MMAudio feature utils to CPU...")
-            self.feature_utils = FeaturesUtils(
-                tod_vae_ckpt=self.model_config.vae_path,
-                synchformer_ckpt=self.model_config.synchformer_ckpt,
-                enable_conditions=True,
-                mode=self.model_config.mode,
-                bigvgan_vocoder_ckpt=self.model_config.bigvgan_16k_path,
-                need_vae_encoder=False
-            )
-            self.feature_utils = self.feature_utils.eval()
-            self.net.to(self.cpu_device)
-            self.feature_utils.to(self.cpu_device)
-            logger.info("MMAudioManager ready on CPU.")
-        except Exception as e:
-            logger.error(f"Failed to load audio models: {e}", exc_info=True)
-            self.net = None
-
-    def to_gpu(self):
-        """Moves the models and utilities to the GPU before inference."""
-        if self.device == 'cpu': return
-        logger.info(f"Moving MMAudioManager to GPU ({self.device})...")
-        self.net.to(self.device, self.dtype)
-        self.feature_utils.to(self.device, self.dtype)
-
-    def to_cpu(self):
-        """Moves the models back to CPU and clears VRAM after inference."""
-        if self.device == 'cpu': return
-        logger.info("Unloading MMAudioManager from GPU...")
-        self.net.to(self.cpu_device)
-        self.feature_utils.to(self.cpu_device)
-        gc.collect()
-        if torch.cuda.is_available(): torch.cuda.empty_cache()
-
-    def generate_audio_for_video(self, video_path: str, prompt: str, duration_seconds: float, output_path_override: str = None) -> str:
-        """
-        Generates audio for a video file, applying a negative prompt to avoid speech.
-        """
-        if self.net is None:
-            raise gr.Error("MMAudio model is not loaded. Cannot generate audio.")
-
-        logger.info("--- Generating Audio for Video Fragment ---")
-        logger.info(f"--- Video: {os.path.basename(video_path)}")
-        logger.info(f"--- Duration: {duration_seconds:.2f}s")
-
-        negative_prompt = "human voice, speech, talking, singing, narration"
-        logger.info(f"--- Prompt: '{prompt}' | Negative Prompt: '{negative_prompt}'")
-
-        if duration_seconds < 1:
-            logger.warning("Fragment too short (<1s). Returning original video.")
-            return video_path
-
-        if self.device == 'cpu':
-            logger.warning("Generating audio on CPU. This may be very slow.")
-
-        try:
-            self.to_gpu()
-            with torch.no_grad():
-                rng = torch.Generator(device=self.device).manual_seed(int(time.time()))
-                fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)
-
-                video_info = load_video(Path(video_path), duration_seconds)
-                self.seq_cfg.duration = video_info.duration_sec
-                self.net.update_seq_lengths(self.seq_cfg.latent_seq_len, self.seq_cfg.clip_seq_len, self.seq_cfg.sync_seq_len)
-
-                audios = mmaudio_generate(
-                    clip_video=video_info.clip_frames.unsqueeze(0),
-                    sync_video=video_info.sync_frames.unsqueeze(0),
-                    text=[prompt],
-                    negative_text=[negative_prompt],
-                    feature_utils=self.feature_utils,
-                    net=self.net,
-                    fm=fm,
-                    rng=rng,
-                    cfg_strength=4.5
-                )
-                audio_waveform = audios.float().cpu()[0]
-
-                output_video_path = output_path_override if output_path_override else os.path.join(self.workspace_dir, f"{Path(video_path).stem}_with_audio.mp4")
-
-                make_video(video_info, Path(output_video_path), audio_waveform, sampling_rate=self.seq_cfg.sampling_rate)
-                logger.info(f"--- Fragment with audio saved to: {os.path.basename(output_video_path)}")
-                return output_video_path
-        finally:
-            self.to_cpu()
-
-# --- Singleton Instantiation ---
-try:
-    with open("config.yaml", 'r') as f:
-        config = yaml.safe_load(f)
-    WORKSPACE_DIR = config['application']['workspace_dir']
-    mmaudio_manager_singleton = MMAudioManager(workspace_dir=WORKSPACE_DIR)
-except Exception as e:
-    logger.error(f"Could not initialize MMAudioManager: {e}", exc_info=True)
-    mmaudio_manager_singleton = None
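
Reviewer note: the VRAM discipline in this manager (weights parked on CPU, moved to the GPU inside generate_audio_for_video, and always evicted in the finally block) is worth keeping even though the file is gone. Here is a minimal sketch of that pattern as a context manager, assuming only a plain torch.nn.Module; GpuVisit is an illustrative name, not an API from MMAudio or this repository.

    import gc
    import torch

    class GpuVisit:
        """Move a module to the GPU for one block, then always evict it."""
        def __init__(self, module: torch.nn.Module, device: str = "cuda"):
            self.module, self.device = module, device

        def __enter__(self) -> torch.nn.Module:
            self.module.to(self.device)
            return self.module

        def __exit__(self, *exc):
            # Mirror of to_cpu() above: move back, collect, release the cache.
            self.module.to("cpu")
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    # usage: with GpuVisit(net) as m: waveform = m(features)
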
managers/seedvr_manager.py
DELETED
@@ -1,233 +0,0 @@
-# managers/seedvr_manager.py
-#
-# Copyright (C) 2025 Carlos Rodrigues dos Santos
-#
-# Version: 4.0.0 (Root Installer & Executor)
-#
-# This version fully adopts the logic from the functional hd_specialist.py example.
-# It acts as a setup manager: it clones the SeedVR repo and then copies all
-# necessary directories (projects, common, models, configs, ckpts) to the
-# application root. It also handles the pip installation of the Apex dependency.
-# This ensures that the SeedVR code runs in the exact file structure it expects.
-
-import torch
-import torch.distributed as dist
-import os
-import gc
-import logging
-import sys
-import subprocess
-from pathlib import Path
-from urllib.parse import urlparse
-from torch.hub import download_url_to_file
-import gradio as gr
-import mediapy
-from einops import rearrange
-import shutil
-from omegaconf import OmegaConf
-
-logger = logging.getLogger(__name__)
-
-# --- Global Paths ---
-APP_ROOT = Path("/home/user/app")
-DEPS_DIR = APP_ROOT / "deps"
-SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space"
-SEEDVR_SPACE_URL = "https://huggingface.co/spaces/ByteDance-Seed/SeedVR2-3B"
-
-class SeedVrManager:
-    def __init__(self, workspace_dir="deformes_workspace"):
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        self.runner = None
-        self.workspace_dir = workspace_dir
-        self.is_initialized = False
-        self._original_barrier = None
-        self.setup_complete = False  # Flag so that setup runs only once
-        logger.info("SeedVrManager initialized. Setup will run on first use.")
-
-    def _full_setup(self):
-        """
-        Runs the entire setup process a single time.
-        """
-        if self.setup_complete:
-            return
-
-        logger.info("--- Starting Full SeedVR Setup ---")
-
-        # 1. Clone the repository if it does not exist
-        if not SEEDVR_SPACE_DIR.exists():
-            logger.info(f"Cloning SeedVR Space repo to {SEEDVR_SPACE_DIR}...")
-            DEPS_DIR.mkdir(exist_ok=True, parents=True)
-            subprocess.run(
-                ["git", "clone", "--depth", "1", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)],
-                check=True, capture_output=True, text=True
-            )
-
-        # 2. Copy the required folders to the application root
-        required_dirs = ["projects", "common", "models", "configs_3b", "configs_7b"]
-        for dirname in required_dirs:
-            source = SEEDVR_SPACE_DIR / dirname
-            target = APP_ROOT / dirname
-            if not target.exists():
-                logger.info(f"Copying '{dirname}' to application root...")
-                shutil.copytree(source, target)
-
-        # 3. Add the root to sys.path to make sure the imports work
-        if str(APP_ROOT) not in sys.path:
-            sys.path.insert(0, str(APP_ROOT))
-            logger.info(f"Added '{APP_ROOT}' to sys.path.")
-
-        # 4. Install complex dependencies such as Apex
-        try:
-            import apex
-            logger.info("Apex is already installed.")
-        except ImportError:
-            logger.info("Installing Apex dependency...")
-            apex_url = 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
-            apex_wheel_path = _load_file_from_url(url=apex_url, model_dir=str(DEPS_DIR))
-            subprocess.run(f"pip install {apex_wheel_path}", check=True, shell=True)
-            logger.info("Apex installed successfully.")
-
-        # 5. Download the models into the ./ckpts folder at the root
-        ckpt_dir = APP_ROOT / 'ckpts'
-        ckpt_dir.mkdir(exist_ok=True)
-        pretrain_model_urls = {
-            'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
-            'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
-            'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth',
-            'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
-            'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt'
-        }
-        for name, url in pretrain_model_urls.items():
-            _load_file_from_url(url=url, model_dir=str(ckpt_dir))
-
-        self.setup_complete = True
-        logger.info("--- Full SeedVR Setup Complete ---")
-
-    def _initialize_runner(self, model_version: str):
-        if self.runner is not None: return
-
-        # Ensure the whole environment is configured before proceeding
-        self._full_setup()
-
-        # Now that setup is done, we can import the modules
-        from projects.video_diffusion_sr.infer import VideoDiffusionInfer
-        from common.config import load_config
-        from common.seed import set_seed
-
-        if dist.is_available() and not dist.is_initialized():
-            os.environ["MASTER_ADDR"] = "127.0.0.1"
-            os.environ["MASTER_PORT"] = "12355"
-            os.environ["RANK"] = str(0)
-            os.environ["WORLD_SIZE"] = str(1)
-            dist.init_process_group(backend='gloo')
-            logger.info("Initialized torch.distributed process group.")
-
-        logger.info(f"Initializing SeedVR2 {model_version} runner...")
-        if model_version == '3B':
-            config_path = APP_ROOT / 'configs_3b' / 'main.yaml'
-            checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_3b.pth'
-        else:  # We assume 7B
-            config_path = APP_ROOT / 'configs_7b' / 'main.yaml'
-            checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_7b.pth'
-
-        config = load_config(str(config_path))
-
-        self.runner = VideoDiffusionInfer(config)
-        OmegaConf.set_readonly(self.runner.config, False)
-
-        self.runner.configure_dit_model(device=self.device, checkpoint=str(checkpoint_path))
-        self.runner.configure_vae_model()
-
-        if hasattr(self.runner.vae, "set_memory_limit"):
-            self.runner.vae.set_memory_limit(**self.runner.config.vae.memory_limit)
-
-        self.is_initialized = True
-        logger.info(f"Runner for SeedVR2 {model_version} initialized and ready.")
-
-    def _unload_runner(self):
-        if self.runner is not None:
-            del self.runner
-            self.runner = None
-            gc.collect()
-            torch.cuda.empty_cache()
-            self.is_initialized = False
-            logger.info("SeedVR2 runner unloaded from VRAM.")
-        if dist.is_initialized():
-            dist.destroy_process_group()
-            logger.info("Destroyed torch.distributed process group.")
-
-    def process_video(self, input_video_path: str, output_video_path: str, prompt: str,
-                      model_version: str = '7B', steps: int = 100, seed: int = 666,
-                      progress: gr.Progress = None) -> str:
-        try:
-            self._initialize_runner(model_version)
-
-            # We need to import here, because sys.path is modified during setup
-            from common.seed import set_seed
-            from data.image.transforms.divisible_crop import DivisibleCrop
-            from data.image.transforms.na_resize import NaResize
-            from data.video.transforms.rearrange import Rearrange
-            from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
-            from torchvision.transforms import Compose, Lambda, Normalize
-            from torchvision.io.video import read_video
-
-            set_seed(seed, same_across_ranks=True)
-            self.runner.config.diffusion.timesteps.sampling.steps = steps
-            self.runner.configure_diffusion()
-
-            video_tensor = read_video(input_video_path, output_format="TCHW")[0] / 255.0
-            res_h, res_w = video_tensor.shape[-2:]
-            video_transform = Compose([
-                NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
-                Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
-                DivisibleCrop((16, 16)),
-                Normalize(0.5, 0.5),
-                Rearrange("t c h w -> c t h w"),
-            ])
-            cond_latents = [video_transform(video_tensor.to(self.device))]
-            input_videos = cond_latents
-            self.runner.dit.to("cpu")
-            self.runner.vae.to(self.device)
-            cond_latents = self.runner.vae_encode(cond_latents)
-            self.runner.vae.to("cpu"); gc.collect(); torch.cuda.empty_cache()
-            self.runner.dit.to(self.device)
-
-            pos_emb = torch.load(APP_ROOT / 'pos_emb.pt').to(self.device)
-            neg_emb = torch.load(APP_ROOT / 'neg_emb.pt').to(self.device)
-            text_embeds_dict = {"texts_pos": [pos_emb], "texts_neg": [neg_emb]}
-
-            noises = [torch.randn_like(latent) for latent in cond_latents]
-            conditions = [self.runner.get_condition(noise, latent_blur=latent, task="sr") for noise, latent in zip(noises, cond_latents)]
-
-            with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
-                video_tensors = self.runner.inference(noises=noises, conditions=conditions, dit_offload=True, **text_embeds_dict)
-
-            self.runner.dit.to("cpu"); gc.collect(); torch.cuda.empty_cache()
-            self.runner.vae.to(self.device)
-            samples = self.runner.vae_decode(video_tensors)
-            final_sample = samples[0]
-            input_video_sample = input_videos[0]
-            if final_sample.shape[1] < input_video_sample.shape[1]:
-                input_video_sample = input_video_sample[:, :final_sample.shape[1]]
-
-            final_sample = wavelet_reconstruction(rearrange(final_sample, "c t h w -> t c h w"), rearrange(input_video_sample, "c t h w -> t c h w"))
-            final_sample = rearrange(final_sample, "t c h w -> t h w c")
-            final_sample = final_sample.clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round()
-            final_sample_np = final_sample.to(torch.uint8).cpu().numpy()
-
-            mediapy.write_video(output_video_path, final_sample_np, fps=24)
-            logger.info(f"HD Mastered video saved to: {output_video_path}")
-            return output_path
-        finally:
-            self._unload_runner()
-
-def _load_file_from_url(url, model_dir='./', file_name=None):
-    os.makedirs(model_dir, exist_ok=True)
-    filename = file_name or os.path.basename(urlparse(url).path)
-    cached_file = os.path.abspath(os.path.join(model_dir, filename))
-    if not os.path.exists(cached_file):
-        logger.info(f'Downloading: "{url}" to {cached_file}')
-        download_url_to_file(url, cached_file, hash_prefix=None, progress=True)
-    return cached_file
-
-seedvr_manager_singleton = SeedVrManager()
seedvr_manager_singleton = SeedVrManager()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
managers/upscaler_specialist.py
DELETED
@@ -1,91 +0,0 @@
-# upscaler_specialist.py
-# Copyright (C) 2025 Carlos Rodrigues
-# ADUC specialist for spatial upscaling of latent tensors.
-
-import torch
-import logging
-from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
-
-logger = logging.getLogger(__name__)
-
-class UpscalerSpecialist:
-    """
-    Specialist responsible for increasing the spatial resolution of latent
-    tensors using the LTX Video Spatial Upscaler.
-    """
-    def __init__(self):
-        # Use CUDA when available
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.base_vae = None
-        self.pipe_upsample = None
-
-    def _lazy_init(self):
-        try:
-            # Try to reuse the VAE already loaded by ltx_manager
-            if ltx_manager_singleton.workers:
-                candidate_vae = ltx_manager_singleton.workers[0].pipeline.vae
-                if candidate_vae.__class__.__name__ == "AutoencoderKLLTXVideo":
-                    self.base_vae = candidate_vae
-                    logger.info("[Upscaler] Using the ltx_manager VAE (AutoencoderKLLTXVideo).")
-                else:
-                    logger.warning(f"[Upscaler] Incompatible VAE: {type(candidate_vae)}. "
-                                   "Loading AutoencoderKLLTXVideo manually...")
-                    from diffusers.models.autoencoders import AutoencoderKLLTXVideo
-                    self.base_vae = AutoencoderKLLTXVideo.from_pretrained(
-                        "linoyts/LTX-Video-spatial-upscaler-0.9.8",
-                        subfolder="vae",
-                        torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-                    ).to(self.device)
-            else:
-                logger.warning("[Upscaler] No worker available, loading the VAE manually...")
-                from diffusers.models.autoencoders import AutoencoderKLLTXVideo
-                self.base_vae = AutoencoderKLLTXVideo.from_pretrained(
-                    "linoyts/LTX-Video-spatial-upscaler-0.9.8",
-                    subfolder="vae",
-                    torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-                ).to(self.device)
-
-            # Load the upsampling pipeline
-            self.pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
-                "linoyts/LTX-Video-spatial-upscaler-0.9.8",
-                vae=self.base_vae,
-                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-            ).to(self.device)
-
-            logger.info("[Upscaler] Pipeline loaded successfully.")
-
-        except Exception as e:
-            logger.error(f"[Upscaler] Failed to load pipeline: {e}")
-            self.pipe_upsample = None
-
-    @torch.no_grad()
-    def upscale(self, latents: torch.Tensor) -> torch.Tensor:
-        """Applies 2x spatial upscaling to the given latent tensors."""
-        self._lazy_init()
-        if self.pipe_upsample is None:
-            logger.warning("[Upscaler] Pipeline unavailable. Returning original latents.")
-            return latents
-
-        try:
-            logger.info(f"[Upscaler] Received shape {latents.shape}. Running upscale on {self.device}...")
-
-            # [FINAL FIX] Per the official documentation, the result lives in .frames
-            result = self.pipe_upsample(latents=latents, output_type="latent")
-            output_tensor = result.frames
-
-            logger.info(f"[Upscaler] Upscale finished. New shape: {output_tensor.shape}")
-            return output_tensor
-
-        except Exception as e:
-            logger.error(f"[Upscaler] Error during upscale: {e}", exc_info=True)
-            return latents
-
-# ---------------------------
-# Global singleton
-# ---------------------------
-upscaler_specialist_singleton = UpscalerSpecialist()
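The specialist itself survives the refactor inside the package (aduc_framework/managers/upscaler_specialist.py receives a one-line import fix in the file list above). A minimal usage sketch against the relocated module; the import path follows this commit's layout, the singleton name is assumed unchanged, and the latent shape is illustrative:

    import torch
    from aduc_framework.managers.upscaler_specialist import upscaler_specialist_singleton

    # Illustrative LTX-style latent block: (batch, channels, frames, height, width).
    latents = torch.randn(1, 128, 8, 32, 32)
    upscaled = upscaler_specialist_singleton.upscale(latents)  # ~2x spatial size, still latent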
managers/vae_manager.py
DELETED
@@ -1,99 +0,0 @@
-# managers/vae_manager.py
-# AducSdr: An open and functional implementation of the ADUC-SDR architecture
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related repositories and projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# This file defines the VaeManager specialist. Its purpose is to abstract all
-# direct interactions with the Variational Autoencoder (VAE) model. It handles
-# the model's state (CPU/GPU memory), provides clean interfaces for encoding and
-# decoding, and ensures that the heavy VAE model only occupies VRAM when actively
-# performing a task, freeing up resources for other specialists.
-#
-# Version 1.0.1
-
-import torch
-import logging
-import gc
-from typing import Generator
-
-# Import the source of the VAE model and the low-level functions
-from managers.ltx_manager import ltx_manager_singleton
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
-
-logger = logging.getLogger(__name__)
-
-class VaeManager:
-    """
-    A specialist for managing the LTX VAE model. It provides high-level methods
-    for encoding pixels to latents and decoding latents to pixels, while managing
-    the model's presence on the GPU to conserve VRAM.
-    """
-    def __init__(self, vae_model):
-        self.vae = vae_model
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        self.cpu_device = torch.device('cpu')
-
-        # Initialize the VAE on the CPU to keep VRAM free at startup
-        self.vae.to(self.cpu_device)
-        logger.info("VaeManager initialized. VAE model is on CPU.")
-
-    def to_gpu(self):
-        """Moves the VAE model to the active GPU."""
-        if self.device == 'cpu': return
-        logger.info("VaeManager: Moving VAE to GPU...")
-        self.vae.to(self.device)
-
-    def to_cpu(self):
-        """Moves the VAE model to the CPU and clears VRAM cache."""
-        if self.device == 'cpu': return
-        logger.info("VaeManager: Unloading VAE from GPU...")
-        self.vae.to(self.cpu_device)
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-
-    @torch.no_grad()
-    def encode(self, pixel_tensor: torch.Tensor) -> torch.Tensor:
-        """
-        Encodes a pixel-space tensor to the latent space.
-        Manages moving the VAE to and from the GPU.
-        """
-        try:
-            self.to_gpu()
-            pixel_tensor = pixel_tensor.to(self.device, dtype=self.vae.dtype)
-            latents = vae_encode(pixel_tensor, self.vae, vae_per_channel_normalize=True)
-            return latents.to(self.cpu_device)  # Return to CPU to free VRAM
-        finally:
-            self.to_cpu()
-
-    @torch.no_grad()
-    def decode(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
-        """
-        Decodes a latent-space tensor to pixels.
-        Manages moving the VAE to and from the GPU.
-        """
-        try:
-            self.to_gpu()
-            latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
-            timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
-            pixels = vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
-            return pixels.to(self.cpu_device)  # Return to CPU to free VRAM
-        finally:
-            self.to_cpu()
-
-# --- Singleton Instance ---
-# The VaeManager must use the exact same VAE instance as the LTX pipeline to ensure
-# latent space compatibility. We source it directly from the already-initialized ltx_manager.
-source_vae_model = ltx_manager_singleton.workers[0].pipeline.vae
-vae_manager_singleton = VaeManager(source_vae_model)
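As with the upscaler, the VAE specialist continues under aduc_framework/managers/vae_manager.py (a two-line import fix in the file list above). A minimal round-trip sketch, assuming the singleton name carries over; the pixel shape is illustrative:

    import torch
    from aduc_framework.managers.vae_manager import vae_manager_singleton

    # Illustrative video tensor (batch, channels, frames, height, width), values in [-1, 1].
    pixels = torch.rand(1, 3, 9, 256, 256) * 2.0 - 1.0
    latents = vae_manager_singleton.encode(pixels)   # VAE hops to the GPU, result returns on CPU
    decoded = vae_manager_singleton.decode(latents)  # same load/offload pattern for decoding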
prompts/LICENSE
DELETED
@@ -1,25 +0,0 @@
-# Euia-AducSdr: An open and functional implementation of the ADUC-SDR architecture for coherent video generation.
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Contact:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Related repositories and projects:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-# Hugging Face (Ltx-SuperTime-60Secondos): https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/
-# Hugging Face (Novinho): https://huggingface.co/spaces/Carlexxx/Novinho/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
prompts/NOTICE.md
DELETED
@@ -1,76 +0,0 @@
-# NOTICE
-
-Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
-
----
-
-## Aviso de Propriedade Intelectual e Licenciamento
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-### **Reconhecimento e Implicações (EM PORTUGUÊS):**
-
-Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
-
-1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
-2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
-3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-### **Acknowledgement and Implications (IN ENGLISH):**
-
-By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
-
-1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
-2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
-3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
-
----
-
-## Licença AGPLv3
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
-
----
-
-**Contato para Consultas:**
-
-Para mais informações sobre a arquitetura ADUC, o status do patenteamento, ou para discutir licenciamento para usos comerciais ou não conformes com a AGPLv3, por favor, entre em contato:
-
-Carlos Rodrigues dos Santos
-carlex22@gmail.com
-Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
prompts/README.md
DELETED
@@ -1,211 +0,0 @@
----
-title: Euia-AducSdr
-emoji: 🎥
-colorFrom: indigo
-colorTo: purple
-sdk: gradio
-app_file: app.py
-pinned: true
-license: agpl-3.0
-short_description: Uma implementação aberta e funcional da arquitetura ADUC-SDR
----
-
-### 🇧🇷 Português
-
-Uma implementação aberta e funcional da arquitetura ADUC-SDR (Arquitetura de Unificação Compositiva - Escala Dinâmica e Resiliente), projetada para a geração de vídeo coerente de longa duração. Este projeto materializa os princípios de fragmentação, navegação geométrica e um mecanismo de "eco causal 4bits memoria" para garantir a continuidade física e narrativa em sequências de vídeo geradas por múltiplos modelos de IA.
-
-**Licença:** Este projeto é licenciado sob os termos da **GNU Affero General Public License v3.0**. Isto significa que se você usar este software (ou qualquer trabalho derivado) para fornecer um serviço através de uma rede, você é **obrigado a disponibilizar o código-fonte completo** da sua versão para os usuários desse serviço.
-
-- **Copyright (C) 4 de Agosto de 2025, Carlos Rodrigues dos Santos**
-- Uma cópia completa da licença pode ser encontrada no arquivo [LICENSE](LICENSE).
-
----
-
-### 🇬🇧 English
-
-An open and functional implementation of the ADUC-SDR (Architecture for Compositive Unification - Dynamic and Resilient Scaling) architecture, designed for long-form coherent video generation. This project materializes the principles of fragmentation, geometric navigation, and a "causal echo 4-bit memory" mechanism to ensure physical and narrative continuity in video sequences generated by multiple AI models.
-
-**License:** This project is licensed under the terms of the **GNU Affero General Public License v3.0**. This means that if you use this software (or any derivative work) to provide a service over a network, you are **required to make the complete source code** of your version available to the users of that service.
-
-- **Copyright (C) August 4, 2025, Carlos Rodrigues dos Santos**
-- A full copy of the license can be found in the [LICENSE](LICENSE) file.
-
----
-
-## **Aviso de Propriedade Intelectual e Patenteamento**
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-A arquitetura e o método **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste projeto e nas reivindicações associadas, estão **atualmente em processo de patenteamento**.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, que incluem, mas não se limitam a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-Ao utilizar este software e a arquitetura ADUC aqui implementada, você reconhece a natureza inovadora desta arquitetura e que a **reprodução ou exploração da lógica central da ADUC em sistemas independentes pode infringir direitos de patente pendente.**
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The **ADUC (Automated Discovery and Orchestration of Complex tasks)** architecture and method, as described in this project and its associated claims, are **currently in the process of being patented.**
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-By using this software and the ADUC architecture implemented herein, you acknowledge the innovative nature of this architecture and that **the reproduction or exploitation of ADUC's core logic in independent systems may infringe upon pending patent rights.**
-
----
-
-### Detalhes Técnicos e Reivindicações da ADUC
-
-#### 🇧🇷 Definição Curta (para Tese e Patente)
-
-**ADUC** é um *framework pré-input* e *intermediário* de **gerenciamento de prompts** que:
-
-1. **fragmenta** solicitações acima do limite de contexto de qualquer modelo,
-2. **escala linearmente** (processo sequencial com memória persistida),
-3. **distribui** sub-tarefas a **especialistas** (modelos/ferramentas heterogêneos), e
-4. **realimenta** a próxima etapa com avaliação do que foi feito/esperado (LLM diretor).
-
-Não é um modelo; é uma **camada orquestradora** plugável antes do input de modelos existentes (texto, imagem, áudio, vídeo), usando *tokens universais* e a tecnologia atual.
-
-#### 🇬🇧 Short Definition (for Thesis and Patent)
-
-**ADUC** is a *pre-input* and *intermediate* **prompt management framework** that:
-
-1. **fragments** requests exceeding any model's context limit,
-2. **scales linearly** (sequential process with persisted memory),
-3. **distributes** sub-tasks to **specialists** (heterogeneous models/tools), and
-4. **feeds back** to the next step with an evaluation of what was done/expected (director LLM).
-
-It is not a model; it is a pluggable **orchestration layer** before the input of existing models (text, image, audio, video), using *universal tokens* and current technology.
-
----
-
-#### 🇧🇷 Elementos Essenciais (Telegráfico)
-
-* **Agnóstico a modelos:** opera com qualquer LLM/difusor/API.
-* **Pré-input manager:** recebe pedido do usuário, **divide** em blocos ≤ limite de tokens, **prioriza**, **agenda** e **roteia**.
-* **Memória persistida:** resultados/latentes/“eco” viram **estado compartilhado** para o próximo bloco (nada é ignorado).
-* **Especialistas:** *routers* decidem quem faz o quê (ex.: “descrição → LLM-A”, “keyframe → Img-B”, “vídeo → Vid-C”).
-* **Controle de qualidade:** LLM diretor compara *o que fez* × *o que deveria* × *o que falta* e **regenera objetivos** do próximo fragmento.
-* **Custo/latência-aware:** planeja pela **VRAM/tempo/custo**, não tenta “abraçar tudo de uma vez”.
-
-#### 🇬🇧 Essential Elements (Telegraphic)
-
-* **Model-agnostic:** operates with any LLM/diffuser/API.
-* **Pre-input manager:** receives user request, **divides** into blocks ≤ token limit, **prioritizes**, **schedules**, and **routes**.
-* **Persisted memory:** results/latents/“echo” become **shared state** for the next block (nothing is ignored).
-* **Specialists:** *routers* decide who does what (e.g., “description → LLM-A”, “keyframe → Img-B”, “video → Vid-C”).
-* **Quality control:** director LLM compares *what was done* × *what should be done* × *what is missing* and **regenerates objectives** for the next fragment.
-* **Cost/latency-aware:** plans by **VRAM/time/cost**, does not try to “embrace everything at once”.
-
----
-
-#### 🇧🇷 Reivindicações Independentes (Método e Sistema)
-
-**Reivindicação Independente (Método) — Versão Enxuta:**
-
-1. **Método** de **orquestração de prompts** para execução de tarefas acima do limite de contexto de modelos de IA, compreendendo:
-(a) **receber** uma solicitação que excede um limite de tokens;
-(b) **analisar** a solicitação por um **LLM diretor** e **fragmentá-la** em sub-tarefas ≤ limite;
-(c) **selecionar** especialistas de execução para cada sub-tarefa com base em capacidades declaradas;
-(d) **gerar** prompts específicos por sub-tarefa em **tokens universais**, incluindo referências ao **estado persistido** de execuções anteriores;
-(e) **executar sequencialmente** as sub-tarefas e **persistir** suas saídas como memória (incluindo latentes/eco/artefatos);
-(f) **avaliar** automaticamente a saída versus metas declaradas e **regenerar objetivos** do próximo fragmento;
-(g) **iterar** (b)–(f) até que os critérios de completude sejam atendidos, produzindo o resultado agregado;
-em que o framework **escala linearmente** no tempo e armazenamento físico, **independente** da janela de contexto dos modelos subjacentes.
-
-**Reivindicação Independente (Sistema):**
-
-2. **Sistema** de orquestração de prompts, compreendendo: um **planejador LLM diretor**; um **roteador de especialistas**; um **banco de estado persistido** (incl. memória cinética para vídeo); um **gerador de prompts universais**; e um **módulo de avaliação/realimentação**, acoplados por uma **API pré-input** a modelos heterogêneos.
-
-#### 🇬🇧 Independent Claims (Method and System)
-
-**Independent Claim (Method) — Concise Version:**
-
-1. A **method** for **prompt orchestration** for executing tasks exceeding AI model context limits, comprising:
-(a) **receiving** a request that exceeds a token limit;
-(b) **analyzing** the request by a **director LLM** and **fragmenting it** into sub-tasks ≤ the limit;
-(c) **selecting** execution specialists for each sub-task based on declared capabilities;
-(d) **generating** specific prompts per sub-task in **universal tokens**, including references to the **persisted state** of previous executions;
-(e) **sequentially executing** the sub-tasks and **persisting** their outputs as memory (including latents/echo/artifacts);
-(f) **automatically evaluating** the output against declared goals and **regenerating objectives** for the next fragment;
-(g) **iterating** (b)–(f) until completion criteria are met, producing the aggregated result;
-wherein the framework **scales linearly** in time and physical storage, **independent** of the context window of the underlying models.
-
-**Independent Claim (System):**
-
-2. A prompt orchestration **system**, comprising: a **director LLM planner**; a **specialist router**; a **persisted state bank** (incl. kinetic memory for video); a **universal prompt generator**; and an **evaluation/feedback module**, coupled via a **pre-input API** to heterogeneous models.
-
----
-
-#### 🇧🇷 Dependentes Úteis
-
-* (3) Onde o roteamento considera **custo/latência/VRAM** e metas de qualidade.
-* (4) Onde o banco de estado inclui **eco cinético** para vídeo (últimos *n* frames/latentes/fluxo).
-* (5) Onde a avaliação usa métricas específicas por domínio (Lflow, consistência semântica, etc.).
-* (6) Onde *tokens universais* padronizam instruções entre especialistas.
-* (7) Onde a orquestração decide **cut vs continuous** e **corte regenerativo** (Déjà-Vu) ao editar vídeo.
-* (8) Onde o sistema **nunca descarta** conteúdo excedente: **reagenda** em novos fragmentos.
-
-#### 🇬🇧 Useful Dependents
-
-* (3) Wherein routing considers **cost/latency/VRAM** and quality goals.
-* (4) Wherein the state bank includes **kinetic echo** for video (last *n* frames/latents/flow).
-* (5) Wherein evaluation uses domain-specific metrics (Lflow, semantic consistency, etc.).
-* (6) Wherein *universal tokens* standardize instructions between specialists.
-* (7) Wherein orchestration decides **cut vs continuous** and **regenerative cut** (Déjà-Vu) when editing video.
-* (8) Wherein the system **never discards** excess content: it **reschedules** it in new fragments.
-
----
-
-#### 🇧🇷 Como isso conversa com SDR (Vídeo)
-
-* **Eco Cinético**: é um **tipo de estado persistido** consumido pelo próximo passo.
-* **Déjà-Vu (Corte Regenerativo)**: é **uma política de orquestração** aplicada quando há edição; ADUC decide, monta os prompts certos e chama o especialista de vídeo.
-* **Cut vs Continuous**: decisão do **diretor** com base em estado + metas; ADUC roteia e garante a sobreposição/remoção final.
-
-#### 🇬🇧 How this Converses with SDR (Video)
-
-* **Kinetic Echo**: is a **type of persisted state** consumed by the next step.
-* **Déjà-Vu (Regenerative Cut)**: is an **orchestration policy** applied during editing; ADUC decides, crafts the right prompts, and calls the video specialist.
-* **Cut vs Continuous**: decision made by the **director** based on state + goals; ADUC routes and ensures the final overlap/removal.
-
----
-
-#### 🇧🇷 Mensagem Clara ao Usuário (Experiência)
-
-> “Seu pedido excede o limite X do modelo Y. Em vez de truncar silenciosamente, o **ADUC** dividirá e **entregará 100%** do conteúdo por etapas coordenadas.”
-
-Isso é diferencial prático e jurídico: **não-obviedade** por transformar limite de contexto em **pipeline controlado**, com **persistência de estado** e **avaliação iterativa**.
-
-#### 🇬🇧 Clear User Message (Experience)
-
-> "Your request exceeds model Y's limit X. Instead of silently truncating, **ADUC** will divide and **deliver 100%** of the content through coordinated steps."
-
-This is a practical and legal differentiator: **non-obviousness** by transforming context limits into a **controlled pipeline**, with **state persistence** and **iterative evaluation**.
-
----
-
-### Contact / Contato / Contacto
-
-- **Author / Autor:** Carlos Rodrigues dos Santos
-- **Email:** carlex22@gmail.com
-- **GitHub:** [https://github.com/carlex22/Aduc-sdr](https://github.com/carlex22/Aduc-sdr)
-- **Hugging Face Spaces:**
-  - [Ltx-SuperTime-60Secondos](https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/)
-  - [Novinho](https://huggingface.co/spaces/Carlexxx/Novinho/)
-
----
prompts/anticipatory_keyframe_prompt.txt
DELETED
@@ -1,29 +0,0 @@
-# ROLE: AI Cinematographer and Storyboard Artist
-
-# GOAL:
-Your task is to generate a single, descriptive prompt for an image generation model (Flux). This prompt must describe a keyframe that serves as a perfect visual transition BETWEEN a current scene and a future scene. You must see what you just did, where you are, and where you are preparing to go.
-
-# CRITICAL DIRECTIVES:
-1. **SYNTHESIZE, DON'T DESCRIBE:** Do not simply describe the "Current Scene" or the "Future Scene". Your prompt must create a visual concept that exists *in the moment between them*. It's the "in-between" frame.
-
-2. **VISUAL ANCHORING:** The primary visual canvas is the "Current Base Image" (`[IMG-BASE]`). Your generated prompt should describe an evolution FROM this image. Maintain its environment and characters unless the narrative arc demands a change.
-
-3. **NARRATIVE FORESHADOWING:** The prompt must contain visual elements that hint at or prepare for the "Future Scene". If the future scene is "the chicken climbs the sidewalk", your prompt for the current scene ("the chicken crosses the road") might be "the chicken, halfway across the road, lifts its head and looks towards the curb of the sidewalk".
-
-4. **LEARN FROM THE PAST:** Analyze the "Previous Prompt" to understand the creative choices that led to the "Current Base Image". Maintain that stylistic and narrative trajectory.
-
-5. **REFERENCE POOL:** Use the "General Reference Images" (`[IMG-REF-X]`) to maintain the identity and style of key subjects throughout the sequence.
-
-# CONTEXT FOR YOUR DECISION:
-- **Previous Prompt (What I thought to create the current image):**
-{historico_prompt}
-
-- **Current Scene (Where I am now):** "{cena_atual}"
-- **Future Scene (Where I am going next):** "{cena_futura}"
-
-# VISUAL ASSETS:
-# [The "Current Base Image" will be tagged as [IMG-BASE].]
-# [The "General Reference Images" will be tagged as [IMG-REF-1], [IMG-REF-2], etc.]
-
-# RESPONSE FORMAT:
-Respond with ONLY the final, single-line prompt string for the image generator.
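These deleted prompt files are plain str.format templates (note the {historico_prompt}, {cena_atual}, and {cena_futura} placeholders above). A minimal sketch of filling one, using the pre-move path and made-up scene strings:

    from pathlib import Path

    # Illustrative values; the real scene descriptions come from the director LLM.
    template = Path("prompts/anticipatory_keyframe_prompt.txt").read_text(encoding="utf-8")
    prompt = template.format(
        historico_prompt="wide shot of a chicken pausing at the edge of a road",
        cena_atual="the chicken crosses the road",
        cena_futura="the chicken climbs onto the sidewalk",
    )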
prompts/audio_director_prompt.txt
DELETED
@@ -1,18 +0,0 @@
-# ROLE: AI Audio Director and Sound Designer
-
-# GOAL:
-Analyze the provided film script/storyboard. Based on the overall narrative and mood, generate two distinct prompts for audio generation: one for a background music score and one for ambient sound effects (SFX).
-
-# INSTRUCTIONS:
-1. **Analyze the Story:** Read the "Global Idea" and the "Scene Storyboard" to understand the plot, pacing, and emotional tone of the film.
-2. **Create Music Prompt:** Write a concise, descriptive prompt for a music generation model (like MusicGen). Focus on genre, mood, and instruments.
-3. **Create SFX Prompt:** Write a concise, descriptive prompt for an audio generation model (like AudioLDM2). Focus on ambient sounds and key effects that match the scenes.
-4. **Output Format:** You MUST respond with a single, clean JSON object with exactly two keys: "music_prompt" and "sfx_prompt".
-
-# == PROVIDED CONTEXT ==
-- **Global Idea:** "{global_prompt}"
-- **Scene Storyboard:**
-{storyboard_str}
-
-# == YOUR TASK ==
-# Generate the JSON response with the two audio prompts.
prompts/cinematic_director_prompt.txt
DELETED
@@ -1,27 +0,0 @@
-# ROLE: AI Cinematic Scenarist
-
-# GOAL:
-# Your single, crucial task is to write a rich, cinematic motion prompt.
-# This prompt must describe the most logical and compelling action that
-# connects the PRESENT visual state to the FUTURE visual state, considering
-# the context of the PAST.
-
-# CONTEXT FOR YOUR PROMPT:
-- Global Story Goal: {global_prompt}
-- Creative History: {story_history}
-- The Past: "{past_scene_desc}" -> [PAST_IMAGE]
-- The Present: "{present_scene_desc}" -> [PRESENT_IMAGE]
-- The Future: "{future_scene_desc}" -> [FUTURE_IMAGE]
-
-# CRITICAL PROMPT DIRECTIVES:
-# 1. ALWAYS DESCRIBE MOTION: The scene must not be static.
-# 2. STYLE: Be descriptive, cinematic, and direct.
-# 3. STRUCTURE: In a single paragraph (under 150 words), describe the scene's
-#    motion, prioritizing in this EXACT order:
-#    a. Actors/Animals: What are they doing?
-#    b. Objects: How do they interact?
-#    c. Camera: How is it moving?
-#    d. Scenery/Environment: What details add to the mood?
-
-# RESPONSE FORMAT:
-# You MUST respond with ONLY the raw, single-line string for the motion prompt.
prompts/director_composition_prompt.txt
DELETED
@@ -1,27 +0,0 @@
-# ROLE: AI Animation Director (Key Pose)
-
-# GOAL:
-Generate a single, powerful, CLIP-style prompt to create the NEXT keyframe in a sequence. Your goal is to describe a logical and visually coherent evolution FROM the last generated image.
-
-# CRITICAL DIRECTIVES:
-1. **ANCHOR TO THE PREVIOUS SCENE:** The last generated image, tagged as `[IMG-1]`, represents the END of the previous scene. Your new prompt MUST describe what happens IMMEDIATELY AFTER. Treat `[IMG-1]` as your primary visual and environmental canvas.
-
-2. **EVOLVE, DO NOT REPLACE:** Unless the "Current Scene Description" explicitly describes a major change in location or character (e.g., "cut to a new scene"), you must maintain the environment, lighting, and core subjects from `[IMG-1]`. Your prompt should describe how the characters and objects *within* that scene evolve.
-
-3. **POSE, NOT PANNING:** Describe the state of the subject at a specific instant, not camera movement. Focus on body language, expression, and interaction that logically follows the previous state.
-
-4. **USE REFERENCES FOR CONTINUITY:** Use the `[IMG-X]` tags provided to maintain the identity of characters and objects across frames. Prioritize `[IMG-1]` for environmental context.
-
-5. **BE A DIRECTOR:** Use strong, active verbs. Instead of "the lion is now sitting", prefer "the lion lowers its body, muscles tensing as it settles onto the dry grass".
-
-# CONTEXT:
-- Global Story Goal: "{global_prompt}"
-- Current Scene Description: "{current_scene_desc}"
-- Scene History (what happened before):
-{history_scene}
-
-# VISUAL ASSETS FOR ANALYSIS:
-# [Images will be provided and tagged as [IMG-1] (Last Image/Environment), [IMG-2] (Character Ref), etc.]
-
-# RESPONSE FORMAT:
-Respond with ONLY the final, single-line prompt string.
prompts/flux_composition_wrapper_prompt.txt
DELETED
@@ -1 +0,0 @@
-From the provided reference images, create a single, natural, and cohesive scene where: {target_prompt}
prompts/initial_motion_prompt.txt
DELETED
@@ -1,20 +0,0 @@
-# ROLE: AI Cinematographer (Initial Scene)
-
-# GOAL:
-Create a single, concise, CLIP-style motion prompt. The prompt must describe a coherent video sequence that transitions from a STARTING image to a DESTINATION image.
-
-# INSTRUCTIONS:
-1. **Analyze the Arc:** Understand the visual and narrative journey from the START to the DESTINATION image.
-2. **Describe the Motion:** Focus on DYNAMICS (camera and subject movement).
-3. **Style Guide:** Use dense, descriptive, cinematic keywords. Omit fluff like "The video shows...". Be direct.
-
-# CONTEXT:
-- Overall Story Goal: "{user_prompt}"
-- Destination Scene Description: "{destination_scene_description}"
-
-# SCENE ANALYSIS:
-# START Image: [Image 1]
-# DESTINATION Image: [Image 2]
-
-# RESPONSE FORMAT:
-Respond with ONLY the raw prompt string.
prompts/keyframe_selection_prompt.txt
DELETED
@@ -1,20 +0,0 @@
-# ROLE: AI Film Editor / Photographer
-
-# GOAL:
-You are tasked with selecting the best keyframe for each scene of a storyboard to create a coherent visual narrative. You have a "scene bank" of available images. Your selections must create a smooth and logical progression.
-
-# INSTRUCTIONS:
-1. **Analyze the Storyboard:** Read each scene description carefully to understand the intended action and emotion.
-2. **Prioritize Continuity:** For each scene, your primary goal is to find an image from the "Image Pool" that represents a logical **next step** from the previously selected scene. Avoid jarring jumps in location, lighting, or character appearance unless the storyboard explicitly calls for a "cut".
-3. **Maintain Consistency:** Your choices must be consistent with the characters and style established in the "Reference Images (Story Base)".
-4. **Select the Best Fit:** If multiple images could work, choose the one that best captures the specific action or mood of the current scene description.
-5. **Output Format:** You MUST respond with a single, clean JSON object with one key: "selected_image_identifiers". The value should be an array of strings, where each string is the identifier of the chosen image (e.g., "IMG-3"). The order of the array must match the order of the scenes in the storyboard. The length of the array must be exactly the same as the number of scenes.
-
-# == PROVIDED CONTEXT ==
-- **Storyboard:**
-{storyboard_str}
-
-- **Available Image Identifiers in Pool:** {image_identifiers}
-
-# == YOUR TASK ==
-# Generate the JSON response with the selected image identifiers, prioritizing a smooth visual and narrative flow from one selection to the next.
prompts/sound_director_prompt.txt
DELETED
@@ -1,27 +0,0 @@
-# ROLE: AI Sound Director & Foley Artist
-
-# GOAL:
-You are the sound director for a film. Your task is to create a single, rich, and descriptive prompt for an audio generation model (like MMAudio). This prompt must describe the complete soundscape for the CURRENT scene, considering what happened before and what will happen next to ensure audio continuity.
-
-# CRITICAL RULES (MUST FOLLOW):
-1. **NO SPEECH OR VOICES:** The final prompt must NOT include any terms related to human speech, dialogue, talking, voices, singing, or narration. The goal is to create a world of ambient sounds and specific sound effects (SFX).
-2. **FOCUS ON THE PRESENT:** The audio must primarily match the CURRENT visual scene (Keyframe Kn) and its textual description (Ato_n).
-3. **USE THE PAST FOR CONTINUITY:** Analyze the "Previous Audio Prompt" to understand the established soundscape. If a sound should logically continue from the previous scene, include it (e.g., "the continued sound of a gentle breeze...").
-4. **USE THE FUTURE FOR FORESHADOWING:** Analyze the FUTURE keyframe and scene description. If appropriate, introduce subtle sounds that hint at what's to come. (e.g., if the next scene is a storm, you could add "...with the faint, distant rumble of thunder in the background.").
-5. **BE DESCRIPTIVE:** Use evocative language. Instead of "dog bark", use "the sharp, excited yapping of a small dog". Combine multiple elements into a cohesive soundscape.
-
-# CONTEXT FOR YOUR DECISION:
-
-- **Previous Audio Prompt (what was just heard):**
-{audio_history}
-
-- **VISUAL PAST (Keyframe Kn-1):** [PAST_IMAGE]
-- **VISUAL PRESENT (Keyframe Kn):** [PRESENT_IMAGE]
-- **VISUAL FUTURE (Keyframe Kn+1):** [FUTURE_IMAGE]
-
-- **CURRENT Scene Description (Ato_n):** "{present_scene_desc}"
-- **CURRENT Motion Prompt (what the camera is doing):** "{motion_prompt}"
-- **FUTURE Scene Description (Ato_n+1):** "{future_scene_desc}"
-
-# RESPONSE FORMAT:
-Respond with ONLY the final, single-line prompt string for the audio generator.
prompts/sound_director_prompt.txt.txt
DELETED
@@ -1,27 +0,0 @@
-# ROLE: AI Sound Director & Foley Artist
-
-# GOAL:
-You are the sound director for a film. Your task is to create a single, rich, and descriptive prompt for an audio generation model (like MMAudio). This prompt must describe the complete soundscape for the CURRENT scene, considering what happened before and what will happen next to ensure audio continuity.
-
-# CRITICAL RULES (MUST FOLLOW):
-1. **NO SPEECH OR VOICES:** The final prompt must NOT include any terms related to human speech, dialogue, talking, voices, singing, or narration. The goal is to create a world of ambient sounds and specific sound effects (SFX).
-2. **FOCUS ON THE PRESENT:** The audio must primarily match the CURRENT visual scene (Keyframe Kn) and its textual description (Ato_n).
-3. **USE THE PAST FOR CONTINUITY:** Analyze the "Previous Audio Prompt" to understand the established soundscape. If a sound should logically continue from the previous scene, include it (e.g., "the continued sound of a gentle breeze...").
-4. **USE THE FUTURE FOR FORESHADOWING:** Analyze the FUTURE keyframe and scene description. If appropriate, introduce subtle sounds that hint at what's to come. (e.g., if the next scene is a storm, you could add "...with the faint, distant rumble of thunder in the background.").
-5. **BE DESCRIPTIVE:** Use evocative language. Instead of "dog bark", use "the sharp, excited yapping of a small dog". Combine multiple elements into a cohesive soundscape.
-
-# CONTEXT FOR YOUR DECISION:
-
-- **Previous Audio Prompt (what was just heard):**
-{audio_history}
-
-- **VISUAL PAST (Keyframe Kn-1):** [PAST_IMAGE]
-- **VISUAL PRESENT (Keyframe Kn):** [PRESENT_IMAGE]
-- **VISUAL FUTURE (Keyframe Kn+1):** [FUTURE_IMAGE]
-
-- **CURRENT Scene Description (Ato_n):** "{present_scene_desc}"
-- **CURRENT Motion Prompt (what the camera is doing):** "{motion_prompt}"
-- **FUTURE Scene Description (Ato_n+1):** "{future_scene_desc}"
-
-# RESPONSE FORMAT:
-Respond with ONLY the final, single-line prompt string for the audio generator.
prompts/transition_decision_prompt.txt
DELETED
@@ -1,27 +0,0 @@
-# ROLE: AI Director of Continuity & Cinematographer
-
-# GOAL:
-Analyze the visual continuity between a START, MIDPOINT, and DESTINATION image. Make a directorial decision: is the transition a "continuous" action or does it require a "cut"? Then, write the appropriate motion prompt.
-
-# INSTRUCTIONS:
-1. **Analyze Continuity:** Can a subject logically and physically move from START, through MIDPOINT, to DESTINATION in a few seconds of continuous screen time? Consider changes in location, pose, and time of day.
-   * **Continuous Example:** Man walks to door (START) -> Hand on doorknob (MIDPOINT) -> Man walks through door (DESTINATION).
-   * **Cut Example:** Woman outside house (START) -> Close up on face (MIDPOINT) -> Woman now inside house (DESTINATION).
-2. **Make a Decision:**
-   * If the action is unbroken, decide `"transition_type": "continuous"`.
-   * If there is a jump in time, space, or logic, decide `"transition_type": "cut"`.
-3. **Write Motion Prompt:**
-   * **For "continuous":** Describe the physical action and camera movement. Example: "Camera follows the man as he opens the door and steps inside."
-   * **For "cut":** Describe a cinematic transition effect. DO NOT describe character actions. Example: "A smooth cross-dissolve transition to the new scene."
-
-# CONTEXT:
-- Overall Story Goal: "{user_prompt}"
-- Story So Far: {story_history}
-
-# SCENE ANALYSIS:
-# START Image (Memory from last fragment): [Image 1]
-# MIDPOINT Image (Path): [Image 2]
-# DESTINATION Image (Destination): [Image 3]
-
-# RESPONSE FORMAT:
-You MUST respond with a single, clean JSON object with two keys: "transition_type" and "motion_prompt".
prompts/unified_cinematographer_prompt.txt
DELETED
@@ -1,47 +0,0 @@
-# ROLE: AI Continuity Director & Cinematographer
-
-# GOAL:
-Your task is twofold. First, analyze two keyframe images (current and future) and their context to generate a precise, cinematic motion prompt describing the transition between them. Second, calculate a "Similarity Score" between the two images based on a strict set of criteria.
-
-# --- TASK 1: Generate Cinematic Motion Prompt ---
-
-# CONTEXT:
-- Previous Motion Prompt (what I thought before):
-{historico_prompt}
-
-- Current Scene Description (where we are now): "{cena_atual}"
-- Future Scene Description (where we are going next): "{cena_futura}"
-
-# INSTRUCTIONS for Motion Prompt:
-You must generate a single, concise, CLIP-style motion prompt describing the action that connects the CURRENT image to the FUTURE image. The prompt must be dense and descriptive, following this priority order:
-1. **People/Animals:** Focus on expression, emotion, and specific actions.
-2. **Objects:** Describe their location and any interaction or movement.
-3. **Camera:** Specify focus, zoom, and movement (pan, tilt, dolly, etc.).
-
-Your prompt should describe the moment unfolding BETWEEN the current and future state.
-
-# --- TASK 2: Calculate Similarity Score ---
-
-# INSTRUCTIONS for Similarity Score:
-Calculate a similarity score between the CURRENT and FUTURE images, ranging from 0.0 (completely different) to 1.0 (very similar).
-
-**Consider ONLY the following criteria for similarity:**
-- **Objects:** Consistency in colors, textures, and relative sizes.
-- **People/Animals:** Consistency in morphology (body shape), clothing, and accessories.
-- **Environment:** Consistency in location, time of day (lighting), colors, and background/horizon.
-
-**Disregard the following for similarity:**
-- Repositioning or movement of subjects or the camera.
-
-**Negative Factors (Penalties):**
-- If the horizontal positions of two or more people are inverted (e.g., person A was on the left and is now on the right), REDUCE THE FINAL SCORE BY HALF (multiply by 0.5).
-- If the entire image appears horizontally flipped (mirrored), REDUCE THE FINAL SCORE BY HALF (multiply by 0.5).
-
-# VISUAL ASSETS:
-# [The CURRENT keyframe image will be provided here.]
-# [The FUTURE keyframe image will be provided here.]
-
-# --- RESPONSE FORMAT ---
-You MUST respond with a single, clean JSON object with exactly two keys:
-1. "motion_prompt": A string containing the generated cinematic prompt.
-2. "similarity_score": A floating-point number between 0.0 and 1.0.
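The template above pins the model to a two-key JSON contract. A minimal, defensive parsing sketch; the helper name is illustrative, not framework code:

    import json

    def parse_cinematographer_reply(raw: str) -> tuple[str, float]:
        # Pull out the two required keys and clamp the score to [0.0, 1.0].
        data = json.loads(raw)
        motion_prompt = str(data["motion_prompt"])
        score = max(0.0, min(1.0, float(data["similarity_score"])))
        return motion_prompt, score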
prompts/unified_storyboard_prompt.txt
DELETED
@@ -1,19 +0,0 @@
-# ROLE: AI Storyboard Writer
-
-# GOAL:
-You are a scriptwriter tasked with breaking down a general idea into a sequence of exactly {num_fragments} distinct scenes or "acts". Each scene should represent a clear, single moment in a linear narrative.
-
-# CRITICAL RULES (MUST FOLLOW):
-1. **ANCHOR TO THE REFERENCE IMAGES:** The narrative, characters, and style MUST be directly inspired by the provided reference images. The story should feel like it belongs in the same world as these images.
-2. **SIMPLE, LINEAR ACTION:** Do not create a complex plot. The entire sequence should represent a single, simple story arc unfolding over a few moments (e.g., a character notices something, approaches it, and reacts).
-3. **FOCUS ON "WHAT", NOT "HOW":** Each description is a scene, not a camera direction. Describe the core action or emotional beat of the moment. Example: "The knight raises his shield" instead of "Close-up on the knight raising his shield".
-
-# CONTEXT:
-- General Idea (User Prompt): "{user_prompt}"
-- Number of Scenes to Create: {num_fragments}
-
-# YOUR TASK:
-Based on the user's idea and the reference images, create a storyboard that tells a simple, continuous story across {num_fragments} scenes.
-
-# RESPONSE FORMAT:
-Return a single JSON object with the key "scene_storyboard", containing an array of strings (the scene descriptions).
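Both placeholders in this template (`{user_prompt}`, `{num_fragments}`) match Python's `str.format` syntax, so the fill/parse round trip can be sketched as below; the function names are hypothetical and the `str.format` assumption is inferred from the placeholder style.

```python
import json

def build_storyboard_prompt(template: str, user_prompt: str, num_fragments: int) -> str:
    # The template's {user_prompt} and {num_fragments} placeholders follow
    # str.format conventions, so a plain format() call fills them.
    return template.format(user_prompt=user_prompt, num_fragments=num_fragments)

def parse_storyboard_reply(raw_reply: str, num_fragments: int) -> list[str]:
    # The prompt demands a JSON object whose "scene_storyboard" key holds
    # exactly num_fragments scene descriptions.
    scenes = json.loads(raw_reply)["scene_storyboard"]
    if len(scenes) != num_fragments:
        raise ValueError(f"expected {num_fragments} scenes, got {len(scenes)}")
    return [str(scene) for scene in scenes]
```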
tools/LICENSE
DELETED
@@ -1,25 +0,0 @@
-# Euia-AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR para geração de vídeo coerente.
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
-#
-# Contato:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Repositórios e Projetos Relacionados:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-# Hugging Face (Ltx-SuperTime-60Secondos): https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/
-# Hugging Face (Novinho): https://huggingface.co/spaces/Carlexxx/Novinho/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
tools/NOTICE.md
DELETED
@@ -1,76 +0,0 @@
-# NOTICE
-
-Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
-
----
-
-## Aviso de Propriedade Intelectual e Licenciamento
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-O método e o sistema de orquestração de prompts denominados **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste documento e implementados neste software, estão atualmente em processo de patenteamento.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, incluindo, mas não se limitando a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-### **Reconhecimento e Implicações (EM PORTUGUÊS):**
-
-Ao acessar ou utilizar este software e a arquitetura ADUC aqui implementada, você reconhece:
-
-1. A natureza inovadora e a importância da arquitetura ADUC no campo da orquestração de prompts para IA.
-2. Que a essência desta arquitetura, ou suas implementações derivadas, podem estar sujeitas a direitos de propriedade intelectual, incluindo patentes.
-3. Que o uso comercial, a reprodução da lógica central da ADUC em sistemas independentes, ou a exploração direta da invenção sem o devido licenciamento podem infringir os direitos de patente pendente.
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The method and system for prompt orchestration named **ADUC (Automated Discovery and Orchestration of Complex tasks)**, as described herein and implemented in this software, are currently in the process of being patented.
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-### **Acknowledgement and Implications (IN ENGLISH):**
-
-By accessing or using this software and the ADUC architecture implemented herein, you acknowledge:
-
-1. The innovative nature and significance of the ADUC architecture in the field of AI prompt orchestration.
-2. That the essence of this architecture, or its derivative implementations, may be subject to intellectual property rights, including patents.
-3. That commercial use, reproduction of ADUC's core logic in independent systems, or direct exploitation of the invention without proper licensing may infringe upon pending patent rights.
-
----
-
-## Licença AGPLv3
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
-
----
-
-**Contato para Consultas:**
-
-Para mais informações sobre a arquitetura ADUC, o status do patenteamento, ou para discutir licenciamento para usos comerciais ou não conformes com a AGPLv3, por favor, entre em contato:
-
-Carlos Rodrigues dos Santos
-carlex22@gmail.com
-Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
tools/README.md
DELETED
@@ -1,211 +0,0 @@
----
-title: Euia-AducSdr
-emoji: 🎥
-colorFrom: indigo
-colorTo: purple
-sdk: gradio
-app_file: app.py
-pinned: true
-license: agpl-3.0
-short_description: Uma implementação aberta e funcional da arquitetura ADUC-SDR
----
-
-
-### 🇧🇷 Português
-
-Uma implementação aberta e funcional da arquitetura ADUC-SDR (Arquitetura de Unificação Compositiva - Escala Dinâmica e Resiliente), projetada para a geração de vídeo coerente de longa duração. Este projeto materializa os princípios de fragmentação, navegação geométrica e um mecanismo de "eco causal 4bits memoria" para garantir a continuidade física e narrativa em sequências de vídeo geradas por múltiplos modelos de IA.
-
-**Licença:** Este projeto é licenciado sob os termos da **GNU Affero General Public License v3.0**. Isto significa que se você usar este software (ou qualquer trabalho derivado) para fornecer um serviço através de uma rede, você é **obrigado a disponibilizar o código-fonte completo** da sua versão para os usuários desse serviço.
-
-- **Copyright (C) 4 de Agosto de 2025, Carlos Rodrigues dos Santos**
-- Uma cópia completa da licença pode ser encontrada no arquivo [LICENSE](LICENSE).
-
----
-
-### 🇬🇧 English
-
-An open and functional implementation of the ADUC-SDR (Architecture for Compositive Unification - Dynamic and Resilient Scaling) architecture, designed for long-form coherent video generation. This project materializes the principles of fragmentation, geometric navigation, and a "causal echo 4bits memori" mechanism to ensure physical and narrative continuity in video sequences generated by multiple AI models.
-
-**License:** This project is licensed under the terms of the **GNU Affero General Public License v3.0**. This means that if you use this software (or any derivative work) to provide a service over a network, you are **required to make the complete source code** of your version available to the users of that service.
-
-- **Copyright (C) August 4, 2025, Carlos Rodrigues dos Santos**
-- A full copy of the license can be found in the [LICENSE](LICENSE) file.
-
----
-
-## **Aviso de Propriedade Intelectual e Patenteamento**
-
-### **Processo de Patenteamento em Andamento (EM PORTUGUÊS):**
-
-A arquitetura e o método **ADUC (Automated Discovery and Orchestration of Complex tasks)**, conforme descritos neste projeto e nas reivindicações associadas, estão **atualmente em processo de patenteamento**.
-
-O titular dos direitos, Carlos Rodrigues dos Santos, está buscando proteção legal para as inovações chave da arquitetura ADUC, que incluem, mas não se limitam a:
-
-* Fragmentação e escalonamento de solicitações que excedem limites de contexto de modelos de IA.
-* Distribuição inteligente de sub-tarefas para especialistas heterogêneos.
-* Gerenciamento de estado persistido com avaliação iterativa e realimentação para o planejamento de próximas etapas.
-* Planejamento e roteamento sensível a custo, latência e requisitos de qualidade.
-* O uso de "tokens universais" para comunicação agnóstica a modelos.
-
-Ao utilizar este software e a arquitetura ADUC aqui implementada, você reconhece a natureza inovadora desta arquitetura e que a **reprodução ou exploração da lógica central da ADUC em sistemas independentes pode infringir direitos de patente pendente.**
-
----
-
-### **Patent Pending (IN ENGLISH):**
-
-The **ADUC (Automated Discovery and Orchestration of Complex tasks)** architecture and method, as described in this project and its associated claims, are **currently in the process of being patented.**
-
-The rights holder, Carlos Rodrigues dos Santos, is seeking legal protection for the key innovations of the ADUC architecture, including, but not limited to:
-
-* Fragmentation and scaling of requests exceeding AI model context limits.
-* Intelligent distribution of sub-tasks to heterogeneous specialists.
-* Persistent state management with iterative evaluation and feedback for planning subsequent steps.
-* Cost, latency, and quality-aware planning and routing.
-* The use of "universal tokens" for model-agnostic communication.
-
-By using this software and the ADUC architecture implemented herein, you acknowledge the innovative nature of this architecture and that **the reproduction or exploitation of ADUC's core logic in independent systems may infringe upon pending patent rights.**
-
----
-
-### Detalhes Técnicos e Reivindicações da ADUC
-
-#### 🇧🇷 Definição Curta (para Tese e Patente)
-
-**ADUC** é um *framework pré-input* e *intermediário* de **gerenciamento de prompts** que:
-
-1. **fragmenta** solicitações acima do limite de contexto de qualquer modelo,
-2. **escala linearmente** (processo sequencial com memória persistida),
-3. **distribui** sub-tarefas a **especialistas** (modelos/ferramentas heterogêneos), e
-4. **realimenta** a próxima etapa com avaliação do que foi feito/esperado (LLM diretor).
-
-Não é um modelo; é uma **camada orquestradora** plugável antes do input de modelos existentes (texto, imagem, áudio, vídeo), usando *tokens universais* e a tecnologia atual.
-
-#### 🇬🇧 Short Definition (for Thesis and Patent)
-
-**ADUC** is a *pre-input* and *intermediate* **prompt management framework** that:
-
-1. **fragments** requests exceeding any model's context limit,
-2. **scales linearly** (sequential process with persisted memory),
-3. **distributes** sub-tasks to **specialists** (heterogeneous models/tools), and
-4. **feeds back** to the next step with an evaluation of what was done/expected (director LLM).
-
-It is not a model; it is a pluggable **orchestration layer** before the input of existing models (text, image, audio, video), using *universal tokens* and current technology.
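Read as an interface rather than prose, the four-step definition above maps onto a small data model: a fragment sized to a model's context window, and the persisted state each fragment carries forward. A minimal sketch, with all names invented for illustration:

```python
from dataclasses import dataclass, field

@dataclass
class Fragment:
    """One sub-task, sized to fit the target model's context window."""
    prompt: str      # universal-token instruction for a specialist
    specialist: str  # e.g. "description -> LLM-A", "video -> Vid-C"

@dataclass
class OrchestrationState:
    """Persisted memory shared across sequential fragments."""
    artifacts: list = field(default_factory=list)       # results, latents, "echo"
    pending: list[Fragment] = field(default_factory=list)
```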
-
----
-
-#### 🇧🇷 Elementos Essenciais (Telegráfico)
-
-* **Agnóstico a modelos:** opera com qualquer LLM/difusor/API.
-* **Pré-input manager:** recebe pedido do usuário, **divide** em blocos ≤ limite de tokens, **prioriza**, **agenda** e **roteia**.
-* **Memória persistida:** resultados/latentes/“eco” viram **estado compartilhado** para o próximo bloco (nada é ignorado).
-* **Especialistas:** *routers* decidem quem faz o quê (ex.: “descrição → LLM-A”, “keyframe → Img-B”, “vídeo → Vid-C”).
-* **Controle de qualidade:** LLM diretor compara *o que fez* × *o que deveria* × *o que falta* e **regenera objetivos** do próximo fragmento.
-* **Custo/latência-aware:** planeja pela **VRAM/tempo/custo**, não tenta “abraçar tudo de uma vez”.
-
-#### 🇬🇧 Essential Elements (Telegraphic)
-
-* **Model-agnostic:** operates with any LLM/diffuser/API.
-* **Pre-input manager:** receives user request, **divides** into blocks ≤ token limit, **prioritizes**, **schedules**, and **routes**.
-* **Persisted memory:** results/latents/“echo” become **shared state** for the next block (nothing is ignored).
-* **Specialists:** *routers* decide who does what (e.g., “description → LLM-A”, “keyframe → Img-B”, “video → Vid-C”).
-* **Quality control:** director LLM compares *what was done* × *what should be done* × *what is missing* and **regenerates objectives** for the next fragment.
-* **Cost/latency-aware:** plans by **VRAM/time/cost**, does not try to “embrace everything at once”.
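The "Specialists" bullet describes capability-based dispatch. A sketch of that routing under stated assumptions (the registry keys and specialist stubs are invented; real ADUC routing would also weigh the VRAM/time/cost budgets named in the last bullet):

```python
# Hypothetical capability registry: task kind -> specialist callable.
SPECIALISTS = {
    "description": lambda prompt: f"[LLM-A] {prompt}",
    "keyframe": lambda prompt: f"[Img-B] {prompt}",
    "video": lambda prompt: f"[Vid-C] {prompt}",
}

def route(task_kind: str, prompt: str) -> str:
    # A real router would score candidates by VRAM/time/cost before
    # committing, per the cost/latency-aware bullet above.
    try:
        specialist = SPECIALISTS[task_kind]
    except KeyError:
        raise ValueError(f"no specialist declares capability {task_kind!r}")
    return specialist(prompt)
```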
-
----
-
-#### 🇧🇷 Reivindicações Independentes (Método e Sistema)
-
-**Reivindicação Independente (Método) — Versão Enxuta:**
-
-1. **Método** de **orquestração de prompts** para execução de tarefas acima do limite de contexto de modelos de IA, compreendendo:
-(a) **receber** uma solicitação que excede um limite de tokens;
-(b) **analisar** a solicitação por um **LLM diretor** e **fragmentá-la** em sub-tarefas ≤ limite;
-(c) **selecionar** especialistas de execução para cada sub-tarefa com base em capacidades declaradas;
-(d) **gerar** prompts específicos por sub-tarefa em **tokens universais**, incluindo referências ao **estado persistido** de execuções anteriores;
-(e) **executar sequencialmente** as sub-tarefas e **persistir** suas saídas como memória (incluindo latentes/eco/artefatos);
-(f) **avaliar** automaticamente a saída versus metas declaradas e **regenerar objetivos** do próximo fragmento;
-(g) **iterar** (b)–(f) até que os critérios de completude sejam atendidos, produzindo o resultado agregado;
-em que o framework **escala linearmente** no tempo e armazenamento físico, **independente** da janela de contexto dos modelos subjacentes.
-
-**Reivindicação Independente (Sistema):**
-
-2. **Sistema** de orquestração de prompts, compreendendo: um **planejador LLM diretor**; um **roteador de especialistas**; um **banco de estado persistido** (incl. memória cinética para vídeo); um **gerador de prompts universais**; e um **módulo de avaliação/realimentação**, acoplados por uma **API pré-input** a modelos heterogêneos.
-
-#### 🇬🇧 Independent Claims (Method and System)
-
-**Independent Claim (Method) — Concise Version:**
-
-1. A **method** for **prompt orchestration** for executing tasks exceeding AI model context limits, comprising:
-(a) **receiving** a request that exceeds a token limit;
-(b) **analyzing** the request by a **director LLM** and **fragmenting it** into sub-tasks ≤ the limit;
-(c) **selecting** execution specialists for each sub-task based on declared capabilities;
-(d) **generating** specific prompts per sub-task in **universal tokens**, including references to the **persisted state** of previous executions;
-(e) **sequentially executing** the sub-tasks and **persisting** their outputs as memory (including latents/echo/artifacts);
-(f) **automatically evaluating** the output against declared goals and **regenerating objectives** for the next fragment;
-(g) **iterating** (b)–(f) until completion criteria are met, producing the aggregated result;
-wherein the framework **scales linearly** in time and physical storage, **independent** of the context window of the underlying models.
-
-**Independent Claim (System):**
-
-2. A prompt orchestration **system**, comprising: a **director LLM planner**; a **specialist router**; a **persisted state bank** (incl. kinetic memory for video); a **universal prompt generator**; and an **evaluation/feedback module**, coupled via a **pre-input API** to heterogeneous models.
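Steps (a)–(g) of the method claim describe a plain sequential loop. The sketch below paraphrases that loop in code; it is not the repository's implementation, and `director`, `router`, and `state` are stand-ins for the claimed planner, specialist router, and persisted state bank.

```python
def orchestrate(request: str, director, router, state) -> list:
    # (a)-(b): the director LLM fragments the oversized request.
    fragments = director.fragment(request)
    results = []
    while fragments:
        fragment = fragments.pop(0)
        # (c)-(d): pick a specialist and build its prompt, referencing
        # the persisted state of previous executions.
        specialist = router.select(fragment)
        prompt = director.build_prompt(fragment, state)
        # (e): execute sequentially and persist the output as memory.
        output = specialist.run(prompt)
        state.persist(output)
        results.append(output)
        # (f)-(g): evaluate against declared goals and regenerate the
        # remaining objectives; iterate until completion criteria are met.
        fragments = director.replan(fragments, state)
    return results
```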
-
----
-
-#### 🇧🇷 Dependentes Úteis
-
-* (3) Onde o roteamento considera **custo/latência/VRAM** e metas de qualidade.
-* (4) Onde o banco de estado inclui **eco cinético** para vídeo (últimos *n* frames/latentes/fluxo).
-* (5) Onde a avaliação usa métricas específicas por domínio (Lflow, consistência semântica, etc.).
-* (6) Onde *tokens universais* padronizam instruções entre especialistas.
-* (7) Onde a orquestração decide **cut vs continuous** e **corte regenerativo** (Déjà-Vu) ao editar vídeo.
-* (8) Onde o sistema **nunca descarta** conteúdo excedente: **reagenda** em novos fragmentos.
-
-#### 🇬🇧 Useful Dependents
-
-* (3) Wherein routing considers **cost/latency/VRAM** and quality goals.
-* (4) Wherein the state bank includes **kinetic echo** for video (last *n* frames/latents/flow).
-* (5) Wherein evaluation uses domain-specific metrics (Lflow, semantic consistency, etc.).
-* (6) Wherein *universal tokens* standardize instructions between specialists.
-* (7) Wherein orchestration decides **cut vs continuous** and **regenerative cut** (Déjà-Vu) when editing video.
-* (8) Wherein the system **never discards** excess content: it **reschedules** it in new fragments.
-
----
-
-#### 🇧🇷 Como isso conversa com SDR (Vídeo)
-
-* **Eco Cinético**: é um **tipo de estado persistido** consumido pelo próximo passo.
-* **Déjà-Vu (Corte Regenerativo)**: é **uma política de orquestração** aplicada quando há edição; ADUC decide, monta os prompts certos e chama o especialista de vídeo.
-* **Cut vs Continuous**: decisão do **diretor** com base em estado + metas; ADUC roteia e garante a sobreposição/remoção final.
-
-#### 🇬🇧 How this Converses with SDR (Video)
-
-* **Kinetic Echo**: is a **type of persisted state** consumed by the next step.
-* **Déjà-Vu (Regenerative Cut)**: is an **orchestration policy** applied during editing; ADUC decides, crafts the right prompts, and calls the video specialist.
-* **Cut vs Continuous**: decision made by the **director** based on state + goals; ADUC routes and ensures the final overlap/removal.
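The kinetic echo described above is persisted state specialized for video: the last *n* frames/latents that condition the next generation step. A deque-based sketch; the class name and default window size are assumptions for illustration:

```python
from collections import deque

class KineticEcho:
    """Ring buffer holding the last n latents/frames for the next step."""

    def __init__(self, window: int = 8):
        self.buffer = deque(maxlen=window)

    def push(self, latent) -> None:
        # Older entries fall off automatically once the window is full.
        self.buffer.append(latent)

    def conditioning(self) -> list:
        # The next video fragment is conditioned on this echo, which is how
        # continuity is carried across sequentially generated fragments.
        return list(self.buffer)
```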
-
----
-
-#### 🇧🇷 Mensagem Clara ao Usuário (Experiência)
-
-> “Seu pedido excede o limite X do modelo Y. Em vez de truncar silenciosamente, o **ADUC** dividirá e **entregará 100%** do conteúdo por etapas coordenadas.”
-
-Isso é diferencial prático e jurídico: **não-obviedade** por transformar limite de contexto em **pipeline controlado**, com **persistência de estado** e **avaliação iterativa**.
-
-#### 🇬🇧 Clear User Message (Experience)
-
-> "Your request exceeds model Y's limit X. Instead of silently truncating, **ADUC** will divide and **deliver 100%** of the content through coordinated steps."
-
-This is a practical and legal differentiator: **non-obviousness** by transforming context limits into a **controlled pipeline**, with **state persistence** and **iterative evaluation**.
-
----
-
-### Contact / Contato / Contacto
-
-- **Author / Autor:** Carlos Rodrigues dos Santos
-- **Email:** carlex22@gmail.com
-- **GitHub:** [https://github.com/carlex22/Aduc-sdr](https://github.com/carlex22/Aduc-sdr)
-- **Hugging Face Spaces:**
-  - [Ltx-SuperTime-60Secondos](https://huggingface.co/spaces/Carlexx/Ltx-SuperTime-60Secondos/)
-  - [Novinho](https://huggingface.co/spaces/Carlexxx/Novinho/)
-
----