Eueuiaa committed · verified
Commit ab2fc5d · Parent(s): 9fddc3b

Update api/ltx_server.py

Files changed (1): api/ltx_server.py (+126 −263)
api/ltx_server.py CHANGED
@@ -1,27 +1,19 @@
 # ltx_server.py — VideoService (beta 1.1)
 # Always output_type="latent"; at the end: VAE (whole block) → pixels → MP4.
 # Ignores UserWarning/FutureWarning and injects the VAE into the manager with the correct dtype/device.
-
 # --- 0. WARNINGS AND ENVIRONMENT ---
+
 import warnings
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", message=".*")
-
 from huggingface_hub import logging
-
 logging.set_verbosity_error()
 logging.set_verbosity_warning()
 logging.set_verbosity_info()
 logging.set_verbosity_debug()
-
-
 LTXV_DEBUG=1
 LTXV_FRAME_LOG_EVERY=8
-
-
-
-# --- 1. IMPORTS ---
 import os, subprocess, shlex, tempfile
 import torch
 import json
@@ -44,12 +36,30 @@ import time
 import traceback
 from einops import rearrange
 import torch.nn.functional as F
-
-# Singletons (simple versions)
 from managers.vae_manager import vae_manager_singleton
 from tools.video_encode_tool import video_encode_tool_singleton
-
-# --- 2. DEPENDENCY MANAGEMENT AND SETUP ---
+def run_setup():
+    setup_script_path = "setup.py"
+    if not os.path.exists(setup_script_path):
+        print("[DEBUG] 'setup.py' not found. Skipping dependency cloning.")
+        return
+    try:
+        print("[DEBUG] Running setup.py for dependencies...")
+        subprocess.run([sys.executable, setup_script_path], check=True)
+        print("[DEBUG] Setup finished successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"[DEBUG] ERROR in setup.py (code {e.returncode}). Aborting.")
+        sys.exit(1)
+DEPS_DIR = Path("/data")
+LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+if not LTX_VIDEO_REPO_DIR.exists():
+    print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup...")
+    run_setup()
+def add_deps_to_path():
+    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+    if repo_path not in sys.path:
+        sys.path.insert(0, repo_path)
+    print(f"[DEBUG] Repo added to sys.path: {repo_path}")
 def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
         import psutil
@@ -83,7 +93,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
         return results
     except Exception:
         return []
-
 def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
     cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
     try:
@@ -107,9 +116,6 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
         except Exception:
             continue
     return results
-
-
-
 def calculate_new_dimensions(orig_w, orig_h, divisor=8):
     """
     Computes new dimensions while keeping the aspect ratio, ensuring that both
@@ -143,8 +149,6 @@ def calculate_new_dimensions(orig_w, orig_h, divisor=8):
 
     print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Computed: {new_w:.0f}x{new_h:.0f} -> Final (divisible by {divisor}): {final_w}x{final_h}")
     return final_h, final_w  # returns (height, width)
-
-
 def handle_media_upload_for_dims(filepath, current_h, current_w):
     """
     This function now uses the new, robust calculation.
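
A minimal sketch of the divisor-snapping idea behind calculate_new_dimensions; the _round_to_multiple helper below is hypothetical, and the real function may round toward a different multiple:

    def _round_to_multiple(x: float, divisor: int = 8) -> int:
        # Snap a scaled side length to the nearest multiple of `divisor`.
        return max(divisor, int(round(x / divisor)) * divisor)

    # e.g. a 1000x562 source becomes 1000x560 with divisor=8, and the service
    # then returns (height, width) = (560, 1000).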
@@ -168,8 +172,6 @@ def handle_media_upload_for_dims(filepath, current_h, current_w):
     except Exception as e:
         print(f"Error while processing media for dimensions: {e}")
         return gr.update(value=current_h), gr.update(value=current_w)
-
-
 def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
     if not processes:
         return " - Active processes: (none)\n"
@@ -180,52 +182,6 @@ def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
         used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
         lines.append(f" {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
     return "\n".join(lines) + "\n"
-
-def run_setup():
-    setup_script_path = "setup.py"
-    if not os.path.exists(setup_script_path):
-        print("[DEBUG] 'setup.py' not found. Skipping dependency cloning.")
-        return
-    try:
-        print("[DEBUG] Running setup.py for dependencies...")
-        subprocess.run([sys.executable, setup_script_path], check=True)
-        print("[DEBUG] Setup finished successfully.")
-    except subprocess.CalledProcessError as e:
-        print(f"[DEBUG] ERROR in setup.py (code {e.returncode}). Aborting.")
-        sys.exit(1)
-
-from api.ltx.inference import (
-    create_ltx_video_pipeline,
-    create_latent_upsampler,
-    load_image_to_tensor_with_resize_and_crop,
-    seed_everething,
-    calculate_padding,
-    load_media_file,
-)
-
-DEPS_DIR = Path("/data")
-LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-if not LTX_VIDEO_REPO_DIR.exists():
-    print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup...")
-    run_setup()
-
-def add_deps_to_path():
-    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
-    if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
-        sys.path.insert(0, repo_path)
-    print(f"[DEBUG] Repo added to sys.path: {repo_path}")
-
-add_deps_to_path()
-
-# --- 3. MODEL-SPECIFIC IMPORTS ---
-
-from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
-from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
-
-
-# --- 4. LOG HELPER FUNCTIONS ---
 def log_tensor_info(tensor, name="Tensor"):
     if not isinstance(tensor, torch.Tensor):
         print(f"\n[INFO] '{name}' is not a tensor.")
@@ -240,12 +196,19 @@ def log_tensor_info(tensor, name="Tensor"):
     except Exception:
         pass
     print("------------------------------------------\n")
-
-
-
-
-
-# --- 5. MAIN SERVICE CLASS ---
+add_deps_to_path()
+from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
+from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
+from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
+from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
+from api.ltx.inference import (
+    create_ltx_video_pipeline,
+    create_latent_upsampler,
+    load_image_to_tensor_with_resize_and_crop,
+    seed_everething,
+    calculate_padding,
+    load_media_file,
+)
 class VideoService:
     def __init__(self):
         t0 = time.perf_counter()
@@ -366,57 +329,31 @@ class VideoService:
         return yaml.safe_load(file)
 
     def _load_models(self):
-        """
-        Loads the models intelligently:
-        1. First tries to resolve the path from the local cache (fast, no network).
-        2. If the file is not found locally, downloads it as a fallback.
-        Guarantees the service can start even if setup.py was never run.
-        """
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
-
-        print("[DEBUG] Resolving model paths intelligently...")
-
-        # --- Helper for smart loading ---
-        def get_or_download_model(repo_id, filename, description):
-            try:
-                # hf_hub_download is the right tool here. It checks the cache FIRST.
-                # If the file is already cached, it returns the path instantly (after a quick metadata check).
-                # If it is not cached, it downloads the file.
-                print(f"[DEBUG] Checking {description}: {filename}...")
-                model_path = hf_hub_download(
-                    repo_id=repo_id,
-                    filename=filename,
-                    # Force a specific cache location if needed
-                    cache_dir=os.getenv("HF_HOME_CACHE"),
-                    token=os.getenv("HF_TOKEN")
-                )
-                print(f"[DEBUG] Path for the {description} resolved successfully.")
-                return model_path
-            except Exception as e:
-                print("\n" + "="*80)
-                print(f"[CRITICAL ERROR] Failed to obtain model '{filename}'.")
-                print(f"Error detail: {e}")
-                print("Check your internet connection or the state of the Hugging Face cache.")
-                print("="*80 + "\n")
-                sys.exit(1)
-
-        # --- Main checkpoint ---
-        checkpoint_filename = self.config["checkpoint_path"]
-        distilled_model_path = get_or_download_model(
-            LTX_REPO, checkpoint_filename, "main checkpoint"
-        )
+        print("[DEBUG] Downloading main checkpoint...")
+        distilled_model_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["checkpoint_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN"),
+        )
         self.config["checkpoint_path"] = distilled_model_path
+        print(f"[DEBUG] Checkpoint at: {distilled_model_path}")
 
-        # --- Spatial upscaler ---
-        upscaler_filename = self.config["spatial_upscaler_model_path"]
-        spatial_upscaler_path = get_or_download_model(
-            LTX_REPO, upscaler_filename, "spatial upscaler"
-        )
+        print("[DEBUG] Downloading spatial upscaler...")
+        spatial_upscaler_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["spatial_upscaler_model_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN")
+        )
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
+        print(f"[DEBUG] Upscaler at: {spatial_upscaler_path}")
 
-        # --- Building the pipelines ---
-        print("\n[DEBUG] Building pipeline from the resolved paths...")
+        print("[DEBUG] Building pipeline...")
         pipeline = create_ltx_video_pipeline(
             ckpt_path=self.config["checkpoint_path"],
             precision=self.config["precision"],
@@ -434,7 +371,6 @@ class VideoService:
         print("[DEBUG] Building latent_upsampler...")
         latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
         print("[DEBUG] Upsampler ready.")
-
         print(f"[DEBUG] _load_models() total time={time.perf_counter()-t0:.3f}s")
         return pipeline, latent_upsampler
 
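
The deleted comments above describe hf_hub_download's cache-first behavior; a small sketch of that property (the filename is a placeholder, not a real file in the repo):

    from huggingface_hub import hf_hub_download

    p1 = hf_hub_download(repo_id="Lightricks/LTX-Video", filename="<checkpoint>.safetensors")
    p2 = hf_hub_download(repo_id="Lightricks/LTX-Video", filename="<checkpoint>.safetensors")
    assert p1 == p2  # the second call resolves from the local cache after a metadata check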
@@ -462,6 +398,8 @@ class VideoService:
             pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
 
+
+
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
         """
@@ -478,8 +416,11 @@ class VideoService:
         upsampled_latents = self.latent_upsampler(latents)
         upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
         print(f"[DEBUG-UPSAMPLE] Output shape: {tuple(upsampled_latents.shape)}")
+
         return upsampled_latents
 
+
+
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
@@ -513,6 +454,7 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
 
+
     def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
         """
         Splits the latent tensor into chunks whose size is given as a number of latents.
@@ -543,10 +485,10 @@ class VideoService:
             start = (num_latente_por_chunk*i)
             end = (start+num_latente_por_chunk+overlap)
             if i+1 < n_chunks:
-                chunk = latents_brutos[:, :, start:end, :, :].detach()
+                chunk = latents_brutos[:, :, start:end, :, :].clone().detach()
                 print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
             else:
-                chunk = latents_brutos[:, :, start:, :, :].detach()
+                chunk = latents_brutos[:, :, start:, :, :].clone().detach()
                 print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
             chunks.append(chunk)
             i+=1
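
For concreteness, the slicing above produces overlapping temporal windows; a sketch with assumed sizes (10 temporal latents, num_latente_por_chunk=4, overlap=1, so presumably three chunks):

    import torch
    latents = torch.zeros(1, 128, 10, 16, 16)  # (B, C, T, H, W); shapes are illustrative
    # i=0 -> [:, :, 0:5] (5 latents), i=1 -> [:, :, 4:9] (5 latents),
    # i=2 -> [:, :, 8:]  (2 latents, the tail).
    # Adjacent chunks share one latent, which is the overlap that lets the
    # decoded parts be joined without a visible seam.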
@@ -570,21 +512,6 @@ class VideoService:
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
         return int(result.stdout.strip())
 
-
-
-    def _dividir_latentes(self, latents_brutos):
-        total = latents_brutos.shape[2]  # temporal dimension (number of latents)
-
-        #if total % 2 == 1:  # ODD
-        # e.g. 11 → first 0..5, second 5..10
-        cut = total // 2
-        primeira = latents_brutos[:, :, :cut+1, :, :].detach()
-        segunda = latents_brutos[:, :, cut:, :, :].detach()
-
-
-        return primeira, segunda
-
-
     def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
         """
         Generates a new list of videos applying smooth transitions (frame-by-frame blending)
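
The deleted _dividir_latentes splits at the temporal midpoint with a one-latent overlap, as its "11 → first 0..5, second 5..10" comment indicates; a runnable check of that boundary (shapes illustrative):

    import torch
    t = torch.zeros(1, 128, 11, 16, 16)     # 11 temporal latents
    cut = t.shape[2] // 2                   # 5
    first, second = t[:, :, :cut + 1], t[:, :, cut:]
    assert first.shape[2] == 6 and second.shape[2] == 6  # both halves share index 5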
@@ -662,25 +589,10 @@ class VideoService:
             print(f"[DEBUG] Trimmed video {i+1} added, {self._get_total_frames(video_podado)} frames ✅")
 
 
+
             print("=========== CAUSAL CONCATENATION =============")
             print(f"[DEBUG] {nova_lista}")
         return nova_lista
-
-    def _concat_mp4s_no_reencode2(self, mp4_a: str, mp4_b: str, out_path: str):
-        # ffmpeg concat demuxer (no re-encode)
-        import tempfile, subprocess, shlex, os
-        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
-            f.write(f"file '{os.path.abspath(mp4_a)}'\n")
-            f.write(f"file '{os.path.abspath(mp4_b)}'\n")
-            list_path = f.name
-        cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
-        print(f"[DEBUG] Concat: {cmd}")
-        try:
-            subprocess.check_call(shlex.split(cmd))
-        finally:
-            try: os.remove(list_path)
-            except Exception: pass
-
 
     def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
         """
@@ -759,20 +671,14 @@ class VideoService:
         if mode == "image-to-video":
             start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
             conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
-            conditioning_items1.append(ConditioningItem(start_tensor, 0, 1.0))
-
         if middle_image_filepath and middle_frame_number is not None:
             middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
             safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
             conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
-            conditioning_items1.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
-
         if end_image_filepath:
             end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
             last_frame_index = actual_num_frames - 1
             conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
-            conditioning_items2.append(ConditioningItem(end_tensor, last_frame_index//2, 1.0))
-
         print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
 
         call_kwargs = {
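
Each ConditioningItem above binds a prepared tensor to a frame index with a weight, so a three-anchor setup is assembled like this (values illustrative, variables as defined in the surrounding code):

    # ConditioningItem(tensor, frame_index, weight)
    # conditioning_items = [
    #     ConditioningItem(start_tensor, 0, 1.0),                    # pin the first frame
    #     ConditioningItem(middle_tensor, safe_middle_frame, 0.5),   # soft midpoint anchor
    #     ConditioningItem(end_tensor, actual_num_frames - 1, 1.0),  # pin the last frame
    # ]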
@@ -789,7 +695,7 @@ class VideoService:
             "decode_timestep": self.config["decode_timestep"],
             "decode_noise_scale": self.config["decode_noise_scale"],
             "stochastic_sampling": self.config["stochastic_sampling"],
-            "image_cond_noise_scale": 0.05,
+            "image_cond_noise_scale": 0.01,
             "is_video": True,
             "vae_per_channel_normalize": True,
             "mixed_precision": (self.config["precision"] == "mixed_precision"),
@@ -797,12 +703,8 @@ class VideoService:
             "enhance_prompt": False,
             "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
-        print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
-
         latents = None
         latents_list = []
-        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-
 
         try:
             ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
@@ -819,7 +721,6 @@ class VideoService:
             downscale_factor = self.config.get("downscale_factor", 0.6666666)
             vae_scale_factor = self.pipeline.vae_scale_factor  # usually 8
 
-            # --- <START OF THE EXACT CALCULATION LOGIC> ---
             # Replicates the LTXMultiScalePipeline formula
             x_width = int(width_padded * downscale_factor)
             downscaled_width = x_width - (x_width % vae_scale_factor)
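
A worked instance of the two lines above, assuming width_padded=1280, downscale_factor=0.6666666 and vae_scale_factor=8:

    # x_width          = int(1280 * 0.6666666) = 853
    # downscaled_width = 853 - (853 % 8)       = 848
    # The first pass therefore runs at 848 px wide; the same floor-to-multiple
    # step is presumably applied to the height before the low-resolution pass.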
@@ -852,8 +753,8 @@ class VideoService:
             log_tensor_info(upsampled_latents, "Post-Upscale Latents")
             print(f"[DEBUG] Latent upscale finished in {time.perf_counter() - t_upscale:.2f}s")
             del base_latents; gc.collect(); torch.cuda.empty_cache()
-
 
+            par = 0
             latents_cpu_up = upsampled_latents.detach().to("cpu", non_blocking=True)
             torch.cuda.empty_cache()
             try:
@@ -861,18 +762,11 @@ class VideoService:
                 torch.cuda.ipc_collect()
             except Exception:
                 pass
 
-            #latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,15,0)
-
-            lat_aup, lat_bup = self._dividir_latentes(latents_cpu_up)
-            print(f"[DEBUG] Partition Aup: {tuple(lat_aup.shape)}")
-            print(f"[DEBUG] Partition Bup: {tuple(lat_bup.shape)}")
-
-            latents_parts_up = [lat_aup, lat_bup]
-
-            #latents_parts_up = [latents_cpu_up]
-
-            par = 0
+            latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up, 4, 1)
+            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
             for latents in latents_parts_up:
 
                 # # --- STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---
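
The detach → CPU copy → cache-release sequence above recurs throughout this method; a compact helper capturing it (a sketch, not part of the file):

    import torch

    def offload_to_cpu(t: torch.Tensor) -> torch.Tensor:
        t_cpu = t.detach().to("cpu", non_blocking=True)
        torch.cuda.empty_cache()      # release cached CUDA blocks
        try:
            torch.cuda.ipc_collect()  # reclaim IPC-shared memory, if any
        except Exception:
            pass
        return t_cpu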
@@ -886,39 +780,16 @@ class VideoService:
                 print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
                 # --- <END OF THE EXACT CALCULATION LOGIC> ---
                 t_pass2 = time.perf_counter()
-
-                num_latent_frames_part = latents.shape[2]
-
-                vae_temporal_scale = self.pipeline.video_scale_factor  # usually 4 or 8
-                num_pixel_frames_part = ((num_latent_frames_part - 1) * vae_temporal_scale) + 1
-                print(f"[DEBUG] Part: {num_latent_frames_part - 1} latents -> {num_pixel_frames_part} pixel frames (target)")
+
                 second_pass_kwargs = call_kwargs.copy()
-
-                if par==0:
-                    second_pass_kwargs.update({
-                        "conditioning_items": conditioning_items1,
-                        "output_type": "latent",
-                        "width": second_pass_width,
-                        "height": second_pass_height,
-                        "num_frames": num_pixel_frames_part,
-                        "latents": latents,  # the upscaled tensor
-                        "guidance_scale": float(guidance_scale),
-                        **second_pass_config
-                    })
-                else:
-                    second_pass_kwargs.update({
-                        "conditioning_items": conditioning_items2,
-                        "output_type": "latent",
-                        "width": second_pass_width,
-                        "height": second_pass_height,
-                        "num_frames": num_pixel_frames_part,
-                        "latents": latents,  # the upscaled tensor
-                        "guidance_scale": float(guidance_scale),
-                        **second_pass_config
-                    })
-                par+=1
+                second_pass_kwargs.update({
+                    "output_type": "latent",
+                    "width": second_pass_width,
+                    "height": second_pass_height,
+                    "latents": upsampled_latents,  # the upscaled tensor
+                    "guidance_scale": float(guidance_scale),
+                    **second_pass_config
+                })
 
                 print(f"[DEBUG] Second Pass: refining at {width_padded}x{height_padded}...")
                 final_latents = self.pipeline(**second_pass_kwargs).images
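
The deleted frame computation follows the causal layout of the video VAE, where the first frame is encoded on its own; with video_scale_factor=8, a part holding 9 temporal latents targets 65 pixel frames:

    # num_pixel_frames = ((num_latent_frames - 1) * vae_temporal_scale) + 1
    # e.g. (9 - 1) * 8 + 1 = 65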
@@ -943,70 +814,62 @@ class VideoService:
 
             # --- FINAL STAGE: DECODING AND MP4 ENCODING ---
             print("\n--- STARTING FINAL STAGE: DECODING AND ASSEMBLY ---")
-
-            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
-            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
+            #latents_cpu = latents.detach().to("cpu", non_blocking=True)
+            #torch.cuda.empty_cache()
+            #try:
+            #    torch.cuda.ipc_collect()
+            #except Exception:
+            #    pass
+
+            latents_parts = []
+            for latents in latents_list:
+                latents_cpu = latents.detach().to("cpu", non_blocking=True)
+                latents_parts.extend(self._dividir_latentes_por_tamanho(latents_cpu, 4, 1))
+            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
 
             partes_mp4 = []
             par = 0
-
-            for latents_vae in latents_list:
-
-                latents_cpu_vae = latents_vae.detach().to("cpu", non_blocking=True)
-                torch.cuda.empty_cache()
-                try:
-                    torch.cuda.ipc_collect()
-                except Exception:
-                    pass
-
-                #latents_parts_vae = self._dividir_latentes_por_tamanho(latents_cpu_vae,4,1)
-
-                lat_a, lat_b = self._dividir_latentes(latents_cpu_vae)
-                print(f"[DEBUG] Partition A: {tuple(lat_a.shape)}")
-                print(f"[DEBUG] Partition B: {tuple(lat_b.shape)}")
-
-                latents_parts_vae = [lat_a, lat_b]
-
-                for latents in latents_parts_vae:
-                    #print(f"[DEBUG] Partition {par}: {tuple(latents.shape)}")
-
-                    par = par + 1
-                    output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
-                    final_output_path = None
-
-                    print("[DEBUG] Decoding the latent block with the VAE → pixel tensor...")
-                    # Use the manager with a per-item timestep; prevents target_shape and the NoneType.decode route
-                    pixel_tensor = vae_manager_singleton.decode(
-                        latents.to(self.device, non_blocking=True),
-                        decode_timestep=float(self.config.get("decode_timestep", 0.05))
-                    )
-                    log_tensor_info(pixel_tensor, "Pixel tensor (VAE output)")
-
-                    print("[DEBUG] Encoding the MP4 from the pixel tensor (whole block)...")
-                    video_encode_tool_singleton.save_video_from_tensor(
-                        pixel_tensor,
-                        output_video_path,
-                        fps=call_kwargs["frame_rate"],
-                        progress_callback=progress_callback,
-                    )
-
-                    try:
-                        candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
-                        shutil.move(output_video_path, candidate)
-                        print(f"[DEBUG] MP4 part {par} moved to {candidate}")
-                        partes_mp4.append(candidate)
-
-                    except Exception as e:
-                        final_output_path = output_video_path
-                        print(f"[DEBUG] Move failed; using tmp as final: {e}")
-
+            for latents in latents_parts:
+                print(f"[DEBUG] Partition {par}: {tuple(latents.shape)}")
+
+                par = par + 1
+                output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
+                final_output_path = None
+
+                print("[DEBUG] Decoding the latent block with the VAE → pixel tensor...")
+                # Use the manager with a per-item timestep; prevents target_shape and the NoneType.decode route
+                pixel_tensor = vae_manager_singleton.decode(
+                    latents.to(self.device, non_blocking=True),
+                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                )
+                log_tensor_info(pixel_tensor, "Pixel tensor (VAE output)")
+
+                print("[DEBUG] Encoding the MP4 from the pixel tensor (whole block)...")
+                video_encode_tool_singleton.save_video_from_tensor(
+                    pixel_tensor,
+                    output_video_path,
+                    fps=call_kwargs["frame_rate"],
+                    progress_callback=progress_callback
+                )
+
+                candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                try:
+                    shutil.move(output_video_path, candidate)
+                    final_output_path = candidate
+                    print(f"[DEBUG] MP4 part {par} moved to {final_output_path}")
+                    partes_mp4.append(final_output_path)
+                except Exception as e:
+                    final_output_path = output_video_path
+                    print(f"[DEBUG] Move failed; using tmp as final: {e}")
 
             total_partes = len(partes_mp4)
             if (total_partes>1):
                 final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
-                #partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
-                final_vid = video_encode_tool_singleton.concatenate_videos(video_paths=partes_mp4, output_path="concate_fim.mp4", workspace_dir=results_dir)
-                self._concat_mp4s_no_reencode(partes_mp4, final_vid)
+                partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
+                self._concat_mp4s_no_reencode(partes_mp4_fade, final_vid)
             else:
                 final_vid = partes_mp4[0]