Test

Paused

App Files Files Community

Eueuiaa committed on Oct 9

Commit

d67548c

verified ·

1 Parent(s): 8e066ce

Delete api/ltx_server_refactored.py

Browse files

Files changed (1) hide show

api/ltx_server_refactored.py +0 -462

api/ltx_server_refactored.py DELETED Viewed

@@ -1,462 +0,0 @@
-# ltx_server.py — VideoService (beta 1.2 - Robusto e Completo)
-# DESCRIÇÃO:
-# - Servidor de geração de vídeo com pipeline de 2 passes para melhoria de textura.
-# - Gerenciamento de memória robusto com limpeza garantida via `finalize()`.
-# - Cálculo de dimensões inteligente para preservar a proporção e evitar erros.
-# - Suporte para divisão de tarefas longas em chunks para evitar OOM (Out of Memory).
-# - Concatenação de chunks com transições suaves (crossfade) para um resultado contínuo.
-# --- 0. WARNINGS, IMPORTS E CONFIGURAÇÃO DE AMBIENTE ---
-import warnings
-warnings.filterwarnings("ignore", category=UserWarning)
-warnings.filterwarnings("ignore", category=FutureWarning)
-from huggingface_hub import logging as hf_logging, hf_hub_download
-hf_logging.set_verbosity_error()
-import os
-import sys
-import subprocess
-import shlex
-import tempfile
-import gc
-import shutil
-import contextlib
-import time
-import traceback
-import json
-import yaml
-import random
-from typing import List, Dict
-from pathlib import Path
-import torch
-import torch.nn.functional as F
-import numpy as np
-import imageio
-from PIL import Image
-from einops import rearrange
# --- Environment variables and constants ---
# Verbose diagnostics stay enabled unless LTXV_DEBUG is set to something other than "1".
LTXV_DEBUG = os.getenv("LTXV_DEBUG", "1") == "1"
# Parsed defensively: a malformed value must not abort the import of the whole module.
try:
    LTXV_FRAME_LOG_EVERY = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
except ValueError:
    print("[WARN] LTXV_FRAME_LOG_EVERY inválido; usando o padrão 8.")
    LTXV_FRAME_LOG_EVERY = 8
# Root directory holding external dependencies, and the LTX-Video checkout inside it.
DEPS_DIR = Path("/data")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-# --- 1. SETUP E GERENCIAMENTO DE DEPENDÊNCIAS ---
def run_setup():
    """Run ``setup.py`` from the current working directory, if it is present.

    The script is executed with the running interpreter and its output is
    captured. When the script is missing the function only logs and returns.
    When the script exits with a non-zero status, the captured stdout/stderr
    are printed and the whole process terminates with exit code 1.
    """
    setup_script_path = "setup.py"
    if os.path.exists(setup_script_path):
        try:
            print("[DEBUG] Executando setup.py para instalar dependências...")
            subprocess.run(
                [sys.executable, setup_script_path],
                check=True,
                capture_output=True,
                text=True,
            )
            print("[DEBUG] Setup concluído com sucesso.")
        except subprocess.CalledProcessError as e:
            print(f"[ERROR] Falha crítica ao executar setup.py (código {e.returncode}).\nOutput:\n{e.stdout}\n{e.stderr}")
            # A failed setup leaves the dependencies unusable, so stop the process.
            sys.exit(1)
    else:
        print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
def add_deps_to_path():
    """Put the resolved LTX-Video repository directory at the front of ``sys.path``.

    Nothing happens when that exact path string is already listed, so calling
    the function repeatedly does not add duplicates.
    """
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    if repo_path in sys.path:
        return
    # Index 0 so modules from the repository are found before same-named ones elsewhere.
    sys.path.insert(0, repo_path)
    print(f"[DEBUG] Repositório LTX-Video adicionado ao sys.path: {repo_path}")
# Import-time bootstrap: run the setup script when the repository directory is
# absent, then always expose the repository on sys.path for the imports below.
repo_is_present = LTX_VIDEO_REPO_DIR.exists()
if not repo_is_present:
    print(f"[INFO] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Executando setup...")
    run_setup()
add_deps_to_path()
-# --- Importações que dependem do sys.path modificado ---
-from managers.vae_manager import vae_manager_singleton
-from tools.video_encode_tool import video_encode_tool_singleton
-from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline, adain_filter_latent
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
-from api.ltx.inference import (
-    create_ltx_video_pipeline, create_latent_upsampler,
-    load_image_to_tensor_with_resize_and_crop, seed_everething,
-    calculate_padding, load_media_file
-)
-# --- 2. FUNÇÕES UTILITÁRIAS INTELIGENTES ---
def calculate_new_dimensions(orig_w, orig_h, target_area=512*768, divisor=8):
    """Compute a frame size near ``target_area`` pixels that keeps the aspect ratio.

    Both sides of the result are multiples of ``divisor`` and never smaller
    than ``divisor``.

    Args:
        orig_w: Original width in pixels.
        orig_h: Original height in pixels.
        target_area: Desired pixel count (width * height) of the result.
        divisor: Positive value both returned sides must be a multiple of.

    Returns:
        A ``(height, width)`` tuple. Note the order: height comes first even
        though the arguments are given as width, height. When ``orig_w`` or
        ``orig_h`` is not positive, the fixed fallback ``(512, 768)`` is
        returned regardless of ``target_area`` and ``divisor``.

    Raises:
        ValueError: If ``divisor`` is not positive or ``target_area`` is negative.
    """
    # Reject inputs that would otherwise surface as ZeroDivisionError, or as a
    # TypeError from int() applied to the complex square root of a negative area.
    if divisor <= 0:
        raise ValueError(f"divisor deve ser positivo (recebido: {divisor})")
    if target_area < 0:
        raise ValueError(f"target_area não pode ser negativa (recebido: {target_area})")

    if orig_w <= 0 or orig_h <= 0:
        # The message uses the same width x height convention as the rest of the log.
        print(f"[WARN] Dimensões originais inválidas ({orig_w}x{orig_h}). Usando padrão 768x512 (largura x altura).")
        return 512, 768

    aspect_ratio = orig_w / orig_h
    # From w * h = area and w = h * aspect_ratio it follows h = sqrt(area / aspect_ratio).
    new_h = int((target_area / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Snap each side to the nearest multiple of the divisor, with a floor of one divisor.
    final_w = max(divisor, round(new_w / divisor) * divisor)
    final_h = max(divisor, round(new_h / divisor) * divisor)

    if LTXV_DEBUG:
        print(f"[Dimension Calc] Original: {orig_w}x{orig_h} (AR: {aspect_ratio:.2f}) -> "
              f"Calculado: {new_w}x{new_h} -> Final (múltiplo de {divisor}): {final_w}x{final_h}")
    return final_h, final_w
def log_tensor_info(tensor, name="Tensor"):
    """Print shape, dtype, device and min/max/mean of ``tensor`` for debugging.

    Does nothing unless ``LTXV_DEBUG`` is enabled. Non-tensor inputs are
    reported as such. Statistics are skipped for empty tensors, and a failure
    while computing them is logged instead of raised.

    Args:
        tensor: Object to describe; expected to be a ``torch.Tensor``.
        name: Label used in the printed header.
    """
    if not LTXV_DEBUG:
        return
    if not isinstance(tensor, torch.Tensor):
        print(f"\n[INFO] '{name}' não é um tensor.")
        return
    print(f"\n--- Tensor: {name} ---")
    print(f"  - Shape: {tuple(tensor.shape)}")
    print(f"  - Dtype: {tensor.dtype}")
    print(f"  - Device: {tensor.device}")
    if tensor.numel() > 0:
        try:
            # mean() is not defined for integer/bool dtypes, so the statistics
            # are computed on a float32 view (no copy when already float32).
            stats_source = tensor.detach().to(torch.float32)
            print(f"  - Stats: Min={stats_source.min().item():.4f}, Max={stats_source.max().item():.4f}, Mean={stats_source.mean().item():.4f}")
        except Exception as e:
            # Deliberately broad: this is best-effort diagnostics and must never
            # interrupt the generation it is observing.
            print(f"  - Stats: Falha ao calcular estatísticas - {e}")
    print("------------------------------------------\n")
-# --- 3. CLASSE PRINCIPAL DO SERVIÇO DE VÍDEO ---
class VideoService:
    """Video generation service built around the LTX-Video pipeline.

    Construction loads the YAML configuration, downloads and builds the
    models, moves them to the selected device and attaches the pipeline to the
    shared VAE manager. ``generate`` then renders a prompt (optionally
    conditioned on a start image) to an MP4 file, and ``finalize`` removes the
    temporary paths registered in ``_tmp_files`` / ``_tmp_dirs``.
    """

    def __init__(self):
        """Load configuration and models and prepare them on the target device."""
        t0 = time.perf_counter()
        print("[INFO] Inicializando VideoService...")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.config = self._load_config()
        print(f"[INFO] Config carregada (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
        print(f"[INFO] Dispositivo selecionado: {self.device}")
        # Temporary paths created while generating; finalize() deletes them.
        self._tmp_dirs = set()
        self._tmp_files = set()
        self.pipeline, self.latent_upsampler = self._load_models()
        print("[INFO] Movendo modelos para o dispositivo...")
        self.pipeline.to(self.device)
        if self.latent_upsampler is not None:
            self.latent_upsampler.to(self.device)
        self._apply_precision_policy()
        vae_manager_singleton.attach_pipeline(
            self.pipeline,
            device=self.device,
            autocast_dtype=self.runtime_autocast_dtype
        )
        print("[INFO] VAE manager conectado ao pipeline.")
        self._empty_cuda_cache()
        print(f"[SUCCESS] VideoService pronto. Tempo de inicialização: {time.perf_counter()-t0:.2f}s")

    # --- INTERNAL METHODS: INITIALIZATION AND SETUP ---
    def _load_config(self):
        """Return the first model configuration found among the known YAML candidates.

        Raises:
            FileNotFoundError: If none of the candidate files exists.
        """
        base = LTX_VIDEO_REPO_DIR / "configs"
        # Ordered by preference; the first existing file wins.
        candidates = [
            base / "ltxv-13b-0.9.8-dev-fp8.yaml",
            base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
            base / "ltxv-13b-0.9.8-distilled.yaml",
        ]
        for cfg_path in candidates:
            if cfg_path.exists():
                print(f"[DEBUG] Configuração encontrada e selecionada: {cfg_path}")
                # Explicit encoding so parsing does not depend on the platform locale.
                with open(cfg_path, "r", encoding="utf-8") as file:
                    return yaml.safe_load(file)
        raise FileNotFoundError(f"Nenhum arquivo de configuração YAML encontrado em {base}. Verifique a instalação.")

    def _download_from_hub(self, filename):
        """Download ``filename`` from the LTX-Video Hub repository and return its local path."""
        return hf_hub_download(
            repo_id="Lightricks/LTX-Video",
            filename=filename,
            local_dir=os.getenv("HF_HOME"),
            cache_dir=os.getenv("HF_HOME_CACHE"),
            token=os.getenv("HF_TOKEN"),
        )

    def _load_models(self):
        """Download the checkpoints and build the pipeline and the latent upsampler on CPU.

        The config entries ``checkpoint_path`` and ``spatial_upscaler_model_path``
        are overwritten with the downloaded local paths. The upscaler is
        optional: when it is not configured, no download is attempted.

        Returns:
            ``(pipeline, latent_upsampler)``; ``latent_upsampler`` is ``None``
            when no spatial upscaler is configured.
        """
        t0 = time.perf_counter()
        print("[DEBUG] Baixando checkpoint principal...")
        distilled_model_path = self._download_from_hub(self.config["checkpoint_path"])
        self.config["checkpoint_path"] = distilled_model_path
        print(f"[DEBUG] Checkpoint em: {distilled_model_path}")

        # Read with .get(): the upscaler is treated as optional further below,
        # so a missing key must not raise KeyError here.
        if self.config.get("spatial_upscaler_model_path"):
            print("[DEBUG] Baixando upscaler espacial...")
            spatial_upscaler_path = self._download_from_hub(self.config["spatial_upscaler_model_path"])
            self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
            print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")

        print("[DEBUG] Construindo pipeline...")
        pipeline = create_ltx_video_pipeline(
            ckpt_path=self.config["checkpoint_path"],
            precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"],
            device="cpu",
            enhance_prompt=False,
            prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
            prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
        )
        print("[DEBUG] Pipeline pronto.")

        latent_upsampler = None
        if self.config.get("spatial_upscaler_model_path"):
            print("[DEBUG] Construindo latent_upsampler...")
            latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
            print("[DEBUG] Upsampler pronto.")
        print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
        return pipeline, latent_upsampler

    def _apply_precision_policy(self):
        """Set ``runtime_autocast_dtype`` from the ``precision`` config entry.

        bfloat16 and 8-bit float precisions map to ``torch.bfloat16``; half
        and "mixed_precision" map to ``torch.float16``; anything else keeps
        ``torch.float32``.
        """
        prec = str(self.config.get("precision", "")).lower()
        self.runtime_autocast_dtype = torch.float32
        # "bfloat16" is tested first because "float16" is a substring of it.
        # Both spellings of each family are accepted ("fp8"/"float8",
        # "fp16"/"float16") so full dtype names do not fall through to float32.
        if "bfloat16" in prec or "fp8" in prec or "float8" in prec:
            self.runtime_autocast_dtype = torch.bfloat16
        # NOTE(review): "mixed_precision" is mapped to float16 as in the original
        # code; confirm this matches what the pipeline expects for that setting.
        elif "mixed_precision" in prec or "fp16" in prec or "float16" in prec:
            self.runtime_autocast_dtype = torch.float16
        print(f"[INFO] Política de precisão aplicada. Dtype para Autocast: {self.runtime_autocast_dtype}")

    # --- INTERNAL METHODS: DEVICE AND MEMORY HELPERS ---
    def _empty_cuda_cache(self):
        """Release cached CUDA memory; a no-op when the service runs on CPU."""
        if self.device == "cuda":
            torch.cuda.empty_cache()

    def _release_memory(self):
        """Run the garbage collector, then release cached CUDA memory if applicable."""
        gc.collect()
        self._empty_cuda_cache()

    def _autocast_context(self):
        """Return the autocast context for inference on the current device.

        Autocast is enabled only on CUDA with a reduced-precision dtype. On
        CPU, or when the runtime dtype is float32, a null context is returned
        instead of requesting CUDA autocast that cannot apply.
        """
        if self.device != "cuda" or self.runtime_autocast_dtype == torch.float32:
            return contextlib.nullcontext()
        return torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)

    # --- INTERNAL METHODS: TENSOR AND VIDEO OPERATIONS ---
    def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
        """Load an image as a tensor, pad it and move it to the service device and dtype."""
        tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
        tensor = F.pad(tensor, padding_values)
        return tensor.to(self.device, dtype=self.runtime_autocast_dtype)

    @torch.no_grad()
    def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
        """Upscale latents with the latent upsampler.

        The latents are un-normalized with the pipeline VAE, passed through
        the upsampler and normalized again.

        Raises:
            ValueError: If no latent upsampler is loaded.
        """
        if self.latent_upsampler is None:
            raise ValueError("Latent Upsampler não está carregado, mas foi solicitado.")
        # Make sure both models are on the service device before using them.
        self.latent_upsampler.to(self.device)
        self.pipeline.vae.to(self.device)
        latents_up = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
        latents_up = self.latent_upsampler(latents_up)
        latents_up = normalize_latents(latents_up, self.pipeline.vae, vae_per_channel_normalize=True)
        return latents_up

    # --- MAIN CLEANUP METHOD ---
    def finalize(self, keep_paths=None, clear_gpu=True):
        """Delete registered temporary files/directories and free memory.

        Every registered path is unregistered whether or not its removal
        succeeded. Removal failures are logged, never raised.

        Args:
            keep_paths: Paths that must not be deleted (they are still unregistered).
            clear_gpu: When true and running on CUDA, also empty the CUDA cache.
        """
        print("[INFO] Finalize: iniciando limpeza de recursos...")
        keep = set(keep_paths or [])
        # Iterate over snapshots because the sets are mutated inside the loops.
        files_to_clean, dirs_to_clean = list(self._tmp_files), list(self._tmp_dirs)
        removed_files, removed_dirs = 0, 0
        for f in files_to_clean:
            try:
                if f not in keep and os.path.isfile(f):
                    os.remove(f)
                    removed_files += 1
            except OSError as e:
                print(f"[WARN] Falha ao remover arquivo temporário {f}: {e}")
            finally:
                self._tmp_files.discard(f)
        for d in dirs_to_clean:
            try:
                if d not in keep and os.path.isdir(d):
                    shutil.rmtree(d, ignore_errors=True)
                    removed_dirs += 1
            except OSError as e:
                print(f"[WARN] Falha ao remover diretório temporário {d}: {e}")
            finally:
                self._tmp_dirs.discard(d)
        if LTXV_DEBUG:
            print(f"[DEBUG] Limpeza de disco: {removed_files} arquivos e {removed_dirs} diretórios removidos.")
        gc.collect()
        if clear_gpu and self.device == "cuda":
            try:
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()
                if LTXV_DEBUG:
                    print("[DEBUG] Limpeza da GPU concluída com sucesso.")
            except Exception as e:
                # Cleanup is best-effort; a GPU error here must not mask the caller's result.
                print(f"[ERROR] Falha crítica durante a limpeza da GPU: {e}")

    # ==============================================================================
    # --- MAIN GENERATION FUNCTION (generate) ---
    # ==============================================================================
    @torch.no_grad()
    def generate(
        self,
        prompt: str,
        negative_prompt: str = "",
        mode: str = "text-to-video",
        start_image_filepath: str = None,
        height: int = 512,
        width: int = 704,
        duration: float = 2.0,
        seed: int = 42,
        randomize_seed: bool = True,
        guidance_scale: float = 3.0,
        improve_texture: bool = True,
    ):
        """Generate a video for ``prompt`` and return ``(output_path, seed_used)``.

        Args:
            prompt: Text prompt.
            negative_prompt: Text describing what to avoid.
            mode: ``"image-to-video"`` conditions on ``start_image_filepath``;
                any other value generates from text only.
            start_image_filepath: Start image, required for ``"image-to-video"``.
            height: Requested frame height in pixels (padded up to a multiple of 8).
            width: Requested frame width in pixels (padded up to a multiple of 8).
            duration: Target length in seconds at 24 fps; the frame count is
                snapped to ``8 * n + 1`` and clamped to the range 9..2570.
            seed: Seed used when ``randomize_seed`` is false.
            randomize_seed: Draw a random 32-bit seed instead of using ``seed``.
            guidance_scale: Guidance scale forwarded to every pipeline pass.
            improve_texture: Run the two-pass flow (low-resolution pass, latent
                upscale, refinement pass) instead of a single pass.

        Returns:
            Tuple of the final MP4 path under ``/app/output`` and the seed used.

        Raises:
            ValueError: If the start image is missing in image-to-video mode,
                if ``height``/``width`` are not positive, or if
                ``improve_texture`` is requested without a loaded upsampler.
            Exception: Any pipeline/encoding error is logged and re-raised.
        """
        output_path, final_seed = None, None
        try:
            t_all = time.perf_counter()
            print(f"\n{'='*20} INICIANDO NOVA GERAÇÃO {'='*20}")
            self._empty_cuda_cache()

            # --- 1. Generation setup (parameters, seed, dimensions) ---
            if mode == "image-to-video" and not start_image_filepath:
                raise ValueError("Imagem de início é obrigatória para o modo 'image-to-video'")
            if height <= 0 or width <= 0:
                raise ValueError(f"Dimensões inválidas: {width}x{height}")
            # Fail before spending a full first pass when the second pass cannot run.
            if improve_texture and self.latent_upsampler is None:
                raise ValueError("Latent Upsampler não está carregado, mas foi solicitado.")

            final_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
            seed_everething(final_seed)
            print(f"[INFO] Geração com Seed: {final_seed}")

            FPS = 24.0
            MAX_NUM_FRAMES = 2570
            target_frames_rounded = round(duration * FPS)
            # Snap the frame count to the form 8 * n + 1, then clamp it.
            n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
            actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))

            # Round each side up to the next multiple of 8.
            height_padded = ((height - 1) // 8 + 1) * 8
            width_padded = ((width - 1) // 8 + 1) * 8
            padding_values = calculate_padding(height, width, height_padded, width_padded)

            generator = torch.Generator(device=self.device).manual_seed(final_seed)
            temp_dir = tempfile.mkdtemp(prefix="ltxv_")
            self._tmp_dirs.add(temp_dir)

            # --- 2. Conditioning tensors ---
            conditioning_items = []
            if mode == "image-to-video" and start_image_filepath:
                start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
                conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))

            # --- 3. Pipeline arguments ---
            # Start from the YAML defaults, then apply the per-call values.
            call_kwargs = self.config.get("pipeline_defaults", {}).copy()
            call_kwargs.update({
                "prompt": prompt, "negative_prompt": negative_prompt,
                "height": height_padded, "width": width_padded,
                "num_frames": actual_num_frames, "frame_rate": int(FPS),
                "generator": generator, "output_type": "latent",
                "conditioning_items": conditioning_items or None,
                "guidance_scale": float(guidance_scale),
            })

            # --- 4. Generation (one or two passes) ---
            final_latents = None
            with self._autocast_context():
                if improve_texture:
                    print("[INFO] Iniciando pipeline de 2 passes para melhoria de textura.")
                    # STEP 1: base generation at reduced resolution.
                    downscale_factor = self.config.get("downscale_factor", 0.5)
                    target_low_res_area = (width * height) * (downscale_factor**2)
                    downscaled_h, downscaled_w = calculate_new_dimensions(width, height, target_area=target_low_res_area)
                    first_pass_kwargs = call_kwargs.copy()
                    first_pass_kwargs.update(self.config.get("first_pass", {}))
                    first_pass_kwargs.update({"width": downscaled_w, "height": downscaled_h, "guidance_scale": float(guidance_scale)})
                    base_latents = self.pipeline(**first_pass_kwargs).images
                    log_tensor_info(base_latents, "Latentes Base (Passo 1)")

                    # STEP 2: latent upscale and refinement.
                    # NOTE(review): the second pass keeps the padded full-size
                    # height/width from call_kwargs; confirm the upsampled
                    # latents always match that size for the configured factor.
                    upsampled_latents = self._upsample_latents_internal(base_latents)
                    del base_latents
                    self._release_memory()
                    second_pass_kwargs = call_kwargs.copy()
                    second_pass_kwargs.update(self.config.get("second_pass", {}))
                    second_pass_kwargs.update({"latents": upsampled_latents, "guidance_scale": float(guidance_scale)})
                    final_latents = self.pipeline(**second_pass_kwargs).images
                    log_tensor_info(final_latents, "Latentes Finais (Passo 2)")
                else:
                    print("[INFO] Iniciando pipeline de 1 passe.")
                    final_latents = self.pipeline(**call_kwargs).images
                    log_tensor_info(final_latents, "Latentes Finais (Passe Único)")

            # --- 5. Decoding, video encoding and hand-off ---
            print("[INFO] Decodificando latentes para pixels com VAE...")
            pixel_tensor = vae_manager_singleton.decode(
                final_latents.to(self.device),
                decode_timestep=float(self.config.get("decode_timestep", 0.05))
            )
            del final_latents
            self._release_memory()

            output_video_path_tmp = os.path.join(temp_dir, f"output_{final_seed}.mp4")
            print(f"[INFO] Codificando vídeo final para: {output_video_path_tmp}")
            video_encode_tool_singleton.save_video_from_tensor(
                pixel_tensor, output_video_path_tmp, fps=call_kwargs["frame_rate"]
            )
            del pixel_tensor

            # Move the result out of the temporary directory before cleanup removes it.
            results_dir = "/app/output"
            os.makedirs(results_dir, exist_ok=True)
            output_path = os.path.join(results_dir, f"final_video_{final_seed}.mp4")
            shutil.move(output_video_path_tmp, output_path)
            print(f"[SUCCESS] Geração concluída em {time.perf_counter() - t_all:.2f}s. Vídeo salvo em: {output_path}")
            return output_path, final_seed
        except Exception as e:
            print(f"[FATAL ERROR] A geração falhou: {type(e).__name__} - {e}")
            traceback.print_exc()
            raise
        finally:
            # Runs on success and on failure so temporary paths never accumulate.
            print("[INFO] Executando limpeza final da tarefa...")
            self.finalize(keep_paths=[output_path] if output_path else [])
# --- Main entry point ---
def _run_example():
    """Build the service and run one sample generation (stand-in for a real API)."""
    print("Iniciando carregamento do VideoService...")
    video_generation_service = VideoService()
    print("\n[INFO] VideoService carregado e pronto para receber tarefas.")
    example_request = {
        "prompt": "A cinematic shot of a panda drinking bubble tea in a Tokyo cafe",
        "negative_prompt": "blurry, low quality, cartoon",
        "duration": 3.0,
        "improve_texture": True,
    }
    try:
        video_generation_service.generate(**example_request)
    except Exception:
        # generate() has already printed the error and traceback before re-raising.
        print("\n[MAIN] Exemplo de geração falhou. O servidor ainda está de pé, mas verifique o erro acima.")


if __name__ == "__main__":
    _run_example()