Test3

Paused

App Files Files Community

Eueuiaa committed on Oct 8

Commit

a65518f

verified ·

1 Parent(s): 39769b3

Update api/ltx_server.py

Browse files

Files changed (1) hide show

api/ltx_server.py +90 -72

api/ltx_server.py CHANGED Viewed

@@ -366,31 +366,57 @@ class VideoService:
             return yaml.safe_load(file)
     def _load_models(self):
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
-        print("[DEBUG] Baixando checkpoint principal...")
-        distilled_model_path = hf_hub_download(
-            repo_id=LTX_REPO,
-            filename=self.config["checkpoint_path"],
-            local_dir=os.getenv("HF_HOME"),
-            cache_dir=os.getenv("HF_HOME_CACHE"),
-            token=os.getenv("HF_TOKEN"),
         )
         self.config["checkpoint_path"] = distilled_model_path
-        print(f"[DEBUG] Checkpoint em: {distilled_model_path}")
-        print("[DEBUG] Baixando upscaler espacial...")
-        spatial_upscaler_path = hf_hub_download(
-            repo_id=LTX_REPO,
-            filename=self.config["spatial_upscaler_model_path"],
-            local_dir=os.getenv("HF_HOME"),
-            cache_dir=os.getenv("HF_HOME_CACHE"),
-            token=os.getenv("HF_TOKEN")
         )
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
-        print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")
-        print("[DEBUG] Construindo pipeline...")
         pipeline = create_ltx_video_pipeline(
             ckpt_path=self.config["checkpoint_path"],
             precision=self.config["precision"],
@@ -408,6 +434,7 @@ class VideoService:
             print("[DEBUG] Construindo latent_upsampler...")
             latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
             print("[DEBUG] Upsampler pronto.")
         print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
         return pipeline, latent_upsampler
@@ -435,8 +462,6 @@ class VideoService:
                 pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
         """
@@ -453,11 +478,8 @@ class VideoService:
         upsampled_latents = self.latent_upsampler(latents)
         upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
         print(f"[DEBUG-UPSAMPLE] Shape de saída: {tuple(upsampled_latents.shape)}")
         return upsampled_latents
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
@@ -491,7 +513,6 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
     def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
         """
         Divide o tensor de latentes em chunks com tamanho definido em número de latentes.
@@ -626,7 +647,6 @@ class VideoService:
             print(f"[DEBUG] Video podado {i+1} adicionado {self._get_total_frames(video_podado)} frames ✅")
         print("===========CONCATECAO CAUSAL=============")
         print(f"[DEBUG] {nova_lista}")
         return nova_lista
@@ -804,7 +824,7 @@ class VideoService:
                     except Exception:
                          pass
-                    latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,15,1)
                     for latents in latents_parts_up:
@@ -832,8 +852,8 @@ class VideoService:
                            "output_type": "latent",
                            "width": second_pass_width,
                            "height": second_pass_height,
-                           #"num_frames": num_pixel_frames_part,
-                           "latents": upsampled_latents, # O tensor upscaled
                            "guidance_scale": float(guidance_scale),
                            **second_pass_config
                         })
@@ -861,54 +881,52 @@ class VideoService:
             # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
             print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
-            #latents_cpu = latents.detach().to("cpu", non_blocking=True)
-            #torch.cuda.empty_cache()
-            #try:
-            #    torch.cuda.ipc_collect()
-            #except Exception:
-            #    pass
-            latents_parts = []
-            for latents in latents_list:
-                latents_parts.append(self._dividir_latentes_por_tamanho(latents,15,1))
-            partes_mp4 = []
-            par = 0
-            for latents in latents_parts:
-                par = par + 1
-                output_video_path = os.path.join(results_dir, f"output_{used_seed}_{par}.mp4")
-                final_output_path = None
-                print("[DEBUG] Decodificando bloco de latentes com VAE {par} → tensor de pixels...")
-                # Usar manager com timestep por item; previne target_shape e rota NoneType.decode
-                pixel_tensor = vae_manager_singleton.decode(
-                    latents.to(self.device, non_blocking=True),
-                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
-                )
-                log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
-                print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
-                video_encode_tool_singleton.save_video_from_tensor(
-                    pixel_tensor,
-                    output_video_path,
-                    fps=call_kwargs["frame_rate"],
-                    progress_callback=progress_callback
-                )
-                candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
-                try:
-                    shutil.move(output_video_path, candidate)
-                    final_output_path = candidate
-                    print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
-                    partes_mp4.append(final_output_path)
-                except Exception as e:
-                    final_output_path = output_video_path
-                    print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
             total_partes = len(partes_mp4)
             if (total_partes>1):
                 final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")

             return yaml.safe_load(file)
     def _load_models(self):
+        """
+        Carrega os modelos de forma inteligente:
+        1. Tenta resolver o caminho do cache local (rápido, sem rede).
+        2. Se o arquivo não for encontrado localmente, baixa como fallback.
+        Garante que o serviço possa iniciar mesmo que o setup.py não tenha sido executado.
+        """
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
+        print("[DEBUG] Resolvendo caminhos dos modelos de forma inteligente...")
+        # --- Função Auxiliar para Carregamento Inteligente ---
+        def get_or_download_model(repo_id, filename, description):
+            try:
+                # hf_hub_download é a ferramenta certa aqui. Ela verifica o cache PRIMEIRO.
+                # Se o arquivo estiver no cache, retorna o caminho instantaneamente (após uma verificação rápida de metadados).
+                # Se não estiver no cache, ela o baixa.
+                print(f"[DEBUG] Verificando {description}: {filename}...")
+                model_path = hf_hub_download(
+                    repo_id=repo_id,
+                    filename=filename,
+                    # Forçar o uso de um cache específico se necessário
+                    cache_dir=os.getenv("HF_HOME_CACHE"),
+                    token=os.getenv("HF_TOKEN")
+                )
+                print(f"[DEBUG] Caminho do {description} resolvido com sucesso.")
+                return model_path
+            except Exception as e:
+                print("\n" + "="*80)
+                print(f"[ERRO CRÍTICO] Falha ao obter o modelo '{filename}'.")
+                print(f"Detalhe do erro: {e}")
+                print("Verifique sua conexão com a internet ou o estado do cache do Hugging Face.")
+                print("="*80 + "\n")
+                sys.exit(1)
+        # --- Checkpoint Principal ---
+        checkpoint_filename = self.config["checkpoint_path"]
+        distilled_model_path = get_or_download_model(
+            LTX_REPO, checkpoint_filename, "checkpoint principal"
         )
         self.config["checkpoint_path"] = distilled_model_path
+        # --- Upscaler Espacial ---
+        upscaler_filename = self.config["spatial_upscaler_model_path"]
+        spatial_upscaler_path = get_or_download_model(
+            LTX_REPO, upscaler_filename, "upscaler espacial"
         )
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
+        # --- Construção dos Pipelines ---
+        print("\n[DEBUG] Construindo pipeline a partir dos caminhos resolvidos...")
         pipeline = create_ltx_video_pipeline(
             ckpt_path=self.config["checkpoint_path"],
             precision=self.config["precision"],
             print("[DEBUG] Construindo latent_upsampler...")
             latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
             print("[DEBUG] Upsampler pronto.")
         print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
         return pipeline, latent_upsampler
                 pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
         """
         upsampled_latents = self.latent_upsampler(latents)
         upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
         print(f"[DEBUG-UPSAMPLE] Shape de saída: {tuple(upsampled_latents.shape)}")
         return upsampled_latents
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
     def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
         """
         Divide o tensor de latentes em chunks com tamanho definido em número de latentes.
             print(f"[DEBUG] Video podado {i+1} adicionado {self._get_total_frames(video_podado)} frames ✅")
         print("===========CONCATECAO CAUSAL=============")
         print(f"[DEBUG] {nova_lista}")
         return nova_lista
                     except Exception:
                          pass
+                    latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,15,0)
                     for latents in latents_parts_up:
                            "output_type": "latent",
                            "width": second_pass_width,
                            "height": second_pass_height,
+                           "num_frames": num_pixel_frames_part,
+                           "latents": latents, # O tensor upscaled
                            "guidance_scale": float(guidance_scale),
                            **second_pass_config
                         })
             # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
             print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
+            for latents_vae in latents_list:
+                latents_cpu_vae = latents_vae.detach().to("cpu", non_blocking=True)
+                torch.cuda.empty_cache()
+                try:
+                    torch.cuda.ipc_collect()
+                except Exception:
+                    pass
+                latents_parts_vae = self._dividir_latentes_por_tamanho(latents_cpu_vae,4,1)
+                for latents in latents_parts_vae:
+                    print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
+                    par = par + 1
+                    output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
+                    final_output_path = None
+                    print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
+                    # Usar manager com timestep por item; previne target_shape e rota NoneType.decode
+                    pixel_tensor = vae_manager_singleton.decode(
+                        latents.to(self.device, non_blocking=True),
+                        decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                    )
+                    log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
+                    print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
+                    video_encode_tool_singleton.save_video_from_tensor(
+                        pixel_tensor,
+                        output_video_path,
+                        fps=call_kwargs["frame_rate"],
+                        progress_callback=progress_callback
+                    )
+                    candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                    try:
+                        shutil.move(output_video_path, candidate)
+                        final_output_path = candidate
+                        print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
+                        partes_mp4.append(final_output_path)
+                    except Exception as e:
+                        final_output_path = output_video_path
+                        print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
             total_partes = len(partes_mp4)
             if (total_partes>1):
                 final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")