Test

Paused

App Files Community

EuuIia committed on Oct 3

Commit

bd507dd

verified ·

1 Parent(s): 86c2fc6

Update video_service.py

Browse files

Files changed (1) hide show

video_service.py +201 -103

video_service.py CHANGED Viewed

@@ -14,6 +14,8 @@ import tempfile
 from huggingface_hub import hf_hub_download
 import sys
 import subprocess
 # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
@@ -23,7 +25,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
         import pynvml as nvml
         nvml.nvmlInit()
         handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
-        # Try v3, then fall back to the generic name if binding differs
         try:
             procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
         except Exception:
@@ -33,7 +34,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
             pid = int(p.pid)
             used_mb = None
             try:
-                # NVML returns bytes; some bindings may use NVML_VALUE_NOT_AVAILABLE
                 if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
                     used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
             except Exception:
@@ -53,7 +53,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
         return []
 def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
-    # CSV, no header, no units gives lines: "PID,process_name,used_memory"
     cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
     try:
         out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
@@ -82,7 +81,6 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
 def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
     if not processes:
         return "  - Processos ativos: (nenhum)\n"
-    # sort by used_mb desc, then pid
     processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
     lines = ["  - Processos ativos (PID | USER | NAME | VRAM MB):"]
     for p in processes:
@@ -91,36 +89,6 @@ def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
         lines.append(f"    {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
     return "\n".join(lines) + "\n"
-# Integração no método existente:
-def _log_gpu_memory(self, stage_name: str):
-    import torch
-    if self.device != "cuda":
-        return
-    device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
-    current_reserved_b = torch.cuda.memory_reserved(device_index)
-    current_reserved_mb = current_reserved_b / (1024 ** 2)
-    total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
-    total_memory_mb = total_memory_b / (1024 ** 2)
-    peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
-    delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
-    # Coleta de processos: tenta NVML, depois fallback para nvidia-smi
-    processes = _query_gpu_processes_via_nvml(device_index)
-    if not processes:
-        processes = _query_gpu_processes_via_nvidiasmi(device_index)
-    print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
-    print(f"  - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
-    print(f"  - Variação desde o último log: {delta_mb:+.2f} MB")
-    if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
-        print(f"  - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
-    # Imprime tabela de processos
-    print(_gpu_process_table(processes, os.getpid()), end="")
-    print("--------------------------------------------------\n")
-    self.last_memory_reserved_mb = current_reserved_mb
 def run_setup():
     """Executa o script setup.py para clonar as dependências necessárias."""
     setup_script_path = "setup.py"
@@ -151,9 +119,12 @@ add_deps_to_path()
 # --- 3. IMPORTAÇÕES ESPECÍFICAS DO MODELO ---
 from inference import (
-    create_ltx_video_pipeline, create_latent_upsampler,
-    load_image_to_tensor_with_resize_and_crop, seed_everething,
-    calculate_padding, load_media_file
 )
 from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
 from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
@@ -175,15 +146,13 @@ def log_tensor_info(tensor, name="Tensor"):
         print("  - O tensor está vazio, sem estatísticas.")
     print("------------------------------------------\n")
 # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
 class VideoService:
     def __init__(self):
         print("Inicializando VideoService...")
         self.config = self._load_config()
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.last_memory_reserved_mb = 0
         self._tmp_dirs = set()
         self._tmp_files = set()
         self._last_outputs = []
@@ -196,25 +165,53 @@ class VideoService:
             torch.cuda.empty_cache()
             self._log_gpu_memory("Após carregar modelos")
         print("VideoService pronto para uso.")
     def _register_tmp_dir(self, d: str):
-        if d and os.path.isdir(d):
-            self._tmp_dirs.add(d)
     def _register_tmp_file(self, f: str):
-        if f and os.path.isfile(f):
-            self._tmp_files.add(f)
     def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
         """
-        Remove temporários e coleta memória.
         keep_paths: caminhos que não devem ser removidos (ex.: vídeo final).
         extra_paths: caminhos adicionais para tentar remover (opcional).
         """
         keep = set(keep_paths or [])
         extras = set(extra_paths or [])
-        # Remoção de arquivos
         for f in list(self._tmp_files | extras):
             try:
                 if f not in keep and os.path.isfile(f):
@@ -224,7 +221,6 @@ class VideoService:
             finally:
                 self._tmp_files.discard(f)
-        # Remoção de diretórios
         for d in list(self._tmp_dirs):
             try:
                 if d not in keep and os.path.isdir(d):
@@ -234,13 +230,10 @@ class VideoService:
             finally:
                 self._tmp_dirs.discard(d)
-        # Coleta de GC e limpeza de VRAM
         gc.collect()
         try:
-            import torch
             if clear_gpu and torch.cuda.is_available():
                 torch.cuda.empty_cache()
-                # Limpa buffers de IPC quando aplicável
                 try:
                     torch.cuda.ipc_collect()
                 except Exception:
@@ -248,13 +241,11 @@ class VideoService:
         except Exception:
             pass
-        # Log opcional pós-limpeza
         try:
             self._log_gpu_memory("Após finalize")
         except Exception:
             pass
     def _load_config(self):
         config_file_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled.yaml"
         with open(config_file_path, "r") as file:
@@ -262,28 +253,68 @@ class VideoService:
     def _load_models(self):
         LTX_REPO = "Lightricks/LTX-Video"
-        distilled_model_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["checkpoint_path"], local_dir=os.getenv("HF_HOME"), cache_dir=os.getenv("HF_HOME_CACHE"), token=os.getenv("HF_TOKEN"))
         self.config["checkpoint_path"] = distilled_model_path
-        spatial_upscaler_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["spatial_upscaler_model_path"], local_dir=os.getenv("HF_HOME"), cache_dir=os.getenv("HF_HOME_CACHE"), token=os.getenv("HF_TOKEN"))
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
-        pipeline = create_ltx_video_pipeline(ckpt_path=self.config["checkpoint_path"], precision=self.config["precision"], text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"], sampler=self.config["sampler"], device="cpu", enhance_prompt=False, prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"], prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"])
         latent_upsampler = None
         if self.config.get("spatial_upscaler_model_path"):
             latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
         return pipeline, latent_upsampler
     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
         tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
         tensor = torch.nn.functional.pad(tensor, padding_values)
         return tensor.to(self.device)
-    def generate(self, prompt, negative_prompt, mode="text-to-video",
-                 start_image_filepath=None,
-                 middle_image_filepath=None, middle_frame_number=None, middle_image_weight=1.0,
-                 end_image_filepath=None, end_image_weight=1.0,
-                 input_video_filepath=None, height=512, width=704, duration=2.0,
-                 frames_to_use=9, seed=42, randomize_seed=True, guidance_scale=3.0,
-                 improve_texture=True, progress_callback=None):
         if self.device == "cuda":
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
@@ -302,14 +333,14 @@ class VideoService:
         target_frames_rounded = round(duration * FPS)
         n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
         actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
         height_padded = ((height - 1) // 32 + 1) * 32
         width_padded = ((width - 1) // 32 + 1) * 32
         padding_values = calculate_padding(height, width, height_padded, width_padded)
         generator = torch.Generator(device=self.device).manual_seed(used_seed)
         conditioning_items = []
         if mode == "image-to-video":
             start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
             conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
@@ -323,22 +354,41 @@ class VideoService:
                 conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
         call_kwargs = {
-            "prompt": prompt, "negative_prompt": negative_prompt, "height": height_padded, "width": width_padded,
-            "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": generator, "output_type": "pt",
-            "conditioning_items": conditioning_items if conditioning_items else None,
             "media_items": None,
-            "decode_timestep": self.config["decode_timestep"], "decode_noise_scale": self.config["decode_noise_scale"],
-            "stochastic_sampling": self.config["stochastic_sampling"], "image_cond_noise_scale": 0.15,
-            "is_video": True, "vae_per_channel_normalize": True,
             "mixed_precision": (self.config["precision"] == "mixed_precision"),
-            "offload_to_cpu": False, "enhance_prompt": False,
-            "skip_layer_strategy": SkipLayerStrategy.AttentionValues
         }
         if mode == "video-to-video":
-            call_kwargs["media_items"] = load_media_file(media_path=input_video_filepath, height=height, width=width, max_frames=int(frames_to_use), padding=padding_values).to(self.device)
         result_tensor = None
         if improve_texture:
             if not self.latent_upsampler:
                 raise ValueError("Upscaler espacial não carregado.")
@@ -347,53 +397,101 @@ class VideoService:
             first_pass_args["guidance_scale"] = float(guidance_scale)
             second_pass_args = self.config.get("second_pass", {}).copy()
             second_pass_args["guidance_scale"] = float(guidance_scale)
             multi_scale_call_kwargs = call_kwargs.copy()
-            multi_scale_call_kwargs.update({"downscale_factor": self.config["downscale_factor"], "first_pass": first_pass_args, "second_pass": second_pass_args})
             result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
             log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
         else:
             single_pass_kwargs = call_kwargs.copy()
             first_pass_config = self.config.get("first_pass", {})
-            single_pass_kwargs.update({
-                "guidance_scale": float(guidance_scale),
-                "stg_scale": first_pass_config.get("stg_scale"),
-                "rescaling_scale": first_pass_config.get("rescaling_scale"),
-                "skip_block_list": first_pass_config.get("skip_block_list"),
-            })
-            # --- <INÍCIO DA CORREÇÃO> ---
             if mode == "video-to-video":
-                single_pass_kwargs["timesteps"] = [0.7] # CORRIGIDO: Passar como uma lista
                 print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
             else:
                 single_pass_kwargs["timesteps"] = first_pass_config.get("timesteps")
-            # --- <FIM DA CORREÇÃO> ---
             print("\n[INFO] Executando pipeline de etapa única...")
             result_tensor = self.pipeline(**single_pass_kwargs).images
         pad_left, pad_right, pad_top, pad_bottom = padding_values
         slice_h_end = -pad_bottom if pad_bottom > 0 else None
         slice_w_end = -pad_right if pad_right > 0 else None
         result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
         log_tensor_info(result_tensor, "Tensor Final (Após Pós-processamento, Antes de Salvar)")
         video_np = (result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
-        temp_dir = tempfile.mkdtemp()
         output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
-        with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec='libx264', quality=8) as writer:
-            total_frames = len(video_np)
-            for i, frame in enumerate(video_np):
-                writer.append_data(frame)
-                if progress_callback:
-                    progress_callback(i + 1, total_frames)
-        self._log_gpu_memory("Fim da Geração")
-        finalize()
-        return output_video_path, used_seed
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
-video_generation_service = VideoService()

 from huggingface_hub import hf_hub_download
 import sys
 import subprocess
+import gc
+import shutil
 # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
         import pynvml as nvml
         nvml.nvmlInit()
         handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
         try:
             procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
         except Exception:
             pid = int(p.pid)
             used_mb = None
             try:
                 if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
                     used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
             except Exception:
         return []
 def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
     cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
     try:
         out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
 def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
     if not processes:
         return "  - Processos ativos: (nenhum)\n"
     processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
     lines = ["  - Processos ativos (PID | USER | NAME | VRAM MB):"]
     for p in processes:
         lines.append(f"    {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
     return "\n".join(lines) + "\n"
 def run_setup():
     """Executa o script setup.py para clonar as dependências necessárias."""
     setup_script_path = "setup.py"
 # --- 3. IMPORTAÇÕES ESPECÍFICAS DO MODELO ---
 from inference import (
+    create_ltx_video_pipeline,
+    create_latent_upsampler,
+    load_image_to_tensor_with_resize_and_crop,
+    seed_everething,
+    calculate_padding,
+    load_media_file,
 )
 from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
 from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
         print("  - O tensor está vazio, sem estatísticas.")
     print("------------------------------------------\n")
 # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
 class VideoService:
     def __init__(self):
         print("Inicializando VideoService...")
         self.config = self._load_config()
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.last_memory_reserved_mb = 0.0
         self._tmp_dirs = set()
         self._tmp_files = set()
         self._last_outputs = []
             torch.cuda.empty_cache()
             self._log_gpu_memory("Após carregar modelos")
         print("VideoService pronto para uso.")
+    # Método de log de GPU como parte da classe
+    def _log_gpu_memory(self, stage_name: str):
+        if self.device != "cuda":
+            return
+        device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
+        current_reserved_b = torch.cuda.memory_reserved(device_index)
+        current_reserved_mb = current_reserved_b / (1024 ** 2)
+        total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
+        total_memory_mb = total_memory_b / (1024 ** 2)
+        peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
+        delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
+        processes = _query_gpu_processes_via_nvml(device_index)
+        if not processes:
+            processes = _query_gpu_processes_via_nvidiasmi(device_index)
+        print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
+        print(f"  - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
+        print(f"  - Variação desde o último log: {delta_mb:+.2f} MB")
+        if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
+            print(f"  - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
+        print(_gpu_process_table(processes, os.getpid()), end="")
+        print("--------------------------------------------------\n")
+        self.last_memory_reserved_mb = current_reserved_mb
     def _register_tmp_dir(self, d: str):
+        try:
+            if d and os.path.isdir(d):
+                self._tmp_dirs.add(d)
+        except Exception:
+            pass
     def _register_tmp_file(self, f: str):
+        try:
+            if f and os.path.isfile(f):
+                self._tmp_files.add(f)
+        except Exception:
+            pass
     def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
         """
+        Remove temporários e coleta memória.
         keep_paths: caminhos que não devem ser removidos (ex.: vídeo final).
         extra_paths: caminhos adicionais para tentar remover (opcional).
         """
         keep = set(keep_paths or [])
         extras = set(extra_paths or [])
         for f in list(self._tmp_files | extras):
             try:
                 if f not in keep and os.path.isfile(f):
             finally:
                 self._tmp_files.discard(f)
         for d in list(self._tmp_dirs):
             try:
                 if d not in keep and os.path.isdir(d):
             finally:
                 self._tmp_dirs.discard(d)
         gc.collect()
         try:
             if clear_gpu and torch.cuda.is_available():
                 torch.cuda.empty_cache()
                 try:
                     torch.cuda.ipc_collect()
                 except Exception:
         except Exception:
             pass
         try:
             self._log_gpu_memory("Após finalize")
         except Exception:
             pass
     def _load_config(self):
         config_file_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled.yaml"
         with open(config_file_path, "r") as file:
     def _load_models(self):
         LTX_REPO = "Lightricks/LTX-Video"
+        distilled_model_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["checkpoint_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN"),
+        )
         self.config["checkpoint_path"] = distilled_model_path
+        spatial_upscaler_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["spatial_upscaler_model_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN"),
+        )
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
+        pipeline = create_ltx_video_pipeline(
+            ckpt_path=self.config["checkpoint_path"],
+            precision=self.config["precision"],
+            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
+            sampler=self.config["sampler"],
+            device="cpu",
+            enhance_prompt=False,
+            prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
+            prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
+        )
         latent_upsampler = None
         if self.config.get("spatial_upscaler_model_path"):
             latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
         return pipeline, latent_upsampler
     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
         tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
         tensor = torch.nn.functional.pad(tensor, padding_values)
         return tensor.to(self.device)
+    def generate(
+        self,
+        prompt,
+        negative_prompt,
+        mode="text-to-video",
+        start_image_filepath=None,
+        middle_image_filepath=None,
+        middle_frame_number=None,
+        middle_image_weight=1.0,
+        end_image_filepath=None,
+        end_image_weight=1.0,
+        input_video_filepath=None,
+        height=512,
+        width=704,
+        duration=2.0,
+        frames_to_use=9,
+        seed=42,
+        randomize_seed=True,
+        guidance_scale=3.0,
+        improve_texture=True,
+        progress_callback=None,
+    ):
         if self.device == "cuda":
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
         target_frames_rounded = round(duration * FPS)
         n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
         actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
         height_padded = ((height - 1) // 32 + 1) * 32
         width_padded = ((width - 1) // 32 + 1) * 32
         padding_values = calculate_padding(height, width, height_padded, width_padded)
         generator = torch.Generator(device=self.device).manual_seed(used_seed)
         conditioning_items = []
         if mode == "image-to-video":
             start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
             conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
                 conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
         call_kwargs = {
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "height": height_padded,
+            "width": width_padded,
+            "num_frames": actual_num_frames,
+            "frame_rate": int(FPS),
+            "generator": generator,
+            "output_type": "pt",
+            "conditioning_items": conditioning_items if conditioning_items else None,
             "media_items": None,
+            "decode_timestep": self.config["decode_timestep"],
+            "decode_noise_scale": self.config["decode_noise_scale"],
+            "stochastic_sampling": self.config["stochastic_sampling"],
+            "image_cond_noise_scale": 0.15,
+            "is_video": True,
+            "vae_per_channel_normalize": True,
             "mixed_precision": (self.config["precision"] == "mixed_precision"),
+            "offload_to_cpu": False,
+            "enhance_prompt": False,
+            "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
         if mode == "video-to-video":
+            call_kwargs["media_items"] = load_media_file(
+                media_path=input_video_filepath,
+                height=height,
+                width=width,
+                max_frames=int(frames_to_use),
+                padding=padding_values,
+            ).to(self.device)
         result_tensor = None
+        video_np = None
+        multi_scale_pipeline = None
         if improve_texture:
             if not self.latent_upsampler:
                 raise ValueError("Upscaler espacial não carregado.")
             first_pass_args["guidance_scale"] = float(guidance_scale)
             second_pass_args = self.config.get("second_pass", {}).copy()
             second_pass_args["guidance_scale"] = float(guidance_scale)
             multi_scale_call_kwargs = call_kwargs.copy()
+            multi_scale_call_kwargs.update(
+                {
+                    "downscale_factor": self.config["downscale_factor"],
+                    "first_pass": first_pass_args,
+                    "second_pass": second_pass_args,
+                }
+            )
             result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
             log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
         else:
             single_pass_kwargs = call_kwargs.copy()
             first_pass_config = self.config.get("first_pass", {})
+            single_pass_kwargs.update(
+                {
+                    "guidance_scale": float(guidance_scale),
+                    "stg_scale": first_pass_config.get("stg_scale"),
+                    "rescaling_scale": first_pass_config.get("rescaling_scale"),
+                    "skip_block_list": first_pass_config.get("skip_block_list"),
+                }
+            )
             if mode == "video-to-video":
+                single_pass_kwargs["timesteps"] = [0.7]
                 print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
             else:
                 single_pass_kwargs["timesteps"] = first_pass_config.get("timesteps")
             print("\n[INFO] Executando pipeline de etapa única...")
             result_tensor = self.pipeline(**single_pass_kwargs).images
         pad_left, pad_right, pad_top, pad_bottom = padding_values
         slice_h_end = -pad_bottom if pad_bottom > 0 else None
         slice_w_end = -pad_right if pad_right > 0 else None
         result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
         log_tensor_info(result_tensor, "Tensor Final (Após Pós-processamento, Antes de Salvar)")
         video_np = (result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_")
+        self._register_tmp_dir(temp_dir)
+        results_dir = "/data/results"
+        os.makedirs(results_dir, exist_ok=True)
+        final_output_path = None
         output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
+        try:
+            with imageio.get_writer(
+                output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8
+            ) as writer:
+                total_frames = len(video_np)
+                for i, frame in enumerate(video_np):
+                    writer.append_data(frame)
+                    if progress_callback:
+                        progress_callback(i + 1, total_frames)
+            candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
+            try:
+                shutil.move(output_video_path, candidate_final)
+                final_output_path = candidate_final
+            except Exception:
+                final_output_path = output_video_path
+            self._register_tmp_file(output_video_path)
+            self._log_gpu_memory("Fim da Geração")
+            return final_output_path, used_seed
+        finally:
+            try:
+                del result_tensor
+            except Exception:
+                pass
+            try:
+                del video_np
+            except Exception:
+                pass
+            try:
+                del multi_scale_pipeline
+            except Exception:
+                pass
+            gc.collect()
+            try:
+                if self.device == "cuda":
+                    torch.cuda.empty_cache()
+                    try:
+                        torch.cuda.ipc_collect()
+                    except Exception:
+                        pass
+            except Exception:
+                pass
+            try:
+                self.finalize(keep_paths=[final_output_path] if final_output_path else [])
+            except Exception:
+                pass
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
+video_generation_service = VideoService()