Test4

Paused

App Files Files Community

Test4 / video_service.py

EuuIia

Update video_service.py

eec60ab verified about 2 months ago

raw

history blame

24.3 kB

	# video_service.py

	# --- 1. IMPORTAÇÕES ---
	import torch
	import numpy as np
	import random
	import os
	import shlex
	import yaml
	from typing import List, Dict
	from pathlib import Path
	import imageio
	import tempfile
	from huggingface_hub import hf_hub_download
	import sys
	import subprocess
	import gc
	import shutil
	import contextlib

	# --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
	def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
	try:
	import psutil
	import pynvml as nvml
	nvml.nvmlInit()
	handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
	try:
	procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
	except Exception:
	procs = nvml.nvmlDeviceGetComputeRunningProcesses(handle)
	results = []
	for p in procs:
	pid = int(p.pid)
	used_mb = None
	try:
	if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
	used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
	except Exception:
	used_mb = None
	name = "unknown"
	user = "unknown"
	try:
	pr = psutil.Process(pid)
	name = pr.name()
	user = pr.username()
	except Exception:
	pass
	results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
	nvml.nvmlShutdown()
	return results
	except Exception:
	return []

	def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
	cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
	try:
	out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
	except Exception:
	return []
	results = []
	for line in out.strip().splitlines():
	parts = [p.strip() for p in line.split(",")]
	if len(parts) >= 3:
	try:
	pid = int(parts[0])
	name = parts[1]
	used_mb = int(parts[2])
	user = "unknown"
	try:
	import psutil
	pr = psutil.Process(pid)
	user = pr.username()
	except Exception:
	pass
	results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
	except Exception:
	continue
	return results

	def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
	if not processes:
	return " - Processos ativos: (nenhum)\n"
	processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
	lines = [" - Processos ativos (PID \| USER \| NAME \| VRAM MB):"]
	for p in processes:
	star = "*" if p["pid"] == current_pid else " "
	used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
	lines.append(f" {star} {p['pid']} \| {p['user']} \| {p['name']} \| {used_str}")
	return "\n".join(lines) + "\n"

	def run_setup():
	    """Run ``setup.py`` from the working directory to fetch the dependencies.

	    Prints a warning and returns when the script is missing. When the script
	    exits with a non-zero status the error is printed and the interpreter is
	    terminated with exit code 1.
	    """
	    script = "setup.py"
	    if os.path.exists(script):
	        print("--- Executando setup.py para garantir que as dependências estão presentes ---")
	        try:
	            # Same interpreter as the current process; check=True raises on failure.
	            subprocess.run([sys.executable, script], check=True)
	        except subprocess.CalledProcessError as err:
	            print(f"ERRO CRÍTICO DURANTE O SETUP: 'setup.py' falhou com código {err.returncode}.")
	            sys.exit(1)
	        else:
	            print("--- Setup concluído com sucesso ---")
	        return

	    print("AVISO: script 'setup.py' não encontrado. Pulando a clonagem de dependências.")

	# Root directory for the cloned dependencies and the LTX-Video checkout in it.
	DEPS_DIR = Path("/data")
	LTX_VIDEO_REPO_DIR = DEPS_DIR.joinpath("LTX-Video")
	# Run the setup script only when the checkout is not there yet.
	if not LTX_VIDEO_REPO_DIR.exists():
	    run_setup()

	def add_deps_to_path():
	    """Put the cloned LTX-Video repository at the front of ``sys.path``.

	    Raises:
	        FileNotFoundError: If the repository directory does not exist.
	    """
	    if not LTX_VIDEO_REPO_DIR.exists():
	        raise FileNotFoundError(f"Repositório LTX-Video não encontrado em '{LTX_VIDEO_REPO_DIR}'. Execute o setup.")

	    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
	    if repo_path in sys.path:
	        # Already importable; avoid inserting a duplicate entry.
	        return
	    sys.path.insert(0, repo_path)

	# Make the cloned repository importable before the model-specific imports that follow.
	add_deps_to_path()

	# --- 3. IMPORTAÇÕES ESPECÍFICAS DO MODELO ---
	from inference import (
	create_ltx_video_pipeline,
	create_latent_upsampler,
	load_image_to_tensor_with_resize_and_crop,
	seed_everething,
	calculate_padding,
	load_media_file,
	)
	from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
	from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy

	# --- 4. FUNÇÕES HELPER DE LOG ---
	def log_tensor_info(tensor, name="Tensor"):
	    """Print shape, dtype, device and basic statistics of a tensor.

	    Args:
	        tensor: Object to describe; anything that is not a ``torch.Tensor``
	            only produces a one-line notice.
	        name: Label used in the printed header.
	    """
	    if not isinstance(tensor, torch.Tensor):
	        print(f"\n[INFO] O item '{name}' não é um tensor para logar.")
	        return

	    print(f"\n--- Informações do Tensor: {name} ---")
	    print(f" - Shape: {tensor.shape}")
	    print(f" - Dtype: {tensor.dtype}")
	    print(f" - Device: {tensor.device}")

	    if tensor.numel() == 0:
	        print(" - O tensor está vazio, sem estatísticas.")
	    else:
	        # Each statistic is computed right before it is printed.
	        smallest = tensor.min().item()
	        print(f" - Min valor: {smallest:.4f}")
	        largest = tensor.max().item()
	        print(f" - Max valor: {largest:.4f}")
	        average = tensor.mean().item()
	        print(f" - Média: {average:.4f}")

	    print("------------------------------------------\n")

	# --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
	class VideoService:
	    """Loads the LTX-Video pipeline once and renders videos on request.

	    Construction reads the YAML config, downloads the checkpoints, builds the
	    pipeline (and optional latent upsampler), moves them to the inference
	    device and applies the precision policy. ``generate`` renders one video;
	    ``finalize`` removes registered temporaries and frees memory.
	    """

	    def __init__(self):
	        print("Inicializando VideoService...")
	        self.config = self._load_config()
	        self.device = "cuda" if torch.cuda.is_available() else "cpu"
	        self.last_memory_reserved_mb = 0.0
	        # Temporary paths registered for removal by finalize().
	        self._tmp_dirs = set()
	        self._tmp_files = set()
	        self._last_outputs = []

	        self.pipeline, self.latent_upsampler = self._load_models()
	        print(f"Movendo modelos para o dispositivo de inferência: {self.device}")
	        self.pipeline.to(self.device)
	        if self.latent_upsampler:
	            self.latent_upsampler.to(self.device)

	        # Precision policy (optional FP8 -> BF16 promotion and autocast dtype).
	        self._apply_precision_policy()

	        if self.device == "cuda":
	            torch.cuda.empty_cache()
	        self._log_gpu_memory("Após carregar modelos")
	        print("VideoService pronto para uso.")

	    def _log_gpu_memory(self, stage_name: str):
	        """Print reserved/peak CUDA memory and the GPU process table.

	        Does nothing when the service is not running on CUDA. Updates
	        ``last_memory_reserved_mb`` so the next call can report a delta.

	        Args:
	            stage_name: Label shown in the log header.
	        """
	        if self.device != "cuda":
	            return
	        device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
	        current_reserved_b = torch.cuda.memory_reserved(device_index)
	        current_reserved_mb = current_reserved_b / (1024 ** 2)
	        total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
	        total_memory_mb = total_memory_b / (1024 ** 2)
	        peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
	        previous_mb = getattr(self, "last_memory_reserved_mb", 0.0)
	        delta_mb = current_reserved_mb - previous_mb

	        # Prefer NVML; fall back to parsing nvidia-smi when it yields nothing.
	        processes = _query_gpu_processes_via_nvml(device_index)
	        if not processes:
	            processes = _query_gpu_processes_via_nvidiasmi(device_index)

	        print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
	        print(f" - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
	        print(f" - Variação desde o último log: {delta_mb:+.2f} MB")
	        if peak_reserved_mb > previous_mb:
	            print(f" - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
	        print(_gpu_process_table(processes, os.getpid()), end="")
	        print("--------------------------------------------------\n")
	        self.last_memory_reserved_mb = current_reserved_mb

	    def _register_tmp_dir(self, d: str):
	        """Track an existing directory for removal by ``finalize`` (best effort)."""
	        try:
	            if d and os.path.isdir(d):
	                self._tmp_dirs.add(d)
	        except Exception:
	            pass

	    def _register_tmp_file(self, f: str):
	        """Track an existing file for removal by ``finalize`` (best effort)."""
	        try:
	            if f and os.path.isfile(f):
	                self._tmp_files.add(f)
	        except Exception:
	            pass

	    @staticmethod
	    def _dir_holds_kept_path(directory, keep) -> bool:
	        """Return True when any path in ``keep`` is ``directory`` or lies inside it."""
	        root = os.path.abspath(directory)
	        for path in keep:
	            if not path:
	                continue
	            try:
	                if os.path.commonpath([root, os.path.abspath(path)]) == root:
	                    return True
	            except ValueError:
	                # commonpath rejects unrelated roots (e.g. different drives).
	                continue
	        return False

	    def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
	        """Remove registered temporaries and release memory.

	        Args:
	            keep_paths: Paths that must not be removed (e.g. the final video).
	                A registered directory that contains one of these paths is
	                left on disk as well.
	            extra_paths: Additional file paths to try to remove.
	            clear_gpu: When true and CUDA is available, empty the CUDA cache.

	        Every registered path is dropped from tracking whether or not it was
	        removed. Failures are swallowed on purpose: cleanup is best effort.
	        """
	        keep = set(keep_paths or [])
	        extras = set(extra_paths or [])

	        # File removal.
	        for f in list(self._tmp_files | extras):
	            try:
	                if f not in keep and os.path.isfile(f):
	                    os.remove(f)
	            except Exception:
	                pass
	            finally:
	                self._tmp_files.discard(f)

	        # Directory removal. A directory holding a kept path is preserved,
	        # otherwise rmtree would delete the very file the caller asked to keep.
	        for d in list(self._tmp_dirs):
	            try:
	                if (
	                    d not in keep
	                    and os.path.isdir(d)
	                    and not self._dir_holds_kept_path(d, keep)
	                ):
	                    shutil.rmtree(d, ignore_errors=True)
	            except Exception:
	                pass
	            finally:
	                self._tmp_dirs.discard(d)

	        # Garbage collection and VRAM cleanup.
	        gc.collect()
	        try:
	            if clear_gpu and torch.cuda.is_available():
	                torch.cuda.empty_cache()
	                try:
	                    torch.cuda.ipc_collect()
	                except Exception:
	                    pass
	        except Exception:
	            pass

	        # Optional post-cleanup log.
	        try:
	            self._log_gpu_memory("Após finalize")
	        except Exception:
	            pass

	    def _load_config(self):
	        """Load the first available pipeline YAML config.

	        FP8 configs are preferred; the non-FP8 distilled config is the
	        fallback. Opening the fallback raises ``FileNotFoundError`` when no
	        candidate exists.
	        """
	        base = LTX_VIDEO_REPO_DIR / "configs"
	        candidates = [
	            base / "ltxv-13b-0.9.8-dev-fp8.yaml",
	            base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
	            base / "ltxv-13b-0.9.8-dev-fp8.yaml.txt",
	            base / "ltxv-13b-0.9.8-distilled.yaml",  # non-FP8 fallback
	        ]
	        for cfg in candidates:
	            if cfg.exists():
	                with open(cfg, "r", encoding="utf-8") as file:
	                    return yaml.safe_load(file)
	        # Hard fallback to the classic path when none of the above exists.
	        config_file_path = base / "ltxv-13b-0.9.8-distilled.yaml"
	        with open(config_file_path, "r", encoding="utf-8") as file:
	            return yaml.safe_load(file)

	    def _load_models(self):
	        """Download the checkpoints and build the pipeline and upsampler on CPU.

	        The config entries ``checkpoint_path`` and
	        ``spatial_upscaler_model_path`` are overwritten with the local paths
	        returned by ``hf_hub_download``.

	        Returns:
	            ``(pipeline, latent_upsampler)``; the upsampler is ``None`` when
	            no spatial upscaler path is configured.
	        """
	        LTX_REPO = "Lightricks/LTX-Video"

	        distilled_model_path = hf_hub_download(
	            repo_id=LTX_REPO,
	            filename=self.config["checkpoint_path"],
	            local_dir=os.getenv("HF_HOME"),
	            cache_dir=os.getenv("HF_HOME_CACHE"),
	            token=os.getenv("HF_TOKEN"),
	        )
	        self.config["checkpoint_path"] = distilled_model_path

	        spatial_upscaler_path = hf_hub_download(
	            repo_id=LTX_REPO,
	            filename=self.config["spatial_upscaler_model_path"],
	            local_dir=os.getenv("HF_HOME"),
	            cache_dir=os.getenv("HF_HOME_CACHE"),
	            token=os.getenv("HF_TOKEN"),
	        )
	        self.config["spatial_upscaler_model_path"] = spatial_upscaler_path

	        pipeline = create_ltx_video_pipeline(
	            ckpt_path=self.config["checkpoint_path"],
	            precision=self.config["precision"],
	            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
	            sampler=self.config["sampler"],
	            device="cpu",
	            enhance_prompt=False,
	            prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
	            prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
	        )

	        latent_upsampler = None
	        if self.config.get("spatial_upscaler_model_path"):
	            latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")

	        return pipeline, latent_upsampler

	    def _promote_fp8_weights_to_bf16(self, module):
	        """Cast FP8 (``float8_e4m3fn``) parameters and buffers of a module to BF16.

	        Objects that are not ``torch.nn.Module`` instances are ignored, as is
	        the whole call when this torch build has no ``float8_e4m3fn`` dtype.
	        Per-tensor failures are skipped.
	        """
	        if not isinstance(module, torch.nn.Module):
	            return
	        f8 = getattr(torch, "float8_e4m3fn", None)
	        if f8 is None:
	            return
	        for _, p in module.named_parameters(recurse=True):
	            try:
	                if p.dtype == f8:
	                    with torch.no_grad():
	                        p.data = p.data.to(torch.bfloat16)
	            except Exception:
	                pass
	        for _, b in module.named_buffers(recurse=True):
	            try:
	                if hasattr(b, "dtype") and b.dtype == f8:
	                    b.data = b.data.to(torch.bfloat16)
	            except Exception:
	                pass

	    def _apply_precision_policy(self):
	        """Derive ``runtime_autocast_dtype`` from the config ``precision`` value.

	        ``float8_e4m3fn`` and ``bfloat16`` select BF16, ``mixed_precision``
	        selects FP16, anything else FP32. For FP8 configs the weights are
	        promoted to BF16 only when ``LTXV_FORCE_BF16_ON_FP8=1`` is set.
	        """
	        prec = str(self.config.get("precision", "")).lower()
	        self.runtime_autocast_dtype = torch.float32
	        if prec == "float8_e4m3fn":
	            # FP8: weights are left untouched by default.
	            self.runtime_autocast_dtype = torch.bfloat16
	            force_promote = os.getenv("LTXV_FORCE_BF16_ON_FP8", "0") == "1"
	            if force_promote and hasattr(torch, "float8_e4m3fn"):
	                # Only real nn.Module objects are promoted; others are ignored.
	                try:
	                    self._promote_fp8_weights_to_bf16(self.pipeline)
	                except Exception:
	                    pass
	                try:
	                    if self.latent_upsampler:
	                        self._promote_fp8_weights_to_bf16(self.latent_upsampler)
	                except Exception:
	                    pass
	        elif prec == "bfloat16":
	            self.runtime_autocast_dtype = torch.bfloat16
	        elif prec == "mixed_precision":
	            self.runtime_autocast_dtype = torch.float16
	        else:
	            self.runtime_autocast_dtype = torch.float32

	    def _autocast_context(self):
	        """Return CUDA autocast at the runtime dtype, or a no-op context off CUDA."""
	        if self.device == "cuda":
	            return torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
	        return contextlib.nullcontext()

	    def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
	        """Load an image, resize/crop and pad it, then move it to the device.

	        On CUDA the tensor is also cast to ``runtime_autocast_dtype``.
	        """
	        tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
	        tensor = torch.nn.functional.pad(tensor, padding_values)
	        if self.device == "cuda":
	            return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
	        return tensor.to(self.device)

	    def generate(
	        self,
	        prompt,
	        negative_prompt,
	        mode="text-to-video",
	        start_image_filepath=None,
	        middle_image_filepath=None,
	        middle_frame_number=None,
	        middle_image_weight=1.0,
	        end_image_filepath=None,
	        end_image_weight=1.0,
	        input_video_filepath=None,
	        height=512,
	        width=704,
	        duration=2.0,
	        frames_to_use=9,
	        seed=42,
	        randomize_seed=True,
	        guidance_scale=3.0,
	        improve_texture=True,
	        progress_callback=None,
	    ):
	        """Render one video and write it as an MP4.

	        Args:
	            prompt: Text prompt forwarded to the pipeline.
	            negative_prompt: Negative prompt forwarded to the pipeline.
	            mode: ``"text-to-video"``, ``"image-to-video"`` (requires
	                ``start_image_filepath``) or ``"video-to-video"`` (requires
	                ``input_video_filepath``).
	            start_image_filepath: Image conditioning frame 0 with weight 1.0.
	            middle_image_filepath: Optional image conditioning a middle frame.
	            middle_frame_number: Frame index for the middle image; clamped to
	                the valid frame range.
	            middle_image_weight: Conditioning weight of the middle image.
	            end_image_filepath: Optional image conditioning the last frame.
	            end_image_weight: Conditioning weight of the end image.
	            input_video_filepath: Source video for video-to-video.
	            height: Output height; padded up to a multiple of 32 for the
	                pipeline and cropped back afterwards.
	            width: Output width; handled like ``height``.
	            duration: Length in seconds at 24 fps; the frame count is snapped
	                to ``8 * k + 1`` and clamped to ``[9, 257]``.
	            frames_to_use: Maximum frames read from the input video.
	            seed: Seed used when ``randomize_seed`` is false.
	            randomize_seed: Draw a random 32-bit seed instead of ``seed``.
	            guidance_scale: Guidance scale applied to the pass(es).
	            improve_texture: Use the multi-scale pipeline (needs the latent
	                upsampler); otherwise run a single pass.
	            progress_callback: Optional ``callback(frames_written, total)``
	                invoked after each frame is encoded.

	        Returns:
	            ``(output_path, used_seed)``. The file lives in ``/app/output``,
	            or in the temporary staging directory when the move fails.

	        Raises:
	            ValueError: When a required input for ``mode`` is missing, or
	                ``improve_texture`` is set without a loaded upsampler.
	        """
	        if self.device == "cuda":
	            torch.cuda.empty_cache()
	            torch.cuda.reset_peak_memory_stats()
	        self._log_gpu_memory("Início da Geração")

	        if mode == "image-to-video" and not start_image_filepath:
	            raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
	        if mode == "video-to-video" and not input_video_filepath:
	            raise ValueError("O vídeo de entrada é obrigatório para o modo video-to-video")

	        used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
	        seed_everething(used_seed)

	        FPS = 24.0
	        MAX_NUM_FRAMES = 257
	        target_frames_rounded = round(duration * FPS)
	        # Snap to the nearest 8 * k + 1 frame count, then clamp to [9, MAX].
	        n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
	        actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))

	        # Round both dimensions up to the next multiple of 32.
	        height_padded = ((height - 1) // 32 + 1) * 32
	        width_padded = ((width - 1) // 32 + 1) * 32
	        padding_values = calculate_padding(height, width, height_padded, width_padded)

	        generator = torch.Generator(device=self.device).manual_seed(used_seed)
	        conditioning_items = []

	        # NOTE(review): the middle/end conditioning is reconstructed as nested
	        # under image-to-video; confirm against the original file layout.
	        if mode == "image-to-video":
	            start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
	            conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
	            if middle_image_filepath and middle_frame_number is not None:
	                middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
	                safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
	                conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
	            if end_image_filepath:
	                end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
	                last_frame_index = actual_num_frames - 1
	                conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))

	        call_kwargs = {
	            "prompt": prompt,
	            "negative_prompt": negative_prompt,
	            "height": height_padded,
	            "width": width_padded,
	            "num_frames": actual_num_frames,
	            "frame_rate": int(FPS),
	            "generator": generator,
	            "output_type": "pt",
	            "conditioning_items": conditioning_items if conditioning_items else None,
	            "media_items": None,
	            "decode_timestep": self.config["decode_timestep"],
	            "decode_noise_scale": self.config["decode_noise_scale"],
	            "stochastic_sampling": self.config["stochastic_sampling"],
	            "image_cond_noise_scale": 0.15,
	            "is_video": True,
	            "vae_per_channel_normalize": True,
	            "mixed_precision": (self.config["precision"] == "mixed_precision"),
	            "offload_to_cpu": False,
	            "enhance_prompt": False,
	            "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
	        }

	        if mode == "video-to-video":
	            call_kwargs["media_items"] = load_media_file(
	                media_path=input_video_filepath,
	                height=height,
	                width=width,
	                max_frames=int(frames_to_use),
	                padding=padding_values,
	            ).to(self.device)

	        result_tensor = None
	        video_np = None
	        multi_scale_pipeline = None

	        if improve_texture:
	            if not self.latent_upsampler:
	                raise ValueError("Upscaler espacial não carregado.")
	            multi_scale_pipeline = LTXMultiScalePipeline(self.pipeline, self.latent_upsampler)
	            first_pass_args = self.config.get("first_pass", {}).copy()
	            first_pass_args["guidance_scale"] = float(guidance_scale)
	            second_pass_args = self.config.get("second_pass", {}).copy()
	            second_pass_args["guidance_scale"] = float(guidance_scale)

	            multi_scale_call_kwargs = call_kwargs.copy()
	            multi_scale_call_kwargs.update(
	                {
	                    "downscale_factor": self.config["downscale_factor"],
	                    "first_pass": first_pass_args,
	                    "second_pass": second_pass_args,
	                }
	            )

	            with self._autocast_context():
	                result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
	            log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
	        else:
	            single_pass_kwargs = call_kwargs.copy()
	            first_pass_config = self.config.get("first_pass", {})
	            single_pass_kwargs.update(
	                {
	                    "guidance_scale": float(guidance_scale),
	                    "stg_scale": first_pass_config.get("stg_scale"),
	                    "rescaling_scale": first_pass_config.get("rescaling_scale"),
	                    "skip_block_list": first_pass_config.get("skip_block_list"),
	                }
	            )

	            # Deliberately do not pass "guidance_timesteps" in single-pass mode
	            # (the original author avoids triggering guidance mapping in the
	            # library). Use "timesteps" when configured, else pipeline defaults.
	            config_timesteps = first_pass_config.get("timesteps")
	            if mode == "video-to-video":
	                single_pass_kwargs["timesteps"] = [0.7]
	                print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
	            elif isinstance(config_timesteps, (list, tuple)) and len(config_timesteps) > 0:
	                single_pass_kwargs["timesteps"] = config_timesteps

	            print("\n[INFO] Executando pipeline de etapa única...")
	            with self._autocast_context():
	                result_tensor = self.pipeline(**single_pass_kwargs).images

	        # Crop the padding back off and limit to the requested frame count.
	        pad_left, pad_right, pad_top, pad_bottom = padding_values
	        slice_h_end = -pad_bottom if pad_bottom > 0 else None
	        slice_w_end = -pad_right if pad_right > 0 else None
	        result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
	        log_tensor_info(result_tensor, "Tensor Final (Após Pós-processamento, Antes de Salvar)")

	        # Clip before the uint8 cast so out-of-range values cannot wrap around.
	        frames_float = result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255
	        video_np = np.clip(frames_float, 0, 255).astype(np.uint8)
	        del frames_float

	        # Stage the file in a temp directory, then move it to persistent storage.
	        temp_dir = tempfile.mkdtemp(prefix="ltxv_")
	        self._register_tmp_dir(temp_dir)
	        results_dir = "/app/output"
	        os.makedirs(results_dir, exist_ok=True)

	        final_output_path = None
	        output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
	        try:
	            with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
	                total_frames = len(video_np)
	                for i, frame in enumerate(video_np):
	                    writer.append_data(frame)
	                    if progress_callback:
	                        progress_callback(i + 1, total_frames)

	            candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
	            try:
	                shutil.move(output_video_path, candidate_final)
	                final_output_path = candidate_final
	            except Exception:
	                # Move failed: hand back the staged file; finalize() below
	                # keeps its directory because the path is in keep_paths.
	                final_output_path = output_video_path
	                self._register_tmp_file(output_video_path)

	            self._log_gpu_memory("Fim da Geração")
	            return final_output_path, used_seed
	        finally:
	            # Drop references to large objects before clearing VRAM.
	            result_tensor = None
	            video_np = None
	            multi_scale_pipeline = None

	            gc.collect()
	            try:
	                if self.device == "cuda":
	                    torch.cuda.empty_cache()
	                    try:
	                        torch.cuda.ipc_collect()
	                    except Exception:
	                        pass
	            except Exception:
	                pass

	            # Clean up temporaries while preserving the final video.
	            try:
	                self.finalize(keep_paths=[final_output_path] if final_output_path else [])
	            except Exception:
	                pass

	# Module-level shared instance: constructing VideoService loads the config and
	# the models, so importing this module triggers that work.
	print("Criando instância do VideoService. O carregamento do modelo começará agora...")
	video_generation_service = VideoService()