# Source: Hugging Face Space "Test4" — app.py (commit 22512f1, verified; ~10.8 kB export).
# app_refactored_with_postprod.py (FINAL VERSION with LTX Refinement)
import gradio as gr
import os
import sys
import traceback
from pathlib import Path
import torch
import numpy as np
from PIL import Image
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
# --- Backend service imports ---
# LTX service for base video generation and texture refinement.
from api.ltx_server_refactored import video_generation_service

# SeedVR service for high-quality upscaling. The service is optional: if the
# module (or one of its heavy dependencies) is missing, fall back to None so
# the UI can disable the SeedVR tab instead of crashing at import time.
# (The original `SeedVRServer() if SeedVRServer else None` check was dead —
# a successful import is always truthy and a failed one raises first.)
try:
    from api.seedvr_server import SeedVRServer
except ImportError:
    SeedVRServer = None

# Instantiate the SeedVR server exactly once, if it is available.
seedvr_inference_server = SeedVRServer() if SeedVRServer else None

# Local checkout location of the LTX-Video repository (persistent /data volume).
DEPS_DIR = Path("/data")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
def add_deps_to_path(repo_path: Path) -> None:
    """Add *repo_path* to the front of ``sys.path`` for local imports.

    Idempotent: the path is only inserted if it is not already present.

    Bug fix: the debug print previously tested an undefined global
    ``LTXV_DEBUG`` (NameError at runtime); it now reads the ``LTXV_DEBUG``
    environment variable instead.
    """
    resolved_path = str(repo_path.resolve())
    if resolved_path not in sys.path:
        sys.path.insert(0, resolved_path)
    if os.environ.get("LTXV_DEBUG"):
        print(f"[DEBUG] Adicionado ao sys.path: {resolved_path}")
# --- Initial setup execution ---
# NOTE(review): `_run_setup_script` is not defined anywhere in this file —
# presumably it clones/installs the LTX-Video repo into /data; confirm it is
# defined or imported before this point, otherwise a fresh machine (where the
# repo dir does not yet exist) raises NameError here.
if not LTX_VIDEO_REPO_DIR.exists():
    _run_setup_script()
# Make the LTX-Video checkout importable (e.g. the `ltx_video` package below).
add_deps_to_path(LTX_VIDEO_REPO_DIR)
# --- SESSION STATE ---
def create_initial_state():
    """Return a fresh per-session state dict with every artifact slot empty."""
    slots = (
        "low_res_video",
        "low_res_latents",
        "refined_video_ltx",
        "refined_latents_ltx",
        "used_seed",
    )
    # Every slot starts out as None until the corresponding stage produces it.
    return dict.fromkeys(slots)
# --- FUNÇÕES WRAPPER PARA A UI ---
from PIL import Image
import torch
import numpy as np
from torchvision import transforms
def image_to_tensor1(path: str):
    """Load the image at *path* as a float32 tensor shaped [1, 3, 1, H, W] in [0, 1]."""
    rgb = Image.open(path).convert("RGB")
    pixels = np.array(rgb, dtype=np.float32) / 255.0
    frame = torch.from_numpy(pixels).permute(2, 0, 1)  # [3, H, W]
    # Add batch (dim 0) and temporal (dim 2) axes for the pipeline's media layout.
    return frame.unsqueeze(0).unsqueeze(2)             # [1, 3, 1, H, W]
def image_to_media_item(path: str) -> torch.Tensor:
    """Load the image at *path* as a [1, 3, 1, H, W] float tensor in [0, 1] on CPU."""
    frame = transforms.ToTensor()(Image.open(path).convert("RGB"))  # [3, H, W]
    # Insert batch and temporal axes; stay on CPU — the pipeline handles device/dtype.
    return frame[None, :, None, :, :]                               # [1, 3, 1, H, W]
from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXVideoPipeline
import copy
def run_generate_low(prompt, neg_prompt, start_img, height, width, duration, cfg, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Step 1: generate a low-resolution base video.

    Builds an optional single-frame conditioning item from *start_img*,
    delegates to the LTX service, and returns
    ``(video_path, new_state, gr.update(visible=True))`` — the last value
    reveals the post-production group in the UI.

    Fix: removed the dead ``if True:`` wrapper (leftover of a stripped
    try/except) that needlessly indented the whole body.
    """
    print("UI: Chamando generate_low")

    # Optional image conditioning anchored at the first frame.
    conditioning_items = []
    if start_img:
        media = image_to_media_item(start_img)  # [1, 3, 1, H, W]
        print(f"media {media.shape}")
        item = ConditioningItem(
            media_item=media,
            media_frame_number=0,
            conditioning_strength=1.0,
            media_x=None,
            media_y=None,
        )
        # Resize the conditioning media to the target generation resolution.
        item = LTXVideoPipeline._resize_conditioning_item(item, height, width)
        conditioning_items.append(item)

    # None asks the service to pick a random seed.
    used_seed = None if randomize_seed else seed
    video_path, tensor_path, final_seed = video_generation_service.generate_low_resolution(
        prompt=prompt, negative_prompt=neg_prompt,
        height=height, width=width, duration_secs=duration,
        guidance_scale=cfg, seed=used_seed,
        conditioning_items=conditioning_items,
    )

    # Fresh state: later refinement stages are invalidated by a new base video.
    new_state = {
        "low_res_video": video_path,
        "low_res_latents": tensor_path,
        "refined_video_ltx": None,
        "refined_latents_ltx": None,
        "used_seed": final_seed,
    }
    return video_path, new_state, gr.update(visible=True)
def run_ltx_refinement(state, prompt, neg_prompt, cfg, progress=gr.Progress(track_tqdm=True)):
    """Step 2a: texture refinement / upscale via the LTX pipeline.

    Reuses the Step-1 prompt, negative prompt, CFG and seed held in *state*,
    then records the refined artifacts back into it.
    Returns ``(refined_video_path, updated_state)``.

    Fix: removed the dead ``if True:`` wrapper that needlessly indented the body.
    """
    print("UI: Chamando run_ltx_refinement (generate_upscale_denoise)")
    video_path, tensor_path = video_generation_service.generate_upscale_denoise(
        latents_path=state["low_res_latents"],
        prompt=prompt,
        negative_prompt=neg_prompt,
        guidance_scale=cfg,
        seed=state["used_seed"]
    )
    # Store the refined artifacts so later stages can reuse them.
    state["refined_video_ltx"] = video_path
    state["refined_latents_ltx"] = tensor_path
    return video_path, state
def run_seedvr_upscaling(state, seed, resolution, batch_size, fps, progress=gr.Progress(track_tqdm=True)):
    """Step 2b: SeedVR upscaling of the Step-1 base video.

    Returns two ``gr.update`` objects: the output video component and the
    status textbox.

    Fixes: removed the dead ``if True:`` wrapper, and added an explicit guard
    for a missing SeedVR service — the launch button is disabled in that case,
    but a direct call would previously have raised AttributeError on None.
    """
    if seedvr_inference_server is None:
        return gr.update(), gr.update(value="❌ Serviço SeedVR não disponível.", interactive=False)

    video_path = state["low_res_video"]
    print(f"▶️ Iniciando processo de upscaling SeedVR para o vídeo: {video_path}")

    # Adapter so the backend can report progress through the Gradio tracker.
    def progress_wrapper(p, desc=""):
        progress(p, desc=desc)

    output_filepath = seedvr_inference_server.run_inference(
        file_path=video_path, seed=seed, resolution=resolution,
        batch_size=batch_size, fps=fps, progress=progress_wrapper
    )
    final_message = f"✅ Processo SeedVR concluído!\nVídeo salvo em: {output_filepath}"
    return gr.update(value=output_filepath, interactive=True), gr.update(value=final_message, interactive=False)
# --- GRADIO INTERFACE DEFINITION ---
with gr.Blocks() as demo:
    gr.Markdown("# LTX Video - Geração e Pós-Produção por Etapas")
    # Per-session state: artifact paths and the seed used (see create_initial_state).
    app_state = gr.State(value=create_initial_state())

    # --- STEP 1: base generation ---
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Etapa 1: Configurações de Geração")
            prompt_input = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
            neg_prompt_input = gr.Textbox(visible=False, label="Negative Prompt", value="worst quality, blurry, low quality, jittery", lines=2)
            start_image = gr.Image(label="Imagem de Início (Opcional)", type="filepath", sources=["upload", "clipboard"])
            with gr.Accordion("Parâmetros Avançados", open=False):
                height_input = gr.Slider(label="Height", value=512, step=64, minimum=256, maximum=1024)
                width_input = gr.Slider(label="Width", value=512, step=64, minimum=256, maximum=1024)
                duration_input = gr.Slider(label="Duração (s)", value=8, step=8, minimum=1, maximum=60)
                cfg_input = gr.Slider(label="Guidance Scale (CFG)", value=5.0, step=1, minimum=1, maximum=10.0)
                seed_input = gr.Number(label="Seed", value=42, precision=0)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            generate_low_btn = gr.Button("1. Gerar Vídeo Base (Low-Res)", variant="primary")
        with gr.Column(scale=1):
            gr.Markdown("### Vídeo Base Gerado")
            low_res_video_output = gr.Video(label="O resultado da Etapa 1 aparecerá aqui", interactive=False)

    # --- STEP 2: post-production (footer, in tabs); hidden until step 1 completes ---
    with gr.Group(visible=False) as post_prod_group:
        gr.Markdown("<hr style='margin-top: 20px; margin-bottom: 20px;'>")
        gr.Markdown("## Etapa 2: Pós-Produção")
        gr.Markdown("Use o vídeo gerado acima como entrada para as ferramentas abaixo. **O prompt e a CFG da Etapa 1 serão reutilizados.**")
        with gr.Tabs():
            # --- LTX REFINEMENT TAB ---
            with gr.TabItem("🚀 Upscaler Textura (LTX)"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Parâmetros de Refinamento")
                        gr.Markdown("Esta etapa reutiliza o prompt, o prompt negativo e a CFG da Etapa 1 para manter a consistência.")
                        ltx_refine_btn = gr.Button("Aplicar Refinamento de Textura LTX", variant="primary")
                    with gr.Column(scale=1):
                        gr.Markdown("### Resultado do Refinamento")
                        ltx_refined_video_output = gr.Video(label="Vídeo com Textura Refinada (LTX)", interactive=False)
            # --- SEEDVR UPSCALER TAB ---
            with gr.TabItem("✨ Upscaler SeedVR"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Parâmetros do SeedVR")
                        seedvr_seed = gr.Slider(minimum=0, maximum=999999, value=42, step=1, label="Seed")
                        seedvr_resolution = gr.Slider(minimum=720, maximum=1440, value=1072, step=8, label="Resolução Vertical (Altura)")
                        seedvr_batch_size = gr.Slider(minimum=1, maximum=16, value=4, step=1, label="Batch Size por GPU")
                        seedvr_fps_output = gr.Number(label="FPS de Saída (0 = original)", value=0)
                        # Button is disabled up-front when the SeedVR service failed to load.
                        run_seedvr_button = gr.Button("Iniciar Upscaling SeedVR", variant="primary", interactive=(seedvr_inference_server is not None))
                        if not seedvr_inference_server:
                            gr.Markdown("<p style='color: red;'>Serviço SeedVR não disponível.</p>")
                    with gr.Column(scale=1):
                        gr.Markdown("### Resultado do Upscaling")
                        seedvr_video_output = gr.Video(label="Vídeo com Upscale SeedVR", interactive=False)
                        seedvr_status_box = gr.Textbox(label="Status do Processamento", value="Aguardando...", lines=3, interactive=False)
            # --- MM-AUDIO TAB (placeholder) ---
            with gr.TabItem("🔊 Áudio (MM-Audio)"):
                gr.Markdown("*(Funcionalidade futura para adicionar som aos vídeos)*")

    # --- UI EVENT WIRING ---
    # Step 1 button: generates the base video and reveals the post-production group.
    generate_low_btn.click(
        fn=run_generate_low,
        inputs=[prompt_input, neg_prompt_input, start_image, height_input, width_input, duration_input, cfg_input, seed_input, randomize_seed],
        outputs=[low_res_video_output, app_state, post_prod_group]
    )
    # LTX Refinement tab button: reuses prompt/neg-prompt/CFG from Step 1.
    ltx_refine_btn.click(
        fn=run_ltx_refinement,
        inputs=[app_state, prompt_input, neg_prompt_input, cfg_input],
        outputs=[ltx_refined_video_output, app_state]
    )
    # SeedVR tab button: upscales the Step-1 base video.
    run_seedvr_button.click(
        fn=run_seedvr_upscaling,
        inputs=[app_state, seedvr_seed, seedvr_resolution, seedvr_batch_size, seedvr_fps_output],
        outputs=[seedvr_video_output, seedvr_status_box]
    )

if __name__ == "__main__":
    # Queueing enabled for long-running GPU jobs; listen on all interfaces.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, debug=True, show_error=True)