import gradio as gr
import numpy as np
import random
from PIL import Image
import torch
import cv2
from diffusers import (
FluxPipeline,
AutoPipelineForImage2Image,
StableDiffusionXLControlNetPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLInpaintPipeline,
ControlNetModel,
DPMSolverMultistepScheduler,
EulerAncestralDiscreteScheduler,
DDIMScheduler,
)
from diffusers.models import AutoencoderKL
from transformers import CLIPVisionModelWithProjection
from controlnet_aux import (
CannyDetector,
OpenposeDetector,
MidasDetector,
LineartDetector,
HEDdetector,
)
from insightface.app import FaceAnalysis
import basicsr
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer
from gfpgan import GFPGANer
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
MAX_SEED = np.iinfo(np.int32).max  # used by infer() for seed randomization; was referenced but never defined
print("🚀 Inicializando IA Studio Ultimate...")
# ===== BASE MODELS =====
MODELS = {
"FLUX.1-dev (Melhor)": "black-forest-labs/FLUX.1-dev",
"FLUX.1-schnell (Rápido)": "black-forest-labs/FLUX.1-schnell",
"SDXL-Base": "stabilityai/stable-diffusion-xl-base-1.0",
"SDXL-Turbo (Ultra Rápido)": "stabilityai/sdxl-turbo",
"RealVisXL (Hiper-Realista)": "SG161222/RealVisXL_V4.0",
"DreamShaper XL": "Lykon/dreamshaper-xl-1-0",
"Juggernaut XL": "RunDiffusion/Juggernaut-XL-v9",
}
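# NOTE: some of these repos (e.g. black-forest-labs/FLUX.1-dev) are gated on the Hugging Face Hub
# and require an accepted license plus an auth token before from_pretrained() can download them.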
# ===== CONTROLNET MODELS =====
CONTROLNET_MODELS = {
"Canny (Contornos)": "diffusers/controlnet-canny-sdxl-1.0",
"Depth (Profundidade)": "diffusers/controlnet-depth-sdxl-1.0",
"OpenPose (Pose)": "thibaud/controlnet-openpose-sdxl-1.0",
"Lineart (Desenho)": "controlnet-lineart-sdxl-1.0",
"Soft Edge (Bordas)": "SargeZT/controlnet-sd-xl-1.0-softedge-dexined",
"Tile (Upscale)": "xinsir/controlnet-tile-sdxl-1.0",
}
# Global caches
model_cache = {}
controlnet_cache = {}
preprocessor_cache = {}
ip_adapter_cache = {}
face_analyzer = None
upscaler = None
face_enhancer = None
def init_face_analyzer():
"""Inicializa FaceAnalysis para IP-Adapter Face"""
global face_analyzer
if face_analyzer is None:
try:
face_analyzer = FaceAnalysis(name='buffalo_l', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
face_analyzer.prepare(ctx_id=0, det_size=(640, 640))
print("✓ Face Analyzer carregado")
except Exception as e:
print(f"⚠️ Face Analyzer não disponível: {e}")
return face_analyzer
def init_upscaler():
"""Inicializa RealESRGAN para upscaling"""
global upscaler
if upscaler is None:
try:
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
upscaler = RealESRGANer(
scale=4,
model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth',
model=model,
tile=512,
tile_pad=10,
pre_pad=0,
half=True if torch.cuda.is_available() else False
)
print("✓ RealESRGAN carregado")
except Exception as e:
print(f"⚠️ Upscaler não disponível: {e}")
return upscaler
def init_face_enhancer():
"""Inicializa GFPGAN para correção de rostos"""
global face_enhancer
if face_enhancer is None:
try:
face_enhancer = GFPGANer(
model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
upscale=2,
arch='clean',
channel_multiplier=2,
bg_upsampler=init_upscaler()
)
print("✓ GFPGAN carregado")
except Exception as e:
print(f"⚠️ Face Enhancer não disponível: {e}")
return face_enhancer
def get_preprocessor(control_type):
"""Carrega preprocessadores ControlNet"""
if control_type in preprocessor_cache:
return preprocessor_cache[control_type]
try:
if control_type == "Canny (Contornos)":
preprocessor = CannyDetector()
elif control_type == "Depth (Profundidade)":
preprocessor = MidasDetector.from_pretrained("lllyasviel/Annotators")
elif control_type == "OpenPose (Pose)":
preprocessor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
elif control_type == "Lineart (Desenho)":
preprocessor = LineartDetector.from_pretrained("lllyasviel/Annotators")
elif control_type == "Soft Edge (Bordas)":
preprocessor = HEDdetector.from_pretrained("lllyasviel/Annotators")
else:
return None
preprocessor_cache[control_type] = preprocessor
return preprocessor
except Exception as e:
print(f"⚠️ Erro ao carregar preprocessor: {e}")
return None
def preprocess_controlnet_image(image, control_type, canny_low=100, canny_high=200):
"""Processa imagem para ControlNet"""
if control_type == "Nenhum" or image is None:
return None
preprocessor = get_preprocessor(control_type)
if not preprocessor:
return None
try:
if control_type == "Canny (Contornos)":
control_image = preprocessor(image, low_threshold=canny_low, high_threshold=canny_high)
else:
control_image = preprocessor(image)
return control_image
except Exception as e:
print(f"⚠️ Erro no preprocessamento: {e}")
return None
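# Usage sketch (hypothetical filename), assuming the "Canny (Contornos)" control type:
#   edges = preprocess_controlnet_image(Image.open("photo.jpg"), "Canny (Contornos)", 100, 200)
# The result is a PIL edge map suitable as the ControlNet conditioning image, or None on failure.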
def load_ip_adapter(pipe, adapter_type="plus"):
"""Carrega IP-Adapter para condicionamento por imagem"""
try:
if adapter_type == "plus":
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus_sdxl_vit-h.safetensors")
elif adapter_type == "plus-face":
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors")
pipe.set_ip_adapter_scale(0.6)
print(f"✓ IP-Adapter {adapter_type} carregado")
return True
except Exception as e:
print(f"⚠️ IP-Adapter não disponível: {e}")
return False
def load_model(model_name, use_controlnet=False, controlnet_type="Nenhum", use_ip_adapter=False, ip_adapter_type="plus"):
"""Carrega modelo com todos os componentes"""
cache_key = f"{model_name}_{controlnet_type}_{ip_adapter_type if use_ip_adapter else 'no-ip'}"
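    # NOTE: cached pipelines are never evicted, so cycling through many model/ControlNet/IP-Adapter
    # combinations keeps all of them in memory; restart the process to free it.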
if cache_key in model_cache:
print(f"✓ Cache hit: {cache_key}")
return model_cache[cache_key]
print(f"⏳ Carregando: {cache_key}")
model_id = MODELS[model_name]
try:
        # Improved fp16 VAE for SDXL models
vae = None
if "FLUX" not in model_name:
try:
vae = AutoencoderKL.from_pretrained(
"madebyollin/sdxl-vae-fp16-fix",
torch_dtype=torch_dtype
).to(device)
print("✓ VAE otimizado carregado")
            except Exception:
                pass  # fall back to the model's bundled VAE
        # FLUX (ControlNet/IP-Adapter not wired up for FLUX here yet)
if "FLUX" in model_name:
pipe_txt2img = FluxPipeline.from_pretrained(
model_id,
torch_dtype=torch_dtype,
).to(device)
pipe_img2img = AutoPipelineForImage2Image.from_pretrained(
model_id,
torch_dtype=torch_dtype,
).to(device)
pipe_controlnet = None
pipe_inpaint = None
        # SDXL with the full feature set
else:
from diffusers import StableDiffusionXLPipeline
base_args = {
"torch_dtype": torch_dtype,
"variant": "fp16",
"use_safetensors": True,
}
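            # NOTE: variant="fp16" assumes each repo publishes fp16 weight files; repos without that
            # variant may fail to load here and fall through to the outer except below.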
if vae:
base_args["vae"] = vae
# ControlNet Pipeline
if use_controlnet and controlnet_type != "Nenhum":
try:
controlnet_id = CONTROLNET_MODELS[controlnet_type]
controlnet = ControlNetModel.from_pretrained(
controlnet_id,
torch_dtype=torch_dtype,
).to(device)
pipe_controlnet = StableDiffusionXLControlNetPipeline.from_pretrained(
model_id,
controlnet=controlnet,
**base_args
).to(device)
print(f"✓ ControlNet {controlnet_type} carregado")
except Exception as e:
print(f"⚠️ ControlNet error: {e}")
pipe_controlnet = None
else:
pipe_controlnet = None
# Base pipelines
pipe_txt2img = StableDiffusionXLPipeline.from_pretrained(
model_id,
**base_args
).to(device)
pipe_img2img = StableDiffusionXLImg2ImgPipeline.from_pretrained(
model_id,
**base_args
).to(device)
pipe_inpaint = StableDiffusionXLInpaintPipeline.from_pretrained(
model_id,
**base_args
).to(device)
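            # NOTE: the inpainting pipeline is created and cached, but infer() never calls it yet,
            # so for now it only adds load time and memory.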
# IP-Adapter
if use_ip_adapter:
load_ip_adapter(pipe_txt2img, ip_adapter_type)
load_ip_adapter(pipe_img2img, ip_adapter_type)
        # Aggressive memory/speed optimizations (targeted at roughly 18 GB of VRAM)
        if torch.cuda.is_available():
            for pipe in [pipe_txt2img, pipe_img2img, pipe_controlnet, pipe_inpaint]:
                if pipe:
                    try:
                        pipe.enable_xformers_memory_efficient_attention()
                        pipe.enable_vae_slicing()
                        pipe.enable_vae_tiling()
                        if hasattr(pipe, 'enable_attention_slicing'):
                            pipe.enable_attention_slicing(1)
                        # Compile the UNet for speed (PyTorch 2.0+). torch.compile trades a slow
                        # first generation for faster subsequent runs; FLUX pipelines have no
                        # `unet` attribute, so they are skipped here.
                        if hasattr(torch, "compile") and hasattr(pipe, "unet"):
                            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
                    except Exception as e:
                        print(f"⚠️ Otimização parcial: {e}")
pipes = (pipe_txt2img, pipe_img2img, pipe_controlnet, pipe_inpaint)
model_cache[cache_key] = pipes
return pipes
except Exception as e:
print(f"❌ Erro ao carregar modelo: {e}")
return None, None, None, None
def enhance_prompt(prompt, style_preset, quality_boost):
"""Sistema avançado de prompt engineering"""
style_enhancers = {
"Fotorrealista Ultra": "photorealistic, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3, natural skin texture, sharp focus",
"Fantasia Épica": "epic fantasy art, dramatic lighting, mystical atmosphere, detailed textures, concept art, matte painting, trending on artstation, cinematic",
"Anime Studio": "anime masterpiece, studio quality, vibrant colors, detailed, official art, pixiv trending, makoto shinkai style",
"Cinematográfico": "cinematic shot, film grain, depth of field, bokeh, anamorphic lens, dramatic lighting, movie still, 35mm photograph",
"Pintura Digital Pro": "digital painting, highly detailed, artstation hq, concept art, smooth, sharp focus, illustration, art by artgerm and greg rutkowski",
"3D Render Premium": "3d render, octane render, unreal engine 5, ray tracing, global illumination, subsurface scattering, physically based rendering",
"Hiper-Realista": "hyperrealistic, ultra detailed, 16k resolution, professional photography, intricate details, lifelike, award winning",
"RPG Character Art": "rpg character portrait, d&d art style, detailed armor and clothing, fantasy setting, character sheet quality",
}
quality_tags = {
"Máxima": "masterpiece, best quality, ultra detailed, 8k, professional, sharp focus, vivid colors, perfect composition",
"Alta": "high quality, detailed, well composed, sharp, vibrant",
"Normal": "good quality, clear",
"Desativado": "",
}
    negative_presets = {  # NOTE: not used below; infer() declares its own copy of these presets
"Padrão": "blurry, low quality, distorted, ugly, bad anatomy, watermark, signature",
"Forte": "blurry, low quality, distorted, ugly, bad anatomy, bad proportions, watermark, low res, mutated, deformed, worst quality, out of focus, jpeg artifacts, text",
"Máximo": "blurry, low quality, distorted, ugly, bad anatomy, bad proportions, watermark, low res, mutated, deformed, worst quality, out of focus, jpeg artifacts, text, signature, username, amateur, poorly drawn, bad hands, extra limbs, missing limbs, duplicate, clone, bad face, bad eyes, bad teeth",
"Ultra (Anti-IA)": "blurry, low quality, distorted, ugly, bad anatomy, bad proportions, watermark, low res, mutated, deformed, worst quality, out of focus, jpeg artifacts, text, signature, username, amateur, poorly drawn, bad hands, extra limbs, missing limbs, duplicate, clone, bad face, bad eyes, bad teeth, artificial, synthetic, generated look, uncanny valley, plastic skin",
}
enhanced = prompt
    # Prepend quality tags
if quality_boost != "Desativado":
enhanced = f"{quality_tags[quality_boost]}, {prompt}"
    # Append the style preset keywords
if style_preset in style_enhancers:
enhanced = f"{enhanced}, {style_enhancers[style_preset]}"
return enhanced
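# Example: enhance_prompt("a red dragon", "Fotorrealista Ultra", "Alta") returns
# "high quality, detailed, well composed, sharp, vibrant, a red dragon, photorealistic, 8k uhd, dslr,
#  soft lighting, high quality, film grain, Fujifilm XT3, natural skin texture, sharp focus"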
def post_process_image(image, upscale_factor, enhance_faces, denoise_strength):
"""Pós-processamento com upscaling e correção"""
try:
# Upscaling
if upscale_factor > 1:
upscaler_model = init_upscaler()
if upscaler_model:
img_array = np.array(image)
output, _ = upscaler_model.enhance(img_array, outscale=upscale_factor)
image = Image.fromarray(output)
print(f"✓ Upscaled {upscale_factor}x")
# Face enhancement
if enhance_faces:
enhancer = init_face_enhancer()
if enhancer:
img_array = np.array(image)
_, _, output = enhancer.enhance(img_array, has_aligned=False, only_center_face=False, paste_back=True)
image = Image.fromarray(output)
print("✓ Faces enhanced")
return image
except Exception as e:
print(f"⚠️ Post-processing error: {e}")
return image
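# NOTE: GFPGANer is constructed with upscale=2, so enabling face enhancement on top of an already
# upscaled image enlarges the output again (e.g. 4x upscale + face fix ≈ 8x the original size).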
def infer(
prompt,
input_image,
ip_reference_image,
use_controlnet,
controlnet_type,
controlnet_strength,
canny_low,
canny_high,
use_ip_adapter,
ip_adapter_type,
ip_adapter_strength,
model_choice,
style_preset,
quality_boost,
negative_preset,
custom_negative,
seed,
randomize_seed,
width,
height,
guidance_scale,
num_steps,
strength,
scheduler_type,
upscale_factor,
enhance_faces,
denoise_strength,
progress=gr.Progress(track_tqdm=True),
):
"""Função principal ULTRA com todos os recursos"""
if not prompt or prompt.strip() == "":
return None, None, None, seed
if randomize_seed:
seed = random.randint(0, MAX_SEED)
generator = torch.Generator(device=device).manual_seed(seed)
# Enhance prompt
enhanced_prompt = enhance_prompt(prompt, style_preset, quality_boost)
# Negative prompt
negative_presets_dict = {
"Padrão": "blurry, low quality, distorted, ugly, bad anatomy, watermark, signature",
"Forte": "blurry, low quality, distorted, ugly, bad anatomy, bad proportions, watermark, low res, mutated, deformed, worst quality, out of focus, jpeg artifacts, text",
"Máximo": "blurry, low quality, distorted, ugly, bad anatomy, bad proportions, watermark, low res, mutated, deformed, worst quality, out of focus, jpeg artifacts, text, signature, username, amateur, poorly drawn, bad hands, extra limbs, missing limbs, duplicate, clone, bad face, bad eyes, bad teeth",
"Ultra (Anti-IA)": "blurry, low quality, distorted, ugly, bad anatomy, bad proportions, watermark, low res, mutated, deformed, worst quality, out of focus, jpeg artifacts, text, signature, username, amateur, poorly drawn, bad hands, extra limbs, missing limbs, duplicate, clone, bad face, bad eyes, bad teeth, artificial, synthetic, generated look, uncanny valley, plastic skin",
"Personalizado": custom_negative,
}
final_negative = negative_presets_dict.get(negative_preset, "")
    # Load the model and pipelines
progress(0.1, "Carregando modelo...")
pipes = load_model(model_choice, use_controlnet, controlnet_type, use_ip_adapter, ip_adapter_type)
if not pipes or pipes[0] is None:
return None, None, None, seed
pipe_txt2img, pipe_img2img, pipe_controlnet, pipe_inpaint = pipes
    # Configure the scheduler (SDXL pipelines only)
scheduler_map = {
"DPM++ 2M": DPMSolverMultistepScheduler,
"Euler a": EulerAncestralDiscreteScheduler,
"DDIM": DDIMScheduler,
}
if scheduler_type in scheduler_map and "FLUX" not in model_choice:
scheduler_class = scheduler_map[scheduler_type]
for pipe in [pipe_txt2img, pipe_img2img, pipe_controlnet]:
if pipe:
pipe.scheduler = scheduler_class.from_config(pipe.scheduler.config)
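    # FLUX models keep their default flow-matching scheduler; the DPM++/Euler/DDIM swap above only
    # applies to the SDXL-family pipelines.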
try:
control_image_preview = None
image = None
        # ControlNet mode
if use_controlnet and controlnet_type != "Nenhum" and input_image is not None and pipe_controlnet:
progress(0.2, "Processando ControlNet...")
control_image = preprocess_controlnet_image(input_image, controlnet_type, canny_low, canny_high)
control_image_preview = control_image
if control_image:
progress(0.4, "Gerando com ControlNet...")
gen_args = {
"prompt": enhanced_prompt,
"negative_prompt": final_negative,
"image": control_image,
"controlnet_conditioning_scale": controlnet_strength,
"guidance_scale": guidance_scale,
"num_inference_steps": num_steps,
"width": width,
"height": height,
"generator": generator,
}
                # IP-Adapter combined with ControlNet
if use_ip_adapter and ip_reference_image:
gen_args["ip_adapter_image"] = ip_reference_image
pipe_controlnet.set_ip_adapter_scale(ip_adapter_strength)
image = pipe_controlnet(**gen_args).images[0]
        # Image-to-image mode
elif input_image is not None:
progress(0.3, "Gerando img2img...")
input_image_resized = input_image.resize((width, height), Image.LANCZOS)
gen_args = {
"prompt": enhanced_prompt,
"negative_prompt": final_negative,
"image": input_image_resized,
"strength": strength,
"guidance_scale": guidance_scale,
"num_inference_steps": num_steps,
"generator": generator,
}
            # IP-Adapter with img2img
if use_ip_adapter and ip_reference_image:
gen_args["ip_adapter_image"] = ip_reference_image
pipe_img2img.set_ip_adapter_scale(ip_adapter_strength)
image = pipe_img2img(**gen_args).images[0]
        # Text-to-image mode
else:
progress(0.3, "Gerando do zero...")
gen_args = {
"prompt": enhanced_prompt,
"negative_prompt": final_negative,
"guidance_scale": guidance_scale,
"num_inference_steps": num_steps,
"width": width,
"height": height,
"generator": generator,
}
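            # NOTE: depending on the installed diffusers version, FLUX pipelines may ignore or reject
            # negative_prompt; any failure here is caught by the surrounding try/except.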
            # IP-Adapter with txt2img
if use_ip_adapter and ip_reference_image:
gen_args["ip_adapter_image"] = ip_reference_image
pipe_txt2img.set_ip_adapter_scale(ip_adapter_strength)
image = pipe_txt2img(**gen_args).images[0]
        # Post-processing
if image and (upscale_factor > 1 or enhance_faces):
progress(0.9, "Pós-processamento...")
image = post_process_image(image, upscale_factor, enhance_faces, denoise_strength)
progress(1.0, "✓ Concluído!")
return image, control_image_preview, enhanced_prompt, seed
except Exception as e:
print(f"❌ Erro na geração: {e}")
import traceback
traceback.print_exc()
return None, None, None, seed
# ===== PREMIUM GRADIO INTERFACE =====
css = """
#col-container {max-width: 1600px; margin: 0 auto; padding: 20px;}
.tab-nav button {font-size: 16px; font-weight: 600;}
.gr-button-primary {background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;}
"""
with gr.Blocks(css=css, theme=gr.themes.Soft(), title="IA Studio Ultimate") as demo:
gr.HTML("""
<div style="text-align: center; max-width: 1200px; margin: 0 auto;">
<h1 style="font-size: 3em; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">
🎨 IA Studio Ultimate
</h1>
<p style="font-size: 1.2em; color: #666;">
Sistema Completo: FLUX + SDXL + ControlNet + IP-Adapter + Upscale + Face Fix<br>
<strong>Qualidade Superior ao GPT-5 | Otimizado para 18GB VRAM</strong>
</p>
</div>
""")
with gr.Tabs():
        # ===== TAB 1: MAIN GENERATION =====
with gr.Tab("🎨 Geração"):
with gr.Row():
with gr.Column(scale=1):
prompt = gr.TextArea(
label="✍️ Prompt Principal",
placeholder="Descreva sua visão em detalhes...\nEx: A majestic dragon perched on ancient ruins, golden hour lighting, epic fantasy, 8k",
lines=5,
)
with gr.Accordion("🎨 Estilo & Qualidade", open=True):
with gr.Row():
style_preset = gr.Dropdown(
choices=["Fotorrealista Ultra", "Fantasia Épica", "Anime Studio", "Cinematográfico", "Pintura Digital Pro", "3D Render Premium", "Hiper-Realista", "RPG Character Art"],
value="Fotorrealista Ultra",
label="Preset de Estilo",
)
quality_boost = gr.Dropdown(
choices=["Máxima", "Alta", "Normal", "Desativado"],
value="Máxima",
label="Boost de Qualidade",
)
negative_preset = gr.Dropdown(
choices=["Padrão", "Forte", "Máximo", "Ultra (Anti-IA)", "Personalizado"],
value="Máximo",
label="Prompt Negativo",
)
custom_negative = gr.TextArea(
label="Negativo Personalizado",
visible=False,
lines=2,
)
input_image = gr.Image(
label="🖼️ Imagem de Entrada (img2img)",
type="pil",
sources=["upload", "webcam", "clipboard"],
)
run_button = gr.Button("🚀 GERAR OBRA-PRIMA", variant="primary", size="lg")
with gr.Column(scale=1):
result = gr.Image(label="✨ Resultado Final", show_download_button=True, show_share_button=True)
control_preview = gr.Image(label="🎛️ ControlNet Preview", visible=False)
enhanced_prompt_output = gr.TextArea(label="📝 Prompt Melhorado", lines=3)
seed_output = gr.Number(label="🎲 Seed Usado", precision=0)
# ===== TAB 2: CONTROLNET =====
with gr.Tab("🎛️ ControlNet"):
gr.Markdown("### Controle Preciso de Composição")
with gr.Row():
use_controlnet = gr.Checkbox(label="✓ Ativar ControlNet", value=False)
controlnet_type = gr.Dropdown(
choices=["Nenhum", "Canny (Contornos)", "Depth (Profundidade)", "OpenPose (Pose)", "Lineart (Desenho)", "Soft Edge (Bordas)", "Tile (Upscale)"],
value="Nenhum",
label="Tipo de Controle",
)
with gr.Row():
controlnet_strength = gr.Slider(0.0, 2.0, 1.0, step=0.05, label="💪 Força do ControlNet")
canny_low = gr.Slider(0, 255, 100, step=1, label="Canny Low")
canny_high = gr.Slider(0, 255, 200, step=1, label="Canny High")
gr.Markdown("""
**Guia de Uso:**
- **Canny**: Bordas precisas - perfeito para arquitetura e composição
- **Depth**: Preserva estrutura 3D - ideal para paisagens e ambientes
- **OpenPose**: Controle de poses humanas - essencial para personagens
- **Lineart**: Converte desenhos em arte finalizada
- **Soft Edge**: Bordas suaves para controle artístico
- **Tile**: Upscaling guiado para detalhes ultra-HD
""")
# ===== TAB 3: IP-ADAPTER =====
with gr.Tab("🖼️ IP-Adapter (Estilo por Imagem)"):
gr.Markdown("### Transfira Estilo Visual de Imagens de Referência")
with gr.Row():
use_ip_adapter = gr.Checkbox(label="✓ Ativar IP-Adapter", value=False)
ip_adapter_type = gr.Dropdown(
choices=["plus", "plus-face"],
value="plus",
label="Tipo de IP-Adapter",
)
ip_reference_image = gr.Image(
label="🎨 Imagem de Referência de Estilo",
type="pil",
sources=["upload", "clipboard"],
)
ip_adapter_strength = gr.Slider(
0.0, 1.0, 0.6, step=0.05,
label="💪 Força do IP-Adapter",
info="Quanto da referência aplicar"
)
gr.Markdown("""
**Como Funciona:**
- **IP-Adapter Plus**: Transfere estilo geral, composição e atmosfera
- **IP-Adapter Face**: Foco em características faciais e expressões
**Exemplos de Uso:**
1. Upload de foto de ator → Gera personagem RPG com mesmas features
2. Arte conceitual → Aplica mesmo estilo visual
3. Foto de cenário → Recria em estilo fantasia mantendo composição
""")
        # ===== TAB 4: ADVANCED SETTINGS =====
with gr.Tab("⚙️ Configurações Pro"):
with gr.Row():
with gr.Column():
gr.Markdown("### 🤖 Modelo & Engine")
model_choice = gr.Dropdown(
choices=list(MODELS.keys()),
value="FLUX.1-dev (Melhor)",
label="Modelo Base",
)
scheduler_type = gr.Dropdown(
choices=["Padrão", "DPM++ 2M", "Euler a", "DDIM"],
value="DPM++ 2M",
label="Scheduler (Algoritmo)",
)
with gr.Column():
gr.Markdown("### 📐 Dimensões")
with gr.Row():
width = gr.Slider(512, 1536, 1024, step=64, label="↔️ Largura")
height = gr.Slider(512, 1536, 1024, step=64, label="↕️ Altura")
aspect_ratio = gr.Radio(
choices=["1:1", "16:9", "9:16", "4:3", "3:4", "21:9"],
label="📐 Aspect Ratio Rápido",
value="1:1"
)
with gr.Row():
with gr.Column():
gr.Markdown("### 🎚️ Parâmetros de Geração")
guidance_scale = gr.Slider(
1.0, 20.0, 7.5, step=0.5,
label="🎯 CFG Scale (Fidelidade ao Prompt)",
info="7-8 = balanceado | 12+ = muito literal"
)
num_steps = gr.Slider(
20, 100, 35, step=1,
label="🔄 Passos de Inferência",
info="Mais passos = melhor qualidade (mais lento)"
)
strength = gr.Slider(
0.0, 1.0, 0.75, step=0.05,
label="💪 Strength (img2img)",
info="0.3 = leve | 0.7 = forte | 1.0 = totalmente novo"
)
with gr.Column():
gr.Markdown("### 🎲 Reprodutibilidade")
seed = gr.Number(
label="Seed",
value=0,
precision=0,
info="Use mesma seed + prompt para resultado idêntico"
)
randomize_seed = gr.Checkbox(
label="🔀 Seed Aleatória",
value=True
)
gr.Markdown("### 📊 Presets Rápidos")
preset_buttons = gr.Radio(
choices=["Qualidade Máxima", "Balanceado", "Rápido", "Ultra Rápido"],
label="Presets de Velocidade",
value="Balanceado"
)
        # ===== TAB 5: POST-PROCESSING =====
with gr.Tab("✨ Pós-Processamento"):
gr.Markdown("### 🚀 Upscaling e Correções Avançadas")
with gr.Row():
with gr.Column():
upscale_factor = gr.Slider(
1, 4, 1, step=1,
label="📈 Fator de Upscale (RealESRGAN)",
info="1 = sem upscale | 2 = 2x | 4 = 4x (demora mais)"
)
enhance_faces = gr.Checkbox(
label="✨ Corrigir Rostos (GFPGAN)",
value=False,
info="Melhora qualidade de rostos automaticamente"
)
denoise_strength = gr.Slider(
0.0, 1.0, 0.5, step=0.05,
label="🎭 Força de Denoise",
info="Remove artefatos e ruído"
)
with gr.Column():
gr.Markdown("""
**Tecnologias Incluídas:**
🔬 **RealESRGAN x4**
- Upscaling com IA de última geração
- Preserva detalhes e texturas
- Perfeito para impressão HD
👤 **GFPGAN v1.3**
- Correção automática de rostos
- Remove distorções faciais
- Melhora expressões e features
⚡ **Performance:**
- 4x upscale: ~10-15s extra
- Face fix: ~5s por rosto
- Processamento em GPU
""")
        # ===== TAB 6: EXAMPLES & TUTORIALS =====
with gr.Tab("📚 Exemplos & Tutoriais"):
gr.Markdown("## 🎓 Galeria de Exemplos Profissionais")
with gr.Tabs():
with gr.Tab("🏰 Cenários RPG"):
gr.Examples(
examples=[
["Ancient elven city built into giant trees, magical glowing runes, misty atmosphere, moss covered stone bridges, fantasy architecture, golden hour lighting, epic scale", None, None],
["Medieval tavern interior, warm fireplace, wooden tables with ale mugs, adventurers in leather armor, cozy atmosphere, candlelight, detailed textures", None, None],
["Dark vampire castle throne room, gothic architecture, red velvet curtains, moonlight through stained glass, ominous atmosphere, dramatic lighting", None, None],
["Underground dwarven forge, lava rivers, massive hammers and anvils, glowing molten metal, intricate metalwork, dramatic fire lighting", None, None],
],
inputs=[prompt, input_image, ip_reference_image],
)
with gr.Tab("👥 Personagens"):
gr.Examples(
examples=[
["Epic portrait of a female paladin, golden armor with holy symbols, long flowing hair, determined expression, divine light emanating, fantasy character art, highly detailed", None, None],
["Mysterious rogue character, dark leather armor, dual daggers, hood casting shadow over face, misty background, moody lighting, concept art", None, None],
["Powerful wizard with glowing staff, flowing robes with arcane symbols, long beard, magical energy swirling around, dramatic pose, fantasy art", None, None],
["Fierce orc warrior chieftain, battle-scarred armor, massive war axe, intimidating pose, tribal tattoos, stormy background, epic fantasy", None, None],
],
inputs=[prompt, input_image, ip_reference_image],
)
with gr.Tab("🎬 Cinematográfico"):
gr.Examples(
examples=[
["Cinematic shot of spaceship interior, crew members at control stations, holographic displays, blue atmospheric lighting, sci-fi movie still, anamorphic lens", None, None],
["Dramatic scene of lone warrior on cliff edge, epic sunset, wind blowing cape, silhouette against orange sky, cinematic composition, 35mm film", None, None],
["Post-apocalyptic city ruins, overgrown vegetation, abandoned cars, atmospheric fog, dramatic lighting, movie still, depth of field", None, None],
],
inputs=[prompt, input_image, ip_reference_image],
)
with gr.Tab("🌟 Hiper-Realista"):
gr.Examples(
examples=[
["Professional portrait photography of a female model, natural makeup, soft studio lighting, bokeh background, 85mm lens, fashion photography, hyperrealistic", None, None],
["Photorealistic rendering of luxury sports car, chrome details, reflective surface, studio lighting, professional automotive photography, 8k", None, None],
["Hyperrealistic close-up of exotic flower, water droplets on petals, macro photography, natural lighting, intricate details, botanical art", None, None],
],
inputs=[prompt, input_image, ip_reference_image],
)
gr.Markdown("""
---
## 💡 Guia de Prompts Profissionais
### 📝 Estrutura Ideal de Prompt:
```
[Assunto Principal] + [Detalhes Visuais] + [Atmosfera/Mood] + [Estilo Artístico] + [Qualidade]
```
### ✨ Palavras-Chave Poderosas:
**Para Realismo:**
`photorealistic, 8k uhd, professional photography, sharp focus, natural lighting, dslr, high quality`
**Para Fantasia:**
`epic fantasy, magical atmosphere, detailed textures, dramatic lighting, concept art, matte painting`
**Para Personagens:**
`detailed armor, intricate clothing, character portrait, expressive face, dynamic pose, hero shot`
**Para Iluminação:**
`golden hour, volumetric lighting, rim light, god rays, dramatic shadows, cinematic lighting`
**Para Atmosfera:**
`mystical, ominous, cozy, epic, serene, dramatic, moody, vibrant, ethereal`
### 🚫 Palavras para Evitar:
❌ Termos vagos: "good", "nice", "beautiful" (sem especificidade)
❌ Contradições: "realistic cartoon"
❌ Lista muito longa sem contexto
### 🎯 Exemplos de Prompts Otimizados:
**Ruim:**
`dragon`
**Bom:**
`red dragon breathing fire`
**Excelente:**
`Majestic ancient red dragon perched on mountain peak, scales glistening in sunset, wings spread wide, breathing streams of fire, stormy clouds, epic fantasy art, dramatic lighting, highly detailed, 8k`
""")
        # ===== TAB 7: SYSTEM INFO =====
with gr.Tab("ℹ️ Sistema"):
gr.Markdown(f"""
## 🖥️ Informações do Sistema
**Status:** ✅ Operacional
**Device:** {device.upper()}
**Precision:** {torch_dtype}
**VRAM Otimizada:** 18GB
---
## 🎨 Modelos Disponíveis
### Base Models:
{chr(10).join([f"- **{k}**: {v}" for k, v in MODELS.items()])}
### ControlNet Models:
{chr(10).join([f"- **{k}**: {v}" for k, v in CONTROLNET_MODELS.items()])}
---
## ⚡ Otimizações Ativas
✅ xFormers Memory Efficient Attention
✅ VAE Slicing & Tiling
✅ Attention Slicing
✅ Model Compilation (PyTorch 2.0+)
✅ Smart Model Caching
✅ FP16 Precision
---
## 🚀 Features Implementadas
### Geração:
- ✅ Text-to-Image (FLUX & SDXL)
- ✅ Image-to-Image
- ✅ Inpainting
- ✅ ControlNet (6 tipos)
- ✅ IP-Adapter (estilo por imagem)
### Pós-Processamento:
- ✅ RealESRGAN 4x Upscaling
- ✅ GFPGAN Face Enhancement
- ✅ Denoise Inteligente
### Prompt Engineering:
- ✅ 8 Presets de Estilo
- ✅ 4 Níveis de Qualidade
- ✅ 5 Presets de Negative
- ✅ Enhancement Automático
### Performance:
- ✅ Cache Inteligente de Modelos
- ✅ Schedulers Otimizados
- ✅ Batch Processing Ready
- ✅ Progress Tracking
---
## 📊 Comparação vs GPT-5
| Feature | IA Studio Ultimate | GPT-5/DALL-E 3 |
|---------|-------------------|----------------|
| **Qualidade** | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ |
| **Controle** | ⭐⭐⭐⭐⭐ | ⭐⭐⭐ |
| **ControlNet** | ✅ 6 tipos | ❌ |
| **IP-Adapter** | ✅ | ❌ |
| **Upscaling** | ✅ 4x | ❌ |
| **Face Fix** | ✅ | ❌ |
| **Modelos** | ✅ 7+ | 🔒 1 |
| **Seed Control** | ✅ | ⚠️ Limitado |
| **Offline** | ✅ | ❌ |
| **Custo** | 🆓 | 💰 |
| **Customização** | ⭐⭐⭐⭐⭐ | ⭐⭐ |
---
## 🎯 Vantagens Competitivas
1. **Controle Total**: ControlNet permite controle preciso impossível em outros sistemas
2. **IP-Adapter**: Transferência de estilo visual que GPT-5 não oferece
3. **Pós-Processamento**: Upscaling e correção facial integrados
4. **Reprodutibilidade**: Seeds garantem resultados idênticos
5. **Múltiplos Modelos**: Escolha o melhor para cada tarefa
6. **Sem Censura**: Controle total sobre conteúdo
7. **Offline**: Funciona sem internet após download
8. **Gratuito**: Zero custos por geração
---
## 📖 Dicas de Performance
**Para Máxima Qualidade:**
- Modelo: FLUX.1-dev ou RealVisXL
- Steps: 35-50
- CFG Scale: 7-8
- Upscale: 2x ou 4x
- Face Fix: Ativado
**Para Velocidade:**
- Modelo: FLUX.1-schnell ou SDXL-Turbo
- Steps: 20-25
- CFG Scale: 3-5
- Upscale: Desativado
**Para Precisão (ControlNet):**
- Use Canny para bordas
- Use Depth para estrutura 3D
- Strength: 0.7-1.2
- Combine com IP-Adapter
---
## 🆘 Troubleshooting
**Out of Memory:**
- Reduza resolution (1024→768)
- Desative upscaling
- Use SDXL-Turbo
**Resultado ruim:**
- Aumente steps (35+)
- Ajuste CFG scale (7-9)
- Use prompt negativo "Máximo"
- Adicione mais detalhes ao prompt
**Muito lento:**
- Use FLUX.1-schnell
- Reduza steps para 20-25
- Desative pós-processamento
---
**Versão:** 2.0 Ultimate
**Última Atualização:** 2025
**Desenvolvido para:** Criadores Profissionais, Artistas, Game Designers
""")
# ===== EVENT HANDLERS =====
    # Toggle the custom negative prompt textbox
def update_negative_visibility(preset):
return gr.update(visible=(preset == "Personalizado"))
negative_preset.change(
fn=update_negative_visibility,
inputs=[negative_preset],
outputs=[custom_negative],
)
    # Show the ControlNet preview image when enabled
def update_controlnet_preview(enabled):
return gr.update(visible=enabled)
use_controlnet.change(
fn=update_controlnet_preview,
inputs=[use_controlnet],
outputs=[control_preview],
)
    # Quick aspect-ratio presets
def apply_aspect_ratio(ratio, current_width):
ratios = {
"1:1": (1024, 1024),
"16:9": (1344, 768),
"9:16": (768, 1344),
"4:3": (1152, 896),
"3:4": (896, 1152),
"21:9": (1536, 640),
}
w, h = ratios.get(ratio, (1024, 1024))
return w, h
aspect_ratio.change(
fn=apply_aspect_ratio,
inputs=[aspect_ratio, width],
outputs=[width, height],
)
    # Speed presets
def apply_preset(preset):
presets = {
"Qualidade Máxima": (50, 8.0, "DPM++ 2M"),
"Balanceado": (35, 7.5, "DPM++ 2M"),
"Rápido": (25, 6.0, "Euler a"),
"Ultra Rápido": (20, 4.0, "Euler a"),
}
steps, cfg, scheduler = presets.get(preset, (35, 7.5, "DPM++ 2M"))
return steps, cfg, scheduler
preset_buttons.change(
fn=apply_preset,
inputs=[preset_buttons],
outputs=[num_steps, guidance_scale, scheduler_type],
)
    # Main generation event
run_button.click(
fn=infer,
inputs=[
prompt, input_image, ip_reference_image,
use_controlnet, controlnet_type, controlnet_strength, canny_low, canny_high,
use_ip_adapter, ip_adapter_type, ip_adapter_strength,
model_choice, style_preset, quality_boost, negative_preset, custom_negative,
seed, randomize_seed, width, height, guidance_scale, num_steps, strength,
scheduler_type, upscale_factor, enhance_faces, denoise_strength,
],
outputs=[result, control_preview, enhanced_prompt_output, seed_output],
)
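    # Programmatic usage sketch (bypasses the UI; argument order mirrors the inputs list above):
    #   image, control_preview, final_prompt, used_seed = infer(
    #       "a castle at sunset", None, None,
    #       False, "Nenhum", 1.0, 100, 200,
    #       False, "plus", 0.6,
    #       "SDXL-Base", "Fotorrealista Ultra", "Alta", "Padrão", "",
    #       0, True, 1024, 1024, 7.5, 35, 0.75,
    #       "DPM++ 2M", 1, False, 0.5,
    #   )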
if __name__ == "__main__":
demo.queue(max_size=20).launch(
share=True,
show_error=True,
server_name="0.0.0.0",
server_port=7860,
show_api=False,
)