import spaces
import torch
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
import random
import requests
from torchao.quantization import quantize_
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
from torchao.quantization import Int8WeightOnlyConfig
import aoti
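# --- Model and output constraints ---
# Output sides are clamped to [MIN_DIM, MAX_DIM] (SQUARE_DIM for square inputs) and
# rounded to multiples of MULTIPLE_OF; clip length is 8-80 frames at a fixed 16 fps.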
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
MAX_DIM = 832
MIN_DIM = 480
SQUARE_DIM = 640
MULTIPLE_OF = 16
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 16
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 80
MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS, 1)
# --- Translation Functions ---
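# Note: this helper is not referenced by the UI below. The @spaces.GPU decorator
# reserves a ZeroGPU worker even though the body is a plain HTTP call.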
@spaces.GPU
def translate_albanian_to_english(text):
"""Translate from Albanian to English using the sepioo-facebook-translation API."""
if not text.strip():
raise gr.Error("Please enter a description.")
for attempt in range(2):
try:
response = requests.post(
"https://hal1993-mdftranslation1234567890abcdef1234567890-fc073a6.hf.space/v1/translate",
json={"from_language": "sq", "to_language": "en", "input_text": text},
headers={"accept": "application/json", "Content-Type": "application/json"},
timeout=5
)
response.raise_for_status()
translated = response.json().get("translate", "")
print(f"Translation response (sq->en): {translated}")
return translated
except Exception as e:
print(f"Translation error (attempt {attempt + 1}): {e}")
if attempt == 1:
raise gr.Error("Translation failed. Please try again.")
    raise gr.Error("Translation failed. Please try again.")  # safety net; unreachable, since the final attempt above already raises
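# --- Pipeline setup ---
# Load both Wan2.2 denoising stages (transformer / transformer_2) in bf16 directly onto the GPU.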
pipe = WanImageToVideoPipeline.from_pretrained(
MODEL_ID,
transformer=WanTransformer3DModel.from_pretrained(
'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
subfolder='transformer',
torch_dtype=torch.bfloat16,
device_map='cuda',
),
transformer_2=WanTransformer3DModel.from_pretrained(
'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
subfolder='transformer_2',
torch_dtype=torch.bfloat16,
device_map='cuda',
),
torch_dtype=torch.bfloat16,
).to('cuda')
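# Fuse the Lightx2v CFG-step-distillation LoRA into both transformers so the
# pipeline produces usable video in a handful of inference steps (6 below).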
pipe.load_lora_weights(
"Kijai/WanVideo_comfy",
weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
adapter_name="lightx2v"
)
kwargs_lora = {"load_into_transformer_2": True}
pipe.load_lora_weights(
"Kijai/WanVideo_comfy",
weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
adapter_name="lightx2v_2",
**kwargs_lora
)
pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
pipe.unload_lora_weights()
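# Quantize after LoRA fusion: int8 weight-only for the text encoder, fp8 dynamic
# activation/weight for both transformers, then load ahead-of-time compiled
# transformer blocks that match the fp8 variant.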
quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
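# Standard Wan-style negative prompt, kept in Chinese as published with the model. It lists
# artifacts to suppress: oversaturation, overexposure, static or blurry frames, subtitles,
# worst/low quality, JPEG artifacts, deformed faces/hands/limbs, fused or extra fingers,
# extra legs, cluttered or crowded backgrounds, walking backwards.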
default_negative_prompt = "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, 整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, 画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
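# Center-crop inputs whose aspect ratio falls outside [MIN_DIM/MAX_DIM, MAX_DIM/MIN_DIM],
# then resize so both sides are multiples of MULTIPLE_OF within [MIN_DIM, MAX_DIM].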
def resize_image(image: Image.Image) -> Image.Image:
width, height = image.size
if width == height:
return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
aspect_ratio = width / height
MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
image_to_resize = image
if aspect_ratio > MAX_ASPECT_RATIO:
target_w, target_h = MAX_DIM, MIN_DIM
crop_width = int(round(height * MAX_ASPECT_RATIO))
left = (width - crop_width) // 2
image_to_resize = image.crop((left, 0, left + crop_width, height))
elif aspect_ratio < MIN_ASPECT_RATIO:
target_w, target_h = MIN_DIM, MAX_DIM
crop_height = int(round(width / MIN_ASPECT_RATIO))
top = (height - crop_height) // 2
image_to_resize = image.crop((0, top, width, top + crop_height))
else:
if width > height:
target_w = MAX_DIM
target_h = int(round(target_w / aspect_ratio))
else:
target_h = MAX_DIM
target_w = int(round(target_h * aspect_ratio))
final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
final_w = max(MIN_DIM, min(MAX_DIM, final_w))
final_h = max(MIN_DIM, min(MAX_DIM, final_h))
return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
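# Map a duration in seconds to a frame count: round to whole frames at 16 fps,
# clamp to the model's 8-80 frame range, then add one.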
def get_num_frames(duration_seconds: float):
return 1 + int(np.clip(
int(round(duration_seconds * FIXED_FPS)),
MIN_FRAMES_MODEL,
MAX_FRAMES_MODEL,
))
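# Estimate GPU seconds for one generation so the ZeroGPU reservation can be sized:
# cost scales with frames x pixels relative to an 81-frame 832x624 baseline,
# with a ~10 s warmup and a hard 30 s cap.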
def get_duration(
input_image,
prompt,
steps,
negative_prompt,
duration_seconds,
guidance_scale,
guidance_scale_2,
seed,
randomize_seed,
progress,
):
BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
BASE_STEP_DURATION = 15
width, height = resize_image(input_image).size
frames = get_num_frames(duration_seconds)
factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
step_duration = BASE_STEP_DURATION * factor ** 1.5
est = 10 + int(steps) * step_duration
MAX_ALLOWED = 30
return min(est, MAX_ALLOWED)
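# get_duration receives the same arguments as generate_video and sizes the GPU reservation.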
@spaces.GPU(duration=get_duration)
def generate_video(
input_image,
prompt,
steps=6,
negative_prompt=default_negative_prompt,
duration_seconds=3.2,
guidance_scale=1.5,
guidance_scale_2=1.5,
seed=42,
randomize_seed=False,
progress=gr.Progress(track_tqdm=True),
):
if input_image is None:
raise gr.Error("Please upload an input image.")
num_frames = get_num_frames(duration_seconds)
current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
resized_image = resize_image(input_image)
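    # Run the two-stage pipeline at the resized resolution; .frames[0] is the generated clip.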
output_frames_list = pipe(
image=resized_image,
prompt=prompt,
negative_prompt=negative_prompt,
height=resized_image.height,
width=resized_image.width,
num_frames=num_frames,
guidance_scale=float(guidance_scale),
guidance_scale_2=float(guidance_scale_2),
num_inference_steps=int(steps),
generator=torch.Generator(device="cuda").manual_seed(current_seed),
).frames[0]
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
video_path = tmpfile.name
export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
return video_path, current_seed
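# --- UI ---
# The embedded CSS/JS below replaces Gradio's built-in progress indicators
# with a custom black "PROCESSING..." overlay on the output container.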
def create_demo():
with gr.Blocks(css="", title="Fast Image to Video") as demo:
gr.HTML("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;600;700&display=swap');
@keyframes glow {
0% { box-shadow: 0 0 14px rgba(0, 255, 128, 0.5); }
50% { box-shadow: 0 0 14px rgba(0, 255, 128, 0.7); }
100% { box-shadow: 0 0 14px rgba(0, 255, 128, 0.5); }
}
@keyframes glow-hover {
0% { box-shadow: 0 0 20px rgba(0, 255, 128, 0.7); }
50% { box-shadow: 0 0 20px rgba(0, 255, 128, 0.9); }
100% { box-shadow: 0 0 20px rgba(0, 255, 128, 0.7); }
}
@keyframes slide {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
@keyframes pulse {
0%, 100% { opacity: 0.7; }
50% { opacity: 1; }
}
@keyframes typewriter {
0% { width: 0; }
100% { width: 100%; }
}
body {
background: #000000 !important;
color: #FFFFFF !important;
font-family: 'Orbitron', sans-serif;
min-height: 100vh;
margin: 0 !important;
padding: 0 !important;
width: 100% !important;
max-width: 100vw !important;
overflow-x: hidden !important;
display: flex !important;
justify-content: center;
align-items: center;
flex-direction: column;
}
body::before {
content: "";
display: block;
height: 600px;
background: #000000 !important;
}
.gr-blocks, .container {
width: 100% !important;
max-width: 100vw !important;
margin: 0 !important;
padding: 0 !important;
box-sizing: border-box !important;
overflow-x: hidden !important;
background: #000000 !important;
color: #FFFFFF !important;
}
#general_items {
width: 100% !important;
max-width: 100vw !important;
margin: 2rem 0 !important;
display: flex !important;
flex-direction: column;
align-items: center;
justify-content: center;
background: #000000 !important;
color: #FFFFFF !important;
}
#input_column {
background: #000000 !important;
border: none !important;
border-radius: 8px;
padding: 1rem !important;
box-shadow: 0 0 10px rgba(255, 255, 255, 0.3) !important;
width: 100% !important;
max-width: 100vw !important;
box-sizing: border-box !important;
color: #FFFFFF !important;
}
h1 {
font-size: 5rem;
font-weight: 700;
text-align: center;
color: #FFFFFF !important;
text-shadow: 0 0 8px rgba(255, 255, 255, 0.3) !important;
margin: 0 auto 0.5rem auto;
display: block;
max-width: 100%;
}
#subtitle {
font-size: 1rem;
text-align: center;
color: #FFFFFF !important;
opacity: 0.8;
margin-bottom: 1rem;
display: block;
max-width: 100%;
}
.gradio-component {
background: #000000 !important;
border: none;
margin: 0.75rem 0;
width: 100% !important;
max-width: 100vw !important;
color: #FFFFFF !important;
}
.image-container {
aspect-ratio: 1/1;
width: 100% !important;
max-width: 100vw !important;
min-height: 500px;
height: auto;
border: 0.5px solid #FFFFFF !important;
border-radius: 4px;
box-sizing: border-box !important;
background: #000000 !important;
box-shadow: 0 0 10px rgba(255, 255, 255, 0.3) !important;
position: relative;
color: #FFFFFF !important;
overflow: hidden !important;
}
.image-container img, .image-container video {
width: 100% !important;
height: auto;
box-sizing: border-box !important;
display: block !important;
}
    /* Hide Gradio's built-in progress/status elements with a broad selector net */
.image-container[aria-label="Generated Video"] .progress-text,
.image-container[aria-label="Generated Video"] .gr-progress,
.image-container[aria-label="Generated Video"] .gr-progress-bar,
.image-container[aria-label="Generated Video"] .progress-bar,
.image-container[aria-label="Generated Video"] [data-testid="progress"],
.image-container[aria-label="Generated Video"] .status,
.image-container[aria-label="Generated Video"] .loading,
.image-container[aria-label="Generated Video"] .spinner,
.image-container[aria-label="Generated Video"] .gr-spinner,
.image-container[aria-label="Generated Video"] .gr-loading,
.image-container[aria-label="Generated Video"] .gr-status,
.image-container[aria-label="Generated Video"] .gpu-init,
.image-container[aria-label="Generated Video"] .initializing,
.image-container[aria-label="Generated Video"] .queue,
.image-container[aria-label="Generated Video"] .queued,
.image-container[aria-label="Generated Video"] .waiting,
.image-container[aria-label="Generated Video"] .processing,
.image-container[aria-label="Generated Video"] .gradio-progress,
.image-container[aria-label="Generated Video"] .gradio-status,
.image-container[aria-label="Generated Video"] div[class*="progress"],
.image-container[aria-label="Generated Video"] div[class*="loading"],
.image-container[aria-label="Generated Video"] div[class*="status"],
.image-container[aria-label="Generated Video"] div[class*="spinner"],
.image-container[aria-label="Generated Video"] *[class*="progress"],
.image-container[aria-label="Generated Video"] *[class*="loading"],
.image-container[aria-label="Generated Video"] *[class*="status"],
.image-container[aria-label="Generated Video"] *[class*="spinner"],
.image-container[aria-label="Generated Video"] .gr-video > div > div:not(.gr-video video),
.image-container[aria-label="Generated Video"] .gr-video > div > *:not(video),
.image-container[aria-label="Generated Video"] .gr-video > *:not(video),
.progress-text, .gr-progress, .gr-progress-bar, .progress-bar,
[data-testid="progress"], .status, .loading, .spinner, .gr-spinner,
.gr-loading, .gr-status, .gpu-init, .initializing, .queue,
.queued, .waiting, .processing, .gradio-progress, .gradio-status,
div[class*="progress"], div[class*="loading"], div[class*="status"],
div[class*="spinner"], *[class*="progress"], *[class*="loading"],
*[class*="status"], *[class*="spinner"] {
display: none !important;
visibility: hidden !important;
opacity: 0 !important;
height: 0 !important;
width: 0 !important;
font-size: 0 !important;
line-height: 0 !important;
padding: 0 !important;
margin: 0 !important;
position: absolute !important;
left: -9999px !important;
top: -9999px !important;
z-index: -9999 !important;
pointer-events: none !important;
overflow: hidden !important;
}
    /* Hide upload/download toolbars on the input image and output video containers */
.image-container[aria-label="Input Image"] .file-upload,
.image-container[aria-label="Input Image"] .file-preview,
.image-container[aria-label="Input Image"] .image-actions,
.image-container[aria-label="Input Image"] .gr-file-upload,
.image-container[aria-label="Input Image"] .gr-file,
.image-container[aria-label="Input Image"] .gr-actions,
.image-container[aria-label="Input Image"] .gr-upload-button,
.image-container[aria-label="Input Image"] .gr-image-toolbar,
.image-container[aria-label="Input Image"] .gr-file-actions,
.image-container[aria-label="Input Image"] .gr-upload-options,
div[aria-label="Input Image"] > div > div:not(.image-container),
div[aria-label="Input Image"] .gr-button,
.image-container[aria-label="Generated Video"] .file-upload,
.image-container[aria-label="Generated Video"] .file-preview,
.image-container[aria-label="Generated Video"] .image-actions,
.image-container[aria-label="Generated Video"] .gr-file-upload,
.image-container[aria-label="Generated Video"] .gr-file,
.image-container[aria-label="Generated Video"] .gr-actions,
.image-container[aria-label="Generated Video"] .gr-upload-button,
.image-container[aria-label="Generated Video"] .gr-image-toolbar,
.image-container[aria-label="Generated Video"] .gr-file-actions,
.image-container[aria-label="Generated Video"] .gr-upload-options,
div[aria-label="Generated Video"] > div > div:not(.image-container),
div[aria-label="Generated Video"] .gr-button {
display: none !important;
}
    /* Custom processing overlay: forced black screen with pulsing "PROCESSING..." text */
.image-container[aria-label="Generated Video"].processing {
background: #000000 !important;
position: relative !important;
}
.image-container[aria-label="Generated Video"].processing::before {
content: "PROCESSING...";
position: absolute !important;
top: 50% !important;
left: 50% !important;
transform: translate(-50%, -50%) !important;
color: #FFFFFF !important;
font-family: 'Orbitron', sans-serif !important;
font-size: 1.8rem !important;
font-weight: 700 !important;
text-align: center !important;
text-shadow: 0 0 10px rgba(0, 255, 128, 0.8) !important;
animation: pulse 1.5s ease-in-out infinite, glow 2s ease-in-out infinite !important;
z-index: 9999 !important;
width: 100% !important;
height: 100% !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
pointer-events: none !important;
background: #000000 !important;
border-radius: 4px !important;
box-sizing: border-box !important;
}
.image-container[aria-label="Generated Video"].processing * {
display: none !important;
}
.image-container[aria-label="Generated Video"].processing video,
.image-container[aria-label="Generated Video"].processing img {
display: none !important;
}
input, textarea, .gr-dropdown, .gr-dropdown select {
background: #000000 !important;
color: #FFFFFF !important;
border: 1px solid #FFFFFF !important;
border-radius: 4px;
padding: 0.5rem;
width: 100% !important;
max-width: 100vw !important;
box-sizing: border-box !important;
}
input:hover, textarea:hover, .gr-dropdown:hover, .gr-dropdown select:hover {
box-shadow: 0 0 8px rgba(255, 255, 255, 0.3) !important;
transition: box-shadow 0.3s;
}
.gr-button-primary {
background: linear-gradient(90deg, rgba(0, 255, 128, 0.3), rgba(0, 200, 100, 0.3), rgba(0, 255, 128, 0.3)) !important;
background-size: 200% 100%;
animation: slide 4s ease-in-out infinite, glow 3s ease-in-out infinite;
color: #FFFFFF !important;
border: 1px solid #FFFFFF !important;
border-radius: 6px;
padding: 0.75rem 1.5rem;
font-size: 1.1rem;
font-weight: 600;
box-shadow: 0 0 14px rgba(0, 255, 128, 0.7) !important;
transition: box-shadow 0.3s, transform 0.3s;
width: 100% !important;
max-width: 100vw !important;
min-height: 48px;
cursor: pointer;
}
.gr-button-primary:hover {
box-shadow: 0 0 20px rgba(0, 255, 128, 0.9) !important;
animation: slide 4s ease-in-out infinite, glow-hover 3s ease-in-out infinite;
transform: scale(1.05);
}
button[aria-label="Fullscreen"], button[aria-label="Fullscreen"]:hover,
button[aria-label="Share"], button[aria-label="Share"]:hover {
display: none !important;
}
button[aria-label="Download"] {
transform: scale(3);
transform-origin: top right;
background: #000000 !important;
color: #FFFFFF !important;
border: 1px solid #FFFFFF !important;
border-radius: 4px;
padding: 0.4rem !important;
margin: 0.5rem !important;
box-shadow: 0 0 8px rgba(255, 255, 255, 0.3) !important;
transition: box-shadow 0.3s;
}
button[aria-label="Download"]:hover {
box-shadow: 0 0 12px rgba(255, 255, 255, 0.5) !important;
}
footer, .gr-button-secondary {
display: none !important;
}
.gr-group {
background: #000000 !important;
border: none !important;
width: 100% !important;
max-width: 100vw !important;
}
@media (max-width: 768px) {
h1 { font-size: 4rem; }
#subtitle { font-size: 0.9rem; }
.gr-button-primary {
padding: 0.6rem 1rem;
font-size: 1rem;
box-shadow: 0 0 10px rgba(0, 255, 128, 0.7) !important;
}
.gr-button-primary:hover {
box-shadow: 0 0 12px rgba(0, 255, 128, 0.9) !important;
}
.image-container {
min-height: 300px;
box-shadow: 0 0 8px rgba(255, 255, 255, 0.3) !important;
}
.image-container[aria-label="Generated Video"].processing::before {
font-size: 1.2rem !important;
}
}
</style>
<script>
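    // Path-token gate: render a fake 500 page unless the URL contains the expected token.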
if (!window.location.pathname.includes('b9v0c1x2z3a4s5d6f7g8h9j0k1l2m3n4b5v6c7x8z9a0s1d2f3g4h5j6k7l8m9n0')) {
document.body.innerHTML = '<h1 style="color:#ef4444;font-family:sans-serif;text-align:center;margin-top:100px;">500 Internal Server Error</h1>';
throw new Error('500');
}
document.addEventListener('DOMContentLoaded', () => {
const forceHideGradioProcessing = () => {
const selectors = [
'.progress-text', '.gr-progress', '.gr-progress-bar', '.progress-bar',
'[data-testid="progress"]', '.status', '.loading', '.spinner', '.gr-spinner',
'.gr-loading', '.gr-status', '.gpu-init', '.initializing', '.queue',
'.queued', '.waiting', '.processing', '.gradio-progress', '.gradio-status',
'div[class*="progress"]', 'div[class*="loading"]', 'div[class*="status"]',
'div[class*="spinner"]', '*[class*="progress"]', '*[class*="loading"]',
'*[class*="status"]', '*[class*="spinner"]'
];
selectors.forEach(selector => {
document.querySelectorAll(selector).forEach(el => {
                // Direct .style assignment ignores a "!important" suffix; setProperty
                // with the 'important' priority is needed to override Gradio's CSS.
                el.style.setProperty('display', 'none', 'important');
                el.style.setProperty('visibility', 'hidden', 'important');
                el.style.setProperty('opacity', '0', 'important');
                el.style.setProperty('height', '0', 'important');
                el.style.setProperty('width', '0', 'important');
                el.style.setProperty('position', 'absolute', 'important');
                el.style.setProperty('left', '-9999px', 'important');
                el.style.setProperty('top', '-9999px', 'important');
                el.style.setProperty('z-index', '-9999', 'important');
});
});
};
forceHideGradioProcessing();
const observer = new MutationObserver(forceHideGradioProcessing);
observer.observe(document.body, { childList: true, subtree: true });
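    // Swap in the custom "PROCESSING..." overlay while generation runs and
    // remove it as soon as a <video> element appears in the output container.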
const generateButton = document.querySelector('.gr-button-primary');
const outputContainer = document.querySelector('.image-container[aria-label="Generated Video"]');
if (generateButton && outputContainer) {
generateButton.addEventListener('click', () => {
outputContainer.classList.add('processing');
const allChildren = outputContainer.querySelectorAll('*');
allChildren.forEach(child => {
if (child.tagName !== 'VIDEO' && child.tagName !== 'IMG') {
                child.style.setProperty('display', 'none', 'important');
}
});
});
const videoObserver = new MutationObserver((mutations) => {
mutations.forEach((mutation) => {
if (mutation.addedNodes.length > 0) {
mutation.addedNodes.forEach((node) => {
if (node.nodeType === 1 && (node.tagName === 'VIDEO' || node.querySelector('video'))) {
outputContainer.classList.remove('processing');
videoObserver.disconnect();
}
});
}
});
});
videoObserver.observe(outputContainer, { childList: true, subtree: true });
}
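    // Periodic sweep: re-hide and remove any progress elements Gradio re-creates.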
setInterval(() => {
forceHideGradioProcessing();
const processingEls = document.querySelectorAll('.progress-text, .gr-progress, [class*="progress"]');
processingEls.forEach(el => el.remove());
}, 500);
});
</script>
""")
with gr.Row(elem_id="general_items"):
gr.Markdown("# ")
gr.Markdown("Convert an image into an animated video with prompt description.", elem_id="subtitle")
with gr.Column(elem_id="input_column"):
input_image = gr.Image(
type="pil",
label="Input Image",
sources=["upload"],
show_download_button=False,
show_share_button=False,
interactive=True,
elem_classes=["gradio-component", "image-container"]
)
prompt = gr.Textbox(
label="Prompt",
value=default_prompt_i2v,
lines=3,
placeholder="Describe the desired animation or motion",
elem_classes=["gradio-component"]
)
generate_button = gr.Button(
"Generate Video",
variant="primary",
elem_classes=["gradio-component", "gr-button-primary"]
)
output_video = gr.Video(
label="Generated Video",
autoplay=True,
interactive=False,
show_download_button=True,
show_share_button=False,
elem_classes=["gradio-component", "image-container"]
)
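            # Inline gr.State components pin the advanced parameters to fixed defaults
            # (6 steps, 3.2 s, guidance 1.5/1.5, randomized seed); the trailing State
            # output absorbs the returned seed so it is never displayed.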
generate_button.click(
fn=generate_video,
inputs=[
input_image,
prompt,
gr.State(value=6),
gr.State(value=default_negative_prompt),
gr.State(value=3.2),
gr.State(value=1.5),
gr.State(value=1.5),
gr.State(value=42),
gr.State(value=True),
],
outputs=[output_video, gr.State(value=42)],
)
return demo
if __name__ == "__main__":
demo = create_demo()
demo.queue().launch(share=True)