Upload 3 files

Files changed:
- README.md (+8 −17)
- app.py (+385 −997)
- requirements.txt (+10 −42)
README.md (CHANGED)

@@ -1,21 +1,12 @@
 ---
-title: […truncated in extraction…]
+title: Wan 2 2 First Last Frame
+emoji: 💻
+colorFrom: purple
+colorTo: gray
 sdk: gradio
-emoji: 📷
 sdk_version: 5.29.1
 app_file: app.py
-[…4 removed lines truncated in extraction…]
-- Upscaling
-- Restoring
-- Image-to-Image
-- Image-2-Image
-- Img-to-Img
-- Img-2-Img
-- language models
-- LLMs
-short_description: Restore blurred or small images with prompt
-suggested_hardware: zero-a10g
----
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py (CHANGED)

@@ -1,1058 +1,446 @@
[…only the removed side of this diff was captured; the new file's added lines are missing from the extraction…]
 import os
 import gradio as gr
-import […truncated…]
 import numpy as np
-import […truncated…]
-import einops
-import copy
-import math
-import time
 import random
-
 
-[…2 removed lines truncated in extraction…]
-except:
-    class spaces():
-        def GPU(*args, **kwargs):
-            def decorator(function):
-                return lambda *dummy_args, **dummy_kwargs: function(*dummy_args, **dummy_kwargs)
-            return decorator
 
-
-
 
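The removed block above is the usual ZeroGPU import guard: when the `spaces` package is unavailable (that is, off Hugging Face ZeroGPU hardware), a stub takes its place so `@spaces.GPU` decorators become no-ops. A minimal, self-contained sketch of that pattern follows; it is not the author's exact code, whose `try` line is truncated above.

```python
# Sketch of the ZeroGPU fallback pattern: if the `spaces` package (present on
# Hugging Face ZeroGPU Spaces) is missing, install a stub whose GPU decorator
# returns the function unchanged, so the same script also runs locally.
try:
    import spaces
except ImportError:
    class spaces:
        @staticmethod
        def GPU(*args, **kwargs):
            # Handle both @spaces.GPU and @spaces.GPU(duration=...)
            if len(args) == 1 and callable(args[0]) and not kwargs:
                return args[0]
            def decorator(function):
                return function
            return decorator
```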
-[…5 removed lines truncated in extraction…]
-import pillow_heif
-
-pillow_heif.register_heif_opener()
 
-
 
-[…3 removed lines truncated in extraction…]
-hf_hub_download(repo_id="camenduru/SUPIR", filename="SUPIR-v0Q.ckpt", local_dir="yushan777_SUPIR")
-hf_hub_download(repo_id="RunDiffusion/Juggernaut-XL-Lightning", filename="Juggernaut_RunDiffusionPhoto2_Lightning_4Steps.safetensors", local_dir="RunDiffusion_Juggernaut-XL-Lightning")
 
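The removed startup code pulled two checkpoints from the Hub before building the model. A sketch of one of those `huggingface_hub` calls in isolation (the import itself is truncated above, so its exact form is assumed):

```python
# Sketch of the removed checkpoint download: hf_hub_download fetches a single
# file from a Hub repository and returns the local path; downloads are cached,
# so repeated startups reuse the existing copy.
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="camenduru/SUPIR",    # Hub repository holding the checkpoint
    filename="SUPIR-v0Q.ckpt",    # file to fetch from that repository
    local_dir="yushan777_SUPIR",  # directory to materialize the file into
)
print(ckpt_path)
```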
-[…2 removed lines truncated in extraction…]
-parser.add_argument("--ip", type=str, default='127.0.0.1')
-parser.add_argument("--port", type=int, default='6688')
-parser.add_argument("--no_llava", action='store_true', default=True)#False
-parser.add_argument("--use_image_slider", action='store_true', default=False)#False
-parser.add_argument("--log_history", action='store_true', default=False)
-parser.add_argument("--loading_half_params", action='store_true', default=False)#False
-parser.add_argument("--use_tile_vae", action='store_true', default=True)#False
-parser.add_argument("--encoder_tile_size", type=int, default=512)
-parser.add_argument("--decoder_tile_size", type=int, default=64)
-parser.add_argument("--load_8bit_llava", action='store_true', default=False)
-args = parser.parse_args()
 
 input_image_debug_value = [None]
 prompt_debug_value = [None]
-
 
-
-    […truncated…]
 
-    # Load SUPIR
-    model, default_setting = create_SUPIR_model(args.opt, SUPIR_sign='Q', load_default_setting=True)
-    if args.loading_half_params:
-        model = model.half()
-    if args.use_tile_vae:
-        model.init_tile_vae(encoder_tile_size=args.encoder_tile_size, decoder_tile_size=args.decoder_tile_size)
-    model = model.to(SUPIR_device)
-    model.first_stage_model.denoise_encoder_s1 = copy.deepcopy(model.first_stage_model.denoise_encoder)
-    model.current_model = 'v0-Q'
-    ckpt_Q, ckpt_F = load_QF_ckpt(args.opt)
 
-[…4 removed lines truncated in extraction…]
 
-
-    if […truncated…]
-        […truncated…]
-    return seed
 
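The helper ending in `return seed` is truncated, but the surrounding code (a `randomize_seed` flag and `random.randint(0, max_64_bit_int)` used as a default elsewhere in the file) points to the standard seed-randomization guard. A sketch under that assumption; the function name is invented for illustration:

```python
import random

max_64_bit_int = 2**63 - 1  # same constant the file uses for seed defaults

def check_seed(randomize_seed: bool, seed: int) -> int:
    # Hypothetical reconstruction: draw a fresh seed when randomization is
    # requested, otherwise pass the user-chosen seed through unchanged.
    if randomize_seed:
        seed = random.randint(0, max_64_bit_int)
    return seed
```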
-[…2 removed lines truncated in extraction…]
-        […truncated…]
-        0,
-        None,
-        None,
-        "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
-        "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
-        1,
-        1024,
-        1,
-        2,
-        50,
-        -1.0,
-        1.,
-        default_setting.s_cfg_Quality if torch.cuda.device_count() > 0 else 1.0,
-        True,
-        random.randint(0, max_64_bit_int),
-        5,
-        1.003,
-        "Wavelet",
-        "fp32",
-        "fp32",
-        1.0,
-        True,
-        default_setting.spt_linear_CFG_Quality if torch.cuda.device_count() > 0 else 1.0,
-        False,
-        0.,
-        "v0-Q",
-        "input",
-        179
-    ]
 
-[…about 15 removed lines truncated in extraction…]
-        return None, None, gr.update(interactive = False)
-    torch.cuda.set_device(SUPIR_device)
-    LQ = HWC3(np.array(Image.open(input_image)))
-    LQ = fix_resize(LQ, 512)
-    # stage1
-    LQ = np.array(LQ) / 255 * 2 - 1
-    LQ = torch.tensor(LQ, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0).to(SUPIR_device)[:, :3, :, :]
 
-[…2 removed lines truncated in extraction…]
 
-[…3 removed lines truncated in extraction…]
-    LQ = LQ / 255.0
-    LQ = np.power(LQ, gamma_correction)
-    LQ *= 255.0
-    LQ = LQ.round().clip(0, 255).astype(np.uint8)
-    print('<<== stage1_process')
-    return LQ, gr.update(visible = True)
 
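The removed stage-1 tail applies gamma correction in floating point before re-quantizing to 8-bit. Isolated as a sketch; the wrapper function is invented, while the arithmetic is taken verbatim from the removed lines:

```python
import numpy as np

def apply_gamma(image_u8: np.ndarray, gamma_correction: float) -> np.ndarray:
    # Normalize to [0, 1], apply the power-law (gamma) curve, then convert
    # back to uint8, mirroring the removed stage-1 post-processing.
    lq = image_u8 / 255.0
    lq = np.power(lq, gamma_correction)
    lq *= 255.0
    return lq.round().clip(0, 255).astype(np.uint8)
```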
-def stage2_process_example(*args, **kwargs):
-    [result_slider, result_gallery, restore_information, reset_btn, warning, dummy_button] = restore_in_Xmin(*args, **kwargs)
-    #outputs_folder = './outputs/'
-    outputs_folder = './tmp/'
-    os.makedirs(outputs_folder, exist_ok=True)
-    output_filename = os.path.join(outputs_folder, datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '.png')
-    print(output_filename)
-    iio.imwrite(output_filename, result_slider[1], format="png")
-    return [gr.update(visible = True, value=output_filename), warning, dummy_button, gr.skip()]
 
-def […truncated…]
-[…about 21 removed lines truncated in extraction…]
     prompt,
-[…8 removed lines truncated in extraction…]
-    s_stage2,
-    s_cfg,
-    randomize_seed,
-    seed,
-    s_churn,
-    s_noise,
-    color_fix_type,
-    diff_dtype,
-    ae_dtype,
-    gamma_correction,
-    linear_CFG,
-    spt_linear_CFG,
-    linear_s_stage2,
-    spt_linear_s_stage2,
-    model_select,
-    output_format,
-    allocation
 ):
-[…about 23 removed lines truncated in extraction…]
-    print("linear_s_stage2: " + str(linear_s_stage2))
-    print("spt_linear_CFG: " + str(spt_linear_CFG))
-    print("spt_linear_s_stage2: " + str(spt_linear_s_stage2))
-    print("model_select: " + str(model_select))
-    print("GPU time allocation: " + str(allocation) + " min")
-    print("output_format: " + str(output_format))
-
-    if input_image_debug_value[0] is not None or prompt_debug_value[0] is not None or upscale_debug_value[0] is not None:
-        denoise_image = noisy_image = input_image_debug_value[0]
-        a_prompt = prompt_debug_value[0]
-        upscale = upscale_debug_value[0]
-        allocation = min(allocation * 60 * 100, 600)
-        seed = random.randint(0, max_64_bit_int)
-
-    input_format = re.sub(r"^.*\.([^\.]+)$", r"\1", noisy_image)
-
-    if input_format not in ['png', 'webp', 'jpg', 'jpeg', 'gif', 'bmp', 'avif']:
-        gr.Warning('Invalid image format. Please first convert into *.png, *.webp, *.jpg, *.jpeg, *.gif, *.bmp, *.heic or *.avif.')
-        return None, None, None, None, None, gr.update(interactive = False)
-
-    if output_format == "input":
-        if noisy_image is None:
-            output_format = "png"
-        else:
-            output_format = input_format
-    print("final output_format: " + str(output_format))
-
-    if prompt is None:
-        prompt = ""
-    if a_prompt is None:
-        a_prompt = ""
-    if n_prompt is None:
-        n_prompt = ""
-
-    if prompt != "" and a_prompt != "":
-        a_prompt = prompt + ", " + a_prompt
-    else:
-        a_prompt = prompt + a_prompt
-    print("Final prompt: " + str(a_prompt))
-
-    denoise_image = np.array(Image.open(noisy_image if denoise_image is None else denoise_image))
-
-    if rotation == 90:
-        denoise_image = np.array(list(zip(*denoise_image[::-1])))
-    elif rotation == 180:
-        denoise_image = np.array(list(zip(*denoise_image[::-1])))
-        denoise_image = np.array(list(zip(*denoise_image[::-1])))
-    elif rotation == -90:
-        denoise_image = np.array(list(zip(*denoise_image))[::-1])
-
-    if 1 < downscale:
-        input_height, input_width, input_channel = denoise_image.shape
-        denoise_image = np.array(Image.fromarray(denoise_image).resize((input_width // downscale, input_height // downscale), Image.LANCZOS))
-
-    denoise_image = HWC3(denoise_image)
-
-    if torch.cuda.device_count() == 0:
-        gr.Warning('Set this space to GPU config to make it work.')
-        return [noisy_image, denoise_image], gr.update(label="Downloadable results in *." + output_format + " format", format = output_format, value = [denoise_image]), None, gr.update(visible=True)
-
-    if model_select != model.current_model:
-        print('load ' + model_select)
-        if model_select == 'v0-Q':
-            model.load_state_dict(ckpt_Q, strict=False)
-        elif model_select == 'v0-F':
-            model.load_state_dict(ckpt_F, strict=False)
-        model.current_model = model_select
-
-    model.ae_dtype = convert_dtype(ae_dtype)
-    model.model.dtype = convert_dtype(diff_dtype)
-
-    return restore_on_gpu(
-        noisy_image, denoise_image, prompt, a_prompt, n_prompt, num_samples, min_size, downscale, upscale, edm_steps, s_stage1, s_stage2, s_cfg, randomize_seed, seed, s_churn, s_noise, color_fix_type, diff_dtype, ae_dtype, gamma_correction, linear_CFG, linear_s_stage2, spt_linear_CFG, spt_linear_s_stage2, model_select, output_format, allocation
-    )
 
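The removed rotation branch rotates the array with the `zip(*...)` transpose idiom (reverse rows then transpose for clockwise, transpose then reverse for counterclockwise). `np.rot90` expresses the same three cases directly; a sketch, not the original code:

```python
import numpy as np

def rotate_image(image: np.ndarray, rotation: int) -> np.ndarray:
    # k counts 90° counterclockwise quarter-turns, so k=-1 is 90° clockwise,
    # matching the removed zip/transpose idiom case by case.
    if rotation == 90:
        return np.rot90(image, k=-1)
    if rotation == 180:
        return np.rot90(image, k=2)
    if rotation == -90:
        return np.rot90(image, k=1)
    return image
```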
 def get_duration(
-[…2 removed lines truncated in extraction…]
     prompt,
-[…5 removed lines truncated in extraction…]
-    upscale,
-    edm_steps,
-    s_stage1,
-    s_stage2,
-    s_cfg,
-    randomize_seed,
     seed,
-[…3 removed lines truncated in extraction…]
-    diff_dtype,
-    ae_dtype,
-    gamma_correction,
-    linear_CFG,
-    spt_linear_CFG,
-    linear_s_stage2,
-    spt_linear_s_stage2,
-    model_select,
-    output_format,
-    allocation
 ):
-    return […truncated…]
 
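`get_duration` survives the rewrite because ZeroGPU's `@spaces.GPU(duration=...)` accepts a callable that receives the same arguments as the decorated function and returns the number of seconds of GPU time to reserve. Its removed body is truncated above; the sketch below only illustrates the wiring, with a simplified signature and an assumed minutes-to-seconds mapping:

```python
import spaces

def get_duration(prompt, seed, allocation):
    # Assumed mapping: `allocation` is expressed in minutes elsewhere in the
    # file, so convert to seconds and cap the reservation.
    return min(int(allocation) * 60, 600)

@spaces.GPU(duration=get_duration)
def restore_on_gpu(prompt, seed, allocation):
    # The decorated function receives exactly the arguments that
    # get_duration was consulted with before the GPU was attached.
    ...
```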
 @spaces.GPU(duration=get_duration)
-def […truncated…]
-[…2 removed lines truncated in extraction…]
     prompt,
-[…5 removed lines truncated in extraction…]
-    upscale,
-    edm_steps,
-    s_stage1,
-    s_stage2,
-    s_cfg,
-    randomize_seed,
     seed,
-[…3 removed lines truncated in extraction…]
-    diff_dtype,
-    ae_dtype,
-    gamma_correction,
-    linear_CFG,
-    spt_linear_CFG,
-    linear_s_stage2,
-    spt_linear_s_stage2,
-    model_select,
-    output_format,
-    allocation
 ):
-    start = time.time()
-    print('restore ==>>')
-
-    torch.cuda.set_device(SUPIR_device)
-
-    with torch.no_grad():
-        input_image = upscale_image(input_image, upscale, unit_resolution=32, min_size=min_size)
-        LQ = np.array(input_image) / 255.0
-        LQ = np.power(LQ, gamma_correction)
-        LQ *= 255.0
-        LQ = LQ.round().clip(0, 255).astype(np.uint8)
-        LQ = LQ / 255 * 2 - 1
-        LQ = torch.tensor(LQ, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0).to(SUPIR_device)[:, :3, :, :]
-        captions = ['']
-
-        samples = model.batchify_sample(LQ, captions, num_steps=edm_steps, restoration_scale=s_stage1, s_churn=s_churn,
-                                    s_noise=s_noise, cfg_scale=s_cfg, control_scale=s_stage2, seed=seed,
-                                    num_samples=num_samples, p_p=a_prompt, n_p=n_prompt, color_fix_type=color_fix_type,
-                                    use_linear_CFG=linear_CFG, use_linear_control_scale=linear_s_stage2,
-                                    cfg_scale_start=spt_linear_CFG, control_scale_start=spt_linear_s_stage2)
-
-        x_samples = (einops.rearrange(samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().round().clip(
-            0, 255).astype(np.uint8)
-        results = [x_samples[i] for i in range(num_samples)]
-    torch.cuda.empty_cache()
-
-    # All the results have the same size
-    input_height, input_width, input_channel = np.array(input_image).shape
-    result_height, result_width, result_channel = np.array(results[0]).shape
-
-    print('<<== restore')
-    end = time.time()
-    secondes = int(end - start)
-    minutes = math.floor(secondes / 60)
-    secondes = secondes - (minutes * 60)
-    hours = math.floor(minutes / 60)
-    minutes = minutes - (hours * 60)
-    information = ("Start the process again if you want a different result. " if randomize_seed else "") + \
-    "If you don't get the image you wanted, add more details in the « Image description ». " + \
-    "The image" + (" has" if len(results) == 1 else "s have") + " been generated in " + \
-    ((str(hours) + " h, ") if hours != 0 else "") + \
-    ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
-    str(secondes) + " sec. " + \
-    "The new image resolution is " + str(result_width) + \
-    " pixels large and " + str(result_height) + \
-    " pixels high, so a resolution of " + f'{result_width * result_height:,}' + " pixels."
-    print(information)
-    try:
-        print("Initial resolution: " + f'{input_width * input_height:,}')
-        print("Final resolution: " + f'{result_width * result_height:,}')
-        print("edm_steps: " + str(edm_steps))
-        print("num_samples: " + str(num_samples))
-        print("downscale: " + str(downscale))
-        print("Estimated minutes: " + f'{(((result_width * result_height**(1/1.75)) * input_width * input_height * (edm_steps**(1/2)) * (num_samples**(1/2.5)))**(1/2.5)) / 25000:,}')
-    except Exception as e:
-        print('Exception of Estimation')
-
-    # Only one image can be shown in the slider
-    return [noisy_image] + [results[0]], gr.update(label="Downloadable results in *." + output_format + " format", format = output_format, value = results), gr.update(value = information, visible = True), gr.update(visible=True), gr.update(visible=False), gr.update(interactive = False)
-
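The removed elapsed-time bookkeeping splits seconds into hours, minutes, and seconds with four floor/subtract steps; `divmod` does the same in two calls. A sketch with the same display rules:

```python
def format_elapsed(elapsed_seconds: float) -> str:
    # Same rules as the removed code: hours only when nonzero, minutes when
    # hours or minutes are nonzero, seconds always.
    minutes, seconds = divmod(int(elapsed_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    parts = []
    if hours:
        parts.append(f"{hours} h")
    if hours or minutes:
        parts.append(f"{minutes} min")
    parts.append(f"{seconds} sec")
    return ", ".join(parts)
```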
-def load_and_reset(param_setting):
-    print('load_and_reset ==>>')
-    if torch.cuda.device_count() == 0:
-        gr.Warning('Set this space to GPU config to make it work.')
-        return None, None, None, None, None, None, None, None, None, None, None, None, None, None
-    edm_steps = default_setting.edm_steps
-    s_stage2 = 1.0
-    s_stage1 = -1.0
-    s_churn = 5
-    s_noise = 1.003
-    a_prompt = 'Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - ' \
-               'realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore ' \
-               'detailing, hyper sharpness, perfect without deformations.'
-    n_prompt = 'painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, ' \
-               '3D render, unreal engine, blurring, dirty, messy, worst quality, low quality, frames, watermark, ' \
-               'signature, jpeg artifacts, deformed, lowres, over-smooth'
-    color_fix_type = 'Wavelet'
-    spt_linear_s_stage2 = 0.0
-    linear_s_stage2 = False
-    linear_CFG = True
-    if param_setting == "Quality":
-        s_cfg = default_setting.s_cfg_Quality
-        spt_linear_CFG = default_setting.spt_linear_CFG_Quality
-        model_select = "v0-Q"
-    elif param_setting == "Fidelity":
-        s_cfg = default_setting.s_cfg_Fidelity
-        spt_linear_CFG = default_setting.spt_linear_CFG_Fidelity
-        model_select = "v0-F"
-    else:
-        raise NotImplementedError
-    gr.Info('The parameters are reset.')
-    print('<<== load_and_reset')
-    return edm_steps, s_cfg, s_stage2, s_stage1, s_churn, s_noise, a_prompt, n_prompt, color_fix_type, linear_CFG, \
-        spt_linear_CFG, linear_s_stage2, spt_linear_s_stage2, model_select
-
-def log_information(result_gallery):
-    print('log_information')
-    if result_gallery is not None:
-        for i, result in enumerate(result_gallery):
-            print(result[0])
-
-def on_select_result(result_slider, result_gallery, evt: gr.SelectData):
-    print('on_select_result')
-    if result_gallery is not None:
-        for i, result in enumerate(result_gallery):
-            print(result[0])
-    return [result_slider[0], result_gallery[evt.index][0]]
-
-def on_render_image_example(result_example):
-    print('on_render_image_example')
-    return gr.update(value = result_example, visible = True)
-
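`on_select_result` relies on Gradio's event-data mechanism: a handler parameter annotated with `gr.SelectData` receives the click event, and `evt.index` identifies the selected gallery item. A minimal, self-contained sketch of that pattern; component names here are placeholders, not the app's real ones:

```python
import gradio as gr

def on_select(gallery_items, evt: gr.SelectData):
    # evt.index is the position of the clicked thumbnail; each gallery item
    # is a (media, caption) pair, so [0] extracts the image itself.
    return gallery_items[evt.index][0]

with gr.Blocks() as demo:
    gallery = gr.Gallery(label="Results")
    picked = gr.Image(label="Selected")
    gallery.select(on_select, inputs=[gallery], outputs=[picked])
```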
-title_html = """
-    <h1><center>SUPIR</center></h1>
-    <big><center>Upscale your images up to x10 freely, without account, without watermark and download it</center></big>
-    <center><big><big>🤸<big><big><big><big><big><big>🤸</big></big></big></big></big></big></big></big></center>
-
-    <p>This is an online demo of SUPIR, a practicing model scaling for photo-realistic image restoration.
-    The content added by SUPIR is <b><u>imagination, not real-world information</u></b>.
-    SUPIR is for beauty and illustration only.
-    Most of the processes last few minutes.
-    If you want to upscale AI-generated images, be noticed that <i>PixArt Sigma</i> space can directly generate 5984x5984 images.
-    Due to Gradio issues, the generated image is slightly less satured than the original.
-    Please leave a <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR/discussions/new">message in discussion</a> if you encounter issues.
-    You can also use <a href="https://huggingface.co/spaces/gokaygokay/AuraSR">AuraSR</a> to upscale x4.
-
-    <p><center><a href="https://arxiv.org/abs/2401.13627">Paper</a>   <a href="http://supir.xpixel.group/">Project Page</a>   <a href="https://huggingface.co/blog/MonsterMMORPG/supir-sota-image-upscale-better-than-magnific-ai">Local Install Guide</a></center></p>
-    <p><center><a style="display:inline-block" href='https://github.com/Fanghua-Yu/SUPIR'><img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/Fanghua-Yu/SUPIR?style=social"></a></center></p>
     """
 
-[…2 removed lines truncated in extraction…]
-The images are not stored but the logs are saved during a month.
-## **How to get SUPIR**
-You can get SUPIR on HuggingFace by [duplicating this space](https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR?duplicate=true) and set GPU.
-You can also install SUPIR on your computer following [this tutorial](https://huggingface.co/blog/MonsterMMORPG/supir-sota-image-upscale-better-than-magnific-ai).
-You can install _Pinokio_ on your computer and then install _SUPIR_ into it. It should be quite easy if you have an Nvidia GPU.
-## **Terms of use**
-By using this service, users are required to agree to the following terms: The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research. Please submit a feedback to us if you get any inappropriate answer! We will collect those to keep improving our models. For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
-## **License**
-The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/Fanghua-Yu/SUPIR) of SUPIR.
-"""
-
-js = """
-function createGradioAnimation() {
-    window.addEventListener("beforeunload", function(e) {
-        if (document.getElementById('dummy_button_id') && !document.getElementById('dummy_button_id').disabled) {
-            var confirmationMessage = 'A process is still running. '
-                                    + 'If you leave before saving, your changes will be lost.';
 
-[…2 removed lines truncated in extraction…]
-            return confirmationMessage;
-    });
-    return 'Animation created';
-}
-"""
-# Gradio interface
-with gr.Blocks(js=js) as interface:
-    if torch.cuda.device_count() == 0:
-        with gr.Row():
-            gr.HTML("""
-    <p style="background-color: red;"><big><big><big><b>⚠️To use SUPIR, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR?duplicate=true">duplicate this space</a> and set a GPU with 30 GB VRAM.</b>
 
-[…about 40 removed lines truncated in extraction…]
-        with gr.[…truncated…]
-            with gr.[…truncated…]
-[…about 37 more removed UI-definition lines truncated in extraction…]
-        warning = gr.HTML(elem_id="warning", value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
-        restore_information = gr.HTML(value = "Restart the process to get another result.", visible = False)
-        result_slider = ImageSlider(label = 'Comparator', show_label = False, interactive = False, elem_id = "slider1", show_download_button = False, visible = False)
-        result_gallery = gr.Gallery(label = 'Downloadable results', show_label = True, interactive = False, elem_id = "gallery1")
-        result_example = gr.HTML(elem_id="result_example", visible = False)
-        result_image_example = gr.Image(label="Example Image", visible = False)
-
| 628 | 
            -
                with gr.Row(elem_id="examples", visible = False):
         | 
| 629 | 
            -
                    gr.Examples(
         | 
| 630 | 
            -
                        label = "Examples for cache",
         | 
| 631 | 
            -
                        examples = [
         | 
| 632 | 
            -
                                [
         | 
| 633 | 
            -
                                    "./Examples/Example2.jpeg",
         | 
| 634 | 
            -
                                    0,
         | 
| 635 | 
            -
                                    "./Examples/Example2.jpeg",
         | 
| 636 | 
            -
                                    "La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
         | 
| 637 | 
            -
                                    "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
         | 
| 638 | 
            -
                                    "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
         | 
| 639 | 
            -
                                    1,         # num_samples
         | 
| 640 | 
            -
                                    32,        # min_size
         | 
| 641 | 
            -
                                    1,         # downscale
         | 
| 642 | 
            -
                                    1,         # upscale
         | 
| 643 | 
            -
                                    100,       # edm_steps
         | 
| 644 | 
            -
                                    -1,        # s_stage1
         | 
| 645 | 
            -
                                    1,         # s_stage2
         | 
| 646 | 
            -
                                    7.5,       # s_cfg
         | 
| 647 | 
            -
                                    True,     # randomize_seed
         | 
| 648 | 
            -
                                    42,        # seed
         | 
| 649 | 
            -
                                    5,         # s_churn
         | 
| 650 | 
            -
                                    1.003,     # s_noise
         | 
| 651 | 
            -
                                    "Wavelet", # color_fix_type
         | 
| 652 | 
            -
                                    "fp16",    # diff_dtype
         | 
| 653 | 
            -
                                    "bf16",    # ae_dtype
         | 
| 654 | 
            -
                                    1.0,       # gamma_correction
         | 
| 655 | 
            -
                                    True,      # linear_CFG
         | 
| 656 | 
            -
                                    4,         # spt_linear_CFG
         | 
| 657 | 
            -
                                    False,     # linear_s_stage2
         | 
| 658 | 
            -
                                    0.,        # spt_linear_s_stage2
         | 
| 659 | 
            -
                                    "v0-Q",    # model_select
         | 
| 660 | 
            -
                                    "input",   # output_format
         | 
| 661 | 
            -
                                    60         # allocation
         | 
| 662 | 
            -
                                ],
         | 
| 663 | 
            -
                                [
         | 
| 664 | 
            -
                                    "./Examples/Example2.jpeg",
         | 
| 665 | 
            -
                                    0,
         | 
| 666 | 
            -
                                    "./Examples/Example2.jpeg",
         | 
| 667 | 
            -
                                    "La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
         | 
| 668 | 
            -
                                    "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
         | 
| 669 | 
            -
                                    "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
         | 
| 670 | 
            -
                                    4,         # num_samples
         | 
| 671 | 
            -
                                    32,        # min_size
         | 
| 672 | 
            -
                                    1,         # downscale
         | 
| 673 | 
            -
                                    1,         # upscale
         | 
| 674 | 
            -
                                    100,       # edm_steps
         | 
| 675 | 
            -
                                    -1,        # s_stage1
         | 
| 676 | 
            -
                                    1,         # s_stage2
         | 
| 677 | 
            -
                                    7.5,       # s_cfg
         | 
| 678 | 
            -
                                    True,     # randomize_seed
         | 
| 679 | 
            -
                                    42,        # seed
         | 
| 680 | 
            -
                                    5,         # s_churn
         | 
| 681 | 
            -
                                    1.003,     # s_noise
         | 
| 682 | 
            -
                                    "Wavelet", # color_fix_type
         | 
| 683 | 
            -
                                    "fp16",    # diff_dtype
         | 
| 684 | 
            -
                                    "bf16",    # ae_dtype
         | 
| 685 | 
            -
                                    1.0,       # gamma_correction
         | 
| 686 | 
            -
                                    True,      # linear_CFG
         | 
| 687 | 
            -
                                    4,         # spt_linear_CFG
         | 
| 688 | 
            -
                                    False,     # linear_s_stage2
         | 
| 689 | 
            -
                                    0.,        # spt_linear_s_stage2
         | 
| 690 | 
            -
                                    "v0-Q",    # model_select
         | 
| 691 | 
            -
                                    "input",   # output_format
         | 
| 692 | 
            -
                                    60         # allocation
         | 
| 693 | 
            -
                                ]
         | 
| 694 | 
            -
                            ],
         | 
| 695 | 
            -
                        run_on_click = True,
         | 
| 696 | 
            -
                        fn = stage2_process_example,
         | 
| 697 | 
            -
                	    inputs = [
         | 
| 698 | 
            -
                            input_image,
         | 
| 699 | 
            -
                            rotation,
         | 
| 700 | 
            -
                            denoise_image,
         | 
| 701 | 
            -
                            prompt,
         | 
| 702 | 
            -
                            a_prompt,
         | 
| 703 | 
            -
                            n_prompt,
         | 
| 704 | 
            -
                            num_samples,
         | 
| 705 | 
            -
                            min_size,
         | 
| 706 | 
            -
                            downscale,
         | 
| 707 | 
            -
                            upscale,
         | 
| 708 | 
            -
                            edm_steps,
         | 
| 709 | 
            -
                            s_stage1,
         | 
| 710 | 
            -
                            s_stage2,
         | 
| 711 | 
            -
                            s_cfg,
         | 
| 712 | 
            -
                            randomize_seed,
         | 
| 713 | 
            -
                            seed,
         | 
| 714 | 
            -
                            s_churn,
         | 
| 715 | 
            -
                            s_noise,
         | 
| 716 | 
            -
                            color_fix_type,
         | 
| 717 | 
            -
                            diff_dtype,
         | 
| 718 | 
            -
                            ae_dtype,
         | 
| 719 | 
            -
                            gamma_correction,
         | 
| 720 | 
            -
                            linear_CFG,
         | 
| 721 | 
            -
                            spt_linear_CFG,
         | 
| 722 | 
            -
                            linear_s_stage2,
         | 
| 723 | 
            -
                            spt_linear_s_stage2,
         | 
| 724 | 
            -
                            model_select,
         | 
| 725 | 
            -
                            output_format,
         | 
| 726 | 
            -
                            allocation
         | 
| 727 | 
            -
                        ],
         | 
| 728 | 
            -
                	    outputs = [
         | 
| 729 | 
            -
                        result_example,
         | 
| 730 | 
            -
                            warning,
         | 
| 731 | 
            -
                            dummy_button,
         | 
| 732 | 
            -
                            prompt_hint
         | 
| 733 | 
            -
                        ],
         | 
| 734 | 
            -
                        cache_examples = True,
         | 
| 735 | 
            -
                    )
         | 
| 736 | 
            -
             | 
| 737 | 
            -
                gr.Examples(
         | 
| 738 | 
            -
                    label = "Examples for demo",
         | 
| 739 | 
            -
                    examples = [
         | 
| 740 | 
            -
                            [
         | 
| 741 | 
            -
                                "./Examples/Example1.png",
         | 
| 742 | 
            -
                                0,
         | 
| 743 | 
            -
                                "./Examples/Example1.png",
         | 
| 744 | 
            -
                                "Group of people, walking, happy, in the street, photorealistic, 8k, extremely detailled",
         | 
| 745 | 
            -
                                "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
         | 
| 746 | 
            -
                                "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
         | 
| 747 | 
            -
                                2,       # num_samples
         | 
| 748 | 
            -
                                1024,    # min_size
         | 
| 749 | 
            -
                                1,       # downscale
         | 
| 750 | 
            -
                                8,       # upscale
         | 
| 751 | 
            -
                                100,     # edm_steps
         | 
| 752 | 
            -
                                -1,      # s_stage1
         | 
| 753 | 
            -
                                1,       # s_stage2
         | 
| 754 | 
            -
                                7.5,     # s_cfg
         | 
| 755 | 
            -
                                False,   # randomize_seed
         | 
| 756 | 
            -
                                42,      # seed
         | 
| 757 | 
            -
                                5,       # s_churn
         | 
| 758 | 
            -
                                1.003,   # s_noise
         | 
| 759 | 
            -
                                "AdaIn", # color_fix_type
         | 
| 760 | 
            -
                                "fp16",  # diff_dtype
         | 
| 761 | 
            -
                                "bf16",  # ae_dtype
         | 
| 762 | 
            -
                                1.0,     # gamma_correction
         | 
| 763 | 
            -
                                True,    # linear_CFG
         | 
| 764 | 
            -
                                4,       # spt_linear_CFG
         | 
| 765 | 
            -
                                False,   # linear_s_stage2
         | 
| 766 | 
            -
                                0.,      # spt_linear_s_stage2
         | 
| 767 | 
            -
                                "v0-Q",  # model_select
         | 
| 768 | 
            -
                                "input", # output_format
         | 
| 769 | 
            -
                                180      # allocation
         | 
| 770 | 
            -
                            ],
         | 
| 771 | 
            -
                            [
         | 
| 772 | 
            -
                                "./Examples/Example2.jpeg",
         | 
| 773 | 
            -
                                0,
         | 
| 774 | 
            -
                                "./Examples/Example2.jpeg",
         | 
| 775 | 
            -
                                "La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
         | 
| 776 | 
            -
                                "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
         | 
| 777 | 
            -
                                "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
         | 
| 778 | 
            -
                                1,         # num_samples
         | 
| 779 | 
            -
                                1024,      # min_size
         | 
| 780 | 
            -
                                1,         # downscale
         | 
| 781 | 
            -
                                1,         # upscale
         | 
| 782 | 
            -
                                100,       # edm_steps
         | 
| 783 | 
            -
                                -1,        # s_stage1
         | 
| 784 | 
            -
                                1,         # s_stage2
         | 
| 785 | 
            -
                                7.5,       # s_cfg
         | 
| 786 | 
            -
                                False,     # randomize_seed
         | 
| 787 | 
            -
                                42,        # seed
         | 
| 788 | 
            -
                                5,         # s_churn
         | 
| 789 | 
            -
                                1.003,     # s_noise
         | 
| 790 | 
            -
                                "Wavelet", # color_fix_type
         | 
| 791 | 
            -
                                "fp16",    # diff_dtype
         | 
| 792 | 
            -
                                "bf16",    # ae_dtype
         | 
| 793 | 
            -
                                1.0,       # gamma_correction
         | 
| 794 | 
            -
                                True,      # linear_CFG
         | 
| 795 | 
            -
                                4,         # spt_linear_CFG
         | 
| 796 | 
            -
                                False,     # linear_s_stage2
         | 
| 797 | 
            -
                                0.,        # spt_linear_s_stage2
         | 
| 798 | 
            -
                                "v0-Q",    # model_select
         | 
| 799 | 
            -
                                "input",   # output_format
         | 
| 800 | 
            -
                                60         # allocation
         | 
| 801 | 
            -
                            ],
         | 
| 802 | 
            -
                            [
         | 
| 803 | 
            -
                                "./Examples/Example3.webp",
         | 
| 804 | 
            -
                                0,
         | 
| 805 | 
            -
                                "./Examples/Example3.webp",
         | 
| 806 | 
            -
                                "A red apple",
         | 
| 807 | 
            -
                                "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
         | 
| 808 | 
            -
                                "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
         | 
| 809 | 
            -
                                1,         # num_samples
         | 
| 810 | 
            -
                                1024,      # min_size
         | 
| 811 | 
            -
                                1,         # downscale
         | 
| 812 | 
            -
                                1,         # upscale
         | 
| 813 | 
            -
                                200,       # edm_steps
         | 
| 814 | 
            -
                                -1,        # s_stage1
         | 
| 815 | 
            -
                                1,         # s_stage2
         | 
| 816 | 
            -
                                7.5,       # s_cfg
         | 
| 817 | 
            -
                                False,     # randomize_seed
         | 
| 818 | 
            -
                                42,        # seed
         | 
| 819 | 
            -
                                5,         # s_churn
         | 
| 820 | 
            -
                                1.003,     # s_noise
         | 
| 821 | 
            -
                                "Wavelet", # color_fix_type
         | 
| 822 | 
            -
                                "fp16",    # diff_dtype
         | 
| 823 | 
            -
                                "bf16",    # ae_dtype
         | 
| 824 | 
            -
                                1.0,       # gamma_correction
         | 
| 825 | 
            -
                                True,      # linear_CFG
         | 
| 826 | 
            -
                                4,         # spt_linear_CFG
         | 
| 827 | 
            -
                                False,     # linear_s_stage2
         | 
| 828 | 
            -
                                0.,        # spt_linear_s_stage2
         | 
| 829 | 
            -
                                "v0-Q",    # model_select
         | 
| 830 | 
            -
                                "input",   # output_format
         | 
| 831 | 
            -
                                180        # allocation
         | 
| 832 | 
            -
                            ],
         | 
| 833 | 
            -
                            [
         | 
| 834 | 
            -
                                "./Examples/Example3.webp",
         | 
| 835 | 
            -
                                0,
         | 
| 836 | 
            -
                                "./Examples/Example3.webp",
         | 
| 837 | 
            -
                                "A red marble",
         | 
| 838 | 
            -
                                "Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
         | 
| 839 | 
            -
                                "painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
         | 
| 840 | 
            -
                                1,         # num_samples
         | 
| 841 | 
            -
                                1024,      # min_size
         | 
| 842 | 
            -
                                1,         # downscale
         | 
| 843 | 
            -
                                1,         # upscale
         | 
| 844 | 
            -
                                200,       # edm_steps
         | 
| 845 | 
            -
                                -1,        # s_stage1
         | 
| 846 | 
            -
                                1,         # s_stage2
         | 
| 847 | 
            -
                                7.5,       # s_cfg
         | 
| 848 | 
            -
                                False,     # randomize_seed
         | 
| 849 | 
            -
                                42,        # seed
         | 
| 850 | 
            -
                                5,         # s_churn
         | 
| 851 | 
            -
                                1.003,     # s_noise
         | 
| 852 | 
            -
                                "Wavelet", # color_fix_type
         | 
| 853 | 
            -
                                "fp16",    # diff_dtype
         | 
| 854 | 
            -
                                "bf16",    # ae_dtype
         | 
| 855 | 
            -
                                1.0,       # gamma_correction
         | 
| 856 | 
            -
                                True,      # linear_CFG
         | 
| 857 | 
            -
                                4,         # spt_linear_CFG
         | 
| 858 | 
            -
                                False,     # linear_s_stage2
         | 
| 859 | 
            -
                                0.,        # spt_linear_s_stage2
         | 
| 860 | 
            -
                                "v0-Q",    # model_select
         | 
| 861 | 
            -
                                "input",   # output_format
         | 
| 862 | 
            -
                                180        # allocation
         | 
| 863 | 
            -
                            ],
         | 
| 864 | 
            -
                        ],
         | 
| 865 | 
            -
                    run_on_click = True,
         | 
| 866 | 
            -
                    fn = stage2_process,
         | 
| 867 | 
            -
            	    inputs = [
         | 
| 868 | 
            -
                        input_image,
         | 
| 869 | 
            -
                        rotation,
         | 
| 870 | 
            -
                        denoise_image,
         | 
| 871 | 
            -
                        prompt,
         | 
| 872 | 
            -
                        a_prompt,
         | 
| 873 | 
            -
                        n_prompt,
         | 
| 874 | 
            -
                        num_samples,
         | 
| 875 | 
            -
                        min_size,
         | 
| 876 | 
            -
                        downscale,
         | 
| 877 | 
            -
                        upscale,
         | 
| 878 | 
            -
                        edm_steps,
         | 
| 879 | 
            -
                        s_stage1,
         | 
| 880 | 
            -
                        s_stage2,
         | 
| 881 | 
            -
                        s_cfg,
         | 
| 882 | 
            -
                        randomize_seed,
         | 
| 883 | 
            -
                        seed,
         | 
| 884 | 
            -
                        s_churn,
         | 
| 885 | 
            -
                        s_noise,
         | 
| 886 | 
            -
                        color_fix_type,
         | 
| 887 | 
            -
                        diff_dtype,
         | 
| 888 | 
            -
                        ae_dtype,
         | 
| 889 | 
            -
                        gamma_correction,
         | 
| 890 | 
            -
                        linear_CFG,
         | 
| 891 | 
            -
                        spt_linear_CFG,
         | 
| 892 | 
            -
                        linear_s_stage2,
         | 
| 893 | 
            -
                        spt_linear_s_stage2,
         | 
| 894 | 
            -
                        model_select,
         | 
| 895 | 
            -
                        output_format,
         | 
| 896 | 
            -
                        allocation
         | 
| 897 | 
            -
                    ],
         | 
| 898 | 
            -
            	    outputs = [
         | 
| 899 | 
            -
                        result_slider,
         | 
| 900 | 
            -
                        result_gallery,
         | 
| 901 | 
            -
                        restore_information,
         | 
| 902 | 
            -
                        reset_btn,
         | 
| 903 | 
            -
                        warning,
         | 
| 904 | 
            -
                        dummy_button
         | 
| 905 | 
            -
                    ],
         | 
| 906 | 
            -
                    cache_examples = False,
         | 
| 907 | 
            -
                )
         | 
| 908 | 
            -
             | 
| 909 | 
            -
                with gr.Row():
         | 
| 910 | 
            -
                    gr.Markdown(claim_md)
         | 
| 911 | 
            -
                
         | 
| 912 | 
            -
                input_image.upload(fn = check_upload, inputs = [
         | 
| 913 | 
            -
                    input_image
         | 
| 914 | 
            -
                ], outputs = [
         | 
| 915 | 
            -
                    rotation
         | 
| 916 | 
            -
                ], queue = False, show_progress = False)
         | 
| 917 | 
            -
             | 
| 918 | 
            -
                denoise_button.click(fn = check_and_update, inputs = [
         | 
| 919 | 
            -
                    input_image
         | 
| 920 | 
            -
                ], outputs = [warning, dummy_button], queue = False, show_progress = False).success(fn = stage1_process, inputs = [
         | 
| 921 | 
            -
                    input_image,
         | 
| 922 | 
            -
                    gamma_correction,
         | 
| 923 | 
            -
                    diff_dtype,
         | 
| 924 | 
            -
                    ae_dtype
         | 
| 925 | 
            -
                ], outputs=[
         | 
| 926 | 
            -
                    denoise_image,
         | 
| 927 | 
            -
                    denoise_information,
         | 
| 928 | 
            -
                    dummy_button
         | 
| 929 | 
            -
                ])
         | 
| 930 | 
            -
             | 
| 931 | 
            -
                diffusion_button.click(fn = update_seed, inputs = [
         | 
| 932 | 
            -
                    randomize_seed,
         | 
| 933 | 
            -
                    seed
         | 
| 934 | 
            -
                ], outputs = [
         | 
| 935 | 
            -
                    seed
         | 
| 936 | 
            -
                ], queue = False, show_progress = False).then(fn = check_and_update, inputs = [
         | 
| 937 | 
            -
                    input_image
         | 
| 938 | 
            -
                ], outputs = [warning, dummy_button], queue = False, show_progress = False).success(fn=stage2_process, inputs = [
         | 
| 939 | 
            -
                    input_image,
         | 
| 940 | 
            -
                    rotation,
         | 
| 941 | 
            -
                    denoise_image,
         | 
| 942 | 
             
                    prompt,
         | 
| 943 | 
            -
                     | 
| 944 | 
            -
                     | 
| 945 | 
            -
                     | 
| 946 | 
            -
                     | 
| 947 | 
            -
                     | 
| 948 | 
            -
                     | 
| 949 | 
            -
                     | 
| 950 | 
            -
             | 
| 951 | 
            -
             | 
| 952 | 
            -
                    s_cfg,
         | 
| 953 | 
            -
                    randomize_seed,
         | 
| 954 | 
            -
                    seed,
         | 
| 955 | 
            -
                    s_churn,
         | 
| 956 | 
            -
                    s_noise,
         | 
| 957 | 
            -
                    color_fix_type,
         | 
| 958 | 
            -
                    diff_dtype,
         | 
| 959 | 
            -
                    ae_dtype,
         | 
| 960 | 
            -
                    gamma_correction,
         | 
| 961 | 
            -
                    linear_CFG,
         | 
| 962 | 
            -
                    spt_linear_CFG,
         | 
| 963 | 
            -
                    linear_s_stage2,
         | 
| 964 | 
            -
                    spt_linear_s_stage2,
         | 
| 965 | 
            -
                    model_select,
         | 
| 966 | 
            -
                    output_format,
         | 
| 967 | 
            -
                    allocation
         | 
| 968 | 
            -
                ], outputs = [
         | 
| 969 | 
            -
                    result_slider,
         | 
| 970 | 
            -
                    result_gallery,
         | 
| 971 | 
            -
                    restore_information,
         | 
| 972 | 
            -
                    reset_btn,
         | 
| 973 | 
            -
                    warning,
         | 
| 974 | 
            -
                    dummy_button
         | 
| 975 | 
            -
                ]).success(fn = log_information, inputs = [
         | 
| 976 | 
            -
                    result_gallery
         | 
| 977 | 
            -
                ], outputs = [], queue = False, show_progress = False)
         | 
| 978 | 
            -
             | 
| 979 | 
            -
                result_gallery.change(on_select_result, [result_slider, result_gallery], result_slider)
         | 
| 980 | 
            -
                result_gallery.select(on_select_result, [result_slider, result_gallery], result_slider)
         | 
| 981 | 
            -
                result_example.change(on_render_image_example, result_example, result_image_example)
         | 
| 982 |  | 
| 983 | 
            -
                 | 
| 984 | 
            -
                     | 
| 985 | 
            -
             | 
| 986 | 
            -
                     | 
| 987 | 
            -
             | 
| 988 | 
            -
                    s_stage2,
         | 
| 989 | 
            -
                    s_stage1,
         | 
| 990 | 
            -
                    s_churn,
         | 
| 991 | 
            -
                    s_noise,
         | 
| 992 | 
            -
                    a_prompt,
         | 
| 993 | 
            -
                    n_prompt,
         | 
| 994 | 
            -
                    color_fix_type,
         | 
| 995 | 
            -
                    linear_CFG,
         | 
| 996 | 
            -
                    spt_linear_CFG,
         | 
| 997 | 
            -
                    linear_s_stage2,
         | 
| 998 | 
            -
                    spt_linear_s_stage2,
         | 
| 999 | 
            -
                    model_select
         | 
| 1000 | 
            -
                ])
         | 
| 1001 |  | 
| 1002 | 
            -
                 | 
| 1003 | 
            -
             | 
| 1004 | 
            -
             | 
| 1005 | 
            -
             | 
| 1006 | 
            -
             | 
| 1007 | 
            -
             | 
| 1008 | 
            -
             | 
| 1009 | 
            -
             | 
| 1010 | 
            -
             | 
| 1011 | 
            -
             | 
| 1012 | 
            -
             | 
| 1013 | 
            -
             | 
| 1014 | 
            -
             | 
| 1015 | 
            -
                        s_stage2,
         | 
| 1016 | 
            -
                        s_cfg,
         | 
| 1017 | 
            -
                        randomize_seed,
         | 
| 1018 | 
            -
                        seed,
         | 
| 1019 | 
            -
                        s_churn,
         | 
| 1020 | 
            -
                        s_noise,
         | 
| 1021 | 
            -
                        color_fix_type,
         | 
| 1022 | 
            -
                        diff_dtype,
         | 
| 1023 | 
            -
                        ae_dtype,
         | 
| 1024 | 
            -
                        gamma_correction,
         | 
| 1025 | 
            -
                        linear_CFG,
         | 
| 1026 | 
            -
                        spt_linear_CFG,
         | 
| 1027 | 
            -
                        linear_s_stage2,
         | 
| 1028 | 
            -
                        spt_linear_s_stage2,
         | 
| 1029 | 
            -
                        model_select,
         | 
| 1030 | 
            -
                        output_format,
         | 
| 1031 | 
            -
                        allocation
         | 
| 1032 | 
            -
                    ], queue = False, show_progress = False)
         | 
| 1033 |  | 
| 1034 | 
            -
                def handle_field_debug_change(input_image_debug_data, prompt_debug_data,  | 
|  | |
| 1035 | 
             
                    input_image_debug_value[0] = input_image_debug_data
         | 
|  | |
|  | |
| 1036 | 
             
                    prompt_debug_value[0] = prompt_debug_data
         | 
| 1037 | 
            -
                     | 
| 1038 | 
             
                    return []
         | 
| 1039 |  | 
| 1040 | 
             
                input_image_debug.upload(
         | 
| 1041 | 
             
                    fn=handle_field_debug_change,
         | 
| 1042 | 
            -
                    inputs=[input_image_debug, prompt_debug,  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1043 | 
             
                    outputs=[]
         | 
| 1044 | 
             
                )
         | 
| 1045 |  | 
| 1046 | 
             
                prompt_debug.change(
         | 
| 1047 | 
             
                    fn=handle_field_debug_change,
         | 
| 1048 | 
            -
                    inputs=[input_image_debug, prompt_debug,  | 
| 1049 | 
             
                    outputs=[]
         | 
| 1050 | 
             
                )
         | 
| 1051 |  | 
| 1052 | 
            -
                 | 
| 1053 | 
             
                    fn=handle_field_debug_change,
         | 
| 1054 | 
            -
                    inputs=[input_image_debug, prompt_debug,  | 
| 1055 | 
             
                    outputs=[]
         | 
| 1056 | 
             
                )
         | 
| 1057 | 
            -
             | 
| 1058 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
             
 import os
+# PyTorch 2.8 (temporary hack)
+os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
+
+# --- 1. Model Download and Setup (Diffusers Backend) ---
+import spaces
+import torch
+from diffusers import FlowMatchEulerDiscreteScheduler
+from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
+from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
+from diffusers.utils.export_utils import export_to_video
 import gradio as gr
+import tempfile
 import numpy as np
+from PIL import Image
 import random
+import gc
+from gradio_client import Client, handle_file  # Import for API call
 
+# Import the optimization function from the separate file
+from optimization import optimize_pipeline_
 
+# --- Constants and Model Loading ---
+MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 
+# --- NEW: Flexible Dimension Constants ---
+MAX_DIMENSION = 832
+MIN_DIMENSION = 480
+DIMENSION_MULTIPLE = 16
+SQUARE_SIZE = 480
 
+MAX_SEED = np.iinfo(np.int32).max
+
+FIXED_FPS = 24
+MIN_FRAMES_MODEL = 8
+MAX_FRAMES_MODEL = 81
 
+MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS, 1)
+MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS, 1)
 
 input_image_debug_value = [None]
+input_video_debug_value = [None]
+end_image_debug_value = [None]
 prompt_debug_value = [None]
+total_second_length_debug_value = [None]
+
+default_negative_prompt = "Vibrant colors, overexposure, static, blurred details, subtitles, error, style, artwork, painting, image, still, overall gray, worst quality, low quality, JPEG compression residue, ugly, mutilated, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, malformed limbs, fused fingers, still image, cluttered background, three legs, many people in the background, walking backwards, overexposure, jumpcut, crossfader, "
+
+print("Loading models into memory. This may take a few minutes...")
+
+pipe = WanImageToVideoPipeline.from_pretrained(
+    MODEL_ID,
+    transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+        subfolder='transformer',
+        torch_dtype=torch.bfloat16,
+        device_map='cuda',
+    ),
+    transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+        subfolder='transformer_2',
+        torch_dtype=torch.bfloat16,
+        device_map='cuda',
+    ),
+    torch_dtype=torch.bfloat16,
+)
+pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=8.0)
+pipe.to('cuda')
+
+
+print("Optimizing pipeline...")
+for i in range(3):
+    gc.collect()
+    torch.cuda.synchronize()
+    torch.cuda.empty_cache()
+optimize_pipeline_(pipe,
+    image=Image.new('RGB', (MAX_DIMENSION, MIN_DIMENSION)),
+    prompt='prompt',
+    height=MIN_DIMENSION,
+    width=MAX_DIMENSION,
+    num_frames=MAX_FRAMES_MODEL,
+)
+print("All models loaded and optimized. Gradio app is ready.")
 
 
+# --- 2. Image Processing and Application Logic ---
+def generate_end_frame(start_img, gen_prompt, progress=gr.Progress(track_tqdm=True)):
+    """Calls an external Gradio API to generate an image."""
+    if start_img is None:
+        raise gr.Error("Please provide a Start Frame first.")
+
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise gr.Error("HF_TOKEN not found in environment variables. Please set it in your Space secrets.")
+
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
+        start_img.save(tmpfile.name)
+        tmp_path = tmpfile.name
+
+    progress(0.1, desc="Connecting to image generation API...")
+    client = Client("multimodalart/nano-banana-private")
+
+    progress(0.5, desc=f"Generating with prompt: '{gen_prompt}'...")
+    try:
+        result = client.predict(
+            prompt=gen_prompt,
+            images=[
+                {"image": handle_file(tmp_path)}
+            ],
+            manual_token=hf_token,
+            api_name="/unified_image_generator"
+        )
+    finally:
+        os.remove(tmp_path)
+
+    progress(1.0, desc="Done!")
+    print(result)
+    return result
+
+def switch_to_upload_tab():
+    """Returns a gr.Tabs update to switch to the first tab."""
+    return gr.Tabs(selected="upload_tab")
 
 
+def process_image_for_video(image: Image.Image) -> Image.Image:
+    """
+    Resizes an image based on the following rules for video generation:
+    1. The longest side will be scaled down to MAX_DIMENSION if it's larger.
+    2. The shortest side will be scaled up to MIN_DIMENSION if it's smaller.
+    3. The final dimensions will be rounded to the nearest multiple of DIMENSION_MULTIPLE.
+    4. Square images are resized to a fixed SQUARE_SIZE.
+    The aspect ratio is preserved as closely as possible.
+    """
+    width, height = image.size
+
+    # Rule 4: Handle square images
+    if width == height:
+        return image.resize((SQUARE_SIZE, SQUARE_SIZE), Image.Resampling.LANCZOS)
+
+    # Determine target dimensions while preserving aspect ratio
+    aspect_ratio = width / height
+    new_width, new_height = width, height
+
+    # Rule 1: Scale down if too large
+    if new_width > MAX_DIMENSION or new_height > MAX_DIMENSION:
+        if aspect_ratio > 1:  # Landscape
+            scale = MAX_DIMENSION / new_width
+        else:  # Portrait
+            scale = MAX_DIMENSION / new_height
+        new_width *= scale
+        new_height *= scale
+
+    # Rule 2: Scale up if too small
+    if new_width < MIN_DIMENSION or new_height < MIN_DIMENSION:
+        if aspect_ratio > 1:  # Landscape
+            scale = MIN_DIMENSION / new_height
+        else:  # Portrait
+            scale = MIN_DIMENSION / new_width
+        new_width *= scale
+        new_height *= scale
+
+    # Rule 3: Round to the nearest multiple of DIMENSION_MULTIPLE
+    final_width = int(round(new_width / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
+    final_height = int(round(new_height / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
+
+    # Ensure final dimensions are at least the minimum
+    final_width = max(final_width, MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE)
+    final_height = max(final_height, MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE)
+
+    return image.resize((final_width, final_height), Image.Resampling.LANCZOS)
+
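Editor's note on the rules above: because Rule 2 can bump the short side back up after Rule 1 has already capped the long side, the final long side can land slightly above MAX_DIMENSION (e.g. 848 for a 16:9 input). A small sketch of the resulting behaviour, assuming `process_image_for_video` and the dimension constants from this file are in scope:

```python
# Editor's sketch (not part of the commit): worked examples of the sizing rules.
# Assumes process_image_for_video and the constants above are importable/in scope.
from PIL import Image

for size in [(1920, 1080), (256, 384), (640, 640)]:
    print(size, "->", process_image_for_video(Image.new("RGB", size)).size)
# (1920, 1080) -> (848, 480)  capped to 832 wide, height bumped to 480, snapped to /16
# (256, 384)   -> (480, 720)  short side scaled up to MIN_DIMENSION
# (640, 640)   -> (480, 480)  squares go straight to SQUARE_SIZE
```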
+def resize_and_crop_to_match(target_image, reference_image):
+    """Resizes and center-crops the target image to match the reference image's dimensions."""
+    ref_width, ref_height = reference_image.size
+    target_width, target_height = target_image.size
+    scale = max(ref_width / target_width, ref_height / target_height)
+    new_width, new_height = int(target_width * scale), int(target_height * scale)
+    resized = target_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+    left, top = (new_width - ref_width) // 2, (new_height - ref_height) // 2
+    return resized.crop((left, top, left + ref_width, top + ref_height))
+
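This is a cover-then-center-crop: the end frame is scaled by the larger of the two ratios so it fully covers the reference size, and the overhang is trimmed symmetrically. An illustrative check (editor's sketch, assuming the function above is in scope):

```python
# Editor's sketch: a 1024x768 end frame matched to an 848x480 start frame.
from PIL import Image

ref = Image.new("RGB", (848, 480))
end = Image.new("RGB", (1024, 768))
print(resize_and_crop_to_match(end, ref).size)  # (848, 480)
# scale = max(848/1024, 480/768) = 0.828125 -> resized to 848x636,
# then 78 px cropped from both top and bottom.
```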
+def generate_video(
+    start_image_pil,
+    end_image_pil,
     prompt,
+    negative_prompt=default_negative_prompt,
+    duration_seconds=2.1,
+    steps=8,
+    guidance_scale=1,
+    guidance_scale_2=1,
+    seed=42,
+    randomize_seed=True,
+    progress=gr.Progress(track_tqdm=True)
 ):
+    allocation_time = 120
+
+    if input_image_debug_value[0] is not None or end_image_debug_value[0] is not None or prompt_debug_value[0] is not None or total_second_length_debug_value[0] is not None:
+        start_image_pil = input_image_debug_value[0]
+        end_image_pil = end_image_debug_value[0]
+        prompt = prompt_debug_value[0]
+        duration_seconds = total_second_length_debug_value[0]
+        allocation_time = min(duration_seconds * 60 * 100, 120)
+
+    return generate_video_on_gpu(
+        start_image_pil,
+        end_image_pil,
+        prompt,
+        negative_prompt,
+        duration_seconds,
+        steps,
+        guidance_scale,
+        guidance_scale_2,
+        seed,
+        randomize_seed,
+        progress,
+        allocation_time
+    )
 
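One detail worth flagging for readers: `spaces.GPU` is given a callable for `duration` below; ZeroGPU invokes it with the same arguments as the decorated function and uses the returned number of seconds as the GPU allocation, which is why `generate_video` threads `allocation_time` through and `get_duration` simply hands it back. A minimal mock of that calling pattern (an editor's assumption-level sketch, not the real `spaces` API):

```python
# Editor's sketch: mock of a decorator whose duration is computed per call,
# assuming the duration(*args, **kwargs) -> seconds convention used above.
def gpu(duration):
    def wrap(fn):
        def inner(*args, **kwargs):
            print(f"requesting {duration(*args, **kwargs)}s of GPU time")
            return fn(*args, **kwargs)
        return inner
    return wrap

@gpu(duration=lambda seconds: 60 + seconds * 10)
def render(seconds):
    return f"rendered {seconds}s clip"

print(render(2.1))  # requesting 81.0s of GPU time
```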
 def get_duration(
+    start_image_pil,
+    end_image_pil,
     prompt,
+    negative_prompt,
+    duration_seconds,
+    steps,
+    guidance_scale,
+    guidance_scale_2,
     seed,
+    randomize_seed,
+    progress,
+    allocation_time
 ):
+    return allocation_time
 
 @spaces.GPU(duration=get_duration)
+def generate_video_on_gpu(
+    start_image_pil,
+    end_image_pil,
     prompt,
+    negative_prompt,
+    duration_seconds,
+    steps,
+    guidance_scale,
+    guidance_scale_2,
     seed,
+    randomize_seed,
+    progress,
+    allocation_time
 ):
                """
         | 
| 252 | 
            +
                Generates a video by interpolating between a start and end image, guided by a text prompt,
         | 
| 253 | 
            +
                using the diffusers Wan2.2 pipeline.
         | 
| 254 | 
            +
                """
         | 
| 255 | 
            +
                if start_image_pil is None or end_image_pil is None:
         | 
| 256 | 
            +
                    raise gr.Error("Please upload both a start and an end image.")
         | 
| 257 |  | 
| 258 | 
            +
                progress(0.1, desc="Preprocessing images...")
         | 
| 259 |  | 
| 260 | 
            +
                # Step 1: Process the start image to get our target dimensions based on the new rules.
         | 
| 261 | 
            +
                processed_start_image = process_image_for_video(start_image_pil)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 262 |  | 
| 263 | 
            +
                # Step 2: Make the end image match the *exact* dimensions of the processed start image.
         | 
| 264 | 
            +
                processed_end_image = resize_and_crop_to_match(end_image_pil, processed_start_image)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 265 |  | 
| 266 | 
            +
                target_height, target_width = processed_start_image.height, processed_start_image.width
         | 
| 267 | 
            +
             | 
| 268 | 
            +
                # Handle seed and frame count
         | 
| 269 | 
            +
                current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
         | 
| 270 | 
            +
                num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
         | 
| 271 | 
            +
             | 
| 272 | 
            +
                progress(0.2, desc=f"Generating {num_frames} frames at {target_width}x{target_height} (seed: {current_seed})...")
         | 
| 273 | 
            +
             | 
| 274 | 
            +
                output_frames_list = pipe(
         | 
| 275 | 
            +
                    image=processed_start_image,
         | 
| 276 | 
            +
                    last_image=processed_end_image,
         | 
| 277 | 
            +
                    prompt=prompt,
         | 
| 278 | 
            +
                    negative_prompt=negative_prompt,
         | 
| 279 | 
            +
                    height=target_height,
         | 
| 280 | 
            +
                    width=target_width,
         | 
| 281 | 
            +
                    num_frames=num_frames,
         | 
| 282 | 
            +
                    guidance_scale=float(guidance_scale),
         | 
| 283 | 
            +
                    guidance_scale_2=float(guidance_scale_2),
         | 
| 284 | 
            +
                    num_inference_steps=int(steps),
         | 
| 285 | 
            +
                    generator=torch.Generator(device="cuda").manual_seed(current_seed),
         | 
| 286 | 
            +
                ).frames[0]
         | 
| 287 | 
            +
             | 
| 288 | 
            +
                progress(0.9, desc="Encoding and saving video...")
         | 
| 289 | 
            +
             | 
| 290 | 
            +
                with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         | 
| 291 | 
            +
                    video_path = tmpfile.name
         | 
| 292 | 
            +
             | 
| 293 | 
            +
                export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
         | 
| 294 | 
            +
             | 
| 295 | 
            +
                progress(1.0, desc="Done!")
         | 
| 296 | 
            +
                return video_path, gr.update(value = video_path, visible = True), current_seed
         | 
| 297 | 
            +
             | 
| 298 | 
            +
             | 
| 299 | 
            +
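A note on the timing math running through this function and the constants at the top of the file: requested durations are converted to frames at a fixed 24 fps and clamped to the model's 8-81 frame window, which is also where the 0.3 s / 3.4 s slider bounds come from. A standalone check (editor's sketch, not app code):

```python
# Editor's sketch: duration bounds and the frame-count clamp, reproduced standalone.
import numpy as np

FIXED_FPS, MIN_FRAMES_MODEL, MAX_FRAMES_MODEL = 24, 8, 81

print(round(MIN_FRAMES_MODEL / FIXED_FPS, 1))  # 0.3  == MIN_DURATION
print(round(MAX_FRAMES_MODEL / FIXED_FPS, 1))  # 3.4  == MAX_DURATION

for seconds in (0.1, 2.1, 5.0):
    frames = int(np.clip(int(round(seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
    print(f"{seconds}s -> {frames} frames")  # 8, 50, 81
```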
+# --- 3. Gradio User Interface ---
+
+with gr.Blocks() as app:
+    gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
+    gr.Markdown("Based on the [Wan 2.2 First/Last Frame workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/), applied to 🧨 Diffusers + [lightx2v/Wan2.2-Lightning](https://huggingface.co/lightx2v/Wan2.2-Lightning) 8-step LoRA")
+
+    with gr.Row(elem_id="general_items"):
+        with gr.Column():
+            with gr.Group(elem_id="group_all"):
+                with gr.Row():
+                    start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
+                    # Capture the Tabs component in a variable and assign IDs to tabs
+                    with gr.Tabs(elem_id="group_tabs") as tabs:
+                        with gr.TabItem("Upload", id="upload_tab"):
+                            end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
+                        with gr.TabItem("Generate", id="generate_tab"):
+                            generate_5seconds = gr.Button("Generate scene 5 seconds in the future", elem_id="fivesec")
+                            gr.Markdown("Generate a custom end-frame with an edit model like [Nano Banana](https://huggingface.co/spaces/multimodalart/nano-banana) or [Qwen Image Edit](https://huggingface.co/spaces/multimodalart/Qwen-Image-Edit-Fast)", elem_id="or_item")
+                prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images")
+
+                with gr.Accordion("Advanced Settings", open=False):
+                    duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=2.1, label="Video Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
+                    negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
+                    steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=8, label="Inference Steps")
+                    guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - high noise")
+                    guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - low noise")
+                    with gr.Row():
+                        seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                        randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True)
+
+                with gr.Accordion("Debug", elem_id="wan_accordion", open=False):
+                    input_image_debug = gr.Image(type="pil", label="Image Debug", height=320)
+                    input_video_debug = gr.Video(sources='upload', label="Input Video Debug", height=320, visible = False)
+                    end_image_debug = gr.Image(type="pil", label="End Image Debug", height=320)
+                    prompt_debug = gr.Textbox(elem_id="wan_prompt_debug", label="Prompt Debug", value='')
+                    total_second_length_debug = gr.Slider(label="Additional Video Length to Generate (seconds) Debug", minimum=1, maximum=120, value=10, step=0.1)
+
+                generate_button = gr.Button("Generate Video", variant="primary")
+
+        with gr.Column():
+            output_video = gr.Video(label="Generated Video", autoplay = True, loop = True)
+            download_button = gr.DownloadButton(label="Download", visible = True)
         | 
| 341 | 
            +
             | 
| 342 | 
            +
                # Main video generation button
         | 
| 343 | 
            +
                ui_inputs = [
         | 
| 344 | 
            +
                    start_image,
         | 
| 345 | 
            +
                    end_image,
         | 
         prompt,
+        negative_prompt_input,
+        duration_seconds_input,
+        steps_slider,
+        guidance_scale_input,
+        guidance_scale_2_input,
+        seed_input,
+        randomize_seed_checkbox
+    ]
+    ui_outputs = [output_video, download_button, seed_input]

+    generate_button.click(
+        fn=generate_video,
+        inputs=ui_inputs,
+        outputs=ui_outputs
+    )
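Gradio binds `inputs=ui_inputs` to `fn` positionally, so `generate_video`'s signature has to mirror the list order above. A hedged sketch of that shape (parameter names are assumptions; the actual definition sits earlier in app.py, outside these hunks):

# Assumed signature matching the ui_inputs order above -- a sketch,
# not the verbatim definition.
def generate_video(start_image, end_image, prompt, negative_prompt,
                   duration_seconds, steps, guidance_scale, guidance_scale_2,
                   seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    ...  # runs the Wan 2.2 pipeline shown above; returns (video_path, download update, seed)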

+    generate_5seconds.click(
+        fn=switch_to_upload_tab,
+        inputs=None,
+        outputs=[tabs]
+    ).then(
+        fn=lambda img: generate_end_frame(img, "this image is a still frame from a movie. generate a new frame with what happens in this scene 5 seconds in the future"),
+        inputs=[start_image],
+        outputs=[end_image]
+    ).success(
+        fn=generate_video,
+        inputs=ui_inputs,
+        outputs=ui_outputs
+    )
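`switch_to_upload_tab` is defined outside these hunks. Given that the `gr.Tabs` container is captured as `tabs` and its children carry ids, a plausible definition (an assumption, not the committed code) is:

def switch_to_upload_tab():
    # Select the "Upload" tab so the freshly generated end frame is visible.
    return gr.Tabs(selected="upload_tab")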

+    def handle_field_debug_change(input_image_debug_data, input_video_debug_data, end_image_debug_data, prompt_debug_data, total_second_length_debug_data):
+        print("handle_field_debug_change")
         input_image_debug_value[0] = input_image_debug_data
+        input_video_debug_value[0] = input_video_debug_data
+        end_image_debug_value[0] = end_image_debug_data
         prompt_debug_value[0] = prompt_debug_data
+        total_second_length_debug_value[0] = total_second_length_debug_data
         return []

     input_image_debug.upload(
         fn=handle_field_debug_change,
+        inputs=[input_image_debug, input_video_debug, end_image_debug, prompt_debug, total_second_length_debug],
+        outputs=[]
+    )
+
+    input_video_debug.upload(
+        fn=handle_field_debug_change,
+        inputs=[input_image_debug, input_video_debug, end_image_debug, prompt_debug, total_second_length_debug],
+        outputs=[]
+    )
+
+    end_image_debug.upload(
+        fn=handle_field_debug_change,
+        inputs=[input_image_debug, input_video_debug, end_image_debug, prompt_debug, total_second_length_debug],
         outputs=[]
     )

     prompt_debug.change(
         fn=handle_field_debug_change,
+        inputs=[input_image_debug, input_video_debug, end_image_debug, prompt_debug, total_second_length_debug],
         outputs=[]
     )

+    total_second_length_debug.change(
         fn=handle_field_debug_change,
+        inputs=[input_image_debug, input_video_debug, end_image_debug, prompt_debug, total_second_length_debug],
         outputs=[]
     )
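The handler above mutates one-element lists (`input_image_debug_value[0] = ...`), a common way to share state with module-level code without `global`. The containers themselves are declared outside these hunks, presumably along these lines (an assumption):

# Assumed module-level declarations -- not shown in the diff.
input_image_debug_value = [None]
input_video_debug_value = [None]
end_image_debug_value = [None]
prompt_debug_value = [None]
total_second_length_debug_value = [None]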
+
+    with gr.Row(elem_id="wan_image_examples", visible=False):
+        gr.Examples(
+            label="Examples from images",
+            examples=[
+                ["ugly_sonic.jpeg", "squatting_sonic.png", "the character dodges the missiles"],
+                ["capyabara_zoomed.png", "capyabara.webp", "a dramatic dolly zoom"],
+                ["squatting_sonic.png", "ugly_sonic.jpeg", "the character jumps"],
+                ["poli_tower.png", "tower_takes_off.png", "the man turns around"],
+                ["capyabara.webp", "capyabara_zoomed.png", "a straightforward zoom"],
+            ],
+            inputs=[start_image, end_image, prompt],
+            outputs=ui_outputs,
+            fn=generate_video,
+            run_on_click=True,
+            cache_examples=True,
+        )
+
+    gr.Examples(
+        examples=[
+            ["poli_tower.png", "tower_takes_off.png", "the man turns around"],
+            ["ugly_sonic.jpeg", "squatting_sonic.png", "the character dodges the missiles"],
+            ["capyabara_zoomed.png", "capyabara.webp", "a dramatic dolly zoom"],
+        ],
+        inputs=[start_image, end_image, prompt],
+        outputs=ui_outputs,
+        fn=generate_video,
+        cache_examples=False,
+    )
+
+if __name__ == "__main__":
+    app.launch(share=True)
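Note the two `gr.Examples` blocks: the first sits in a row with `visible=False` and `cache_examples=True`, so its example videos are generated once and served from cache, while the second, visible block uses `cache_examples=False` and simply loads the example images and prompt into the inputs when clicked.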
    	
requirements.txt
CHANGED

@@ -1,43 +1,11 @@
-
+git+https://github.com/linoytsaban/diffusers.git@wan22-loras
 
-
-
-
-
-
-
-
-
-wandb==0.20.1
-httpx==0.28.1
-transformers==4.43.0
-accelerate==1.8.0
-scikit-learn==1.7.0
-einops==0.8.1
-einops-exts==0.0.4
-timm==1.0.15
-openai-clip==1.0.1
-fsspec==2025.5.1
-kornia==0.8.1
-matplotlib==3.10.3
-ninja==1.11.1.4
-omegaconf==2.3.0
-opencv-python==4.11.0.86
-pandas==2.3.0
-pillow==11.2.1
-pytorch-lightning==2.5.1.post0
-PyYAML==6.0.2
-scipy==1.15.3
-tqdm==4.67.1
-triton==3.3.0
-urllib3==2.4.0
-webdataset==0.2.111
-xformers==0.0.30
-facexlib==0.3.0
-k-diffusion==0.1.1.post1
-diffusers==0.33.1
-imageio==2.37.0
-pillow-heif==0.22.0
-
-open-clip-torch==2.24.0
+transformers
+accelerate
+safetensors
+sentencepiece
+peft
+ftfy
+imageio-ffmpeg
+opencv-python
+torchao==0.11.0