Update app.py
app.py CHANGED
@@ -110,6 +110,19 @@ def prep(config):
     frames, latents, total_inverted_latents = frames_and_latents
     return frames, latents, total_inverted_latents, rgb_reconstruction
 
+def move_to_device(data, device):
+    """Helper function to recursively move tensors to device"""
+    if isinstance(data, torch.Tensor):
+        return data.to(device)
+    elif isinstance(data, list):
+        return [move_to_device(item, device) for item in data]
+    elif isinstance(data, dict):
+        return {key: move_to_device(value, device) for key, value in data.items()}
+    elif isinstance(data, tuple):
+        return tuple(move_to_device(item, device) for item in data)
+    else:
+        return data
+
 @spaces.GPU(duration=120)
 def preprocess_and_invert(input_video,
                           frames,
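The move_to_device helper added above recurses through lists, tuples, and dicts, moving every torch.Tensor it finds and passing any other value through unchanged. Below is a minimal standalone sketch of that behavior; the nested payload is hypothetical, and the target device is chosen at runtime so the snippet also runs on a CPU-only machine:

import torch

def move_to_device(data, device):
    # Same recursive pattern as the helper in the diff: move tensors,
    # recurse into containers, return anything else untouched.
    if isinstance(data, torch.Tensor):
        return data.to(device)
    elif isinstance(data, list):
        return [move_to_device(item, device) for item in data]
    elif isinstance(data, dict):
        return {key: move_to_device(value, device) for key, value in data.items()}
    elif isinstance(data, tuple):
        return tuple(move_to_device(item, device) for item in data)
    else:
        return data

# Hypothetical nested payload, roughly the shape of what the gr.State objects hold.
device = "cuda" if torch.cuda.is_available() else "cpu"
payload = {"frames": [torch.zeros(2, 3, 64, 64)], "zs": (torch.ones(4),), "fps": 10}
moved = move_to_device(payload, device)
print(moved["frames"][0].device, moved["fps"])  # tensors moved, the int left as-is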
@@ -127,6 +140,16 @@ def preprocess_and_invert(input_video,
                           skip_steps: int = 15,
 
 ):
+    # Move tensors to CUDA at the beginning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cuda')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cuda')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cuda')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cuda')
+
     sd_version = "2.1"
     height: int = 512
     weidth: int = 512
@@ -156,14 +179,30 @@ def preprocess_and_invert(input_video,
     seed = randomize_seed_fn()
     seed_everything(seed)
 
-
+    frames_val, latents_val, total_inverted_latents_val, zs_val, rgb_reconstruction = prep(preprocess_config)
 
-
-
-
-
+    # Move tensors to CPU before creating gr.State
+    frames_val = move_to_device(frames_val, 'cpu')
+    latents_val = move_to_device(latents_val, 'cpu')
+    total_inverted_latents_val = move_to_device(total_inverted_latents_val, 'cpu')
+    zs_val = move_to_device(zs_val, 'cpu')
+
+    frames = gr.State(value = frames_val)
+    latents = gr.State(value = latents_val)
+    inverted_latents = gr.State(value = total_inverted_latents_val)
+    zs = gr.State(value = zs_val)
     do_inversion = False
 
+    # Move all tensors to CPU before returning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cpu')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cpu')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cpu')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cpu')
+
     return frames, latents, inverted_latents, zs, do_inversion
 
 @spaces.GPU(duration=120)
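On a ZeroGPU Space the GPU is only attached while a call decorated with @spaces.GPU is running, so anything kept in gr.State between calls has to live on the CPU; that is presumably why the values returned here are moved to 'cpu' before being wrapped in gr.State and moved back to 'cuda' at the top of the next decorated function. A minimal sketch of that round-trip; heavy_step and the doubling operation are placeholders, not functions from app.py:

import torch
import spaces  # provided on Hugging Face Spaces; the decorator attaches a GPU per call

@spaces.GPU(duration=120)
def heavy_step(frames_cpu: torch.Tensor) -> torch.Tensor:
    # The GPU exists only while this decorated call runs (ZeroGPU).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    frames = frames_cpu.to(device)    # upload the CPU-resident state
    edited = frames * 2               # placeholder for the real editing work
    return edited.cpu()               # hand back CPU tensors so stored state stays valid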
@@ -189,6 +228,16 @@ def edit_with_pnp(input_video,
                   n_fps: int = 10,
                   progress=gr.Progress(track_tqdm=True)
 ):
+    # Move tensors to CUDA at the beginning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cuda')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cuda')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cuda')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cuda')
+
     config = {}
 
     config["sd_version"] = "2.1"
@@ -231,12 +280,34 @@
     seed_everything(seed)
 
     tokenflow_pipe.enable_xformers_memory_efficient_attention()
-
+
+    # Ensure tensors are on CUDA for editing
+    frames_cuda = frames.value if hasattr(frames, 'value') else frames
+    inverted_latents_cuda = inverted_latents.value if hasattr(inverted_latents, 'value') else inverted_latents
+    zs_cuda = zs.value if hasattr(zs, 'value') else zs
+
+    # Move to CUDA if needed
+    frames_cuda = move_to_device(frames_cuda, 'cuda')
+    inverted_latents_cuda = move_to_device(inverted_latents_cuda, 'cuda')
+    zs_cuda = move_to_device(zs_cuda, 'cuda')
+
+    editor = TokenFlow(config=config, pipe=tokenflow_pipe, frames=frames_cuda, inverted_latents=inverted_latents_cuda, zs=zs_cuda)
     edited_frames = editor.edit_video()
 
     edit_video_path = f'tokenflow_PnP_fps_{n_fps}.mp4'
     save_video(edited_frames, edit_video_path, fps=n_fps)
     # path = export_to_video(edited_frames)
+
+    # Move all tensors to CPU before returning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cpu')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cpu')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cpu')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cpu')
+
     return edit_video_path, frames, latents, inverted_latents, zs, do_inversion
 
 ########
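The "x.value if hasattr(x, 'value') else x" checks repeated throughout this change exist because these arguments may arrive either as plain tensors or wrapped in gr.State. A hypothetical helper, not part of app.py, that would factor out that unwrapping:

def unwrap_state(maybe_state):
    # gr.State objects carry their payload in .value; plain values pass through untouched.
    return maybe_state.value if hasattr(maybe_state, 'value') else maybe_state

With that, the CUDA preparation in edit_with_pnp would reduce to, for example, frames_cuda = move_to_device(unwrap_state(frames), 'cuda').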