Update app.py
app.py CHANGED
@@ -110,6 +110,19 @@ def prep(config):
     frames, latents, total_inverted_latents = frames_and_latents
     return frames, latents, total_inverted_latents, rgb_reconstruction
 
+def move_to_device(data, device):
+    """Helper function to recursively move tensors to device"""
+    if isinstance(data, torch.Tensor):
+        return data.to(device)
+    elif isinstance(data, list):
+        return [move_to_device(item, device) for item in data]
+    elif isinstance(data, dict):
+        return {key: move_to_device(value, device) for key, value in data.items()}
+    elif isinstance(data, tuple):
+        return tuple(move_to_device(item, device) for item in data)
+    else:
+        return data
+
 @spaces.GPU(duration=120)
 def preprocess_and_invert(input_video,
                           frames,
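The move_to_device helper added above recurses through lists, tuples, and dicts, moving every torch.Tensor it finds and passing any other value through unchanged. Below is a minimal standalone sketch of that behavior; the nested payload is hypothetical, and the target device is chosen at runtime so the snippet also runs on a CPU-only machine:

import torch

def move_to_device(data, device):
    # Same recursive pattern as the helper in the diff: move tensors,
    # recurse into containers, return anything else untouched.
    if isinstance(data, torch.Tensor):
        return data.to(device)
    elif isinstance(data, list):
        return [move_to_device(item, device) for item in data]
    elif isinstance(data, dict):
        return {key: move_to_device(value, device) for key, value in data.items()}
    elif isinstance(data, tuple):
        return tuple(move_to_device(item, device) for item in data)
    else:
        return data

# Hypothetical nested payload, roughly the shape of what the gr.State objects hold.
device = "cuda" if torch.cuda.is_available() else "cpu"
payload = {"frames": [torch.zeros(2, 3, 64, 64)], "zs": (torch.ones(4),), "fps": 10}
moved = move_to_device(payload, device)
print(moved["frames"][0].device, moved["fps"])  # tensors moved, the int left as-is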
@@ -127,6 +140,16 @@ def preprocess_and_invert(input_video,
                           skip_steps: int = 15,
 
 ):
+    # Move tensors to CUDA at the beginning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cuda')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cuda')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cuda')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cuda')
+
     sd_version = "2.1"
     height: int = 512
     weidth: int = 512
@@ -156,14 +179,30 @@ def preprocess_and_invert(input_video,
     seed = randomize_seed_fn()
     seed_everything(seed)
 
-
+    frames_val, latents_val, total_inverted_latents_val, zs_val, rgb_reconstruction = prep(preprocess_config)
 
-
-
-
-
+    # Move tensors to CPU before creating gr.State
+    frames_val = move_to_device(frames_val, 'cpu')
+    latents_val = move_to_device(latents_val, 'cpu')
+    total_inverted_latents_val = move_to_device(total_inverted_latents_val, 'cpu')
+    zs_val = move_to_device(zs_val, 'cpu')
+
+    frames = gr.State(value = frames_val)
+    latents = gr.State(value = latents_val)
+    inverted_latents = gr.State(value = total_inverted_latents_val)
+    zs = gr.State(value = zs_val)
     do_inversion = False
 
+    # Move all tensors to CPU before returning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cpu')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cpu')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cpu')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cpu')
+
     return frames, latents, inverted_latents, zs, do_inversion
 
 @spaces.GPU(duration=120)
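On a ZeroGPU Space the GPU is only attached while a call decorated with @spaces.GPU is running, so anything kept in gr.State between calls has to live on the CPU; that is presumably why the values returned here are moved to 'cpu' before being wrapped in gr.State and moved back to 'cuda' at the top of the next decorated function. A minimal sketch of that round-trip; heavy_step and the doubling operation are placeholders, not functions from app.py:

import torch
import spaces  # provided on Hugging Face Spaces; the decorator attaches a GPU per call

@spaces.GPU(duration=120)
def heavy_step(frames_cpu: torch.Tensor) -> torch.Tensor:
    # The GPU exists only while this decorated call runs (ZeroGPU).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    frames = frames_cpu.to(device)    # upload the CPU-resident state
    edited = frames * 2               # placeholder for the real editing work
    return edited.cpu()               # hand back CPU tensors so stored state stays valid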
@@ -189,6 +228,16 @@ def edit_with_pnp(input_video,
                   n_fps: int = 10,
                   progress=gr.Progress(track_tqdm=True)
 ):
+    # Move tensors to CUDA at the beginning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cuda')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cuda')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cuda')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cuda')
+
     config = {}
 
     config["sd_version"] = "2.1"
@@ -231,12 +280,34 @@
     seed_everything(seed)
 
     tokenflow_pipe.enable_xformers_memory_efficient_attention()
-
+
+    # Ensure tensors are on CUDA for editing
+    frames_cuda = frames.value if hasattr(frames, 'value') else frames
+    inverted_latents_cuda = inverted_latents.value if hasattr(inverted_latents, 'value') else inverted_latents
+    zs_cuda = zs.value if hasattr(zs, 'value') else zs
+
+    # Move to CUDA if needed
+    frames_cuda = move_to_device(frames_cuda, 'cuda')
+    inverted_latents_cuda = move_to_device(inverted_latents_cuda, 'cuda')
+    zs_cuda = move_to_device(zs_cuda, 'cuda')
+
+    editor = TokenFlow(config=config, pipe=tokenflow_pipe, frames=frames_cuda, inverted_latents=inverted_latents_cuda, zs=zs_cuda)
     edited_frames = editor.edit_video()
 
     edit_video_path = f'tokenflow_PnP_fps_{n_fps}.mp4'
     save_video(edited_frames, edit_video_path, fps=n_fps)
     # path = export_to_video(edited_frames)
+
+    # Move all tensors to CPU before returning
+    if frames is not None and hasattr(frames, 'value'):
+        frames.value = move_to_device(frames.value, 'cpu')
+    if latents is not None and hasattr(latents, 'value'):
+        latents.value = move_to_device(latents.value, 'cpu')
+    if inverted_latents is not None and hasattr(inverted_latents, 'value'):
+        inverted_latents.value = move_to_device(inverted_latents.value, 'cpu')
+    if zs is not None and hasattr(zs, 'value'):
+        zs.value = move_to_device(zs.value, 'cpu')
+
     return edit_video_path, frames, latents, inverted_latents, zs, do_inversion
 
 ########
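The "x.value if hasattr(x, 'value') else x" checks repeated throughout this change exist because these arguments may arrive either as plain tensors or wrapped in gr.State. A hypothetical helper, not part of app.py, that would factor out that unwrapping:

def unwrap_state(maybe_state):
    # gr.State objects carry their payload in .value; plain values pass through untouched.
    return maybe_state.value if hasattr(maybe_state, 'value') else maybe_state

With that, the CUDA preparation in edit_with_pnp would reduce to, for example, frames_cuda = move_to_device(unwrap_state(frames), 'cuda').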