EdgeTAM

Running on Zero

App Files Community

Adding MPS support for Apple silicon

by clementlr - opened 19 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+42

-10

Files changed (1) hide show

app.py +42 -10

app.py CHANGED Viewed

@@ -21,6 +21,7 @@ import spaces
 import torch
 from moviepy.editor import ImageSequenceClip
 from PIL import Image
 from sam2.build_sam import build_sam2_video_predictor
@@ -72,7 +73,9 @@ examples = [
 OBJ_ID = 0
 sam2_checkpoint = "checkpoints/edgetam.pt"
 model_cfg = "edgetam.yaml"
 predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
 def get_video_fps(video_path):
@@ -340,11 +343,26 @@ def propagate_to_all(
     input_points,
     inference_state,
 ):
-    if torch.cuda.get_device_properties(0).major >= 8:
-        torch.backends.cuda.matmul.allow_tf32 = True
-        torch.backends.cudnn.allow_tf32 = True
-    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
-        predictor.to("cuda")
         if inference_state:
             inference_state["device"] = predictor.device
@@ -374,15 +392,16 @@ def propagate_to_all(
             out_mask = video_segments[out_frame_idx][OBJ_ID]
             mask_image = show_mask(out_mask)
             output_frame = Image.alpha_composite(transparent_background, mask_image)
             output_frame = np.array(output_frame)
             output_frames.append(output_frame)
-        torch.cuda.empty_cache()
         # Create a video clip from the image sequence
         original_fps = get_video_fps(video_in)
-        fps = original_fps  # Frames per second
-        clip = ImageSequenceClip(output_frames, fps=fps)
         # Write the result to a file
         unique_id = datetime.now().strftime("%Y%m%d%H%M%S")
         final_vid_output_path = f"output_video_{unique_id}.mp4"
@@ -390,8 +409,21 @@ def propagate_to_all(
             tempfile.gettempdir(), final_vid_output_path
         )
-        # Write the result to a file
-        clip.write_videofile(final_vid_output_path, codec="libx264")
         return gr.update(value=final_vid_output_path)

 import torch
 from moviepy.editor import ImageSequenceClip
+from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter # adding this for MPS compatibility
 from PIL import Image
 from sam2.build_sam import build_sam2_video_predictor
 OBJ_ID = 0
 sam2_checkpoint = "checkpoints/edgetam.pt"
 model_cfg = "edgetam.yaml"
+device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu") # MPS support
 predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
+predictor.to(device)
 def get_video_fps(video_path):
     input_points,
     inference_state,
 ):
+    # Detect which accelerator backends (CUDA, MPS) are available
+    use_cuda = torch.cuda.is_available()
+    use_mps = torch.backends.mps.is_available() and torch.backends.mps.is_built()
+    if use_cuda:
+        if torch.cuda.get_device_properties(0).major >= 8:
+            torch.backends.cuda.matmul.allow_tf32 = True
+            torch.backends.cudnn.allow_tf32 = True
+            autocast_kwargs = dict(device_type="cuda", dtype=torch.bfloat16)
+    elif use_mps:
+        autocast_kwargs = dict(device_type="mps", dtype=torch.float16)
+    with torch.autocast(**autocast_kwargs):
+        if use_cuda:
+            predictor.to("cuda")
+        elif use_mps:
+            predictor.to("mps")
         if inference_state:
             inference_state["device"] = predictor.device
             out_mask = video_segments[out_frame_idx][OBJ_ID]
             mask_image = show_mask(out_mask)
             output_frame = Image.alpha_composite(transparent_background, mask_image)
+            output_frame = output_frame.convert("RGB")
             output_frame = np.array(output_frame)
             output_frames.append(output_frame)
+        if use_cuda:
+            torch.cuda.empty_cache()
         # Create a video clip from the image sequence
         original_fps = get_video_fps(video_in)
+        fps = float(original_fps)  # Frames per second
         # Write the result to a file
         unique_id = datetime.now().strftime("%Y%m%d%H%M%S")
         final_vid_output_path = f"output_video_{unique_id}.mp4"
             tempfile.gettempdir(), final_vid_output_path
         )
+        # Write the result to a file using moviepy ImageSequenceClip
+        if use_cuda:
+            clip = ImageSequenceClip(output_frames, fps=fps)
+            clip.write_videofile(final_vid_output_path, codec="libx264")
+        # Write the result to a file using moviepy FFMPEG_VideoWriter for MPS compatibility
+        elif use_mps:
+            clip_array = output_frames  # list of RGB numpy arrays
+            size = clip_array[0].shape[1], clip_array[0].shape[0]  # (width, height)
+            writer = FFMPEG_VideoWriter(final_vid_output_path, size, fps=fps, codec="libx264")
+            for frame in clip_array:
+                writer.write_frame(frame)
+            writer.close()
         return gr.update(value=final_vid_output_path)