Commit aeaceee
Parent(s): c027c15

Add instance seg visualization

Files changed:
- app.py +2 -2
- predict.py +17 -17
app.py CHANGED

@@ -10,14 +10,14 @@ demo = gr.Blocks()
 
 with demo:
 
-    gr.Markdown("# **<p align='center'>Mask2Former: Masked Attention Transformer for Universal Segmentation</p>**")
+    gr.Markdown("# **<p align='center'>Mask2Former: Masked Attention Mask Transformer for Universal Segmentation</p>**")
     gr.Markdown("This space demonstrates the use of Mask2Former. It was introduced in the paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) and first released in [this repository](https://github.com/facebookresearch/Mask2Former/). \
     Before Mask2Former, you'd have to resort to using a specialized architecture designed for solving a particular kind of image segmentation task (i.e. semantic, instance or panoptic segmentation). On the other hand, in the form of Mask2Former, for the first time, we have a single architecture that is capable of solving any segmentation task and performs on par or better than specialized architectures.")
 
     with gr.Box():
 
         with gr.Row():
-            segmentation_task = gr.Dropdown(["semantic", "panoptic"], value="panoptic", label="Segmentation Task", show_label=True)
+            segmentation_task = gr.Dropdown(["semantic", "instance", "panoptic"], value="panoptic", label="Segmentation Task", show_label=True)
     with gr.Box():
         with gr.Row():
             input_image = gr.Image(type='filepath', label="Input Image", show_label=True)
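For orientation, here is a minimal sketch of how this Blocks UI plausibly wires the new three-way task dropdown into `predict_masks` from predict.py. Only the `Dropdown` and `Image` lines come from the diff above; the button, output component, and event wiring are assumptions (and `gr.Box` is Gradio 3.x API):

```python
import gradio as gr

from predict import predict_masks  # defined in predict.py (see diff below)

demo = gr.Blocks()

with demo:
    with gr.Box():
        with gr.Row():
            # The commit extends the task choices with "instance"
            segmentation_task = gr.Dropdown(
                ["semantic", "instance", "panoptic"],
                value="panoptic", label="Segmentation Task", show_label=True,
            )
    with gr.Box():
        with gr.Row():
            # type='filepath' hands predict_masks a path, matching its
            # input_img_path parameter
            input_image = gr.Image(type="filepath", label="Input Image", show_label=True)
            output_image = gr.Image(label="Segmentation Result")  # assumed output component

    # Assumed wiring: route both inputs into the predictor on click
    gr.Button("Run Segmentation").click(
        predict_masks, inputs=[input_image, segmentation_task], outputs=output_image
    )

demo.launch()
```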
predict.py CHANGED

@@ -40,6 +40,7 @@ def draw_panoptic_segmentation(predicted_segmentation_map, seg_info, image):
     return output_img
 
 def draw_semantic_segmentation(segmentation_map, image, palette):
+
     color_segmentation_map = np.zeros((segmentation_map.shape[0], segmentation_map.shape[1], 3), dtype=np.uint8) # height, width, 3
     for label, color in enumerate(palette):
         color_segmentation_map[segmentation_map - 1 == label, :] = color

@@ -50,15 +51,20 @@ def draw_semantic_segmentation(segmentation_map, image, palette):
     img = img.astype(np.uint8)
     return img
 
-def visualize_instance_seg_mask(mask):
-
+def visualize_instance_seg_mask(mask, input_image):
+    color_segmentation_map = np.zeros((mask.shape[0], mask.shape[1], 3), dtype=np.uint8)
+
     labels = np.unique(mask)
-    label2color = {label: (random.randint(0, 1), random.randint(0, 255), random.randint(0, 255)) for label in labels}
-
-
-
-
-
+    label2color = {int(label): (random.randint(0, 1), random.randint(0, 255), random.randint(0, 255)) for label in labels}
+
+    for label, color in label2color.items():
+        color_segmentation_map[mask - 1 == label, :] = color
+
+    ground_truth_color_seg = color_segmentation_map[..., ::-1]
+
+    img = np.array(input_image) * 0.5 + ground_truth_color_seg * 0.5
+    img = img.astype(np.uint8)
+    return img
 
 def predict_masks(input_img_path: str, segmentation_task: str):

@@ -82,15 +88,9 @@ def predict_masks(input_img_path: str, segmentation_task: str):
         output_result = draw_semantic_segmentation(predicted_segmentation_map, image, palette)
 
     elif segmentation_task == "instance":
-
-
-
-        # # predicted_segmentation_map = torch.argmax(result, dim=0).numpy()
-        # # results = torch.argmax(predicted_segmentation_map, dim=0).numpy()
-        # print("predicted_segmentation_map:",predicted_segmentation_map)
-        # print("type predicted_segmentation_map:", type(predicted_segmentation_map))
-        # output_result = visualize_instance_seg_mask(predicted_segmentation_map)
-        # # mask = plot_semantic_map(predicted_segmentation_map, image)
+        result = image_processor.post_process_instance_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
+        predicted_instance_map = result["segmentation"].cpu().detach().numpy()
+        output_result = visualize_instance_seg_mask(predicted_instance_map, image)
 
     else:
         result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
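Taken together, the new instance branch post-processes the raw model outputs into a per-pixel instance-id map and overlays one random color per instance on the input image. Below is a standalone sketch of that path, runnable outside the Space. The checkpoint name and example image are assumptions (the Space's model-loading code is not part of this diff), and the overlay here draws all three color channels from the full 0-255 range rather than the diff's `randint(0, 1)` first channel:

```python
import random

import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation

# Assumed checkpoint; any Mask2Former instance-segmentation checkpoint works here.
checkpoint = "facebook/mask2former-swin-tiny-coco-instance"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
model = Mask2FormerForUniversalSegmentation.from_pretrained(checkpoint)

image = Image.open("example.jpg").convert("RGB")  # assumed input image
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Same post-processing call the commit adds to predict_masks;
# image.size is PIL's (width, height), so [::-1] gives the (height, width)
# target size the processor expects.
result = image_processor.post_process_instance_segmentation(
    outputs, target_sizes=[image.size[::-1]]
)[0]
predicted_instance_map = result["segmentation"].cpu().numpy()

# Overlay in the spirit of visualize_instance_seg_mask: one random color per
# instance id, blended 50/50 with the input image.
color_map = np.zeros((*predicted_instance_map.shape, 3), dtype=np.uint8)
for label in np.unique(predicted_instance_map):
    color_map[predicted_instance_map == label] = [random.randint(0, 255) for _ in range(3)]
overlay = (np.array(image) * 0.5 + color_map * 0.5).astype(np.uint8)
Image.fromarray(overlay).save("instance_overlay.png")
```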