Commit f9b1bcf · parent: 8c62972
JeffLiang committed: try to fix memory with fixed input resolution

Files changed:
- app.py (+2 -2)
- open_vocab_seg/utils/predictor.py (+13 -3)
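The fix named in the commit message caps the input at a fixed resolution before mask generation: the longer image side is scaled to 1280 pixels and the shorter side follows proportionally, which should bound the memory that SAM's automatic mask generator and the downstream CLIP crops consume. The predictor.py hunk further below contains the actual change; here is a minimal standalone sketch of that resize step (the helper name resize_longest_side is illustrative, not part of the repo):

    import cv2

    def resize_longest_side(ori_image, target=1280):
        """Scale so the longer side equals `target`, keeping the aspect ratio."""
        height, width = ori_image.shape[:2]
        if width > height:
            new_width = target
            new_height = int((new_width / width) * height)
        else:
            new_height = target
            new_width = int((new_height / height) * width)
        # cv2.resize takes (width, height), not (height, width)
        return cv2.resize(ori_image, (new_width, new_height))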
 
    	
app.py  CHANGED

@@ -55,7 +55,7 @@ def inference(class_names, proposal_gen, granularity, input_img):
 
 examples = [['Saturn V, toys, desk, wall, sunflowers, white roses, chrysanthemums, carnations, green dianthus', 'Segment_Anything', 0.8, './resources/demo_samples/sample_01.jpeg'],
             ['red bench, yellow bench, blue bench, brown bench, green bench, blue chair, yellow chair, green chair, brown chair, yellow square painting, barrel, buddha statue', 'Segment_Anything', 0.8, './resources/demo_samples/sample_04.png'],
-            ['pillow, pipe, sweater, shirt, jeans jacket, shoes, cabinet, handbag, photo frame', 'Segment_Anything', 0.
+            ['pillow, pipe, sweater, shirt, jeans jacket, shoes, cabinet, handbag, photo frame', 'Segment_Anything', 0.7, './resources/demo_samples/sample_05.png'],
             ['Saturn V, toys, blossom', 'MaskFormer', 1.0, './resources/demo_samples/sample_01.jpeg'],
             ['Oculus, Ukulele', 'MaskFormer', 1.0, './resources/demo_samples/sample_03.jpeg'],
             ['Golden gate, yacht', 'MaskFormer', 1.0, './resources/demo_samples/sample_02.jpeg'],]
@@ -89,7 +89,7 @@ gr.Interface(
         gr.Slider(0, 1.0, 0.8, label="For Segment_Anything only, granularity of masks from 0 (most coarse) to 1 (most precise)"),
         gr.Image(type='filepath'),
     ],
-    outputs=gr.
+    outputs=gr.components.Image(type="pil", label='segmentation map'),
     title=title,
     description=description,
     article=article,
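On the app.py side, the commit updates one of the Segment_Anything example rows (now granularity 0.7 with sample_05.png) and pins the interface output to a single PIL image labeled 'segmentation map', as shown above. A minimal sketch of how such an output component wires into gr.Interface, assuming the Gradio 3.x API used in the diff (demo_fn is a placeholder, not the repo's inference function):

    import gradio as gr
    from PIL import Image

    def demo_fn(img_path):
        # Placeholder: the real app runs open-vocabulary segmentation here.
        return Image.open(img_path)

    gr.Interface(
        fn=demo_fn,
        inputs=gr.Image(type='filepath'),
        # The commit fixes the output to a PIL image with this label.
        outputs=gr.components.Image(type='pil', label='segmentation map'),
    ).launch()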
    	
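In predictor.py (diff below), run_on_image now downscales its input before SAM runs and keeps the original-resolution image only for visualization, and the CLIP model is moved to the GPU inside the no_grad/autocast block instead of at construction time, so its weights do not occupy GPU memory before inference is actually requested. A minimal sketch of that deferred-placement pattern with open_clip, assuming a CUDA device is available (the LazyClipEncoder wrapper is illustrative, not part of the repo):

    import torch
    import open_clip

    class LazyClipEncoder:
        def __init__(self, pretrained_path):
            # Build on CPU; GPU placement is deferred to the first encode call.
            self.clip_model, _, _ = open_clip.create_model_and_transforms(
                'ViT-L-14', pretrained=pretrained_path)
            self.tokenizer = open_clip.get_tokenizer('ViT-L-14')

        def encode_text(self, prompts):
            text = self.tokenizer(prompts)
            with torch.no_grad(), torch.cuda.amp.autocast():
                self.clip_model.cuda()   # moved here, as in the commit
                feats = self.clip_model.encode_text(text.cuda())
                return feats / feats.norm(dim=-1, keepdim=True)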
open_vocab_seg/utils/predictor.py  CHANGED

@@ -153,11 +153,19 @@ class SAMVisualizationDemo(object):
         sam = sam_model_registry["vit_l"](checkpoint=sam_path).cuda()
         self.predictor = SamAutomaticMaskGenerator(sam, points_per_batch=16)
         self.clip_model, _, _ = open_clip.create_model_and_transforms('ViT-L-14', pretrained=ovsegclip_path)
-        self.clip_model.cuda()
 
-    def run_on_image(self,
+    def run_on_image(self, ori_image, class_names):
+        height, width, _ = ori_image.shape
+        if width > height:
+            new_width = 1280
+            new_height = int((new_width / width) * height)
+        else:
+            new_height = 1280
+            new_width = int((new_height / height) * width)
+        image = cv2.resize(ori_image, (new_width, new_height))
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-
+        ori_image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB)
+        visualizer = OVSegVisualizer(ori_image, self.metadata, instance_mode=self.instance_mode, class_names=class_names)
         with torch.no_grad(), torch.cuda.amp.autocast():
             masks = self.predictor.generate(image)
         pred_masks = [masks[i]['segmentation'][None,:,:] for i in range(len(masks))]
@@ -192,6 +200,7 @@ class SAMVisualizationDemo(object):
         img_batches = torch.split(imgs, 32, dim=0)
 
         with torch.no_grad(), torch.cuda.amp.autocast():
+            self.clip_model.cuda()
             text_features = self.clip_model.encode_text(text.cuda())
             text_features /= text_features.norm(dim=-1, keepdim=True)
             image_features = []
@@ -224,6 +233,7 @@ class SAMVisualizationDemo(object):
         pred_mask = r.argmax(dim=0).to('cpu')
         pred_mask[blank_area] = 255
         pred_mask = np.array(pred_mask, dtype=np.int)
+        pred_mask = cv2.resize(pred_mask, (width, height), interpolation=cv2.INTER_NEAREST)
 
         vis_output = visualizer.draw_sem_seg(
             pred_mask
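Because SAM and CLIP operate on the downscaled copy, the predicted class-ID map comes out at the reduced resolution; the added cv2.resize call upsamples it back to the original (width, height) with nearest-neighbor interpolation before drawing, since bilinear resampling would blend neighboring class IDs into meaningless in-between values. A minimal sketch of that restore step (restore_mask is an illustrative name; the uint8 cast is an assumption chosen for a dtype cv2.resize accepts, whereas the diff keeps np.int):

    import cv2
    import numpy as np

    def restore_mask(pred_mask, width, height):
        # Nearest-neighbor keeps labels discrete; uint8 covers class IDs plus the 255 "ignore" value.
        return cv2.resize(pred_mask.astype(np.uint8), (width, height),
                          interpolation=cv2.INTER_NEAREST)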