Commit f9b1bcf · parent: 8c62972
JeffLiang committed: try to fix memory with fixed input resolution

Files changed:
- app.py (+2 -2)
- open_vocab_seg/utils/predictor.py (+13 -3)
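The fix named in the commit message caps the input at a fixed resolution before mask generation: the longer image side is scaled to 1280 pixels and the shorter side follows proportionally, which should bound the memory that SAM's automatic mask generator and the downstream CLIP crops consume. The predictor.py hunk further below contains the actual change; here is a minimal standalone sketch of that resize step (the helper name resize_longest_side is illustrative, not part of the repo):

    import cv2

    def resize_longest_side(ori_image, target=1280):
        """Scale so the longer side equals `target`, keeping the aspect ratio."""
        height, width = ori_image.shape[:2]
        if width > height:
            new_width = target
            new_height = int((new_width / width) * height)
        else:
            new_height = target
            new_width = int((new_height / height) * width)
        # cv2.resize takes (width, height), not (height, width)
        return cv2.resize(ori_image, (new_width, new_height))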
 
    	
app.py  CHANGED

@@ -55,7 +55,7 @@ def inference(class_names, proposal_gen, granularity, input_img):
 
 examples = [['Saturn V, toys, desk, wall, sunflowers, white roses, chrysanthemums, carnations, green dianthus', 'Segment_Anything', 0.8, './resources/demo_samples/sample_01.jpeg'],
             ['red bench, yellow bench, blue bench, brown bench, green bench, blue chair, yellow chair, green chair, brown chair, yellow square painting, barrel, buddha statue', 'Segment_Anything', 0.8, './resources/demo_samples/sample_04.png'],
-            ['pillow, pipe, sweater, shirt, jeans jacket, shoes, cabinet, handbag, photo frame', 'Segment_Anything', 0.
+            ['pillow, pipe, sweater, shirt, jeans jacket, shoes, cabinet, handbag, photo frame', 'Segment_Anything', 0.7, './resources/demo_samples/sample_05.png'],
             ['Saturn V, toys, blossom', 'MaskFormer', 1.0, './resources/demo_samples/sample_01.jpeg'],
             ['Oculus, Ukulele', 'MaskFormer', 1.0, './resources/demo_samples/sample_03.jpeg'],
             ['Golden gate, yacht', 'MaskFormer', 1.0, './resources/demo_samples/sample_02.jpeg'],]
@@ -89,7 +89,7 @@ gr.Interface(
         gr.Slider(0, 1.0, 0.8, label="For Segment_Anything only, granularity of masks from 0 (most coarse) to 1 (most precise)"),
         gr.Image(type='filepath'),
     ],
-    outputs=gr.
+    outputs=gr.components.Image(type="pil", label='segmentation map'),
     title=title,
     description=description,
     article=article,
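On the app.py side, the commit updates one of the Segment_Anything example rows (now granularity 0.7 with sample_05.png) and pins the interface output to a single PIL image labeled 'segmentation map', as shown above. A minimal sketch of how such an output component wires into gr.Interface, assuming the Gradio 3.x API used in the diff (demo_fn is a placeholder, not the repo's inference function):

    import gradio as gr
    from PIL import Image

    def demo_fn(img_path):
        # Placeholder: the real app runs open-vocabulary segmentation here.
        return Image.open(img_path)

    gr.Interface(
        fn=demo_fn,
        inputs=gr.Image(type='filepath'),
        # The commit fixes the output to a PIL image with this label.
        outputs=gr.components.Image(type='pil', label='segmentation map'),
    ).launch()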
    	
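In predictor.py (diff below), run_on_image now downscales its input before SAM runs and keeps the original-resolution image only for visualization, and the CLIP model is moved to the GPU inside the no_grad/autocast block instead of at construction time, so its weights do not occupy GPU memory before inference is actually requested. A minimal sketch of that deferred-placement pattern with open_clip, assuming a CUDA device is available (the LazyClipEncoder wrapper is illustrative, not part of the repo):

    import torch
    import open_clip

    class LazyClipEncoder:
        def __init__(self, pretrained_path):
            # Build on CPU; GPU placement is deferred to the first encode call.
            self.clip_model, _, _ = open_clip.create_model_and_transforms(
                'ViT-L-14', pretrained=pretrained_path)
            self.tokenizer = open_clip.get_tokenizer('ViT-L-14')

        def encode_text(self, prompts):
            text = self.tokenizer(prompts)
            with torch.no_grad(), torch.cuda.amp.autocast():
                self.clip_model.cuda()   # moved here, as in the commit
                feats = self.clip_model.encode_text(text.cuda())
                return feats / feats.norm(dim=-1, keepdim=True)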
open_vocab_seg/utils/predictor.py  CHANGED

@@ -153,11 +153,19 @@ class SAMVisualizationDemo(object):
         sam = sam_model_registry["vit_l"](checkpoint=sam_path).cuda()
         self.predictor = SamAutomaticMaskGenerator(sam, points_per_batch=16)
         self.clip_model, _, _ = open_clip.create_model_and_transforms('ViT-L-14', pretrained=ovsegclip_path)
-        self.clip_model.cuda()
 
-    def run_on_image(self,
+    def run_on_image(self, ori_image, class_names):
+        height, width, _ = ori_image.shape
+        if width > height:
+            new_width = 1280
+            new_height = int((new_width / width) * height)
+        else:
+            new_height = 1280
+            new_width = int((new_height / height) * width)
+        image = cv2.resize(ori_image, (new_width, new_height))
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-
+        ori_image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB)
+        visualizer = OVSegVisualizer(ori_image, self.metadata, instance_mode=self.instance_mode, class_names=class_names)
         with torch.no_grad(), torch.cuda.amp.autocast():
             masks = self.predictor.generate(image)
         pred_masks = [masks[i]['segmentation'][None,:,:] for i in range(len(masks))]
@@ -192,6 +200,7 @@ class SAMVisualizationDemo(object):
         img_batches = torch.split(imgs, 32, dim=0)
 
         with torch.no_grad(), torch.cuda.amp.autocast():
+            self.clip_model.cuda()
             text_features = self.clip_model.encode_text(text.cuda())
             text_features /= text_features.norm(dim=-1, keepdim=True)
             image_features = []
@@ -224,6 +233,7 @@ class SAMVisualizationDemo(object):
         pred_mask = r.argmax(dim=0).to('cpu')
         pred_mask[blank_area] = 255
         pred_mask = np.array(pred_mask, dtype=np.int)
+        pred_mask = cv2.resize(pred_mask, (width, height), interpolation=cv2.INTER_NEAREST)
 
         vis_output = visualizer.draw_sem_seg(
             pred_mask
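Because SAM and CLIP operate on the downscaled copy, the predicted class-ID map comes out at the reduced resolution; the added cv2.resize call upsamples it back to the original (width, height) with nearest-neighbor interpolation before drawing, since bilinear resampling would blend neighboring class IDs into meaningless in-between values. A minimal sketch of that restore step (restore_mask is an illustrative name; the uint8 cast is an assumption chosen for a dtype cv2.resize accepts, whereas the diff keeps np.int):

    import cv2
    import numpy as np

    def restore_mask(pred_mask, width, height):
        # Nearest-neighbor keeps labels discrete; uint8 covers class IDs plus the 255 "ignore" value.
        return cv2.resize(pred_mask.astype(np.uint8), (width, height),
                          interpolation=cv2.INTER_NEAREST)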