[fix] fix mask np.uint8 bug
- app/src/brushedit_app.py  +17 -17
- app/src/vlm_template.py   +3 -3
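Summary: in app/src/brushedit_app.py, masks are cast to np.uint8 before PIL conversion (np.squeeze(mask).astype(np.uint8) ahead of Image.fromarray), the mask produced by the SAM/GroundingDINO call gets an explicit .astype(np.uint8), and the LLaVA-Next and Qwen2-VL loaders now pass torch_dtype=torch_dtype, device_map=device. In app/src/vlm_template.py, device is set to "cuda" and Qwen2-VL-7B-Instruct is loaded from the local models/vlms snapshot when it exists, otherwise from the Hub.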
app/src/brushedit_app.py
CHANGED
@@ -528,23 +528,23 @@ def update_vlm_model(vlm_name):
         else:
             if os.path.exists(vlm_local_path):
                 vlm_processor = LlavaNextProcessor.from_pretrained(vlm_local_path)
-                vlm_model = LlavaNextForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=
+                vlm_model = LlavaNextForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
             else:
                 if vlm_name == "llava-v1.6-mistral-7b-hf (Preload)":
                     vlm_processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
-                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=
+                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device)
                 elif vlm_name == "llama3-llava-next-8b-hf (Preload)":
                     vlm_processor = LlavaNextProcessor.from_pretrained("llava-hf/llama3-llava-next-8b-hf")
-                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llama3-llava-next-8b-hf", torch_dtype=
+                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device)
                 elif vlm_name == "llava-v1.6-vicuna-13b-hf (Preload)":
                     vlm_processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-13b-hf")
-                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=
+                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device)
                 elif vlm_name == "llava-v1.6-34b-hf (Preload)":
                     vlm_processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-34b-hf")
-                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-34b-hf", torch_dtype=
+                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device)
                 elif vlm_name == "llava-next-72b-hf (Preload)":
                     vlm_processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-next-72b-hf")
-                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-next-72b-hf", torch_dtype=
+                    vlm_model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-next-72b-hf", torch_dtype=torch_dtype, device_map=device)
     elif vlm_type == "qwen2-vl":
         if vlm_processor != "" and vlm_model != "":
             vlm_model.to(device)

@@ -552,17 +552,17 @@ def update_vlm_model(vlm_name):
         else:
             if os.path.exists(vlm_local_path):
                 vlm_processor = Qwen2VLProcessor.from_pretrained(vlm_local_path)
-                vlm_model = Qwen2VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=
+                vlm_model = Qwen2VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
             else:
                 if vlm_name == "qwen2-vl-2b-instruct (Preload)":
                     vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", torch_dtype=
+                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device)
                 elif vlm_name == "qwen2-vl-7b-instruct (Preload)":
                     vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype=
+                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
                 elif vlm_name == "qwen2-vl-72b-instruct (Preload)":
                     vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-72B-Instruct", torch_dtype=
+                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-72B-Instruct", torch_dtype=torch_dtype, device_map=device)
     elif vlm_type == "openai":
         pass
     return "success"

@@ -654,10 +654,10 @@ def process(input_image,
         original_image = resize(Image.fromarray(original_image), target_width=int(output_w), target_height=int(output_h))
         original_image = np.array(original_image)
         if input_mask is not None:
-            input_mask = resize(Image.fromarray(np.squeeze(input_mask)), target_width=int(output_w), target_height=int(output_h))
+            input_mask = resize(Image.fromarray(np.squeeze(input_mask).astype(np.uint8)), target_width=int(output_w), target_height=int(output_h))
             input_mask = np.array(input_mask)
         if original_mask is not None:
-            original_mask = resize(Image.fromarray(np.squeeze(original_mask)), target_width=int(output_w), target_height=int(output_h))
+            original_mask = resize(Image.fromarray(np.squeeze(original_mask).astype(np.uint8)), target_width=int(output_w), target_height=int(output_h))
             original_mask = np.array(original_mask)
         gr.Info(f"Output aspect ratio: {output_w}:{output_h}")
     else:

@@ -673,10 +673,10 @@ def process(input_image,
         original_image = resize(Image.fromarray(original_image), target_width=int(output_w), target_height=int(output_h))
         original_image = np.array(original_image)
         if input_mask is not None:
-            input_mask = resize(Image.fromarray(np.squeeze(input_mask)), target_width=int(output_w), target_height=int(output_h))
+            input_mask = resize(Image.fromarray(np.squeeze(input_mask).astype(np.uint8)), target_width=int(output_w), target_height=int(output_h))
             input_mask = np.array(input_mask)
         if original_mask is not None:
-            original_mask = resize(Image.fromarray(np.squeeze(original_mask)), target_width=int(output_w), target_height=int(output_h))
+            original_mask = resize(Image.fromarray(np.squeeze(original_mask).astype(np.uint8)), target_width=int(output_w), target_height=int(output_h))
             original_mask = np.array(original_mask)
 
     if invert_mask_state:

@@ -722,7 +722,7 @@ def process(input_image,
                 sam_predictor,
                 sam_automask_generator,
                 groundingdino_model,
-                device)
+                device).astype(np.uint8)
     except Exception as e:
         raise gr.Error("Please select the correct VLM model and input the correct API Key first!")
 

@@ -831,9 +831,9 @@ def process_mask(input_image,
                 sam_predictor,
                 sam_automask_generator,
                 groundingdino_model,
-                device)
+                device).astype(np.uint8)
     else:
-        original_mask = input_mask
+        original_mask = input_mask.astype(np.uint8)
         category = None
 
     ## resize mask if needed
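Why the cast matters (context, not part of the commit): Pillow's Image.fromarray cannot build an image from a boolean mask, which is the typical dtype coming out of SAM-style predictors, so the resize path raised a TypeError until the mask was squeezed and cast to np.uint8. A minimal sketch of the failure and of the fix applied above, using a stand-in resize helper rather than the app's own:

import numpy as np
from PIL import Image

def resize(img: Image.Image, target_width: int, target_height: int) -> Image.Image:
    # Stand-in for the app's resize helper: plain PIL resize to the target size.
    return img.resize((target_width, target_height))

# A SAM-style mask often arrives as bool, sometimes with a trailing channel axis.
mask = np.random.rand(512, 512, 1) > 0.5            # dtype=bool, shape (H, W, 1)

# Before the fix: Image.fromarray rejects a boolean array.
try:
    Image.fromarray(np.squeeze(mask))
except TypeError as e:
    print("fails:", e)                               # "Cannot handle this data type: ..."

# After the fix: squeeze the channel axis and cast to uint8 before PIL conversion.
mask_u8 = np.squeeze(mask).astype(np.uint8)          # shape (H, W), dtype=uint8
resized = resize(Image.fromarray(mask_u8), target_width=640, target_height=480)
mask_out = np.array(resized)                         # back to a NumPy array, as in process()
print(mask_out.shape, mask_out.dtype)

The same reasoning is behind the .astype(np.uint8) appended to the mask returned by the SAM/GroundingDINO call and to input_mask in process_mask.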
app/src/vlm_template.py
CHANGED
@@ -7,7 +7,7 @@ from transformers import (
     Qwen2VLForConditionalGeneration, Qwen2VLProcessor
 )
 ## init device
-device = "
+device = "cuda"
 torch_dtype = torch.float16
 
 

@@ -103,10 +103,10 @@ vlms_list = [
         ),
         "model": Qwen2VLForConditionalGeneration.from_pretrained(
             "models/vlms/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
-        ).to(
+        ).to(device) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else
         Qwen2VLForConditionalGeneration.from_pretrained(
             "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
-        ).to(
+        ).to(device),
     },
     {
         "type": "openai",
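Context, not part of the commit: the vlm_template.py hunk completes a local-first loading pattern, so the Qwen2-VL entry now reads as "use the snapshot under models/vlms if it exists, otherwise pull from the Hub, and move the result to the fixed cuda device". A hedged, stand-alone sketch of that pattern (paths and model IDs follow the diff; the surrounding vlms_list entry is simplified):

import os
import torch
from transformers import Qwen2VLForConditionalGeneration, Qwen2VLProcessor

## init device
device = "cuda"
torch_dtype = torch.float16

local_path = "models/vlms/Qwen2-VL-7B-Instruct"
hub_id = "Qwen/Qwen2-VL-7B-Instruct"

# Prefer the local snapshot when present, otherwise download from the Hub;
# either way the model ends up on `device` in float16.
# Note: instantiating this loads ~7B-parameter weights, so it is heavy to run.
model = (
    Qwen2VLForConditionalGeneration.from_pretrained(
        local_path, torch_dtype=torch_dtype, device_map=device
    ).to(device)
    if os.path.exists(local_path)
    else Qwen2VLForConditionalGeneration.from_pretrained(
        hub_id, torch_dtype=torch_dtype, device_map=device
    ).to(device)
)
processor = Qwen2VLProcessor.from_pretrained(local_path if os.path.exists(local_path) else hub_id)

With device_map=device the weights are already placed on the GPU, so the trailing .to(device) is essentially a no-op kept to mirror the diff.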