Commit ee54eed (parent bc147cf): api key

visual_foundation_models.py CHANGED
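The commit message is terse, but the diff is a single systematic change: every model load in visual_foundation_models.py switches to half precision. Each from_pretrained call gains torch_dtype=torch.float16 (the inpainting pipeline also pins revision="fp16" to download the half-precision weight branch), and the BLIP inference paths cast processor outputs with .to(self.device, torch.float16) so input dtype matches the fp16 weights.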
@@ -77,7 +77,7 @@ class ImageEditing:
         print("Initializing ImageEditing to %s" % device)
         self.device = device
         self.mask_former = MaskFormer(device=self.device)
-        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting").to(device)
+        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", revision="fp16", torch_dtype=torch.float16).to(device)
 
     @prompts(name="Remove Something From The Photo",
              description="useful when you want to remove and object or something from the photo "
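For context, here is the loading pattern this hunk introduces, as a standalone sketch. It assumes diffusers of that era, where revision="fp16" selected the half-precision weight branch (newer releases spell this variant="fp16"), plus a CUDA device; the model ID comes from the diff itself.

import torch
from diffusers import StableDiffusionInpaintPipeline

# Fetch the fp16 weight branch and keep the weights in half precision;
# this roughly halves both the download size and the GPU memory footprint.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    revision="fp16",
    torch_dtype=torch.float16,
).to("cuda")  # "cuda" is an assumption here; the class receives `device`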
@@ -113,7 +113,7 @@ class InstructPix2Pix:
     def __init__(self, device):
         print("Initializing InstructPix2Pix to %s" % device)
         self.device = device
-        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix",
+        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix", torch_dtype=torch.float16,
                                                                            safety_checker=None).to(device)
         self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(self.pipe.scheduler.config)
 
@@ -139,7 +139,7 @@ class Text2Image:
     def __init__(self, device):
         print("Initializing Text2Image to %s" % device)
         self.device = device
-        self.pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+        self.pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
         self.text_refine_tokenizer = AutoTokenizer.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
         self.text_refine_model = AutoModelForCausalLM.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
         self.text_refine_gpt2_pipe = pipeline("text-generation", model=self.text_refine_model,
@@ -166,13 +166,13 @@ class ImageCaptioning:
         self.device = device
         self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
         self.model = BlipForConditionalGeneration.from_pretrained(
-            "Salesforce/blip-image-captioning-base").to(self.device)
+            "Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to(self.device)
 
     @prompts(name="Get Photo Description",
              description="useful when you want to know what is inside the photo. receives image_path as input. "
                          "The input to this tool should be a string, representing the image_path. ")
     def inference(self, image_path):
-        inputs = self.processor(Image.open(image_path), return_tensors="pt").to(self.device)
+        inputs = self.processor(Image.open(image_path), return_tensors="pt").to(self.device, torch.float16)
         out = self.model.generate(**inputs)
         captions = self.processor.decode(out[0], skip_special_tokens=True)
         print(f"\nProcessed ImageCaptioning, Input Image: {image_path}, Output Text: {captions}")
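The second change in this hunk is the counterpart of fp16 loading: float inputs must be cast to the model's dtype, or generate() fails with a dtype mismatch. The .to(device, dtype) call on the processor's output casts only floating-point tensors (here, pixel_values) and leaves integer tensors alone. A minimal sketch, with "example.png" as a hypothetical image path:

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

device = "cuda"  # assumption; the class receives `device`
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to(device)

# Cast the image tensor to fp16 so it matches the fp16 weights.
inputs = processor(Image.open("example.png"), return_tensors="pt").to(device, torch.float16)
out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))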
@@ -206,9 +206,9 @@ class Image2Canny:
 class CannyText2Image:
     def __init__(self, device):
         print("Initializing CannyText2Image to %s" % device)
-        self.controlnet = ControlNetModel.from_pretrained("fusing/stable-diffusion-v1-5-controlnet-canny")
+        self.controlnet = ControlNetModel.from_pretrained("fusing/stable-diffusion-v1-5-controlnet-canny", torch_dtype=torch.float16)
         self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnet, safety_checker=None)
+            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnet, safety_checker=None, torch_dtype=torch.float16)
         self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
         self.pipe.to(device)
         self.seed = -1
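Note that the ControlNet and the base pipeline are cast together: ControlNet activations feed the pipeline's UNet, so mixing an fp16 ControlNet with an fp32 pipeline would error at inference. A sketch of the assembled pair, using the model IDs from the diff:

import torch
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

controlnet = ControlNetModel.from_pretrained(
    "fusing/stable-diffusion-v1-5-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,   # must share the pipeline's dtype
    safety_checker=None,
    torch_dtype=torch.float16)
# The diff then moves the assembled pipeline to the GPU with pipe.to(device).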
@@ -685,7 +685,7 @@ class VisualQuestionAnswering:
         print("Initializing VisualQuestionAnswering to %s" % device)
         self.device = device
         self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-        self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(self.device)
+        self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base", torch_dtype=torch.float16).to(self.device)
 
     @prompts(name="Answer Question About The Image",
              description="useful when you need an answer for a question based on an image. "
@@ -694,7 +694,7 @@ class VisualQuestionAnswering:
     def inference(self, inputs):
         image_path, question = inputs.split(",")
         raw_image = Image.open(image_path).convert('RGB')
-        inputs = self.processor(raw_image, question, return_tensors="pt").to(self.device)
+        inputs = self.processor(raw_image, question, return_tensors="pt").to(self.device, torch.float16)
         out = self.model.generate(**inputs)
         answer = self.processor.decode(out[0], skip_special_tokens=True)
         print(f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, "
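The likely payoff is memory: Stable Diffusion v1.5's UNet alone has roughly 860M parameters, about 3.4 GB in fp32 versus about 1.7 GB in fp16, and this Space keeps several such models resident at once, so halving every checkpoint is plausibly what lets them coexist on a single GPU.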