Spaces:
Build error
Build error
| from transformers import CLIPProcessor, CLIPModel, ViTImageProcessor, ViTModel | |
| from PIL import Image | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from warnings import filterwarnings | |
| filterwarnings("ignore") | |
| models = ["CLIP-ViT Base", "ViT Base", "DINO ViT-S16"] | |
| models_info = { | |
| "CLIP-ViT Base": { | |
| "model_size": "386MB", | |
| "model_url": "openai/clip-vit-base-patch32", | |
| "efficiency": "High", | |
| }, | |
| "ViT Base": { | |
| "model_size": "304MB", | |
| "model_url": "google/vit-base-patch16-224", | |
| "efficiency": "High", | |
| }, | |
| "DINO ViT-S16": { | |
| "model_size": "1.34GB", | |
| "model_url": "facebook/dino-vits16", | |
| "efficiency": "Moderate", | |
| }, | |
| } | |
| class Image_Validator: | |
| def __init__(self, model_name=None): | |
| if model_name is None: model_name="ViT Base" | |
| self.model_info = models_info[model_name] | |
| model_url = self.model_info["model_url"] | |
| if model_name == "CLIP-ViT Base": | |
| self.model = CLIPModel.from_pretrained(model_url) | |
| self.processor = CLIPProcessor.from_pretrained(model_url) | |
| elif model_name == "ViT Base": | |
| self.model = ViTModel.from_pretrained(model_url) | |
| self.feature_extractor = ViTImageProcessor.from_pretrained(model_url) | |
| elif model_name == "DINO ViT-S16": | |
| self.model = ViTModel.from_pretrained(model_url) | |
| self.feature_extractor = ViTImageProcessor.from_pretrained(model_url) | |
| def get_image_embedding(self, image): | |
| # Process image according to the model | |
| if hasattr(self, 'processor'): # CLIP models | |
| inputs = self.processor(images=image, return_tensors="pt") | |
| outputs = self.model.get_image_features(**inputs) | |
| elif hasattr(self, 'feature_extractor'): # ViT models | |
| inputs = self.feature_extractor(images=image, return_tensors="pt") | |
| outputs = self.model(**inputs).last_hidden_state | |
| return outputs | |
| def similarity_score(self, image1, image2): | |
| embedding1 = self.get_image_embedding(image1).reshape(1, -1) | |
| embedding2 = self.get_image_embedding(image2).reshape(1, -1) | |
| similarity = cosine_similarity(embedding1.detach().numpy(), embedding2.detach().numpy()) | |
| return similarity[0][0] |