import os
import numpy as np
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image
from io import BytesIO
import torch
from clip_retrieval.load_clip import load_clip, get_tokenizer
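
# ClipAppClient talks to a CLIP embedding HTTP service: each *_to_embedding method
# POSTs multipart form data to the address in the HTTP_ADDRESS environment variable
# (default http://127.0.0.1:8000/) and returns the raw response body as a string.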

class ClipAppClient:
    """
    Client for generating embeddings with an OpenAI CLIP model served over HTTP.

    Example:
        client = ClipAppClient()
        test_image_url = "https://example.com/image.jpg"
        preprocessed_image = client.preprocess_image(test_image_url)
        text = "A beautiful landscape"
        text_embeddings = client.text_to_embedding(text)
        image_embeddings = client.image_url_to_embedding(test_image_url)
        preprocessed_image_embeddings = client.preprocessed_image_to_embedding(preprocessed_image)
    """

    def __init__(self, clip_model="ViT-L/14", device=None):
        self.clip_model = clip_model
        self.device = device or ("cuda:0" if torch.cuda.is_available() else "cpu")
        print("using device", self.device)
        self.model, self.preprocess = load_clip(clip_model, use_jit=True, device=self.device)
        self.tokenizer = get_tokenizer(clip_model)

    def preprocess_image(self, image_url):
        """
        Download an image from a URL and preprocess it for the CLIP model.

        :param image_url: str, URL of the image to preprocess
        :return: torch.Tensor, preprocessed image batch of shape (1, C, H, W)
        """
        response = requests.get(image_url)
        response.raise_for_status()  # fail early on a bad download
        input_image = Image.open(BytesIO(response.content)).convert('RGB')
        # Force the image fully into memory as a plain array-backed image.
        input_image = Image.fromarray(np.array(input_image))
        prepro = self.preprocess(input_image).unsqueeze(0).cpu()
        return prepro

    def text_to_embedding(self, text):
        """
        Convert text to an embedding using the CLIP embedding service.

        :param text: str, text to convert to an embedding
        :return: str, raw embedding response returned by the service
        """
        # The service expects the text as a multipart form field named "text".
        payload = {
            "text": ('str', text, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        embeddings = response.text
        return embeddings

    def image_url_to_embedding(self, image_url):
        """
        Convert an image URL to an embedding using the CLIP embedding service.

        :param image_url: str, URL of the image to convert to an embedding
        :return: str, raw embedding response returned by the service
        """
        # Only the URL is sent; the service is expected to fetch and embed the image.
        payload = {
            "image_url": ('str', image_url, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        embeddings = response.text
        return embeddings

    def preprocessed_image_to_embedding(self, image):
        """
        Convert a preprocessed image tensor to an embedding using the CLIP embedding service.

        :param image: torch.Tensor, preprocessed image
        :return: str, raw embedding response returned by the service
        """
        # Serialize the tensor as raw bytes plus its shape and dtype so the
        # service can reconstruct it on the other side.
        key = "preprocessed_image"
        data_bytes = image.numpy().tobytes()
        shape_bytes = np.array(image.shape).tobytes()
        dtype_bytes = str(image.dtype).encode()
        payload = {
            key: ('tensor', data_bytes, 'application/octet-stream'),
            'shape': ('shape', shape_bytes, 'application/octet-stream'),
            'dtype': ('dtype', dtype_bytes, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        embeddings = response.text
        return embeddings
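

# Minimal usage sketch (assumes an embedding service is reachable at HTTP_ADDRESS,
# default http://127.0.0.1:8000/; the image URL below is a placeholder).
if __name__ == "__main__":
    client = ClipAppClient()
    text_embeddings = client.text_to_embedding("A beautiful landscape")
    image_embeddings = client.image_url_to_embedding("https://example.com/image.jpg")
    preprocessed = client.preprocess_image("https://example.com/image.jpg")
    preprocessed_image_embeddings = client.preprocessed_image_to_embedding(preprocessed)
    print(text_embeddings[:80])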