Spaces:
Runtime error
Runtime error
| import io | |
| import os | |
| from typing import Literal, TypeAlias | |
| import numpy as np | |
| import PIL.Image | |
| import PIL.ImageOps | |
| import requests | |
| import torch | |
| """ | |
| - pil: `PIL.Image.Image`, size (w, h), seamless conversion between `uint8` | |
| - np: `np.ndarray`, shape (h, w, c), default `np.uint8` | |
| - pt: `torch.Tensor`, shape (c, h, w), default `torch.uint8` | |
| """ | |
# Any of the three image containers handled by this module.
ImageType: TypeAlias = PIL.Image.Image | np.ndarray | torch.Tensor
# Short names used to select one of the containers above.
ImageTypeStr: TypeAlias = Literal["pil", "np", "pt"]
# Encoded image formats.
ImageFormat: TypeAlias = Literal["JPEG", "PNG"]
# Value ranges: "255" -> uint8 in [0, 255], "01" -> float in [0, 1], "11" -> float in [-1, 1].
DataFormat: TypeAlias = Literal["255", "01", "11"]

# PIL modes accepted by `to_rgb`.
IMG_SUPPORT_MODE = ["L", "LA", "RGB", "RGBA", "CMYK", "P", "1"]

# File extensions, first in lower case, then the same list in upper case.
IMAGE_EXT_LOWER = ["png", "jpeg", "jpg", "webp"]
IMAGE_EXT = [*IMAGE_EXT_LOWER, *map(str.upper, IMAGE_EXT_LOWER)]
def check_image_type(image: ImageType):
    """
    Validate that `image` is one of the supported image containers.

    Args:
        image (ImageType): The object to check.

    Raises:
        TypeError: If `image` is not a PIL Image, a NumPy array or a PyTorch tensor.
    """
    supported_types = (PIL.Image.Image, np.ndarray, torch.Tensor)
    if isinstance(image, supported_types):
        return
    raise TypeError(f"`image` should be PIL Image, ndarray or Tensor. Got `{type(image)}`.")
def to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
    """
    Convert a PIL image of any supported mode to an upright RGB image.

    Args:
        image (PIL.Image.Image): The image to convert.

    Returns:
        PIL.Image.Image: The image in mode "RGB". Images with an alpha channel
        ("LA", "RGBA") are flattened onto a white background first.

    Raises:
        ValueError: If the image mode is not listed in `IMG_SUPPORT_MODE`.
    """
    # Apply the EXIF orientation so the pixels match the direction the picture was taken.
    image = PIL.ImageOps.exif_transpose(image)

    if image.mode not in IMG_SUPPORT_MODE:
        raise ValueError(f"Only support mode in `{IMG_SUPPORT_MODE}`, got `{image.mode}`")

    # Grayscale + alpha goes through the same compositing path as RGBA.
    has_gray_alpha = image.mode == "LA"
    if has_gray_alpha:
        image = image.convert("RGBA")

    if image.mode == "RGBA":
        # Flatten the alpha channel onto a white canvas.
        white_canvas = PIL.Image.new("RGBA", image.size, "white")
        flattened = PIL.Image.alpha_composite(white_canvas, image)
        image = flattened.convert("RGB")

    # Every remaining mode is converted directly.
    return image.convert("RGB")
def load_image(
    image: str | os.PathLike | PIL.Image.Image | bytes,
    *,
    output_type: ImageTypeStr = "pil",
) -> ImageType:
    """
    Loads `image` as an RGB PIL Image, NumPy array or PyTorch tensor.

    Args:
        image (str | os.PathLike | PIL.Image.Image | bytes): A local file path, an
            `http://` / `https://` URL, an already opened PIL Image, or the raw
            encoded bytes of an image file.
        output_type (ImageTypeStr, optional): The type of the output image. Defaults to "pil".
            The current version supports "pil", "np", "pt".

    Returns:
        ImageType: The image, converted to RGB by `to_rgb`, in the given type.

    Raises:
        ValueError: If the HTTP request times out, the string is neither a URL nor an
            existing file, `image` has an unsupported type, or `output_type` is unknown.
        requests.exceptions.HTTPError: If the server answers with an error status code.
    """
    timeout = 10

    # `os.PathLike` objects (e.g. `pathlib.Path`) have no `startswith`; use their string form.
    if isinstance(image, os.PathLike):
        image = os.fsdecode(image)

    # Load the `image` into a PIL Image.
    if isinstance(image, str):
        if image.startswith(("http://", "https://")):
            try:
                response = requests.get(image, timeout=timeout)
            except requests.exceptions.Timeout as err:
                raise ValueError(f"HTTP request timed out after {timeout} seconds") from err
            # Fail on 4xx/5xx here instead of letting PIL choke on an error page.
            response.raise_for_status()
            # `content` is fully downloaded and content-decoded, unlike the raw socket stream.
            image = PIL.Image.open(io.BytesIO(response.content))
        elif os.path.isfile(image):
            image = PIL.Image.open(image)
        else:
            raise ValueError(
                f"Incorrect path or url, URLs must start with `http://` or `https://`, and `{image}` is not a valid path."
            )
    elif isinstance(image, PIL.Image.Image):
        pass
    elif isinstance(image, bytes):
        image = PIL.Image.open(io.BytesIO(image))
    else:
        raise ValueError(f"`image` must be a path or PIL Image, got `{type(image)}`")

    image = to_rgb(image)

    if output_type == "pil":
        return image
    if output_type == "np":
        return to_np(image)
    if output_type == "pt":
        return to_pt(image)
    raise ValueError(f"`output_type` must be one of `{ImageTypeStr}`, got `{output_type}`")
def to_pil(image: ImageType, image_mode: DataFormat | None = None) -> PIL.Image.Image:
    """
    Convert a NumPy array or a PyTorch tensor to a PIL image.

    Args:
        image (ImageType): The image to convert. A PIL Image is returned as is.
        image_mode (DataFormat | None, optional): The data format of `image`, forwarded to
            `normalize_np` / `normalize_pt`. Defaults to None.

    Returns:
        PIL.Image.Image: An "L" image for 1-channel input, an "RGB" image for 3-channel input.
    """
    check_image_type(image)
    if isinstance(image, PIL.Image.Image):
        return image

    if isinstance(image, torch.Tensor):
        image = normalize_pt(image, image_mode)
        # (c, h, w) tensor -> (h, w, c) array
        image = image.cpu().permute(1, 2, 0).numpy()
    elif isinstance(image, np.ndarray):
        image = normalize_np(image, image_mode)

    assert image.dtype == np.uint8, f"Supposed to convert `torch.uint8` to `np.uint8`, but got `{image.dtype}`"

    num_channels = image.shape[-1]
    mode = {1: "L", 3: "RGB"}[num_channels]
    if num_channels == 1:
        # Drop the channel axis of single-channel images before handing them to PIL.
        image = image[:, :, 0]
    return PIL.Image.fromarray(image, mode=mode)
def to_np(image: ImageType, image_mode: DataFormat | None = None) -> np.ndarray:
    """
    Convert a PIL image or a PyTorch tensor to a NumPy array.

    Args:
        image (ImageType): The image to convert.
        image_mode (DataFormat | None, optional): The data format of `image`, forwarded to
            `normalize_np` / `normalize_pt`. Defaults to None.

    Returns:
        np.ndarray: The normalized `uint8` array in (h, w, c) layout.
    """
    check_image_type(image)

    if isinstance(image, torch.Tensor):
        image = normalize_pt(image, image_mode)
        # (c, h, w) tensor -> (h, w, c) array
        image = image.cpu().permute(1, 2, 0).numpy()
    else:
        if isinstance(image, PIL.Image.Image):
            # Read the PIL pixels into a fresh uint8 array.
            image = np.array(image, np.uint8, copy=True)
        if isinstance(image, np.ndarray):
            image = normalize_np(image, image_mode)

    assert image.dtype == np.uint8, f"Supposed to convert `torch.uint8` to `np.uint8`, but got `{image.dtype}`"
    return image
def to_pt(image: ImageType, image_mode: DataFormat | None = None) -> torch.Tensor:
    """
    Convert a PIL image or a NumPy array to a PyTorch tensor.

    Args:
        image (ImageType): The image to convert. A tensor is only normalized.
        image_mode (DataFormat | None, optional): The data format of `image`, forwarded to
            `normalize_np` / `normalize_pt`. Defaults to None.

    Returns:
        torch.Tensor: The normalized tensor; PIL / NumPy inputs come back as a contiguous
        `uint8` tensor in (c, h, w) layout.
    """
    check_image_type(image)

    # Tensors only need normalization, no layout change.
    if isinstance(image, torch.Tensor):
        return normalize_pt(image, image_mode)

    if isinstance(image, PIL.Image.Image):
        # Read the PIL pixels into a fresh uint8 array.
        image = np.array(image, np.uint8, copy=True)

    image = normalize_np(image, image_mode)
    # (h, w, c) array -> (c, h, w) tensor
    channels_first = np.transpose(image, (2, 0, 1))
    image = torch.from_numpy(channels_first).contiguous()

    assert image.dtype == torch.uint8, f"Supposed to convert `np.uint8` to `torch.uint8`, but got `{image.dtype}`"
    return image
def normalize_np(image: np.ndarray, image_mode: DataFormat | None = None) -> np.ndarray:
    """
    Normalize a NumPy array to the standard format of shape (h, w, c) and uint8.

    Args:
        image (np.ndarray): A 2D (h, w) or 3D (h, w, c) array with 1 or 3 channels.
        image_mode (DataFormat | None, optional): The data format of `image`, forwarded to
            `to_dataformat`. Defaults to None.

    Returns:
        np.ndarray: The array with a channel axis, converted to the "255" data format.

    Raises:
        ValueError: If the array is not 2D / 3D or does not have 1 or 3 channels.
    """
    if image.ndim == 2:
        # 2D image: append the channel axis (HWC).
        image = image[:, :, np.newaxis]
    elif image.ndim != 3:
        raise ValueError(f"`image` should be 2 or 3 dimensions. Got {image.ndim} dimensions.")

    channel_ok = image.shape[-1] in {1, 3}
    if not channel_ok:
        raise ValueError(f"`image` should have 1 (`L`) or 3 (`RGB`) channels. Got {image.shape[-1]} channels.")

    return to_dataformat(image, image_mode=image_mode, mode="255")
def normalize_pt(image: torch.Tensor, image_mode: DataFormat | None = None) -> torch.Tensor:
    """
    Normalize a PyTorch tensor to the standard format of shape (c, h, w) and uint8.

    Args:
        image (torch.Tensor): A 2D (h, w) or 3D (c, h, w) tensor with 1 or 3 channels.
        image_mode (DataFormat | None, optional): The data format of `image`, forwarded to
            `to_dataformat`. Defaults to None.

    Returns:
        torch.Tensor: The tensor with a channel axis, converted to the "255" data format.

    Raises:
        ValueError: If the tensor is not 2D / 3D or does not have 1 or 3 channels.
    """
    if image.ndimension() == 2:
        # 2D image: prepend the channel axis (CHW).
        image = image[None]
    elif image.ndimension() != 3:
        raise ValueError(f"`image` should be 2 or 3 dimensions. Got {image.ndimension()} dimensions.")

    # The channel axis is the third one counted from the end.
    channel_ok = image.shape[-3] in {1, 3}
    if not channel_ok:
        raise ValueError(f"`image` should have 1 (`L`) or 3 (`RGB`) channels. Got {image.shape[-3]} channels.")

    return to_dataformat(image, image_mode=image_mode, mode="255")
def to_dataformat(
    image: ImageType,
    *,
    image_mode: DataFormat | None = None,
    mode: DataFormat = "255",
) -> np.ndarray | torch.Tensor:
    """
    Convert the value range of `image` from `image_mode` to `mode`.

    Data formats:
        - "255": `uint8` values in [0, 255]
        - "01": floating point values in [0, 1]
        - "11": floating point values in [-1, 1]

    Args:
        image (ImageType): The image to convert. A PIL Image is first read into a
            `uint8` NumPy array and always treated as "255".
        image_mode (DataFormat | None, optional): The data format of `image`. When None it
            is guessed from the dtype (`uint8` -> "255") and, for floating point data, from
            the minimum value (negative -> "11", otherwise "01"). When given and different
            from the guess, a notice is printed and the given value is used.
        mode (DataFormat, optional): The target data format. Defaults to "255".

    Returns:
        np.ndarray | torch.Tensor: The converted image. The input is never modified in
        place; it is returned unchanged when no conversion applies.

    Raises:
        TypeError: If `image` is not a PIL Image, NumPy array or PyTorch tensor.
        ValueError: If the dtype is neither `uint8` nor float16 / float32 / float64.
    """
    if not isinstance(image, (np.ndarray, torch.Tensor)):
        # Anything else has to be a PIL Image (`check_image_type` raises otherwise).
        check_image_type(image)
        image = np.array(image, np.uint8, copy=True)
        image_mode = "255"

    # Compare dtypes only against the dtypes of the matching library.
    is_numpy = isinstance(image, np.ndarray)
    if is_numpy:
        uint8_dtype = np.uint8
        float_dtypes = (np.float16, np.float32, np.float64)
    else:
        uint8_dtype = torch.uint8
        float_dtypes = (torch.float16, torch.float32, torch.float64)

    # guess image mode
    if image.dtype == uint8_dtype:
        guess_image_mode = "255"
    elif image.dtype in float_dtypes:
        guess_image_mode = "11" if image.min() < 0.0 else "01"
    else:
        raise ValueError(f"Unsupported dtype `{image.dtype}`")

    if image_mode is None:
        image_mode = guess_image_mode
    elif guess_image_mode != image_mode:
        print(f"Guess image mode is `{guess_image_mode}`, but image mode is `{image_mode}`")

    if is_numpy:
        # Every step builds a new array: writing float results back into the input with
        # `out=` fails for uint8 data and would silently change the caller's array.
        if image_mode == "255" and mode != "255":
            image = np.clip(image.astype(np.float32) / 255, 0, 1)
            if mode == "11":
                image = np.clip(image * 2 - 1, -1, 1)
        elif image_mode == "01" and mode != "01":
            if mode == "255":
                image = (np.clip(image, 0, 1) * 255).round().astype(np.uint8)
            elif mode == "11":
                image = np.clip(image * 2 - 1, -1, 1)
        elif image_mode == "11" and mode != "11":
            image = np.clip(image / 2 + 0.5, 0, 1)
            if mode == "255":
                image = (image * 255).round().astype(np.uint8)
    else:
        if image_mode == "255" and mode != "255":
            image = image.to(dtype=torch.float32).div(255).clamp(0, 1)
            if mode == "11":
                image = (image * 2 - 1).clamp(-1, 1)
        elif image_mode == "01" and mode != "01":
            if mode == "255":
                image = image.clamp(0, 1)
                image = (image * 255).round().to(dtype=torch.uint8)
            elif mode == "11":
                image = (image * 2 - 1).clamp(-1, 1)
        elif image_mode == "11" and mode != "11":
            image = (image / 2 + 0.5).clamp(0, 1)
            if mode == "255":
                image = image.mul(255).round().to(dtype=torch.uint8)
    return image
def resize_image(pil_image, image_size):
    """
    Resize a PIL image so that its shorter side equals `image_size`.

    The image is first halved repeatedly with a box filter while its shorter side is at
    least twice `image_size`, then scaled to the final size with bicubic resampling.

    Args:
        pil_image: The PIL image to resize.
        image_size: Target length of the shorter side, in pixels.

    Returns:
        The resized PIL image; the aspect ratio is kept up to rounding.
    """
    # Coarse downsampling: halve both sides until the shorter one is below 2 * image_size.
    while True:
        width, height = pil_image.size
        if min(width, height) < 2 * image_size:
            break
        pil_image = pil_image.resize((width // 2, height // 2), resample=PIL.Image.BOX)

    # Final scaling to the exact target size.
    width, height = pil_image.size
    scale = image_size / min(width, height)
    target_size = (round(width * scale), round(height * scale))
    return pil_image.resize(target_size, resample=PIL.Image.BICUBIC)
def center_crop_arr(pil_image, image_size, crop=True):
    """
    Make a PIL image square, either by center cropping or by padding.

    Center cropping implementation from ADM.
    https://github.com/openai/guided-diffusion/blob/8fb3ad9197f16bbc40620447b2742e13458d2831/guided_diffusion/image_datasets.py#L126

    Args:
        pil_image: The PIL image to process.
        image_size: Side length passed to `resize_image` and used for the crop window.
        crop: If True, resize with `resize_image` and cut out the centered
            `image_size` x `image_size` window. If False, pad a non-square image with
            black to a centered square first, then resize. Defaults to True.

    Returns:
        The processed PIL image.
    """
    if crop:
        pil_image = resize_image(pil_image, image_size)
        arr = np.array(pil_image)
        crop_y = (arr.shape[0] - image_size) // 2
        crop_x = (arr.shape[1] - image_size) // 2
        return PIL.Image.fromarray(arr[crop_y : crop_y + image_size, crop_x : crop_x + image_size])

    # Pad the image to a square.
    width, height = pil_image.size
    if width != height:
        # Create a square canvas whose side is the longer side of the image.
        max_dim = max(width, height)
        # A color name is resolved per mode by PIL; the tuple (0, 0, 0) is rejected
        # for single-band modes such as "L".
        padded_img = PIL.Image.new(pil_image.mode, (max_dim, max_dim), "black")
        # Paste the original image centered on the square canvas.
        padded_img.paste(pil_image, ((max_dim - width) // 2, (max_dim - height) // 2))
        pil_image = padded_img
    return resize_image(pil_image, image_size)