NextStep-1-Large / utils /image_utils.py

Upload folder using huggingface_hub

22243b2 verified 7 months ago

11.2 kB

	import io
	import os
	from typing import Literal, TypeAlias

	import numpy as np
	import PIL.Image
	import PIL.ImageOps
	import requests
	import torch

	"""
	- pil: `PIL.Image.Image`, size (w, h), seamless conversion between `uint8`
	- np: `np.ndarray`, shape (h, w, c), default `np.uint8`
	- pt: `torch.Tensor`, shape (c, h, w), default `torch.uint8`
	"""
	# Any of the three supported image containers (PIL image, NumPy array, PyTorch tensor).
	ImageType: TypeAlias = PIL.Image.Image | np.ndarray | torch.Tensor
	# Short string tag selecting one of the three containers.
	ImageTypeStr: TypeAlias = Literal["pil", "np", "pt"]
	ImageFormat: TypeAlias = Literal["JPEG", "PNG"]
	# Value-range tag: "255" = uint8 in [0, 255], "01" = float in [0, 1], "11" = float in [-1, 1].
	DataFormat: TypeAlias = Literal["255", "01", "11"]


	# Pillow modes accepted by `to_rgb`; any other mode is rejected with a ValueError.
	IMG_SUPPORT_MODE = ["L", "LA", "RGB", "RGBA", "CMYK", "P", "1"]
	# Image file extensions: the lower-case spellings, then the same list upper-cased.
	IMAGE_EXT_LOWER = ["png", "jpeg", "jpg", "webp"]
	IMAGE_EXT = [*IMAGE_EXT_LOWER, *map(str.upper, IMAGE_EXT_LOWER)]


	def check_image_type(image: ImageType):
	    """
	    Validate that `image` is one of the supported image containers.

	    Raises:
	        TypeError: If `image` is not a PIL Image, NumPy array or PyTorch tensor.
	    """
	    supported = (PIL.Image.Image, np.ndarray, torch.Tensor)
	    if isinstance(image, supported):
	        return
	    raise TypeError(f"`image` should be PIL Image, ndarray or Tensor. Got `{type(image)}`.")


	def to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
	    """
	    Convert a PIL image to mode `RGB`, applying its EXIF orientation first.

	    Images with an alpha channel (`LA`, `RGBA`) are flattened onto a white background.

	    Raises:
	        ValueError: If the image mode is not listed in `IMG_SUPPORT_MODE`.
	    """
	    # Rotate/flip according to the EXIF orientation so the pixels match how the photo was taken.
	    oriented = PIL.ImageOps.exif_transpose(image)

	    source_mode = oriented.mode
	    if source_mode not in IMG_SUPPORT_MODE:
	        raise ValueError(f"Only support mode in `{IMG_SUPPORT_MODE}`, got `{oriented.mode}`")

	    # Grayscale-with-alpha takes the same compositing path as RGBA.
	    if source_mode == "LA":
	        oriented = oriented.convert("RGBA")

	    if oriented.mode == "RGBA":
	        # Composite over an opaque white canvas, then drop the alpha channel.
	        white = PIL.Image.new("RGBA", oriented.size, "white")
	        oriented = PIL.Image.alpha_composite(white, oriented).convert("RGB")

	    # Every remaining mode is converted directly.
	    return oriented.convert("RGB")


	def load_image(
	    image: str | os.PathLike | PIL.Image.Image | bytes,
	    *,
	    output_type: ImageTypeStr = "pil",
	) -> ImageType:
	    """
	    Loads `image` as an RGB PIL Image, NumPy array or PyTorch tensor.

	    Args:
	        image (str | os.PathLike | PIL.Image.Image | bytes): A local file path, an
	            `http://` / `https://` URL, an already opened PIL Image, or the raw bytes of an
	            encoded image file.
	        output_type (ImageTypeStr, optional): The type of the output image. Defaults to "pil".
	            The current version supports "pil", "np", "pt".

	    Returns:
	        ImageType: The loaded image, converted to RGB by `to_rgb`, in the given type.

	    Raises:
	        ValueError: If `image` is neither an existing file nor an HTTP(S) URL, if its type is
	            unsupported, if the download times out, or if `output_type` is unknown.
	        requests.exceptions.HTTPError: If the URL answers with an error status code.
	    """
	    timeout = 10

	    # `os.PathLike` objects (e.g. `pathlib.Path`) have no `startswith`; work on the plain string.
	    if isinstance(image, os.PathLike):
	        image = os.fsdecode(image)

	    # Load the `image` into a PIL Image.
	    if isinstance(image, str):
	        if image.startswith(("http://", "https://")):
	            try:
	                # Fetch the whole body and release the connection before decoding, instead of
	                # handing Pillow the raw (possibly content-encoded) socket stream.
	                with requests.get(image, timeout=timeout) as response:
	                    response.raise_for_status()
	                    payload = response.content
	            except requests.exceptions.Timeout as err:
	                raise ValueError(f"HTTP request timed out after {timeout} seconds") from err
	            image = PIL.Image.open(io.BytesIO(payload))
	        elif os.path.isfile(image):
	            image = PIL.Image.open(image)
	        else:
	            raise ValueError(
	                f"Incorrect path or url, URLs must start with `http://` or `https://`, and `{image}` is not a valid path."
	            )
	    elif isinstance(image, PIL.Image.Image):
	        pass
	    elif isinstance(image, bytes):
	        image = PIL.Image.open(io.BytesIO(image))
	    else:
	        raise ValueError(f"`image` must be a path or PIL Image, got `{type(image)}`")

	    image = to_rgb(image)

	    if output_type == "pil":
	        return image
	    if output_type == "np":
	        return to_np(image)
	    if output_type == "pt":
	        return to_pt(image)
	    raise ValueError(f"`output_type` must be one of `{ImageTypeStr}`, got `{output_type}`")


	def to_pil(image: ImageType, image_mode: DataFormat | None = None) -> PIL.Image.Image:
	    """
	    Build a PIL image from a NumPy array or a PyTorch tensor.

	    A PIL image is handed back untouched. Arrays and tensors are first normalised by
	    `normalize_np` / `normalize_pt`; one channel yields mode `L`, three yield `RGB`.
	    """
	    check_image_type(image)

	    if isinstance(image, PIL.Image.Image):
	        return image

	    if isinstance(image, np.ndarray):
	        pixels = normalize_np(image, image_mode)
	    else:
	        # After the type check only a tensor is left: normalise as CHW, then move to HWC.
	        chw = normalize_pt(image, image_mode)
	        pixels = chw.cpu().permute(1, 2, 0).numpy()
	        assert pixels.dtype == np.uint8, f"Supposed to convert `torch.uint8` to `np.uint8`, but got `{pixels.dtype}`"

	    channels = pixels.shape[-1]
	    pil_mode = {1: "L", 3: "RGB"}[channels]
	    if channels == 1:
	        # Drop the trailing channel axis so single-channel data is a plain (h, w) array.
	        pixels = pixels[:, :, 0]

	    return PIL.Image.fromarray(pixels, mode=pil_mode)


	def to_np(image: ImageType, image_mode: DataFormat | None = None) -> np.ndarray:
	    """
	    Turn a PIL image, NumPy array or PyTorch tensor into a normalised NumPy array.

	    The result comes from `normalize_np` (PIL / NumPy input) or from `normalize_pt`
	    followed by a CHW -> HWC permutation (tensor input).
	    """
	    check_image_type(image)

	    if isinstance(image, torch.Tensor):
	        chw = normalize_pt(image, image_mode)
	        hwc = chw.cpu().permute(1, 2, 0).numpy()
	        assert hwc.dtype == np.uint8, f"Supposed to convert `torch.uint8` to `np.uint8`, but got `{hwc.dtype}`"
	        return hwc

	    # PIL images are copied into a uint8 array and then handled like any other array.
	    if isinstance(image, PIL.Image.Image):
	        image = np.array(image, np.uint8, copy=True)

	    return normalize_np(image, image_mode)


	def to_pt(image: ImageType, image_mode: DataFormat | None = None) -> torch.Tensor:
	    """
	    Turn a PIL image, NumPy array or PyTorch tensor into a normalised PyTorch tensor.

	    Tensors only go through `normalize_pt`. PIL images and arrays go through
	    `normalize_np` and are then rearranged from HWC to CHW.
	    """
	    check_image_type(image)

	    if isinstance(image, torch.Tensor):
	        return normalize_pt(image, image_mode)

	    # PIL images take a detour through a uint8 NumPy copy.
	    hwc = np.array(image, np.uint8, copy=True) if isinstance(image, PIL.Image.Image) else image
	    hwc = normalize_np(hwc, image_mode)

	    # HWC -> CHW; `contiguous()` materialises the transposed view.
	    chw = torch.from_numpy(hwc.transpose((2, 0, 1))).contiguous()
	    assert chw.dtype == torch.uint8, f"Supposed to convert `np.uint8` to `torch.uint8`, but got `{chw.dtype}`"
	    return chw


	def normalize_np(image: np.ndarray, image_mode: DataFormat | None = None) -> np.ndarray:
	    """
	    Bring a NumPy image to the canonical layout (h, w, c) in the "255" value range.

	    Raises:
	        ValueError: If the array is not 2-D or 3-D, or does not have 1 or 3 channels.
	    """
	    rank = image.ndim
	    if rank == 2:
	        # Grayscale given as (h, w): append the channel axis to get (h, w, 1).
	        image = image[:, :, np.newaxis]
	    elif rank != 3:
	        raise ValueError(f"`image` should be 2 or 3 dimensions. Got {image.ndim} dimensions.")

	    channels = image.shape[-1]
	    if channels != 1 and channels != 3:
	        raise ValueError(f"`image` should have 1 (`L`) or 3 (`RGB`) channels. Got {image.shape[-1]} channels.")

	    return to_dataformat(image, image_mode=image_mode, mode="255")


	def normalize_pt(image: torch.Tensor, image_mode: DataFormat | None = None) -> torch.Tensor:
	    """
	    Bring a PyTorch image to the canonical layout (c, h, w) in the "255" value range.

	    Raises:
	        ValueError: If the tensor is not 2-D or 3-D, or does not have 1 or 3 channels.
	    """
	    rank = image.dim()
	    if rank == 2:
	        # Grayscale given as (h, w): prepend the channel axis to get (1, h, w).
	        image = image[None]
	    elif rank != 3:
	        raise ValueError(f"`image` should be 2 or 3 dimensions. Got {image.ndimension()} dimensions.")

	    # The channel axis is third from the end in CHW layout.
	    channels = image.shape[-3]
	    if channels != 1 and channels != 3:
	        raise ValueError(f"`image` should have 1 (`L`) or 3 (`RGB`) channels. Got {image.shape[-3]} channels.")

	    return to_dataformat(image, image_mode=image_mode, mode="255")


	def to_dataformat(
	    image: ImageType,
	    *,
	    image_mode: DataFormat | None = None,
	    mode: DataFormat = "255",
	) -> np.ndarray | torch.Tensor:
	    """
	    Convert the value range of `image` from `image_mode` to `mode`.

	    The three ranges are "255" (`uint8` in [0, 255]), "01" (float in [0, 1]) and
	    "11" (float in [-1, 1]). The input is never modified in place; an array or tensor
	    that is already in `mode` is returned as-is (not copied).

	    Args:
	        image (ImageType): PIL image, NumPy array or PyTorch tensor. A PIL image is first
	            copied into a `uint8` NumPy array and is always treated as "255".
	        image_mode (DataFormat, optional): Range the input is declared to be in. Defaults to
	            None, in which case it is guessed from the data: `uint8` -> "255", float with a
	            negative minimum -> "11", any other float -> "01". If a declared mode disagrees
	            with the guess, a notice is printed and the declared mode is used.
	        mode (DataFormat, optional): Target range. Defaults to "255".

	    Returns:
	        np.ndarray | torch.Tensor: The converted image (a NumPy array for PIL/NumPy input,
	        a tensor for tensor input).

	    Raises:
	        TypeError: If `image` is not a supported container (raised by `check_image_type`).
	        ValueError: If the dtype is neither `uint8` nor a supported floating-point type.
	    """
	    check_image_type(image)

	    # convert PIL Image to NumPy array
	    if isinstance(image, PIL.Image.Image):
	        image = np.array(image, np.uint8, copy=True)
	        image_mode = "255"

	    # Compare each container only against dtypes of its own library.
	    is_tensor = isinstance(image, torch.Tensor)
	    if is_tensor:
	        is_uint8 = image.dtype == torch.uint8
	        is_float = image.dtype in (torch.float16, torch.bfloat16, torch.float32, torch.float64)
	    else:
	        is_uint8 = image.dtype == np.uint8
	        is_float = image.dtype in (np.float16, np.float32, np.float64)

	    # guess image mode
	    if is_uint8:
	        guess_image_mode = "255"
	    elif is_float:
	        guess_image_mode = "11" if image.min() < 0.0 else "01"
	    else:
	        raise ValueError(f"Unsupported dtype `{image.dtype}`")

	    if image_mode is None:
	        image_mode = guess_image_mode
	    elif guess_image_mode != image_mode:
	        print(f"Guess image mode is `{guess_image_mode}`, but image mode is `{image_mode}`")

	    if not is_tensor:
	        # Every step below allocates a new array. Writing back with `out=image` would fail for
	        # uint8 inputs (a float result cannot be cast into them) and would silently overwrite
	        # the caller's data for float inputs.
	        if image_mode == "255" and mode != "255":
	            image = np.clip(image.astype(np.float32) / 255, 0, 1)
	            if mode == "11":
	                image = np.clip(image * 2 - 1, -1, 1)

	        elif image_mode == "01" and mode != "01":
	            if mode == "255":
	                image = (np.clip(image, 0, 1) * 255).round().astype(np.uint8)
	            elif mode == "11":
	                image = np.clip(image * 2 - 1, -1, 1)

	        elif image_mode == "11" and mode != "11":
	            image = np.clip(image / 2 + 0.5, 0, 1)
	            if mode == "255":
	                image = (image * 255).round().astype(np.uint8)

	    else:
	        if image_mode == "255" and mode != "255":
	            image = image.to(dtype=torch.float32).div(255).clamp(0, 1)
	            if mode == "11":
	                image = (image * 2 - 1).clamp(-1, 1)

	        elif image_mode == "01" and mode != "01":
	            if mode == "255":
	                image = image.clamp(0, 1)
	                image = (image * 255).round().to(dtype=torch.uint8)
	            elif mode == "11":
	                image = (image * 2 - 1).clamp(-1, 1)

	        elif image_mode == "11" and mode != "11":
	            image = (image / 2 + 0.5).clamp(0, 1)
	            if mode == "255":
	                image = image.mul(255).round().to(dtype=torch.uint8)

	    return image


	def resize_image(pil_image, image_size):
	    """
	    Resize `pil_image` so that its shorter side equals `image_size`, keeping the aspect ratio.

	    The image is first halved repeatedly with a box filter while its shorter side is at
	    least twice the target, then scaled to the final size with bicubic resampling.

	    Args:
	        pil_image (PIL.Image.Image): The image to resize.
	        image_size (int): Target length of the shorter side, in pixels. Must be positive.

	    Returns:
	        PIL.Image.Image: The resized image.

	    Raises:
	        ValueError: If `image_size` is not positive.
	    """
	    # A non-positive target would make the halving loop shrink the image down to nothing.
	    if image_size <= 0:
	        raise ValueError(f"`image_size` must be positive, got `{image_size}`")

	    while min(pil_image.size) >= 2 * image_size:
	        pil_image = pil_image.resize(tuple(x // 2 for x in pil_image.size), resample=PIL.Image.BOX)

	    scale = image_size / min(pil_image.size)
	    pil_image = pil_image.resize(tuple(round(x * scale) for x in pil_image.size), resample=PIL.Image.BICUBIC)
	    return pil_image


	def center_crop_arr(pil_image, image_size, crop=True):
	    """
	    Produce a square `image_size` x `image_size` PIL image by center-cropping or padding.

	    With `crop=True` this is the center cropping implementation from ADM:
	    https://github.com/openai/guided-diffusion/blob/8fb3ad9197f16bbc40620447b2742e13458d2831/guided_diffusion/image_datasets.py#L126
	    The shorter side is resized to `image_size` and the central square is cut out.

	    With `crop=False` the image is instead pasted, centered, onto a square zero-filled
	    canvas whose side is the longer edge, and the result is resized.

	    Args:
	        pil_image (PIL.Image.Image): The input image.
	        image_size (int): Side length of the output, in pixels.
	        crop (bool, optional): Crop (True) or pad (False) to reach a square. Defaults to True.

	    Returns:
	        PIL.Image.Image: The square image.
	    """
	    if crop:
	        pil_image = resize_image(pil_image, image_size)
	        arr = np.array(pil_image)
	        crop_y = (arr.shape[0] - image_size) // 2
	        crop_x = (arr.shape[1] - image_size) // 2
	        return PIL.Image.fromarray(arr[crop_y : crop_y + image_size, crop_x : crop_x + image_size])

	    # Pad the image to a square.
	    width, height = pil_image.size
	    if width != height:
	        # Create a square canvas whose side is the longer edge.
	        max_dim = max(width, height)
	        # Pillow rejects an RGB tuple as fill colour for one- and two-band modes (e.g. "L"),
	        # so those get a scalar 0. Modes with 3+ bands and "P" keep the original tuple.
	        takes_rgb_tuple = len(pil_image.getbands()) >= 3 or pil_image.mode == "P"
	        fill = (0, 0, 0) if takes_rgb_tuple else 0
	        padded_img = PIL.Image.new(pil_image.mode, (max_dim, max_dim), fill)
	        # Paste the original image centered on the square canvas.
	        padded_img.paste(pil_image, ((max_dim - width) // 2, (max_dim - height) // 2))
	        pil_image = padded_img
	    pil_image = resize_image(pil_image, image_size)
	    return pil_image