| from __future__ import division |
|
|
| import math |
| import random |
|
|
| import torch |
| from PIL import Image, ImageEnhance, ImageOps |
|
|
| try: |
| import accimage |
| except ImportError: |
| accimage = None |
| import collections |
| import numbers |
| import types |
| import warnings |
|
|
| import numpy as np |
|
|
|
|
def _is_pil_image(img):
    """Return True when ``img`` is a PIL image (or an accimage image, if accimage is importable)."""
    # accimage is optional; only include its Image type when the import succeeded.
    accepted_types = (Image.Image,) if accimage is None else (Image.Image, accimage.Image)
    return isinstance(img, accepted_types)
|
|
|
|
| def _is_tensor_image(img): |
| return torch.is_tensor(img) and img.ndimension() == 3 |
|
|
|
|
| def _is_numpy_image(img): |
| return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) |
|
|
|
|
def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    See ``ToTensor`` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
            An ndarray may be H x W x C or H x W; a 2-D array is treated as
            a single-channel image.

    Returns:
        Tensor: Converted image in C x H x W layout. ndarray input and
        byte-valued PIL input are returned as float tensors divided by 255;
        PIL modes ``"I"`` and ``"I;16"`` are returned as integer tensors
        without rescaling.

    Raises:
        TypeError: If ``pic`` is neither a PIL Image nor a 2-D/3-D ndarray.
    """
    if not (_is_pil_image(pic) or _is_numpy_image(pic)):
        raise TypeError("pic should be PIL Image or ndarray. Got {}".format(type(pic)))

    if isinstance(pic, np.ndarray):
        # _is_numpy_image admits 2-D arrays, but the HWC -> CHW transpose
        # below needs three axes; add a trailing channel axis first.
        if pic.ndim == 2:
            pic = pic[:, :, np.newaxis]
        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        return img.float().div(255)

    if accimage is not None and isinstance(pic, accimage.Image):
        # accimage fills a preallocated float32 C x H x W buffer.
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return torch.from_numpy(nppic)

    # PIL Image path.
    # np.array is called with its default copy behaviour: forcing
    # ``copy=False`` raises on NumPy 2 whenever a dtype cast needs a copy.
    if pic.mode == "I":
        img = torch.from_numpy(np.array(pic, np.int32))
    elif pic.mode == "I;16":
        img = torch.from_numpy(np.array(pic, np.int16))
    else:
        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))

    # PIL reports size as (width, height); YCbCr has 3 channels although
    # its mode string is 5 characters long, and "I;16" has a single channel.
    if pic.mode == "YCbCr":
        nchannel = 3
    elif pic.mode == "I;16":
        nchannel = 1
    else:
        nchannel = len(pic.mode)
    img = img.view(pic.size[1], pic.size[0], nchannel)

    # H x W x C -> C x H x W
    img = img.transpose(0, 1).transpose(0, 2).contiguous()
    if isinstance(img, torch.ByteTensor):
        return img.float().div(255)
    return img
|
|
|
|
def to_mytensor(pic):
    """Convert an image to a float tensor without rescaling its values.

    The input is passed through ``numpy.array``; a 2-D result gets a
    trailing channel axis, and the array is then rearranged from
    H x W x C to C x H x W.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Float tensor in C x H x W layout holding the original values
        (no division by 255 is applied).
    """
    array = np.array(pic)
    if array.ndim == 2:
        # Give grayscale input an explicit channel axis.
        array = array[..., np.newaxis]
    channels_first = array.transpose((2, 0, 1))
    tensor = torch.from_numpy(channels_first)
    return tensor if isinstance(tensor, torch.FloatTensor) else tensor.float()
|
|
|
|
def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to PIL Image.

    See :class:`~torchvision.transforms.ToPILImage` for more details.

    Args:
        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
            Tensors must be 3-D (C x H x W). ndarrays may be H x W x C or
            H x W; a 2-D array is treated as a single-channel image.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).

    .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes

    Returns:
        PIL Image: Image converted to PIL Image.

    Raises:
        TypeError: If ``pic`` is not a 3-D tensor or 2-D/3-D ndarray, or if
            no mode was given and none can be inferred from the dtype.
        ValueError: If ``mode`` is incompatible with the number of channels
            or, for single-channel input, with the dtype.
    """
    if not (_is_numpy_image(pic) or _is_tensor_image(pic)):
        raise TypeError("pic should be Tensor or ndarray. Got {}.".format(type(pic)))

    npimg = pic
    if isinstance(pic, torch.FloatTensor):
        # Float tensors are scaled by 255 and truncated to bytes.
        pic = pic.mul(255).byte()
    if torch.is_tensor(pic):
        # C x H x W -> H x W x C
        npimg = np.transpose(pic.numpy(), (1, 2, 0))

    # _is_numpy_image admits 2-D arrays; add a channel axis so the
    # channel-count dispatch below can index shape[2].
    if npimg.ndim == 2:
        npimg = npimg[:, :, np.newaxis]

    if npimg.shape[2] == 1:
        # Single channel: the PIL mode is dictated by the dtype.
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = "L"
        elif npimg.dtype == np.int16:
            expected_mode = "I;16"
        elif npimg.dtype == np.int32:
            expected_mode = "I"
        elif npimg.dtype == np.float32:
            expected_mode = "F"
        if mode is not None and mode != expected_mode:
            raise ValueError(
                "Incorrect mode ({}) supplied for input type {}. Should be {}".format(
                    mode, npimg.dtype, expected_mode
                )
            )
        mode = expected_mode

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ["RGBA", "CMYK"]
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = "RGBA"
    else:
        permitted_3_channel_modes = ["RGB", "YCbCr", "HSV"]
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = "RGB"

    if mode is None:
        raise TypeError("Input type {} is not supported".format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)
|
|
|
|
def _single_channel_stat(value, name):
    """Return the statistic for a one-channel image from a scalar or a 1-element list/tuple."""
    if isinstance(value, (list, tuple)):
        if len(value) != 1:
            raise ValueError(
                "{} must have exactly one element for a single-channel tensor, got {}".format(name, len(value))
            )
        return value[0]
    return value


def normalize(tensor, mean, std):
    """Normalize a tensor image with mean and standard deviation.

    The tensor is modified in place and also returned.

    See ``Normalize`` for more details.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel. For a
            single-channel tensor a bare number is accepted as well.
        std (sequence): Sequence of standard deviations for each channel.
            For a single-channel tensor a bare number is accepted as well.

    Returns:
        Tensor: Normalized Tensor image (the same object as ``tensor``).

    Raises:
        TypeError: If ``tensor`` is not a 3-D torch tensor.
        ValueError: If ``tensor`` has one channel and ``mean`` or ``std`` is
            a list/tuple whose length is not 1.
    """
    if not _is_tensor_image(tensor):
        raise TypeError("tensor is not a torch image.")

    if tensor.size(0) == 1:
        # Passing a list/tuple straight to sub_/div_ fails, so unwrap
        # one-element sequences while still accepting plain scalars.
        channel_mean = _single_channel_stat(mean, "mean")
        channel_std = _single_channel_stat(std, "std")
        tensor.sub_(channel_mean).div_(channel_std)
    else:
        # Iterating the tensor yields per-channel views, so the in-place
        # ops below update ``tensor`` itself.
        for t, m, s in zip(tensor, mean, std):
            t.sub_(m).div_(s)
    return tensor
|
|
|
|
def resize(img, size, interpolation=Image.BILINEAR):
    """Resize the input PIL Image to the given size.

    Args:
        img (PIL Image): Image to be resized.
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), the output size will be matched to this. If size is an int,
            the smaller edge of the image will be matched to this number maintaining
            the aspect ratio. i.e, if height > width, then image will be rescaled to
            (size * height / width, size)
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``

    Returns:
        PIL Image: Resized image. When ``size`` is an int and the smaller
        edge already equals it, the input image itself is returned.

    Raises:
        TypeError: If ``img`` is not a PIL Image, or ``size`` is neither an
            int nor an iterable of length 2.
    """
    # ``collections.Iterable`` was removed in Python 3.10; import the ABC
    # from its current home and fall back for interpreters without it.
    try:
        from collections.abc import Iterable
    except ImportError:  # pragma: no cover - Python 2 only
        from collections import Iterable

    if not _is_pil_image(img):
        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
    if not isinstance(size, int) and (not isinstance(size, Iterable) or len(size) != 2):
        raise TypeError("Got inappropriate size arg: {}".format(size))

    if not isinstance(size, int):
        # size is (h, w) but PIL expects (w, h).
        return img.resize(size[::-1], interpolation)

    w, h = img.size
    if (w <= h and w == size) or (h <= w and h == size):
        return img
    if w < h:
        ow = size
        oh = int(round(size * h / w))
    else:
        oh = size
        ow = int(round(size * w / h))
    return img.resize((ow, oh), interpolation)
|
|
|
|
def scale(*args, **kwargs):
    """Deprecated alias of ``resize``: emit a warning, then forward all arguments to it."""
    deprecation_message = (
        "The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead."
    )
    warnings.warn(deprecation_message)
    return resize(*args, **kwargs)
|
|
|
|
def pad(img, padding, fill=0):
    """Pad the given PIL Image on all sides with the given "pad" value.

    Args:
        img (PIL Image): Image to be padded.
        padding (int or tuple): Padding on each border. If a single int is provided this
            is used to pad all borders. If tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively.
        fill: Pixel fill value. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.

    Returns:
        PIL Image: Padded image.

    Raises:
        TypeError: If ``img`` is not a PIL Image, ``padding`` is not a number
            or tuple, or ``fill`` is not a number, str or tuple.
        ValueError: If ``padding`` is a tuple whose length is not 2 or 4.
    """
    if not _is_pil_image(img):
        raise TypeError("img should be PIL Image. Got {}".format(type(img)))

    if not isinstance(padding, (numbers.Number, tuple)):
        raise TypeError("Got inappropriate padding arg")
    if not isinstance(fill, (numbers.Number, str, tuple)):
        raise TypeError("Got inappropriate fill arg")

    # After the guard above, padding is a number or a tuple, so a plain
    # tuple check suffices here (``collections.Sequence`` no longer exists
    # on Python 3.10+).
    if isinstance(padding, tuple) and len(padding) not in [2, 4]:
        raise ValueError(
            "Padding must be an int or a 2, or 4 element tuple, not a {} element tuple".format(len(padding))
        )

    return ImageOps.expand(img, border=padding, fill=fill)
|
|
|
|
def crop(img, i, j, h, w):
    """Cut a rectangular region out of a PIL Image.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.

    Returns:
        PIL Image: Cropped image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        # PIL boxes are (left, upper, right, lower).
        left, upper = j, i
        box = (left, upper, left + w, upper + h)
        return img.crop(box)
    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
|
|
|
|
def center_crop(img, output_size):
    """Crop the given PIL Image around its center.

    Args:
        img (PIL Image): Image to be cropped.
        output_size (sequence or number): Desired (h, w) of the crop. A single
            number is converted to int and used for both height and width.

    Returns:
        PIL Image: Cropped image.
    """
    if isinstance(output_size, numbers.Number):
        side = int(output_size)
        output_size = (side, side)
    img_w, img_h = img.size
    target_h, target_w = output_size
    # Offsets of the crop's upper-left corner, rounded to whole pixels.
    top = int(round((img_h - target_h) / 2.0))
    left = int(round((img_w - target_w) / 2.0))
    return crop(img, top, left, target_h, target_w)
|
|
|
|
def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
    """Crop a region from a PIL Image, then resize the result.

    Notably used in RandomResizedCrop.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.
        size (sequence or int): Desired output size. Same semantics as ``scale``.
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``.

    Returns:
        PIL Image: Cropped and resized image.
    """
    assert _is_pil_image(img), "img should be PIL Image"
    region = crop(img, i, j, h, w)
    return resize(region, size, interpolation)
|
|
|
|
def hflip(img):
    """Mirror the given PIL Image left-to-right.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Horizontally flipped image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return img.transpose(Image.FLIP_LEFT_RIGHT)
    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
|
|
|
|
def vflip(img):
    """Mirror the given PIL Image top-to-bottom.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Vertically flipped image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return img.transpose(Image.FLIP_TOP_BOTTOM)
    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
|
|
|
|
def five_crop(img, size):
    """Crop the given PIL Image into four corners and the central crop.

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        img (PIL Image): Image to be cropped.
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.

    Returns:
        tuple: tuple (tl, tr, bl, br, center) corresponding top left,
            top right, bottom left, bottom right and center crop.

    Raises:
        ValueError: If the requested crop is larger than the image in
            either dimension.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    img_w, img_h = img.size
    crop_h, crop_w = size
    if crop_w > img_w or crop_h > img_h:
        raise ValueError("Requested crop size {} is bigger than input size {}".format(size, (img_h, img_w)))

    # Corner boxes in PIL's (left, upper, right, lower) convention,
    # ordered top-left, top-right, bottom-left, bottom-right.
    corner_boxes = (
        (0, 0, crop_w, crop_h),
        (img_w - crop_w, 0, img_w, crop_h),
        (0, img_h - crop_h, crop_w, img_h),
        (img_w - crop_w, img_h - crop_h, img_w, img_h),
    )
    corners = tuple(img.crop(box) for box in corner_boxes)
    middle = center_crop(img, (crop_h, crop_w))
    return corners + (middle,)
|
|
|
|
def ten_crop(img, size, vertical_flip=False):
    """Produce the five crops of a PIL Image plus the five crops of its flipped copy.

    The five crops are the four corners and the center. The flip is
    horizontal unless ``vertical_flip`` is set.

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        img (PIL Image): Image to be cropped.
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
        vertical_flip (bool): Use vertical flipping instead of horizontal

    Returns:
        tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
            br_flip, center_flip) corresponding top left, top right,
            bottom left, bottom right and center crop and same for the
            flipped image.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    unflipped_crops = five_crop(img, size)
    flipped_img = vflip(img) if vertical_flip else hflip(img)
    return unflipped_crops + five_crop(flipped_img, size)
|
|
|
|
def adjust_brightness(img, brightness_factor):
    """Change the brightness of a PIL Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        PIL Image: Brightness adjusted image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return ImageEnhance.Brightness(img).enhance(brightness_factor)
    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
|
|
|
|
def adjust_contrast(img, contrast_factor):
    """Change the contrast of a PIL Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        PIL Image: Contrast adjusted image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return ImageEnhance.Contrast(img).enhance(contrast_factor)
    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
|
|
|
|
def adjust_saturation(img, saturation_factor):
    """Change the color saturation of a PIL Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        saturation_factor (float): How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        PIL Image: Saturation adjusted image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return ImageEnhance.Color(img).enhance(saturation_factor)
    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
|
|
|
|
def adjust_hue(img, hue_factor):
    """Adjust hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to original image mode.

    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.

    See https://en.wikipedia.org/wiki/Hue for more details on Hue.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        hue_factor (float): How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL Image: Hue adjusted image. Images in mode "L", "1", "I" or "F"
        are returned unchanged.

    Raises:
        ValueError: If ``hue_factor`` is outside [-0.5, 0.5].
        TypeError: If ``img`` is not a PIL Image.
    """
    if not (-0.5 <= hue_factor <= 0.5):
        raise ValueError("hue_factor ({}) is not in [-0.5, 0.5].".format(hue_factor))

    if not _is_pil_image(img):
        raise TypeError("img should be PIL Image. Got {}".format(type(img)))

    input_mode = img.mode
    if input_mode in {"L", "1", "I", "F"}:
        # Modes without color information have no hue to shift.
        return img

    h, s, v = img.convert("HSV").split()

    np_h = np.array(h, dtype=np.uint8)
    # Reduce the shift modulo 256 in Python so a negative hue_factor never
    # relies on casting a negative float to uint8; int() truncates toward
    # zero, and the modulo makes the shift cyclic.
    hue_shift = int(hue_factor * 255) % 256
    # uint8 addition wraps around, which is exactly the cyclic shift wanted.
    with np.errstate(over="ignore"):
        np_h += np.uint8(hue_shift)
    h = Image.fromarray(np_h, "L")

    img = Image.merge("HSV", (h, s, v)).convert(input_mode)
    return img
|
|
|
|
def adjust_gamma(img, gamma, gain=1):
    """Apply gamma correction (power law transform) to a PIL Image.

    The image is converted to RGB, each intensity is mapped through

        I_out = 255 * gain * ((I_in / 255) ** gamma)

    clipped to [0, 255], and the result is converted back to the input mode.

    See https://en.wikipedia.org/wiki/Gamma_correction for more details.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        gamma (float): Non negative real number. gamma larger than 1 make the
            shadows darker, while gamma smaller than 1 make dark regions
            lighter.
        gain (float): The constant multiplier.

    Returns:
        PIL Image: Gamma corrected image in the original mode.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
        ValueError: If ``gamma`` is negative.
    """
    if not _is_pil_image(img):
        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
    if gamma < 0:
        raise ValueError("Gamma should be a non-negative real number")

    original_mode = img.mode
    pixels = np.array(img.convert("RGB"), dtype=np.float32)
    corrected = 255 * gain * ((pixels / 255) ** gamma)
    corrected = np.uint8(np.clip(corrected, 0, 255))
    return Image.fromarray(corrected, "RGB").convert(original_mode)
|
|
|
|
def rotate(img, angle, resample=False, expand=False, center=None):
    """Rotate a PIL Image by ``angle`` via ``img.rotate``.

    Args:
        img (PIL Image): PIL Image to be rotated.
        angle ({float, int}): In degrees, counter clockwise order.
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter.
            See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.

    Returns:
        PIL Image: Rotated image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return img.rotate(angle, resample, expand, center)
    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
|
|
|
|
def to_grayscale(img, num_output_channels=1):
    """Produce a grayscale version of a PIL Image.

    Args:
        img (PIL Image): Image to be converted to grayscale.
        num_output_channels (int): 1 or 3; number of channels in the result.

    Returns:
        PIL Image: Grayscale version of the image.
            if num_output_channels == 1 : returned image is single channel
            if num_output_channels == 3 : returned image is 3 channel with r == g == b

    Raises:
        TypeError: If ``img`` is not a PIL Image.
        ValueError: If ``num_output_channels`` is neither 1 nor 3.
    """
    if not _is_pil_image(img):
        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
    if num_output_channels not in (1, 3):
        raise ValueError("num_output_channels should be either 1 or 3")

    gray = img.convert("L")
    if num_output_channels == 1:
        return gray

    # Replicate the luminance plane into three identical channels.
    plane = np.array(gray, dtype=np.uint8)
    stacked = np.dstack([plane, plane, plane])
    return Image.fromarray(stacked, "RGB")
|
|