torchsight.transforms.detection module

Useful transforms for the images for any dataset for object detection.

The recommendation is to compose the transforms in the order in which they are written: Resize(), ToTensor(), Normalize().

Source code
"""Useful transforms for the images for any dataset for object detection.

The recommendation is to compose the transforms in the order in which they are written:
Resize(), ToTensor(), Normalize().
"""
import numpy as np
import skimage
import torch
from PIL.Image import Image
from torchvision.transforms.functional import normalize, to_tensor


class Resize():
    """Resize an image to fit between the min_side and max_side.

    It tries to match the smallest side of the image to the min_side attribute of this transform,
    and if the biggest side of the image after the transformation would be over the max_side
    attribute it instead resizes the image to match the biggest side to the max_side attribute.

    Also, it zero-pads each side of the resized image up to a multiple of the stride attribute
    to better match the design of the feature map.
    """

    def __init__(self, min_side=384, max_side=512, stride=128):
        """Initialize the transform.

        Arguments:
            min_side (int): Side length to which the smallest side of the image is matched.
            max_side (int): Upper bound for the biggest side after scaling.
            stride (int): Each side of the output is padded to a multiple of this value.
        """
        self.min_side = min_side
        self.max_side = max_side
        self.stride = stride

    def resize_image(self, image):
        """Resize the given image keeping its aspect ratio and pad it to multiples of the stride.

        Arguments:
            image (PIL Image or np.array): the image to resize.

        Returns:
            np.array: The resized (and zero-padded) image.
            float: The scale factor that was applied to both sides of the image.
        """
        if isinstance(image, Image):
            image = np.array(image)

        # Grayscale images are promoted to 3-channel RGB so the shape always unpacks below.
        if len(image.shape) == 2:
            image = skimage.color.gray2rgb(image)

        height, width, channels = image.shape

        smallest_side = min(height, width)
        biggest_side = max(height, width)

        # Scale so the smallest side matches min_side, unless that would push the biggest
        # side over max_side; in that case match the biggest side to max_side instead.
        scale = self.min_side / smallest_side
        if scale * biggest_side > self.max_side:
            scale = self.max_side / biggest_side

        new_width = round(width * scale)
        new_height = round(height * scale)

        # Zero padding needed to reach the next multiple of the stride
        # (zero when the side is already an exact multiple).
        padding_width = -new_width % self.stride
        padding_height = -new_height % self.stride

        image = skimage.transform.resize(image, (new_height, new_width), mode='constant', anti_aliasing=True)
        height, width, channels = image.shape

        final = np.zeros((new_height + padding_height, new_width + padding_width, channels))
        final[:height, :width, :] = image

        return final, scale

    def __call__(self, data):
        """Resize the image and scale the bounding boxes.

        Arguments:
            data (PIL Image, np.array or tuple): The image to resize, or a tuple with an image
                and the bounding boxes as numpy arrays, optionally followed by an info dict.

        Returns:
            np.array: The resized image, when `data` was a single image.
            tuple: `(image, bounding_boxes)` or `(image, bounding_boxes, info)` otherwise,
                with the box coordinates scaled by the same factor as the image.
        """
        # A bare image (PIL or ndarray) is resized and returned directly; without the
        # ndarray case a lone array would be wrongly tuple-unpacked along its first axis.
        if isinstance(data, (Image, np.ndarray)):
            image, _ = self.resize_image(data)
            return image

        image, bounding_boxes, *rest = data
        image, scale = self.resize_image(image)

        # Scale only the coordinates (x1, y1, x2, y2); any extra columns
        # (e.g. the class label) are left untouched.
        if bounding_boxes.shape[0] > 0:
            bounding_boxes[:, :4] *= scale

        # Record the applied scale in the info dict so callers can map back to the original size.
        if rest and isinstance(rest[0], dict):
            info = rest[0]
            info['resize_scale'] = scale
            return image, bounding_boxes, info

        return image, bounding_boxes


class ToTensor():
    """Convert a tuple with a PIL image or ndarray and bounding boxes to tensors.

    See: https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py#L38
    """

    def __call__(self, data):
        """Turn the image and the bounding boxes of the tuple into tensors.

        Arguments:
            data (tuple): A tuple with a PIL image (or ndarray) and the bounding boxes as
                numpy arrays, optionally followed by more items.

        Returns:
            torch.Tensor: The image.
            torch.Tensor: The annotations.
        """
        image, boxes, *extra = data

        tensor_image = to_tensor(image)
        # Boxes that already arrive as tensors are passed through untouched.
        tensor_boxes = boxes if torch.is_tensor(boxes) else torch.from_numpy(boxes)

        return (tensor_image, tensor_boxes, *extra)


class Normalize():
    """Normalize an image by a mean and standard deviation.

    See: https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py#L157

    It works with a tuple and it assumes that the first element is the image as a tensor.
    """

    def __init__(self, mean=None, std=None):
        """Store the normalization statistics, falling back to the ImageNet defaults.

        Arguments:
            mean (sequence): Sequence of floats that contains the mean to which normalize each channel.
            std (sequence): The standard deviation for each of the channels.
        """
        if mean is None:
            mean = [0.485, 0.456, 0.406]
        if std is None:
            std = [0.229, 0.224, 0.225]
        self.mean = mean
        self.std = std

    def __call__(self, data):
        """Normalize the first element of the tuple, assumed to be an image tensor.

        Arguments:
            data (tuple): A tuple whose first element is an image as a tensor.

        Returns:
            torch.Tensor: The image normalized.
        """
        image, *others = data
        # Cast to float32 so normalize gets the dtype it expects regardless of the input.
        return (normalize(image.float(), self.mean, self.std), *others)

Classes

class Normalize

Normalize an image by a mean and standard deviation.

See : <https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py#L157>
 

It works with a tuple and it assumes that the first element is the image as a tensor.

Source code
class Normalize():
    """Normalize an image by a mean and standard deviation.

    See: https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py#L157

    It works with a tuple and it assumes that the first element is the image as a tensor.
    """

    def __init__(self, mean=None, std=None):
        """Initialize the normalizer with the given mean and std.

        Arguments:
            mean (sequence): Sequence of floats that contains the mean to which normalize each channel.
            std (sequence): The standard deviation for each of the channels.
        """
        self.mean = mean if mean is not None else [0.485, 0.456, 0.406]
        self.std = std if std is not None else [0.229, 0.224, 0.225]

    def __call__(self, data):
        """Normalize the first element of the tuple assuming that is an image.

        Arguments:
            data (tuple): A tuple where it first element is an image as a tensor.

        Returns:
            torch.Tensor: The image normalized.
        """
        image, *rest = data
        image = image.type(torch.float)
        return (normalize(image, self.mean, self.std), *rest)

Methods

def __init__(self, mean=None, std=None)

Initialize the normalizer with the given mean and std.

Arguments

mean : sequence
Sequence of floats that contains the mean to which normalize each channel.
std : sequence
The standard deviation for each of the channels.
Source code
def __init__(self, mean=None, std=None):
    """Initialize the normalizer with the given mean and std.

    Arguments:
        mean (sequence): Sequence of floats that contains the mean to which normalize each channel.
        std (sequence): The standard deviation for each of the channels.
    """
    self.mean = mean if mean is not None else [0.485, 0.456, 0.406]
    self.std = std if std is not None else [0.229, 0.224, 0.225]
class Resize

Resize an image to fit between the min_side and max_side.

It tries to match the smallest side of the image to the min_side attribute of this transform, and if the biggest side of the image after the transformation would be over the max_side attribute, it instead resizes the image to match the biggest side to the max_side attribute.

Also, it tries to keep a multiple of the stride attribute on each of the sides to match design better the feature map.

Source code
class Resize():
    """Resize an image to fit between the min_side and max_side.

    It tries to match the smallest side of the image to the min_side attribute of this transform
    and if the biggest side of the image after the transformation will be over the max_size attribute
    it instead resize the image to match the biggest side to the max_size attribute.

    Also, it tries to keep a multiple of the stride attribute on each of the sides to match design
    better the feature map.
    """

    def __init__(self, min_side=384, max_side=512, stride=128):
        self.min_side = min_side
        self.max_side = max_side
        self.stride = stride

    def resize_image(self, image):
        """Resize the given image.

        Arguments:
            image (PIL Image or np.array): the image to resize.

        Returns:
            np.array: The resized image.
        """
        if isinstance(image, Image):
            image = np.array(image)

        if len(image.shape) == 2:
            image = skimage.color.gray2rgb(image)

        height, width, channels = image.shape

        smallest_side = height if height < width else width
        biggest_side = height if height > width else width

        scale = self.min_side / smallest_side
        scale = self.max_side / biggest_side if scale * biggest_side > self.max_side else scale

        new_width = round(width * scale)
        new_height = round(height * scale)

        padding_width = self.stride - (new_width % self.stride)
        padding_width = 0 if padding_width == self.stride else padding_width
        padding_height = self.stride - (new_height % self.stride)
        padding_height = 0 if padding_height == self.stride else padding_height

        image = skimage.transform.resize(image, (new_height, new_width), mode='constant', anti_aliasing=True)
        height, width, channels = image.shape

        final = np.zeros((new_height + padding_height, new_width + padding_width, channels))
        final[:height, :width, :] = image

        return final, scale

    def __call__(self, data):
        """Resize the image and scale the bounding boxes.

        Args:
            data (PIL Image or tuple): The image to resize or a tuple with a PIL image and
                the bounding boxes as numpy arrays.
        """
        if isinstance(data, Image):
            image, _ = self.resize_image(data)
            return image

        image, bounding_boxes, *rest = data
        image, scale = self.resize_image(image)

        if bounding_boxes.shape[0] > 0:
            bounding_boxes[:, :4] *= scale

        if rest and isinstance(rest[0], dict):
            info = rest[0]
            info['resize_scale'] = scale
            return image, bounding_boxes, info

        return image, bounding_boxes

Methods

def __init__(self, min_side=384, max_side=512, stride=128)

Initialize self. See help(type(self)) for accurate signature.

Source code
def __init__(self, min_side=384, max_side=512, stride=128):
    self.min_side = min_side
    self.max_side = max_side
    self.stride = stride
def resize_image(self, image)

Resize the given image.

Arguments

image : PIL Image or np.array
the image to resize.

Returns

np.array: The resized image.

Source code
def resize_image(self, image):
    """Resize the given image.

    Arguments:
        image (PIL Image or np.array): the image to resize.

    Returns:
        np.array: The resized image.
    """
    if isinstance(image, Image):
        image = np.array(image)

    if len(image.shape) == 2:
        image = skimage.color.gray2rgb(image)

    height, width, channels = image.shape

    smallest_side = height if height < width else width
    biggest_side = height if height > width else width

    scale = self.min_side / smallest_side
    scale = self.max_side / biggest_side if scale * biggest_side > self.max_side else scale

    new_width = round(width * scale)
    new_height = round(height * scale)

    padding_width = self.stride - (new_width % self.stride)
    padding_width = 0 if padding_width == self.stride else padding_width
    padding_height = self.stride - (new_height % self.stride)
    padding_height = 0 if padding_height == self.stride else padding_height

    image = skimage.transform.resize(image, (new_height, new_width), mode='constant', anti_aliasing=True)
    height, width, channels = image.shape

    final = np.zeros((new_height + padding_height, new_width + padding_width, channels))
    final[:height, :width, :] = image

    return final, scale
class ToTensor

Transform a tuple with a PIL image or ndarray and bounding boxes to tensors.

See : <https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py#L38>
 
Source code
class ToTensor():
    """Transform a tuple with a PIL image or ndarray and bounding boxes to tensors.

    See: https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py#L38
    """

    def __call__(self, data):
        """Transforms the image and bounding boxes to tensors.

        Arguments:
            data (tuple): A tuple with a PIL image and the bounding boxes as numpy arrays.

        Returns:
            torch.Tensor: The image.
            torch.Tensor: The annotations.
        """
        image, boxes, *rest = data

        image = to_tensor(image)

        if not torch.is_tensor(boxes):
            boxes = torch.from_numpy(boxes)

        return (image, boxes, *rest)