torchsight.evaluators.dlde module

Evaluators for the DLDENet models.

Source code
"""Evaluators for the DLDENet models."""
import json
import os

import torch
from torch.utils.data import DataLoader
from torchvision import transforms

from torchsight.datasets import CocoDataset
from torchsight.models import DLDENet, DLDENetWithTrackedMeans
from torchsight.transforms.augmentation import AugmentDetection
from torchsight.transforms.detection import Normalize, Resize, ToTensor
from torchsight.utils import merge_dicts

from .evaluator import Evaluator
from .flickr32 import Flickr32Evaluator


class DLDENetCOCOEvaluator(Evaluator):
    """An evaluator for the DLDENet.

    It evaluates the model by computing the mAP over the COCO validation dataset.
    """
    params = {'results': {'dir': './evaluations/dldenet/coco', 'file': 'val2017.json'},
              'dataset': {'root': './datasets/coco',
                          'validation': 'val2017',
                          'class_names': (),
                          # Try to load the classes names from the checkpoint file
                          'class_names_from_checkpoint': True},
              'dataloader': {'batch_size': 8,
                             'shuffle': False,
                             'num_workers': 8},
              'model': {'with_tracked_means': False,
                        'evaluation': {'threshold': 0.5, 'iou_threshold': 0.5},
                        # As the tracked version was created when the trainer didn't save the hyperparameters
                        # we must provide them
                        'tracked': {'classes': 80,
                                    'resnet': 50,
                                    'features': {'pyramid': 256,
                                                 'regression': 256,
                                                 'classification': 256},
                                    'anchors': {'sizes': [32, 64, 128, 256, 512],
                                                'scales': [2 ** 0, 2 ** (1/3), 2 ** (2/3)],
                                                'ratios': [0.5, 1, 2]},
                                    'embedding_size': 256,
                                    'concentration': 15,
                                    'shift': 0.8}},
              # We don't define default params for the transforms because they are loaded from the checkpoint,
              # but they can be overridden by writing them here
              'transforms': {}}

    def __init__(self, *args, **kwargs):
        """Initialize the evaluator.

        Set the initial list with the predictions.
        """
        self.predictions = []

        super().__init__(*args, **kwargs)

    ###############################
    ###         GETTERS         ###
    ###############################

    def get_transform(self):
        """Get the transformations to apply to the dataset.

        Returns:
            torchvision.transforms.Compose: A composition of the transformations to apply.
        """
        params = self.checkpoint['hyperparameters']['transforms']
        params = merge_dicts(params, self.params['transforms'], verbose=True)

        return transforms.Compose([Resize(**params['resize']),
                                   ToTensor(),
                                   Normalize(**params['normalize'])])

    def get_dataset(self):
        """Get the COCO dataset for the evaluation.

        Returns:
            torch.utils.data.Dataset: The dataset to use for the evaluation.
        """
        params = self.params['dataset']
        transform = self.get_transform()
        class_names = params['class_names']

        if params['class_names_from_checkpoint']:
            if 'hyperparameters' in self.checkpoint:
                class_names = self.checkpoint['hyperparameters']['datasets']['class_names']
            else:
                print("Couldn't load the class_names from the checkpoint, it doesn't have the hyperparameters.")

        return CocoDataset(
            root=params['root'],
            dataset=params['validation'],
            classes_names=class_names,
            transform=transform)

    def get_dataloader(self):
        """Get the dataloader to use for the evaluation.

        Returns:
            torch.utils.data.Dataloader: The dataloader to use for the evaluation.
        """
        def collate(data):
            """Custom collate function to join the different images.

            It pads the images so they all have the same size.

            Arguments:
                data (sequence): Sequence of tuples as (image, annotations, images' infos, *_).

            Returns:
                torch.Tensor: The padded images.
                    Shape: (batch size, channels, height, width)
                list: The infos of the images.
            """
            images = [image for image, *_ in data]
            max_width = max([image.shape[-1] for image in images])
            max_height = max([image.shape[-2] for image in images])

            def pad_image(image):
                aux = torch.zeros((image.shape[0], max_height, max_width))
                aux[:, :image.shape[1], :image.shape[2]] = image
                return aux

            images = torch.stack([pad_image(image) for image, *_ in data], dim=0)
            infos = [info for _, _, info, *_ in data]

            return images, infos

        return DataLoader(dataset=self.dataset, collate_fn=collate, **self.params['dataloader'])

    def get_model(self):
        """Get the model to use to make the predictions.

        We can use the DLDENet with tracked means or the weighted version by changing
        the flag params['model']['with_tracked_means'].

        Returns:
            torch.nn.Module: The model to use to make the predictions over the data.
        """
        if self.params['model']['with_tracked_means']:
            params = {**self.params['model']['tracked'], 'device': self.device}
            state_dict = self.checkpoint['model']
            model = DLDENetWithTrackedMeans(**params)
            # load_state_dict() loads the weights in place and does not return the module
            model.load_state_dict(state_dict)
            return model

        return DLDENet.from_checkpoint(self.checkpoint, self.device)

    ###############################
    ###         METHODS         ###
    ###############################

    def eval_mode(self):
        """Put the model in evaluation mode and set the threshold for the detection."""
        params = self.params['model']['evaluation']
        self.model.eval(threshold=params['threshold'], iou_threshold=params['iou_threshold'])

    @staticmethod
    def transform_boxes(boxes, info):
        """Transform the bounding boxes from x1, y1, x2, y2 to x, y, width, height.

        Because the images were resized by the Resize transformation, we need the scale that
        was applied so we can revert the predictions to the scale of the original annotations.
        That scale is stored in the info of the image as info['resize_scale'].

        Arguments:
            boxes (torch.Tensor): A tensor with shape (n predictions, 4).
            info (dict): The information of the image that contains the original height and width.

        Returns:
            torch.Tensor: The transformed bounding boxes.
                Shape: (n predictions, 4)
        """
        # Update the scale
        if 'resize_scale' in info:
            boxes /= info['resize_scale']

        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        w, h = x2 - x1, y2 - y1

        return torch.stack([x1, y1, w, h], dim=1)

    def forward(self, images, infos, *_):
        """Forward pass through the network.

        Here we make the predictions over the images.

        Arguments:
            images (torch.Tensor): The tensor with the batch of images where to make predictions.
            infos (list): A list with the info of each image.
        """
        # Get the list of tuples (boxes, classifications)
        # With shapes (n predictions, 4) and (n predictions, n classes)
        predictions = self.model(images.to(self.device))
        for i, (boxes, classifications) in enumerate(predictions):
            # Check if there are no detections
            if boxes.shape[0] == 0:
                continue

            scores, labels = classifications.max(dim=1)
            boxes = self.transform_boxes(boxes, infos[i])
            image_id = infos[i]['id']

            for j, box in enumerate(boxes):
                score, label = scores[j], labels[j]
                try:
                    category_id = self.dataset.classes['ids'][int(label)]
                except KeyError:
                    # The model predicted a class that is not present in the dataset
                    continue
                self.predictions.append({'image_id': image_id,
                                         'category_id': category_id,
                                         'bbox': [float(point) for point in box],
                                         'score': float(score)})

    def evaluate_callback(self):
        """After the finish of the evaluation store the predictions in the results directory
        and use the pycocotools to compute the mAP.
        """
        result_dir = self.params['results']['dir']
        file = self.params['results']['file']
        file_path = os.path.join(result_dir, file)

        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

        with open(file_path, 'w') as file:
            file.write(json.dumps(self.predictions))

        self.dataset.compute_map(file_path)


class DLDENetFlickr32Evaluator(Flickr32Evaluator):
    """Extend the Flickr32Evaluator class to perform an evaluation over the dataset using the evaluation kit
    provided with it."""

    @staticmethod
    def get_base_params():
        """Add the thresholds to the base parameters."""
        return merge_dicts(
            super(DLDENetFlickr32Evaluator, DLDENetFlickr32Evaluator).get_base_params(),
            {
                'thresholds': {
                    'detection': 0.1,
                    'iou': 0.1
                }
            }
        )

    def eval_mode(self):
        """Put the model in evaluation mode and set the threshold for the detection."""
        params = self.params['thresholds']
        self.model.eval(threshold=params['detection'], iou_threshold=params['iou'])

    def get_transform(self):
        """Get the transformation to applies to the dataset according to the model."""
        return AugmentDetection(params=self.checkpoint['hyperparameters']['transform'], evaluation=True)

    def get_model(self):
        """Get the model to use to make the predictions.

        Returns:
            DLDENet: The model loaded from the checkpoint.
        """
        return DLDENet.from_checkpoint(self.checkpoint, self.device)

Classes

class DLDENetCOCOEvaluator (ancestors: Evaluator, PrintMixin)

An evaluator for the DLDENet.

It evaluates the model by computing the mAP over the COCO validation dataset.
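
A minimal usage sketch is shown below. The constructor arguments and the evaluate() entry point are assumptions (the base Evaluator's signature is not documented on this page); only the params structure mirrors the class defaults in the source code below.

# Hypothetical usage sketch: 'checkpoint' and evaluate() are assumed, not confirmed by this page.
from torchsight.evaluators.dlde import DLDENetCOCOEvaluator

evaluator = DLDENetCOCOEvaluator(
    checkpoint='./checkpoints/dldenet.pth.tar',
    params={'dataloader': {'batch_size': 4},
            'model': {'evaluation': {'threshold': 0.6, 'iou_threshold': 0.5}}})
evaluator.evaluate()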

Source code
class DLDENetCOCOEvaluator(Evaluator):
    """An evaluator for the DLDENet.

    It evaluates the model by computing the mAP over the COCO validation dataset.
    """
    params = {'results': {'dir': './evaluations/dldenet/coco', 'file': 'val2017.json'},
              'dataset': {'root': './datasets/coco',
                          'validation': 'val2017',
                          'class_names': (),
                          # Try to load the classes names from the checkpoint file
                          'class_names_from_checkpoint': True},
              'dataloader': {'batch_size': 8,
                             'shuffle': False,
                             'num_workers': 8},
              'model': {'with_tracked_means': False,
                        'evaluation': {'threshold': 0.5, 'iou_threshold': 0.5},
                        # As the tracked version was created when the trainer didn't save the hyperparameters
                        # we must provide them
                        'tracked': {'classes': 80,
                                    'resnet': 50,
                                    'features': {'pyramid': 256,
                                                 'regression': 256,
                                                 'classification': 256},
                                    'anchors': {'sizes': [32, 64, 128, 256, 512],
                                                'scales': [2 ** 0, 2 ** (1/3), 2 ** (2/3)],
                                                'ratios': [0.5, 1, 2]},
                                    'embedding_size': 256,
                                    'concentration': 15,
                                    'shift': 0.8}},
              # We don't define default params for the transforms because they are loaded from the checkpoint,
              # but they can be overridden by writing them here
              'transforms': {}}

    def __init__(self, *args, **kwargs):
        """Initialize the evaluator.

        Set the initial list with the predictions.
        """
        self.predictions = []

        super().__init__(*args, **kwargs)

    ###############################
    ###         GETTERS         ###
    ###############################

    def get_transform(self):
        """Get the transformations to apply to the dataset.

        Returns:
            torchvision.transforms.Compose: A composition of the transformations to apply.
        """
        params = self.checkpoint['hyperparameters']['transforms']
        params = merge_dicts(params, self.params['transforms'], verbose=True)

        return transforms.Compose([Resize(**params['resize']),
                                   ToTensor(),
                                   Normalize(**params['normalize'])])

    def get_dataset(self):
        """Get the COCO dataset for the evaluation.

        Returns:
            torch.utils.data.Dataset: The dataset to use for the evaluation.
        """
        params = self.params['dataset']
        transform = self.get_transform()
        class_names = params['class_names']

        if params['class_names_from_checkpoint']:
            if 'hyperparameters' in self.checkpoint:
                class_names = self.checkpoint['hyperparameters']['datasets']['class_names']
            else:
                print("Couldn't load the class_names from the checkpoint, it doesn't have the hyperparameters.")

        return CocoDataset(
            root=params['root'],
            dataset=params['validation'],
            classes_names=class_names,
            transform=transform)

    def get_dataloader(self):
        """Get the dataloader to use for the evaluation.

        Returns:
            torch.utils.data.Dataloader: The dataloader to use for the evaluation.
        """
        def collate(data):
            """Custom collate function to join the different images.

            It pads the images so they all have the same size.

            Arguments:
                data (sequence): Sequence of tuples as (image, annotations, images' infos, *_).

            Returns:
                torch.Tensor: The padded images.
                    Shape: (batch size, channels, height, width)
                list: The infos of the images.
            """
            images = [image for image, *_ in data]
            max_width = max([image.shape[-1] for image in images])
            max_height = max([image.shape[-2] for image in images])

            def pad_image(image):
                aux = torch.zeros((image.shape[0], max_height, max_width))
                aux[:, :image.shape[1], :image.shape[2]] = image
                return aux

            images = torch.stack([pad_image(image) for image, *_ in data], dim=0)
            infos = [info for _, _, info, *_ in data]

            return images, infos

        return DataLoader(dataset=self.dataset, collate_fn=collate, **self.params['dataloader'])

    def get_model(self):
        """Get the model to use to make the predictions.

        We can use the DLDENet with tracked means or the weighted version by changing
        the flag params['model']['with_tracked_means'].

        Returns:
            torch.nn.Module: The model to use to make the predictions over the data.
        """
        if self.params['model']['with_tracked_means']:
            params = {**self.params['model']['tracked'], 'device': self.device}
            state_dict = self.checkpoint['model']
            model = DLDENetWithTrackedMeans(**params)
            # load_state_dict() loads the weights in place and does not return the module
            model.load_state_dict(state_dict)
            return model

        return DLDENet.from_checkpoint(self.checkpoint, self.device)

    ###############################
    ###         METHODS         ###
    ###############################

    def eval_mode(self):
        """Put the model in evaluation mode and set the threshold for the detection."""
        params = self.params['model']['evaluation']
        self.model.eval(threshold=params['threshold'], iou_threshold=params['iou_threshold'])

    @staticmethod
    def transform_boxes(boxes, info):
        """Transform the bounding boxes from x1, y1, x2, y2 to x, y, width, height.

        Because the images were resized by the Resize transformation, we need the scale that
        was applied so we can revert the predictions to the scale of the original annotations.
        That scale is stored in the info of the image as info['resize_scale'].

        Arguments:
            boxes (torch.Tensor): A tensor with shape (n predictions, 4).
            info (dict): The information of the image that contains the original height and width.

        Returns:
            torch.Tensor: The transformed bounding boxes.
                Shape: (n predictions, 4)
        """
        # Update the scale
        if 'resize_scale' in info:
            boxes /= info['resize_scale']

        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        w, h = x2 - x1, y2 - y1

        return torch.stack([x1, y1, w, h], dim=1)

    def forward(self, images, infos, *_):
        """Forward pass through the network.

        Here we make the predictions over the images.

        Arguments:
            images (torch.Tensor): The tensor with the batch of images where to make predictions.
            infos (list): A list with the info of each image.
        """
        # Get the list of tuples (boxes, classifications)
        # With shapes (n predictions, 4) and (n predictions, n classes)
        predictions = self.model(images.to(self.device))
        for i, (boxes, classifications) in enumerate(predictions):
            # Check if there are no detections
            if boxes.shape[0] == 0:
                continue

            scores, labels = classifications.max(dim=1)
            boxes = self.transform_boxes(boxes, infos[i])
            image_id = infos[i]['id']

            for j, box in enumerate(boxes):
                score, label = scores[j], labels[j]
                try:
                    category_id = self.dataset.classes['ids'][int(label)]
                except KeyError:
                    # The model predicted a class that is not present in the dataset
                    continue
                self.predictions.append({'image_id': image_id,
                                         'category_id': category_id,
                                         'bbox': [float(point) for point in box],
                                         'score': float(score)})

    def evaluate_callback(self):
        """After the finish of the evaluation store the predictions in the results directory
        and use the pycocotools to compute the mAP.
        """
        result_dir = self.params['results']['dir']
        file = self.params['results']['file']
        file_path = os.path.join(result_dir, file)

        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

        with open(file_path, 'w') as file:
            file.write(json.dumps(self.predictions))

        self.dataset.compute_map(file_path)

Class variables

var params

Static methods

def transform_boxes(boxes, info)

Transform the bounding boxes from x1, y1, x2, y2 to x, y, width, height.

Because the images were resized by the Resize transformation, we need the scale that was applied so we can revert the predictions to the scale of the original annotations. That scale is stored in the info of the image as info['resize_scale'].

Arguments

boxes : torch.Tensor
A tensor with shape (n predictions, 4).
info : dict
The information of the image that contains the original height and width.

Returns

torch.Tensor: The transformed bounding boxes. Shape: (n predictions, 4)
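
A small, self-contained sketch of the same conversion, with made-up values:

import torch

# Two boxes in (x1, y1, x2, y2) format, at the resized scale.
boxes = torch.tensor([[50., 40., 150., 140.],
                      [10., 20., 60., 100.]])
resize_scale = 2.0  # the factor applied by the Resize transformation

# Revert to the original image scale.
boxes = boxes / resize_scale

# Convert corners to (x, y, width, height), the format COCO expects.
x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
coco_boxes = torch.stack([x1, y1, x2 - x1, y2 - y1], dim=1)
# coco_boxes -> [[25., 20., 50., 50.],
#                [ 5., 10., 25., 40.]]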

Source code
@staticmethod
def transform_boxes(boxes, info):
    """Transform the bounding boxes from x1, y1, x2, y2 to x, y, width, height.

    Because the images were resized by the Resize transformation, we need the scale that
    was applied so we can revert the predictions to the scale of the original annotations.
    That scale is stored in the info of the image as info['resize_scale'].

    Arguments:
        boxes (torch.Tensor): A tensor with shape (n predictions, 4).
        info (dict): The information of the image that contains the original height and width.

    Returns:
        torch.Tensor: The transformed bounding boxes.
            Shape: (n predictions, 4)
    """
    # Update the scale
    if 'resize_scale' in info:
        boxes /= info['resize_scale']

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    w, h = x2 - x1, y2 - y1

    return torch.stack([x1, y1, w, h], dim=1)

Methods

def __init__(self, *args, **kwargs)

Initialize the evaluator.

Set the initial list with the predictions.

Source code
def __init__(self, *args, **kwargs):
    """Initialize the evaluator.

    Set the initial list with the predictions.
    """
    self.predictions = []

    super().__init__(*args, **kwargs)
def eval_mode(self)

Put the model in evaluation mode and set the threshold for the detection.

Source code
def eval_mode(self):
    """Put the model in evaluation mode and set the threshold for the detection."""
    params = self.params['model']['evaluation']
    self.model.eval(threshold=params['threshold'], iou_threshold=params['iou_threshold'])
def evaluate_callback(self)

After the evaluation finishes, store the predictions in the results directory and use pycocotools to compute the mAP.
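
The file written here is a list of detections in the standard COCO results format ({'image_id', 'category_id', 'bbox', 'score'}). The scoring itself is delegated to CocoDataset.compute_map, whose internals are not shown here; as a sketch, such a results file is typically scored with the public pycocotools API:

# Illustrative only: compute_map may differ, and the annotations path is assumed.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('./datasets/coco/annotations/instances_val2017.json')
coco_dt = coco_gt.loadRes('./evaluations/dldenet/coco/val2017.json')

coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()  # prints AP, AP50, AP75, etc.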

Source code
def evaluate_callback(self):
    """After the finish of the evaluation store the predictions in the results directory
    and use the pycocotools to compute the mAP.
    """
    result_dir = self.params['results']['dir']
    file = self.params['results']['file']
    file_path = os.path.join(result_dir, file)

    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    with open(file_path, 'w') as file:
        file.write(json.dumps(self.predictions))

    self.dataset.compute_map(file_path)
def forward(self, images, infos, *_)

Forward pass through the network.

Here we make the predictions over the images.

Arguments

images : torch.Tensor
The tensor with the batch of images where to make predictions.
infos : list
A list with the info of each image.
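
For each image the model returns a (boxes, classifications) pair; the score and label of each detection are the row-wise maximum of the classification matrix. A small sketch with made-up values:

import torch

# One row per detection, one column per class.
classifications = torch.tensor([[0.10, 0.80, 0.05],
                                [0.60, 0.20, 0.15]])

scores, labels = classifications.max(dim=1)
# scores -> tensor([0.8000, 0.6000])  best confidence per detection
# labels -> tensor([1, 0])            index of the predicted class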
Source code
def forward(self, images, infos, *_):
    """Forward pass through the network.

    Here we make the predictions over the images.

    Arguments:
        images (torch.Tensor): The tensor with the batch of images where to make predictions.
        infos (list): A list with the info of each image.
    """
    # Get the list of tuples (boxes, classifications)
    # With shapes (n predictions, 4) and (n predictions, n classes)
    predictions = self.model(images.to(self.device))
    for i, (boxes, classifications) in enumerate(predictions):
        # Check if there are no detections
        if boxes.shape[0] == 0:
            continue

        scores, labels = classifications.max(dim=1)
        boxes = self.transform_boxes(boxes, infos[i])
        image_id = infos[i]['id']

        for j, box in enumerate(boxes):
            score, label = scores[j], labels[j]
            try:
                category_id = self.dataset.classes['ids'][int(label)]
            except KeyError:
                # The model predicted a class that is not present in the dataset
                continue
            self.predictions.append({'image_id': image_id,
                                     'category_id': category_id,
                                     'bbox': [float(point) for point in box],
                                     'score': float(score)})
def get_dataset(self)

Get the COCO dataset for the evaluation.

Returns

torch.utils.data.Dataset: The dataset to use for the evaluation.
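
By default the class names are read from the checkpoint's hyperparameters; to evaluate on an explicit subset of COCO classes, the flag can be disabled and the names given directly. A hypothetical params override (the class names are chosen only for illustration):

params = {'dataset': {'class_names_from_checkpoint': False,
                      'class_names': ('person', 'car', 'dog')}}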

Source code
def get_dataset(self):
    """Get the COCO dataset for the evaluation.

    Returns:
        torch.utils.data.Dataset: The dataset to use for the evaluation.
    """
    params = self.params['dataset']
    transform = self.get_transform()
    class_names = params['class_names']

    if params['class_names_from_checkpoint']:
        if 'hyperparameters' in self.checkpoint:
            class_names = self.checkpoint['hyperparameters']['datasets']['class_names']
        else:
            print("Couldn't load the class_names from the checkpoint, it doesn't have the hyperparameters.")

    return CocoDataset(
        root=params['root'],
        dataset=params['validation'],
        classes_names=class_names,
        transform=transform)
def get_model(self)

Get the model to use to make the predictions.

We can use the DLDENet with tracked means or the weighted version by changing the flag params['model']['with_tracked_means'].

Returns

torch.nn.Module: The model to use to make the predictions over the data.
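
The default branch loads the model with DLDENet.from_checkpoint. A sketch of that path outside the evaluator, assuming self.checkpoint is the dict produced by torch.load (consistent with the checkpoint['hyperparameters'] accesses elsewhere in this class):

# Sketch: the checkpoint path is a placeholder.
import torch
from torchsight.models import DLDENet

device = 'cuda' if torch.cuda.is_available() else 'cpu'
checkpoint = torch.load('./checkpoints/dldenet.pth.tar', map_location=device)
model = DLDENet.from_checkpoint(checkpoint, device)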

Source code
def get_model(self):
    """Get the model to use to make the predictions.

    We can use the DLDENet with tracked means or the weighted version by changing
    the flag params['model']['with_tracked_means'].

    Returns:
        torch.nn.Module: The model to use to make the predictions over the data.
    """
    if self.params['model']['with_tracked_means']:
        params = {**self.params['model']['tracked'], 'device': self.device}
        state_dict = self.checkpoint['model']
        model = DLDENetWithTrackedMeans(**params)
        # load_state_dict() loads the weights in place and does not return the module
        model.load_state_dict(state_dict)
        return model

    return DLDENet.from_checkpoint(self.checkpoint, self.device)
def get_transform(self)

Get the transformations to apply to the dataset.

Returns

torchvision.transforms.Compose: A composition of the transformations to apply.

Source code
def get_transform(self):
    """Get the transformations to apply to the dataset.

    Returns:
        torchvision.transforms.Compose: A composition of the transformations to apply.
    """
    params = self.checkpoint['hyperparameters']['transforms']
    params = merge_dicts(params, self.params['transforms'], verbose=True)

    return transforms.Compose([Resize(**params['resize']),
                               ToTensor(),
                               Normalize(**params['normalize'])])

Inherited members

class DLDENetFlickr32Evaluator (ancestors: Flickr32Evaluator, Evaluator, PrintMixin)

Extend the Flickr32Evaluator class to perform an evaluation over the dataset using the evaluation kit provided with it.

Source code
class DLDENetFlickr32Evaluator(Flickr32Evaluator):
    """Extend the Flickr32Evaluator class to perform an evaluation over the dataset using the evaluation kit
    provided with it."""

    @staticmethod
    def get_base_params():
        """Add the thresholds to the base parameters."""
        return merge_dicts(
            super(DLDENetFlickr32Evaluator, DLDENetFlickr32Evaluator).get_base_params(),
            {
                'thresholds': {
                    'detection': 0.1,
                    'iou': 0.1
                }
            }
        )

    def eval_mode(self):
        """Put the model in evaluation mode and set the threshold for the detection."""
        params = self.params['thresholds']
        self.model.eval(threshold=params['detection'], iou_threshold=params['iou'])

    def get_transform(self):
        """Get the transformation to applies to the dataset according to the model."""
        return AugmentDetection(params=self.checkpoint['hyperparameters']['transform'], evaluation=True)

    def get_model(self):
        """Get the model to use to make the predictions.

        Returns:
            DLDENet: The model loaded from the checkpoint.
        """
        return DLDENet.from_checkpoint(self.checkpoint, self.device)

Static methods

def get_base_params()

Add the thresholds to the base parameters.
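
merge_dicts is used here to layer the thresholds on top of the base parameters. An illustrative example of the intended result (the base keys are placeholders, and merge_dicts is assumed to perform a recursive merge):

from torchsight.utils import merge_dicts

base = {'dataset': {'root': './datasets/flickr32'}}     # placeholder base params
extra = {'thresholds': {'detection': 0.1, 'iou': 0.1}}

merged = merge_dicts(base, extra)
# merged -> {'dataset': {'root': './datasets/flickr32'},
#            'thresholds': {'detection': 0.1, 'iou': 0.1}}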

Source code
@staticmethod
def get_base_params():
    """Add the thresholds to the base parameters."""
    return merge_dicts(
        super(DLDENetFlickr32Evaluator, DLDENetFlickr32Evaluator).get_base_params(),
        {
            'thresholds': {
                'detection': 0.1,
                'iou': 0.1
            }
        }
    )

Methods

def eval_mode(self)

Put the model in evaluation mode and set the threshold for the detection.

Source code
def eval_mode(self):
    """Put the model in evaluation mode and set the threshold for the detection."""
    params = self.params['thresholds']
    self.model.eval(threshold=params['detection'], iou_threshold=params['iou'])
def get_model(self)

Get the model to use to make the predictions.

Returns

DLDENet
The model loaded from the checkpoint.
Source code
def get_model(self):
    """Get the model to use to make the predictions.

    Returns:
        DLDENet: The model loaded from the checkpoint.
    """
    return DLDENet.from_checkpoint(self.checkpoint, self.device)

Inherited members