# Source code for deepvisiontools.data.additional_augmentations

import torch
import torchvision.transforms.v2 as T
from torchvision.transforms.v2 import Transform
from typing import Sequence, Union, Tuple
import random as rd
from pathlib import Path
from deepvisiontools.preprocessing.image import load_image
from torchvision.tv_tensors import BoundingBoxes, Mask
from torch import Tensor
import copy



# [docs]
class RandomCropAndResize(Transform):
    """
    Randomly crop the inputs, then resize the crop, with probability ``p``.

    Chains ``RandomCrop`` and ``Resize`` from torchvision.transforms.v2.
    NB : the resize is applied only when the crop is applied, and always in that case;
    otherwise the inputs are handed back untouched.

    Args:
        crop (``Union[int, Sequence[int]]``): Size forwarded to ``RandomCrop``
        resize (``Union[int, Sequence[int]]``): Size forwarded to ``Resize``
        p (``float``, **optional**): probability of applying crop + resize. Defaults to 0.5.


    **Methods**:
    """

    def __init__(
        self,
        crop: Union[int, Sequence[int]],
        resize: Union[int, Sequence[int]],
        p=0.5,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Both sub-transforms are built once here and reused on every call.
        self.p = p
        self.crop = T.RandomCrop(crop)
        self.resize = T.Resize(resize)

    def forward(self, *inputs):
        """Return ``inputs`` cropped then resized, or the untouched ``inputs`` tuple when skipped."""
        # Single uniform draw: the transform is skipped when the draw is >= p.
        if torch.rand(1) >= self.p:
            return inputs
        # The whole tuple is handed to each sub-transform as one argument.
        cropped = self.crop.forward(inputs)
        return self.resize.forward(cropped)




# [docs]
class RandomCenterCropAndResize(Transform):
    """
    Center-crop the inputs, then resize the crop, with probability ``p``.

    Chains ``CenterCrop`` and ``Resize`` from torchvision.transforms.v2.
    NB : the resize is applied only when the crop is applied, and always in that case;
    otherwise the inputs are handed back untouched.

        Args:
            crop (``Union[int, Sequence[int]]``): Size forwarded to ``CenterCrop``
            resize (``Union[int, Sequence[int]]``): Size forwarded to ``Resize``
            p (``float``, **optional**): probability of applying crop + resize. Defaults to 0.5.
    """

    def __init__(self, crop: Sequence[int], resize: Sequence[int], p=0.5, **kwargs):
        super().__init__(**kwargs)
        # Both sub-transforms are built once here and reused on every call.
        self.p = p
        self.crop = T.CenterCrop(crop)
        self.resize = T.Resize(resize)

    def forward(self, *inputs):
        """Return ``inputs`` center-cropped then resized, or the untouched ``inputs`` tuple when skipped."""
        # Single uniform draw: the transform is skipped when the draw is >= p.
        if torch.rand(1) >= self.p:
            return inputs
        # The whole tuple is handed to each sub-transform as one argument.
        cropped = self.crop.forward(inputs)
        return self.resize.forward(cropped)




# [docs]
class RandomPadAndResize(Transform):
    """
    With a given probability, apply Pad and Resize from torchvision.transforms.v2. This looks like a zoom out effect by decreasing spatial resolution.
    NB : here we resize only and systematically if padded.

        Args:
            maxpad (``Union[int, Sequence[int]]``): inclusive upper bound of the random padding. Either a single int shared by the four borders, or a sequence of 4 ints given as (t, l, b, r). A bound of 0 disables padding on that border.
            resize (``Tuple[int, int]``): Size to resize
            p (``float``, **optional**): probability to apply transformation. Defaults to 0.5.

        Raises:
            ValueError: if ``maxpad`` is a sequence whose length is not 4.
    """

    def __init__(
        self,
        maxpad: Union[int, Sequence[int]],
        resize: Tuple[int, int],
        p=0.5,
        **kwargs,
    ):

        super().__init__(**kwargs)
        self.p = p
        max_pad = (maxpad,) * 4 if isinstance(maxpad, int) else tuple(maxpad)
        if len(max_pad) != 4:
            # Fail at construction time instead of with an IndexError deep inside forward.
            raise ValueError(
                f"maxpad must be an int or a sequence of 4 ints (t, l, b, r). Got {maxpad}"
            )
        self.max_pad = max_pad
        self.resize = T.Resize(resize)

    def forward(self, *inputs):
        """Return ``inputs`` randomly padded then resized, or the untouched ``inputs`` tuple when skipped."""
        if torch.rand(1) >= self.p:
            return inputs
        # randint is inclusive on both ends: the documented maximum can be reached
        # and a bound of 0 is valid (randrange(0, 0) raised ValueError).
        t, l, b, r = (rd.randint(0, bound) for bound in self.max_pad)
        # torchvision's Pad expects [left, top, right, bottom], not (t, l, b, r).
        padder = T.Pad([l, t, r, b])
        inputs = padder(inputs)
        inputs = self.resize(inputs)
        return inputs




# [docs]
class RandomChangeBackground(Transform):
    """With a given probability p, swap image background. New background is taken from an image folder for which path is provided.
    Note 1 : it is implemented only for instance_mask, semantic_mask and bbox data type
    Note 2 : new background image type must be one of .jpg, .jpeg, .png, .tif, .tiff (extension matching is case-insensitive)
    Args:
        background_dir_path (``Union[str, Path]``): Path to background folder
        p (``float``, **optional**): Probability. Defaults to 0.5.
    """

    IMPLEMENTED_TVTENSOR = [
        BoundingBoxes,
        Mask,
    ]  # list of available hooks, i.e implemented for these type of tv_tensor

    IMG_TYPE = [
        ".jpg",
        ".jpeg",
        ".png",
        ".tif",
        ".tiff",
        ".PNG",
        ".JPG",
        ".JPEG",
        ".TIF",
        ".TIFF",
    ]

    def __init__(self, background_dir_path: Union[str, Path], p: float = 0.5, **kwargs):

        super().__init__(**kwargs)
        path = Path(background_dir_path)
        # Compare lower-cased suffixes instead of globbing every case variant:
        # on case-insensitive filesystems "*.png" and "*.PNG" match the same
        # files, which listed (and weighted) each background twice.
        allowed_suffixes = {ext.lower() for ext in RandomChangeBackground.IMG_TYPE}
        # A missing folder yields an empty list, as the glob-based lookup did.
        candidates = path.iterdir() if path.is_dir() else ()
        # Sorted so that a fixed torch seed selects the same file on every machine.
        self.bckg_imgs = sorted(
            f for f in candidates if f.suffix.lower() in allowed_suffixes
        )
        self.p = p
        self._truth_extracter = _ExtractTruthFromTargetImage()

    def forward(self, *inputs):
        """Return ``(image, target)`` with the image background swapped, or the untouched ``inputs`` tuple when skipped.

        Raises:
            AssertionError: if ``inputs`` is not exactly (image, target) or target is not a supported tv_tensor.
            RuntimeError: if no background image was found in the folder given at construction.
        """
        # ">=" (like the other random transforms of this module) so that p=0 never applies.
        if torch.rand(1) >= self.p:
            return inputs
        assert (
            len(inputs) == 2
        ), f"RandomChangeBackground is implemented only for exactly image and target input provided. You can't provide only image or additional item like scores. Got {inputs}"
        image, target = inputs
        # isinstance (not an exact type match) so subclasses of the supported
        # tv_tensors are accepted, as the truth extracter already does.
        assert isinstance(
            target, tuple(RandomChangeBackground.IMPLEMENTED_TVTENSOR)
        ), f"RandomChangeBackground is implemented only for {RandomChangeBackground.IMPLEMENTED_TVTENSOR} torchvision tv_tensors. Got {type(target)}"
        if not self.bckg_imgs:
            # torch.randint(0, 0, ...) would raise an opaque RuntimeError here.
            raise RuntimeError(
                "RandomChangeBackground found no background image in the provided "
                f"folder (supported types: {RandomChangeBackground.IMG_TYPE})"
            )

        # randomly select background image
        original_dtype = image.dtype
        image = image.float()
        idx = torch.randint(0, len(self.bckg_imgs), (1,)).item()
        bck_img = load_image(self.bckg_imgs[idx]).to(image.device).float()
        # NOTE(review): assumes load_image returns a tensor with the same channel
        # count and value range as `image` — confirm against load_image.
        bck_img = T.Resize(list(image.shape[-2:]))(bck_img)
        # Run the extracter on an all-zero image: the -1 marker it writes then
        # identifies background positions exactly, so genuine -1.0 pixel values
        # (e.g. images normalised to [-1, 1]) are no longer mistaken for background.
        background = self._truth_extracter(torch.zeros_like(image), target) == -1.0
        swapped = torch.where(background, bck_img, image)
        return swapped.to(original_dtype), target



class _ExtractTruthFromTargetImage:
    """Handle background removing from target. Background is asserted as -1 to avoid errors removing 0 entries from image.

    Calling an instance returns a copy of ``image`` in which every position the
    target marks as background is set to -1. Pixels of ``image`` that already
    equal -1 cannot be told apart from the marker in the returned tensor.
    """

    def __call__(self, image: Tensor, targ: Tensor) -> Tensor:
        """Dispatch on the target type.

        Raises:
            TypeError: if ``targ`` is neither a ``Mask`` nor a ``BoundingBoxes``.
        """
        if isinstance(targ, Mask):
            return self._call_mask(image, targ)
        if isinstance(targ, BoundingBoxes):
            return self._call_bbox(image, targ)
        # Previously fell through to an UnboundLocalError on the return statement.
        raise TypeError(
            f"Background extraction is implemented only for Mask and BoundingBoxes targets. Got {type(targ)}"
        )

    def _call_mask(self, image: Tensor, targ: Mask) -> Tensor:
        """Return a copy of ``image`` with -1 written wherever ``targ`` equals 0."""
        # NOTE(review): the indexing assumes image is (C, H, W) and targ is (H, W) — confirm with callers.
        removed_bckg = image.clone()
        removed_bckg[:, targ == 0] = -1.0
        return removed_bckg

    def _call_bbox(self, image: Tensor, targ: BoundingBoxes) -> Tensor:
        """Rasterise the boxes into a binary mask, then delegate to ``_call_mask``."""
        # NOTE(review): coordinates are read as (x1, y1, x2, y2); other BoundingBoxes formats are not converted.
        height, width = image.shape[-2:]
        # Built on the image device so the mask and the image live together.
        mask = Mask(torch.zeros((height, width), device=image.device))
        for box in targ:
            # Truncate to signed Python ints before clamping: the former unsigned
            # cast made the "negative -> 0" clamp unreachable.
            x1, y1, x2, y2 = (max(int(coord), 0) for coord in box.tolist())
            mask[y1:y2, x1:x2] = 1
        return self._call_mask(image, mask)