# Source code for deepvisiontools.preprocessing.preprocessing

from typing import List, Tuple, Union
import torch
from torch import Tensor
import torchvision.transforms.v2 as T
from torchvision.io import read_image
from pathlib import Path



# [docs]
def build_preprocessing(
    mean: Union[List[float], Tuple[float, ...]] = (0.485, 0.456, 0.406),
    std: Union[List[float], Tuple[float, ...]] = (0.229, 0.224, 0.225),
) -> T.Compose:
    """Build the preprocessing pipeline: float32 conversion followed by normalization.

    Default values are the ImageNet per-channel statistics.

    Args:
        mean (Union[List[float], Tuple[float, ...]], optional): mean value for each channel.
            Defaults to (0.485, 0.456, 0.406).
        std (Union[List[float], Tuple[float, ...]], optional): std value for each channel.
            Defaults to (0.229, 0.224, 0.225).

    Returns:
        T.Compose: ``T.ConvertImageDtype(torch.float32)`` followed by
        ``T.Normalize(mean, std)``.
    """
    # The defaults are immutable tuples so they cannot be altered between calls.
    # Fresh lists are handed to Normalize so the transform never shares state with
    # the caller's sequences.
    return T.Compose(
        [
            # torch.float32 is universal for DL
            T.ConvertImageDtype(dtype=torch.float32),
            T.Normalize(mean=list(mean), std=list(std)),
        ]
    )




# [docs]
def get_channels_statistics(image_folder: Union[str, Path]) -> Tuple[Tensor, Tensor]:
    """Iterate over an image folder and compute the per-channel mean and std of the dataset.

    Each image is read with ``read_image``, converted to float32 and reduced to its
    per-channel mean and per-channel mean of squares. These per-image values are then
    averaged over the images, so every image carries the same weight whatever its size.

    Args:
        image_folder (Union[str, Path]): path to the folder of images. Every regular file
            located directly in the folder is read as an image; sub-directories are skipped.

    Returns:
        Tuple[Tensor, Tensor]: ``(mean, std)``, each holding one value per channel.

    Raises:
        ValueError: if the folder does not contain any file.
    """
    image_folder = Path(image_folder)
    # transform used to convert image values to float32
    scaler = T.ConvertImageDtype(dtype=torch.float32)

    # iterdir() yields entries in arbitrary order; sorting keeps the floating point
    # accumulation reproducible from one run to the next.
    image_paths = sorted(path for path in image_folder.iterdir() if path.is_file())
    if not image_paths:
        # fail with an explicit message instead of a ZeroDivisionError further down
        raise ValueError(f"No image file found in '{image_folder}'.")

    # running sums of the per-image channel means and channel means of squares
    channels_sum, channels_squared_sum = 0, 0
    for img_path in image_paths:
        # load image as tensor and convert its values
        image = scaler(read_image(img_path.as_posix()))
        channels_sum += torch.mean(image, dim=[1, 2])
        channels_squared_sum += torch.mean(image**2, dim=[1, 2])

    # compute dataset mean & std: Var[x] = E[x^2] - E[x]^2
    n_images = len(image_paths)
    mean = channels_sum / n_images
    variance = channels_squared_sum / n_images - mean**2
    # rounding errors can push the variance of a (near-)constant channel slightly
    # below zero, which would turn the square root into NaN
    std = torch.clamp(variance, min=0.0).sqrt()

    return mean, std