Source code for deepvisiontools.models.yolo.utils

from deepvisiontools.formats import BaseFormat
from deepvisiontools import Configuration
from math import ceil, floor
from typing import Tuple
from torchvision.ops import nms
from torch import Tensor


[docs] def yolo_pad_requirements(h: int, w: int, required=32) -> Tuple[int, int, int, int]: """Conmpute pad coordinates to ensure /32 or /64 yolo criterium on height and width Args: h (``int``): height w (``int``): width Returns: ``Tuple[int, int, int, int]``: - top, left, right, bottom """ diff_h, diff_w = h % required, w % required pad_h = required - diff_h if diff_h > 0 else 0 pad_w = required - diff_w if diff_w > 0 else 0 # define padding for each border if pad_h or pad_w: half_h, half_w = pad_h / 2, pad_w / 2 left, top, right, bottom = ( ceil(half_w), ceil(half_h), floor(half_w), floor(half_h), ) else: left, top, right, bottom = (0, 0, 0, 0) return (top, left, right, bottom)
[docs] def confidence_filter(format: BaseFormat) -> BaseFormat: """Filter Format according to confidence threshold from Configuration() Args: format (``Format``) Returns: ``Format`` """ assert format.scores != None, "Scores are not in format: cannot filter." new_format, _ = format[format.scores > Configuration().model_confidence_threshold] return new_format
[docs] def box_nms_filter(format: BaseFormat) -> BaseFormat: """Filter Format according to nms threshold from Configuration() Args: format (``Format``) Returns: ``Format`` """ if format.nb_object == 0: return format original_format = format.data.format format.data.format = "XYXY" indexes: Tensor = nms( format.data.value.float(), scores=format.scores.float(), iou_threshold=Configuration().model_nms_threshold, ) indexes = indexes.to(Configuration().device).sort()[0] format.data.format = original_format new_format, _ = format[indexes] return new_format
[docs] def normalize_boxes(boxes: Tensor, img_size: Tuple[int, int]) -> Tensor: """Normalize boxes to 1 -> h, w -> [0, 1] according to img size Args: boxes (``Tensor``): boxes Tensor img_size (``Tuple[int, int]``): image size Returns: ``Tensor``: - normalized boxes """ boxes = boxes.float() image_w = img_size[1] image_h = img_size[0] boxes[:, 0] = boxes[:, 0] / image_w boxes[:, 1] = boxes[:, 1] / image_h boxes[:, 2] = boxes[:, 2] / image_w boxes[:, 3] = boxes[:, 3] / image_h return boxes