Skip to content

Object Detection utilities

Kano aims to support visualizing and extracting bounding boxes without the need to manually handle the various box formats, such as xyxy, xywh, or scaled xywh:

  • xywh2xyxy: convert a box from xywh format (x_center, y_center, width, height - the model input/output format) to xyxy format (x_min, y_min, x_max, y_max - the format used to draw boxes).
  • extract_bbox_area: get cropped box image from the image
  • draw_bbox: draw bounding box on the image.

kano.detect_utils.xywh2xyxy(xywh)

Converts bounding box coordinates from (x_center, y_center, width, height) format to (x_min, y_min, x_max, y_max) format.

Parameters:

Name Type Description Default
xywh np.array with shape (4,)

A tuple containing (x_center, y_center, width, height) of the bounding box.

required

Returns:

Name Type Description
xyxy tuple(int)

xyxy location of the bounding box.

Source code in kano\detect_utils.py
def xywh2xyxy(xywh):
    """
    Translate a center-based bounding box into its corner-based representation.

    Args:
        xywh (np.array) with shape (4,): The box given as (x_center, y_center, width, height).

    Returns:
        xyxy (tuple(int)): The box given as (x_min, y_min, x_max, y_max). Every coordinate is passed through int(), so fractional parts are truncated toward zero.
    """

    cx, cy, box_w, box_h = xywh

    # Distance from the center to each edge along both axes.
    half_w = box_w / 2
    half_h = box_h / 2

    return (
        int(cx - half_w),
        int(cy - half_h),
        int(cx + half_w),
        int(cy + half_h),
    )

kano.detect_utils.extract_bbox_area(image, bbox)

Return cropped image from the given bounding box area

Parameters:

Name Type Description Default
image np.array with shape (H, W, 3)

image to extract the box

required
bbox np.array with shape (4,)

xyxy location of the box

required

Returns:

Name Type Description
cropped_image array

with shape (new_H, new_W, 3) based on bbox

Source code in kano\detect_utils.py
def extract_bbox_area(image, bbox):
    """
    Return cropped image from the given bounding box area

    Args:
        image (np.array) with shape (H, W, 3): image to extract the box
        bbox (np.array) with shape (4,): xyxy location of the box, i.e.
            (x_min, y_min, x_max, y_max). Values are converted with int(),
            and negative values are clamped to 0.

    Returns:
        cropped_image (np.array): with shape (new_H, new_W, 3) based on bbox.
            The result is a copy, so modifying it does not affect ``image``.
            Parts of the box outside the image are dropped; a box that lies
            entirely outside yields an empty crop.

    Raises:
        ValueError: If bbox does not contain exactly four values.
    """

    left, top, right, bottom = (int(value) for value in bbox)

    # A negative slice bound is interpreted as "count from the end", which
    # would silently return the wrong region for boxes that extend past the
    # top/left edge. Clamp to 0 instead; bounds past the bottom/right edge
    # are already truncated by slicing.
    left, top = max(left, 0), max(top, 0)
    right, bottom = max(right, 0), max(bottom, 0)

    # Slice first, then copy: only the cropped region is duplicated rather
    # than the whole image.
    return image[top:bottom, left:right].copy()

kano.detect_utils.draw_bbox(image, bbox, bbox_type='xyxy', bbox_color=(0, 0, 255), label=None)

Draws a bounding box on the image and optionally draws a multi-line label.

Parameters:

Name Type Description Default
image ndarray or str

The image on which the bounding box will be drawn.

required
bbox list or tuple or ndarray

The bounding box coordinates. If it's a list, it should be in the format specified by bbox_type.

required
bbox_type str

Type of bounding box coordinates. Should be either "xyxy" or "xywh" or "s_xywh".

'xyxy'
bbox_color tuple

Color of the bounding box in BGR format.

(0, 0, 255)
label str

Label to be displayed alongside the bounding box. Supports multiple lines with '\n' separating lines.

None

Returns:

Type Description
ndarray

np.ndarray: Image with the bounding box and label drawn.

Source code in kano\detect_utils.py
def draw_bbox(
    image: Union[np.ndarray, str],
    bbox: Union[List[float], Tuple[float, ...], np.ndarray],
    bbox_type: str = "xyxy",
    bbox_color: Tuple[int, int, int] = (0, 0, 255),
    label: Optional[str] = None,
) -> np.ndarray:
    """
    Draws a bounding box on the image and optionally draws a multi-line label.

    Neither ``image`` nor ``bbox`` is modified; drawing happens on a copy.

    Args:
        image (np.ndarray or str): The image on which the bounding box will be drawn. A string is treated as a path and loaded with cv2.imread.
        bbox (list or tuple or np.ndarray): The four bounding box coordinates, in the format specified by bbox_type.
        bbox_type (str): Type of bounding box coordinates. Should be either "xyxy" or "xywh" or "s_xywh". A type containing "s_" is treated as scaled: the coordinates are multiplied by the image width/height before drawing.
        bbox_color (tuple): Color of the bounding box in BGR format.
        label (str, optional): Label to be displayed alongside the bounding box. Multiple lines are separated by a newline character.

    Returns:
        np.ndarray: Image with the bounding box and label drawn.

    Raises:
        ValueError: If the image path cannot be read, or if bbox_type contains neither "xyxy" nor "xywh".
    """
    if isinstance(image, str):
        temp_image = cv2.imread(image)
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than on `.shape` below.
        if temp_image is None:
            raise ValueError(f"Could not read image from path: {image}")
    else:
        temp_image = image.copy()

    image_height, image_width = temp_image.shape[:2]

    # Always build a fresh float array. This accepts lists, tuples and
    # ndarrays alike, never mutates the caller's bbox, and keeps the in-place
    # scaling below valid whatever dtype the input had.
    temp_bbox = np.array(bbox, dtype=np.float64)
    if "s_" in bbox_type:
        temp_bbox *= np.array(
            [image_width, image_height, image_width, image_height]
        )

    temp_bbox = temp_bbox.astype(np.int64)

    if "xyxy" in bbox_type:
        # Plain Python ints (same values) to match what xywh2xyxy returns.
        x_min, y_min, x_max, y_max = (int(value) for value in temp_bbox)
    elif "xywh" in bbox_type:
        x_min, y_min, x_max, y_max = xywh2xyxy(temp_bbox)
    else:
        raise ValueError("Invalid bounding box type")

    cv2.rectangle(temp_image, (x_min, y_min), (x_max, y_max), bbox_color, 2)

    if label is not None:
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale, thickness, pad = get_font_config(image_height)

        label_lines = label.split("\n")
        # Use a sample string to estimate the height of one text line so the
        # whole label stack can be positioned before measuring each line.
        (_, text_height), _ = cv2.getTextSize(
            "sample", font, font_scale, thickness
        )
        # Start above the box by the estimated height of all but the last
        # line, so the last line's background begins at the box's top edge.
        y_offset = y_min - (text_height + pad) * (len(label_lines) - 1)

        for line in label_lines:
            (text_width, text_height), _ = cv2.getTextSize(
                line, font, font_scale, thickness
            )

            # Filled rectangle (thickness -1) behind the text, in the box color.
            background_position = (x_min, y_offset)
            background_end_position = (
                x_min + text_width,
                y_offset - text_height - pad,
            )
            cv2.rectangle(
                temp_image,
                background_position,
                background_end_position,
                bbox_color,
                -1,
            )
            cv2.putText(
                temp_image,
                line,
                (x_min, y_offset - pad // 2),
                font,
                font_scale,
                (255, 255, 255),
                thickness,
            )

            # Move down to the baseline of the next label line.
            y_offset += text_height + pad

    return temp_image