diff --git a/docs/detection/utils/mask_metrics.md b/docs/detection/utils/mask_metrics.md
new file mode 100644
index 0000000000..c3e684a740
--- /dev/null
+++ b/docs/detection/utils/mask_metrics.md
@@ -0,0 +1,59 @@
+---
+comments: true
+status: new
+---
+
+# Mask Metrics
+
+These utilities compare a predicted binary segmentation mask with a target mask.
+They are useful for segmentation model debugging, annotation QA, dataset
+inspection, and contour-sensitive evaluation workflows.
+
+Region-overlap metrics such as Dice and IoU focus on how much foreground area is
+shared. Boundary metrics focus on how well the mask contours align. The
+`tolerance` parameter controls how forgiving boundary matching is in pixel space.
+
+Empty-mask behavior is explicit across all functions:
+
+- both masks empty: `1.0`
+- one mask empty: `0.0`
+
+Current limitations:
+
+- single mask pair only
+- integer pixel tolerance only
+- no ratio-based tolerance
+- no spacing-aware tolerance
+- no visualization or report helpers yet
+
+Future work:
+
+- `compare_masks` quality report helper
+- false-positive / false-negative / true-positive mask decomposition
+- boundary error maps
+- visualization helper or `ComparisonAnnotator` integration
+- dataset-level segmentation QA cookbook
+
+
+
+:::supervision.detection.utils.mask_metrics.mask_iou
+
+
+
+:::supervision.detection.utils.mask_metrics.dice_coefficient
+
+
+
+:::supervision.detection.utils.mask_metrics.boundary_iou
+
+
+
+:::supervision.detection.utils.mask_metrics.boundary_f_score
diff --git a/mkdocs.yml b/mkdocs.yml
index c5f10c90ae..b58b55125f 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -49,6 +49,7 @@ nav:
- Compact Mask: detection/compact_mask.md
- Converters: detection/utils/converters.md
- IoU and NMS: detection/utils/iou_and_nms.md
+ - Mask Metrics: detection/utils/mask_metrics.md
- Boxes: detection/utils/boxes.md
- Masks: detection/utils/masks.md
- Polygons: detection/utils/polygons.md
diff --git a/src/supervision/__init__.py b/src/supervision/__init__.py
index 2f98038a69..6ec0b099e7 100644
--- a/src/supervision/__init__.py
+++ b/src/supervision/__init__.py
@@ -91,6 +91,12 @@
mask_non_max_suppression,
oriented_box_iou_batch,
)
+from supervision.detection.utils.mask_metrics import (
+ boundary_f_score,
+ boundary_iou,
+ dice_coefficient,
+ mask_iou,
+)
from supervision.detection.utils.masks import (
calculate_masks_centroids,
contains_holes,
@@ -208,6 +214,8 @@
"VideoInfo",
"VideoSink",
"approximate_polygon",
+ "boundary_f_score",
+ "boundary_iou",
"box_iou",
"box_iou_batch",
"box_iou_batch_with_jaccard",
@@ -221,6 +229,7 @@
"contains_multiple_segments",
"crop_image",
"cv2_to_pillow",
+ "dice_coefficient",
"draw_filled_polygon",
"draw_filled_rectangle",
"draw_image",
@@ -242,6 +251,7 @@
"is_valid_hex",
"letterbox_image",
"list_files_with_extensions",
+ "mask_iou",
"mask_iou_batch",
"mask_non_max_merge",
"mask_non_max_suppression",
diff --git a/src/supervision/detection/utils/mask_metrics.py b/src/supervision/detection/utils/mask_metrics.py
new file mode 100644
index 0000000000..ce735985e7
--- /dev/null
+++ b/src/supervision/detection/utils/mask_metrics.py
@@ -0,0 +1,322 @@
+from __future__ import annotations
+
+from typing import Any, cast
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+
+from supervision.detection.compact_mask import CompactMask
+
+# Error-message suffixes. `_coerce_single_mask` prepends the name of the
+# offending argument ("prediction" or "target") before raising.
+_MASK_DIMENSIONS_ERROR = (
+    "Mask must be a 2D array or a single-mask batch with shape (1, H, W)."
+)
+_COMPACT_MASK_LENGTH_ERROR = "CompactMask inputs must contain exactly one mask."
+
+
+def _coerce_single_mask(
+ mask: npt.NDArray[Any] | CompactMask, mask_name: str
+) -> npt.NDArray[np.bool_]:
+ if isinstance(mask, CompactMask):
+ if len(mask) != 1:
+ raise ValueError(f"{mask_name} {_COMPACT_MASK_LENGTH_ERROR}")
+ return cast(npt.NDArray[np.bool_], np.asarray(mask[0], dtype=bool))
+
+ mask_array = np.asarray(mask)
+ if mask_array.ndim == 2:
+ return cast(npt.NDArray[np.bool_], mask_array.astype(bool, copy=False))
+ if mask_array.ndim == 3 and mask_array.shape[0] == 1:
+ return cast(npt.NDArray[np.bool_], mask_array[0].astype(bool, copy=False))
+ raise ValueError(f"{mask_name} {_MASK_DIMENSIONS_ERROR}")
+
+
+def _validate_mask_pair(
+ prediction: npt.NDArray[Any] | CompactMask,
+ target: npt.NDArray[Any] | CompactMask,
+) -> tuple[npt.NDArray[np.bool_], npt.NDArray[np.bool_]]:
+ prediction_mask = _coerce_single_mask(prediction, "prediction")
+ target_mask = _coerce_single_mask(target, "target")
+
+ if prediction_mask.shape != target_mask.shape:
+ raise ValueError(
+ "prediction and target must have the same shape. "
+ f"Got {prediction_mask.shape} and {target_mask.shape}."
+ )
+ return prediction_mask, target_mask
+
+
+def _validate_tolerance(tolerance: int) -> int:
+ if isinstance(tolerance, (bool, np.bool_)) or not isinstance(
+ tolerance, (int, np.integer)
+ ):
+ raise ValueError("tolerance must be a non-negative integer.")
+ if tolerance < 0:
+ raise ValueError("tolerance must be a non-negative integer.")
+ return int(tolerance)
+
+
+def _resolve_empty_mask_score(
+ prediction_mask: npt.NDArray[np.bool_], target_mask: npt.NDArray[np.bool_]
+) -> float | None:
+ prediction_empty = not prediction_mask.any()
+ target_empty = not target_mask.any()
+
+ if prediction_empty and target_empty:
+ return 1.0
+ if prediction_empty or target_empty:
+ return 0.0
+ return None
+
+
+def _extract_boundary(mask: npt.NDArray[np.bool_]) -> npt.NDArray[np.bool_]:
+ mask_uint8 = mask.astype(np.uint8, copy=False)
+ kernel = np.ones((3, 3), dtype=np.uint8)
+ eroded_mask = cv2.erode(mask_uint8, kernel, borderType=cv2.BORDER_CONSTANT)
+ return cast(
+ npt.NDArray[np.bool_],
+ np.logical_and(mask, np.logical_not(eroded_mask.astype(bool, copy=False))),
+ )
+
+
+def _build_tolerance_kernel(tolerance: int) -> npt.NDArray[np.uint8]:
+ kernel_size = 2 * tolerance + 1
+ return cv2.getStructuringElement(
+ cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)
+ ).astype(np.uint8, copy=False)
+
+
+def _expand_boundary(
+ boundary_mask: npt.NDArray[np.bool_], tolerance: int
+) -> npt.NDArray[np.bool_]:
+ if tolerance == 0:
+ return boundary_mask
+ kernel = _build_tolerance_kernel(tolerance)
+ dilated_boundary = cv2.dilate(
+ boundary_mask.astype(np.uint8, copy=False),
+ kernel,
+ borderType=cv2.BORDER_CONSTANT,
+ )
+ return cast(npt.NDArray[np.bool_], np.asarray(dilated_boundary, dtype=bool))
+
+
+def mask_iou(
+ prediction: npt.NDArray[Any] | CompactMask,
+ target: npt.NDArray[Any] | CompactMask,
+) -> float:
+ """
+ Compute Intersection over Union (IoU) for a single pair of segmentation masks.
+
+ Args:
+ prediction: Predicted binary mask. Accepts a 2D array, a
+ single-mask batch with shape `(1, H, W)`, or a
+ :class:`~supervision.detection.compact_mask.CompactMask`
+ containing exactly one mask.
+ target: Target binary mask. Accepts the same input forms as
+ `prediction`.
+
+ Returns:
+ IoU score as a Python float.
+
+ Raises:
+ ValueError: If shapes differ or the inputs do not represent exactly one
+ mask each.
+
+ Examples:
+ ```pycon
+ >>> import numpy as np
+ >>> import supervision as sv
+ >>> prediction = np.array([[1, 1], [0, 0]], dtype=bool)
+ >>> target = np.array([[1, 0], [0, 0]], dtype=bool)
+ >>> sv.mask_iou(prediction, target)
+ 0.5
+
+ ```
+ """
+ prediction_mask, target_mask = _validate_mask_pair(prediction, target)
+ empty_mask_score = _resolve_empty_mask_score(prediction_mask, target_mask)
+ if empty_mask_score is not None:
+ return empty_mask_score
+
+ intersection = np.logical_and(prediction_mask, target_mask).sum()
+ union = np.logical_or(prediction_mask, target_mask).sum()
+ return float(intersection / union)
+
+
+def dice_coefficient(
+ prediction: npt.NDArray[Any] | CompactMask,
+ target: npt.NDArray[Any] | CompactMask,
+) -> float:
+ """
+ Compute Dice coefficient for a single pair of segmentation masks.
+
+ Args:
+ prediction: Predicted binary mask. Accepts a 2D array, a
+ single-mask batch with shape `(1, H, W)`, or a
+ :class:`~supervision.detection.compact_mask.CompactMask`
+ containing exactly one mask.
+ target: Target binary mask. Accepts the same input forms as
+ `prediction`.
+
+ Returns:
+ Dice coefficient as a Python float.
+
+ Raises:
+ ValueError: If shapes differ or the inputs do not represent exactly one
+ mask each.
+
+ Examples:
+ ```pycon
+ >>> import numpy as np
+ >>> import supervision as sv
+ >>> prediction = np.array([[1, 1], [0, 0]], dtype=bool)
+ >>> target = np.array([[1, 0], [0, 0]], dtype=bool)
+ >>> round(sv.dice_coefficient(prediction, target), 2)
+ 0.67
+
+ ```
+ """
+ prediction_mask, target_mask = _validate_mask_pair(prediction, target)
+ empty_mask_score = _resolve_empty_mask_score(prediction_mask, target_mask)
+ if empty_mask_score is not None:
+ return empty_mask_score
+
+ intersection = np.logical_and(prediction_mask, target_mask).sum()
+ denominator = prediction_mask.sum() + target_mask.sum()
+ return float((2 * intersection) / denominator)
+
+
+def boundary_iou(
+ prediction: npt.NDArray[Any] | CompactMask,
+ target: npt.NDArray[Any] | CompactMask,
+ tolerance: int = 2,
+) -> float:
+ """
+ Compute boundary IoU for a single pair of segmentation masks.
+
+ Boundary IoU dilates the foreground contours of both masks by `tolerance`
+ pixels before measuring IoU. This makes the score more forgiving to small
+ contour shifts than standard region IoU.
+
+ Args:
+ prediction: Predicted binary mask. Accepts a 2D array, a
+ single-mask batch with shape `(1, H, W)`, or a
+ :class:`~supervision.detection.compact_mask.CompactMask`
+ containing exactly one mask.
+ target: Target binary mask. Accepts the same input forms as
+ `prediction`.
+ tolerance: Integer pixel distance used to dilate both
+ boundaries before comparison. `0` requires exact boundary-pixel
+ agreement.
+
+ Returns:
+ Boundary IoU score as a Python float.
+
+ Raises:
+ ValueError: If shapes differ, the inputs do not represent exactly one
+ mask each, or `tolerance` is invalid.
+
+ Examples:
+ ```pycon
+ >>> import numpy as np
+ >>> import supervision as sv
+ >>> prediction = np.zeros((5, 5), dtype=bool)
+ >>> target = np.zeros((5, 5), dtype=bool)
+ >>> prediction[1:4, 1:4] = True
+ >>> target[1:4, 2:5] = True
+ >>> round(sv.boundary_iou(prediction, target, tolerance=1), 2)
+ 0.7
+
+ ```
+ """
+ tolerance = _validate_tolerance(tolerance)
+ prediction_mask, target_mask = _validate_mask_pair(prediction, target)
+ empty_mask_score = _resolve_empty_mask_score(prediction_mask, target_mask)
+ if empty_mask_score is not None:
+ return empty_mask_score
+
+ prediction_boundary = _expand_boundary(
+ _extract_boundary(prediction_mask), tolerance
+ )
+ target_boundary = _expand_boundary(_extract_boundary(target_mask), tolerance)
+
+ intersection = np.logical_and(prediction_boundary, target_boundary).sum()
+ union = np.logical_or(prediction_boundary, target_boundary).sum()
+ return float(intersection / union) if union > 0 else 0.0
+
+
+def boundary_f_score(
+ prediction: npt.NDArray[Any] | CompactMask,
+ target: npt.NDArray[Any] | CompactMask,
+ tolerance: int = 2,
+) -> float:
+ """
+ Compute boundary F-score for a single pair of segmentation masks.
+
+ Boundary F-score measures contour agreement by matching predicted boundary
+ pixels to target boundary pixels within `tolerance` pixels, then combining
+ boundary precision and boundary recall into a single score.
+
+ Args:
+ prediction: Predicted binary mask. Accepts a 2D array, a
+ single-mask batch with shape `(1, H, W)`, or a
+ :class:`~supervision.detection.compact_mask.CompactMask`
+ containing exactly one mask.
+ target: Target binary mask. Accepts the same input forms as
+ `prediction`.
+ tolerance: Integer pixel distance used for boundary
+ matching. `0` requires exact boundary-pixel agreement.
+
+ Returns:
+ Boundary F-score as a Python float.
+
+ Raises:
+ ValueError: If shapes differ, the inputs do not represent exactly one
+ mask each, or `tolerance` is invalid.
+
+ Examples:
+ ```pycon
+ >>> import numpy as np
+ >>> import supervision as sv
+ >>> prediction = np.zeros((5, 5), dtype=bool)
+ >>> target = np.zeros((5, 5), dtype=bool)
+ >>> prediction[1:4, 1:4] = True
+ >>> target[1:4, 2:5] = True
+ >>> round(sv.boundary_f_score(prediction, target, tolerance=1), 2)
+ 1.0
+
+ ```
+ """
+ tolerance = _validate_tolerance(tolerance)
+ prediction_mask, target_mask = _validate_mask_pair(prediction, target)
+ empty_mask_score = _resolve_empty_mask_score(prediction_mask, target_mask)
+ if empty_mask_score is not None:
+ return empty_mask_score
+
+ prediction_boundary = _extract_boundary(prediction_mask)
+ target_boundary = _extract_boundary(target_mask)
+
+ expanded_prediction_boundary = _expand_boundary(prediction_boundary, tolerance)
+ expanded_target_boundary = _expand_boundary(target_boundary, tolerance)
+
+ matched_prediction = np.logical_and(
+ prediction_boundary, expanded_target_boundary
+ ).sum()
+ matched_target = np.logical_and(target_boundary, expanded_prediction_boundary).sum()
+
+ prediction_boundary_area = prediction_boundary.sum()
+ target_boundary_area = target_boundary.sum()
+
+ precision = (
+ float(matched_prediction / prediction_boundary_area)
+ if prediction_boundary_area > 0
+ else 0.0
+ )
+ recall = (
+ float(matched_target / target_boundary_area)
+ if target_boundary_area > 0
+ else 0.0
+ )
+
+ if precision + recall == 0.0:
+ return 0.0
+ return float(2 * precision * recall / (precision + recall))
diff --git a/tests/detection/utils/test_mask_metrics.py b/tests/detection/utils/test_mask_metrics.py
new file mode 100644
index 0000000000..ded316fe71
--- /dev/null
+++ b/tests/detection/utils/test_mask_metrics.py
@@ -0,0 +1,238 @@
+from __future__ import annotations
+
+import math
+
+import numpy as np
+import pytest
+
+from supervision.detection.compact_mask import CompactMask
+from supervision.detection.utils.converters import mask_to_xyxy
+from supervision.detection.utils.mask_metrics import (
+ boundary_f_score,
+ boundary_iou,
+ dice_coefficient,
+ mask_iou,
+)
+
+
+def _make_compact_mask(mask: np.ndarray) -> CompactMask:
+ dense_mask = np.expand_dims(mask.astype(bool), axis=0)
+ xyxy = mask_to_xyxy(dense_mask).astype(np.float32)
+ return CompactMask.from_dense(dense_mask, xyxy, image_shape=mask.shape)
+
+
+def test_dice_coefficient_perfect_overlap() -> None:
+ mask = np.array(
+ [
+ [0, 0, 0, 0],
+ [0, 1, 1, 0],
+ [0, 1, 1, 0],
+ [0, 0, 0, 0],
+ ],
+ dtype=bool,
+ )
+
+ assert dice_coefficient(mask, mask) == 1.0
+
+
+def test_dice_coefficient_disjoint_masks() -> None:
+ prediction = np.array(
+ [
+ [1, 1, 0, 0],
+ [1, 1, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ],
+ dtype=bool,
+ )
+ target = np.array(
+ [
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 1, 1],
+ [0, 0, 1, 1],
+ ],
+ dtype=bool,
+ )
+
+ assert dice_coefficient(prediction, target) == 0.0
+
+
+def test_dice_coefficient_partial_overlap() -> None:
+ prediction = np.array(
+ [
+ [1, 1, 0, 0],
+ [1, 1, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ],
+ dtype=bool,
+ )
+ target = np.array(
+ [
+ [0, 1, 1, 0],
+ [0, 1, 1, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ],
+ dtype=bool,
+ )
+
+ assert dice_coefficient(prediction, target) == 0.5
+
+
+@pytest.mark.parametrize(
+ ("metric", "expected"),
+ [
+ (dice_coefficient, 1.0),
+ (mask_iou, 1.0),
+ (boundary_iou, 1.0),
+ (boundary_f_score, 1.0),
+ ],
+)
+def test_metrics_empty_vs_empty(metric, expected: float) -> None:
+ mask = np.zeros((4, 4), dtype=bool)
+
+ assert metric(mask, mask) == expected
+
+
+@pytest.mark.parametrize(
+ ("metric", "expected"),
+ [
+ (dice_coefficient, 0.0),
+ (mask_iou, 0.0),
+ (boundary_iou, 0.0),
+ (boundary_f_score, 0.0),
+ ],
+)
+def test_metrics_empty_vs_non_empty(metric, expected: float) -> None:
+ empty_mask = np.zeros((4, 4), dtype=bool)
+ filled_mask = np.zeros((4, 4), dtype=bool)
+ filled_mask[1:3, 1:3] = True
+
+ assert metric(empty_mask, filled_mask) == expected
+
+
+def test_mask_iou_partial_overlap() -> None:
+ prediction = np.array(
+ [
+ [1, 1, 0, 0],
+ [1, 1, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ],
+ dtype=bool,
+ )
+ target = np.array(
+ [
+ [0, 1, 1, 0],
+ [0, 1, 1, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ],
+ dtype=bool,
+ )
+
+ assert mask_iou(prediction, target) == pytest.approx(1.0 / 3.0)
+
+
+def test_boundary_metrics_perfect_overlap() -> None:
+ mask = np.zeros((7, 7), dtype=bool)
+ mask[2:5, 2:5] = True
+
+ assert boundary_iou(mask, mask) == 1.0
+ assert boundary_f_score(mask, mask) == 1.0
+
+
+def test_boundary_metrics_shift_is_more_forgiving_with_tolerance() -> None:
+ prediction = np.zeros((7, 7), dtype=bool)
+ prediction[2:5, 1:4] = True
+
+ target = np.zeros((7, 7), dtype=bool)
+ target[2:5, 2:5] = True
+
+ strict_boundary_iou = boundary_iou(prediction, target, tolerance=0)
+ tolerant_boundary_iou = boundary_iou(prediction, target, tolerance=1)
+ strict_boundary_f_score = boundary_f_score(prediction, target, tolerance=0)
+ tolerant_boundary_f_score = boundary_f_score(prediction, target, tolerance=1)
+
+ assert strict_boundary_iou < tolerant_boundary_iou <= 1.0
+ assert strict_boundary_f_score < tolerant_boundary_f_score <= 1.0
+
+
+def test_boundary_metrics_disjoint_masks() -> None:
+ prediction = np.zeros((7, 7), dtype=bool)
+ prediction[1:3, 1:3] = True
+
+ target = np.zeros((7, 7), dtype=bool)
+ target[4:6, 4:6] = True
+
+ assert boundary_iou(prediction, target, tolerance=0) == 0.0
+ assert boundary_f_score(prediction, target, tolerance=0) == 0.0
+
+
+def test_shape_mismatch_raises_error() -> None:
+ prediction = np.zeros((4, 4), dtype=bool)
+ target = np.zeros((5, 5), dtype=bool)
+
+ with pytest.raises(ValueError, match="must have the same shape"):
+ dice_coefficient(prediction, target)
+
+
+@pytest.mark.parametrize("tolerance", [-1, 1.5, True])
+def test_invalid_tolerance_raises_error(tolerance: object) -> None:
+ mask = np.zeros((4, 4), dtype=bool)
+
+ with pytest.raises(ValueError, match="tolerance must be a non-negative integer"):
+ boundary_iou(mask, mask, tolerance=tolerance) # type: ignore[arg-type]
+
+
+def test_bool_and_uint8_masks_produce_same_result() -> None:
+ prediction_bool = np.zeros((6, 6), dtype=bool)
+ prediction_bool[1:5, 1:4] = True
+
+ target_bool = np.zeros((6, 6), dtype=bool)
+ target_bool[1:5, 2:5] = True
+
+ prediction_uint8 = prediction_bool.astype(np.uint8)
+ target_uint8 = target_bool.astype(np.uint8)
+
+ assert dice_coefficient(prediction_bool, target_bool) == dice_coefficient(
+ prediction_uint8, target_uint8
+ )
+ assert mask_iou(prediction_bool, target_bool) == mask_iou(
+ prediction_uint8, target_uint8
+ )
+ assert boundary_iou(prediction_bool, target_bool, tolerance=1) == boundary_iou(
+ prediction_uint8, target_uint8, tolerance=1
+ )
+ assert boundary_f_score(
+ prediction_bool, target_bool, tolerance=1
+ ) == boundary_f_score(prediction_uint8, target_uint8, tolerance=1)
+
+
+def test_compact_mask_and_dense_mask_produce_same_scores() -> None:
+ prediction = np.zeros((6, 6), dtype=bool)
+ prediction[1:5, 1:4] = True
+
+ target = np.zeros((6, 6), dtype=bool)
+ target[1:5, 2:5] = True
+
+ compact_prediction = _make_compact_mask(prediction)
+ compact_target = _make_compact_mask(target)
+
+ assert math.isclose(
+ dice_coefficient(compact_prediction, compact_target),
+ dice_coefficient(prediction, target),
+ )
+ assert math.isclose(
+ mask_iou(compact_prediction, compact_target), mask_iou(prediction, target)
+ )
+ assert math.isclose(
+ boundary_iou(compact_prediction, compact_target, tolerance=1),
+ boundary_iou(prediction, target, tolerance=1),
+ )
+ assert math.isclose(
+ boundary_f_score(compact_prediction, compact_target, tolerance=1),
+ boundary_f_score(prediction, target, tolerance=1),
+ )