Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ date_modified: 2026-04-30

### UnReleased

- Added [#2284](https://github.com/roboflow/supervision/pull/2284): [`DetectionDataset.from_createml`](https://supervision.roboflow.com/latest/datasets/core/#supervision.dataset.core.DetectionDataset.from_createml) and [`DetectionDataset.as_createml`](https://supervision.roboflow.com/latest/datasets/core/#supervision.dataset.core.DetectionDataset.as_createml) add load and export support for the CreateML object-detection JSON format, alongside the existing COCO, YOLO, and Pascal VOC formats.

- Fixed [#2282](https://github.com/roboflow/supervision/pull/2282): [`oriented_box_iou_batch`](https://supervision.roboflow.com/latest/detection/utils/iou_and_nms/) now allocates the rasterization canvas with correct dimensions. Previously x-extents were mapped to canvas height and y-extents to width, causing IoU to be computed on a transposed canvas for non-square oriented bounding boxes. Metrics using OBB IoU (Precision, Recall, F1Score, MeanAverageRecall with `metric_target=MetricTarget.ORIENTED_BOUNDING_BOXES`) now return correct scores.

- Fixed [#2276](https://github.com/roboflow/supervision/pull/2276): [`DetectionDataset.as_coco`](https://supervision.roboflow.com/latest/datasets/core/#supervision.dataset.core.DetectionDataset.as_coco) now emits 1-indexed `category_id` and `categories[].id` values as required by the COCO spec, fixing CVAT import failures reported in [#1181](https://github.com/roboflow/supervision/issues/1181). Files generated by earlier supervision versions used 0-indexed category ids; they can still be loaded correctly by `from_coco` because the read path maps categories by name, not by id value.
Expand Down
82 changes: 82 additions & 0 deletions src/supervision/dataset/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
load_coco_annotations,
save_coco_annotations,
)
from supervision.dataset.formats.createml import (
load_createml_annotations,
save_createml_annotations,
)
from supervision.dataset.formats.pascal_voc import (
detections_to_pascal_voc,
load_pascal_voc_annotations,
Expand Down Expand Up @@ -553,6 +557,84 @@ def as_yolo(
if data_yaml_path is not None:
save_data_yaml(data_yaml_path=data_yaml_path, classes=self.classes)

@classmethod
def from_createml(
    cls,
    images_directory_path: str,
    annotations_path: str,
) -> DetectionDataset:
    """
    Build a `DetectionDataset` from a CreateML annotation file.

    A CreateML object-detection export is one JSON file holding a list of
    per-image entries. Every box is described by its pixel-space centre
    together with a width and a height. The format carries no separate
    class list, so class names are derived from the labels found in the
    file.

    Args:
        images_directory_path: The path to the directory that holds the
            images referenced by the annotation file.
        annotations_path: The path to the CreateML json annotation file.

    Returns:
        A DetectionDataset instance holding the discovered classes, the
            image paths and the per-image annotations.

    Examples:
        ```python
        import roboflow
        from roboflow import Roboflow
        import supervision as sv

        roboflow.login()
        rf = Roboflow()

        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)
        dataset = project.version(PROJECT_VERSION).download("createml")

        ds = sv.DetectionDataset.from_createml(
            images_directory_path=f"{dataset.location}/train",
            annotations_path=f"{dataset.location}/train/_annotations.createml.json",
        )

        ds.classes
        # ['dog', 'person']
        ```
    """
    loaded = load_createml_annotations(
        images_directory_path=images_directory_path,
        annotations_path=annotations_path,
    )
    class_names, paths, detections_by_path = loaded
    return DetectionDataset(
        classes=class_names,
        images=paths,
        annotations=detections_by_path,
    )

def as_createml(
    self,
    images_directory_path: str | None = None,
    annotations_path: str | None = None,
) -> None:
    """
    Write the dataset out in CreateML format.

    Images and annotations are exported independently: each output is
    produced only when its destination path is supplied. Images are
    handled first, then the annotation file.

    Args:
        images_directory_path: The path to the directory where the
            images should be saved. When omitted, no images are saved.
        annotations_path: The path of the CreateML json annotation file
            to write. When omitted, no annotations are saved.
    """
    if images_directory_path is not None:
        save_dataset_images(
            dataset=self, images_directory_path=images_directory_path
        )

    # Nothing more to do when the caller did not ask for annotations.
    if annotations_path is None:
        return

    save_createml_annotations(dataset=self, annotations_path=annotations_path)

@classmethod
def from_coco(
cls,
Expand Down
191 changes: 191 additions & 0 deletions src/supervision/dataset/formats/createml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Any, cast

import numpy as np

from supervision.detection.core import Detections
from supervision.utils.file import read_json_file, save_json_file

if TYPE_CHECKING:
from supervision.dataset.core import DetectionDataset

# Alias for one decoded JSON object from a CreateML file: either a per-image
# entry or one of the annotation records nested inside it. Values are untyped.
CreateMLDict = dict[str, Any]


def _resolve_image_path(images_directory_path: str, image_name: str) -> str:
"""Resolve and validate an image path against the images directory.

Rejects annotations whose ``image`` field escapes ``images_directory_path``
(via ``..`` traversal, an absolute path, or a symlink pointing outside),
mirroring the protection used by the COCO loader.
"""
images_directory_resolved = Path(images_directory_path).resolve()
image_path = Path(images_directory_path) / image_name
try:
resolved_image_path = image_path.resolve()
except (OSError, ValueError) as exc:
raise ValueError(
f"CreateML annotation refers to image {image_name!r}, which "
f"produces an invalid path: {exc}"
) from exc
if resolved_image_path == images_directory_resolved:
raise ValueError(
f"CreateML annotation refers to image {image_name!r}, which "
f"resolves to the images directory itself "
f"({images_directory_resolved}). Expected a path to an image file."
)
if images_directory_resolved not in resolved_image_path.parents:
raise ValueError(
f"CreateML annotation refers to image {image_name!r}, which "
f"resolves to {resolved_image_path} — outside the images "
f"directory {images_directory_resolved}."
)
if resolved_image_path.is_dir():
raise ValueError(
f"CreateML annotation refers to image {image_name!r}, which "
f"resolves to directory {resolved_image_path}. Expected a path "
"to an image file."
)
return str(image_path)


def createml_annotations_to_detections(
    image_annotations: list[CreateMLDict], class_to_index: dict[str, int]
) -> Detections:
    """Turn the CreateML annotation records of one image into ``Detections``.

    Each record carries a ``"label"`` and a ``"coordinates"`` mapping with the
    keys ``"x"``, ``"y"``, ``"width"`` and ``"height"``, where ``x``/``y`` are
    the box centre. The centre/size form is converted to ``xyxy`` corners.

    Args:
        image_annotations: Annotation records belonging to a single image.
        class_to_index: Mapping from label name to class id.

    Returns:
        ``Detections.empty()`` when there are no records, otherwise a
        ``Detections`` with ``float32`` ``xyxy`` boxes and integer class ids.
    """
    if not image_annotations:
        return Detections.empty()

    corners = []
    label_ids = []
    for record in image_annotations:
        box = record["coordinates"]
        center_x, center_y = float(box["x"]), float(box["y"])
        half_width = float(box["width"]) / 2
        half_height = float(box["height"]) / 2
        corners.append(
            [
                center_x - half_width,
                center_y - half_height,
                center_x + half_width,
                center_y + half_height,
            ]
        )
        label_ids.append(class_to_index[record["label"]])

    boxes = np.array(corners, dtype=np.float32)
    class_id = np.array(label_ids, dtype=int)
    return Detections(xyxy=boxes, class_id=class_id)


def load_createml_annotations(
    images_directory_path: str,
    annotations_path: str,
) -> tuple[list[str], list[str], dict[str, Detections]]:
    """Read a CreateML object-detection file and convert it to ``Detections``.

    The file is a single JSON document: a list with one entry per image, each
    entry listing that image's axis-aligned boxes. CreateML has no explicit
    category list, so the class names are the set of labels seen in the file,
    sorted, with zero-based ids following that order. A class that has no box
    anywhere in the file therefore cannot show up in the returned ``classes``.

    Args:
        images_directory_path: Path to the directory containing the images.
        annotations_path: Path to the CreateML JSON annotation file.

    Returns:
        A tuple of ``(classes, image_paths, annotations)``.

    Raises:
        ValueError: If an entry's ``image`` field resolves to the images
            directory itself or to a location outside it (for example through
            ``..`` traversal or an absolute path).
    """
    entries = cast("list[CreateMLDict]", read_json_file(file_path=annotations_path))

    # First pass: gather every label so ids can be assigned in sorted order.
    seen_labels = set()
    for entry in entries:
        for record in entry.get("annotations", []):
            seen_labels.add(record["label"])
    classes = sorted(seen_labels)
    class_to_index = dict(zip(classes, range(len(classes))))

    # Second pass: validate each image path and convert its boxes.
    image_paths: list[str] = []
    annotations: dict[str, Detections] = {}
    for entry in entries:
        resolved = _resolve_image_path(
            images_directory_path=images_directory_path,
            image_name=entry["image"],
        )
        annotations[resolved] = createml_annotations_to_detections(
            image_annotations=entry.get("annotations", []),
            class_to_index=class_to_index,
        )
        image_paths.append(resolved)

    return classes, image_paths, annotations


def detections_to_createml_annotations(
    detections: Detections, classes: list[str]
) -> list[CreateMLDict]:
    """Convert ``Detections`` into a list of CreateML annotation dicts.

    Every ``xyxy`` box becomes a record holding the class ``"label"`` and a
    ``"coordinates"`` mapping with the box centre (``"x"``, ``"y"``) plus its
    ``"width"`` and ``"height"``.

    Args:
        detections: Boxes to export. ``class_id`` must be set whenever there
            is at least one box.
        classes: Class names, indexed by class id.

    Returns:
        One annotation dict per box, in box order. An empty list is returned
        when there are no boxes, even if ``class_id`` is ``None``.

    Raises:
        ValueError: If ``class_id`` is ``None`` while boxes are present.
    """
    class_ids = detections.class_id
    if class_ids is None:
        # An image without boxes has nothing to label, so a missing class_id
        # must not block exporting it as an empty annotation list.
        if len(detections.xyxy) == 0:
            return []
        raise ValueError(
            "class_id is required for CreateML export, but the provided "
            "Detections has class_id=None."
        )

    annotations: list[CreateMLDict] = []
    for xyxy, class_id in zip(detections.xyxy, class_ids):
        # Convert to built-in floats so the values are JSON serialisable.
        x_min, y_min, x_max, y_max = (float(value) for value in xyxy)
        annotations.append(
            {
                "label": classes[int(class_id)],
                "coordinates": {
                    "x": (x_min + x_max) / 2,
                    "y": (y_min + y_max) / 2,
                    "width": x_max - x_min,
                    "height": y_max - y_min,
                },
            }
        )
    return annotations


def save_createml_annotations(
    dataset: DetectionDataset,
    annotations_path: str,
) -> None:
    """Write a ``DetectionDataset`` out as a CreateML object-detection JSON file.

    One entry is written per image path in the dataset, in dataset order. Only
    the file-name component of each image path is stored in the ``"image"``
    field.

    Args:
        dataset: The ``DetectionDataset`` to write.
        annotations_path: Output path for the CreateML JSON file. Missing
            parent directories are created.
    """
    output_path = Path(annotations_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    createml_data: list[CreateMLDict] = []
    for image_path in dataset.image_paths:
        file_name = Path(image_path).name
        records = detections_to_createml_annotations(
            detections=dataset.annotations[image_path],
            classes=dataset.classes,
        )
        createml_data.append({"image": file_name, "annotations": records})

    # The payload is a list; the cast only satisfies save_json_file's signature.
    save_json_file(
        data=cast("dict[str, Any]", createml_data), file_path=annotations_path
    )
Loading
Loading