dandi · bendichter · May 14, 2026 · May 14, 2026 · May 14, 2026 · candleindark
diff --git a/dandischema/metadata.py b/dandischema/metadata.py
@@ -530,15 +530,31 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None:
     # which components already found, so we do not count more than
     # once in some incorrectly named datasets
     found: Dict[str, str] = {}
-    for part in Path(assetmeta["path"]).name.split(".")[0].split("_"):
-        if not found.get("subject") and part.startswith("sub-"):
-            found["subject"] = subject = part.split("sub-", 1)[1]
-            if subject not in stats["subjects"]:
-                stats["subjects"].append(subject)
-        if not found.get("sample") and part.startswith("sample-"):
-            found["sample"] = sample = part.replace("sample-", "")
-            if sample not in stats["tissuesample"]:
-                stats["tissuesample"].append(sample)
+    asset_path = Path(assetmeta["path"])
+    # (entity key, BIDS prefix, unique-values bucket in stats; None if not aggregated)
+    entities = [
+        ("subject", "sub-", "subjects"),
+        ("sample", "sample-", "tissuesample"),
+        ("session", "ses-", None),
+    ]
+    for part in asset_path.name.split(".")[0].split("_"):
+        for key, prefix, bucket in entities:
+            if not found.get(key) and part.startswith(prefix):
+                found[key] = value = part.split(prefix, 1)[1]
+                if bucket is not None and value not in stats[bucket]:
+                    stats[bucket].append(value)
+    # If ses- is absent from the filename, fall back to scanning the path
+    # parts (BIDS keeps `ses-X` as its own directory).
+    if not found.get("session"):
+        for part in asset_path.parts[:-1]:
+            if part.startswith("ses-"):
+                found["session"] = part.split("ses-", 1)[1]
+                break
+    stats["sessions"] = stats.get("sessions", [])
+    if found.get("subject") and found.get("session"):
+        pair = (found["subject"], found["session"])
+        if pair not in stats["sessions"]:
+            stats["sessions"].append(pair)
 
     stats["dataStandard"] = stats.get("dataStandard", [])
 
@@ -573,4 +589,5 @@ def aggregate_assets_summary(metadata: Iterable[Dict[str, Any]]) -> dict:
         len(stats.pop("tissuesample", [])) + len(stats.pop("slice", []))
     ) or None
     stats["numberOfCells"] = len(stats.pop("cell", [])) or None
+    stats["numberOfSessions"] = len(stats.pop("sessions", [])) or None
     return models.AssetsSummary(**stats).model_dump(mode="json", exclude_none=True)
diff --git a/dandischema/models.py b/dandischema/models.py
@@ -1201,6 +1201,9 @@ class AssetsSummary(DandiBaseModel):
         None, json_schema_extra={"readOnly": True}
     )  # more of NWB
     numberOfCells: Optional[int] = Field(None, json_schema_extra={"readOnly": True})
+    numberOfSessions: Optional[int] = Field(
+        None, json_schema_extra={"readOnly": True}
+    )  # BIDS ses-* tokens, counted as unique (subject, session) pairs
 
     dataStandard: Optional[List[StandardsType]] = Field(
         None, json_schema_extra={"readOnly": True}

diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py
@@ -521,6 +521,7 @@ def test_migrate_schemaversion_update() -> None:
                 "numberOfSubjects": 1,
                 "numberOfSamples": 1,
                 "numberOfCells": 1,
+                "numberOfSessions": 1,
                 "dataStandard": [
                     {
                         "schemaKey": "StandardsType",
@@ -541,6 +542,7 @@ def test_migrate_schemaversion_update() -> None:
                 "numberOfBytes": 608720,
                 "numberOfFiles": 2,
                 "numberOfSubjects": 1,
+                "numberOfSessions": 2,
                 "dataStandard": [
                     {
                         "schemaKey": "StandardsType",
@@ -588,6 +590,7 @@ def test_migrate_schemaversion_update() -> None:
                 "numberOfSubjects": 2,
                 "numberOfSamples": 1,
                 "numberOfCells": 1,
+                "numberOfSessions": 2,
                 "dataStandard": [
                     {
                         "schemaKey": "StandardsType",
@@ -751,13 +754,56 @@ def test_aggregation_bids() -> None:
     assert summary["numberOfFiles"] == 3
     assert summary["numberOfSamples"] == 2
     assert summary["numberOfSubjects"] == 1
+    assert summary["numberOfSessions"] == 2
     assert sum("BIDS" in _.get("name", "") for _ in summary["dataStandard"]) == 1
     assert (
         sum(_.get("name", "").startswith("OME/NGFF") for _ in summary["dataStandard"])
         == 1
     )  # only a single entry so we do not duplicate them
 
 
+def _bids_asset(path: str, size: int = 1) -> Dict[str, Any]:
+    return {  # minimal Asset metadata dict; only path and size vary per call
+        "schemaKey": "Asset",
+        "schemaVersion": DANDI_SCHEMA_VERSION,
+        "path": path,
+        "contentSize": size,
+        "encodingFormat": "application/x-nwb",  # constant, whatever the path suffix
+    }
+
+
+def test_aggregate_number_of_sessions() -> None:
+    # Same subject, two sessions (ses- token is read from the filename)
+    data = [
+        _bids_asset("sub-01/ses-A/eeg/sub-01_ses-A_task-rest_eeg.edf"),
+        _bids_asset("sub-01/ses-B/eeg/sub-01_ses-B_task-rest_eeg.edf"),
+    ]
+    summary = aggregate_assets_summary(data)
+    assert summary["numberOfSubjects"] == 1
+    assert summary["numberOfSessions"] == 2
+
+    # Two subjects sharing a session id "A" -> two distinct (sub, ses) pairs
+    data = [
+        _bids_asset("sub-01/ses-A/eeg/sub-01_ses-A_task-rest_eeg.edf"),
+        _bids_asset("sub-02/ses-A/eeg/sub-02_ses-A_task-rest_eeg.edf"),
+    ]
+    summary = aggregate_assets_summary(data)
+    assert summary["numberOfSubjects"] == 2
+    assert summary["numberOfSessions"] == 2
+
+    # ses- only in directory portion (filename omits it) still counts
+    data = [
+        _bids_asset("sub-01/ses-A/anat/sub-01_T1w.nii.gz"),
+    ]
+    summary = aggregate_assets_summary(data)
+    assert summary["numberOfSessions"] == 1
+
+    # No ses- anywhere -> numberOfSessions is absent from the summary
+    data = [_bids_asset("sub-01/anat/sub-01_T1w.nii.gz")]
+    summary = aggregate_assets_summary(data)
+    assert "numberOfSessions" not in summary
+
+
 class TestValidateObjJson:
     """
     Tests for `_validate_obj_json()`