dandi · bendichter · May 14, 2026 · May 14, 2026 · May 14, 2026 · yarikoptic
diff --git a/dandischema/metadata.py b/dandischema/metadata.py
@@ -530,7 +530,8 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None:
     # which components already found, so we do not count more than
     # once in some incorrectly named datasets
     found: Dict[str, str] = {}
-    for part in Path(assetmeta["path"]).name.split(".")[0].split("_"):
+    asset_path = Path(assetmeta["path"])
+    for part in asset_path.name.split(".")[0].split("_"):
         if not found.get("subject") and part.startswith("sub-"):
             found["subject"] = subject = part.split("sub-", 1)[1]
             if subject not in stats["subjects"]:
@@ -539,6 +540,31 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None:
             found["sample"] = sample = part.replace("sample-", "")
             if sample not in stats["tissuesample"]:
                 stats["tissuesample"].append(sample)
+        if not found.get("session") and part.startswith("ses-"):
+            found["session"] = part.split("ses-", 1)[1]
+    # Fallback: ses- tokens that appear only in directory components (e.g.
+    # `sub-X/ses-Y/foo_acq-Z_bold.nii.gz`) should still be counted. To form
+    # the (subject, session) pair we also accept a directory-only subject,
+    # but we do not add such a subject to stats["subjects"] — subject
+    # counting remains driven by the filename and wasAttributedTo, matching
+    # prior behavior.
+    if not found.get("session"):
+        dir_subject = found.get("subject")
+        dir_session: Optional[str] = None
+        for directory in asset_path.parts[:-1]:
+            for part in directory.split("_"):
+                if dir_subject is None and part.startswith("sub-"):
+                    dir_subject = part.split("sub-", 1)[1]
+                if dir_session is None and part.startswith("ses-"):
+                    dir_session = part.split("ses-", 1)[1]
+        if dir_subject is not None and dir_session is not None:
+            found.setdefault("subject", dir_subject)
+            found["session"] = dir_session
+    stats["sessions"] = stats.get("sessions", [])
+    if found.get("subject") and found.get("session"):
+        pair = (found["subject"], found["session"])
+        if pair not in stats["sessions"]:
+            stats["sessions"].append(pair)
 
     stats["dataStandard"] = stats.get("dataStandard", [])
 
@@ -573,4 +599,5 @@ def aggregate_assets_summary(metadata: Iterable[Dict[str, Any]]) -> dict:
         len(stats.pop("tissuesample", [])) + len(stats.pop("slice", []))
     ) or None
     stats["numberOfCells"] = len(stats.pop("cell", [])) or None
+    stats["numberOfSessions"] = len(stats.pop("sessions", [])) or None
     return models.AssetsSummary(**stats).model_dump(mode="json", exclude_none=True)
diff --git a/dandischema/models.py b/dandischema/models.py
@@ -1201,6 +1201,9 @@ class AssetsSummary(DandiBaseModel):
         None, json_schema_extra={"readOnly": True}
     )  # more of NWB
     numberOfCells: Optional[int] = Field(None, json_schema_extra={"readOnly": True})
+    numberOfSessions: Optional[int] = Field(
+        None, json_schema_extra={"readOnly": True}
+    )  # BIDS-style ses-* path tokens, counted as unique (subject, session) pairs
 
     dataStandard: Optional[List[StandardsType]] = Field(
         None, json_schema_extra={"readOnly": True}

diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py
@@ -521,6 +521,7 @@ def test_migrate_schemaversion_update() -> None:
                 "numberOfSubjects": 1,
                 "numberOfSamples": 1,
                 "numberOfCells": 1,
+                "numberOfSessions": 1,
                 "dataStandard": [
                     {
                         "schemaKey": "StandardsType",
@@ -541,6 +542,7 @@ def test_migrate_schemaversion_update() -> None:
                 "numberOfBytes": 608720,
                 "numberOfFiles": 2,
                 "numberOfSubjects": 1,
+                "numberOfSessions": 2,
                 "dataStandard": [
                     {
                         "schemaKey": "StandardsType",
@@ -588,6 +590,7 @@ def test_migrate_schemaversion_update() -> None:
                 "numberOfSubjects": 2,
                 "numberOfSamples": 1,
                 "numberOfCells": 1,
+                "numberOfSessions": 2,
                 "dataStandard": [
                     {
                         "schemaKey": "StandardsType",
@@ -751,13 +754,56 @@ def test_aggregation_bids() -> None:
     assert summary["numberOfFiles"] == 3
     assert summary["numberOfSamples"] == 2
     assert summary["numberOfSubjects"] == 1
+    assert summary["numberOfSessions"] == 2
     assert sum("BIDS" in _.get("name", "") for _ in summary["dataStandard"]) == 1
     assert (
         sum(_.get("name", "").startswith("OME/NGFF") for _ in summary["dataStandard"])
         == 1
     )  # only a single entry so we do not duplicate them
 
 
+def _bids_asset(path: str, size: int = 1) -> Dict[str, Any]:
+    return {  # minimal Asset metadata record used by the aggregation tests
+        "schemaKey": "Asset",
+        "schemaVersion": DANDI_SCHEMA_VERSION,
+        "path": path,  # sub-/ses- tokens are parsed from this by the aggregator
+        "contentSize": size,
+        "encodingFormat": "application/x-nwb",  # constant, regardless of path
+    }
+
+
+def test_aggregate_number_of_sessions() -> None:
+    # Same subject, two sessions (ses- token in both directory and filename)
+    data = [
+        _bids_asset("sub-01/ses-A/eeg/sub-01_ses-A_task-rest_eeg.edf"),
+        _bids_asset("sub-01/ses-B/eeg/sub-01_ses-B_task-rest_eeg.edf"),
+    ]
+    summary = aggregate_assets_summary(data)
+    assert summary["numberOfSubjects"] == 1
+    assert summary["numberOfSessions"] == 2
+
+    # Two subjects sharing a session id "A" -> two distinct (sub, ses) pairs
+    data = [
+        _bids_asset("sub-01/ses-A/eeg/sub-01_ses-A_task-rest_eeg.edf"),
+        _bids_asset("sub-02/ses-A/eeg/sub-02_ses-A_task-rest_eeg.edf"),
+    ]
+    summary = aggregate_assets_summary(data)
+    assert summary["numberOfSubjects"] == 2
+    assert summary["numberOfSessions"] == 2
+
+    # ses- only in directory portion (filename omits it) still counts
+    data = [
+        _bids_asset("sub-01/ses-A/anat/sub-01_T1w.nii.gz"),
+    ]
+    summary = aggregate_assets_summary(data)
+    assert summary["numberOfSessions"] == 1
+
+    # No ses- anywhere -> field is absent
+    data = [_bids_asset("sub-01/anat/sub-01_T1w.nii.gz")]
+    summary = aggregate_assets_summary(data)
+    assert "numberOfSessions" not in summary
+
+
 class TestValidateObjJson:
     """
     Tests for `_validate_obj_json()`