From 1298b26c588d7012957a5f5b31fd9c0fd3f41eb7 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Sat, 6 Jun 2026 01:14:10 -0700 Subject: [PATCH] Consolidate Asset/Dandiset models and gate publish validation on datePublished Collapse the publication-specific model variants into their base classes so that each class's schemaKey value matches its class name, which is what an eventual LinkML translation needs to use schemaKey as a type designator (designates_type). The three classes whose schemaKey differed from their class name (BareAsset -> "Asset", PublishedAsset -> "Asset", PublishedDandiset -> "Dandiset") were the blockers. Changes (all in dandischema/models.py): - Merge PublishedDandiset into Dandiset and PublishedAsset into Asset. The publication-only fields (doi, publishedBy, datePublished, releaseNotes on Dandiset; publishedBy, datePublished on Asset) become optional, and the publication requirements move into a datePublished-gated `check_publication_status` model validator on each class: - when datePublished is None (a draft), the publication-only fields must be absent; - when datePublished is set (published), enforce the former Published* requirements (publishedBy/url/doi presence, the stricter id/url patterns, check_filesbytes, digest_sha256check). All violations are reported together in one error. dandi-archive's publish flow injects datePublished before validating, so the gated checks fire exactly as the Published* classes did before. - Keep BareAsset as a distinct class (Asset still inherits from it) but align its schemaKey to Literal["BareAsset"], so both BareAsset and Asset are schemaKey-aligned. The client (dandi-cli) is responsible for setting schemaKey to "Asset" when uploading bare metadata as an Asset. - Remove the Publishable mixin; add PublishedDandiset/PublishedAsset as deprecated aliases of Dandiset/Asset for backward compatibility (dandi-cli, dandi-archive). These will be removed in a follow-up once consumers migrate. - Simplify DandiBaseModel.ensure_schemakey to a plain schemaKey == class-name check now that no class intentionally diverges. to_datacite now asserts the published precondition (datePublished set) that the PublishedDandiset type used to guarantee. Tests updated for the merged models: the published variants report the same missing fields as their base classes, and the publication requirements are exercised on complete, datePublished instances; new tests cover the publication-coherence invariant. Generated JSON Schema diff: the draft schemas (Dandiset, Asset) gain only optional, readOnly publication properties (nothing newly required, no pattern tightened), so the dandi-archive Meditor is unaffected; the published-*.json schemas become the relaxed versions, with publication strictness now enforced by the gated Pydantic validator. Co-Authored-By: Claude Code 2.1.161 / Claude Opus 4.8 claude-opus-4-8 --- dandischema/datacite/__init__.py | 10 + dandischema/datacite/tests/test_datacite.py | 5 + dandischema/models.py | 237 ++++++++++++-------- dandischema/tests/test_metadata.py | 105 ++++----- dandischema/tests/test_models.py | 193 +++++++++++----- 5 files changed, 327 insertions(+), 223 deletions(-) diff --git a/dandischema/datacite/__init__.py b/dandischema/datacite/__init__.py index 72536821..b053b224 100644 --- a/dandischema/datacite/__init__.py +++ b/dandischema/datacite/__init__.py @@ -125,6 +125,16 @@ def to_datacite( if not isinstance(meta, PublishedDandiset): meta = PublishedDandiset(**meta) + # ``to_datacite`` operates on *published* Dandiset metadata. Since the + # publication-only fields are now optional on ``Dandiset`` (gated on + # ``datePublished``), enforce the precondition the ``PublishedDandiset`` type + # used to guarantee. + if meta.datePublished is None: + raise ValueError( + "to_datacite requires published Dandiset metadata, but datePublished " + "is not set" + ) + attributes: Dict[str, Any] = {} if publish: attributes["event"] = "publish" diff --git a/dandischema/datacite/tests/test_datacite.py b/dandischema/datacite/tests/test_datacite.py index 2202b27b..5f0725e1 100644 --- a/dandischema/datacite/tests/test_datacite.py +++ b/dandischema/datacite/tests/test_datacite.py @@ -260,6 +260,11 @@ def test_datacite(dandi_id: str, schema: Any) -> None: datacite = to_datacite(meta=meta, validate=True) + # ``doi`` is Optional on the consolidated ``Dandiset`` but always set on a + # published one (here via ``basic_publishmeta``); narrow it for the + # str-typed ``datacite_post`` argument. + assert meta.doi is not None + # trying to post datacite datacite_post(datacite, meta.doi) diff --git a/dandischema/models.py b/dandischema/models.py index ec3f05b0..1d3d19d3 100644 --- a/dandischema/models.py +++ b/dandischema/models.py @@ -35,6 +35,7 @@ ) from pydantic.json_schema import JsonSchemaValue from pydantic_core import CoreSchema +from typing_extensions import Self from zarr_checksum.checksum import InvalidZarrChecksum, ZarrDirectoryDigest from dandischema.conf import ( @@ -570,15 +571,8 @@ def preserve_anys_values( @field_validator("schemaKey") @classmethod def ensure_schemakey(cls, val: str) -> str: - tempval = val - if "Published" in cls.__name__: - tempval = "Published" + tempval - elif "BareAsset" == cls.__name__: - tempval = "Bare" + tempval - if tempval != cls.__name__: - raise ValueError( - f"schemaKey {tempval} does not match classname {cls.__name__}" - ) + if val != cls.__name__: + raise ValueError(f"schemaKey {val} does not match classname {cls.__name__}") return val @classmethod @@ -1752,6 +1746,40 @@ def contributor_musthave_contact( version: str = Field(json_schema_extra={"nskey": "schema", "readOnly": True}) + # Publication-related fields. A non-``None`` ``datePublished`` marks the + # record as published; ``check_publication_status`` enforces the cross-field + # rules (which of these are then required vs. must be absent). + url: Optional[AnyHttpUrl] = Field( + None, + description="Permalink to the Dandiset.", + json_schema_extra={"readOnly": True, "nskey": "schema"}, + ) + doi: Optional[str] = Field( + None, + title="DOI", + pattern=DANDI_DOI_PATTERN, + json_schema_extra={"readOnly": True, "nskey": DANDI_NSKEY}, + ) + """The DOI of the published Dandiset. + + The empty string indicates that there is no DOI for the published Dandiset; + it is set automatically for a published Dandiset on a DANDI instance with no + configured DOI prefix. + """ + publishedBy: Optional[Union[AnyHttpUrl, PublishActivity]] = Field( + None, + description="The URL should contain the provenance of the publishing process.", + json_schema_extra={"readOnly": True, "nskey": DANDI_NSKEY}, + ) + datePublished: Optional[datetime] = Field( + None, json_schema_extra={"readOnly": True, "nskey": "schema"} + ) + releaseNotes: Optional[str] = Field( + None, + description="The description of the release", + json_schema_extra={"readOnly": True, "nskey": "schema"}, + ) + wasGeneratedBy: Optional[Sequence[Project]] = Field( None, title="Associated projects", @@ -1769,6 +1797,54 @@ def contributor_musthave_contact( "nskey": DANDI_NSKEY, } + @model_validator(mode="after") + def check_publication_status(self) -> Self: + """Enforce publication coherence keyed on ``datePublished``. + + A non-``None`` ``datePublished`` marks the record as published and + triggers the published-Dandiset requirements; otherwise the publication-only + fields must be absent. + """ + errors = [] + if self.datePublished is None: + errors += [ + f"{name} is not allowed unless datePublished is set" + for name in ("doi", "publishedBy", "releaseNotes") + if getattr(self, name) is not None + ] + else: + if self.publishedBy is None: + errors.append("publishedBy is required for a published Dandiset") + if self.url is None: + errors.append("url is required for a published Dandiset") + elif not re.match(PUBLISHED_VERSION_URL_PATTERN, str(self.url)): + errors.append( + f'url does not match regex "{PUBLISHED_VERSION_URL_PATTERN}"' + ) + # Stricter than the ``id`` field pattern, which also accepts + # ``/draft`` and a lowercase prefix; only a published version id + # passes here. + if not re.match(DANDI_PUBID_PATTERN, self.id): + errors.append( + f'id "{self.id}" does not match the published-Dandiset pattern ' + f'"{DANDI_PUBID_PATTERN}"' + ) + if ( + self.assetsSummary.numberOfBytes == 0 + or self.assetsSummary.numberOfFiles == 0 + ): + errors.append( + "A Dandiset containing no files or zero bytes is not publishable" + ) + if self.doi is None: + if _INSTANCE_CONFIG.doi_prefix is None: + self.doi = "" + else: + errors.append("doi is required for a published Dandiset") + if errors: + raise ValueError("; ".join(errors)) + return self + class BareAsset(CommonModel): """Metadata used to describe an asset anywhere (local or server). @@ -1841,9 +1917,8 @@ class BareAsset(CommonModel): json_schema_extra={"nskey": "prov"}, ) - # Bare asset is to be just Asset. - schemaKey: Literal["Asset"] = Field( - "Asset", validate_default=True, json_schema_extra={"readOnly": True} + schemaKey: Literal["BareAsset"] = Field( + "BareAsset", validate_default=True, json_schema_extra={"readOnly": True} ) _ldmeta = { @@ -1894,6 +1969,10 @@ def digest_check( class Asset(BareAsset): """Metadata used to describe an asset on the server.""" + # A non-``None`` ``datePublished`` marks the asset as published, which + # additionally requires ``publishedBy``, a published-asset ``id``, and (for a + # non-zarr asset) a ``sha2_256`` digest; see ``check_publication_status``. + # all of the following are set by server id: str = Field( json_schema_extra={"readOnly": True}, description="Uniform resource identifier." @@ -1903,102 +1982,64 @@ class Asset(BareAsset): json_schema_extra={"readOnly": True, "nskey": "schema"} ) - -class Publishable(DandiBaseModel): - publishedBy: Union[AnyHttpUrl, PublishActivity] = Field( + # Publication-related fields. Present only on a published asset, i.e. when + # ``datePublished`` is set. + publishedBy: Optional[Union[AnyHttpUrl, PublishActivity]] = Field( + None, description="The URL should contain the provenance of the publishing process.", json_schema_extra={"readOnly": True, "nskey": DANDI_NSKEY}, ) - datePublished: datetime = Field( - json_schema_extra={"readOnly": True, "nskey": "schema"} - ) - schemaKey: Literal["Publishable", "Dandiset", "Asset"] = Field( - "Publishable", validate_default=True, json_schema_extra={"readOnly": True} - ) - - -_doi_field_kwargs: dict[str, Any] = { - "title": "DOI", - "pattern": DANDI_DOI_PATTERN, - "json_schema_extra": {"readOnly": True, "nskey": DANDI_NSKEY}, -} -if _INSTANCE_CONFIG.doi_prefix is None: - _doi_field_kwargs["default"] = "" - - -class PublishedDandiset(Dandiset, Publishable): - id: str = Field( - description="Uniform resource identifier.", - pattern=DANDI_PUBID_PATTERN, - json_schema_extra={"readOnly": True}, - ) - doi: str = Field(**_doi_field_kwargs) - """ - The DOI of the published Dandiset - - The value of the empty string indicates that there is no DOI for the published - Dandiset. - """ - - url: AnyHttpUrl = Field( - description="Permalink to the Dandiset.", - json_schema_extra={"readOnly": True, "nskey": "schema"}, - ) - releaseNotes: Optional[str] = Field( - None, - description="The description of the release", - json_schema_extra={"readOnly": True, "nskey": "schema"}, + datePublished: Optional[datetime] = Field( + None, json_schema_extra={"readOnly": True, "nskey": "schema"} ) - schemaKey: Literal["Dandiset"] = Field( - "Dandiset", validate_default=True, json_schema_extra={"readOnly": True} + # mypy flags the narrowed Literal as an incompatible override of + # BareAsset's, but pydantic allows it and ensure_schemakey pins each + # instance to its class name at runtime. + schemaKey: Literal["Asset"] = Field( # type: ignore[assignment] + "Asset", validate_default=True, json_schema_extra={"readOnly": True} ) - @field_validator("assetsSummary") - @classmethod - def check_filesbytes(cls, values: AssetsSummary) -> AssetsSummary: - if values.numberOfBytes == 0 or values.numberOfFiles == 0: - raise ValueError( - "A Dandiset containing no files or zero bytes is not publishable" - ) - return values - - @field_validator("url") - @classmethod - def check_url(cls, url: AnyHttpUrl) -> AnyHttpUrl: - if not re.match(PUBLISHED_VERSION_URL_PATTERN, str(url)): - raise ValueError( - f'string does not match regex "{PUBLISHED_VERSION_URL_PATTERN}"' - ) - return url - + @model_validator(mode="after") + def check_publication_status(self) -> Self: + """Enforce publication coherence keyed on ``datePublished``. -class PublishedAsset(Asset, Publishable): - id: str = Field( - description="Uniform resource identifier.", - pattern=ASSET_UUID_PATTERN, - json_schema_extra={"readOnly": True}, - ) + A non-``None`` ``datePublished`` marks the asset as published and + triggers the published-asset requirements; otherwise ``publishedBy`` + must be absent. + """ + errors = [] + if self.datePublished is None: + if self.publishedBy is not None: + errors.append("publishedBy is not allowed unless datePublished is set") + else: + if self.publishedBy is None: + errors.append("publishedBy is required for a published asset") + if not re.match(ASSET_UUID_PATTERN, self.id): + errors.append( + f'id "{self.id}" does not match the published-asset pattern ' + f'"{ASSET_UUID_PATTERN}"' + ) + if self.encodingFormat != "application/x-zarr": + if DigestType.sha2_256 not in self.digest: + errors.append("A non-zarr asset must have a sha2_256.") + elif not re.fullmatch(SHA256_PATTERN, self.digest[DigestType.sha2_256]): + errors.append( + "Digest must have an appropriate sha2_256 value. " + f"Got {self.digest[DigestType.sha2_256]}" + ) + if errors: + raise ValueError("; ".join(errors)) + return self - schemaKey: Literal["Asset"] = Field( - "Asset", validate_default=True, json_schema_extra={"readOnly": True} - ) - @field_validator("digest") - @classmethod - def digest_sha256check( - cls, v: Dict[DigestType, str], info: ValidationInfo - ) -> Dict[DigestType, str]: - values = info.data - if values.get("encodingFormat") != "application/x-zarr": - if DigestType.sha2_256 not in v: - raise ValueError("A non-zarr asset must have a sha2_256.") - digest = v[DigestType.sha2_256] - if not re.fullmatch(SHA256_PATTERN, digest): - raise ValueError( - f"Digest must have an appropriate sha2_256 value. Got {digest}" - ) - return v +# ``PublishedDandiset`` and ``PublishedAsset`` were consolidated into +# ``Dandiset`` and ``Asset`` respectively: a record is "published" when its +# ``datePublished`` is set (see each class's ``check_publication_status``). +# These names are kept as deprecated aliases for backward compatibility and +# will be removed in a future release. +PublishedDandiset = Dandiset +PublishedAsset = Asset def get_schema_version() -> str: diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 2327271d..eeb80597 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -14,7 +14,6 @@ from .utils import ( DANDISET_METADATA_DIR, - DOI_PREFIX, INSTANCE_NAME, METADATA_DIR, skipif_instance_name_not_dandi, @@ -132,27 +131,23 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, ), ( + # ``PublishedDandiset`` is now an alias of ``Dandiset``; its + # publication-only fields are optional (gated on ``datePublished``), + # so an incomplete instance reports the same missing fields as + # ``Dandiset``. {"schemaKey": "Dandiset"}, "PublishedDandiset", { - e - for e in [ - "assetsSummary", - "citation", - "contributor", - "datePublished", - "description", - "doi", - "id", - "identifier", - "license", - "manifestLocation", - "name", - "publishedBy", - "url", - "version", - ] - if DOI_PREFIX is not None or e != "doi" + "assetsSummary", + "citation", + "contributor", + "description", + "id", + "identifier", + "license", + "manifestLocation", + "name", + "version", }, ), ( @@ -162,24 +157,16 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, "PublishedDandiset", { - e - for e in [ - "assetsSummary", - "citation", - "contributor", - "datePublished", - "description", - "doi", - "id", - "identifier", - "license", - "manifestLocation", - "name", - "publishedBy", - "url", - "version", - ] - if DOI_PREFIX is not None or e != "doi" + "assetsSummary", + "citation", + "contributor", + "description", + "id", + "identifier", + "license", + "manifestLocation", + "name", + "version", }, ), ( @@ -196,23 +183,15 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, "PublishedDandiset", { - e - for e in [ - "assetsSummary", - "citation", - "datePublished", - "description", - "doi", - "id", - "identifier", - "license", - "manifestLocation", - "name", - "publishedBy", - "url", - "version", - ] - if DOI_PREFIX is not None or e != "doi" + "assetsSummary", + "citation", + "description", + "id", + "identifier", + "license", + "manifestLocation", + "name", + "version", }, ), ( @@ -237,12 +216,13 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: {"contentSize", "encodingFormat", "id", "identifier", "path", "contentUrl"}, ), ( + # ``PublishedAsset`` is now an alias of ``Asset``; ``publishedBy`` and + # ``datePublished`` are optional (gated on ``datePublished``), so an + # incomplete instance reports the same missing fields as ``Asset``. {"schemaKey": "Asset"}, "PublishedAsset", { - "datePublished", "contentSize", - "publishedBy", "encodingFormat", "id", "identifier", @@ -252,15 +232,15 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, ), ( + # A sha2_256-only digest fails ``digest_check`` (a non-zarr asset must + # have a dandi-etag), so ``digest`` is reported too. { "schemaKey": "Asset", "digest": {"dandi:sha2-256": sha256(b"test").hexdigest()}, }, "PublishedAsset", { - "datePublished", "contentSize", - "publishedBy", "encodingFormat", "id", "identifier", @@ -270,20 +250,21 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, ), ( + # A valid etag digest passes ``digest_check``; the sha2_256 + # requirement is gated on ``datePublished`` and the model validator + # never runs here (required fields are missing), so ``digest`` is not + # reported. { "schemaKey": "Asset", "digest": {"dandi:dandi-etag": md5(b"test").hexdigest() + "-1"}, }, "PublishedAsset", { - "datePublished", "contentSize", - "publishedBy", "encodingFormat", "id", "identifier", "path", - "digest", "contentUrl", }, ), @@ -297,9 +278,7 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, "PublishedAsset", { - "datePublished", "contentSize", - "publishedBy", "encodingFormat", "id", "identifier", diff --git a/dandischema/tests/test_models.py b/dandischema/tests/test_models.py index 2a50b174..2a3a027e 100644 --- a/dandischema/tests/test_models.py +++ b/dandischema/tests/test_models.py @@ -1,7 +1,6 @@ -from collections import namedtuple from enum import Enum from inspect import isclass -from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union, cast +from typing import Any, Dict, List, Literal, Optional, Type, Union, cast import anys import pydantic @@ -13,7 +12,6 @@ from .utils import DOI_PREFIX, INSTANCE_NAME, basic_publishmeta, skipif_no_doi_prefix from .. import models from ..models import ( - DANDI_INSTANCE_URL_PATTERN, AccessRequirements, AccessType, Affiliation, @@ -130,6 +128,17 @@ def test_asset() -> None: def test_asset_digest() -> None: + # Fields needed (beyond the bare asset fields) to make an instance a + # complete, published ``Asset`` so the ``datePublished``-gated checks (e.g. + # the sha2_256 requirement, formerly on ``PublishedAsset``) actually run. + pub_extra: Dict[str, Any] = { + "id": "dandiasset:6668d37f-e842-4b73-8c20-082a1dd0d31a", + "identifier": "6668d37f-e842-4b73-8c20-082a1dd0d31a", + "contentUrl": ["https://example.com/asset"], + "publishedBy": "https://example.com/dandi/publish", + "datePublished": "2021-01-01T00:00:00+00:00", + } + with pytest.raises(pydantic.ValidationError) as exc: models.BareAsset( contentSize=100, encodingFormat="nwb", digest={"sha1": ""}, path="/" @@ -179,8 +188,12 @@ def test_asset_digest() -> None: models.DigestType.sha2_256: 63 * "a", } with pytest.raises(pydantic.ValidationError) as exc: - models.PublishedAsset( # type: ignore[call-arg] - contentSize=100, encodingFormat="nwb", digest=digest_model, path="/" + models.Asset( + contentSize=100, + encodingFormat="nwb", + digest=digest_model, + path="/", + **pub_extra, ) assert any( "Digest must have an appropriate sha2_256 value." in el["msg"] @@ -190,20 +203,24 @@ def test_asset_digest() -> None: models.DigestType.dandi_etag: digest, models.DigestType.sha2_256: 64 * "a", } - with pytest.raises(pydantic.ValidationError) as exc: - models.PublishedAsset( # type: ignore[call-arg] - contentSize=100, encodingFormat="nwb", digest=digest_model, path="/" - ) - assert not any( - "Digest must have an appropriate dandi-etag value." in el["msg"] - for el in exc.value.errors() + # A complete published asset with a valid etag and sha2_256 is valid. + models.Asset( + contentSize=100, + encodingFormat="nwb", + digest=digest_model, + path="/", + **pub_extra, ) digest_model = { models.DigestType.dandi_etag: digest, } with pytest.raises(pydantic.ValidationError) as exc: - models.PublishedAsset( # type: ignore[call-arg] - contentSize=100, encodingFormat="nwb", digest=digest_model, path="/" + models.Asset( + contentSize=100, + encodingFormat="nwb", + digest=digest_model, + path="/", + **pub_extra, ) assert any( "A non-zarr asset must have a sha2_256." in el["msg"] @@ -455,54 +472,27 @@ def test_dandimeta_1(base_dandiset_metadata: dict[str, Any]) -> None: assert DOI_PREFIX is not None - # should work for Dandiset but PublishedDandiset should raise an error + # The base metadata is a valid draft (no datePublished), so it validates as a + # Dandiset (and, since PublishedDandiset is now an alias of Dandiset, as that + # too). Dandiset(**base_dandiset_metadata) - with pytest.raises(ValidationError) as exc: - PublishedDandiset(**base_dandiset_metadata) - - ErrDetail = namedtuple("ErrDetail", ["type", "msg"]) - - # Expected errors keyed by location of the respective error - # Note: Pydantic generated error messages are not provided for they are not in our - # control, and the error type should be indicative enough. - expected_errors: Dict[Tuple[Union[int, str], ...], ErrDetail] = { - ("id",): ErrDetail(type="string_pattern_mismatch", msg=None), - ("publishedBy",): ErrDetail(type="missing", msg=None), - ("datePublished",): ErrDetail(type="missing", msg=None), - ("url",): ErrDetail( - type="value_error", - msg="Value error, string does not match regex " - f'"^{DANDI_INSTANCE_URL_PATTERN}/dandiset/' - '\\d{6}/\\d+\\.\\d+\\.\\d+$"', - ), - ("assetsSummary",): ErrDetail( - type="value_error", - msg="Value error, " - "A Dandiset containing no files or zero bytes is not publishable", - ), - ("doi",): ErrDetail(type="missing", msg=None), - } - assert len(exc.value.errors()) == len(expected_errors) - for err in exc.value.errors(): - err_loc = err["loc"] - assert err_loc in expected_errors - - assert err["type"] == expected_errors[err_loc].type - if expected_errors[err_loc].msg is not None: - assert err["msg"] == expected_errors[err_loc].msg - - assert set([el["loc"][0] for el in exc.value.errors()]) == { - e - for e in [ - "assetsSummary", - "datePublished", - "publishedBy", - "doi", - "url", - "id", - ] - } + # Setting datePublished marks the record published and triggers the + # publish-only requirements. The draft id/url/assetsSummary and the missing + # publishedBy/doi are reported together in a single model-level error. + publishing = dict(base_dandiset_metadata, datePublished="2021-01-01T00:00:00+00:00") + with pytest.raises(ValidationError) as exc: + Dandiset(**publishing) + assert len(exc.value.errors()) == 1 + msg = exc.value.errors()[0]["msg"] + for expected in [ + "publishedBy is required for a published Dandiset", + "url does not match regex", + "does not match the published-Dandiset pattern", + "A Dandiset containing no files or zero bytes is not publishable", + "doi is required for a published Dandiset", + ]: + assert expected in msg # after adding basic meta required to publish: doi, datePublished, publishedBy, assetsSummary, # so PublishedDandiset should work @@ -531,9 +521,82 @@ def test_dandimeta_1(base_dandiset_metadata: dict[str, Any]) -> None: assert dumped["releaseNotes"] == "Releasing during testing" +def test_dandiset_publication_coherence( + base_dandiset_metadata: dict[str, Any], +) -> None: + """A draft Dandiset (no datePublished) must not carry publication-only fields.""" + Dandiset(**base_dandiset_metadata) # a valid draft + + draft_with_pub_field = dict( + base_dandiset_metadata, + publishedBy="https://example.com/dandi/publish", + ) + with pytest.raises(ValidationError) as exc: + Dandiset(**draft_with_pub_field) + assert ( + "publishedBy is not allowed unless datePublished is set" + in exc.value.errors()[0]["msg"] + ) + + +def test_asset_publication_coherence() -> None: + """A non-``None`` ``datePublished`` gates the published-asset requirements.""" + valid_etag = 32 * "a" + "-1" + valid_sha256 = 64 * "a" + server_fields: Dict[str, Any] = { + "id": "dandiasset:6668d37f-e842-4b73-8c20-082a1dd0d31a", + "identifier": "6668d37f-e842-4b73-8c20-082a1dd0d31a", + "contentUrl": ["https://example.com/asset"], + "contentSize": 100, + "encodingFormat": "application/x-nwb", + "path": "/", + } + + # An unpublished server asset needs only an etag (no sha2_256 required) and + # must not carry publishedBy. + Asset(**server_fields, digest={DigestType.dandi_etag: valid_etag}) + with pytest.raises(ValidationError) as exc: + Asset( + **server_fields, + digest={DigestType.dandi_etag: valid_etag}, + publishedBy="https://example.com/dandi/publish", + ) + assert ( + "publishedBy is not allowed unless datePublished is set" + in exc.value.errors()[0]["msg"] + ) + + # A complete published asset (publishedBy, a published id and a sha2_256) + # validates. + Asset( + **server_fields, + digest={ + DigestType.dandi_etag: valid_etag, + DigestType.sha2_256: valid_sha256, + }, + publishedBy="https://example.com/dandi/publish", + datePublished="2021-01-01T00:00:00+00:00", + ) + + # A published asset whose id is not a published-asset id is rejected. + with pytest.raises(ValidationError) as exc: + Asset( + **{**server_fields, "id": "not-a-dandiasset-id"}, + digest={ + DigestType.dandi_etag: valid_etag, + DigestType.sha2_256: valid_sha256, + }, + publishedBy="https://example.com/dandi/publish", + datePublished="2021-01-01T00:00:00+00:00", + ) + assert "does not match the published-asset pattern" in exc.value.errors()[0]["msg"] + + def test_schemakey() -> None: + # ``PublishedAsset``/``PublishedDandiset`` are deprecated aliases of + # ``Asset``/``Dandiset``, so under their alias names ``dir(models)`` yields + # the consolidated class whose ``schemaKey`` default is the base class name. typemap = { - "BareAsset": "Asset", "PublishedAsset": "Asset", "PublishedDandiset": "Dandiset", } @@ -592,6 +655,13 @@ def check_qname(qname: str, klass: type) -> None: "AssetsSummary", ): return + # ``publishedBy`` lives on both ``Asset`` and ``Dandiset`` since the + # shared ``Publishable`` mixin was removed during consolidation. + if qname == "dandi:publishedBy" and klass.__name__ in ( + "Asset", + "Dandiset", + ): + return raise ValueError(f"{qname},{klass} already exists {qnames[qname]}") qnames[qname] = klass @@ -630,7 +700,6 @@ def test_properties_mismatch() -> None: modelnames.remove("DandiBaseModel") modelnames.remove("CommonModel") modelnames.remove("Contributor") - modelnames.remove("Publishable") for val in modelnames: klass = getattr(models, val) if not isclass(klass) or not issubclass(klass, pydantic.BaseModel):