-
Notifications
You must be signed in to change notification settings - Fork 15
Add concept DOI model, dates field, IsVersionOf relations, DANDI identifier #401
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b178a3a
02c97f1
db8c67e
74d3d5c
0028819
cc07c42
2077205
8d7f335
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,3 +14,4 @@ sandbox/ | |
| venv/ | ||
| venvs/ | ||
| dandischema/_version.py | ||
| uv.lock | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,253 @@ | ||
| """ | ||
| Tests for DataCite DOI improvements. | ||
|
|
||
| T001: Optional doi field on Dandiset model for concept DOIs | ||
| T002: dates field in to_datacite() output | ||
| T003: IsVersionOf/HasVersion relation support in to_datacite() | ||
|
|
||
| Note: These tests were AI-generated (Claude Code) using TDD methodology. | ||
| """ | ||
|
|
||
| import random | ||
| from typing import Any, Dict | ||
|
|
||
| from pydantic import ValidationError | ||
| import pytest | ||
|
|
||
| from dandischema.conf import get_instance_config | ||
| from dandischema.models import ( | ||
| Dandiset, | ||
| LicenseType, | ||
| RoleType, | ||
| ) | ||
| from dandischema.tests.utils import ( | ||
| DOI_PREFIX, | ||
| INSTANCE_NAME, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIRC, setting the instance name is one possible strategy for "safe testing" against a real DataCite instance, i.e. instead of […]. So if we set up some e2e test where real findable DOIs get created, they would be namespaced and not interfere with future tests. |
||
| basic_publishmeta, | ||
| skipif_no_doi_prefix, | ||
| ) | ||
|
|
||
| from .. import to_datacite | ||
|
|
||
| _INSTANCE_CONFIG = get_instance_config() | ||
|
|
||
|
|
||
| @pytest.fixture(scope="function") | ||
| def metadata_with_publish() -> Dict[str, Any]: | ||
| """Create a complete metadata dict suitable for PublishedDandiset.""" | ||
| dandi_id_noprefix = f"000{random.randrange(100, 999)}" | ||
| dandi_id = f"{INSTANCE_NAME}:{dandi_id_noprefix}" | ||
| version = "0.0.0" | ||
| meta_dict = { | ||
| "identifier": dandi_id, | ||
| "id": f"{dandi_id}/{version}", | ||
| "name": "Test DOI Improvements Dataset", | ||
| "description": "Testing DOI metadata improvements", | ||
| "contributor": [ | ||
| { | ||
| "name": "Test_last, Test_first", | ||
| "email": "test@example.com", | ||
| "roleName": [RoleType("dcite:ContactPerson")], | ||
| "schemaKey": "Person", | ||
| } | ||
| ], | ||
| "license": [LicenseType("spdx:CC-BY-4.0")], | ||
| "url": f"https://dandiarchive.org/dandiset/{dandi_id_noprefix}/{version}", | ||
| "version": version, | ||
| "citation": "Test_last, Test_first 2026", | ||
| "manifestLocation": [ | ||
| f"https://api.dandiarchive.org/api/dandisets/{dandi_id_noprefix}/versions/draft/assets/" | ||
| ], | ||
| "assetsSummary": { | ||
| "schemaKey": "AssetsSummary", | ||
| "numberOfBytes": 100, | ||
| "numberOfFiles": 2, | ||
| "dataStandard": [{"schemaKey": "StandardsType", "name": "NWB"}], | ||
| "approach": [{"schemaKey": "ApproachType", "name": "electrophysiology"}], | ||
| "measurementTechnique": [ | ||
| { | ||
| "schemaKey": "MeasurementTechniqueType", | ||
| "name": "two-photon microscopy technique", | ||
| } | ||
| ], | ||
| "species": [{"schemaKey": "SpeciesType", "name": "Human"}], | ||
| }, | ||
| } | ||
| if DOI_PREFIX is not None: | ||
| meta_dict.update( | ||
| basic_publishmeta( | ||
| INSTANCE_NAME, dandi_id=dandi_id_noprefix, prefix=DOI_PREFIX | ||
| ) | ||
| ) | ||
| return meta_dict | ||
|
|
||
|
|
||
| # ============================================================================= | ||
| # T001: Optional doi field on Dandiset model for concept DOIs | ||
| # ============================================================================= | ||
|
|
||
|
|
||
| class TestDandisetConceptDoi: | ||
| """T001: The Dandiset model should accept an optional doi field.""" | ||
|
|
||
| @skipif_no_doi_prefix | ||
| def test_dandiset_accepts_doi_field( | ||
| self, metadata_with_publish: Dict[str, Any] | ||
| ) -> None: | ||
| """Dandiset model should accept a doi field without error.""" | ||
| meta = metadata_with_publish.copy() | ||
| dandi_id_noprefix = meta["identifier"].split(":")[1] | ||
| # Construct a concept DOI (no version suffix) | ||
| concept_doi = f"{DOI_PREFIX}/{INSTANCE_NAME.lower()}.{dandi_id_noprefix}" | ||
|
|
||
| # Build a Dandiset (not PublishedDandiset) with a doi | ||
| dandiset_meta = { | ||
| k: v | ||
| for k, v in meta.items() | ||
| if k not in ("datePublished", "publishedBy", "doi") | ||
| } | ||
| dandiset_meta["doi"] = concept_doi | ||
|
|
||
| # This should not raise — Dandiset should accept an optional doi | ||
| dandiset = Dandiset(**dandiset_meta) | ||
| assert dandiset.doi == concept_doi | ||
|
|
||
| def test_dandiset_doi_is_optional( | ||
| self, metadata_with_publish: Dict[str, Any] | ||
| ) -> None: | ||
| """Dandiset model should work without a doi field (default None).""" | ||
| meta = metadata_with_publish.copy() | ||
| dandiset_meta = { | ||
| k: v | ||
| for k, v in meta.items() | ||
| if k not in ("datePublished", "publishedBy", "doi") | ||
| } | ||
| # No doi field — should still work | ||
| dandiset = Dandiset(**dandiset_meta) | ||
| assert dandiset.doi is None | ||
|
|
||
| @skipif_no_doi_prefix | ||
| @pytest.mark.parametrize( | ||
| "bad_doi", | ||
| [ | ||
| "not-a-doi", | ||
| "10.1234/foo", # wrong prefix and format | ||
| f"{DOI_PREFIX}/{INSTANCE_NAME.lower()}.000123/0.0.0", # version DOI, not concept | ||
| "https://doi.org/10.80507/dandi.000123", # URL, not bare DOI | ||
| "", | ||
| ], | ||
| ) | ||
| def test_dandiset_doi_rejects_malformed( | ||
| self, metadata_with_publish: Dict[str, Any], bad_doi: str | ||
| ) -> None: | ||
| """Dandiset.doi should reject values not matching DANDI_CONCEPT_DOI_PATTERN.""" | ||
| meta = metadata_with_publish.copy() | ||
| dandiset_meta = { | ||
| k: v | ||
| for k, v in meta.items() | ||
| if k not in ("datePublished", "publishedBy", "doi") | ||
| } | ||
| dandiset_meta["doi"] = bad_doi | ||
| with pytest.raises(ValidationError): | ||
| Dandiset(**dandiset_meta) | ||
|
|
||
|
|
||
| # ============================================================================= | ||
| # T002: dates field in to_datacite() output | ||
| # ============================================================================= | ||
|
|
||
|
|
||
| class TestDataciteDatesField: | ||
| """T002: to_datacite() should include a dates field with dateType Issued.""" | ||
|
|
||
| @skipif_no_doi_prefix | ||
| def test_dates_field_present(self, metadata_with_publish: Dict[str, Any]) -> None: | ||
| """DataCite output should contain a 'dates' attribute.""" | ||
| datacite = to_datacite(metadata_with_publish) | ||
| attrs = datacite["data"]["attributes"] | ||
| assert "dates" in attrs, "dates field missing from DataCite output" | ||
|
|
||
| @skipif_no_doi_prefix | ||
| def test_dates_field_has_issued_type( | ||
| self, metadata_with_publish: Dict[str, Any] | ||
| ) -> None: | ||
| """The dates field should contain an entry with dateType 'Issued'.""" | ||
| datacite = to_datacite(metadata_with_publish) | ||
| attrs = datacite["data"]["attributes"] | ||
| dates = attrs.get("dates", []) | ||
| issued_dates = [d for d in dates if d.get("dateType") == "Issued"] | ||
| assert len(issued_dates) == 1, "Expected exactly one Issued date entry" | ||
|
|
||
| @skipif_no_doi_prefix | ||
| def test_dates_field_value_matches_publication_year( | ||
| self, metadata_with_publish: Dict[str, Any] | ||
| ) -> None: | ||
| """The Issued date value should correspond to datePublished.""" | ||
| datacite = to_datacite(metadata_with_publish) | ||
| attrs = datacite["data"]["attributes"] | ||
| dates = attrs.get("dates", []) | ||
| issued_dates = [d for d in dates if d.get("dateType") == "Issued"] | ||
| assert len(issued_dates) == 1 | ||
| # The date string should contain the publication year | ||
| pub_year = attrs["publicationYear"] | ||
| assert pub_year in issued_dates[0]["date"] | ||
|
|
||
|
|
||
| # ============================================================================= | ||
| # T003: IsVersionOf/HasVersion relation support in to_datacite() | ||
| # ============================================================================= | ||
|
|
||
|
|
||
| class TestDataciteConceptDoiRelations: | ||
| """T003: to_datacite() should support concept DOI relations.""" | ||
|
|
||
| @skipif_no_doi_prefix | ||
| def test_is_version_of_relation_added( | ||
| self, metadata_with_publish: Dict[str, Any] | ||
| ) -> None: | ||
| """When concept_doi is provided, an IsVersionOf relatedIdentifier should appear.""" | ||
| dandi_id_noprefix = metadata_with_publish["identifier"].split(":")[1] | ||
| concept_doi = f"{DOI_PREFIX}/{INSTANCE_NAME.lower()}.{dandi_id_noprefix}" | ||
|
|
||
| datacite = to_datacite(metadata_with_publish, concept_doi=concept_doi) | ||
| attrs = datacite["data"]["attributes"] | ||
|
|
||
| related = attrs.get("relatedIdentifiers", []) | ||
| is_version_of = [r for r in related if r.get("relationType") == "IsVersionOf"] | ||
| assert len(is_version_of) == 1, "Expected one IsVersionOf relation" | ||
| assert is_version_of[0]["relatedIdentifier"] == concept_doi | ||
| assert is_version_of[0]["relatedIdentifierType"] == "DOI" | ||
|
|
||
| @skipif_no_doi_prefix | ||
| def test_no_concept_doi_no_relation( | ||
| self, metadata_with_publish: Dict[str, Any] | ||
| ) -> None: | ||
| """When concept_doi is not provided, no IsVersionOf relation should appear.""" | ||
| datacite = to_datacite(metadata_with_publish) | ||
| attrs = datacite["data"]["attributes"] | ||
|
|
||
| related = attrs.get("relatedIdentifiers", []) | ||
| is_version_of = [r for r in related if r.get("relationType") == "IsVersionOf"] | ||
| assert ( | ||
| len(is_version_of) == 0 | ||
| ), "No IsVersionOf relation expected without concept_doi" | ||
|
|
||
|
|
||
| # ============================================================================= | ||
| # T004: DANDI identifier in alternateIdentifiers + version in Version property | ||
| # ============================================================================= | ||
|
|
||
|
|
||
| class TestDataciteVersionProperty: | ||
| """DataCite output should include version property.""" | ||
|
|
||
| @skipif_no_doi_prefix | ||
| def test_version_property_populated( | ||
| self, metadata_with_publish: Dict[str, Any] | ||
| ) -> None: | ||
| """The 'version' attribute should be populated in the DataCite output.""" | ||
| datacite = to_datacite(metadata_with_publish) | ||
| attrs = datacite["data"]["attributes"] | ||
| # version should already be present, but verify it matches the input | ||
| assert "version" in attrs | ||
| assert attrs["version"] == metadata_with_publish["version"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -95,6 +95,13 @@ | |
| if _INSTANCE_CONFIG.doi_prefix is not None | ||
| else rf"^({_INNER_DANDI_DOI_PATTERN}|)$" # This matches an empty string as well | ||
| ) | ||
| # Concept DOI pattern: like version DOI but without the version suffix | ||
| _INNER_CONCEPT_DOI_PATTERN = rf"{DOI_PREFIX_PATTERN}/{ID_PATTERN.lower()}\.\d{{6}}" | ||
| DANDI_CONCEPT_DOI_PATTERN = ( | ||
| rf"^{_INNER_CONCEPT_DOI_PATTERN}$" | ||
| if _INSTANCE_CONFIG.doi_prefix is not None | ||
| else rf"^({_INNER_CONCEPT_DOI_PATTERN}|)$" | ||
| ) | ||
| DANDI_PUBID_PATTERN = rf"^{ID_PATTERN}:{VERSION_PATTERN}$" | ||
| DANDI_NSKEY = "dandi" # Namespace for DANDI ontology | ||
|
|
||
|
|
@@ -1700,6 +1707,14 @@ def contributor_musthave_contact( | |
| ), | ||
| ] | ||
|
|
||
| doi: Optional[str] = Field( | ||
| default=None, | ||
| title="Concept DOI", | ||
| description="A version-independent DOI for the Dandiset as a whole.", | ||
| pattern=DANDI_CONCEPT_DOI_PATTERN, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @candleindark I believe this would be one more case where a subclass would overload the constraint, and thus we would have difficulty expressing it in LinkML, right? I feel that in such cases we just need to define the slot without any constraint, and have the specific classes that use the slot define their own constraints, thus making it all work without warnings... can we do that? NB: this is just a note/question to @candleindark, not a request to change anything here.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, but not exactly right here, though this will cause it. With this, we will have difficulty translating the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No. In the schema-level slot definitions in our LinkML schema of dandischema, a definition already contains just the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I agree. This is a problem adding to a category that we already have. I will just have to find a way to solve the problem category, not any particular problem.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That's what I am saying -- it will not override but only add it where actually used, i.e. not defined in the […] anyway -- remind me where the issue on this is; we could continue there.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't think that will work conceptually. Where a slot is defined doesn't change the slot inheritance behavior in LinkML. A more detailed post is in the issue you are looking for #389 (comment). However, overriding still works in practice because of the bug. |
||
| json_schema_extra={"readOnly": True, "nskey": DANDI_NSKEY}, | ||
| ) | ||
|
|
||
| name: str = Field( | ||
| title="Dandiset title", | ||
| description="A title associated with the Dandiset.", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this is done with spec-kit, it might be worth reviewing the spec itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 -- we would need the "sources"! ;)