From ae4ba07b8f7888f537e73455288fd6f033ed5ff2 Mon Sep 17 00:00:00 2001 From: Brian Helba Date: Wed, 10 Jun 2026 14:12:56 -0400 Subject: [PATCH] Fix a bunch of type checking issues --- isic/core/api/collection.py | 2 +- isic/core/dsl.py | 4 +-- .../management/commands/anonymize_database.py | 24 +++++++++++++----- .../commands/set_isic_permissions.py | 2 +- isic/core/models/collection.py | 20 ++++++++++----- isic/core/models/image.py | 4 ++- isic/core/pagination.py | 22 ++++++++-------- isic/core/permissions.py | 3 ++- isic/core/search.py | 4 +-- isic/core/serializers.py | 2 +- isic/core/services/__init__.py | 2 +- isic/core/services/collection/doi.py | 10 ++++---- isic/core/services/collection/image.py | 2 +- isic/core/tests/test_doi.py | 12 ++++++--- isic/core/views/collections.py | 3 ++- isic/core/views/doi.py | 1 + .../commands/export_metadata_parquet.py | 3 ++- .../commands/metadata_files_from_csv.py | 2 +- isic/ingest/models/accession.py | 8 +++--- isic/ingest/models/cohort.py | 8 ++++-- isic/ingest/models/contributor.py | 4 +-- isic/ingest/models/metadata_version.py | 3 ++- isic/ingest/models/publish_request.py | 11 +++++++- isic/ingest/tasks.py | 2 +- isic/ingest/tests/factories.py | 18 ++++++------- isic/ingest/tests/test_accession.py | 8 +++--- isic/ingest/tests/test_parquet.py | 5 ++-- isic/ingest/utils/metadata.py | 2 +- isic/settings/_sentry_utils.py | 25 +++++++++---------- isic/settings/base.py | 2 +- isic/stats/tests/test_tasks.py | 5 ++-- isic/studies/models/study.py | 14 +++++++++-- isic/studies/models/study_task.py | 4 ++- isic/studies/tests/factories.py | 14 +++++------ .../studies/tests/test_multiselect_browser.py | 1 + pyproject.toml | 3 +++ 36 files changed, 161 insertions(+), 98 deletions(-) diff --git a/isic/core/api/collection.py b/isic/core/api/collection.py index 4df4f4a09..2beb4af8c 100644 --- a/isic/core/api/collection.py +++ b/isic/core/api/collection.py @@ -279,7 +279,7 @@ def collection_populate_from_search(request, id: int, payload: SearchQueryIn): if collection.locked: return 409, {"error": "Collection is locked"} - if collection.public and payload.to_queryset(request.user).private().exists(): # type: ignore[attr-defined] + if collection.public and payload.to_queryset(request.user).private().exists(): return 409, {"error": "Collection is public and cannot contain private images."} # Pass data instead of validated_data because the celery task is going to revalidate. diff --git a/isic/core/dsl.py b/isic/core/dsl.py index ac2cf80b1..ca25675c7 100644 --- a/isic/core/dsl.py +++ b/isic/core/dsl.py @@ -183,7 +183,7 @@ def es_query(s, loc, toks): def es_query_and(s, loc, toks): - ret = {"bool": {"filter": []}} + ret: dict[str, Any] = {"bool": {"filter": []}} if isinstance(toks, ParseResults): # Explicit ANDs come in as parse results q_objects = toks.asList() @@ -204,7 +204,7 @@ def es_query_and(s, loc, toks): def es_query_or(s, loc, toks): - ret = {"bool": {"should": []}} + ret: dict[str, Any] = {"bool": {"should": []}} for tok in toks[0]: ret["bool"]["should"].append(tok) toks[0] = ret diff --git a/isic/core/management/commands/anonymize_database.py b/isic/core/management/commands/anonymize_database.py index d30c78e2e..a54e6f9e2 100644 --- a/isic/core/management/commands/anonymize_database.py +++ b/isic/core/management/commands/anonymize_database.py @@ -5,6 +5,7 @@ metadata. All users are given the password "password". """ +from collections.abc import Callable import hashlib import logging import random @@ -16,6 +17,7 @@ from django.contrib.auth.models import User from django.contrib.sessions.models import Session from django.db import connection, transaction +from django.db.models import Model, QuerySet import djclick as click from faker import Faker from oauth2_provider.models import AccessToken, RefreshToken @@ -124,17 +126,27 @@ def anonymize_hash_id(salt: str, hash_id: str, *, seen: set) -> str: raise RuntimeError(f"Failed to generate unique hash_id after 100 attempts: {hash_id}") -def _batch_anonymize(queryset, fields, transform, *, label, dry_run, batch_size): # noqa: PLR0913 +def _batch_anonymize[M: Model]( # noqa: PLR0913 + queryset: QuerySet[M], + fields: list[str], + transform: Callable[[M], None], + *, + label: str, + dry_run: bool, + batch_size: int, +) -> int: model_class = queryset.model total = queryset.count() - objects_to_update = [] + objects_to_update: list[M] = [] if dry_run: label = f"[DRY RUN] {label}" def _flush(): if objects_to_update and not dry_run: - model_class.objects.bulk_update(objects_to_update, fields, batch_size=batch_size) + model_class._default_manager.bulk_update( + objects_to_update, fields, batch_size=batch_size + ) with click.progressbar( queryset.iterator(), length=total, label=label, show_eta=True, show_percent=True @@ -154,8 +166,8 @@ def _flush(): def _anonymize_users(faker, salt, *, dry_run, batch_size): password_hash = make_password("password") - names_cache = {} - emails_cache = {} + names_cache: dict[str, str] = {} + emails_cache: dict[str, str] = {} def transform(user): user.username = anonymize_username(salt, user.username) @@ -244,7 +256,7 @@ def _anonymize_private_ids(salt, *, dry_run, batch_size): counts = {} for model_class, field_name, id_type in models_to_process: - cache = {} + cache: dict[str, str] = {} def transform(obj, _field=field_name, _type=id_type, _cache=cache): current_value = getattr(obj, _field) diff --git a/isic/core/management/commands/set_isic_permissions.py b/isic/core/management/commands/set_isic_permissions.py index e10a07235..86ab00c5b 100644 --- a/isic/core/management/commands/set_isic_permissions.py +++ b/isic/core/management/commands/set_isic_permissions.py @@ -53,7 +53,7 @@ def add_staff_group(): User, ZipUpload, ]: - content_type = ContentType.objects.get_for_model(model) # type: ignore[arg-type] + content_type = ContentType.objects.get_for_model(model) for permission in ["view", "change"]: group.permissions.add( Permission.objects.get( diff --git a/isic/core/models/collection.py b/isic/core/models/collection.py index 04f6086c3..22cba17ae 100644 --- a/isic/core/models/collection.py +++ b/isic/core/models/collection.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + from django.contrib.auth.models import User from django.contrib.postgres.indexes import GinIndex, OpClass from django.core.exceptions import ValidationError @@ -5,13 +9,15 @@ from django.db.models.constraints import CheckConstraint, UniqueConstraint from django.db.models.expressions import F from django.db.models.functions import Upper -from django.db.models.query import QuerySet from django.db.models.query_utils import Q from django.urls import reverse from django_extensions.db.models import TimeStampedModel from .image import Image +if TYPE_CHECKING: + from django.db.models.query import QuerySet + class CollectionQuerySet(models.QuerySet["Collection"]): def pinned(self): @@ -62,7 +68,9 @@ class Meta(TimeStampedModel.Meta): creator = models.ForeignKey(User, on_delete=models.PROTECT, related_name="collections") - images = models.ManyToManyField(Image, related_name="collections", through="CollectionImage") + images: models.ManyToManyField[Image, CollectionImage] = models.ManyToManyField( + Image, related_name="collections", through="CollectionImage" + ) # unique per user. names of pinned collections can't be used. name = models.CharField(max_length=200) @@ -73,7 +81,7 @@ class Meta(TimeStampedModel.Meta): public = models.BooleanField(default=False) - shares = models.ManyToManyField( + shares: models.ManyToManyField[User, CollectionShare] = models.ManyToManyField( User, through="CollectionShare", through_fields=("collection", "grantee"), @@ -135,11 +143,11 @@ def counts(self): } return self.cached_counts - def full_clean(self, exclude=None, validate_unique=True): # noqa: FBT002 - if self.pk and self.public and self.images.private().exists(): # type: ignore[attr-defined] + def full_clean(self, *args, **kwargs): + if self.pk and self.public and self.images.private().exists(): raise ValidationError("Can't make collection public, it contains private images.") - return super().full_clean(exclude=exclude, validate_unique=validate_unique) + return super().full_clean(*args, **kwargs) class CollectionImage(models.Model): diff --git a/isic/core/models/image.py b/isic/core/models/image.py index 61a261833..2438f3413 100644 --- a/isic/core/models/image.py +++ b/isic/core/models/image.py @@ -96,7 +96,9 @@ class Meta(CreationSortedTimeStampedModel.Meta): # index is used because public is filtered in every permissions check public = models.BooleanField(default=False, db_index=True) - shares = models.ManyToManyField(User, through="ImageShare", through_fields=("image", "grantee")) + shares: models.ManyToManyField[User, ImageShare] = models.ManyToManyField( + User, through="ImageShare", through_fields=("image", "grantee") + ) objects = ImageManager() diff --git a/isic/core/pagination.py b/isic/core/pagination.py index 864ac59ab..bfc7306fa 100644 --- a/isic/core/pagination.py +++ b/isic/core/pagination.py @@ -41,7 +41,7 @@ def _clamp(val: int, min_: int, max_: int) -> int: return max(min_, min(val, max_)) -def _reverse_order(order: tuple): +def _reverse_order(order: Sequence[str]): """ Reverse the ordering specification for a Django ORM query. @@ -83,8 +83,8 @@ def decode_cursor(cls, encoded_cursor: str | None) -> Cursor: offset = int(tokens.get("o", ["0"])[0]) offset = _clamp(offset, 0, CursorPagination._offset_cutoff) - reverse = tokens.get("r", ["0"])[0] - reverse = bool(int(reverse)) + reverse_str = tokens.get("r", ["0"])[0] + reverse = bool(int(reverse_str)) position = tokens.get("p", [None])[0] except (TypeError, ValueError) as e: @@ -117,7 +117,7 @@ def paginate_queryset( if not queryset.query.order_by: queryset = queryset.order_by(*self.ordering) - order = queryset.query.order_by + order: tuple[str, ...] = queryset.query.order_by total_count = ( # let the queryset define a custom_count attribute in the event that computing @@ -227,11 +227,11 @@ def next_link( # noqa: PLR0913 base_url: str, page: list, cursor: Cursor, - order: tuple, + order: Sequence[str], has_previous: bool, limit: int, - next_position: str, - previous_position: str, + next_position: str | None, + previous_position: str | None, ) -> str: if page and cursor.reverse and cursor.offset: # If we're reversing direction and we have an offset cursor @@ -287,12 +287,12 @@ def previous_link( # noqa: PLR0913 base_url: str, page: list, cursor: Cursor, - order: tuple, + order: Sequence[str], has_next: bool, limit: int, - next_position: str, - previous_position: str, - ): + next_position: str | None, + previous_position: str | None, + ) -> str: if page and not cursor.reverse and cursor.offset: # If we're reversing direction and we have an offset cursor # then we cannot use the first position we find as a marker. diff --git a/isic/core/permissions.py b/isic/core/permissions.py index 31c646a79..2eb116ce3 100644 --- a/isic/core/permissions.py +++ b/isic/core/permissions.py @@ -1,4 +1,5 @@ from functools import wraps +from typing import Any from urllib.parse import urlparse import django.apps @@ -27,7 +28,7 @@ def authenticate(self, request: HttpRequest, key: str | None) -> User | None: class UserPermissions: model = User perms = ["view_staff"] - filters = {} + filters: dict[str, Any] = {} @staticmethod def view_staff(user_obj, _=None): diff --git a/isic/core/search.py b/isic/core/search.py index 093fca7d8..a6ea0d187 100644 --- a/isic/core/search.py +++ b/isic/core/search.py @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) -IMAGE_INDEX_MAPPINGS = {"properties": {}} +IMAGE_INDEX_MAPPINGS: dict[str, Any] = {"properties": {}} DEFAULT_SEARCH_AGGREGATES = {} COUNTS_AGGREGATES = {} @@ -170,7 +170,7 @@ def add_to_search_index(image: Image) -> None: image = Image.objects.with_elasticsearch_properties().get(pk=image.pk) get_elasticsearch_client().index( index=settings.ISIC_ELASTICSEARCH_IMAGES_INDEX, - id=image.pk, + id=str(image.pk), document=image.to_elasticsearch_document(source_only=True), ) diff --git a/isic/core/serializers.py b/isic/core/serializers.py index dfd03893a..7b7d68d13 100644 --- a/isic/core/serializers.py +++ b/isic/core/serializers.py @@ -80,7 +80,7 @@ def to_queryset( qs = qs.from_search_query(self.query) # type: ignore[attr-defined] if self.collections: - qs = qs.filter( # type: ignore[union-attr] + qs = qs.filter( collections__in=get_visible_objects( user, "core.view_collection", diff --git a/isic/core/services/__init__.py b/isic/core/services/__init__.py index 2f6635465..aaa900205 100644 --- a/isic/core/services/__init__.py +++ b/isic/core/services/__init__.py @@ -55,7 +55,7 @@ def staff_image_metadata_csv(*, qs: QuerySet[Image]) -> Generator[list[str] | di .distinct() ) - remapped_keys = reduce( + remapped_keys: list[str] = reduce( operator.iadd, [ [field.internal_id_name, field.csv_field_name] diff --git a/isic/core/services/collection/doi.py b/isic/core/services/collection/doi.py index 1a85e5ee0..4933447d5 100644 --- a/isic/core/services/collection/doi.py +++ b/isic/core/services/collection/doi.py @@ -35,7 +35,7 @@ from isic.ingest.services.publish import unembargo_image if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Sequence from urllib.parse import ParseResult from django.contrib.auth.models import User @@ -120,8 +120,8 @@ def check_create_draft_doi_allowed( *, user: User, collection: Collection, - supplemental_files: Iterable[dict[str, str]] | None = None, - related_identifiers: Iterable[RelatedIdentifierIn] | None = None, + supplemental_files: Sequence[dict[str, str]] | None = None, + related_identifiers: Sequence[RelatedIdentifierIn] | None = None, ) -> None: if not user.has_perm("core.create_doi", collection): raise ValidationError("You don't have permissions to do that.") @@ -190,8 +190,8 @@ def create_collection_draft_doi( user: User, collection: Collection, description: str, - supplemental_files: Iterable[dict[str, str]] | None = None, - related_identifiers: Iterable[RelatedIdentifierIn] | None = None, + supplemental_files: Sequence[dict[str, str]] | None = None, + related_identifiers: Sequence[RelatedIdentifierIn] | None = None, ) -> DraftDoi: check_create_draft_doi_allowed( user=user, diff --git a/isic/core/services/collection/image.py b/isic/core/services/collection/image.py index 538623344..15c2e45e2 100644 --- a/isic/core/services/collection/image.py +++ b/isic/core/services/collection/image.py @@ -64,7 +64,7 @@ def move_collection_images( if not ignore_lock and (src_collection.locked or dest_collection.locked): raise ValidationError("Can't move images to/from a locked collection.") - if dest_collection.public and src_collection.images.private().exists(): # type: ignore[attr-defined] + if dest_collection.public and src_collection.images.private().exists(): raise ValidationError("Can't move private images to a public collection.") with transaction.atomic(): diff --git a/isic/core/tests/test_doi.py b/isic/core/tests/test_doi.py index 983534598..55f75ad16 100644 --- a/isic/core/tests/test_doi.py +++ b/isic/core/tests/test_doi.py @@ -261,9 +261,11 @@ def test_doi_files( doi.refresh_from_db() assert doi.bundle is not None + assert doi.bundle_size is not None assert doi.bundle_size > 0 assert doi.metadata is not None + assert doi.metadata_size is not None assert doi.metadata_size > 0 with tempfile.TemporaryDirectory() as temp_dir, zipfile.ZipFile(doi.bundle) as zf: @@ -322,7 +324,7 @@ def test_api_doi_creation( draft_doi = DraftDoi.objects.get(collection=public_collection_with_public_images) assert draft_doi.supplemental_files.count() == 1 - assert draft_doi.supplemental_files.first().description == "test supplemental file" + assert draft_doi.supplemental_files.get().description == "test supplemental file" assert draft_doi.related_identifiers.count() == 2 @@ -368,7 +370,7 @@ def test_doi_bundle_includes_supplemental_files( assert doi.bundle is not None assert doi.supplemental_files.count() == 1 - supplemental_file = doi.supplemental_files.first() + supplemental_file = doi.supplemental_files.get() with tempfile.TemporaryDirectory() as temp_dir, zipfile.ZipFile(doi.bundle) as zf: zf.extractall(temp_dir) @@ -553,7 +555,7 @@ def test_draft_doi_complete_lifecycle( # noqa: PLR0915 assert draft_doi.collection == collection assert draft_doi.supplemental_files.count() == 1 - supplemental_file = draft_doi.supplemental_files.first() + supplemental_file = draft_doi.supplemental_files.get() assert supplemental_file.description == "Test supplemental file" assert storages["default"].exists(supplemental_file.blob.name) @@ -611,7 +613,7 @@ def test_draft_doi_complete_lifecycle( # noqa: PLR0915 } assert final_doi.supplemental_files.count() == 1 - final_supplemental_file = final_doi.supplemental_files.first() + final_supplemental_file = final_doi.supplemental_files.get() assert final_supplemental_file.description == "Test supplemental file" assert storages["sponsored"].exists(final_supplemental_file.blob.name) @@ -627,8 +629,10 @@ def test_draft_doi_complete_lifecycle( # noqa: PLR0915 assert mock_datacite_promote_draft_doi_to_findable.call_count == 1 assert final_doi.bundle is not None + assert final_doi.bundle_size is not None assert final_doi.bundle_size > 0 assert final_doi.metadata is not None + assert final_doi.metadata_size is not None assert final_doi.metadata_size > 0 diff --git a/isic/core/views/collections.py b/isic/core/views/collections.py index d1263681f..858ec958e 100644 --- a/isic/core/views/collections.py +++ b/isic/core/views/collections.py @@ -1,5 +1,6 @@ from collections.abc import Iterable from datetime import UTC, datetime +from typing import Any from django.contrib import messages from django.contrib.auth.decorators import login_required @@ -98,7 +99,7 @@ def csv_rows(buffer: Buffer) -> Iterable[bytes]: def collection_create_doi(request, pk): collection = get_object_or_404(Collection, pk=pk) - context = { + context: dict[str, Any] = { "collection": collection, "error": None, } diff --git a/isic/core/views/doi.py b/isic/core/views/doi.py index 39b32f57b..fe5d2408f 100644 --- a/isic/core/views/doi.py +++ b/isic/core/views/doi.py @@ -31,6 +31,7 @@ # @cache_page(timeout=60 * 60 * 24 * 7, key_prefix="doi_detail") def doi_detail(request, slug): + doi: Doi | DraftDoi try: doi = Doi.objects.select_related("collection").get(slug=slug) except Doi.DoesNotExist: diff --git a/isic/ingest/management/commands/export_metadata_parquet.py b/isic/ingest/management/commands/export_metadata_parquet.py index 8fc2a016a..c31069351 100644 --- a/isic/ingest/management/commands/export_metadata_parquet.py +++ b/isic/ingest/management/commands/export_metadata_parquet.py @@ -11,6 +11,7 @@ import pyarrow.parquet as pq from isic.core.models import Image +from isic.core.models.base import CopyrightLicense from isic.ingest.utils.parquet import ( ROW_GROUP_SIZE, ParquetMetadataRow, @@ -30,7 +31,7 @@ def export_metadata_parquet(): ParquetMetadataRow( isic_id=image.isic_id, attribution=image.accession.attribution, - copyright_license=image.accession.copyright_license, + copyright_license=CopyrightLicense(image.accession.copyright_license), **image.metadata, ) for image in qs.iterator() diff --git a/isic/ingest/management/commands/metadata_files_from_csv.py b/isic/ingest/management/commands/metadata_files_from_csv.py index 28627a1c2..7e087aa7c 100644 --- a/isic/ingest/management/commands/metadata_files_from_csv.py +++ b/isic/ingest/management/commands/metadata_files_from_csv.py @@ -36,7 +36,7 @@ def metadata_files_from_csv(user_id, csv_path, isic_id_column): columns_to_drop = {"isic_id", "filename", isic_id_column} cohort_files = defaultdict(list) - cohort_columns = defaultdict(set) + cohort_columns: defaultdict[int, set[str]] = defaultdict(set) with Path(csv_path).open() as f: reader = csv.DictReader(f) diff --git a/isic/ingest/models/accession.py b/isic/ingest/models/accession.py index e3a3a3f0e..e81fbe51a 100644 --- a/isic/ingest/models/accession.py +++ b/isic/ingest/models/accession.py @@ -1,4 +1,4 @@ -from collections.abc import Callable +from collections.abc import Callable, Mapping from copy import deepcopy from dataclasses import dataclass import io @@ -6,7 +6,7 @@ from mimetypes import guess_type from pathlib import Path, PurePosixPath import tempfile -from typing import Literal, TypeVar +from typing import Any, Literal, TypeVar from uuid import uuid4 from django.contrib.auth.models import User @@ -773,7 +773,7 @@ def _require_unpublished(self): raise ValidationError("Can't modify the accession as it's already been published.") def update_metadata( # noqa: C901 - self, user: User, csv_row: dict, *, ignore_image_check=False, reset_review=True + self, user: User, csv_row: Mapping[str, Any], *, ignore_image_check=False, reset_review=True ) -> bool: """ Apply metadata to an accession from a row in a CSV. @@ -855,7 +855,7 @@ def maybe_remap_internal_metadata(metadata: dict) -> bool: delete_accession_review(accession=self) if modified: - remapped_internal_values = {} + remapped_internal_values: dict[str, Any] = {} for field in self.remapped_internal_fields: remapped_internal_values.setdefault(field.relation_name, {}) diff --git a/isic/ingest/models/cohort.py b/isic/ingest/models/cohort.py index 8aec3377c..48219554a 100644 --- a/isic/ingest/models/cohort.py +++ b/isic/ingest/models/cohort.py @@ -122,8 +122,12 @@ def edit_cohort(user_obj, obj): return CohortPermissions.edit_cohort_list(user_obj).contains(obj) @staticmethod - def add_accession(user_obj: User, obj: Cohort) -> bool: - return obj and user_obj.is_authenticated and obj.contributor.owners.contains(user_obj) + def add_accession(user_obj: User, obj: Cohort | None) -> bool: + return ( + obj is not None + and user_obj.is_authenticated + and obj.contributor.owners.contains(user_obj) + ) Cohort.perms_class = CohortPermissions diff --git a/isic/ingest/models/contributor.py b/isic/ingest/models/contributor.py index ac9378120..25e60ac73 100644 --- a/isic/ingest/models/contributor.py +++ b/isic/ingest/models/contributor.py @@ -79,9 +79,9 @@ def add_contributor(user_obj, _=None): return user_obj.is_authenticated @staticmethod - def add_cohort(user_obj: User, obj: Contributor) -> bool: + def add_cohort(user_obj: User, obj: Contributor | None) -> bool: return ( - obj + obj is not None and user_obj.is_authenticated and ContributorPermissions.view_contributor(user_obj, obj) ) diff --git a/isic/ingest/models/metadata_version.py b/isic/ingest/models/metadata_version.py index 8e37052fd..6ccb1a871 100644 --- a/isic/ingest/models/metadata_version.py +++ b/isic/ingest/models/metadata_version.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +from typing import Any from django.contrib.auth.models import User from django.db import models @@ -62,7 +63,7 @@ def _diff(a, b): ignore_order=True, verbose_level=2, ) - formatted_result = { + formatted_result: dict[str, dict[str, Any]] = { "added": {}, "removed": {}, "changed": {}, diff --git a/isic/ingest/models/publish_request.py b/isic/ingest/models/publish_request.py index 500e3ad6d..2b995d10a 100644 --- a/isic/ingest/models/publish_request.py +++ b/isic/ingest/models/publish_request.py @@ -1,6 +1,13 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + from django.contrib.auth.models import User from django.db import models +if TYPE_CHECKING: + from .accession import Accession + class PublishRequestAccession(models.Model): publish_request = models.ForeignKey( @@ -23,7 +30,9 @@ class PublishRequest(models.Model): created = models.DateTimeField(auto_now_add=True) creator = models.ForeignKey(User, on_delete=models.PROTECT, related_name="publish_requests") - accessions = models.ManyToManyField("Accession", through="PublishRequestAccession") + accessions: models.ManyToManyField[Accession, PublishRequestAccession] = models.ManyToManyField( + "Accession", through="PublishRequestAccession" + ) # the additional collections to which the images will be added, including the magic collection collections = models.ManyToManyField("core.Collection") public = models.BooleanField(default=False) diff --git a/isic/ingest/tasks.py b/isic/ingest/tasks.py index 048085125..561dd4f96 100644 --- a/isic/ingest/tasks.py +++ b/isic/ingest/tasks.py @@ -58,7 +58,7 @@ def generate_blobs(): ): # avoid .delay since we want to avoid putting tens of thousands of elements # into the transaction.on_commit list. - generate_accession_blob_task.apply_async(args=[accession_id]) + generate_accession_blob_task.apply_async(args=(accession_id,)) transaction.on_commit(generate_blobs) diff --git a/isic/ingest/tests/factories.py b/isic/ingest/tests/factories.py index 89f29b5d2..0b0281c7c 100644 --- a/isic/ingest/tests/factories.py +++ b/isic/ingest/tests/factories.py @@ -38,13 +38,13 @@ class Meta: creator = factory.SubFactory(UserFactory) @factory.post_generation - def owners(self, create: bool, extracted: Any, **kwargs: Any) -> None: + def owners(obj: Contributor, create: bool, extracted: Any, **kwargs: Any) -> None: # noqa: ARG004 if not create: return if extracted is None: # The creator is the default owner. - extracted = [self.creator] - self.owners.add(*extracted) + extracted = [obj.creator] + obj.owners.add(*extracted) class CohortFactory(factory.django.DjangoModelFactory): @@ -163,7 +163,7 @@ class Params: # https://github.com/pytest-dev/pytest-factoryboy/issues/67 @factory.post_generation - def short_diagnosis(self, create: bool, extracted: Any, **kwargs: Any) -> None: + def short_diagnosis(obj: Accession, create: bool, extracted: Any, **kwargs: Any) -> None: if extracted is None: # Normal flow, no short_diagnosis provided. return @@ -178,13 +178,13 @@ def short_diagnosis(self, create: bool, extracted: Any, **kwargs: Any) -> None: raise ValueError("Unknown additional arguments to short_diagnosis: {kwargs}") for key, value in DiagnosisEnum.as_dict(diagnosis).items(): - setattr(self, key, value) + setattr(obj, key, value) if create: - self.save() + obj.save() @factory.post_generation - def short_anatom_site(self, create: bool, extracted: Any, **kwargs: Any) -> None: + def short_anatom_site(obj: Accession, create: bool, extracted: Any, **kwargs: Any) -> None: # noqa: ARG004 if extracted is None: return @@ -196,10 +196,10 @@ def short_anatom_site(self, create: bool, extracted: Any, **kwargs: Any) -> None raise ValueError(f"Unknown short_anatom_site: {extracted}") for key, value in AnatomSiteEnum.as_dict(anatom_site).items(): - setattr(self, key, value) + setattr(obj, key, value) if create: - self.save() + obj.save() class AccessionReviewFactory(factory.django.DjangoModelFactory): diff --git a/isic/ingest/tests/test_accession.py b/isic/ingest/tests/test_accession.py index bc175e567..8bddd5725 100644 --- a/isic/ingest/tests/test_accession.py +++ b/isic/ingest/tests/test_accession.py @@ -98,8 +98,10 @@ def test_accession_orients_images(user, cohort): original_image = PIL.Image.open(original_blob) - assert original_image._exif.get(PIL.ExifTags.Base.Make) == "Canon" - assert original_image._exif.get(PIL.ExifTags.Base.Orientation) == 6 # 90 degrees clockwise + assert original_image.getexif().get(PIL.ExifTags.Base.Make) == "Canon" + assert ( + original_image.getexif().get(PIL.ExifTags.Base.Orientation) == 6 + ) # 90 degrees clockwise accession = create_accession( creator=user, @@ -113,7 +115,7 @@ def test_accession_orients_images(user, cohort): processed_image = PIL.Image.open(accession.blob) # assert that all exif data is stripped but the orientation is applied - assert processed_image._exif is None + assert len(processed_image.getexif()) == 0 assert processed_image.height == original_image.width assert processed_image.width == original_image.height diff --git a/isic/ingest/tests/test_parquet.py b/isic/ingest/tests/test_parquet.py index dd9b1ebba..7d4f796b2 100644 --- a/isic/ingest/tests/test_parquet.py +++ b/isic/ingest/tests/test_parquet.py @@ -6,6 +6,7 @@ from pydantic_to_pyarrow import get_pyarrow_schema import pytest +from isic.core.models.base import CopyrightLicense from isic.ingest.utils.parquet import ( EXCLUDED_FIELDS, FIELD_ORDER, @@ -38,7 +39,7 @@ def test_parquet_metadata_row_roundtrip_through_parquet(): row = ParquetMetadataRow( isic_id="ISIC_0000001", attribution="Test Attribution", - copyright_license="CC-0", + copyright_license=CopyrightLicense("CC-0"), sex="male", age_approx=50, clin_size_long_diam_mm=Decimal("3.14"), @@ -79,7 +80,7 @@ def test_parquet_metadata_row_from_image(image_factory, accession_factory): row = ParquetMetadataRow( isic_id=image.isic_id, attribution=image.accession.attribution, - copyright_license=image.accession.copyright_license, + copyright_license=CopyrightLicense(image.accession.copyright_license), **image.metadata, ) diff --git a/isic/ingest/utils/metadata.py b/isic/ingest/utils/metadata.py index 9bead6e33..7dd2b97b5 100644 --- a/isic/ingest/utils/metadata.py +++ b/isic/ingest/utils/metadata.py @@ -32,7 +32,7 @@ class Problem(BaseModel): def validate_csv_format_and_filenames(rows: csv.DictReader, cohort: Cohort) -> list[Problem]: problems = [] - filenames = Counter() + filenames: Counter[str] = Counter() if not rows.fieldnames or "filename" not in rows.fieldnames: problems.append(Problem(message="Unable to find a filename column in CSV.")) diff --git a/isic/settings/_sentry_utils.py b/isic/settings/_sentry_utils.py index ea7e07f00..515673c82 100644 --- a/isic/settings/_sentry_utils.py +++ b/isic/settings/_sentry_utils.py @@ -1,8 +1,9 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: + from celery import Task from sentry_sdk._types import SamplingContext @@ -20,18 +21,16 @@ def get_sentry_performance_sample_rate(sampling_context: SamplingContext) -> flo ) from isic.studies.tasks import populate_study_tasks_task - infrequent_tasks = { - task.name - for task in [ - extract_zip_task, - validate_metadata_task, - update_metadata_task, - publish_cohort_task, - populate_collection_from_isic_ids_task, - populate_collection_from_search_task, - populate_study_tasks_task, - ] + infrequent_tasks: set[Task[Any, Any]] = { + extract_zip_task, + validate_metadata_task, + update_metadata_task, + publish_cohort_task, + populate_collection_from_isic_ids_task, + populate_collection_from_search_task, + populate_study_tasks_task, } + infrequent_task_names = {task.name for task in infrequent_tasks} if "wsgi_environ" in sampling_context: path: str = sampling_context["wsgi_environ"]["PATH_INFO"] @@ -51,7 +50,7 @@ def get_sentry_performance_sample_rate(sampling_context: SamplingContext) -> flo ): return 0.20 elif "celery_job" in sampling_context: - if sampling_context["celery_job"]["task"] in infrequent_tasks: + if sampling_context["celery_job"]["task"] in infrequent_task_names: return 1.0 return 0.05 diff --git a/isic/settings/base.py b/isic/settings/base.py index 4c8475ae8..c94e95c92 100644 --- a/isic/settings/base.py +++ b/isic/settings/base.py @@ -295,7 +295,7 @@ ISIC_CDN_LOG_BUCKET: str | None = env.str("DJANGO_ISIC_CDN_LOG_BUCKET", default=None) -TEMPLATES[0]["OPTIONS"]["context_processors"] += [ # type: ignore[index] +TEMPLATES[0]["OPTIONS"]["context_processors"] += [ "isic.core.context_processors.js_sentry", "isic.core.context_processors.citation_styles", ] diff --git a/isic/stats/tests/test_tasks.py b/isic/stats/tests/test_tasks.py index 6ae370613..159a9a560 100644 --- a/isic/stats/tests/test_tasks.py +++ b/isic/stats/tests/test_tasks.py @@ -38,8 +38,9 @@ def test_collect_google_analytics_task(mocker, settings): collect_google_analytics_metrics_task() assert GaMetrics.objects.count() == 1 - assert GaMetrics.objects.first().num_sessions == 10 - assert GaMetrics.objects.first().sessions_per_country == [ + ga_metrics = GaMetrics.objects.get() + assert ga_metrics.num_sessions == 10 + assert ga_metrics.sessions_per_country == [ { "country_name": "United States", "country_numeric": "840", diff --git a/isic/studies/models/study.py b/isic/studies/models/study.py index e38b46b68..67304eb21 100644 --- a/isic/studies/models/study.py +++ b/isic/studies/models/study.py @@ -1,7 +1,10 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + from django.contrib.auth.models import User from django.core.exceptions import ValidationError from django.db import models -from django.db.models.query import QuerySet from django.db.models.query_utils import Q from django.urls import reverse from django_extensions.db.models import TimeStampedModel @@ -11,6 +14,11 @@ from .feature import Feature from .question import Question +if TYPE_CHECKING: + from django.db.models.query import QuerySet + + from .study_question import StudyQuestion + class StudyQuerySet(models.QuerySet["Study"]): def public(self): @@ -42,7 +50,9 @@ class Meta(TimeStampedModel.Meta): ) features = models.ManyToManyField(Feature, related_name="studies") - questions = models.ManyToManyField(Question, through="StudyQuestion", related_name="studies") + questions: models.ManyToManyField[Question, StudyQuestion] = models.ManyToManyField( + Question, through="StudyQuestion", related_name="studies" + ) # public study means that all images in the study must be public # and all of the related data to the study is public (responses). diff --git a/isic/studies/models/study_task.py b/isic/studies/models/study_task.py index e15be83e7..81d77872f 100644 --- a/isic/studies/models/study_task.py +++ b/isic/studies/models/study_task.py @@ -1,3 +1,5 @@ +from datetime import timedelta + from django.contrib.auth.models import User from django.db import models from django.db.models.query import QuerySet @@ -19,7 +21,7 @@ def for_user(self, user: User): def just_completed(self): return self.filter( - annotation__created__gte=timezone.now() - timezone.timedelta(seconds=60) + annotation__created__gte=timezone.now() - timedelta(seconds=60) ).order_by("annotation__created") def random_next(self): diff --git a/isic/studies/tests/factories.py b/isic/studies/tests/factories.py index 76c64139f..4a735a53e 100644 --- a/isic/studies/tests/factories.py +++ b/isic/studies/tests/factories.py @@ -69,30 +69,30 @@ class Meta: public = factory.Faker("boolean") @factory.post_generation - def owners(self, create: bool, extracted: Any, **kwargs: Any) -> None: + def owners(obj: Study, create: bool, extracted: Any, **kwargs: Any) -> None: # noqa: ARG004 if not create: return if extracted is None: # The creator is the default owner. - extracted = [self.creator] - self.owners.add(*extracted) + extracted = [obj.creator] + obj.owners.add(*extracted) @factory.post_generation - def features(self, create: bool, extracted: Any, **kwargs: Any) -> None: + def features(obj: Study, create: bool, extracted: Any, **kwargs: Any) -> None: # noqa: ARG004 if not create: return if extracted: # A list of features were passed in, use them - self.features.add(*extracted) + obj.features.add(*extracted) @factory.post_generation - def questions(self, create, extracted, *, required: bool = False, **kwargs): + def questions(obj: Study, create, extracted, *, required: bool = False, **kwargs: Any) -> None: # noqa: ARG004 if not create: return if extracted: # A list of questions were passed in, use them # TODO: the required status should be settable per question - self.questions.add(*extracted, through_defaults={"required": required}) + obj.questions.add(*extracted, through_defaults={"required": required}) class StudyTaskFactory(factory.django.DjangoModelFactory): diff --git a/isic/studies/tests/test_multiselect_browser.py b/isic/studies/tests/test_multiselect_browser.py index 590446166..65b608022 100644 --- a/isic/studies/tests/test_multiselect_browser.py +++ b/isic/studies/tests/test_multiselect_browser.py @@ -97,6 +97,7 @@ def test_multiselect_picker_search_select_all_and_submit( # Verify responses in the database annotation = Annotation.objects.get(task=task) response = annotation.responses.get(question=question) + assert response.value is not None selected_pks = response.value["choices"] selected_texts = set( QuestionChoice.objects.filter(pk__in=selected_pks).values_list("text", flat=True) diff --git a/pyproject.toml b/pyproject.toml index 0b3490fe2..42951b454 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -223,6 +223,9 @@ required-imports = ["from __future__ import annotations"] section-order = ["future", "standard-library", "third-party", "first-party", "local-folder", "resonant-settings"] sections = {"resonant-settings" = ["resonant_settings"] } +[tool.ruff.lint.pep8-naming] +staticmethod-decorators = ["factory.post_generation"] + [tool.ruff.lint.pydocstyle] convention = "pep257"