diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py index ea76d4668..972c50981 100644 --- a/ami/main/api/serializers.py +++ b/ami/main/api/serializers.py @@ -1,6 +1,5 @@ import datetime -from django.conf import settings from django.db.models import QuerySet from guardian.shortcuts import get_perms from rest_framework import serializers @@ -74,20 +73,16 @@ class Meta: class SourceImageThumbnailSerializer(DefaultSerializer): + """Adds a ``thumbnails`` field via :meth:`SourceImage.thumbnail_urls`. + Viewsets must apply :meth:`SourceImageQuerySet.with_thumbnails`. + """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fields["thumbnails"] = serializers.SerializerMethodField() - def get_thumbnails(self, obj: SourceImage) -> dict | None: - return { - label: reverse_with_params( - "sourceimagethumbnail-detail", - args=(obj.pk,), - request=self.context.get("request"), - params={"label": label}, - ) - for label in settings.THUMBNAILS["SIZES"] - } + def get_thumbnails(self, obj: SourceImage) -> dict[str, str]: + return obj.thumbnail_urls(request=self.context.get("request")) class SourceImageNestedSerializer(DefaultSerializer): diff --git a/ami/main/api/views.py b/ami/main/api/views.py index 333d20db5..85f7a4054 100644 --- a/ami/main/api/views.py +++ b/ami/main/api/views.py @@ -451,9 +451,10 @@ def get_queryset(self) -> QuerySet: Prefetch( "captures", queryset=SourceImage.objects.order_by("-size").select_related( - "deployment", - "deployment__data_source", - )[:num_example_captures], + "deployment", "deployment__data_source" + ) + # Required by SourceImage.thumbnail_urls in the nested serializer. + .with_thumbnails()[:num_example_captures], to_attr="example_captures", ) ) @@ -632,11 +633,11 @@ def get_queryset(self) -> QuerySet: self.require_project = True project = self.get_active_project() - queryset = queryset.select_related( - "event", - "deployment", - "deployment__data_source", - ).order_by("timestamp") + queryset = ( + queryset.select_related("event", "deployment", "deployment__data_source") + # Required by SourceImage.thumbnail_urls in SourceImageThumbnailSerializer. + .with_thumbnails().order_by("timestamp") + ) if self.action == "list": # It's cumbersome to override the default list view, so customize the queryset here diff --git a/ami/main/models.py b/ami/main/models.py index 870c7f288..601b3720b 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -1309,7 +1309,8 @@ def first_capture(self): # Ideally this would be an annotated field, rather than an additional query. # raise NotImplementedError("This is added an annotated field, it should not be called directly.") # return SourceImage.objects.filter(event=self).order_by("timestamp").first().with_detections() - return SourceImage.objects.filter(event=self).order_by("timestamp").first() + # with_thumbnails() satisfies the thumbnail_urls prefetch contract for the nested serializer. + return SourceImage.objects.filter(event=self).order_by("timestamp").with_thumbnails().first() def summary_data(self): """ @@ -2092,6 +2093,12 @@ def with_was_processed(self): processed_exists = models.Exists(Detection.objects.filter(source_image_id=models.OuterRef("pk"))) return self.annotate(was_processed=processed_exists) + def with_thumbnails(self): + """Prefetch ``thumbnails`` so :meth:`SourceImage.thumbnail_urls` decides + warm/cold in memory instead of firing a SELECT per row. + """ + return self.prefetch_related("thumbnails") + class SourceImageManager(models.Manager.from_queryset(SourceImageQuerySet)): pass @@ -2247,7 +2254,7 @@ def get_was_processed(self, algorithm_key: str | None = None) -> bool: ``SourceImageQuerySet.with_was_processed()``). Falls back to a DB query otherwise. Do not call in bulk without the annotation — use ``with_was_processed()`` - on the queryset instead to avoid N+1 queries. + on the queryset instead so each row does not trigger its own DB query. :param algorithm_key: If provided, only detections from this algorithm are checked. The annotation does not filter by algorithm; per-algorithm @@ -2399,6 +2406,59 @@ def get_custom_user_permissions(self, user) -> list[str]: custom_perms.add(Project.Permissions.RUN_SINGLE_IMAGE_JOB) return list(custom_perms) + def thumbnail_is_valid(self, spec: dict, thumb: "SourceImageThumbnail | None") -> bool: + """Whether ``thumb`` satisfies ``spec`` and need not be regenerated. + + ``thumb.width`` stores the requested spec width (see the generator), so the + comparison is strict equality; legacy encoder-width rows read invalid and + self-heal on next generation. A None ``last_modified`` on either side means + "no signal of change" (matches ``NULL < x`` → ``False`` in SQL). + """ + if thumb is None or not thumb.path or thumb.width != spec["width"]: + return False + source_changed = ( + self.last_modified is not None + and thumb.last_modified is not None + and thumb.last_modified < self.last_modified + ) + return not source_changed + + def thumbnail_urls(self, request: Request | None = None) -> dict[str, str]: + """Per-label ``{label: url}`` for this capture's thumbnails. + + Warm (cached row valid for the spec) → direct storage URL. Cold/stale → + route URL into the thumbnail viewset, which (re)generates lazily. + + The warm path needs prefetched ``thumbnails`` + (:meth:`SourceImageQuerySet.with_thumbnails`). Without the prefetch — a + freshly created instance in a write response, or a caller that skipped + ``with_thumbnails`` — every label falls back to the route URL without + querying. This never lazily loads per object (which would be an N+1 in + list contexts); list endpoints must apply ``with_thumbnails`` to get the + warm storage URLs, and the list query-count tests pin that. + """ + # Local import avoids a models ↔ serializers cycle at module load time. + from ami.base.serializers import reverse_with_params + + prefetched = "thumbnails" in getattr(self, "_prefetched_objects_cache", {}) + thumbs: dict[str, "SourceImageThumbnail"] = {t.label: t for t in self.thumbnails.all()} if prefetched else {} + + out: dict[str, str] = {} + for label, spec in settings.THUMBNAILS["SIZES"].items(): + thumb = thumbs.get(label) + if self.thumbnail_is_valid(spec, thumb): + out[label] = default_storage.url(thumb.path) + else: + # Qualified ``api:`` namespace so this also resolves when ``request`` + # is None (management commands, template tags). + out[label] = reverse_with_params( + "api:sourceimagethumbnail-detail", + args=(self.pk,), + request=request, + params={"label": label}, + ) + return out + def find_or_generate_thumbnail_for_label(self, label): try: thumb = self.thumbnails.get(label=label) @@ -2407,14 +2467,9 @@ def find_or_generate_thumbnail_for_label(self, label): size = settings.THUMBNAILS["SIZES"].get(label) prefix = settings.THUMBNAILS["STORAGE_PREFIX"] - # ``self.last_modified`` can be None on legacy rows synced before upload - # backfill — treat None as "no signal of change", not an error. - source_changed = ( - self.last_modified is not None and thumb is not None and thumb.last_modified < self.last_modified - ) # The row is trusted without a storage existence check; an orphan row (blob # deleted out of band) shows a broken image until the row is removed. - if not thumb or not thumb.path or thumb.width != size["width"] or source_changed: + if not self.thumbnail_is_valid(size, thumb): img = PIL.Image.open(BytesIO(fetch_image_content(self.public_url(raise_errors=True)))) # JPEG only supports L, RGB, CMYK — convert other modes (e.g. RGBA PNGs) # or PIL raises ``OSError: cannot write mode as JPEG``. diff --git a/ami/main/tests.py b/ami/main/tests.py index 1c7aae4e2..5238e3096 100644 --- a/ami/main/tests.py +++ b/ami/main/tests.py @@ -518,6 +518,173 @@ def test_thumbnail_delete_removes_storage_blob(self): self.assertFalse(default_storage.exists(path), "pre_delete signal must clean the storage blob") + def test_serializer_emits_storage_url_when_thumb_row_exists(self): + """Warm-path optimization: when a SourceImageThumbnail row exists with the + configured width, the serializer must emit ``default_storage.url(row.path)`` + directly instead of the lazy-regen route URL. This is the whole point of + the direct-URLs change — every cached thumbnail must skip Django on the + warm path. + """ + from django.core.files.storage import default_storage + + # Pre-create a thumbnail row whose width matches THUMBNAILS['SIZES']['small']. + configured_width = settings.THUMBNAILS["SIZES"]["small"]["width"] + self.first_capture.thumbnails.create( + path="thumbnails/cached/abc.jpg", label="small", width=configured_width, height=180, size=42 + ) + + response = self.client.get(f"/api/v2/captures/{self.first_capture.pk}/?project_id={self.project.pk}") + self.assertEqual(response.status_code, 200) + small_url = response.json()["thumbnails"]["small"] + + # ``small`` should be the storage URL, not the route URL. + expected_storage_url = default_storage.url("thumbnails/cached/abc.jpg") + self.assertEqual(small_url, expected_storage_url) + # ``medium`` has no cached row → falls back to the route URL. + self.assertURLEqual( + response.json()["thumbnails"]["medium"], f"{self.base_url}{self.first_capture.pk}/?label=medium" + ) + + def test_serializer_falls_back_to_route_url_when_width_mismatches_spec(self): + """Any width != the configured spec → route URL so the next browser fetch + triggers lazy regen via the redirect viewset. The check is strict equality, + so this covers both a changed setting (stored wider/narrower than spec) and + legacy rows that recorded PIL's rounded output (e.g. 239 for a 240 spec) + before the #1306 spec-width fix — those self-heal through one regeneration. + """ + configured_width = settings.THUMBNAILS["SIZES"]["small"]["width"] + # +100 = settings changed since last gen; -1 = legacy encoder rounding. + for delta in (100, -1): + with self.subTest(delta=delta): + self.first_capture.thumbnails.update_or_create( + label="small", + defaults={ + "path": f"thumbnails/stale/{delta}.jpg", + "width": configured_width + delta, + "height": 180, + "size": 42, + }, + ) + response = self.client.get(f"/api/v2/captures/{self.first_capture.pk}/?project_id={self.project.pk}") + self.assertEqual(response.status_code, 200) + self.assertURLEqual( + response.json()["thumbnails"]["small"], f"{self.base_url}{self.first_capture.pk}/?label=small" + ) + + def test_serializer_falls_back_to_route_url_when_source_changed(self): + """If the source image was re-uploaded after the cached row was + generated (``source.last_modified > row.last_modified``), the serializer + must emit the route URL so the next browser fetch regenerates via the + redirect viewset. Mirrors the predicate the generator already uses. + + Guards https://github.com/RolnickLab/antenna/pull/1331#discussion_r3373715397. + """ + configured_width = settings.THUMBNAILS["SIZES"]["small"]["width"] + # Row generated at T0. ``last_modified`` is ``auto_now_add`` so set + # ``self.first_capture.last_modified`` to a later timestamp via UPDATE + # below to flip the staleness predicate. + self.first_capture.thumbnails.create( + path="thumbnails/preupload/abc.jpg", label="small", width=configured_width, height=180, size=42 + ) + # Source bytes were re-uploaded after the cached row was written. + SourceImage.objects.filter(pk=self.first_capture.pk).update( + last_modified=timezone.now() + datetime.timedelta(minutes=1) + ) + + response = self.client.get(f"/api/v2/captures/{self.first_capture.pk}/?project_id={self.project.pk}") + self.assertEqual(response.status_code, 200) + # Source-changed → emit route URL so the redirect viewset regenerates. + self.assertURLEqual( + response.json()["thumbnails"]["small"], f"{self.base_url}{self.first_capture.pk}/?label=small" + ) + + def test_thumbnail_urls_model_method_emits_warm_and_route(self): + """``SourceImage.thumbnail_urls`` is the single source of truth for the + per-label URL contract: warm storage URL when a cached row exists at + the configured width, route URL otherwise. + + Exercising the model method directly keeps the contract regression-tested + even if the serializer surface changes (e.g. a future template tag or + management command that needs the same dict). + """ + from django.core.files.storage import default_storage + + configured_width = settings.THUMBNAILS["SIZES"]["small"]["width"] + self.first_capture.thumbnails.create( + path="thumbnails/cached/direct.jpg", label="small", width=configured_width, height=180, size=42 + ) + + # Prefetch via ``with_thumbnails()`` to exercise the warm path; without the + # prefetch the method emits route URLs without querying (see the test below). + capture = SourceImage.objects.with_thumbnails().get(pk=self.first_capture.pk) + urls = capture.thumbnail_urls(request=None) + + # Warm path → direct storage URL for the cached label. + self.assertEqual(urls["small"], default_storage.url("thumbnails/cached/direct.jpg")) + # Cold path → route URL for labels without a cached row. With ``request=None`` + # the URL is path-only (no scheme/host) — matches DRF's reverse() contract. + self.assertIn(f"/api/v2/captures/thumbnails/{self.first_capture.pk}/", urls["medium"]) + self.assertIn("label=medium", urls["medium"]) + + def test_thumbnail_urls_without_prefetch_emits_route_urls_without_querying(self): + """Without prefetched thumbnails, ``thumbnail_urls`` must not fire a + per-object SELECT (an N+1 in list contexts) — it falls back to route URLs. + A cached row exists here, but the un-prefetched call must ignore it rather + than query for it.""" + configured_width = settings.THUMBNAILS["SIZES"]["small"]["width"] + self.first_capture.thumbnails.create( + path="thumbnails/cached/noprefetch.jpg", label="small", width=configured_width, height=180, size=42 + ) + + capture = SourceImage.objects.get(pk=self.first_capture.pk) + with self.assertNumQueries(0): + urls = capture.thumbnail_urls(request=None) + + # Cached row ignored (not prefetched) → route URL, not the storage URL. + self.assertIn(f"/api/v2/captures/thumbnails/{self.first_capture.pk}/", urls["small"]) + self.assertIn("label=small", urls["small"]) + + def test_warm_list_serves_storage_urls_with_single_thumbnail_query(self): + """The contract this PR exists to deliver: a warm gallery list serves direct + storage URLs while reading the thumbnail table once (the prefetch), not once + per row. + + Runtime no longer enforces the prefetch (un-prefetched falls back to route + URLs without querying), so this test is the only guard against both + regressions: dropping ``with_thumbnails()`` from the viewset (→ route URLs, + the perf win silently lost) and a per-row lazy read (→ N+1). Needs a + multi-row fixture — a single row hides an N+1. + """ + from django.core.files.storage import default_storage + from django.db import connection + from django.test.utils import CaptureQueriesContext + + configured_width = settings.THUMBNAILS["SIZES"]["small"]["width"] + captures = list(SourceImage.objects.filter(deployment=self.deployment)) + self.assertGreater(len(captures), 1, "need >1 capture to detect an N+1") + for cap in captures: + cap.thumbnails.create( + path=f"thumbnails/warm/{cap.pk}.jpg", label="small", width=configured_width, height=180, size=42 + ) + + with CaptureQueriesContext(connection) as ctx: + response = self.client.get(f"/api/v2/captures/?project_id={self.project.pk}") + self.assertEqual(response.status_code, 200) + + results = response.json()["results"] + self.assertGreater(len(results), 1) + for rec in results: + # Warm row → direct storage URL (regression guard: dropped prefetch → route URL). + self.assertEqual( + rec["thumbnails"]["small"], + default_storage.url(f"thumbnails/warm/{rec['id']}.jpg"), + ) + + thumb_queries = [q for q in ctx.captured_queries if "sourceimagethumbnail" in q["sql"].lower()] + self.assertLessEqual( + len(thumb_queries), 1, f"thumbnails must be one prefetch, not per-row; got {len(thumb_queries)}" + ) + class TestImageGrouping(TestCase): def setUp(self) -> None: