Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions check_types.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ TYPED_FILES=(
wp1/logic/zim_files.py
wp1/logic/zim_schedules.py
wp1/selection/abstract_builder.py
wp1/selection/meta_builder.py
wp1/selection/meta_builder_test.py
wp1/selection/models/simple.py
wp1/selection/models/petscan.py
wp1/selection/models/sparql.py
Expand Down
5 changes: 5 additions & 0 deletions wp1/selection/abstract_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@

class AbstractBuilder:

def _as_text(self, value: bytes | str | int | None) -> str:
if isinstance(value, bytes):
return value.decode("utf-8")
return str(value)

def _upload_to_storage(
self, s3: KiwixStorage, selection: Selection, builder: Builder
) -> None:
Expand Down
97 changes: 97 additions & 0 deletions wp1/selection/meta_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import io

from botocore.exceptions import ClientError

import wp1.logic.builder as logic_builder
from wp1.exceptions import (
ObjectNotFoundError,
Wp1FatalSelectionError,
Wp1RetryableSelectionError,
)
from wp1.models.wp10.builder import Builder as Wp10Builder
from wp1.selection.abstract_builder import AbstractBuilder


class MetaBuilder(AbstractBuilder):
    """Base class for builders that reference other builders.

    Provides helpers for labelling referenced builders in error messages,
    recognising builders that are themselves meta builders, and fetching the
    latest materialized selection of a referenced builder from storage.
    """

    # Fully qualified model names of builders that reference other builders.
    META_BUILDER_MODELS = {"wp1.selection.models.combinator"}

    def _builder_model(self, builder: Wp10Builder) -> str:
        """Return the builder's ``b_model`` as text.

        Falls back to the empty string when the attribute is absent.
        """
        return self._as_text(getattr(builder, "b_model", ""))

    def _builder_label(self, builder: Wp10Builder) -> str:
        """Return a human-readable label for ``builder``.

        The label is ``"<name> (<id>)"`` when the builder has a non-None
        ``b_name``, otherwise just the builder id as text.
        """
        name = getattr(builder, "b_name", None)
        builder_id = self._as_text(getattr(builder, "b_id", ""))
        if name is None:
            return builder_id
        return f"{self._as_text(name)} ({builder_id})"

    def _reference_label(self, wp10db, builder_id: str) -> str:
        """Return a label for the builder identified by ``builder_id``.

        If the builder lookup raises ``ObjectNotFoundError``, the raw
        ``builder_id`` is returned so callers can still name the reference.
        """
        try:
            builder = logic_builder.get_builder(wp10db, builder_id)
        except ObjectNotFoundError:
            return builder_id
        return self._builder_label(builder)

    def _is_meta_builder(self, builder: Wp10Builder) -> bool:
        """Return True when the builder's model is in ``META_BUILDER_MODELS``."""
        return self._builder_model(builder) in self.META_BUILDER_MODELS

    def _dedupe(self, builder_ids: list[str]) -> list[str]:
        """Return ``builder_ids`` without duplicates, keeping first-seen order."""
        # dict preserves insertion order, so this keeps the first occurrence
        # of every id in its original position.
        return list(dict.fromkeys(builder_ids))

    def _fetch_selection_data(
        self, wp10db, s3, builder_id: str, reference_label: str | None = None
    ) -> bytes:
        """Fetch the latest materialized TSV snapshot for a referenced builder.

        Args:
            wp10db: Database handle passed through to
                ``logic_builder.latest_selection_for``.
            s3: Storage client exposing ``download_fileobj(key, fileobj)``.
            builder_id: Id of the referenced builder.
            reference_label: Optional label used in error messages; defaults
                to ``builder_id`` when omitted or empty.

        Returns:
            The raw bytes downloaded for the selection's object key.

        Raises:
            Wp1FatalSelectionError: If the latest selection has status
                ``FAILED``.
            Wp1RetryableSelectionError: If there is no selection, its status
                is anything other than ``OK``, it has no object key, or the
                download raises a ``ClientError``.
        """
        label = reference_label or builder_id
        selection = logic_builder.latest_selection_for(
            wp10db, builder_id, "text/tab-separated-values"
        )

        if selection is None:
            raise Wp1RetryableSelectionError(
                f"Referenced builder {label} has no usable selection "
                "(no selection found)"
            )

        status = self._as_text(selection.s_status)
        if status == "FAILED":
            raise Wp1FatalSelectionError(
                f"Referenced builder {label} latest selection failed"
            )

        if status != "OK":
            raise Wp1RetryableSelectionError(
                f"Referenced builder {label} latest selection is not ready "
                f"(status={status!r})"
            )

        # OK selections can have no object key when materialization produced
        # empty data, since AbstractBuilder only uploads filled selection.data.
        if selection.s_object_key is None:
            raise Wp1RetryableSelectionError(
                f"Referenced builder {label} latest selection has no object key"
            )

        # Normalise the key to text with the same helper used for the status.
        object_key = self._as_text(selection.s_object_key)

        buffer = io.BytesIO()
        try:
            s3.download_fileobj(object_key, buffer)
        except ClientError as e:
            code = e.response.get("Error", {}).get("Code", "Unknown")
            raise Wp1RetryableSelectionError(
                f"Failed to download selection for builder {label} "
                f"from S3 key {object_key!r}: {code}"
            ) from e

        return buffer.getvalue()
54 changes: 54 additions & 0 deletions wp1/selection/meta_builder_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from unittest import TestCase
from unittest.mock import MagicMock, patch

from wp1.exceptions import Wp1FatalSelectionError, Wp1RetryableSelectionError
from wp1.models.wp10.selection import Selection
from wp1.selection.meta_builder import MetaBuilder


def _selection(status=b"OK", object_key=b"object-key"):
    """Build a TSV ``Selection`` fixture for builder ``builder-a``.

    Args:
        status: Value stored in ``s_status``.
        object_key: Value stored in ``s_object_key``.
    """
    return Selection(
        s_builder_id=b"builder-a",
        s_content_type=b"text/tab-separated-values",
        s_version=1,
        s_status=status,
        s_object_key=object_key,
    )


class MetaBuilderTest(TestCase):
    """Tests for ``MetaBuilder._fetch_selection_data``."""

    def setUp(self):
        self.builder = MetaBuilder()

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data(self, mock_latest_selection):
        mock_latest_selection.return_value = _selection()
        s3 = MagicMock()
        # Simulate the download by writing into the buffer the code passes in.
        s3.download_fileobj.side_effect = lambda _key, buf: buf.write(b"first\n")

        actual = self.builder._fetch_selection_data(MagicMock(), s3, "builder-a")

        self.assertEqual(b"first\n", actual)
        s3.download_fileobj.assert_called_once()
        # The bytes object key from the selection must be decoded to text
        # before it is handed to the storage client.
        self.assertEqual("object-key", s3.download_fileobj.call_args.args[0])

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_failed_selection(self, mock_latest_selection):
        mock_latest_selection.return_value = _selection(status=b"FAILED")

        with self.assertRaises(Wp1FatalSelectionError):
            self.builder._fetch_selection_data(MagicMock(), MagicMock(), "builder-a")

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_retryable_selection(self, mock_latest_selection):
        mock_latest_selection.return_value = _selection(status=b"CAN_RETRY")

        with self.assertRaises(Wp1RetryableSelectionError):
            self.builder._fetch_selection_data(MagicMock(), MagicMock(), "builder-a")

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_missing_selection(self, mock_latest_selection):
        mock_latest_selection.return_value = None

        with self.assertRaises(Wp1RetryableSelectionError):
            self.builder._fetch_selection_data(MagicMock(), MagicMock(), "builder-a")

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_missing_object_key(self, mock_latest_selection):
        # An OK selection without an object key is reported as retryable and
        # must not trigger a storage download.
        mock_latest_selection.return_value = _selection(object_key=None)
        s3 = MagicMock()

        with self.assertRaises(Wp1RetryableSelectionError):
            self.builder._fetch_selection_data(MagicMock(), s3, "builder-a")

        s3.download_fileobj.assert_not_called()
Loading
Loading