Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions check_types.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ TYPED_FILES=(
wp1/logic/zim_files.py
wp1/logic/zim_schedules.py
wp1/selection/abstract_builder.py
wp1/selection/meta_builder.py
wp1/selection/meta_builder_test.py
wp1/selection/models/simple.py
wp1/selection/models/petscan.py
wp1/selection/models/sparql.py
Expand Down
14 changes: 14 additions & 0 deletions wp1/logic/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@

logger = logging.getLogger(__name__)

# Model identifiers that is_meta_builder() treats as meta builders.
META_BUILDER_MODELS = {"wp1.selection.models.combinator"}


def builder_label_by_id(wp10db: Connection, builder_id: str | bytes) -> str:
    """Return a display label for the builder with the given ID.

    Args:
        wp10db: Database connection passed through to ``get_builder``.
        builder_id: ID of the builder to look up, as text or bytes.

    Returns:
        The builder's ``label`` when the lookup succeeds. When ``get_builder``
        raises ``ObjectNotFoundError``, the ID itself converted to text.
    """
    try:
        builder = get_builder(wp10db, builder_id)
    except ObjectNotFoundError:
        # An unknown builder still gets a usable label: its raw ID.
        return logic_util.as_text(builder_id)
    return builder.label


def is_meta_builder(builder: Builder) -> bool:
    """Return True when the builder's model is listed in META_BUILDER_MODELS."""
    return builder.model in META_BUILDER_MODELS


def get_builder_module_class(model: str) -> type[AbstractBuilder]:
"""Dynamically imports the builder module and returns the Builder class."""
Expand Down
6 changes: 6 additions & 0 deletions wp1/logic/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
DATABASE_WIKI_TS = config["DATABASE_WIKI_TS"]


def as_text(value: bytes | str | int | None) -> str:
if isinstance(value, bytes):
return value.decode("utf-8")
return str(value)


def wp10_timestamp_to_unix(ts):
if ts is None:
raise ValueError("Cannot convert None timestamp")
Expand Down
33 changes: 30 additions & 3 deletions wp1/models/wp10/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import attr

from wp1.logic import util as logic_util
from wp1.constants import TS_FORMAT_WP10
from wp1.timestamp import utcnow

Expand Down Expand Up @@ -33,6 +34,32 @@ class Builder:
b_current_version: int = attr.ib(default=0)
b_selection_zim_version: int = attr.ib(default=0)

@property
def id(self) -> str:
    """The ``b_id`` field converted to text via ``logic_util.as_text``."""
    return logic_util.as_text(self.b_id)

@property
def name(self) -> str:
    """The ``b_name`` field converted to text via ``logic_util.as_text``."""
    return logic_util.as_text(self.b_name)

@property
def user_id(self) -> str:
    """The ``b_user_id`` field converted to text via ``logic_util.as_text``."""
    return logic_util.as_text(self.b_user_id)

@property
def project(self) -> str:
    """The ``b_project`` field converted to text via ``logic_util.as_text``."""
    return logic_util.as_text(self.b_project)

@property
def model(self) -> str:
    """The ``b_model`` field converted to text via ``logic_util.as_text``."""
    return logic_util.as_text(self.b_model)

@property
def label(self) -> str:
    """A display label for this builder.

    Returns ``"<name> (<id>)"`` when ``b_name`` is set, otherwise just the ID.
    """
    if self.b_name is not None:
        return f"{self.name} ({self.id})"
    return self.id

@property
def created_at_dt(self) -> datetime.datetime:
"""The timestamp parsed into a datetime.datetime object."""
Expand Down Expand Up @@ -75,10 +102,10 @@ def set_updated_at_now(self) -> None:

def to_web_dict(self) -> dict[str, Any]:
    """Return the builder's web-facing fields as a dictionary.

    Returns:
        A dict with the text ``name``, ``project`` and ``model`` of the
        builder, plus ``params`` parsed from the JSON stored in ``b_params``.
    """
    # Only the text properties are used here. The stale ``.decode("utf-8")``
    # entries for the same keys were removed: they were overwritten by the
    # later duplicates anyway, but were still evaluated first.
    return {
        "name": self.name,
        "project": self.project,
        "params": json.loads(self.b_params.decode("utf-8")),
        "model": self.model,
    }

def set_id(self) -> None:
Expand Down
10 changes: 10 additions & 0 deletions wp1/models/wp10/builder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ def test_updated_at_dt(self):
self.assertEqual(28, dt.minute)
self.assertEqual(44, dt.second)

def test_decoded_properties(self):
    """The text properties expose the raw ``b_*`` fields as ``str``."""
    self.builder.b_id = b"builder-a"

    self.assertEqual("builder-a", self.builder.id)
    self.assertEqual("My List", self.builder.name)
    self.assertEqual("100", self.builder.user_id)
    self.assertEqual("en.wikipedia.org", self.builder.project)
    self.assertEqual("wp1.selection.models.simple", self.builder.model)
    self.assertEqual("My List (builder-a)", self.builder.label)

def test_set_updated_at_dt(self):
dt = datetime.datetime(2020, 12, 15, 9, 30, 55)
self.builder.set_updated_at_dt(dt)
Expand Down
66 changes: 66 additions & 0 deletions wp1/selection/meta_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import io

from botocore.exceptions import ClientError

import wp1.logic.builder as logic_builder
from wp1.logic import util as logic_util
from wp1.exceptions import (
Wp1FatalSelectionError,
Wp1RetryableSelectionError,
)
from wp1.selection.abstract_builder import AbstractBuilder


class MetaBuilder(AbstractBuilder):
    """Base class for builders that reference other builders."""

    def _fetch_selection_data(
        self, wp10db, s3, builder_id: str, reference_label: str | None = None
    ) -> bytes:
        """Fetch the latest materialized TSV snapshot for a referenced builder.

        Args:
            wp10db: Database connection used to look up the latest selection.
            s3: Storage client exposing ``download_fileobj(key, fileobj)``.
            builder_id: ID of the referenced builder.
            reference_label: Optional name for the builder in error messages;
                falls back to ``builder_id`` when missing or empty.

        Returns:
            The raw bytes of the stored selection.

        Raises:
            Wp1FatalSelectionError: If the latest selection has status
                ``FAILED``.
            Wp1RetryableSelectionError: If there is no selection, its status is
                anything other than ``OK``, it has no object key, or the
                download raises a ``ClientError``.
        """
        label = reference_label or builder_id
        selection = logic_builder.latest_selection_for(
            wp10db, builder_id, "text/tab-separated-values"
        )

        # TODO: #1196 - Add retry handling for Combinator referenced selections.
        if selection is None:
            raise Wp1RetryableSelectionError(
                f"Referenced builder {label} has no usable selection "
                "(no selection found)"
            )

        status = logic_util.as_text(selection.s_status)
        if status == "FAILED":
            raise Wp1FatalSelectionError(
                f"Referenced builder {label} latest selection failed"
            )

        if status != "OK":
            raise Wp1RetryableSelectionError(
                f"Referenced builder {label} latest selection is not ready "
                f"(status={status!r})"
            )

        # OK selections can have no stored data when materialization produced empty
        # data, since AbstractBuilder only uploads filled selection.data.
        if selection.s_object_key is None:
            raise Wp1RetryableSelectionError(
                f"Referenced builder {label} latest selection has no stored data"
            )

        # The key may be bytes or str; normalize it the same way as the status.
        object_key = logic_util.as_text(selection.s_object_key)

        buffer = io.BytesIO()
        try:
            s3.download_fileobj(object_key, buffer)
        except ClientError as e:
            # Put the S3 error code in the message; fall back to "Unknown"
            # when the response carries none.
            code = e.response.get("Error", {}).get("Code", "Unknown")
            raise Wp1RetryableSelectionError(
                f"Failed to download selection for builder {label} "
                f"from S3 key {object_key!r}: {code}"
            ) from e

        return buffer.getvalue()
61 changes: 61 additions & 0 deletions wp1/selection/meta_builder_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from unittest import TestCase
from unittest.mock import MagicMock, patch

from wp1.exceptions import Wp1FatalSelectionError, Wp1RetryableSelectionError
from wp1.models.wp10.selection import Selection
from wp1.selection.meta_builder import MetaBuilder


def _selection(
    status: bytes = b"OK", object_key: bytes | None = b"object-key"
) -> Selection:
    """Build a TSV Selection fixture for builder ``builder-a``.

    Args:
        status: Value stored in ``s_status``.
        object_key: Value stored in ``s_object_key``; ``None`` leaves the
            selection without a stored object.
    """
    return Selection(
        s_builder_id=b"builder-a",
        s_content_type=b"text/tab-separated-values",
        s_version=1,
        s_status=status,
        s_object_key=object_key,
    )


class MetaBuilderTest(TestCase):
    """Tests for MetaBuilder._fetch_selection_data."""

    def setUp(self):
        self.builder = MetaBuilder()

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data(self, mock_latest_selection):
        """An OK selection with an object key is downloaded and returned."""
        mock_latest_selection.return_value = _selection()
        s3 = MagicMock()
        s3.download_fileobj.side_effect = lambda _key, buf: buf.write(b"first\n")

        actual = self.builder._fetch_selection_data(MagicMock(), s3, "builder-a")

        self.assertEqual(b"first\n", actual)
        s3.download_fileobj.assert_called_once()
        # The bytes object key must reach the storage client as text.
        self.assertEqual("object-key", s3.download_fileobj.call_args.args[0])

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_failed_selection(self, mock_latest_selection):
        """A FAILED selection raises the fatal error."""
        mock_latest_selection.return_value = _selection(status=b"FAILED")

        with self.assertRaises(Wp1FatalSelectionError):
            self.builder._fetch_selection_data(MagicMock(), MagicMock(), "builder-a")

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_retryable_selection(self, mock_latest_selection):
        """A selection that is neither OK nor FAILED raises the retryable error."""
        mock_latest_selection.return_value = _selection(status=b"CAN_RETRY")

        with self.assertRaises(Wp1RetryableSelectionError):
            self.builder._fetch_selection_data(MagicMock(), MagicMock(), "builder-a")

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_without_stored_data(self, mock_latest_selection):
        """An OK selection with no object key raises the retryable error."""
        mock_latest_selection.return_value = _selection(object_key=None)

        with self.assertRaisesRegex(Wp1RetryableSelectionError, "no stored data"):
            self.builder._fetch_selection_data(MagicMock(), MagicMock(), "builder-a")

    @patch("wp1.selection.meta_builder.logic_builder.latest_selection_for")
    def test_fetch_selection_data_missing_selection(self, mock_latest_selection):
        """A missing selection raises the retryable error."""
        mock_latest_selection.return_value = None

        with self.assertRaises(Wp1RetryableSelectionError):
            self.builder._fetch_selection_data(MagicMock(), MagicMock(), "builder-a")
Loading
Loading