Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions tests/modules/scoring/test_bert.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,39 @@
from __future__ import annotations

import shutil
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast

import numpy as np
import pytest

from autointent import Pipeline
from autointent.configs import HFModelConfig
from autointent.context.data_handler import DataHandler
from autointent.modules import BertScorer
from autointent.modules.scoring import BertScorer

if TYPE_CHECKING:
import numpy.typing as npt

from autointent import Dataset
from autointent.custom_types import ListOfLabels

_config = HFModelConfig(model_name="prajjwal1/bert-tiny")

pytest.importorskip("transformers", reason="Transformers library is required for BertScorer tests")


def test_bert_scorer_dump_load(dataset):
def test_bert_scorer_dump_load(dataset: Dataset) -> None:
"""Test that BertScorer can be saved and loaded while preserving predictions."""
pytest.importorskip("accelerate", reason="Accelerate library is required for this test")

data_handler = DataHandler(dataset)

# Create and train scorer
scorer_original = BertScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8)
scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer_original.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

# Test data
test_data = [
Expand Down Expand Up @@ -61,15 +71,16 @@ def test_bert_scorer_dump_load(dataset):
shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error


def test_bert_prediction(dataset):
def test_bert_prediction(dataset: Dataset) -> None:
"""Test that the transformer model can fit and make predictions."""
pytest.importorskip("accelerate", reason="Accelerate library is required for this test")

data_handler = DataHandler(dataset)

scorer = BertScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8)

scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

test_data = [
"why is there a hold on my american saving bank account",
Expand All @@ -95,20 +106,24 @@ def test_bert_prediction(dataset):

# Test metadata function if available
if hasattr(scorer, "predict_with_metadata"):
predictions, metadata = scorer.predict_with_metadata(test_data)
# cast: base predict_with_metadata signature is wider than scoring subclasses actually return.
predictions, metadata = cast(
"tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data)
)
assert len(predictions) == len(test_data)
assert metadata is None


def test_bert_cache_clearing(dataset):
def test_bert_cache_clearing(dataset: Dataset) -> None:
"""Test that the transformer model properly handles cache clearing."""
pytest.importorskip("accelerate", reason="Accelerate library is required for this test")

data_handler = DataHandler(dataset)

scorer = BertScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8)

scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

test_data = ["test text"]

Expand All @@ -127,7 +142,7 @@ def test_bert_cache_clearing(dataset):
scorer.predict(test_data)


def test_bert_in_pipeline(dataset):
def test_bert_in_pipeline(dataset: Dataset) -> None:
"""Test BertScorer as part of an AutoML pipeline."""
search_space = [
{
Expand Down
65 changes: 38 additions & 27 deletions tests/modules/scoring/test_catboost.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,42 @@
from __future__ import annotations

import shutil
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, cast

import numpy as np
import pytest

from autointent import Pipeline
from autointent.context.data_handler import DataHandler
from autointent.modules import CatBoostScorer
from autointent.modules.scoring import CatBoostScorer
from tests.conftest import get_test_embedder_config

if TYPE_CHECKING:
from autointent import Dataset
from autointent.custom_types import ListOfLabels

pytest.importorskip("catboost")


def test_catboost_scorer_dump_load(dataset):
def test_catboost_scorer_dump_load(dataset: Dataset) -> None:
"""Test that CatBoostScorer can be saved and loaded while preserving predictions."""
data_handler = DataHandler(dataset)

scorer_original = CatBoostScorer(
embedder_config=get_test_embedder_config(),
iterations=50,
learning_rate=0.05,
learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs
depth=6,
l2_leaf_reg=3,
eval_metric="Accuracy",
random_seed=42,
l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment
eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment
random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment
verbose=False,
)

scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer_original.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

test_data = [
"why is there a hold on my account",
Expand All @@ -53,23 +61,24 @@ def test_catboost_scorer_dump_load(dataset):
shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error


def test_catboost_prediction_multilabel(dataset):
def test_catboost_prediction_multilabel(dataset: Dataset) -> None:
"""Test that CatBoostScorer can fit and make predictions on a multilabel dataset."""
data_handler = DataHandler(dataset.to_multilabel())

scorer = CatBoostScorer(
embedder_config=get_test_embedder_config(),
iterations=50,
learning_rate=0.05,
learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs
depth=6,
l2_leaf_reg=3,
eval_metric="Accuracy",
random_seed=42,
l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment
eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment
random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment
verbose=False,
val_fraction=None,
)

scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

test_data = [
"why is there a hold on my american saving bank account",
Expand Down Expand Up @@ -97,24 +106,25 @@ def test_catboost_prediction_multilabel(dataset):

@pytest.mark.parametrize("features_type", ["text", "embedding", "both"])
@pytest.mark.parametrize("use_embedding_features", [True, False])
def test_catboost_features_types(dataset, features_type, use_embedding_features):
def test_catboost_features_types(dataset: Dataset, features_type: str, use_embedding_features: bool) -> None:
"""Test that CatBoostScorer fits and predicts for each features_type / use_embedding_features combination."""
data_handler = DataHandler(dataset)

scorer = CatBoostScorer(
embedder_config=get_test_embedder_config(),
iterations=50,
learning_rate=0.05,
learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs
depth=6,
l2_leaf_reg=3,
eval_metric="Accuracy",
random_seed=42,
features_type=features_type,
l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment
eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment
random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment
features_type=features_type, # type: ignore[arg-type] # reason: src signature uses FeaturesType enum; test passes the literal string form catboost accepts
use_embedding_features=use_embedding_features,
verbose=False,
)

scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

test_data = [
"why is there a hold on my american saving bank account",
Expand All @@ -130,28 +140,29 @@ def test_catboost_features_types(dataset, features_type, use_embedding_features)
assert 0.0 <= np.min(predictions) <= np.max(predictions) <= 1.0


def test_catboost_cache_clearing(dataset):
def test_catboost_cache_clearing(dataset: Dataset) -> None:
"""Test that CatBoostScorer properly handles cache clearing."""
data_handler = DataHandler(dataset)
scorer = CatBoostScorer(
embedder_config=get_test_embedder_config(),
iterations=50,
learning_rate=0.05,
learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs
depth=6,
l2_leaf_reg=3,
eval_metric="Accuracy",
random_seed=42,
l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment
eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment
random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment
verbose=False,
)
scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))
test_data = ["test text"]
scorer.predict(test_data)
scorer.clear_cache()
with pytest.raises(RuntimeError):
scorer.predict(test_data)


def test_catboost_in_pipeline(dataset):
def test_catboost_in_pipeline(dataset: Dataset) -> None:
"""Test CatBoostScorer as part of an AutoML pipeline."""
search_space = [
{
Expand Down
31 changes: 23 additions & 8 deletions tests/modules/scoring/test_cnn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

import shutil
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast

import numpy as np
import pytest
Expand All @@ -10,8 +13,14 @@
from autointent.context.data_handler import DataHandler
from autointent.modules.scoring import CNNScorer

if TYPE_CHECKING:
import numpy.typing as npt

from autointent import Dataset
from autointent.custom_types import ListOfLabels


def test_cnn_prediction(dataset):
def test_cnn_prediction(dataset: Dataset) -> None:
"""Test that the CNN model can fit and make predictions."""
data_handler = DataHandler(dataset)

Expand All @@ -20,7 +29,8 @@ def test_cnn_prediction(dataset):
num_train_epochs=1,
vocab_config=VocabConfig(max_seq_length=50),
)
scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

test_data = [
"why is there a hold on my account",
Expand All @@ -45,12 +55,15 @@ def test_cnn_prediction(dataset):

# Check that predict_with_metadata works, if the method exists
if hasattr(scorer, "predict_with_metadata"):
predictions, metadata = scorer.predict_with_metadata(test_data)
# cast: base predict_with_metadata signature is wider than scoring subclasses actually return.
predictions, metadata = cast(
"tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data)
)
assert len(predictions) == len(test_data)
assert metadata is None


def test_cnn_cache_clearing(dataset):
def test_cnn_cache_clearing(dataset: Dataset) -> None:
"""Test that the CNN model properly handles cache clearing."""
data_handler = DataHandler(dataset)

Expand All @@ -59,7 +72,8 @@ def test_cnn_cache_clearing(dataset):
num_train_epochs=1,
vocab_config=VocabConfig(max_seq_length=50),
)
scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

test_data = ["test text"]

Expand All @@ -77,7 +91,7 @@ def test_cnn_cache_clearing(dataset):
scorer.predict(test_data)


def test_cnn_scorer_dump_load(dataset):
def test_cnn_scorer_dump_load(dataset: Dataset) -> None:
"""Test that CNNScorer can be saved and loaded while preserving predictions."""
data_handler = DataHandler(dataset)

Expand All @@ -87,7 +101,8 @@ def test_cnn_scorer_dump_load(dataset):
num_train_epochs=1,
vocab_config=VocabConfig(max_seq_length=50),
)
scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
# cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0)))

# Test data
test_data = [
Expand Down Expand Up @@ -123,7 +138,7 @@ def test_cnn_scorer_dump_load(dataset):
shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error


def test_cnn_in_pipeline(dataset):
def test_cnn_in_pipeline(dataset: Dataset) -> None:
"""Test CNNScorer as part of an AutoML pipeline."""
search_space = [
{
Expand Down
Loading
Loading