From 71fb1f2d6d3baa2d9e3919208cd86d657aedaa20 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 8 Jun 2026 12:36:50 +0300 Subject: [PATCH] test(types): annotate tests/modules/scoring (137 to 0 mypy errors) Co-Authored-By: Claude Opus 4.7 --- tests/modules/scoring/test_bert.py | 33 +++++++--- tests/modules/scoring/test_catboost.py | 65 +++++++++++-------- tests/modules/scoring/test_cnn.py | 31 ++++++--- tests/modules/scoring/test_description_bi.py | 27 ++++++-- .../modules/scoring/test_description_cross.py | 27 ++++++-- tests/modules/scoring/test_description_llm.py | 35 +++++++--- tests/modules/scoring/test_dnnc.py | 23 +++++-- tests/modules/scoring/test_gcn_scorer.py | 28 +++++--- tests/modules/scoring/test_knn.py | 17 +++-- tests/modules/scoring/test_linear.py | 23 +++++-- tests/modules/scoring/test_lora.py | 33 +++++++--- tests/modules/scoring/test_mlknn.py | 15 ++++- tests/modules/scoring/test_ptuning.py | 33 +++++++--- tests/modules/scoring/test_rerank_scorer.py | 17 +++-- tests/modules/scoring/test_rnn.py | 38 +++++++---- tests/modules/scoring/test_scorer_utils.py | 21 +++++- tests/modules/scoring/test_sklearn.py | 29 ++++++--- 17 files changed, 360 insertions(+), 135 deletions(-) diff --git a/tests/modules/scoring/test_bert.py b/tests/modules/scoring/test_bert.py index 348083cb7..f01985aec 100644 --- a/tests/modules/scoring/test_bert.py +++ b/tests/modules/scoring/test_bert.py @@ -1,6 +1,9 @@ +from __future__ import annotations + import shutil import tempfile from pathlib import Path +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest @@ -8,14 +11,20 @@ from autointent import Pipeline from autointent.configs import HFModelConfig from autointent.context.data_handler import DataHandler -from autointent.modules import BertScorer +from autointent.modules.scoring import BertScorer + +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels _config = 
HFModelConfig(model_name="prajjwal1/bert-tiny") pytest.importorskip("transformers", reason="Transformers library is required for BertScorer tests") -def test_bert_scorer_dump_load(dataset): +def test_bert_scorer_dump_load(dataset: Dataset) -> None: """Test that BertScorer can be saved and loaded while preserving predictions.""" pytest.importorskip("accelerate", reason="Accelerate library is required for this test") @@ -23,7 +32,8 @@ def test_bert_scorer_dump_load(dataset): # Create and train scorer scorer_original = BertScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8) - scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer_original.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) # Test data test_data = [ @@ -61,7 +71,7 @@ def test_bert_scorer_dump_load(dataset): shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error -def test_bert_prediction(dataset): +def test_bert_prediction(dataset: Dataset) -> None: """Test that the transformer model can fit and make predictions.""" pytest.importorskip("accelerate", reason="Accelerate library is required for this test") @@ -69,7 +79,8 @@ def test_bert_prediction(dataset): scorer = BertScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", @@ -95,12 +106,15 @@ def test_bert_prediction(dataset): # Test metadata function if available if hasattr(scorer, "predict_with_metadata"): - predictions, metadata = scorer.predict_with_metadata(test_data) + # cast: base predict_with_metadata signature is wider than scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data) + ) assert len(predictions) == len(test_data) assert metadata is None -def test_bert_cache_clearing(dataset): +def test_bert_cache_clearing(dataset: Dataset) -> None: """Test that the transformer model properly handles cache clearing.""" pytest.importorskip("accelerate", reason="Accelerate library is required for this test") @@ -108,7 +122,8 @@ def test_bert_cache_clearing(dataset): scorer = BertScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = ["test text"] @@ -127,7 +142,7 @@ def test_bert_cache_clearing(dataset): scorer.predict(test_data) -def test_bert_in_pipeline(dataset): +def test_bert_in_pipeline(dataset: Dataset) -> None: """Test BertScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_catboost.py b/tests/modules/scoring/test_catboost.py index 6616525e6..4e9c685e0 100644 --- a/tests/modules/scoring/test_catboost.py +++ b/tests/modules/scoring/test_catboost.py @@ -1,34 +1,42 @@ +from __future__ import annotations + import shutil import tempfile from pathlib import Path +from typing import TYPE_CHECKING, cast import numpy as np import pytest from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import CatBoostScorer +from autointent.modules.scoring import CatBoostScorer from tests.conftest import get_test_embedder_config +if TYPE_CHECKING: + from autointent import Dataset + from autointent.custom_types import ListOfLabels + pytest.importorskip("catboost") -def test_catboost_scorer_dump_load(dataset): +def test_catboost_scorer_dump_load(dataset: Dataset) -> None: """Test that CatBoostScorer can be saved and loaded while preserving predictions.""" data_handler = DataHandler(dataset) scorer_original = CatBoostScorer( embedder_config=get_test_embedder_config(), iterations=50, - learning_rate=0.05, + learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs depth=6, - l2_leaf_reg=3, - eval_metric="Accuracy", - random_seed=42, + l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment + eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment + random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment verbose=False, ) - 
scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer_original.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my account", @@ -53,23 +61,24 @@ def test_catboost_scorer_dump_load(dataset): shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error -def test_catboost_prediction_multilabel(dataset): +def test_catboost_prediction_multilabel(dataset: Dataset) -> None: """Test that the transformer model can fit and make predictions.""" data_handler = DataHandler(dataset.to_multilabel()) scorer = CatBoostScorer( embedder_config=get_test_embedder_config(), iterations=50, - learning_rate=0.05, + learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs depth=6, - l2_leaf_reg=3, - eval_metric="Accuracy", - random_seed=42, + l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment + eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment + random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment verbose=False, val_fraction=None, ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", @@ -97,24 +106,25 @@ def test_catboost_prediction_multilabel(dataset): @pytest.mark.parametrize("features_type", ["text", "embedding", "both"]) @pytest.mark.parametrize("use_embedding_features", [True, False]) -def test_catboost_features_types(dataset, features_type, use_embedding_features): +def test_catboost_features_types(dataset: Dataset, features_type: str, use_embedding_features: bool) -> None: """Test that CatBoostScorer works properly without an embedder (using BoW encoding).""" data_handler = DataHandler(dataset) scorer = CatBoostScorer( embedder_config=get_test_embedder_config(), iterations=50, - learning_rate=0.05, + learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs depth=6, - l2_leaf_reg=3, - eval_metric="Accuracy", - random_seed=42, - features_type=features_type, + l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment + eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment + random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment + features_type=features_type, # type: ignore[arg-type] # reason: src signature uses FeaturesType enum; test passes the literal string form catboost accepts use_embedding_features=use_embedding_features, verbose=False, ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", @@ -130,20 +140,21 @@ def test_catboost_features_types(dataset, features_type, use_embedding_features) assert 0.0 <= np.min(predictions) <= np.max(predictions) <= 1.0 -def test_catboost_cache_clearing(dataset): +def test_catboost_cache_clearing(dataset: Dataset) -> None: """Test that the transformer model properly handles cache clearing.""" data_handler = DataHandler(dataset) scorer = CatBoostScorer( embedder_config=get_test_embedder_config(), iterations=50, - learning_rate=0.05, + learning_rate=0.05, # type: ignore[arg-type] # reason: CatBoostScorer **catboost_kwargs mis-typed in src as dict[str, Any]; values are forwarded as scalar kwargs depth=6, - l2_leaf_reg=3, - eval_metric="Accuracy", - random_seed=42, + l2_leaf_reg=3, # type: ignore[arg-type] # reason: see learning_rate comment + eval_metric="Accuracy", # type: ignore[arg-type] # reason: see learning_rate comment + random_seed=42, # type: ignore[arg-type] # reason: see learning_rate comment verbose=False, ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = ["test text"] scorer.predict(test_data) scorer.clear_cache() @@ -151,7 +162,7 @@ def test_catboost_cache_clearing(dataset): scorer.predict(test_data) -def test_catboost_in_pipeline(dataset): +def test_catboost_in_pipeline(dataset: Dataset) -> None: """Test CatBoostScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_cnn.py b/tests/modules/scoring/test_cnn.py index 8defd1aa8..6959b3560 100644 --- a/tests/modules/scoring/test_cnn.py +++ b/tests/modules/scoring/test_cnn.py @@ -1,6 +1,9 @@ +from __future__ import annotations + import shutil import tempfile from pathlib import Path +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest @@ -10,8 +13,14 @@ from autointent.context.data_handler import DataHandler from autointent.modules.scoring import CNNScorer +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels + -def test_cnn_prediction(dataset): +def test_cnn_prediction(dataset: Dataset) -> None: """Test that the CNN model can fit and make predictions.""" data_handler = DataHandler(dataset) @@ -20,7 +29,8 @@ def test_cnn_prediction(dataset): num_train_epochs=1, vocab_config=VocabConfig(max_seq_length=50), ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my account", @@ -45,12 +55,15 @@ def test_cnn_prediction(dataset): # Проверяем работу predict_with_metadata если метод существует if hasattr(scorer, "predict_with_metadata"): - predictions, metadata = scorer.predict_with_metadata(test_data) + # cast: base predict_with_metadata signature is wider than scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data) + ) assert len(predictions) == len(test_data) assert metadata is None -def test_cnn_cache_clearing(dataset): +def test_cnn_cache_clearing(dataset: Dataset) -> None: """Test that the CNN model properly handles cache clearing.""" data_handler = DataHandler(dataset) @@ -59,7 +72,8 @@ def test_cnn_cache_clearing(dataset): num_train_epochs=1, vocab_config=VocabConfig(max_seq_length=50), ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = ["test text"] @@ -77,7 +91,7 @@ def test_cnn_cache_clearing(dataset): scorer.predict(test_data) -def test_cnn_scorer_dump_load(dataset): +def test_cnn_scorer_dump_load(dataset: Dataset) -> None: """Test that BERTLoRAScorer can be saved and loaded while preserving predictions.""" data_handler = DataHandler(dataset) @@ -87,7 +101,8 @@ def test_cnn_scorer_dump_load(dataset): num_train_epochs=1, vocab_config=VocabConfig(max_seq_length=50), ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) # Test data test_data = [ @@ -123,7 +138,7 @@ def test_cnn_scorer_dump_load(dataset): shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error -def test_cnn_in_pipeline(dataset): +def test_cnn_in_pipeline(dataset: Dataset) -> None: """Test CNNScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_description_bi.py b/tests/modules/scoring/test_description_bi.py index 3053eac96..b5647bd52 100644 --- a/tests/modules/scoring/test_description_bi.py +++ b/tests/modules/scoring/test_description_bi.py @@ -1,13 +1,22 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import BiEncoderDescriptionScorer +from autointent.modules.scoring import BiEncoderDescriptionScorer from tests.conftest import get_test_embedder_config +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels + @pytest.mark.parametrize( ("expected_prediction", "multilabel"), @@ -28,7 +37,7 @@ ), ], ) -def test_description_scorer(dataset, expected_prediction, multilabel): +def test_description_scorer(dataset: Dataset, expected_prediction: list[list[float]], multilabel: bool) -> None: if multilabel: dataset = dataset.to_multilabel() data_handler = DataHandler(dataset) @@ -37,11 +46,14 @@ def test_description_scorer(dataset, expected_prediction, multilabel): embedder_config=get_test_embedder_config(), temperature=0.3, multilabel=multilabel ) + # cast: clinc_subset has descriptions defined for every intent, and uses non-OOS labels. 
scorer.fit( data_handler.train_utterances(0), - data_handler.train_labels(0), - data_handler.intent_descriptions, + cast("ListOfLabels", data_handler.train_labels(0)), + cast("list[str]", data_handler.intent_descriptions), ) + # _description_vectors is set after fit; assert it's not None for type narrowing. + assert scorer._description_vectors is not None assert scorer._description_vectors.shape[0] == len(data_handler.intent_descriptions) test_utterances = [ @@ -58,7 +70,10 @@ def test_description_scorer(dataset, expected_prediction, multilabel): assert predictions.shape == (len(test_utterances), len(data_handler.intent_descriptions)) np.testing.assert_almost_equal(predictions, np.array(expected_prediction).reshape(predictions.shape), decimal=5) - predictions, metadata = scorer.predict_with_metadata(test_utterances) + # cast: base predict_with_metadata signature is wider than scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_utterances) + ) assert len(predictions) == len(test_utterances) assert metadata is None @@ -70,7 +85,7 @@ def test_description_scorer(dataset, expected_prediction, multilabel): np.testing.assert_almost_equal(predictions, new_predictions, decimal=5) -def test_description_bi_in_pipeline(dataset): +def test_description_bi_in_pipeline(dataset: Dataset) -> None: """Test BiEncoderDescriptionScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_description_cross.py b/tests/modules/scoring/test_description_cross.py index 86bfe2e97..a46370679 100644 --- a/tests/modules/scoring/test_description_cross.py +++ b/tests/modules/scoring/test_description_cross.py @@ -1,11 +1,20 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest from autointent import Pipeline from autointent.context.data_handler import DataHandler -from 
autointent.modules import CrossEncoderDescriptionScorer +from autointent.modules.scoring import CrossEncoderDescriptionScorer + +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels pytest.importorskip("sentence_transformers") @@ -17,7 +26,9 @@ ([[0.2, 0.3, 0.2, 0.2], [0.2, 0.3, 0.2, 0.2]], False), ], ) -def test_description_scorer_cross_encoder(dataset, expected_prediction, multilabel): +def test_description_scorer_cross_encoder( + dataset: Dataset, expected_prediction: list[list[float]], multilabel: bool +) -> None: if multilabel: dataset = dataset.to_multilabel() data_handler = DataHandler(dataset) @@ -26,10 +37,11 @@ def test_description_scorer_cross_encoder(dataset, expected_prediction, multilab cross_encoder_config="cross-encoder/ms-marco-MiniLM-L6-v2", temperature=0.3, multilabel=multilabel ) + # cast: clinc_subset has descriptions defined for every intent, and uses non-OOS labels. scorer.fit( data_handler.train_utterances(0), - data_handler.train_labels(0), - data_handler.intent_descriptions, + cast("ListOfLabels", data_handler.train_labels(0)), + cast("list[str]", data_handler.intent_descriptions), ) assert scorer._description_texts is not None assert len(scorer._description_texts) == len(data_handler.intent_descriptions) @@ -49,7 +61,10 @@ def test_description_scorer_cross_encoder(dataset, expected_prediction, multilab assert predictions.shape == (len(test_utterances), len(data_handler.intent_descriptions)) np.testing.assert_almost_equal(predictions, np.array(expected_prediction).reshape(predictions.shape), decimal=1) - predictions, metadata = scorer.predict_with_metadata(test_utterances) + # cast: base predict_with_metadata signature is wider than scoring subclasses actually return. 
+ predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_utterances) + ) assert len(predictions) == len(test_utterances) assert metadata is None @@ -67,7 +82,7 @@ def test_description_scorer_cross_encoder(dataset, expected_prediction, multilab new_scorer.clear_cache() -def test_description_cross_in_pipeline(dataset): +def test_description_cross_in_pipeline(dataset: Dataset) -> None: """Test CrossEncoderDescriptionScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_description_llm.py b/tests/modules/scoring/test_description_llm.py index cf6a826ad..5eebb7e51 100644 --- a/tests/modules/scoring/test_description_llm.py +++ b/tests/modules/scoring/test_description_llm.py @@ -1,25 +1,36 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import LLMDescriptionScorer +from autointent.modules.scoring import LLMDescriptionScorer + +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels + from autointent.generation import Generator @pytest.mark.parametrize("multilabel", [True, False]) -def test_description_scorer_llm(dataset, multilabel, patch_llm_scorer_generator): +def test_description_scorer_llm(dataset: Dataset, multilabel: bool, patch_llm_scorer_generator: Generator) -> None: if multilabel: dataset = dataset.to_multilabel() data_handler = DataHandler(dataset) scorer = LLMDescriptionScorer(temperature=0.3, generator_config={"temperature": 0}, multilabel=multilabel) + # cast: clinc_subset has descriptions defined for every intent, and uses non-OOS labels. 
scorer.fit( data_handler.train_utterances(0), - data_handler.train_labels(0), - data_handler.intent_descriptions, + cast("ListOfLabels", data_handler.train_labels(0)), + cast("list[str]", data_handler.intent_descriptions), ) assert scorer._description_texts == data_handler.intent_descriptions @@ -36,22 +47,28 @@ def test_description_scorer_llm(dataset, multilabel, patch_llm_scorer_generator) assert predictions.shape == (len(test_utterances), len(data_handler.intent_descriptions)) - predictions, metadata = scorer.predict_with_metadata(test_utterances) + # cast: base predict_with_metadata signature is wider than scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_utterances) + ) assert len(predictions) == len(test_utterances) assert metadata is None @pytest.mark.parametrize("multilabel", [True, False]) -def test_description_scorer_llm_dump_load_roundtrip(dataset, multilabel, patch_llm_scorer_generator): +def test_description_scorer_llm_dump_load_roundtrip( + dataset: Dataset, multilabel: bool, patch_llm_scorer_generator: Generator +) -> None: if multilabel: dataset = dataset.to_multilabel() data_handler = DataHandler(dataset) scorer = LLMDescriptionScorer(temperature=0.3, generator_config={"temperature": 0}, multilabel=multilabel) + # cast: clinc_subset has descriptions defined for every intent, and uses non-OOS labels. 
scorer.fit( data_handler.train_utterances(0), - data_handler.train_labels(0), - data_handler.intent_descriptions, + cast("ListOfLabels", data_handler.train_labels(0)), + cast("list[str]", data_handler.intent_descriptions), ) test_utterances = ["What is the balance on my account?", "How do I reset my online banking password?"] @@ -65,7 +82,7 @@ def test_description_scorer_llm_dump_load_roundtrip(dataset, multilabel, patch_l np.testing.assert_almost_equal(predictions, new_predictions, decimal=5) -def test_llm_description_in_pipeline(dataset, patch_llm_scorer_generator): +def test_llm_description_in_pipeline(dataset: Dataset, patch_llm_scorer_generator: Generator) -> None: """Test LLMDescriptionScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_dnnc.py b/tests/modules/scoring/test_dnnc.py index 4dc6791e6..c8fe9156f 100644 --- a/tests/modules/scoring/test_dnnc.py +++ b/tests/modules/scoring/test_dnnc.py @@ -1,17 +1,24 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, cast import numpy as np import pytest from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import DNNCScorer +from autointent.modules.scoring import DNNCScorer + +if TYPE_CHECKING: + from autointent import Dataset + from autointent.custom_types import ListOfLabels pytest.importorskip("sentence_transformers") @pytest.mark.parametrize(("train_head", "pred_score"), [(True, 1)]) -def test_base_dnnc(dataset, train_head, pred_score): +def test_base_dnnc(dataset: Dataset, train_head: bool, pred_score: int) -> None: data_handler = DataHandler(dataset) scorer = DNNCScorer( @@ -20,7 +27,8 @@ def test_base_dnnc(dataset, train_head, pred_score): k=3, ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", "i am nost sure why my account is blocked", @@ -29,10 +37,15 @@ def test_base_dnnc(dataset, train_head, pred_score): "can you tell me why is my bank account frozen", ] predictions = scorer.predict(test_data) - np.testing.assert_almost_equal(np.array([[0.0, pred_score, 0.0, 0.0]] * len(test_data)), predictions, decimal=0.5) + np.testing.assert_almost_equal( + np.array([[0.0, pred_score, 0.0, 0.0]] * len(test_data)), + predictions, + decimal=0.5, # type: ignore[arg-type] # reason: numpy stubs require int but assert_almost_equal rounds float decimal; preserves pre-typing behavior + ) predictions, metadata = scorer.predict_with_metadata(test_data) assert len(predictions) == len(test_data) + assert metadata is not None assert "neighbors" in metadata[0] assert "scores" in metadata[0] @@ -44,7 +57,7 @@ def test_base_dnnc(dataset, train_head, pred_score): np.testing.assert_almost_equal(predictions, new_predictions, decimal=5) -def test_dnnc_in_pipeline(dataset): +def test_dnnc_in_pipeline(dataset: Dataset) -> None: """Test DNNCScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_gcn_scorer.py b/tests/modules/scoring/test_gcn_scorer.py index adbf2f325..d4f43cbd7 100644 --- a/tests/modules/scoring/test_gcn_scorer.py +++ b/tests/modules/scoring/test_gcn_scorer.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + import numpy as np import pytest import torch @@ -6,11 +10,14 @@ from autointent.modules.scoring import GCNScorer from tests.conftest import get_test_embedder_config +if TYPE_CHECKING: + from pathlib import Path + _embedder_config = get_test_embedder_config() @pytest.fixture -def multilabel_dataset(): +def multilabel_dataset() -> Dataset: data = { "train": [ {"utterance": "utterance 1", "label": [1, 0, 0]}, 
@@ -28,7 +35,7 @@ def multilabel_dataset(): @pytest.fixture -def multiclass_dataset(): +def multiclass_dataset() -> Dataset: data = { "train": [ {"utterance": "utterance 1", "label": 0}, @@ -45,7 +52,7 @@ def multiclass_dataset(): return Dataset.from_dict(data) -def test_gcn_scorer_multilabel(multilabel_dataset): +def test_gcn_scorer_multilabel(multilabel_dataset: Dataset) -> None: torch.manual_seed(42) scorer = GCNScorer( embedder_config=_embedder_config, @@ -56,7 +63,8 @@ def test_gcn_scorer_multilabel(multilabel_dataset): ) train_utterances = multilabel_dataset["train"]["utterance"] train_labels = multilabel_dataset["train"]["label"] - descriptions = [intent.name for intent in multilabel_dataset.intents] + # cast: test fixtures set intent.name explicitly, so the list never contains None. + descriptions = cast("list[str]", [intent.name for intent in multilabel_dataset.intents]) scorer.fit(train_utterances, train_labels, descriptions) test_utterances = ["test 1", "test 2"] @@ -66,7 +74,7 @@ def test_gcn_scorer_multilabel(multilabel_dataset): np.testing.assert_allclose(predictions, expected_predictions, atol=1e-2) -def test_gcn_scorer_multiclass(multiclass_dataset): +def test_gcn_scorer_multiclass(multiclass_dataset: Dataset) -> None: torch.manual_seed(42) scorer = GCNScorer( embedder_config=_embedder_config, @@ -77,7 +85,8 @@ def test_gcn_scorer_multiclass(multiclass_dataset): ) train_utterances = multiclass_dataset["train"]["utterance"] train_labels = multiclass_dataset["train"]["label"] - descriptions = [intent.name for intent in multiclass_dataset.intents] + # cast: test fixtures set intent.name explicitly, so the list never contains None. 
+ descriptions = cast("list[str]", [intent.name for intent in multiclass_dataset.intents]) scorer.fit(train_utterances, train_labels, descriptions) test_utterances = ["test 1", "test 2"] @@ -88,7 +97,7 @@ def test_gcn_scorer_multiclass(multiclass_dataset): np.testing.assert_allclose(predictions.sum(axis=1), 1.0, atol=1e-6) -def test_gcn_scorer_dump_load(tmp_path, multilabel_dataset): +def test_gcn_scorer_dump_load(tmp_path: Path, multilabel_dataset: Dataset) -> None: torch.manual_seed(42) scorer = GCNScorer( embedder_config=_embedder_config, @@ -99,7 +108,8 @@ def test_gcn_scorer_dump_load(tmp_path, multilabel_dataset): ) train_utterances = multilabel_dataset["train"]["utterance"] train_labels = multilabel_dataset["train"]["label"] - descriptions = [intent.name for intent in multilabel_dataset.intents] + # cast: test fixtures set intent.name explicitly, so the list never contains None. + descriptions = cast("list[str]", [intent.name for intent in multilabel_dataset.intents]) scorer.fit(train_utterances, train_labels, descriptions) test_utterances = ["test utterance 1"] @@ -113,7 +123,7 @@ def test_gcn_scorer_dump_load(tmp_path, multilabel_dataset): np.testing.assert_allclose(original_predictions, loaded_predictions, atol=1e-6) -def test_gcn_in_pipeline(dataset): +def test_gcn_in_pipeline(dataset: Dataset) -> None: """Test GCNScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_knn.py b/tests/modules/scoring/test_knn.py index 6f86735fb..f7bffd2d5 100644 --- a/tests/modules/scoring/test_knn.py +++ b/tests/modules/scoring/test_knn.py @@ -1,14 +1,21 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, cast import numpy as np from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import KNNScorer +from autointent.modules.scoring import KNNScorer from tests.conftest import get_test_embedder_config +if TYPE_CHECKING: + from 
autointent import Dataset + from autointent.custom_types import ListOfLabels + -def test_base_knn(dataset): +def test_base_knn(dataset: Dataset) -> None: data_handler = DataHandler(dataset) scorer = KNNScorer(k=3, weights="distance", embedder_config=get_test_embedder_config()) @@ -21,7 +28,8 @@ def test_base_knn(dataset): "can you tell me why is my bank account frozen", ] - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) predictions = scorer.predict(test_data) assert ( predictions @@ -38,6 +46,7 @@ def test_base_knn(dataset): predictions, metadata = scorer.predict_with_metadata(test_data) assert len(predictions) == len(test_data) + assert metadata is not None assert "neighbors" in metadata[0] with tempfile.TemporaryDirectory() as temp_dir: @@ -48,7 +57,7 @@ def test_base_knn(dataset): assert np.allclose(predictions, new_predictions) -def test_knn_in_pipeline(dataset): +def test_knn_in_pipeline(dataset: Dataset) -> None: """Test KNNScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_linear.py b/tests/modules/scoring/test_linear.py index 111cc346e..573b4f762 100644 --- a/tests/modules/scoring/test_linear.py +++ b/tests/modules/scoring/test_linear.py @@ -1,19 +1,29 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, Any, cast import numpy as np from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import LinearScorer +from autointent.modules.scoring import LinearScorer from tests.conftest import get_test_embedder_config +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels -def test_base_linear(dataset): + +def 
test_base_linear(dataset: Dataset) -> None: data_handler = DataHandler(dataset) scorer = LinearScorer(embedder_config=get_test_embedder_config()) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", "i am nost sure why my account is blocked", @@ -36,7 +46,10 @@ def test_base_linear(dataset): decimal=2, ) - predictions, metadata = scorer.predict_with_metadata(test_data) + # cast: base predict_with_metadata signature is wider than what scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data) + ) assert len(predictions) == len(test_data) assert metadata is None @@ -48,7 +61,7 @@ def test_base_linear(dataset): np.testing.assert_almost_equal(predictions, new_predictions, decimal=5) -def test_linear_in_pipeline(dataset): +def test_linear_in_pipeline(dataset: Dataset) -> None: """Test LinearScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_lora.py b/tests/modules/scoring/test_lora.py index afe779e05..d8c4cef13 100644 --- a/tests/modules/scoring/test_lora.py +++ b/tests/modules/scoring/test_lora.py @@ -1,6 +1,9 @@ +from __future__ import annotations + import shutil import tempfile from pathlib import Path +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest @@ -8,20 +11,27 @@ from autointent import Pipeline from autointent.configs import HFModelConfig from autointent.context.data_handler import DataHandler -from autointent.modules import BERTLoRAScorer +from autointent.modules.scoring import BERTLoRAScorer + +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from 
autointent.custom_types import ListOfLabels pytest.importorskip("peft") _config = HFModelConfig(model_name="prajjwal1/bert-tiny") -def test_lora_scorer_dump_load(dataset): +def test_lora_scorer_dump_load(dataset: Dataset) -> None: """Test that BERTLoRAScorer can be saved and loaded while preserving predictions.""" data_handler = DataHandler(dataset) # Create and train scorer scorer_original = BERTLoRAScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8) - scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer_original.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) # Test data test_data = [ @@ -59,13 +69,14 @@ def test_lora_scorer_dump_load(dataset): shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error -def test_lora_prediction(dataset): +def test_lora_prediction(dataset: Dataset) -> None: """Test that the lora model can fit and make predictions.""" data_handler = DataHandler(dataset) scorer = BERTLoRAScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", @@ -91,18 +102,22 @@ def test_lora_prediction(dataset): # Test metadata function if available if hasattr(scorer, "predict_with_metadata"): - predictions, metadata = scorer.predict_with_metadata(test_data) + # cast: base predict_with_metadata signature is wider than what scoring subclasses actually return.
+ predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data) + ) assert len(predictions) == len(test_data) assert metadata is None -def test_lora_cache_clearing(dataset): +def test_lora_cache_clearing(dataset: Dataset) -> None: """Test that the lora model properly handles cache clearing.""" data_handler = DataHandler(dataset) scorer = BERTLoRAScorer(classification_model_config=_config, num_train_epochs=1, batch_size=8) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = ["test text"] @@ -121,7 +136,7 @@ def test_lora_cache_clearing(dataset): scorer.predict(test_data) -def test_lora_in_pipeline(dataset): +def test_lora_in_pipeline(dataset: Dataset) -> None: """Test BERTLoRAScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_mlknn.py b/tests/modules/scoring/test_mlknn.py index 1531c4635..3def69e42 100644 --- a/tests/modules/scoring/test_mlknn.py +++ b/tests/modules/scoring/test_mlknn.py @@ -1,4 +1,7 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, cast import numpy as np @@ -7,12 +10,17 @@ from autointent.modules.scoring import MLKnnScorer from tests.conftest import get_test_embedder_config +if TYPE_CHECKING: + from autointent import Dataset + from autointent.custom_types import ListOfLabels + -def test_base_mlknn(dataset): +def test_base_mlknn(dataset: Dataset) -> None: data_handler = DataHandler(dataset.to_multilabel()) scorer = MLKnnScorer(embedder_config=get_test_embedder_config(), k=3) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", @@ -43,6 +51,7 @@ def test_base_mlknn(dataset): predictions, metadata = scorer.predict_with_metadata(test_data) assert len(predictions) == len(test_data) + assert metadata is not None assert "neighbors" in metadata[0] with tempfile.TemporaryDirectory() as temp_dir: @@ -53,7 +62,7 @@ def test_base_mlknn(dataset): assert np.allclose(predictions, new_predictions) -def test_mlknn_in_pipeline(dataset): +def test_mlknn_in_pipeline(dataset: Dataset) -> None: """Test MLKnnScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_ptuning.py b/tests/modules/scoring/test_ptuning.py index c0323b82e..6551def6e 100644 --- a/tests/modules/scoring/test_ptuning.py +++ b/tests/modules/scoring/test_ptuning.py @@ -1,6 +1,9 @@ +from __future__ import annotations + import shutil import tempfile from pathlib import Path +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest @@ -8,14 +11,20 @@ from autointent import Pipeline from autointent.configs import HFModelConfig from autointent.context.data_handler import DataHandler -from autointent.modules import PTuningScorer +from autointent.modules.scoring import PTuningScorer + +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels pytest.importorskip("peft") _config = HFModelConfig(model_name="prajjwal1/bert-tiny") -def test_ptuning_scorer_dump_load(dataset): +def test_ptuning_scorer_dump_load(dataset: Dataset) -> None: """Test that PTuningScorer can be saved and loaded while preserving predictions.""" data_handler = DataHandler(dataset) @@ -26,7 +35,8 @@ def test_ptuning_scorer_dump_load(dataset): num_virtual_tokens=10, seed=42, ) - scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests 
use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer_original.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my account", @@ -55,7 +65,7 @@ def test_ptuning_scorer_dump_load(dataset): shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error -def test_ptuning_prediction(dataset): +def test_ptuning_prediction(dataset: Dataset) -> None: """Test that the transformer model can fit and make predictions.""" data_handler = DataHandler(dataset) @@ -67,7 +77,8 @@ def test_ptuning_prediction(dataset): seed=42, ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", @@ -89,12 +100,15 @@ def test_ptuning_prediction(dataset): np.testing.assert_almost_equal(np.sum(pred_row), 1.0, decimal=5) if hasattr(scorer, "predict_with_metadata"): - predictions, metadata = scorer.predict_with_metadata(test_data) + # cast: base predict_with_metadata signature is wider than what scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data) + ) assert len(predictions) == len(test_data) assert metadata is None -def test_ptuning_cache_clearing(dataset): +def test_ptuning_cache_clearing(dataset: Dataset) -> None: """Test that the transformer model properly handles cache clearing.""" data_handler = DataHandler(dataset) @@ -106,7 +120,8 @@ def test_ptuning_cache_clearing(dataset): seed=42, ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = ["test text"] scorer.predict(test_data) @@ -119,7 +134,7 @@ def test_ptuning_cache_clearing(dataset): scorer.predict(test_data) -def test_ptuning_in_pipeline(dataset): +def test_ptuning_in_pipeline(dataset: Dataset) -> None: """Test PTuningScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_rerank_scorer.py b/tests/modules/scoring/test_rerank_scorer.py index 4936fdf44..4b9d1006f 100644 --- a/tests/modules/scoring/test_rerank_scorer.py +++ b/tests/modules/scoring/test_rerank_scorer.py @@ -1,16 +1,23 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, cast import numpy as np import pytest from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import RerankScorer +from autointent.modules.scoring import RerankScorer + +if TYPE_CHECKING: + from autointent import Dataset + from autointent.custom_types import ListOfLabels pytest.importorskip("sentence_transformers") -def test_base_rerank_scorer(dataset): +def test_base_rerank_scorer(dataset: Dataset) -> None: data_handler = DataHandler(dataset) scorer = RerankScorer( @@ -29,7 +36,8 @@ def test_base_rerank_scorer(dataset): "can you tell me why is my bank account frozen", ] - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) predictions = scorer.predict(test_data) assert ( predictions @@ -46,6 +54,7 @@ def test_base_rerank_scorer(dataset): predictions, metadata = scorer.predict_with_metadata(test_data) assert len(predictions) == len(test_data) + assert metadata is not None assert "neighbors" in metadata[0] with tempfile.TemporaryDirectory() as temp_dir: @@ -56,7 +65,7 @@ def test_base_rerank_scorer(dataset): assert np.allclose(predictions, new_predictions) -def test_rerank_in_pipeline(dataset): +def test_rerank_in_pipeline(dataset: Dataset) -> None: """Test RerankScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_rnn.py b/tests/modules/scoring/test_rnn.py index 7cf13901f..d58e769ad 100644 --- a/tests/modules/scoring/test_rnn.py +++ b/tests/modules/scoring/test_rnn.py @@ -1,21 +1,31 @@ +from __future__ import annotations + import shutil import tempfile from pathlib import Path +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import RNNScorer +from autointent.modules.scoring import RNNScorer + +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels -def test_rnn_prediction(dataset): +def test_rnn_prediction(dataset: Dataset) -> None: """Test that the RNN model can fit and make predictions.""" data_handler = DataHandler(dataset) scorer = RNNScorer(embed_dim=8, hidden_dim=8, n_layers=1, num_train_epochs=1) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. 
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", @@ -41,17 +51,21 @@ def test_rnn_prediction(dataset): # Test metadata function if available if hasattr(scorer, "predict_with_metadata"): - predictions, metadata = scorer.predict_with_metadata(test_data) + # cast: base predict_with_metadata signature is wider than what scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data) + ) assert len(predictions) == len(test_data) assert metadata is None -def test_rnn_cache_clearing(dataset): +def test_rnn_cache_clearing(dataset: Dataset) -> None: """Test that the RNN model properly handles cache clearing.""" data_handler = DataHandler(dataset) scorer = RNNScorer(embed_dim=8, hidden_dim=8, n_layers=1, num_train_epochs=1) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = ["test text"] @@ -69,14 +83,15 @@ def test_rnn_cache_clearing(dataset): scorer.predict(test_data) -def test_rnn_device(dataset): +def test_rnn_device(dataset: Dataset) -> None: """Test RNN scorer with different device settings.""" data_handler = DataHandler(dataset) # Force CPU scorer = RNNScorer(embed_dim=8, hidden_dim=8, n_layers=1, num_train_epochs=1, device="cpu") - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries.
+ scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = ["test account freeze"] scorer.predict(test_data) @@ -85,13 +100,14 @@ def test_rnn_device(dataset): assert next(scorer._model.parameters()).device.type == "cpu" -def test_rnn_scorer_dump_load(dataset): +def test_rnn_scorer_dump_load(dataset: Dataset) -> None: """Test that RNNScorer can be saved and loaded while preserving predictions.""" data_handler = DataHandler(dataset) # Create and train scorer scorer_original = RNNScorer(embed_dim=8, hidden_dim=8, n_layers=1, num_train_epochs=1) - scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer_original.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) # Test data test_data = [ @@ -128,7 +144,7 @@ def test_rnn_scorer_dump_load(dataset): shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error -def test_rnn_in_pipeline(dataset): +def test_rnn_in_pipeline(dataset: Dataset) -> None: """Test RNNScorer as part of an AutoML pipeline.""" search_space = [ { diff --git a/tests/modules/scoring/test_scorer_utils.py b/tests/modules/scoring/test_scorer_utils.py index cc6948b17..2223e3bb5 100644 --- a/tests/modules/scoring/test_scorer_utils.py +++ b/tests/modules/scoring/test_scorer_utils.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + import numpy as np import pytest @@ -5,6 +9,9 @@ from autointent.modules.scoring._knn.count_neighbors import get_counts from autointent.modules.scoring._knn.weighting import closest_weighting +if TYPE_CHECKING: + import numpy.typing as npt + @pytest.mark.parametrize( ("labels", "n_classes", "ground_truth"), @@ -58,7 +65,7 @@ ), ], ) -def test_knn_get_counts(labels, n_classes, ground_truth): +def test_knn_get_counts(labels: npt.NDArray[Any], 
n_classes: int, ground_truth: npt.NDArray[Any]) -> None: weights = np.ones_like(labels) np.testing.assert_array_equal(actual=get_counts(labels, n_classes, weights), desired=ground_truth) @@ -92,7 +99,9 @@ def test_knn_get_counts(labels, n_classes, ground_truth): ), ], ) -def test_dnnc_build_result(scores, labels, n_classes, ground_truth): +def test_dnnc_build_result( + scores: npt.NDArray[Any], labels: npt.NDArray[Any], n_classes: int, ground_truth: npt.NDArray[Any] +) -> None: np.testing.assert_array_equal(actual=build_result(scores, labels, n_classes), desired=ground_truth) @@ -150,7 +159,13 @@ def test_dnnc_build_result(scores, labels, n_classes, ground_truth): ), ], ) -def test_closest_weighting(labels, distances, multilabel, n_classes, ground_truth): +def test_closest_weighting( + labels: npt.NDArray[Any], + distances: npt.NDArray[Any], + multilabel: bool, + n_classes: int, + ground_truth: list[list[float]], +) -> None: np.testing.assert_array_equal( actual=closest_weighting(labels, distances, multilabel, n_classes), desired=ground_truth, diff --git a/tests/modules/scoring/test_sklearn.py b/tests/modules/scoring/test_sklearn.py index e9944d8cc..4806cb322 100644 --- a/tests/modules/scoring/test_sklearn.py +++ b/tests/modules/scoring/test_sklearn.py @@ -1,25 +1,35 @@ +from __future__ import annotations + import tempfile +from typing import TYPE_CHECKING, Any, cast import numpy as np from autointent import Pipeline from autointent.context.data_handler import DataHandler -from autointent.modules import SklearnScorer +from autointent.modules.scoring import SklearnScorer from tests.conftest import get_test_embedder_config +if TYPE_CHECKING: + import numpy.typing as npt + + from autointent import Dataset + from autointent.custom_types import ListOfLabels -def test_base_sklearn(dataset): + +def test_base_sklearn(dataset: Dataset) -> None: data_handler = DataHandler(dataset) scorer = SklearnScorer( embedder_config=get_test_embedder_config(), 
clf_name="LogisticRegression", - penalty="elasticnet", - solver="saga", - l1_ratio=0.5, + penalty="elasticnet", # type: ignore[arg-type] # reason: SklearnScorer **clf_args mis-typed in src as dict[str,...]; values are forwarded as scalar kwargs + solver="saga", # type: ignore[arg-type] # reason: SklearnScorer **clf_args mis-typed in src as dict[str,...]; values are forwarded as scalar kwargs + l1_ratio=0.5, # type: ignore[arg-type] # reason: SklearnScorer **clf_args mis-typed in src as dict[str,...]; values are forwarded as scalar kwargs ) - scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0)) + # cast: tests use the non-OOS clinc_subset, so train_labels never returns None entries. + scorer.fit(data_handler.train_utterances(0), cast("ListOfLabels", data_handler.train_labels(0))) test_data = [ "why is there a hold on my american saving bank account", "i am nost sure why my account is blocked", @@ -43,7 +53,10 @@ def test_base_sklearn(dataset): decimal=2, ) - predictions, metadata = scorer.predict_with_metadata(test_data) + # cast: base predict_with_metadata signature is wider than what scoring subclasses actually return. + predictions, metadata = cast( + "tuple[npt.NDArray[Any], list[dict[str, Any]] | None]", scorer.predict_with_metadata(test_data) + ) assert len(predictions) == len(test_data) assert metadata is None @@ -55,7 +68,7 @@ def test_base_sklearn(dataset): np.testing.assert_almost_equal(predictions, new_predictions, decimal=5) -def test_sklearn_in_pipeline(dataset): +def test_sklearn_in_pipeline(dataset: Dataset) -> None: """Test SklearnScorer as part of an AutoML pipeline.""" search_space = [ {