GeoOcean · tausiaj · Jul 3, 2026 · Jul 1, 2026 · Jul 1, 2026
diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+testpaths = tests
+python_files = test_*.py
+addopts = -ra
diff --git a/tests/test_deeplearning_autoencoders.py b/tests/test_deeplearning_autoencoders.py
@@ -0,0 +1,274 @@
+"""
+Smoke tests for BlueMath_tk autoencoders.
+
+These tests are intentionally small so they can run quickly in CI and in a local
+Anaconda environment.
+
+Run from the repository root with:
+
+    pytest -q tests/test_deeplearning_autoencoders.py
+
+Some tests are marked xfail because they document likely current bugs in the
+implementation. After fixing each issue, remove the corresponding xfail marker.
+"""
+
+import numpy as np
+import pytest
+
+torch = pytest.importorskip("torch")
+
+from bluemath_tk.deeplearning.autoencoders import (
+    CNNAutoencoder,
+    ConvLSTMAutoencoder,
+    HybridConvLSTMTransformerAutoencoder,
+    LSTMAutoencoder,
+    OrthogonalAutoencoder,
+    StandardAutoencoder,
+)
+
+
+@pytest.fixture(autouse=True)
+def _set_reproducible_seed():
+    """Seed NumPy and torch with 123 and cap torch at one thread for each test."""
+    torch.set_num_threads(1)
+    for seed_rng in (np.random.seed, torch.manual_seed):
+        seed_rng(123)
+
+
+def _fit_kwargs():
+    """Return a fresh dict of the tiny ``fit`` settings shared by the tests."""
+    return {
+        "epochs": 1,
+        "batch_size": 4,
+        "validation_split": 0.25,
+        "patience": 2,
+        "verbose": 0,
+        "learning_rate": 1e-3,
+    }
+
+
+def test_standard_autoencoder_fit_predict_encode_shapes():
+    """Fit, predict and encode with StandardAutoencoder on 2D tabular data.
+
+    Checks the history keys, that each loss history is non-empty, the output
+    shapes, and that every reconstructed/encoded value is finite.
+    """
+    n_samples, n_features, latent_dim = 16, 10, 3
+    data = np.random.randn(n_samples, n_features).astype("float32")
+    model = StandardAutoencoder(k=latent_dim, hidden_dims=[8], device="cpu")
+
+    losses = model.fit(data, **_fit_kwargs())
+    recon = model.predict(data, batch_size=4, verbose=0)
+    latent = model.encode(data, batch_size=4, verbose=0)
+
+    assert set(losses) == {"train_loss", "val_loss"}
+    for loss_name in ("train_loss", "val_loss"):
+        assert len(losses[loss_name]) >= 1
+    assert recon.shape == data.shape
+    assert latent.shape == (n_samples, latent_dim)
+    for output in (recon, latent):
+        assert np.isfinite(output).all()
+
+
+def test_orthogonal_autoencoder_fit_predict_encode_shapes():
+    """Fit, predict and encode with OrthogonalAutoencoder on 2D tabular data.
+
+    Uses lambda_W = lambda_Z = 1e-4 and checks the history keys, the output
+    shapes, and that every reconstructed/encoded value is finite.
+    """
+    n_samples, latent_dim = 16, 3
+    data = np.random.randn(n_samples, 10).astype("float32")
+    model = OrthogonalAutoencoder(
+        k=latent_dim, hidden_dims=[8], lambda_W=1e-4, lambda_Z=1e-4, device="cpu"
+    )
+
+    losses = model.fit(data, **_fit_kwargs())
+    recon = model.predict(data, batch_size=4, verbose=0)
+    latent = model.encode(data, batch_size=4, verbose=0)
+
+    assert set(losses) == {"train_loss", "val_loss"}
+    assert recon.shape == data.shape
+    assert latent.shape == (n_samples, latent_dim)
+    for output in (recon, latent):
+        assert np.isfinite(output).all()
+
+
+def test_lstm_autoencoder_fit_predict_encode_shapes():
+    """Fit, predict and encode with LSTMAutoencoder on sequence input.
+
+    The input is a (16, 5, 3) float32 array. Checks the history keys, the
+    output shapes, and that every reconstructed/encoded value is finite.
+    """
+    n_samples, latent_dim = 16, 4
+    data = np.random.randn(n_samples, 5, 3).astype("float32")
+    model = LSTMAutoencoder(k=latent_dim, hidden=(8, 6), device="cpu")
+
+    losses = model.fit(data, **_fit_kwargs())
+    recon = model.predict(data, batch_size=4, verbose=0)
+    latent = model.encode(data, batch_size=4, verbose=0)
+
+    assert set(losses) == {"train_loss", "val_loss"}
+    assert recon.shape == data.shape
+    assert latent.shape == (n_samples, latent_dim)
+    for output in (recon, latent):
+        assert np.isfinite(output).all()
+
+
+def test_cnn_autoencoder_fit_predict_encode_shapes():
+    """Fit, predict and encode with CNNAutoencoder on channels-first input.
+
+    The input is a (16, 1, 8, 8) float32 array; outputs must be finite.
+    """
+    n_samples, latent_dim = 16, 4
+    data = np.random.randn(n_samples, 1, 8, 8).astype("float32")
+    model = CNNAutoencoder(k=latent_dim, device="cpu")
+
+    losses = model.fit(data, **_fit_kwargs())
+    recon = model.predict(data, batch_size=4, verbose=0)
+    latent = model.encode(data, batch_size=4, verbose=0)
+
+    assert set(losses) == {"train_loss", "val_loss"}
+    assert recon.shape == data.shape
+    assert latent.shape == (n_samples, latent_dim)
+    for output in (recon, latent):
+        assert np.isfinite(output).all()
+
+
+@pytest.mark.xfail(
+    reason=(
+        "Current ViT decoder appears to send d_model-sized tokens directly to "
+        "Unpatchify, which expects patch_size*patch_size*C tokens. Add a decoder "
+        "projection from d_model to patch dimension before unpatchify."
+    ),
+    strict=False,
+)
+def test_vit_autoencoder_d_model_can_differ_from_patch_dimension():
+    """
+    VisionTransformerAutoencoder should allow d_model != patch_size*patch_size*C.
+
+    For C=1 and patch_size=4, patch dimension is 16.
+    This test uses d_model=8 to catch missing decoder projection bugs.
+    """
+    # Imported locally: the module-level import list does not include this
+    # class, so without this line the name lookup raises NameError. Under
+    # xfail(strict=False) that NameError is reported as an expected failure,
+    # and the test could never pass even once the decoder bug is fixed.
+    from bluemath_tk.deeplearning.autoencoders import VisionTransformerAutoencoder
+
+    X = np.random.randn(16, 1, 8, 8).astype("float32")
+
+    ae = VisionTransformerAutoencoder(
+        k=4, patch_size=4, d_model=8, depth_enc=1, depth_dec=1, heads=2, device="cpu"
+    )
+
+    history = ae.fit(X, **_fit_kwargs())
+    X_hat = ae.predict(X, batch_size=4, verbose=0)
+    Z = ae.encode(X, batch_size=4, verbose=0)
+
+    assert set(history) == {"train_loss", "val_loss"}
+    assert X_hat.shape == X.shape
+    assert Z.shape == (16, 4)
+    assert np.isfinite(X_hat).all()
+    assert np.isfinite(Z).all()
+
+
+@pytest.mark.xfail(
+    strict=False,
+    reason=(
+        "Current ConvLSTMAutoencoder returns a single frame but default fit(X) "
+        "uses the full 5D input as target. Either fit should use X[:, -1] as the "
+        "target or the decoder should reconstruct the full sequence."
+    ),
+)
+def test_convlstm_autoencoder_default_fit_reconstructs_documented_single_frame():
+    """Default fit(X) on 5D input should work and predict a single frame.
+
+    This encodes what the ConvLSTMAutoencoder example/docstring is said to
+    promise: fit(X) runs without an explicit target and predict returns one
+    reconstructed frame of shape (B, C, H, W).
+    """
+    n_samples, latent_dim = 16, 4
+    frames = np.random.randn(n_samples, 3, 1, 8, 8).astype("float32")
+    model = ConvLSTMAutoencoder(k=latent_dim, device="cpu")
+
+    losses = model.fit(frames, **_fit_kwargs())
+    recon = model.predict(frames, batch_size=4, verbose=0)
+    latent = model.encode(frames, batch_size=4, verbose=0)
+
+    assert set(losses) == {"train_loss", "val_loss"}
+    # The reference shape is the last entry along axis 1 of the input.
+    assert recon.shape == frames[:, -1].shape
+    assert latent.shape == (n_samples, latent_dim)
+    for output in (recon, latent):
+        assert np.isfinite(output).all()
+
+
+@pytest.mark.xfail(
+    strict=False,
+    reason=(
+        "Current HybridConvLSTMTransformerAutoencoder appears to reference "
+        "self.efficient_attention inside the inner model without assigning it. "
+        "It also has the same single-frame vs full-sequence fit target mismatch "
+        "as ConvLSTMAutoencoder."
+    ),
+)
+def test_hybrid_autoencoder_default_fit_reconstructs_documented_single_frame():
+    """Default fit(X) on 5D input should work and predict a single frame.
+
+    The expected prediction shape is that of the last entry along axis 1.
+    """
+    n_samples, latent_dim = 16, 4
+    frames = np.random.randn(n_samples, 3, 1, 8, 8).astype("float32")
+    model = HybridConvLSTMTransformerAutoencoder(
+        k=latent_dim,
+        d_model=8,
+        n_heads=2,
+        n_layers=1,
+        efficient_attention="linear",
+        device="cpu",
+    )
+
+    losses = model.fit(frames, **_fit_kwargs())
+    recon = model.predict(frames, batch_size=4, verbose=0)
+    latent = model.encode(frames, batch_size=4, verbose=0)
+
+    assert set(losses) == {"train_loss", "val_loss"}
+    assert recon.shape == frames[:, -1].shape
+    assert latent.shape == (n_samples, latent_dim)
+    for output in (recon, latent):
+        assert np.isfinite(output).all()
+
+
+@pytest.mark.xfail(
+    strict=False,
+    reason=(
+        "Current StandardAutoencoder docstring says multidimensional inputs are "
+        "flattened, but _build_model appears to use only input_shape[-1] as the "
+        "feature count. Either restrict StandardAutoencoder to 2D input or make "
+        "flattening consistent in build, fit, predict, and loss target handling."
+    ),
+)
+def test_standard_autoencoder_multidimensional_flatten_contract():
+    """
+    Pin down the flattening contract of StandardAutoencoder for 3D input.
+
+    The class docstring is said to promise automatic flattening of
+    multidimensional inputs. If that is intended, fit/predict/encode must
+    accept a (16, 2, 5) array and reconstruct it with the same shape.
+    """
+    input_shape = (16, 2, 5)
+    latent_dim = 3
+    data = np.random.randn(*input_shape).astype("float32")
+    model = StandardAutoencoder(k=latent_dim, hidden_dims=[8], device="cpu")
+
+    losses = model.fit(data, **_fit_kwargs())
+    recon = model.predict(data, batch_size=4, verbose=0)
+    latent = model.encode(data, batch_size=4, verbose=0)
+
+    # The reconstruction must keep the 3D input shape, while the latent code
+    # is expected as one row of length k per sample.
+    assert set(losses) == {"train_loss", "val_loss"}
+    assert recon.shape == data.shape
+    assert latent.shape == (input_shape[0], latent_dim)
+    for output in (recon, latent):
+        assert np.isfinite(output).all()