From 902220a3b21f2947f7f70ffeb32fa5b562d4733e Mon Sep 17 00:00:00 2001 From: Kartik Pradeepan Date: Fri, 10 Apr 2026 12:50:28 -0400 Subject: [PATCH 1/5] Implement dual ridge regression for memory optimization. Use only when n_samples < n_features. Scores identical. Memory magnitude lower. Runs faster. --- .../metrics/regression_correlation/metric.py | 102 +++++++++++++++++- 1 file changed, 99 insertions(+), 3 deletions(-) diff --git a/brainscore_vision/metrics/regression_correlation/metric.py b/brainscore_vision/metrics/regression_correlation/metric.py index 42ed635ca..70aaf2396 100644 --- a/brainscore_vision/metrics/regression_correlation/metric.py +++ b/brainscore_vision/metrics/regression_correlation/metric.py @@ -107,7 +107,7 @@ def apply(self, source_train, target_train, source_test, target_test): prediction = self.regression.predict(source_test) score = self.correlation(prediction, target_test) - if self.regression._regression.__class__ in [RidgeCV]: + if hasattr(self.regression._regression, 'alpha_'): score.attrs['alpha'] = self.regression._regression.alpha_ return score @@ -128,6 +128,102 @@ def __call__(self, *, source_train, target_train, source_test, target_test): target_test=source_test, ) +class DualRidgeRegression: + """Ridge regression using dual (kernel) form for memory efficiency. + + When n_samples < n_features, avoids materializing the (n_features, n_targets) + coefficient matrix. Computes predictions via a (n_test, n_train) projection + matrix instead. Falls back to sklearn Ridge when n_samples >= n_features. + + Mathematically identical to sklearn Ridge with fit_intercept=True. + """ + + def __init__(self, alpha: float = 1.0, chunk_size: int = 5000): + self.alpha = alpha + self.chunk_size = chunk_size + + def fit(self, X, Y) -> None: + X = np.asarray(X, dtype=np.float64) + Y = np.asarray(Y, dtype=np.float64) + n_samples, n_features = X.shape + + if n_samples >= n_features: + self._use_dual = False + self._primal = Ridge(alpha=self.alpha) + self._primal.fit(X, Y) + return + + self._use_dual = True + self._X_mean = X.mean(axis=0) + self._Y_mean = Y.mean(axis=0) + X_c = X - self._X_mean + self._X_train_centered = X_c + self._Y_train_centered = Y - self._Y_mean + + K = X_c @ X_c.T + K[np.diag_indices_from(K)] += self.alpha + self._K_inv = np.linalg.solve(K, np.eye(K.shape[0])) + + def predict(self, X) -> np.ndarray: + if not self._use_dual: + return self._primal.predict(X) + + X = np.asarray(X, dtype=np.float64) + X_test_c = X - self._X_mean + proj = X_test_c @ self._X_train_centered.T @ self._K_inv + + n_test = X.shape[0] + n_targets = self._Y_train_centered.shape[1] + predictions = np.empty((n_test, n_targets), dtype=np.float64) + for i in range(0, n_targets, self.chunk_size): + end = min(i + self.chunk_size, n_targets) + predictions[:, i:end] = proj @ self._Y_train_centered[:, i:end] + self._Y_mean[i:end] + return predictions + + +class DualRidgeCVRegression: + """RidgeCV with dual form prediction for memory efficiency. + + Uses sklearn RidgeCV for alpha selection (LOO/GCV), then the dual kernel + form for prediction to avoid storing the (n_features, n_targets) coef_ matrix. + Falls back to sklearn RidgeCV when n_samples >= n_features. + + Exposes ``alpha_`` after fit (selected regularization strength). + """ + + def __init__(self, alphas=None, chunk_size: int = 5000, **ridgecv_kwargs): + self.alphas = alphas + self.chunk_size = chunk_size + self._ridgecv_kwargs = ridgecv_kwargs + self.alpha_ = None + + def fit(self, X, Y) -> None: + X = np.asarray(X, dtype=np.float64) + Y = np.asarray(Y, dtype=np.float64) + n_samples, n_features = X.shape + + if n_samples >= n_features: + self._use_dual = False + self._primal = RidgeCV(alphas=self.alphas, **self._ridgecv_kwargs) + self._primal.fit(X, Y) + self.alpha_ = self._primal.alpha_ + return + + self._use_dual = True + rcv = RidgeCV(alphas=self.alphas, **self._ridgecv_kwargs) + rcv.fit(X, Y) + self.alpha_ = rcv.alpha_ + del rcv + + self._dual = DualRidgeRegression(alpha=float(self.alpha_), chunk_size=self.chunk_size) + self._dual.fit(X, Y) + + def predict(self, X) -> np.ndarray: + if not self._use_dual: + return self._primal.predict(X) + return self._dual.predict(X) + + def pls_regression(regression_kwargs=None, xarray_kwargs=None): regression_defaults = dict(n_components=25, scale=False) regression_kwargs = {**regression_defaults, **(regression_kwargs or {})} @@ -147,7 +243,7 @@ def linear_regression(xarray_kwargs=None): def ridge_regression(regression_kwargs=None, xarray_kwargs=None): regression_defaults = dict(alpha=1) regression_kwargs = {**regression_defaults, **(regression_kwargs or {})} - regression = Ridge(**regression_kwargs) + regression = DualRidgeRegression(**regression_kwargs) xarray_kwargs = xarray_kwargs or {} regression = XarrayRegression(regression, **xarray_kwargs) return regression @@ -166,7 +262,7 @@ def ridge_cv_regression(regression_kwargs=None, xarray_kwargs=None, alphas=ALPHA regression_kwargs = {**regression_defaults, **(regression_kwargs or {})} regression_kwargs.pop('alpha', None) # RidgeCV does not accept 'alpha' as a parameter - regression = RidgeCV(**regression_kwargs) + regression = DualRidgeCVRegression(**regression_kwargs) xarray_kwargs = xarray_kwargs or {} regression = XarrayRegression(regression, **xarray_kwargs) return regression From 8449a873dc7a73247f6e7c178375456d458e3c39 Mon Sep 17 00:00:00 2001 From: Kartik Pradeepan Date: Mon, 13 Apr 2026 10:37:31 -0400 Subject: [PATCH 2/5] Dual LOO for alpha selection --- .../metrics/regression_correlation/metric.py | 128 ++++++++++++++++-- 1 file changed, 120 insertions(+), 8 deletions(-) diff --git a/brainscore_vision/metrics/regression_correlation/metric.py b/brainscore_vision/metrics/regression_correlation/metric.py index 70aaf2396..5450526b8 100644 --- a/brainscore_vision/metrics/regression_correlation/metric.py +++ b/brainscore_vision/metrics/regression_correlation/metric.py @@ -182,11 +182,18 @@ def predict(self, X) -> np.ndarray: class DualRidgeCVRegression: - """RidgeCV with dual form prediction for memory efficiency. + """RidgeCV with dual form for memory efficiency. - Uses sklearn RidgeCV for alpha selection (LOO/GCV), then the dual kernel - form for prediction to avoid storing the (n_features, n_targets) coef_ matrix. - Falls back to sklearn RidgeCV when n_samples >= n_features. + When n_samples < n_features and no custom scoring/cv is requested, + selects alpha via LOO cross-validation in kernel space using the + eigendecomposition of K = X @ X.T, then predicts via dual-form + projection. Never materializes the (n_features, n_targets) coef_ matrix. + + When custom scoring or cv is requested, falls back to sklearn RidgeCV + for alpha selection (preserving all sklearn behavior), then uses + DualRidgeRegression for prediction. + + Falls back to sklearn RidgeCV entirely when n_samples >= n_features. Exposes ``alpha_`` after fit (selected regularization strength). """ @@ -197,6 +204,20 @@ def __init__(self, alphas=None, chunk_size: int = 5000, **ridgecv_kwargs): self._ridgecv_kwargs = ridgecv_kwargs self.alpha_ = None + def _can_use_dual_loo(self) -> bool: + """Check if we can do alpha selection in kernel space. + + Dual LOO is only valid when sklearn would use its efficient LOO path: + no custom scoring function, no explicit cv folds, no per-target alpha. + """ + if self._ridgecv_kwargs.get('scoring') is not None: + return False + if self._ridgecv_kwargs.get('cv') is not None: + return False + if self._ridgecv_kwargs.get('alpha_per_target', False): + return False + return True + def fit(self, X, Y) -> None: X = np.asarray(X, dtype=np.float64) Y = np.asarray(Y, dtype=np.float64) @@ -204,24 +225,115 @@ def fit(self, X, Y) -> None: if n_samples >= n_features: self._use_dual = False - self._primal = RidgeCV(alphas=self.alphas, **self._ridgecv_kwargs) + kwargs = dict(self._ridgecv_kwargs) + if self.alphas is not None: + kwargs['alphas'] = self.alphas + self._primal = RidgeCV(**kwargs) self._primal.fit(X, Y) self.alpha_ = self._primal.alpha_ return self._use_dual = True - rcv = RidgeCV(alphas=self.alphas, **self._ridgecv_kwargs) + + if self._can_use_dual_loo(): + self._fit_dual_loo(X, Y, n_samples) + else: + self._fit_sklearn_then_dual(X, Y) + + def _fit_dual_loo(self, X, Y, n_samples) -> None: + """Select alpha via LOO in kernel space. No coef_ materialized. + + Replicates sklearn's _RidgeGCV eigen decomposition approach: + center X, add intercept to kernel via outer product, eigendecompose, + zero regularization on the intercept eigenvector, then evaluate LOO + for each alpha candidate. + """ + # Center X (sklearn centers X in preprocessing, not Y) + self._X_mean = X.mean(axis=0) + self._Y_mean = Y.mean(axis=0) + X_c = X - self._X_mean + self._X_train_centered = X_c + self._Y_train_centered = Y - self._Y_mean + + # Kernel with intercept: K = X_c @ X_c.T + 1*1.T + # The outer product accounts for the unregularized intercept + K = X_c @ X_c.T + K += np.ones((n_samples, n_samples)) + + eigenvalues, Q = np.linalg.eigh(K) + QT_y = Q.T @ Y # project UN-centered Y + + # Find the intercept eigenvector (most aligned with ones vector) + normalized_sw = np.ones(n_samples) / np.sqrt(n_samples) + intercept_dim = np.argmax(np.abs(Q.T @ normalized_sw)) + + # Evaluate LOO for each alpha + alphas = self.alphas if self.alphas is not None else [0.1, 1.0, 10.0] + best_alpha = alphas[0] + best_score = -np.inf + + Q_sq = Q ** 2 + + for alpha in alphas: + w = 1.0 / (eigenvalues + alpha) + w[intercept_dim] = 0 # no regularization on intercept + + c = Q @ (w[:, None] * QT_y) + G_inv_diag = Q_sq @ w + G_inv_diag = np.maximum(G_inv_diag, 1e-12) + + loo_errors = c / G_inv_diag[:, None] + score = -np.mean(loo_errors ** 2) # negative MSE (higher is better) + + if score > best_score: + best_score = score + best_alpha = alpha + + self.alpha_ = best_alpha + + # Compute K_inv for prediction (on original centered K, no intercept) + K_pred = X_c @ X_c.T + K_pred[np.diag_indices_from(K_pred)] += self.alpha_ + self._K_inv = np.linalg.solve(K_pred, np.eye(n_samples)) + + def _fit_sklearn_then_dual(self, X, Y) -> None: + """Fallback: sklearn RidgeCV for alpha, DualRidge for prediction. + + Used when custom scoring/cv/alpha_per_target prevents dual LOO. + """ + kwargs = dict(self._ridgecv_kwargs) + if self.alphas is not None: + kwargs['alphas'] = self.alphas + rcv = RidgeCV(**kwargs) rcv.fit(X, Y) self.alpha_ = rcv.alpha_ del rcv - self._dual = DualRidgeRegression(alpha=float(self.alpha_), chunk_size=self.chunk_size) + self._dual = DualRidgeRegression( + alpha=float(self.alpha_), chunk_size=self.chunk_size + ) self._dual.fit(X, Y) def predict(self, X) -> np.ndarray: if not self._use_dual: return self._primal.predict(X) - return self._dual.predict(X) + + if hasattr(self, '_dual'): + return self._dual.predict(X) + + X = np.asarray(X, dtype=np.float64) + X_test_c = X - self._X_mean + proj = X_test_c @ self._X_train_centered.T @ self._K_inv + + n_test = X.shape[0] + n_targets = self._Y_train_centered.shape[1] + predictions = np.empty((n_test, n_targets), dtype=np.float64) + for i in range(0, n_targets, self.chunk_size): + end = min(i + self.chunk_size, n_targets) + predictions[:, i:end] = ( + proj @ self._Y_train_centered[:, i:end] + self._Y_mean[i:end] + ) + return predictions def pls_regression(regression_kwargs=None, xarray_kwargs=None): From f9d8bde717c4152fa278bfce590ad6c22b3bb20d Mon Sep 17 00:00:00 2001 From: Kartik Pradeepan Date: Mon, 13 Apr 2026 11:52:24 -0400 Subject: [PATCH 3/5] Store any kernel operation in float32 --- .../metrics/regression_correlation/metric.py | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/brainscore_vision/metrics/regression_correlation/metric.py b/brainscore_vision/metrics/regression_correlation/metric.py index 5450526b8..880cbeaf5 100644 --- a/brainscore_vision/metrics/regression_correlation/metric.py +++ b/brainscore_vision/metrics/regression_correlation/metric.py @@ -143,8 +143,8 @@ def __init__(self, alpha: float = 1.0, chunk_size: int = 5000): self.chunk_size = chunk_size def fit(self, X, Y) -> None: - X = np.asarray(X, dtype=np.float64) - Y = np.asarray(Y, dtype=np.float64) + X = np.asarray(X, dtype=np.float32) + Y = np.asarray(Y, dtype=np.float32) n_samples, n_features = X.shape if n_samples >= n_features: @@ -160,21 +160,22 @@ def fit(self, X, Y) -> None: self._X_train_centered = X_c self._Y_train_centered = Y - self._Y_mean - K = X_c @ X_c.T + # Compute kernel and solve in float64 for numerical stability + K = np.float64(X_c @ X_c.T) K[np.diag_indices_from(K)] += self.alpha - self._K_inv = np.linalg.solve(K, np.eye(K.shape[0])) + self._K_inv = np.float32(np.linalg.solve(K, np.eye(K.shape[0]))) def predict(self, X) -> np.ndarray: if not self._use_dual: return self._primal.predict(X) - X = np.asarray(X, dtype=np.float64) + X = np.asarray(X, dtype=np.float32) X_test_c = X - self._X_mean proj = X_test_c @ self._X_train_centered.T @ self._K_inv n_test = X.shape[0] n_targets = self._Y_train_centered.shape[1] - predictions = np.empty((n_test, n_targets), dtype=np.float64) + predictions = np.empty((n_test, n_targets), dtype=np.float32) for i in range(0, n_targets, self.chunk_size): end = min(i + self.chunk_size, n_targets) predictions[:, i:end] = proj @ self._Y_train_centered[:, i:end] + self._Y_mean[i:end] @@ -219,8 +220,8 @@ def _can_use_dual_loo(self) -> bool: return True def fit(self, X, Y) -> None: - X = np.asarray(X, dtype=np.float64) - Y = np.asarray(Y, dtype=np.float64) + X = np.asarray(X, dtype=np.float32) + Y = np.asarray(Y, dtype=np.float32) n_samples, n_features = X.shape if n_samples >= n_features: @@ -247,6 +248,9 @@ def _fit_dual_loo(self, X, Y, n_samples) -> None: center X, add intercept to kernel via outer product, eigendecompose, zero regularization on the intercept eigenvector, then evaluate LOO for each alpha candidate. + + Data stored in float32 to halve memory. Kernel eigendecomposition and + LOO scoring done in float64 for numerical precision. """ # Center X (sklearn centers X in preprocessing, not Y) self._X_mean = X.mean(axis=0) @@ -255,19 +259,18 @@ def _fit_dual_loo(self, X, Y, n_samples) -> None: self._X_train_centered = X_c self._Y_train_centered = Y - self._Y_mean - # Kernel with intercept: K = X_c @ X_c.T + 1*1.T - # The outer product accounts for the unregularized intercept - K = X_c @ X_c.T - K += np.ones((n_samples, n_samples)) + # Kernel with intercept in float64 for eigendecomposition precision + K = np.float64(X_c @ X_c.T) + K += 1.0 # equivalent to np.ones((n,n)) but avoids allocation eigenvalues, Q = np.linalg.eigh(K) - QT_y = Q.T @ Y # project UN-centered Y + QT_y = Q.T @ np.float64(Y) # project UN-centered Y in float64 # Find the intercept eigenvector (most aligned with ones vector) normalized_sw = np.ones(n_samples) / np.sqrt(n_samples) intercept_dim = np.argmax(np.abs(Q.T @ normalized_sw)) - # Evaluate LOO for each alpha + # Evaluate LOO for each alpha (all float64 — small matrices) alphas = self.alphas if self.alphas is not None else [0.1, 1.0, 10.0] best_alpha = alphas[0] best_score = -np.inf @@ -292,9 +295,10 @@ def _fit_dual_loo(self, X, Y, n_samples) -> None: self.alpha_ = best_alpha # Compute K_inv for prediction (on original centered K, no intercept) - K_pred = X_c @ X_c.T + # Solve in float64, store as float32 + K_pred = np.float64(X_c @ X_c.T) K_pred[np.diag_indices_from(K_pred)] += self.alpha_ - self._K_inv = np.linalg.solve(K_pred, np.eye(n_samples)) + self._K_inv = np.float32(np.linalg.solve(K_pred, np.eye(n_samples))) def _fit_sklearn_then_dual(self, X, Y) -> None: """Fallback: sklearn RidgeCV for alpha, DualRidge for prediction. @@ -321,13 +325,13 @@ def predict(self, X) -> np.ndarray: if hasattr(self, '_dual'): return self._dual.predict(X) - X = np.asarray(X, dtype=np.float64) + X = np.asarray(X, dtype=np.float32) X_test_c = X - self._X_mean proj = X_test_c @ self._X_train_centered.T @ self._K_inv n_test = X.shape[0] n_targets = self._Y_train_centered.shape[1] - predictions = np.empty((n_test, n_targets), dtype=np.float64) + predictions = np.empty((n_test, n_targets), dtype=np.float32) for i in range(0, n_targets, self.chunk_size): end = min(i + self.chunk_size, n_targets) predictions[:, i:end] = ( From 201878f92799f1c938fe43809092c5516a8416e4 Mon Sep 17 00:00:00 2001 From: Kartik Pradeepan Date: Mon, 13 Apr 2026 12:02:12 -0400 Subject: [PATCH 4/5] Add condition for when n_features > n_samples after fit for prediction. Used to minimize memory utilization --- .../metrics/regression_correlation/metric.py | 62 +++++++++++++++---- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/brainscore_vision/metrics/regression_correlation/metric.py b/brainscore_vision/metrics/regression_correlation/metric.py index 880cbeaf5..be19052da 100644 --- a/brainscore_vision/metrics/regression_correlation/metric.py +++ b/brainscore_vision/metrics/regression_correlation/metric.py @@ -131,10 +131,14 @@ def __call__(self, *, source_train, target_train, source_test, target_test): class DualRidgeRegression: """Ridge regression using dual (kernel) form for memory efficiency. - When n_samples < n_features, avoids materializing the (n_features, n_targets) - coefficient matrix. Computes predictions via a (n_test, n_train) projection - matrix instead. Falls back to sklearn Ridge when n_samples >= n_features. - + When n_samples < n_features, uses adaptive storage to minimize memory + after fit: + - If n_targets < n_samples: computes coef_ and frees X_train (primal-style + predict, but without sklearn's float64 copy) + - If n_targets >= n_samples: keeps X_train and predicts via kernel projection + (avoids materializing the large coef_ matrix) + + Falls back to sklearn Ridge when n_samples >= n_features. Mathematically identical to sklearn Ridge with fit_intercept=True. """ @@ -146,6 +150,7 @@ def fit(self, X, Y) -> None: X = np.asarray(X, dtype=np.float32) Y = np.asarray(Y, dtype=np.float32) n_samples, n_features = X.shape + n_targets = Y.shape[1] if n_samples >= n_features: self._use_dual = False @@ -157,13 +162,24 @@ def fit(self, X, Y) -> None: self._X_mean = X.mean(axis=0) self._Y_mean = Y.mean(axis=0) X_c = X - self._X_mean - self._X_train_centered = X_c - self._Y_train_centered = Y - self._Y_mean + Y_c = Y - self._Y_mean # Compute kernel and solve in float64 for numerical stability K = np.float64(X_c @ X_c.T) K[np.diag_indices_from(K)] += self.alpha - self._K_inv = np.float32(np.linalg.solve(K, np.eye(K.shape[0]))) + K_inv = np.float32(np.linalg.solve(K, np.eye(n_samples))) + + if n_targets < n_samples: + # coef_ is smaller than X_train — compute it, free X + dual_coef = K_inv @ Y_c + self._coef = X_c.T @ dual_coef + self._use_coef = True + else: + # X_train is smaller than coef_ — keep it for projection + self._X_train_centered = X_c + self._Y_train_centered = Y_c + self._K_inv = K_inv + self._use_coef = False def predict(self, X) -> np.ndarray: if not self._use_dual: @@ -171,6 +187,10 @@ def predict(self, X) -> np.ndarray: X = np.asarray(X, dtype=np.float32) X_test_c = X - self._X_mean + + if self._use_coef: + return X_test_c @ self._coef + self._Y_mean + proj = X_test_c @ self._X_train_centered.T @ self._K_inv n_test = X.shape[0] @@ -252,12 +272,11 @@ def _fit_dual_loo(self, X, Y, n_samples) -> None: Data stored in float32 to halve memory. Kernel eigendecomposition and LOO scoring done in float64 for numerical precision. """ - # Center X (sklearn centers X in preprocessing, not Y) + # Center self._X_mean = X.mean(axis=0) self._Y_mean = Y.mean(axis=0) X_c = X - self._X_mean - self._X_train_centered = X_c - self._Y_train_centered = Y - self._Y_mean + Y_c = Y - self._Y_mean # Kernel with intercept in float64 for eigendecomposition precision K = np.float64(X_c @ X_c.T) @@ -294,11 +313,24 @@ def _fit_dual_loo(self, X, Y, n_samples) -> None: self.alpha_ = best_alpha - # Compute K_inv for prediction (on original centered K, no intercept) - # Solve in float64, store as float32 + # Compute K_inv in float64, store as float32 K_pred = np.float64(X_c @ X_c.T) K_pred[np.diag_indices_from(K_pred)] += self.alpha_ - self._K_inv = np.float32(np.linalg.solve(K_pred, np.eye(n_samples))) + K_inv = np.float32(np.linalg.solve(K_pred, np.eye(n_samples))) + + # Adaptive storage: keep whichever is smaller after fit + n_targets = Y_c.shape[1] + if n_targets < n_samples: + # coef_ is smaller than X_train — compute it, free X + dual_coef = K_inv @ Y_c + self._coef = X_c.T @ dual_coef + self._use_coef = True + else: + # X_train is smaller than coef_ — keep it for projection + self._X_train_centered = X_c + self._Y_train_centered = Y_c + self._K_inv = K_inv + self._use_coef = False def _fit_sklearn_then_dual(self, X, Y) -> None: """Fallback: sklearn RidgeCV for alpha, DualRidge for prediction. @@ -327,6 +359,10 @@ def predict(self, X) -> np.ndarray: X = np.asarray(X, dtype=np.float32) X_test_c = X - self._X_mean + + if self._use_coef: + return X_test_c @ self._coef + self._Y_mean + proj = X_test_c @ self._X_train_centered.T @ self._K_inv n_test = X.shape[0] From a529710fd0c16d1be118caf9064196312c2896eb Mon Sep 17 00:00:00 2001 From: Kartik Pradeepan Date: Wed, 6 May 2026 09:25:50 -0400 Subject: [PATCH 5/5] Make dual ridge opt-in; only Allen2022 uses it --- .../benchmarks/allen2022_fmri/benchmark.py | 2 +- .../allen2022_fmri_surface/benchmark.py | 2 +- .../regression_correlation/__init__.py | 7 +++++- .../metrics/regression_correlation/metric.py | 24 +++++++++++++++++-- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/brainscore_vision/benchmarks/allen2022_fmri/benchmark.py b/brainscore_vision/benchmarks/allen2022_fmri/benchmark.py index 4194a14f2..fa2190a48 100644 --- a/brainscore_vision/benchmarks/allen2022_fmri/benchmark.py +++ b/brainscore_vision/benchmarks/allen2022_fmri/benchmark.py @@ -85,7 +85,7 @@ def _Allen2022fmri(region, def Allen2022fmri(region: str, metric_type: str, dataset_prefix: str = 'Allen2022_fmri', alphas: list = ALPHA_LIST): - similarity_metric = load_metric(f'{metric_type}_split', alphas=alphas) + similarity_metric = load_metric(f'dual_{metric_type}_split', alphas=alphas) return _Allen2022fmri(region, similarity_metric=similarity_metric, identifier_metric_suffix=metric_type, dataset_prefix=dataset_prefix, diff --git a/brainscore_vision/benchmarks/allen2022_fmri_surface/benchmark.py b/brainscore_vision/benchmarks/allen2022_fmri_surface/benchmark.py index 79bfe23ac..1a8292ce2 100644 --- a/brainscore_vision/benchmarks/allen2022_fmri_surface/benchmark.py +++ b/brainscore_vision/benchmarks/allen2022_fmri_surface/benchmark.py @@ -85,7 +85,7 @@ def _Allen2022fmriSurface(region, def Allen2022fmriSurface(region: str, metric_type: str, dataset_prefix: str = 'Allen2022_fmri_surface', alphas: list = ALPHA_LIST): - similarity_metric = load_metric(f'{metric_type}_split', alphas=alphas) + similarity_metric = load_metric(f'dual_{metric_type}_split', alphas=alphas) return _Allen2022fmriSurface(region, similarity_metric=similarity_metric, identifier_metric_suffix=metric_type, dataset_prefix=dataset_prefix, diff --git a/brainscore_vision/metrics/regression_correlation/__init__.py b/brainscore_vision/metrics/regression_correlation/__init__.py index cc26ec14b..4f6c98425 100644 --- a/brainscore_vision/metrics/regression_correlation/__init__.py +++ b/brainscore_vision/metrics/regression_correlation/__init__.py @@ -1,5 +1,6 @@ from brainscore_vision import metric_registry -from .metric import CrossRegressedCorrelation, pls_regression, ridge_cv_regression, ridge_regression, single_regression, linear_regression,\ +from .metric import CrossRegressedCorrelation, pls_regression, ridge_cv_regression, ridge_regression, \ + dual_ridge_regression, dual_ridge_cv_regression, single_regression, linear_regression,\ pearsonr_correlation, ReverseCrossRegressedCorrelation, ReverseTrainTestSplitCorrelation @@ -26,6 +27,10 @@ regression=linear_regression(), correlation=pearsonr_correlation(), *args, **kwargs) metric_registry['ridgecv_split'] = lambda *args, **kwargs: TrainTestSplitCorrelation( regression=ridge_cv_regression(**kwargs), correlation=pearsonr_correlation(), *args, **kwargs) +metric_registry['dual_ridge_split'] = lambda *args, **kwargs: TrainTestSplitCorrelation( + regression=dual_ridge_regression(), correlation=pearsonr_correlation(), *args, **kwargs) +metric_registry['dual_ridgecv_split'] = lambda *args, **kwargs: TrainTestSplitCorrelation( + regression=dual_ridge_cv_regression(**kwargs), correlation=pearsonr_correlation(), *args, **kwargs) metric_registry["reverse_pls_cv"] = lambda *args, **kwargs: ReverseCrossRegressedCorrelation( regression=pls_regression(), correlation=pearsonr_correlation(), *args, **kwargs) diff --git a/brainscore_vision/metrics/regression_correlation/metric.py b/brainscore_vision/metrics/regression_correlation/metric.py index be19052da..49fe4981c 100644 --- a/brainscore_vision/metrics/regression_correlation/metric.py +++ b/brainscore_vision/metrics/regression_correlation/metric.py @@ -395,7 +395,7 @@ def linear_regression(xarray_kwargs=None): def ridge_regression(regression_kwargs=None, xarray_kwargs=None): regression_defaults = dict(alpha=1) regression_kwargs = {**regression_defaults, **(regression_kwargs or {})} - regression = DualRidgeRegression(**regression_kwargs) + regression = Ridge(**regression_kwargs) xarray_kwargs = xarray_kwargs or {} regression = XarrayRegression(regression, **xarray_kwargs) return regression @@ -413,7 +413,27 @@ def ridge_cv_regression(regression_kwargs=None, xarray_kwargs=None, alphas=ALPHA regression_defaults = dict(alphas=alphas, store_cv_results=False) regression_kwargs = {**regression_defaults, **(regression_kwargs or {})} regression_kwargs.pop('alpha', None) # RidgeCV does not accept 'alpha' as a parameter - + + regression = RidgeCV(**regression_kwargs) + xarray_kwargs = xarray_kwargs or {} + regression = XarrayRegression(regression, **xarray_kwargs) + return regression + + +def dual_ridge_regression(regression_kwargs=None, xarray_kwargs=None): + regression_defaults = dict(alpha=1) + regression_kwargs = {**regression_defaults, **(regression_kwargs or {})} + regression = DualRidgeRegression(**regression_kwargs) + xarray_kwargs = xarray_kwargs or {} + regression = XarrayRegression(regression, **xarray_kwargs) + return regression + + +def dual_ridge_cv_regression(regression_kwargs=None, xarray_kwargs=None, alphas=ALPHA_LIST): + regression_defaults = dict(alphas=alphas) + regression_kwargs = {**regression_defaults, **(regression_kwargs or {})} + regression_kwargs.pop('alpha', None) + regression = DualRidgeCVRegression(**regression_kwargs) xarray_kwargs = xarray_kwargs or {} regression = XarrayRegression(regression, **xarray_kwargs)