Merge remote-tracking branch 'upstream/master' into pr/19102 · scikit-learn/scikit-learn@a7ccf62 · GitHub

Commit a7ccf62

Merge remote-tracking branch 'upstream/master' into pr/19102
2 parents 71f295c + dfc5e16 commit a7ccf62

10 files changed: +257 −29 lines changed

doc/whats_new/v1.0.rst

Lines changed: 11 additions & 0 deletions

@@ -62,6 +62,17 @@ Changelog
 - |Fix| :meth:`ElasticNet.fit` no longer modifies `sample_weight` in place.
   :pr:`19055` by `Thomas Fan`_.

+- |Enhancement| Validate user-supplied gram matrix passed to linear models
+  via the `precompute` argument. :pr:`19004` by :user:`Adam Midvidy <amidvidy>`.
+
+:mod:`sklearn.naive_bayes`
+..........................
+
+- |API| The attribute ``sigma_`` is now deprecated in
+  :class:`naive_bayes.GaussianNB` and will be removed in 1.2.
+  Use ``var_`` instead.
+  :pr:`18842` by :user:`Hong Shao Yang <hongshaoyang>`.
+

 Code and Documentation Contributors
 -----------------------------------

Lines changed: 53 additions & 0 deletions

@@ -0,0 +1,53 @@
+"""
+==========================================================================
+Fitting an Elastic Net with a precomputed Gram Matrix and Weighted Samples
+==========================================================================
+
+The following example shows how to precompute the gram matrix
+while using weighted samples with an ElasticNet.
+
+If weighted samples are used, the design matrix must be centered and then
+rescaled by the square root of the weight vector before the gram matrix
+is computed.
+
+.. note::
+  The `sample_weight` vector is also rescaled to sum to `n_samples`; see the
+  documentation for the `sample_weight` parameter to
+  :func:`linear_model.ElasticNet.fit`.
+
+"""
+
+print(__doc__)
+
+# %%
+# Let's start by loading the dataset and creating some sample weights.
+import numpy as np
+from sklearn.datasets import make_regression
+
+rng = np.random.RandomState(0)
+
+n_samples = int(1e5)
+X, y = make_regression(n_samples=n_samples, noise=0.5, random_state=rng)
+
+sample_weight = rng.lognormal(size=n_samples)
+# normalize the sample weights
+normalized_weights = sample_weight * (n_samples / (sample_weight.sum()))
+
+# %%
+# To fit the elastic net using the `precompute` option together with the
+# sample weights, we must first center the design matrix, then rescale it by
+# the normalized weights prior to computing the gram matrix.
+X_offset = np.average(X, axis=0, weights=normalized_weights)
+X_centered = X - X_offset
+X_scaled = X_centered * np.sqrt(normalized_weights)[:, np.newaxis]
+gram = np.dot(X_scaled.T, X_scaled)
+
+# %%
+# We can now proceed with fitting. We must pass the centered design matrix to
+# `fit`, otherwise the elastic net estimator will detect that it is uncentered
+# and discard the gram matrix we passed. However, if we pass the scaled design
+# matrix, the preprocessing code will incorrectly rescale it a second time.
+from sklearn.linear_model import ElasticNet
+
+lm = ElasticNet(alpha=0.01, precompute=gram)
+lm.fit(X_centered, y, sample_weight=normalized_weights)
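As a sanity check, the fit above can be compared against an ordinary weighted fit on the raw design matrix; the two should agree. A sketch, reusing `X`, `y`, `gram`, `X_centered` and `normalized_weights` from the example and mirroring the new test added in this commit:

import numpy as np
from sklearn.linear_model import ElasticNet

# Fit once with the precomputed gram on the centered data...
lm_gram = ElasticNet(alpha=0.01, precompute=gram)
lm_gram.fit(X_centered, y, sample_weight=normalized_weights)

# ...and once letting the estimator do all preprocessing internally.
lm_plain = ElasticNet(alpha=0.01, precompute=False)
lm_plain.fit(X, y, sample_weight=normalized_weights)

# The coefficients should match to numerical precision.
np.testing.assert_allclose(lm_gram.coef_, lm_plain.coef_)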

sklearn/linear_model/_base.py

Lines changed: 72 additions & 10 deletions

@@ -37,6 +37,7 @@
 from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
 from ..utils._seq_dataset import ArrayDataset64, CSRDataset64
 from ..utils.validation import check_is_fitted, _check_sample_weight
+
 from ..utils.fixes import delayed
 from ..preprocessing import normalize as f_normalize

@@ -570,6 +571,61 @@ def rmatvec(b):
         return self


+def _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale,
+                                   rtol=1e-7, atol=1e-5):
+    """Compute a single element of the gram matrix and compare it to
+    the corresponding element of the user-supplied gram matrix.
+
+    If the values do not match, a ValueError is raised.
+
+    Parameters
+    ----------
+    X : ndarray of shape (n_samples, n_features)
+        Data array.
+
+    precompute : array-like of shape (n_features, n_features)
+        User-supplied gram matrix.
+
+    X_offset : ndarray of shape (n_features,)
+        Array of feature means used to center the design matrix.
+
+    X_scale : ndarray of shape (n_features,)
+        Array of feature scale factors used to normalize the design matrix.
+
+    rtol : float, default=1e-7
+        Relative tolerance; see :func:`numpy.allclose`.
+
+    atol : float, default=1e-5
+        Absolute tolerance; see :func:`numpy.allclose`. Note that the default
+        here is more tolerant than the default for
+        :func:`numpy.testing.assert_allclose`, where `atol=0`.
+
+    Raises
+    ------
+    ValueError
+        Raised when the provided Gram matrix is not consistent.
+    """
+
+    n_features = X.shape[1]
+    f1 = n_features // 2
+    f2 = min(f1 + 1, n_features - 1)
+
+    v1 = (X[:, f1] - X_offset[f1]) * X_scale[f1]
+    v2 = (X[:, f2] - X_offset[f2]) * X_scale[f2]
+
+    expected = np.dot(v1, v2)
+    actual = precompute[f1, f2]
+
+    if not np.isclose(expected, actual, rtol=rtol, atol=atol):
+        raise ValueError("Gram matrix passed in via 'precompute' parameter "
+                         "did not pass validation when a single element was "
+                         "checked - please check that it was computed "
+                         f"properly. For element ({f1},{f2}) we computed "
+                         f"{expected} but the user-supplied value was "
+                         f"{actual}.")
+
+
 def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
              check_input=True, sample_weight=None):
     """Aux function used at beginning of fit in linear models

@@ -595,16 +651,22 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
                          check_input=check_input, sample_weight=sample_weight)
     if sample_weight is not None:
         X, y = _rescale_data(X, y, sample_weight=sample_weight)
-    if hasattr(precompute, '__array__') and (
-            fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
-            normalize and not np.allclose(X_scale, np.ones(n_features))):
-        warnings.warn("Gram matrix was provided but X was centered"
-                      " to fit intercept, "
-                      "or X was normalized : recomputing Gram matrix.",
-                      UserWarning)
-        # recompute Gram
-        precompute = 'auto'
-        Xy = None
+    if hasattr(precompute, '__array__'):
+        if (fit_intercept and not np.allclose(X_offset, np.zeros(n_features))
+                or normalize and not np.allclose(X_scale,
+                                                 np.ones(n_features))):
+            warnings.warn(
+                "Gram matrix was provided but X was centered to fit "
+                "intercept, or X was normalized: recomputing Gram matrix.",
+                UserWarning
+            )
+            # recompute Gram
+            precompute = 'auto'
+            Xy = None
+        elif check_input:
+            # If we're going to use the user's precomputed gram matrix, we
+            # do a quick check to make sure it's not totally bogus.
+            _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale)

     # precompute if n_samples > n_features
     if isinstance(precompute, str) and precompute == 'auto':
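The check is deliberately cheap: it recomputes a single off-diagonal Gram entry from `X` (one dot product over `n_samples`) instead of the full `n_features × n_features` product. A standalone sketch of the same idea, with `spot_check_gram` as an illustrative name rather than scikit-learn API:

import numpy as np

def spot_check_gram(X, gram, rtol=1e-7, atol=1e-5):
    # Recompute one off-diagonal entry of X.T @ X and compare it to the
    # candidate matrix; a mismatch means `gram` is inconsistent with X.
    n_features = X.shape[1]
    f1 = n_features // 2
    f2 = min(f1 + 1, n_features - 1)
    expected = np.dot(X[:, f1], X[:, f2])
    if not np.isclose(expected, gram[f1, f2], rtol=rtol, atol=atol):
        raise ValueError(f"gram[{f1}, {f2}] is {gram[f1, f2]}, "
                         f"expected {expected}")

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
spot_check_gram(X, X.T @ X)      # consistent: passes silently
# spot_check_gram(X, np.eye(5))  # inconsistent: raises ValueError

A single element can of course miss errors elsewhere in the matrix, which is why the error message describes it as validation of a single element rather than a full check.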

sklearn/linear_model/_coordinate_descent.py

Lines changed: 2 additions & 1 deletion

@@ -729,7 +729,8 @@ def fit(self, X, y, sample_weight=None, check_input=True):
             Target. Will be cast to X's dtype if necessary.

         sample_weight : float or array-like of shape (n_samples,), default=None
-            Sample weight.
+            Sample weight. Internally, the `sample_weight` vector will be
+            rescaled to sum to `n_samples`.

             .. versionadded:: 0.23
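Concretely, the rescaling preserves the relative weights and changes only their overall scale, exactly as in the normalization step of the new example above; a sketch of the arithmetic:

import numpy as np

sample_weight = np.array([0.5, 1.5, 2.0])
n_samples = sample_weight.shape[0]
# Rescale so the weights sum to n_samples.
rescaled = sample_weight * (n_samples / sample_weight.sum())
assert np.isclose(rescaled.sum(), n_samples)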

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 39 additions & 0 deletions

@@ -743,6 +743,45 @@ def test_precompute_invalid_argument():
                          "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)


+def test_elasticnet_precompute_incorrect_gram():
+    # check that passing an invalid precomputed Gram matrix will raise an
+    # error.
+    X, y, _, _ = build_dataset()
+
+    rng = np.random.RandomState(0)
+
+    X_centered = X - np.average(X, axis=0)
+    garbage = rng.standard_normal(X.shape)
+    precompute = np.dot(garbage.T, garbage)
+
+    clf = ElasticNet(alpha=0.01, precompute=precompute)
+    msg = "Gram matrix.*did not pass validation.*"
+    with pytest.raises(ValueError, match=msg):
+        clf.fit(X_centered, y)
+
+
+def test_elasticnet_precompute_gram_weighted_samples():
+    # check the equivalence between passing a precomputed Gram matrix and
+    # internal computation using sample weights.
+    X, y, _, _ = build_dataset()
+
+    rng = np.random.RandomState(0)
+    sample_weight = rng.lognormal(size=y.shape)
+
+    w_norm = sample_weight * (y.shape[0] / np.sum(sample_weight))
+    X_c = X - np.average(X, axis=0, weights=w_norm)
+    X_r = X_c * np.sqrt(w_norm)[:, np.newaxis]
+    gram = np.dot(X_r.T, X_r)
+
+    clf1 = ElasticNet(alpha=0.01, precompute=gram)
+    clf1.fit(X_c, y, sample_weight=sample_weight)
+
+    clf2 = ElasticNet(alpha=0.01, precompute=False)
+    clf2.fit(X, y, sample_weight=sample_weight)
+
+    assert_allclose(clf1.coef_, clf2.coef_)
+
+
 def test_warm_start_convergence():
     X, y, _, _ = build_dataset()
     model = ElasticNet(alpha=1e-3, tol=1e-3).fit(X, y)

sklearn/metrics/tests/test_common.py

Lines changed: 1 addition & 1 deletion

@@ -198,7 +198,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
     return np.array([
         precision,
         recall,
-        np.pad(thresholds,
+        np.pad(thresholds.astype(np.float64),
                pad_width=(0, pad_threshholds),
                mode='constant',
                constant_values=[np.nan])
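The cast matters because `np.pad` with `constant_values=np.nan` fails on integer arrays: NaN has no integer representation. A sketch:

import numpy as np

thresholds = np.array([1, 2, 3])  # integer dtype
# np.pad(thresholds, (0, 2), mode='constant', constant_values=np.nan)
# would raise ValueError: cannot convert float NaN to integer
padded = np.pad(thresholds.astype(np.float64), (0, 2),
                mode='constant', constant_values=np.nan)
print(padded)  # [ 1.  2.  3. nan nan]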

sklearn/naive_bayes.py

Lines changed: 25 additions & 8 deletions

@@ -154,7 +154,16 @@ class labels known to the classifier
         absolute additive value to variances

     sigma_ : ndarray of shape (n_classes, n_features)
-        variance of each feature per class
+        Variance of each feature per class.
+
+        .. deprecated:: 1.0
+            `sigma_` is deprecated in 1.0 and will be removed in 1.2.
+            Use `var_` instead.
+
+    var_ : ndarray of shape (n_classes, n_features)
+        Variance of each feature per class.
+
+        .. versionadded:: 1.0

     theta_ : ndarray of shape (n_classes, n_features)
         mean of each feature per class

@@ -377,7 +386,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False,
             n_features = X.shape[1]
             n_classes = len(self.classes_)
             self.theta_ = np.zeros((n_classes, n_features))
-            self.sigma_ = np.zeros((n_classes, n_features))
+            self.var_ = np.zeros((n_classes, n_features))

             self.class_count_ = np.zeros(n_classes, dtype=np.float64)

@@ -405,7 +414,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False,
                 msg = "Number of features %d does not match previous data %d."
                 raise ValueError(msg % (X.shape[1], self.theta_.shape[1]))
             # Put epsilon back in each time
-            self.sigma_[:, :] -= self.epsilon_
+            self.var_[:, :] -= self.epsilon_

         classes = self.classes_

@@ -429,14 +438,14 @@ def _partial_fit(self, X, y, classes=None, _refit=False,
             N_i = X_i.shape[0]

             new_theta, new_sigma = self._update_mean_variance(
-                self.class_count_[i], self.theta_[i, :], self.sigma_[i, :],
+                self.class_count_[i], self.theta_[i, :], self.var_[i, :],
                 X_i, sw_i)

             self.theta_[i, :] = new_theta
-            self.sigma_[i, :] = new_sigma
+            self.var_[i, :] = new_sigma
             self.class_count_[i] += N_i

-        self.sigma_[:, :] += self.epsilon_
+        self.var_[:, :] += self.epsilon_

        # Update only if no priors were provided
        if self.priors is None:

@@ -449,14 +458,22 @@ def _joint_log_likelihood(self, X):
         joint_log_likelihood = []
         for i in range(np.size(self.classes_)):
             jointi = np.log(self.class_prior_[i])
-            n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.sigma_[i, :]))
+            n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.var_[i, :]))
             n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) /
-                                 (self.sigma_[i, :]), 1)
+                                 (self.var_[i, :]), 1)
             joint_log_likelihood.append(jointi + n_ij)

         joint_log_likelihood = np.array(joint_log_likelihood).T
         return joint_log_likelihood

+    @deprecated(  # type: ignore
+        "Attribute sigma_ was deprecated in 1.0 and will be removed in "
+        "1.2. Use var_ instead."
+    )
+    @property
+    def sigma_(self):
+        return self.var_
+

 _ALPHA_MIN = 1e-10
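From the caller's side the rename is transparent until the alias is removed in 1.2. A sketch of what reading the deprecated attribute looks like (assuming, as with other scikit-learn deprecations, that the `deprecated` decorator emits a `FutureWarning`):

import warnings
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB().fit([[1.0], [2.0], [3.0], [4.0]], [0, 0, 1, 1])

var = clf.var_  # new attribute, added in 1.0

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    sigma = clf.sigma_  # deprecated alias of var_, removed in 1.2
assert caught and (sigma == var).all()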

sklearn/neighbors/tests/test_dist_metrics.py

Lines changed: 26 additions & 2 deletions

@@ -55,7 +55,19 @@ def test_cdist(metric):
     keys = argdict.keys()
     for vals in itertools.product(*argdict.values()):
         kwargs = dict(zip(keys, vals))
-        D_true = cdist(X1, X2, metric, **kwargs)
+        if metric == "wminkowski":
+            if sp_version >= parse_version("1.8.0"):
+                pytest.skip("wminkowski will be removed in SciPy 1.8.0")
+
+            # wminkowski is deprecated in SciPy 1.6.0 and removed in 1.8.0
+            ExceptionToAssert = None
+            if sp_version >= parse_version("1.6.0"):
+                ExceptionToAssert = DeprecationWarning
+            with pytest.warns(ExceptionToAssert):
+                D_true = cdist(X1, X2, metric, **kwargs)
+        else:
+            D_true = cdist(X1, X2, metric, **kwargs)
+
         check_cdist(metric, kwargs, D_true)


@@ -83,7 +95,19 @@ def test_pdist(metric):
     keys = argdict.keys()
     for vals in itertools.product(*argdict.values()):
         kwargs = dict(zip(keys, vals))
-        D_true = cdist(X1, X1, metric, **kwargs)
+        if metric == "wminkowski":
+            if sp_version >= parse_version("1.8.0"):
+                pytest.skip("wminkowski will be removed in SciPy 1.8.0")
+
+            # wminkowski is deprecated in SciPy 1.6.0 and removed in 1.8.0
+            ExceptionToAssert = None
+            if sp_version >= parse_version("1.6.0"):
+                ExceptionToAssert = DeprecationWarning
+            with pytest.warns(ExceptionToAssert):
+                D_true = cdist(X1, X1, metric, **kwargs)
+        else:
+            D_true = cdist(X1, X1, metric, **kwargs)
+
         check_pdist(metric, kwargs, D_true)
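The skip/warn/call gate appears twice here and again in test_neighbors.py below, so the pattern could be factored into a small helper. A sketch, with `call_deprecated_metric` as a hypothetical name:

import pytest
from sklearn.utils.fixes import sp_version, parse_version

def call_deprecated_metric(func, *args, **kwargs):
    # Skip where wminkowski is gone, assert its DeprecationWarning where
    # it is merely deprecated, and call it plainly on older SciPy.
    if sp_version >= parse_version("1.8.0"):
        pytest.skip("wminkowski will be removed in SciPy 1.8.0")
    if sp_version >= parse_version("1.6.0"):
        with pytest.warns(DeprecationWarning):
            return func(*args, **kwargs)
    return func(*args, **kwargs)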

sklearn/neighbors/tests/test_neighbors.py

Lines changed: 14 additions & 2 deletions

@@ -26,6 +26,7 @@
 from sklearn.utils._testing import assert_raise_message
 from sklearn.utils._testing import ignore_warnings
 from sklearn.utils.validation import check_random_state
+from sklearn.utils.fixes import sp_version, parse_version

 import joblib

@@ -1244,6 +1245,9 @@ def test_neighbors_metrics(n_samples=20, n_features=3,
     test = rng.rand(n_query_pts, n_features)

     for metric, metric_params in metrics:
+        if metric == "wminkowski" and sp_version >= parse_version("1.8.0"):
+            # wminkowski will be removed in SciPy 1.8.0
+            continue
         results = {}
         p = metric_params.pop('p', 2)
         for algorithm in algorithms:

@@ -1265,8 +1269,16 @@ def test_neighbors_metrics(n_samples=20, n_features=3,
                           if metric == 'haversine' else slice(None))

             neigh.fit(X[:, feature_sl])
-            results[algorithm] = neigh.kneighbors(test[:, feature_sl],
-                                                  return_distance=True)
+
+            # wminkowski is deprecated in SciPy 1.6.0 and removed in 1.8.0
+            ExceptionToAssert = None
+            if (metric == "wminkowski" and algorithm == 'brute'
+                    and sp_version >= parse_version("1.6.0")):
+                ExceptionToAssert = DeprecationWarning
+
+            with pytest.warns(ExceptionToAssert):
+                results[algorithm] = neigh.kneighbors(test[:, feature_sl],
+                                                      return_distance=True)

         assert_array_almost_equal(results['brute'][0], results['ball_tree'][0])
         assert_array_almost_equal(results['brute'][1], results['ball_tree'][1])

0 commit comments