From 9800b4b08c62b7616e3d3de997c82f840aa97c3c Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 10:32:56 +0200
Subject: [PATCH 01/34] Common sample_weight validation in huber

---
 sklearn/linear_model/huber.py |  9 +++------
 sklearn/utils/validation.py   | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py
index 0a4b6e10f6f98..3225487df7dab 100644
--- a/sklearn/linear_model/huber.py
+++ b/sklearn/linear_model/huber.py
@@ -8,8 +8,8 @@
 from ..base import BaseEstimator, RegressorMixin
 from .base import LinearModel
 from ..utils import check_X_y
-from ..utils import check_consistent_length
 from ..utils import axis0_safe_slice
+from ..utils.validation import _check_sample_weight
 from ..utils.extmath import safe_sparse_dot
 
 
@@ -253,11 +253,8 @@ def fit(self, X, y, sample_weight=None):
         X, y = check_X_y(
             X, y, copy=False, accept_sparse=['csr'], y_numeric=True,
             dtype=[np.float64, np.float32])
-        if sample_weight is not None:
-            sample_weight = np.array(sample_weight)
-            check_consistent_length(y, sample_weight)
-        else:
-            sample_weight = np.ones_like(y)
+
+        sample_weight = _check_sample_weight(sample_weight, y)
 
         if self.epsilon < 1.0:
             raise ValueError(
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index bb6cf1c8ffe00..0413c48daeec7 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -980,3 +980,32 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
 
     if max_val is not None and x > max_val:
         raise ValueError('`{}`= {}, must be <= {}.'.format(name, x, max_val))
+
+
+def _check_sample_weight(sample_weight, y, **kwargs):
+    """Validate sample weights
+
+    Parameters
+    ----------
+    sample_weight : {ndarray, Number or None}
+       input sample weights
+    y: ndarray or None
+       target variable
+    kwargs:
+       additional parameters to pass to check_array
+
+    Parameters
+    ----------
+    sample_weight : ndarray
+       validated sample weights
+    """
+    if sample_weight is None or isinstance(sample_weight, numbers.Number):
+        sample_weight = np.ones_like(y)
+    else:
+        sample_weight = check_array(
+                sample_weight, accept_sparse=False,
+                ensure_2d=False, dtype=[np.float64, np.float32],
+                **kwargs
+        )
+        check_consistent_length(y, sample_weight)
+    return sample_weight

From 95df18752be8f8a39b94196ef1a0584f42629c1c Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 10:52:58 +0200
Subject: [PATCH 02/34] Use _check_sample_weight in
 linear_model/stochastic_gradient

---
 sklearn/linear_model/stochastic_gradient.py | 21 ++++-----------------
 sklearn/utils/validation.py                 |  2 +-
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 625bdb5bdc3f9..b47adcfac2e87 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -18,7 +18,7 @@
 from ..utils import check_array, check_random_state, check_X_y
 from ..utils.extmath import safe_sparse_dot
 from ..utils.multiclass import _check_partial_fit_first_call
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..exceptions import ConvergenceWarning
 from ..model_selection import StratifiedShuffleSplit, ShuffleSplit
 
@@ -169,19 +169,6 @@ def _get_penalty_type(self, penalty):
         except KeyError:
             raise ValueError("Penalty %s is not supported. " % penalty)
 
-    def _validate_sample_weight(self, sample_weight, n_samples):
-        """Set the sample weight array."""
-        if sample_weight is None:
-            # uniform sample weights
-            sample_weight = np.ones(n_samples, dtype=np.float64, order='C')
-        else:
-            # user-provided array
-            sample_weight = np.asarray(sample_weight, dtype=np.float64,
-                                       order="C")
-        if sample_weight.shape[0] != n_samples:
-            raise ValueError("Shapes of X and sample_weight do not match.")
-        return sample_weight
-
     def _allocate_parameter_mem(self, n_classes, n_features, coef_init=None,
                                 intercept_init=None):
         """Allocate mem for parameters; initialize if provided."""
@@ -488,7 +475,7 @@ def _partial_fit(self, X, y, alpha, C,
         # Allocate datastructures from input arguments
         self._expanded_class_weight = compute_class_weight(self.class_weight,
                                                            self.classes_, y)
-        sample_weight = self._validate_sample_weight(sample_weight, n_samples)
+        sample_weight = _check_sample_weight(sample_weight, y, order="C")
 
         if getattr(self, "coef_", None) is None or coef_init is not None:
             self._allocate_parameter_mem(n_classes, n_features,
@@ -1095,9 +1082,9 @@ def _partial_fit(self, X, y, alpha, C, loss, learning_rate,
 
         n_samples, n_features = X.shape
 
-        # Allocate datastructures from input arguments
-        sample_weight = self._validate_sample_weight(sample_weight, n_samples)
+        sample_weight = _check_sample_weight(sample_weight, y, order="C")
 
+        # Allocate datastructures from input arguments
         if getattr(self, "coef_", None) is None:
             self._allocate_parameter_mem(1, n_features, coef_init,
                                          intercept_init)
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 0413c48daeec7..4ae108c0cade4 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1000,7 +1000,7 @@ def _check_sample_weight(sample_weight, y, **kwargs):
        validated sample weights
     """
     if sample_weight is None or isinstance(sample_weight, numbers.Number):
-        sample_weight = np.ones_like(y)
+        sample_weight = np.ones(y.shape)
     else:
         sample_weight = check_array(
                 sample_weight, accept_sparse=False,

From c889db37fc015499a4d8d78e44dc7192800a2b1e Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 11:17:09 +0200
Subject: [PATCH 03/34] Use _check_sample_weight in logistic, ransac, ridge, sag

---
 sklearn/linear_model/logistic.py | 13 ++++---------
 sklearn/linear_model/ransac.py   |  5 ++---
 sklearn/linear_model/ridge.py    |  9 ++++-----
 sklearn/linear_model/sag.py      |  5 +++--
 sklearn/utils/validation.py      | 28 ++++++++++++++++++++++------
 5 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
index 5b9de7bc6e68c..56697206ace82 100644
--- a/sklearn/linear_model/logistic.py
+++ b/sklearn/linear_model/logistic.py
@@ -30,7 +30,7 @@
 from ..utils.fixes import logsumexp
 from ..utils.optimize import newton_cg
 from ..utils.validation import check_X_y
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..utils import deprecated
 from ..exceptions import (ConvergenceWarning, ChangedBehaviorWarning)
 from ..utils.multiclass import check_classification_targets
@@ -826,11 +826,8 @@ def _logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     # If sample weights exist, convert them to array (support for lists)
     # and check length
     # Otherwise set them to 1 for all examples
-    if sample_weight is not None:
-        sample_weight = np.array(sample_weight, dtype=X.dtype, order='C')
-        check_consistent_length(y, sample_weight)
-    else:
-        sample_weight = np.ones(X.shape[0], dtype=X.dtype)
+    sample_weight = _check_sample_weight(sample_weight, n_samples=X.shape[0],
+                                         dtype=X.dtype, order='C')
 
     # If class_weights is a dict (provided by the user), the weights
     # are assigned to the original labels. If it is "balanced", then
@@ -1135,9 +1132,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
     y_test = y[test]
 
     if sample_weight is not None:
-        sample_weight = check_array(sample_weight, ensure_2d=False)
-        check_consistent_length(y, sample_weight)
-
+        sample_weight = _check_sample_weight(sample_weight, y)
         sample_weight = sample_weight[train]
 
     coefs, Cs, n_iter = _logistic_regression_path(
diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py
index 7f4fb650b59e8..b4f9cc03d9e4a 100644
--- a/sklearn/linear_model/ransac.py
+++ b/sklearn/linear_model/ransac.py
@@ -11,7 +11,7 @@
 from ..base import MultiOutputMixin
 from ..utils import check_random_state, check_array, check_consistent_length
 from ..utils.random import sample_without_replacement
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from .base import LinearRegression
 from ..utils.validation import has_fit_parameter
 from ..exceptions import ConvergenceWarning
@@ -324,8 +324,7 @@ def fit(self, X, y, sample_weight=None):
             raise ValueError("%s does not support sample_weight. Samples"
                              " weights are only used for the calibration"
                              " itself." % estimator_name)
-        if sample_weight is not None:
-            sample_weight = np.asarray(sample_weight)
+        sample_weight = _check_sample_weight(sample_weight, y)
 
         n_inliers_best = 1
         score_best = -np.inf
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index 45862d5f3cffb..9db7bd41731a4 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -27,6 +27,7 @@
 from ..utils import check_consistent_length
 from ..utils import compute_sample_weight
 from ..utils import column_or_1d
+from ..utils.validation import _check_sample_weight
 from ..preprocessing import LabelBinarizer
 from ..model_selection import GridSearchCV
 from ..metrics.scorer import check_scoring
@@ -428,8 +429,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                          " %d != %d" % (n_samples, n_samples_))
 
     if has_sw:
-        if np.atleast_1d(sample_weight).ndim > 1:
-            raise ValueError("Sample weights must be 1D array or scalar")
+        sample_weight = _check_sample_weight(sample_weight, y)
 
         if solver not in ['sag', 'saga']:
             # SAG supports sample_weight directly. For other solvers,
@@ -1406,9 +1406,8 @@ def fit(self, X, y, sample_weight=None):
                 "alphas must be positive. Got {} containing some "
                 "negative or null value instead.".format(self.alphas))
 
-        if sample_weight is not None and not isinstance(sample_weight, float):
-            sample_weight = check_array(sample_weight, ensure_2d=False,
-                                        dtype=X.dtype)
+        sample_weight = _check_sample_weight(sample_weight, y, dtype=X.dtype)
+
         n_samples, n_features = X.shape
 
         X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(
diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py
index 233a6ed1c50af..eccc4ab99462e 100644
--- a/sklearn/linear_model/sag.py
+++ b/sklearn/linear_model/sag.py
@@ -12,6 +12,7 @@
 from .sag_fast import sag32, sag64
 from ..exceptions import ConvergenceWarning
 from ..utils import check_array
+from ..utils.validation import _check_sample_weight
 from ..utils.extmath import row_norms
 
 
@@ -251,8 +252,8 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
     n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1
 
     # initialization
-    if sample_weight is None:
-        sample_weight = np.ones(n_samples, dtype=X.dtype, order='C')
+    sample_weight = _check_sample_weight(sample_weight, n_samples=n_samples,
+                                         order='C')
 
     if 'coef' in warm_start_mem.keys():
         coef_init = warm_start_mem['coef']
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 4ae108c0cade4..887a9fe956bfa 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -982,7 +982,8 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
         raise ValueError('`{}`= {}, must be <= {}.'.format(name, x, max_val))
 
 
-def _check_sample_weight(sample_weight, y, **kwargs):
+def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
+                         **kwargs):
     """Validate sample weights
 
     Parameters
@@ -990,7 +991,11 @@ def _check_sample_weight(sample_weight, y, **kwargs):
     sample_weight : {ndarray, Number or None}
        input sample weights
     y: ndarray or None
-       target variable
+       target variable. Either y or n_samples must be provided.
+    n_samples: int or None
+       expected number of samples. Either y or n_samples must be provided.
+    dtype: dtype
+       dtype of the validated sample_weight
     kwargs:
        additional parameters to pass to check_array
 
@@ -999,13 +1004,24 @@ def _check_sample_weight(sample_weight, y, **kwargs):
     sample_weight : ndarray
        validated sample weights
     """
+    if n_samples is not None and y is not None:
+        raise ValueError('Only one of y, n_samples must be provided!')
+    elif y is not None:
+        n_samples = y.shape[0]
+
     if sample_weight is None or isinstance(sample_weight, numbers.Number):
-        sample_weight = np.ones(y.shape)
+        sample_weight = np.ones(n_samples, dtype=dtype)
     else:
+        if dtype is None:
+            dtype = [np.float64, np.float32]
         sample_weight = check_array(
                 sample_weight, accept_sparse=False,
-                ensure_2d=False, dtype=[np.float64, np.float32],
-                **kwargs
+                ensure_2d=False, dtype=dtype, **kwargs
         )
-        check_consistent_length(y, sample_weight)
+        if sample_weight.ndim != 1:
+            raise ValueError("Sample weights must be 1D array or scalar")
+
+        if sample_weight.shape != (n_samples,):
+            raise ValueError("samples_weight.shape == {}, expected {}!"
+                             .format(sample_weight.shape, (n_samples,)))
     return sample_weight

From bd52cfc1c0d04c8ef65ace8be0ff8be58b50ad5e Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 11:21:10 +0200
Subject: [PATCH 04/34] Add explicit order parameter to _check_sample_weight

---
 sklearn/utils/validation.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 887a9fe956bfa..49f439c18962d 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -983,7 +983,7 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
 
 
 def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
-                         **kwargs):
+                         order=None, **kwargs):
     """Validate sample weights
 
     Parameters
@@ -996,6 +996,12 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
        expected number of samples. Either y or n_samples must be provided.
     dtype: dtype
        dtype of the validated sample_weight
+    order : 'F', 'C' or None (default=None)
+        Whether an array will be forced to be fortran or c-style.
+        When order is None (default), if ``sample_weights`` is an ndarray,
+        nothing is ensured about the memory layout of the output array,
+        otherwise it will be of 'C' order by default.
+
     kwargs:
        additional parameters to pass to check_array
 
@@ -1010,7 +1016,9 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
         n_samples = y.shape[0]
 
     if sample_weight is None or isinstance(sample_weight, numbers.Number):
-        sample_weight = np.ones(n_samples, dtype=dtype)
+        if order is None:
+            order = 'C'
+        sample_weight = np.ones(n_samples, dtype=dtype, order=order)
     else:
         if dtype is None:
             dtype = [np.float64, np.float32]

From 9e108a467ff442299ac0ba5ff5283c7fb47e6280 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 11:25:10 +0200
Subject: [PATCH 05/34] Sample weight checks in svm

---
 sklearn/svm/base.py         | 10 ++++------
 sklearn/utils/validation.py |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index 4a50ee479f030..2dbfad97eb774 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -8,7 +8,7 @@
 from ..base import BaseEstimator, ClassifierMixin
 from ..preprocessing import LabelEncoder
 from ..utils.multiclass import _ovr_decision_function
-from ..utils import check_array, check_consistent_length, check_random_state
+from ..utils import check_array, check_random_state, _check_sample_weight
 from ..utils import column_or_1d, check_X_y
 from ..utils import compute_class_weight
 from ..utils.extmath import safe_sparse_dot
@@ -906,11 +906,9 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
     # LibLinear wants targets as doubles, even for classification
     y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
     y_ind = np.require(y_ind, requirements="W")
-    if sample_weight is None:
-        sample_weight = np.ones(X.shape[0])
-    else:
-        sample_weight = np.array(sample_weight, dtype=np.float64, order='C')
-        check_consistent_length(sample_weight, X)
+
+    sample_weight = _check_sample_weight(sample_weight, n_samples=X.shape[0],
+                                         dtype=np.float64, order='C')
 
     solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
     raw_coef_, n_iter_ = liblinear.train_wrap(
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 49f439c18962d..5d91655ae307b 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1007,7 +1007,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
 
     Parameters
     ----------
-    sample_weight : ndarray
+    sample_weight : ndarray, shape=(n_samples,)
        validated sample weights
     """
     if n_samples is not None and y is not None:

From 4ff292d0fc9f9481e1e9106672baf44f986d442c Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 11:39:42 +0200
Subject: [PATCH 06/34] Tests

---
 sklearn/svm/base.py                    |  3 ++-
 sklearn/utils/tests/test_validation.py | 29 +++++++++++++++++++++++++-
 sklearn/utils/validation.py            |  2 +-
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index 2dbfad97eb774..4f3b57a35c144 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -8,11 +8,12 @@
 from ..base import BaseEstimator, ClassifierMixin
 from ..preprocessing import LabelEncoder
 from ..utils.multiclass import _ovr_decision_function
-from ..utils import check_array, check_random_state, _check_sample_weight
+from ..utils import check_array, check_random_state
 from ..utils import column_or_1d, check_X_y
 from ..utils import compute_class_weight
 from ..utils.extmath import safe_sparse_dot
 from ..utils.validation import check_is_fitted, _check_large_sparse
+from ..utils.validation import _check_sample_weight
 from ..utils.multiclass import check_classification_targets
 from ..exceptions import ConvergenceWarning
 from ..exceptions import NotFittedError
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 0aa8eae22b1e2..c614019c548f3 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -20,6 +20,7 @@
 from sklearn.utils.testing import SkipTest
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_allclose_dense_sparse
+from sklearn.utils.testing import assert_allclose
 from sklearn.utils import as_float_array, check_array, check_symmetric
 from sklearn.utils import check_X_y
 from sklearn.utils import deprecated
@@ -39,7 +40,8 @@
     check_memory,
     check_non_negative,
     _num_samples,
-    check_scalar)
+    check_scalar,
+    _check_sample_weight)
 import sklearn
 
 from sklearn.exceptions import NotFittedError
@@ -853,3 +855,28 @@ def test_check_scalar_invalid(x, target_name, target_type, min_val, max_val,
                      min_val=min_val, max_val=max_val)
     assert str(raised_error.value) == str(err_msg)
     assert type(raised_error.value) == type(err_msg)
+
+
+def test_check_sample_weight():
+    with pytest.raises(ValueError,
+                       match="Only one of y, n_samples must be provided"):
+        _check_sample_weight(np.ones(3), y=np.ones(3), n_samples=3)
+
+    # check order="C" parameter
+    sample_weight = np.ones(10)[::2]
+    assert not sample_weight.flags["C_CONTIGUOUS"]
+    sample_weight = _check_sample_weight(sample_weight, n_samples=5, order="C")
+    assert sample_weight.flags["C_CONTIGUOUS"]
+
+    # check None input
+    sample_weight = _check_sample_weight(None, n_samples=5)
+    assert_allclose(sample_weight, np.ones(5))
+
+    # check numbers input
+    sample_weight = _check_sample_weight(2.0, n_samples=5)
+    assert_allclose(sample_weight, np.ones(5))
+
+    # check wrong number of dimensions
+    with pytest.raises(ValueError,
+                       match="Sample weights must be 1D array or scalar"):
+        _check_sample_weight(np.ones((2, 4)), n_samples=5)
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 5d91655ae307b..f70ef5db5b8cb 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1024,7 +1024,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
             dtype = [np.float64, np.float32]
         sample_weight = check_array(
                 sample_weight, accept_sparse=False,
-                ensure_2d=False, dtype=dtype, **kwargs
+                ensure_2d=False, dtype=dtype, order=order, **kwargs
         )
         if sample_weight.ndim != 1:
             raise ValueError("Sample weights must be 1D array or scalar")

From bb64a9b18e1f4b85b10eb5ee2d661210bdc0065d Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 11:51:57 +0200
Subject: [PATCH 07/34] Pass X.dtype to _check_sample_weight in sag_solver

---
 sklearn/linear_model/sag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py
index eccc4ab99462e..7bd2cfba24703 100644
--- a/sklearn/linear_model/sag.py
+++ b/sklearn/linear_model/sag.py
@@ -253,7 +253,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
 
     # initialization
     sample_weight = _check_sample_weight(sample_weight, n_samples=n_samples,
-                                         order='C')
+                                         dtype=X.dtype, order='C')
 
     if 'coef' in warm_start_mem.keys():
         coef_init = warm_start_mem['coef']

From 29e4ff620c794f0ff7c253535c903ccc18f95995 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 12:57:33 +0200
Subject: [PATCH 08/34] Better handle numeric sample_weight

---
 sklearn/utils/tests/test_validation.py | 2 +-
 sklearn/utils/validation.py            | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index c614019c548f3..1961b995f8f31 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -874,7 +874,7 @@ def test_check_sample_weight():
 
     # check numbers input
     sample_weight = _check_sample_weight(2.0, n_samples=5)
-    assert_allclose(sample_weight, np.ones(5))
+    assert_allclose(sample_weight, 2*np.ones(5))
 
     # check wrong number of dimensions
     with pytest.raises(ValueError,
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index f70ef5db5b8cb..308611445805a 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1018,7 +1018,11 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     if sample_weight is None or isinstance(sample_weight, numbers.Number):
         if order is None:
             order = 'C'
-        sample_weight = np.ones(n_samples, dtype=dtype, order=order)
+        if sample_weight is None:
+            sample_weight = np.ones(n_samples, dtype=dtype, order=order)
+        else:
+            sample_weight = np.full(n_samples, sample_weight,
+                                    dtype=dtype, order=order)
     else:
         if dtype is None:
             dtype = [np.float64, np.float32]

From 4d7bb1550bba297c3f4e919d47777622b3368464 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:24:13 +0200
Subject: [PATCH 09/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 308611445805a..53c80e6d5346c 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -988,7 +988,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
 
     Parameters
     ----------
-    sample_weight : {ndarray, Number or None}
+    sample_weight : {ndarray, Number or None}, shape (n_samples,)
        input sample weights
     y: ndarray or None
        target variable. Either y or n_samples must be provided.

From 1c0f6a764d7c1ca7b441a398b36fd5c9ebf53194 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:24:26 +0200
Subject: [PATCH 10/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 53c80e6d5346c..755f69186a5a1 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -989,7 +989,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     Parameters
     ----------
     sample_weight : {ndarray, Number or None}, shape (n_samples,)
-       input sample weights
+       Input sample weights.
     y: ndarray or None
        target variable. Either y or n_samples must be provided.
     n_samples: int or None

From 59abc05176b9aaea20f7c493e9362d2841b56462 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:24:38 +0200
Subject: [PATCH 11/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 755f69186a5a1..bf2772b7054af 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -990,7 +990,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     ----------
     sample_weight : {ndarray, Number or None}, shape (n_samples,)
        Input sample weights.
-    y: ndarray or None
+    y : ndarray or None, shape (n_samples,)
        target variable. Either y or n_samples must be provided.
     n_samples: int or None
        expected number of samples. Either y or n_samples must be provided.

From 84b0ac0bf8e1433d221ce2a79c572c52ff3d4be3 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:24:54 +0200
Subject: [PATCH 12/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index bf2772b7054af..f690fb9bf3eed 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -991,7 +991,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     sample_weight : {ndarray, Number or None}, shape (n_samples,)
        Input sample weights.
     y : ndarray or None, shape (n_samples,)
-       target variable. Either y or n_samples must be provided.
+       Target variable. Either `y` or `n_samples` must be provided.
     n_samples: int or None
        expected number of samples. Either y or n_samples must be provided.
     dtype: dtype

From 4ea0694f852c5e07094b7e760b0d5804bc129e5b Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:25:05 +0200
Subject: [PATCH 13/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index f690fb9bf3eed..2514f7e40cea0 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -995,7 +995,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     n_samples: int or None
        expected number of samples. Either y or n_samples must be provided.
     dtype: dtype
-       dtype of the validated sample_weight
+       dtype of the validated `sample_weight`.
     order : 'F', 'C' or None (default=None)
         Whether an array will be forced to be fortran or c-style.
         When order is None (default), if ``sample_weights`` is an ndarray,

From cfc7a97c836d9d8a166a7210c224dd7deb526303 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:25:17 +0200
Subject: [PATCH 14/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 2514f7e40cea0..b0885b7cfd0cd 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -992,7 +992,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
        Input sample weights.
     y : ndarray or None, shape (n_samples,)
        Target variable. Either `y` or `n_samples` must be provided.
-    n_samples: int or None
+    n_samples : int or None
        expected number of samples. Either y or n_samples must be provided.
     dtype: dtype
        dtype of the validated `sample_weight`.

From 908bbfc073bf641f4adb5078fd46a02b9e6c721b Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:25:26 +0200
Subject: [PATCH 15/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index b0885b7cfd0cd..f55e63f5a5ac2 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1003,7 +1003,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
         otherwise it will be of 'C' order by default.
 
     kwargs:
-       additional parameters to pass to check_array
+       Additional parameters to pass to `check_array`
 
     Parameters
     ----------

From c6280b679fc86ea0d1a0512e4117740b4d775dbd Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:25:37 +0200
Subject: [PATCH 16/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index f55e63f5a5ac2..2ce2e7aeb59d7 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1007,7 +1007,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
 
     Parameters
     ----------
-    sample_weight : ndarray, shape=(n_samples,)
+    sample_weight : ndarray, shape (n_samples,)
        validated sample weights
     """
     if n_samples is not None and y is not None:

From 2b84f90ef6fe6e55003257fece7384d36506bb54 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:26:02 +0200
Subject: [PATCH 17/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 2ce2e7aeb59d7..9b53c3b48b86b 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1002,7 +1002,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
         nothing is ensured about the memory layout of the output array,
         otherwise it will be of 'C' order by default.
 
-    kwargs:
+    kwargs :
        Additional parameters to pass to `check_array`
 
     Parameters

From d81fec19c07daf856c20ec90a9e277a3d18ed638 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 12 Jul 2019 13:26:12 +0200
Subject: [PATCH 18/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 9b53c3b48b86b..a70d58be41eaa 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1008,7 +1008,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     Parameters
     ----------
     sample_weight : ndarray, shape (n_samples,)
-       validated sample weights
+       Validated sample weights.
     """
     if n_samples is not None and y is not None:
         raise ValueError('Only one of y, n_samples must be provided!')

From b2b1773b24df5368e03ba7134ae60dac21987adb Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 16:49:46 +0200
Subject: [PATCH 19/34] Remove kwargs

---
 sklearn/utils/validation.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index a70d58be41eaa..3135f7d2c4efc 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -983,7 +983,7 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
 
 
 def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
-                         order=None, **kwargs):
+                         order=None):
     """Validate sample weights
 
     Parameters
@@ -1002,9 +1002,6 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
         nothing is ensured about the memory layout of the output array,
         otherwise it will be of 'C' order by default.
 
-    kwargs :
-       Additional parameters to pass to `check_array`
-
     Parameters
     ----------
     sample_weight : ndarray, shape (n_samples,)
@@ -1028,7 +1025,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
             dtype = [np.float64, np.float32]
         sample_weight = check_array(
                 sample_weight, accept_sparse=False,
-                ensure_2d=False, dtype=dtype, order=order, **kwargs
+                ensure_2d=False, dtype=dtype, order=order
         )
         if sample_weight.ndim != 1:
             raise ValueError("Sample weights must be 1D array or scalar")

From 22f9275bb6ca21c4829dad9bfee4535b8dd5491d Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 17:43:53 +0200
Subject: [PATCH 20/34] Use _check_sample_weight(sample_weight, X)

---
 sklearn/linear_model/huber.py               |  2 +-
 sklearn/linear_model/logistic.py            |  4 ++--
 sklearn/linear_model/ransac.py              |  2 +-
 sklearn/linear_model/ridge.py               |  2 +-
 sklearn/linear_model/sag.py                 |  2 +-
 sklearn/linear_model/stochastic_gradient.py |  4 ++--
 sklearn/svm/base.py                         |  2 +-
 sklearn/utils/tests/test_validation.py      | 13 +++++--------
 sklearn/utils/validation.py                 | 13 ++++---------
 9 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py
index 3225487df7dab..15d9415de0031 100644
--- a/sklearn/linear_model/huber.py
+++ b/sklearn/linear_model/huber.py
@@ -254,7 +254,7 @@ def fit(self, X, y, sample_weight=None):
             X, y, copy=False, accept_sparse=['csr'], y_numeric=True,
             dtype=[np.float64, np.float32])
 
-        sample_weight = _check_sample_weight(sample_weight, y)
+        sample_weight = _check_sample_weight(sample_weight, X)
 
         if self.epsilon < 1.0:
             raise ValueError(
diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
index 56697206ace82..8dffe84d5bb02 100644
--- a/sklearn/linear_model/logistic.py
+++ b/sklearn/linear_model/logistic.py
@@ -826,7 +826,7 @@ def _logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     # If sample weights exist, convert them to array (support for lists)
     # and check length
     # Otherwise set them to 1 for all examples
-    sample_weight = _check_sample_weight(sample_weight, n_samples=X.shape[0],
+    sample_weight = _check_sample_weight(sample_weight, X,
                                          dtype=X.dtype, order='C')
 
     # If class_weights is a dict (provided by the user), the weights
@@ -1132,7 +1132,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
     y_test = y[test]
 
     if sample_weight is not None:
-        sample_weight = _check_sample_weight(sample_weight, y)
+        sample_weight = _check_sample_weight(sample_weight, X)
         sample_weight = sample_weight[train]
 
     coefs, Cs, n_iter = _logistic_regression_path(
diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py
index b4f9cc03d9e4a..b901e848f49bf 100644
--- a/sklearn/linear_model/ransac.py
+++ b/sklearn/linear_model/ransac.py
@@ -324,7 +324,7 @@ def fit(self, X, y, sample_weight=None):
             raise ValueError("%s does not support sample_weight. Samples"
                              " weights are only used for the calibration"
                              " itself." % estimator_name)
-        sample_weight = _check_sample_weight(sample_weight, y)
+        sample_weight = _check_sample_weight(sample_weight, X)
 
         n_inliers_best = 1
         score_best = -np.inf
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index 9db7bd41731a4..e111d2061514b 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -1406,7 +1406,7 @@ def fit(self, X, y, sample_weight=None):
                 "alphas must be positive. Got {} containing some "
                 "negative or null value instead.".format(self.alphas))
 
-        sample_weight = _check_sample_weight(sample_weight, y, dtype=X.dtype)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
         n_samples, n_features = X.shape
 
diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py
index 7bd2cfba24703..0a092447a49b1 100644
--- a/sklearn/linear_model/sag.py
+++ b/sklearn/linear_model/sag.py
@@ -252,7 +252,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
     n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1
 
     # initialization
-    sample_weight = _check_sample_weight(sample_weight, n_samples=n_samples,
+    sample_weight = _check_sample_weight(sample_weight, X,
                                          dtype=X.dtype, order='C')
 
     if 'coef' in warm_start_mem.keys():
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index b47adcfac2e87..3b892ecec4e35 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -475,7 +475,7 @@ def _partial_fit(self, X, y, alpha, C,
         # Allocate datastructures from input arguments
         self._expanded_class_weight = compute_class_weight(self.class_weight,
                                                            self.classes_, y)
-        sample_weight = _check_sample_weight(sample_weight, y, order="C")
+        sample_weight = _check_sample_weight(sample_weight, X, order="C")
 
         if getattr(self, "coef_", None) is None or coef_init is not None:
             self._allocate_parameter_mem(n_classes, n_features,
@@ -1082,7 +1082,7 @@ def _partial_fit(self, X, y, alpha, C, loss, learning_rate,
 
         n_samples, n_features = X.shape
 
-        sample_weight = _check_sample_weight(sample_weight, y, order="C")
+        sample_weight = _check_sample_weight(sample_weight, X, order="C")
 
         # Allocate datastructures from input arguments
         if getattr(self, "coef_", None) is None:
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index 4f3b57a35c144..f2ab0baaf3a70 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -908,7 +908,7 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
     y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
     y_ind = np.require(y_ind, requirements="W")
 
-    sample_weight = _check_sample_weight(sample_weight, n_samples=X.shape[0],
+    sample_weight = _check_sample_weight(sample_weight, X,
                                          dtype=np.float64, order='C')
 
     solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 1961b995f8f31..ecd25ffd63386 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -858,25 +858,22 @@ def test_check_scalar_invalid(x, target_name, target_type, min_val, max_val,
 
 
 def test_check_sample_weight():
-    with pytest.raises(ValueError,
-                       match="Only one of y, n_samples must be provided"):
-        _check_sample_weight(np.ones(3), y=np.ones(3), n_samples=3)
-
     # check order="C" parameter
     sample_weight = np.ones(10)[::2]
     assert not sample_weight.flags["C_CONTIGUOUS"]
-    sample_weight = _check_sample_weight(sample_weight, n_samples=5, order="C")
+    sample_weight = _check_sample_weight(sample_weight, X=np.ones((5, 1)),
+                                         order="C")
     assert sample_weight.flags["C_CONTIGUOUS"]
 
     # check None input
-    sample_weight = _check_sample_weight(None, n_samples=5)
+    sample_weight = _check_sample_weight(None, X=np.ones((5, 2)))
     assert_allclose(sample_weight, np.ones(5))
 
     # check numbers input
-    sample_weight = _check_sample_weight(2.0, n_samples=5)
+    sample_weight = _check_sample_weight(2.0, X=np.ones((5, 2)))
     assert_allclose(sample_weight, 2*np.ones(5))
 
     # check wrong number of dimensions
     with pytest.raises(ValueError,
                        match="Sample weights must be 1D array or scalar"):
-        _check_sample_weight(np.ones((2, 4)), n_samples=5)
+        _check_sample_weight(np.ones((2, 4)), X=np.ones((2, 2)))
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 3135f7d2c4efc..c3cc1f6cb054f 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -982,7 +982,7 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
         raise ValueError('`{}`= {}, must be <= {}.'.format(name, x, max_val))
 
 
-def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
+def _check_sample_weight(sample_weight, X, dtype=None,
                          order=None):
     """Validate sample weights
 
@@ -990,10 +990,8 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     ----------
     sample_weight : {ndarray, Number or None}, shape (n_samples,)
        Input sample weights.
-    y : ndarray or None, shape (n_samples,)
-       Target variable. Either `y` or `n_samples` must be provided.
-    n_samples : int or None
-       expected number of samples. Either y or n_samples must be provided.
+    X : nd-array, list or sparse matrix
+        Input data.
     dtype: dtype
        dtype of the validated `sample_weight`.
     order : 'F', 'C' or None (default=None)
@@ -1007,10 +1005,7 @@ def _check_sample_weight(sample_weight, y=None, n_samples=None, dtype=None,
     sample_weight : ndarray, shape (n_samples,)
        Validated sample weights.
     """
-    if n_samples is not None and y is not None:
-        raise ValueError('Only one of y, n_samples must be provided!')
-    elif y is not None:
-        n_samples = y.shape[0]
+    n_samples = _num_samples(X)
 
     if sample_weight is None or isinstance(sample_weight, numbers.Number):
         if order is None:

From c28226a97c0d84f6b563a482d3319037f39235d3 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 17:56:16 +0200
Subject: [PATCH 21/34] Update kmeans to use _check_sample_weight

---
 sklearn/cluster/k_means_.py           | 34 +++++++++++++--------------
 sklearn/cluster/tests/test_k_means.py | 11 +++++----
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
index 7520b6b6b6bd5..365c5a9a886f0 100644
--- a/sklearn/cluster/k_means_.py
+++ b/sklearn/cluster/k_means_.py
@@ -27,7 +27,7 @@
 from ..utils import check_array
 from ..utils import gen_batches
 from ..utils import check_random_state
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..utils.validation import FLOAT_DTYPES
 from ..exceptions import ConvergenceWarning
 from . import _k_means
@@ -164,19 +164,19 @@ def _tolerance(X, tol):
     return np.mean(variances) * tol
 
 
-def _check_sample_weight(X, sample_weight):
+def _check_normalize_sample_weight(sample_weight, X):
     """Set sample_weight if None, and check for correct dtype"""
-    n_samples = X.shape[0]
-    if sample_weight is None:
-        return np.ones(n_samples, dtype=X.dtype)
-    else:
-        sample_weight = np.asarray(sample_weight)
-        if n_samples != len(sample_weight):
-            raise ValueError("n_samples=%d should be == len(sample_weight)=%d"
-                             % (n_samples, len(sample_weight)))
+
+    sample_weight_was_none = sample_weight is None
+
+    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
+    if not sample_weight_was_none:
         # normalize the weights to sum up to n_samples
+        # an array of 1 (i.e. sample_weight is None) is already normalized
+        n_samples = len(sample_weight)
         scale = n_samples / sample_weight.sum()
-        return (sample_weight * scale).astype(X.dtype, copy=False)
+        sample_weight = sample_weight * scale  # out-of-place: may alias input
+    return sample_weight
 
 
 def k_means(X, n_clusters, sample_weight=None, init='k-means++',
@@ -434,7 +434,7 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
     if verbose:
         print('Initialization complete')
 
-    checked_sample_weight = _check_sample_weight(X, sample_weight)
+    checked_sample_weight = _check_normalize_sample_weight(sample_weight, X)
     centers, labels, n_iter = k_means_elkan(X, checked_sample_weight,
                                             n_clusters, centers, tol=tol,
                                             max_iter=max_iter, verbose=verbose)
@@ -519,7 +519,7 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
     """
     random_state = check_random_state(random_state)
 
-    sample_weight = _check_sample_weight(X, sample_weight)
+    sample_weight = _check_normalize_sample_weight(sample_weight, X)
 
     best_labels, best_inertia, best_centers = None, None, None
     # init
@@ -662,7 +662,7 @@ def _labels_inertia(X, sample_weight, x_squared_norms, centers,
         Sum of squared distances of samples to their closest cluster center.
     """
     n_samples = X.shape[0]
-    sample_weight = _check_sample_weight(X, sample_weight)
+    sample_weight = _check_normalize_sample_weight(sample_weight, X)
     # set the default value of centers to -1 to be able to detect any anomaly
     # easily
     labels = np.full(n_samples, -1, np.int32)
@@ -1492,7 +1492,7 @@ def fit(self, X, y=None, sample_weight=None):
             raise ValueError("n_samples=%d should be >= n_clusters=%d"
                              % (n_samples, self.n_clusters))
 
-        sample_weight = _check_sample_weight(X, sample_weight)
+        sample_weight = _check_normalize_sample_weight(sample_weight, X)
 
         n_init = self.n_init
         if hasattr(self.init, '__array__'):
@@ -1641,7 +1641,7 @@ def _labels_inertia_minibatch(self, X, sample_weight):
         """
         if self.verbose:
             print('Computing label assignment and total inertia')
-        sample_weight = _check_sample_weight(X, sample_weight)
+        sample_weight = _check_normalize_sample_weight(sample_weight, X)
         x_squared_norms = row_norms(X, squared=True)
         slices = gen_batches(X.shape[0], self.batch_size)
         results = [_labels_inertia(X[s], sample_weight[s], x_squared_norms[s],
@@ -1675,7 +1675,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
         if n_samples == 0:
             return self
 
-        sample_weight = _check_sample_weight(X, sample_weight)
+        sample_weight = _check_normalize_sample_weight(sample_weight, X)
 
         x_squared_norms = row_norms(X, squared=True)
         self.random_state_ = getattr(self, "random_state_",
diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
index 4fca8f621e141..5b8f086a89948 100644
--- a/sklearn/cluster/tests/test_k_means.py
+++ b/sklearn/cluster/tests/test_k_means.py
@@ -909,14 +909,15 @@ def test_sample_weight_length():
     # check that an error is raised when passing sample weights
     # with an incompatible shape
     km = KMeans(n_clusters=n_clusters, random_state=42)
-    assert_raises_regex(ValueError, r'len\(sample_weight\)', km.fit, X,
-                        sample_weight=np.ones(2))
+    msg = r'samples_weight.shape == \(2,\), expected \(100,\)'
+    with pytest.raises(ValueError, match=msg):
+        km.fit(X, sample_weight=np.ones(2))
 
 
-def test_check_sample_weight():
-    from sklearn.cluster.k_means_ import _check_sample_weight
+def test_check_normalize_sample_weight():
+    from sklearn.cluster.k_means_ import _check_normalize_sample_weight
     sample_weight = None
-    checked_sample_weight = _check_sample_weight(X, sample_weight)
+    checked_sample_weight = _check_normalize_sample_weight(sample_weight, X)
     assert _num_samples(X) == _num_samples(checked_sample_weight)
     assert_almost_equal(checked_sample_weight.sum(), _num_samples(X))
     assert X.dtype == checked_sample_weight.dtype

From ed2dc698ba862bfb2420fc055f555155f2a0b4e9 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 12 Jul 2019 18:43:24 +0200
Subject: [PATCH 22/34] Non float dtype should not be supported

---
 sklearn/utils/tests/test_validation.py | 5 +++++
 sklearn/utils/validation.py            | 6 +++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index ecd25ffd63386..fa34b1e00667f 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -877,3 +877,8 @@ def test_check_sample_weight():
     with pytest.raises(ValueError,
                        match="Sample weights must be 1D array or scalar"):
         _check_sample_weight(np.ones((2, 4)), X=np.ones((2, 2)))
+
+    # int dtype will be converted to float64 instead
+    X = np.ones((5, 2), dtype=np.int)
+    sample_weight = _check_sample_weight(None, X, dtype=X.dtype)
+    assert sample_weight.dtype == np.float64
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index c3cc1f6cb054f..8b3ae72ddd711 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -993,7 +993,8 @@ def _check_sample_weight(sample_weight, X, dtype=None,
     X : nd-array, list or sparse matrix
         Input data.
     dtype: dtype
-       dtype of the validated `sample_weight`.
+       dtype of the validated `sample_weight`. Note that if `dtype` is not
+       one of `float32`, `float64`, the output will be of dtype `float64`.
     order : 'F', 'C' or None (default=None)
         Whether an array will be forced to be fortran or c-style.
         When order is None (default), if ``sample_weights`` is an ndarray,
@@ -1007,6 +1008,9 @@ def _check_sample_weight(sample_weight, X, dtype=None,
     """
     n_samples = _num_samples(X)
 
+    if dtype is not None and dtype not in [np.float32, np.float64]:
+        dtype = np.float64
+
     if sample_weight is None or isinstance(sample_weight, numbers.Number):
         if order is None:
             order = 'C'

From 380d9eb95efadb19af411fa7e59bb6f12bda0a96 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Mon, 15 Jul 2019 13:00:42 +0200
Subject: [PATCH 23/34] Drop order parameter + address review comments

---
 sklearn/linear_model/stochastic_gradient.py |  4 ++--
 sklearn/svm/base.py                         |  2 +-
 sklearn/utils/tests/test_validation.py      |  3 +--
 sklearn/utils/validation.py                 | 24 ++++++++-------------
 4 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 3b892ecec4e35..25b63a3a0cdce 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -475,7 +475,7 @@ def _partial_fit(self, X, y, alpha, C,
         # Allocate datastructures from input arguments
         self._expanded_class_weight = compute_class_weight(self.class_weight,
                                                            self.classes_, y)
-        sample_weight = _check_sample_weight(sample_weight, X, order="C")
+        sample_weight = _check_sample_weight(sample_weight, X)
 
         if getattr(self, "coef_", None) is None or coef_init is not None:
             self._allocate_parameter_mem(n_classes, n_features,
@@ -1082,7 +1082,7 @@ def _partial_fit(self, X, y, alpha, C, loss, learning_rate,
 
         n_samples, n_features = X.shape
 
-        sample_weight = _check_sample_weight(sample_weight, X, order="C")
+        sample_weight = _check_sample_weight(sample_weight, X)
 
         # Allocate datastructures from input arguments
         if getattr(self, "coef_", None) is None:
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index f2ab0baaf3a70..e27abeed7ecee 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -909,7 +909,7 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
     y_ind = np.require(y_ind, requirements="W")
 
     sample_weight = _check_sample_weight(sample_weight, X,
-                                         dtype=np.float64, order='C')
+                                         dtype=np.float64)
 
     solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
     raw_coef_, n_iter_ = liblinear.train_wrap(
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index fa34b1e00667f..f30e1a1b2ada7 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -861,8 +861,7 @@ def test_check_sample_weight():
     # check order="C" parameter
     sample_weight = np.ones(10)[::2]
     assert not sample_weight.flags["C_CONTIGUOUS"]
-    sample_weight = _check_sample_weight(sample_weight, X=np.ones((5, 1)),
-                                         order="C")
+    sample_weight = _check_sample_weight(sample_weight, X=np.ones((5, 1)))
     assert sample_weight.flags["C_CONTIGUOUS"]
 
     # check None input
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 8b3ae72ddd711..931be8f4192d6 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -982,29 +982,25 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
         raise ValueError('`{}`= {}, must be <= {}.'.format(name, x, max_val))
 
 
-def _check_sample_weight(sample_weight, X, dtype=None,
-                         order=None):
+def _check_sample_weight(sample_weight, X, dtype=None):
     """Validate sample weights
 
     Parameters
     ----------
     sample_weight : {ndarray, Number or None}, shape (n_samples,)
        Input sample weights.
+
     X : nd-array, list or sparse matrix
         Input data.
+
     dtype: dtype
        dtype of the validated `sample_weight`. Note that if `dtype` is not
        one of `float32`, `float64`, the output will be of dtype `float64`.
-    order : 'F', 'C' or None (default=None)
-        Whether an array will be forced to be fortran or c-style.
-        When order is None (default), if ``sample_weights`` is an ndarray,
-        nothing is ensured about the memory layout of the output array,
-        otherwise it will be of 'C' order by default.
 
-    Parameters
-    ----------
+    Returns
+    -------
     sample_weight : ndarray, shape (n_samples,)
-       Validated sample weights.
+       Validated sample weights. They are guaranteed to be "C" contiguous.
     """
     n_samples = _num_samples(X)
 
@@ -1012,19 +1008,17 @@ def _check_sample_weight(sample_weight, X, dtype=None,
         dtype = np.float64
 
     if sample_weight is None or isinstance(sample_weight, numbers.Number):
-        if order is None:
-            order = 'C'
         if sample_weight is None:
-            sample_weight = np.ones(n_samples, dtype=dtype, order=order)
+            sample_weight = np.ones(n_samples, dtype=dtype)
         else:
             sample_weight = np.full(n_samples, sample_weight,
-                                    dtype=dtype, order=order)
+                                    dtype=dtype)
     else:
         if dtype is None:
             dtype = [np.float64, np.float32]
         sample_weight = check_array(
                 sample_weight, accept_sparse=False,
-                ensure_2d=False, dtype=dtype, order=order
+                ensure_2d=False, dtype=dtype, order="C"
         )
         if sample_weight.ndim != 1:
             raise ValueError("Sample weights must be 1D array or scalar")

From 3fa5f73c1a5540ff85bd55450744cd6b67f0fb56 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Mon, 15 Jul 2019 13:02:45 +0200
Subject: [PATCH 24/34] Fix typo

---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 931be8f4192d6..8a4462e371a33 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1000,7 +1000,7 @@ def _check_sample_weight(sample_weight, X, dtype=None):
     Returns
     -------
     sample_weight : ndarray, shape (n_samples,)
-       Validated sample weights. They are guaranteed to be "C" contiguous.
+       Validated sample weight. It is guaranteed to be "C" contiguous.
     """
     n_samples = _num_samples(X)
 

From 08e204fa9ece0155919bee9a00713c17a642020a Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Mon, 15 Jul 2019 14:07:40 +0200
Subject: [PATCH 25/34] Fix tests

---
 sklearn/linear_model/logistic.py       | 2 +-
 sklearn/linear_model/sag.py            | 3 +--
 sklearn/utils/tests/test_validation.py | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
index c087fb7038c0c..10a4d32e51275 100644
--- a/sklearn/linear_model/logistic.py
+++ b/sklearn/linear_model/logistic.py
@@ -827,7 +827,7 @@ def _logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     # and check length
     # Otherwise set them to 1 for all examples
     sample_weight = _check_sample_weight(sample_weight, X,
-                                         dtype=X.dtype, order='C')
+                                         dtype=X.dtype)
 
     # If class_weights is a dict (provided by the user), the weights
     # are assigned to the original labels. If it is "balanced", then
diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py
index 0a092447a49b1..fa02c7a4a0ef8 100644
--- a/sklearn/linear_model/sag.py
+++ b/sklearn/linear_model/sag.py
@@ -252,8 +252,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
     n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1
 
     # initialization
-    sample_weight = _check_sample_weight(sample_weight, X,
-                                         dtype=X.dtype, order='C')
+    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
     if 'coef' in warm_start_mem.keys():
         coef_init = warm_start_mem['coef']
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index f30e1a1b2ada7..2ef58aa8df4a6 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -858,7 +858,7 @@ def test_check_scalar_invalid(x, target_name, target_type, min_val, max_val,
 
 
 def test_check_sample_weight():
-    # check order="C" parameter
+    # check array order
     sample_weight = np.ones(10)[::2]
     assert not sample_weight.flags["C_CONTIGUOUS"]
     sample_weight = _check_sample_weight(sample_weight, X=np.ones((5, 1)))

From 44d99c1c06d6bddf9e4dcc0c9845fba184ffbd87 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Mon, 15 Jul 2019 15:08:26 +0200
Subject: [PATCH 26/34] Address review

---
 sklearn/linear_model/ridge.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index e111d2061514b..4600b56f8d89d 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -429,7 +429,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                          " %d != %d" % (n_samples, n_samples_))
 
     if has_sw:
-        sample_weight = _check_sample_weight(sample_weight, y)
+        sample_weight = _check_sample_weight(sample_weight, X)
 
         if solver not in ['sag', 'saga']:
             # SAG supports sample_weight directly. For other solvers,

From 3fc9d1aaf9b715c48600e465147a5b76b3510265 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Tue, 16 Jul 2019 10:43:13 +0200
Subject: [PATCH 27/34] Attempt to fix 32bit / 64bit comparison test

---
 sklearn/linear_model/ridge.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index 4600b56f8d89d..cc3b6a518add5 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -429,7 +429,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                          " %d != %d" % (n_samples, n_samples_))
 
     if has_sw:
-        sample_weight = _check_sample_weight(sample_weight, X)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
         if solver not in ['sag', 'saga']:
             # SAG supports sample_weight directly. For other solvers,

From 22e107036d793b3d8c8be06b8fe598ed91865ebf Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Tue, 16 Jul 2019 10:48:50 +0200
Subject: [PATCH 28/34] Check that float32 dtype is preserved

---
 sklearn/utils/tests/test_validation.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 2ef58aa8df4a6..e932e5cdaf7f7 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -877,6 +877,12 @@ def test_check_sample_weight():
                        match="Sample weights must be 1D array or scalar"):
         _check_sample_weight(np.ones((2, 4)), X=np.ones((2, 2)))
 
+    # float32 dtype is preserved
+    X = np.ones((5, 2))
+    sample_weight = np.ones(5, dtype=np.float32)
+    sample_weight = _check_sample_weight(sample_weight, X)
+    assert sample_weight.dtype == np.float32
+
     # int dtype will be converted to float64 instead
     X = np.ones((5, 2), dtype=np.int)
     sample_weight = _check_sample_weight(None, X, dtype=X.dtype)

From 561bb6ae6905d22cfa56980f4361de17d178185a Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Thu, 18 Jul 2019 16:27:22 +0200
Subject: [PATCH 29/34] Update sklearn/utils/validation.py

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 8a4462e371a33..4aefcf82ec247 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -983,7 +983,7 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
 
 
 def _check_sample_weight(sample_weight, X, dtype=None):
-    """Validate sample weights
+    """Validate sample weights. 
 
     Parameters
     ----------

From 71ecf65ec85638f7819a64d894a01acaf69867cc Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Thu, 18 Jul 2019 16:29:46 +0200
Subject: [PATCH 30/34] Lint

---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 4aefcf82ec247..d130a6083c143 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -983,7 +983,7 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
 
 
 def _check_sample_weight(sample_weight, X, dtype=None):
-    """Validate sample weights. 
+    """Validate sample weights.
 
     Parameters
     ----------

From e244ad5a75349cc64fa8f2f5f74771f4a97a17b0 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 19 Jul 2019 10:14:53 +0200
Subject: [PATCH 31/34] Update sklearn/utils/validation.py

Co-Authored-By: Nicolas Hug <contact@nicolas-hug.com>
---
 sklearn/utils/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index d130a6083c143..3a79754513237 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1024,6 +1024,6 @@ def _check_sample_weight(sample_weight, X, dtype=None):
             raise ValueError("Sample weights must be 1D array or scalar")
 
         if sample_weight.shape != (n_samples,):
-            raise ValueError("samples_weight.shape == {}, expected {}!"
+            raise ValueError("sample_weight.shape == {}, expected {}!"
                              .format(sample_weight.shape, (n_samples,)))
     return sample_weight

From 13f9decda73f91453d94699f41fa8ac44cb46513 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 19 Jul 2019 10:22:41 +0200
Subject: [PATCH 32/34] Improve docstring wording

---
 sklearn/utils/validation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 3a79754513237..abf51eef8f487 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -994,8 +994,11 @@ def _check_sample_weight(sample_weight, X, dtype=None):
         Input data.
 
     dtype: dtype
-       dtype of the validated `sample_weight`. Note that if `dtype` is not
-       one of `float32`, `float64`, the output will be of dtype `float64`.
+       dtype of the validated `sample_weight`.
+       If None, and the input `sample_weight` is an array, the dtype of the
+       input is preserved; otherwise an array with the default numpy dtype
+       is allocated.  If `dtype` is not one of `float32`, `float64`,
+       `None`, the output will be of dtype `float64`.
 
     Returns
     -------

From 9cccaf609be562b3b7445d7ea9d2df282a22aed8 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 19 Jul 2019 10:42:36 +0200
Subject: [PATCH 33/34] Fix tests

---
 sklearn/cluster/tests/test_k_means.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
index 3043bfd238d92..362b0a9145fca 100644
--- a/sklearn/cluster/tests/test_k_means.py
+++ b/sklearn/cluster/tests/test_k_means.py
@@ -909,7 +909,7 @@ def test_sample_weight_length():
     # check that an error is raised when passing sample weights
     # with an incompatible shape
     km = KMeans(n_clusters=n_clusters, random_state=42)
-    msg = r'samples_weight.shape == \(2,\), expected \(100,\)'
+    msg = r'sample_weight.shape == \(2,\), expected \(100,\)'
     with pytest.raises(ValueError, match=msg):
         km.fit(X, sample_weight=np.ones(2))
 

From fb22cfc9dc0870a6269c247e6a97b992c03b4fc2 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Fri, 19 Jul 2019 16:56:59 +0200
Subject: [PATCH 34/34] Nicolas's comments

---
 sklearn/utils/tests/test_validation.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index e932e5cdaf7f7..2789a59344008 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -870,13 +870,18 @@ def test_check_sample_weight():
 
     # check numbers input
     sample_weight = _check_sample_weight(2.0, X=np.ones((5, 2)))
-    assert_allclose(sample_weight, 2*np.ones(5))
+    assert_allclose(sample_weight, 2 * np.ones(5))
 
     # check wrong number of dimensions
     with pytest.raises(ValueError,
                        match="Sample weights must be 1D array or scalar"):
         _check_sample_weight(np.ones((2, 4)), X=np.ones((2, 2)))
 
+    # check incorrect n_samples
+    msg = r"sample_weight.shape == \(4,\), expected \(2,\)!"
+    with pytest.raises(ValueError, match=msg):
+        _check_sample_weight(np.ones(4), X=np.ones((2, 2)))
+
     # float32 dtype is preserved
     X = np.ones((5, 2))
     sample_weight = np.ones(5, dtype=np.float32)