8000 ENH make check_array accept several dtypes · scikit-learn/scikit-learn@0222c8b · GitHub
[go: up one dir, main page]

Skip to content

Commit 0222c8b

Browse files
committed
ENH make check_array accept several dtypes
1 parent: 417011d · commit: 0222c8b

File tree

4 files changed

+80
-47
lines changed

4 files changed

+80
-47
lines changed

sklearn/preprocessing/data.py

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,12 @@
1515
from ..base import BaseEstimator, TransformerMixin
1616
from ..externals import six
1717
from ..utils import check_array
18-
from ..utils import warn_if_not_float
1918
from ..utils.extmath import row_norms
20-
from ..utils.fixes import (astype,
21-
combinations_with_replacement as combinations_w_r,
22-
bincount, isclose)
19+
from ..utils.fixes import combinations_with_replacement as combinations_w_r
2320
from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
2421
inplace_csr_row_normalize_l2)
2522
from ..utils.sparsefuncs import (inplace_column_scale, mean_variance_axis)
26-
from ..utils.validation import check_is_fitted
23+
from ..utils.validation import check_is_fitted, FLOAT_DTYPES
2724

2825
zip = six.moves.zip
2926
map = six.moves.map
@@ -114,8 +111,9 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
114111
scaling using the ``Transformer`` API (e.g. as part of a preprocessing
115112
:class:`sklearn.pipeline.Pipeline`)
116113
"""
117-
X = check_array(X, accept_sparse='csr', copy=copy, ensure_2d=False)
118-
warn_if_not_float(X, estimator='The scale function')
114+
X = check_array(X, accept_sparse='csr', copy=copy, ensure_2d=False,
115+
warn_on_dtype=True, estimator='the scale function',
116+
dtype=FLOAT_DTYPES)
119117
if sparse.issparse(X):
120118
if with_mean:
121119
raise ValueError(
@@ -223,8 +221,8 @@ def fit(self, X, y=None):
223221
The data used to compute the per-feature minimum and maximum
224222
used for later scaling along the features axis.
225223
"""
226-
X = check_array(X, copy=self.copy, ensure_2d=False)
227-
warn_if_not_float(X, estimator=self)
224+
X = check_array(X, copy=self.copy, ensure_2d=False, warn_on_dtype=True,
225+
estimator=self, dtype=FLOAT_DTYPES)
228226
feature_range = self.feature_range
229227
if feature_range[0] >= feature_range[1]:
230228
raise ValueError("Minimum of desired feature range must be smaller"
@@ -345,10 +343,8 @@ def fit(self, X, y=None):
345343
used for later scaling along the features axis.
346344
"""
347345
X = check_array(X, accept_sparse='csr', copy=self.copy,
348-
ensure_2d=False, dtype=None)
349-
if warn_if_not_float(X, estimator=self):
350-
X = check_array(X, accept_sparse=True, copy=False,
351-
dtype=np.float)
346+
ensure_2d=False, warn_on_dtype=True,
347+
estimator=self, dtype=FLOAT_DTYPES)
352348
if sparse.issparse(X):
353349
if self.with_mean:
354350
raise ValueError(
@@ -380,10 +376,9 @@ def transform(self, X, y=None, copy=None):
380376

381377
copy = copy if copy is not None else self.copy
382378
X = check_array(X, accept_sparse='csr', copy=copy,
383-
ensure_2d=False, dtype=None)
384-
if warn_if_not_float(X, estimator=self):
385-
X = check_array(X, accept_sparse=True, copy=False,
386-
dtype=np.float)
379+
ensure_2d=False, warn_on_dtype=True,
380+
estimator=self, dtype=FLOAT_DTYPES)
381+
387382
if sparse.issparse(X):
388383
if self.with_mean:
389384
raise ValueError(
@@ -602,8 +597,8 @@ def normalize(X, norm='l2', axis=1, copy=True):
602597
else:
603598
raise ValueError("'%d' is not a supported axis" % axis)
604599

605-
X = check_array(X, sparse_format, copy=copy)
606-
warn_if_not_float(X, 'The normalize function')
600+
X = check_array(X, sparse_format, copy=copy, warn_on_dtype=True,
601+
estimator='the normalize function', dtype=FLOAT_DTYPES)
607602
if axis == 0:
608603
X = X.T
609604

sklearn/preprocessing/tests/test_data.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from sklearn.preprocessing.data import MinMaxScaler
3030
from sklearn.preprocessing.data import add_dummy_feature
3131
from sklearn.preprocessing.data import PolynomialFeatures
32+
from sklearn.utils.validation import DataConversionWarning
3233

3334
from sklearn import datasets
3435

@@ -499,12 +500,12 @@ def test_warning_scaling_integers():
499500
X = np.array([[1, 2, 0],
500501
[0, 0, 0]], dtype=np.uint8)
501502

502-
w = "assumes floating point values as input, got uint8"
503+
w = "Data with input dtype uint8 was converted to float64"
503504

504505
clean_warning_registry()
505-
assert_warns_message(UserWarning, w, scale, X)
506-
assert_warns_message(UserWarning, w, StandardScaler().fit, X)
507-
assert_warns_message(UserWarning, w, MinMaxScaler().fit, X)
506+
assert_warns_message(DataConversionWarning, w, scale, X)
507+
assert_warns_message(DataConversionWarning, w, StandardScaler().fit, X)
508+
assert_warns_message(DataConversionWarning, w, MinMaxScaler().fit, X)
508509

509510

510511
def test_normalizer_l1():

sklearn/utils/tests/test_validation.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,16 @@ def test_check_array_dtype_stability():
241241
def test_check_array_dtype_warning():
242242
X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
243243
X_float64 = np.asarray(X_int_list, dtype=np.float64)
244+
X_float32 = np.asarray(X_int_list, dtype=np.float32)
244245
X_int64 = np.asarray(X_int_list, dtype=np.int64)
245246
X_csr_float64 = sp.csr_matrix(X_float64)
247+
X_csr_float32 = sp.csr_matrix(X_float32)
248+
X_csc_float32 = sp.csc_matrix(X_float32)
246249
X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32)
247250
y = [0, 0, 1]
248251
integer_data = [X_int64, X_csc_int32]
249252
float64_data = [X_float64, X_csr_float64]
253+
float32_data = [X_float32, X_csr_float32, X_csc_float32]
250254
for X in integer_data:
251255
X_checked = assert_no_warnings(check_array, X, dtype=np.float64,
252256
accept_sparse=True)
@@ -260,19 +264,18 @@ def test_check_array_dtype_warning():
260264
# Check that the warning message includes the name of the Estimator
261265
X_checked = assert_warns_message(DataConversionWarning,
262266
'SomeEstimator',
263-
check_array, X, dtype=np.float64,
267+
check_array, X,
268+
dtype=[np.float64, np.float32],
264269
accept_sparse=True,
265270
warn_on_dtype=True,
266271
estimator='SomeEstimator')
267272
assert_equal(X_checked.dtype, np.float64)
268273

269-
X_checked, y_checked = assert_warns_message(DataConversionWarning,
270-
'SomeEstimator',
271-
check_X_y, X, y,
272-
dtype=np.float64,
273-
accept_sparse=True,
274-
warn_on_dtype=True,
275-
estimator='SomeEstimator')
274+
X_checked, y_checked = assert_warns_message(
275+
DataConversionWarning, 'KNeighborsClassifier',
276+
check_X_y, X, y, dtype=np.float64, accept_sparse=True,
277+
warn_on_dtype=True, estimator=KNeighborsClassifier())
278+
276279
assert_equal(X_checked.dtype, np.float64)
277280

278281
for X in float64_data:
@@ -283,7 +286,27 @@ def test_check_array_dtype_warning():
283286
accept_sparse=True, warn_on_dtype=False)
284287
assert_equal(X_checked.dtype, np.float64)
285288

286-
289+
for X in float32_data:
290+
X_checked = assert_no_warnings(check_array, X,
291+
dtype=[np.float64, np.float32],
292+
accept_sparse=True)
293+
assert_equal(X_checked.dtype, np.float32)
294+
assert_true(X_checked is X)
295+
296+
X_checked = assert_no_warnings(check_array, X,
297+
dtype=[np.float64, np.float32],
298+
accept_sparse=['csr', 'dok'],
299+
copy=True)
300+
assert_equal(X_checked.dtype, np.float32)
301+
assert_false(X_checked is X)
302+
303+
X_checked = assert_no_warnings(check_array, X_csc_float32,
304+
dtype=[np.float64, np.float32],
305+
accept_sparse=['csr', 'dok'],
306+
copy=False)
307+
assert_equal(X_checked.dtype, np.float32)
308+
assert_false(X_checked is X_csc_float32)
309+
assert_equal(X_checked.format, 'csr')
287310

288311

289312
def test_check_array_min_samples_and_features_messages():

sklearn/utils/validation.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from .fixes import astype
1717
from inspect import getargspec
1818

19+
FLOAT_DTYPES = (np.float64, np.float32, np.float16)
20+
1921

2022
class DataConversionWarning(UserWarning):
2123
"""A warning on implicit data conversions happening in the code"""
@@ -233,26 +235,27 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy,
233235
spmatrix_converted : scipy sparse matrix.
234236
Matrix that is ensured to have an allowed type.
235237
"""
236-
if accept_sparse is None:
238+
if accept_sparse in [None, False]:
237239
raise TypeError('A sparse matrix was passed, but dense '
238240
'data is required. Use X.toarray() to '
239241
'convert to a dense numpy array.')
240-
sparse_type = spmatrix.format
241242
if dtype is None:
242243
dtype = spmatrix.dtype
243-
if sparse_type in accept_sparse:
244-
# correct type
245-
if dtype == spmatrix.dtype:
246-
# correct dtype
247-
if copy:
248-
spmatrix = spmatrix.copy()
249-
else:
250-
# convert dtype
251-
spmatrix = spmatrix.astype(dtype)
252-
else:
253-
# create new
244+
245+
changed_format = False
246+
if (isinstance(accept_sparse, (list, tuple))
247+
and spmatrix.format not in accept_sparse):
248+
# create new with correct sparse
254249
spmatrix = spmatrix.asformat(accept_sparse[0])
250+
changed_format = True
251+
252+
if dtype != spmatrix.dtype:
253+
# convert dtype
255254
spmatrix = spmatrix.astype(dtype)
255+
elif copy and not changed_format:
256+
# force copy
257+
spmatrix = spmatrix.copy()
258+
256259
if force_all_finite:
257260
if not hasattr(spmatrix, "data"):
258261
warnings.warn("Can't check %s sparse matrix for nan or inf."
@@ -283,9 +286,11 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None,
283286
If the input is sparse but not in the allowed format, it will be
284287
converted to the first listed format.
285288
286-
dtype : string, type or None (default="numeric")
289+
dtype : string, type, list of types or None (default="numeric")
287290
Data type of result. If None, the dtype of the input is preserved.
288291
If "numeric", dtype is preserved unless array.dtype is object.
292+
If dtype is a list of types, conversion on the first type is only
293+
performed if the dtype of the input is not in the list.
289294
290295
order : 'F', 'C' or None (default=None)
291296
Whether an array will be forced to be fortran or c-style.
@@ -344,6 +349,15 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None,
344349
else:
345350
dtype = None
346351

352+
if isinstance(dtype, (list, tuple)):
353+
if dtype_orig is not None and dtype_orig in dtype:
354+
# no dtype conversion required
355+
dtype = None
356+
else:
357+
# dtype conversion required. Let's select the first element of the
358+
# list of accepted types.
359+
dtype = dtype[0]
360+
347361
if sp.issparse(array):
348362
array = _ensure_sparse_format(array, accept_sparse, dtype, copy,
349363
force_all_finite)
@@ -382,7 +396,7 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None,
382396
if estimator is not None:
383397
if not isinstance(estimator, six.string_types):
384398
estimator = estimator.__class__.__name__
385-
msg += "by %s" % estimator
399+
msg += " by %s" % estimator
386400
warnings.warn(msg, DataConversionWarning)
387401
return array
388402

0 commit comments

Comments (0)