[MRG + 2] FIX dtypes to conform to the stricter type cast rules of numpy 1.10 by raghavrv · Pull Request #5398 · scikit-learn/scikit-learn
Merged · 5 commits · Oct 15, 2015
4 changes: 2 additions & 2 deletions .travis.yml
@@ -17,8 +17,8 @@ env:
- DISTRIB="conda" PYTHON_VERSION="2.6" INSTALL_MKL="false"
NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0"
# This environment tests the newest supported anaconda env
- DISTRIB="conda" PYTHON_VERSION="3.4" INSTALL_MKL="true"
NUMPY_VERSION="1.8.1" SCIPY_VERSION="0.14.0"
- DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
NUMPY_VERSION="1.10.1" SCIPY_VERSION="0.16.0"
raghavrv (Member, Author):
@lesteve @ogrisel Is this better than * since we know which version we are testing against?

install: source continuous_integration/install.sh
script: bash continuous_integration/test_script.sh
after_success:
16 changes: 8 additions & 8 deletions doc/modules/linear_model.rst
@@ -180,7 +180,7 @@ the parameter vector.
The implementation in the class :class:`Lasso` uses coordinate descent as
the algorithm to fit the coefficients. See :ref:`least_angle_regression`
for another implementation::

>>> from sklearn import linear_model
>>> clf = linear_model.Lasso(alpha = 0.1)
>>> clf.fit([[0, 0], [1, 1]], [0, 1])
@@ -1079,9 +1079,9 @@ of a given degree. It can be used as follows::
[4, 5]])
>>> poly = PolynomialFeatures(degree=2)
>>> poly.fit_transform(X)
array([[ 1, 0, 1, 0, 0, 1],
[ 1, 2, 3, 4, 6, 9],
[ 1, 4, 5, 16, 20, 25]])
array([[ 1., 0., 1., 0., 0., 1.],
[ 1., 2., 3., 4., 6., 9.],
[ 1., 4., 5., 16., 20., 25.]])

The features of ``X`` have been transformed from :math:`[x_1, x_2]` to
:math:`[1, x_1, x_2, x_1^2, x_1 x_2, x_2^2]`, and can now be used within
@@ -1125,10 +1125,10 @@ This way, we can solve the XOR problem with a linear classifier::
>>> y = X[:, 0] ^ X[:, 1]
>>> X = PolynomialFeatures(interaction_only=True).fit_transform(X)
>>> X
array([[1, 0, 0, 0],
[1, 0, 1, 0],
[1, 1, 0, 0],
[1, 1, 1, 1]])
array([[ 1., 0., 0., 0.],
[ 1., 0., 1., 0.],
[ 1., 1., 0., 0.],
[ 1., 1., 1., 1.]])
>>> clf = Perceptron(fit_intercept=False, n_iter=10, shuffle=False).fit(X, y)
>>> clf.score(X, y)
1.0
12 changes: 6 additions & 6 deletions doc/modules/preprocessing.rst
@@ -484,9 +484,9 @@ Often it's useful to add complexity to the model by considering nonlinear features
[4, 5]])
>>> poly = PolynomialFeatures(2)
>>> poly.fit_transform(X) # doctest: +ELLIPSIS
array([[ 1, 0, 1, 0, 0, 1],
[ 1, 2, 3, 4, 6, 9],
[ 1, 4, 5, 16, 20, 25]])
array([[ 1., 0., 1., 0., 0., 1.],
[ 1., 2., 3., 4., 6., 9.],
[ 1., 4., 5., 16., 20., 25.]])

The features of X have been transformed from :math:`(X_1, X_2)` to :math:`(1, X_1, X_2, X_1^2, X_1X_2, X_2^2)`.

Expand All @@ -499,9 +499,9 @@ In some cases, only interaction terms among features are required, and it can be
[6, 7, 8]])
>>> poly = PolynomialFeatures(degree=3, interaction_only=True)
>>> poly.fit_transform(X) # doctest: +ELLIPSIS
array([[ 1, 0, 1, 2, 0, 0, 2, 0],
[ 1, 3, 4, 5, 12, 15, 20, 60],
[ 1, 6, 7, 8, 42, 48, 56, 336]])
array([[ 1., 0., 1., 2., 0., 0., 2., 0.],
[ 1., 3., 4., 5., 12., 15., 20., 60.],
[ 1., 6., 7., 8., 42., 48., 56., 336.]])

The features of X have been transformed from :math:`(X_1, X_2, X_3)` to :math:`(1, X_1, X_2, X_3, X_1X_2, X_1X_3, X_2X_3, X_1X_2X_3)`.

8 changes: 4 additions & 4 deletions sklearn/cross_decomposition/pls_.py
@@ -14,7 +14,7 @@
import numpy as np
from scipy import linalg
from ..utils import arpack
from ..utils.validation import check_is_fitted
from ..utils.validation import check_is_fitted, FLOAT_DTYPES

__all__ = ['PLSCanonical', 'PLSRegression', 'PLSSVD']

@@ -375,14 +375,14 @@ def transform(self, X, Y=None, copy=True):
x_scores if Y is not given, (x_scores, y_scores) otherwise.
"""
check_is_fitted(self, 'x_mean_')
X = check_array(X, copy=copy)
X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
# Normalize
X -= self.x_mean_
X /= self.x_std_
# Apply rotation
x_scores = np.dot(X, self.x_rotations_)
if Y is not None:
Y = check_array(Y, ensure_2d=False, copy=copy)
Y = check_array(Y, ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES)
if Y.ndim == 1:
Y = Y.reshape(-1, 1)
Y -= self.y_mean_
raghavrv (Member, Author):
@GaelVaroquaux @ogrisel @lesteve I am also wondering whether, instead of explicitly typecasting Y to float (assuming y_mean_ is float), we should:

  1. typecast y_mean_ to Y.dtype or
  2. typecast Y to y_mean_.dtype?

raghavrv (Member, Author):
This would (?) preserve int as it is without typecasting it when y_mean_ is also int!

raghavrv (Member, Author):
Okay!!
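As a side note for this thread, a minimal standalone sketch (assuming numpy >= 1.10 with its 'same_kind' casting default; the Y / y_mean names are only illustrative) of why the in-place centering fails for integer input:

import numpy as np

# In-place subtraction of a float mean from an int array cannot be cast back
# to int under numpy >= 1.10's 'same_kind' rule; the out-of-place form upcasts.
Y = np.array([[1, 2], [3, 4]], dtype=np.int64)
y_mean = Y.mean(axis=0)                # float64

try:
    Y -= y_mean                        # TypeError: cannot cast float64 result to int64
except TypeError as exc:
    print(exc)

Y = Y - y_mean                         # fine: result is a new float64 array
print(Y.dtype)                         # float64

Casting y_mean_ to Y.dtype (option 1) would keep an integer Y intact but would silently truncate the mean, which is presumably why the PR forces a float dtype via check_array instead.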

@@ -410,7 +410,7 @@ def predict(self, X, copy=True):
be an issue in high dimensional space.
"""
check_is_fitted(self, 'x_mean_')
X = check_array(X, copy=copy)
X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
# Normalize
X -= self.x_mean_
X /= self.x_std_
raghavrv (Member, Author):
Same question here w.r.t. x_mean_ and x_std_?

8 changes: 4 additions & 4 deletions sklearn/decomposition/fastica_.py
@@ -18,6 +18,7 @@
from ..utils import check_array, as_float_array, check_random_state
from ..utils.extmath import fast_dot
from ..utils.validation import check_is_fitted
from ..utils.validation import FLOAT_DTYPES

__all__ = ['fastica', 'FastICA']

@@ -261,7 +262,7 @@ def my_g(x):
fun_args = {} if fun_args is None else fun_args
# make interface compatible with other decompositions
# a copy is required only for non whitened data
X = check_array(X, copy=whiten).T
X = check_array(X, copy=whiten, dtype=FLOAT_DTYPES).T

alpha = fun_args.get('alpha', 1.0)
if not 1 <= alpha <= 2:
@@ -540,7 +541,7 @@ def transform(self, X, y=None, copy=True):
"""
check_is_fitted(self, 'mixing_')

X = check_array(X, copy=copy)
X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
if self.whiten:
X -= self.mean_

@@ -563,8 +564,7 @@ def inverse_transform(self, X, copy=True):
"""
check_is_fitted(self, 'mixing_')

Member:
Why is that not check_array here?

Member:
and why would we want to copy? fast_dot doesn't change inplace, right? That wouldn't make any sense.

Member:
Was there an error here?

raghavrv (Member, Author):
Yes, X += self.mean_ made numpy 1.10 raise an error since X is not explicitly float but mean_ was... (is this fix okay, or should I rather do check_array(X, dtype=FLOAT_DTYPES)?)

Member:
so even if self.mixing_ is float, fast_dot(X, self.mixing_.T) is not? That is somewhat surprising to me.

Member:
fast_dot does casting according to my quick check on numpy 1.10

raghavrv (Member, Author):
Oh! So I must've done this pre-emptively I think... I'll check against master once to confirm!

raghavrv (Member, Author):
Have replaced with check_array...! And yes, I did this pre-emptively since there was a failure w.r.t. FastICA here -

======================================================================
ERROR: sklearn.tests.test_common.test_non_meta_estimators('FastICA', <class 'sklearn.decomposition.fastica_.FastICA'>)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
    self.test(*self.arg)
  File "/scikit-learn/sklearn/utils/testing.py", line 317, in wrapper
    return fn(*args, **kwargs)
  File "/scikit-learn/sklearn/utils/estimator_checks.py", line 669, in check_estimators_dtypes
    estimator.fit(X_train, y)
  File "/scikit-learn/sklearn/decomposition/fastica_.py", line 522, in fit
    self._fit(X, compute_sources=False)
  File "/scikit-learn/sklearn/decomposition/fastica_.py", line 478, in _fit
    compute_sources=compute_sources, return_n_iter=True)
  File "/scikit-learn/sklearn/decomposition/fastica_.py", line 300, in fastica
    X -= X_mean[:, np.newaxis]
TypeError: Cannot cast ufunc subtract output from dtype('float64') to dtype('int64') with casting rule 'same_kind'

Member:
ok
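As a hedged sketch of the fix settled on here (not the exact FastICA code path, just the pattern): validating with a float dtype up front makes the later in-place centering safe.

import numpy as np
from sklearn.utils.validation import check_array, FLOAT_DTYPES

# Force a float dtype during validation so that in-place ops such as
# `X -= X_mean` can no longer hit the 'same_kind' casting TypeError above.
X_int = np.arange(6, dtype=np.int64).reshape(3, 2)
X = check_array(X_int, dtype=FLOAT_DTYPES)    # integer input becomes float64
X -= X.mean(axis=0)                           # now a float64 -= float64 operation
print(X.dtype)                                # float64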

Member:
Should we set copy=(copy and self.whiten)? There is no need to trigger a copy when self.whiten is False.

if copy:
X = X.copy()
X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES)
X = fast_dot(X, self.mixing_.T)
if self.whiten:
X += self.mean_
5 changes: 3 additions & 2 deletions sklearn/ensemble/weight_boosting.py
@@ -29,7 +29,7 @@
from numpy.core.umath_tests import inner1d

from .base import BaseEnsemble
from ..base import ClassifierMixin, RegressorMixin
from ..base import ClassifierMixin, RegressorMixin, is_regressor
from ..externals import six
from ..externals.six.moves import zip
from ..externals.six.moves import xrange as range
@@ -107,7 +107,8 @@ def fit(self, X, y, sample_weight=None):
dtype = None
accept_sparse = ['csr', 'csc']

X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype)
X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype,
y_numeric=is_regressor(self))

if sample_weight is None:
# Initialize weights to 1 / n_samples
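A small self-contained sketch of what the y_numeric=is_regressor(self) change above means for the shared AdaBoost fit: only the regressor variant asks check_X_y for a numeric y, so classifiers keep accepting string class labels.

from sklearn.base import is_regressor
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor

# is_regressor inspects the estimator type, letting the shared fit() decide
# whether y must be numeric at validation time.
print(is_regressor(AdaBoostRegressor()))   # True  -> check_X_y(..., y_numeric=True)
print(is_regressor(AdaBoostClassifier()))  # False -> string labels remain valid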
2 changes: 2 additions & 0 deletions sklearn/linear_model/least_angle.py
@@ -1074,6 +1074,8 @@ def fit(self, X, y):
"""
self.fit_path = True
X, y = check_X_y(X, y, y_numeric=True)
X = as_float_array(X, copy=self.copy_X)
Member:
why? why not dtypes?

Member:
is that for memory efficiency? Then at least y was already converted, right?

raghavrv (Member, Author):
You mean dtype=FLOAT_DTYPES in check_X_y correct?

Member:
Ah, wait, I thought y_numeric made y floating, but that is only if it has dtype object. Never mind, sorry for the noise. I didn't have coffee yet.

raghavrv (Member, Author):
Actually we could squash the three lines into

X, y = check_X_y(X, y, y_numeric=True, dtype=FLOAT_DTYPES, copy=self.copy_X)

Can I do that without any side effect?

Member:
Well, it wouldn't convert y, and it would upcast int32 to float64.

raghavrv (Member, Author):
Ah okay!!

Member:
Well, if you think upcasting int32 to float64 is an issue, we should get it fixed everywhere.

Member:
check_X_y never upcasts integers:

>>> check_X_y([[1], [2]], np.array([1, 2], dtype=np.int32), y_numeric=True, dtype=[np.float64, np.float32])[1].dtype
dtype('int32')

y_numeric=True just checks for non-object dtypes in y. We could benefit from a y_dtype=[np.int64, np.int32] option to replace y_numeric at some point but this is outside of the scope of this PR.

y = as_float_array(y, copy=self.copy_X)

# init cross-validation generator
cv = check_cv(self.cv, X, y, classifier=False)
30 changes: 14 additions & 16 deletions sklearn/preprocessing/data.py
@@ -323,7 +323,7 @@ def transform(self, X):
"""
check_is_fitted(self, 'scale_')

X = check_array(X, copy=self.copy, ensure_2d=False)
X = check_array(X, copy=self.copy, ensure_2d=False, dtype=FLOAT_DTYPES)
Member:
That's good!

Member:
😕 is that not going to cast an int32 dtype into a float64:

import numpy as np

from sklearn.utils.validation import check_array, FLOAT_DTYPES
check_array(np.arange(10, dtype='i4'), dtype=FLOAT_DTYPES).dtype

returns dtype('float64').

On the other hand as_float_array seems to do the right thing.

raghavrv (Member, Author):
Thanks loic! will fix it!!

Member:
You get an array taking twice the memory when 'int32' is cast to 'float64', but this may not happen that much in practice, I guess.

Member:
I'm not sure that would be too difficult to do, but I also don't know if it's high up on the priority list. We could fix that up if someone files an issue saying that they run out of memory after feeding integer data into some model.
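To make the trade-off in this thread concrete, a small sketch (assumed behaviour of these helpers at the time of the PR, worth re-checking against master) comparing the two validators on float32 and int32 input:

import numpy as np
from sklearn.utils import as_float_array
from sklearn.utils.validation import check_array, FLOAT_DTYPES

X_f32 = np.ones((3, 2), dtype=np.float32)
X_i32 = np.ones((3, 2), dtype=np.int32)

# check_array with FLOAT_DTYPES keeps existing float dtypes but converts any
# integer input to float64, doubling the memory for int32 data.
print(check_array(X_f32, dtype=FLOAT_DTYPES).dtype)   # float32
print(check_array(X_i32, dtype=FLOAT_DTYPES).dtype)   # float64

# as_float_array converts int32 to float32 instead, keeping the same footprint.
print(as_float_array(X_f32).dtype)                    # float32
print(as_float_array(X_i32).dtype)                    # float32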

if X.ndim == 1:
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)

Expand All @@ -341,7 +341,7 @@ def inverse_transform(self, X):
"""
check_is_fitted(self, 'scale_')

X = check_array(X, copy=self.copy, ensure_2d=False)
X = check_array(X, copy=self.copy, ensure_2d=False, dtype=FLOAT_DTYPES)
if X.ndim == 1:
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)

@@ -1068,14 +1068,14 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
[4, 5]])
>>> poly = PolynomialFeatures(2)
>>> poly.fit_transform(X)
array([[ 1, 0, 1, 0, 0, 1],
[ 1, 2, 3, 4, 6, 9],
[ 1, 4, 5, 16, 20, 25]])
array([[ 1., 0., 1., 0., 0., 1.],
[ 1., 2., 3., 4., 6., 9.],
[ 1., 4., 5., 16., 20., 25.]])
>>> poly = PolynomialFeatures(interaction_only=True)
>>> poly.fit_transform(X)
array([[ 1, 0, 1, 0],
[ 1, 2, 3, 6],
[ 1, 4, 5, 20]])
array([[ 1., 0., 1., 0.],
[ 1., 2., 3., 6.],
[ 1., 4., 5., 20.]])

Attributes
----------
@@ -1149,7 +1149,7 @@ def transform(self, X, y=None):
"""
check_is_fitted(self, ['n_input_features_', 'n_output_features_'])

X = check_array(X)
X = check_array(X, dtype=FLOAT_DTYPES)
n_samples, n_features = X.shape

if n_features != self.n_input_features_:
@@ -1313,7 +1313,7 @@ def binarize(X, threshold=0.0, copy=True):

Parameters
----------
XX : {array-like, sparse matrix}, shape [n_samples, n_features]
X : {array-like, sparse matrix}, shape [n_samples, n_features]
The data to binarize, element by element.
scipy.sparse matrices should be in CSR or CSC format to avoid an
un-necessary copy.
@@ -1438,7 +1438,7 @@ def fit(self, K, y=None):
-------
self : returns an instance of self.
"""
K = check_array(K)
K = check_array(K, dtype=FLOAT_DTYPES)
n_samples = K.shape[0]
self.K_fit_rows_ = np.sum(K, axis=0) / n_samples
self.K_fit_all_ = self.K_fit_rows_.sum() / n_samples
Expand All @@ -1461,9 +1461,7 @@ def transform(self, K, y=None, copy=True):
"""
check_is_fitted(self, 'K_fit_all_')

K = check_array(K)
if copy:
K = K.copy()
K = check_array(K, copy=copy, dtype=FLOAT_DTYPES)

K_pred_cols = (np.sum(K, axis=1) /
               self.K_fit_rows_.shape[0])[:, np.newaxis]
@@ -1503,7 +1501,7 @@ def add_dummy_feature(X, value=1.0):
array([[ 1., 0., 1.],
[ 1., 1., 0.]])
"""
X = check_array(X, accept_sparse=['csc', 'csr', 'coo'])
X = check_array(X, accept_sparse=['csc', 'csr', 'coo'], dtype=FLOAT_DTYPES)
n_samples, n_features = X.shape
shape = (n_samples, n_features + 1)
if sparse.issparse(X):
@@ -1558,7 +1556,7 @@ def _transform_selected(X, transform, selected="all", copy=True):
if selected == "all":
return transform(X)

X = check_array(X, accept_sparse='csc', copy=copy)
X = check_array(X, accept_sparse='csc', copy=copy, dtype=FLOAT_DTYPES)

if len(selected) == 0:
return X
8 changes: 8 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
@@ -1,3 +1,4 @@

# Authors:
#
# Giorgio Patrini
@@ -1247,6 +1248,13 @@ def test_binarizer():
X_bin = binarizer.transform(X)
if init is not list:
assert_true(X_bin is X)

binarizer = Binarizer(copy=False)
X_float = np.array([[1, 0, 5], [2, 3, -1]], dtype=np.float64)
X_bin = binarizer.transform(X_float)
if init is not list:
assert_true(X_bin is X_float)

X_bin = toarray(X_bin)
assert_equal(np.sum(X_bin == 0), 2)
assert_equal(np.sum(X_bin == 1), 4)
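A quick standalone sketch of the behaviour the added assertions cover: with float input and copy=False, Binarizer is expected to work in place and return the very same array object.

import numpy as np
from sklearn.preprocessing import Binarizer

X_float = np.array([[1, 0, 5], [2, 3, -1]], dtype=np.float64)
binarizer = Binarizer(copy=False)           # threshold defaults to 0.0
X_bin = binarizer.transform(X_float)

print(X_bin is X_float)                     # True: no copy was made
print(X_bin)                                # entries binarized to 0. / 1. in place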
19 changes: 16 additions & 3 deletions sklearn/preprocessing/tests/test_imputation.py
@@ -1,3 +1,4 @@

import numpy as np
from scipy import sparse

@@ -12,7 +13,7 @@
from sklearn import grid_search
from sklearn import tree
from sklearn.random_projection import sparse_random_matrix


def _check_statistics(X, X_true,
strategy, statistics, missing_values):
@@ -121,6 +122,18 @@ def test_imputation_mean_median_only_zero():
statistics_median, 0)


def safe_median(arr, *args, **kwargs):
# np.median([]) raises a TypeError for numpy >= 1.10.1
length = arr.size if hasattr(arr, 'size') else len(arr)
return np.nan if length == 0 else np.median(arr, *args, **kwargs)


def safe_mean(arr, *args, **kwargs):
# np.mean([]) raises a RuntimeWarning for numpy >= 1.10.1
Member:
and before, I think. But the statement is not wrong, I guess ^^

length = arr.size if hasattr(arr, 'size') else len(arr)
Member:
the line is missing here now.

raghavrv (Member, Author):
lol sorry :P done :)

return np.nan if length == 0 else np.mean(arr, *args, **kwargs)


def test_imputation_mean_median():
# Test imputation using the mean and median strategies, when
# missing_values != 0.
Expand All @@ -134,9 +147,9 @@ def test_imputation_mean_median():
values = np.arange(1, shape[0]+1)
values[4::2] = - values[4::2]

tests = [("mean", "NaN", lambda z, v, p: np.mean(np.hstack((z, v)))),
tests = [("mean", "NaN", lambda z, v, p: safe_mean(np.hstack((z, v)))),
("mean", 0, lambda z, v, p: np.mean(v)),
("median", "NaN", lambda z, v, p: np.median(np.hstack((z, v)))),
("median", "NaN", lambda z, v, p: safe_median(np.hstack((z, v)))),
("median", 0, lambda z, v, p: np.median(v))]

for strategy, test_missing_values, true_value_fun in tests:
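For completeness, a hedged sketch of the numpy behaviour the safe_mean / safe_median helpers above guard against, per the comments in the diff (exact behaviour depends on the numpy version):

import numpy as np
import warnings

empty = np.array([])

# np.mean([]) returns nan but emits a RuntimeWarning on recent numpy releases.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    print(np.mean(empty))                              # nan
print([str(w.message) for w in caught])                # includes 'Mean of empty slice'

# The guarded pattern used by the helpers: skip the reduction for empty input,
# avoiding the error np.median([]) raises on numpy >= 1.10.1.
value = np.nan if empty.size == 0 else np.median(empty)
print(value)                                           # nan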