diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
index 9cae6338f1a76..03293de3d0035 100644
--- a/sklearn/cluster/k_means_.py
+++ b/sklearn/cluster/k_means_.py
@@ -770,7 +770,7 @@ def _check_test_data(self, X):
             warnings.warn("Got data type %s, converted to float "
                           "to avoid overflows" % X.dtype,
                           RuntimeWarning, stacklevel=2)
-            X = X.astype(np.float)
+            X = astype(X, np.float, copy=False)
 
         return X
 
diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py
index a78803db1feb2..9bd8eb0cf6897 100644
--- a/sklearn/datasets/base.py
+++ b/sklearn/datasets/base.py
@@ -23,7 +23,6 @@
 from ..utils import check_random_state
 
 
-
 class Bunch(dict):
     """Container object for datasets
 
diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py
index 55366a0987289..2182c68cd93e0 100644
--- a/sklearn/datasets/twenty_newsgroups.py
+++ b/sklearn/datasets/twenty_newsgroups.py
@@ -50,6 +50,7 @@
 from .base import Bunch
 from .base import load_files
 from ..utils import check_random_state
+from ..utils.fixes import astype
 from ..feature_extraction.text import CountVectorizer
 from ..preprocessing import normalize
 from ..externals import joblib, six
@@ -345,8 +346,8 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None):
 
     # the data is stored as int16 for compactness
     # but normalize needs floats
-    X_train = X_train.astype(np.float64)
-    X_test = X_test.astype(np.float64)
+    X_train = astype(X_train, np.float64, copy=False)
+    X_test = astype(X_test, np.float64, copy=False)
     normalize(X_train, copy=False)
     normalize(X_test, copy=False)
 
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 66962958371b9..67be3b4e11cb0 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -37,7 +37,7 @@
 from ..utils import check_random_state, check_array, check_X_y, column_or_1d
 from ..utils import check_consistent_length, deprecated
 from ..utils.extmath import logsumexp
-from ..utils.fixes import expit, bincount
+from ..utils.fixes import astype, expit, bincount
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import check_is_fitted, NotFittedError
 from ..externals import six
@@ -1180,7 +1180,7 @@ def feature_importances_(self):
 
     def _validate_y(self, y):
         self.n_classes_ = 1
         if y.dtype.kind == 'O':
-            y = y.astype(np.float64)
+            y = astype(y, np.float64, copy=False)
         # Default implementation
         return y
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 6eb9c47648070..f4a38431513ea 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -22,6 +22,7 @@
 from ..externals import six
 
 from .sgd_fast import plain_sgd, average_sgd
+from ..utils.fixes import astype
 from ..utils.seq_dataset import ArrayDataset, CSRDataset
 from ..utils import compute_class_weight
 from .sgd_fast import Hinge
@@ -867,7 +868,7 @@ def _partial_fit(self, X, y, alpha, C, loss, learning_rate, n_iter,
                  sample_weight, coef_init, intercept_init):
         X, y = check_X_y(X, y, "csr", copy=False, order='C',
                          dtype=np.float64)
-        y = y.astype(np.float64)
+        y = astype(y, np.float64, copy=False)
 
         n_samples, n_features = X.shape
 
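For context on the helper these hunks switch to: `ndarray.astype` only accepts a `copy` keyword from NumPy 1.7.1 onwards, and the `scipy.sparse` matrices of this era take no `copy` argument at all, so the call sites go through the `sklearn.utils.fixes.astype` shim instead of calling `.astype(dtype, copy=False)` directly. A minimal sketch of what such a shim can look like, illustrative only and not the exact code in `sklearn/utils/fixes.py`:

import numpy as np

def astype(array, dtype, copy=True):
    # Sketch of a fixes.astype-style shim. Works for ndarrays and
    # scipy.sparse matrices alike: both expose .dtype and a plain
    # .astype(dtype) that needs no copy keyword.
    if array.dtype == np.dtype(dtype) and not copy:
        return array  # already the requested dtype: reuse the object
    return array.astype(dtype)  # genuine conversion, which copies

With `copy=False`, the cast becomes a no-op whenever the data already has the target dtype, so a copy is only paid for when a real conversion is required.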
diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/locally_linear.py
index 5d78713115146..ff6c851096178 100644
--- a/sklearn/manifold/locally_linear.py
+++ b/sklearn/manifold/locally_linear.py
@@ -10,6 +10,7 @@
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_random_state, check_array
 from ..utils.arpack import eigsh
+from ..utils.fixes import astype
 from ..utils.validation import check_is_fitted
 from ..neighbors import NearestNeighbors
 
@@ -43,9 +44,9 @@ def barycenter_weights(X, Z, reg=1e-3):
 
     n_samples, n_neighbors = X.shape[0], Z.shape[1]
     if X.dtype.kind == 'i':
-        X = X.astype(np.float)
+        X = astype(X, np.float, copy=False)
     if Z.dtype.kind == 'i':
-        Z = Z.astype(np.float)
+        Z = astype(Z, np.float, copy=False)
     B = np.empty((n_samples, n_neighbors), dtype=X.dtype)
     v = np.ones(n_neighbors, dtype=X.dtype)
 
diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py
index b5a9d8329ad84..991d5c9ec7d68 100644
--- a/sklearn/manifold/spectral_embedding_.py
+++ b/sklearn/manifold/spectral_embedding_.py
@@ -15,6 +15,7 @@
 from ..externals import six
 from ..utils import check_random_state, check_array, check_symmetric
 from ..utils.extmath import _deterministic_vector_sign_flip
+from ..utils.fixes import astype
 from ..utils.graph import graph_laplacian
 from ..utils.sparsetools import connected_components
 from ..utils.arpack import eigsh
@@ -263,7 +264,8 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
         # problem.
         if not sparse.issparse(laplacian):
             warnings.warn("AMG works better for sparse matrices")
-        laplacian = laplacian.astype(np.float)  # lobpcg needs native floats
+        # lobpcg needs native floats
+        laplacian = astype(laplacian, np.float, copy=False)
         laplacian = _set_diag(laplacian, 1)
         ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
         M = ml.aspreconditioner()
@@ -276,7 +278,8 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
             raise ValueError
 
     elif eigen_solver == "lobpcg":
-        laplacian = laplacian.astype(np.float)  # lobpcg needs native floats
+        # lobpcg needs native floats
+        laplacian = astype(laplacian, np.float, copy=True)
         if n_nodes < 5 * n_components + 1:
             # see note above under arpack why lobpcg has problems with small
             # number of nodes
@@ -287,7 +290,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
             embedding = diffusion_map.T[:n_components] * dd
         else:
             # lobpcg needs native floats
-            laplacian = laplacian.astype(np.float)
+            laplacian = astype(laplacian, np.float, copy=False)
             laplacian = _set_diag(laplacian, 1)
             # We increase the number of eigenvectors requested, as lobpcg
             # doesn't behave well in low dimension
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 1ecf4ad5bcc5f..bfb718df15340 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -27,6 +27,7 @@
 from .preprocessing import label_binarize
 from .utils import check_X_y, check_array
 from .utils.extmath import safe_sparse_dot, logsumexp
+from .utils.fixes import astype
 from .utils.multiclass import _check_partial_fit_first_call
 from .utils.fixes import in1d
 from .utils.validation import check_is_fitted
@@ -473,7 +474,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
             raise ValueError(msg % (X.shape[0], y.shape[0]))
 
         # convert to float to support sample weight consistently
-        Y = Y.astype(np.float64)
+        Y = astype(Y, np.float64, copy=False)
         if sample_weight is not None:
             Y *= check_array(sample_weight).T
 
@@ -522,7 +523,7 @@ def fit(self, X, y, sample_weight=None):
 
         # convert to float to support sample weight consistently;
         # this means we also don't have to cast X to floating point
-        Y = Y.astype(np.float64)
+        Y = astype(Y, np.float64, copy=False)
         if sample_weight is not None:
             Y *= check_array(sample_weight).T
 
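All of these hunks share one shape: a dtype-guarded cast moves to the shim. One call site differs: the `eigen_solver == "lobpcg"` branch of `spectral_embedding` keeps `copy=True`, so it still takes a private copy of the laplacian even when no conversion is needed, while the AMG and fallback branches use `copy=False`. The two copy semantics, demonstrated on NumPy >= 1.7.1:

import numpy as np

L = np.eye(4)                                      # a float64 laplacian
assert L.astype(np.float64, copy=False) is L       # copy=False: reuse as-is
assert L.astype(np.float64, copy=True) is not L    # copy=True: private copy

A = np.eye(4, dtype=np.int32)                      # integer adjacency
assert A.astype(np.float64, copy=False) is not A   # a conversion always copies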
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index c97d0ba3e5aed..820eef7443bd1 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -17,9 +17,9 @@
 from ..utils import check_array
 from ..utils import warn_if_not_float
 from ..utils.extmath import row_norms
-from ..utils.fixes import (combinations_with_replacement as combinations_w_r,
-                           bincount)
-from ..utils.fixes import isclose
+from ..utils.fixes import (astype,
+                           combinations_with_replacement as combinations_w_r,
+                           bincount, isclose)
 from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
                                       inplace_csr_row_normalize_l2)
 from ..utils.sparsefuncs import (inplace_column_scale, mean_variance_axis)
@@ -347,7 +347,7 @@ def fit(self, X, y=None):
         X = check_array(X, accept_sparse='csr', copy=self.copy,
                         ensure_2d=False)
         if warn_if_not_float(X, estimator=self):
-            X = X.astype(np.float)
+            X = astype(X, np.float, copy=False)
         if sparse.issparse(X):
             if self.with_mean:
                 raise ValueError(
@@ -380,7 +380,7 @@ def transform(self, X, y=None, copy=None):
         copy = copy if copy is not None else self.copy
         X = check_array(X, accept_sparse='csr', copy=copy, ensure_2d=False)
         if warn_if_not_float(X, estimator=self):
-            X = X.astype(np.float)
+            X = astype(X, np.float, copy=False)
         if sparse.issparse(X):
             if self.with_mean:
                 raise ValueError(
diff --git a/sklearn/utils/graph.py b/sklearn/utils/graph.py
index 650e71841d359..e368e6bf9994d 100644
--- a/sklearn/utils/graph.py
+++ b/sklearn/utils/graph.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 from scipy import sparse
 
+from .fixes import astype
 from .graph_shortest_path import graph_shortest_path
 
@@ -113,7 +114,7 @@ def graph_laplacian(csgraph, normed=False, return_diag=False):
 
     if normed and (np.issubdtype(csgraph.dtype, np.int)
                    or np.issubdtype(csgraph.dtype, np.uint)):
-        csgraph = csgraph.astype(np.float)
+        csgraph = astype(csgraph, np.float, copy=False)
 
     if sparse.isspmatrix(csgraph):
         return _laplacian_sparse(csgraph, normed=normed,
diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py
index b2a7fa498c079..5d5e9677b2b83 100644
--- a/sklearn/utils/random.py
+++ b/sklearn/utils/random.py
@@ -8,7 +8,7 @@
 import array
 
 from sklearn.utils import check_random_state
-
+from sklearn.utils.fixes import astype
 from ._random import sample_without_replacement
 
 __all__ = ['sample_without_replacement', 'choice']
@@ -238,7 +238,7 @@ def random_choice_csc(n_samples, classes, class_probability=None,
             if classes[j].dtype.kind != 'i':
                 raise ValueError("class dtype %s is not supported" %
                                  classes[j].dtype)
-            classes[j] = classes[j].astype(int)
+            classes[j] = astype(classes[j], int, copy=False)
 
             # use uniform distribution if no class_probability is given
             if class_probability is None:
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 0d5969b3e3643..525de0ffdea3f 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -13,6 +13,7 @@
 
 import scipy.sparse as sp
 
 from ..externals import six
+from .fixes import astype
 from inspect import getargspec
 
@@ -250,7 +251,8 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy,
             spmatrix = spmatrix.astype(dtype)
     else:
         # create new
-        spmatrix = spmatrix.asformat(accept_sparse[0]).astype(dtype)
+        spmatrix = spmatrix.asformat(accept_sparse[0])
+        spmatrix = spmatrix.astype(dtype)
     if force_all_finite:
         if not hasattr(spmatrix, "data"):
             warnings.warn("Can't check %s sparse matrix for nan or inf."
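The same shim covers the sparse call sites above (`StandardScaler` in `preprocessing/data.py`, `graph_laplacian` in `utils/graph.py`), since `scipy.sparse` matrices expose `.dtype` and a `copy`-less `.astype(dtype)`; the `_ensure_sparse_format` hunk is a behaviour-preserving split of a chained call into two statements. Using the sketched `astype` from the note above:

import numpy as np
import scipy.sparse as sp

S = sp.csr_matrix(np.eye(3), dtype=np.float64)
assert astype(S, np.float64, copy=False) is S  # dtype matches: same object back
assert astype(S, np.float32, copy=False).dtype == np.float32  # real conversion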
@@ -444,7 +446,7 @@ def check_X_y(X, y, accept_sparse=None, dtype="numeric", order=None, copy=False,
         y = column_or_1d(y, warn=True)
         _assert_all_finite(y)
     if y_numeric and y.dtype.kind == 'O':
-        y = y.astype(np.float64)
+        y = astype(y, np.float64, copy=False)
 
     check_consistent_length(X, y)
 
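One more nuance: at a guarded call site like this one, the `kind == 'O'` test means a real conversion (and hence a copy) happens whenever the line executes, so `copy=False` costs nothing but also saves nothing. The savings show up at unconditional casts such as the one in `stochastic_gradient.py` above, where `y` is typically already `float64` and the old code copied it for nothing. For example:

import numpy as np

y = np.asarray([1.0, 2.0, 3.0])                    # already float64
assert y.astype(np.float64, copy=False) is y       # free: no copy made
assert y.astype(np.float64, copy=True) is not y    # the old behaviour: a copy

y_obj = np.array([1, 2.5, 3], dtype=object)        # object dtype, as guarded here
assert y_obj.astype(np.float64, copy=False) is not y_obj  # conversion must copy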