diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
index 9cae6338f1a76..03293de3d0035 100644
--- a/sklearn/cluster/k_means_.py
+++ b/sklearn/cluster/k_means_.py
@@ -770,7 +770,7 @@ def _check_test_data(self, X):
             warnings.warn("Got data type %s, converted to float "
                           "to avoid overflows" % X.dtype,
                           RuntimeWarning, stacklevel=2)
-            X = X.astype(np.float)
+            X = astype(X, np.float, copy=False)
 
         return X
 
diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py
index a78803db1feb2..9bd8eb0cf6897 100644
--- a/sklearn/datasets/base.py
+++ b/sklearn/datasets/base.py
@@ -23,7 +23,6 @@
 from ..utils import check_random_state
 
 
-
 class Bunch(dict):
     """Container object for datasets
 
diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py
index 55366a0987289..2182c68cd93e0 100644
--- a/sklearn/datasets/twenty_newsgroups.py
+++ b/sklearn/datasets/twenty_newsgroups.py
@@ -50,6 +50,7 @@
 from .base import Bunch
 from .base import load_files
 from ..utils import check_random_state
+from ..utils.fixes import astype
 from ..feature_extraction.text import CountVectorizer
 from ..preprocessing import normalize
 from ..externals import joblib, six
@@ -345,8 +346,8 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None):
 
     # the data is stored as int16 for compactness
     # but normalize needs floats
-    X_train = X_train.astype(np.float64)
-    X_test = X_test.astype(np.float64)
+    X_train = astype(X_train, np.float64, copy=False)
+    X_test = astype(X_test, np.float64, copy=False)
     normalize(X_train, copy=False)
     normalize(X_test, copy=False)
 
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 66962958371b9..67be3b4e11cb0 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -37,7 +37,7 @@
 from ..utils import check_random_state, check_array, check_X_y, column_or_1d
 from ..utils import check_consistent_length, deprecated
 from ..utils.extmath import logsumexp
-from ..utils.fixes import expit, bincount
+from ..utils.fixes import astype, expit, bincount
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import check_is_fitted, NotFittedError
 from ..externals import six
@@ -1180,7 +1180,7 @@ def feature_importances_(self):
 
     def _validate_y(self, y):
         self.n_classes_ = 1
         if y.dtype.kind == 'O':
-            y = y.astype(np.float64)
+            y = astype(y, np.float64, copy=False)
         # Default implementation
         return y
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 6eb9c47648070..f4a38431513ea 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -22,6 +22,7 @@
 from ..externals import six
 
 from .sgd_fast import plain_sgd, average_sgd
+from ..utils.fixes import astype
 from ..utils.seq_dataset import ArrayDataset, CSRDataset
 from ..utils import compute_class_weight
 from .sgd_fast import Hinge
@@ -867,7 +868,7 @@ def _partial_fit(self, X, y, alpha, C, loss, learning_rate, n_iter,
                  sample_weight, coef_init, intercept_init):
         X, y = check_X_y(X, y, "csr", copy=False, order='C',
                          dtype=np.float64)
-        y = y.astype(np.float64)
+        y = astype(y, np.float64, copy=False)
 
         n_samples, n_features = X.shape
 
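For context on the helper these hunks switch to: `ndarray.astype` only accepts a `copy` keyword from NumPy 1.7.1 onwards, and the `scipy.sparse` matrices of this era take no `copy` argument at all, so the call sites go through the `sklearn.utils.fixes.astype` shim instead of calling `.astype(dtype, copy=False)` directly. A minimal sketch of what such a shim can look like, illustrative only and not the exact code in `sklearn/utils/fixes.py`:

import numpy as np

def astype(array, dtype, copy=True):
    # Sketch of a fixes.astype-style shim. Works for ndarrays and
    # scipy.sparse matrices alike: both expose .dtype and a plain
    # .astype(dtype) that needs no copy keyword.
    if array.dtype == np.dtype(dtype) and not copy:
        return array  # already the requested dtype: reuse the object
    return array.astype(dtype)  # genuine conversion, which copies

With `copy=False`, the cast becomes a no-op whenever the data already has the target dtype, so a copy is only paid for when a real conversion is required.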
diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/locally_linear.py
index 5d78713115146..ff6c851096178 100644
--- a/sklearn/manifold/locally_linear.py
+++ b/sklearn/manifold/locally_linear.py
@@ -10,6 +10,7 @@
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_random_state, check_array
 from ..utils.arpack import eigsh
+from ..utils.fixes import astype
 from ..utils.validation import check_is_fitted
 from ..neighbors import NearestNeighbors
 
@@ -43,9 +44,9 @@ def barycenter_weights(X, Z, reg=1e-3):
 
     n_samples, n_neighbors = X.shape[0], Z.shape[1]
     if X.dtype.kind == 'i':
-        X = X.astype(np.float)
+        X = astype(X, np.float, copy=False)
     if Z.dtype.kind == 'i':
-        Z = Z.astype(np.float)
+        Z = astype(Z, np.float, copy=False)
     B = np.empty((n_samples, n_neighbors), dtype=X.dtype)
     v = np.ones(n_neighbors, dtype=X.dtype)
 
diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py
index b5a9d8329ad84..991d5c9ec7d68 100644
--- a/sklearn/manifold/spectral_embedding_.py
+++ b/sklearn/manifold/spectral_embedding_.py
@@ -15,6 +15,7 @@
 from ..externals import six
 from ..utils import check_random_state, check_array, check_symmetric
 from ..utils.extmath import _deterministic_vector_sign_flip
+from ..utils.fixes import astype
 from ..utils.graph import graph_laplacian
 from ..utils.sparsetools import connected_components
 from ..utils.arpack import eigsh
@@ -263,7 +264,8 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
         # problem.
         if not sparse.issparse(laplacian):
             warnings.warn("AMG works better for sparse matrices")
-        laplacian = laplacian.astype(np.float)  # lobpcg needs native floats
+        # lobpcg needs native floats
+        laplacian = astype(laplacian, np.float, copy=False)
         laplacian = _set_diag(laplacian, 1)
         ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
         M = ml.aspreconditioner()
@@ -276,7 +278,8 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
             raise ValueError
 
     elif eigen_solver == "lobpcg":
-        laplacian = laplacian.astype(np.float)  # lobpcg needs native floats
+        # lobpcg needs native floats
+        laplacian = astype(laplacian, np.float, copy=True)
         if n_nodes < 5 * n_components + 1:
             # see note above under arpack why lobpcg has problems with small
             # number of nodes
@@ -287,7 +290,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
             embedding = diffusion_map.T[:n_components] * dd
         else:
             # lobpcg needs native floats
-            laplacian = laplacian.astype(np.float)
+            laplacian = astype(laplacian, np.float, copy=False)
             laplacian = _set_diag(laplacian, 1)
             # We increase the number of eigenvectors requested, as lobpcg
             # doesn't behave well in low dimension
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 1ecf4ad5bcc5f..bfb718df15340 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -27,6 +27,7 @@
 from .preprocessing import label_binarize
 from .utils import check_X_y, check_array
 from .utils.extmath import safe_sparse_dot, logsumexp
+from .utils.fixes import astype
 from .utils.multiclass import _check_partial_fit_first_call
 from .utils.fixes import in1d
 from .utils.validation import check_is_fitted
@@ -473,7 +474,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
             raise ValueError(msg % (X.shape[0], y.shape[0]))
 
         # convert to float to support sample weight consistently
-        Y = Y.astype(np.float64)
+        Y = astype(Y, np.float64, copy=False)
         if sample_weight is not None:
             Y *= check_array(sample_weight).T
 
@@ -522,7 +523,7 @@ def fit(self, X, y, sample_weight=None):
 
         # convert to float to support sample weight consistently;
         # this means we also don't have to cast X to floating point
-        Y = Y.astype(np.float64)
+        Y = astype(Y, np.float64, copy=False)
         if sample_weight is not None:
             Y *= check_array(sample_weight).T
 
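All of these hunks share one shape: a dtype-guarded cast moves to the shim. One call site differs: the `eigen_solver == "lobpcg"` branch of `spectral_embedding` keeps `copy=True`, so it still takes a private copy of the laplacian even when no conversion is needed, while the AMG and fallback branches use `copy=False`. The two copy semantics, demonstrated on NumPy >= 1.7.1:

import numpy as np

L = np.eye(4)                                      # a float64 laplacian
assert L.astype(np.float64, copy=False) is L       # copy=False: reuse as-is
assert L.astype(np.float64, copy=True) is not L    # copy=True: private copy

A = np.eye(4, dtype=np.int32)                      # integer adjacency
assert A.astype(np.float64, copy=False) is not A   # a conversion always copies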
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index c97d0ba3e5aed..820eef7443bd1 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -17,9 +17,9 @@
 from ..utils import check_array
 from ..utils import warn_if_not_float
 from ..utils.extmath import row_norms
-from ..utils.fixes import (combinations_with_replacement as combinations_w_r,
-                           bincount)
-from ..utils.fixes import isclose
+from ..utils.fixes import (astype,
+                           combinations_with_replacement as combinations_w_r,
+                           bincount, isclose)
 from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
                                       inplace_csr_row_normalize_l2)
 from ..utils.sparsefuncs import (inplace_column_scale, mean_variance_axis)
@@ -347,7 +347,7 @@ def fit(self, X, y=None):
         X = check_array(X, accept_sparse='csr', copy=self.copy,
                         ensure_2d=False)
         if warn_if_not_float(X, estimator=self):
-            X = X.astype(np.float)
+            X = astype(X, np.float, copy=False)
         if sparse.issparse(X):
             if self.with_mean:
                 raise ValueError(
@@ -380,7 +380,7 @@ def transform(self, X, y=None, copy=None):
         copy = copy if copy is not None else self.copy
         X = check_array(X, accept_sparse='csr', copy=copy, ensure_2d=False)
         if warn_if_not_float(X, estimator=self):
-            X = X.astype(np.float)
+            X = astype(X, np.float, copy=False)
         if sparse.issparse(X):
             if self.with_mean:
                 raise ValueError(
diff --git a/sklearn/utils/graph.py b/sklearn/utils/graph.py
index 650e71841d359..e368e6bf9994d 100644
--- a/sklearn/utils/graph.py
+++ b/sklearn/utils/graph.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 from scipy import sparse
 
+from .fixes import astype
 from .graph_shortest_path import graph_shortest_path
 
@@ -113,7 +114,7 @@ def graph_laplacian(csgraph, normed=False, return_diag=False):
 
     if normed and (np.issubdtype(csgraph.dtype, np.int)
                    or np.issubdtype(csgraph.dtype, np.uint)):
-        csgraph = csgraph.astype(np.float)
+        csgraph = astype(csgraph, np.float, copy=False)
 
     if sparse.isspmatrix(csgraph):
         return _laplacian_sparse(csgraph, normed=normed,
diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py
index b2a7fa498c079..5d5e9677b2b83 100644
--- a/sklearn/utils/random.py
+++ b/sklearn/utils/random.py
@@ -8,7 +8,7 @@
 import array
 
 from sklearn.utils import check_random_state
-
+from sklearn.utils.fixes import astype
 from ._random import sample_without_replacement
 
 __all__ = ['sample_without_replacement', 'choice']
@@ -238,7 +238,7 @@ def random_choice_csc(n_samples, classes, class_probability=None,
             if classes[j].dtype.kind != 'i':
                 raise ValueError("class dtype %s is not supported" %
                                  classes[j].dtype)
-            classes[j] = classes[j].astype(int)
+            classes[j] = astype(classes[j], int, copy=False)
 
             # use uniform distribution if no class_probability is given
             if class_probability is None:
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 0d5969b3e3643..525de0ffdea3f 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -13,6 +13,7 @@
 
 import scipy.sparse as sp
 
 from ..externals import six
+from .fixes import astype
 from inspect import getargspec
 
@@ -250,7 +251,8 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy,
             spmatrix = spmatrix.astype(dtype)
     else:
         # create new
-        spmatrix = spmatrix.asformat(accept_sparse[0]).astype(dtype)
+        spmatrix = spmatrix.asformat(accept_sparse[0])
+        spmatrix = spmatrix.astype(dtype)
     if force_all_finite:
         if not hasattr(spmatrix, "data"):
             warnings.warn("Can't check %s sparse matrix for nan or inf."
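The same shim covers the sparse call sites above (`StandardScaler` in `preprocessing/data.py`, `graph_laplacian` in `utils/graph.py`), since `scipy.sparse` matrices expose `.dtype` and a `copy`-less `.astype(dtype)`; the `_ensure_sparse_format` hunk is a behaviour-preserving split of a chained call into two statements. Using the sketched `astype` from the note above:

import numpy as np
import scipy.sparse as sp

S = sp.csr_matrix(np.eye(3), dtype=np.float64)
assert astype(S, np.float64, copy=False) is S  # dtype matches: same object back
assert astype(S, np.float32, copy=False).dtype == np.float32  # real conversion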
@@ -444,7 +446,7 @@ def check_X_y(X, y, accept_sparse=None, dtype="numeric", order=None, copy=False,
         y = column_or_1d(y, warn=True)
         _assert_all_finite(y)
     if y_numeric and y.dtype.kind == 'O':
-        y = y.astype(np.float64)
+        y = astype(y, np.float64, copy=False)
 
     check_consistent_length(X, y)
 
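One more nuance: at a guarded call site like this one, the `kind == 'O'` test means a real conversion (and hence a copy) happens whenever the line executes, so `copy=False` costs nothing but also saves nothing. The savings show up at unconditional casts such as the one in `stochastic_gradient.py` above, where `y` is typically already `float64` and the old code copied it for nothing. For example:

import numpy as np

y = np.asarray([1.0, 2.0, 3.0])                    # already float64
assert y.astype(np.float64, copy=False) is y       # free: no copy made
assert y.astype(np.float64, copy=True) is not y    # the old behaviour: a copy

y_obj = np.array([1, 2.5, 3], dtype=object)        # object dtype, as guarded here
assert y_obj.astype(np.float64, copy=False) is not y_obj  # conversion must copy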