diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index eacc208d4ef08..335cdd3cce5ea 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -235,30 +235,30 @@ def test_perfect_checkerboard(): def test_errors(): data = np.arange(25).reshape((5, 5)) - model = SpectralBiclustering(n_clusters=(3, 3, 3)) + model = SpectralBiclustering(n_clusters=(3, 3, 3), random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering(n_clusters='abc') + model = SpectralBiclustering(n_clusters='abc', random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering(n_clusters=(3, 'abc')) + model = SpectralBiclustering(n_clusters=(3, 'abc'), random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering(method='unknown') + model = SpectralBiclustering(method='unknown', random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering(svd_method='unknown') + model = SpectralBiclustering(svd_method='unknown', random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering(n_components=0) + model = SpectralBiclustering(n_components=0, random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering(n_best=0) + model = SpectralBiclustering(n_best=0, random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering(n_components=3, n_best=4) + model = SpectralBiclustering(n_components=3, n_best=4, random_state=42) assert_raises(ValueError, model.fit, data) - model = SpectralBiclustering() + model = SpectralBiclustering(random_state=42) data = np.arange(27).reshape((3, 3, 3)) assert_raises(ValueError, model.fit, data) diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index 41d915b74fc9d..b3c45c3b74c8e 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -23,7 +23,7 @@ def test_n_samples_leaves_roots(): # Sanity check for the number of samples in leaves and roots - X, y = make_blobs(n_samples=10) + X, y = make_blobs(n_samples=10, random_state=42) brc = Birch() brc.fit(X) n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_]) @@ -35,7 +35,7 @@ def test_n_samples_leaves_roots(): def test_partial_fit(): # Test that fit is equivalent to calling partial_fit multiple times - X, y = make_blobs(n_samples=100) + X, y = make_blobs(n_samples=100, random_state=42) brc = Birch(n_clusters=3) brc.fit(X) brc_partial = Birch(n_clusters=None) @@ -71,7 +71,7 @@ def test_birch_predict(): def test_n_clusters(): # Test that n_clusters param works properly - X, y = make_blobs(n_samples=100, centers=10) + X, y = make_blobs(n_samples=100, centers=10, random_state=42) brc1 = Birch(n_clusters=10) brc1.fit(X) assert_greater(len(brc1.subcluster_centers_), 10) @@ -86,7 +86,7 @@ def test_n_clusters(): assert_array_equal(brc1.labels_, brc2.labels_) # Test that the wrong global clustering step raises an Error. 
- clf = ElasticNet() + clf = ElasticNet(random_state=42) brc3 = Birch(n_clusters=clf) assert_raises(ValueError, brc3.fit, X) @@ -97,7 +97,7 @@ def test_n_clusters(): def test_sparse_X(): # Test that sparse and dense data give same results - X, y = make_blobs(n_samples=100, centers=10) + X, y = make_blobs(n_samples=100, centers=10, random_state=42) brc = Birch(n_clusters=10) brc.fit(X) @@ -120,7 +120,7 @@ def check_branching_factor(node, branching_factor): def test_branching_factor(): # Test that nodes have at max branching_factor number of subclusters - X, y = make_blobs() + X, y = make_blobs(random_state=42) branching_factor = 9 # Purposefully set a low threshold to maximize the subclusters. @@ -150,7 +150,7 @@ def check_threshold(birch_instance, threshold): def test_threshold(): # Test that the leaf subclusters have a threshold lesser than radius - X, y = make_blobs(n_samples=80, centers=4) + X, y = make_blobs(n_samples=80, centers=4, random_state=42) brc = Birch(threshold=0.5, n_clusters=None) brc.fit(X) check_threshold(brc, 0.5) diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 31307e55801a5..8601ac1b236ba 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -116,7 +116,8 @@ def test_minibatch_update_consistency(): # step 1: compute the dense minibatch update old_inertia, incremental_diff = _mini_batch_step( X_mb, x_mb_squared_norms, new_centers, counts, - buffer, 1, None, random_reassign=False) + buffer, 1, None, random_reassign=False, + random_state=42) assert_greater(old_inertia, 0.0) # compute the new inertia on the same batch to check that it decreased @@ -133,7 +134,8 @@ def test_minibatch_update_consistency(): # step 2: compute the sparse minibatch update old_inertia_csr, incremental_diff_csr = _mini_batch_step( X_mb_csr, x_mb_squared_norms_csr, new_centers_csr, counts_csr, - buffer_csr, 1, None, random_reassign=False) + buffer_csr, 1, None, random_reassign=False, + random_state=42) assert_greater(old_inertia_csr, 0.0) # compute the new inertia on the same batch to check that it decreased @@ -216,7 +218,7 @@ def test_k_means_plus_plus_init_2_jobs(): def test_k_means_precompute_distances_flag(): # check that a warning is raised if the precompute_distances flag is not # supported - km = KMeans(precompute_distances="wrong") + km = KMeans(precompute_distances="wrong", random_state=42) assert_raises(ValueError, km.fit, X) @@ -263,8 +265,10 @@ def test_k_means_n_init(): # two regression tests on bad n_init argument # previous bug: n_init <= 0 threw non-informative TypeError (#3858) - assert_raises_regex(ValueError, "n_init", KMeans(n_init=0).fit, X) - assert_raises_regex(ValueError, "n_init", KMeans(n_init=-1).fit, X) + assert_raises_regex(ValueError, "n_init", KMeans(n_init=0, + random_state=42).fit, X) + assert_raises_regex(ValueError, "n_init", KMeans(n_init=-1, + random_state=42).fit, X) def test_k_means_explicit_init_shape(): @@ -331,7 +335,8 @@ def test_mb_k_means_plus_plus_init_sparse_matrix(): def test_minibatch_init_with_large_k(): - mb_k_means = MiniBatchKMeans(init='k-means++', init_size=10, n_clusters=20) + mb_k_means = MiniBatchKMeans(init='k-means++', init_size=10, n_clusters=20, + random_state=42) # Check that a warning is raised, as the number clusters is larger # than the init_size assert_warns(RuntimeWarning, mb_k_means.fit, X) @@ -516,12 +521,14 @@ def test_minibatch_set_init_size(): def test_k_means_invalid_init(): - km = KMeans(init="invalid", n_init=1, 
n_clusters=n_clusters) + km = KMeans(init="invalid", n_init=1, n_clusters=n_clusters, + random_state=42) assert_raises(ValueError, km.fit, X) def test_mini_match_k_means_invalid_init(): - km = MiniBatchKMeans(init="invalid", n_init=1, n_clusters=n_clusters) + km = MiniBatchKMeans(init="invalid", n_init=1, n_clusters=n_clusters, + random_state=42) assert_raises(ValueError, km.fit, X) @@ -605,7 +612,8 @@ def test_predict_minibatch_dense_input(): def test_predict_minibatch_kmeanspp_init_sparse_input(): mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init='k-means++', - n_init=10).fit(X_csr) + n_init=10, + random_state=42).fit(X_csr) # sanity check: re-predict labeling for training set samples assert_array_equal(mb_k_means.predict(X_csr), mb_k_means.labels_) @@ -621,6 +629,7 @@ def test_predict_minibatch_kmeanspp_init_sparse_input(): def test_predict_minibatch_random_init_sparse_input(): mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init='random', + random_state=42, n_init=10).fit(X_csr) # sanity check: re-predict labeling for training set samples @@ -643,15 +652,20 @@ def test_int_input(): init_int = X_int[:2] fitted_models = [ - KMeans(n_clusters=2).fit(X_int), - KMeans(n_clusters=2, init=init_int, n_init=1).fit(X_int), + KMeans(n_clusters=2, random_state=42).fit(X_int), + KMeans(n_clusters=2, init=init_int, n_init=1, + random_state=42).fit(X_int), # mini batch kmeans is very unstable on such a small dataset hence # we use many inits - MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2).fit(X_int), - MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2).fit(X_int_csr), + MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2, + random_state=42).fit(X_int), + MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2, + random_state=42).fit(X_int_csr), MiniBatchKMeans(n_clusters=2, batch_size=2, + random_state=42, init=init_int, n_init=1).fit(X_int), MiniBatchKMeans(n_clusters=2, batch_size=2, + random_state=42, init=init_int, n_init=1).fit(X_int_csr), ] @@ -665,7 +679,7 @@ def test_int_input(): def test_transform(): - km = KMeans(n_clusters=n_clusters) + km = KMeans(n_clusters=n_clusters, random_state=42) km.fit(X) X_new = km.transform(km.cluster_centers_) @@ -730,7 +744,8 @@ def test_k_means_function(): sys.stdout = StringIO() try: cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters, - verbose=True) + verbose=True, + random_state=42) finally: sys.stdout = old_stdout centers = cluster_centers @@ -765,7 +780,7 @@ def test_x_squared_norms_init_centroids(): def test_max_iter_error(): - km = KMeans(max_iter=-1) + km = KMeans(max_iter=-1, random_state=42) assert_raise_message(ValueError, 'Number of iterations should be', km.fit, X) @@ -821,7 +836,8 @@ def test_k_means_init_centers(): X_test = dtype(X_small) init_centers_test = dtype(init_centers) assert_array_equal(init_centers, init_centers_test) - km = KMeans(init=init_centers_test, n_clusters=3, n_init=1) + km = KMeans(init=init_centers_test, n_clusters=3, n_init=1, + random_state=42) km.fit(X_test) assert_equal(False, np.may_share_memory(km.cluster_centers_, init_centers)) @@ -833,14 +849,15 @@ def test_sparse_k_means_init_centers(): X = iris.data # Get a local optimum - centers = KMeans(n_clusters=3).fit(X).cluster_centers_ + centers = KMeans(n_clusters=3, random_state=42).fit(X).cluster_centers_ # Fit starting from a local optimum shouldn't change the solution np.testing.assert_allclose( centers, KMeans(n_clusters=3, init=centers, - n_init=1).fit(X).cluster_centers_ + n_init=1, + 
random_state=42).fit(X).cluster_centers_ ) # The same should be true when X is sparse @@ -849,7 +866,8 @@ def test_sparse_k_means_init_centers(): centers, KMeans(n_clusters=3, init=centers, - n_init=1).fit(X_sparse).cluster_centers_ + n_init=1, + random_state=42).fit(X_sparse).cluster_centers_ ) @@ -860,10 +878,11 @@ def test_sparse_validate_centers(): X = iris.data # Get a local optimum - centers = KMeans(n_clusters=4).fit(X).cluster_centers_ + centers = KMeans(n_clusters=4, random_state=42).fit(X).cluster_centers_ # Test that a ValueError is raised for validate_center_shape - classifier = KMeans(n_clusters=3, init=centers, n_init=1) + classifier = KMeans(n_clusters=3, init=centers, n_init=1, + random_state=42) msg = "The shape of the initial centers \(\(4L?, 4L?\)\) " \ "does not match the number of clusters 3" diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index 74d4b91efee3b..0df13b5d9e875 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -27,7 +27,7 @@ def test_estimate_bandwidth(): # Test estimate_bandwidth - bandwidth = estimate_bandwidth(X, n_samples=200) + bandwidth = estimate_bandwidth(X, n_samples=200, random_state=42) assert_true(0.9 <= bandwidth <= 1.5) diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 494c9ab7c3fab..42c78e7a3b453 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -171,7 +171,8 @@ def histogram(x, y, **kwargs): assert_equal((X.shape[0],), labels.shape) # raise error on unknown affinity - sp = SpectralClustering(n_clusters=2, affinity='') + sp = SpectralClustering(n_clusters=2, affinity='', + random_state=42) assert_raises(ValueError, sp.fit, X) diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index 27e423b410210..9d6708cefb093 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -50,7 +50,7 @@ def test_fast_mcd_on_invalid_input(): def test_mcd_class_on_invalid_input(): X = np.arange(100) - mcd = MinCovDet() + mcd = MinCovDet(random_state=42) assert_raise_message(ValueError, 'Got X with X.ndim=1', mcd.fit, X) @@ -88,14 +88,14 @@ def test_mcd_issue1127(): # (i.e. 
n_support = n_samples) rnd = np.random.RandomState(0) X = rnd.normal(size=(3, 1)) - mcd = MinCovDet() + mcd = MinCovDet(random_state=42) mcd.fit(X) def test_outlier_detection(): rnd = np.random.RandomState(0) X = rnd.randn(100, 10) - clf = EllipticEnvelope(contamination=0.1) + clf = EllipticEnvelope(contamination=0.1, random_state=42) assert_raises(NotFittedError, clf.predict, X) assert_raises(NotFittedError, clf.decision_function, X) clf.fit(X) diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index b7ed5c4703492..61e74cc28fc16 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -91,7 +91,8 @@ def test_dict_learning_nonzero_coefs(): def test_dict_learning_unknown_fit_algorithm(): n_components = 5 - dico = DictionaryLearning(n_components, fit_algorithm='') + dico = DictionaryLearning(n_components, fit_algorithm='', + random_state=42) assert_raises(ValueError, dico.fit, X) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index d268cdc4e6047..47ece7a284576 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -87,7 +87,8 @@ def g_test(x): assert_raises(ValueError, fastica, m.T, fun=np.tanh, algorithm=algo) else: - X = PCA(n_components=2, whiten=True).fit_transform(m.T) + X = PCA(n_components=2, whiten=True, + random_state=42).fit_transform(m.T) k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo, whiten=False) assert_raises(ValueError, fastica, X, fun=np.tanh, algorithm=algo) @@ -129,7 +130,8 @@ def g_test(x): ica = FastICA(fun=fn, algorithm=algo, random_state=0) assert_raises(ValueError, ica.fit, m.T) - assert_raises(TypeError, FastICA(fun=moves.xrange(10)).fit, m.T) + assert_raises(TypeError, FastICA(fun=moves.xrange(10), + random_state=42).fit, m.T) def test_fastica_nowhiten(): diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 87e7f9d7683e1..3c18db6200132 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -16,7 +16,7 @@ def test_incremental_pca(): X = iris.data batch_size = X.shape[0] // 3 ipca = IncrementalPCA(n_components=2, batch_size=batch_size) - pca = PCA(n_components=2) + pca = PCA(n_components=2, random_state=42) pca.fit_transform(X) X_transformed = ipca.fit_transform(X) @@ -167,7 +167,7 @@ def test_incremental_pca_against_pca_iris(): # Test that IncrementalPCA and PCA are approximate (to a sign flip). 
X = iris.data - Y_pca = PCA(n_components=2).fit_transform(X) + Y_pca = PCA(n_components=2, random_state=42).fit_transform(X) Y_ipca = IncrementalPCA(n_components=2, batch_size=25).fit_transform(X) assert_almost_equal(np.abs(Y_pca), np.abs(Y_ipca), 1) @@ -180,7 +180,7 @@ def test_incremental_pca_against_pca_random_data(): n_features = 3 X = rng.randn(n_samples, n_features) + 5 * rng.rand(1, n_features) - Y_pca = PCA(n_components=3).fit_transform(X) + Y_pca = PCA(n_components=3, random_state=42).fit_transform(X) Y_ipca = IncrementalPCA(n_components=3, batch_size=25).fit_transform(X) assert_almost_equal(np.abs(Y_pca), np.abs(Y_ipca), 1) @@ -193,7 +193,7 @@ def test_explained_variances(): prec = 3 n_samples, n_features = X.shape for nc in [None, 99]: - pca = PCA(n_components=nc).fit(X) + pca = PCA(n_components=nc, random_state=42).fit(X) ipca = IncrementalPCA(n_components=nc, batch_size=100).fit(X) assert_almost_equal(pca.explained_variance_, ipca.explained_variance_, decimal=prec) @@ -261,7 +261,7 @@ def test_whitening(): prec = 3 n_samples, n_features = X.shape for nc in [None, 9]: - pca = PCA(whiten=True, n_components=nc).fit(X) + pca = PCA(whiten=True, n_components=nc, random_state=42).fit(X) ipca = IncrementalPCA(whiten=True, n_components=nc, batch_size=250).fit(X) diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 722d7ec0e018a..b8c5573b95895 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -31,7 +31,7 @@ def histogram(x, y, **kwargs): # transform fit data kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver, - fit_inverse_transform=inv) + fit_inverse_transform=inv, random_state=42) X_fit_transformed = kpca.fit_transform(X_fit) X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit) assert_array_almost_equal(np.abs(X_fit_transformed), @@ -79,7 +79,7 @@ def test_kernel_pca_sparse(): for kernel in ("linear", "rbf", "poly"): # transform fit data kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver, - fit_inverse_transform=False) + fit_inverse_transform=False, random_state=42) X_fit_transformed = kpca.fit_transform(X_fit) X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit) assert_array_almost_equal(np.abs(X_fit_transformed), @@ -105,8 +105,8 @@ def test_kernel_pca_linear_kernel(): # fit only the first four components: fifth is near zero eigenvalue, so # can be trimmed due to roundoff error assert_array_almost_equal( - np.abs(KernelPCA(4).fit(X_fit).transform(X_pred)), - np.abs(PCA(4).fit(X_fit).transform(X_pred))) + np.abs(KernelPCA(4, random_state=42).fit(X_fit).transform(X_pred)), + np.abs(PCA(4, random_state=42).fit(X_fit).transform(X_pred))) def test_kernel_pca_n_components(): @@ -116,7 +116,8 @@ def test_kernel_pca_n_components(): for eigen_solver in ("dense", "arpack"): for c in [1, 2, 4]: - kpca = KernelPCA(n_components=c, eigen_solver=eigen_solver) + kpca = KernelPCA(n_components=c, eigen_solver=eigen_solver, + random_state=42) shape = kpca.fit(X_fit).transform(X_pred).shape assert_equal(shape, (2, c)) @@ -126,15 +127,15 @@ def test_remove_zero_eig(): X = np.array([[1 - 1e-30, 1], [1, 1], [1, 1 - 1e-20]]) # n_components=None (default) => remove_zero_eig is True - kpca = KernelPCA() + kpca = KernelPCA(random_state=42) Xt = kpca.fit_transform(X) assert_equal(Xt.shape, (3, 0)) - kpca = KernelPCA(n_components=2) + kpca = KernelPCA(n_components=2, random_state=42) Xt = kpca.fit_transform(X) assert_equal(Xt.shape, (3, 2)) - kpca = 
KernelPCA(n_components=2, remove_zero_eig=True) + kpca = KernelPCA(n_components=2, remove_zero_eig=True, random_state=42) Xt = kpca.fit_transform(X) assert_equal(Xt.shape, (3, 0)) @@ -145,17 +146,20 @@ def test_kernel_pca_precomputed(): X_pred = rng.random_sample((2, 4)) for eigen_solver in ("dense", "arpack"): - X_kpca = KernelPCA(4, eigen_solver=eigen_solver).\ + X_kpca = KernelPCA(4, eigen_solver=eigen_solver, random_state=42).\ fit(X_fit).transform(X_pred) X_kpca2 = KernelPCA( - 4, eigen_solver=eigen_solver, kernel='precomputed').fit( + 4, eigen_solver=eigen_solver, kernel='precomputed', + random_state=42).fit( np.dot(X_fit, X_fit.T)).transform(np.dot(X_pred, X_fit.T)) X_kpca_train = KernelPCA( 4, eigen_solver=eigen_solver, + random_state=42, kernel='precomputed').fit_transform(np.dot(X_fit, X_fit.T)) X_kpca_train2 = KernelPCA( - 4, eigen_solver=eigen_solver, kernel='precomputed').fit( + 4, eigen_solver=eigen_solver, kernel='precomputed', + random_state=42).fit( np.dot(X_fit, X_fit.T)).transform(np.dot(X_fit, X_fit.T)) assert_array_almost_equal(np.abs(X_kpca), @@ -168,7 +172,7 @@ def test_kernel_pca_precomputed(): def test_kernel_pca_invalid_kernel(): rng = np.random.RandomState(0) X_fit = rng.random_sample((2, 4)) - kpca = KernelPCA(kernel="tototiti") + kpca = KernelPCA(kernel="tototiti", random_state=42) assert_raises(ValueError, kpca.fit, X_fit) @@ -177,8 +181,9 @@ def test_gridsearch_pipeline(): # circles with a perceptron model. X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0) - kpca = KernelPCA(kernel="rbf", n_components=2) - pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())]) + kpca = KernelPCA(kernel="rbf", n_components=2, random_state=42) + pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", + Perceptron(random_state=42))]) param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2)) grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid) grid_search.fit(X, y) @@ -190,8 +195,9 @@ def test_gridsearch_pipeline_precomputed(): # circles with a perceptron model using a precomputed kernel. X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0) - kpca = KernelPCA(kernel="precomputed", n_components=2) - pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())]) + kpca = KernelPCA(kernel="precomputed", n_components=2, random_state=42) + pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", + Perceptron(random_state=42))]) param_grid = dict(Perceptron__n_iter=np.arange(1, 5)) grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid) X_kernel = rbf_kernel(X, gamma=2.) @@ -205,7 +211,7 @@ def test_nested_circles(): random_state=0) # 2D nested circles are not linearly separable - train_score = Perceptron().fit(X, y).score(X, y) + train_score = Perceptron(random_state=42).fit(X, y).score(X, y) assert_less(train_score, 0.8) # Project the circles data into the first 2 components of a RBF Kernel @@ -214,9 +220,9 @@ def test_nested_circles(): # and the gamma value has to be updated, the Kernel PCA example will # have to be updated too. kpca = KernelPCA(kernel="rbf", n_components=2, - fit_inverse_transform=True, gamma=2.) 
+ fit_inverse_transform=True, gamma=2., random_state=42) X_kpca = kpca.fit_transform(X) # The data is perfectly linearly separable in that space - train_score = Perceptron().fit(X_kpca, y).score(X_kpca, y) + train_score = Perceptron(random_state=42).fit(X_kpca, y).score(X_kpca, y) assert_equal(train_score, 1.0) diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index 6254c147d45a5..fab781a6572c7 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -34,22 +34,26 @@ def test_parameter_checking(): A = np.ones((2, 2)) name = 'spam' msg = "Invalid solver parameter: got 'spam' instead of one of" - assert_raise_message(ValueError, msg, NMF(solver=name).fit, A) + assert_raise_message(ValueError, msg, NMF(solver=name, + random_state=42).fit, A) msg = "Invalid init parameter: got 'spam' instead of one of" - assert_raise_message(ValueError, msg, NMF(init=name).fit, A) + assert_raise_message(ValueError, msg, NMF(init=name, + random_state=42).fit, A) msg = "Invalid beta_loss parameter: got 'spam' instead of one" assert_raise_message(ValueError, msg, NMF(solver='mu', - beta_loss=name).fit, A) + beta_loss=name, + random_state=42).fit, A) msg = "Invalid beta_loss parameter: solver 'cd' does not handle " msg += "beta_loss = 1.0" assert_raise_message(ValueError, msg, NMF(solver='cd', - beta_loss=1.0).fit, A) + beta_loss=1.0, + random_state=42).fit, A) msg = "Negative values in data passed to" - assert_raise_message(ValueError, msg, NMF().fit, -A) + assert_raise_message(ValueError, msg, NMF(random_state=42).fit, -A) assert_raise_message(ValueError, msg, nmf._initialize_nmf, -A, 2, 'nndsvd') - clf = NMF(2, tol=0.1).fit(A) + clf = NMF(2, tol=0.1, random_state=42).fit(A) assert_raise_message(ValueError, msg, clf.transform, -A) diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index c3a221fe4800a..65eaa1fec617b 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -151,11 +151,14 @@ def test_invalid_params(): X = np.ones((5, 10)) invalid_models = ( - ('n_topics', LatentDirichletAllocation(n_topics=0)), + ('n_topics', LatentDirichletAllocation(n_topics=0, random_state=42)), ('learning_method', - LatentDirichletAllocation(learning_method='unknown')), - ('total_samples', LatentDirichletAllocation(total_samples=0)), - ('learning_offset', LatentDirichletAllocation(learning_offset=-1)), + LatentDirichletAllocation(learning_method='unknown', + random_state=42)), + ('total_samples', LatentDirichletAllocation(total_samples=0, + random_state=42)), + ('learning_offset', LatentDirichletAllocation(learning_offset=-1, + random_state=42)), ) for param, model in invalid_models: regex = r"^Invalid %r parameter" % param @@ -165,7 +168,7 @@ def test_invalid_params(): def test_lda_negative_input(): # test pass dense matrix with sparse negative input. 
X = -np.ones((5, 10)) - lda = LatentDirichletAllocation() + lda = LatentDirichletAllocation(random_state=42) regex = r"^Negative values in data passed" assert_raises_regexp(ValueError, regex, lda.fit, X) @@ -174,7 +177,7 @@ def test_lda_no_component_error(): # test `transform` and `perplexity` before `fit` rng = np.random.RandomState(0) X = rng.randint(4, size=(20, 10)) - lda = LatentDirichletAllocation() + lda = LatentDirichletAllocation(random_state=42) regex = r"^no 'components_' attribute" assert_raises_regexp(NotFittedError, regex, lda.transform, X) assert_raises_regexp(NotFittedError, regex, lda.perplexity, X) @@ -350,7 +353,7 @@ def test_lda_empty_docs(): """Test LDA on empty document (all-zero rows).""" Z = np.zeros((5, 4)) for X in [Z, csr_matrix(Z)]: - lda = LatentDirichletAllocation(max_iter=750).fit(X) + lda = LatentDirichletAllocation(max_iter=750, random_state=42).fit(X) assert_almost_equal(lda.components_.sum(axis=0), np.ones(lda.components_.shape[1])) diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index ea321089d719c..25977c48ec797 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -28,7 +28,7 @@ def test_pca(): X = iris.data for n_comp in np.arange(X.shape[1]): - pca = PCA(n_components=n_comp, svd_solver='full') + pca = PCA(n_components=n_comp, svd_solver='full', random_state=42) X_r = pca.fit(X).transform(X) np.testing.assert_equal(X_r.shape[1], n_comp) @@ -47,7 +47,7 @@ def test_pca(): np.eye(X.shape[1]), 12) # test explained_variance_ratio_ == 1 with all components - pca = PCA(svd_solver='full') + pca = PCA(svd_solver='full', random_state=42) pca.fit(X) assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3) @@ -138,7 +138,7 @@ def test_no_empty_slice_warning(): n_components = 10 n_features = n_components + 2 # anything > n_comps triggered it in 0.16 X = np.random.uniform(-1, 1, size=(n_components, n_features)) - pca = PCA(n_components=n_components) + pca = PCA(n_components=n_components, random_state=42) assert_no_warnings(pca.fit, X) @@ -180,7 +180,8 @@ def test_whitening(): X_ = X.copy() pca = PCA(n_components=n_components, whiten=False, copy=copy, - svd_solver=solver).fit(X_) + svd_solver=solver, + random_state=42).fit(X_) X_unwhitened = pca.transform(X_) assert_equal(X_unwhitened.shape, (n_samples, n_components)) @@ -199,7 +200,7 @@ def test_explained_variance(): X = rng.randn(n_samples, n_features) - pca = PCA(n_components=2, svd_solver='full').fit(X) + pca = PCA(n_components=2, svd_solver='full', random_state=42).fit(X) apca = PCA(n_components=2, svd_solver='arpack', random_state=0).fit(X) assert_array_almost_equal(pca.explained_variance_, apca.explained_variance_, 1) @@ -230,7 +231,7 @@ def test_explained_variance(): n_informative=n_features-2, random_state=rng)[0] - pca = PCA(n_components=2).fit(X) + pca = PCA(n_components=2, random_state=42).fit(X) rpca = PCA(n_components=2, svd_solver='randomized', random_state=rng).fit(X) assert_array_almost_equal(pca.explained_variance_ratio_, @@ -309,7 +310,8 @@ def test_pca_check_projection(): Xt = 0.1 * rng.randn(1, p) + np.array([3, 4, 5]) for solver in solver_list: - Yt = PCA(n_components=2, svd_solver=solver).fit(X).transform(Xt) + Yt = PCA(n_components=2, svd_solver=solver, + random_state=42).fit(X).transform(Xt) Yt /= np.sqrt((Yt ** 2).sum()) assert_almost_equal(np.abs(Yt[0][0]), 1., 1) @@ -325,14 +327,15 @@ def test_pca_inverse(): # same check that we can find the original data from the transformed # signal 
(since the data is almost of rank n_components) - pca = PCA(n_components=2, svd_solver='full').fit(X) + pca = PCA(n_components=2, svd_solver='full', random_state=42).fit(X) Y = pca.transform(X) Y_inverse = pca.inverse_transform(Y) assert_almost_equal(X, Y_inverse, decimal=3) # same as above with whitening (approximate reconstruction) for solver in solver_list: - pca = PCA(n_components=2, whiten=True, svd_solver=solver) + pca = PCA(n_components=2, whiten=True, svd_solver=solver, + random_state=42) pca.fit(X) Y = pca.transform(X) Y_inverse = pca.inverse_transform(Y) @@ -344,7 +347,8 @@ def test_pca_validation(): for solver in solver_list: for n_components in [-1, 3]: assert_raises(ValueError, - PCA(n_components, svd_solver=solver).fit, X) + PCA(n_components, svd_solver=solver, + random_state=42).fit, X) def test_randomized_pca_check_projection(): @@ -402,7 +406,7 @@ def test_pca_dim(): n, p = 100, 5 X = rng.randn(n, p) * .1 X[:10] += np.array([3, 4, 5, 1, 2]) - pca = PCA(n_components='mle', svd_solver='full').fit(X) + pca = PCA(n_components='mle', svd_solver='full', random_state=42).fit(X) assert_equal(pca.n_components, 'mle') assert_equal(pca.n_components_, 1) @@ -414,7 +418,7 @@ def test_infer_dim_1(): rng = np.random.RandomState(0) X = (rng.randn(n, p) * .1 + rng.randn(n, 1) * np.array([3, 4, 5, 1, 2]) + np.array([1, 0, 7, 4, 6])) - pca = PCA(n_components=p, svd_solver='full') + pca = PCA(n_components=p, svd_solver='full', random_state=42) pca.fit(X) spect = pca.explained_variance_ ll = [] @@ -432,7 +436,7 @@ def test_infer_dim_2(): X = rng.randn(n, p) * .1 X[:10] += np.array([3, 4, 5, 1, 2]) X[10:20] += np.array([6, 0, 7, 2, -1]) - pca = PCA(n_components=p, svd_solver='full') + pca = PCA(n_components=p, svd_solver='full', random_state=42) pca.fit(X) spect = pca.explained_variance_ assert_greater(_infer_dimension_(spect, n, p), 1) @@ -445,7 +449,7 @@ def test_infer_dim_3(): X[:10] += np.array([3, 4, 5, 1, 2]) X[10:20] += np.array([6, 0, 7, 2, -1]) X[30:40] += 2 * np.array([-1, 1, -1, 1, -1]) - pca = PCA(n_components=p, svd_solver='full') + pca = PCA(n_components=p, svd_solver='full', random_state=42) pca.fit(X) spect = pca.explained_variance_ assert_greater(_infer_dimension_(spect, n, p), 2) @@ -453,12 +457,12 @@ def test_infer_dim_3(): def test_infer_dim_by_explained_variance(): X = iris.data - pca = PCA(n_components=0.95, svd_solver='full') + pca = PCA(n_components=0.95, svd_solver='full', random_state=42) pca.fit(X) assert_equal(pca.n_components, 0.95) assert_equal(pca.n_components_, 2) - pca = PCA(n_components=0.01, svd_solver='full') + pca = PCA(n_components=0.01, svd_solver='full', random_state=42) pca.fit(X) assert_equal(pca.n_components, 0.01) assert_equal(pca.n_components_, 1) @@ -466,7 +470,7 @@ def test_infer_dim_by_explained_variance(): rng = np.random.RandomState(0) # more features than samples X = rng.rand(5, 20) - pca = PCA(n_components=.5, svd_solver='full').fit(X) + pca = PCA(n_components=.5, svd_solver='full', random_state=42).fit(X) assert_equal(pca.n_components, 0.5) assert_equal(pca.n_components_, 2) @@ -477,7 +481,7 @@ def test_pca_score(): rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) for solver in solver_list: - pca = PCA(n_components=2, svd_solver=solver) + pca = PCA(n_components=2, svd_solver=solver, random_state=42) pca.fit(X) ll1 = pca.score(X) h = -0.5 * np.log(2 * np.pi * np.exp(1) * 0.1 ** 2) * p @@ -490,14 +494,15 @@ def test_pca_score2(): rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) for 
solver in solver_list: - pca = PCA(n_components=2, svd_solver=solver) + pca = PCA(n_components=2, svd_solver=solver, random_state=42) pca.fit(X) ll1 = pca.score(X) ll2 = pca.score(rng.randn(n, p) * .2 + np.array([3, 4, 5])) assert_greater(ll1, ll2) # Test that it gives different scores if whiten=True - pca = PCA(n_components=2, whiten=True, svd_solver=solver) + pca = PCA(n_components=2, whiten=True, svd_solver=solver, + random_state=42) pca.fit(X) ll2 = pca.score(X) assert_true(ll1 > ll2) @@ -513,7 +518,7 @@ def test_pca_score3(): np.array([1, 0, 7])) ll = np.zeros(p) for k in range(p): - pca = PCA(n_components=k, svd_solver='full') + pca = PCA(n_components=k, svd_solver='full', random_state=42) pca.fit(Xl) ll[k] = pca.score(Xt) @@ -525,9 +530,9 @@ def test_svd_solver_auto(): X = rng.uniform(size=(1000, 50)) # case: n_components in (0,1) => 'full' - pca = PCA(n_components=.5) + pca = PCA(n_components=.5, random_state=42) pca.fit(X) - pca_test = PCA(n_components=.5, svd_solver='full') + pca_test = PCA(n_components=.5, svd_solver='full', random_state=42) pca_test.fit(X) assert_array_almost_equal(pca.components_, pca_test.components_) @@ -540,9 +545,9 @@ def test_svd_solver_auto(): assert_array_almost_equal(pca.components_, pca_test.components_) # case: n_components >= .8 * min(X.shape) => 'full' - pca = PCA(n_components=50) + pca = PCA(n_components=50, random_state=42) pca.fit(X) - pca_test = PCA(n_components=50, svd_solver='full') + pca_test = PCA(n_components=50, svd_solver='full', random_state=42) pca_test.fit(X) assert_array_almost_equal(pca.components_, pca_test.components_) @@ -581,12 +586,12 @@ def test_pca_spase_input(): assert(sp.sparse.issparse(X)) for svd_solver in solver_list: - pca = PCA(n_components=3, svd_solver=svd_solver) + pca = PCA(n_components=3, svd_solver=svd_solver, random_state=42) assert_raises(TypeError, pca.fit, X) def test_pca_bad_solver(): X = np.random.RandomState(0).rand(5, 4) - pca = PCA(n_components=3, svd_solver='bad_argument') + pca = PCA(n_components=3, svd_solver='bad_argument', random_state=42) assert_raises(ValueError, pca.fit, X) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 6172ac568b0c4..8f677ea0156cd 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -102,7 +102,7 @@ def test_transform_nan(): rng = np.random.RandomState(0) Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng) # wide array Y[:, 0] = 0 - estimator = SparsePCA(n_components=8) + estimator = SparsePCA(n_components=8, random_state=42) assert_false(np.any(np.isnan(estimator.fit_transform(Y)))) diff --git a/sklearn/decomposition/tests/test_truncated_svd.py b/sklearn/decomposition/tests/test_truncated_svd.py index 6d853642e1ce0..a51f3b942bbfc 100644 --- a/sklearn/decomposition/tests/test_truncated_svd.py +++ b/sklearn/decomposition/tests/test_truncated_svd.py @@ -22,7 +22,7 @@ def test_algorithms(): - svd_a = TruncatedSVD(30, algorithm="arpack") + svd_a = TruncatedSVD(30, algorithm="arpack", random_state=42) svd_r = TruncatedSVD(30, algorithm="randomized", random_state=42) Xa = svd_a.fit_transform(X)[:, :6] @@ -38,7 +38,7 @@ def test_algorithms(): def test_attributes(): for n_components in (10, 25, 41): - tsvd = TruncatedSVD(n_components).fit(X) + tsvd = TruncatedSVD(n_components, random_state=42).fit(X) assert_equal(tsvd.n_components, n_components) assert_equal(tsvd.components_.shape, (n_components, n_features)) @@ -46,14 +46,15 @@ def 
test_attributes(): def test_too_many_components(): for algorithm in ["arpack", "randomized"]: for n_components in (n_features, n_features + 1): - tsvd = TruncatedSVD(n_components=n_components, algorithm=algorithm) + tsvd = TruncatedSVD(n_components=n_components, algorithm=algorithm, + random_state=42) assert_raises(ValueError, tsvd.fit, X) def test_sparse_formats(): for fmt in ("array", "csr", "csc", "coo", "lil"): Xfmt = Xdense if fmt == "dense" else getattr(X, "to" + fmt)() - tsvd = TruncatedSVD(n_components=11) + tsvd = TruncatedSVD(n_components=11, random_state=42) Xtrans = tsvd.fit_transform(Xfmt) assert_equal(Xtrans.shape, (n_samples, 11)) Xtrans = tsvd.transform(Xfmt) @@ -72,16 +73,16 @@ def test_inverse_transform(): def test_integers(): Xint = X.astype(np.int64) - tsvd = TruncatedSVD(n_components=6) + tsvd = TruncatedSVD(n_components=6, random_state=42) Xtrans = tsvd.fit_transform(Xint) assert_equal(Xtrans.shape, (n_samples, tsvd.n_components)) def test_explained_variance(): # Test sparse data - svd_a_10_sp = TruncatedSVD(10, algorithm="arpack") + svd_a_10_sp = TruncatedSVD(10, algorithm="arpack", random_state=42) svd_r_10_sp = TruncatedSVD(10, algorithm="randomized", random_state=42) - svd_a_20_sp = TruncatedSVD(20, algorithm="arpack") + svd_a_20_sp = TruncatedSVD(20, algorithm="arpack", random_state=42) svd_r_20_sp = TruncatedSVD(20, algorithm="randomized", random_state=42) X_trans_a_10_sp = svd_a_10_sp.fit_transform(X) X_trans_r_10_sp = svd_r_10_sp.fit_transform(X) @@ -89,9 +90,9 @@ def test_explained_variance(): X_trans_r_20_sp = svd_r_20_sp.fit_transform(X) # Test dense data - svd_a_10_de = TruncatedSVD(10, algorithm="arpack") + svd_a_10_de = TruncatedSVD(10, algorithm="arpack", random_state=42) svd_r_10_de = TruncatedSVD(10, algorithm="randomized", random_state=42) - svd_a_20_de = TruncatedSVD(20, algorithm="arpack") + svd_a_20_de = TruncatedSVD(20, algorithm="arpack", random_state=42) svd_r_20_de = TruncatedSVD(20, algorithm="randomized", random_state=42) X_trans_a_10_de = svd_a_10_de.fit_transform(X.toarray()) X_trans_r_10_de = svd_r_10_de.fit_transform(X.toarray()) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 0170a3fa2262f..210151214be9e 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -64,11 +64,11 @@ def test_classification(): "bootstrap_features": [True, False]}) for base_estimator in [None, - DummyClassifier(), - Perceptron(), - DecisionTreeClassifier(), + DummyClassifier(random_state=42), + Perceptron(random_state=42), + DecisionTreeClassifier(random_state=42), KNeighborsClassifier(), - SVC()]: + SVC(random_state=42)]: for params in grid: BaggingClassifier(base_estimator=base_estimator, random_state=rng, @@ -148,7 +148,7 @@ def test_regression(): for base_estimator in [None, DummyRegressor(), - DecisionTreeRegressor(), + DecisionTreeRegressor(random_state=42), KNeighborsRegressor(), SVR()]: for params in grid: @@ -224,10 +224,12 @@ def test_bootstrap_samples(): boston.target, random_state=rng) - base_estimator = DecisionTreeRegressor().fit(X_train, y_train) + base_estimator = DecisionTreeRegressor(random_state=42).fit(X_train, + y_train) # without bootstrap, all trees are perfect on the training set - ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), + ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor( + random_state=42), max_samples=1.0, bootstrap=False, random_state=rng).fit(X_train, y_train) @@ -236,7 +238,8 @@ def 
test_bootstrap_samples(): ensemble.score(X_train, y_train)) # with bootstrap, trees are no longer perfect on the training set - ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), + ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor( + random_state=42), max_samples=1.0, bootstrap=True, random_state=rng).fit(X_train, y_train) @@ -252,7 +255,8 @@ def test_bootstrap_features(): boston.target, random_state=rng) - ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), + ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor( + random_state=42), max_features=1.0, bootstrap_features=False, random_state=rng).fit(X_train, y_train) @@ -260,7 +264,8 @@ def test_bootstrap_features(): for features in ensemble.estimators_features_: assert_equal(boston.data.shape[1], np.unique(features).shape[0]) - ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), + ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor( + random_state=42), max_features=1.0, bootstrap_features=True, random_state=rng).fit(X_train, y_train) @@ -278,7 +283,8 @@ def test_probability(): with np.errstate(divide="ignore", invalid="ignore"): # Normal case - ensemble = BaggingClassifier(base_estimator=DecisionTreeClassifier(), + ensemble = BaggingClassifier(base_estimator=DecisionTreeClassifier( + random_state=42), random_state=rng).fit(X_train, y_train) assert_array_almost_equal(np.sum(ensemble.predict_proba(X_test), @@ -289,9 +295,10 @@ def test_probability(): np.exp(ensemble.predict_log_proba(X_test))) # Degenerate case, where some classes are missing - ensemble = BaggingClassifier(base_estimator=LogisticRegression(), - random_state=rng, - max_samples=5).fit(X_train, y_train) + ensemble = BaggingClassifier( + base_estimator=LogisticRegression(random_state=42), + random_state=rng, + max_samples=5).fit(X_train, y_train) assert_array_almost_equal(np.sum(ensemble.predict_proba(X_test), axis=1), @@ -309,7 +316,8 @@ def test_oob_score_classification(): iris.target, random_state=rng) - for base_estimator in [DecisionTreeClassifier(), SVC()]: + for base_estimator in [DecisionTreeClassifier(random_state=42), + SVC(random_state=42)]: clf = BaggingClassifier(base_estimator=base_estimator, n_estimators=100, bootstrap=True, @@ -339,7 +347,8 @@ def test_oob_score_regression(): boston.target, random_state=rng) - clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(), + clf = BaggingRegressor(base_estimator=DecisionTreeRegressor( + random_state=42), n_estimators=50, bootstrap=True, oob_score=True, @@ -351,7 +360,8 @@ def test_oob_score_regression(): # Test with few estimators assert_warns(UserWarning, - BaggingRegressor(base_estimator=DecisionTreeRegressor(), + BaggingRegressor(base_estimator=DecisionTreeRegressor( + random_state=42), n_estimators=1, bootstrap=True, oob_score=True, @@ -381,34 +391,45 @@ def test_single_estimator(): def test_error(): # Test that it gives proper exception on deficient input. 
X, y = iris.data, iris.target - base = DecisionTreeClassifier() + base = DecisionTreeClassifier(random_state=42) # Test max_samples assert_raises(ValueError, - BaggingClassifier(base, max_samples=-1).fit, X, y) + BaggingClassifier(base, max_samples=-1, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_samples=0.0).fit, X, y) + BaggingClassifier(base, max_samples=0.0, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_samples=2.0).fit, X, y) + BaggingClassifier(base, max_samples=2.0, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_samples=1000).fit, X, y) + BaggingClassifier(base, max_samples=1000, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_samples="foobar").fit, X, y) + BaggingClassifier(base, max_samples="foobar", + random_state=42).fit, X, y) # Test max_features assert_raises(ValueError, - BaggingClassifier(base, max_features=-1).fit, X, y) + BaggingClassifier(base, max_features=-1, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_features=0.0).fit, X, y) + BaggingClassifier(base, max_features=0.0, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_features=2.0).fit, X, y) + BaggingClassifier(base, max_features=2.0, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_features=5).fit, X, y) + BaggingClassifier(base, max_features=5, + random_state=42).fit, X, y) assert_raises(ValueError, - BaggingClassifier(base, max_features="foobar").fit, X, y) + BaggingClassifier(base, max_features="foobar", + random_state=42).fit, X, y) # Test support of decision_function - assert_false(hasattr(BaggingClassifier(base).fit(X, y), 'decision_function')) + assert_false(hasattr(BaggingClassifier(base, random_state=42).fit(X, y), + 'decision_function')) def test_parallel_classification(): @@ -420,7 +441,7 @@ def test_parallel_classification(): iris.target, random_state=rng) - ensemble = BaggingClassifier(DecisionTreeClassifier(), + ensemble = BaggingClassifier(DecisionTreeClassifier(random_state=42), n_jobs=3, random_state=0).fit(X_train, y_train) @@ -431,7 +452,7 @@ def test_parallel_classification(): y2 = ensemble.predict_proba(X_test) assert_array_almost_equal(y1, y2) - ensemble = BaggingClassifier(DecisionTreeClassifier(), + ensemble = BaggingClassifier(DecisionTreeClassifier(random_state=42), n_jobs=1, random_state=0).fit(X_train, y_train) @@ -439,7 +460,8 @@ def test_parallel_classification(): assert_array_almost_equal(y1, y3) # decision_function - ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'), + ensemble = BaggingClassifier(SVC(decision_function_shape='ovr', + random_state=42), n_jobs=3, random_state=0).fit(X_train, y_train) @@ -449,7 +471,8 @@ def test_parallel_classification(): decisions2 = ensemble.decision_function(X_test) assert_array_almost_equal(decisions1, decisions2) - ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'), + ensemble = BaggingClassifier(SVC(decision_function_shape='ovr', + random_state=42), n_jobs=1, random_state=0).fit(X_train, y_train) @@ -465,7 +488,7 @@ def test_parallel_regression(): boston.target, random_state=rng) - ensemble = BaggingRegressor(DecisionTreeRegressor(), + ensemble = BaggingRegressor(DecisionTreeRegressor(random_state=42), n_jobs=3, random_state=0).fit(X_train, y_train) @@ -475,7 +498,7 @@ def test_parallel_regression(): y2 = ensemble.predict(X_test) 
assert_array_almost_equal(y1, y2) - ensemble = BaggingRegressor(DecisionTreeRegressor(), + ensemble = BaggingRegressor(DecisionTreeRegressor(random_state=42), n_jobs=1, random_state=0).fit(X_train, y_train) @@ -493,7 +516,7 @@ def test_gridsearch(): parameters = {'n_estimators': (1, 2), 'base_estimator__C': (1, 2)} - GridSearchCV(BaggingClassifier(SVC()), + GridSearchCV(BaggingClassifier(SVC(random_state=42), random_state=42), parameters, scoring="roc_auc").fit(X, y) @@ -513,13 +536,13 @@ def test_base_estimator(): assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier)) - ensemble = BaggingClassifier(DecisionTreeClassifier(), + ensemble = BaggingClassifier(DecisionTreeClassifier(random_state=42), n_jobs=3, random_state=0).fit(X_train, y_train) assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier)) - ensemble = BaggingClassifier(Perceptron(), + ensemble = BaggingClassifier(Perceptron(random_state=42), n_jobs=3, random_state=0).fit(X_train, y_train) @@ -536,7 +559,7 @@ def test_base_estimator(): assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor)) - ensemble = BaggingRegressor(DecisionTreeRegressor(), + ensemble = BaggingRegressor(DecisionTreeRegressor(random_state=42), n_jobs=3, random_state=0).fit(X_train, y_train) @@ -550,7 +573,9 @@ def test_base_estimator(): def test_bagging_with_pipeline(): estimator = BaggingClassifier(make_pipeline(SelectKBest(k=1), - DecisionTreeClassifier()), + DecisionTreeClassifier( + random_state=42)), + random_state=42, max_features=2) estimator.fit(iris.data, iris.target) assert_true(isinstance(estimator[0].steps[-1][1].random_state, @@ -568,7 +593,7 @@ def predict(self, X): def test_bagging_sample_weight_unsupported_but_passed(): - estimator = BaggingClassifier(DummyZeroEstimator()) + estimator = BaggingClassifier(DummyZeroEstimator(), random_state=42) rng = check_random_state(0) estimator.fit(iris.data, iris.target).predict(iris.data) @@ -603,7 +628,7 @@ def test_warm_start(random_state=42): def test_warm_start_smaller_n_estimators(): # Test if warm start'ed second fit with smaller n_estimators raises error. X, y = make_hastie_10_2(n_samples=20, random_state=1) - clf = BaggingClassifier(n_estimators=5, warm_start=True) + clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=42) clf.fit(X, y) clf.set_params(n_estimators=4) assert_raises(ValueError, clf.fit, X, y) @@ -651,14 +676,15 @@ def test_warm_start_equivalence(): def test_warm_start_with_oob_score_fails(): # Check using oob_score and warm_start simultaneously fails X, y = make_hastie_10_2(n_samples=20, random_state=1) - clf = BaggingClassifier(n_estimators=5, warm_start=True, oob_score=True) + clf = BaggingClassifier(n_estimators=5, warm_start=True, oob_score=True, + random_state=42) assert_raises(ValueError, clf.fit, X, y) def test_oob_score_removed_on_warm_start(): X, y = make_hastie_10_2(n_samples=2000, random_state=1) - clf = BaggingClassifier(n_estimators=50, oob_score=True) + clf = BaggingClassifier(n_estimators=50, oob_score=True, random_state=42) clf.fit(X, y) clf.set_params(warm_start=True, oob_score=False, n_estimators=100) @@ -682,7 +708,8 @@ def test_estimators_samples(): # generated at fit time can be identically reproduced at a later time # using data saved in object attributes. 
X, y = make_hastie_10_2(n_samples=200, random_state=1) - bagging = BaggingClassifier(LogisticRegression(), max_samples=0.5, + bagging = BaggingClassifier(LogisticRegression(random_state=42), + max_samples=0.5, max_features=0.5, random_state=1, bootstrap=False) bagging.fit(X, y) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 6b81dbf67466d..82043de4f2b01 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -25,7 +25,8 @@ def test_base(): # Check BaseEnsemble methods. ensemble = BaggingClassifier(base_estimator=Perceptron(random_state=None), - n_estimators=3) + n_estimators=3, + random_state=42) iris = load_iris() ensemble.fit(iris.data, iris.target) @@ -46,16 +47,19 @@ def test_base(): assert_true(isinstance(ensemble[2].random_state, int)) assert_not_equal(ensemble[1].random_state, ensemble[2].random_state) - np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(), - n_estimators=np.int32(3)) + np_int_ensemble = BaggingClassifier(base_estimator=Perceptron( + random_state=42), + n_estimators=np.int32(3), + random_state=42) np_int_ensemble.fit(iris.data, iris.target) def test_base_zero_n_estimators(): # Check that instantiating a BaseEnsemble with n_estimators<=0 raises # a ValueError. - ensemble = BaggingClassifier(base_estimator=Perceptron(), - n_estimators=0) + ensemble = BaggingClassifier(base_estimator=Perceptron(random_state=42), + n_estimators=0, + random_state=42) iris = load_iris() assert_raise_message(ValueError, "n_estimators must be greater than zero, got 0.", @@ -65,14 +69,18 @@ def test_base_zero_n_estimators(): def test_base_not_int_n_estimators(): # Check that instantiating a BaseEnsemble with a string as n_estimators # raises a ValueError demanding n_estimators to be supplied as an integer. - string_ensemble = BaggingClassifier(base_estimator=Perceptron(), - n_estimators='3') + string_ensemble = BaggingClassifier(base_estimator=Perceptron( + random_state=42), + n_estimators='3', + random_state=42) iris = load_iris() assert_raise_message(ValueError, "n_estimators must be an integer", string_ensemble.fit, iris.data, iris.target) - float_ensemble = BaggingClassifier(base_estimator=Perceptron(), - n_estimators=3.0) + float_ensemble = BaggingClassifier(base_estimator=Perceptron( + random_state=42), + n_estimators=3.0, + random_state=42) assert_raise_message(ValueError, "n_estimators must be an integer", float_ensemble.fit, iris.data, iris.target) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index d28780ee54e52..4cd22b4f7b70b 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -535,7 +535,8 @@ def test_random_trees_dense_type(): # works by returning a dense array. # Create the RTE with sparse=False - hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False) + hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False, + random_state=42) X, y = datasets.make_circles(factor=0.5) X_transformed = hasher.fit_transform(X) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 6fd55f691c26c..8d0cba1be1647 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -80,76 +80,107 @@ def test_parameter_checks(): # Check input parameter validation. 
assert_raises(ValueError, - GradientBoostingClassifier(n_estimators=0).fit, X, y) + GradientBoostingClassifier(n_estimators=0, random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(n_estimators=-1).fit, X, y) + GradientBoostingClassifier(n_estimators=-1, random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(learning_rate=0.0).fit, X, y) + GradientBoostingClassifier(learning_rate=0.0, + random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(learning_rate=-1.0).fit, X, y) + GradientBoostingClassifier(learning_rate=-1.0, + random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(loss='foobar').fit, X, y) + GradientBoostingClassifier(loss='foobar', random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(min_samples_split=0.0).fit, X, y) + GradientBoostingClassifier(min_samples_split=0.0, + random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(min_samples_split=-1.0).fit, X, y) + GradientBoostingClassifier(min_samples_split=-1.0, + random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(min_samples_split=1.1).fit, X, y) + GradientBoostingClassifier(min_samples_split=1.1, + random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(min_samples_leaf=0).fit, X, y) + GradientBoostingClassifier(min_samples_leaf=0, + random_state=42) + .fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(min_samples_leaf=-1.0).fit, X, y) + GradientBoostingClassifier(min_samples_leaf=-1.0, + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(min_weight_fraction_leaf=-1.).fit, + GradientBoostingClassifier(min_weight_fraction_leaf=-1., + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(min_weight_fraction_leaf=0.6).fit, + GradientBoostingClassifier(min_weight_fraction_leaf=0.6, + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(subsample=0.0).fit, X, y) + GradientBoostingClassifier(subsample=0.0, + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(subsample=1.1).fit, X, y) + GradientBoostingClassifier(subsample=1.1, + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(subsample=-0.1).fit, X, y) + GradientBoostingClassifier(subsample=-0.1, + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(max_depth=-0.1).fit, X, y) + GradientBoostingClassifier(max_depth=-0.1, + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(max_depth=0).fit, X, y) + GradientBoostingClassifier(max_depth=0, + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(init={}).fit, X, y) + GradientBoostingClassifier(init={}, + random_state=42).fit, X, y) # test fit before feature importance assert_raises(ValueError, - lambda: GradientBoostingClassifier().feature_importances_) + lambda: GradientBoostingClassifier(random_state=42) + .feature_importances_) # deviance requires ``n_classes >= 2``. 
assert_raises(ValueError, lambda X, y: GradientBoostingClassifier( - loss='deviance').fit(X, y), + loss='deviance', random_state=42).fit(X, y), X, [0, 0, 0, 0]) def test_loss_function(): assert_raises(ValueError, - GradientBoostingClassifier(loss='ls').fit, X, y) + GradientBoostingClassifier(loss='ls', + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(loss='lad').fit, X, y) + GradientBoostingClassifier(loss='lad', + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(loss='quantile').fit, X, y) + GradientBoostingClassifier(loss='quantile', + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingClassifier(loss='huber').fit, X, y) + GradientBoostingClassifier(loss='huber', + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingRegressor(loss='deviance').fit, X, y) + GradientBoostingRegressor(loss='deviance', + random_state=42).fit, X, y) assert_raises(ValueError, - GradientBoostingRegressor(loss='exponential').fit, X, y) + GradientBoostingRegressor(loss='exponential', + random_state=42).fit, X, y) def check_classification_synthetic(presort, loss): @@ -258,7 +289,7 @@ def test_regression_synthetic(): X_test, y_test = X[200:], y[200:] for presort in True, False: - clf = GradientBoostingRegressor(presort=presort) + clf = GradientBoostingRegressor(presort=presort, random_state=42) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) assert_less(mse, 5.0) @@ -394,28 +425,34 @@ def test_max_feature_auto(): X_train = X[:2000] y_train = y[:2000] - gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto') + gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto', + random_state=42) gbrt.fit(X_train, y_train) assert_equal(gbrt.max_features_, int(np.sqrt(n_features))) - gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto') + gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto', + random_state=42) gbrt.fit(X_train, y_train) assert_equal(gbrt.max_features_, n_features) - gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3) + gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3, + random_state=42) gbrt.fit(X_train, y_train) assert_equal(gbrt.max_features_, int(n_features * 0.3)) - gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt') + gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt', + random_state=42) gbrt.fit(X_train, y_train) assert_equal(gbrt.max_features_, int(np.sqrt(n_features))) - gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2') + gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2', + random_state=42) gbrt.fit(X_train, y_train) assert_equal(gbrt.max_features_, int(np.log2(n_features))) gbrt = GradientBoostingRegressor(n_estimators=1, - max_features=0.01 / X.shape[1]) + max_features=0.01 / X.shape[1], + random_state=42) gbrt.fit(X_train, y_train) assert_equal(gbrt.max_features_, 1) @@ -427,7 +464,7 @@ def test_staged_predict(): random_state=1, noise=1.0) X_train, y_train = X[:200], y[:200] X_test = X[200:] - clf = GradientBoostingRegressor() + clf = GradientBoostingRegressor(random_state=42) # test raise ValueError if not fitted assert_raises(ValueError, lambda X: np.fromiter( clf.staged_predict(X), dtype=np.float64), X_test) @@ -449,7 +486,7 @@ def test_staged_predict_proba(): random_state=1) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] - clf = 
GradientBoostingClassifier(n_estimators=20) + clf = GradientBoostingClassifier(n_estimators=20, random_state=42) # test raise NotFittedError if not fitted assert_raises(NotFittedError, lambda X: np.fromiter( clf.staged_predict_proba(X), dtype=np.float64), X_test) @@ -475,8 +512,8 @@ def test_staged_functions_defensive(): rng = np.random.RandomState(0) X = rng.uniform(size=(10, 3)) y = (4 * X[:, 0]).astype(np.int) + 1 # don't predict zeros - for estimator in [GradientBoostingRegressor(), - GradientBoostingClassifier()]: + for estimator in [GradientBoostingRegressor(random_state=42), + GradientBoostingClassifier(random_state=42)]: estimator.fit(X, y) for func in ['predict', 'decision_function', 'predict_proba']: staged_func = getattr(estimator, "staged_" + func, None) @@ -696,10 +733,11 @@ def test_warm_start(): # Test if warm start equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: - est = Cls(n_estimators=200, max_depth=1) + est = Cls(n_estimators=200, max_depth=1, random_state=42) est.fit(X, y) - est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True) + est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True, + random_state=42) est_ws.fit(X, y) est_ws.set_params(n_estimators=200) est_ws.fit(X, y) @@ -711,10 +749,11 @@ def test_warm_start_n_estimators(): # Test if warm start equals fit - set n_estimators. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: - est = Cls(n_estimators=300, max_depth=1) + est = Cls(n_estimators=300, max_depth=1, random_state=42) est.fit(X, y) - est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True) + est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True, + random_state=42) est_ws.fit(X, y) est_ws.set_params(n_estimators=300) est_ws.fit(X, y) @@ -726,7 +765,8 @@ def test_warm_start_max_depth(): # Test if possible to fit trees of different depth in ensemble. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: - est = Cls(n_estimators=100, max_depth=1, warm_start=True) + est = Cls(n_estimators=100, max_depth=1, warm_start=True, + random_state=42) est.fit(X, y) est.set_params(n_estimators=110, max_depth=2) est.fit(X, y) @@ -741,10 +781,11 @@ def test_warm_start_clear(): # Test if fit clears state. 
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]:
-        est = Cls(n_estimators=100, max_depth=1)
+        est = Cls(n_estimators=100, max_depth=1, random_state=42)
         est.fit(X, y)
-        est_2 = Cls(n_estimators=100, max_depth=1, warm_start=True)
+        est_2 = Cls(n_estimators=100, max_depth=1, warm_start=True,
+                    random_state=42)
         est_2.fit(X, y)  # inits state
         est_2.set_params(warm_start=False)
         est_2.fit(X, y)  # clears old state and equals est
@@ -756,7 +797,8 @@ def test_warm_start_zero_n_estimators():
     # Test if warm start with zero n_estimators raises error
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]:
-        est = Cls(n_estimators=100, max_depth=1, warm_start=True)
+        est = Cls(n_estimators=100, max_depth=1, warm_start=True,
+                  random_state=42)
         est.fit(X, y)
         est.set_params(n_estimators=0)
         assert_raises(ValueError, est.fit, X, y)
@@ -766,7 +808,8 @@ def test_warm_start_smaller_n_estimators():
     # Test if warm start with smaller n_estimators raises error
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]:
-        est = Cls(n_estimators=100, max_depth=1, warm_start=True)
+        est = Cls(n_estimators=100, max_depth=1, warm_start=True,
+                  random_state=42)
         est.fit(X, y)
         est.set_params(n_estimators=99)
         assert_raises(ValueError, est.fit, X, y)
@@ -776,7 +819,7 @@ def test_warm_start_equal_n_estimators():
     # Test if warm start with equal n_estimators does nothing
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]:
-        est = Cls(n_estimators=100, max_depth=1)
+        est = Cls(n_estimators=100, max_depth=1, random_state=42)
         est.fit(X, y)
         est2 = clone(est)
@@ -977,7 +1020,8 @@ def test_min_impurity_split():
 def test_warm_start_wo_nestimators_change():
     # Test if warm_start does nothing if n_estimators is not changed.
     # Regression test for #3513.
- clf = GradientBoostingClassifier(n_estimators=10, warm_start=True) + clf = GradientBoostingClassifier(n_estimators=10, warm_start=True, + random_state=42) clf.fit([[0, 1], [2, 3]], [0, 1]) assert_equal(clf.estimators_.shape[0], 10) clf.fit([[0, 1], [2, 3]], [0, 1]) @@ -1017,7 +1061,7 @@ def test_non_uniform_weights_toy_edge_case_reg(): sample_weight = [0, 0, 1, 1] for loss in ('huber', 'ls', 'lad', 'quantile'): gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, - loss=loss) + loss=loss, random_state=42) gb.fit(X, y, sample_weight=sample_weight) assert_greater(gb.predict([[1, 0]])[0], 0.5) @@ -1031,7 +1075,8 @@ def test_non_uniform_weights_toy_edge_case_clf(): # ignore the first 2 training samples by setting their weight to 0 sample_weight = [0, 0, 1, 1] for loss in ('deviance', 'exponential'): - gb = GradientBoostingClassifier(n_estimators=5, loss=loss) + gb = GradientBoostingClassifier(n_estimators=5, loss=loss, + random_state=42) gb.fit(X, y, sample_weight=sample_weight) assert_array_equal(gb.predict([[1, 0]]), [1]) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 767444f923f77..0929c1ce78ed0 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -92,43 +92,48 @@ def test_iforest_error(): # Test max_samples assert_raises(ValueError, - IsolationForest(max_samples=-1).fit, X) + IsolationForest(max_samples=-1, random_state=42).fit, X) assert_raises(ValueError, - IsolationForest(max_samples=0.0).fit, X) + IsolationForest(max_samples=0.0, random_state=42).fit, X) assert_raises(ValueError, - IsolationForest(max_samples=2.0).fit, X) + IsolationForest(max_samples=2.0, random_state=42).fit, X) # The dataset has less than 256 samples, explicitly setting # max_samples > n_samples should result in a warning. 
If not set # explicitly there should be no warning assert_warns_message(UserWarning, "max_samples will be set to n_samples for estimation", - IsolationForest(max_samples=1000).fit, X) - assert_no_warnings(IsolationForest(max_samples='auto').fit, X) - assert_no_warnings(IsolationForest(max_samples=np.int64(2)).fit, X) - assert_raises(ValueError, IsolationForest(max_samples='foobar').fit, X) - assert_raises(ValueError, IsolationForest(max_samples=1.5).fit, X) + IsolationForest(max_samples=1000, + random_state=42).fit, X) + assert_no_warnings(IsolationForest(max_samples='auto', + random_state=42).fit, X) + assert_no_warnings(IsolationForest(max_samples=np.int64(2), + random_state=42).fit, X) + assert_raises(ValueError, IsolationForest(max_samples='foobar', + random_state=42).fit, X) + assert_raises(ValueError, IsolationForest(max_samples=1.5, + random_state=42).fit, X) def test_recalculate_max_depth(): """Check max_depth recalculation when max_samples is reset to n_samples""" X = iris.data - clf = IsolationForest().fit(X) + clf = IsolationForest(random_state=42).fit(X) for est in clf.estimators_: assert_equal(est.max_depth, int(np.ceil(np.log2(X.shape[0])))) def test_max_samples_attribute(): X = iris.data - clf = IsolationForest().fit(X) + clf = IsolationForest(random_state=42).fit(X) assert_equal(clf.max_samples_, X.shape[0]) - clf = IsolationForest(max_samples=500) + clf = IsolationForest(max_samples=500, random_state=42) assert_warns_message(UserWarning, "max_samples will be set to n_samples for estimation", clf.fit, X) assert_equal(clf.max_samples_, X.shape[0]) - clf = IsolationForest(max_samples=0.4).fit(X) + clf = IsolationForest(max_samples=0.4, random_state=42).fit(X) assert_equal(clf.max_samples_, 0.4*X.shape[0]) @@ -198,7 +203,7 @@ def test_iforest_works(): def test_max_samples_consistency(): # Make sure validated max_samples in iforest and BaseBagging are identical X = iris.data - clf = IsolationForest().fit(X) + clf = IsolationForest(random_state=42).fit(X) assert_equal(clf.max_samples_, clf._max_samples) @@ -208,6 +213,6 @@ def test_iforest_subsampled_features(): X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], boston.target[:50], random_state=rng) - clf = IsolationForest(max_features=0.8) + clf = IsolationForest(max_features=0.8, random_state=42) clf.fit(X_train, y_train) clf.predict(X_test) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index cec7efc46f03b..30e2b253bac31 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -92,7 +92,7 @@ def test_partial_dependecy_input(): # Gradient boosting estimator must be fit assert_raises(ValueError, partial_dependence, - GradientBoostingClassifier(), [0], X=X) + GradientBoostingClassifier(random_state=42), [0], X=X) assert_raises(ValueError, partial_dependence, clf, [-1], X=X) diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index 2ad007741940c..4558f0786d21b 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -42,16 +42,20 @@ def test_estimator_init(): def test_predictproba_hardvoting(): - eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), - ('lr2', LogisticRegression())], + eclf = VotingClassifier(estimators=[('lr1', + LogisticRegression(random_state=42)), + ('lr2', + LogisticRegression(random_state=42))], voting='hard') msg = 
"predict_proba is not available when voting='hard'" assert_raise_message(AttributeError, msg, eclf.predict_proba, X) def test_notfitted(): - eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), - ('lr2', LogisticRegression())], + eclf = VotingClassifier(estimators=[('lr1', + LogisticRegression(random_state=42)), + ('lr2', + LogisticRegression(random_state=42))], voting='soft') msg = ("This VotingClassifier instance is not fitted yet. Call \'fit\'" " with appropriate arguments before using this method.") @@ -183,7 +187,7 @@ def test_multilabel(): X, y = make_multilabel_classification(n_classes=2, n_labels=1, allow_unlabeled=False, random_state=123) - clf = OneVsRestClassifier(SVC(kernel='linear')) + clf = OneVsRestClassifier(SVC(kernel='linear', random_state=42)) eclf = VotingClassifier(estimators=[('ovr', clf)], voting='hard') diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 5099b729de39f..3c70defcb8dd8 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -79,7 +79,7 @@ def test_oneclass_adaboost_proba(): # In response to issue #7501 # https://github.com/scikit-learn/scikit-learn/issues/7501 y_t = np.ones(len(X)) - clf = AdaBoostClassifier().fit(X, y_t) + clf = AdaBoostClassifier(random_state=42).fit(X, y_t) assert_array_equal(clf.predict_proba(X), np.ones((len(X), 1))) @@ -107,7 +107,7 @@ def test_iris(): clf_samme = prob_samme = None for alg in ['SAMME', 'SAMME.R']: - clf = AdaBoostClassifier(algorithm=alg) + clf = AdaBoostClassifier(algorithm=alg, random_state=42) clf.fit(iris.data, iris.target) assert_array_equal(classes, clf.classes_) @@ -158,7 +158,8 @@ def test_staged_predict(): # AdaBoost classification for alg in ['SAMME', 'SAMME.R']: - clf = AdaBoostClassifier(algorithm=alg, n_estimators=10) + clf = AdaBoostClassifier(algorithm=alg, n_estimators=10, + random_state=42) clf.fit(iris.data, iris.target, sample_weight=iris_weights) predictions = clf.predict(iris.data) @@ -197,7 +198,9 @@ def test_staged_predict(): def test_gridsearch(): # Check that base trees can be grid-searched. # AdaBoost classification - boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier()) + boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier( + random_state=42), + random_state=42) parameters = {'n_estimators': (1, 2), 'base_estimator__max_depth': (1, 2), 'algorithm': ('SAMME', 'SAMME.R')} @@ -205,7 +208,8 @@ def test_gridsearch(): clf.fit(iris.data, iris.target) # AdaBoost regression - boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(), + boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor( + random_state=42), random_state=0) parameters = {'n_estimators': (1, 2), 'base_estimator__max_depth': (1, 2)} @@ -219,7 +223,7 @@ def test_pickle(): # Adaboost classifier for alg in ['SAMME', 'SAMME.R']: - obj = AdaBoostClassifier(algorithm=alg) + obj = AdaBoostClassifier(algorithm=alg, random_state=42) obj.fit(iris.data, iris.target) score = obj.score(iris.data, iris.target) s = pickle.dumps(obj) @@ -252,7 +256,7 @@ def test_importances(): random_state=1) for alg in ['SAMME', 'SAMME.R']: - clf = AdaBoostClassifier(algorithm=alg) + clf = AdaBoostClassifier(algorithm=alg, random_state=42) clf.fit(X, y) importances = clf.feature_importances_ @@ -265,15 +269,15 @@ def test_importances(): def test_error(): # Test that it gives proper exception on deficient input. 
assert_raises(ValueError, - AdaBoostClassifier(learning_rate=-1).fit, + AdaBoostClassifier(learning_rate=-1, random_state=42).fit, X, y_class) assert_raises(ValueError, - AdaBoostClassifier(algorithm="foo").fit, + AdaBoostClassifier(algorithm="foo", random_state=42).fit, X, y_class) assert_raises(ValueError, - AdaBoostClassifier().fit, + AdaBoostClassifier(random_state=42).fit, X, y_class, sample_weight=np.asarray([-1])) @@ -284,16 +288,19 @@ def test_base_estimator(): # XXX doesn't work with y_class because RF doesn't support classes_ # Shouldn't AdaBoost run a LabelBinarizer? - clf = AdaBoostClassifier(RandomForestClassifier()) + clf = AdaBoostClassifier(RandomForestClassifier(random_state=42), + random_state=42) clf.fit(X, y_regr) - clf = AdaBoostClassifier(SVC(), algorithm="SAMME") + clf = AdaBoostClassifier(SVC(random_state=42), + algorithm="SAMME", random_state=42) clf.fit(X, y_class) from sklearn.ensemble import RandomForestRegressor from sklearn.svm import SVR - clf = AdaBoostRegressor(RandomForestRegressor(), random_state=0) + clf = AdaBoostRegressor(RandomForestRegressor(random_state=42), + random_state=0) clf.fit(X, y_regr) clf = AdaBoostRegressor(SVR(), random_state=0) @@ -302,7 +309,8 @@ def test_base_estimator(): # Check that an empty discrete ensemble fails in fit, not predict. X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]] y_fail = ["foo", "bar", 1, 2] - clf = AdaBoostClassifier(SVC(), algorithm="SAMME") + clf = AdaBoostClassifier(SVC(random_state=42), + algorithm="SAMME", random_state=42) assert_raises_regexp(ValueError, "worse than random", clf.fit, X_fail, y_fail) @@ -311,10 +319,10 @@ def test_sample_weight_missing(): from sklearn.linear_model import LogisticRegression from sklearn.cluster import KMeans - clf = AdaBoostClassifier(KMeans(), algorithm="SAMME") + clf = AdaBoostClassifier(KMeans(), algorithm="SAMME", random_state=42) assert_raises(ValueError, clf.fit, X, y_regr) - clf = AdaBoostRegressor(KMeans()) + clf = AdaBoostRegressor(KMeans(), random_state=42) assert_raises(ValueError, clf.fit, X, y_regr) diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 6c57788efe904..688f347f9049e 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -135,7 +135,7 @@ def test_extract_patches_all(): i_h, i_w = face.shape p_h, p_w = 16, 16 expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) - patches = extract_patches_2d(face, (p_h, p_w)) + patches = extract_patches_2d(face, (p_h, p_w), random_state=42) assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) @@ -144,7 +144,7 @@ def test_extract_patches_all_color(): i_h, i_w = face.shape[:2] p_h, p_w = 16, 16 expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) - patches = extract_patches_2d(face, (p_h, p_w)) + patches = extract_patches_2d(face, (p_h, p_w), random_state=42) assert_equal(patches.shape, (expected_n_patches, p_h, p_w, 3)) @@ -155,7 +155,7 @@ def test_extract_patches_all_rect(): p_h, p_w = 16, 12 expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) - patches = extract_patches_2d(face, (p_h, p_w)) + patches = extract_patches_2d(face, (p_h, p_w), random_state=42) assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) @@ -164,11 +164,13 @@ def test_extract_patches_max_patches(): i_h, i_w = face.shape p_h, p_w = 16, 16 - patches = extract_patches_2d(face, (p_h, p_w), max_patches=100) + patches = extract_patches_2d(face, (p_h, p_w), max_patches=100, + random_state=42) 
assert_equal(patches.shape, (100, p_h, p_w)) expected_n_patches = int(0.5 * (i_h - p_h + 1) * (i_w - p_w + 1)) - patches = extract_patches_2d(face, (p_h, p_w), max_patches=0.5) + patches = extract_patches_2d(face, (p_h, p_w), max_patches=0.5, + random_state=42) assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) assert_raises(ValueError, extract_patches_2d, face, (p_h, p_w), @@ -181,7 +183,7 @@ def test_reconstruct_patches_perfect(): face = downsampled_face p_h, p_w = 16, 16 - patches = extract_patches_2d(face, (p_h, p_w)) + patches = extract_patches_2d(face, (p_h, p_w), random_state=42) face_reconstructed = reconstruct_from_patches_2d(patches, face.shape) np.testing.assert_array_almost_equal(face, face_reconstructed) @@ -190,7 +192,7 @@ def test_reconstruct_patches_perfect_color(): face = orange_face p_h, p_w = 16, 16 - patches = extract_patches_2d(face, (p_h, p_w)) + patches = extract_patches_2d(face, (p_h, p_w), random_state=42) face_reconstructed = reconstruct_from_patches_2d(patches, face.shape) np.testing.assert_array_almost_equal(face, face_reconstructed) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 88382f7d13c0b..c54d67ffbc7ca 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -708,7 +708,7 @@ def test_count_vectorizer_pipeline_grid_selection(): data, target, test_size=.2, random_state=0) pipeline = Pipeline([('vect', CountVectorizer()), - ('svc', LinearSVC())]) + ('svc', LinearSVC(random_state=42))]) parameters = { 'vect__ngram_range': [(1, 1), (1, 2)], @@ -744,7 +744,7 @@ def test_vectorizer_pipeline_grid_selection(): data, target, test_size=.1, random_state=0) pipeline = Pipeline([('vect', TfidfVectorizer()), - ('svc', LinearSVC())]) + ('svc', LinearSVC(random_state=42))]) parameters = { 'vect__ngram_range': [(1, 1), (1, 2)], @@ -779,7 +779,7 @@ def test_vectorizer_pipeline_cross_validation(): target = [-1] * len(JUNK_FOOD_DOCS) + [1] * len(NOTJUNK_FOOD_DOCS) pipeline = Pipeline([('vect', TfidfVectorizer()), - ('svc', LinearSVC())]) + ('svc', LinearSVC(random_state=42))]) cv_scores = cross_val_score(pipeline, data, target, cv=3) assert_array_equal(cv_scores, [1., 1., 1.]) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 6567cc3d16493..834c5ad77508b 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -588,7 +588,7 @@ def test_invalid_k(): def test_f_classif_constant_feature(): # Test that f_classif warns if a feature is constant throughout. - X, y = make_classification(n_samples=10, n_features=5) + X, y = make_classification(n_samples=10, n_features=5, random_state=42) X[:, 0] = 2.0 assert_warns(UserWarning, f_classif, X, y) diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 6efb6f405bb1c..8235502b9b1c5 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -35,7 +35,7 @@ def test_input_estimator_unchanged(): """ Test that SelectFromModel fits on a clone of the estimator. 
""" - est = RandomForestClassifier() + est = RandomForestClassifier(random_state=42) transformer = SelectFromModel(estimator=est) transformer.fit(data, y) assert_true(transformer.estimator is est) @@ -73,7 +73,7 @@ def test_feature_importances(): assert_almost_equal(importances, importances_bis) # For the Lasso and related models, the threshold defaults to 1e-5 - transformer = SelectFromModel(estimator=Lasso(alpha=0.1)) + transformer = SelectFromModel(estimator=Lasso(alpha=0.1, random_state=42)) transformer.fit(X, y) X_new = transformer.transform(X) mask = np.abs(transformer.estimator_.coef_) > 1e-5 @@ -86,11 +86,12 @@ def test_feature_importances_2d_coef(): n_samples=1000, n_features=10, n_informative=3, n_redundant=0, n_repeated=0, shuffle=False, random_state=0, n_classes=4) - est = LogisticRegression() + est = LogisticRegression(random_state=42) for threshold, func in zip(["mean", "median"], [np.mean, np.median]): for order in [1, 2, np.inf]: # Fit SelectFromModel a multi-class problem - transformer = SelectFromModel(estimator=LogisticRegression(), + transformer = SelectFromModel(estimator=LogisticRegression( + random_state=42), threshold=threshold, norm_order=order) transformer.fit(X, y) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index c4486c937f170..5b2a6d0724a94 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -122,7 +122,7 @@ def test_mutual_info_classif_discrete(): # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly # informative. - mi = mutual_info_classif(X, y, discrete_features=True) + mi = mutual_info_classif(X, y, discrete_features=True, random_state=42) assert_array_equal(np.argsort(-mi), np.array([0, 2, 1])) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 452a36de200e6..8f1c8ae29b44f 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -66,7 +66,7 @@ def test_rfe_features_importance(): rfe.fit(X, y) assert_equal(len(rfe.ranking_), X.shape[1]) - clf_svc = SVC(kernel="linear") + clf_svc = SVC(kernel="linear", random_state=42) rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1) rfe_svc.fit(X, y) @@ -82,7 +82,7 @@ def test_rfe(): y = iris.target # dense model - clf = SVC(kernel="linear") + clf = SVC(kernel="linear", random_state=42) rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1) rfe.fit(X, y) X_r = rfe.transform(X) @@ -90,7 +90,7 @@ def test_rfe(): assert_equal(len(rfe.ranking_), X.shape[1]) # sparse model - clf_sparse = SVC(kernel="linear") + clf_sparse = SVC(kernel="linear", random_state=42) rfe_sparse = RFE(estimator=clf_sparse, n_features_to_select=4, step=0.1) rfe_sparse.fit(X_sparse, y) X_r_sparse = rfe_sparse.transform(X_sparse) @@ -126,7 +126,8 @@ def test_rfecv(): y = list(iris.target) # regression test: list should be supported # Test using the score function - rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5) + rfecv = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=1, cv=5) rfecv.fit(X, y) # non-regression test for missing worst feature: assert_equal(len(rfecv.grid_scores_), X.shape[1]) @@ -137,7 +138,8 @@ def test_rfecv(): assert_array_equal(X_r, iris.data) # same in sparse - rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5) + rfecv_sparse = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=1, cv=5) 
X_sparse = sparse.csr_matrix(X) rfecv_sparse.fit(X_sparse, y) X_r_sparse = rfecv_sparse.transform(X_sparse) @@ -145,7 +147,8 @@ def test_rfecv(): # Test using a customized loss function scoring = make_scorer(zero_one_loss, greater_is_better=False) - rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, + rfecv = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=1, cv=5, scoring=scoring) ignore_warnings(rfecv.fit)(X, y) X_r = rfecv.transform(X) @@ -153,7 +156,8 @@ def test_rfecv(): # Test using a scorer scorer = get_scorer('accuracy') - rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, + rfecv = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=1, cv=5, scoring=scorer) rfecv.fit(X, y) X_r = rfecv.transform(X) @@ -162,13 +166,15 @@ def test_rfecv(): # Test fix on grid_scores def test_scorer(estimator, X, y): return 1.0 - rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, + rfecv = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=1, cv=5, scoring=test_scorer) rfecv.fit(X, y) assert_array_equal(rfecv.grid_scores_, np.ones(len(rfecv.grid_scores_))) # Same as the first two tests, but with step=2 - rfecv = RFECV(estimator=SVC(kernel="linear"), step=2, cv=5) + rfecv = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=2, cv=5) rfecv.fit(X, y) assert_equal(len(rfecv.grid_scores_), 6) assert_equal(len(rfecv.ranking_), X.shape[1]) @@ -182,7 +188,8 @@ def test_scorer(estimator, X, y): assert_array_equal(X_r_sparse.toarray(), iris.data) # Verifying that steps < 1 don't blow up. - rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=.2, cv=5) + rfecv_sparse = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=.2, cv=5) X_sparse = sparse.csr_matrix(X) rfecv_sparse.fit(X_sparse, y) X_r_sparse = rfecv_sparse.transform(X_sparse) @@ -214,7 +221,8 @@ def test_rfecv_verbose_output(): X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))] y = list(iris.target) - rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, verbose=1) + rfecv = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=1, cv=5, verbose=1) rfecv.fit(X, y) verbose_output = sys.stdout @@ -223,7 +231,7 @@ def test_rfecv_verbose_output(): def test_rfe_estimator_tags(): - rfe = RFE(SVC(kernel='linear')) + rfe = RFE(SVC(kernel='linear', random_state=42)) assert_equal(rfe._estimator_type, "classifier") # make sure that cross-validation is stratified iris = load_iris() @@ -279,7 +287,7 @@ def formula2(n_features, n_features_to_select, step): generator = check_random_state(43) X = generator.normal(size=(100, n_features)) y = generator.rand(100).round() - rfe = RFE(estimator=SVC(kernel="linear"), + rfe = RFE(estimator=SVC(kernel="linear", random_state=42), n_features_to_select=n_features_to_select, step=step) rfe.fit(X, y) # this number also equals to the maximum of ranking_ @@ -304,7 +312,8 @@ def formula2(n_features, n_features_to_select, step): generator = check_random_state(43) X = generator.normal(size=(100, n_features)) y = generator.rand(100).round() - rfecv = RFECV(estimator=SVC(kernel="linear"), step=step, cv=5) + rfecv = RFECV(estimator=SVC(kernel="linear", random_state=42), + step=step, cv=5) rfecv.fit(X, y) assert_equal(rfecv.grid_scores_.shape[0], @@ -319,7 +328,7 @@ def test_rfe_cv_n_jobs(): X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))] y = iris.target - rfecv = RFECV(estimator=SVC(kernel='linear')) + rfecv = RFECV(estimator=SVC(kernel='linear', random_state=42)) rfecv.fit(X, y) rfecv_ranking = rfecv.ranking_ 
rfecv_grid_scores = rfecv.grid_scores_ diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index 16b2507e45f18..a87f8d58c1dd1 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -36,7 +36,8 @@ def f(x): def test_predict_consistent(): # Check binary predict decision has also predicted probability above 0.5. for kernel in kernels: - gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y) + gpc = GaussianProcessClassifier(kernel=kernel, + random_state=42).fit(X, y) assert_array_equal(gpc.predict(X), gpc.predict_proba(X)[:, 1] >= 0.5) @@ -46,7 +47,8 @@ def test_lml_improving(): for kernel in kernels: if kernel == fixed_kernel: continue - gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y) + gpc = GaussianProcessClassifier(kernel=kernel, + random_state=42).fit(X, y) assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta), gpc.log_marginal_likelihood(kernel.theta)) @@ -54,7 +56,8 @@ def test_lml_improving(): def test_lml_precomputed(): # Test that lml of optimized kernel is stored correctly. for kernel in kernels: - gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y) + gpc = GaussianProcessClassifier(kernel=kernel, + random_state=42).fit(X, y) assert_almost_equal(gpc.log_marginal_likelihood(gpc.kernel_.theta), gpc.log_marginal_likelihood(), 7) @@ -64,7 +67,8 @@ def test_converged_to_local_maximum(): for kernel in kernels: if kernel == fixed_kernel: continue - gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y) + gpc = GaussianProcessClassifier(kernel=kernel, + random_state=42).fit(X, y) lml, lml_gradient = \ gpc.log_marginal_likelihood(gpc.kernel_.theta, True) @@ -77,7 +81,8 @@ def test_converged_to_local_maximum(): def test_lml_gradient(): # Compare analytic and numeric gradient of log marginal likelihood. for kernel in kernels: - gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y) + gpc = GaussianProcessClassifier(kernel=kernel, + random_state=42).fit(X, y) lml, lml_gradient = gpc.log_marginal_likelihood(kernel.theta, True) lml_gradient_approx = \ @@ -129,7 +134,8 @@ def optimizer(obj_func, initial_theta, bounds): for kernel in kernels: if kernel == fixed_kernel: continue - gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer) + gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer, + random_state=42) gpc.fit(X, y_mc) # Checks that optimizer improved marginal likelihood assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta), @@ -139,7 +145,7 @@ def optimizer(obj_func, initial_theta, bounds): def test_multi_class(): # Test GPC for multi-class classification problems. for kernel in kernels: - gpc = GaussianProcessClassifier(kernel=kernel) + gpc = GaussianProcessClassifier(kernel=kernel, random_state=42) gpc.fit(X, y_mc) y_prob = gpc.predict_proba(X2) @@ -152,10 +158,11 @@ def test_multi_class(): def test_multi_class_n_jobs(): # Test that multi-class GPC produces identical results with n_jobs>1. 
     for kernel in kernels:
-        gpc = GaussianProcessClassifier(kernel=kernel)
+        gpc = GaussianProcessClassifier(kernel=kernel, random_state=42)
         gpc.fit(X, y_mc)
-        gpc_2 = GaussianProcessClassifier(kernel=kernel, n_jobs=2)
+        gpc_2 = GaussianProcessClassifier(kernel=kernel, n_jobs=2,
+                                          random_state=42)
         gpc_2.fit(X, y_mc)
         y_prob = gpc.predict_proba(X2)
diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py
index 6324dcb1838bc..97875ac56dd27 100644
--- a/sklearn/gaussian_process/tests/test_gpr.py
+++ b/sklearn/gaussian_process/tests/test_gpr.py
@@ -38,7 +38,8 @@ def f(x):
 def test_gpr_interpolation():
     # Test the interpolating property for different kernels.
     for kernel in kernels:
-        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
+        gpr = GaussianProcessRegressor(
+            kernel=kernel, random_state=42).fit(X, y)
         y_pred, y_cov = gpr.predict(X, return_cov=True)
         assert_almost_equal(y_pred, y)
@@ -50,7 +51,8 @@ def test_lml_improving():
     for kernel in kernels:
         if kernel == fixed_kernel:
             continue
-        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
+        gpr = GaussianProcessRegressor(
+            kernel=kernel, random_state=42).fit(X, y)
         assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta),
                        gpr.log_marginal_likelihood(kernel.theta))
@@ -58,7 +60,8 @@ def test_lml_precomputed():
     # Test that lml of optimized kernel is stored correctly.
     for kernel in kernels:
-        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
+        gpr = GaussianProcessRegressor(
+            kernel=kernel, random_state=42).fit(X, y)
         assert_equal(gpr.log_marginal_likelihood(gpr.kernel_.theta),
                      gpr.log_marginal_likelihood())
@@ -68,7 +71,8 @@ def test_converged_to_local_maximum():
     for kernel in kernels:
         if kernel == fixed_kernel:
             continue
-        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
+        gpr = GaussianProcessRegressor(
+            kernel=kernel, random_state=42).fit(X, y)
         lml, lml_gradient = \
             gpr.log_marginal_likelihood(gpr.kernel_.theta, True)
@@ -83,7 +87,8 @@ def test_solution_inside_bounds():
     for kernel in kernels:
         if kernel == fixed_kernel:
             continue
-        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
+        gpr = GaussianProcessRegressor(
+            kernel=kernel, random_state=42).fit(X, y)
         bounds = gpr.kernel_.bounds
         max_ = np.finfo(gpr.kernel_.theta.dtype).max
@@ -97,7 +102,8 @@ def test_lml_gradient():
     # Compare analytic and numeric gradient of log marginal likelihood.
     for kernel in kernels:
-        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
+        gpr = GaussianProcessRegressor(
+            kernel=kernel, random_state=42).fit(X, y)
         lml, lml_gradient = gpr.log_marginal_likelihood(kernel.theta, True)
         lml_gradient_approx = \
@@ -112,7 +118,7 @@ def test_prior():
     # Test that GP prior has mean 0 and identical variances.
     for kernel in kernels:
-        gpr = GaussianProcessRegressor(kernel=kernel)
+        gpr = GaussianProcessRegressor(kernel=kernel, random_state=42)
         y_mean, y_cov = gpr.predict(X, return_cov=True)
@@ -127,7 +133,8 @@ def test_sample_statistics():
     # Test that statistics of samples drawn from GP are correct.
     for kernel in kernels:
-        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
+        gpr = GaussianProcessRegressor(
+            kernel=kernel, random_state=42).fit(X, y)
         y_mean, y_cov = gpr.predict(X2, return_cov=True)
@@ -142,14 +149,16 @@ def test_sample_statistics():
 def test_no_optimizer():
     # Test that kernel parameters are unmodified when optimizer is None.
kernel = RBF(1.0) - gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None).fit(X, y) + gpr = GaussianProcessRegressor( + kernel=kernel, optimizer=None, random_state=42).fit(X, y) assert_equal(np.exp(gpr.kernel_.theta), 1.0) def test_predict_cov_vs_std(): # Test that predicted std.-dev. is consistent with cov's diagonal. for kernel in kernels: - gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) + gpr = GaussianProcessRegressor( + kernel=kernel, random_state=42).fit(X, y) y_mean, y_cov = gpr.predict(X2, return_cov=True) y_mean, y_std = gpr.predict(X2, return_std=True) assert_almost_equal(np.sqrt(np.diag(y_cov)), y_std) @@ -165,7 +174,8 @@ def test_anisotropic_kernel(): y = X[:, 0] + 0.1 * X[:, 1] kernel = RBF([1.0, 1.0]) - gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) + gpr = GaussianProcessRegressor( + kernel=kernel, random_state=42).fit(X, y) assert_greater(np.exp(gpr.kernel_.theta[1]), np.exp(gpr.kernel_.theta[0]) * 5) @@ -203,10 +213,11 @@ def test_y_normalization(): y_norm = y - y_mean for kernel in kernels: # Fit non-normalizing GP on normalized y - gpr = GaussianProcessRegressor(kernel=kernel) + gpr = GaussianProcessRegressor(kernel=kernel, random_state=42) gpr.fit(X, y_norm) # Fit normalizing GP on unnormalized y - gpr_norm = GaussianProcessRegressor(kernel=kernel, normalize_y=True) + gpr_norm = GaussianProcessRegressor( + kernel=kernel, normalize_y=True, random_state=42) gpr_norm.fit(X, y) # Compare predicted mean, std-devs and covariances @@ -231,11 +242,13 @@ def test_y_multioutput(): kernel = RBF(length_scale=1.0) gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None, - normalize_y=False) + normalize_y=False, + random_state=42) gpr.fit(X, y) gpr_2d = GaussianProcessRegressor(kernel=kernel, optimizer=None, - normalize_y=False) + normalize_y=False, + random_state=42) gpr_2d.fit(X, y_2d) y_pred_1d, y_std_1d = gpr.predict(X2, return_std=True) @@ -256,10 +269,13 @@ def test_y_multioutput(): # Test hyperparameter optimization for kernel in kernels: - gpr = GaussianProcessRegressor(kernel=kernel, normalize_y=True) + gpr = GaussianProcessRegressor( + kernel=kernel, normalize_y=True, random_state=42) gpr.fit(X, y) - gpr_2d = GaussianProcessRegressor(kernel=kernel, normalize_y=True) + gpr_2d = GaussianProcessRegressor( + kernel=kernel, normalize_y=True, + random_state=42) gpr_2d.fit(X, np.vstack((y, y)).T) assert_almost_equal(gpr.kernel_.theta, gpr_2d.kernel_.theta, 4) @@ -283,7 +299,8 @@ def optimizer(obj_func, initial_theta, bounds): for kernel in kernels: if kernel == fixed_kernel: continue - gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer) + gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer, + random_state=42) gpr.fit(X, y) # Checks that optimizer improved marginal likelihood assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta), @@ -294,9 +311,11 @@ def test_duplicate_input(): # Test GPR can handle two different output-values for the same input. 
for kernel in kernels: gpr_equal_inputs = \ - GaussianProcessRegressor(kernel=kernel, alpha=1e-2) + GaussianProcessRegressor(kernel=kernel, alpha=1e-2, + random_state=42) gpr_similar_inputs = \ - GaussianProcessRegressor(kernel=kernel, alpha=1e-2) + GaussianProcessRegressor(kernel=kernel, alpha=1e-2, + random_state=42) X_ = np.vstack((X, X[0])) y_ = np.hstack((y, y[0] + 1)) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 4c7b326f24f44..65d18bfb9f43f 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -317,7 +317,7 @@ def test_sparse_preprocess_data_with_return_mean(): def test_csr_preprocess_data(): # Test output format of _preprocess_data, when input is csr - X, y = make_regression() + X, y = make_regression(random_state=42) X[X < 2.5] = 0.0 csr = sparse.csr_matrix(X) csr_, y, _, _, _ = _preprocess_data(csr, y, True) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 13f3a999d8434..3e44be7ba2d4a 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -36,7 +36,7 @@ def test_lasso_zero(): # Check that the lasso can handle zero data without crashing X = [[0], [0], [0]] y = [0, 0, 0] - clf = Lasso(alpha=0.1).fit(X, y) + clf = Lasso(alpha=0.1, random_state=42).fit(X, y) pred = clf.predict([[1], [2], [3]]) assert_array_almost_equal(clf.coef_, [0]) assert_array_almost_equal(pred, [0, 0, 0]) @@ -52,28 +52,28 @@ def test_lasso_toy(): Y = [-1, 0, 1] # just a straight line T = [[2], [3], [4]] # test sample - clf = Lasso(alpha=1e-8) + clf = Lasso(alpha=1e-8, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [1]) assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) - clf = Lasso(alpha=0.1) + clf = Lasso(alpha=0.1, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [.85]) assert_array_almost_equal(pred, [1.7, 2.55, 3.4]) assert_almost_equal(clf.dual_gap_, 0) - clf = Lasso(alpha=0.5) + clf = Lasso(alpha=0.5, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [.25]) assert_array_almost_equal(pred, [0.5, 0.75, 1.]) assert_almost_equal(clf.dual_gap_, 0) - clf = Lasso(alpha=1) + clf = Lasso(alpha=1, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [.0]) @@ -92,7 +92,7 @@ def test_enet_toy(): T = [[2.], [3.], [4.]] # test sample # this should be the same as lasso - clf = ElasticNet(alpha=1e-8, l1_ratio=1.0) + clf = ElasticNet(alpha=1e-8, l1_ratio=1.0, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [1]) @@ -100,7 +100,7 @@ def test_enet_toy(): assert_almost_equal(clf.dual_gap_, 0) clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=100, - precompute=False) + precompute=False, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) @@ -121,7 +121,7 @@ def test_enet_toy(): assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) assert_almost_equal(clf.dual_gap_, 0) - clf = ElasticNet(alpha=0.5, l1_ratio=0.5) + clf = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.45454], 3) @@ -151,10 +151,12 @@ def build_dataset(n_samples=50, n_features=200, n_informative_features=10, def 
test_lasso_cv(): X, y, X_test, y_test = build_dataset() max_iter = 150 - clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y) + clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, + random_state=42).fit(X, y) assert_almost_equal(clf.alpha_, 0.056, 2) - clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True) + clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True, + random_state=42) clf.fit(X, y) assert_almost_equal(clf.alpha_, 0.056, 2) @@ -181,13 +183,15 @@ def test_lasso_cv_positive_constraint(): # Ensure the unconstrained fit has a negative coefficient clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2, - n_jobs=1) + n_jobs=1, + random_state=42) clf_unconstrained.fit(X, y) assert_true(min(clf_unconstrained.coef_) < 0) # On same data, constrained fit has non-negative coefficients clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, - positive=True, cv=2, n_jobs=1) + positive=True, cv=2, n_jobs=1, + random_state=42) clf_constrained.fit(X, y) assert_true(min(clf_constrained.coef_) >= 0) @@ -227,7 +231,7 @@ def test_enet_path(): # ElasticNet might not converge. This is to speed up tests clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3, l1_ratio=[0.5, 0.7], cv=3, - max_iter=max_iter) + max_iter=max_iter, random_state=42) ignore_warnings(clf.fit)(X, y) # Well-conditioned settings, we should have selected our # smallest penalty @@ -238,7 +242,8 @@ def test_enet_path(): clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3, l1_ratio=[0.5, 0.7], cv=3, - max_iter=max_iter, precompute=True) + max_iter=max_iter, precompute=True, + random_state=42) ignore_warnings(clf.fit)(X, y) # Well-conditioned settings, we should have selected our @@ -255,7 +260,8 @@ def test_enet_path(): # Multi-output/target case X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3) clf = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], - cv=3, max_iter=max_iter) + cv=3, max_iter=max_iter, + random_state=42) ignore_warnings(clf.fit)(X, y) # We are in well-conditioned settings with low noise: we should # have a good test-set performance @@ -265,9 +271,11 @@ def test_enet_path(): # Mono-output should have same cross-validated alpha_ and l1_ratio_ # in both cases. 
X, y, _, _ = build_dataset(n_features=10) - clf1 = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7]) + clf1 = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], + random_state=42) clf1.fit(X, y) - clf2 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7]) + clf2 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], + random_state=42) clf2.fit(X, y[:, np.newaxis]) assert_almost_equal(clf1.l1_ratio_, clf2.l1_ratio_) assert_almost_equal(clf1.alpha_, clf2.alpha_) @@ -278,7 +286,7 @@ def test_path_parameters(): max_iter = 100 clf = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=max_iter, - l1_ratio=0.5, tol=1e-3) + l1_ratio=0.5, tol=1e-3, random_state=42) clf.fit(X, y) # new params assert_almost_equal(0.5, clf.l1_ratio) assert_equal(50, clf.n_alphas) @@ -287,11 +295,11 @@ def test_path_parameters(): def test_warm_start(): X, y, _, _ = build_dataset() - clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True) + clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True, random_state=42) ignore_warnings(clf.fit)(X, y) ignore_warnings(clf.fit)(X, y) # do a second round with 5 iterations - clf2 = ElasticNet(alpha=0.1, max_iter=10) + clf2 = ElasticNet(alpha=0.1, max_iter=10, random_state=42) ignore_warnings(clf2.fit)(X, y) assert_array_almost_equal(clf2.coef_, clf.coef_) @@ -300,7 +308,7 @@ def test_lasso_alpha_warning(): X = [[-1], [0], [1]] Y = [-1, 0, 1] # just a straight line - clf = Lasso(alpha=0) + clf = Lasso(alpha=0, random_state=42) assert_warns(UserWarning, clf.fit, X, Y) @@ -308,11 +316,12 @@ def test_lasso_positive_constraint(): X = [[-1], [0], [1]] y = [1, 0, -1] # just a straight line with negative slope - lasso = Lasso(alpha=0.1, max_iter=1000, positive=True) + lasso = Lasso(alpha=0.1, max_iter=1000, positive=True, random_state=42) lasso.fit(X, y) assert_true(min(lasso.coef_) >= 0) - lasso = Lasso(alpha=0.1, max_iter=1000, precompute=True, positive=True) + lasso = Lasso(alpha=0.1, max_iter=1000, precompute=True, positive=True, + random_state=42) lasso.fit(X, y) assert_true(min(lasso.coef_) >= 0) @@ -321,7 +330,7 @@ def test_enet_positive_constraint(): X = [[-1], [0], [1]] y = [1, 0, -1] # just a straight line with negative slope - enet = ElasticNet(alpha=0.1, max_iter=1000, positive=True) + enet = ElasticNet(alpha=0.1, max_iter=1000, positive=True, random_state=42) enet.fit(X, y) assert_true(min(enet.coef_) >= 0) @@ -333,22 +342,27 @@ def test_enet_cv_positive_constraint(): # Ensure the unconstrained fit has a negative coefficient enetcv_unconstrained = ElasticNetCV(n_alphas=3, eps=1e-1, max_iter=max_iter, - cv=2, n_jobs=1) + cv=2, n_jobs=1, + random_state=42) enetcv_unconstrained.fit(X, y) assert_true(min(enetcv_unconstrained.coef_) < 0) # On same data, constrained fit has non-negative coefficients enetcv_constrained = ElasticNetCV(n_alphas=3, eps=1e-1, max_iter=max_iter, - cv=2, positive=True, n_jobs=1) + cv=2, positive=True, n_jobs=1, + random_state=42) enetcv_constrained.fit(X, y) assert_true(min(enetcv_constrained.coef_) >= 0) def test_uniform_targets(): - enet = ElasticNetCV(fit_intercept=True, n_alphas=3) - m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3) - lasso = LassoCV(fit_intercept=True, n_alphas=3) - m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3) + enet = ElasticNetCV(fit_intercept=True, n_alphas=3, random_state=42) + m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3, + random_state=42) + lasso = LassoCV(fit_intercept=True, n_alphas=3, + random_state=42) + m_lasso = MultiTaskLassoCV(fit_intercept=True, 
n_alphas=3, + random_state=42) models_single_task = (enet, lasso) models_multi_task = (m_enet, m_lasso) @@ -379,15 +393,15 @@ def test_multi_task_lasso_and_enet(): X, y, X_test, y_test = build_dataset() Y = np.c_[y, y] # Y_test = np.c_[y_test, y_test] - clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y) + clf = MultiTaskLasso(alpha=1, tol=1e-8, random_state=42).fit(X, Y) assert_true(0 < clf.dual_gap_ < 1e-5) assert_array_almost_equal(clf.coef_[0], clf.coef_[1]) - clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y) + clf = MultiTaskElasticNet(alpha=1, tol=1e-8, random_state=42).fit(X, Y) assert_true(0 < clf.dual_gap_ < 1e-5) assert_array_almost_equal(clf.coef_[0], clf.coef_[1]) - clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1) + clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1, random_state=42) assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit, X, Y) @@ -396,7 +410,7 @@ def test_lasso_readonly_data(): Y = np.array([-1, 0, 1]) # just a straight line T = np.array([[2], [3], [4]]) # test sample with TempMemmap((X, Y)) as (X, Y): - clf = Lasso(alpha=0.5) + clf = Lasso(alpha=0.5, random_state=42) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [.25]) @@ -409,7 +423,7 @@ def test_multi_task_lasso_readonly_data(): Y = np.c_[y, y] with TempMemmap((X, Y)) as (X, Y): Y = np.c_[y, y] - clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y) + clf = MultiTaskLasso(alpha=1, tol=1e-8, random_state=42).fit(X, Y) assert_true(0 < clf.dual_gap_ < 1e-5) assert_array_almost_equal(clf.coef_[0], clf.coef_[1]) @@ -418,7 +432,7 @@ def test_enet_multitarget(): n_targets = 3 X, y, _, _ = build_dataset(n_samples=10, n_features=8, n_informative_features=10, n_targets=n_targets) - estimator = ElasticNet(alpha=0.01, fit_intercept=True) + estimator = ElasticNet(alpha=0.01, fit_intercept=True, random_state=42) estimator.fit(X, y) coef, intercept, dual_gap = (estimator.coef_, estimator.intercept_, estimator.dual_gap_) @@ -434,20 +448,20 @@ def test_multioutput_enetcv_error(): rng = np.random.RandomState(0) X = rng.randn(10, 2) y = rng.randn(10, 2) - clf = ElasticNetCV() + clf = ElasticNetCV(random_state=42) assert_raises(ValueError, clf.fit, X, y) def test_multitask_enet_and_lasso_cv(): X, y, _, _ = build_dataset(n_features=50, n_targets=3) - clf = MultiTaskElasticNetCV().fit(X, y) + clf = MultiTaskElasticNetCV(random_state=42).fit(X, y) assert_almost_equal(clf.alpha_, 0.00556, 3) - clf = MultiTaskLassoCV().fit(X, y) + clf = MultiTaskLassoCV(random_state=42).fit(X, y) assert_almost_equal(clf.alpha_, 0.00278, 3) X, y, _, _ = build_dataset(n_targets=3) clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100, - l1_ratio=[0.3, 0.5], tol=1e-3) + l1_ratio=[0.3, 0.5], tol=1e-3, random_state=42) clf.fit(X, y) assert_equal(0.5, clf.l1_ratio_) assert_equal((3, X.shape[1]), clf.coef_.shape) @@ -456,7 +470,8 @@ def test_multitask_enet_and_lasso_cv(): assert_equal((2, 10), clf.alphas_.shape) X, y, _, _ = build_dataset(n_targets=3) - clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3) + clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3, + random_state=42) clf.fit(X, y) assert_equal((3, X.shape[1]), clf.coef_.shape) assert_equal((3, ), clf.intercept_.shape) @@ -467,9 +482,11 @@ def test_multitask_enet_and_lasso_cv(): def test_1d_multioutput_enet_and_multitask_enet_cv(): X, y, _, _ = build_dataset(n_features=10) y = y[:, np.newaxis] - clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7]) + clf = ElasticNetCV(n_alphas=5, 
eps=2e-3, l1_ratio=[0.5, 0.7], + random_state=42) clf.fit(X, y[:, 0]) - clf1 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7]) + clf1 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], + random_state=42) clf1.fit(X, y) assert_almost_equal(clf.l1_ratio_, clf1.l1_ratio_) assert_almost_equal(clf.alpha_, clf1.alpha_) @@ -480,9 +497,9 @@ def test_1d_multioutput_enet_and_multitask_enet_cv(): def test_1d_multioutput_lasso_and_multitask_lasso_cv(): X, y, _, _ = build_dataset(n_features=10) y = y[:, np.newaxis] - clf = LassoCV(n_alphas=5, eps=2e-3) + clf = LassoCV(n_alphas=5, eps=2e-3, random_state=42) clf.fit(X, y[:, 0]) - clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3) + clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3, random_state=42) clf1.fit(X, y) assert_almost_equal(clf.alpha_, clf1.alpha_) assert_almost_equal(clf.coef_, clf1.coef_[0]) @@ -491,16 +508,16 @@ def test_1d_multioutput_lasso_and_multitask_lasso_cv(): def test_sparse_input_dtype_enet_and_lassocv(): X, y, _, _ = build_dataset(n_features=10) - clf = ElasticNetCV(n_alphas=5) + clf = ElasticNetCV(n_alphas=5, random_state=42) clf.fit(sparse.csr_matrix(X), y) - clf1 = ElasticNetCV(n_alphas=5) + clf1 = ElasticNetCV(n_alphas=5, random_state=42) clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y) assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6) assert_almost_equal(clf.coef_, clf1.coef_, decimal=6) - clf = LassoCV(n_alphas=5) + clf = LassoCV(n_alphas=5, random_state=42) clf.fit(sparse.csr_matrix(X), y) - clf1 = LassoCV(n_alphas=5) + clf1 = LassoCV(n_alphas=5, random_state=42) clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y) assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6) assert_almost_equal(clf.coef_, clf1.coef_, decimal=6) @@ -508,19 +525,20 @@ def test_sparse_input_dtype_enet_and_lassocv(): def test_precompute_invalid_argument(): X, y, _, _ = build_dataset() - for clf in [ElasticNetCV(precompute="invalid"), - LassoCV(precompute="invalid")]: + for clf in [ElasticNetCV(precompute="invalid", random_state=42), + LassoCV(precompute="invalid", random_state=42)]: assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*" "array-like.*Got 'invalid'", clf.fit, X, y) # Precompute = 'auto' is not supported for ElasticNet assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*" - "Got 'auto'", ElasticNet(precompute='auto').fit, X, y) + "Got 'auto'", ElasticNet(precompute='auto', + random_state=42).fit, X, y) def test_warm_start_convergence(): X, y, _, _ = build_dataset() - model = ElasticNet(alpha=1e-3, tol=1e-3).fit(X, y) + model = ElasticNet(alpha=1e-3, tol=1e-3, random_state=42).fit(X, y) n_iter_reference = model.n_iter_ # This dataset is not trivial enough for the model to converge in one pass. @@ -546,12 +564,13 @@ def test_warm_start_convergence_with_regularizer_decrement(): # Train a model to converge on a lightly regularized problem final_alpha = 1e-5 - low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y) + low_reg_model = ElasticNet(alpha=final_alpha, random_state=42).fit(X, y) # Fitting a new model on a more regularized version of the same problem. # Fitting with high regularization is easier it should converge faster # in general. 
-    high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
+    high_reg_model = ElasticNet(alpha=final_alpha * 10,
+                                random_state=42).fit(X, y)
     assert_greater(low_reg_model.n_iter_, high_reg_model.n_iter_)
     # Fit the solution to the original, less regularized version of the
@@ -571,7 +590,7 @@ def test_random_descent():
     # This uses the coordinate descent algo using the gram trick.
     X, y, _, _ = build_dataset(n_samples=50, n_features=20)
-    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
+    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8, random_state=42)
     clf_cyclic.fit(X, y)
     clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
     clf_random.fit(X, y)
@@ -579,7 +598,7 @@ def test_random_descent():
     assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)
     # This uses the descent algo without the gram trick
-    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
+    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8, random_state=42)
     clf_cyclic.fit(X.T, y[:20])
     clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
     clf_random.fit(X.T, y[:20])
@@ -587,7 +606,7 @@ def test_random_descent():
     assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)
     # Sparse Case
-    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
+    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8, random_state=42)
     clf_cyclic.fit(sparse.csr_matrix(X), y)
     clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
     clf_random.fit(sparse.csr_matrix(X), y)
@@ -596,7 +615,8 @@ def test_random_descent():
     # Multioutput case.
     new_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
-    clf_cyclic = MultiTaskElasticNet(selection='cyclic', tol=1e-8)
+    clf_cyclic = MultiTaskElasticNet(selection='cyclic', tol=1e-8,
+                                     random_state=42)
     clf_cyclic.fit(X, new_y)
     clf_random = MultiTaskElasticNet(selection='random', tol=1e-8,
                                      random_state=42)
@@ -605,7 +625,7 @@ def test_random_descent():
     assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)
     # Raise error when selection is not in cyclic or random.
-    clf_random = ElasticNet(selection='invalid')
+    clf_random = ElasticNet(selection='invalid', random_state=42)
     assert_raises(ValueError, clf_random.fit, X, y)
@@ -632,7 +652,7 @@ def test_check_input_false():
     X, y, _, _ = build_dataset(n_samples=20, n_features=10)
     X = check_array(X, order='F', dtype='float64')
     y = check_array(X, order='F', dtype='float64')
-    clf = ElasticNet(selection='cyclic', tol=1e-8)
+    clf = ElasticNet(selection='cyclic', tol=1e-8, random_state=42)
     # Check that no error is raised if data is provided in the right format
     clf.fit(X, y, check_input=False)
     X = check_array(X, order='F', dtype='float32')
@@ -651,7 +671,8 @@ def test_overrided_gram_matrix():
     X, y, _, _ = build_dataset(n_samples=20, n_features=10)
     Gram = X.T.dot(X)
     clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram,
-                     fit_intercept=True)
+                     fit_intercept=True,
+                     random_state=42)
     assert_warns_message(UserWarning,
                          "Gram matrix was provided but X was centered"
                          " to fit intercept, "
@@ -685,7 +706,8 @@ def test_enet_float_precision():
             for dtype in [np.float64, np.float32]:
                 clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False,
                                  fit_intercept=fit_intercept,
-                                 normalize=normalize)
+                                 normalize=normalize,
+                                 random_state=42)
                 X = dtype(X)
                 y = dtype(y)
@@ -701,7 +723,8 @@
                 clf_precompute = ElasticNet(alpha=0.5, max_iter=100,
                                             precompute=Gram,
                                             fit_intercept=fit_intercept,
-                                            normalize=normalize)
+                                            normalize=normalize,
+                                            random_state=42)
                 ignore_warnings(clf_precompute.fit)(X, y)
                 assert_array_almost_equal(clf.coef_, clf_precompute.coef_)
                 assert_array_almost_equal(clf.intercept_,
@@ -711,7 +734,8 @@
                 multi_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
                 clf_multioutput = MultiTaskElasticNet(
                     alpha=0.5, max_iter=100, fit_intercept=fit_intercept,
-                    normalize=normalize)
+                    normalize=normalize,
+                    random_state=42)
                 clf_multioutput.fit(X, multi_y)
                 coef[('multi', dtype)] = clf_multioutput.coef_
                 intercept[('multi', dtype)] = clf_multioutput.intercept_
diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index 9431e96f74108..c1a1e765eff74 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -186,7 +186,7 @@ def test_huber_better_r2_score():
     # The Ridge regressor should be influenced by the outliers and hence
     # give a worse score on the non-outliers as compared to the huber regressor.
- ridge = Ridge(fit_intercept=True, alpha=0.01)
+ ridge = Ridge(fit_intercept=True, alpha=0.01, random_state=42)
ridge.fit(X, y)
ridge_score = ridge.score(X[mask], y[mask])
ridge_outlier_score = ridge.score(X[~mask], y[~mask])
diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
index ec2be517bf382..cd0488f7c47d8 100644
--- a/sklearn/linear_model/tests/test_logistic.py
+++ b/sklearn/linear_model/tests/test_logistic.py
@@ -72,9 +72,10 @@ def test_error():
# Test for appropriate exception on errors
msg = "Penalty term must be positive"
assert_raise_message(ValueError, msg,
- LogisticRegression(C=-1).fit, X, Y1)
+ LogisticRegression(C=-1, random_state=42).fit, X, Y1)
assert_raise_message(ValueError, msg,
- LogisticRegression(C="test").fit, X, Y1)
+ LogisticRegression(C="test",
+ random_state=42).fit, X, Y1)
for LR in [LogisticRegression, LogisticRegressionCV]:
msg = "Tolerance for stopping criteria must be positive"
@@ -87,8 +88,8 @@ def test_error():
def test_predict_3_classes():
- check_predictions(LogisticRegression(C=10), X, Y2)
- check_predictions(LogisticRegression(C=10), X_sp, Y2)
+ check_predictions(LogisticRegression(C=10, random_state=42), X, Y2)
+ check_predictions(LogisticRegression(C=10, random_state=42), X_sp, Y2)
def test_predict_iris():
@@ -100,11 +101,13 @@ def test_predict_iris():
# Test that both multinomial and OvR solvers handle
# multiclass data correctly and give good accuracy
# score (>0.95) for the training data.
- for clf in [LogisticRegression(C=len(iris.data)),
+ for clf in [LogisticRegression(C=len(iris.data), random_state=42),
LogisticRegression(C=len(iris.data), solver='lbfgs',
- multi_class='multinomial'),
+ multi_class='multinomial',
+ random_state=42),
LogisticRegression(C=len(iris.data), solver='newton-cg',
- multi_class='multinomial'),
+ multi_class='multinomial',
+ random_state=42),
LogisticRegression(C=len(iris.data), solver='sag', tol=1e-2, multi_class='ovr', random_state=42)]:
clf.fit(iris.data, target)
@@ -123,7 +126,8 @@ def test_predict_iris():
def test_multinomial_validation():
for solver in ['lbfgs', 'newton-cg', 'sag']:
- lr = LogisticRegression(C=-1, solver=solver, multi_class='multinomial')
+ lr = LogisticRegression(C=-1, solver=solver, multi_class='multinomial',
+ random_state=42)
assert_raises(ValueError, lr.fit, [[0, 1], [1, 0]], [0, 1])
@@ -381,9 +385,10 @@ def test_logistic_cv():
X_ref -= X_ref.mean()
X_ref /= X_ref.std()
lr_cv = LogisticRegressionCV(Cs=[1.], fit_intercept=False,
- solver='liblinear')
+ solver='liblinear',
+ random_state=42)
lr_cv.fit(X_ref, y)
- lr = LogisticRegression(C=1., fit_intercept=False)
+ lr = LogisticRegression(C=1., fit_intercept=False, random_state=42)
lr.fit(X_ref, y)
assert_array_almost_equal(lr.coef_, lr_cv.coef_)
@@ -408,10 +413,14 @@ def test_multinomial_logistic_regression_string_inputs():
# For numerical labels, let y values be taken from set (-1, 0, 1)
y = np.array(y) - 1
# Test for string labels
- lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
- lr_cv = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')
- lr_str = LogisticRegression(solver='lbfgs', multi_class='multinomial')
- lr_cv_str = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')
+ lr = LogisticRegression(solver='lbfgs', multi_class='multinomial',
+ random_state=42)
+ lr_cv = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial',
+ random_state=42)
+ lr_str = LogisticRegression(solver='lbfgs', multi_class='multinomial',
+ random_state=42)
+ lr_cv_str = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial',
+ random_state=42)
lr.fit(X_ref, y)
lr_cv.fit(X_ref, y)
@@ -433,7 +442,8 @@ def test_multinomial_logistic_regression_string_inputs():
# Make sure class weights can be given with string labels
lr_cv_str = LogisticRegression( solver='lbfgs', class_weight={'bar': 1, 'baz': 2, 'foo': 0},
- multi_class='multinomial').fit(X_ref, y_str)
+ multi_class='multinomial',
+ random_state=42).fit(X_ref, y_str)
assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))), ['bar', 'baz'])
@@ -443,9 +453,9 @@ def test_logistic_cv_sparse():
X[X < 1.0] = 0.0
csr = sp.csr_matrix(X)
- clf = LogisticRegressionCV(fit_intercept=True)
+ clf = LogisticRegressionCV(fit_intercept=True, random_state=42)
clf.fit(X, y)
- clfs = LogisticRegressionCV(fit_intercept=True)
+ clfs = LogisticRegressionCV(fit_intercept=True, random_state=42)
clfs.fit(csr, y)
assert_array_almost_equal(clfs.coef_, clf.coef_)
assert_array_almost_equal(clfs.intercept_, clf.intercept_)
@@ -494,15 +504,15 @@ def test_ovr_multinomial_iris():
# on the fine-grained iris classes, i.e, before the classes 0 and 1 are
# conflated) is used for both clf and clf1
n_cv = 2
- cv = StratifiedKFold(n_cv)
+ cv = StratifiedKFold(n_cv, random_state=42)
precomputed_folds = list(cv.split(train, target))
# Train clf on the original dataset where classes 0 and 1 are separated
- clf = LogisticRegressionCV(cv=precomputed_folds)
+ clf = LogisticRegressionCV(cv=precomputed_folds, random_state=42)
clf.fit(train, target)
# Conflate classes 0 and 1 and train clf1 on this modified dataset
- clf1 = LogisticRegressionCV(cv=precomputed_folds)
+ clf1 = LogisticRegressionCV(cv=precomputed_folds, random_state=42)
target_copy = target.copy()
target_copy[target_copy == 0] = 1
clf1.fit(train, target_copy)
@@ -547,9 +557,12 @@ def test_ovr_multinomial_iris():
def test_logistic_regression_solvers():
X, y = make_classification(n_features=10, n_informative=5, random_state=0)
- ncg = LogisticRegression(solver='newton-cg', fit_intercept=False)
- lbf = LogisticRegression(solver='lbfgs', fit_intercept=False)
- lib = LogisticRegression(fit_intercept=False)
+ ncg = LogisticRegression(solver='newton-cg', fit_intercept=False,
+ random_state=42)
+ lbf = LogisticRegression(solver='lbfgs', fit_intercept=False,
+ random_state=42)
+ lib = LogisticRegression(fit_intercept=False,
+ random_state=42)
sag = LogisticRegression(solver='sag', fit_intercept=False, random_state=42)
ncg.fit(X, y)
@@ -568,9 +581,12 @@ def test_logistic_regression_solvers_multiclass():
X, y = make_classification(n_samples=20, n_features=20, n_informative=10, n_classes=3, random_state=0)
tol = 1e-6
- ncg = LogisticRegression(solver='newton-cg', fit_intercept=False, tol=tol)
- lbf = LogisticRegression(solver='lbfgs', fit_intercept=False, tol=tol)
- lib = LogisticRegression(fit_intercept=False, tol=tol)
+ ncg = LogisticRegression(solver='newton-cg', fit_intercept=False, tol=tol,
+ random_state=42)
+ lbf = LogisticRegression(solver='lbfgs', fit_intercept=False, tol=tol,
+ random_state=42)
+ lib = LogisticRegression(fit_intercept=False, tol=tol,
+ random_state=42)
sag = LogisticRegression(solver='sag', fit_intercept=False, tol=tol, max_iter=1000, random_state=42)
ncg.fit(X, y)
@@ -596,13 +612,16 @@ def test_logistic_regressioncv_class_weights():
clf_lbf = LogisticRegressionCV(solver='lbfgs', Cs=1, fit_intercept=False,
- class_weight=class_weight)
+ class_weight=class_weight,
+ random_state=42)
clf_ncg = LogisticRegressionCV(solver='newton-cg', Cs=1, fit_intercept=False,
- class_weight=class_weight)
+ class_weight=class_weight,
+ random_state=42)
clf_lib = LogisticRegressionCV(solver='liblinear', Cs=1, fit_intercept=False,
- class_weight=class_weight)
+ class_weight=class_weight,
+ random_state=42)
clf_sag = LogisticRegressionCV(solver='sag', Cs=1, fit_intercept=False, class_weight=class_weight,
@@ -709,9 +728,11 @@ def test_logistic_regression_class_weights():
for solver in solvers:
clf1 = LogisticRegression(solver=solver, multi_class="multinomial",
- class_weight="balanced")
+ class_weight="balanced",
+ random_state=42)
clf2 = LogisticRegression(solver=solver, multi_class="multinomial",
- class_weight=class_weight_dict)
+ class_weight=class_weight_dict,
+ random_state=42)
clf1.fit(X, y)
clf2.fit(X, y)
assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=4)
@@ -724,9 +745,11 @@ def test_logistic_regression_class_weights():
for solver in solvers:
clf1 = LogisticRegression(solver=solver, multi_class="ovr",
- class_weight="balanced")
+ class_weight="balanced",
+ random_state=42)
clf2 = LogisticRegression(solver=solver, multi_class="ovr",
- class_weight=class_weight_dict)
+ class_weight=class_weight_dict,
+ random_state=42)
clf1.fit(X, y)
clf2.fit(X, y)
assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=6)
@@ -736,7 +759,8 @@ def test_logistic_regression_convergence_warnings():
# Test that warnings are raised if model does not converge
X, y = make_classification(n_samples=20, n_features=20, random_state=0)
- clf_lib = LogisticRegression(solver='liblinear', max_iter=2, verbose=1)
+ clf_lib = LogisticRegression(solver='liblinear', max_iter=2, verbose=1,
+ random_state=42)
assert_warns(ConvergenceWarning, clf_lib.fit, X, y)
assert_equal(clf_lib.n_iter_, 2)
@@ -753,9 +777,11 @@ def test_logistic_regression_multinomial():
# 'lbfgs' is used as a referenced
solver = 'lbfgs'
- ref_i = LogisticRegression(solver=solver, multi_class='multinomial')
+ ref_i = LogisticRegression(solver=solver, multi_class='multinomial',
+ random_state=42)
ref_w = LogisticRegression(solver=solver, multi_class='multinomial',
- fit_intercept=False)
+ fit_intercept=False,
+ random_state=42)
ref_i.fit(X, y)
ref_w.fit(X, y)
assert_array_equal(ref_i.coef_.shape, (n_classes, n_features))
@@ -781,7 +807,8 @@ def test_logistic_regression_multinomial():
# folds, it need not be exactly the same.
for solver in ['lbfgs', 'newton-cg', 'sag']:
clf_path = LogisticRegressionCV(solver=solver, max_iter=2000, tol=1e-6,
- multi_class='multinomial', Cs=[1.])
+ multi_class='multinomial', Cs=[1.],
+ random_state=42)
clf_path.fit(X, y)
assert_array_almost_equal(clf_path.coef_, ref_i.coef_, decimal=3)
assert_almost_equal(clf_path.intercept_, ref_i.intercept_, decimal=3)
@@ -825,7 +852,7 @@ def test_liblinear_decision_function_zero():
# See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600
# and the PR https://github.com/scikit-learn/scikit-learn/pull/3623
X, y = make_classification(n_samples=5, n_features=5, random_state=0)
- clf = LogisticRegression(fit_intercept=False)
+ clf = LogisticRegression(fit_intercept=False, random_state=42)
clf.fit(X, y)
# Dummy data such that the decision function becomes zero.
@@ -837,7 +864,7 @@ def test_liblinear_logregcv_sparse():
# Test LogRegCV with solver='liblinear' works for sparse matrices
X, y = make_classification(n_samples=10, n_features=5, random_state=0)
- clf = LogisticRegressionCV(solver='liblinear')
+ clf = LogisticRegressionCV(solver='liblinear', random_state=42)
clf.fit(sparse.csr_matrix(X), y)
@@ -845,7 +872,7 @@ def test_logreg_intercept_scaling():
# Test that the right error message is thrown when intercept_scaling <= 0
for i in [-1, 0]:
- clf = LogisticRegression(intercept_scaling=i)
+ clf = LogisticRegression(intercept_scaling=i, random_state=42)
msg = ('Intercept scaling is %r but needs to be greater than 0.' ' To disable fitting an intercept,' ' set fit_intercept=False.' % clf.intercept_scaling)
@@ -855,7 +882,7 @@ def test_logreg_intercept_scaling_zero():
# Test that intercept_scaling is ignored when fit_intercept is False
- clf = LogisticRegression(fit_intercept=False)
+ clf = LogisticRegression(fit_intercept=False, random_state=42)
clf.fit(X, Y1)
assert_equal(clf.intercept_, 0.)
@@ -863,9 +890,11 @@ def test_logreg_cv_penalty():
# Test that the correct penalty is passed to the final fit.
X, y = make_classification(n_samples=50, n_features=20, random_state=0)
- lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='liblinear')
+ lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='liblinear',
+ random_state=42)
lr_cv.fit(X, y)
- lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear')
+ lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear',
+ random_state=42)
lr.fit(X, y)
assert_equal(np.count_nonzero(lr_cv.coef_), np.count_nonzero(lr.coef_))
@@ -876,10 +905,12 @@ def test_logreg_predict_proba_multinomial():
# Predicted probabilites using the true-entropy loss should give a
# smaller loss than those using the ovr method.
- clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs")
+ clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs",
+ random_state=42)
clf_multi.fit(X, y)
clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
- clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
+ clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs",
+ random_state=42)
clf_ovr.fit(X, y)
clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X))
assert_greater(clf_ovr_loss, clf_multi_loss)
diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py
index a1dc5c4d684a2..fba66fd913ca9 100644
--- a/sklearn/linear_model/tests/test_passive_aggressive.py
+++ b/sklearn/linear_model/tests/test_passive_aggressive.py
@@ -106,7 +106,7 @@ def test_classifier_partial_fit():
def test_classifier_refit():
# Classifier can be retrained on different labels and features.
- clf = PassiveAggressiveClassifier().fit(X, y)
+ clf = PassiveAggressiveClassifier(random_state=42).fit(X, y)
assert_array_equal(clf.classes_, np.unique(y))
clf.fit(X[:, :-1], iris.target_names[y])
@@ -122,21 +122,23 @@ def test_classifier_correctness():
clf1 = MyPassiveAggressive(C=1.0, loss=loss, fit_intercept=True,
- n_iter=2)
+ n_iter=2,
+ random_state=42)
clf1.fit(X, y_bin)
for data in (X, X_csr):
clf2 = PassiveAggressiveClassifier(C=1.0, loss=loss, fit_intercept=True,
- n_iter=2, shuffle=False)
+ n_iter=2, shuffle=False,
+ random_state=42)
clf2.fit(data, y_bin)
assert_array_almost_equal(clf1.w, clf2.coef_.ravel(), decimal=2)
def test_classifier_undefined_methods():
- clf = PassiveAggressiveClassifier()
+ clf = PassiveAggressiveClassifier(random_state=42)
for meth in ("predict_proba", "predict_log_proba", "transform"):
assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
@@ -165,23 +167,27 @@ def test_class_weights():
def test_partial_fit_weight_class_balanced():
# partial_fit with class_weight='balanced' not supported
- clf = PassiveAggressiveClassifier(class_weight="balanced")
+ clf = PassiveAggressiveClassifier(class_weight="balanced",
+ random_state=42)
assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y))
def test_equal_class_weight():
X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
y2 = [0, 0, 1, 1]
- clf = PassiveAggressiveClassifier(C=0.1, n_iter=1000, class_weight=None)
+ clf = PassiveAggressiveClassifier(C=0.1, n_iter=1000, class_weight=None,
+ random_state=42)
clf.fit(X2, y2)
# Already balanced, so "balanced" weights should have no effect
clf_balanced = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
- class_weight="balanced")
+ class_weight="balanced",
+ random_state=42)
clf_balanced.fit(X2, y2)
clf_weighted = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
- class_weight={0: 0.5, 1: 0.5})
+ class_weight={0: 0.5, 1: 0.5},
+ random_state=42)
clf_weighted.fit(X2, y2)
# should be similar up to some epsilon due to learning rate schedule
@@ -195,7 +201,7 @@ def test_wrong_class_weight_label():
[1.0, 1.0], [1.0, 0.0]])
y2 = [1, 1, 1, -1, -1]
- clf = PassiveAggressiveClassifier(class_weight={0: 0.5})
+ clf = PassiveAggressiveClassifier(class_weight={0: 0.5}, random_state=42)
assert_raises(ValueError, clf.fit, X2, y2)
@@ -205,10 +211,11 @@ def test_wrong_class_weight_format():
[1.0, 1.0], [1.0, 0.0]])
y2 = [1, 1, 1, -1, -1]
- clf = PassiveAggressiveClassifier(class_weight=[0.5])
+ clf = PassiveAggressiveClassifier(class_weight=[0.5], random_state=42)
assert_raises(ValueError, clf.fit, X2, y2)
- clf = PassiveAggressiveClassifier(class_weight="the larch")
+ clf = PassiveAggressiveClassifier(class_weight="the larch",
+ random_state=42)
assert_raises(ValueError, clf.fit, X2, y2)
@@ -262,20 +269,22 @@ def test_regressor_correctness():
reg1 = MyPassiveAggressive(C=1.0, loss=loss, fit_intercept=True,
- n_iter=2)
+ n_iter=2,
+ random_state=42)
reg1.fit(X, y_bin)
for data in (X, X_csr):
reg2 = PassiveAggressiveRegressor(C=1.0, loss=loss, fit_intercept=True,
- n_iter=2, shuffle=False)
+ n_iter=2, shuffle=False,
+ random_state=42)
reg2.fit(data, y_bin)
assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2)
def test_regressor_undefined_methods():
- reg = PassiveAggressiveRegressor()
+ reg = PassiveAggressiveRegressor(random_state=42)
for meth in ("transform",):
assert_raises(AttributeError, lambda x: getattr(reg, x), meth)
diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py
index a5b97c431af3a..5f48d4f744d3b 100644
--- a/sklearn/linear_model/tests/test_perceptron.py
+++ b/sklearn/linear_model/tests/test_perceptron.py
@@ -45,7 +45,7 @@ def predict(self, X):
def test_perceptron_accuracy():
for data in (X, X_csr):
- clf = Perceptron(n_iter=30, shuffle=False)
+ clf = Perceptron(n_iter=30, shuffle=False, random_state=42)
clf.fit(data, y)
score = clf.score(data, y)
assert_true(score >= 0.7)
@@ -58,13 +58,13 @@ def test_perceptron_correctness():
clf1 = MyPerceptron(n_iter=2)
clf1.fit(X, y_bin)
- clf2 = Perceptron(n_iter=2, shuffle=False)
+ clf2 = Perceptron(n_iter=2, shuffle=False, random_state=42)
clf2.fit(X, y_bin)
assert_array_almost_equal(clf1.w, clf2.coef_.ravel())
def test_undefined_methods():
- clf = Perceptron()
+ clf = Perceptron(random_state=42)
for meth in ("predict_proba", "predict_log_proba"):
assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
diff --git a/sklearn/linear_model/tests/test_randomized_l1.py b/sklearn/linear_model/tests/test_randomized_l1.py
index 0ba2a113a12d8..b101d010006a6 100644
--- a/sklearn/linear_model/tests/test_randomized_l1.py
+++ b/sklearn/linear_model/tests/test_randomized_l1.py
@@ -68,10 +68,10 @@ def test_randomized_lasso():
feature_scores = clf.fit(X, y).scores_
assert_array_equal(feature_scores, X.shape[1] * [1.])
- clf = RandomizedLasso(verbose=False, scaling=-0.1)
+ clf = RandomizedLasso(verbose=False, scaling=-0.1, random_state=42)
assert_raises(ValueError, clf.fit, X, y)
- clf = RandomizedLasso(verbose=False, scaling=1.1)
+ clf = RandomizedLasso(verbose=False, scaling=1.1, random_state=42)
assert_raises(ValueError, clf.fit, X, y)
@@ -100,7 +100,8 @@ def test_randomized_logistic():
feature_scores = clf.fit(X, y).scores_
assert_array_equal(np.argsort(F), np.argsort(feature_scores))
- clf = RandomizedLogisticRegression(verbose=False, C=[[1., 0.5]])
+ clf = RandomizedLogisticRegression(verbose=False, C=[[1., 0.5]],
+ random_state=42)
assert_raises(ValueError, clf.fit, X, y)
diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py
index b19ee0aa25895..b6a7b5c614ae0 100644
--- a/sklearn/linear_model/tests/test_ransac.py
+++ b/sklearn/linear_model/tests/test_ransac.py
@@ -169,7 +169,8 @@ def is_data_valid(X, y):
base_estimator = LinearRegression()
ransac_estimator = RANSACRegressor(base_estimator, is_data_valid=is_data_valid,
- max_trials=5)
+ max_trials=5,
+ random_state=42)
msg = ("RANSAC could not find a valid consensus set")
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
@@ -185,7 +186,8 @@ def is_model_valid(estimator, X, y):
base_estimator = LinearRegression()
ransac_estimator = RANSACRegressor(base_estimator, is_model_valid=is_model_valid,
- max_trials=5)
+ max_trials=5,
+ random_state=42)
msg = ("RANSAC could not find a valid consensus set")
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
@@ -202,7 +204,8 @@ def is_data_valid(X, y):
ransac_estimator = RANSACRegressor(base_estimator, is_data_valid=is_data_valid, max_trials=5,
- max_skips=3)
+ max_skips=3,
+ random_state=42)
msg = ("RANSAC skipped more iterations than `max_skips`")
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
@@ -227,7 +230,8 @@ def is_data_valid(X, y):
ransac_estimator = RANSACRegressor(base_estimator, is_data_valid=is_data_valid, max_skips=3,
- max_trials=5)
+ max_trials=5,
+ random_state=42)
assert_warns(UserWarning, ransac_estimator.fit, X, y)
assert_equal(ransac_estimator.n_skips_no_inliers_, 0)
@@ -472,10 +476,12 @@ def test_ransac_dynamic_max_trials():
base_estimator = LinearRegression()
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
- stop_probability=-0.1)
+ stop_probability=-0.1,
+ random_state=42)
assert_raises(ValueError, ransac_estimator.fit, X, y)
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
- stop_probability=1.1)
+ stop_probability=1.1,
+ random_state=42)
assert_raises(ValueError, ransac_estimator.fit, X, y)
@@ -520,6 +526,6 @@ def test_ransac_fit_sample_weight():
# check that if base_estimator.fit doesn't support
# sample_weight, raises error
- base_estimator = Lasso()
- ransac_estimator = RANSACRegressor(base_estimator)
+ base_estimator = Lasso(random_state=42)
+ ransac_estimator = RANSACRegressor(base_estimator, random_state=42)
assert_raises(ValueError, ransac_estimator.fit, X, y, weights)
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 433801e45a8c1..77beaa566ff27 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -66,7 +66,7 @@ def test_ridge():
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
- ridge = Ridge(alpha=alpha, solver=solver)
+ ridge = Ridge(alpha=alpha, solver=solver, random_state=42)
ridge.fit(X, y)
assert_equal(ridge.coef_.shape, (X.shape[1], ))
assert_greater(ridge.score(X, y), 0.47)
@@ -80,7 +80,7 @@ def test_ridge():
n_samples, n_features = 5, 10
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
- ridge = Ridge(alpha=alpha, solver=solver)
+ ridge = Ridge(alpha=alpha, solver=solver, random_state=42)
ridge.fit(X, y)
assert_greater(ridge.score(X, y), .9)
@@ -108,7 +108,7 @@ def test_ridge_singular():
X = rng.randn(n_samples // 2, n_features)
X = np.concatenate((X, X), axis=0)
- ridge = Ridge(alpha=0)
+ ridge = Ridge(alpha=0, random_state=42)
ridge.fit(X, y)
assert_greater(ridge.score(X, y), 0.9)
@@ -126,14 +126,16 @@ def test_ridge_regression_sample_weights():
coefs = ridge_regression(X, y, alpha=alpha, sample_weight=sample_weight,
- solver=solver)
+ solver=solver,
+ random_state=42)
# Sample weight can be implemented via a simple rescaling
# for the square loss.
coefs2 = ridge_regression( X * np.sqrt(sample_weight)[:, np.newaxis], y * np.sqrt(sample_weight),
- alpha=alpha, solver=solver)
+ alpha=alpha, solver=solver,
+ random_state=42)
assert_array_almost_equal(coefs, coefs2)
@@ -153,7 +155,8 @@ def test_ridge_sample_weights():
for (alpha, intercept, solver) in param_grid:
# Ridge with explicit sample_weight
- est = Ridge(alpha=alpha, fit_intercept=intercept, solver=solver)
+ est = Ridge(alpha=alpha, fit_intercept=intercept, solver=solver,
+ random_state=42)
est.fit(X, y, sample_weight=sample_weight)
coefs = est.coef_
inter = est.intercept_
@@ -189,7 +192,7 @@ def test_ridge_shapes():
Y1 = y[:, np.newaxis]
Y = np.c_[y, 1 + y]
- ridge = Ridge()
+ ridge = Ridge(random_state=42)
ridge.fit(X, y)
assert_equal(ridge.coef_.shape, (n_features,))
@@ -212,7 +215,7 @@ def test_ridge_intercept():
y = rng.randn(n_samples)
Y = np.c_[y, 1. + y]
- ridge = Ridge()
+ ridge = Ridge(random_state=42)
ridge.fit(X, y)
intercept = ridge.intercept_
@@ -227,7 +230,7 @@ def test_toy_ridge_object():
# TODO: test also n_samples > n_features
X = np.array([[1], [2]])
Y = np.array([1, 2])
- reg = Ridge(alpha=0.0)
+ reg = Ridge(alpha=0.0, random_state=42)
reg.fit(X, Y)
X_test = [[1], [2], [3], [4]]
assert_almost_equal(reg.predict(X_test), [1., 2, 3, 4])
@@ -253,7 +256,7 @@ def test_ridge_vs_lstsq():
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
- ridge = Ridge(alpha=0., fit_intercept=False)
+ ridge = Ridge(alpha=0., fit_intercept=False, random_state=42)
ols = LinearRegression(fit_intercept=False)
ridge.fit(X, y)
@@ -277,17 +280,19 @@ def test_ridge_individual_penalties():
penalties = np.arange(n_targets)
coef_cholesky = np.array([
- Ridge(alpha=alpha, solver="cholesky").fit(X, target).coef_
+ Ridge(alpha=alpha, solver="cholesky",
+ random_state=42).fit(X, target).coef_
for alpha, target in zip(penalties, y.T)])
coefs_indiv_pen = [
- Ridge(alpha=penalties, solver=solver, tol=1e-8).fit(X, y).coef_
+ Ridge(alpha=penalties, solver=solver,
+ tol=1e-8, random_state=42).fit(X, y).coef_
for solver in ['svd', 'sparse_cg', 'lsqr', 'cholesky', 'sag']]
for coef_indiv_pen in coefs_indiv_pen:
assert_array_almost_equal(coef_cholesky, coef_indiv_pen)
# Test error is raised when number of targets and penalties do not match.
- ridge = Ridge(alpha=penalties[:-1])
+ ridge = Ridge(alpha=penalties[:-1], random_state=42)
assert_raises(ValueError, ridge.fit, X, y)
@@ -303,7 +308,7 @@ def _test_ridge_loo(filter_):
else:
X_diabetes_ = X_diabetes
ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)
- ridge = Ridge(alpha=1.0, fit_intercept=fit_intercept)
+ ridge = Ridge(alpha=1.0, fit_intercept=fit_intercept, random_state=42)
# because fit_intercept is applied
@@ -391,7 +396,7 @@ def _test_ridge_cv(filter_):
assert_equal(len(ridge_cv.coef_.shape), 1)
assert_equal(type(ridge_cv.intercept_), np.float64)
- cv = KFold(5)
+ cv = KFold(5, random_state=42)
ridge_cv.set_params(cv=cv)
ridge_cv.fit(filter_(X_diabetes), y_diabetes)
ridge_cv.predict(filter_(X_diabetes))
@@ -401,7 +406,7 @@ def _test_ridge_diabetes(filter_):
- ridge = Ridge(fit_intercept=False)
+ ridge = Ridge(fit_intercept=False, random_state=42)
ridge.fit(filter_(X_diabetes), y_diabetes)
return np.round(ridge.score(filter_(X_diabetes), y_diabetes), 5)
@@ -411,7 +416,7 @@ def _test_multi_ridge_diabetes(filter_):
Y = np.vstack((y_diabetes, y_diabetes)).T
n_features = X_diabetes.shape[1]
- ridge = Ridge(fit_intercept=False)
+ ridge = Ridge(fit_intercept=False, random_state=42)
ridge.fit(filter_(X_diabetes), Y)
assert_equal(ridge.coef_.shape, (2, n_features))
Y_pred = ridge.predict(filter_(X_diabetes))
@@ -424,13 +429,13 @@ def _test_multi_ridge_diabetes(filter_):
def _test_ridge_classifiers(filter_):
n_classes = np.unique(y_iris).shape[0]
n_features = X_iris.shape[1]
- for reg in (RidgeClassifier(), RidgeClassifierCV()):
+ for reg in (RidgeClassifier(random_state=42), RidgeClassifierCV()):
reg.fit(filter_(X_iris), y_iris)
assert_equal(reg.coef_.shape, (n_classes, n_features))
y_pred = reg.predict(filter_(X_iris))
assert_greater(np.mean(y_iris == y_pred), .79)
- cv = KFold(5)
+ cv = KFold(5, random_state=42)
reg = RidgeClassifierCV(cv=cv)
reg.fit(filter_(X_iris), y_iris)
y_pred = reg.predict(filter_(X_iris))
@@ -438,11 +443,11 @@ def _test_ridge_classifiers(filter_):
def _test_tolerance(filter_):
- ridge = Ridge(tol=1e-5, fit_intercept=False)
+ ridge = Ridge(tol=1e-5, fit_intercept=False, random_state=42)
ridge.fit(filter_(X_diabetes), y_diabetes)
score = ridge.score(filter_(X_diabetes), y_diabetes)
- ridge2 = Ridge(tol=1e-3, fit_intercept=False)
+ ridge2 = Ridge(tol=1e-3, fit_intercept=False, random_state=42)
ridge2.fit(filter_(X_diabetes), y_diabetes)
score2 = ridge2.score(filter_(X_diabetes), y_diabetes)
@@ -478,7 +483,7 @@ def test_ridge_cv_sparse_svd():
def test_ridge_sparse_svd():
X = sp.csc_matrix(rng.rand(100, 10))
y = rng.rand(100)
- ridge = Ridge(solver='svd', fit_intercept=False)
+ ridge = Ridge(solver='svd', fit_intercept=False, random_state=42)
assert_raises(TypeError, ridge.fit, X, y)
@@ -488,12 +493,12 @@ def test_class_weights():
[1.0, 1.0], [1.0, 0.0]])
y = [1, 1, 1, -1, -1]
- reg = RidgeClassifier(class_weight=None)
+ reg = RidgeClassifier(class_weight=None, random_state=42)
reg.fit(X, y)
assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))
# we give a small weights to class 1
- reg = RidgeClassifier(class_weight={1: 0.001})
+ reg = RidgeClassifier(class_weight={1: 0.001}, random_state=42)
reg.fit(X, y)
# now the hyperplane should rotate clock-wise and
@@ -501,7 +506,7 @@ def test_class_weights():
assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([-1]))
# check if class_weight = 'balanced' can handle negative labels.
- reg = RidgeClassifier(class_weight='balanced')
+ reg = RidgeClassifier(class_weight='balanced', random_state=42)
reg.fit(X, y)
assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))
@@ -509,9 +514,9 @@ def test_class_weights():
# same values when y has equal number of all labels
X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0]])
y = [1, 1, -1, -1]
- reg = RidgeClassifier(class_weight=None)
+ reg = RidgeClassifier(class_weight=None, random_state=42)
reg.fit(X, y)
- rega = RidgeClassifier(class_weight='balanced')
+ rega = RidgeClassifier(class_weight='balanced', random_state=42)
rega.fit(X, y)
assert_equal(len(rega.classes_), 2)
assert_array_almost_equal(reg.coef_, rega.coef_)
@@ -598,13 +603,13 @@ def test_ridgecv_sample_weight():
X = rng.randn(n_samples, n_features)
sample_weight = 1.0 + rng.rand(n_samples)
- cv = KFold(5)
+ cv = KFold(5, random_state=42)
ridgecv = RidgeCV(alphas=alphas, cv=cv)
ridgecv.fit(X, y, sample_weight=sample_weight)
# Check using GridSearchCV directly
parameters = {'alpha': alphas}
- gs = GridSearchCV(Ridge(), parameters, cv=cv)
+ gs = GridSearchCV(Ridge(random_state=42), parameters, cv=cv)
gs.fit(X, y, sample_weight=sample_weight)
assert_equal(ridgecv.alpha_, gs.best_estimator_.alpha)
@@ -628,7 +633,7 @@ def test_raises_value_error_if_sample_weights_greater_than_1d():
sample_weights_not_OK = sample_weights_OK[:, np.newaxis]
sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :]
- ridge = Ridge(alpha=1)
+ ridge = Ridge(alpha=1, random_state=42)
# make sure the "OK" sample weights actually work
ridge.fit(X, y, sample_weights_OK)
@@ -665,8 +670,8 @@ def test_sparse_design_with_sample_weights():
sp.dok_matrix ]
- sparse_ridge = Ridge(alpha=1., fit_intercept=False)
- dense_ridge = Ridge(alpha=1., fit_intercept=False)
+ sparse_ridge = Ridge(alpha=1., fit_intercept=False, random_state=42)
+ dense_ridge = Ridge(alpha=1., fit_intercept=False, random_state=42)
for n_samples, n_features in zip(n_sampless, n_featuress):
X = rng.randn(n_samples, n_features)
@@ -693,13 +698,13 @@ def test_raises_value_error_if_solver_not_supported():
def func():
X = np.eye(3)
y = np.ones(3)
- ridge_regression(X, y, alpha=1., solver=wrong_solver)
+ ridge_regression(X, y, alpha=1., solver=wrong_solver, random_state=42)
assert_raise_message(exception, message, func)
def test_sparse_cg_max_iter():
- reg = Ridge(solver="sparse_cg", max_iter=1)
+ reg = Ridge(solver="sparse_cg", max_iter=1, random_state=42)
reg.fit(X_diabetes, y_diabetes)
assert_equal(reg.coef_.shape[0], X_diabetes.shape[1])
@@ -713,12 +718,13 @@ def test_n_iter():
for max_iter in range(1, 4):
for solver in ('sag', 'lsqr'):
- reg = Ridge(solver=solver, max_iter=max_iter, tol=1e-12)
+ reg = Ridge(solver=solver, max_iter=max_iter, tol=1e-12,
+ random_state=42)
reg.fit(X, y_n)
assert_array_equal(reg.n_iter_, np.tile(max_iter, n_targets))
for solver in ('sparse_cg', 'svd', 'cholesky'):
- reg = Ridge(solver=solver, max_iter=1, tol=1e-1)
+ reg = Ridge(solver=solver, max_iter=1, tol=1e-1, random_state=42)
reg.fit(X, y_n)
assert_equal(reg.n_iter_, None)
@@ -728,15 +734,18 @@ def test_ridge_fit_intercept_sparse():
bias=10., random_state=42)
X_csr = sp.csr_matrix(X)
- dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
- sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
+ dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True,
+ random_state=42)
+ sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True,
+ random_state=42)
dense.fit(X, y)
sparse.fit(X_csr, y)
assert_almost_equal(dense.intercept_, sparse.intercept_)
assert_array_almost_equal(dense.coef_, sparse.coef_)
# test the solver switch and the corresponding warning
- sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True)
+ sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True,
+ random_state=42)
assert_warns(UserWarning, sparse.fit, X_csr, y)
assert_almost_equal(dense.intercept_, sparse.intercept_)
assert_array_almost_equal(dense.coef_, sparse.coef_)
@@ -786,4 +795,4 @@ def test_errors_and_values_svd_helper():
def test_ridge_classifier_no_support_multilabel():
X, y = make_multilabel_classification(n_samples=10, random_state=0)
- assert_raises(ValueError, RidgeClassifier().fit, X, y)
+ assert_raises(ValueError, RidgeClassifier(random_state=42).fit, X, y)
diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py
index a993fb9f5919f..7b95e48509b5f 100644
--- a/sklearn/linear_model/tests/test_sag.py
+++ b/sklearn/linear_model/tests/test_sag.py
@@ -256,7 +256,8 @@ def test_regressor_matching():
step_size = get_step_size(X, alpha, fit_intercept, classification=False)
clf = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
- alpha=alpha * n_samples, max_iter=n_iter)
+ alpha=alpha * n_samples, max_iter=n_iter,
+ random_state=42)
clf.fit(X, y)
weights1, intercept1 = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
@@ -350,7 +351,8 @@ def test_sag_regressor_computed_correctly():
step_size = get_step_size(X, alpha, fit_intercept, classification=False)
clf1 = Ridge(fit_intercept=fit_intercept, tol=tol, solver='sag',
- alpha=alpha * n_samples, max_iter=max_iter)
+ alpha=alpha * n_samples, max_iter=max_iter,
+ random_state=42)
clf2 = clone(clf1)
clf1.fit(X, y)
@@ -421,7 +423,8 @@ def test_sag_regressor():
y = 0.5 * X.ravel()
clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
- alpha=alpha * n_samples)
+ alpha=alpha * n_samples,
+ random_state=42)
clf2 = clone(clf1)
clf1.fit(X, y)
clf2.fit(sp.csr_matrix(X), y)
@@ -434,7 +437,8 @@ def test_sag_regressor():
y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()
clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
- alpha=alpha * n_samples)
+ alpha=alpha * n_samples,
+ random_state=42)
clf2 = clone(clf1)
clf1.fit(X, y)
clf2.fit(sp.csr_matrix(X), y)
@@ -693,7 +697,7 @@ def test_classifier_single_class():
assert_raise_message(ValueError, "This solver needs samples of at least 2 classes " "in the data",
- LogisticRegression(solver='sag').fit,
+ LogisticRegression(solver='sag', random_state=42).fit,
X, y)
@@ -706,10 +710,12 @@ def test_step_size_alpha_error():
" step_size * alpha_scaled == 1")
clf1 = LogisticRegression(solver='sag', C=1. / alpha,
- fit_intercept=fit_intercept)
+ fit_intercept=fit_intercept,
+ random_state=42)
assert_raise_message(ZeroDivisionError, msg, clf1.fit, X, y)
- clf2 = Ridge(fit_intercept=fit_intercept, solver='sag', alpha=alpha)
+ clf2 = Ridge(fit_intercept=fit_intercept, solver='sag', alpha=alpha,
+ random_state=42)
assert_raise_message(ZeroDivisionError, msg, clf2.fit, X, y)
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 8287ade2c2309..5a978177fd2e9 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -470,7 +470,8 @@ def test_sgd_proba(self):
# Hinge loss does not allow for conditional prob estimate.
# We cannot use the factory here, because it defines predict_proba
# anyway.
- clf = SGDClassifier(loss="hinge", alpha=0.01, n_iter=10).fit(X, Y)
+ clf = SGDClassifier(loss="hinge",
+ alpha=0.01, n_iter=10, random_state=42).fit(X, Y)
assert_false(hasattr(clf, "predict_proba"))
assert_false(hasattr(clf, "predict_log_proba"))
@@ -1129,7 +1130,8 @@ def test_underflow_or_overlow():
y = (np.dot(X_scaled, ground_truth) > 0.).astype(np.int32)
assert_array_equal(np.unique(y), [0, 1])
- model = SGDClassifier(alpha=0.1, loss='squared_hinge', n_iter=500)
+ model = SGDClassifier(alpha=0.1, loss='squared_hinge', n_iter=500,
+ random_state=42)
# smoke test: model is stable on scaled data
model.fit(X_scaled, y)
@@ -1158,7 +1160,8 @@ def test_large_regularization():
# regularization parameters
for penalty in ['l2', 'l1', 'elasticnet']:
model = SGDClassifier(alpha=1e5, learning_rate='constant', eta0=0.1,
- n_iter=5, penalty=penalty, shuffle=False)
+ n_iter=5, penalty=penalty, shuffle=False,
+ random_state=42)
with np.errstate(all='raise'):
model.fit(iris.data, iris.target)
assert_array_almost_equal(model.coef_, np.zeros_like(model.coef_))
diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
index 6b4c09d9742e0..3a9548240e996 100644
--- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
@@ -16,7 +16,7 @@ def test_sparse_coef():
# Check that the sparse_coef property works
- clf = ElasticNet()
+ clf = ElasticNet(random_state=42)
clf.coef_ = [1, 2, 3]
assert_true(sp.isspmatrix(clf.sparse_coef_))
@@ -27,8 +27,9 @@ def test_normalize_option():
# Check that the normalize option in enet works
X = sp.csc_matrix([[-1], [0], [1]])
y = [-1, 0, 1]
- clf_dense = ElasticNet(fit_intercept=True, normalize=True)
- clf_sparse = ElasticNet(fit_intercept=True, normalize=True)
+ clf_dense = ElasticNet(fit_intercept=True, normalize=True, random_state=42)
+ clf_sparse = ElasticNet(fit_intercept=True, normalize=True,
+ random_state=42)
clf_dense.fit(X, y)
X = sp.csc_matrix(X)
clf_sparse.fit(X, y)
@@ -41,7 +42,7 @@ def test_lasso_zero():
X = sp.csc_matrix((3, 1))
y = [0, 0, 0]
T = np.array([[1], [2], [3]])
- clf = Lasso().fit(X, y)
+ clf = Lasso(random_state=42).fit(X, y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0])
assert_array_almost_equal(pred, [0, 0, 0])
@@ -57,7 +58,7 @@ def test_enet_toy_list_input():
T = np.array([[2], [3], [4]]) # test sample
# this should be the same as unregularized least squares
- clf = ElasticNet(alpha=0, l1_ratio=1.0)
+ clf = ElasticNet(alpha=0, l1_ratio=1.0, random_state=42)
# catch warning about alpha=0.
# this is discouraged but should work.
ignore_warnings(clf.fit)(X, Y)
@@ -66,14 +67,14 @@ def test_enet_toy_list_input():
assert_array_almost_equal(pred, [2, 3, 4])
assert_almost_equal(clf.dual_gap_, 0)
- clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000)
+ clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000, random_state=42)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
assert_almost_equal(clf.dual_gap_, 0)
- clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
+ clf = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.45454], 3)
@@ -98,21 +99,21 @@ def test_enet_toy_explicit_sparse_input():
T[2, 0] = 4
# this should be the same as lasso
- clf = ElasticNet(alpha=0, l1_ratio=1.0)
+ clf = ElasticNet(alpha=0, l1_ratio=1.0, random_state=42)
f(clf.fit)(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [1])
assert_array_almost_equal(pred, [2, 3, 4])
assert_almost_equal(clf.dual_gap_, 0)
- clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000)
+ clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000, random_state=42)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
assert_almost_equal(clf.dual_gap_, 0)
- clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
+ clf = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.45454], 3)
@@ -157,7 +158,8 @@ def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
s_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept, max_iter=max_iter, tol=1e-7, positive=positive,
- warm_start=True)
+ warm_start=True,
+ random_state=42)
s_clf.fit(X_train, y_train)
assert_almost_equal(s_clf.dual_gap_, 0, 4)
@@ -166,7 +168,8 @@ def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
# check the convergence is the same as the dense version
d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept, max_iter=max_iter, tol=1e-7, positive=positive,
- warm_start=True)
+ warm_start=True,
+ random_state=42)
d_clf.fit(X_train.toarray(), y_train)
assert_almost_equal(d_clf.dual_gap_, 0, 4)
@@ -199,13 +202,15 @@ def test_sparse_lasso_not_as_toy_dataset():
X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]
- s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
+ s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7,
+ random_state=42)
s_clf.fit(X_train, y_train)
assert_almost_equal(s_clf.dual_gap_, 0, 4)
assert_greater(s_clf.score(X_test, y_test), 0.85)
# check the convergence is the same as the dense version
- d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
+ d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7,
+ random_state=42)
d_clf.fit(X_train.toarray(), y_train)
assert_almost_equal(d_clf.dual_gap_, 0, 4)
assert_greater(d_clf.score(X_test, y_test), 0.85)
@@ -218,7 +223,8 @@ def test_enet_multitarget():
n_targets = 3
X, y = make_sparse_data(n_targets=n_targets)
- estimator = ElasticNet(alpha=0.01, fit_intercept=True, precompute=None)
+ estimator = ElasticNet(alpha=0.01, fit_intercept=True, precompute=None,
+ random_state=42)
# XXX: There is a bug when precompute is not None!
estimator.fit(X, y)
coef, intercept, dual_gap = (estimator.coef_,
@@ -237,7 +243,8 @@ def test_path_parameters():
max_iter = 50
n_alphas = 10
clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=max_iter,
- l1_ratio=0.5, fit_intercept=False)
+ l1_ratio=0.5, fit_intercept=False,
+ random_state=42)
ignore_warnings(clf.fit)(X, y) # new params
assert_almost_equal(0.5, clf.l1_ratio)
assert_equal(n_alphas, clf.n_alphas)
@@ -250,18 +257,22 @@ def test_same_output_sparse_dense_lasso_and_enet_cv():
X, y = make_sparse_data(n_samples=40, n_features=10)
for normalize in [True, False]:
- clfs = ElasticNetCV(max_iter=100, cv=5, normalize=normalize)
+ clfs = ElasticNetCV(max_iter=100, cv=5, normalize=normalize,
+ random_state=42)
ignore_warnings(clfs.fit)(X, y)
- clfd = ElasticNetCV(max_iter=100, cv=5, normalize=normalize)
+ clfd = ElasticNetCV(max_iter=100, cv=5, normalize=normalize,
+ random_state=42)
ignore_warnings(clfd.fit)(X.toarray(), y)
assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
- clfs = LassoCV(max_iter=100, cv=4, normalize=normalize)
+ clfs = LassoCV(max_iter=100, cv=4, normalize=normalize,
+ random_state=42)
ignore_warnings(clfs.fit)(X, y)
- clfd = LassoCV(max_iter=100, cv=4, normalize=normalize)
+ clfd = LassoCV(max_iter=100, cv=4, normalize=normalize,
+ random_state=42)
ignore_warnings(clfd.fit)(X.toarray(), y)
assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
@@ -271,7 +282,7 @@ def test_same_multiple_output_sparse_dense():
for normalize in [True, False]:
- l = ElasticNet(normalize=normalize)
+ l = ElasticNet(normalize=normalize, random_state=42)
X = [[0, 1, 2, 3, 4], [0, 2, 5, 8, 11], [9, 10, 11, 12, 13],
@@ -284,7 +295,7 @@ def test_same_multiple_output_sparse_dense():
sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)
predict_dense = l.predict(sample)
- l_sp = ElasticNet(normalize=normalize)
+ l_sp = ElasticNet(normalize=normalize, random_state=42)
X_sp = sp.coo_matrix(X)
ignore_warnings(l_sp.fit)(X_sp, y)
sample_sparse = sp.coo_matrix(sample)
diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py
index 1ebd753008c5d..7a13b2608e455 100644
--- a/sklearn/manifold/tests/test_spectral_embedding.py
+++ b/sklearn/manifold/tests/test_spectral_embedding.py
@@ -238,8 +238,8 @@ def test_spectral_embedding_deterministic():
random_state = np.random.RandomState(36)
data = random_state.randn(10, 30)
sims = rbf_kernel(data)
- embedding_1 = spectral_embedding(sims)
- embedding_2 = spectral_embedding(sims)
+ embedding_1 = spectral_embedding(sims, random_state=42)
+ embedding_2 = spectral_embedding(sims, random_state=42)
assert_array_almost_equal(embedding_1, embedding_2)
@@ -253,7 +253,8 @@ def test_spectral_embedding_unnormalized():
embedding_1 = spectral_embedding(sims, norm_laplacian=False, n_components=n_components,
- drop_first=False)
+ drop_first=False,
+ random_state=42)
# Verify using manual computation with dense eigh
laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index 3be02f359c167..6ca380708ee20 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -290,21 +290,21 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances():
def test_early_exaggeration_too_small():
# Early exaggeration factor must be >= 1.
- tsne = TSNE(early_exaggeration=0.99)
+ tsne = TSNE(early_exaggeration=0.99, random_state=42)
assert_raises_regexp(ValueError, "early_exaggeration .*", tsne.fit_transform, np.array([[0.0]]))
def test_too_few_iterations():
# Number of gradient descent iterations must be at least 200.
- tsne = TSNE(n_iter=199)
+ tsne = TSNE(n_iter=199, random_state=42)
assert_raises_regexp(ValueError, "n_iter .*", tsne.fit_transform, np.array([[0.0]]))
def test_non_square_precomputed_distances():
# Precomputed distance matrices must be square matrices.
- tsne = TSNE(metric="precomputed")
+ tsne = TSNE(metric="precomputed", random_state=42)
assert_raises_regexp(ValueError, ".* square distance matrix", tsne.fit_transform, np.array([[0.0], [1.0]]))
@@ -317,7 +317,7 @@ def test_init_not_available():
def test_init_ndarray():
# Initialize TSNE with ndarray and test fit
- tsne = TSNE(init=np.zeros((100, 2)))
+ tsne = TSNE(init=np.zeros((100, 2)), random_state=42)
X_embedded = tsne.fit_transform(np.ones((100, 5)))
assert_array_equal(np.zeros((100, 2)), X_embedded)
@@ -325,20 +325,20 @@ def test_init_ndarray_precomputed():
# Initialize TSNE with ndarray and metric 'precomputed'
# Make sure no FutureWarning is thrown from _fit
- tsne = TSNE(init=np.zeros((100, 2)), metric="precomputed")
+ tsne = TSNE(init=np.zeros((100, 2)), metric="precomputed", random_state=42)
tsne.fit(np.zeros((100, 100)))
def test_distance_not_available():
# 'metric' must be valid.
- tsne = TSNE(metric="not available")
+ tsne = TSNE(metric="not available", random_state=42)
assert_raises_regexp(ValueError, "Unknown metric not available.*", tsne.fit_transform, np.array([[0.0], [1.0]]))
def test_pca_initialization_not_compatible_with_precomputed_kernel():
# Precomputed distance matrices must be square matrices.
- tsne = TSNE(metric="precomputed", init="pca")
+ tsne = TSNE(metric="precomputed", init="pca", random_state=42)
assert_raises_regexp(ValueError, "The parameter init=\"pca\" cannot be " "used with metric=\"precomputed\".", tsne.fit_transform, np.array([[0.0], [1.0]]))
@@ -425,7 +425,7 @@ def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
def test_verbose():
# Verbose options write to stdout.
random_state = check_random_state(0)
- tsne = TSNE(verbose=2)
+ tsne = TSNE(verbose=2, random_state=42)
X = random_state.randn(5, 2)
old_stdout = sys.stdout
@@ -449,7 +449,7 @@ def test_verbose():
def test_chebyshev_metric():
# t-SNE should allow metrics that cannot be squared (issue #3526).
random_state = check_random_state(0)
- tsne = TSNE(metric="chebyshev")
+ tsne = TSNE(metric="chebyshev", random_state=42)
X = random_state.randn(5, 2)
tsne.fit_transform(X)
@@ -457,7 +457,7 @@ def test_reduction_to_one_component():
# t-SNE should allow reduction to one component (issue #4154).
random_state = check_random_state(0)
- tsne = TSNE(n_components=1)
+ tsne = TSNE(n_components=1, random_state=42)
X = random_state.randn(5, 2)
X_embedded = tsne.fit(X).embedding_
assert(np.all(np.isfinite(X_embedded)))
@@ -469,7 +469,7 @@ def test_no_sparse_on_barnes_hut():
X = random_state.randn(100, 2)
X[(np.random.randint(0, 100, 50), np.random.randint(0, 2, 50))] = 0.0
X_csr = sp.csr_matrix(X)
- tsne = TSNE(n_iter=199, method='barnes_hut')
+ tsne = TSNE(n_iter=199, method='barnes_hut', random_state=42)
assert_raises_regexp(TypeError, "A sparse matrix was.*", tsne.fit_transform, X_csr)
diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py
index 55715d0c35cdd..e975733abcf5a 100644
--- a/sklearn/metrics/cluster/tests/test_unsupervised.py
+++ b/sklearn/metrics/cluster/tests/test_unsupervised.py
@@ -29,10 +29,12 @@ def test_silhouette():
D = pairwise_distances(X, metric='euclidean')
# Given that the actual labels are used, we can assume that S would be
# positive.
- score_precomputed = silhouette_score(D, y, metric='precomputed')
+ score_precomputed = silhouette_score(D, y, metric='precomputed',
+ random_state=42)
assert_greater(score_precomputed, 0)
# Test without calculating D
- score_euclidean = silhouette_score(X, y, metric='euclidean')
+ score_euclidean = silhouette_score(X, y, metric='euclidean',
+ random_state=42)
assert_almost_equal(score_precomputed, score_euclidean)
if X is X_dense:
@@ -75,7 +77,7 @@ def test_cluster_size_1():
# inter-cluster = [arbitrary, arbitrary]
# silhouette = [1., 1.]
- silhouette = silhouette_score(X, labels)
+ silhouette = silhouette_score(X, labels, random_state=42)
assert_false(np.isnan(silhouette))
ss = silhouette_samples(X, labels)
assert_array_equal(ss, [0, .5, .5, 0, 1, 1])
@@ -91,14 +93,14 @@ def test_correct_labelsize():
assert_raises_regexp(ValueError, 'Number of labels is %d\. Valid values are 2 ' 'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
- silhouette_score, X, y)
+ silhouette_score, X, y, random_state=42)
# n_labels = 1
y = np.zeros(X.shape[0])
assert_raises_regexp(ValueError, 'Number of labels is %d\. Valid values are 2 ' 'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
- silhouette_score, X, y)
+ silhouette_score, X, y, random_state=42)
def test_non_encoded_labels():
@@ -106,7 +108,8 @@ def test_non_encoded_labels():
X = dataset.data
labels = dataset.target
assert_equal(
- silhouette_score(X, labels * 2 + 10), silhouette_score(X, labels))
+ silhouette_score(X, labels * 2 + 10, random_state=42),
+ silhouette_score(X, labels, random_state=42))
assert_array_equal( silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels))
@@ -116,7 +119,8 @@ def test_non_numpy_labels():
X = dataset.data
y = dataset.target
assert_equal(
- silhouette_score(list(X), list(y)), silhouette_score(X, y))
+ silhouette_score(list(X), list(y), random_state=42),
+ silhouette_score(X, y, random_state=42))
def test_calinski_harabaz_score():
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 461bdadf3d6e5..5a5b33b73a344 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -180,11 +180,11 @@ def test_check_scoring_gridsearchcv():
# test that check_scoring works on GridSearchCV and pipeline.
# slightly redundant non-regression test.
- grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
+ grid = GridSearchCV(LinearSVC(random_state=42), param_grid={'C': [.1, 1]})
scorer = check_scoring(grid, "f1")
assert_true(isinstance(scorer, _PredictScorer))
- pipe = make_pipeline(LinearSVC())
+ pipe = make_pipeline(LinearSVC(random_state=42))
scorer = check_scoring(pipe, "f1")
assert_true(isinstance(scorer, _PredictScorer))
@@ -252,7 +252,7 @@ def test_regression_scorers():
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
- clf = Ridge()
+ clf = Ridge(random_state=42)
clf.fit(X_train, y_train)
score1 = get_scorer('r2')(clf, X_test, y_test)
score2 = r2_score(y_test, clf.predict(X_test))
@@ -276,14 +276,14 @@ def test_thresholded_scorers():
assert_almost_equal(-logscore, logloss)
# same for an estimator without decision_function
- clf = DecisionTreeClassifier()
+ clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
score1 = get_scorer('roc_auc')(clf, X_test, y_test)
score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
assert_almost_equal(score1, score2)
# test with a regressor (no decision_function)
- reg = DecisionTreeRegressor()
+ reg = DecisionTreeRegressor(random_state=42)
reg.fit(X_train, y_train)
score1 = get_scorer('roc_auc')(reg, X_test, y_test)
score2 = roc_auc_score(y_test, reg.predict(X_test))
@@ -304,7 +304,7 @@ def test_thresholded_scorers_multilabel_indicator_data():
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Multi-output multi-class predict_proba
- clf = DecisionTreeClassifier()
+ clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_proba = clf.predict_proba(X_test)
score1 = get_scorer('roc_auc')(clf, X_test, y_test)
@@ -313,7 +313,7 @@ def test_thresholded_scorers_multilabel_indicator_data():
# Multi-output multi-class decision_function
# TODO Is there any yet?
- clf = DecisionTreeClassifier()
+ clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
clf._predict_proba = clf.predict_proba
clf.predict_proba = None
@@ -325,7 +325,7 @@ def test_thresholded_scorers_multilabel_indicator_data():
assert_almost_equal(score1, score2)
# Multilabel predict_proba
- clf = OneVsRestClassifier(DecisionTreeClassifier())
+ clf = OneVsRestClassifier(DecisionTreeClassifier(random_state=42))
clf.fit(X_train, y_train)
score1 = get_scorer('roc_auc')(clf, X_test, y_test)
score2 = roc_auc_score(y_test, clf.predict_proba(X_test))
@@ -343,7 +343,7 @@ def test_supervised_cluster_scorers():
# Test clustering scorers against gold standard labeling.
X, y = make_blobs(random_state=0, centers=2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
- km = KMeans(n_clusters=3)
+ km = KMeans(n_clusters=3, random_state=42)
km.fit(X_train)
for name in CLUSTER_SCORERS:
score1 = get_scorer(name)(km, X_test, y_test)
@@ -356,7 +356,7 @@ def test_raises_on_score_list():
# Test that when a list of scores is returned, we raise proper errors.
X, y = make_blobs(random_state=0)
f1_scorer_no_average = make_scorer(f1_score, average=None)
- clf = DecisionTreeClassifier()
+ clf = DecisionTreeClassifier(random_state=42)
assert_raises(ValueError, cross_val_score, clf, X, y, scoring=f1_scorer_no_average)
grid_search = GridSearchCV(clf, scoring=f1_scorer_no_average,
@@ -448,10 +448,11 @@ def test_deprecated_names():
def test_scoring_is_not_metric():
assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
- LogisticRegression(), f1_score)
+ LogisticRegression(random_state=42), f1_score)
assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
- LogisticRegression(), roc_auc_score)
+ LogisticRegression(random_state=42), roc_auc_score)
assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
- Ridge(), r2_score)
+ Ridge(random_state=42), r2_score)
assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
- KMeans(), cluster_module.adjusted_rand_score)
+ KMeans(random_state=42),
+ cluster_module.adjusted_rand_score)
diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py
index 3aba57494cb94..2b7ba1ea4012d 100644
--- a/sklearn/mixture/tests/test_gaussian_mixture.py
+++ b/sklearn/mixture/tests/test_gaussian_mixture.py
@@ -105,7 +105,7 @@ def test_gaussian_mixture_attributes():
X = rng.rand(10, 2)
n_components_bad = 0
- gmm = GaussianMixture(n_components=n_components_bad)
+ gmm = GaussianMixture(n_components=n_components_bad, random_state=42)
assert_raise_message(ValueError, "Invalid value for 'n_components': %d " "Estimation requires at least one component" % n_components_bad, gmm.fit, X)
# covariance_type should be in [spherical, diag, tied, full]
covariance_type_bad = 'bad_covariance_type'
- gmm = GaussianMixture(covariance_type=covariance_type_bad)
+ gmm = GaussianMixture(covariance_type=covariance_type_bad, random_state=42)
assert_raise_message(ValueError, "Invalid value for 'covariance_type': %s " "'covariance_type' should be in "
@@ -122,35 +122,35 @@ def test_gaussian_mixture_attributes():
gmm.fit, X)
tol_bad = -1
- gmm = GaussianMixture(tol=tol_bad)
+ gmm = GaussianMixture(tol=tol_bad, random_state=42)
assert_raise_message(ValueError, "Invalid value for 'tol': %.5f " "Tolerance used by the EM must be non-negative" % tol_bad, gmm.fit, X)
reg_covar_bad = -1
- gmm = GaussianMixture(reg_covar=reg_covar_bad)
+ gmm = GaussianMixture(reg_covar=reg_covar_bad, random_state=42)
assert_raise_message(ValueError, "Invalid value for 'reg_covar': %.5f " "regularization on covariance must be " "non-negative" % reg_covar_bad, gmm.fit, X)
max_iter_bad = 0
- gmm = GaussianMixture(max_iter=max_iter_bad)
+ gmm = GaussianMixture(max_iter=max_iter_bad, random_state=42)
assert_raise_message(ValueError, "Invalid value for 'max_iter': %d " "Estimation requires at least one iteration" % max_iter_bad, gmm.fit, X)
n_init_bad = 0
- gmm = GaussianMixture(n_init=n_init_bad)
+ gmm = GaussianMixture(n_init=n_init_bad, random_state=42)
assert_raise_message(ValueError, "Invalid value for 'n_init': %d " "Estimation requires at least one run" % n_init_bad, gmm.fit, X)
init_params_bad = 'bad_method'
- gmm = GaussianMixture(init_params=init_params_bad)
+ gmm = GaussianMixture(init_params=init_params_bad, random_state=42)
assert_raise_message(ValueError, "Unimplemented initialization method '%s'" % init_params_bad,
@@ -162,7 +162,8 @@ def test_gaussian_mixture_attributes():
gmm = GaussianMixture(n_components=n_components, tol=tol, n_init=n_init, max_iter=max_iter,
reg_covar=reg_covar, covariance_type=covariance_type, - init_params=init_params).fit(X) + init_params=init_params, + random_state=42).fit(X) assert_equal(gmm.n_components, n_components) assert_equal(gmm.covariance_type, covariance_type) @@ -204,7 +205,7 @@ def test_check_weights(): n_components = rand_data.n_components X = rand_data.X['full'] - g = GaussianMixture(n_components=n_components) + g = GaussianMixture(n_components=n_components, random_state=42) # Check bad shape weights_bad_shape = rng.rand(n_components, 1) @@ -237,7 +238,8 @@ def test_check_weights(): # Check good weights matrix weights = rand_data.weights - g = GaussianMixture(weights_init=weights, n_components=n_components) + g = GaussianMixture(weights_init=weights, n_components=n_components, + random_state=42) g.fit(X) assert_array_equal(weights, g.weights_init) @@ -249,7 +251,7 @@ def test_check_means(): n_components, n_features = rand_data.n_components, rand_data.n_features X = rand_data.X['full'] - g = GaussianMixture(n_components=n_components) + g = GaussianMixture(n_components=n_components, random_state=42) # Check means bad shape means_bad_shape = rng.rand(n_components + 1, n_features) diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 055a4c061a7c0..74d9d09ca1c83 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -313,7 +313,7 @@ def test_grid_search_groups(): grid = {'C': [1]} group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(), - GroupShuffleSplit()] + GroupShuffleSplit(random_state=42)] for cv in group_cvs: gs = GridSearchCV(clf, grid, cv=cv) assert_raise_message(ValueError, @@ -321,7 +321,8 @@ def test_grid_search_groups(): gs.fit, X, y) gs.fit(X, y, groups=groups) - non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()] + non_group_cvs = [StratifiedKFold(random_state=42), + StratifiedShuffleSplit(random_state=42)] for cv in non_group_cvs: gs = GridSearchCV(clf, grid, cv=cv) # Should not raise an error @@ -340,7 +341,7 @@ def test_classes__property(): grid_search.classes_) # Test that regressors do not have a classes_ attribute - grid_search = GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]}) + grid_search = GridSearchCV(Ridge(random_state=42), {'alpha': [1.0, 2.0]}) grid_search.fit(X, y) assert_false(hasattr(grid_search, 'classes_')) @@ -363,7 +364,8 @@ def test_trivial_cv_results_attr(): grid_search.fit(X, y) assert_true(hasattr(grid_search, "cv_results_")) - random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1) + random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1, + random_state=42) random_search.fit(X, y) assert_true(hasattr(grid_search, "cv_results_")) @@ -390,7 +392,7 @@ def test_grid_search_error(): # Test that grid search will capture errors on data with different length X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0) - clf = LinearSVC() + clf = LinearSVC(random_state=42) cv = GridSearchCV(clf, {'C': [0.1, 1.0]}) assert_raises(ValueError, cv.fit, X_[:180], y_) @@ -399,11 +401,11 @@ def test_grid_search_one_grid_point(): X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0) param_dict = {"C": [1.0], "kernel": ["rbf"], "gamma": [0.1]} - clf = SVC() + clf = SVC(random_state=42) cv = GridSearchCV(clf, param_dict) cv.fit(X_, y_) - clf = SVC(C=1.0, kernel="rbf", gamma=0.1) + clf = SVC(C=1.0, kernel="rbf", gamma=0.1, random_state=42) clf.fit(X_, y_) assert_array_equal(clf.dual_coef_, 
cv.best_estimator_.dual_coef_) @@ -423,7 +425,7 @@ def test_grid_search_when_param_grid_includes_range(): def test_grid_search_bad_param_grid(): param_dict = {"C": 1.0} - clf = SVC() + clf = SVC(random_state=42) assert_raise_message( ValueError, "Parameter values for parameter (C) need to be a sequence" @@ -431,14 +433,14 @@ def test_grid_search_bad_param_grid(): GridSearchCV, clf, param_dict) param_dict = {"C": []} - clf = SVC() + clf = SVC(random_state=42) assert_raise_message( ValueError, "Parameter values for parameter (C) need to be a non-empty sequence.", GridSearchCV, clf, param_dict) param_dict = {"C": "1,2,3"} - clf = SVC() + clf = SVC(random_state=42) assert_raise_message( ValueError, "Parameter values for parameter (C) need to be a sequence" @@ -446,7 +448,7 @@ def test_grid_search_bad_param_grid(): GridSearchCV, clf, param_dict) param_dict = {"C": np.ones(6).reshape(3, 2)} - clf = SVC() + clf = SVC(random_state=42) assert_raises(ValueError, GridSearchCV, clf, param_dict) @@ -454,14 +456,14 @@ def test_grid_search_sparse(): # Test that grid search works with both dense and sparse matrices X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0) - clf = LinearSVC() + clf = LinearSVC(random_state=42) cv = GridSearchCV(clf, {'C': [0.1, 1.0]}) cv.fit(X_[:180], y_[:180]) y_pred = cv.predict(X_[180:]) C = cv.best_estimator_.C X_ = sp.csr_matrix(X_) - clf = LinearSVC() + clf = LinearSVC(random_state=42) cv = GridSearchCV(clf, {'C': [0.1, 1.0]}) cv.fit(X_[:180].tocoo(), y_[:180]) y_pred2 = cv.predict(X_[180:]) @@ -474,14 +476,14 @@ def test_grid_search_sparse(): def test_grid_search_sparse_scoring(): X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0) - clf = LinearSVC() + clf = LinearSVC(random_state=42) cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1") cv.fit(X_[:180], y_[:180]) y_pred = cv.predict(X_[180:]) C = cv.best_estimator_.C X_ = sp.csr_matrix(X_) - clf = LinearSVC() + clf = LinearSVC(random_state=42) cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1") cv.fit(X_[:180], y_[:180]) y_pred2 = cv.predict(X_[180:]) @@ -515,7 +517,7 @@ def test_grid_search_precomputed_kernel(): K_train = np.dot(X_[:180], X_[:180].T) y_train = y_[:180] - clf = SVC(kernel='precomputed') + clf = SVC(kernel='precomputed', random_state=42) cv = GridSearchCV(clf, {'C': [0.1, 1.0]}) cv.fit(K_train, y_train) @@ -539,7 +541,7 @@ def test_grid_search_precomputed_kernel_error_nonsquare(): # training kernel matrix K_train = np.zeros((10, 20)) y_train = np.ones((10, )) - clf = SVC(kernel='precomputed') + clf = SVC(kernel='precomputed', random_state=42) cv = GridSearchCV(clf, {'C': [0.1, 1.0]}) assert_raises(ValueError, cv.fit, K_train, y_train) @@ -589,7 +591,7 @@ def test_X_as_list(): y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(check_X=lambda x: isinstance(x, list)) - cv = KFold(n_splits=3) + cv = KFold(n_splits=3, random_state=42) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv) grid_search.fit(X.tolist(), y).score(X, y) assert_true(hasattr(grid_search, "cv_results_")) @@ -601,7 +603,7 @@ def test_y_as_list(): y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(check_y=lambda x: isinstance(x, list)) - cv = KFold(n_splits=3) + cv = KFold(n_splits=3, random_state=42) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv) grid_search.fit(X, y.tolist()).score(X, y) assert_true(hasattr(grid_search, "cv_results_")) @@ -742,10 +744,10 @@ def test_grid_search_cv_results(): n_grid_points = 6 params = [dict(kernel=['rbf', ], 
C=[1, 10], gamma=[0.1, 1]), dict(kernel=['poly', ], degree=[1, 2])] - grid_search = GridSearchCV(SVC(), cv=n_splits, iid=False, + grid_search = GridSearchCV(SVC(random_state=42), cv=n_splits, iid=False, param_grid=params) grid_search.fit(X, y) - grid_search_iid = GridSearchCV(SVC(), cv=n_splits, iid=True, + grid_search_iid = GridSearchCV(SVC(random_state=42), cv=n_splits, iid=True, param_grid=params) grid_search_iid.fit(X, y) @@ -802,13 +804,17 @@ def test_random_search_cv_results(): n_splits = 3 n_search_iter = 30 params = dict(C=expon(scale=10), gamma=expon(scale=0.1)) - random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, + random_search = RandomizedSearchCV(SVC(random_state=42), + n_iter=n_search_iter, cv=n_splits, iid=False, - param_distributions=params) + param_distributions=params, + random_state=42) random_search.fit(X, y) - random_search_iid = RandomizedSearchCV(SVC(), n_iter=n_search_iter, + random_search_iid = RandomizedSearchCV(SVC(random_state=42), + n_iter=n_search_iter, cv=n_splits, iid=True, - param_distributions=params) + param_distributions=params, + random_state=42) random_search_iid.fit(X, y) param_keys = ('param_C', 'param_gamma') @@ -850,10 +856,12 @@ def test_search_iid_param(): # create "cv" for splits cv = [[mask, ~mask], [~mask, mask]] # once with iid=True (default) - grid_search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=cv) - random_search = RandomizedSearchCV(SVC(), n_iter=2, + grid_search = GridSearchCV(SVC(random_state=42), + param_grid={'C': [1, 10]}, cv=cv) + random_search = RandomizedSearchCV(SVC(random_state=42), n_iter=2, param_distributions={'C': [1, 10]}, - cv=cv) + cv=cv, + random_state=42) for search in (grid_search, random_search): search.fit(X, y) assert_true(search.iid) @@ -893,12 +901,13 @@ def test_search_iid_param(): assert_almost_equal(train_std, 0) # once with iid=False - grid_search = GridSearchCV(SVC(), + grid_search = GridSearchCV(SVC(random_state=42), param_grid={'C': [1, 10]}, cv=cv, iid=False) - random_search = RandomizedSearchCV(SVC(), n_iter=2, + random_search = RandomizedSearchCV(SVC(random_state=42), n_iter=2, param_distributions={'C': [1, 10]}, - cv=cv, iid=False) + cv=cv, iid=False, + random_state=42) for search in (grid_search, random_search): search.fit(X, y) @@ -936,9 +945,10 @@ def test_search_cv_results_rank_tie_breaking(): # which would result in a tie of their mean cv-scores param_grid = {'C': [1, 1.001, 0.001]} - grid_search = GridSearchCV(SVC(), param_grid=param_grid) - random_search = RandomizedSearchCV(SVC(), n_iter=3, - param_distributions=param_grid) + grid_search = GridSearchCV(SVC(random_state=42), param_grid=param_grid) + random_search = RandomizedSearchCV(SVC(random_state=42), n_iter=3, + param_distributions=param_grid, + random_state=42) for search in (grid_search, random_search): search.fit(X, y) @@ -966,7 +976,8 @@ def test_search_cv_results_rank_tie_breaking(): def test_search_cv_results_none_param(): X, y = [[1], [2], [3], [4], [5]], [0, 0, 0, 0, 1] - estimators = (DecisionTreeRegressor(), DecisionTreeClassifier()) + estimators = (DecisionTreeRegressor(random_state=42), + DecisionTreeClassifier(random_state=42)) est_parameters = {"random_state": [0, None]} cv = KFold(random_state=0) @@ -984,7 +995,8 @@ def test_search_cv_timing(): y = [0, 1, 1, 0] gs = GridSearchCV(svc, {'C': [0, 1]}, cv=2, error_score=0) - rs = RandomizedSearchCV(svc, {'C': [0, 1]}, cv=2, error_score=0, n_iter=2) + rs = RandomizedSearchCV(svc, {'C': [0, 1]}, cv=2, error_score=0, n_iter=2, + random_state=42) for search in 
(gs, rs): search.fit(X, y) @@ -1017,7 +1029,7 @@ def test_grid_search_correct_score_results(): for cv_i in range(n_splits))) assert_true(all(in1d(expected_keys, result_keys))) - cv = StratifiedKFold(n_splits=n_splits) + cv = StratifiedKFold(n_splits=n_splits, random_state=42) n_splits = grid_search.n_splits_ for candidate_i, C in enumerate(Cs): clf.set_params(C=C) @@ -1045,7 +1057,7 @@ def test_pickle(): grid_search_pickled.predict(X)) random_search = RandomizedSearchCV(clf, {'foo_param': [1, 2, 3]}, - refit=True, n_iter=3) + refit=True, n_iter=3, random_state=42) random_search.fit(X, y) random_search_pickled = pickle.loads(pickle.dumps(random_search)) assert_array_almost_equal(random_search.predict(X), @@ -1082,7 +1094,7 @@ def test_grid_search_with_multioutput_data(): # Test with a randomized search for est in estimators: random_search = RandomizedSearchCV(est, est_parameters, - cv=cv, n_iter=3) + cv=cv, n_iter=3, random_state=42) random_search.fit(X, y) res_params = random_search.cv_results_['params'] for cand_i in range(len(res_params)): @@ -1101,7 +1113,7 @@ def test_predict_proba_disabled(): # Test predict_proba when disabled on estimator. X = np.arange(20).reshape(5, -1) y = [0, 0, 1, 1, 1] - clf = SVC(probability=False) + clf = SVC(probability=False, random_state=42) gs = GridSearchCV(clf, {}, cv=2).fit(X, y) assert_false(hasattr(gs, "predict_proba")) @@ -1191,10 +1203,10 @@ def test_grid_search_failing_classifier_raise(): def test_parameters_sampler_replacement(): # raise error if n_iter too large params = {'first': [0, 1], 'second': ['a', 'b', 'c']} - sampler = ParameterSampler(params, n_iter=7) + sampler = ParameterSampler(params, n_iter=7, random_state=42) assert_raises(ValueError, list, sampler) # degenerates to GridSearchCV if n_iter the same as grid_size - sampler = ParameterSampler(params, n_iter=6) + sampler = ParameterSampler(params, n_iter=6, random_state=42) samples = list(sampler) assert_equal(len(samples), 6) for values in ParameterGrid(params): @@ -1211,7 +1223,7 @@ def test_parameters_sampler_replacement(): # doesn't go into infinite loops params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']} - sampler = ParameterSampler(params_distribution, n_iter=7) + sampler = ParameterSampler(params_distribution, n_iter=7, random_state=42) samples = list(sampler) assert_equal(len(samples), 7) @@ -1224,7 +1236,7 @@ def test_stochastic_gradient_loss_param(): } X = np.arange(24).reshape(6, -1) y = [0, 0, 0, 1, 1, 1] - clf = GridSearchCV(estimator=SGDClassifier(loss='hinge'), + clf = GridSearchCV(estimator=SGDClassifier(loss='hinge', random_state=42), param_grid=param_grid) # When the estimator is not fitted, `predict_proba` is not available as the @@ -1239,7 +1251,7 @@ def test_stochastic_gradient_loss_param(): param_grid = { 'loss': ['hinge'], } - clf = GridSearchCV(estimator=SGDClassifier(loss='hinge'), + clf = GridSearchCV(estimator=SGDClassifier(loss='hinge', random_state=42), param_grid=param_grid) assert_false(hasattr(clf, "predict_proba")) clf.fit(X, y) @@ -1270,7 +1282,7 @@ def test_grid_search_cv_splits_consistency(): gs2 = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [0.1, 0.2, 0.3]}, - cv=KFold(n_splits=n_splits)) + cv=KFold(n_splits=n_splits, random_state=42)) gs2.fit(X, y) def _pop_time_keys(cv_results): @@ -1291,7 +1303,8 @@ def _pop_time_keys(cv_results): # Check consistency of folds across the parameters gs = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [0.1, 0.1, 0.2, 0.2]}, - cv=KFold(n_splits=n_splits, shuffle=True)) + 
cv=KFold(n_splits=n_splits, shuffle=True, + random_state=42)) gs.fit(X, y) # As the first two param settings (C=0.1) and the next two param diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index c997ac9d73e5d..a23d487dfd379 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -227,13 +227,13 @@ def test_kfold_valueerrors(): X1 = np.array([[1, 2], [3, 4], [5, 6]]) X2 = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]) # Check that errors are raised if there is not enough samples - assert_raises(ValueError, next, KFold(4).split(X1)) + assert_raises(ValueError, next, KFold(4, random_state=42).split(X1)) # Check that a warning is raised if the least populated class has too few # members. y = np.array([3, 3, -1, -1, 3]) - skf_3 = StratifiedKFold(3) + skf_3 = StratifiedKFold(3, random_state=42) assert_warns_message(Warning, "The least populated class", next, skf_3.split(X2, y)) @@ -273,24 +273,24 @@ def test_kfold_valueerrors(): def test_kfold_indices(): # Check all indices are returned in the test folds X1 = np.ones(18) - kf = KFold(3) + kf = KFold(3, random_state=42) check_cv_coverage(kf, X1, y=None, groups=None, expected_n_splits=3) # Check all indices are returned in the test folds even when equal-sized # folds are not possible X2 = np.ones(17) - kf = KFold(3) + kf = KFold(3, random_state=42) check_cv_coverage(kf, X2, y=None, groups=None, expected_n_splits=3) # Check if get_n_splits returns the number of folds - assert_equal(5, KFold(5).get_n_splits(X2)) + assert_equal(5, KFold(5, random_state=42).get_n_splits(X2)) def test_kfold_no_shuffle(): # Manually check that KFold preserves the data ordering on toy datasets X2 = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] - splits = KFold(2).split(X2[:-1]) + splits = KFold(2, random_state=42).split(X2[:-1]) train, test = next(splits) assert_array_equal(test, [0, 1]) assert_array_equal(train, [2, 3]) @@ -299,7 +299,7 @@ def test_kfold_no_shuffle(): assert_array_equal(test, [2, 3]) assert_array_equal(train, [0, 1]) - splits = KFold(2).split(X2) + splits = KFold(2, random_state=42).split(X2) train, test = next(splits) assert_array_equal(test, [0, 1, 2]) assert_array_equal(train, [3, 4]) @@ -314,7 +314,7 @@ def test_stratified_kfold_no_shuffle(): # as possible on toy datasets in order to avoid hiding sample dependencies # when possible X, y = np.ones(4), [1, 1, 0, 0] - splits = StratifiedKFold(2).split(X, y) + splits = StratifiedKFold(2, random_state=42).split(X, y) train, test = next(splits) assert_array_equal(test, [0, 2]) assert_array_equal(train, [1, 3]) @@ -324,7 +324,7 @@ def test_stratified_kfold_no_shuffle(): assert_array_equal(train, [0, 2]) X, y = np.ones(7), [1, 1, 1, 0, 0, 0, 0] - splits = StratifiedKFold(2).split(X, y) + splits = StratifiedKFold(2, random_state=42).split(X, y) train, test = next(splits) assert_array_equal(test, [0, 1, 3, 4]) assert_array_equal(train, [2, 5, 6]) @@ -334,15 +334,15 @@ def test_stratified_kfold_no_shuffle(): assert_array_equal(train, [0, 1, 3, 4]) # Check if get_n_splits returns the number of folds - assert_equal(5, StratifiedKFold(5).get_n_splits(X, y)) + assert_equal(5, StratifiedKFold(5, random_state=42).get_n_splits(X, y)) # Make sure string labels are also supported X = np.ones(7) y1 = ['1', '1', '1', '0', '0', '0', '0'] y2 = [1, 1, 1, 0, 0, 0, 0] np.testing.assert_equal( - list(StratifiedKFold(2).split(X, y1)), - list(StratifiedKFold(2).split(X, y2))) + list(StratifiedKFold(2, 
random_state=42).split(X, y1)), + list(StratifiedKFold(2, random_state=42).split(X, y2))) def test_stratified_kfold_ratios(): @@ -355,7 +355,9 @@ def test_stratified_kfold_ratios(): [1] * int(0.01 * n_samples)) for shuffle in (False, True): - for train, test in StratifiedKFold(5, shuffle=shuffle).split(X, y): + for train, test in StratifiedKFold(5, + shuffle=shuffle, + random_state=42).split(X, y): assert_almost_equal(np.sum(y[train] == 4) / len(train), 0.10, 2) assert_almost_equal(np.sum(y[train] == 0) / len(train), 0.89, 2) assert_almost_equal(np.sum(y[train] == 1) / len(train), 0.01, 2) @@ -367,7 +369,7 @@ def test_stratified_kfold_ratios(): def test_kfold_balance(): # Check that KFold returns folds with balanced sizes for i in range(11, 17): - kf = KFold(5).split(X=np.ones(i)) + kf = KFold(5, random_state=42).split(X=np.ones(i)) sizes = [] for _, test in kf: sizes.append(len(test)) @@ -384,7 +386,7 @@ def test_stratifiedkfold_balance(): y = [0] * 3 + [1] * 14 for shuffle in (True, False): - cv = StratifiedKFold(3, shuffle=shuffle) + cv = StratifiedKFold(3, shuffle=shuffle, random_state=42) for i in range(11, 17): skf = cv.split(X[:i], y[:i]) sizes = [] @@ -397,7 +399,7 @@ def test_stratifiedkfold_balance(): def test_shuffle_kfold(): # Check the indices are shuffled properly - kf = KFold(3) + kf = KFold(3, random_state=42) kf2 = KFold(3, shuffle=True, random_state=0) kf3 = KFold(3, shuffle=True, random_state=1) @@ -474,7 +476,7 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 n_splits = 3 - cv = KFold(n_splits=n_splits, shuffle=False) + cv = KFold(n_splits=n_splits, shuffle=False, random_state=42) mean_score = cross_val_score(model, X, y, cv=cv).mean() assert_greater(0.92, mean_score) assert_greater(mean_score, 0.80) @@ -498,7 +500,7 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 # the estimated mean score is close to the score measured with # non-shuffled KFold - cv = StratifiedKFold(n_splits) + cv = StratifiedKFold(n_splits, random_state=42) mean_score = cross_val_score(model, X, y, cv=cv).mean() assert_greater(0.93, mean_score) assert_greater(mean_score, 0.80) @@ -665,7 +667,8 @@ def test_predefinedsplit_with_kfold_split(): folds = -1 * np.ones(10) kf_train = [] kf_test = [] - for i, (train_ind, test_ind) in enumerate(KFold(5, shuffle=True).split(X)): + for i, (train_ind, test_ind) in enumerate(KFold(5, shuffle=True, + random_state=42).split(X)): kf_train.append(train_ind) kf_test.append(test_ind) folds[test_ind] = i @@ -1043,27 +1046,32 @@ def test_check_cv(): cv = check_cv(3, classifier=False) # Use numpy.testing.assert_equal which recursively compares # lists of lists - np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X))) + np.testing.assert_equal( + list(KFold(3, random_state=42).split(X)), list(cv.split(X))) y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1]) cv = check_cv(3, y_binary, classifier=True) - np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_binary)), - list(cv.split(X, y_binary))) + np.testing.assert_equal( + list(StratifiedKFold(3, random_state=42).split(X, y_binary)), + list(cv.split(X, y_binary))) y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2]) cv = check_cv(3, y_multiclass, classifier=True) - np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_multiclass)), - list(cv.split(X, y_multiclass))) + np.testing.assert_equal( + list(StratifiedKFold(3, random_state=42).split(X, y_multiclass)), + list(cv.split(X, y_multiclass))) X = np.ones(5) y_multilabel = np.array([[0, 0, 0, 0], [0, 1, 1, 
0], [0, 0, 0, 1], [1, 1, 0, 1], [0, 0, 1, 0]]) cv = check_cv(3, y_multilabel, classifier=True) - np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X))) + np.testing.assert_equal( + list(KFold(3, random_state=42).split(X)), list(cv.split(X))) y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]]) cv = check_cv(3, y_multioutput, classifier=True) - np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X))) + np.testing.assert_equal(list(KFold(3, random_state=42).split(X)), + list(cv.split(X))) # Check if the old style classes are wrapped to have a split method X = np.ones(9) @@ -1095,7 +1103,7 @@ def test_cv_iterable_wrapper(): # Check if get_n_splits works correctly assert_equal(len(cv), wrapped_old_skf.get_n_splits()) - kf_iter = KFold(n_splits=5).split(X, y) + kf_iter = KFold(n_splits=5, random_state=42).split(X, y) kf_iter_wrapped = check_cv(kf_iter) # Since the wrapped iterable is enlisted and stored, # split can be called any number of times to produce @@ -1104,7 +1112,9 @@ def test_cv_iterable_wrapper(): list(kf_iter_wrapped.split(X, y))) # If the splits are randomized, successive calls to split yields different # results - kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y) + kf_randomized_iter = KFold(n_splits=5, + shuffle=True, + random_state=42).split(X, y) kf_randomized_iter_wrapped = check_cv(kf_randomized_iter) np.testing.assert_equal(list(kf_randomized_iter_wrapped.split(X, y)), list(kf_randomized_iter_wrapped.split(X, y))) @@ -1255,7 +1265,8 @@ def test_nested_cv(): X, y = make_classification(n_samples=15, n_classes=2, random_state=0) groups = rng.randint(0, 5, 15) - cvs = [LeaveOneGroupOut(), LeaveOneOut(), GroupKFold(), StratifiedKFold(), + cvs = [LeaveOneGroupOut(), LeaveOneOut(), GroupKFold(), + StratifiedKFold(random_state=42), StratifiedShuffleSplit(n_splits=3, random_state=0)] for inner_cv, outer_cv in combinations_with_replacement(cvs, 2): diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index cc6f5973a0b09..c9e64ed9d142a 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -253,10 +253,10 @@ def test_cross_val_score_predict_groups(): # And also check if groups is correctly passed to the cv object X, y = make_classification(n_samples=20, n_classes=2, random_state=0) - clf = SVC(kernel="linear") + clf = SVC(kernel="linear", random_state=42) group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(), - GroupShuffleSplit()] + GroupShuffleSplit(random_state=42)] for cv in group_cvs: assert_raise_message(ValueError, "The groups parameter should not be None", @@ -286,12 +286,12 @@ def test_cross_val_score_pandas(): def test_cross_val_score_mask(): # test that cross_val_score works with boolean masks - svm = SVC(kernel="linear") + svm = SVC(kernel="linear", random_state=42) iris = load_iris() X, y = iris.data, iris.target - kfold = KFold(5) + kfold = KFold(5, random_state=42) scores_indices = cross_val_score(svm, X, y, cv=kfold) - kfold = KFold(5) + kfold = KFold(5, random_state=42) cv_masks = [] for train, test in kfold.split(X, y): mask_train = np.zeros(len(y), dtype=np.bool) @@ -305,22 +305,22 @@ def test_cross_val_score_mask(): def test_cross_val_score_precomputed(): # test for svm with precomputed kernel - svm = SVC(kernel="precomputed") + svm = SVC(kernel="precomputed", random_state=42) iris = load_iris() X, y = iris.data, iris.target linear_kernel = np.dot(X, X.T) score_precomputed = 
cross_val_score(svm, linear_kernel, y) - svm = SVC(kernel="linear") + svm = SVC(kernel="linear", random_state=42) score_linear = cross_val_score(svm, X, y) assert_array_almost_equal(score_precomputed, score_linear) # test with callable - svm = SVC(kernel=lambda x, y: np.dot(x, y.T)) + svm = SVC(kernel=lambda x, y: np.dot(x, y.T), random_state=42) score_callable = cross_val_score(svm, X, y) assert_array_almost_equal(score_precomputed, score_callable) # Error raised for non-square X - svm = SVC(kernel="precomputed") + svm = SVC(kernel="precomputed", random_state=42) assert_raises(ValueError, cross_val_score, svm, X, y) # test error is raised when the precomputed kernel is not array-like @@ -385,7 +385,7 @@ class BrokenEstimator: def test_cross_val_score_with_score_func_classification(): iris = load_iris() - clf = SVC(kernel='linear') + clf = SVC(kernel='linear', random_state=42) # Default score (should be the accuracy score) scores = cross_val_score(clf, iris.data, iris.target, cv=5) @@ -407,7 +407,7 @@ def test_cross_val_score_with_score_func_classification(): def test_cross_val_score_with_score_func_regression(): X, y = make_regression(n_samples=30, n_features=20, n_informative=5, random_state=0) - reg = Ridge() + reg = Ridge(random_state=42) # Default score of the Ridge regression estimator scores = cross_val_score(reg, X, y, cv=5) @@ -435,8 +435,8 @@ def test_permutation_score(): X = iris.data X_sparse = coo_matrix(X) y = iris.target - svm = SVC(kernel='linear') - cv = StratifiedKFold(2) + svm = SVC(kernel='linear', random_state=42) + cv = StratifiedKFold(2, random_state=42) score, scores, pvalue = permutation_test_score( svm, X, y, n_permutations=30, cv=cv, scoring="accuracy") @@ -450,8 +450,8 @@ def test_permutation_score(): assert_true(pvalue_group == pvalue) # check that we obtain the same results with a sparse representation - svm_sparse = SVC(kernel='linear') - cv_sparse = StratifiedKFold(2) + svm_sparse = SVC(kernel='linear', random_state=42) + cv_sparse = StratifiedKFold(2, random_state=42) score_group, _, pvalue_group = permutation_test_score( svm_sparse, X_sparse, y, n_permutations=30, cv=cv_sparse, scoring="accuracy", groups=np.ones(y.size), random_state=0) @@ -524,9 +524,9 @@ def test_cross_val_score_multilabel(): def test_cross_val_predict(): boston = load_boston() X, y = boston.data, boston.target - cv = KFold() + cv = KFold(random_state=42) - est = Ridge() + est = Ridge(random_state=42) # Naive loop (should be same as cross_val_predict): preds2 = np.zeros_like(y) @@ -550,7 +550,7 @@ def test_cross_val_predict(): preds = cross_val_predict(est, Xsp, y) assert_array_almost_equal(len(preds), len(y)) - preds = cross_val_predict(KMeans(), X) + preds = cross_val_predict(KMeans(random_state=42), X) assert_equal(len(preds), len(y)) class BadCV(): @@ -636,9 +636,10 @@ def test_learning_curve(): for shuffle_train in [False, True]: with warnings.catch_warnings(record=True) as w: train_sizes, train_scores, test_scores = learning_curve( - estimator, X, y, cv=KFold(n_splits=n_splits), + estimator, X, y, cv=KFold(n_splits=n_splits, random_state=42), train_sizes=np.linspace(0.1, 1.0, 10), - shuffle=shuffle_train) + shuffle=shuffle_train, + random_state=42) if len(w) > 0: raise RuntimeError("Unexpected warning: %r" % w[0].message) assert_equal(train_scores.shape, (10, 3)) @@ -655,7 +656,8 @@ def test_learning_curve(): estimator, X, y, cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples), train_sizes=np.linspace(0.1, 1.0, 10), - shuffle=shuffle_train) + shuffle=shuffle_train, + 
random_state=42) if len(w) > 0: raise RuntimeError("Unexpected warning: %r" % w[0].message) assert_array_almost_equal(train_scores2, train_scores) @@ -668,7 +670,8 @@ def test_learning_curve_unsupervised(): n_clusters_per_class=1, random_state=0) estimator = MockImprovingEstimator(20) train_sizes, train_scores, test_scores = learning_curve( - estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10)) + estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10), + random_state=42) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10)) @@ -686,7 +689,7 @@ def test_learning_curve_verbose(): sys.stdout = StringIO() try: train_sizes, train_scores, test_scores = \ - learning_curve(estimator, X, y, cv=3, verbose=1) + learning_curve(estimator, X, y, cv=3, verbose=1, random_state=42) finally: out = sys.stdout.getvalue() sys.stdout.close() @@ -713,7 +716,8 @@ def test_learning_curve_incremental_learning(): for shuffle_train in [False, True]: train_sizes, train_scores, test_scores = learning_curve( estimator, X, y, cv=3, exploit_incremental_learning=True, - train_sizes=np.linspace(0.1, 1.0, 10), shuffle=shuffle_train) + train_sizes=np.linspace(0.1, 1.0, 10), shuffle=shuffle_train, + random_state=42) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10)) @@ -728,7 +732,8 @@ def test_learning_curve_incremental_learning_unsupervised(): estimator = MockIncrementalImprovingEstimator(20) train_sizes, train_scores, test_scores = learning_curve( estimator, X, y=None, cv=3, exploit_incremental_learning=True, - train_sizes=np.linspace(0.1, 1.0, 10)) + train_sizes=np.linspace(0.1, 1.0, 10), + random_state=42) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10)) @@ -741,16 +746,19 @@ def test_learning_curve_batch_and_incremental_learning_are_equal(): n_redundant=0, n_classes=2, n_clusters_per_class=1, random_state=0) train_sizes = np.linspace(0.2, 1.0, 5) - estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False) + estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False, + random_state=42) train_sizes_inc, train_scores_inc, test_scores_inc = \ learning_curve( estimator, X, y, train_sizes=train_sizes, - cv=3, exploit_incremental_learning=True) + cv=3, exploit_incremental_learning=True, + random_state=42) train_sizes_batch, train_scores_batch, test_scores_batch = \ learning_curve( estimator, X, y, cv=3, train_sizes=train_sizes, - exploit_incremental_learning=False) + exploit_incremental_learning=False, + random_state=42) assert_array_equal(train_sizes_inc, train_sizes_batch) assert_array_almost_equal(train_scores_inc.mean(axis=1), @@ -792,9 +800,10 @@ def test_learning_curve_with_boolean_indices(): n_redundant=0, n_classes=2, n_clusters_per_class=1, random_state=0) estimator = MockImprovingEstimator(20) - cv = KFold(n_splits=3) + cv = KFold(n_splits=3, random_state=42) train_sizes, train_scores, test_scores = learning_curve( - estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10)) + estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10), + random_state=42) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10)) @@ -812,7 +821,7 @@ def test_learning_curve_with_shuffle(): groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4]) # Splits on 
these groups fail without shuffle as the first iteration # of the learning curve doesn't contain label 4 in the training set. - estimator = PassiveAggressiveClassifier(shuffle=False) + estimator = PassiveAggressiveClassifier(shuffle=False, random_state=42) cv = GroupKFold(n_splits=2) train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve( @@ -870,7 +879,8 @@ def test_validation_curve_cv_splits_consistency(): scores2 = validation_curve(SVC(kernel='linear', random_state=0), X, y, 'C', [0.1, 0.1, 0.2, 0.2], - cv=KFold(n_splits=n_splits, shuffle=True)) + cv=KFold(n_splits=n_splits, shuffle=True, + random_state=42)) # For scores2, compare the 1st and 2nd parameter's scores # (Since the C value for 1st two param setting is 0.1, they must be @@ -880,7 +890,8 @@ def test_validation_curve_cv_splits_consistency(): scores3 = validation_curve(SVC(kernel='linear', random_state=0), X, y, 'C', [0.1, 0.1, 0.2, 0.2], - cv=KFold(n_splits=n_splits)) + cv=KFold(n_splits=n_splits, + random_state=42)) # OneTimeSplitter is basically unshuffled KFold(n_splits=5). Sanity check. assert_array_almost_equal(np.array(scores3), np.array(scores1)) @@ -908,7 +919,7 @@ def test_cross_val_predict_sparse_prediction(): random_state=1) X_sparse = csr_matrix(X) y_sparse = csr_matrix(y) - classif = OneVsRestClassifier(SVC(kernel='linear')) + classif = OneVsRestClassifier(SVC(kernel='linear', random_state=42)) preds = cross_val_predict(classif, X, y, cv=10) preds_sparse = cross_val_predict(classif, X_sparse, y_sparse, cv=10) preds_sparse = preds_sparse.toarray() @@ -921,7 +932,7 @@ def check_cross_val_predict_with_method(est): X, y = shuffle(X, y, random_state=0) classes = len(set(y)) - kfold = KFold(len(iris.target)) + kfold = KFold(len(iris.target), random_state=42) methods = ['decision_function', 'predict_proba', 'predict_log_proba'] for method in methods: @@ -955,7 +966,7 @@ def check_cross_val_predict_with_method(est): def test_cross_val_predict_with_method(): - check_cross_val_predict_with_method(LogisticRegression()) + check_cross_val_predict_with_method(LogisticRegression(random_state=42)) def test_gridsearchcv_cross_val_predict_with_method(): @@ -990,14 +1001,14 @@ def test_cross_val_predict_class_subset(): y = np.array([0, 0, 1, 2]) classes = 3 - kfold3 = KFold(n_splits=3) - kfold4 = KFold(n_splits=4) + kfold3 = KFold(n_splits=3, random_state=42) + kfold4 = KFold(n_splits=4, random_state=42) le = LabelEncoder() methods = ['decision_function', 'predict_proba', 'predict_log_proba'] for method in methods: - est = LogisticRegression() + est = LogisticRegression(random_state=42) # Test with n_splits=3 predictions = cross_val_predict(est, X, y, method=method, diff --git a/sklearn/neighbors/tests/test_approximate.py b/sklearn/neighbors/tests/test_approximate.py index b5f6260f314a9..e082ad12830dd 100644 --- a/sklearn/neighbors/tests/test_approximate.py +++ b/sklearn/neighbors/tests/test_approximate.py @@ -38,7 +38,7 @@ def test_neighbors_accuracy_with_n_candidates(): X = rng.rand(n_samples, n_features) for i, n_candidates in enumerate(n_candidates_values): - lshf = LSHForest(n_candidates=n_candidates) + lshf = LSHForest(n_candidates=n_candidates, random_state=42) ignore_warnings(lshf.fit)(X) for j in range(n_iter): query = X[rng.randint(0, n_samples)].reshape(1, -1) @@ -74,7 +74,7 @@ def test_neighbors_accuracy_with_n_estimators(): X = rng.rand(n_samples, n_features) for i, t in enumerate(n_estimators): - lshf = LSHForest(n_candidates=500, n_estimators=t) + lshf = LSHForest(n_candidates=500, n_estimators=t, 
random_state=42) ignore_warnings(lshf.fit)(X) for j in range(n_iter): query = X[rng.randint(0, n_samples)].reshape(1, -1) @@ -108,7 +108,7 @@ def test_kneighbors(): rng = np.random.RandomState(42) X = rng.rand(n_samples, n_features) - lshf = LSHForest(min_hash_match=0) + lshf = LSHForest(min_hash_match=0, random_state=42) # Test unfitted estimator assert_raises(ValueError, lshf.kneighbors, X[0]) @@ -159,7 +159,7 @@ def test_radius_neighbors(): rng = np.random.RandomState(42) X = rng.rand(n_samples, n_features) - lshf = LSHForest() + lshf = LSHForest(random_state=42) # Test unfitted estimator assert_raises(ValueError, lshf.radius_neighbors, X[0]) @@ -288,7 +288,7 @@ def test_distances(): rng = np.random.RandomState(42) X = rng.rand(n_samples, n_features) - lshf = LSHForest() + lshf = LSHForest(random_state=42) ignore_warnings(lshf.fit)(X) for i in range(n_iter): @@ -314,7 +314,7 @@ def test_fit(): rng = np.random.RandomState(42) X = rng.rand(n_samples, n_features) - lshf = LSHForest(n_estimators=n_estimators) + lshf = LSHForest(n_estimators=n_estimators, random_state=42) ignore_warnings(lshf.fit)(X) # _input_array = X @@ -343,7 +343,7 @@ def test_partial_fit(): X = rng.rand(n_samples, n_features) X_partial_fit = rng.rand(n_samples_partial_fit, n_features) - lshf = LSHForest() + lshf = LSHForest(random_state=42) # Test unfitted estimator ignore_warnings(lshf.partial_fit)(X) @@ -407,7 +407,7 @@ def test_candidates(): X_test = np.array([7, 10, 3], dtype=np.float32).reshape(1, -1) # For zero candidates - lshf = LSHForest(min_hash_match=32) + lshf = LSHForest(min_hash_match=32, random_state=42) ignore_warnings(lshf.fit)(X_train) message = ("Number of candidates is not sufficient to retrieve" @@ -421,7 +421,7 @@ def test_candidates(): assert_equal(distances.shape[1], 3) # For candidates less than n_neighbors - lshf = LSHForest(min_hash_match=31) + lshf = LSHForest(min_hash_match=31, random_state=42) ignore_warnings(lshf.fit)(X_train) message = ("Number of candidates is not sufficient to retrieve" @@ -443,7 +443,7 @@ def test_graphs(): for n_samples in n_samples_sizes: X = rng.rand(n_samples, n_features) - lshf = LSHForest(min_hash_match=0) + lshf = LSHForest(min_hash_match=0, random_state=42) ignore_warnings(lshf.fit)(X) kneighbors_graph = lshf.kneighbors_graph(X) diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index fad90d77193d2..4e44816ff4b93 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -274,7 +274,8 @@ def test_learning_rate_warmstart(): for learning_rate in ["invscaling", "constant"]: mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=4, learning_rate=learning_rate, max_iter=1, - power_t=0.25, warm_start=True) + power_t=0.25, warm_start=True, + random_state=42) with ignore_warnings(category=ConvergenceWarning): mlp.fit(X, y) prev_eta = mlp._optimizer.learning_rate @@ -310,7 +311,7 @@ def test_multilabel_classification(): def test_multioutput_regression(): # Test that multi-output regression works as expected - X, y = make_regression(n_samples=200, n_targets=5) + X, y = make_regression(n_samples=200, n_targets=5, random_state=42) mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200, random_state=1) mlp.fit(X, y) @@ -321,7 +322,7 @@ def test_partial_fit_classes_error(): # Tests that passing different classes to partial_fit raises an error X = [[3, 2]] y = [0] - clf = MLPClassifier(solver='sgd') + clf = MLPClassifier(solver='sgd', random_state=42) clf.partial_fit(X, y, 
classes=[0, 1]) assert_raises(ValueError, clf.partial_fit, X, y, classes=[1, 2]) @@ -392,10 +393,12 @@ def test_partial_fit_errors(): # no classes passed assert_raises(ValueError, - MLPClassifier(solver='sgd').partial_fit, X, y, classes=[2]) + MLPClassifier(solver='sgd', random_state=42) + .partial_fit, X, y, classes=[2]) # lbfgs doesn't support partial_fit - assert_false(hasattr(MLPClassifier(solver='lbfgs'), 'partial_fit')) + assert_false(hasattr(MLPClassifier(solver='lbfgs', random_state=42), + 'partial_fit')) def test_params_errors(): @@ -431,7 +434,7 @@ def test_predict_proba_binary(): X = X_digits_binary[:50] y = y_digits_binary[:50] - clf = MLPClassifier(hidden_layer_sizes=5) + clf = MLPClassifier(hidden_layer_sizes=5, random_state=42) with ignore_warnings(category=ConvergenceWarning): clf.fit(X, y) y_proba = clf.predict_proba(X) @@ -454,7 +457,7 @@ def test_predict_proba_multiclass(): X = X_digits_multi[:10] y = y_digits_multi[:10] - clf = MLPClassifier(hidden_layer_sizes=5) + clf = MLPClassifier(hidden_layer_sizes=5, random_state=42) with ignore_warnings(category=ConvergenceWarning): clf.fit(X, y) y_proba = clf.predict_proba(X) @@ -516,7 +519,7 @@ def test_tolerance(): # It should force the solver to exit the loop when it converges. X = [[3, 2], [1, 6]] y = [1, 0] - clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd') + clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd', random_state=42) clf.fit(X, y) assert_greater(clf.max_iter, clf.n_iter_) @@ -526,7 +529,7 @@ def test_verbose_sgd(): X = [[3, 2], [1, 6]] y = [1, 0] clf = MLPClassifier(solver='sgd', max_iter=2, verbose=10, - hidden_layer_sizes=2) + hidden_layer_sizes=2, random_state=42) old_stdout = sys.stdout sys.stdout = output = StringIO() @@ -543,7 +546,7 @@ def test_early_stopping(): y = y_digits_binary[:100] tol = 0.2 clf = MLPClassifier(tol=tol, max_iter=3000, solver='sgd', - early_stopping=True) + early_stopping=True, random_state=42) clf.fit(X, y) assert_greater(clf.max_iter, clf.n_iter_) @@ -558,7 +561,7 @@ def test_adaptive_learning_rate(): X = [[3, 2], [1, 6]] y = [1, 0] clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd', - learning_rate='adaptive') + learning_rate='adaptive', random_state=42) clf.fit(X, y) assert_greater(clf.max_iter, clf.n_iter_) assert_greater(1e-6, clf._optimizer.learning_rate) @@ -577,13 +580,14 @@ def test_warm_start(): # No error raised clf = MLPClassifier(hidden_layer_sizes=2, solver='lbfgs', - warm_start=True).fit(X, y) + warm_start=True, random_state=42).fit(X, y) clf.fit(X, y) clf.fit(X, y_3classes) for y_i in (y_2classes, y_3classes_alt, y_4classes, y_5classes): clf = MLPClassifier(hidden_layer_sizes=2, solver='lbfgs', - warm_start=True).fit(X, y) + warm_start=True, + random_state=42).fit(X, y) message = ('warm_start can only be used where `y` has the same ' 'classes as in the previous call to fit.' ' Previously got [0 1 2], `y` has %s' % np.unique(y_i)) diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py index bf171b7fd2555..c4dafc7c491dd 100644 --- a/sklearn/neural_network/tests/test_rbm.py +++ b/sklearn/neural_network/tests/test_rbm.py @@ -61,7 +61,7 @@ def test_transform(): def test_small_sparse(): # BernoulliRBM should work on small sparse matrices. 
     X = csr_matrix(Xdigits[:4])
-    BernoulliRBM().fit(X)  # no exception
+    BernoulliRBM(random_state=42).fit(X)  # no exception


 def test_small_sparse_partial_fit():
@@ -165,7 +165,7 @@ def test_score_samples():


 def test_rbm_verbose():
-    rbm = BernoulliRBM(n_iter=2, verbose=10)
+    rbm = BernoulliRBM(n_iter=2, verbose=10, random_state=42)
     old_stdout = sys.stdout
     sys.stdout = StringIO()
     try:
diff --git a/sklearn/preprocessing/tests/test_imputation.py b/sklearn/preprocessing/tests/test_imputation.py
index 1bfbcd3adbaee..88086064684bf 100644
--- a/sklearn/preprocessing/tests/test_imputation.py
+++ b/sklearn/preprocessing/tests/test_imputation.py
@@ -266,8 +266,8 @@ def test_imputation_pipeline_grid_search():
     }

     l = 100
-    X = sparse_random_matrix(l, l, density=0.10)
-    Y = sparse_random_matrix(l, 1, density=0.10).toarray()
+    X = sparse_random_matrix(l, l, density=0.10, random_state=42)
+    Y = sparse_random_matrix(l, 1, density=0.10, random_state=42).toarray()

     gs = GridSearchCV(pipeline, parameters)
     gs.fit(X, Y)
@@ -277,7 +277,7 @@ def test_imputation_pickle():
     import pickle

     l = 100
-    X = sparse_random_matrix(l, l, density=0.10)
+    X = sparse_random_matrix(l, l, density=0.10, random_state=42)

     for strategy in ["mean", "median", "most_frequent"]:
         imputer = Imputer(missing_values=0, strategy=strategy)
diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py
index 7a280d22a6a81..00e596ee6fe2a 100644
--- a/sklearn/svm/tests/test_bounds.py
+++ b/sklearn/svm/tests/test_bounds.py
@@ -46,9 +46,9 @@ def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=None):
     min_c = l1_min_c(X, y, loss, fit_intercept, intercept_scaling)

     clf = {
-        'log': LogisticRegression(penalty='l1'),
+        'log': LogisticRegression(penalty='l1', random_state=42),
         'squared_hinge': LinearSVC(loss='squared_hinge',
-                                   penalty='l1', dual=False),
+                                   penalty='l1', dual=False, random_state=42),
     }[loss]

     clf.fit_intercept = fit_intercept
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 0f85be117a9ac..14d6c27ba40ef 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -449,7 +449,7 @@ def test_auto_weight():
     assert_true(np.argmax(class_weights) == 2)

     for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
-                LogisticRegression()):
+                LogisticRegression(random_state=42)):
         # check that score is better when class='balanced' is set.
y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X) clf.set_params(class_weight='balanced') @@ -543,7 +543,7 @@ def test_linearsvc_parameters(): losses = ['hinge', 'squared_hinge', 'logistic_regression', 'foo'] penalties, duals = ['l1', 'l2', 'bar'], [True, False] - X, y = make_classification(n_samples=5, n_features=5) + X, y = make_classification(n_samples=5, n_features=5, random_state=42) for loss, penalty, dual in itertools.product(losses, penalties, duals): clf = svm.LinearSVC(penalty=penalty, loss=loss, dual=dual) diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 740e83105c991..68cb9eee339a4 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -232,7 +232,7 @@ def test_get_params_deprecated(): def test_is_classifier(): - svc = SVC() + svc = SVC(random_state=42) assert_true(is_classifier(svc)) assert_true(is_classifier(GridSearchCV(svc, {'C': [0.1, 1]}))) assert_true(is_classifier(Pipeline([('svc', svc)]))) @@ -242,7 +242,7 @@ def test_is_classifier(): def test_set_params(): # test nested estimator parameter setting - clf = Pipeline([("svc", SVC())]) + clf = Pipeline([("svc", SVC(random_state=42))]) # non-existing parameter in svc assert_raises(ValueError, clf.set_params, svc__stupid_param=True) # non-existing parameter of pipeline @@ -258,8 +258,8 @@ def test_score_sample_weight(): rng = np.random.RandomState(0) # test both ClassifierMixin and RegressorMixin - estimators = [DecisionTreeClassifier(max_depth=2), - DecisionTreeRegressor(max_depth=2)] + estimators = [DecisionTreeClassifier(max_depth=2, random_state=42), + DecisionTreeRegressor(max_depth=2, random_state=42)] sets = [datasets.load_iris(), datasets.load_boston()] @@ -315,7 +315,7 @@ def transform(self, X, y=None): def test_pickle_version_warning_is_not_raised_with_matching_version(): iris = datasets.load_iris() - tree = DecisionTreeClassifier().fit(iris.data, iris.target) + tree = DecisionTreeClassifier(random_state=42).fit(iris.data, iris.target) tree_pickle = pickle.dumps(tree) assert_true(b"version" in tree_pickle) tree_restored = assert_no_warnings(pickle.loads, tree_pickle) @@ -341,7 +341,7 @@ def __getstate__(self): def test_pickle_version_warning_is_issued_upon_different_version(): iris = datasets.load_iris() - tree = TreeBadVersion().fit(iris.data, iris.target) + tree = TreeBadVersion(random_state=42).fit(iris.data, iris.target) tree_pickle_other = pickle.dumps(tree) message = pickle_error_message.format(estimator="TreeBadVersion", old_version="something", diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index e4499e35d5a67..bb19fd0cda8fb 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -96,7 +96,8 @@ def test_calibration(): # base-estimators should provide either decision_function or # predict_proba (most regressors, for instance, should fail) clf_base_regressor = \ - CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid") + CalibratedClassifierCV(RandomForestRegressor(random_state=42), + method="sigmoid") assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train) @@ -129,7 +130,7 @@ def test_calibration_multiclass(): """Test calibration for multiclass """ # test multi-class setting with classifier that implements # only decision function - clf = LinearSVC() + clf = LinearSVC(random_state=42) X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42, centers=3, cluster_std=3.0) @@ -267,7 +268,7 @@ def test_calibration_nan_imputer(): X[0, 0] = np.nan clf = 
Pipeline( [('imputer', Imputer()), - ('rf', RandomForestClassifier(n_estimators=1))]) + ('rf', RandomForestClassifier(n_estimators=1, random_state=42))]) clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic') clf_c.fit(X, y) clf_c.predict(X) @@ -278,8 +279,9 @@ def test_calibration_prob_sum(): # issue #7796 num_classes = 2 X, y = make_classification(n_samples=10, n_features=5, - n_classes=num_classes) - clf = LinearSVC(C=1.0) + n_classes=num_classes, + random_state=42) + clf = LinearSVC(C=1.0, random_state=42) clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut()) clf_prob.fit(X, y) @@ -294,7 +296,7 @@ def test_calibration_less_classes(): # class label X = np.random.randn(10, 5) y = np.arange(10) - clf = LinearSVC(C=1.0) + clf = LinearSVC(C=1.0, random_state=42) cal_clf = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut()) cal_clf.fit(X, y) diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index 4d756bdaa0cf8..bca643e55929d 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -341,7 +341,7 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 digits = load_digits() X, y = digits.data[:800], digits.target[:800] - model = SVC(C=10, gamma=0.005) + model = SVC(C=10, gamma=0.005, random_state=42) n = len(y) cv = cval.KFold(n, 5, shuffle=False) @@ -700,7 +700,7 @@ def test_cross_val_score_pandas(): def test_cross_val_score_mask(): # test that cross_val_score works with boolean masks - svm = SVC(kernel="linear") + svm = SVC(kernel="linear", random_state=42) iris = load_iris() X, y = iris.data, iris.target cv_indices = cval.KFold(len(y), 5) @@ -719,17 +719,17 @@ def test_cross_val_score_mask(): def test_cross_val_score_precomputed(): # test for svm with precomputed kernel - svm = SVC(kernel="precomputed") + svm = SVC(kernel="precomputed", random_state=42) iris = load_iris() X, y = iris.data, iris.target linear_kernel = np.dot(X, X.T) score_precomputed = cval.cross_val_score(svm, linear_kernel, y) - svm = SVC(kernel="linear") + svm = SVC(kernel="linear", random_state=42) score_linear = cval.cross_val_score(svm, X, y) assert_array_equal(score_precomputed, score_linear) # Error raised for non-square X - svm = SVC(kernel="precomputed") + svm = SVC(kernel="precomputed", random_state=42) assert_raises(ValueError, cval.cross_val_score, svm, X, y) # test error is raised when the precomputed kernel is not array-like @@ -878,7 +878,7 @@ def train_test_split_mock_pandas(): def test_cross_val_score_with_score_func_classification(): iris = load_iris() - clf = SVC(kernel='linear') + clf = SVC(kernel='linear', random_state=42) # Default score (should be the accuracy score) scores = cval.cross_val_score(clf, iris.data, iris.target, cv=5) @@ -900,7 +900,7 @@ def test_cross_val_score_with_score_func_classification(): def test_cross_val_score_with_score_func_regression(): X, y = make_regression(n_samples=30, n_features=20, n_informative=5, random_state=0) - reg = Ridge() + reg = Ridge(random_state=42) # Default score of the Ridge regression estimator scores = cval.cross_val_score(reg, X, y, cv=5) @@ -928,7 +928,7 @@ def test_permutation_score(): X = iris.data X_sparse = coo_matrix(X) y = iris.target - svm = SVC(kernel='linear') + svm = SVC(kernel='linear', random_state=42) cv = cval.StratifiedKFold(y, 2) score, scores, pvalue = cval.permutation_test_score( @@ -943,7 +943,7 @@ def test_permutation_score(): assert_true(pvalue_label == pvalue) # check that we obtain the same 
results with a sparse representation
-    svm_sparse = SVC(kernel='linear')
+    svm_sparse = SVC(kernel='linear', random_state=42)
     cv_sparse = cval.StratifiedKFold(y, 2)
     score_label, _, pvalue_label = cval.permutation_test_score(
         svm_sparse, X_sparse, y, n_permutations=30, cv=cv_sparse,
@@ -1036,8 +1036,8 @@ def test_shufflesplit_reproducible():
 def test_safe_split_with_precomputed_kernel():
-    clf = SVC()
-    clfp = SVC(kernel="precomputed")
+    clf = SVC(random_state=42)
+    clfp = SVC(kernel="precomputed", random_state=42)
     iris = load_iris()
     X, y = iris.data, iris.target
@@ -1133,7 +1133,7 @@ def test_cross_val_predict():
     X, y = boston.data, boston.target
     cv = cval.KFold(len(boston.target))
-    est = Ridge()
+    est = Ridge(random_state=42)
     # Naive loop (should be same as cross_val_predict):
     preds2 = np.zeros_like(y)
@@ -1157,7 +1157,7 @@ def test_cross_val_predict():
     preds = cval.cross_val_predict(est, Xsp, y)
     assert_array_almost_equal(len(preds), len(y))
-    preds = cval.cross_val_predict(KMeans(), X)
+    preds = cval.cross_val_predict(KMeans(random_state=42), X)
     assert_equal(len(preds), len(y))
     def bad_cv():
@@ -1168,7 +1168,7 @@ def bad_cv():
 def test_cross_val_predict_input_types():
-    clf = Ridge()
+    clf = Ridge(random_state=42)
     # Smoke test
     predictions = cval.cross_val_predict(clf, X, y)
     assert_equal(predictions.shape, (10,))
@@ -1245,7 +1245,7 @@ def test_cross_val_predict_sparse_prediction():
                                            random_state=1)
     X_sparse = csr_matrix(X)
     y_sparse = csr_matrix(y)
-    classif = OneVsRestClassifier(SVC(kernel='linear'))
+    classif = OneVsRestClassifier(SVC(kernel='linear', random_state=42))
     preds = cval.cross_val_predict(classif, X, y, cv=10)
     preds_sparse = cval.cross_val_predict(classif, X_sparse, y_sparse, cv=10)
     preds_sparse = preds_sparse.toarray()
diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
index 4f2dc86bc52bb..1cafcfc2c90ce 100644
--- a/sklearn/tests/test_dummy.py
+++ b/sklearn/tests/test_dummy.py
@@ -195,13 +195,13 @@ def test_uniform_strategy_multioutput():
 def test_string_labels():
     X = [[0]] * 5
     y = ["paris", "paris", "tokyo", "amsterdam", "berlin"]
-    clf = DummyClassifier(strategy="most_frequent")
+    clf = DummyClassifier(strategy="most_frequent", random_state=42)
     clf.fit(X, y)
     assert_array_equal(clf.predict(X), ["paris"] * 5)
 def test_classifier_exceptions():
-    clf = DummyClassifier(strategy="unknown")
+    clf = DummyClassifier(strategy="unknown", random_state=42)
     assert_raises(ValueError, clf.fit, [], [])
     assert_raises(ValueError, clf.predict, [])
@@ -494,7 +494,7 @@ def test_classification_sample_weight():
     y = [0, 1, 0]
     sample_weight = [0.1, 1., 0.1]
-    clf = DummyClassifier().fit(X, y, sample_weight)
+    clf = DummyClassifier(random_state=42).fit(X, y, sample_weight)
     assert_array_almost_equal(clf.class_prior_, [0.2 / 1.2, 1. / 1.2])
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py
index e6c2e18538163..4c2fac3e393a2 100644
--- a/sklearn/tests/test_grid_search.py
+++ b/sklearn/tests/test_grid_search.py
@@ -248,7 +248,7 @@ def test_grid_search_error():
     # length
     X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
-    clf = LinearSVC()
+    clf = LinearSVC(random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
     assert_raises(ValueError, cv.fit, X_[:180], y_)
@@ -265,7 +265,7 @@ def test_grid_search_iid():
     mask[np.where(y == 2)[0][::2]] = 0
     # this leads to perfect classification on one fold and a score of 1/3 on
     # the other
-    svm = SVC(kernel='linear')
+    svm = SVC(kernel='linear', random_state=42)
     # create "cv" for splits
     cv = [[mask, ~mask], [~mask, mask]]
     # once with iid=True (default)
@@ -296,11 +296,11 @@ def test_grid_search_one_grid_point():
     X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
     param_dict = {"C": [1.0], "kernel": ["rbf"], "gamma": [0.1]}
-    clf = SVC()
+    clf = SVC(random_state=42)
     cv = GridSearchCV(clf, param_dict)
     cv.fit(X_, y_)
-    clf = SVC(C=1.0, kernel="rbf", gamma=0.1)
+    clf = SVC(C=1.0, kernel="rbf", gamma=0.1, random_state=42)
     clf.fit(X_, y_)
     assert_array_equal(clf.dual_coef_, cv.best_estimator_.dual_coef_)
@@ -308,15 +308,15 @@ def test_grid_search_one_grid_point():
 def test_grid_search_bad_param_grid():
     param_dict = {"C": 1.0}
-    clf = SVC()
+    clf = SVC(random_state=42)
     assert_raises(ValueError, GridSearchCV, clf, param_dict)
     param_dict = {"C": []}
-    clf = SVC()
+    clf = SVC(random_state=42)
     assert_raises(ValueError, GridSearchCV, clf, param_dict)
     param_dict = {"C": np.ones(6).reshape(3, 2)}
-    clf = SVC()
+    clf = SVC(random_state=42)
     assert_raises(ValueError, GridSearchCV, clf, param_dict)
@@ -324,14 +324,14 @@ def test_grid_search_sparse():
     # Test that grid search works with both dense and sparse matrices
     X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
-    clf = LinearSVC()
+    clf = LinearSVC(random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
     cv.fit(X_[:180], y_[:180])
     y_pred = cv.predict(X_[180:])
     C = cv.best_estimator_.C
     X_ = sp.csr_matrix(X_)
-    clf = LinearSVC()
+    clf = LinearSVC(random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
     cv.fit(X_[:180].tocoo(), y_[:180])
     y_pred2 = cv.predict(X_[180:])
@@ -344,14 +344,14 @@ def test_grid_search_sparse():
 def test_grid_search_sparse_scoring():
     X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
-    clf = LinearSVC()
+    clf = LinearSVC(random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
     cv.fit(X_[:180], y_[:180])
     y_pred = cv.predict(X_[180:])
     C = cv.best_estimator_.C
     X_ = sp.csr_matrix(X_)
-    clf = LinearSVC()
+    clf = LinearSVC(random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
     cv.fit(X_[:180], y_[:180])
     y_pred2 = cv.predict(X_[180:])
@@ -385,7 +385,7 @@ def test_grid_search_precomputed_kernel():
     K_train = np.dot(X_[:180], X_[:180].T)
     y_train = y_[:180]
-    clf = SVC(kernel='precomputed')
+    clf = SVC(kernel='precomputed', random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
     cv.fit(K_train, y_train)
@@ -409,7 +409,7 @@ def test_grid_search_precomputed_kernel_error_nonsquare():
     # training kernel matrix
     K_train = np.zeros((10, 20))
     y_train = np.ones((10, ))
-    clf = SVC(kernel='precomputed')
+    clf = SVC(kernel='precomputed', random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
     assert_raises(ValueError, cv.fit, K_train, y_train)
@@ -418,7 +418,7 @@ def test_grid_search_precomputed_kernel_error_kernel_function():
     # Test that grid search returns an error when using a kernel_function
     X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
     kernel_function = lambda x1, x2: np.dot(x1, x2.T)
-    clf = SVC(kernel=kernel_function)
+    clf = SVC(kernel=kernel_function, random_state=42)
     cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
     assert_raises(ValueError, cv.fit, X_, y_)
@@ -468,7 +468,7 @@ def test_X_as_list():
     y = np.array([0] * 5 + [1] * 5)
     clf = CheckingClassifier(check_X=lambda x: isinstance(x, list))
-    cv = KFold(n=len(X), n_folds=3)
+    cv = KFold(n=len(X), n_folds=3, random_state=42)
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
     grid_search.fit(X.tolist(), y).score(X, y)
     assert_true(hasattr(grid_search, "grid_scores_"))
@@ -480,7 +480,7 @@ def test_y_as_list():
     y = np.array([0] * 5 + [1] * 5)
     clf = CheckingClassifier(check_y=lambda x: isinstance(x, list))
-    cv = KFold(n=len(X), n_folds=3)
+    cv = KFold(n=len(X), n_folds=3, random_state=42)
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
     grid_search.fit(X, y.tolist()).score(X, y)
     assert_true(hasattr(grid_search, "grid_scores_"))
@@ -568,8 +568,10 @@ def test_randomized_search_grid_scores():
                   gamma=expon(scale=0.1))
     n_cv_iter = 3
     n_search_iter = 30
-    search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter,
-                                param_distributions=params, iid=False)
+    search = RandomizedSearchCV(SVC(random_state=42), n_iter=n_search_iter,
+                                cv=n_cv_iter,
+                                param_distributions=params,
+                                iid=False)
     search.fit(X, y)
     assert_equal(len(search.grid_scores_), n_search_iter)
@@ -606,7 +608,7 @@ def test_grid_search_score_consistency():
     for score in ['f1', 'roc_auc']:
         grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score)
         grid_search.fit(X, y)
-        cv = StratifiedKFold(n_folds=3, y=y)
+        cv = StratifiedKFold(n_folds=3, y=y, random_state=42)
         for C, scores in zip(Cs, grid_search.grid_scores_):
             clf.set_params(C=C)
             scores = scores[2]  # get the separate runs from grid scores
@@ -678,7 +680,7 @@ def test_predict_proba_disabled():
     # Test predict_proba when disabled on estimator.
     X = np.arange(20).reshape(5, -1)
     y = [0, 0, 1, 1, 1]
-    clf = SVC(probability=False)
+    clf = SVC(probability=False, random_state=42)
     gs = GridSearchCV(clf, {}, cv=2).fit(X, y)
     assert_false(hasattr(gs, "predict_proba"))
@@ -800,6 +802,6 @@ def test_classes__property():
                        grid_search.classes_)
     # Test that regressors do not have a classes_ attribute
-    grid_search = GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]})
+    grid_search = GridSearchCV(Ridge(random_state=42), {'alpha': [1.0, 2.0]})
     grid_search.fit(X, y)
     assert_false(hasattr(grid_search, 'classes_'))
diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py
index 72226ffee5933..6b06c7c6d67fd 100644
--- a/sklearn/tests/test_kernel_approximation.py
+++ b/sklearn/tests/test_kernel_approximation.py
@@ -134,11 +134,11 @@ def test_input_validation():
     # No assertions; the old versions would simply crash
     X = [[1, 2], [3, 4], [5, 6]]
     AdditiveChi2Sampler().fit(X).transform(X)
-    SkewedChi2Sampler().fit(X).transform(X)
-    RBFSampler().fit(X).transform(X)
+    SkewedChi2Sampler(random_state=42).fit(X).transform(X)
+    RBFSampler(random_state=42).fit(X).transform(X)
     X = csr_matrix(X)
-    RBFSampler().fit(X).transform(X)
+    RBFSampler(random_state=42).fit(X).transform(X)
 def test_nystroem_approximation():
@@ -147,7 +147,8 @@ def test_nystroem_approximation():
     X = rnd.uniform(size=(10, 4))
     # With n_components = n_samples this is exact
-    X_transformed = Nystroem(n_components=X.shape[0]).fit_transform(X)
+    X_transformed = Nystroem(n_components=X.shape[0],
+                             random_state=42).fit_transform(X)
     K = rbf_kernel(X)
     assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)
@@ -176,7 +177,8 @@ def test_nystroem_singular_kernel():
     X = np.vstack([X] * 2)  # duplicate samples
     gamma = 100
-    N = Nystroem(gamma=gamma, n_components=X.shape[0]).fit(X)
+    N = Nystroem(gamma=gamma, n_components=X.shape[0],
+                 random_state=42).fit(X)
     X_transformed = N.transform(X)
     K = rbf_kernel(X, gamma=gamma)
@@ -192,7 +194,8 @@ def test_nystroem_poly_kernel_params():
     K = polynomial_kernel(X, degree=3.1, coef0=.1)
     nystroem = Nystroem(kernel="polynomial", n_components=X.shape[0],
-                        degree=3.1, coef0=.1)
+                        degree=3.1, coef0=.1,
+                        random_state=42)
     X_transformed = nystroem.fit_transform(X)
     assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)
@@ -212,5 +215,6 @@ def logging_histogram_kernel(x, y, log):
     X = list(X)     # test input validation
     Nystroem(kernel=logging_histogram_kernel, n_components=(n_samples - 1),
-             kernel_params={'log': kernel_log}).fit(X)
+             kernel_params={'log': kernel_log},
+             random_state=42).fit(X)
     assert_equal(len(kernel_log), n_samples * (n_samples - 1) / 2)
diff --git a/sklearn/tests/test_kernel_ridge.py b/sklearn/tests/test_kernel_ridge.py
index 4750a096ac66f..4d710514e267f 100644
--- a/sklearn/tests/test_kernel_ridge.py
+++ b/sklearn/tests/test_kernel_ridge.py
@@ -10,28 +10,31 @@ from sklearn.utils.testing import assert_array_almost_equal
-X, y = make_regression(n_features=10)
+X, y = make_regression(n_features=10, random_state=42)
 Xcsr = sp.csr_matrix(X)
 Xcsc = sp.csc_matrix(X)
 Y = np.array([y, y]).T
 def test_kernel_ridge():
-    pred = Ridge(alpha=1, fit_intercept=False).fit(X, y).predict(X)
+    pred = Ridge(
+        alpha=1, fit_intercept=False, random_state=42).fit(X, y).predict(X)
     pred2 = KernelRidge(kernel="linear", alpha=1).fit(X, y).predict(X)
     assert_array_almost_equal(pred, pred2)
 def test_kernel_ridge_csr():
     pred = Ridge(alpha=1, fit_intercept=False,
-                 solver="cholesky").fit(Xcsr, y).predict(Xcsr)
+                 solver="cholesky",
+                 random_state=42).fit(Xcsr, y).predict(Xcsr)
     pred2 = KernelRidge(kernel="linear", alpha=1).fit(Xcsr, y).predict(Xcsr)
     assert_array_almost_equal(pred, pred2)
 def test_kernel_ridge_csc():
     pred = Ridge(alpha=1, fit_intercept=False,
-                 solver="cholesky").fit(Xcsc, y).predict(Xcsc)
+                 solver="cholesky",
+                 random_state=42).fit(Xcsc, y).predict(Xcsc)
     pred2 = KernelRidge(kernel="linear", alpha=1).fit(Xcsc, y).predict(Xcsc)
     assert_array_almost_equal(pred, pred2)
@@ -39,7 +42,8 @@ def test_kernel_ridge_csc():
 def test_kernel_ridge_singular_kernel():
     # alpha=0 causes a LinAlgError in computing the dual coefficients,
     # which causes a fallback to a lstsq solver. This is tested here.
-    pred = Ridge(alpha=0, fit_intercept=False).fit(X, y).predict(X)
+    pred = Ridge(alpha=0, fit_intercept=False,
+                 random_state=42).fit(X, y).predict(X)
     kr = KernelRidge(kernel="linear", alpha=0)
     ignore_warnings(kr.fit)(X, y)
     pred2 = kr.predict(X)
@@ -66,7 +70,8 @@ def test_kernel_ridge_sample_weights():
     sw = np.random.RandomState(0).rand(X.shape[0])
     pred = Ridge(alpha=1,
-                 fit_intercept=False).fit(X, y, sample_weight=sw).predict(X)
+                 fit_intercept=False,
+                 random_state=42).fit(X, y, sample_weight=sw).predict(X)
     pred2 = KernelRidge(kernel="linear",
                         alpha=1).fit(X, y, sample_weight=sw).predict(X)
     pred3 = KernelRidge(kernel="precomputed",
@@ -76,7 +81,8 @@ def test_kernel_ridge_sample_weights():
 def test_kernel_ridge_multi_output():
-    pred = Ridge(alpha=1, fit_intercept=False).fit(X, Y).predict(X)
+    pred = Ridge(alpha=1, fit_intercept=False,
+                 random_state=42).fit(X, Y).predict(X)
     pred2 = KernelRidge(kernel="linear", alpha=1).fit(X, Y).predict(X)
     assert_array_almost_equal(pred, pred2)
diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py
index 129dba52ac831..614300f587d9e 100644
--- a/sklearn/tests/test_learning_curve.py
+++ b/sklearn/tests/test_learning_curve.py
@@ -208,7 +208,8 @@ def test_learning_curve_batch_and_incremental_learning_are_equal():
                                n_redundant=0, n_classes=2,
                                n_clusters_per_class=1, random_state=0)
     train_sizes = np.linspace(0.2, 1.0, 5)
-    estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False)
+    estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False,
+                                            random_state=42)
     train_sizes_inc, train_scores_inc, test_scores_inc = \
         learning_curve(
@@ -259,7 +260,7 @@ def test_learning_curve_with_boolean_indices():
                                n_redundant=0, n_classes=2,
                                n_clusters_per_class=1, random_state=0)
     estimator = MockImprovingEstimator(20)
-    cv = KFold(n=30, n_folds=3)
+    cv = KFold(n=30, n_folds=3, random_state=42)
     train_sizes, train_scores, test_scores = learning_curve(
         estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10))
     assert_array_equal(train_sizes, np.linspace(2, 20, 10))
diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py
index f0f30cb91ae72..cea8db8dab86a 100644
--- a/sklearn/tests/test_metaestimators.py
+++ b/sklearn/tests/test_metaestimators.py
@@ -16,7 +16,7 @@ class DelegatorData(object):
     def __init__(self, name, construct, skip_methods=(),
-                 fit_args=make_classification()):
+                 fit_args=make_classification(random_state=42)):
         self.name = name
         self.construct = construct
         self.fit_args = fit_args
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index b62e78e87c223..ba3e2af890e8a 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -121,14 +121,14 @@ def test_ovr_partial_fit_exceptions():
 def test_ovr_ovo_regressor():
     # test that ovr and ovo work on regressors which don't have a decision_
     # function
-    ovr = OneVsRestClassifier(DecisionTreeRegressor())
+    ovr = OneVsRestClassifier(DecisionTreeRegressor(random_state=42))
     pred = ovr.fit(iris.data, iris.target).predict(iris.data)
     assert_equal(len(ovr.estimators_), n_classes)
     assert_array_equal(np.unique(pred), [0, 1, 2])
     # we are doing something sensible
     assert_greater(np.mean(pred == iris.target), .9)
-    ovr = OneVsOneClassifier(DecisionTreeRegressor())
+    ovr = OneVsOneClassifier(DecisionTreeRegressor(random_state=42))
     pred = ovr.fit(iris.data, iris.target).predict(iris.data)
     assert_equal(len(ovr.estimators_), n_classes * (n_classes - 1) / 2)
     assert_array_equal(np.unique(pred), [0, 1, 2])
@@ -171,7 +171,8 @@ def test_ovr_fit_predict_sparse():
         assert_array_equal(pred, Y_pred_sprs.toarray())
         # Test decision_function
-        clf_sprs = OneVsRestClassifier(svm.SVC()).fit(X_train, sparse(Y_train))
+        clf_sprs = OneVsRestClassifier(
+            svm.SVC(random_state=42)).fit(X_train, sparse(Y_train))
         dec_pred = (clf_sprs.decision_function(X_test) > 0).astype(int)
         assert_array_equal(dec_pred, clf_sprs.predict(X_test).toarray())
@@ -189,7 +190,7 @@ def test_ovr_always_present():
     y[:, 1] = 1
     y[:, 2] = 1
-    ovr = OneVsRestClassifier(LogisticRegression())
+    ovr = OneVsRestClassifier(LogisticRegression(random_state=42))
     assert_warns(UserWarning, ovr.fit, X, y)
     y_pred = ovr.predict(X)
     assert_array_equal(np.array(y_pred), np.array(y))
@@ -201,7 +202,7 @@ def test_ovr_always_present():
     # y has a constantly absent label
     y = np.zeros((10, 2))
     y[5:, 0] = 1  # variable label
-    ovr = OneVsRestClassifier(LogisticRegression())
+    ovr = OneVsRestClassifier(LogisticRegression(random_state=42))
     assert_warns(UserWarning, ovr.fit, X, y)
     y_pred = ovr.predict_proba(X)
     assert_array_equal(y_pred[:, -1], np.zeros(X.shape[0]))
@@ -220,8 +221,8 @@ def test_ovr_multiclass():
     classes = set("ham eggs spam".split())
     for base_clf in (MultinomialNB(), LinearSVC(random_state=0),
-                     LinearRegression(), Ridge(),
-                     ElasticNet()):
+                     LinearRegression(), Ridge(random_state=42),
+                     ElasticNet(random_state=42)):
         clf = OneVsRestClassifier(base_clf).fit(X, y)
         assert_equal(set(clf.classes_), classes)
         y_pred = clf.predict(np.array([[0, 0, 4]]))[0]
@@ -260,11 +261,11 @@ def conduct_test(base_clf, test_predict_proba=False):
         assert_equal(y_pred, 1)
     for base_clf in (LinearSVC(random_state=0), LinearRegression(),
-                     Ridge(), ElasticNet()):
+                     Ridge(random_state=42), ElasticNet(random_state=42)):
         conduct_test(base_clf)
-    for base_clf in (MultinomialNB(), SVC(probability=True),
-                     LogisticRegression()):
+    for base_clf in (MultinomialNB(), SVC(probability=True, random_state=42),
+                     LogisticRegression(random_state=42)):
         conduct_test(base_clf, test_predict_proba=True)
@@ -278,8 +279,9 @@ def test_ovr_multilabel():
                   [1, 0, 0]])
     for base_clf in (MultinomialNB(), LinearSVC(random_state=0),
-                     LinearRegression(), Ridge(),
-                     ElasticNet(), Lasso(alpha=0.5)):
+                     LinearRegression(), Ridge(random_state=42),
+                     ElasticNet(random_state=42), Lasso(alpha=0.5,
+                                                        random_state=42)):
         clf = OneVsRestClassifier(base_clf).fit(X, y)
         y_pred = clf.predict([[0, 4, 4]])[0]
         assert_array_equal(y_pred, [0, 1, 1])
@@ -419,10 +421,10 @@ def test_ovr_pipeline():
     # Test with pipeline of length one
     # This test is needed because the multiclass estimators may fail to detect
    # the presence of predict_proba or decision_function.
-    clf = Pipeline([("tree", DecisionTreeClassifier())])
+    clf = Pipeline([("tree", DecisionTreeClassifier(random_state=42))])
     ovr_pipe = OneVsRestClassifier(clf)
     ovr_pipe.fit(iris.data, iris.target)
-    ovr = OneVsRestClassifier(DecisionTreeClassifier())
+    ovr = OneVsRestClassifier(DecisionTreeClassifier(random_state=42))
     ovr.fit(iris.data, iris.target)
     assert_array_equal(ovr.predict(iris.data), ovr_pipe.predict(iris.data))
@@ -449,7 +451,7 @@ def test_ovr_coef_exceptions():
     assert_raises(ValueError, lambda x: ovr.coef_, None)
     # Doesn't have coef_ exception!
-    ovr = OneVsRestClassifier(DecisionTreeClassifier())
+    ovr = OneVsRestClassifier(DecisionTreeClassifier(random_state=42))
     ovr.fit(iris.data, iris.target)
     assert_raises(AttributeError, lambda x: ovr.coef_, None)
@@ -564,7 +566,8 @@ def test_ovo_ties():
     # not defaulting to the smallest label
     X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
     y = np.array([2, 0, 1, 2])
-    multi_clf = OneVsOneClassifier(Perceptron(shuffle=False))
+    multi_clf = OneVsOneClassifier(Perceptron(shuffle=False,
+                                              random_state=42))
     ovo_prediction = multi_clf.fit(X, y).predict(X)
     ovo_decision = multi_clf.decision_function(X)
@@ -591,7 +594,8 @@ def test_ovo_ties2():
     # cycle through labels so that each label wins once
     for i in range(3):
         y = (y_ref + i) % 3
-        multi_clf = OneVsOneClassifier(Perceptron(shuffle=False))
+        multi_clf = OneVsOneClassifier(Perceptron(shuffle=False,
+                                                  random_state=42))
         ovo_prediction = multi_clf.fit(X, y).predict(X)
         assert_equal(ovo_prediction[0], i % 3)
@@ -601,13 +605,13 @@ def test_ovo_string_y():
     X = np.eye(4)
     y = np.array(['a', 'b', 'c', 'd'])
-    ovo = OneVsOneClassifier(LinearSVC())
+    ovo = OneVsOneClassifier(LinearSVC(random_state=42))
     ovo.fit(X, y)
     assert_array_equal(y, ovo.predict(X))
 def test_ecoc_exceptions():
-    ecoc = OutputCodeClassifier(LinearSVC(random_state=0))
+    ecoc = OutputCodeClassifier(LinearSVC(random_state=0), random_state=42)
     assert_raises(ValueError, ecoc.predict, [])
diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py
index 163363155ca3d..2d8e3c63dc055 100644
--- a/sklearn/tests/test_multioutput.py
+++ b/sklearn/tests/test_multioutput.py
@@ -93,7 +93,7 @@ def test_multi_target_sample_weights_api():
     y = [[3.141, 2.718], [2.718, 3.141]]
     w = [0.8, 0.6]
-    rgr = MultiOutputRegressor(Lasso())
+    rgr = MultiOutputRegressor(Lasso(random_state=42))
     assert_raises_regex(ValueError, "does not support sample weights",
                         rgr.fit, X, y, w)
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 33e3128931aff..0cfff986976d4 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -166,7 +166,7 @@ def test_pipeline_init():
     repr(pipe)
     # Test with two objects
-    clf = SVC()
+    clf = SVC(random_state=42)
     filter1 = SelectKBest(f_classif)
     pipe = Pipeline([('anova', filter1), ('svc', clf)])
@@ -214,7 +214,7 @@ def test_pipeline_methods_anova():
     X = iris.data
     y = iris.target
     # Test with Anova + LogisticRegression
-    clf = LogisticRegression()
+    clf = LogisticRegression(random_state=42)
     filter1 = SelectKBest(f_classif, k=2)
     pipe = Pipeline([('anova', filter1), ('logistic', clf)])
     pipe.fit(X, y)
@@ -294,7 +294,8 @@ def test_pipeline_methods_pca_svm():
     y = iris.target
     # Test with PCA + SVC
     clf = SVC(probability=True, random_state=0)
-    pca = PCA(svd_solver='full', n_components='mle', whiten=True)
+    pca = PCA(svd_solver='full', n_components='mle', whiten=True,
+              random_state=42)
     pipe = Pipeline([('pca', pca), ('svc', clf)])
     pipe.fit(X, y)
     pipe.predict(X)
@@ -427,7 +428,7 @@ def test_feature_union():
 def test_make_union():
-    pca = PCA(svd_solver='full')
+    pca = PCA(svd_solver='full', random_state=42)
     mock = Transf()
     fu = make_union(pca, mock)
     names, transformers = zip(*fu.transformer_list)
@@ -436,7 +437,7 @@ def test_make_union():
 def test_make_union_kwargs():
-    pca = PCA(svd_solver='full')
+    pca = PCA(svd_solver='full', random_state=42)
     mock = Transf()
     fu = make_union(pca, mock, n_jobs=3)
     assert_equal(fu.transformer_list, make_union(pca, mock).transformer_list)
@@ -454,7 +455,7 @@ def test_pipeline_transform():
     # Also test pipeline.transform and pipeline.inverse_transform
     iris = load_iris()
     X = iris.data
-    pca = PCA(n_components=2, svd_solver='full')
+    pca = PCA(n_components=2, svd_solver='full', random_state=42)
     pipeline = Pipeline([('pca', pca)])
     # test transform and fit_transform:
@@ -827,7 +828,8 @@ def test_pipeline_wrong_memory():
     y = iris.target
     # Define memory as an integer
     memory = 1
-    cached_pipe = Pipeline([('transf', DummyTransf()), ('svc', SVC())],
+    cached_pipe = Pipeline([('transf', DummyTransf()), ('svc',
+                            SVC(random_state=42))],
                            memory=memory)
     assert_raises_regex(ValueError, "'memory' should either be a string or a"
                         " joblib.Memory instance, got 'memory=1' instead.",
diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py
index 1379a7703f31f..c36c700ebe9fb 100644
--- a/sklearn/tree/tests/test_export.py
+++ b/sklearn/tree/tests/test_export.py
@@ -194,7 +194,7 @@ def test_graphviz_toy():
     assert_equal(contents1, contents2)
     # Test classifier with degraded learning set
-    clf = DecisionTreeClassifier(max_depth=3)
+    clf = DecisionTreeClassifier(max_depth=3, random_state=42)
     clf.fit(X, y_degraded)
     contents1 = export_graphviz(clf, filled=True, out_file=None)
@@ -209,7 +209,8 @@ def test_graphviz_toy():
 def test_graphviz_errors():
     # Check for errors of export_graphviz
-    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)
+    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2,
+                                 random_state=42)
     clf.fit(X, y)
     # Check feature_names error
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index ff662e9af414a..eace35412334d 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -395,7 +395,7 @@ def test_importances():
 @raises(ValueError)
 def test_importances_raises():
     # Check if variable importance before fit raises ValueError.
-    clf = DecisionTreeClassifier()
+    clf = DecisionTreeClassifier(random_state=42)
     clf.feature_importances_
@@ -1170,7 +1170,8 @@ def test_arrays_persist():
     # non-regression for #2726
     for attr in ['n_classes', 'value', 'children_left', 'children_right',
                  'threshold', 'impurity', 'feature', 'n_node_samples']:
-        value = getattr(DecisionTreeClassifier().fit([[0], [1]], [0, 1]).tree_, attr)
+        value = getattr(DecisionTreeClassifier(random_state=42).fit(
+            [[0], [1]], [0, 1]).tree_, attr)
         # if pointing to freed memory, contents may be arbitrary
         assert_true(-3 <= value.flat[0] < 3,
                     'Array points to arbitrary memory')
@@ -1207,7 +1208,7 @@ def test_with_only_one_non_constant_features():
 def test_big_input():
     # Test if the warning for too large inputs is appropriate.
     X = np.repeat(10 ** 40., 4).astype(np.float64).reshape(-1, 1)
-    clf = DecisionTreeClassifier()
+    clf = DecisionTreeClassifier(random_state=42)
     try:
         clf.fit(X, [0, 1, 0, 1])
     except ValueError as e:
@@ -1228,13 +1229,15 @@ def test_huge_allocations():
     # Sanity check: we cannot request more memory than the size of the address
     # space. Currently raises OverflowError.
     huge = 2 ** (n_bits + 1)
-    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
+    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge,
+                                 random_state=42)
     assert_raises(Exception, clf.fit, X, y)
     # Non-regression test: MemoryError used to be dropped by Cython
     # because of missing "except *".
     huge = 2 ** (n_bits - 1) - 1
-    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
+    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge,
+                                 random_state=42)
     assert_raises(MemoryError, clf.fit, X, y)
@@ -1528,8 +1531,8 @@ def check_presort_sparse(est, X, y):
 def test_presort_sparse():
-    ests = (DecisionTreeClassifier(presort=True),
-            DecisionTreeRegressor(presort=True))
+    ests = (DecisionTreeClassifier(presort=True, random_state=42),
+            DecisionTreeRegressor(presort=True, random_state=42))
     sparse_matrices = (csr_matrix, csc_matrix, coo_matrix)
     y, X = datasets.make_multilabel_classification(random_state=0,
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
index d248350faa0ff..cf533eaa0dcc2 100644
--- a/sklearn/utils/tests/test_class_weight.py
+++ b/sklearn/utils/tests/test_class_weight.py
@@ -86,9 +86,12 @@ def test_compute_class_weight_invariance():
     X_ = np.vstack([X] * 2)
     y_ = np.hstack([y] * 2)
     # results should be identical
-    logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1)
-    logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0)
-    logreg = LogisticRegression(class_weight="balanced").fit(X_, y_)
+    logreg1 = LogisticRegression(class_weight="balanced",
+                                 random_state=42).fit(X_1, y_1)
+    logreg0 = LogisticRegression(class_weight="balanced",
+                                 random_state=42).fit(X_0, y_0)
+    logreg = LogisticRegression(class_weight="balanced",
+                                random_state=42).fit(X_, y_)
     assert_array_almost_equal(logreg1.coef_, logreg0.coef_)
     assert_array_almost_equal(logreg.coef_, logreg0.coef_)
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 34f60ffec8d97..d1ceeec6941ab 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -353,8 +353,8 @@ def test_class_distribution():
 def test_safe_split_with_precomputed_kernel():
-    clf = SVC()
-    clfp = SVC(kernel="precomputed")
+    clf = SVC(random_state=42)
+    clfp = SVC(kernel="precomputed", random_state=42)
     iris = datasets.load_iris()
     X, y = iris.data, iris.target
diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py
index 10657682e5cf1..db55d9d7ffcb2 100644
--- a/sklearn/utils/tests/test_testing.py
+++ b/sklearn/utils/tests/test_testing.py
@@ -43,7 +43,7 @@ def test_assert_greater_equal():
 def test_set_random_state():
     lda = LinearDiscriminantAnalysis()
-    tree = DecisionTreeClassifier()
+    tree = DecisionTreeClassifier(random_state=42)
     # Linear Discriminant Analysis doesn't have random state: smoke test
     set_random_state(lda, 3)
     set_random_state(tree, 3)
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 49d867a1b0bee..f8c7c571415f3 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -63,7 +63,7 @@ def test_as_float_array():
     matrices = [
         np.matrix(np.arange(5)),
         sp.csc_matrix(np.arange(5)).toarray(),
-        sparse_random_matrix(10, 10, density=0.10).toarray()
+        sparse_random_matrix(10, 10, density=0.10, random_state=42).toarray()
     ]
     for M in matrices:
         N = as_float_array(M, copy=True)
@@ -479,8 +479,8 @@ def test_check_is_fitted():
     except AttributeError as e:
         assert_equal(str(e), "Another message SVR, SVR")
-    ard.fit(*make_blobs())
-    svr.fit(*make_blobs())
+    ard.fit(*make_blobs(random_state=42))
+    svr.fit(*make_blobs(random_state=42))
     assert_equal(None, check_is_fitted(ard, "coef_"))
     assert_equal(None, check_is_fitted(svr, "support_"))
@@ -500,7 +500,7 @@ def test_check_consistent_length():
     assert_raises(TypeError, check_consistent_length, [1, 2], np.array(1))
     # Despite ensembles having __len__ they must raise TypeError
     assert_raises_regexp(TypeError, 'estimator', check_consistent_length,
-                         [1, 2], RandomForestRegressor())
+                         [1, 2], RandomForestRegressor(random_state=42))
     # XXX: We should have a test with a string, but what is correct behaviour?