[MRG] add random_state in tests estimators by kkatrio · Pull Request #8563 · scikit-learn/scikit-learn

[MRG] add random_state in tests estimators #8563


Open

kkatrio wants to merge 5 commits into main
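For context on what this PR does: tests that build data or estimators with unseeded randomness can pass or fail depending on the run. A minimal sketch of the failure mode and the fix — not part of the diff, using only public scikit-learn APIs:

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

# Unseeded: each call draws fresh data, so two runs rarely agree.
X1, _ = make_blobs(n_samples=100, centers=3)
X2, _ = make_blobs(n_samples=100, centers=3)
# np.array_equal(X1, X2) is almost certainly False here.

# Seeded: both the data and the fitted model are reproducible.
X, _ = make_blobs(n_samples=100, centers=3, random_state=42)
km_a = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X)
km_b = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X)
assert np.array_equal(km_a.labels_, km_b.labels_)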
Changes from all commits
18 changes: 9 additions & 9 deletions sklearn/cluster/tests/test_bicluster.py
@@ -235,30 +235,30 @@ def test_perfect_checkerboard():
 def test_errors():
     data = np.arange(25).reshape((5, 5))
 
-    model = SpectralBiclustering(n_clusters=(3, 3, 3))
+    model = SpectralBiclustering(n_clusters=(3, 3, 3), random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering(n_clusters='abc')
+    model = SpectralBiclustering(n_clusters='abc', random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering(n_clusters=(3, 'abc'))
+    model = SpectralBiclustering(n_clusters=(3, 'abc'), random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering(method='unknown')
+    model = SpectralBiclustering(method='unknown', random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering(svd_method='unknown')
+    model = SpectralBiclustering(svd_method='unknown', random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering(n_components=0)
+    model = SpectralBiclustering(n_components=0, random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering(n_best=0)
+    model = SpectralBiclustering(n_best=0, random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering(n_components=3, n_best=4)
+    model = SpectralBiclustering(n_components=3, n_best=4, random_state=42)
     assert_raises(ValueError, model.fit, data)
 
-    model = SpectralBiclustering()
+    model = SpectralBiclustering(random_state=42)
     data = np.arange(27).reshape((3, 3, 3))
     assert_raises(ValueError, model.fit, data)
14 changes: 7 additions & 7 deletions sklearn/cluster/tests/test_birch.py
@@ -23,7 +23,7 @@
 
 def test_n_samples_leaves_roots():
     # Sanity check for the number of samples in leaves and roots
-    X, y = make_blobs(n_samples=10)
+    X, y = make_blobs(n_samples=10, random_state=42)
     brc = Birch()
     brc.fit(X)
     n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_])
@@ -35,7 +35,7 @@ def test_n_samples_leaves_roots():
 
 def test_partial_fit():
     # Test that fit is equivalent to calling partial_fit multiple times
-    X, y = make_blobs(n_samples=100)
+    X, y = make_blobs(n_samples=100, random_state=42)
     brc = Birch(n_clusters=3)
     brc.fit(X)
     brc_partial = Birch(n_clusters=None)
@@ -71,7 +71,7 @@ def test_birch_predict():
 
 def test_n_clusters():
     # Test that n_clusters param works properly
-    X, y = make_blobs(n_samples=100, centers=10)
+    X, y = make_blobs(n_samples=100, centers=10, random_state=42)
     brc1 = Birch(n_clusters=10)
     brc1.fit(X)
     assert_greater(len(brc1.subcluster_centers_), 10)
@@ -86,7 +86,7 @@ def test_n_clusters():
     assert_array_equal(brc1.labels_, brc2.labels_)
 
     # Test that the wrong global clustering step raises an Error.
-    clf = ElasticNet()
+    clf = ElasticNet(random_state=42)
     brc3 = Birch(n_clusters=clf)
     assert_raises(ValueError, brc3.fit, X)
 
@@ -97,7 +97,7 @@ def test_n_clusters():
 
 def test_sparse_X():
     # Test that sparse and dense data give same results
-    X, y = make_blobs(n_samples=100, centers=10)
+    X, y = make_blobs(n_samples=100, centers=10, random_state=42)
     brc = Birch(n_clusters=10)
     brc.fit(X)
 
@@ -120,7 +120,7 @@ def check_branching_factor(node, branching_factor):
 
 def test_branching_factor():
     # Test that nodes have at max branching_factor number of subclusters
-    X, y = make_blobs()
+    X, y = make_blobs(random_state=42)
     branching_factor = 9
 
     # Purposefully set a low threshold to maximize the subclusters.
@@ -150,7 +150,7 @@ def check_threshold(birch_instance, threshold):
 
 def test_threshold():
     # Test that the leaf subclusters have a threshold lesser than radius
-    X, y = make_blobs(n_samples=80, centers=4)
+    X, y = make_blobs(n_samples=80, centers=4, random_state=42)
     brc = Birch(threshold=0.5, n_clusters=None)
     brc.fit(X)
     check_threshold(brc, 0.5)
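A note on the Birch changes above: Birch itself exposes no random_state parameter, so the only run-to-run variation in these tests comes from make_blobs, and pinning the data seed is enough to make the subcluster assertions stable. A small sketch under that assumption:

from sklearn.datasets import make_blobs
from sklearn.cluster import Birch

X, _ = make_blobs(n_samples=100, centers=10, random_state=42)
# With the input data fixed, Birch is deterministic: two fits build the
# same CF-tree and hence the same number of subclusters.
brc_a = Birch(n_clusters=10).fit(X)
brc_b = Birch(n_clusters=10).fit(X)
assert len(brc_a.subcluster_centers_) == len(brc_b.subcluster_centers_)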
63 changes: 41 additions & 22 deletions sklearn/cluster/tests/test_k_means.py
@@ -116,7 +116,8 @@ def test_minibatch_update_consistency():
     # step 1: compute the dense minibatch update
     old_inertia, incremental_diff = _mini_batch_step(
         X_mb, x_mb_squared_norms, new_centers, counts,
-        buffer, 1, None, random_reassign=False)
+        buffer, 1, None, random_reassign=False,
+        random_state=42)
     assert_greater(old_inertia, 0.0)
 
     # compute the new inertia on the same batch to check that it decreased
@@ -133,7 +134,8 @@ def test_minibatch_update_consistency():
     # step 2: compute the sparse minibatch update
     old_inertia_csr, incremental_diff_csr = _mini_batch_step(
         X_mb_csr, x_mb_squared_norms_csr, new_centers_csr, counts_csr,
-        buffer_csr, 1, None, random_reassign=False)
+        buffer_csr, 1, None, random_reassign=False,
+        random_state=42)
     assert_greater(old_inertia_csr, 0.0)
 
     # compute the new inertia on the same batch to check that it decreased
@@ -216,7 +218,7 @@ def test_k_means_plus_plus_init_2_jobs():
 def test_k_means_precompute_distances_flag():
     # check that a warning is raised if the precompute_distances flag is not
     # supported
-    km = KMeans(precompute_distances="wrong")
+    km = KMeans(precompute_distances="wrong", random_state=42)
     assert_raises(ValueError, km.fit, X)
 
 
@@ -263,8 +265,10 @@ def test_k_means_n_init():
 
     # two regression tests on bad n_init argument
     # previous bug: n_init <= 0 threw non-informative TypeError (#3858)
-    assert_raises_regex(ValueError, "n_init", KMeans(n_init=0).fit, X)
-    assert_raises_regex(ValueError, "n_init", KMeans(n_init=-1).fit, X)
+    assert_raises_regex(ValueError, "n_init", KMeans(n_init=0,
+                                                     random_state=42).fit, X)
+    assert_raises_regex(ValueError, "n_init", KMeans(n_init=-1,
+                                                     random_state=42).fit, X)
 
 
 def test_k_means_explicit_init_shape():
@@ -331,7 +335,8 @@ def test_mb_k_means_plus_plus_init_sparse_matrix():
 
 
 def test_minibatch_init_with_large_k():
-    mb_k_means = MiniBatchKMeans(init='k-means++', init_size=10, n_clusters=20)
+    mb_k_means = MiniBatchKMeans(init='k-means++', init_size=10, n_clusters=20,
+                                 random_state=42)
     # Check that a warning is raised, as the number clusters is larger
     # than the init_size
     assert_warns(RuntimeWarning, mb_k_means.fit, X)
@@ -516,12 +521,14 @@ def test_minibatch_set_init_size():
 
 
 def test_k_means_invalid_init():
-    km = KMeans(init="invalid", n_init=1, n_clusters=n_clusters)
+    km = KMeans(init="invalid", n_init=1, n_clusters=n_clusters,
+                random_state=42)
     assert_raises(ValueError, km.fit, X)
 
 
 def test_mini_match_k_means_invalid_init():
-    km = MiniBatchKMeans(init="invalid", n_init=1, n_clusters=n_clusters)
+    km = MiniBatchKMeans(init="invalid", n_init=1, n_clusters=n_clusters,
+                         random_state=42)
     assert_raises(ValueError, km.fit, X)
 
 
@@ -605,7 +612,8 @@ def test_predict_minibatch_dense_input():
 
 def test_predict_minibatch_kmeanspp_init_sparse_input():
     mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init='k-means++',
-                                 n_init=10).fit(X_csr)
+                                 n_init=10,
+                                 random_state=42).fit(X_csr)
 
     # sanity check: re-predict labeling for training set samples
     assert_array_equal(mb_k_means.predict(X_csr), mb_k_means.labels_)
@@ -621,6 +629,7 @@ def test_predict_minibatch_kmeanspp_init_sparse_input():
 
 def test_predict_minibatch_random_init_sparse_input():
     mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init='random',
+                                 random_state=42,
                                  n_init=10).fit(X_csr)
 
     # sanity check: re-predict labeling for training set samples
@@ -643,15 +652,20 @@ def test_int_input():
     init_int = X_int[:2]
 
     fitted_models = [
-        KMeans(n_clusters=2).fit(X_int),
-        KMeans(n_clusters=2, init=init_int, n_init=1).fit(X_int),
+        KMeans(n_clusters=2, random_state=42).fit(X_int),
+        KMeans(n_clusters=2, init=init_int, n_init=1,
+               random_state=42).fit(X_int),
         # mini batch kmeans is very unstable on such a small dataset hence
         # we use many inits
-        MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2).fit(X_int),
-        MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2).fit(X_int_csr),
+        MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2,
+                        random_state=42).fit(X_int),
+        MiniBatchKMeans(n_clusters=2, n_init=10, batch_size=2,
+                        random_state=42).fit(X_int_csr),
         MiniBatchKMeans(n_clusters=2, batch_size=2,
+                        random_state=42,
                         init=init_int, n_init=1).fit(X_int),
         MiniBatchKMeans(n_clusters=2, batch_size=2,
+                        random_state=42,
                         init=init_int, n_init=1).fit(X_int_csr),
     ]
 
@@ -665,7 +679,7 @@ def test_int_input():
 
 
 def test_transform():
-    km = KMeans(n_clusters=n_clusters)
+    km = KMeans(n_clusters=n_clusters, random_state=42)
     km.fit(X)
     X_new = km.transform(km.cluster_centers_)
 
@@ -730,7 +744,8 @@ def test_k_means_function():
     sys.stdout = StringIO()
     try:
         cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters,
-                                                   verbose=True)
+                                                   verbose=True,
+                                                   random_state=42)
     finally:
         sys.stdout = old_stdout
     centers = cluster_centers
@@ -765,7 +780,7 @@ def test_x_squared_norms_init_centroids():
 
 def test_max_iter_error():
 
-    km = KMeans(max_iter=-1)
+    km = KMeans(max_iter=-1, random_state=42)
     assert_raise_message(ValueError, 'Number of iterations should be',
                          km.fit, X)
 
@@ -821,7 +836,8 @@ def test_k_means_init_centers():
         X_test = dtype(X_small)
         init_centers_test = dtype(init_centers)
         assert_array_equal(init_centers, init_centers_test)
-        km = KMeans(init=init_centers_test, n_clusters=3, n_init=1)
+        km = KMeans(init=init_centers_test, n_clusters=3, n_init=1,
+                    random_state=42)
         km.fit(X_test)
         assert_equal(False, np.may_share_memory(km.cluster_centers_, init_centers))
 
@@ -833,14 +849,15 @@ def test_sparse_k_means_init_centers():
     X = iris.data
 
     # Get a local optimum
-    centers = KMeans(n_clusters=3).fit(X).cluster_centers_
+    centers = KMeans(n_clusters=3, random_state=42).fit(X).cluster_centers_
 
     # Fit starting from a local optimum shouldn't change the solution
     np.testing.assert_allclose(
         centers,
         KMeans(n_clusters=3,
                init=centers,
-               n_init=1).fit(X).cluster_centers_
+               n_init=1,
+               random_state=42).fit(X).cluster_centers_
     )
 
     # The same should be true when X is sparse
@@ -849,7 +866,8 @@ def test_sparse_k_means_init_centers():
         centers,
         KMeans(n_clusters=3,
                init=centers,
-               n_init=1).fit(X_sparse).cluster_centers_
+               n_init=1,
+               random_state=42).fit(X_sparse).cluster_centers_
     )
 
 
@@ -860,10 +878,11 @@ def test_sparse_validate_centers():
     X = iris.data
 
     # Get a local optimum
-    centers = KMeans(n_clusters=4).fit(X).cluster_centers_
+    centers = KMeans(n_clusters=4, random_state=42).fit(X).cluster_centers_
 
     # Test that a ValueError is raised for validate_center_shape
-    classifier = KMeans(n_clusters=3, init=centers, n_init=1)
+    classifier = KMeans(n_clusters=3, init=centers, n_init=1,
+                        random_state=42)
 
     msg = "The shape of the initial centers \(\(4L?, 4L?\)\) " \
           "does not match the number of clusters 3"
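The mechanism behind all of these random_state=42 arguments is sklearn.utils.check_random_state, which estimators call at fit time to turn an int seed into a dedicated numpy RandomState. A short sketch of its behavior:

import numpy as np
from sklearn.utils import check_random_state

# An int seed yields a fresh generator with a fixed stream:
# same seed, same draws.
rs_a = check_random_state(42)
rs_b = check_random_state(42)
assert np.array_equal(rs_a.rand(5), rs_b.rand(5))

# None falls back to the global numpy generator, which is why unseeded
# estimators are not reproducible between test runs.
rs_global = check_random_state(None)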
2 changes: 1 addition & 1 deletion sklearn/cluster/tests/test_mean_shift.py
@@ -27,7 +27,7 @@
 
 def test_estimate_bandwidth():
     # Test estimate_bandwidth
-    bandwidth = estimate_bandwidth(X, n_samples=200)
+    bandwidth = estimate_bandwidth(X, n_samples=200, random_state=42)
     assert_true(0.9 <= bandwidth <= 1.5)
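Why estimate_bandwidth takes a seed here: when n_samples is smaller than the dataset, the function subsamples rows at random before computing neighbor distances, so the estimate depends on which rows are drawn. A minimal sketch (X regenerated locally, since the test module's X is not shown in the diff):

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import estimate_bandwidth

X, _ = make_blobs(n_samples=500, centers=3, random_state=0)
bw_a = estimate_bandwidth(X, n_samples=200, random_state=42)
bw_b = estimate_bandwidth(X, n_samples=200, random_state=42)
assert bw_a == bw_b  # same seed, same subsample, same bandwidth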
3 changes: 2 additions & 1 deletion sklearn/cluster/tests/test_spectral.py
@@ -171,7 +171,8 @@ def histogram(x, y, **kwargs):
     assert_equal((X.shape[0],), labels.shape)
 
     # raise error on unknown affinity
-    sp = SpectralClustering(n_clusters=2, affinity='<unknown>')
+    sp = SpectralClustering(n_clusters=2, affinity='<unknown>',
+                            random_state=42)
     assert_raises(ValueError, sp.fit, X)
6 changes: 3 additions & 3 deletions sklearn/covariance/tests/test_robust_covariance.py
@@ -50,7 +50,7 @@ def test_fast_mcd_on_invalid_input():
 
 def test_mcd_class_on_invalid_input():
     X = np.arange(100)
-    mcd = MinCovDet()
+    mcd = MinCovDet(random_state=42)
     assert_raise_message(ValueError, 'Got X with X.ndim=1',
                          mcd.fit, X)
 
@@ -88,14 +88,14 @@ def test_mcd_issue1127():
     # (i.e. n_support = n_samples)
     rnd = np.random.RandomState(0)
     X = rnd.normal(size=(3, 1))
-    mcd = MinCovDet()
+    mcd = MinCovDet(random_state=42)
     mcd.fit(X)
 
 
 def test_outlier_detection():
     rnd = np.random.RandomState(0)
     X = rnd.randn(100, 10)
-    clf = EllipticEnvelope(contamination=0.1)
+    clf = EllipticEnvelope(contamination=0.1, random_state=42)
     assert_raises(NotFittedError, clf.predict, X)
     assert_raises(NotFittedError, clf.decision_function, X)
     clf.fit(X)
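On the covariance changes: MinCovDet (and EllipticEnvelope, which builds on it) runs FastMCD, which starts from randomly drawn subsets of the samples, so the selected support — and hence the estimates — can differ between unseeded runs. A hedged sketch of the reproducibility the seed buys, on synthetic data:

import numpy as np
from sklearn.covariance import MinCovDet

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
# Same seed, same random subsets, same robust estimates.
mcd_a = MinCovDet(random_state=42).fit(X)
mcd_b = MinCovDet(random_state=42).fit(X)
np.testing.assert_allclose(mcd_a.covariance_, mcd_b.covariance_)
np.testing.assert_allclose(mcd_a.location_, mcd_b.location_)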
3 changes: 2 additions & 1 deletion sklearn/decomposition/tests/test_dict_learning.py
@@ -91,7 +91,8 @@ def test_dict_learning_nonzero_coefs():
 
 def test_dict_learning_unknown_fit_algorithm():
     n_components = 5
-    dico = DictionaryLearning(n_components, fit_algorithm='<unknown>')
+    dico = DictionaryLearning(n_components, fit_algorithm='<unknown>',
+                              random_state=42)
     assert_raises(ValueError, dico.fit, X)
6 changes: 4 additions & 2 deletions sklearn/decomposition/tests/test_fastica.py
@@ -87,7 +87,8 @@ def g_test(x):
             assert_raises(ValueError, fastica, m.T, fun=np.tanh,
                           algorithm=algo)
         else:
-            X = PCA(n_components=2, whiten=True).fit_transform(m.T)
+            X = PCA(n_components=2, whiten=True,
+                    random_state=42).fit_transform(m.T)
             k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo, whiten=False)
             assert_raises(ValueError, fastica, X, fun=np.tanh,
                           algorithm=algo)
@@ -129,7 +130,8 @@ def g_test(x):
         ica = FastICA(fun=fn, algorithm=algo, random_state=0)
         assert_raises(ValueError, ica.fit, m.T)
 
-    assert_raises(TypeError, FastICA(fun=moves.xrange(10)).fit, m.T)
+    assert_raises(TypeError, FastICA(fun=moves.xrange(10),
+                                     random_state=42).fit, m.T)
 
 
 def test_fastica_nowhiten():
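On the FastICA changes: when w_init is not supplied, the algorithm starts from a random unmixing matrix, so the order and sign of the recovered components vary between unseeded runs; fixing random_state pins them. A small sketch with synthetic non-Gaussian sources (all names local to this example):

import numpy as np
from sklearn.decomposition import FastICA

rng = np.random.RandomState(0)
S = rng.laplace(size=(200, 2))   # non-Gaussian sources
A = rng.randn(2, 2)              # mixing matrix
X = np.dot(S, A.T)               # observed mixtures

# Same seed, same initial unmixing matrix, identical components.
ica_a = FastICA(n_components=2, random_state=42).fit(X)
ica_b = FastICA(n_components=2, random_state=42).fit(X)
np.testing.assert_allclose(ica_a.components_, ica_b.components_)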