ENH use random states everywhere, never call np.random. · seckcoder/scikit-learn@eeb6dcc · GitHub

Commit eeb6dcc

amueller authored and GaelVaroquaux committed
ENH use random states everywhere, never call np.random.
1 parent 6c42e9d commit eeb6dcc

File tree: 22 files changed (+100, -95 lines)
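Every diff below applies the same pattern: instead of seeding and drawing from NumPy's process-wide generator (np.random.seed, np.random.randn, ...), each test builds its own seeded np.random.RandomState and draws from that, so the test stays reproducible without leaking state into other tests. A minimal stand-alone sketch of the before/after (illustrative only, not taken verbatim from any one file):

    import numpy as np

    # Before: mutates the global RNG that every other test shares.
    np.random.seed(0)
    X_global = np.random.randn(50, 100)

    # After: a private, seeded generator, reproducible and isolated.
    rnd = np.random.RandomState(0)
    X_local = rnd.randn(50, 100)

    # Same seed, same stream of draws, no global side effects.
    assert np.allclose(X_global, X_local)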

sklearn/cluster/tests/test_hierarchical.py

Lines changed: 12 additions & 11 deletions

@@ -17,9 +17,9 @@ def test_structured_ward_tree():
     """
     Check that we obtain the correct solution for structured ward tree.
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(50, 100)
+    X = rnd.randn(50, 100)
     connectivity = grid_to_graph(*mask.shape)
     children, n_components, n_leaves = ward_tree(X.T, connectivity)
     n_nodes = 2 * X.shape[1] - 1
@@ -30,8 +30,8 @@ def test_unstructured_ward_tree():
     """
     Check that we obtain the correct solution for unstructured ward tree.
     """
-    np.random.seed(0)
-    X = np.random.randn(50, 100)
+    rnd = np.random.RandomState(0)
+    X = rnd.randn(50, 100)
     children, n_nodes, n_leaves = ward_tree(X.T)
     n_nodes = 2 * X.shape[1] - 1
     assert_true(len(children) + n_leaves == n_nodes)
@@ -41,9 +41,9 @@ def test_height_ward_tree():
     """
     Check that the height of ward tree is sorted.
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(50, 100)
+    X = rnd.randn(50, 100)
     connectivity = grid_to_graph(*mask.shape)
     children, n_nodes, n_leaves = ward_tree(X.T, connectivity)
     n_nodes = 2 * X.shape[1] - 1
@@ -54,9 +54,9 @@ def test_ward_clustering():
     """
     Check that we obtain the correct number of clusters with Ward clustering.
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(100, 50)
+    X = rnd.randn(100, 50)
     connectivity = grid_to_graph(*mask.shape)
     clustering = Ward(n_clusters=10, connectivity=connectivity)
     clustering.fit(X)
@@ -67,9 +67,9 @@ def test_ward_agglomeration():
     """
     Check that we obtain the correct solution in a simplistic case
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(50, 100)
+    X = rnd.randn(50, 100)
     connectivity = grid_to_graph(*mask.shape)
     ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
     ward.fit(X)
@@ -98,10 +98,11 @@ def test_scikit_vs_scipy():
     """
     from scipy.sparse import lil_matrix
     n, p, k = 10, 5, 3
+    rnd = np.random.RandomState(0)

     connectivity = lil_matrix(np.ones((n, n)))
     for i in range(5):
-        X = .1 * np.random.normal(size=(n, p))
+        X = .1 * rnd.normal(size=(n, p))
         X -= 4 * np.arange(n)[:, np.newaxis]
         X -= X.mean(axis=1)[:, np.newaxis]

sklearn/cluster/tests/test_k_means.py

Lines changed: 2 additions & 1 deletion

@@ -43,7 +43,8 @@ def test_square_norms():


 def test_kmeans_dtype():
-    X = np.random.normal(size=(40, 2))
+    rnd = np.random.RandomState(0)
+    X = rnd.normal(size=(40, 2))
     X = (X * 10).astype(np.uint8)
     km = KMeans(n_init=1).fit(X)
     with warnings.catch_warnings(record=True) as w:

sklearn/covariance/tests/test_robust_covariance.py

Lines changed: 2 additions & 2 deletions

@@ -72,8 +72,8 @@ def test_outlier_detection():
     """

     """
-    np.random.RandomState(0)
-    X = np.random.randn(100, 10)
+    rnd = np.random.RandomState(0)
+    X = rnd.randn(100, 10)
     clf = EllipticEnvelope(contamination=0.1)
     clf.fit(X)
     y_pred = clf.predict(X)
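Note on the hunk above: the old test constructed np.random.RandomState(0) but never bound it to a name, so the seeded generator was discarded and X was still drawn from the unseeded global RNG. The fix keeps the generator and draws from it. A small NumPy-only illustration of the difference:

    import numpy as np

    np.random.RandomState(0)        # seeded generator is created, then discarded
    a = np.random.randn(3)          # still comes from the unseeded global RNG

    rnd = np.random.RandomState(0)  # keep the generator
    b = rnd.randn(3)                # reproducible: the same three values every run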

sklearn/decomposition/tests/test_fastica.py

Lines changed: 2 additions & 2 deletions

@@ -51,7 +51,7 @@ def test_fastica(add_noise=False):
     """ Test the FastICA algorithm on very simple data.
     """
     # scipy.stats uses the global RNG:
-    np.random.seed(0)
+    rng = np.random.RandomState(0)
     n_samples = 1000
     # Generate two sources:
     s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
@@ -67,7 +67,7 @@ def test_fastica(add_noise=False):
     m = np.dot(mixing, s)

     if add_noise:
-        m += 0.1 * np.random.randn(2, 1000)
+        m += 0.1 * rng.randn(2, 1000)

     center_and_norm(m)

sklearn/ensemble/tests/test_forest.py

Lines changed: 3 additions & 3 deletions

@@ -30,15 +30,15 @@
 # also load the iris dataset
 # and randomly permute it
 iris = datasets.load_iris()
-np.random.seed([1])
-perm = np.random.permutation(iris.target.size)
+rng = np.random.RandomState(0)
+perm = rng.permutation(iris.target.size)
 iris.data = iris.data[perm]
 iris.target = iris.target[perm]

 # also load the boston dataset
 # and randomly permute it
 boston = datasets.load_boston()
-perm = np.random.permutation(boston.target.size)
+perm = rng.permutation(boston.target.size)
 boston.data = boston.data[perm]
 boston.target = boston.target[perm]
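In test_forest.py (and in test_gradient_boosting.py below) the iris and boston fixtures are permuted at module import time, with a single module-level rng now driving both shuffles. Consolidated from the new side of the diff, and assuming the module's existing datasets import, the fixture setup reads roughly as follows:

    import numpy as np
    from sklearn import datasets

    rng = np.random.RandomState(0)

    # One seeded generator permutes both fixtures deterministically at import.
    iris = datasets.load_iris()
    perm = rng.permutation(iris.target.size)
    iris.data, iris.target = iris.data[perm], iris.target[perm]

    boston = datasets.load_boston()
    perm = rng.permutation(boston.target.size)
    boston.data, boston.target = boston.data[perm], boston.target[perm]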

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 6 additions & 5 deletions

@@ -20,17 +20,18 @@
 T = [[-1, -1], [2, 2], [3, 2]]
 true_result = [-1, 1, 1]

+rng = np.random.RandomState(0)
 # also load the boston dataset
 # and randomly permute it
 boston = datasets.load_boston()
-perm = np.random.permutation(boston.target.size)
+perm = rng.permutation(boston.target.size)
 boston.data = boston.data[perm]
 boston.target = boston.target[perm]

 # also load the iris dataset
 # and randomly permute it
 iris = datasets.load_iris()
-perm = np.random.permutation(iris.target.size)
+perm = rng.permutation(iris.target.size)
 iris.data = iris.data[perm]
 iris.target = iris.target[perm]

@@ -248,7 +249,7 @@ def test_check_inputs_predict():
     assert_raises(ValueError, clf.predict, x)

     clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
-    clf.fit(X, np.random.rand(len(X)))
+    clf.fit(X, rng.rand(len(X)))

     x = np.array([1.0, 2.0])[:, np.newaxis]
     assert_raises(ValueError, clf.predict, x)
@@ -312,6 +313,6 @@ def test_degenerate_targets():

     clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
     clf.fit(X, np.ones(len(X)))
-    clf.predict(np.random.rand(2))
+    clf.predict(rng.rand(2))
     assert_array_equal(np.ones((1,), dtype=np.float64),
-                       clf.predict(np.random.rand(2)))
+                       clf.predict(rng.rand(2)))

sklearn/feature_selection/tests/test_feature_select.py

Lines changed: 5 additions & 4 deletions

@@ -19,8 +19,9 @@

 def test_f_oneway_vs_scipy_stats():
     """Test that our f_oneway gives the same result as scipy.stats"""
-    X1 = np.random.randn(10, 3)
-    X2 = 1 + np.random.randn(10, 3)
+    rng = np.random.RandomState(0)
+    X1 = rng.randn(10, 3)
+    X2 = 1 + rng.randn(10, 3)
     f, pv = stats.f_oneway(X1, X2)
     f2, pv2 = f_oneway(X1, X2)
     assert_true(np.allclose(f, f2))
@@ -67,8 +68,8 @@ def test_f_regression_input_dtype():
     Test whether f_regression returns the same value
     for any numeric data_type
     """
-
-    X = np.random.rand(10, 20)
+    rng = np.random.RandomState(0)
+    X = rng.rand(10, 20)
     y = np.arange(10).astype(np.int)

     F1, pv1 = f_regression(X, y)

sklearn/hmm.py

Lines changed: 3 additions & 3 deletions

@@ -135,7 +135,7 @@ def __init__(self, n_components=1, startprob=None, transmat=None,
             self._algorithm = algorithm
         else:
             self._algorithm = "viterbi"
-        self.random_state = random_state
+        self.random_state = check_random_state(random_state)

     def eval(self, obs):
         """Compute the log probability under the model and compute posteriors
@@ -900,8 +900,8 @@ def _init(self, obs, params='ste'):
         super(MultinomialHMM, self)._init(obs, params=params)

         if 'e' in params:
-            emissionprob = normalize(np.random.rand(self.n_components,
-                                                    self.n_symbols), 1)
+            emissionprob = normalize(self.random_state.rand(self.n_components,
+                                                            self.n_symbols), 1)
             self.emissionprob_ = emissionprob

     def _initialize_sufficient_statistics(self):
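The hmm.py change relies on the check_random_state helper from sklearn.utils, which normalizes None, an integer seed, or an existing RandomState into a RandomState instance, so later calls such as self.random_state.rand(...) always have a generator to work with. A short usage sketch of the helper itself, not of the hmm module:

    import numpy as np
    from sklearn.utils import check_random_state

    rng_default = check_random_state(None)      # the shared global RandomState
    rng_seeded = check_random_state(42)         # same as np.random.RandomState(42)
    rng_same = check_random_state(rng_seeded)   # an existing instance passes through

    assert rng_same is rng_seeded
    assert isinstance(rng_default, np.random.RandomState)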

sklearn/linear_model/tests/test_logistic.py

Lines changed: 4 additions & 4 deletions

@@ -77,7 +77,8 @@ def test_predict_iris():

 def test_inconsistent_input():
     """Test that an exception is raised on inconsistent input"""
-    X_ = np.random.random((5, 10))
+    rng = np.random.RandomState(0)
+    X_ = rng.random_sample((5, 10))
     y_ = np.ones(X_.shape[0])

     clf = logistic.LogisticRegression()
@@ -87,9 +88,8 @@ def test_inconsistent_input():
     assert_raises(ValueError, clf.fit, X, y_wrong)

     # Wrong dimensions for test data
-    assert_raises(ValueError,
-                  clf.fit(X_, y_).predict,
-                  np.random.random((3, 12)))
+    assert_raises(ValueError, clf.fit(X_, y_).predict,
+                  rng.random_sample((3, 12)))


 @raises(ValueError)
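The logistic test also swaps np.random.random for rng.random_sample: on a RandomState instance the uniform sampler is exposed as random_sample, and np.random.random is simply that method on the global state. A quick check of the equivalence:

    import numpy as np

    rng = np.random.RandomState(0)
    u = rng.random_sample((3, 12))   # uniform draws in [0, 1)

    np.random.seed(0)
    v = np.random.random((3, 12))    # alias for random_sample on the global RNG

    assert np.allclose(u, v)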

sklearn/linear_model/tests/test_ridge.py

Lines changed: 12 additions & 15 deletions

@@ -17,11 +17,11 @@

 from sklearn.cross_validation import KFold

+rng = np.random.RandomState(0)
 diabetes = datasets.load_diabetes()
-
 X_diabetes, y_diabetes = diabetes.data, diabetes.target
 ind = np.arange(X_diabetes.shape[0])
-np.random.shuffle(ind)
+rng.shuffle(ind)
 ind = ind[:200]
 X_diabetes, y_diabetes = X_diabetes[ind], y_diabetes[ind]

@@ -30,8 +30,6 @@
 X_iris = sp.csr_matrix(iris.data)
 y_iris = iris.target

-np.random.seed(0)
-
 DENSE_FILTER = lambda X: X
 SPARSE_FILTER = lambda X: sp.csr_matrix(X)

@@ -46,8 +44,8 @@ def test_ridge():

     # With more samples than features
     n_samples, n_features = 6, 5
-    y = np.random.randn(n_samples)
-    X = np.random.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
+    X = rng.randn(n_samples, n_features)

     ridge = Ridge(alpha=alpha)
     ridge.fit(X, y)
@@ -59,8 +57,8 @@ def test_ridge():

     # With more features than samples
     n_samples, n_features = 5, 10
-    y = np.random.randn(n_samples)
-    X = np.random.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
+    X = rng.randn(n_samples, n_features)
     ridge = Ridge(alpha=alpha)
     ridge.fit(X, y)
     assert_greater(ridge.score(X, y), .9)
@@ -73,8 +71,8 @@ def test_ridge_shapes():
     """Test shape of coef_ and intercept_
     """
     n_samples, n_features = 5, 10
-    X = np.random.randn(n_samples, n_features)
-    y = np.random.randn(n_samples)
+    X = rng.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
     Y1 = y[:, np.newaxis]
     Y = np.c_[y, 1 + y]

@@ -97,8 +95,8 @@ def test_ridge_intercept():
     """Test intercept with multiple targets GH issue #708
     """
     n_samples, n_features = 5, 10
-    X = np.random.randn(n_samples, n_features)
-    y = np.random.randn(n_samples)
+    X = rng.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
     Y = np.c_[y, 1. + y]

     ridge = Ridge()
@@ -140,9 +138,8 @@ def test_ridge_vs_lstsq():

     # we need more samples than features
     n_samples, n_features = 5, 4
-    np.random.seed(0)
-    y = np.random.randn(n_samples)
-    X = np.random.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
+    X = rng.randn(n_samples, n_features)

     ridge = Ridge(alpha=0., fit_intercept=False)
     ols = LinearRegression(fit_intercept=False)

0 commit comments