ENH Replaced RandomState.rand with equivalent uniform by Micky774 · Pull Request #22327 · scikit-learn/scikit-learn

Merged · 7 commits · Feb 1, 2022
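The substitution is behavior-preserving, not just equivalent in distribution: NumPy implements RandomState.rand as a thin alias for random_sample, and RandomState.uniform(low=0.0, high=1.0) computes low + (high - low) * u over the same underlying doubles, which leaves them unchanged for the default bounds. A quick sanity check (an illustration, not part of this diff):

    import numpy as np

    # Two identically seeded legacy generators: one drawn through the
    # convenience alias, one through the explicit distribution method.
    a = np.random.RandomState(42).rand(3, 2)
    b = np.random.RandomState(42).uniform(size=(3, 2))

    # With the default bounds, low + (high - low) * u == u exactly,
    # so the streams are bit-identical, not merely similar.
    assert np.array_equal(a, b)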
52 changes: 27 additions & 25 deletions sklearn/datasets/_samples_generator.py
@@ -226,8 +226,8 @@ def make_classification(
     centroids *= 2 * class_sep
     centroids -= class_sep
     if not hypercube:
-        centroids *= generator.rand(n_clusters, 1)
-        centroids *= generator.rand(1, n_informative)
+        centroids *= generator.uniform(size=(n_clusters, 1))
+        centroids *= generator.uniform(size=(1, n_informative))

     # Initially draw informative features from the standard normal
     X[:, :n_informative] = generator.standard_normal(size=(n_samples, n_informative))
@@ -239,22 +239,22 @@ def make_classification(
         y[start:stop] = k % n_classes  # assign labels
         X_k = X[start:stop, :n_informative]  # slice a view of the cluster

-        A = 2 * generator.rand(n_informative, n_informative) - 1
+        A = 2 * generator.uniform(size=(n_informative, n_informative)) - 1
         X_k[...] = np.dot(X_k, A)  # introduce random covariance

         X_k += centroid  # shift the cluster to a vertex

     # Create redundant features
     if n_redundant > 0:
-        B = 2 * generator.rand(n_informative, n_redundant) - 1
+        B = 2 * generator.uniform(size=(n_informative, n_redundant)) - 1
         X[:, n_informative : n_informative + n_redundant] = np.dot(
             X[:, :n_informative], B
         )

     # Repeat some features
     if n_repeated > 0:
         n = n_informative + n_redundant
-        indices = ((n - 1) * generator.rand(n_repeated) + 0.5).astype(np.intp)
+        indices = ((n - 1) * generator.uniform(size=n_repeated) + 0.5).astype(np.intp)
         X[:, n : n + n_repeated] = X[:, indices]

     # Fill useless features
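The index trick in the "repeat some features" hunk is worth unpacking: scaling a uniform draw into [0.5, n - 0.5) and truncating rounds u * (n - 1) to the nearest integer in {0, ..., n - 1}. A standalone illustration (not from the PR):

    import numpy as np

    rng = np.random.RandomState(0)
    n, n_repeated = 7, 5

    # (n - 1) * u + 0.5 lies in [0.5, n - 0.5); astype truncates, which
    # here amounts to nearest-integer rounding, so every draw yields a
    # valid column index (the two endpoints get half the weight).
    indices = ((n - 1) * rng.uniform(size=n_repeated) + 0.5).astype(np.intp)
    assert indices.min() >= 0 and indices.max() <= n - 1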
@@ -263,16 +263,16 @@ def make_classification(

     # Randomly replace labels
     if flip_y >= 0.0:
-        flip_mask = generator.rand(n_samples) < flip_y
+        flip_mask = generator.uniform(size=n_samples) < flip_y
         y[flip_mask] = generator.randint(n_classes, size=flip_mask.sum())

     # Randomly shift and scale
     if shift is None:
-        shift = (2 * generator.rand(n_features) - 1) * class_sep
+        shift = (2 * generator.uniform(size=n_features) - 1) * class_sep
     X += shift

     if scale is None:
-        scale = 1 + 100 * generator.rand(n_features)
+        scale = 1 + 100 * generator.uniform(size=n_features)
     X *= scale

     if shuffle:
@@ -391,10 +391,10 @@ def make_multilabel_classification(
         )

     generator = check_random_state(random_state)
-    p_c = generator.rand(n_classes)
+    p_c = generator.uniform(size=n_classes)
     p_c /= p_c.sum()
     cumulative_p_c = np.cumsum(p_c)
-    p_w_c = generator.rand(n_features, n_classes)
+    p_w_c = generator.uniform(size=(n_features, n_classes))
     p_w_c /= np.sum(p_w_c, axis=0)

     def sample_example():
@@ -409,7 +409,7 @@ def sample_example():
         y = set()
         while len(y) != y_size:
             # pick a class with probability P(c)
-            c = np.searchsorted(cumulative_p_c, generator.rand(y_size - len(y)))
+            c = np.searchsorted(cumulative_p_c, generator.uniform(size=y_size - len(y)))
             y.update(c)
         y = list(y)

@@ -427,7 +427,7 @@ def sample_example():
         # sample words with replacement from selected classes
         cumulative_p_w_sample = p_w_c.take(y, axis=1).sum(axis=1).cumsum()
         cumulative_p_w_sample /= cumulative_p_w_sample[-1]
-        words = np.searchsorted(cumulative_p_w_sample, generator.rand(n_words))
+        words = np.searchsorted(cumulative_p_w_sample, generator.uniform(size=n_words))
         return words, y

     X_indices = array.array("i")
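Both sample_example draws use the same inverse-CDF idiom: searchsorted maps a uniform variate through the cumulative distribution, so index k is hit with probability p_c[k]. A sketch of the pattern with an empirical check (illustrative only):

    import numpy as np

    rng = np.random.RandomState(0)

    # Normalized class priors and their CDF, as in the hunks above.
    p_c = rng.uniform(size=5)
    p_c /= p_c.sum()
    cumulative_p_c = np.cumsum(p_c)

    # searchsorted returns the first CDF entry at or above each draw,
    # so class k is selected with probability p_c[k].
    draws = rng.uniform(size=200_000)
    counts = np.bincount(np.searchsorted(cumulative_p_c, draws), minlength=5)
    assert np.allclose(counts / draws.size, p_c, atol=5e-3)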
@@ -611,7 +611,9 @@ def make_regression(
     # zeros (the other features are not correlated to y and should be ignored
     # by a sparsifying regularizers such as L1 or elastic net)
     ground_truth = np.zeros((n_features, n_targets))
-    ground_truth[:n_informative, :] = 100 * generator.rand(n_informative, n_targets)
+    ground_truth[:n_informative, :] = 100 * generator.uniform(
+        size=(n_informative, n_targets)
+    )

     y = np.dot(X, ground_truth) + bias

@@ -1016,7 +1018,7 @@ def make_friedman1(n_samples=100, n_features=10, *, noise=0.0, random_state=None

     generator = check_random_state(random_state)

-    X = generator.rand(n_samples, n_features)
+    X = generator.uniform(size=(n_samples, n_features))
     y = (
         10 * np.sin(np.pi * X[:, 0] * X[:, 1])
         + 20 * (X[:, 2] - 0.5) ** 2
@@ -1079,7 +1081,7 @@ def make_friedman2(n_samples=100, *, noise=0.0, random_state=None):
     """
     generator = check_random_state(random_state)

-    X = generator.rand(n_samples, 4)
+    X = generator.uniform(size=(n_samples, 4))
     X[:, 0] *= 100
     X[:, 1] *= 520 * np.pi
     X[:, 1] += 40 * np.pi
@@ -1144,7 +1146,7 @@ def make_friedman3(n_samples=100, *, noise=0.0, random_state=None):
     """
     generator = check_random_state(random_state)

-    X = generator.rand(n_samples, 4)
+    X = generator.uniform(size=(n_samples, 4))
     X[:, 0] *= 100
     X[:, 1] *= 520 * np.pi
     X[:, 1] += 40 * np.pi
@@ -1380,9 +1382,9 @@ def make_spd_matrix(n_dim, *, random_state=None):
     """
     generator = check_random_state(random_state)

-    A = generator.rand(n_dim, n_dim)
+    A = generator.uniform(size=(n_dim, n_dim))
     U, _, Vt = linalg.svd(np.dot(A.T, A), check_finite=False)
-    X = np.dot(np.dot(U, 1.0 + np.diag(generator.rand(n_dim))), Vt)
+    X = np.dot(np.dot(U, 1.0 + np.diag(generator.uniform(size=n_dim))), Vt)

     return X
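For readers skimming the make_spd_matrix hunk: 1.0 + np.diag(u) is not a diagonal matrix but the all-ones matrix plus a positive diagonal, which is still symmetric positive definite, and conjugating it by the (near-)orthogonal SVD factors of a symmetric PSD matrix keeps the spectrum positive. A standalone sketch (an illustration, not the library function):

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    n_dim = 4

    # A.T @ A is symmetric positive semi-definite; its SVD yields
    # orthogonal U, Vt with Vt numerically equal to U.T.
    A = rng.uniform(size=(n_dim, n_dim))
    U, _, Vt = linalg.svd(np.dot(A.T, A))

    # Middle factor: all-ones matrix + positive diagonal, itself SPD
    # (x'(J + D)x = (sum x)^2 + sum d_i x_i^2 > 0 for x != 0).
    X = np.dot(np.dot(U, 1.0 + np.diag(rng.uniform(size=n_dim))), Vt)
    assert np.all(linalg.eigvalsh(X) > 0)  # strictly positive spectrum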

@@ -1442,11 +1444,11 @@ def make_sparse_spd_matrix(
     random_state = check_random_state(random_state)

     chol = -np.eye(dim)
-    aux = random_state.rand(dim, dim)
+    aux = random_state.uniform(size=(dim, dim))
     aux[aux < alpha] = 0
     aux[aux > alpha] = smallest_coef + (
         largest_coef - smallest_coef
-    ) * random_state.rand(np.sum(aux > alpha))
+    ) * random_state.uniform(size=np.sum(aux > alpha))
     aux = np.tril(aux, k=-1)

     # Permute the lines: we don't want to have asymmetries in the final
@@ -1510,15 +1512,15 @@ def make_swiss_roll(n_samples=100, *, noise=0.0, random_state=None, hole=False):
     generator = check_random_state(random_state)

     if not hole:
-        t = 1.5 * np.pi * (1 + 2 * generator.rand(n_samples))
-        y = 21 * generator.rand(n_samples)
+        t = 1.5 * np.pi * (1 + 2 * generator.uniform(size=n_samples))
+        y = 21 * generator.uniform(size=n_samples)
     else:
         corners = np.array(
             [[np.pi * (1.5 + i), j * 7] for i in range(3) for j in range(3)]
         )
         corners = np.delete(corners, 4, axis=0)
         corner_index = generator.choice(8, n_samples)
-        parameters = generator.rand(2, n_samples) * np.array([[np.pi], [7]])
+        parameters = generator.uniform(size=(2, n_samples)) * np.array([[np.pi], [7]])
         t, y = corners[corner_index].T + parameters

     x = t * np.cos(t)
@@ -1561,9 +1563,9 @@ def make_s_curve(n_samples=100, *, noise=0.0, random_state=None):
     """
     generator = check_random_state(random_state)

-    t = 3 * np.pi * (generator.rand(1, n_samples) - 0.5)
+    t = 3 * np.pi * (generator.uniform(size=(1, n_samples)) - 0.5)
     x = np.sin(t)
-    y = 2.0 * generator.rand(1, n_samples)
+    y = 2.0 * generator.uniform(size=(1, n_samples))
     z = np.sign(t) * (np.cos(t) - 1)

     X = np.concatenate((x, y, z))
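As a visual aid for the make_swiss_roll hunk above: the two uniform draws are the roll's intrinsic coordinates, an angle t and a depth y, and the radius grows linearly with t. A minimal reconstruction of the no-hole branch (illustrative, not the full generator):

    import numpy as np

    rng = np.random.RandomState(0)
    n_samples = 1000

    # Intrinsic coordinates: t in [1.5*pi, 4.5*pi), y in [0, 21).
    t = 1.5 * np.pi * (1 + 2 * rng.uniform(size=n_samples))
    y = 21 * rng.uniform(size=n_samples)

    # Radius equal to the angle traces an Archimedean-style spiral
    # in the (x, z) plane; y is the roll's width.
    x = t * np.cos(t)
    z = t * np.sin(t)
    X = np.vstack((x, y, z)).T
    assert X.shape == (n_samples, 3)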
2 changes: 1 addition & 1 deletion sklearn/ensemble/_gradient_boosting.pyx
@@ -256,7 +256,7 @@ def _random_sample_mask(np.npy_intp n_total_samples,
     the others are ``False``.
     """
     cdef np.ndarray[float64, ndim=1, mode="c"] rand = \
-        random_state.rand(n_total_samples)
+        random_state.uniform(size=n_total_samples)
     cdef np.ndarray[uint8, ndim=1, mode="c", cast=True] sample_mask = \
         np_zeros((n_total_samples,), dtype=bool)

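Only the uniform draw of this Cython helper appears in the hunk; the loop that consumes rand is not shown. Assuming it implements Knuth-style selection sampling (Algorithm S), a pure-Python sketch would look like the following (the loop body is an assumption about the unshown code, not a copy of it):

    import numpy as np

    def random_sample_mask(n_total_samples, n_total_in_bag, random_state):
        # Keep index i with probability (left to pick) / (left to scan);
        # selection sampling yields exactly n_total_in_bag True entries.
        rand = random_state.uniform(size=n_total_samples)
        sample_mask = np.zeros(n_total_samples, dtype=bool)
        n_bagged = 0
        for i in range(n_total_samples):
            if rand[i] * (n_total_samples - i) < (n_total_in_bag - n_bagged):
                sample_mask[i] = True
                n_bagged += 1
        return sample_mask

    mask = random_sample_mask(10, 4, np.random.RandomState(0))
    assert mask.sum() == 4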
2 changes: 1 addition & 1 deletion sklearn/manifold/_mds.py
@@ -84,7 +84,7 @@ def _smacof_single(
     sim_flat_w = sim_flat[sim_flat != 0]
     if init is None:
         # Randomly choose initial configuration
-        X = random_state.rand(n_samples * n_components)
+        X = random_state.uniform(size=n_samples * n_components)
         X = X.reshape((n_samples, n_components))
     else:
         # overrides the parameter p
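Context for the _smacof_single change: when no init is given, SMACOF starts from a uniformly random configuration, drawn flat and reshaped to one coordinate row per sample. A standalone sketch (illustrative):

    import numpy as np
    from sklearn.utils import check_random_state

    n_samples, n_components = 6, 2
    random_state = check_random_state(0)

    # Flat uniform draw, reshaped into an (n_samples, n_components)
    # starting embedding for the stress-majorization iterations.
    X = random_state.uniform(size=n_samples * n_components)
    X = X.reshape((n_samples, n_components))
    assert X.shape == (n_samples, n_components)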
2 changes: 1 addition & 1 deletion sklearn/mixture/_base.py
@@ -148,7 +148,7 @@ def _initialize_parameters(self, X, random_state):
             )
             resp[np.arange(n_samples), label] = 1
         elif self.init_params == "random":
-            resp = random_state.rand(n_samples, self.n_components)
+            resp = random_state.uniform(size=(n_samples, self.n_components))
             resp /= resp.sum(axis=1)[:, np.newaxis]
         else:
             raise ValueError(
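The "random" branch in _initialize_parameters builds responsibilities by normalizing positive uniform draws row-wise, so each sample starts with a random soft assignment over mixture components. Illustration of just that step (not the full code path):

    import numpy as np

    rng = np.random.RandomState(0)
    n_samples, n_components = 5, 3

    # Any strictly positive matrix becomes row-stochastic after
    # dividing each row by its own sum.
    resp = rng.uniform(size=(n_samples, n_components))
    resp /= resp.sum(axis=1)[:, np.newaxis]
    assert np.allclose(resp.sum(axis=1), 1.0)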
6 changes: 3 additions & 3 deletions sklearn/utils/estimator_checks.py
@@ -798,11 +798,11 @@ def _generate_sparse_matrix(X_csr):

 def check_estimator_sparse_data(name, estimator_orig):
     rng = np.random.RandomState(0)
-    X = rng.rand(40, 3)
+    X = rng.uniform(size=(40, 3))
     X[X < 0.8] = 0
     X = _pairwise_estimator_convert_X(X, estimator_orig)
     X_csr = sparse.csr_matrix(X)
-    y = (4 * rng.rand(40)).astype(int)
+    y = (4 * rng.uniform(size=40)).astype(int)
     # catch deprecation warnings
     with ignore_warnings(category=FutureWarning):
         estimator = clone(estimator_orig)
@@ -1088,7 +1088,7 @@ def check_sample_weights_not_overwritten(name, estimator_orig):
 def check_dtype_object(name, estimator_orig):
     # check that estimators treat dtype object as numeric if possible
     rng = np.random.RandomState(0)
-    X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)
+    X = _pairwise_estimator_convert_X(rng.uniform(size=(40, 10)), estimator_orig)
     X = X.astype(object)
     tags = _safe_tags(estimator_orig)
     y = (X[:, 0] * 4).astype(int)
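Both estimator-check hunks draw raw uniform data and post-process it: thresholding at 0.8 zeroes roughly 80% of entries so the CSR conversion is genuinely sparse, and scaling by 4 before truncation yields integer labels in {0, ..., 3}. A condensed sketch (illustrative):

    import numpy as np
    from scipy import sparse

    rng = np.random.RandomState(0)

    # ~80% of uniform draws fall below 0.8 and are zeroed out.
    X = rng.uniform(size=(40, 3))
    X[X < 0.8] = 0
    X_csr = sparse.csr_matrix(X)

    # 4 * u lies in [0, 4), so truncation gives labels in {0, 1, 2, 3}.
    y = (4 * rng.uniform(size=40)).astype(int)
    assert X_csr.nnz < X.size and y.max() <= 3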
2 changes: 1 addition & 1 deletion sklearn/utils/random.py
@@ -89,7 +89,7 @@ def _random_choice_csc(n_samples, classes, class_probability=None, random_state=
                 class_probability_nz
             )
             classes_ind = np.searchsorted(
-                class_probability_nz_norm.cumsum(), rng.rand(nnz)
+                class_probability_nz_norm.cumsum(), rng.uniform(size=nnz)
             )
             data.extend(classes[j][classes_j_nonzero][classes_ind])
             indptr.append(len(indices))
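_random_choice_csc uses the same searchsorted idiom as the dataset generators: one vectorized pass over the CDF replaces nnz individual weighted choices when filling the nonzero entries of each output column. An equivalent-in-distribution sketch (illustrative only):

    import numpy as np

    rng = np.random.RandomState(0)

    # Normalized probabilities of the nonzero classes and their CDF.
    class_probability_nz_norm = np.array([0.25, 0.5, 0.25])
    nnz = 8

    # Distributionally equivalent to
    # rng.choice(3, size=nnz, p=class_probability_nz_norm),
    # but vectorized through the cumulative distribution.
    classes_ind = np.searchsorted(
        class_probability_nz_norm.cumsum(), rng.uniform(size=nnz)
    )
    assert classes_ind.max() < 3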