8000 BUG: random: Fix long delays/hangs with zipf(a) when a near 1. by WarrenWeckesser · Pull Request #27048 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

BUG: random: Fix long delays/hangs with zipf(a) when a near 1. #27048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions numpy/random/src/distributions/distributions.c
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,7 @@ int64_t random_geometric(bitgen_t *bitgen_state, double p) {
}

RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
double am1, b;
double am1, b, Umin;

if (a >= 1025) {
/*
Expand All @@ -1011,10 +1011,21 @@ RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
}
am1 = a - 1.0;
b = pow(2.0, am1);
/*
* In the while loop, X is generated from the uniform distribution (Umin, 1].
* Values below Umin would result in X being rejected because it is too
* large, so there is no point in including them in the distribution of U.
*/
Umin = pow(RAND_INT_MAX, -am1);
while (1) {
double T, U, V, X;
double U01, T, U, V, X;

U = 1.0 - next_double(bitgen_state);
/*
* U is sampled from (Umin, 1]. Note that Umin might be 0, and we don't
* want U to be 0.
*/
U01 = next_double(bitgen_state);
U = U01*Umin + (1 - U01);
V = next_double(bitgen_state);
X = floor(pow(U, -1.0 / am1));
/*
Expand Down
26 changes: 25 additions & 1 deletion numpy/random/src/legacy/legacy-distributions.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,31 @@ int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam) {
}

int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a) {
return (int64_t)random_zipf(bitgen_state, a);
double am1, b;

am1 = a - 1.0;
b = pow(2.0, am1);
while (1) {
double T, U, V, X;

U = 1.0 - next_double(bitgen_state);
V = next_double(bitgen_state);
X = floor(pow(U, -1.0 / am1));
/*
* The real result may be above what can be represented in a signed
* long. Since this is a straightforward rejection algorithm, we can
* just reject this value. This function then models a Zipf
* distribution truncated to sys.maxint.
*/
if (X > (double)RAND_INT_MAX || X < 1.0) {
continue;
}

T = pow(1.0 + 1.0 / X, am1);
if (V * X * (T - 1.0) / (b - 1.0) <= T / b) {
return (RAND_INT_TYPE)X;
}
}
}


Expand Down
11 changes: 11 additions & 0 deletions numpy/random/tests/test_generator_mt19937_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,14 @@ def test_zipf_large_parameter(self):
n = 8
sample = self.mt19937.zipf(10000, size=n)
assert_array_equal(sample, np.ones(n, dtype=np.int64))

def test_zipf_a_near_1(self):
# Regression test for gh-9829: a call such as rng.zipf(1.0000000000001)
# would hang.
n = 100000
sample = self.mt19937.zipf(1.0000000000001, size=n)
# Not much of a test, but let's do something more than verify that
# it doesn't hang. Certainly for a monotonically decreasing
# discrete distribution truncated to signed 64 bit integers, more
# than half should be less than 2**62.
assert np.count_nonzero(sample < 2**62) > n/2
Loading
0