8000 BUG: random: Fix long delays/hangs with zipf(a) when a near 1. · numpy/numpy@f5a5e04 · GitHub
[go: up one dir, main page]

Skip to content

Commit f5a5e04

Browse files
BUG: random: Fix long delays/hangs with zipf(a) when a near 1.
Closes gh-9829.
1 parent e214a12 commit f5a5e04

File tree

2 files changed

+25
-3
lines changed

2 files changed

+25
-3
lines changed

numpy/random/src/distributions/distributions.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -998,7 +998,7 @@ int64_t random_geometric(bitgen_t *bitgen_state, double p) {
998998
}
999999

10001000
RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
1001-
double am1, b;
1001+
double am1, b, Umin;
10021002

10031003
if (a >= 1025) {
10041004
/*
@@ -1011,10 +1011,21 @@ RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
10111011
}
10121012
am1 = a - 1.0;
10131013
b = pow(2.0, am1);
1014+
/*
1015+
* In the while loop, X is generated from the uniform distribution (Umin, 1].
1016+
* Values below Umin would result in X being rejected because it is too
1017+
* large, so there is no point in including them in the distribution of U.
1018+
*/
1019+
Umin = pow(RAND_INT_MAX, -am1);
10141020
while (1) {
1015-
double T, U, V, X;
1021+
double U01, T, U, V, X;
10161022

1017-
U = 1.0 - next_double(bitgen_state);
1023+
/*
1024+
* U is sampled from (Umin, 1]. Note that Umin might be 0, and we don't
1025+
* want U to be 0.
1026+
*/
1027+
U01 = next_double(bitgen_state);
1028+
U = U01*Umin + (1 - U01);
10181029
V = next_double(bitgen_state);
10191030
X = floor(pow(U, -1.0 / am1));
10201031
/*

numpy/random/tests/test_generator_mt19937_regressions.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,14 @@ def test_zipf_large_parameter(self):
170170
n = 8
171171
sample = self.mt19937.zipf(10000, size=n)
172172
assert_array_equal(sample, np.ones(n, dtype=np.int64))
173+
174+
def test_zipf_a_near_1(self):
175+
# Regression test for gh-9829: a call such as rng.zipf(1.0000000000001)
176+
# would hang.
177+
n = 100000
178+
sample = self.mt19937.zipf(1.0000000000001, size=n)
179+
# Not much of a test, but let's do something more than verify that
180+
# it doesn't hang. Certainly for a monotonically decreasing
181+
# discrete distribution truncated to signed 64 bit integers, more
182+
# than half should be less than 2**62.
183+
assert np.count_nonzero(sample < 2**62) > n/2

0 commit comments

Comments
 (0)
0