From 3477a9f45817b1c8f6663f66ea17079e99a02b02 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 8 Jul 2022 23:16:04 -0500 Subject: [PATCH 01/19] First draft. Need to check against expected dists, including extreme values. --- Lib/random.py | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/Lib/random.py b/Lib/random.py index 2166474af0554b..5e934a53476043 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -24,6 +24,7 @@ negative exponential gamma beta + binomial pareto Weibull @@ -49,6 +50,7 @@ from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin from math import tau as TWOPI, floor as _floor, isfinite as _isfinite +from math import lgamma as _lgamma, fabs as _fabs from os import urandom as _urandom from _collections_abc import Sequence as _Sequence from operator import index as _index @@ -68,6 +70,7 @@ "Random", "SystemRandom", "betavariate", + "binomialvariate", "choice", "choices", "expovariate", @@ -99,6 +102,10 @@ RECIP_BPF = 2 ** -BPF _ONE = 1 +def _logfact(n): + "Return log(n!)" + return _lgamma(n + 1) + class Random(_random.Random): """Random number generator base class used by bound module functions. @@ -725,6 +732,83 @@ def betavariate(self, alpha, beta): return y / (y + self.gammavariate(beta, 1.0)) return 0.0 + + def binomialvariate(self, n=1, p=0.5): + """Binomial random variable. + + Gives the number of successes for *n* Bernoulli trials + with the probability of success in each trial being *p*. + + Returns an integer in the range: 0 <= X <= n + + """ + # Error check inputs and handle edge cases + if n < 0: + raise ValueError("n must be non-negative") + if p <= 0.0 or p >= 1.0: + if p == 0.0: + return 0 + if p == 1.0: + return n + raise ValueError("p must be in the range 0.0 <= p <= 1.0") + + random = self.random + + # Fast path for a common case + if n == 1: + return _index(random() < p) + + # Exploit symmetry to establish: p <= 0.5 + if p > 0.5: + return n - self.binomialvariate(n, 1.0 - p) + + if n * p < 10.0: + # BG: Geometric method by Devroye with running time of O(np). + # https://dl.acm.org/doi/pdf/10.1145/42372.42381 + x = y = 0 + c = _log(1.0 - p) + if not c: + return x + while True: + y += _floor(_log(random()) / c) + 1 + if y >= n: + return x + x += 1 + + # BTRS: Transformed rejection with squeeze method + # See "The Generation of Binomial Random Variates" by Wolfgang Hörmann + # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf + # Assumes n*p >= 10 and p <= 0.5 + + spq = _sqrt(n * p * (1.0 - p)) + b = 1.15 + 2.53 * spq + a = -0.0873 + 0.0248 * b + 0.01 * p + c = n * p + 0.5 + vr = 0.92 - 4.2 / b + + while True: + + u = random() + v = random() + u -= 0.5 + us = 0.5 - _fabs(u) + k = _floor((2.0 * a / us + b) * u + c) + + if us >= 0.07 and v <= vr: + return k + if k < 0 or k > n: + continue + + alpha = (2.83 + 5.1 / b) * spq + lpq = _log(p / (1.0 - p)) + m = _floor((n + 1) * p) + h = _logfact(m) + _logfact(n - m) + + v *= alpha / (a / (us * us) + b) + if v <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: + return k + + def paretovariate(self, alpha): """Pareto distribution. alpha is the shape parameter.""" # Jain, pg. 495 @@ -810,6 +894,7 @@ def _notimplemented(self, *args, **kwds): gammavariate = _inst.gammavariate gauss = _inst.gauss betavariate = _inst.betavariate +binomialvariate = _inst.binomialvariate paretovariate = _inst.paretovariate weibullvariate = _inst.weibullvariate getstate = _inst.getstate From 38ba9e66fab7930692555c9b6739a7cbff3f206f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 02:02:16 -0500 Subject: [PATCH 02/19] Get distribution correct for BG --- Lib/random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/random.py b/Lib/random.py index 5e934a53476043..8e036498f0666b 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -771,7 +771,7 @@ def binomialvariate(self, n=1, p=0.5): return x while True: y += _floor(_log(random()) / c) + 1 - if y >= n: + if y > n: return x x += 1 From 1e4e7ab809410ecf00f82ad69b30bc28bbd29b68 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 09:03:48 -0500 Subject: [PATCH 03/19] Fix log() call missing from original paper --- Lib/random.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index 8e036498f0666b..6f25f0d9eb7232 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -778,7 +778,7 @@ def binomialvariate(self, n=1, p=0.5): # BTRS: Transformed rejection with squeeze method # See "The Generation of Binomial Random Variates" by Wolfgang Hörmann # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf - # Assumes n*p >= 10 and p <= 0.5 + assert n*p >= 10.0 and p <= 0.5 spq = _sqrt(n * p * (1.0 - p)) b = 1.15 + 2.53 * spq @@ -794,17 +794,18 @@ def binomialvariate(self, n=1, p=0.5): us = 0.5 - _fabs(u) k = _floor((2.0 * a / us + b) * u + c) - if us >= 0.07 and v <= vr: - return k if k < 0 or k > n: continue + if us >= 0.07 and v <= vr: + return k alpha = (2.83 + 5.1 / b) * spq lpq = _log(p / (1.0 - p)) m = _floor((n + 1) * p) h = _logfact(m) + _logfact(n - m) - v *= alpha / (a / (us * us) + b) + # Original paper errorneously omits the call to log() + v = _log(v * alpha / (a / (us * us) + b)) if v <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: return k From b224ef23cafa202f9646bd309d7bee24299f7393 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 09:14:39 -0500 Subject: [PATCH 04/19] Loop invariant code motion --- Lib/random.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index 6f25f0d9eb7232..785b72b8489f4d 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -785,6 +785,10 @@ def binomialvariate(self, n=1, p=0.5): a = -0.0873 + 0.0248 * b + 0.01 * p c = n * p + 0.5 vr = 0.92 - 4.2 / b + alpha = (2.83 + 5.1 / b) * spq + lpq = _log(p / (1.0 - p)) + m = _floor((n + 1) * p) + h = _logfact(m) + _logfact(n - m) while True: @@ -799,11 +803,6 @@ def binomialvariate(self, n=1, p=0.5): if us >= 0.07 and v <= vr: return k - alpha = (2.83 + 5.1 / b) * spq - lpq = _log(p / (1.0 - p)) - m = _floor((n + 1) * p) - h = _logfact(m) + _logfact(n - m) - # Original paper errorneously omits the call to log() v = _log(v * alpha / (a / (us * us) + b)) if v <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: From f806df3da119bd7239d626b896148200d75cd124 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 09:45:20 -0500 Subject: [PATCH 05/19] Add section comments to link back to original text --- Lib/random.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/random.py b/Lib/random.py index 785b72b8489f4d..f5435d96604cba 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -780,11 +780,14 @@ def binomialvariate(self, n=1, p=0.5): # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf assert n*p >= 10.0 and p <= 0.5 + # Step 0: Setup for step 1 spq = _sqrt(n * p * (1.0 - p)) b = 1.15 + 2.53 * spq a = -0.0873 + 0.0248 * b + 0.01 * p c = n * p + 0.5 vr = 0.92 - 4.2 / b + + # Step 3.0: Setup for 3.1 alpha = (2.83 + 5.1 / b) * spq lpq = _log(p / (1.0 - p)) m = _floor((n + 1) * p) @@ -792,17 +795,20 @@ def binomialvariate(self, n=1, p=0.5): while True: + # Step 1: Generate two uniform random numbers u = random() v = random() u -= 0.5 us = 0.5 - _fabs(u) k = _floor((2.0 * a / us + b) * u + c) + # Step 2: Skip invalid k and test bounding box if k < 0 or k > n: continue if us >= 0.07 and v <= vr: return k + # Step 3.1: Acceptance-rejection test # Original paper errorneously omits the call to log() v = _log(v * alpha / (a / (us * us) + b)) if v <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: From d6a3ddf51099a2c9598a9dc0f6759aa8361420c1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 11:19:46 -0500 Subject: [PATCH 06/19] Add comment to help relate to the paper --- Lib/random.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index f5435d96604cba..ab295688b0af3a 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -781,16 +781,16 @@ def binomialvariate(self, n=1, p=0.5): assert n*p >= 10.0 and p <= 0.5 # Step 0: Setup for step 1 - spq = _sqrt(n * p * (1.0 - p)) - b = 1.15 + 2.53 * spq + spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution + b = 1.15 + 2.53 * spq # Dominating density function parameters a = -0.0873 + 0.0248 * b + 0.01 * p c = n * p + 0.5 vr = 0.92 - 4.2 / b - # Step 3.0: Setup for 3.1 + # Step 3.0: Setup for step 3.1 alpha = (2.83 + 5.1 / b) * spq - lpq = _log(p / (1.0 - p)) - m = _floor((n + 1) * p) + lpq = _log(p / (1.0 - p)) # Log of p / q ratio + m = _floor((n + 1) * p) # Mode of the distribution h = _logfact(m) + _logfact(n - m) while True: @@ -802,16 +802,18 @@ def binomialvariate(self, n=1, p=0.5): us = 0.5 - _fabs(u) k = _floor((2.0 * a / us + b) * u + c) - # Step 2: Skip invalid k and test bounding box + # Step 2: Skip over invalid k due to numeric issues. + # Then make the bounding box test. if k < 0 or k > n: continue if us >= 0.07 and v <= vr: return k # Step 3.1: Acceptance-rejection test - # Original paper errorneously omits the call to log() - v = _log(v * alpha / (a / (us * us) + b)) - if v <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: + # Original paper errorneously omits the call to log(v) which + # is needed because we're comparing to the log factorials. + v *= alpha / (a / (us * us) + b) + if _log(v) <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: return k From c9d3ca851d900863e337d1a3757cbe939f7eecab Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 14:06:40 -0500 Subject: [PATCH 07/19] More elaborate comments --- Lib/random.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index ab295688b0af3a..fb6e60e3f09ff3 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -737,7 +737,9 @@ def binomialvariate(self, n=1, p=0.5): """Binomial random variable. Gives the number of successes for *n* Bernoulli trials - with the probability of success in each trial being *p*. + with the probability of success in each trial being *p*: + + sum(random() < p for i in range(n)) Returns an integer in the range: 0 <= X <= n @@ -802,16 +804,21 @@ def binomialvariate(self, n=1, p=0.5): us = 0.5 - _fabs(u) k = _floor((2.0 * a / us + b) * u + c) - # Step 2: Skip over invalid k due to numeric issues. - # Then make the bounding box test. + # Step 2: Skip over invalid k arising due to numeric issues. if k < 0 or k > n: continue + + # This early-out "squeeze" test substantially reduces the + # number of acceptance condition evaluations. Checks to see + # whether *us* and *vr* lie in the large rectangle between + # the u-axis and the curve. if us >= 0.07 and v <= vr: return k - # Step 3.1: Acceptance-rejection test - # Original paper errorneously omits the call to log(v) which - # is needed because we're comparing to the log factorials. + # Step 3.1: Acceptance-rejection test. + # N.B. The original paper errorneously omits the call to + # log(v) which is needed because we're comparing to the log + # of the rescaled binomial distribution. v *= alpha / (a / (us * us) + b) if _log(v) <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: return k From 8e24e221e0aa99a2798d06749a4ee3d04ef8eef1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 15:12:33 -0500 Subject: [PATCH 08/19] Add documentation --- Doc/library/random.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Doc/library/random.rst b/Doc/library/random.rst index 613fbce0fdf20d..e71ecd1f4939e6 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -258,6 +258,26 @@ Functions for sequences The *population* must be a sequence. Automatic conversion of sets to lists is no longer supported. +Discrete distributions +---------------------- + +The following function generates a discrete distribution. + +.. function:: binomial(n=1, p=0.5) + + Return the number of successes for *n* independent trials with the + probability of success in each trial being *p*: + + Roughly equivalent to:: + + sum(random() < p for i in range(n)) + + The number of trials *n* should be a non-negative integer. + The probability of success *p* should be between ``0.0 <= p <= 1.0``. + The result is an integer in the range ``0 <= X <= n``. + + .. versionadded:: 3.12 + .. _real-valued-distributions: From 2b2e589f66b402584ec1e0af384e3ac921358487 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 15:17:23 -0500 Subject: [PATCH 09/19] Add blurb --- .../next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst diff --git a/Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst b/Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst new file mode 100644 index 00000000000000..b4ccea4924ff67 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst @@ -0,0 +1 @@ +Add random.binomialvariate(). From 198904b45dce5604e1a090f883e999e5e196f303 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 16:22:09 -0500 Subject: [PATCH 10/19] Add unittests --- Lib/test/test_random.py | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index fcf17a949c2a62..2c4144fede1c15 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -1045,6 +1045,9 @@ def test_constant(self): (g.lognormvariate, (0.0, 0.0), 1.0), (g.lognormvariate, (-float('inf'), 0.0), 0.0), (g.normalvariate, (10.0, 0.0), 10.0), + (g.binomialvariate, (0, 0.5), 0), + (g.binomialvariate, (10, 0.0), 0), + (g.binomialvariate, (10, 1.0), 10), (g.paretovariate, (float('inf'),), 1.0), (g.weibullvariate, (10.0, float('inf')), 10.0), (g.weibullvariate, (0.0, 10.0), 0.0), @@ -1052,6 +1055,55 @@ def test_constant(self): for i in range(N): self.assertEqual(variate(*args), expected) + def test_binomialvariate(self): + bv = random.binomialvariate + + # Cover all the code paths + with self.assertRaises(ValueError): + bv(n=-1) # Negative n + with self.assertRaises(ValueError): + bv(n=1, p=-0.5) # Negative p + with self.assertRaises(ValueError): + bv(n=1, p=1.5) # p > 1.0 + self.assertEqual(bv(10, 0.0), 0) # p == 0.0 + self.assertEqual(bv(10, 1.0), 10) # p == 1.0 + self.assertTrue(bv(1, 0.3) in {0, 1}) # n == 1 fast path + self.assertTrue(bv(1, 0.9) in {0, 1}) # n == 1 fast path + self.assertTrue(bv(1, 0.0) in {0}) # n == 1 fast path + self.assertTrue(bv(1, 1.0) in {1}) # n == 1 fast path + + # BG method p <= 0.5 and n*p=1.25 + self.assertTrue(bv(5, 0.25) in set(range(6))) + + # BG method p >= 0.5 and n*(1-p)=1.25 + self.assertTrue(bv(5, 0.75) in set(range(6))) + + # BTRS method p <= 0.5 and n*p=25 + self.assertTrue(bv(100, 0.25) in set(range(101))) + + # BTRS method p > 0.5 and n*(1-p)=25 + self.assertTrue(bv(100, 0.75) in set(range(101))) + + # Statistical tests chosen such that they are + # exceedingly unlikely to ever fail for correct code. + + # BG code path + # Expected dist: [31641, 42188, 21094, 4688, 391] + c = Counter(bv(4, 0.25) for i in range(100_000)) + self.assertTrue(29_641 <= c[0] <= 33_641, c) + self.assertTrue(40_188 <= c[1] <= 44_188) + self.assertTrue(19_094 <= c[2] <= 23_094) + self.assertTrue(2_688 <= c[3] <= 6_688) + self.assertEqual(set(c), {0, 1, 2, 3, 4}) + + # BTRS code path + # Sum of c[20], c[21], c[22], c[23], c[24] expected to be 36,214 + c = Counter(bv(100, 0.25) for i in range(100_000)) + self.assertTrue(34_214 <= c[20]+c[21]+c[22]+c[23]+c[24] <= 38_214) + self.assertTrue(set(c) <= set(range(101))) + self.assertEqual(c.total(), 100_000) + + def test_von_mises_range(self): # Issue 17149: von mises variates were not consistently in the # range [0, 2*PI]. From 9f65a39d82794b447765d8a3691fe13e42618148 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 16:46:33 -0500 Subject: [PATCH 11/19] Add tests for large n --- Lib/test/test_random.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 2c4144fede1c15..3eefd280972b00 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -1103,6 +1103,12 @@ def test_binomialvariate(self): self.assertTrue(set(c) <= set(range(101))) self.assertEqual(c.total(), 100_000) + # Demonstrate the BTRS works for huge values of n + X = bv(100_000_000, 0.2) + self.assertTrue(19_000_000 <= X <= 21_000_000) + X = bv(100_000_000, 0.9) + self.assertTrue(89_000_000 <= X <= 91_000_000) + def test_von_mises_range(self): # Issue 17149: von mises variates were not consistently in the From 8286532936b705caa969909094604bb4d4ab2652 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 9 Jul 2022 16:48:54 -0500 Subject: [PATCH 12/19] Switch short name from "bv" to "B" --- Lib/test/test_random.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 3eefd280972b00..1e825c3572d20a 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -1056,40 +1056,40 @@ def test_constant(self): self.assertEqual(variate(*args), expected) def test_binomialvariate(self): - bv = random.binomialvariate + B = random.binomialvariate # Cover all the code paths with self.assertRaises(ValueError): - bv(n=-1) # Negative n + B(n=-1) # Negative n with self.assertRaises(ValueError): - bv(n=1, p=-0.5) # Negative p + B(n=1, p=-0.5) # Negative p with self.assertRaises(ValueError): - bv(n=1, p=1.5) # p > 1.0 - self.assertEqual(bv(10, 0.0), 0) # p == 0.0 - self.assertEqual(bv(10, 1.0), 10) # p == 1.0 - self.assertTrue(bv(1, 0.3) in {0, 1}) # n == 1 fast path - self.assertTrue(bv(1, 0.9) in {0, 1}) # n == 1 fast path - self.assertTrue(bv(1, 0.0) in {0}) # n == 1 fast path - self.assertTrue(bv(1, 1.0) in {1}) # n == 1 fast path + B(n=1, p=1.5) # p > 1.0 + self.assertEqual(B(10, 0.0), 0) # p == 0.0 + self.assertEqual(B(10, 1.0), 10) # p == 1.0 + self.assertTrue(B(1, 0.3) in {0, 1}) # n == 1 fast path + self.assertTrue(B(1, 0.9) in {0, 1}) # n == 1 fast path + self.assertTrue(B(1, 0.0) in {0}) # n == 1 fast path + self.assertTrue(B(1, 1.0) in {1}) # n == 1 fast path # BG method p <= 0.5 and n*p=1.25 - self.assertTrue(bv(5, 0.25) in set(range(6))) + self.assertTrue(B(5, 0.25) in set(range(6))) # BG method p >= 0.5 and n*(1-p)=1.25 - self.assertTrue(bv(5, 0.75) in set(range(6))) + self.assertTrue(B(5, 0.75) in set(range(6))) # BTRS method p <= 0.5 and n*p=25 - self.assertTrue(bv(100, 0.25) in set(range(101))) + self.assertTrue(B(100, 0.25) in set(range(101))) # BTRS method p > 0.5 and n*(1-p)=25 - self.assertTrue(bv(100, 0.75) in set(range(101))) + self.assertTrue(B(100, 0.75) in set(range(101))) # Statistical tests chosen such that they are # exceedingly unlikely to ever fail for correct code. # BG code path # Expected dist: [31641, 42188, 21094, 4688, 391] - c = Counter(bv(4, 0.25) for i in range(100_000)) + c = Counter(B(4, 0.25) for i in range(100_000)) self.assertTrue(29_641 <= c[0] <= 33_641, c) self.assertTrue(40_188 <= c[1] <= 44_188) self.assertTrue(19_094 <= c[2] <= 23_094) @@ -1098,16 +1098,14 @@ def test_binomialvariate(self): # BTRS code path # Sum of c[20], c[21], c[22], c[23], c[24] expected to be 36,214 - c = Counter(bv(100, 0.25) for i in range(100_000)) + c = Counter(B(100, 0.25) for i in range(100_000)) self.assertTrue(34_214 <= c[20]+c[21]+c[22]+c[23]+c[24] <= 38_214) self.assertTrue(set(c) <= set(range(101))) self.assertEqual(c.total(), 100_000) # Demonstrate the BTRS works for huge values of n - X = bv(100_000_000, 0.2) - self.assertTrue(19_000_000 <= X <= 21_000_000) - X = bv(100_000_000, 0.9) - self.assertTrue(89_000_000 <= X <= 91_000_000) + self.assertTrue(19_000_000 <= B(100_000_000, 0.2) <= 21_000_000) + self.assertTrue(89_000_000 <= B(100_000_000, 0.9) <= 91_000_000) def test_von_mises_range(self): From 40ec6a184e425127444bb18b52b0b7106782863a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 10 Jul 2022 11:02:23 -0500 Subject: [PATCH 13/19] Only perform step 3.0 setup when needed and only once. --- Lib/random.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index fb6e60e3f09ff3..719b5cc164ca19 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -781,6 +781,7 @@ def binomialvariate(self, n=1, p=0.5): # See "The Generation of Binomial Random Variates" by Wolfgang Hörmann # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf assert n*p >= 10.0 and p <= 0.5 + step_three_setup = False # Step 0: Setup for step 1 spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution @@ -789,12 +790,6 @@ def binomialvariate(self, n=1, p=0.5): c = n * p + 0.5 vr = 0.92 - 4.2 / b - # Step 3.0: Setup for step 3.1 - alpha = (2.83 + 5.1 / b) * spq - lpq = _log(p / (1.0 - p)) # Log of p / q ratio - m = _floor((n + 1) * p) # Mode of the distribution - h = _logfact(m) + _logfact(n - m) - while True: # Step 1: Generate two uniform random numbers @@ -815,6 +810,14 @@ def binomialvariate(self, n=1, p=0.5): if us >= 0.07 and v <= vr: return k + if not step_three_setup: + # Step 3.0: Compute constants for step 3.1 + alpha = (2.83 + 5.1 / b) * spq + lpq = _log(p / (1.0 - p)) # Log of p / q ratio + m = _floor((n + 1) * p) # Mode of the distribution + h = _logfact(m) + _logfact(n - m) + step_three_setup = True # Only needs to be done once + # Step 3.1: Acceptance-rejection test. # N.B. The original paper errorneously omits the call to # log(v) which is needed because we're comparing to the log From 6dfcd0c65ef75578bd2d43aa281fc459d39c37c1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 10 Jul 2022 11:14:12 -0500 Subject: [PATCH 14/19] Add binomialvariate to the in-module test code --- Lib/random.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index 719b5cc164ca19..2e6ebbd2978f81 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -937,15 +937,17 @@ def _test_generator(n, func, args): low = min(data) high = max(data) - print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}') + print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}{args!r}') print('avg %g, stddev %g, min %g, max %g\n' % (xbar, sigma, low, high)) -def _test(N=2000): +def _test(N=10_000): _test_generator(N, random, ()) _test_generator(N, normalvariate, (0.0, 1.0)) _test_generator(N, lognormvariate, (0.0, 1.0)) _test_generator(N, vonmisesvariate, (0.0, 1.0)) + _test_generator(N, binomialvariate, (15, 0.60)) + _test_generator(N, binomialvariate, (100, 0.75)) _test_generator(N, gammavariate, (0.01, 1.0)) _test_generator(N, gammavariate, (0.1, 1.0)) _test_generator(N, gammavariate, (0.1, 2.0)) From 9ada9c09b1755f20b20e26951f6cd5a1f9d8b041 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 10 Jul 2022 11:27:20 -0500 Subject: [PATCH 15/19] Fix function name in docs. Add a reference link. --- Doc/library/random.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Doc/library/random.rst b/Doc/library/random.rst index e71ecd1f4939e6..fee35382042a40 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -263,12 +263,14 @@ Discrete distributions The following function generates a discrete distribution. -.. function:: binomial(n=1, p=0.5) +.. function:: binomialvariate(n=1, p=0.5) + `Binomial distribution + `_. Return the number of successes for *n* independent trials with the probability of success in each trial being *p*: - Roughly equivalent to:: + Mathematically equivalent to:: sum(random() < p for i in range(n)) From 05653a8e9f8c6debf533c203898a33e2401db9d8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 10 Jul 2022 11:46:21 -0500 Subject: [PATCH 16/19] Neated-up comments --- Lib/random.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index 2e6ebbd2978f81..7aa37cb20d87eb 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -785,7 +785,7 @@ def binomialvariate(self, n=1, p=0.5): # Step 0: Setup for step 1 spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution - b = 1.15 + 2.53 * spq # Dominating density function parameters + b = 1.15 + 2.53 * spq a = -0.0873 + 0.0248 * b + 0.01 * p c = n * p + 0.5 vr = 0.92 - 4.2 / b @@ -798,30 +798,27 @@ def binomialvariate(self, n=1, p=0.5): u -= 0.5 us = 0.5 - _fabs(u) k = _floor((2.0 * a / us + b) * u + c) - - # Step 2: Skip over invalid k arising due to numeric issues. if k < 0 or k > n: continue - # This early-out "squeeze" test substantially reduces the - # number of acceptance condition evaluations. Checks to see - # whether *us* and *vr* lie in the large rectangle between + # Step 2: This early-out "squeeze" test substantially reduces + # the number of acceptance condition evaluations. Checks to + # see whether *us* and *vr* lie in the large rectangle between # the u-axis and the curve. if us >= 0.07 and v <= vr: return k if not step_three_setup: - # Step 3.0: Compute constants for step 3.1 + # Step 3.0: Set up constants for step 3.1 alpha = (2.83 + 5.1 / b) * spq - lpq = _log(p / (1.0 - p)) # Log of p / q ratio + lpq = _log(p / (1.0 - p)) m = _floor((n + 1) * p) # Mode of the distribution h = _logfact(m) + _logfact(n - m) step_three_setup = True # Only needs to be done once # Step 3.1: Acceptance-rejection test. - # N.B. The original paper errorneously omits the call to - # log(v) which is needed because we're comparing to the log - # of the rescaled binomial distribution. + # N.B. The original paper errorneously omits the call to log(v) + # when comparing to the log of the rescaled binomial distribution. v *= alpha / (a / (us * us) + b) if _log(v) <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: return k From 1b124627d6a3f1ff094870667fdd54a6b80541f3 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 12 Jul 2022 18:52:05 -0500 Subject: [PATCH 17/19] Tighten-up comments and improve a variable name --- Lib/random.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index 7aa37cb20d87eb..987caae6317719 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -736,7 +736,7 @@ def betavariate(self, alpha, beta): def binomialvariate(self, n=1, p=0.5): """Binomial random variable. - Gives the number of successes for *n* Bernoulli trials + Gives the number of successes for *n* independent trials with the probability of success in each trial being *p*: sum(random() < p for i in range(n)) @@ -777,13 +777,11 @@ def binomialvariate(self, n=1, p=0.5): return x x += 1 - # BTRS: Transformed rejection with squeeze method - # See "The Generation of Binomial Random Variates" by Wolfgang Hörmann + # BTRS: Transformed rejection with squeeze method by Wolfgang Hörmann # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf assert n*p >= 10.0 and p <= 0.5 - step_three_setup = False + setup_complete = False - # Step 0: Setup for step 1 spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution b = 1.15 + 2.53 * spq a = -0.0873 + 0.0248 * b + 0.01 * p @@ -792,7 +790,6 @@ def binomialvariate(self, n=1, p=0.5): while True: - # Step 1: Generate two uniform random numbers u = random() v = random() u -= 0.5 @@ -801,24 +798,20 @@ def binomialvariate(self, n=1, p=0.5): if k < 0 or k > n: continue - # Step 2: This early-out "squeeze" test substantially reduces - # the number of acceptance condition evaluations. Checks to - # see whether *us* and *vr* lie in the large rectangle between - # the u-axis and the curve. + # The early-out "squeeze" test substantially reduces + # the number of acceptance condition evaluations. if us >= 0.07 and v <= vr: return k - if not step_three_setup: - # Step 3.0: Set up constants for step 3.1 + # Acceptance-rejection test. + # Note, the original paper errorneously omits the call to log(v) + # when comparing to the log of the rescaled binomial distribution. + if not setup_complete: alpha = (2.83 + 5.1 / b) * spq lpq = _log(p / (1.0 - p)) m = _floor((n + 1) * p) # Mode of the distribution h = _logfact(m) + _logfact(n - m) - step_three_setup = True # Only needs to be done once - - # Step 3.1: Acceptance-rejection test. - # N.B. The original paper errorneously omits the call to log(v) - # when comparing to the log of the rescaled binomial distribution. + setup_complete = True # Only needs to be done once v *= alpha / (a / (us * us) + b) if _log(v) <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: return k From f51add2ea3d1ca38f79fb16609e852646d8d11c3 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 12 Jul 2022 18:57:45 -0500 Subject: [PATCH 18/19] Inline _logfact() for 7% speed and keeping all code in one place. --- Lib/random.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index 987caae6317719..00849bd7e732fb 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -102,10 +102,6 @@ RECIP_BPF = 2 ** -BPF _ONE = 1 -def _logfact(n): - "Return log(n!)" - return _lgamma(n + 1) - class Random(_random.Random): """Random number generator base class used by bound module functions. @@ -810,10 +806,10 @@ def binomialvariate(self, n=1, p=0.5): alpha = (2.83 + 5.1 / b) * spq lpq = _log(p / (1.0 - p)) m = _floor((n + 1) * p) # Mode of the distribution - h = _logfact(m) + _logfact(n - m) + h = _lgamma(m + 1) + _lgamma(n - m + 1) setup_complete = True # Only needs to be done once v *= alpha / (a / (us * us) + b) - if _log(v) <= h - _logfact(k) - _logfact(n - k) + (k - m) * lpq: + if _log(v) <= h - _lgamma(k + 1) - _lgamma(n - k + 1) + (k - m) * lpq: return k From 1e16bbb5e4cf744b072564817395589298d7ce70 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 12 Jul 2022 21:17:39 -0500 Subject: [PATCH 19/19] Update simulation example --- Doc/library/random.rst | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Doc/library/random.rst b/Doc/library/random.rst index fee35382042a40..78c2b030d6095f 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -474,16 +474,13 @@ Simulations:: >>> # Deal 20 cards without replacement from a deck >>> # of 52 playing cards, and determine the proportion of cards >>> # with a ten-value: ten, jack, queen, or king. - >>> dealt = sample(['tens', 'low cards'], counts=[16, 36], k=20) - >>> dealt.count('tens') / 20 + >>> deal = sample(['tens', 'low cards'], counts=[16, 36], k=20) + >>> deal.count('tens') / 20 0.15 >>> # Estimate the probability of getting 5 or more heads from 7 spins >>> # of a biased coin that settles on heads 60% of the time. - >>> def trial(): - ... return choices('HT', cum_weights=(0.60, 1.00), k=7).count('H') >= 5 - ... - >>> sum(trial() for i in range(10_000)) / 10_000 + >>> sum(binomialvariate(n=7, p=0.6) >= 5 for i in range(10_000)) / 10_000 0.4169 >>> # Probability of the median of 5 samples being in middle two quartiles