8000 GH-81620: Add random.binomialvariate() by rhettinger · Pull Request #94719 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-81620: Add random.binomialvariate() #94719

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jul 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions Doc/library/random.rst
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,28 @@ Functions for sequences
The *population* must be a sequence. Automatic conversion of sets
to lists is no longer supported.

Discrete distributions
----------------------

The following function generates a discrete distribution.

.. function:: binomialvariate(n=1, p=0.5)

`Binomial distribution
<http://mathworld.wolfram.com/BinomialDistribution.html>`_.
Return the number of successes for *n* independent trials with the
probability of success in each trial being *p*:

Mathematically equivalent to::

sum(random() < p for i in range(n))

The number of trials *n* should be a non-negative integer.
The probability of success *p* should be between ``0.0 <= p <= 1.0``.
The result is an integer in the range ``0 <= X <= n``.

.. versionadded:: 3.12


.. _real-valued-distributions:

Expand Down Expand Up @@ -452,16 +474,13 @@ Simulations::
>>> # Deal 20 cards without replacement from a deck
>>> # of 52 playing cards, and determine the proportion of cards
>>> # with a ten-value: ten, jack, queen, or king.
>>> dealt = sample(['tens', 'low cards'], counts=[16, 36], k=20)
>>> dealt.count('tens') / 20
>>> deal = sample(['tens', 'low cards'], counts=[16, 36], k=20)
>>> deal.count('tens') / 20
0.15

>>> # Estimate the probability of getting 5 or more heads from 7 spins
>>> # of a biased coin that settles on heads 60% of the time.
>>> def trial():
... return choices('HT', cum_weights=(0.60, 1.00), k=7).count('H') >= 5
...
>>> sum(trial() for i in range(10_000)) / 10_000
>>> sum(binomialvariate(n=7, p=0.6) >= 5 for i in range(10_000)) / 10_000
0.4169

>>> # Probability of the median of 5 samples being in middle two quartiles
Expand Down
95 changes: 93 additions & 2 deletions Lib/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
negative exponential
gamma
beta
binomial
pareto
Weibull

Expand All @@ -49,6 +50,7 @@
from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
from math import tau as TWOPI, floor as _floor, isfinite as _isfinite
from math import lgamma as _lgamma, fabs as _fabs
from os import urandom as _urandom
from _collections_abc import Sequence as _Sequence
from operator import index as _index
Expand All @@ -68,6 +70,7 @@
"Random",
"SystemRandom",
"betavariate",
"binomialvariate",
"choice",
"choices",
"expovariate",
Expand Down Expand Up @@ -725,6 +728,91 @@ def betavariate(self, alpha, beta):
return y / (y + self.gammavariate(beta, 1.0))
return 0.0


def binomialvariate(self, n=1, p=0.5):
"""Binomial random variable.

Gives the number of successes for *n* independent trials
with the probability of success in each trial being *p*:

sum(random() < p for i in range(n))

Returns an integer in the range: 0 <= X <= n

"""
# Error check inputs and handle edge cases
if n < 0:
raise ValueError("n must be non-negative")
if p <= 0.0 or p >= 1.0:
if p == 0.0:
return 0
if p == 1.0:
return n
raise ValueError("p must be in the range 0.0 <= p <= 1.0")

random = self.random

# Fast path for a common case
if n == 1:
return _index(random() < p)

# Exploit symmetry to establish: p <= 0.5
if p > 0.5:
return n - self.binomialvariate(n, 1.0 - p)

if n * p < 10.0:
# BG: Geometric method by Devroye with running time of O(np).
# https://dl.acm.org/doi/pdf/10.1145/42372.42381
x = y = 0
c = _log(1.0 - p)
if not c:
return x
while True:
y += _floor(_log(random()) / c) + 1
if y > n:
return x
x += 1

# BTRS: Transformed rejection with squeeze method by Wolfgang Hörmann
# https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf
assert n*p >= 10.0 and p <= 0.5
setup_complete = False

spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution
b = 1.15 + 2.53 * spq
a = -0.0873 + 0.0248 * b + 0.01 * p
c = n * p + 0.5
vr = 0.92 - 4.2 / b

while True:

u = random()
v = random()
u -= 0.5
us = 0.5 - _fabs(u)
k = _floor((2.0 * a / us + b) * u + c)
if k < 0 or k > n:
continue

# The early-out "squeeze" test substantially reduces
# the number of acceptance condition evaluations.
if us >= 0.07 and v <= vr:
return k

# Acceptance-rejection test.
# Note, the original paper errorneously omits the call to log(v)
# when comparing to the log of the rescaled binomial distribution.
if not setup_complete:
alpha = (2.83 + 5.1 / b) * spq
lpq = _log(p / (1.0 - p))
m = _floor((n + 1) * p) # Mode of the distribution
h = _lgamma(m + 1) + _lgamma(n - m + 1)
setup_complete = True # Only needs to be done once
v *= alpha / (a / (us * us) + b)
if _log(v) <= h - _lgamma(k + 1) - _lgamma(n - k + 1) + (k - m) * lpq:
return k


def paretovariate(self, alpha):
"""Pareto distribution. alpha is the shape parameter."""
# Jain, pg. 495
Expand Down Expand Up @@ -810,6 +898,7 @@ def _notimplemented(self, *args, **kwds):
gammavariate = _inst.gammavariate
gauss = _inst.gauss
betavariate = _inst.betavariate
binomialvariate = _inst.binomialvariate
paretovariate = _inst.paretovariate
weibullvariate = _inst.weibullvariate
getstate = _inst.getstate
Expand All @@ -834,15 +923,17 @@ def _test_generator(n, func, args):
low = min(data)
high = max(data)

print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}')
print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}{args!r}')
print('avg %g, stddev %g, min %g, max %g\n' % (xbar, sigma, low, high))


def _test(N=2000):
def _test(N=10_000):
_test_generator(N, random, ())
_test_generator(N, normalvariate, (0.0, 1.0))
_test_generator(N, lognormvariate, (0.0, 1.0))
_test_generator(N, vonmisesvariate, (0.0, 1.0))
_test_generator(N, binomialvariate, (15, 0.60))
_test_generator(N, binomialvariate, (100, 0.75))
_test_generator(N, gammavariate, (0.01, 1.0))
_test_generator(N, gammavariate, (0.1, 1.0))
_test_generator(N, gammavariate, (0.1, 2.0))
Expand Down
56 changes: 56 additions & 0 deletions Lib/test/test_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -1045,13 +1045,69 @@ def test_constant(self):
(g.lognormvariate, (0.0, 0.0), 1.0),
(g.lognormvariate, (-float('inf'), 0.0), 0.0),
(g.normalvariate, (10.0, 0.0), 10.0),
(g.binomialvariate, (0, 0.5), 0),
(g.binomialvariate, (10, 0.0), 0),
(g.binomialvariate, (10, 1.0), 10),
(g.paretovariate, (float('inf'),), 1.0),
(g.weibullvariate, (10.0, float('inf')), 10.0),
(g.weibullvariate, (0.0, 10.0), 0.0),
]:
for i in range(N):
self.assertEqual(variate(*args), expected)

def test_binomialvariate(self):
B = random.binomialvariate

# Cover all the code paths
with self.assertRaises(ValueError):
B(n=-1) # Negative n
with self.assertRaises(ValueError):
B(n=1, p=-0.5) # Negative p
with self.assertRaises(ValueError):
B(n=1, p=1.5) # p > 1.0
self.assertEqual(B(10, 0.0), 0) # p == 0.0
self.assertEqual(B(10, 1.0), 10) # p == 1.0
self.assertTrue(B(1, 0.3) in {0, 1}) # n == 1 fast path
self.assertTrue(B(1, 0.9) in {0, 1}) # n == 1 fast path
self.assertTrue(B(1, 0.0) in {0}) # n == 1 fast path
self.assertTrue(B(1, 1.0) in {1}) # n == 1 fast path

# BG method p <= 0.5 and n*p=1.25
self.assertTrue(B(5, 0.25) in set(range(6)))

# BG method p >= 0.5 and n*(1-p)=1.25
self.assertTrue(B(5, 0.75) in set(range(6)))

# BTRS method p <= 0.5 and n*p=25
self.assertTrue(B(100, 0.25) in set(range(101)))

# BTRS method p > 0.5 and n*(1-p)=25
self.assertTrue(B(100, 0.75) in set(range(101)))

# Statistical tests chosen such that they are
# exceedingly unlikely to ever fail for correct code.

# BG code path
# Expected dist: [31641, 42188, 21094, 4688, 391]
c = Counter(B(4, 0.25) for i in range(100_000))
self.assertTrue(29_641 <= c[0] <= 33_641, c)
self.assertTrue(40_188 <= c[1] <= 44_188)
self.assertTrue(19_094 <= c[2] <= 23_094)
self.assertTrue(2_688 <= c[3] <= 6_688)
self.assertEqual(set(c), {0, 1, 2, 3, 4})

# BTRS code path
# Sum of c[20], c[21], c[22], c[23], c[24] expected to be 36,214
c = Counter(B(100, 0.25) for i in range(100_000))
self.assertTrue(34_214 <= c[20]+c[21]+c[22]+c[23]+c[24] <= 38_214)
self.assertTrue(set(c) <= set(range(101)))
self.assertEqual(c.total(), 100_000)

# Demonstrate the BTRS works for huge values of n
self.assertTrue(19_000_000 <= B(100_000_000, 0.2) <= 21_000_000)
self.assertTrue(89_000_000 <= B(100_000_000, 0.9) <= 91_000_000)


def test_von_mises_range(self):
# Issue 17149: von mises variates were not consistently in the
# range [0, 2*PI].
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add random.binomialvariate().
0