Merge pull request #37 from bashtage/benchmark-improve · mattip/numpy@e937ebd · GitHub
[go: up one dir, main page]

Skip to content

Commit e937ebd

Browse files
authored
Merge pull request #37 from bashtage/benchmark-improve
REF: Refactor benchmark
2 parents f80e160 + 6701896 commit e937ebd

File tree

4 files changed

+83
-124
lines changed

4 files changed

+83
-124
lines changed

_randomgen/benchmark.py

Lines changed: 65 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -84,122 +84,67 @@ def timer_uniform():
8484
run_timer(dist, command, None, SETUP, 'Uniforms')
8585

8686

87-
def timer_8bit_bounded(max=95, use_masked=True):
88-
min = 0
87+
def timer_bounded(bits=8, max=95, use_masked=True):
88+
"""
89+
Timer for bounded unsigned integers of a given bit width.
90+
91+
Parameters
92+
----------
93+
bits : {8, 16, 32, 64}
94+
Bit width of unsigned output type
95+
max : int
96+
Upper bound for range (inclusive). Lower is always 0. Must be < 2**bits.
97+
use_masked : bool
98+
If True, masking and rejection sampling is used to generate a random
99+
number in an interval. If False, Lemire's algorithm is used if
100+
available to generate a random number in an interval.
101+
102+
Notes
103+
-----
104+
Lemire's algorithm has improved performance when {max}+1 is not a
105+
power of two.
106+
"""
107+
if bits not in (8, 16, 32, 64):
108+
raise ValueError('bits must be one of 8, 16, 32, 64.')
109+
minimum = 0
89110

90111
dist = 'random_uintegers'
91112

92-
# Note on performance of generating random numbers in an interval:
93-
# use_masked=True : masking and rejection sampling is used to generate a random number in an interval.
94-
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
95-
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.
113+
if use_masked: # Use masking & rejection.
114+
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits}, use_masked=True)'
115+
else: # Use Lemire's algo.
116+
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits}, use_masked=False)'
96117

97-
if use_masked:
98-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8, use_masked=True)' # Use masking & rejection.
99-
else:
100-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8, use_masked=False)' # Use Lemire's algo.
118+
command = command.format(min=minimum, max=max, bits=bits)
101119

102-
command = command.format(min=min, max=max)
103-
104-
command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8)'
105-
command_numpy = command_numpy.format(min=min, max=max)
120+
command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits})'
121+
command_numpy = command_numpy.format(min=minimum, max=max, bits=bits)
106122

107123
run_timer(dist, command, command_numpy, SETUP,
108-
'8-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))
109-
110-
111-
def timer_16bit_bounded(max=1535, use_masked=True):
112-
min = 0
113-
114-
dist = 'random_uintegers'
115-
116-
# Note on performance of generating random numbers in an interval:
117-
# use_masked=True : masking and rejection sampling is used to generate a random number in an interval.
118-
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
119-
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.
120-
121-
if use_masked:
122-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16, use_masked=True)' # Use masking & rejection.
123-
else:
124-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16, use_masked=False)' # Use Lemire's algo.
125-
126-
command = command.format(min=min, max=max)
127-
128-
command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16)'
129-
command_numpy = command_numpy.format(min=min, max=max)
130-
131-
run_timer(dist, command, command_numpy, SETUP,
132-
'16-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))
124+
'{bits}-bit bounded unsigned integers (max={max}, '
125+
'use_masked={use_masked})'.format(max=max, use_masked=use_masked, bits=bits))
133126

134127

135128
def timer_32bit():
136129
info = np.iinfo(np.uint32)
137-
min, max = info.min, info.max
130+
minimum, maximum = info.min, info.max
138131
dist = 'random_uintegers'
139132
command = 'rg.random_uintegers(1000000, 32)'
140133
command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32)'
141-
command_numpy = command_numpy.format(min=min, max=max)
134+
command_numpy = command_numpy.format(min=minimum, max=maximum)
142135
run_timer(dist, command, command_numpy, SETUP, '32-bit unsigned integers')
143136

144137

145-
def timer_32bit_bounded(max=1535, use_masked=True):
146-
min = 0
147-
148-
dist = 'random_uintegers'
149-
150-
# Note on performance of generating random numbers in an interval:
151-
# use_masked=True : masking and rejection sampling is used to generate a random number in an interval.
152-
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
153-
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.
154-
155-
if use_masked:
156-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=True)' # Use masking & rejection.
157-
else:
158-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=False)' # Use Lemire's algo.
159-
160-
command = command.format(min=min, max=max)
161-
162-
command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32)'
163-
command_numpy = command_numpy.format(min=min, max=max)
164-
165-
run_timer(dist, command, command_numpy, SETUP,
166-
'32-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))
167-
168-
169138
def timer_64bit():
170139
info = np.iinfo(np.uint64)
171-
min, max = info.min, info.max
140+
minimum, maximum = info.min, info.max
172141
dist = 'random_uintegers'
173142
command = 'rg.random_uintegers(1000000)'
174143
command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64)'
175-
command_numpy = command_numpy.format(min=min, max=max)
144+
command_numpy = command_numpy.format(min=minimum, max=maximum)
176145
run_timer(dist, command, command_numpy, SETUP, '64-bit unsigned integers')
177146

178147

179-
def timer_64bit_bounded(max=1535, use_masked=True):
180-
min = 0
181-
182-
dist = 'random_uintegers'
183-
184-
# Note on performance of generating random numbers in an interval:
185-
# use_masked=True : masking and rejection sampling is used to generate a random number in an interval.
186-
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
187-
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.
188-
189-
if use_masked:
190-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64, use_masked=True)' # Use masking & rejection.
191-
else:
192-
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64, use_masked=False)' # Use Lemire's algo.
193-
194-
command = command.format(min=min, max=max)
195-
196-
command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64)'
197-
command_numpy = command_numpy.format(min=min, max=max)
198-
199-
run_timer(dist, command, command_numpy, SETUP,
200-
'64-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))
201-
202-
203148
def timer_normal_zig():
204149
dist = 'standard_normal'
205150
command = 'rg.standard_normal(1000000)'
@@ -210,35 +155,47 @@ def timer_normal_zig():
210155

211156
if __name__ == '__main__':
212157
import argparse
158+
213159
parser = argparse.ArgumentParser()
214-
parser.add_argument('--full', dest='full', action='store_true')
160+
parser.add_argument('-f', '--full',
161+
help='Run benchmarks for a wide range of distributions.'
162+
' If not provided, only tests the production of '
163+
'uniform values.',
164+
dest='full', action='store_true')
165+
parser.add_argument('-bi', '--bounded-ints',
166+
help='Include benchmark coverage of the bounded '
167+
'integer generators in a full run.',
168+
dest='bounded_ints', action='store_true')
215169
args = parser.parse_args()
216170

217171
timer_uniform()
218172
if args.full:
219173
timer_raw()
220-
timer_8bit_bounded(use_masked=True)
221-
timer_8bit_bounded(max=64, use_masked=False) # Worst case for Numpy.
222-
timer_8bit_bounded(max=95, use_masked=False) # Typ. avrg. case for Numpy.
223-
timer_8bit_bounded(max=127, use_masked=False) # Best case for Numpy.
174+
if args.bounded_ints:
175+
timer_bounded(use_masked=True)
176+
timer_bounded(max=64, use_masked=False) # Worst case for Numpy.
177+
timer_bounded(max=95, use_masked=False) # Typ. avrg. case for Numpy.
178+
timer_bounded(max=127, use_masked=False) # Best case for Numpy.
224179

225-
timer_16bit_bounded(use_masked=True)
226-
timer_16bit_bounded(max=1024, use_masked=False) # Worst case for Numpy.
227-
timer_16bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy.
228-
timer_16bit_bounded(max=2047, use_masked=False) # Best case for Numpy.
180+
timer_bounded(16, use_masked=True)
181+
timer_bounded(16, max=1024, use_masked=False) # Worst case for Numpy.
182+
timer_bounded(16, max=1535, use_masked=False) # Typ. avrg. case for Numpy.
183+
timer_bounded(16, max=2047, use_masked=False) # Best case for Numpy.
229184

230185
timer_32bit()
231186

232-
timer_32bit_bounded(use_masked=True)
233-
timer_32bit_bounded(max=1024, use_masked=False) # Worst case for Numpy.
234-
timer_32bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy.
235-
timer_32bit_bounded(max=2047, use_masked=False) # Best case for Numpy.
187+
if args.bounded_ints:
188+
timer_bounded(32, use_masked=True)
189+
timer_bounded(32, max=1024, use_masked=False) # Worst case for Numpy.
190+
timer_bounded(32, max=1535, use_masked=False) # Typ. avrg. case for Numpy.
191+
timer_bounded(32, max=2047, use_masked=False) # Best case for Numpy.
236192

237193
timer_64bit()
238194

239-
timer_64bit_bounded(use_masked=True)
240-
timer_64bit_bounded(max=1024, use_masked=False) # Worst case for Numpy.
241-
timer_64bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy.
242-
timer_64bit_bounded(max=2047, use_masked=False) # Best case for Numpy.
195+
if args.bounded_ints:
196+
timer_bounded(64, use_masked=True)
197+
timer_bounded(64, max=1024, use_masked=False) # Worst case for Numpy.
198+
timer_bounded(64, max=1535, use_masked=False) # Typ. avrg. case for Numpy.
199+
timer_bounded(64, max=2047, use_masked=False) # Best case for Numpy.
243200

244201
timer_normal_zig()

_randomgen/doc/source/conf.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
# import os
1616
# import sys
1717
# sys.path.insert(0, os.path.abspath('.'))
18-
from distutils.version import LooseVersion
1918
import guzzle_sphinx_theme
20-
# import sphinx_rtd_theme
2119
import randomgen
2220

2321
# -- Project information -----------------------------------------------------

_randomgen/doc/source/performance.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1+
from timeit import repeat
2+
13
import numpy as np
2-
from timeit import timeit, repeat
34
import pandas as pd
45

56
from randomgen import MT19937, DSFMT, ThreeFry, PCG64, Xoroshiro128, \
6-
Xorshift1024, Philox
7+
Xorshift1024, Philox, Xoshiro256StarStar, Xoshiro512StarStar
78

8-
PRNGS = [DSFMT, MT19937, Philox, PCG64, ThreeFry, Xoroshiro128, Xorshift1024]
9+
PRNGS = [DSFMT, MT19937, Philox, PCG64, ThreeFry, Xoroshiro128, Xorshift1024,
10+
Xoshiro256StarStar, Xoshiro512StarStar]
911

1012
funcs = {'32-bit Unsigned Ints': 'random_uintegers(size=1000000,bits=32)',
1113
'64-bit Unsigned Ints': 'random_uintegers(size=1000000,bits=32)',
@@ -32,11 +34,10 @@
3234
t = repeat(test.format(func=funcs[key]),
3335
setup.format(prng=prng().__class__.__name__),
3436
number=1, repeat=3)
35-
col[key]= 1000 * min(t)
37+
col[key] = 1000 * min(t)
3638
col = pd.Series(col)
3739
table[prng().__class__.__name__] = col
3840

39-
4041
npfuncs = {}
4142
npfuncs.update(funcs)
4243
npfuncs['32-bit Unsigned Ints'] = 'randint(2**32,dtype="uint32",size=1000000)'
@@ -54,7 +55,6 @@
5455
col[key] = 1000 * min(t)
5556
table['NumPy'] = pd.Series(col)
5657

57-
5858
table = pd.DataFrame(table)
5959
table = table.reindex(table.mean(1).sort_values().index)
6060
order = np.log(table).mean().sort_values().index
@@ -63,11 +63,11 @@
6363
table = table.T
6464
print(table.to_csv(float_format='%0.1f'))
6565

66-
rel = table / (table.iloc[:,[0]].values @ np.ones((1,8)))
66+
rel = table / (table.iloc[:, [0]].values @ np.ones((1, 8)))
6767
rel.pop(rel.columns[0])
6868
rel = rel.T
6969
rel['Overall'] = np.exp(np.log(rel).mean(1))
7070
rel *= 100
7171
rel = np.round(rel)
7272
rel = rel.T
73-
print(rel.to_csv(float_format='%0d'))
73+
print(rel.to_csv(float_format='%0d'))

_randomgen/randomgen/tests/test_numpy_mt19937.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -437,27 +437,31 @@ def test_randint(self):
437437
assert_array_equal(actual, desired)
438438

439439
def test_randint_masked(self):
440-
""" Test masked rejection sampling algorithm to generate array of uint32 in an interval. """
440+
# Test masked rejection sampling algorithm to generate array of
441+
# uint32 in an interval.
441442
mt19937.seed(self.seed)
442-
actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32, use_masked=True)
443+
actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32,
444+
use_masked=True)
443445
desired = np.array([[2, 47],
444446
[12, 51],
445447
[33, 43]], dtype=np.uint32)
446448
assert_array_equal(actual, desired)
447449

448450
def test_randint_lemire_32(self):
449-
""" Test lemire algorithm to generate array of uint32 in an interval. """
451+
# Test lemire algorithm to generate array of uint32 in an interval.
450452
mt19937.seed(self.seed)
451-
actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32, use_masked=False)
453+
actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32,
454+
use_masked=False)
452455
desired = np.array([[61, 33],
453456
[58, 14],
454457
[87, 23]], dtype=np.uint32)
455458
assert_array_equal(actual, desired)
456459

457460
def test_randint_lemire_64(self):
458-
""" Test lemire algorithm to generate array of uint64 in an interval. """
461+
# Test lemire algorithm to generate array of uint64 in an interval.
459462
mt19937.seed(self.seed)
460-
actual = mt19937.randint(0, 99 + 0xFFFFFFFFF, size=(3, 2), dtype=np.uint64, use_masked=False)
463+
actual = mt19937.randint(0, 99 + 0xFFFFFFFFF, size=(3, 2),
464+
dtype=np.uint64, use_masked=False)
461465
desired = np.array([[42523252834, 40656066204],
462466
[61069871386, 61274051182],
463467
[31443797706, 53476677934]], dtype=np.uint64)

0 commit comments

Comments
 (0)
0