From a15dc309cb28ca4bab3a2ef89f90c24e6659e9bd Mon Sep 17 00:00:00 2001 From: przemb Date: Wed, 5 Feb 2020 01:14:31 +0100 Subject: [PATCH 1/3] MAINT: Large overhead in some random functions #15460 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit slow calls to np.dtype.name replaced with np.dtype, mtrand.pyx and _generator.pyx updated, test test_warns_byteorder updated before: %timeit rs.random(): 520 ns ± 33.1 ns per loop %timeit rg.random(): 6.36 µs ± 222 ns per loop after: %timeit rs.random(): 453 ns ± 6.95 ns per loop %timeit rg.random(): 594 ns ± 9.66 ns per loop --- numpy/random/_generator.pyx | 71 +++++++++---------- numpy/random/mtrand.pyx | 38 +++++----- .../tests/test_randomstate_regression.py | 3 +- 3 files changed, 53 insertions(+), 59 deletions(-) diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx index 32eda25f793d..c7ba4c534fe7 100644 --- a/numpy/random/_generator.pyx +++ b/numpy/random/_generator.pyx @@ -17,7 +17,6 @@ from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t, from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64, _rand_int16, _rand_int8, _rand_uint64, _rand_uint32, _rand_uint16, _rand_uint8, _gen_mask) -from ._bounded_integers import _integers_types from ._pcg64 import PCG64 from numpy.random cimport bitgen_t from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE, @@ -312,13 +311,13 @@ cdef class Generator: """ cdef double temp - key = np.dtype(dtype).name - if key == 'float64': + _dtype = np.dtype(dtype) + if _dtype == np.float64: return double_fill(&random_standard_uniform_fill, &self._bitgen, size, self.lock, out) - elif key == 'float32': + elif _dtype == np.float32: return float_fill(&random_standard_uniform_fill_f, &self._bitgen, size, self.lock, out) else: - raise TypeError('Unsupported dtype "%s" for random' % key) + raise TypeError('Unsupported dtype %r for random' % _dtype) def beta(self, a, b, size=None): """ @@ -454,20 +453,20 @@ cdef class Generator: >>> n = np.random.default_rng().standard_exponential((3, 8000)) """ - key = np.dtype(dtype).name - if key == 'float64': + _dtype = np.dtype(dtype) + if _dtype == np.float64: if method == u'zig': return double_fill(&random_standard_exponential_fill, &self._bitgen, size, self.lock, out) else: return double_fill(&random_standard_exponential_inv_fill, &self._bitgen, size, self.lock, out) - elif key == 'float32': + elif _dtype == np.float32: if method == u'zig': return float_fill(&random_standard_exponential_fill_f, &self._bitgen, size, self.lock, out) else: return float_fill(&random_standard_exponential_inv_fill_f, &self._bitgen, size, self.lock, out) else: - raise TypeError('Unsupported dtype "%s" for standard_exponential' - % key) + raise TypeError('Unsupported dtype %r for standard_exponential' + % _dtype) def integers(self, low, high=None, size=None, dtype=np.int64, endpoint=False): """ @@ -559,39 +558,39 @@ cdef class Generator: high = low low = 0 - dt = np.dtype(dtype) - key = dt.name - if key not in _integers_types: - raise TypeError('Unsupported dtype "%s" for integers' % key) - if not dt.isnative: - raise ValueError('Providing a dtype with a non-native byteorder ' - 'is not supported. If you require ' - 'platform-independent byteorder, call byteswap ' - 'when required.') + _dtype = np.dtype(dtype) # Implementation detail: the old API used a masked method to generate # bounded uniform integers. Lemire's method is preferable since it is # faster. randomgen allows a choice, we will always use the faster one. cdef bint _masked = False - if key == 'int32': + if _dtype == np.int32: ret = _rand_int32(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'int64': + elif _dtype == np.int64: ret = _rand_int64(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'int16': + elif _dtype == np.int16: ret = _rand_int16(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'int8': + elif _dtype == np.int8: ret = _rand_int8(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'uint64': + elif _dtype == np.uint64: ret = _rand_uint64(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'uint32': + elif _dtype == np.uint32: ret = _rand_uint32(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'uint16': + elif _dtype == np.uint16: ret = _rand_uint16(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'uint8': + elif _dtype == np.uint8: ret = _rand_uint8(low, high, size, _masked, endpoint, &self._bitgen, self.lock) - elif key == 'bool': + elif _dtype == np.bool_: ret = _rand_bool(low, high, size, _masked, endpoint, &self._bitgen, self.lock) + elif not _dtype.isnative: + raise ValueError('Providing a dtype with a non-native byteorder ' + 'is not supported. If you require ' + 'platform-independent byteorder, call byteswap ' + 'when required.') + else: + raise TypeError('Unsupported dtype %r for integers' % _dtype) + if size is None and dtype in (bool, int, np.compat.long): if np.array(ret).shape == (): @@ -1041,14 +1040,14 @@ cdef class Generator: [ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) # random """ - key = np.dtype(dtype).name - if key == 'float64': + _dtype = np.dtype(dtype) + if _dtype == np.float64: return double_fill(&random_standard_normal_fill, &self._bitgen, size, self.lock, out) - elif key == 'float32': + elif _dtype == np.float32: return float_fill(&random_standard_normal_fill_f, &self._bitgen, size, self.lock, out) else: - raise TypeError('Unsupported dtype "%s" for standard_normal' % key) + raise TypeError('Unsupported dtype %r for standard_normal' % _dtype) def normal(self, loc=0.0, scale=1.0, size=None): """ @@ -1230,19 +1229,19 @@ cdef class Generator: """ cdef void *func - key = np.dtype(dtype).name - if key == 'float64': + _dtype = np.dtype(dtype) + if _dtype == np.float64: return cont(&random_standard_gamma, &self._bitgen, size, self.lock, 1, shape, 'shape', CONS_NON_NEGATIVE, 0.0, '', CONS_NONE, 0.0, '', CONS_NONE, out) - if key == 'float32': + if _dtype == np.float32: return cont_f(&random_standard_gamma_f, &self._bitgen, size, self.lock, shape, 'shape', CONS_NON_NEGATIVE, out) else: - raise TypeError('Unsupported dtype "%s" for standard_gamma' % key) + raise TypeError('Unsupported dtype %r for standard_gamma' % _dtype) def gamma(self, shape, scale=1.0, size=None): """ diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index a04026c0df5e..b38e8bbbe04e 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -15,7 +15,6 @@ from libc.stdint cimport int64_t, uint64_t from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64, _rand_int16, _rand_int8, _rand_uint64, _rand_uint32, _rand_uint16, _rand_uint8,) -from ._bounded_integers import _integers_types from ._mt19937 import MT19937 as _MT19937 from numpy.random cimport bitgen_t from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE, @@ -724,17 +723,7 @@ cdef class RandomState: high = low low = 0 - dt = np.dtype(dtype) - key = dt.name - if key not in _integers_types: - raise TypeError('Unsupported dtype "%s" for randint' % key) - if not dt.isnative: - # numpy 1.17.0, 2019-05-28 - warnings.warn('Providing a dtype with a non-native byteorder is ' - 'not supported. If you require platform-independent ' - 'byteorder, call byteswap when required.\nIn future ' - 'version, providing byteorder will raise a ' - 'ValueError', DeprecationWarning) + _dtype = np.dtype(dtype) # Implementation detail: the use a masked method to generate # bounded uniform integers. Lemire's method is preferable since it is @@ -743,24 +732,31 @@ cdef class RandomState: cdef bint _masked = True cdef bint _endpoint = False - if key == 'int32': + if _dtype == np.int32: ret = _rand_int32(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'int64': + elif _dtype == np.int64: ret = _rand_int64(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'int16': + elif _dtype == np.int16: ret = _rand_int16(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'int8': + elif _dtype == np.int8: ret = _rand_int8(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'uint64': + elif _dtype == np.uint64: ret = _rand_uint64(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'uint32': + elif _dtype == np.uint32: ret = _rand_uint32(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'uint16': + elif _dtype == np.uint16: ret = _rand_uint16(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'uint8': + elif _dtype == np.uint8: ret = _rand_uint8(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif key == 'bool': + elif _dtype == np.bool_: ret = _rand_bool(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) + elif not _dtype.isnative: + raise ValueError('Providing a dtype with a non-native byteorder ' + 'is not supported. If you require ' + 'platform-independent byteorder, call byteswap ' + 'when required.') + else: + raise TypeError('Unsupported dtype %r for randint' % _dtype) if size is None and dtype in (bool, int, np.compat.long): if np.array(ret).shape == (): diff --git a/numpy/random/tests/test_randomstate_regression.py b/numpy/random/tests/test_randomstate_regression.py index 1d8a0ed5a609..9f9728df249b 100644 --- a/numpy/random/tests/test_randomstate_regression.py +++ b/numpy/random/tests/test_randomstate_regression.py @@ -163,8 +163,7 @@ def __array__(self): def test_warns_byteorder(self): # GH 13159 other_byteord_dt = 'i4' - with pytest.deprecated_call(match='non-native byteorder is not'): - random.randint(0, 200, size=10, dtype=other_byteord_dt) + assert_raises(ValueError, random.randint, 0, 200, size=10, dtype=other_byteord_dt) def test_named_argument_initialization(self): # GH 13669 From df27b88b9271b2fb1d0770217facf9c4190bb778 Mon Sep 17 00:00:00 2001 From: przemb Date: Wed, 5 Feb 2020 14:33:06 +0100 Subject: [PATCH 2/3] Changes suggested in review --- numpy/random/_bounded_integers.pyx.in | 10 ---------- numpy/random/mtrand.pyx | 14 +++++++++----- numpy/random/tests/test_randomstate_regression.py | 3 ++- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/numpy/random/_bounded_integers.pyx.in b/numpy/random/_bounded_integers.pyx.in index 9e639b53b189..9f46685d3258 100644 --- a/numpy/random/_bounded_integers.pyx.in +++ b/numpy/random/_bounded_integers.pyx.in @@ -51,16 +51,6 @@ cdef extern from "numpy/random/distributions.h": np.npy_bool *out) nogil - -_integers_types = {'bool': (0, 2), - 'int8': (-2**7, 2**7), - 'int16': (-2**15, 2**15), - 'int32': (-2**31, 2**31), - 'int64': (-2**63, 2**63), - 'uint8': (0, 2**8), - 'uint16': (0, 2**16), - 'uint32': (0, 2**32), - 'uint64': (0, 2**64)} {{ py: type_info = (('uint32', 'uint32', 'uint64', 'NPY_UINT64', 0, 0, 0, '0X100000000ULL'), diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index b38e8bbbe04e..c58949e89eb5 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -725,6 +725,15 @@ cdef class RandomState: _dtype = np.dtype(dtype) + if not _dtype.isnative: + # numpy 1.17.0, 2019-05-28 + warnings.warn('Providing a dtype with a non-native byteorder is ' + 'not supported. If you require platform-independent ' + 'byteorder, call byteswap when required.\nIn future ' + 'version, providing byteorder will raise a ' + 'ValueError', DeprecationWarning) + _dtype = _dtype.newbyteorder() + # Implementation detail: the use a masked method to generate # bounded uniform integers. Lemire's method is preferable since it is # faster. randomgen allows a choice, we will always use the slower but @@ -750,11 +759,6 @@ cdef class RandomState: ret = _rand_uint8(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) elif _dtype == np.bool_: ret = _rand_bool(low, high, size, _masked, _endpoint, &self._bitgen, self.lock) - elif not _dtype.isnative: - raise ValueError('Providing a dtype with a non-native byteorder ' - 'is not supported. If you require ' - 'platform-independent byteorder, call byteswap ' - 'when required.') else: raise TypeError('Unsupported dtype %r for randint' % _dtype) diff --git a/numpy/random/tests/test_randomstate_regression.py b/numpy/random/tests/test_randomstate_regression.py index 9f9728df249b..1d8a0ed5a609 100644 --- a/numpy/random/tests/test_randomstate_regression.py +++ b/numpy/random/tests/test_randomstate_regression.py @@ -163,7 +163,8 @@ def __array__(self): def test_warns_byteorder(self): # GH 13159 other_byteord_dt = 'i4' - assert_raises(ValueError, random.randint, 0, 200, size=10, dtype=other_byteord_dt) + with pytest.deprecated_call(match='non-native byteorder is not'): + random.randint(0, 200, size=10, dtype=other_byteord_dt) def test_named_argument_initialization(self): # GH 13669 From deb857157159400fbc74f2f4035ec78918429f1d Mon Sep 17 00:00:00 2001 From: przemb Date: Wed, 5 Feb 2020 19:26:59 +0100 Subject: [PATCH 3/3] Documentation updated. --- numpy/random/_generator.pyx | 45 ++++++++++++++++--------------------- numpy/random/mtrand.pyx | 8 +++---- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx index c7ba4c534fe7..f6c0c71b0233 100644 --- a/numpy/random/_generator.pyx +++ b/numpy/random/_generator.pyx @@ -261,7 +261,7 @@ cdef class Generator: def random(self, size=None, dtype=np.float64, out=None): """ - random(size=None, dtype='d', out=None) + random(size=None, dtype=np.float64, out=None) Return random floats in the half-open interval [0.0, 1.0). @@ -277,10 +277,9 @@ cdef class Generator: Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. Default is None, in which case a single value is returned. - dtype : {str, dtype}, optional - Desired dtype of the result, either 'd' (or 'float64') or 'f' - (or 'float32'). All dtypes are determined by their name. The - default value is 'd'. + dtype : dtype, optional + Desired dtype of the result, only `float64` and `float32` are supported. + Byteorder must be native. The default value is np.float64. out : ndarray, optional Alternative output array in which to place the result. If size is not None, it must have the same shape as the provided size and must match the type of @@ -416,7 +415,7 @@ cdef class Generator: def standard_exponential(self, size=None, dtype=np.float64, method=u'zig', out=None): """ - standard_exponential(size=None, dtype='d', method='zig', out=None) + standard_exponential(size=None, dtype=np.float64, method='zig', out=None) Draw samples from the standard exponential distribution. @@ -430,9 +429,8 @@ cdef class Generator: ``m * n * k`` samples are drawn. Default is None, in which case a single value is returned. dtype : dtype, optional - Desired dtype of the result, either 'd' (or 'float64') or 'f' - (or 'float32'). All dtypes are determined by their name. The - default value is 'd'. + Desired dtype of the result, only `float64` and `float32` are supported. + Byteorder must be native. The default value is np.float64. method : str, optional Either 'inv' or 'zig'. 'inv' uses the default inverse CDF method. 'zig' uses the much faster Ziggurat method of Marsaglia and Tsang. @@ -470,7 +468,7 @@ cdef class Generator: def integers(self, low, high=None, size=None, dtype=np.int64, endpoint=False): """ - integers(low, high=None, size=None, dtype='int64', endpoint=False) + integers(low, high=None, size=None, dtype=np.int64, endpoint=False) Return random integers from `low` (inclusive) to `high` (exclusive), or if endpoint=True, `low` (inclusive) to `high` (inclusive). Replaces @@ -495,11 +493,9 @@ cdef class Generator: Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. Default is None, in which case a single value is returned. - dtype : {str, dtype}, optional - Desired dtype of the result. All dtypes are determined by their - name, i.e., 'int64', 'int', etc, so byteorder is not available - and a specific precision may have different C types depending - on the platform. The default value is `np.int_`. + dtype : dtype, optional + Desired dtype of the result. Byteorder must be native. + The default value is np.int64. endpoint : bool, optional If true, sample from the interval [low, high] instead of the default [low, high) @@ -979,7 +975,7 @@ cdef class Generator: # Complicated, continuous distributions: def standard_normal(self, size=None, dtype=np.float64, out=None): """ - standard_normal(size=None, dtype='d', out=None) + standard_normal(size=None, dtype=np.float64, out=None) Draw samples from a standard Normal distribution (mean=0, stdev=1). @@ -989,10 +985,9 @@ cdef class Generator: Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. Default is None, in which case a single value is returned. - dtype : {str, dtype}, optional - Desired dtype of the result, either 'd' (or 'float64') or 'f' - (or 'float32'). All dtypes are determined by their name. The - default value is 'd'. + dtype : dtype, optional + Desired dtype of the result, only `float64` and `float32` are supported. + Byteorder must be native. The default value is np.float64. out : ndarray, optional Alternative output array in which to place the result. If size is not None, it must have the same shape as the provided size and must match the type of @@ -1045,7 +1040,6 @@ cdef class Generator: return double_fill(&random_standard_normal_fill, &self._bitgen, size, self.lock, out) elif _dtype == np.float32: return float_fill(&random_standard_normal_fill_f, &self._bitgen, size, self.lock, out) - else: raise TypeError('Unsupported dtype %r for standard_normal' % _dtype) @@ -1153,7 +1147,7 @@ cdef class Generator: def standard_gamma(self, shape, size=None, dtype=np.float64, out=None): """ - standard_gamma(shape, size=None, dtype='d', out=None) + standard_gamma(shape, size=None, dtype=np.float64, out=None) Draw samples from a standard Gamma distribution. @@ -1169,10 +1163,9 @@ cdef class Generator: ``m * n * k`` samples are drawn. If size is ``None`` (default), a single value is returned if ``shape`` is a scalar. Otherwise, ``np.array(shape).size`` samples are drawn. - dtype : {str, dtype}, optional - Desired dtype of the result, either 'd' (or 'float64') or 'f' - (or 'float32'). All dtypes are determined by their name. The - default value is 'd'. + dtype : dtype, optional + Desired dtype of the result, only `float64` and `float32` are supported. + Byteorder must be native. The default value is np.float64. out : ndarray, optional Alternative output array in which to place the result. If size is not None, it must have the same shape as the provided size and diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index c58949e89eb5..3d03187524ca 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -642,7 +642,7 @@ cdef class RandomState: def randint(self, low, high=None, size=None, dtype=int): """ - randint(low, high=None, size=None, dtype='l') + randint(low, high=None, size=None, dtype=int) Return random integers from `low` (inclusive) to `high` (exclusive). @@ -669,10 +669,8 @@ cdef class RandomState: ``m * n * k`` samples are drawn. Default is None, in which case a single value is returned. dtype : dtype, optional - Desired dtype of the result. All dtypes are determined by their - name, i.e., 'int64', 'int', etc, so byteorder is not available - and a specific precision may have different C types depending - on the platform. The default value is `np.int_`. + Desired dtype of the result. Byteorder must be native. + The default value is int. .. versionadded:: 1.11.0