8000 memcpy-based fast, typed shuffle. by anntzer · Pull Request #6933 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

memcpy-based fast, typed shuffle. #6933

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 17, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversatio 10000 ns
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/release/1.11.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ extended to ``@``, ``numpy.dot``, ``numpy.inner``, and ``numpy.matmul``.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This matches the behavior of ``assert_raises``.

Speed improvement for np.random.shuffle
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``np.random.shuffle`` is now much faster for 1d ndarrays.

Changes
=======
Pyrex support was removed from ``numpy.distutils``. The method
Expand Down
55 changes: 38 additions & 17 deletions numpy/random/mtrand/mtrand.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
include "Python.pxi"
include "numpy.pxd"

from libc cimport string

cdef extern from "math.h":
double exp(double x)
double log(double x)
Expand Down Expand Up @@ -4979,33 +4981,52 @@ cdef class RandomState:
[0, 1, 2]])

"""
cdef npy_intp i, j

i = len(x) - 1

# Logic adapted from random.shuffle()
if isinstance(x, np.ndarray) and \
(x.ndim > 1 or x.dtype.fields is not None):
# For a multi-dimensional ndarray, indexing returns a view onto
# each row. So we can't just use ordinary assignment to swap the
# rows; we need a bounce buffer.
cdef:
npy_intp i, j, n = len(x), stride, itemsize
char* x_ptr
char* buf_ptr

if type(x) is np.ndarray and x.ndim == 1 and x.size:
# Fast, statically typed path: shuffle the underlying buffer.
# Only for non-empty, 1d objects of class ndarray (subclasses such
# as MaskedArrays may not support this approach).
x_ptr = <char*><size_t>x.ctypes.data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the cast to size_t?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because Cython interprets <char*>python_object as casting a bytes object (not an object supporting the buffer protocol) to a char*, but <char*>c_object as a C-style cast. (If you try it you'll get "TypeError: expected bytes, int (or whatever you pass in) found")

stride = x.strides[0]
itemsize = x.dtype.itemsize
buf = np.empty_like(x[0]) # GC'd at function exit
buf_ptr = <char*><size_t>buf.ctypes.data
with self.lock:
# We trick gcc into providing a specialized implementation for
# the most common case, yielding a ~33% performance improvement.
# Note that apparently, only one branch can ever be specialized.
if itemsize == sizeof(npy_intp):
self._shuffle_raw(n, sizeof(npy_intp), stride, x_ptr, buf_ptr)
else:
self._shuffle_raw(n, itemsize, stride, x_ptr, buf_ptr)
elif isinstance(x, np.ndarray) and x.ndim > 1 and x.size:
# Multidimensional ndarrays require a bounce buffer.
buf = np.empty_like(x[0])
with self.lock:
while i > 0:
for i in reversed(range(1, n)):
j = rk_interval(i, self.internal_state)
buf[...] = x[j]
x[j] = x[i]
x[i] = buf
i = i - 1
else:
# For single-dimensional arrays, lists, and any other Python
# sequence types, indexing returns a real object that's
# independent of the array contents, so we can just swap directly.
# Untyped path.
with self.lock:
while i > 0:
for i in reversed(range(1, n)):
j = rk_interval(i, self.internal_state)
x[i], x[j] = x[j], x[i]
i = i - 1

cdef inline _shuffle_raw(self, npy_intp n, npy_intp itemsize,
npy_intp stride, char* data, char* buf):
cdef npy_intp i, j
for i in reversed(range(1, n)):
j = rk_interval(i, self.internal_state)
string.memcpy(buf, data + j * stride, itemsize)
string.memcpy(data + j * stride, data + i * stride, itemsize)
string.memcpy(data + i * stride, buf, itemsize)

def permutation(self, object x):
"""
Expand Down
42 changes: 23 additions & 19 deletions numpy/random/tests/test_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import warnings



class TestSeed(TestCase):
def test_scalar(self):
s = np.random.RandomState(0)
Expand Down Expand Up @@ -40,6 +41,7 @@ def test_invalid_array(self):
assert_raises(ValueError, np.random.RandomState, [1, 2, 4294967296])
assert_raises(ValueError, np.random.RandomState, [1, -2, 4294967296])


class TestBinomial(TestCase):
def test_n_zero(self):
# Tests the corner case of n == 0 for the binomial distribution.
Expand Down Expand Up @@ -130,6 +132,7 @@ def test_negative_binomial(self):
# arguments without truncation.
self.prng.negative_binomial(0.5, 0.5)


class TestRandint(TestCase):

rfunc = np.random.randint
Expand Down Expand Up @@ -368,40 +371,41 @@ def test_bytes(self):
np.testing.assert_equal(actual, desired)

def test_shuffle(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ISTR that we don't like generators like this for testing. @rgommers Do you remember what that was about?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a comment by @pv scipy/scipy#3719 (comment) one reason is that it's harder to debug failing tests, for example

import numpy as np
from numpy.testing import assert_allclose

def test_foo():
    A = np.arange(100)
    B = np.concatenate([A, A])
    yield assert_allclose, A, B

gives a failing test result that looks like this:

$ nosetests                           
F
======================================================================
FAIL: test.test_foo(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
----------------------------------------------------------------------
Traceback (most recent call last):
[snip]

which looks messy.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can set the description attribute of the yielded test to improve the output, e.g.

import numpy as np
from numpy.testing import assert_allclose

def test_foo():

    def allclose(description):
        f = lambda x, y: assert_allclose(x, y)
        f.description = description
        return f

    for i in range(100):
        A = np.arange(100)
        if i != 50:
            B = A
        else:
            B = np.arange(200)
        yield allclose("test_foo:test #{}".format(i)), A, B

def test_bar():
    for i in range(100):
        A = np.arange(100)
        if i != 50:
            B = A
        else:
            B = np.arange(200)
        assert_allclose(A, B)

gives, IMO, a nice output:

..................................................F.................................................F
======================================================================
FAIL: test_foo:test #50
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/lib/python3.5/site-packages/nose/case.py", line 198, in runTest
    self.test(*self.arg)
  File "/tmp/test_foo.py", line 7, in <lambda>
    f = lambda x, y: assert_allclose(x, y)
  File "/usr/lib/python3.5/site-packages/numpy/testing/utils.py", line 1347, in assert_allclose
    verbose=verbose, header=header)
  File "/usr/lib/python3.5/site-packages/numpy/testing/utils.py", line 663, in assert_array_compare
    raise AssertionError(msg)
AssertionError: 
Not equal to tolerance rtol=1e-07, atol=0

(shapes (100,), (200,) mismatch)
 x: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,...
 y: array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,...

======================================================================
FAIL: test_foo.test_bar
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/lib/python3.5/site-packages/nose/case.py", line 198, in runTest
    self.test(*self.arg)
  File "/tmp/test_foo.py", line 26, in test_bar
    assert_allclose(A, B)
  File "/usr/lib/python3.5/site-packages/numpy/testing/utils.py", line 1347, in assert_allclose
    verbose=verbose, header=header)
  File "/usr/lib/python3.5/site-packages/numpy/testing/utils.py", line 663, in assert_array_compare
    raise AssertionError(msg)
AssertionError: 
Not equal to tolerance rtol=1e-07, atol=0

(shapes (100,), (200,) mismatch)
 x: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,...
 y: array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,...

----------------------------------------------------------------------
Ran 101 tests in 0.056s

FAILED (failures=2)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The description is not the issue with debugging, it's the context when you enable --pdb-failure or --ipdb-failure - this is what many of us use for debugging failed tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any context using either --pdb-failure or --ipdb-failure (not that I knew about these options, thanks for pointing them out).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what you mean, but I think you'll find that non-generator tests will drop you into the errored test, and you can u you way to the enclosing test function to inspect the stack, but you can't do that with the yield test functions.

1029A

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see. I'll switch this out then.
I guess nose should try to stitch in a traceback to the still running generator (gen.gi_frame anyone?).
See nose-devs/nose2#260.

# Test lists, arrays, and multidimensional versions of both:
for conv in [lambda x: x,
np.asarray,
# Test lists, arrays (of various dtypes), and multidimensional versions
# of both, c-contiguous or not:
for conv in [lambda x: np.array([]),
lambda x: x,
lambda x: np.asarray(x).astype(np.int8),
lambda x: np.asarray(x).astype(np.float32),
lambda x: np.asarray(x).astype(np.complex64),
lambda x: np.asarray(x).astype(object),
lambda x: [(i, i) for i in x],
lambda x: np.asarray([(i, i) for i in x])]:
lambda x: np.asarray([[i, i] for i in x]),
lambda x: np.vstack([x, x]).T,
# gh-4270
lambda x: np.asarray([(i, i) for i in x],
[("a", object, 1),
("b", np.int32, 1)])]:
np.random.seed(self.seed)
alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0])
np.random.shuffle(alist)
actual = alist
desired = conv([0, 1, 9, 6, 2, 4, 5, 8, 7, 3])
np.testing.assert_array_equal(actual, desired)

def test_shuffle_flexible(self):
# gh-4270
arr = [(0, 1), (2, 3)]
dt = np.dtype([('a', np.int32, 1), ('b', np.int32, 1)])
nparr = np.array(arr, dtype=dt)
a, b = nparr[0].copy(), nparr[1].copy()
for i in range(50):
np.random.shuffle(nparr)
assert_(a in nparr)
assert_(b in nparr)

def test_shuffle_masked(self):
# gh-3263
a = np.ma.masked_values(np.reshape(range(20), (5,4)) % 3 - 1, -1)
b = np.ma.masked_values(np.arange(20) % 3 - 1, -1)
ma = np.ma.count_masked(a)
mb = np.ma.count_masked(b)
a_orig = a.copy()
b_orig = b.copy()
for i in range(50):
np.random.shuffle(a)
self.assertEqual(ma, np.ma.count_masked(a))
assert_equal(
sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask]))
np.random.shuffle(b)
self.assertEqual(mb, np.ma.count_masked(b))
assert_equal(
sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask]))

def test_beta(self):
np.random.seed(self.seed)
Expand Down
0