From 16a98c0efe9e09258946f8469f7b556c568f1e21 Mon Sep 17 00:00:00 2001 From: MattHarrigan Date: Tue, 18 Oct 2016 20:31:04 -0400 Subject: [PATCH 1/2] ENH: performance improvement to ediff1d Eliminate a copy operation when to_begin or to_end is given. Also use ravel, which is much faster, instead of flatiter. --- numpy/lib/arraysetops.py | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 17dfa7567e9c..e63e0954656c 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -78,20 +78,33 @@ def ediff1d(ary, to_end=None, to_begin=None): array([ 1, 2, -3, 5, 18]) """ - ary = np.asanyarray(ary).flat - ed = ary[1:] - ary[:-1] - arrays = [ed] - if to_begin is not None: - arrays.insert(0, to_begin) - if to_end is not None: - arrays.append(to_end) - - if len(arrays) != 1: - # We'll save ourselves a copy of a potentially large array in - # the common case where neither to_begin or to_end was given. - ed = np.hstack(arrays) - - return ed + # force a 1d array + ary = np.asanyarray(ary).ravel() + + # get the length of the diff'd values + l = len(ary) - 1 + if l < 0: + # force length to be non negative, match previous API + # should this be a warning or deprecated? 
+ l = 0 + + if to_begin is None: + to_begin = np.array([]) + else: + to_begin = np.asanyarray(to_begin).ravel() + + if to_end is None: + to_end = np.array([]) + else: + to_end = np.asanyarray(to_end).ravel() + + # do the calculation in place and copy to_begin and to_end + result = np.empty(l + len(to_begin) + len(to_end), dtype=ary.dtype) + result[:len(to_begin)] = to_begin + result[len(to_begin) + l:] = to_end + np.subtract(ary[1:], ary[:-1], result[len(to_begin):len(to_begin) + l]) + return result + def unique(ar, return_index=False, return_inverse=False, return_counts=False): """ From 484827189756d45e6231bfd2cd3cf6db9c2993df Mon Sep 17 00:00:00 2001 From: MattHarrigan Date: Wed, 19 Oct 2016 19:57:43 -0400 Subject: [PATCH 2/2] TST: Added cases for better coverage of ediff1d --- numpy/lib/tests/test_arraysetops.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 852183ffec1d..b75a2b060d54 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -169,6 +169,12 @@ def test_ediff1d(self): assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0)) assert_array_equal([], ediff1d(one_elem)) assert_array_equal([1], ediff1d(two_elem)) + assert_array_equal([7,1,9], ediff1d(two_elem, to_begin=7, to_end=9)) + assert_array_equal([5,6,1,7,8], ediff1d(two_elem, to_begin=[5,6], to_end=[7,8])) + assert_array_equal([1,9], ediff1d(two_elem, to_end=9)) + assert_array_equal([1,7,8], ediff1d(two_elem, to_end=[7,8])) + assert_array_equal([7,1], ediff1d(two_elem, to_begin=7)) + assert_array_equal([5,6,1], ediff1d(two_elem, to_begin=[5,6])) def test_in1d(self): # we use two different sizes for the b array here to test the