ENH: Use AVX for float32 implementation of np.sin & np.cos by r-devulap · Pull Request #13368 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Use AVX for float32 implementation of np.sin & np.cos #13368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 18, 2019
Merged
Prev Previous commit
Next Next commit
BUG: fixing NAN handling and adding tests for sin/cos
  • Loading branch information
Raghuveer Devulapalli committed Aug 3, 2019
commit e50e72513212764f673027328b9f33574cc3d254
10 changes: 6 additions & 4 deletions numpy/core/src/umath/simd.inc.src
Original file line number Diff line number Diff line change
Expand Up @@ -1473,7 +1473,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@

#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
@ISA@_sincos_FLOAT(npy_float * op, npy_float * ip, const npy_int array_size,
@ISA@_sincos_FLOAT(npy_float * op, npy_float * ip, const npy_intp array_size,
char* operation)
{
const npy_int num_lanes = @BYTES@/sizeof(npy_float);
Expand Down Expand Up @@ -1505,9 +1505,9 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
@vtype@ zero_f = _mm@vsize@_set1_ps(0.0f);
@vtype@ quadrant, reduced_x, reduced_x2, cos, sin;
@vtype@i iquadrant;
@mask@ glibc_mask, sine_mask, negate_mask;
@mask@ nan_mask, glibc_mask, sine_mask, negate_mask;
@mask@ load_mask = @isa@_get_full_load_mask();
npy_int num_remaining_elements = array_size;
npy_intp num_remaining_elements = array_size;

while (num_remaining_elements > 0) {

Expand All @@ -1524,7 +1524,8 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void

glibc_mask = @isa@_in_range_mask(x, large_number,-large_number);
glibc_mask = @and_masks@(load_mask, glibc_mask);
x = @isa@_set_masked_lanes(x, zero_f, glibc_mask);
nan_mask = _mm@vsize@_cmp_ps@vsub@(x, x, _CMP_NEQ_UQ);
x = @isa@_set_masked_lanes(x, zero_f, @or_masks@(nan_mask, glibc_mask));
npy_int iglibc_mask = @mask_to_int@(glibc_mask);

if (iglibc_mask != @full_mask@) {
Expand Down Expand Up @@ -1556,6 +1557,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
/* multiply by -1 for appropriate elements */
negate_mask = @isa@_should_negate(iquadrant, twos, twos);
cos = @isa@_blend(cos, _mm@vsize@_sub_ps(zero_f, cos), negate_mask);
cos = @isa@_set_masked_lanes(cos, _mm@vsize@_set1_ps(NPY_NANF), nan_mask);

@masked_store@(op, @cvtps_epi32@(load_mask), cos);
}
Expand Down
17 changes: 17 additions & 0 deletions numpy/core/tests/test_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,23 @@ def test_log_values(self):
assert_raises(FloatingPointError, np.log, np.float32(-np.inf))
assert_raises(FloatingPointError, np.log, np.float32(-1.0))

def test_sincos_values(self):
with np.errstate(all='ignore'):
x = [np.nan, np.nan, np.nan, np.nan]
y = [np.nan, -np.nan, np.inf, -np.inf]
for dt in ['f', 'd', 'g']:
xf = np.array(x, dtype=dt)
yf = np.array(y, dtype=dt)
assert_equal(np.sin(yf), xf)
assert_equal(np.cos(yf), xf)

with np.errstate(invalid='raise'):
assert_raises(FloatingPointError, np.sin, np.float32(-np.inf))
assert_raises(FloatingPointError, np.sin, np.float32(np.inf))
assert_raises(FloatingPointError, np.cos, np.float32(-np.inf))
assert_raises(FloatingPointError, np.cos, np.float32(np.inf))


class TestExpLogFloat32(object):
def test_exp_float32(self):
np.random.seed(42)
Expand Down
0