Use Highway with Static Dispatch for some types in Absolute by Mousius · Pull Request #24385 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

Use Highway with Static Dispatch for some types in Absolute #24385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Highway POC
  • Loading branch information
Mousius committed Sep 7, 2023
commit 085dff14178c2601ee07db7eafb7da204586c12b
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
[submodule "vendored-meson/meson-python"]
path = vendored-meson/meson-python
url = https://github.com/numpy/meson-python.git
[submodule "numpy/core/src/highway"]
path = numpy/core/src/highway
url = https://github.com/google/highway.git
6 changes: 6 additions & 0 deletions numpy/core/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ endif
# important, because code in NumPy typically does not check the value but only
# whether the symbol is defined. So `#define HAVE_SOMETHING 0` is wrong.

#cmake = import('cmake')
#hwy = cmake.subproject('highway')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Mousius was this useful for local debugging? I'm working on Meson subproject usage elsewhere, so I'm curious about the details here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could have been more useful. I wanted to use the meson integration with cmake to be cleaner, but I ended up just adding the files manually as there seemed to be a lot of steps involved.



cdata = configuration_data()

cdata.set('NPY_ABI_VERSION', C_ABI_VERSION)
Expand Down Expand Up @@ -1077,6 +1081,7 @@ src_umath = umath_gen_headers + [
src_file.process('src/umath/matmul.c.src'),
src_file.process('src/umath/matmul.h.src'),
'src/umath/ufunc_type_resolution.c',
'src/umath/absolute.cpp',
'src/umath/clip.cpp',
'src/umath/clip.h',
'src/umath/dispatching.c',
Expand Down Expand Up @@ -1141,6 +1146,7 @@ py.extension_module('_multiarray_umath',
'src/multiarray',
'src/npymath',
'src/umath',
'src/highway',
],
dependencies: blas_dep,
link_with: [npymath_lib, multiarray_umath_mtargets.static_lib('_multiarray_umath_mtargets')],
Expand Down
1 change: 1 addition & 0 deletions numpy/core/src/highway
Submodule highway added at 489e82
66 changes: 66 additions & 0 deletions numpy/core/src/umath/absolute.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#define _UMATHMODULE
#define _MULTIARRAYMODULE
#define NPY_NO_DEPRECATED_API NPY_API_VERSION

#define PY_SSIZE_T_CLEAN
#include <Python.h>

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "absolute.cpp" // this file
#include <hwy/foreach_target.h> // must come before highway.h
#include <hwy/highway.h>

#include "numpy/ndarraytypes.h"
#include "numpy/npy_common.h"
#include "numpy/npy_math.h"
#include "numpy/utils.h"

namespace numpy {
namespace HWY_NAMESPACE { // required: unique per target

// Can skip hn:: prefixes if already inside hwy::HWY_NAMESPACE.
namespace hn = hwy::HWY_NAMESPACE;
using T = npy_int;

// Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE
// Writes the element-wise absolute value of `input_array[0..size)` into
// `output_array[0..size)`.
//
// Both buffers are treated as contiguous arrays of `size` elements of T.
// Full vectors are processed with unaligned loads/stores because the caller
// passes raw ufunc buffers, which are not guaranteed to be aligned to the
// vector size. Any remaining elements (when `size` is not a multiple of the
// lane count) are handled one at a time so nothing is read or written past
// the end of either buffer.
//
// Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE
HWY_ATTR void SuperAbsolute(const T* HWY_RESTRICT input_array,
T* HWY_RESTRICT output_array,
const size_t size) {
const hn::ScalableTag<T> d;
const size_t lanes = hn::Lanes(d);
size_t i = 0;
// Whole vectors only: `i + lanes <= size` keeps every access in bounds.
for (; i + lanes <= size; i += lanes) {
const auto in = hn::LoadU(d, input_array + i);
hn::StoreU(hn::Abs(in), d, output_array + i);
}
// Scalar tail for the final `size % lanes` elements; same expression as the
// generic integer absolute loop in loops_autovec.dispatch.c.src.
for (; i < size; ++i) {
const T in = input_array[i];
output_array[i] = (in >= 0) ? in : -in;
}
}

}
}

#if HWY_ONCE
namespace numpy {

HWY_EXPORT(SuperAbsolute);

extern "C" {
// Unary ufunc inner loop computing the absolute value of npy_int elements.
//
// args[0] / args[1] : input / output buffers (byte pointers).
// dimensions[0]     : number of elements to process.
// steps[0] / [1]    : byte strides of the input / output buffers.
//
// The Highway kernel only understands densely packed data, so it is used
// solely when both strides equal sizeof(npy_int). Every other layout is
// handled by a strided scalar loop.
// NOTE(review): the kernel's pointers are HWY_RESTRICT; this assumes the
// caller never passes partially overlapping contiguous buffers — confirm
// against the ufunc overlap handling.
NPY_NO_EXPORT void
INT_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
const char *ip1 = args[0];
char *op1 = args[1];
const npy_intp is1 = steps[0];
const npy_intp os1 = steps[1];
const npy_intp n = dimensions[0];
const npy_intp elsize = static_cast<npy_intp>(sizeof(npy_int));

if (is1 == elsize && os1 == elsize) {
// This must reside outside of HWY_NAMESPACE because it references (calls the
// appropriate one from) the per-target implementations there.
// The macro is invoked on every call rather than cached in a static, so the
// target lookup is not frozen to whatever entry was selected first.
// For static dispatch, use HWY_STATIC_DISPATCH.
HWY_DYNAMIC_DISPATCH(SuperAbsolute)(
reinterpret_cast<const npy_int *>(ip1),
reinterpret_cast<npy_int *>(op1),
static_cast<size_t>(n));
return;
}

// Non-contiguous input and/or output: advance by the byte strides.
for (npy_intp i = 0; i < n; ++i, ip1 += is1, op1 += os1) {
const npy_int in = *reinterpret_cast<const npy_int *>(ip1);
*reinterpret_cast<npy_int *>(op1) = (in >= 0) ? in : -in;
}
}
}

}
#endif


3 changes: 3 additions & 0 deletions numpy/core/src/umath/loops_autovec.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -213,13 +213,16 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_invert)
* #TYPE = BYTE, SHORT, INT, LONG, LONGLONG#
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong#
* #c = ,,,l,ll#
* #abs = 1,1,0,1,1#
*/

/*
 * Generic absolute-value inner loop for the signed integer types of this
 * repeat block. It is emitted only when the per-type @abs@ flag is non-zero;
 * the flag is 0 for INT, so no INT_absolute is generated here (this change
 * adds an INT_absolute in src/umath/absolute.cpp instead).
 */
#if @abs@
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_absolute)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
/* Per element: keep non-negative values, negate negative ones. */
UNARY_LOOP_FAST(@type@, @type@, *out = (in >= 0) ? in : -in);
}
#endif

NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_sign)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
Expand Down
0