-
-
Notifications
You must be signed in to change notification settings - Fork 11.1k
Use Highway with Static Dispatch for some types in Absolute #24385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 1 commit
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
085dff1
Highway POC
Mousius 8150c3e
Environment Variables and More Types
Mousius 6bec892
Static dispatch example
Mousius ec4110c
Working absolute implementation
Mousius b794bf2
Remove dispatcher singleton
Mousius be908f1
Fix features
Mousius 84695e0
Remove leftovers, align targets
Mousius File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next
Next commit
Highway POC
- Loading branch information
commit 085dff14178c2601ee07db7eafb7da204586c12b
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#define _UMATHMODULE | ||
#define _MULTIARRAYMODULE | ||
#define NPY_NO_DEPRECATED_API NPY_API_VERSION | ||
|
||
#define PY_SSIZE_T_CLEAN | ||
#include <Python.h> | ||
|
||
#undef HWY_TARGET_INCLUDE | ||
#define HWY_TARGET_INCLUDE "absolute.cpp" // this file | ||
#include <hwy/foreach_target.h> // must come before highway.h | ||
#include <hwy/highway.h> | ||
|
||
#include "numpy/ndarraytypes.h" | ||
#include "numpy/npy_common.h" | ||
#include "numpy/npy_math.h" | ||
#include "numpy/utils.h" | ||
|
||
namespace numpy { | ||
namespace HWY_NAMESPACE { // required: unique per target | ||
|
||
// Can skip hn:: prefixes if already inside hwy::HWY_NAMESPACE. | ||
namespace hn = hwy::HWY_NAMESPACE; | ||
using T = npy_int; | ||
|
||
// Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE | ||
// Writes the element-wise absolute value of `input_array[0, size)` into
// `output_array[0, size)` using the SIMD target this translation unit is
// currently being compiled for.
//
// - Neither pointer needs to be vector-aligned: unaligned loads/stores are
//   used throughout.
// - `size` may be any value, including 0 or a non-multiple of the vector
//   length: the remainder is handled with a masked load/store so no memory
//   outside [0, size) is read or written.
HWY_ATTR void SuperAbsolute(const T* HWY_RESTRICT input_array,
                            T* HWY_RESTRICT output_array,
                            const size_t size) {
  const hn::ScalableTag<T> d;
  const size_t lanes = hn::Lanes(d);

  // Main loop: only whole vectors that fit entirely inside the arrays.
  size_t i = 0;
  for (; i + lanes <= size; i += lanes) {
    const auto in = hn::LoadU(d, input_array + i);
    hn::StoreU(hn::Abs(in), d, output_array + i);
  }

  // Tail: fewer than `lanes` elements remain. Mask off the lanes that would
  // fall past the end so they are neither loaded nor stored.
  const size_t remaining = size - i;
  if (remaining != 0) {
    const auto tail_mask = hn::FirstN(d, remaining);
    const auto in = hn::MaskedLoad(tail_mask, d, input_array + i);
    hn::BlendedStore(hn::Abs(in), tail_mask, d, output_array + i);
  }
}
|
||
} | ||
} | ||
|
||
#if HWY_ONCE | ||
namespace numpy { | ||
|
||
HWY_EXPORT(SuperAbsolute); | ||
|
||
extern "C" { | ||
// Inner loop computing the absolute value of `npy_int` elements.
//
// args[0] / args[1]  : input / output data pointers.
// dimensions[0]      : number of elements to process.
// steps[0] / steps[1]: byte strides of the input / output.
//
// When both strides equal sizeof(npy_int) the data is contiguous and the
// Highway kernel is used; otherwise a strided scalar loop is used, because
// the SIMD kernel only understands densely packed arrays.
NPY_NO_EXPORT void
INT_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
    const npy_intp n = dimensions[0];
    if (n <= 0) {
        return;
    }

    const npy_intp is1 = steps[0];
    const npy_intp os1 = steps[1];
    const npy_intp elem_size = static_cast<npy_intp>(sizeof(npy_int));

    if (is1 == elem_size && os1 == elem_size) {
        // This must reside outside of HWY_NAMESPACE because it references
        // (calls the appropriate one from) the per-target implementations
        // there. The macro is invoked on every call rather than cached in a
        // static: before the first dispatch it resolves to Highway's
        // target-selection trampoline, so a pointer cached at that moment
        // would keep going through the trampoline forever.
        // For static dispatch, use HWY_STATIC_DISPATCH.
        HWY_DYNAMIC_DISPATCH(SuperAbsolute)(
            reinterpret_cast<const npy_int *>(args[0]),
            reinterpret_cast<npy_int *>(args[1]),
            static_cast<size_t>(n));
        return;
    }

    // Strided fallback: walk both arrays by their byte strides.
    const char *ip1 = args[0];
    char *op1 = args[1];
    for (npy_intp i = 0; i < n; ++i, ip1 += is1, op1 += os1) {
        const npy_int in = *reinterpret_cast<const npy_int *>(ip1);
        // Negate in unsigned arithmetic so the most negative value wraps to
        // itself instead of overflowing a signed integer.
        const npy_uint magnitude = (in < 0) ? (0u - static_cast<npy_uint>(in))
                                            : static_cast<npy_uint>(in);
        *reinterpret_cast<npy_int *>(op1) = static_cast<npy_int>(magnitude);
    }
}
} | ||
|
||
} | ||
#endif | ||
|
||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Mousius was this useful for local debugging? I'm working on Meson subproject usage elsewhere, so I'm curious about the details here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could have been more useful. I wanted to use the meson integration with cmake to be cleaner, but I ended up just adding the files manually as there seemed to be a lot of steps involved.