8000 Convert unary_fp_le to highway. · numpy/numpy@f1877d1 · GitHub
[go: up one dir, main page]

Skip to content

Commit f1877d1

Browse files
committed
Convert unary_fp_le to highway.
1 parent ab01d8f commit f1877d1

File tree

6 files changed

+507
-557
lines changed

6 files changed

+507
-557
lines changed

numpy/_core/meson.build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1021,12 +1021,13 @@ foreach gen_mtargets : [
10211021
],
10221022
[
10231023
'loops_unary_fp_le.dispatch.h',
1024-
src_file.process('src/umath/loops_unary_fp_le.dispatch.c.src'),
1024+
'src/umath/loops_unary_fp_le.dispatch.cpp',
10251025
[
10261026
SSE41, SSE2,
10271027
VSX2,
10281028
ASIMD, NEON,
10291029
LSX,
1030+
RVV,
10301031
]
10311032
],
10321033
[

numpy/_core/src/common/simd/simd.hpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#ifndef NUMPY__CORE_SRC_COMMON_SIMD_SIMD_HPP_
2+
#define NUMPY__CORE_SRC_COMMON_SIMD_SIMD_HPP_
3+
4+
/**
5+
* This header provides a thin wrapper over Google's Highway SIMD library.
6+
*
7+
* The wrapper aims to simplify the SIMD interface of Google's Highway by
8+
* getting rid of its class tags and using lane types directly, which can be deduced
9+
* from the args in most cases.
10+
*/
11+
/**
12+
* Since `NPY_SIMD` is limited to NumPy C universal intrinsics,
13+
* `NPY_SIMDX` is defined to indicate the SIMD availability for Google's Highway
14+
* C++ code.
15+
*
16+
* Highway SIMD is only available when optimization is enabled.
17+
* When NPY_DISABLE_OPTIMIZATION is defined, SIMD operations are disabled
18+
* and the code falls back to scalar implementations.
19+
*/
20+
#ifndef NPY_DISABLE_OPTIMIZATION
21+
#include <hwy/highway.h>
22+
23+
/**
24+
* We avoid using Highway scalar operations for the following reasons:
25+
* 1. We already provide kernels for scalar operations, so falling back to
26+
* the NumPy implementation is more appropriate. Compilers can often
27+
* optimize these better since they rely on standard libraries.
28+
* 2. Not all Highway intrinsics are fully supported in scalar mode.
29+
*
30+
* Therefore, we only enable SIMD when the Highway target is not scalar.
31+
*/
32+
#define NPY_SIMDX (HWY_TARGET != HWY_SCALAR)
33+
34+
// Indicates if the SIMD operations are available for float16.
35+
#define NPY_SIMDX_F16 (NPY_SIMDX && HWY_HAVE_FLOAT16)
36+
// Note: Highway requires SIMD extensions with native float32 support, so we don't need
37+
// to check for it.
38+
39+
// Indicates if the SIMD operations are available for float64.
40+
#define NPY_SIMDX_F64 (NPY_SIMDX && HWY_HAVE_FLOAT64)
41+
42+
// Indicates if the SIMD floating-point operations natively support FMA.
43+
#define NPY_SIMDX_FMA (NPY_SIMDX && HWY_NATIVE_FMA)
44+
45+
#else
46+
#define NPY_SIMDX 0
47+
#define NPY_SIMDX_F16 0
48+
#define NPY_SIMDX_F64 0
49+
#define NPY_SIMDX_FMA 0
50+
#endif
51+
52+
namespace np {
53+
54+
/// Represents the max SIMD width supported by the platform.
55+
namespace simd {
56+
#if NPY_SIMDX
57+
/// The highway namespace alias.
58+
/// We can not import all the symbols from the HWY_NAMESPACE because it will
59+
/// conflict with the existing symbols in the numpy namespace.
60+
namespace hn = hwy::HWY_NAMESPACE;
61+
// internally used by the template header
62+
template <typename TLane>
63+
using _Tag = hn::ScalableTag<TLane>;
64+
#endif
65+
#include "simd.inc.hpp"
66+
} // namespace simd
67+
68+
/// Represents the 128-bit SIMD width.
69+
namespace simd128 {
70+
#if NPY_SIMDX
71+
namespace hn = hwy::HWY_NAMESPACE;
72+
template <typename TLane>
73+
using _Tag = hn::Full128<TLane>;
74+
#endif
75+
#include "simd.inc.hpp"
76+
} // namespace simd128
77+
78+
} // namespace np
79+
80+
#endif // NUMPY__CORE_SRC_COMMON_SIMD_SIMD_HPP_
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#ifndef NPY_SIMDX
2+
#error "This is not a standalone header. Include simd.hpp instead."
3+
#define NPY_SIMDX 1 // Prevent editors from graying out the happy branch
4+
#endif
5+
6+
// Using anonymous namespace instead of inline to ensure each translation unit
7+
// gets its own copy of constants based on local compilation flags
8+
namespace {
9+
10+
// NOTE: This file is included by simd.hpp multiple times with different namespaces
11+
// so avoid including any headers here
12+
13+
/**
14+
* Determines whether the specified lane type is supported by the SIMD extension.
15+
* Always defined as false when SIMD is not enabled, so it can be used in SFINAE.
16+
*
17+
* @tparam TLane The lane type to check for support.
18+
*/
19+
template <typename TLane>
20+
constexpr bool kSupportLane = NPY_SIMDX != 0;
21+
22+
#if NPY_SIMDX
23+
// Define lane type support based on Highway capabilities
24+
template <>
25+
constexpr bool kSupportLane<hwy::float16_t> = HWY_HAVE_FLOAT16 != 0;
26+
template <>
27+
constexpr bool kSupportLane<double> = HWY_HAVE_FLOAT64 != 0;
28+
template <>
29+
constexpr bool kSupportLane<long double> =
30+
HWY_HAVE_FLOAT64 != 0 && sizeof(long double) == sizeof(double);
31+
32+
/// Maximum number of lanes supported by the SIMD extension for the specified lane type.
33+
template <typename TLane>
34+
constexpr size_t kMaxLanes = HWY_MAX_LANES_D(_Tag<TLane>);
35+
36+
/// Represents an N-lane vector based on the specified lane type.
37+
/// @tparam TLane The scalar type for each vector lane
38+
template <typename TLane>
39+
using Vec = hn::Vec<_Tag<TLane>>;
40+
41+
/// Represents a mask vector with boolean values or as a bitmask.
42+
/// @tparam TLane The scalar type the mask corresponds to
43+
template <typename TLane>
44+
using Mask = hn::Mask<_Tag<TLane>>;
45+
46+
/// Unaligned load of a vector from memory.
47+
template <typename TLane>
48+
HWY_API Vec<TLane>
49+
LoadU(const TLane *ptr)
50+
{
51+
return hn::LoadU(_Tag<TLane>(), ptr);
52+
}
53+
54+
/// Unaligned store of a vector to memory.
55+
template <typename TLane>
56+
HWY_API void
57+
StoreU(const Vec<TLane> &a, TLane *ptr)
58+
{
59+
hn::StoreU(a, _Tag<TLane>(), ptr);
60+
}
61+
62+
/// Returns the number of vector lanes based on the lane type.
63+
template <typename TLane>
64+
HWY_API HWY_LANES_CONSTEXPR size_t
65+
Lanes(TLane tag = 0)
66+
{
67+
return hn::Lanes(_Tag<TLane>());
68+
}
69+
70+
/// Returns an uninitialized N-lane vector.
71+
template <typename TLane>
72+
HWY_API Vec<TLane>
73+
Undefined(TLane tag = 0)
74+
{
75+
return hn::Undefined(_Tag<TLane>());
76+
}
77+
78+
/// Returns N-lane vector with all lanes equal to zero.
79+
template <typename TLane>
80+
HWY_API Vec<TLane>
81+
Zero(TLane tag = 0)
82+
{
83+
return hn::Zero(_Tag<TLane>());
84+
}
85+
86+
/// Returns N-lane vector with all lanes equal to the given value of type `TLane`.
87+
template <typename TLane>
88+
HWY_API Vec<TLane>
89+
Set(TLane val)
90+
{
91+
return hn::Set(_Tag<TLane>(), val);
92+
}
93+
94+
/// Converts a mask to a vector based on the specified lane type.
95+
template <typename TLane, typename TMask>
96+
HWY_API Vec<TLane>
97+
VecFromMask(const TMask &m)
98+
{
99+
return hn::VecFromMask(_Tag<TLane>(), m);
100+
}
101+
102+
/// Convert (Reinterpret) an N-lane vector to a different type without modifying the
103+
/// underlying data.
104+
template <typename TLaneTo, typename TVec>
105+
HWY_API Vec<TLaneTo>
106+
BitCast(const TVec &v)
107+
{
108+
return hn::BitCast(_Tag<TLaneTo>(), v);
109+
}
110+
111+
// Import common Highway intrinsics
112+
using hn::Abs;
113+
using hn::Add;
114+
using hn::And;
115+
using hn::AndNot;
116+
using hn::Div;
117+
using hn::Eq;
118+
using hn::Ge;
119+
using hn::Gt;
120+
using hn::Le;
121+
using hn::Lt;
122+
using hn::Max;
123+
using hn::Min;
124+
using hn::Mul;
125+
using hn::Or;
126+
using hn::Sqrt;
127+
using hn::Sub;
128+
using hn::Xor;
129+
130+
#endif // NPY_SIMDX
131+
132+
} // namespace anonymous

numpy/_core/src/highway

Submodule highway updated 113 files

0 commit comments

Comments
 (0)
0