[demo] how-to replacing numpy custom generation engine by raw C++ · numpy/numpy@a0a48ce · GitHub
[go: up one dir, main page]

Skip to content

Commit a0a48ce

Browse files
[demo] how-to replacing numpy custom generation engine by raw C++
This is just a technical prototype to measure and discuss the impact and implication of moving to C++ for kernel code generation.
1 parent 04216da commit a0a48ce

File tree

6 files changed

+198
-140
lines changed

6 files changed

+198
-140
lines changed

numpy/core/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -926,8 +926,8 @@ def generate_umath_c(ext, build_dir):
926926
join('src', 'umath', 'loops_exponent_log.dispatch.c.src'),
927927
join('src', 'umath', 'matmul.h.src'),
928928
join('src', 'umath', 'matmul.c.src'),
929-
join('src', 'umath', 'clip.h.src'),
930-
join('src', 'umath', 'clip.c.src'),
929+
join('src', 'umath', 'clip.h'),
930+
join('src', 'umath', 'clip.cpp'),
931931
join('src', 'umath', 'dispatching.c'),
932932
join('src', 'umath', 'legacy_array_method.c'),
933933
join('src', 'umath', 'ufunc_object.c'),

numpy/core/src/npymath/npy_math_private.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,17 +507,29 @@ typedef union {
507507
#else /* !_MSC_VER */
508508
typedef union {
509509
npy_cdouble npy_z;
510+
#ifdef __cplusplus
511+
std::complex<double> c99_z;
512+
#else
510513
complex double c99_z;
514+
#endif
511515
} __npy_cdouble_to_c99_cast;
512516

513517
typedef union {
514518
npy_cfloat npy_z;
519+
#ifdef __cplusplus
520+
std::complex<float> c99_z;
521+
#else
515522
complex float c99_z;
523+
#endif
516524
} __npy_cfloat_to_c99_cast;
517525

518526
typedef union {
519527
npy_clongdouble npy_z;
520-
complex long double c99_z;
528+
#ifdef __cplusplus
529+
std::complex<long double> c99_z;
530+
#else
531+
complex long double c99_z;
532+
#endif
521533
} __npy_clongdouble_to_c99_cast;
522534
#endif /* !_MSC_VER */
523535

numpy/core/src/umath/clip.c.src

Lines changed: 0 additions & 119 deletions
This file was deleted.

numpy/core/src/umath/clip.cpp

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/**
2+
* This module provides the inner loops for the clip ufunc
3+
*/
4+
#define _UMATHMODULE
5+
#define _MULTIARRAYMODULE
6+
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
7+
8+
#include "Python.h"
9+
10+
#include "numpy/halffloat.h"
11+
#include "numpy/npy_math.h"
12+
#include "numpy/ndarraytypes.h"
13+
#include "numpy/npy_common.h"
14+
#include "numpy/utils.h"
15+
#include "fast_loop_macros.h"
16+
17+
namespace npy {
18+
19+
struct integral_tag {};
20+
struct floating_point_tag {};
21+
struct complex_tag {};
22+
struct date_tag {};
23+
24+
struct bool_tag : integral_tag { using type = npy_bool; };
25+
struct byte_tag : integral_tag {using type = npy_byte; } ;
26+
struct ubyte_tag : integral_tag {using type = npy_ubyte; } ;
27+
struct short_tag : integral_tag {using type = npy_short; } ;
28+
struct ushort_tag : integral_tag {using type = npy_ushort; } ;
29+
struct int_tag : integral_tag {using type = npy_int; } ;
30+
struct uint_tag : integral_tag {using type = npy_uint; } ;
31+
struct long_tag : integral_tag {using type = npy_long ; } ;
32+
struct ulong_tag : integral_tag {using type = npy_ulong ; } ;
33+
struct longlong_tag : integral_tag {using type = npy_longlong ; } ;
34+
struct ulonglong_tag : integral_tag {using type = npy_ulonglong ; } ;
35+
struct half_tag {using type = npy_half ; } ;
36+
struct float_tag : floating_point_tag {using type = npy_float ; } ;
37+
struct double_tag : floating_point_tag {using type = npy_double ; } ;
38+
struct longdouble_tag : floating_point_tag {using type = npy_longdouble ; } ;
39+
struct cfloat_tag : complex_tag {using type = npy_cfloat ; } ;
40+
struct cdouble_tag : complex_tag {using type = npy_cdouble ; } ;
41+
struct clongdouble_tag : complex_tag {using type = npy_clongdouble ; } ;
42+
struct datetime_tag : date_tag {using type = npy_datetime ; } ;
43+
struct timedelta_tag : date_tag {using type = npy_timedelta ; } ;
44+
45+
}
46+
47+
template<class T>
48+
T _NPY_MIN(T a, T b, npy::integral_tag const &) { return PyArray_MIN(a, b); }
49+
template<class T>
50+
T _NPY_MAX(T a, T b, npy::integral_tag const &) { return PyArray_MAX(a, b); }
51+
52+
template<class T>
53+
T _NPY_MIN(T a, T b, npy::half_tag const &) { return npy_half_isnan(a) || npy_half_le(a, b) ? (a) : (b); }
54+
template<class T>
55+
T _NPY_MAX(T a, T b, npy::half_tag const &) { return npy_half_isnan(a) || npy_half_ge(a, b) ? (a) : (b); }
56+
57+
template<class T>
58+
T _NPY_MIN(T a, T b, npy::floating_point_tag const &) { return npy_isnan(a) ? (a) : PyArray_MIN(a, b); }
59+
template<class T>
60+
T _NPY_MAX(T a, T b, npy::floating_point_tag const &) { return npy_isnan(a) ? (a) : PyArray_MAX(a, b); }
61+
62+
template<class T>
63+
T _NPY_MIN(T a, T b, npy::complex_tag const &) { return npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CLT(a, b) ? (a) : (b); }
64+
template<class T>
65+
T _NPY_MAX(T a, T b, npy::complex_tag const &) { return npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CGT(a, b) ? (a) : (b); }
66+
67+
template<class T>
68+
T _NPY_MIN(T a, T b, npy::date_tag const &) {
69+
return (a) == NPY_DATETIME_NAT
70+
? (a)
71+
: (b) == NPY_DATETIME_NAT ? (b) : (a) < (b) ? (a) : (b);
72+
}
73+
template<class T>
74+
T _NPY_MAX(T a, T b, npy::date_tag const &) {
75+
return (a) == NPY_DATETIME_NAT
76+
? (a)
77+
: (b) == NPY_DATETIME_NAT ? (b) : (a) > (b) ? (a) : (b);
78+
}
79+
80+
/* generic dispatcher */
81+
template<class Tag, class T=typename Tag::type>
82+
T _NPY_MIN(T const& a, T const& b) {
83+
return _NPY_MIN(a, b, Tag{});
84+
}
85+
template<class Tag, class T=typename Tag::type>
86+
T _NPY_MAX(T const& a, T const& b) {
87+
return _NPY_MAX(a, b, Tag{});
88+
}
89+
90+
91+
template<class Tag, class T>
92+
auto _NPY_CLIP(T x, T min, T max) {
93+
return _NPY_MIN<Tag>(_NPY_MAX<Tag>((x), (min)), (max));
94+
}
95+
96+
template<class Tag>
97+
static void
98+
_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
99+
{
100+
using T = typename Tag::type;
101+
if (steps[1] == 0 && steps[2] == 0) {
102+
/* min and max are constant throughout the loop, the most common case */
103+
/* NOTE: it may be possible to optimize these checks for nan */
104+
T min_val = *(T *)args[1];
105+
T max_val = *(T *)args[2];
106+
107+
char *ip1 = args[0], *op1 = args[3];
108+
npy_intp is1 = steps[0], os1 = steps[3];
109+
npy_intp n = dimensions[0];
110+
111+
/* contiguous, branch to let the compiler optimize */
112+
if (is1 == sizeof(T) && os1 == sizeof(T)) {
113+
for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
114+
*(T *)op1 = _NPY_CLIP<Tag>(*(T *)ip1, min_val, max_val);
115+
}
116+
}
117+
else {
118+
for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
119+
*(T *)op1 = _NPY_CLIP<Tag>(*(T *)ip1, min_val, max_val);
120+
}
121+
}
122+
}
123+
else {
124+
TERNARY_LOOP {
125+
*(T *)op1 = _NPY_CLIP<Tag>(*(T *)ip1, *(T *)ip2, *(T *)ip3);
126+
}
127+
}
128+
npy_clear_floatstatus_barrier((char*)dimensions);
129+
}
130+
131+
extern "C" {
132+
NPY_NO_EXPORT void BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::bool_tag>(args, dimensions, steps); }
133+
NPY_NO_EXPORT void BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::byte_tag>(args, dimensions, steps); }
134+
NPY_NO_EXPORT void UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::ubyte_tag>(args, dimensions, steps); }
135+
NPY_NO_EXPORT void SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::short_tag>(args, dimensions, steps); }
136+
NPY_NO_EXPORT void USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::ushort_tag>(args, dimensions, steps); }
137+
NPY_NO_EXPORT void INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::int_tag>(args, dimensions, steps); }
138+
NPY_NO_EXPORT void UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::uint_tag>(args, dimensions, steps); }
139+
NPY_NO_EXPORT void LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::long_tag>(args, dimensions, steps); }
140+
NPY_NO_EXPORT void ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::ulong_tag>(args, dimensions, steps); }
141+
NPY_NO_EXPORT void LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::longlong_tag>(args, dimensions, steps); }
142+
NPY_NO_EXPORT void ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::ulonglong_tag>(args, dimensions, steps); }
143+
NPY_NO_EXPORT void HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::half_tag>(args, dimensions, steps); }
144+
NPY_NO_EXPORT void FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::float_tag>(args, dimensions, steps); }
145+
NPY_NO_EXPORT void DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::double_tag>(args, dimensions, steps); }
146+
NPY_NO_EXPORT void LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::longdouble_tag>(args, dimensions, steps); }
147+
NPY_NO_EXPORT void CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::cfloat_tag>(args, dimensions, steps); }
148+
NPY_NO_EXPORT void CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::cdouble_tag>(args, dimensions, steps); }
149+
NPY_NO_EXPORT void CLONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::clongdouble_tag>(args, dimensions, steps); }
150+
NPY_NO_EXPORT void DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::datetime_tag>(args, dimensions, steps); }
151+
NPY_NO_EXPORT void TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { return _npy_clip<npy::timedelta_tag>(args, dimensions, steps); }
152+
}

numpy/core/src/umath/clip.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#ifndef _NPY_UMATH_CLIP_H_
2+
#define _NPY_UMATH_CLIP_H_
3+
4+
#ifdef __cplusplus
5+
extern "C" {
6+
#endif
7+
NPY_NO_EXPORT void BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
8+
NPY_NO_EXPORT void BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
9+
NPY_NO_EXPORT void UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
10+
NPY_NO_EXPORT void SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
11+
NPY_NO_EXPORT void USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
12+
NPY_NO_EXPORT void INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
13+
NPY_NO_EXPORT void UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
14+
NPY_NO_EXPORT void LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
15+
NPY_NO_EXPORT void ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
16+
NPY_NO_EXPORT void LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
17+
NPY_NO_EXPORT void ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
18+
NPY_NO_EXPORT void HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
19+
NPY_NO_EXPORT void FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
20+
NPY_NO_EXPORT void DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
21+
NPY_NO_EXPORT void LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
22+
NPY_NO_EXPORT void CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
23+
NPY_NO_EXPORT void CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
24+
NPY_NO_EXPORT void CLONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
25+
NPY_NO_EXPORT void DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
26+
NPY_NO_EXPORT void TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
27+
#ifdef __cplusplus
28+
}
29+
#endif
30+
31+
#endif

numpy/core/src/umath/clip.h.src

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)
0