Merge branch 'numpy:main' into main · numpy/numpy@f547352 · GitHub
[go: up one dir, main page]

Skip to content

Commit f547352

Browse files
committed
Merge branch 'numpy:main' into main
2 parents 2c17e2a + f249607 commit f547352

26 files changed

+359
-180
lines changed

.github/workflows/linux.yml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,22 +58,19 @@ jobs:
5858
MESON_ARGS: "-Dallow-noblas=true -Dcpu-baseline=none -Dcpu-dispatch=none"
5959
strategy:
6060
matrix:
61-
version: ["3.11", "3.12", "3.13", "3.13t"]
61+
version: ["3.11", "3.12", "3.13", "3.14-dev", "3.14t-dev"]
6262
steps:
6363
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
6464
with:
6565
submodules: recursive
6666
fetch-tags: true
6767
persist-credentials: false
68-
- uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86
68+
- uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
6969
with:
7070
python-version: ${{ matrix.version }}
71-
enable-cache: false
72-
- run:
73-
uv pip install --python=${{ matrix.version }} pip
7471
# TODO: remove cython nightly install when cython does a release
7572
- name: Install nightly Cython
76-
if: matrix.version == '3.13t'
73+
if: matrix.version == '3.14t-dev'
7774
run: |
7875
pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
7976
- uses: ./.github/meson_actions

benchmarks/benchmarks/bench_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ class CountNonzero(Benchmark):
151151
params = [
152152
[1, 2, 3],
153153
[100, 10000, 1000000],
154-
[bool, np.int8, np.int16, np.int32, np.int64, str, object]
154+
[bool, np.int8, np.int16, np.int32, np.int64, np.float32, np.float64, str, object]
155155
]
156156

157157
def setup(self, numaxes, size, dtype):

numpy/_core/code_generators/generate_umath.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -776,55 +776,55 @@ def english_upper(s):
776776
Ufunc(1, 1, None,
777777
docstrings.get('numpy._core.umath.arccos'),
778778
None,
779-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
779+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
780780
TD(inexact, f='acos', astype={'e': 'f'}),
781781
TD(P, f='arccos'),
782782
),
783783
'arccosh':
784784
Ufunc(1, 1, None,
785785
docstrings.get('numpy._core.umath.arccosh'),
786786
None,
787-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
787+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
788788
TD(inexact, f='acosh', astype={'e': 'f'}),
789789
TD(P, f='arccosh'),
790790
),
791791
'arcsin':
792792
Ufunc(1, 1, None,
793793
docstrings.get('numpy._core.umath.arcsin'),
794794
None,
795-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
795+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
796796
TD(inexact, f='asin', astype={'e': 'f'}),
797797
TD(P, f='arcsin'),
798798
),
799799
'arcsinh':
800800
Ufunc(1, 1, None,
801801
docstrings.get('numpy._core.umath.arcsinh'),
802802
None,
803-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
803+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
804804
TD(inexact, f='asinh', astype={'e': 'f'}),
805805
TD(P, f='arcsinh'),
806806
),
807807
'arctan':
808808
Ufunc(1, 1, None,
809809
docstrings.get('numpy._core.umath.arctan'),
810810
None,
811-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
811+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
812812
TD(inexact, f='atan', astype={'e': 'f'}),
813813
TD(P, f='arctan'),
814814
),
815815
'arctanh':
816816
Ufunc(1, 1, None,
817817
docstrings.get('numpy._core.umath.arctanh'),
818818
None,
819-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
819+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
820820
TD(inexact, f='atanh', astype={'e': 'f'}),
821821
TD(P, f='arctanh'),
822822
),
823823
'cos':
824824
Ufunc(1, 1, None,
825825
docstrings.get('numpy._core.umath.cos'),
826826
None,
827-
TD('e', dispatch=[('loops_umath_fp', 'e')]),
827+
TD('e', dispatch=[('loops_half', 'e')]),
828828
TD('f', dispatch=[('loops_trigonometric', 'f')]),
829829
TD('d', dispatch=[('loops_trigonometric', 'd')]),
830830
TD('g' + cmplx, f='cos'),
@@ -834,7 +834,7 @@ def english_upper(s):
834834
Ufunc(1, 1, None,
835835
docstrings.get('numpy._core.umath.sin'),
836836
None,
837-
TD('e', dispatch=[('loops_umath_fp', 'e')]),
837+
TD('e', dispatch=[('loops_half', 'e')]),
838838
TD('f', dispatch=[('loops_trigonometric', 'f')]),
839839
TD('d', dispatch=[('loops_trigonometric', 'd')]),
840840
TD('g' + cmplx, f='sin'),
@@ -844,31 +844,31 @@ def english_upper(s):
844844
Ufunc(1, 1, None,
845845
docstrings.get('numpy._core.umath.tan'),
846846
None,
847-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
847+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
848848
TD(inexact, f='tan', astype={'e': 'f'}),
849849
TD(P, f='tan'),
850850
),
851851
'cosh':
852852
Ufunc(1, 1, None,
853853
docstrings.get('numpy._core.umath.cosh'),
854854
None,
855-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
855+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
856856
TD(inexact, f='cosh', astype={'e': 'f'}),
857857
TD(P, f='cosh'),
858858
),
859859
'sinh':
860860
Ufunc(1, 1, None,
861861
docstrings.get('numpy._core.umath.sinh'),
862862
None,
863-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
863+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
864864
TD(inexact, f='sinh', astype={'e': 'f'}),
865865
TD(P, f='sinh'),
866866
),
867867
'tanh':
868868
Ufunc(1, 1, None,
869869
docstrings.get('numpy._core.umath.tanh'),
870870
None,
871-
TD('e', dispatch=[('loops_umath_fp', 'e')]),
871+
TD('e', dispatch=[('loops_half', 'e')]),
872872
TD('fd', dispatch=[('loops_hyperbolic', 'fd')]),
873873
TD(inexact, f='tanh', astype={'e': 'f'}),
874874
TD(P, f='tanh'),
@@ -877,7 +877,7 @@ def english_upper(s):
877877
Ufunc(1, 1, None,
878878
docstrings.get('numpy._core.umath.exp'),
879879
None,
880-
TD('e', dispatch=[('loops_umath_fp', 'e')]),
880+
TD('e', dispatch=[('loops_half', 'e')]),
881881
TD('fd', dispatch=[('loops_exponent_log', 'fd')]),
882882
TD('fdg' + cmplx, f='exp'),
883883
TD(P, f='exp'),
@@ -886,23 +886,23 @@ def english_upper(s):
886886
Ufunc(1, 1, None,
887887
docstrings.get('numpy._core.umath.exp2'),
888888
None,
889-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
889+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
890890
TD(inexact, f='exp2', astype={'e': 'f'}),
891891
TD(P, f='exp2'),
892892
),
893893
'expm1':
894894
Ufunc(1, 1, None,
895895
docstrings.get('numpy._core.umath.expm1'),
896896
None,
897-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
897+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
898898
TD(inexact, f='expm1', astype={'e': 'f'}),
899899
TD(P, f='expm1'),
900900
),
901901
'log':
902902
Ufunc(1, 1, None,
903903
docstrings.get('numpy._core.umath.log'),
904904
None,
905-
TD('e', dispatch=[('loops_umath_fp', 'e')]),
905+
TD('e', dispatch=[('loops_half', 'e')]),
906906
TD('fd', dispatch=[('loops_exponent_log', 'fd')]),
907907
TD('fdg' + cmplx, f='log'),
908908
TD(P, f='log'),
@@ -911,23 +911,23 @@ def english_upper(s):
911911
Ufunc(1, 1, None,
912912
docstrings.get('numpy._core.umath.log2'),
913913
None,
914-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
914+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
915915
TD(inexact, f='log2', astype={'e': 'f'}),
916916
TD(P, f='log2'),
917917
),
918918
'log10':
919919
Ufunc(1, 1, None,
920920
docstrings.get('numpy._core.umath.log10'),
921921
None,
922-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
922+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
923923
TD(inexact, f='log10', astype={'e': 'f'}),
924924
TD(P, f='log10'),
925925
),
926926
'log1p':
927927
Ufunc(1, 1, None,
928928
docstrings.get('numpy._core.umath.log1p'),
929929
None,
930-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
930+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
931931
TD(inexact, f='log1p', astype={'e': 'f'}),
932932
TD(P, f='log1p'),
933933
),
@@ -944,7 +944,7 @@ def english_upper(s):
944944
Ufunc(1, 1, None,
945945
docstrings.get('numpy._core.umath.cbrt'),
946946
None,
947-
TD('efd', dispatch=[('loops_umath_fp', 'efd')]),
947+
TD('efd', dispatch=[('loops_umath_fp', 'fd'), ('loops_half', 'e')]),
948948
TD(flts, f='cbrt', astype={'e': 'f'}),
949949
TD(P, f='cbrt'),
950950
),

numpy/_core/meson.build

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -343,14 +343,26 @@ endif
343343
optional_function_attributes = [
344344
['optimize("unroll-loops")', 'OPTIMIZE_UNROLL_LOOPS'],
345345
['optimize("O3")', 'OPTIMIZE_OPT_3'],
346-
['optimize("O2")', 'OPTIMIZE_OPT_2'],
347-
['optimize("nonnull (1)")', 'NONNULL'],
346+
['nonnull(1)', 'NONNULL'],
348347
]
349-
#foreach attr: optional_function_attributes
350-
# if cc.has_function_attribute(attr[0])
351-
# cdata.set10('HAVE_ATTRIBUTE_' + attr[1], true)
352-
# endif
353-
#endforeach
348+
if get_option('disable-optimization') == false
349+
foreach attr: optional_function_attributes
350+
test_code = '''
351+
__attribute__((@0@)) void test_function(void *ptr) {
352+
(void*)ptr;
353+
return;
354+
}
355+
int main(void) {
356+
int dummy = 0;
357+
test_function(&dummy);
358+
return 0;
359+
}
360+
'''.format(attr[0])
361+
if cc.compiles(test_code, name: '__attribute__((' + attr[0] + '))', args: ['-Werror', '-Wattributes'])
362+
cdata.set10('HAVE_ATTRIBUTE_' + attr[1], true)
363+
endif
364+
endforeach
365+
endif
354366

355367
# Max possible optimization flags. We pass this flags to all our dispatch-able
356368
# (multi_targets) sources.
@@ -1039,6 +1051,11 @@ foreach gen_mtargets : [
10391051
LSX,
10401052
]
10411053
],
1054+
[
1055+
'loops_half.dispatch.h',
1056+
src_file.process('src/umath/loops_half.dispatch.c.src'),
1057+
[AVX512_SPR, AVX512_SKX]
1058+
],
10421059
]
10431060
mtargets = mod_features.multi_targets(
10441061
gen_mtargets[0], umath_gen_headers + gen_mtargets[1],

numpy/_core/src/multiarray/arraytypes.c.src

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2534,6 +2534,42 @@ static npy_bool
25342534
}
25352535
/**end repeat**/
25362536

2537+
/**begin repeat
2538+
*
2539+
* #name = BOOL, BYTE, UBYTE, USHORT, SHORT, UINT, INT, ULONG, LONG, FLOAT, DOUBLE#
2540+
* #type = npy_bool, npy_byte, npy_byte, npy_uint16, npy_int16, npy_uint32, npy_int32, npy_uint64, npy_int64, npy_float, npy_double#
2541+
* #nonzero = _NONZERO*11#
2542+
*/
2543+
static npy_intp
2544+
count_nonzero_trivial_@name@(npy_intp count, const char *data, npy_int stride)
2545+
{
2546+
npy_intp nonzero_count = 0;
2547+
while (count--) {
2548+
@type@ *ptmp = (@type@ *)data;
2549+
nonzero_count += (npy_bool) @nonzero@(*ptmp);
2550+
data += stride;
2551+
}
2552+
return nonzero_count;
2553+
}
2554+
/**end repeat**/
2555+
2556+
NPY_NO_EXPORT npy_intp
2557+
count_nonzero_trivial_dispatcher(npy_intp count, const char* data, npy_intp stride, int dtype_num) {
2558+
switch(dtype_num) {
2559+
/**begin repeat
2560+
*
2561+
* #dtypeID = NPY_BOOL, NPY_UINT8, NPY_INT8, NPY_UINT16, NPY_INT16, NPY_UINT32, NPY_INT32, NPY_UINT64, NPY_INT64, NPY_FLOAT32, NPY_FLOAT64#
2562+
* #name = BOOL, BYTE, UBYTE, USHORT, SHORT, UINT, INT, ULONG, LONG, FLOAT, DOUBLE#
2563+
*/
2564+
case @dtypeID@:
2565+
{
2566+
return count_nonzero_trivial_@name@(count, data, stride);
2567+
}
2568+
/**end repeat**/
2569+
}
2570+
return -1;
2571+
}
2572+
25372573
/**begin repeat
25382574
*
25392575
* #fname = CFLOAT, CDOUBLE, CLONGDOUBLE#

numpy/_core/src/multiarray/arraytypes.h.src

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT int BOOL_argmax,
164164
#undef INT_not_size_named
165165
#undef LONGLONG_not_size_named
166166

167+
NPY_NO_EXPORT npy_intp
168+
count_nonzero_trivial_dispatcher(npy_intp count, const char* data, npy_intp stride, int dtype_num);
169+
167170
#endif /* NUMPY_CORE_SRC_MULTIARRAY_ARRAYTYPES_H_ */

numpy/_core/src/multiarray/item_selection.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ PyArray_PutTo(PyArrayObject *self, PyObject* values0, PyObject *indices0,
398398
}
399399
ni = PyArray_SIZE(indices);
400400
if ((ni > 0) && (PyArray_Size((PyObject *)self) == 0)) {
401-
PyErr_SetString(PyExc_IndexError,
401+
PyErr_SetString(PyExc_IndexError,
402402
"cannot replace elements of an empty array");
403403
goto fail;
404404
}
@@ -2629,7 +2629,7 @@ count_nonzero_u64(const char *data, npy_intp bstride, npy_uintp len)
26292629
return count;
26302630
}
26312631
/*
2632-
* Counts the number of True values in a raw boolean array. This
2632+
* Counts the number of non-zero values in a raw int array. This
26332633
* is a low-overhead function which does no heap allocations.
26342634
*
26352635
* Returns -1 on error.
@@ -2739,6 +2739,15 @@ PyArray_CountNonzero(PyArrayObject *self)
27392739
}
27402740
}
27412741
else {
2742+
/* Special low-overhead version specific to the float types (and some others) */
2743+
if (PyArray_ISNOTSWAPPED(self) && PyArray_ISALIGNED(self)) {
2744+
npy_intp dispatched_nonzero_count = count_nonzero_trivial_dispatcher(count,
2745+
data, stride, dtype->type_num);
2746+
if (dispatched_nonzero_count >= 0) {
2747+
return dispatched_nonzero_count;
2748+
}
2749+
}
2750+
27422751
NPY_BEGIN_THREADS_THRESHOLDED(count);
27432752
while (count--) {
27442753
if (nonzero(data, self)) {

numpy/_core/src/multiarray/lowlevel_strided_loops.c.src

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
708708

709709
/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/
710710

711-
#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) || defined(NPY_HAVE_F16C)
711+
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || defined(NPY_HAVE_F16C)
712712
#define EMULATED_FP16 0
713713
#define NATIVE_FP16 1
714714
typedef _Float16 _npy_half;

0 commit comments

Comments
 (0)
0