8000 MAINT,ENH: Reorganize buffered iteration setup by seberg · Pull Request #27883 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

MAINT,ENH: Reorganize buffered iteration setup #27883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8d69f05
MAINT: Refactor buffered loop to do an initial setup
seberg Nov 28, 2024
7d19f2d
TST: Fixup tests to pass (for now not correct)
seberg Nov 30, 2024
809cf0c
BUG: Avoid a coresize of 0
seberg Dec 2, 2024
9302b30
TST: The nullpointer doesn't print uniformly accross platforms
seberg Dec 2, 2024
54d967f
BUG: Add missing pointer dereference
seberg Dec 2, 2024
87ad5a2
BUG: Fix coreoffset mechanism and unset buf-reuse on different (parti…
seberg Dec 2, 2024
67cddf7
TST: Forgot one more nditer debug_print() fixup
seberg Dec 2, 2024
facd237
Apply suggestions from code review
seberg Dec 2, 2024
22f335c
MAINT: Remove bufnever logic from allocate-arrays (we do it later any…
seberg Dec 2, 2024
ac282fc
MAINT: Rename to BUF_SINGLESTRIDE and use the new gap in the flags
seberg Dec 2, 2024
6ed3b99
MAINT: Remove unnecessary fixed-stride logic
seberg Dec 3, 2024
162a951
MAINT: Fill in correct buffer transferstride always
seberg Dec 3, 2024
0cc37b9
MAINT: Don't use static zero (initialize by caller instead)
seberg Dec 3, 2024
786e546
DOC: Improve the function documentation somewhat.
seberg Dec 3, 2024
d09be24
BUG: Fix typo in new assert
seberg Dec 3, 2024
92ecbaf
MAINT: Reinstante the CONTIG flag (as buggy as it was)
seberg Dec 5, 2024
5bad357
remove debug print (was this part of the sanitizer problem?!)
seberg Dec 5, 2024
67cb5c5
TST: See if deleting a, b makes santizer tests pass
seberg Dec 6, 2024
5508cc5
MAINT: Move buffersize init (new code always limits it to itersize)
seberg Dec 10, 2024
be6749d
DOC: Explain buffer-setup cost more (maybe should rethink it...)
seberg Dec 12, 2024
426934d
DOC: Expand/change iteration explanation based on Marten's review
seberg Dec 15, 2024
1836fd1
MAINT: More review comments (mostly code clarifications/simplifications)
seberg Dec 15, 2024
e758983
MAINT: Smaller changes based on review
seberg Dec 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
MAINT: Reinstante the CONTIG flag (as buggy as it was)
  • Loading branch information
seberg committed Dec 5, 2024
commit 92ecbaf29b2854a95108f6e8f7ceb627471cacf0
15 changes: 11 additions & 4 deletions numpy/_core/src/multiarray/nditer_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,8 @@ NpyIter_DebugPrint(NpyIter *iter)
printf("WRITEMASKED ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUF_SINGLESTRIDE)
printf("BUF_SINGLESTRIDE ");
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CONTIG)
printf("CONTIG ");
printf("\n");
}
printf("|\n");
Expand All @@ -1532,9 +1534,9 @@ NpyIter_DebugPrint(NpyIter *iter)
if (itflags&NPY_ITFLAG_REDUCE) {
printf("| REDUCE Pos: %d\n",
(int)NBF_REDUCE_POS(bufferdata));
printf("| BUFFER Reduce outersize: %d\n",
printf("| REDUCE OuterSize: %d\n",
(int)NBF_REDUCE_OUTERSIZE(bufferdata));
printf("| BUFFER OuterDim: %d\n",
printf("| REDUCE OuterDim: %d\n",
(int)NBF_OUTERDIM(bufferdata));
}
printf("| Strides: ");
Expand Down Expand Up @@ -1894,12 +1896,17 @@ npyiter_fill_buffercopy_params(
}

if (opitflags & NPY_OP_ITFLAG_BUF_SINGLESTRIDE) {
/* Flatten the copy into a single stride. */
*ndim_transfer = 1;
*op_shape = op_transfersize;
assert(**op_coords == 0); /* initialized by caller currently */
*op_strides = &NAD_STRIDES(axisdata)[iop];
if ((*op_strides)[0] == 0) {
if ((*op_strides)[0] == 0 && (
!(opitflags & NPY_OP_ITFLAG_CONTIG) ||
(opitflags & NPY_OP_ITFLAG_WRITE))) {
/*
* If the user didn't force contig, optimize single element.
* (Unless CONTIG was requested and this is not a write/reduce!)
*/
*op_transfersize = 1;
*buf_stride = 0;
}
Expand Down
26 changes: 22 additions & 4 deletions numpy/_core/src/multiarray/nditer_constr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,10 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags)
*op_itflags |= NPY_OP_ITFLAG_VIRTUAL;
}

if (op_flags & NPY_ITER_CONTIG) {
*op_itflags |= NPY_OP_ITFLAG_CONTIG;
}

return 1;
}

Expand Down Expand Up @@ -2175,6 +2179,11 @@ npyiter_find_buffering_setup(NpyIter *iter)
npy_bool is_reduce_op;
npy_bool op_is_buffered = (op_itflags[iop]&NPY_OP_ITFLAG_CAST) != 0;

/* If contig was requested and this is not writeable avoid zero strides */
npy_bool avoid_zero_strides = (
(op_itflags[iop] & NPY_OP_ITFLAG_CONTIG) != 0
&& !(op_itflags[iop] & NPY_OP_ITFLAG_WRITE));

/*
* Figure out if this is iterated as a reduce op. Even one marked
* for reduction may not be iterated as one.
Expand All @@ -2189,16 +2198,19 @@ npyiter_find_buffering_setup(NpyIter *iter)
else if (op_single_stride_dims[iop] == best_dim && !op_is_buffered) {
/*
* Optimization: This operand is not buffered and we might as well
* iterate it as an unbuffered reduce operand (if not yet buffered).
* iterate it as an unbuffered reduce operand.
*/
is_reduce_op = 1;
}
else if (NAD_STRIDES(reduce_axisdata)[iop] == 0
&& op_single_stride_dims[iop] <= best_dim) {
&& op_single_stride_dims[iop] <= best_dim
&& !avoid_zero_strides) {
/*
* Optimization: If the outer (reduce) stride is 0 on the operand
* then we can iterate this in a reduce way: buffer the core only
* and repeat it in the "outer" dimension.
* If user requested contig, we may have to avoid 0 strides, this
* is incompatible with the reduce path.
*/
is_reduce_op = 1;
}
Expand Down Expand Up @@ -2229,7 +2241,8 @@ npyiter_find_buffering_setup(NpyIter *iter)
*/
if (!is_reduce_op
&& NIT_OPITFLAGS(iter)[iop] & NPY_OP_ITFLAG_BUF_SINGLESTRIDE
&& NAD_STRIDES(axisdata)[iop] == 0) {
&& NAD_STRIDES(axisdata)[iop] == 0
&& !avoid_zero_strides) {
/* This op is always 0 strides, so even the buffer is that. */
inner_stride = 0;
reduce_outer_stride = 0;
Expand Down Expand Up @@ -3310,11 +3323,16 @@ npyiter_allocate_arrays(NpyIter *iter,
}

/* Here we can finally check for contiguous iteration */
if (op_flags[iop] & NPY_ITER_CONTIG) {
if (op_itflags[iop] & NPY_OP_ITFLAG_CONTIG) {
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
npy_intp stride = NAD_STRIDES(axisdata)[iop];

if (stride != op_dtype[iop]->elsize) {
/*
* Need to copy to buffer (cast) to ensure contiguous
* NOTE: This is the wrong place in case of axes reorder
* (there is an xfailing test for this).
*/
NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
"because of NPY_ITER_CONTIG\n");
op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
Expand Down
2 changes: 2 additions & 0 deletions numpy/_core/src/multiarray/nditer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@
#define NPY_OP_ITFLAG_FORCECOPY 0x0200
/* The operand has temporary data, write it back at dealloc */
#define NPY_OP_ITFLAG_HAS_WRITEBACK 0x0400
/* Whether the user request a contiguous operand */
#define NPY_OP_ITFLAG_CONTIG 0x0800

/*
* The data layout of the iterator is fully specified by
Expand Down
77 changes: 75 additions & 2 deletions numpy/_core/tests/test_nditer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2320,6 +2320,79 @@ def test_iter_buffering_growinner():
assert_equal(i[0].size, a.size)


@pytest.mark.parametrize("read_or_readwrite", ["readonly", "readwrite"])
def test_iter_contig_flag_reduce_error(read_or_readwrite):
# Test that a non-contiguous operand is rejected without buffering.
# NOTE: This is true even for a reduction, where we return a 0-stride
# below!
with pytest.raises(TypeError, match="Iterator operand required buffering"):
it = np.nditer(
(np.zeros(()),), flags=["external_loop", "reduce_ok"],
op_flags=[(read_or_readwrite, "contig"),], itershape=(10,))


@pytest.mark.parametrize("arr", [
lambda: np.zeros(()),
lambda: np.zeros((20, 1))[::20],
lambda: np.zeros((1, 20))[:, ::20]
])
def test_iter_contig_flag_single_operand_strides(arr):
"""
Tests the strides with the contig flag for both broadcast and non-broadcast
operands in 3 cases where the logic is needed:
1. When everything has a zero stride, the broadcast op needs to repeated
2. When the reduce axis is the last axis (first to iterate).
3. When the reduce axis is the first axis (last to iterate).

NOTE: The semantics of the cast flag are not clearly defined when
it comes to reduction. It is unclear that there are any users.
"""
first_op = np.ones((10, 10))
broadcast_op = arr()
red_op = arr()
# Add a first operand to ensure no axis-reordering and the result shape.
iterator = np.nditer(
(first_op, broadcast_op, red_op),
flags=["external_loop", "reduce_ok", "buffered", "delay_bufalloc"],
op_flags=[("readonly", "contig")] * 2 + [("readwrite", "contig")])

with iterator:
iterator.reset()
try:
for f, b, r in iterator:
# The first operand is contigouos, we should have a view
assert np.shares_memory(f, first_op)
# Although broadcast, the second op always has a contiguous stride
assert b.strides[0] == 8
assert not np.shares_memory(b, broadcast_op)
# The reduction has a contiguous stride or a 0 stride
if red_op.ndim == 0 or red_op.shape[-1] == 1:
assert r.strides[0] == 0
else:
# The stride is 8, although it was not originally:
assert r.strides[0] == 8
# If the reduce stride is 0, buffering makes no difference, but we
# do it anyway right now:
assert not np.shares_memory(r, red_op)
finally:
iterator.debug_print()


@pytest.mark.xfail(reason="The contig flag was always buggy.")
def test_iter_contig_flag_incorrect():
# This case does the wrong thing...
iterator = np.nditer(
(np.ones((10, 10)).T, np.ones((1, 10))),
flags=["external_loop", "reduce_ok", "buffered", "delay_bufalloc"],
op_flags=[("readonly", "contig")] * 2)

with iterator:
iterator.reset()
for a, b in iterator:
assert a.strides == 8
assert b.strides == 8 # should be 8 but is 0 due to axis reorder


@pytest.mark.slow
def test_iter_buffered_reduce_reuse():
# large enough array for all views, including negative strides.
Expand Down Expand Up @@ -3324,8 +3397,8 @@ def test_debug_print(capfd):
| BufIterEnd: 5
| BUFFER CoreSize: 5
| REDUCE Pos: 0
| BUFFER Reduce outersize: 10
| BUFFER OuterDim: 1
| REDUCE OuterSize: 10
| REDUCE OuterDim: 1
| Strides: 8 4
| Ptrs:
| REDUCE Outer Strides: 40 0
Expand Down
Loading
0