@@ -3674,6 +3674,191 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
3674
3674
return res ;
3675
3675
}
3676
3676
3677
+ /*
3678
+ * Private implementation of PyArray_CopyAnyInto with an additional order
3679
+ * parameter.
3680
+ */
3681
+ NPY_NO_EXPORT int
3682
+ HPyArray_CopyAsFlat (HPyContext * ctx ,
3683
+ HPy /* PyArrayObject * */ dst ,
3684
+ HPy /* PyArrayObject * */ src , NPY_ORDER order )
3685
+ {
3686
+ NpyIter * dst_iter , * src_iter ;
3687
+
3688
+ NpyIter_IterNextFunc * dst_iternext , * src_iternext ;
3689
+ char * * dst_dataptr , * * src_dataptr ;
3690
+ npy_intp dst_stride , src_stride ;
3691
+ npy_intp * dst_countptr , * src_countptr ;
3692
+ npy_uint32 baseflags ;
3693
+
3694
+ npy_intp dst_count , src_count , count ;
3695
+ npy_intp dst_size , src_size ;
3696
+ int needs_api ;
3697
+
3698
+ HPY_NPY_BEGIN_THREADS_DEF ;
3699
+
3700
+ PyArrayObject * dst_data = PyArrayObject_AsStruct (ctx , dst );
3701
+ if (HPyArray_FailUnlessWriteableWithStruct (ctx , dst , dst_data , "destination array" ) < 0 ) {
3702
+ return -1 ;
3703
+ }
3704
+
3705
+ /*
3706
+ * If the shapes match and a particular order is forced
3707
+ * for both, use the more efficient CopyInto
3708
+ */
3709
+ PyArrayObject * src_data = PyArrayObject_AsStruct (ctx , src );
3710
+ if (order != NPY_ANYORDER && order != NPY_KEEPORDER &&
3711
+ PyArray_NDIM (dst_data ) == PyArray_NDIM (src_data ) &&
3712
+ PyArray_CompareLists (PyArray_DIMS (dst_data ), PyArray_DIMS (src_data ),
3713
+ PyArray_NDIM (dst_data ))) {
3714
+ return HPyArray_CopyInto (ctx , dst , src );
3715
+ }
3716
+
3717
+ dst_size = HPyArray_SIZE (dst_data );
3718
+ src_size = HPyArray_SIZE (src_data );
3719
+ if (dst_size != src_size ) {
3720
+ // PyErr_Format(PyExc_ValueError,
3721
+ // "cannot copy from array of size %" NPY_INTP_FMT " into an array "
3722
+ // "of size %" NPY_INTP_FMT, src_size, dst_size);
3723
+ HPyErr_SetString (ctx , ctx -> h_ValueError ,
3724
+ "cannot copy from array of size %" NPY_INTP_FMT " into an array "
3725
+ "of size %" NPY_INTP_FMT );
3726
+ return -1 ;
3727
+ }
3728
+
3729
+ /* Zero-sized arrays require nothing be done */
3730
+ if (dst_size == 0 ) {
3731
+ return 0 ;
3732
+ }
3733
+
3734
+ baseflags = NPY_ITER_EXTERNAL_LOOP |
3735
+ NPY_ITER_DONT_NEGATE_STRIDES |
3736
+ NPY_ITER_REFS_OK ;
3737
+
3738
+ /*
3739
+ * This copy is based on matching C-order traversals of src and dst.
3740
+ * By using two iterators, we can find maximal sub-chunks that
3741
+ * can be processed at once.
3742
+ */
3743
+ dst_iter = HNpyIter_New (ctx , dst , NPY_ITER_WRITEONLY | baseflags ,
3744
+ order ,
3745
+ NPY_NO_CASTING ,
3746
+ HPy_NULL );
3747
+ if (dst_iter == NULL ) {
3748
+ return -1 ;
3749
+ }
3750
+ src_iter = HNpyIter_New (ctx , src , NPY_ITER_READONLY | baseflags ,
3751
+ order ,
3752
+ NPY_NO_CASTING ,
3753
+ HPy_NULL );
3754
+ if (src_iter == NULL ) {
3755
+ HNpyIter_Deallocate (ctx , dst_iter );
3756
+ return -1 ;
3757
+ }
3758
+
3759
+ /* Get all the values needed for the inner loop */
3760
+ dst_iternext = HNpyIter_GetIterNext (ctx , dst_iter , NULL );
3761
+ dst_dataptr = NpyIter_GetDataPtrArray (dst_iter );
3762
+ /* Since buffering is disabled, we can cache the stride */
3763
+ dst_stride = NpyIter_GetInnerStrideArray (dst_iter )[0 ];
3764
+ dst_countptr = NpyIter_GetInnerLoopSizePtr (dst_iter );
3765
+
3766
+ src_iternext = HNpyIter_GetIterNext (ctx , src_iter , NULL );
3767
+ src_dataptr = NpyIter_GetDataPtrArray (src_iter );
3768
+ /* Since buffering is disabled, we can cache the stride */
3769
+ src_stride = NpyIter_GetInnerStrideArray (src_iter )[0 ];
3770
+ src_countptr = NpyIter_GetInnerLoopSizePtr (src_iter );
3771
+
3772
+ if (dst_iternext == NULL || src_iternext == NULL ) {
3773
+ HNpyIter_Deallocate (ctx , dst_iter );
3774
+ HNpyIter_Deallocate (ctx , src_iter );
3775
+ return -1 ;
3776
+ }
3777
+
3778
+ needs_api = NpyIter_IterationNeedsAPI (dst_iter ) ||
3779
+ NpyIter_IterationNeedsAPI (src_iter );
3780
+
3781
+ /*
3782
+ * Because buffering is disabled in the iterator, the inner loop
3783
+ * strides will be the same throughout the iteration loop. Thus,
3784
+ * we can pass them to this function to take advantage of
3785
+ * contiguous strides, etc.
3786
+ */
3787
+ NPY_cast_info cast_info ;
3788
+ HPy src_descr = HPyArray_DESCR (ctx , src , src_data );
3789
+ PyArray_Descr * src_descr_data = PyArray_Descr_AsStruct (ctx , src_descr );
3790
+ HPy dst_descr = HPyArray_DESCR (ctx , src , dst_data );
3791
+ PyArray_Descr * dst_descr_data = PyArray_Descr_AsStruct (ctx , dst_descr );
3792
+ if (HPyArray_GetDTypeTransferFunction (ctx ,
3793
+ HIsUintAlignedWithDescr (ctx , src , src_data , src_descr_data ) &&
3794
+ HPyIsAlignedWithDescr (ctx , src , src_data , src_descr_data ) &&
3795
+ HIsUintAlignedWithDescr (ctx , dst , dst_data , dst_descr_data ) &&
3796
+ HPyIsAlignedWithDescr (ctx , dst , dst_data , dst_descr_data ),
3797
+ src_stride , dst_stride ,
3798
+ src_descr , src_descr ,
3799
+ 0 ,
3800
+ & cast_info , & needs_api ) != NPY_SUCCEED ) {
3801
+ HNpyIter_Deallocate (ctx , dst_iter );
3802
+ HNpyIter_Deallocate (ctx , src_iter );
3803
+ return -1 ;
3804
+ }
3805
+
3806
+ if (!needs_api ) {
3807
+ HPY_NPY_BEGIN_THREADS (ctx );
3808
+ }
3809
+
3810
+ dst_count = * dst_countptr ;
3811
+ src_count = * src_countptr ;
3812
+ char * args [2 ] = {src_dataptr [0 ], dst_dataptr [0 ]};
3813
+ npy_intp strides [2 ] = {src_stride , dst_stride };
3814
+
3815
+ int res = 0 ;
3816
+ for (;;) {
3817
+ /* Transfer the biggest amount that fits both */
3818
+ count = (src_count < dst_count ) ? src_count : dst_count ;
3819
+ if (cast_info .func (ctx , & cast_info .context ,
3820
+ args , & count , strides , cast_info .auxdata ) < 0 ) {
3821
+ res = -1 ;
3822
+ break ;
3823
+ }
3824
+
3825
+ /* If we exhausted the dst block, refresh it */
3826
+ if (dst_count == count ) {
3827
+ res = dst_iternext (ctx , dst_iter );
3828
+ if (res == 0 ) {
3829
+ break ;
3830
+ }
3831
+ dst_count = * dst_countptr ;
3832
+ args [1 ] = dst_dataptr [0 ];
3833
+ }
3834
+ else {
3835
+ dst_count -= count ;
3836
+ args [1 ] += count * dst_stride ;
3837
+ }
3838
+
3839
+ /* If we exhausted the src block, refresh it */
3840
+ if (src_count == count ) {
3841
+ res = src_iternext (ctx , src_iter );
3842
+ if (res == 0 ) {
3843
+ break ;
3844
+ }
3845
+ src_count = * src_countptr ;
3846
+ args [0 ] = src_dataptr [0 ];
3847
+ }
3848
+ else {
3849
+ src_count -= count ;
3850
+ args [0 ] += count * src_stride ;
3851
+ }
3852
+ }
3853
+
3854
+ HPY_NPY_END_THREADS (ctx );
3855
+
3856
+ HNPY_cast_info_xfree (ctx , & cast_info );
3857
+ HNpyIter_Deallocate (ctx , dst_iter );
3858
+ HNpyIter_Deallocate (ctx , src_iter );
3859
+ return res ;
3860
+ }
3861
+
3677
3862
/*NUMPY_API
3678
3863
* Copy an Array into another array -- memory must not overlap
3679
3864
* Does not require src and dest to have "broadcastable" shapes
@@ -3691,6 +3876,25 @@ PyArray_CopyAnyInto(PyArrayObject *dst, PyArrayObject *src)
3691
3876
return PyArray_CopyAsFlat (dst , src , NPY_CORDER );
3692
3877
}
3693
3878
3879
/*HPY_NUMPY_API
 * Copy an Array into another array -- memory must not overlap
 * Does not require src and dest to have "broadcastable" shapes
 * (only the same number of elements).
 *
 * HPy port of PyArray_CopyAnyInto above; `dst` and `src` are handles to
 * PyArrayObject instances.
 *
 * TODO: For NumPy 2.0, this could accept an order parameter which
 *       only allows NPY_CORDER and NPY_FORDER.  Could also rename
 *       this to CopyAsFlat to make the name more intuitive.
 *
 * Returns 0 on success, -1 on error.
 */
NPY_NO_EXPORT int
HPyArray_CopyAnyInto(HPyContext *ctx,
                     HPy /* PyArrayObject * */ dst,
                     HPy /* PyArrayObject * */ src)
{
    /* Thin wrapper: delegate to the flat-copy implementation with a
     * fixed C-order traversal. */
    return HPyArray_CopyAsFlat(ctx, dst, src, NPY_CORDER);
}
3897
+
3694
3898
/*NUMPY_API
3695
3899
* Copy an Array into another array.
3696
3900
* Broadcast to the destination shape if necessary.
0 commit comments