MAINT Remove -Wcpp warnings from sklearn.utils._seq_dataset by OmarManzoor · Pull Request #25406 · scikit-learn/scikit-learn · GitHub

MAINT Remove -Wcpp warnings from sklearn.utils._seq_dataset #25406


Merged
merged 4 commits on Feb 3, 2023
15 changes: 8 additions & 7 deletions setup.py
@@ -70,6 +70,7 @@
USE_NEWEST_NUMPY_C_API = (
"sklearn.__check_build._check_build",
"sklearn._loss._loss",
"sklearn._isotonic",
"sklearn.cluster._dbscan_inner",
"sklearn.cluster._hierarchical_fast",
"sklearn.cluster._k_means_common",
@@ -108,23 +109,23 @@
"sklearn.svm._liblinear",
"sklearn.svm._libsvm",
"sklearn.svm._libsvm_sparse",
"sklearn.svm._newrand",
"sklearn.tree._splitter",
"sklearn.tree._utils",
"sklearn.utils._cython_blas",
"sklearn.utils._fast_dict",
"sklearn.utils._heap",
"sklearn.utils._isfinite",
"sklearn.utils._logistic_sigmoid",
"sklearn.utils._openmp_helpers",
"sklearn.utils._weight_vector",
"sklearn.utils._random",
"sklearn.utils._logistic_sigmoid",
"sklearn.utils._readonly_array_wrapper",
"sklearn.utils._typedefs",
"sklearn.utils._heap",
"sklearn.utils._seq_dataset",
"sklearn.utils._sorting",
"sklearn.utils._typedefs",
"sklearn.utils._vector_sentinel",
"sklearn.utils._isfinite",
"sklearn.utils._weight_vector",
"sklearn.utils.murmurhash",
"sklearn.svm._newrand",
"sklearn._isotonic",
)


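For context on the setup.py change: extension modules listed in USE_NEWEST_NUMPY_C_API are compiled with the deprecated numpy C API disabled, which is what makes the -Wcpp deprecation warning go away at build time. Below is a minimal sketch of that kind of mechanism; the macro tuple, the illustrative subset of the module list, and the loop are assumptions for illustration, not a verbatim excerpt of scikit-learn's setup.py.

from setuptools import Extension

# Illustrative subset; the real list is the USE_NEWEST_NUMPY_C_API tuple above.
USE_NEWEST_NUMPY_C_API = ("sklearn.utils._seq_dataset",)

extensions = [
    Extension("sklearn.utils._seq_dataset", ["sklearn/utils/_seq_dataset.pyx"]),
]

# Defining NPY_NO_DEPRECATED_API opts the module out of the pre-1.7 numpy C API,
# which is exactly what numpy's "-Wcpp" warning asks for; modules can only be
# added here once they no longer rely on deprecated constructs such as direct
# ndarray.data access.
DEFINE_MACRO_NUMPY_C_API = ("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")

for ext in extensions:
    if ext.name in USE_NEWEST_NUMPY_C_API:
        ext.define_macros.append(DEFINE_MACRO_NUMPY_C_API)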
20 changes: 10 additions & 10 deletions sklearn/utils/_seq_dataset.pxd.tp
@@ -34,7 +34,7 @@ cimport numpy as cnp

cdef class SequentialDataset{{name_suffix}}:
cdef int current_index
cdef cnp.ndarray index
cdef int[::1] index
cdef int *index_data_ptr
cdef Py_ssize_t n_samples
cdef cnp.uint32_t seed
@@ -53,24 +53,24 @@ cdef class SequentialDataset{{name_suffix}}:


cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef cnp.ndarray X
cdef cnp.ndarray Y
cdef cnp.ndarray sample_weights
cdef const {{c_type}}[:, ::1] X
cdef const {{c_type}}[::1] Y
cdef const {{c_type}}[::1] sample_weights
cdef Py_ssize_t n_features
cdef cnp.npy_intp X_stride
cdef {{c_type}} *X_data_ptr
cdef {{c_type}} *Y_data_ptr
cdef cnp.ndarray feature_indices
cdef const int[::1] feature_indices
cdef int *feature_indices_ptr
cdef {{c_type}} *sample_weight_data


cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef cnp.ndarray X_data
cdef cnp.ndarray X_indptr
cdef cnp.ndarray X_indices
cdef cnp.ndarray Y
cdef cnp.ndarray sample_weights
cdef const {{c_type}}[::1] X_data
cdef const int[::1] X_indptr
cdef const int[::1] X_indices
cdef const {{c_type}}[::1] Y
cdef const {{c_type}}[::1] sample_weights
cdef {{c_type}} *X_data_ptr
cdef int *X_indptr_ptr
cdef int *X_indices_ptr
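The .pxd change above is the heart of the PR: cnp.ndarray attributes, and the arr.data pointer access that accompanies them, are what stands in the way of compiling with NPY_NO_DEPRECATED_API, while typed memoryviews avoid that part of the numpy C API entirely. A standalone sketch of the pattern, using illustrative names rather than code from the PR:

# cython: language_level=3
# Sketch of the ndarray -> typed-memoryview migration applied in this PR.

cdef class Example:
    cdef const double[::1] data    # was: cdef cnp.ndarray data
    cdef double *data_ptr          # raw pointer kept for tight inner loops

    def __cinit__(self, const double[::1] data):
        self.data = data
        # Take the address of the first element instead of the deprecated
        # `<double *> arr.data`; the cast also drops the const qualifier,
        # mirroring how the PR initializes its *_ptr fields.
        self.data_ptr = <double *> &self.data[0]

Passing a numpy array to __cinit__ coerces it into the const memoryview automatically, and because the view is const-qualified, read-only arrays are accepted as well.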
75 changes: 39 additions & 36 deletions sklearn/utils/_seq_dataset.pyx.tp
@@ -197,19 +197,22 @@ cdef class SequentialDataset{{name_suffix}}:
current_index)

# transform the pointed data in numpy CSR array
cdef cnp.ndarray[{{c_type}}, ndim=1] x_data = np.empty(nnz,
dtype={{np_type}})
cdef cnp.ndarray[int, ndim=1] x_indices = np.empty(nnz, dtype=np.int32)
cdef cnp.ndarray[int, ndim=1] x_indptr = np.asarray([0, nnz],
dtype=np.int32)
cdef {{c_type}}[:] x_data = np.empty(nnz, dtype={{np_type}})
cdef int[:] x_indices = np.empty(nnz, dtype=np.int32)
cdef int[:] x_indptr = np.asarray([0, nnz], dtype=np.int32)

for j in range(nnz):
x_data[j] = x_data_ptr[j]
x_indices[j] = x_indices_ptr[j]

cdef int sample_idx = self.index_data_ptr[current_index]

return (x_data, x_indices, x_indptr), y, sample_weight, sample_idx
return (
(np.asarray(x_data), np.asarray(x_indices), np.asarray(x_indptr)),
y,
sample_weight,
sample_idx,
)


cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
@@ -219,10 +222,13 @@ cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
and C-style memory layout.
"""

def __cinit__(self, cnp.ndarray[{{c_type}}, ndim=2, mode='c'] X,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] Y,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] sample_weights,
cnp.uint32_t seed=1):
def __cinit__(
self,
const {{c_type}}[:, ::1] X,
const {{c_type}}[::1] Y,
const {{c_type}}[::1] sample_weights,
cnp.uint32_t seed=1,
):
"""A ``SequentialDataset`` backed by a two-dimensional numpy array.

Parameters
@@ -249,22 +255,18 @@ cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
self.n_samples = X.shape[0]
self.n_features = X.shape[1]

cdef cnp.ndarray[int, ndim=1, mode='c'] feature_indices = \
np.arange(0, self.n_features, dtype=np.intc)
self.feature_indices = feature_indices
self.feature_indices_ptr = <int *> feature_indices.data
self.feature_indices = np.arange(0, self.n_features, dtype=np.intc)
self.feature_indices_ptr = <int *> &self.feature_indices[0]

self.current_index = -1
self.X_stride = X.strides[0] // X.itemsize
self.X_data_ptr = <{{c_type}} *>X.data
self.Y_data_ptr = <{{c_type}} *>Y.data
self.sample_weight_data = <{{c_type}} *>sample_weights.data
self.X_data_ptr = <{{c_type}} *> &X[0, 0]
self.Y_data_ptr = <{{c_type}} *> &Y[0]
self.sample_weight_data = <{{c_type}} *> &sample_weights[0]

# Use index array for fast shuffling
cdef cnp.ndarray[int, ndim=1, mode='c'] index = \
np.arange(0, self.n_samples, dtype=np.intc)
self.index = index
self.index_data_ptr = <int *>index.data
self.index = np.arange(0, self.n_samples, dtype=np.intc)
self.index_data_ptr = <int *> &self.index[0]
# seed should not be 0 for our_rand_r
self.seed = max(seed, 1)

@@ -284,12 +286,15 @@ cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
"""A ``SequentialDataset`` backed by a scipy sparse CSR matrix. """

def __cinit__(self, cnp.ndarray[{{c_type}}, ndim=1, mode='c'] X_data,
cnp.ndarray[int, ndim=1, mode='c'] X_indptr,
cnp.ndarray[int, ndim=1, mode='c'] X_indices,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] Y,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] sample_weights,
cnp.uint32_t seed=1):
def __cinit__(
self,
const {{c_type}}[::1] X_data,
const int[::1] X_indptr,
const int[::1] X_indices,
const {{c_type}}[::1] Y,
const {{c_type}}[::1] sample_weights,
cnp.uint32_t seed=1,
):
"""Dataset backed by a scipy sparse CSR matrix.

The feature indices of ``x`` are given by x_ind_ptr[0:nnz].
@@ -322,18 +327,16 @@ cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):

self.n_samples = Y.shape[0]
self.current_index = -1
self.X_data_ptr = <{{c_type}} *>X_data.data
self.X_indptr_ptr = <int *>X_indptr.data
self.X_indices_ptr = <int *>X_indices.data
self.X_data_ptr = <{{c_type}} *> &X_data[0]
self.X_indptr_ptr = <int *> &X_indptr[0]
self.X_indices_ptr = <int *> &X_indices[0]

self.Y_data_ptr = <{{c_type}} *>Y.data
self.sample_weight_data = <{{c_type}} *>sample_weights.data
self.Y_data_ptr = <{{c_type}} *> &Y[0]
self.sample_weight_data = <{{c_type}} *> &sample_weights[0]

# Use index array for fast shuffling
cdef cnp.ndarray[int, ndim=1, mode='c'] idx = np.arange(self.n_samples,
dtype=np.intc)
self.index = idx
self.index_data_ptr = <int *>idx.data
self.index = np.arange(self.n_samples, dtype=np.intc)
self.index_data_ptr = <int *> &self.index[0]
# seed should not be 0 for our_rand_r
self.seed = max(seed, 1)
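
A hedged usage sketch of the resulting constructors. It assumes the {{name_suffix}} template is instantiated as a 64-bit variant named ArrayDataset64 (with {{c_type}} = double), and that the private _next_py helper returns the (x_data, x_indices, x_indptr), y, sample_weight, sample_idx tuple shown in the diff above; neither name is guaranteed by this page.

import numpy as np
from sklearn.utils._seq_dataset import ArrayDataset64  # assumed generated name

X = np.random.rand(6, 3)                      # C-contiguous float64
y = np.arange(6, dtype=np.float64)
sample_weight = np.ones(6, dtype=np.float64)

# const memoryview parameters accept read-only inputs directly.
X.setflags(write=False)

dataset = ArrayDataset64(X, y, sample_weight, seed=42)
(x_data, x_indices, x_indptr), yi, swi, idx = dataset._next_py()
print(idx, yi, x_data)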
