MAINT Remove -Wcpp warnings from sklearn.utils._seq_dataset by OmarManzoor · Pull Request #25406 · scikit-learn/scikit-learn · GitHub

MAINT Remove -Wcpp warnings from sklearn.utils._seq_dataset #25406


Merged
merged 4 commits on Feb 3, 2023
15 changes: 8 additions & 7 deletions setup.py
@@ -70,6 +70,7 @@
USE_NEWEST_NUMPY_C_API = (
"sklearn.__check_build._check_build",
"sklearn._loss._loss",
"sklearn._isotonic",
"sklearn.cluster._dbscan_inner",
"sklearn.cluster._hierarchical_fast",
"sklearn.cluster._k_means_common",
@@ -108,23 +109,23 @@
"sklearn.svm._liblinear",
"sklearn.svm._libsvm",
"sklearn.svm._libsvm_sparse",
"sklearn.svm._newrand",
"sklearn.tree._splitter",
"sklearn.tree._utils",
"sklearn.utils._cython_blas",
"sklearn.utils._fast_dict",
"sklearn.utils._heap",
"sklearn.utils._isfinite",
"sklearn.utils._logistic_sigmoid",
"sklearn.utils._openmp_helpers",
"sklearn.utils._weight_vector",
"sklearn.utils._random",
"sklearn.utils._logistic_sigmoid",
"sklearn.utils._readonly_array_wrapper",
"sklearn.utils._typedefs",
"sklearn.utils._heap",
"sklearn.utils._seq_dataset",
"sklearn.utils._sorting",
"sklearn.utils._typedefs",
"sklearn.utils._vector_sentinel",
"sklearn.utils._isfinite",
"sklearn.utils._weight_vector",
"sklearn.utils.murmurhash",
"sklearn.svm._newrand",
"sklearn._isotonic",
)


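For context on the setup.py change: extension modules listed in USE_NEWEST_NUMPY_C_API are compiled with the deprecated numpy C API disabled, which is what makes the -Wcpp deprecation warning go away at build time. Below is a minimal sketch of that kind of mechanism; the macro tuple, the illustrative subset of the module list, and the loop are assumptions for illustration, not a verbatim excerpt of scikit-learn's setup.py.

from setuptools import Extension

# Illustrative subset; the real list is the USE_NEWEST_NUMPY_C_API tuple above.
USE_NEWEST_NUMPY_C_API = ("sklearn.utils._seq_dataset",)

extensions = [
    Extension("sklearn.utils._seq_dataset", ["sklearn/utils/_seq_dataset.pyx"]),
]

# Defining NPY_NO_DEPRECATED_API opts the module out of the pre-1.7 numpy C API,
# which is exactly what numpy's "-Wcpp" warning asks for; modules can only be
# added here once they no longer rely on deprecated constructs such as direct
# ndarray.data access.
DEFINE_MACRO_NUMPY_C_API = ("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")

for ext in extensions:
    if ext.name in USE_NEWEST_NUMPY_C_API:
        ext.define_macros.append(DEFINE_MACRO_NUMPY_C_API)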
20 changes: 10 additions & 10 deletions sklearn/utils/_seq_dataset.pxd.tp
@@ -34,7 +34,7 @@ cimport numpy as cnp

cdef class SequentialDataset{{name_suffix}}:
cdef int current_index
cdef cnp.ndarray index
cdef int[::1] index
cdef int *index_data_ptr
cdef Py_ssize_t n_samples
cdef cnp.uint32_t seed
@@ -53,24 +53,24 @@ cdef class SequentialDataset{{name_suffix}}:


cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef cnp.ndarray X
cdef cnp.ndarray Y
cdef cnp.ndarray sample_weights
cdef const {{c_type}}[:, ::1] X
cdef const {{c_type}}[::1] Y
cdef const {{c_type}}[::1] sample_weights
cdef Py_ssize_t n_features
cdef cnp.npy_intp X_stride
cdef {{c_type}} *X_data_ptr
cdef {{c_type}} *Y_data_ptr
cdef cnp.ndarray feature_indices
cdef const int[::1] feature_indices
cdef int *feature_indices_ptr
cdef {{c_type}} *sample_weight_data


cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef cnp.ndarray X_data
cdef cnp.ndarray X_indptr
cdef cnp.ndarray X_indices
cdef cnp.ndarray Y
cdef cnp.ndarray sample_weights
cdef const {{c_type}}[::1] X_data
cdef const int[::1] X_indptr
cdef const int[::1] X_indices
cdef const {{c_type}}[::1] Y
cdef const {{c_type}}[::1] sample_weights
cdef {{c_type}} *X_data_ptr
cdef int *X_indptr_ptr
cdef int *X_indices_ptr
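The .pxd change above is the heart of the PR: cnp.ndarray attributes, and the arr.data pointer access that accompanies them, are what stands in the way of compiling with NPY_NO_DEPRECATED_API, while typed memoryviews avoid that part of the numpy C API entirely. A standalone sketch of the pattern, using illustrative names rather than code from the PR:

# cython: language_level=3
# Sketch of the ndarray -> typed-memoryview migration applied in this PR.

cdef class Example:
    cdef const double[::1] data    # was: cdef cnp.ndarray data
    cdef double *data_ptr          # raw pointer kept for tight inner loops

    def __cinit__(self, const double[::1] data):
        self.data = data
        # Take the address of the first element instead of the deprecated
        # `<double *> arr.data`; the cast also drops the const qualifier,
        # mirroring how the PR initializes its *_ptr fields.
        self.data_ptr = <double *> &self.data[0]

Passing a numpy array to __cinit__ coerces it into the const memoryview automatically, and because the view is const-qualified, read-only arrays are accepted as well.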
75 changes: 39 additions & 36 deletions sklearn/utils/_seq_dataset.pyx.tp
@@ -197,19 +197,22 @@ cdef class SequentialDataset{{name_suffix}}:
current_index)

# transform the pointed data in numpy CSR array
cdef cnp.ndarray[{{c_type}}, ndim=1] x_data = np.empty(nnz,
dtype={{np_type}})
cdef cnp.ndarray[int, ndim=1] x_indices = np.empty(nnz, dtype=np.int32)
cdef cnp.ndarray[int, ndim=1] x_indptr = np.asarray([0, nnz],
dtype=np.int32)
cdef {{c_type}}[:] x_data = np.empty(nnz, dtype={{np_type}})
cdef int[:] x_indices = np.empty(nnz, dtype=np.int32)
cdef int[:] x_indptr = np.asarray([0, nnz], dtype=np.int32)

for j in range(nnz):
x_data[j] = x_data_ptr[j]
x_indices[j] = x_indices_ptr[j]

cdef int sample_idx = self.index_data_ptr[current_index]

return (x_data, x_indices, x_indptr), y, sample_weight, sample_idx
return (
(np.asarray(x_data), np.asarray(x_indices), np.asarray(x_indptr)),
y,
sample_weight,
sample_idx,
)


cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
@@ -219,10 +222,13 @@ cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
and C-style memory layout.
"""

def __cinit__(self, cnp.ndarray[{{c_type}}, ndim=2, mode='c'] X,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] Y,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] sample_weights,
cnp.uint32_t seed=1):
def __cinit__(
self,
const {{c_type}}[:, ::1] X,
const {{c_type}}[::1] Y,
const {{c_type}}[::1] sample_weights,
cnp.uint32_t seed=1,
):
"""A ``SequentialDataset`` backed by a two-dimensional numpy array.

Parameters
@@ -249,22 +255,18 @@ cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
self.n_samples = X.shape[0]
self.n_features = X.shape[1]

cdef cnp.ndarray[int, ndim=1, mode='c'] feature_indices = \
np.arange(0, self.n_features, dtype=np.intc)
self.feature_indices = feature_indices
self.feature_indices_ptr = <int *> feature_indices.data
self.feature_indices = np.arange(0, self.n_features, dtype=np.intc)
self.feature_indices_ptr = <int *> &self.feature_indices[0]

self.current_index = -1
self.X_stride = X.strides[0] // X.itemsize
self.X_data_ptr = <{{c_type}} *>X.data
self.Y_data_ptr = <{{c_type}} *>Y.data
self.sample_weight_data = <{{c_type}} *>sample_weights.data
self.X_data_ptr = <{{c_type}} *> &X[0, 0]
self.Y_data_ptr = <{{c_type}} *> &Y[0]
self.sample_weight_data = <{{c_type}} *> &sample_weights[0]

# Use index array for fast shuffling
cdef cnp.ndarray[int, ndim=1, mode='c'] index = \
np.arange(0, self.n_samples, dtype=np.intc)
self.index = index
self.index_data_ptr = <int *>index.data
self.index = np.arange(0, self.n_samples, dtype=np.intc)
self.index_data_ptr = <int *> &self.index[0]
# seed should not be 0 for our_rand_r
self.seed = max(seed, 1)

@@ -284,12 +286,15 @@ cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
"""A ``SequentialDataset`` backed by a scipy sparse CSR matrix. """

def __cinit__(self, cnp.ndarray[{{c_type}}, ndim=1, mode='c'] X_data,
cnp.ndarray[int, ndim=1, mode='c'] X_indptr,
cnp.ndarray[int, ndim=1, mode='c'] X_indices,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] Y,
cnp.ndarray[{{c_type}}, ndim=1, mode='c'] sample_weights,
cnp.uint32_t seed=1):
def __cinit__(
self,
const {{c_type}}[::1] X_data,
const int[::1] X_indptr,
const int[::1] X_indices,
const {{c_type}}[::1] Y,
const {{c_type}}[::1] sample_weights,
cnp.uint32_t seed=1,
):
"""Dataset backed by a scipy sparse CSR matrix.

The feature indices of ``x`` are given by x_ind_ptr[0:nnz].
@@ -322,18 +327,16 @@ cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):

self.n_samples = Y.shape[0]
self.current_index = -1
self.X_data_ptr = <{{c_type}} *>X_data.data
self.X_indptr_ptr = <int *>X_indptr.data
self.X_indices_ptr = <int *>X_indices.data
self.X_data_ptr = <{{c_type}} *> &X_data[0]
self.X_indptr_ptr = <int *> &X_indptr[0]
self.X_indices_ptr = <int *> &X_indices[0]

self.Y_data_ptr = <{{c_type}} *>Y.data
self.sample_weight_data = <{{c_type}} *>sample_weights.data
self.Y_data_ptr = <{{c_type}} *> &Y[0]
self.sample_weight_data = <{{c_type}} *> &sample_weights[0]

# Use index array for fast shuffling
cdef cnp.ndarray[int, ndim=1, mode='c'] idx = np.arange(self.n_samples,
dtype=np.intc)
self.index = idx
self.index_data_ptr = <int *>idx.data
self.index = np.arange(self.n_samples, dtype=np.intc)
self.index_data_ptr = <int *> &self.index[0]
# seed should not be 0 for our_rand_r
self.seed = max(seed, 1)
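
A hedged usage sketch of the resulting constructors. It assumes the {{name_suffix}} template is instantiated as a 64-bit variant named ArrayDataset64 (with {{c_type}} = double), and that the private _next_py helper returns the (x_data, x_indices, x_indptr), y, sample_weight, sample_idx tuple shown in the diff above; neither name is guaranteed by this page.

import numpy as np
from sklearn.utils._seq_dataset import ArrayDataset64  # assumed generated name

X = np.random.rand(6, 3)                      # C-contiguous float64
y = np.arange(6, dtype=np.float64)
sample_weight = np.ones(6, dtype=np.float64)

# const memoryview parameters accept read-only inputs directly.
X.setflags(write=False)

dataset = ArrayDataset64(X, y, sample_weight, seed=42)
(x_data, x_indices, x_indptr), yi, swi, idx = dataset._next_py()
print(idx, yi, x_data)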
