MAINT Fix ctypedef types in tree submodule (#27352) · jeremiedbb/scikit-learn@fb0ab5a · GitHub

Commit fb0ab5a

MAINT Fix ctypedef types in tree submodule (scikit-learn#27352)
Signed-off-by: Adam Li <adam2392@gmail.com>
1 parent 24b782f commit fb0ab5a
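
The diffs below consistently replace the tree submodule's locally defined ctypedefs with the shared aliases cimported from ``..utils._typedefs``. As a reading aid, here is a minimal Python sketch of the correspondence visible in this excerpt; the NumPy dtypes and the dictionary name are illustrative assumptions inferred from the alias names, not something stated on this page:

import numpy as np

# Old tree-local ctypedef -> new shared alias from sklearn.utils._typedefs,
# together with the NumPy dtype each alias is assumed to correspond to.
TYPEDEF_RENAMES = {
    "DTYPE_t": ("float32_t", np.float32),   # feature values X
    "DOUBLE_t": ("float64_t", np.float64),  # y and sample_weight
    "SIZE_t": ("intp_t", np.intp),          # indices and counters
    "INT32_t": ("int32_t", np.int32),       # e.g. CSR indices / indptr
}

for old, (new, dtype) in TYPEDEF_RENAMES.items():
    print(f"{old:<9} -> {new:<10} ({np.dtype(dtype).name})")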

9 files changed: +732, -748 lines

sklearn/ensemble/_gradient_boosting.pyx

Lines changed: 20 additions & 22 deletions
@@ -11,11 +11,9 @@ cnp.import_array()

 from scipy.sparse import issparse

+from ..utils._typedefs cimport float32_t, float64_t, intp_t, int32_t
 from ..tree._tree cimport Node
 from ..tree._tree cimport Tree
-from ..tree._tree cimport DTYPE_t
-from ..tree._tree cimport SIZE_t
-from ..tree._tree cimport INT32_t
 from ..tree._utils cimport safe_realloc


@@ -24,10 +22,10 @@ from numpy import zeros as np_zeros


 # constant to mark tree leafs
-cdef SIZE_t TREE_LEAF = -1
+cdef intp_t TREE_LEAF = -1

 cdef void _predict_regression_tree_inplace_fast_dense(
-    const DTYPE_t[:, ::1] X,
+    const float32_t[:, ::1] X,
     Node* root_node,
     double *value,
     double scale,
@@ -45,7 +43,7 @@ cdef void _predict_regression_tree_inplace_fast_dense(

     Parameters
     ----------
-    X : DTYPE_t 2d memory view
+    X : float32_t 2d memory view
         The memory view on the data ndarray of the input ``X``.
         Assumes that the array is c-continuous.
     root_node : tree Node pointer
@@ -63,7 +61,7 @@ cdef void _predict_regression_tree_inplace_fast_dense(
         ``out`` is assumed to be a two-dimensional array of
         shape ``(n_samples, K)``.
     """
-    cdef SIZE_t n_samples = X.shape[0]
+    cdef intp_t n_samples = X.shape[0]
     cdef Py_ssize_t i
     cdef Node *node
     for i in range(n_samples):
@@ -87,20 +85,20 @@ def _predict_regression_tree_stages_sparse(

     The function assumes that the ndarray that wraps ``X`` is csr_matrix.
     """
-    cdef const DTYPE_t[::1] X_data = X.data
-    cdef const INT32_t[::1] X_indices = X.indices
-    cdef const INT32_t[::1] X_indptr = X.indptr
+    cdef const float32_t[::1] X_data = X.data
+    cdef const int32_t[::1] X_indices = X.indices
+    cdef const int32_t[::1] X_indptr = X.indptr

-    cdef SIZE_t n_samples = X.shape[0]
-    cdef SIZE_t n_features = X.shape[1]
-    cdef SIZE_t n_stages = estimators.shape[0]
-    cdef SIZE_t n_outputs = estimators.shape[1]
+    cdef intp_t n_samples = X.shape[0]
+    cdef intp_t n_features = X.shape[1]
+    cdef intp_t n_stages = estimators.shape[0]
+    cdef intp_t n_outputs = estimators.shape[1]

     # Indices and temporary variables
-    cdef SIZE_t sample_i
-    cdef SIZE_t feature_i
-    cdef SIZE_t stage_i
-    cdef SIZE_t output_i
+    cdef intp_t sample_i
+    cdef intp_t feature_i
+    cdef intp_t stage_i
+    cdef intp_t output_i
     cdef Node *root_node = NULL
     cdef Node *node = NULL
     cdef double *value = NULL
@@ -117,18 +115,18 @@ def _predict_regression_tree_stages_sparse(
             values[stage_i * n_outputs + output_i] = tree.value

     # Initialize auxiliary data-structure
-    cdef DTYPE_t feature_value = 0.
-    cdef DTYPE_t* X_sample = NULL
+    cdef float32_t feature_value = 0.
+    cdef float32_t* X_sample = NULL

     # feature_to_sample as a data structure records the last seen sample
     # for each feature; functionally, it is an efficient way to identify
     # which features are nonzero in the present sample.
-    cdef SIZE_t* feature_to_sample = NULL
+    cdef intp_t* feature_to_sample = NULL

     safe_realloc(&X_sample, n_features)
     safe_realloc(&feature_to_sample, n_features)

-    memset(feature_to_sample, -1, n_features * sizeof(SIZE_t))
+    memset(feature_to_sample, -1, n_features * sizeof(intp_t))

     # Cycle through all samples
     for sample_i in range(n_samples):
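
The comment in the last hunk of this file describes the ``feature_to_sample`` bookkeeping used when predicting over CSR input: for each feature it stores the index of the last sample in which that feature was non-zero, so a single comparison decides whether the cached value belongs to the current row. A minimal pure-Python sketch of the idea, with illustrative names only (the actual routine works on raw C buffers allocated via ``safe_realloc``):

import numpy as np
from scipy.sparse import csr_matrix

# Toy CSR input: two rows, two features.
X = csr_matrix(np.array([[0.0, 2.0],
                         [3.0, 0.0]], dtype=np.float32))
n_samples, n_features = X.shape

# feature_to_sample[f] records the last row in which feature f was non-zero;
# x_sample[f] caches that value. The cache entry is only valid for row i when
# feature_to_sample[f] == i, otherwise feature f is zero in row i.
feature_to_sample = np.full(n_features, -1, dtype=np.intp)
x_sample = np.zeros(n_features, dtype=np.float32)

for i in range(n_samples):
    # Scatter the non-zeros of row i into the dense caches.
    for k in range(X.indptr[i], X.indptr[i + 1]):
        feature_to_sample[X.indices[k]] = i
        x_sample[X.indices[k]] = X.data[k]

    # What a tree traversal would read for each feature of row i.
    row = [float(x_sample[f]) if feature_to_sample[f] == i else 0.0
           for f in range(n_features)]
    print(i, row)   # 0 [0.0, 2.0], then 1 [3.0, 0.0]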

sklearn/tree/_criterion.pxd

Lines changed: 27 additions & 30 deletions
@@ -10,55 +10,52 @@
 # See _criterion.pyx for implementation details.
 cimport numpy as cnp

-from ._tree cimport DTYPE_t          # Type of X
-from ._tree cimport DOUBLE_t         # Type of y, sample_weight
-from ._tree cimport SIZE_t           # Type for indices and counters
-from ._tree cimport INT32_t          # Signed 32 bit integer
-from ._tree cimport UINT32_t         # Unsigned 32 bit integer
+from ..utils._typedefs cimport float64_t, intp_t
+

 cdef class Criterion:
     # The criterion computes the impurity of a node and the reduction of
     # impurity of a split on that node. It also computes the output statistics
     # such as the mean in regression and class probabilities in classification.

     # Internal structures
-    cdef const DOUBLE_t[:, ::1] y         # Values of y
-    cdef const DOUBLE_t[:] sample_weight  # Sample weights
+    cdef const float64_t[:, ::1] y        # Values of y
+    cdef const float64_t[:] sample_weight # Sample weights

-    cdef const SIZE_t[:] sample_indices   # Sample indices in X, y
-    cdef SIZE_t start                     # samples[start:pos] are the samples in the left node
-    cdef SIZE_t pos                       # samples[pos:end] are the samples in the right node
-    cdef SIZE_t end
-    cdef SIZE_t n_missing                 # Number of missing values for the feature being evaluated
-    cdef bint missing_go_to_left          # Whether missing values go to the left node
+    cdef const intp_t[:] sample_indices   # Sample indices in X, y
+    cdef intp_t start                     # samples[start:pos] are the samples in the left node
+    cdef intp_t pos                       # samples[pos:end] are the samples in the right node
+    cdef intp_t end
+    cdef intp_t n_missing                 # Number of missing values for the feature being evaluated
+    cdef bint missing_go_to_left          # Whether missing values go to the left node

-    cdef SIZE_t n_outputs                 # Number of outputs
-    cdef SIZE_t n_samples                 # Number of samples
-    cdef SIZE_t n_node_samples            # Number of samples in the node (end-start)
-    cdef double weighted_n_samples        # Weighted number of samples (in total)
-    cdef double weighted_n_node_samples   # Weighted number of samples in the node
-    cdef double weighted_n_left           # Weighted number of samples in the left node
-    cdef double weighted_n_right          # Weighted number of samples in the right node
-    cdef double weighted_n_missing        # Weighted number of samples that are missing
+    cdef intp_t n_outputs                 # Number of outputs
+    cdef intp_t n_samples                 # Number of samples
+    cdef intp_t n_node_samples            # Number of samples in the node (end-start)
+    cdef double weighted_n_samples        # Weighted number of samples (in total)
+    cdef double weighted_n_node_samples   # Weighted number of samples in the node
+    cdef double weighted_n_left           # Weighted number of samples in the left node
+    cdef double weighted_n_right          # Weighted number of samples in the right node
+    cdef double weighted_n_missing        # Weighted number of samples that are missing

     # The criterion object is maintained such that left and right collected
     # statistics correspond to samples[start:pos] and samples[pos:end].

     # Methods
     cdef int init(
         self,
-        const DOUBLE_t[:, ::1] y,
-        const DOUBLE_t[:] sample_weight,
+        const float64_t[:, ::1] y,
+        const float64_t[:] sample_weight,
         double weighted_n_samples,
-        const SIZE_t[:] sample_indices,
-        SIZE_t start,
-        SIZE_t end
+        const intp_t[:] sample_indices,
+        intp_t start,
+        intp_t end
     ) except -1 nogil
     cdef void init_sum_missing(self)
-    cdef void init_missing(self, SIZE_t n_missing) noexcept nogil
+    cdef void init_missing(self, intp_t n_missing) noexcept nogil
     cdef int reset(self) except -1 nogil
     cdef int reverse_reset(self) except -1 nogil
-    cdef int update(self, SIZE_t new_pos) except -1 nogil
+    cdef int update(self, intp_t new_pos) except -1 nogil
     cdef double node_impurity(self) noexcept nogil
     cdef void children_impurity(
         self,
@@ -101,8 +98,8 @@ cdef class Criterion:
 cdef class ClassificationCriterion(Criterion):
     """Abstract criterion for classification."""

-    cdef SIZE_t[::1] n_classes
-    cdef SIZE_t max_n_classes
+    cdef intp_t[::1] n_classes
+    cdef intp_t max_n_classes

     cdef double[:, ::1] sum_total    # The sum of the weighted count of each label.
     cdef double[:, ::1] sum_left     # Same as above, but for the left side of the split
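
For orientation, the declarations in this header describe the split-evaluation protocol used by the tree builders: ``init`` binds the criterion to ``sample_indices[start:end]``, ``update(new_pos)`` moves samples from the right partition into the left one, and the impurity methods read the statistics of the current partition. A rough pure-Python analogue of that flow for a squared-error criterion, illustrative only and not scikit-learn's implementation:

import numpy as np

class ToySquaredErrorCriterion:
    """Toy analogue of the Criterion protocol declared above: init() fixes the
    node range, update(new_pos) grows the left child, and the impurity methods
    evaluate the current partition."""

    def init(self, y, sample_weight, weighted_n_samples, sample_indices, start, end):
        self.y, self.w = y, sample_weight
        self.weighted_n_samples = weighted_n_samples
        self.idx = sample_indices
        self.start, self.pos, self.end = start, start, end
        return 0

    def reset(self):
        self.pos = self.start
        return 0

    def update(self, new_pos):
        # samples[start:pos] belong to the left child, samples[pos:end] to the right.
        self.pos = new_pos
        return 0

    def _weighted_mse(self, lo, hi):
        idx = self.idx[lo:hi]
        if idx.size == 0:
            return 0.0
        mean = np.average(self.y[idx], weights=self.w[idx])
        return float(np.average((self.y[idx] - mean) ** 2, weights=self.w[idx]))

    def node_impurity(self):
        return self._weighted_mse(self.start, self.end)

    def children_impurity(self):
        return self._weighted_mse(self.start, self.pos), self._weighted_mse(self.pos, self.end)


y = np.array([1.0, 1.2, 5.0, 5.4])
crit = ToySquaredErrorCriterion()
crit.init(y, np.ones_like(y), weighted_n_samples=4.0,
          sample_indices=np.arange(4), start=0, end=4)
crit.update(2)                       # evaluate the split samples[0:2] | samples[2:4]
print(crit.node_impurity())          # impurity of the full node
print(crit.children_impurity())      # impurities of the left and right children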

0 commit comments