Scarliles/defuse partitioner by SamuelCarliles3 · Pull Request #70 · neurodata/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

Scarliles/defuse partitioner #70

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: submodulev3
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
broke sort functions, partitioners out of _splitter.pyx
  • Loading branch information
SamuelCarliles3 committed Jul 5, 2024
commit cf52ff582facba8232cfe0c517a30c6de2cfd187
101 changes: 101 additions & 0 deletions sklearn/tree/_partitioner.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t

# Constant used to switch between the non-zero value extraction algorithms
# in SparsePartitioner.
cdef float32_t EXTRACT_NNZ_SWITCH = 0.1


# Partitioner is a fused type over the dense and sparse partitioner classes.
# It lets the node_split_best and node_split_random functions share a single
# split implementation for both cases. Inheritance-based polymorphism was the
# alternative, but the overhead of frequent virtual method lookups would have
# degraded overall tree fitting performance by ~10%.
ctypedef fused Partitioner:
    DensePartitioner
    SparsePartitioner


cdef class DensePartitioner:
    """Partitioner specialized for dense data.

    Note that this partitioner is agnostic to the splitting strategy (best vs. random).
    """
    cdef:
        # Every member lives in this single `cdef:` block, so no per-line
        # `cdef` prefix is needed (same layout as SparsePartitioner).
        # Declaration order of attributes and methods is left unchanged.

        # Read-only 2-D float32 memoryview.
        const float32_t[:, :] X
        # C-contiguous 1-D work buffers.
        intp_t[::1] samples
        float32_t[::1] feature_values
        # NOTE(review): presumably the bounds of the node currently being
        # split within `samples` -- confirm against the .pyx implementation.
        intp_t start
        intp_t end
        intp_t n_missing
        # Read-only, C-contiguous 1-D byte mask.
        const unsigned char[::1] missing_values_in_feature_mask

        # Method declarations only; the bodies are expected in the matching
        # implementation file. All of them are declared `noexcept nogil`.
        inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil
        inline void sort_samples_and_feature_values(
            self,
            intp_t current_feature
        ) noexcept nogil
        # The two pointer arguments are output parameters (see `_out` suffix).
        inline void find_min_max(
            self,
            intp_t current_feature,
            float32_t* min_feature_value_out,
            float32_t* max_feature_value_out,
        ) noexcept nogil
        inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil
        inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil
        inline void partition_samples_final(
            self,
            intp_t best_pos,
            float64_t best_threshold,
            intp_t best_feature,
            intp_t best_n_missing,
        ) noexcept nogil


cdef class SparsePartitioner:
    """Partitioner specialized for sparse CSC data.

    Note that this partitioner is agnostic to the splitting strategy (best vs. random).
    """
    cdef:
        # --- Attributes with the same names as in DensePartitioner ---
        intp_t[::1] samples
        float32_t[::1] feature_values
        intp_t start
        intp_t end
        intp_t n_missing
        const unsigned char[::1] missing_values_in_feature_mask

        # --- Sparse input, as three read-only C-contiguous 1-D views ---
        # NOTE(review): presumably the data / indices / indptr arrays of the
        # CSC matrix mentioned in the docstring -- confirm against the caller.
        const float32_t[::1] X_data
        const int32_t[::1] X_indices
        const int32_t[::1] X_indptr

        intp_t n_total_samples

        # --- Additional index buffers and bookkeeping ---
        intp_t[::1] index_to_samples
        intp_t[::1] sorted_samples

        intp_t start_positive
        intp_t end_negative
        bint is_samples_sorted

        # --- Methods with the same signatures as in DensePartitioner ---
        # Declarations only; all are `noexcept nogil`.
        inline void init_node_split(
            self,
            intp_t start,
            intp_t end,
        ) noexcept nogil
        inline void sort_samples_and_feature_values(
            self,
            intp_t current_feature,
        ) noexcept nogil
        inline void find_min_max(
            self,
            intp_t current_feature,
            float32_t* min_feature_value_out,
            float32_t* max_feature_value_out,
        ) noexcept nogil
        inline void next_p(
            self,
            intp_t* p_prev,
            intp_t* p,
        ) noexcept nogil
        inline intp_t partition_samples(
            self,
            float64_t current_threshold,
        ) noexcept nogil
        inline void partition_samples_final(
            self,
            intp_t best_pos,
            float64_t best_threshold,
            intp_t best_feature,
            intp_t best_n_missing,
        ) noexcept nogil

        # --- Methods declared only on the sparse partitioner ---
        inline intp_t _partition(
            self,
            float64_t threshold,
            intp_t zero_pos,
        ) noexcept nogil
        inline void extract_nnz(
            self,
            intp_t feature,
        ) noexcept nogil
Loading
0