Implemented callback system by Micky774 · Pull Request #8 · Micky774/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

Implemented callback system #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,12 @@ def check_package_status(package, min_version):
"include_np": True,
"extra_compile_args": ["-std=c++11"],
},
{
"sources": ["_callbacks.pyx", "_callbacks.pxd"],
"language": "c++",
"include_np": True,
"extra_compile_args": ["-std=c++11"],
},
{
"sources": ["_base.pyx.tp", "_base.pxd.tp"],
"language": "c++",
Expand Down
10 changes: 8 additions & 2 deletions sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ from cython.parallel cimport parallel, prange
from ...utils._heap cimport heap_push
from ...utils._sorting cimport simultaneous_sort
from ...utils._typedefs cimport ITYPE_t, DTYPE_t

from ._callbacks cimport argkmin_callback, ARGKMIN_CALLBACK_ARGS, CALLBACK
from ._engines cimport BaseEngine

import numpy as np
Expand Down Expand Up @@ -218,15 +218,21 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
ITYPE_t Y_end,
ITYPE_t thread_num,
) nogil:

# Struct initialization using constructor syntax fails in nogil
cdef ARGKMIN_CALLBACK_ARGS callback_args
callback_args.k = self.k

engine._compute_and_reduce_distances_on_chunks(
X_start,
X_end,
Y_start,
Y_end,
thread_num,
self.k,
self.heaps_r_distances_chunks[thread_num],
self.heaps_indices_chunks[thread_num],
<CALLBACK> argkmin_callback,
<void *> &callback_args,
)
#######################

Expand Down
41 changes: 41 additions & 0 deletions sklearn/metrics/_pairwise_distances_reduction/_callbacks.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from ...utils._typedefs cimport DTYPE_t, ITYPE_t

##############################################
# IMPORTANT: Each callback must define its own explicit struct that is
# implicitly passed through `callback_args`.

# Generic typedef for use in _engines.{pyx, pxd}
# Function-pointer type for per-pair reduction callbacks. The positional
# parameters are unnamed here; the names in the trailing comments are the
# arguments the engines pass when invoking the callback.
ctypedef void (*CALLBACK)(
ITYPE_t,  # X_start
ITYPE_t,  # X_end
ITYPE_t,  # Y_start
ITYPE_t,  # Y_end
ITYPE_t,  # thread_num
ITYPE_t,  # i: row index relative to X_start
ITYPE_t,  # j: column index relative to Y_start
DTYPE_t,  # val: value computed by the engine for the pair (i, j)
DTYPE_t *,  # heaps_r_distances
ITYPE_t *,  # heaps_indices
void *,  # callback_args: type-erased pointer to the callback's own struct
) nogil

##############################################

# Declaration of the ArgKmin callback; the implementation lives in
# _callbacks.pyx. Its signature matches CALLBACK so that it can be cast to
# that type and handed to an engine. `callback_args` is expected to point to
# an ARGKMIN_CALLBACK_ARGS struct.
cdef void argkmin_callback(
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
ITYPE_t i,
ITYPE_t j,
DTYPE_t val,
DTYPE_t * heaps_r_distances,
ITYPE_t * heaps_indices,
void * callback_args,
) nogil

# Arguments consumed by argkmin_callback, received through its type-erased
# `callback_args` pointer. `k` is used both as the size of each row's heap
# and as the per-row stride into the heap buffers.
cdef struct ARGKMIN_CALLBACK_ARGS:
ITYPE_t k

##############################################
24 changes: 24 additions & 0 deletions sklearn/metrics/_pairwise_distances_reduction/_callbacks.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from ...utils._typedefs cimport DTYPE_t, ITYPE_t
from ...utils._heap cimport heap_push

cdef inline void argkmin_callback(
    ITYPE_t X_start,
    ITYPE_t X_end,
    ITYPE_t Y_start,
    ITYPE_t Y_end,
    ITYPE_t thread_num,
    ITYPE_t i,
    ITYPE_t j,
    DTYPE_t val,
    DTYPE_t * heaps_r_distances,
    ITYPE_t * heaps_indices,
    void * callback_args,
) nogil:
    # Push `val` onto the heap of row `i`, tagged with the index `j + Y_start`.
    #
    # `callback_args` is reinterpreted as a pointer to ARGKMIN_CALLBACK_ARGS;
    # its `k` field gives both the heap size and the distance (in elements)
    # between the heaps of two consecutive rows.
    #
    # X_start, X_end, Y_end and thread_num are unused here; they are part of
    # the signature only so that it matches the generic CALLBACK type.
    cdef:
        ARGKMIN_CALLBACK_ARGS * args = <ARGKMIN_CALLBACK_ARGS *> callback_args
        ITYPE_t heap_size = args.k
        # Offset of row i's heap inside the flat heap buffers.
        ITYPE_t row_offset = i * heap_size

    heap_push(
        values=heaps_r_distances + row_offset,
        indices=heaps_indices + row_offset,
        size=heap_size,
        val=val,
        val_idx=j + Y_start,
    )
5 changes: 3 additions & 2 deletions sklearn/metrics/_pairwise_distances_reduction/_engines.pxd.tp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ implementation_specific_values = [
cimport numpy as cnp
from cython cimport final
from libcpp.vector cimport vector

from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t
from ._callbacks cimport CALLBACK

ctypedef DTYPE_t (*DIST_FUNC)(ITYPE_t, ITYPE_t) nogil

Expand Down Expand Up @@ -103,9 +103,10 @@ cdef class BaseEngine:
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
ITYPE_t k,
DTYPE_t * heaps_r_distances,
ITYPE_t * heaps_indices,
CALLBACK callback,
void * callback_args,
) nogil

{{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
Expand Down
55 changes: 32 additions & 23 deletions sklearn/metrics/_pairwise_distances_reduction/_engines.pyx.tp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ from ...utils._cython_blas cimport (
Trans,
_gemm,
)
from ...utils._heap cimport heap_push
from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t
from ._callbacks cimport CALLBACK
# TODO: change for `libcpp.algorithm.fill` once Cython 3 is used
# Introduction in Cython:
#
Expand Down Expand Up @@ -81,8 +81,6 @@ cdef void _middle_term_sparse_sparse_64(
D[k] += -2 * X_data[X_i_ptr] * Y_data[Y_j_ptr]

cdef class BaseEngine:
def __init__(self):
return

cdef void _parallel_on_X_parallel_init(
self,
Expand Down Expand Up @@ -159,9 +157,10 @@ cdef class BaseEngine:
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
ITYPE_t k,
DTYPE_t * heaps_r_distances,
ITYPE_t * heaps_indices,
CALLBACK callback,
void * callback_args,
) nogil:
pass

Expand All @@ -175,35 +174,38 @@ cdef class BaseEngine{{name_suffix}}(BaseEngine):
# We keep this as an attribute to avoid relying on fused types in
# interface declaration due to Cython errors
self.dataset = dataset

cdef void _compute_and_reduce_distances_on_chunks(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
ITYPE_t k,
DTYPE_t * heaps_r_distances,
ITYPE_t * heaps_indices,
CALLBACK callback,
void * callback_args,
) nogil:
cdef:
ITYPE_t i, j
DTYPE_t val
ITYPE_t n_X = X_end - X_start
ITYPE_t n_Y = Y_end - Y_start

# Pushing the distance and their associated indices on heaps
# which keep tracks of the argkmin.
for i in range(n_X):
for j in range(n_Y):
heap_push(
values=heaps_r_distances + i * k,
indices=heaps_indices + i * k,
size=k,
# While this still has a vtable lookup, there is no
# regression since main does the same
val=self.dataset.surrogate_dist(X_start + i, Y_start + j),
val_idx=j + Y_start,
callback(
X_start,
X_end,
Y_start,
Y_end,
thread_num,
i,
j,
self.dataset.surrogate_dist(X_start + i, Y_start + j),
heaps_r_distances,
heaps_indices,
callback_args,
)

cdef class EuclideanEngine{{name_suffix}}(BaseEngine):
Expand Down Expand Up @@ -370,9 +372,10 @@ cdef class EuclideanEngine{{name_suffix}}(BaseEngine):
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
ITYPE_t k,
DTYPE_t * heaps_r_distances,
ITYPE_t * heaps_indices,
CALLBACK callback,
void * callback_args,
) nogil:
cdef:
ITYPE_t i, j
Expand All @@ -391,12 +394,18 @@ cdef class EuclideanEngine{{name_suffix}}(BaseEngine):
+ self.dist_middle_terms_chunks[thread_num][i * n_Y + j]
+ self.Y_norm_squared[j + Y_start]
)
heap_push(
values=heaps_r_distances + i * k,
indices=heaps_indices + i * k,
size=k,
val=val,
val_idx=j + Y_start,
callback(
X_start,
X_end,
Y_start,
Y_end,
thread_num,
i,
j,
val,
heaps_r_distances,
heaps_indices,
callback_args,
)

cdef class DenseDenseEuclideanEngine{{name_suffix}}(EuclideanEngine{{name_suffix}}):
Expand Down
0