MAINT Generate DistanceMetrics for 32bit vectors · scikit-learn/scikit-learn@6d9f8a9 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6d9f8a9

Browse files
committed
MAINT Generate DistanceMetrics for 32bit vectors
1 parent 998e8f2 commit 6d9f8a9

File tree

5 files changed

+2838
-37
lines changed

5 files changed

+2838
-37
lines changed

sklearn/metrics/_dist_metrics.pxd

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,3 +85,86 @@ cdef class DenseDenseDatasetsPair(DatasetsPair):
85 85
const DTYPE_t[:, ::1] X
86 86
const DTYPE_t[:, ::1] Y
87 87
ITYPE_t d
88+
89+
######################################################################
90+
# Inline distance functions
91+
#
92+
# We use these for the default (euclidean) case so that they can be
93+
# inlined. This leads to faster computation for the most common case
94+
cdef inline np.float32_t euclidean_dist32(const np.float32_t* x1, const np.float32_t* x2,
95+
ITYPE_t size) nogil except -1:
96+
cdef np.float32_t tmp, d=0
97+
cdef np.intp_t j
98+
for j in range(size):
99+
tmp = x1[j] - x2[j]
100+
d += tmp * tmp
101+
return sqrt(d)
102+
103+
104+
cdef inline np.float32_t euclidean_rdist32(const np.float32_t* x1, const np.float32_t* x2,
105+
ITYPE_t size) nogil except -1:
106+
cdef np.float32_t tmp, d=0
107+
cdef np.intp_t j
108+
for j in range(size):
109+
tmp = x1[j] - x2[j]
110+
d += tmp * tmp
111+
return d
112+
113+
114+
cdef inline np.float32_t euclidean_dist_to_rdist32(const np.float32_t dist) nogil except -1:
115+
return dist * dist
116+
117+
118+
cdef inline np.float32_t euclidean_rdist_to_dist32(const np.float32_t dist) nogil except -1:
119+
return sqrt(dist)
120+
121+
122+
######################################################################
123+
# DistanceMetric base class
124+
cdef class DistanceMetric32:
125+
# The following attributes are required for a few of the subclasses.
126+
# we must define them here so that cython's limited polymorphism will work.
127+
# Because we don't expect to instantiate a lot of these objects, the
128+
# extra memory overhead of this setup should not be an issue.
129+
cdef np.float32_t p
130+
cdef np.float32_t[::1] vec
131+
cdef np.float32_t[:, ::1] mat
132+
cdef ITYPE_t size
133+
cdef object func
134+
cdef object kwargs
135+
136+
cdef np.float32_t dist(self, const np.float32_t* x1, const np.float32_t* x2,
137+
ITYPE_t size) nogil except -1
138+
139+
cdef np.float32_t rdist(self, const np.float32_t* x1, const np.float32_t* x2,
140+
ITYPE_t size) nogil except -1
141+
142+
cdef int pdist(self, const np.float32_t[:, ::1] X, np.float32_t[:, ::1] D) except -1
143+
144+
cdef int cdist(self, const np.float32_t[:, ::1] X, const np.float32_t[:, ::1] Y,
145+
np.float32_t[:, ::1] D) except -1
146+
147+
cdef np.float32_t _rdist_to_dist(self, np.float32_t rdist) nogil except -1
148+
149+
cdef np.float32_t _dist_to_rdist(self, np.float32_t dist) nogil except -1
150+
151+
152+
######################################################################
153+
# DatasetsPair base class
154+
cdef class DatasetsPair32:
155+
cdef DistanceMetric32 distance_metric
156+
157+
cdef ITYPE_t n_samples_X(self) nogil
158+
159+
cdef ITYPE_t n_samples_Y(self) nogil
160+
161+
cdef np.float32_t dist(self, ITYPE_t i, ITYPE_t j) nogil
162+
163+
cdef np.float32_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) nogil
164+
165+
166+
cdef class DenseDenseDatasetsPair32(DatasetsPair32):
167+
cdef:
168+
const np.float32_t[:, ::1] X
169+
const np.float32_t[:, ::1] Y
170+
ITYPE_t d

sklearn/metrics/_dist_metrics.pxd.tp

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
{{py:
2+
3+
dtypes = [
4+
('', 'DTYPE_t'),
5+
('32', 'np.float32_t'),
6+
]
7+
8+
}}
9+
cimport numpy as np
10+
from libc.math cimport sqrt, exp
11+
12+
from ..utils._typedefs cimport DTYPE_t, ITYPE_t
13+
14+
{{for name_suffix, DTYPE_t in dtypes}}
15+
16+
######################################################################
17+
# Inline distance functions
18+
#
19+
# We use these for the default (euclidean) case so that they can be
20+
# inlined. This leads to faster computation for the most common case
21+
cdef inline {{DTYPE_t}} euclidean_dist{{name_suffix}}(const {{DTYPE_t}}* x1, const {{DTYPE_t}}* x2,
22+
ITYPE_t size) nogil except -1:
23+
cdef {{DTYPE_t}} tmp, d=0
24+
cdef np.intp_t j
25+
for j in range(size):
26+
tmp = x1[j] - x2[j]
27+
d += tmp * tmp
28+
return sqrt(d)
29+
30+
31+
cdef inline {{DTYPE_t}} euclidean_rdist{{name_suffix}}(const {{DTYPE_t}}* x1, const {{DTYPE_t}}* x2,
32+
ITYPE_t size) nogil except -1:
33+
cdef {{DTYPE_t}} tmp, d=0
34+
cdef np.intp_t j
35+
for j in range(size):
36+
tmp = x1[j] - x2[j]
37+
d += tmp * tmp
38+
return d
39+
40+
41+
cdef inline {{DTYPE_t}} euclidean_dist_to_rdist{{name_suffix}}(const {{DTYPE_t}} dist) nogil except -1:
42+
return dist * dist
43+
44+
45+
cdef inline {{DTYPE_t}} euclidean_rdist_to_dist{{name_suffix}}(const {{DTYPE_t}} dist) nogil except -1:
46+
return sqrt(dist)
47+
48+
49+
######################################################################
50+
# DistanceMetric base class
51+
cdef class DistanceMetric{{name_suffix}}:
52+
# The following attributes are required for a few of the subclasses.
53+
# we must define them here so that cython's limited polymorphism will work.
54+
# Because we don't expect to instantiate a lot of these objects, the
55+
# extra memory overhead of this setup should not be an issue.
56+
cdef {{DTYPE_t}} p
57+
cdef {{DTYPE_t}}[::1] vec
58+
cdef {{DTYPE_t}}[:, ::1] mat
59+
cdef ITYPE_t size
60+
cdef object func
61+
cdef object kwargs
62+
63+
cdef {{DTYPE_t}} dist(self, const {{DTYPE_t}}* x1, const {{DTYPE_t}}* x2,
64+
ITYPE_t size) nogil except -1
65+
66+
cdef {{DTYPE_t}} rdist(self, const {{DTYPE_t}}* x1, const {{DTYPE_t}}* x2,
67+
ITYPE_t size) nogil except -1
68+
69+
cdef int pdist(self, const {{DTYPE_t}}[:, ::1] X, {{DTYPE_t}}[:, ::1] D) except -1
70+
71+
cdef int cdist(self, const {{DTYPE_t}}[:, ::1] X, const {{DTYPE_t}}[:, ::1] Y,
72+
{{DTYPE_t}}[:, ::1] D) except -1
73+
74+
cdef {{DTYPE_t}} _rdist_to_dist(self, {{DTYPE_t}} rdist) nogil except -1
75+
76+
cdef {{DTYPE_t}} _dist_to_rdist(self, {{DTYPE_t}} dist) nogil except -1
77+
78+
79+
######################################################################
80+
# DatasetsPair base class
81+
cdef class DatasetsPair{{name_suffix}}:
82+
cdef DistanceMetric{{name_suffix}} distance_metric
83+
84+
cdef ITYPE_t n_samples_X(self) nogil
85+
86+
cdef ITYPE_t n_samples_Y(self) nogil
87+
88+
cdef {{DTYPE_t}} dist(self, ITYPE_t i, ITYPE_t j) nogil
89+
90+
cdef {{DTYPE_t}} surrogate_dist(self, ITYPE_t i, ITYPE_t j) nogil
91+
92+
93+
cdef class DenseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}):
94+
cdef:
95+
const {{DTYPE_t}}[:, ::1] X
96+
const {{DTYPE_t}}[:, ::1] Y
97+
ITYPE_t d
98+
99+
{{endfor}}

0 commit comments

Comments
 (0)
0