scikit-learn/scikit-learn · Commit 0bab8d3
Make enet_coordinate_descent support fused types
1 parent bd97c33 commit 0bab8d3

File tree

2 files changed: +231 −126 lines changed
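For context, the change below leans on Cython fused types: one function body is compiled once per listed C type, and the specialization is chosen from the argument types at call time. A minimal sketch of the mechanism (scaled_sum is a hypothetical example, not code from this commit; Cython also ships the float/double pair as the built-in cython.floating):

    ctypedef fused floating:
        float
        double

    def scaled_sum(floating[:] x, floating scale):
        # Cython emits a float and a double specialization of this body;
        # the dtype of `x` (float32 or float64) selects one at call time.
        cdef floating total = 0
        cdef Py_ssize_t i
        for i in range(x.shape[0]):
            total += x[i]
        return total * scale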

sklearn/linear_model/cd_fast.pyx

Lines changed: 226 additions & 122 deletions
@@ -139,11 +139,11 @@ cdef extern from "cblas.h":
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w,
-                            double alpha, double beta,
-                            np.ndarray[DOUBLE, ndim=2, mode='fortran'] X,
-                            np.ndarray[DOUBLE, ndim=1, mode='c'] y,
-                            int max_iter, double tol,
+def enet_coordinate_descent(np.ndarray[floating, ndim=1] w,
+                            floating alpha, floating beta,
+                            np.ndarray[floating, ndim=2, mode='fortran'] X,
+                            np.ndarray[floating, ndim=1, mode='c'] y,
+                            int max_iter, floating tol,
                             object rng, bint random=0, bint positive=0):
     """Cython version of the coordinate descent algorithm
         for Elastic-Net regression
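A side effect of this signature worth noting (standard Cython fused-type dispatch, not something the diff spells out): every argument typed `floating` must resolve to the same specialization, so w, X, y, and the scalar parameters have to agree on one precision per call. A hedged sketch of the constraint, with hypothetical arrays:

    import numpy as np

    # Hypothetical inputs; the names below are illustrative only.
    X32 = np.asfortranarray(np.random.randn(10, 4), dtype=np.float32)
    y32 = np.random.randn(10).astype(np.float32)
    w64 = np.zeros(4)  # float64

    # enet_coordinate_descent(w64, 0.1, 0.1, X32, y32, 100, 1e-4, rng)
    # would fail with TypeError ("no matching signature found"): w64 pins
    # `floating` to double while X32 and y32 pin it to float.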
@@ -159,26 +159,26 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w,
     cdef unsigned int n_features = X.shape[1]

     # get the number of tasks indirectly, using strides
-    cdef unsigned int n_tasks = y.strides[0] / sizeof(DOUBLE)
+    cdef unsigned int n_tasks = y.strides[0] / sizeof(floating)

     # compute norms of the columns of X
-    cdef np.ndarray[DOUBLE, ndim=1] norm_cols_X = (X**2).sum(axis=0)
+    cdef np.ndarray[floating, ndim=1] norm_cols_X = (X**2).sum(axis=0)

     # initial value of the residuals
-    cdef np.ndarray[DOUBLE, ndim=1] R = np.empty(n_samples)
-
-    cdef np.ndarray[DOUBLE, ndim=1] XtA = np.empty(n_features)
-    cdef double tmp
-    cdef double w_ii
-    cdef double d_w_max
-    cdef double w_max
-    cdef double d_w_ii
-    cdef double gap = tol + 1.0
-    cdef double d_w_tol = tol
-    cdef double dual_norm_XtA
-    cdef double R_norm2
-    cdef double w_norm2
-    cdef double l1_norm
+    cdef np.ndarray[floating, ndim=1] R = np.empty(n_samples)
+
+    cdef np.ndarray[floating, ndim=1] XtA = np.empty(n_features)
+    cdef floating tmp
+    cdef floating w_ii
+    cdef floating d_w_max
+    cdef floating w_max
+    cdef floating d_w_ii
+    cdef floating gap = tol + 1.0
+    cdef floating d_w_tol = tol
+    cdef floating dual_norm_XtA
+    cdef floating R_norm2
+    cdef floating w_norm2
+    cdef floating l1_norm
     cdef unsigned int ii
     cdef unsigned int i
     cdef unsigned int n_iter = 0
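The large hunk below duplicates the solver body under if floating is double: / else:. That test is resolved while Cython generates each specialization, so the double variant compiles with only the d* BLAS routines (ddot, daxpy, dasum) and the float variant with only the s* ones (sdot, saxpy, sasum); no branch is taken at run time. A minimal sketch of the pattern, with fused_dot as a hypothetical stand-in for the solver's BLAS calls:

    ctypedef fused floating:
        float
        double

    cdef extern from "cblas.h":
        float sdot "cblas_sdot"(int N, float *X, int incX,
                                float *Y, int incY) nogil
        double ddot "cblas_ddot"(int N, double *X, int incX,
                                 double *Y, int incY) nogil

    def fused_dot(floating[:] a, floating[:] b):
        # `floating is double` is evaluated at specialization time, so each
        # generated variant keeps exactly one of these two branches.
        if floating is double:
            return ddot(<int>a.shape[0], &a[0], 1, &b[0], 1)
        else:
            return sdot(<int>a.shape[0], &a[0], 1, &b[0], 1)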
@@ -191,108 +191,212 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w,
             " results and is discouraged.")

     with nogil:
-        # R = y - np.dot(X, w)
-        for i in range(n_samples):
-            R[i] = y[i] - ddot(n_features,
-                               <DOUBLE*>(X.data + i * sizeof(DOUBLE)),
-                               n_samples, <DOUBLE*>w.data, 1)
-
-        # tol *= np.dot(y, y)
-        tol *= ddot(n_samples, <DOUBLE*>y.data, n_tasks,
-                    <DOUBLE*>y.data, n_tasks)
-
-        for n_iter in range(max_iter):
-            w_max = 0.0
-            d_w_max = 0.0
-            for f_iter in range(n_features):  # Loop over coordinates
-                if random:
-                    ii = rand_int(n_features, rand_r_state)
-                else:
-                    ii = f_iter
-
-                if norm_cols_X[ii] == 0.0:
-                    continue
-
-                w_ii = w[ii]  # Store previous value
-
-                if w_ii != 0.0:
-                    # R += w_ii * X[:,ii]
-                    daxpy(n_samples, w_ii,
-                          <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)),
-                          1, <DOUBLE*>R.data, 1)
-
-                # tmp = (X[:,ii]*R).sum()
-                tmp = ddot(n_samples,
-                           <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)),
-                           1, <DOUBLE*>R.data, 1)
-
-                if positive and tmp < 0:
-                    w[ii] = 0.0
-                else:
-                    w[ii] = (fsign(tmp) * fmax(fabs(tmp) - alpha, 0)
-                             / (norm_cols_X[ii] + beta))
-
-                if w[ii] != 0.0:
-                    # R -= w[ii] * X[:,ii] # Update residual
-                    daxpy(n_samples, -w[ii],
-                          <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)),
-                          1, <DOUBLE*>R.data, 1)
-
-                # update the maximum absolute coefficient update
-                d_w_ii = fabs(w[ii] - w_ii)
-                if d_w_ii > d_w_max:
-                    d_w_max = d_w_ii
-
-                if fabs(w[ii]) > w_max:
-                    w_max = fabs(w[ii])
-
-            if (w_max == 0.0
-                    or d_w_max / w_max < d_w_tol
-                    or n_iter == max_iter - 1):
-                # the biggest coordinate update of this iteration was smaller
-                # than the tolerance: check the duality gap as ultimate
-                # stopping criterion
-
-                # XtA = np.dot(X.T, R) - beta * w
-                for i in range(n_features):
-                    XtA[i] = ddot(
-                        n_samples,
-                        <DOUBLE*>(X.data + i * n_samples *sizeof(DOUBLE)),
-                        1, <DOUBLE*>R.data, 1) - beta * w[i]
-
-                if positive:
-                    dual_norm_XtA = max(n_features, <DOUBLE*>XtA.data)
-                else:
-                    dual_norm_XtA = abs_max(n_features, <DOUBLE*>XtA.data)
-
-                # R_norm2 = np.dot(R, R)
-                R_norm2 = ddot(n_samples, <DOUBLE*>R.data, 1,
-                               <DOUBLE*>R.data, 1)
-
-                # w_norm2 = np.dot(w, w)
-                w_norm2 = ddot(n_features, <DOUBLE*>w.data, 1,
-                               <DOUBLE*>w.data, 1)
-
-                if (dual_norm_XtA > alpha):
-                    const = alpha / dual_norm_XtA
-                    A_norm2 = R_norm2 * (const ** 2)
-                    gap = 0.5 * (R_norm2 + A_norm2)
-                else:
-                    const = 1.0
-                    gap = R_norm2
-
-                l1_norm = dasum(n_features, <DOUBLE*>w.data, 1)
-
-                # np.dot(R.T, y)
-                gap += (alpha * l1_norm - const * ddot(
-                    n_samples,
-                    <DOUBLE*>R.data, 1,
-                    <DOUBLE*>y.data, n_tasks)
-                    + 0.5 * beta * (1 + const ** 2) * (w_norm2))
-
-                if gap < tol:
-                    # return if we reached desired tolerance
-                    break
+        if floating is double:
+            # R = y - np.dot(X, w)
+            for i in range(n_samples):
+                R[i] = y[i] - ddot(n_features,
+                                   <DOUBLE*>(X.data + i * sizeof(DOUBLE)),
+                                   n_samples, <DOUBLE*>w.data, 1)
+
+            # tol *= np.dot(y, y)
+            tol *= ddot(n_samples, <DOUBLE*>y.data, n_tasks,
+                        <DOUBLE*>y.data, n_tasks)
+
+            for n_iter in range(max_iter):
+                w_max = 0.0
+                d_w_max = 0.0
+                for f_iter in range(n_features):  # Loop over coordinates
+                    if random:
+                        ii = rand_int(n_features, rand_r_state)
+                    else:
+                        ii = f_iter
+
+                    if norm_cols_X[ii] == 0.0:
+                        continue
+
+                    w_ii = w[ii]  # Store previous value
+
+                    if w_ii != 0.0:
+                        # R += w_ii * X[:,ii]
+                        daxpy(n_samples, w_ii,
+                              <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)),
+                              1, <DOUBLE*>R.data, 1)
+
+                    # tmp = (X[:,ii]*R).sum()
+                    tmp = ddot(n_samples,
+                               <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)),
+                               1, <DOUBLE*>R.data, 1)
+
+                    if positive and tmp < 0:
+                        w[ii] = 0.0
+                    else:
+                        w[ii] = (fsign(tmp) * fmax(fabs(tmp) - alpha, 0)
+                                 / (norm_cols_X[ii] + beta))
+
+                    if w[ii] != 0.0:
+                        # R -= w[ii] * X[:,ii] # Update residual
+                        daxpy(n_samples, -w[ii],
+                              <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)),
+                              1, <DOUBLE*>R.data, 1)
+
+                    # update the maximum absolute coefficient update
+                    d_w_ii = fabs(w[ii] - w_ii)
+                    if d_w_ii > d_w_max:
+                        d_w_max = d_w_ii
+
+                    if fabs(w[ii]) > w_max:
+                        w_max = fabs(w[ii])
+
+                if (w_max == 0.0
+                        or d_w_max / w_max < d_w_tol
+                        or n_iter == max_iter - 1):
+                    # the biggest coordinate update of this iteration was smaller
+                    # than the tolerance: check the duality gap as ultimate
+                    # stopping criterion
+
+                    # XtA = np.dot(X.T, R) - beta * w
+                    for i in range(n_features):
+                        XtA[i] = ddot(
+                            n_samples,
+                            <DOUBLE*>(X.data + i * n_samples *sizeof(DOUBLE)),
+                            1, <DOUBLE*>R.data, 1) - beta * w[i]
+
+                    if positive:
+                        dual_norm_XtA = max(n_features, <DOUBLE*>XtA.data)
+                    else:
+                        dual_norm_XtA = abs_max(n_features, <DOUBLE*>XtA.data)
+
+                    # R_norm2 = np.dot(R, R)
+                    R_norm2 = ddot(n_samples, <DOUBLE*>R.data, 1,
+                                   <DOUBLE*>R.data, 1)
+
+                    # w_norm2 = np.dot(w, w)
+                    w_norm2 = ddot(n_features, <DOUBLE*>w.data, 1,
+                                   <DOUBLE*>w.data, 1)
+
+                    if (dual_norm_XtA > alpha):
+                        const = alpha / dual_norm_XtA
+                        A_norm2 = R_norm2 * (const ** 2)
+                        gap = 0.5 * (R_norm2 + A_norm2)
+                    else:
+                        const = 1.0
+                        gap = R_norm2
+
+                    l1_norm = dasum(n_features, <DOUBLE*>w.data, 1)
+
+                    # np.dot(R.T, y)
+                    gap += (alpha * l1_norm - const * ddot(
+                        n_samples,
+                        <DOUBLE*>R.data, 1,
+                        <DOUBLE*>y.data, n_tasks)
+                        + 0.5 * beta * (1 + const ** 2) * (w_norm2))
+
+                    if gap < tol:
+                        # return if we reached desired tolerance
+                        break
+        else:
+            # R = y - np.dot(X, w)
+            for i in range(n_samples):
+                R[i] = y[i] - sdot(n_features,
+                                   <float*>(X.data + i * sizeof(float)),
+                                   n_samples, <float*>w.data, 1)
+
+            # tol *= np.dot(y, y)
+            tol *= sdot(n_samples, <float*>y.data, n_tasks,
+                        <float*>y.data, n_tasks)
+
+            for n_iter in range(max_iter):
+                w_max = 0.0
+                d_w_max = 0.0
+                for f_iter in range(n_features):  # Loop over coordinates
+                    if random:
+                        ii = rand_int(n_features, rand_r_state)
+                    else:
+                        ii = f_iter
+
+                    if norm_cols_X[ii] == 0.0:
+                        continue
+
+                    w_ii = w[ii]  # Store previous value
+
+                    if w_ii != 0.0:
+                        # R += w_ii * X[:,ii]
+                        saxpy(n_samples, w_ii,
+                              <float*>(X.data + ii * n_samples * sizeof(float)),
+                              1, <float*>R.data, 1)
+
+                    # tmp = (X[:,ii]*R).sum()
+                    tmp = sdot(n_samples,
+                               <float*>(X.data + ii * n_samples * sizeof(float)),
+                               1, <float*>R.data, 1)
+
+                    if positive and tmp < 0:
+                        w[ii] = 0.0
+                    else:
+                        w[ii] = (fsign(tmp) * fmax(fabs(tmp) - alpha, 0)
+                                 / (norm_cols_X[ii] + beta))
+
+                    if w[ii] != 0.0:
+                        # R -= w[ii] * X[:,ii] # Update residual
+                        saxpy(n_samples, -w[ii],
+                              <float*>(X.data + ii * n_samples * sizeof(float)),
+                              1, <float*>R.data, 1)
+
+                    # update the maximum absolute coefficient update
+                    d_w_ii = fabs(w[ii] - w_ii)
+                    if d_w_ii > d_w_max:
+                        d_w_max = d_w_ii
+
+                    if fabs(w[ii]) > w_max:
+                        w_max = fabs(w[ii])
+
+                if (w_max == 0.0
+                        or d_w_max / w_max < d_w_tol
+                        or n_iter == max_iter - 1):
+                    # the biggest coordinate update of this iteration was smaller
+                    # than the tolerance: check the duality gap as ultimate
+                    # stopping criterion
+
+                    # XtA = np.dot(X.T, R) - beta * w
+                    for i in range(n_features):
+                        XtA[i] = sdot(
+                            n_samples,
+                            <float*>(X.data + i * n_samples *sizeof(float)),
+                            1, <float*>R.data, 1) - beta * w[i]
+
+                    if positive:
+                        dual_norm_XtA = max(n_features, <float*>XtA.data)
+                    else:
+                        dual_norm_XtA = abs_max(n_features, <float*>XtA.data)
+
+                    # R_norm2 = np.dot(R, R)
+                    R_norm2 = sdot(n_samples, <float*>R.data, 1,
+                                   <float*>R.data, 1)
+
+                    # w_norm2 = np.dot(w, w)
+                    w_norm2 = sdot(n_features, <float*>w.data, 1,
+                                   <float*>w.data, 1)
+
+                    if (dual_norm_XtA > alpha):
+                        const = alpha / dual_norm_XtA
+                        A_norm2 = R_norm2 * (const ** 2)
+                        gap = 0.5 * (R_norm2 + A_norm2)
+                    else:
+                        const = 1.0
+                        gap = R_norm2
+
+                    l1_norm = sasum(n_features, <float*>w.data, 1)
+
+                    # np.dot(R.T, y)
+                    gap += (alpha * l1_norm - const * sdot(
+                        n_samples,
+                        <float*>R.data, 1,
+                        <float*>y.data, n_tasks)
+                        + 0.5 * beta * (1 + const ** 2) * (w_norm2))
+
+                    if gap < tol:
+                        # return if we reached desired tolerance
+                        break

     return w, gap, tol, n_iter + 1

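With both branches in place, one Python-visible entry point now serves float32 and float64 data without up-casting. A hypothetical end-to-end call, assuming the module builds as sklearn.linear_model.cd_fast and using the signature from this diff:

    import numpy as np
    from sklearn.linear_model import cd_fast  # assumed import path

    rng = np.random.RandomState(0)
    for dtype in (np.float32, np.float64):
        X = np.asfortranarray(rng.randn(50, 8), dtype=dtype)   # mode='fortran'
        y = np.ascontiguousarray(rng.randn(50).astype(dtype))  # mode='c'
        w = np.zeros(8, dtype=dtype)
        # alpha=1.0, beta=0.5, max_iter=1000, tol=1e-6; the Python scalars
        # are coerced to whichever specialization the arrays select.
        w, gap, tol, n_iter = cd_fast.enet_coordinate_descent(
            w, 1.0, 0.5, X, y, 1000, 1e-6, rng, 0, 0)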
0 commit comments