8000 [MRG+3] Fused types for MultiTaskElasticNet (#8061) · scikit-learn/scikit-learn@fcb706a · GitHub
[go: up one dir, main page]

Skip to content

Commit fcb706a

Browse files
tguillemotraghavrv
authored andcommitted
[MRG+3] Fused types for MultiTaskElasticNet (#8061)
* Convert ElasticNet Multioutput to floating. * Remove all the float64 coordinate_descent. * Add the necessary cblas for use fused types. * Fix zeros dtype issue in cd_fast. * Remove cblas files. * Change random seed to let test_lle_simple_grid pass. * Add tests to check floatting issue for MultiTaskElasticNet. * Update cblas_sscal.c
1 parent 096a9cb commit fcb706a

File tree

12 files changed

+1082
-60
lines changed

12 files changed

+1082
-60
lines changed

sklearn/linear_model/cd_fast.pyx

Lines changed: 79 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -122,14 +122,25 @@ cdef extern from "cblas.h":
122122
void dger "cblas_dger"(CBLAS_ORDER Order, int M, int N, double alpha,
123123
double *X, int incX, double *Y, int incY,
124124
double *A, int lda) nogil
125+
void sger "cblas_sger"(CBLAS_ORDER Order, int M, int N, float alpha,
126+
float *X, int incX, float *Y, int incY,
127+
float *A, int lda) nogil
125128
void dgemv "cblas_dgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
126129
int M, int N, double alpha, double *A, int lda,
127130
double *X, int incX, double beta,
128131
double *Y, int incY) nogil
132+
void sgemv "cblas_sgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
133+
int M, int N, float alpha, float *A, int lda,
134+
float *X, int incX, float beta,
135+
float *Y, int incY) nogil
129136
double dnrm2 "cblas_dnrm2"(int N, double *X, int incX) nogil
137+
float snrm2 "cblas_snrm2"(int N, float *X, int incX) nogil
130138
void dcopy "cblas_dcopy"(int N, double *X, int incX, double *Y,
131139
int incY) nogil
140+
void scopy "cblas_scopy"(int N, float *X, int incX, float *Y,
141+
int incY) nogil
132142
void dscal "cblas_dscal"(int N, double alpha, double *X, int incX) nogil
143+
void sscal "cblas_sscal"(int N, float alpha, float *X, int incX) nogil
133144

134145

135146
@cython.boundscheck(False)
@@ -686,11 +697,11 @@ def enet_coordinate_descent_gram(floating[:] w, floating alpha, floating beta,
686697
@cython.boundscheck(False)
687698
@cython.wraparound(False)
688699
@cython.cdivision(True)
689-
def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
690-
double l2_reg,
691-
np.ndarray[double, ndim=2, mode='fortran'] X,
692-
np.ndarray[double, ndim=2] Y,
693-
int max_iter, double tol, object rng,
700+
def enet_coordinate_descent_multi_task(floating[::1, :] W, floating l1_reg,
701+
floating l2_reg,
702+
np.ndarray[floating, ndim=2, mode='fortran'] X,
703+
np.ndarray[floating, ndim=2] Y,
704+
int max_iter, floating tol, object rng,
694705
bint random=0):
695706
"""Cython version of the coordinate descent algorithm
696707
for Elastic-Net mult-task regression
@@ -700,42 +711,68 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
700711
(1/2) * norm(y - X w, 2)^2 + l1_reg ||w||_21 + (1/2) * l2_reg norm(w, 2)^2
701712
702713
"""
714+
# fused types version of BLAS functions
715+
cdef DOT dot
716+
cdef AXPY axpy
717+
cdef ASUM asum
718+
719+
if floating is float:
720+
dtype = np.float32
721+
dot = sdot
722+
nrm2 = snrm2
723+
asum = sasum
724+
copy = scopy
725+
scal = sscal
726+
ger = sger
727+
gemv = sgemv
728+
else:
729+
dtype = np.float64
730+
dot = ddot
731+
nrm2 = dnrm2
732+
asum = dasum
733+
copy = dcopy
734+
scal = dscal
735+
ger = dger
736+
gemv = dgemv
737+
703738
# get the data information into easy vars
704739
cdef unsigned int n_samples = X.shape[0]
705740
cdef unsigned int n_features = X.shape[1]
706741
cdef unsigned int n_tasks = Y.shape[1]
707742

708743
# to store XtA
709-
cdef double[:, ::1] XtA = np.zeros((n_features, n_tasks))
710-
cdef double XtA_axis1norm
711-
cdef double dual_norm_XtA
744+
cdef floating[:, ::1] XtA = np.zeros((n_features, n_tasks), dtype=dtype)
745+
cdef floating XtA_axis1norm
746+
cdef floating dual_norm_XtA
712747

713748
# initial value of the residuals
714-
cdef double[:, ::1] R = np.zeros((n_samples, n_tasks))
715-
716-
cdef double[:] norm_cols_X = np.zeros(n_features)
717-
cdef double[::1] tmp = np.zeros(n_tasks, dtype=np.float)
718-
cdef double[:] w_ii = np.zeros(n_tasks, dtype=np.float)
719-
cdef double d_w_max
720-
cdef double w_max
721-
cdef double d_w_ii
722-
cdef double nn
723-
cdef double W_ii_abs_max
724-
cdef double gap = tol + 1.0
725-
cdef double d_w_tol = tol
726-
cdef double ry_sum
727-
cdef double l21_norm
749+
cdef floating[:, ::1] R = np.zeros((n_samples, n_tasks), dtype=dtype)
750+
751+
cdef floating[:] norm_cols_X = np.zeros(n_features, dtype=dtype)
752+
cdef floating[::1] tmp = np.zeros(n_tasks, dtype=dtype)
753+
cdef floating[:] w_ii = np.zeros(n_tasks, dtype=dtype)
754+
cdef floating d_w_max
755+
cdef floating w_max
756+
cdef floating d_w_ii
757+
cdef floating nn
758+
cdef floating W_ii_abs_max
759+
cdef floating gap = tol + 1.0
760+
cdef floating d_w_tol = tol
761+
cdef floating R_norm
762+
cdef floating w_norm
763+
cdef floating ry_sum
764+
cdef floating l21_norm
728765
cdef unsigned int ii
729766
cdef unsigned int jj
730767
cdef unsigned int n_iter = 0
731768
cdef unsigned int f_iter
732769
cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX)
733770
cdef UINT32_t* rand_r_state = &rand_r_state_seed
734771

735-
cdef double* X_ptr = &X[0, 0]
736-
cdef double* W_ptr = &W[0, 0]
737-
cdef double* Y_ptr = &Y[0, 0]
738-
cdef double* wii_ptr = &w_ii[0]
772+
cdef floating* X_ptr = &X[0, 0]
773+
cdef floating* W_ptr = &W[0, 0]
774+
cdef floating* Y_ptr = &Y[0, 0]
775+
cdef floating* wii_ptr = &w_ii[0]
739776

740777
if l1_reg == 0:
741778
warnings.warn("Coordinate descent with l1_reg=0 may lead to unexpected"
@@ -751,11 +788,11 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
751788
for ii in range(n_samples):
752789
for jj in range(n_tasks):
753790
R[ii, jj] = Y[ii, jj] - (
754-
ddot(n_features, X_ptr + ii, n_samples, W_ptr + jj, n_tasks)
791+
dot(n_features, X_ptr + ii, n_samples, W_ptr + jj, n_tasks)
755792
)
756793

757794
# tol = tol * linalg.norm(Y, ord='fro') ** 2
758-
tol = tol * dnrm2(n_samples * n_tasks, Y_ptr, 1) ** 2
795+
tol = tol * nrm2(n_samples * n_tasks, Y_ptr, 1) ** 2
759796

760797
for n_iter in range(max_iter):
761798
w_max = 0.0
@@ -770,33 +807,33 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
770807
continue
771808

772809
# w_ii = W[:, ii] # Store previous value
773-
dcopy(n_tasks, W_ptr + ii * n_tasks, 1, wii_ptr, 1)
810+
copy(n_tasks, W_ptr + ii * n_tasks, 1, wii_ptr, 1)
774811

775812
# if np.sum(w_ii ** 2) != 0.0: # can do better
776-
if dnrm2(n_tasks, wii_ptr, 1) != 0.0:
813+
if nrm2(n_tasks, wii_ptr, 1) != 0.0:
777814
# R += np.dot(X[:, ii][:, None], w_ii[None, :]) # rank 1 update
778-
dger(CblasRowMajor, n_samples, n_tasks, 1.0,
815+
ger(CblasRowMajor, n_samples, n_tasks, 1.0,
779816
X_ptr + ii * n_samples, 1,
780817
wii_ptr, 1, &R[0, 0], n_tasks)
781818

782819
# tmp = np.dot(X[:, ii][None, :], R).ravel()
783-
dgemv(CblasRowMajor, CblasTrans,
820+
gemv(CblasRowMajor, CblasTrans,
784821
n_samples, n_tasks, 1.0, &R[0, 0], n_tasks,
785822
X_ptr + ii * n_samples, 1, 0.0, &tmp[0], 1)
786823

787824
# nn = sqrt(np.sum(tmp ** 2))
788-
nn = dnrm2(n_tasks, &tmp[0], 1)
825+
nn = nrm2(n_tasks, &tmp[0], 1)
789826

790827
# W[:, ii] = tmp * fmax(1. - l1_reg / nn, 0) / (norm_cols_X[ii] + l2_reg)
791-
dcopy(n_tasks, &tmp[0], 1, W_ptr + ii * n_tasks, 1)
792-
dscal(n_tasks, fmax(1. - l1_reg / nn, 0) / (norm_cols_X[ii] + l2_reg),
828+
copy(n_tasks, &tmp[0], 1, W_ptr + ii * n_tasks, 1)
829+
scal(n_tasks, fmax(1. - l1_reg / nn, 0) / (norm_cols_X[ii] + l2_reg),
793830
W_ptr + ii * n_tasks, 1)
794831

795832
# if np.sum(W[:, ii] ** 2) != 0.0: # can do better
796-
if dnrm2(n_tasks, W_ptr + ii * n_tasks, 1) != 0.0:
833+
if nrm2(n_tasks, W_ptr + ii * n_tasks, 1) != 0.0:
797834
# R -= np.dot(X[:, ii][:, None], W[:, ii][None, :])
798835
# Update residual : rank 1 update
799-
dger(CblasRowMajor, n_samples, n_tasks, -1.0,
836+
ger(CblasRowMajor, n_samples, n_tasks, -1.0,
800837
X_ptr + ii * n_samples, 1, W_ptr + ii * n_tasks, 1,
801838
&R[0, 0], n_tasks)
802839

@@ -818,7 +855,7 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
818855
# XtA = np.dot(X.T, R) - l2_reg * W.T
819856
for ii in range(n_features):
820857
for jj in range(n_tasks):
821-
XtA[ii, jj] = ddot(
858+
XtA[ii, jj] = dot(
822859
n_samples, X_ptr + ii * n_samples, 1,
823860
&R[0, 0] + jj, n_tasks
824861
) - l2_reg * W[jj, ii]
@@ -827,15 +864,15 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
827864
dual_norm_XtA = 0.0
828865
for ii in range(n_features):
829866
# np.sqrt(np.sum(XtA ** 2, axis=1))
830-
XtA_axis1norm = dnrm2(n_tasks, &XtA[0, 0] + ii * n_tasks, 1)
867+
XtA_axis1norm = nrm2(n_tasks, &XtA[0, 0] + ii * n_tasks, 1)
831868
if XtA_axis1norm > dual_norm_XtA:
832869
dual_norm_XtA = XtA_axis1norm
833870

834871
# TODO: use squared L2 norm directly
835872
# R_norm = linalg.norm(R, ord='fro')
836873
# w_norm = linalg.norm(W, ord='fro')
837-
R_norm = dnrm2(n_samples * n_tasks, &R[0, 0], 1)
838-
w_norm = dnrm2(n_features * n_tasks, W_ptr, 1)
874+
R_norm = nrm2(n_samples * n_tasks, &R[0, 0], 1)
875+
w_norm = nrm2(n_features * n_tasks, W_ptr, 1)
839876
if (dual_norm_XtA > l1_reg):
840877
const = l1_reg / dual_norm_XtA
841878
A_norm = R_norm * const
@@ -854,7 +891,7 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
854891
l21_norm = 0.0
855892
for ii in range(n_features):
856893
# np.sqrt(np.sum(W ** 2, axis=0))
857-
l21_norm += dnrm2(n_tasks, W_ptr + n_tasks * ii, 1)
894+
l21_norm += nrm2(n_tasks, W_ptr + n_tasks * ii, 1)
858895

859896
gap += l1_reg * l21_norm - const * ry_sum + \
860897
0.5 * l2_reg * (1 + const ** 2) * (w_norm ** 2)

sklearn/linear_model/coordinate_descent.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
459459
# We expect precompute to be already Fortran ordered when bypassing
460460
# checks
461461
if check_input:
462-
precompute = check_array(precompute, dtype=np.float64,
462+
precompute = check_array(precompute, dtype=X.dtype.type,
463463
order='C')
464464
model = cd_fast.enet_coordinate_descent_gram(
465465
coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
@@ -733,7 +733,7 @@ def fit(self, X, y, check_input=True):
733733
self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
734734
self._set_intercept(X_offset, y_offset, X_scale)
735735

736-
# workaround since _set_intercept will cast self.coef_ into float64
736+
# workaround since _set_intercept will cast self.coef_ into X.dtype
737737
self.coef_ = np.asarray(self.coef_, dtype=X.dtype)
738738

739739
# return self for chaining fit and predict calls
@@ -1038,14 +1038,15 @@ def fit(self, X, y):
10381038
Parameters
10391039
----------
10401040
X : {array-like}, shape (n_samples, n_features)
1041-
Training data. Pass directly as float64, Fortran-contiguous data
1041+
Training data. Pass directly as Fortran-contiguous data
10421042
to avoid unnecessary memory duplication. If y is mono-output,
10431043
X can be sparse.
10441044
10451045
y : array-like, shape (n_samples,) or (n_samples, n_targets)
10461046
Target values
10471047
"""
1048-
y = np.asarray(y, dtype=np.float64)
1048+
y = check_array(y, copy=False, dtype=[np.float64, np.float32],
1049+
ensure_2d=False)
10491050
if y.shape[0] == 0:
10501051
raise ValueError("y has 0 samples: %r" % y)
10511052

@@ -1087,7 +1088,7 @@ def fit(self, X, y):
10871088
if isinstance(X, np.ndarray) or sparse.isspmatrix(X):
10881089
# Keep a reference to X
10891090
reference_to_old_X = X
1090-
# Let us not impose fortran ordering or float64 so far: it is
1091+
# Let us not impose fortran ordering so far: it is
10911092
# not useful for the cross-validation loop and will be done
10921093
# by the model fitting itself
10931094
X = check_array(X, 'csc', copy=False)
@@ -1101,7 +1102,8 @@ def fit(self, X, y):
11011102
copy_X = False
11021103
del reference_to_old_X
11031104
else:
1104-
X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X)
1105+
X = check_array(X, 'csc', dtype=[np.float64, np.float32],
1106+
order='F', copy=copy_X)
11051107
copy_X = False
11061108

11071109
if X.shape[0] != y.shape[0]:
@@ -1155,7 +1157,7 @@ def fit(self, X, y):
11551157
jobs = (delayed(_path_residuals)(X, y, train, test, self.path,
11561158
path_params, alphas=this_alphas,
11571159
l1_ratio=this_l1_ratio, X_order='F',
1158-
dtype=np.float64)
1160+
dtype=X.dtype.type)
11591161
for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)
11601162
for train, test in folds)
11611163
mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
@@ -1675,10 +1677,9 @@ def fit(self, X, y):
16751677
To avoid memory re-allocation it is advised to allocate the
16761678
initial data in memory directly using that format.
16771679
"""
1678-
# X and y must be of type float64
1679-
X = check_array(X, dtype=np.float64, order='F',
1680+
X = check_array(X, dtype=[np.float64, np.float32], order='F',
16801681
copy=self.copy_X and self.fit_intercept)
1681-
y = check_array(y, dtype=np.float64, ensure_2d=False)
1682+
y = check_array(y, dtype=X.dtype.type, ensure_2d=False)
16821683

16831684
if hasattr(self, 'l1_ratio'):
16841685
model_str = 'ElasticNet'
@@ -1698,7 +1699,7 @@ def fit(self, X, y):
16981699
X, y, self.fit_intercept, self.normalize, copy=False)
16991700

17001701
if not self.warm_start or self.coef_ is None:
1701-
self.coef_ = np.zeros((n_tasks, n_features), dtype=np.float64,
1702+
self.coef_ = np.zeros((n_tasks, n_features), dtype=X.dtype.type,
17021703
order='F')
17031704

17041705
l1_reg = self.alpha * self.l1_ratio * n_samples

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -691,8 +691,8 @@ def test_enet_float_precision():
691691
y = dtype(y)
692692
ignore_warnings(clf.fit)(X, y)
693693

694-
coef[dtype] = clf.coef_
695-
intercept[dtype] = clf.intercept_
694+
coef[('simple', dtype)] = clf.coef_
695+
intercept[('simple', dtype)] = clf.intercept_
696696

697697
assert_equal(clf.coef_.dtype, dtype)
698698

@@ -707,11 +707,23 @@ def test_enet_float_precision():
707707
assert_array_almost_equal(clf.intercept_,
708708
clf_precompute.intercept_)
709709

710-
assert_array_almost_equal(coef[np.float32], coef[np.float64],
711-
decimal=4)
712-
assert_array_almost_equal(intercept[np.float32],
713-
intercept[np.float64],
714-
decimal=4)
710+
# test multi task enet
711+
multi_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
712+
clf_multioutput = MultiTaskElasticNet(
713+
alpha=0.5, max_iter=100, fit_intercept=fit_intercept,
714+
normalize=normalize)
715+
clf_multioutput.fit(X, multi_y)
716+
coef[('multi', dtype)] = clf_multioutput.coef_
717+
intercept[('multi', dtype)] = clf_multioutput.intercept_
718+
assert_equal(clf.coef_.dtype, dtype)
719+
720+
for v in ['simple', 'multi']:
721+
assert_array_almost_equal(coef[(v, np.float32)],
722+
coef[(v, np.float64)],
723+
decimal=4)
724+
assert_array_almost_equal(intercept[(v, np.float32)],
725+
intercept[(v, np.float64)],
726+
decimal=4)
715727

716728

717729
def test_enet_l1_ratio():

0 commit comments

Comments
 (0)
0