@@ -122,14 +122,25 @@ cdef extern from "cblas.h":
     void dger "cblas_dger"(CBLAS_ORDER Order, int M, int N, double alpha,
                            double *X, int incX, double *Y, int incY,
                            double *A, int lda) nogil
+    void sger "cblas_sger"(CBLAS_ORDER Order, int M, int N, float alpha,
+                           float *X, int incX, float *Y, int incY,
+                           float *A, int lda) nogil
     void dgemv "cblas_dgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
                              int M, int N, double alpha, double *A, int lda,
                              double *X, int incX, double beta,
                              double *Y, int incY) nogil
+    void sgemv "cblas_sgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
+                             int M, int N, float alpha, float *A, int lda,
+                             float *X, int incX, float beta,
+                             float *Y, int incY) nogil
     double dnrm2 "cblas_dnrm2"(int N, double *X, int incX) nogil
+    float snrm2 "cblas_snrm2"(int N, float *X, int incX) nogil
     void dcopy "cblas_dcopy"(int N, double *X, int incX, double *Y,
                              int incY) nogil
+    void scopy "cblas_scopy"(int N, float *X, int incX, float *Y,
+                             int incY) nogil
     void dscal "cblas_dscal"(int N, double alpha, double *X, int incX) nogil
+    void sscal "cblas_sscal"(int N, float alpha, float *X, int incX) nogil


 @cython.boundscheck(False)
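The new single-precision prototypes mirror the existing double-precision ones; the quoted string after each name (e.g. "cblas_sger") binds the Cython identifier to the underlying C symbol. Below is a minimal sketch of how such paired externs get selected at compile time through Cython's `floating` fused type — the `NRM2` typedef and `euclidean_norm` helper are illustrative only, not part of this patch, and they assume a cblas.h on the include path:

    # sketch.pyx -- illustration only, not part of the diff
    from cython cimport floating

    cdef extern from "cblas.h":
        double dnrm2 "cblas_dnrm2"(int N, double *X, int incX) nogil
        float snrm2 "cblas_snrm2"(int N, float *X, int incX) nogil

    # Hypothetical function-pointer type over the fused `floating` type
    # (float or double); the patch uses the same idea for DOT/AXPY/ASUM.
    ctypedef floating (*NRM2)(int N, floating *X, int incX) nogil

    def euclidean_norm(floating[::1] x):
        """Return ||x||_2 using the BLAS routine matching x's precision."""
        cdef NRM2 nrm2
        if floating is float:   # resolved at compile time per specialization
            nrm2 = snrm2
        else:
            nrm2 = dnrm2
        return nrm2(x.shape[0], &x[0], 1)

For each specialization of `floating`, only the matching branch of `floating is float` survives compilation, so the selection carries no runtime cost.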
@@ -686,11 +697,11 @@ def enet_coordinate_descent_gram(floating[:] w, floating alpha, floating beta,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
-                                       double l2_reg,
-                                       np.ndarray[double, ndim=2, mode='fortran'] X,
-                                       np.ndarray[double, ndim=2] Y,
-                                       int max_iter, double tol, object rng,
+def enet_coordinate_descent_multi_task(floating[::1, :] W, floating l1_reg,
+                                       floating l2_reg,
+                                       np.ndarray[floating, ndim=2, mode='fortran'] X,
+                                       np.ndarray[floating, ndim=2] Y,
+                                       int max_iter, floating tol, object rng,
                                        bint random=0):
     """Cython version of the coordinate descent algorithm
        for Elastic-Net mult-task regression
@@ -700,42 +711,68 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
         (1/2) * norm(y - X w, 2)^2 + l1_reg ||w||_21 + (1/2) * l2_reg norm(w, 2)^2

     """
+    # fused types version of BLAS functions
+    cdef DOT dot
+    cdef AXPY axpy
+    cdef ASUM asum
+
+    if floating is float:
+        dtype = np.float32
+        dot = sdot
+        nrm2 = snrm2
+        asum = sasum
+        copy = scopy
+        scal = sscal
+        ger = sger
+        gemv = sgemv
+    else:
+        dtype = np.float64
+        dot = ddot
+        nrm2 = dnrm2
+        asum = dasum
+        copy = dcopy
+        scal = dscal
+        ger = dger
+        gemv = dgemv
+
     # get the data information into easy vars
     cdef unsigned int n_samples = X.shape[0]
     cdef unsigned int n_features = X.shape[1]
     cdef unsigned int n_tasks = Y.shape[1]

     # to store XtA
-    cdef double[:, ::1] XtA = np.zeros((n_features, n_tasks))
-    cdef double XtA_axis1norm
-    cdef double dual_norm_XtA
+    cdef floating[:, ::1] XtA = np.zeros((n_features, n_tasks), dtype=dtype)
+    cdef floating XtA_axis1norm
+    cdef floating dual_norm_XtA

     # initial value of the residuals
-    cdef double[:, ::1] R = np.zeros((n_samples, n_tasks))
-
-    cdef double[:] norm_cols_X = np.zeros(n_features)
-    cdef double[::1] tmp = np.zeros(n_tasks, dtype=np.float)
-    cdef double[:] w_ii = np.zeros(n_tasks, dtype=np.float)
-    cdef double d_w_max
-    cdef double w_max
-    cdef double d_w_ii
-    cdef double nn
-    cdef double W_ii_abs_max
-    cdef double gap = tol + 1.0
-    cdef double d_w_tol = tol
-    cdef double ry_sum
-    cdef double l21_norm
+    cdef floating[:, ::1] R = np.zeros((n_samples, n_tasks), dtype=dtype)
+
+    cdef floating[:] norm_cols_X = np.zeros(n_features, dtype=dtype)
+    cdef floating[::1] tmp = np.zeros(n_tasks, dtype=dtype)
+    cdef floating[:] w_ii = np.zeros(n_tasks, dtype=dtype)
+    cdef floating d_w_max
+    cdef floating w_max
+    cdef floating d_w_ii
+    cdef floating nn
+    cdef floating W_ii_abs_max
+    cdef floating gap = tol + 1.0
+    cdef floating d_w_tol = tol
+    cdef floating R_norm
+    cdef floating w_norm
+    cdef floating ry_sum
+    cdef floating l21_norm
     cdef unsigned int ii
     cdef unsigned int jj
     cdef unsigned int n_iter = 0
     cdef unsigned int f_iter
     cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX)
     cdef UINT32_t* rand_r_state = &rand_r_state_seed

-    cdef double* X_ptr = &X[0, 0]
-    cdef double* W_ptr = &W[0, 0]
-    cdef double* Y_ptr = &Y[0, 0]
-    cdef double* wii_ptr = &w_ii[0]
+    cdef floating* X_ptr = &X[0, 0]
+    cdef floating* W_ptr = &W[0, 0]
+    cdef floating* Y_ptr = &Y[0, 0]
+    cdef floating* wii_ptr = &w_ii[0]

     if l1_reg == 0:
         warnings.warn("Coordinate descent with l1_reg=0 may lead to unexpected"
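For reference, the objective quoted in the docstring at the top of this hunk is unchanged by the precision work. Writing it out, with W assumed to hold one row per task and one column per feature (as the `W_ptr + ii * n_tasks` strides used below suggest):

$$
\frac{1}{2}\,\lVert Y - X W^{\top} \rVert_{F}^{2}
\;+\; \texttt{l1\_reg}\,\lVert W \rVert_{21}
\;+\; \frac{\texttt{l2\_reg}}{2}\,\lVert W \rVert_{F}^{2},
\qquad
\lVert W \rVert_{21} \;=\; \sum_{i=1}^{n_{\text{features}}} \sqrt{\sum_{t=1}^{n_{\text{tasks}}} W_{t i}^{2}} .
$$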
@@ -751,11 +788,11 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
         for ii in range(n_samples):
             for jj in range(n_tasks):
                 R[ii, jj] = Y[ii, jj] - (
-                    ddot(n_features, X_ptr + ii, n_samples, W_ptr + jj, n_tasks)
+                    dot(n_features, X_ptr + ii, n_samples, W_ptr + jj, n_tasks)
                 )

         # tol = tol * linalg.norm(Y, ord='fro') ** 2
-        tol = tol * dnrm2(n_samples * n_tasks, Y_ptr, 1) ** 2
+        tol = tol * nrm2(n_samples * n_tasks, Y_ptr, 1) ** 2

         for n_iter in range(max_iter):
             w_max = 0.0
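In NumPy terms, the two statements touched in this hunk amount to the following (a sketch only, reusing the function's variable names; only the BLAS entry points changed, not the arithmetic):

    # illustration only -- residual initialisation and tolerance rescaling
    R = Y - X @ W.T
    tol = tol * np.linalg.norm(Y, ord='fro') ** 2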
@@ -770,33 +807,33 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
                     continue

                 # w_ii = W[:, ii] # Store previous value
-                dcopy(n_tasks, W_ptr + ii * n_tasks, 1, wii_ptr, 1)
+                copy(n_tasks, W_ptr + ii * n_tasks, 1, wii_ptr, 1)

                 # if np.sum(w_ii ** 2) != 0.0:  # can do better
-                if dnrm2(n_tasks, wii_ptr, 1) != 0.0:
+                if nrm2(n_tasks, wii_ptr, 1) != 0.0:
                     # R += np.dot(X[:, ii][:, None], w_ii[None, :]) # rank 1 update
-                    dger(CblasRowMajor, n_samples, n_tasks, 1.0,
+                    ger(CblasRowMajor, n_samples, n_tasks, 1.0,
                          X_ptr + ii * n_samples, 1,
                          wii_ptr, 1, &R[0, 0], n_tasks)

                 # tmp = np.dot(X[:, ii][None, :], R).ravel()
-                dgemv(CblasRowMajor, CblasTrans,
+                gemv(CblasRowMajor, CblasTrans,
                       n_samples, n_tasks, 1.0, &R[0, 0], n_tasks,
                       X_ptr + ii * n_samples, 1, 0.0, &tmp[0], 1)

                 # nn = sqrt(np.sum(tmp ** 2))
-                nn = dnrm2(n_tasks, &tmp[0], 1)
+                nn = nrm2(n_tasks, &tmp[0], 1)

                 # W[:, ii] = tmp * fmax(1. - l1_reg / nn, 0) / (norm_cols_X[ii] + l2_reg)
-                dcopy(n_tasks, &tmp[0], 1, W_ptr + ii * n_tasks, 1)
-                dscal(n_tasks, fmax(1. - l1_reg / nn, 0) / (norm_cols_X[ii] + l2_reg),
+                copy(n_tasks, &tmp[0], 1, W_ptr + ii * n_tasks, 1)
+                scal(n_tasks, fmax(1. - l1_reg / nn, 0) / (norm_cols_X[ii] + l2_reg),
                       W_ptr + ii * n_tasks, 1)

                 # if np.sum(W[:, ii] ** 2) != 0.0:  # can do better
-                if dnrm2(n_tasks, W_ptr + ii * n_tasks, 1) != 0.0:
+                if nrm2(n_tasks, W_ptr + ii * n_tasks, 1) != 0.0:
                     # R -= np.dot(X[:, ii][:, None], W[:, ii][None, :])
                     # Update residual : rank 1 update
-                    dger(CblasRowMajor, n_samples, n_tasks, -1.0,
+                    ger(CblasRowMajor, n_samples, n_tasks, -1.0,
                          X_ptr + ii * n_samples, 1, W_ptr + ii * n_tasks, 1,
                          &R[0, 0], n_tasks)

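Pulling the commented NumPy equivalents of this hunk together, the per-feature block update that the renamed BLAS calls implement looks roughly like this (sketch only; same variable names as the Cython code, W of shape (n_tasks, n_features), and nn assumed nonzero):

    import numpy as np

    def update_feature_block(ii, X, R, W, w_ii, norm_cols_X, l1_reg, l2_reg):
        # illustration only -- NumPy restatement of the copy/ger/gemv/nrm2/scal calls
        w_ii[:] = W[:, ii]                         # copy: store previous value
        if np.dot(w_ii, w_ii) != 0.0:
            R += np.outer(X[:, ii], w_ii)          # ger: rank-1 update of residual
        tmp = X[:, ii] @ R                         # gemv: X[:, ii].T dotted with R
        nn = np.linalg.norm(tmp)                   # nrm2
        # copy + scal: group soft-thresholding of the whole block W[:, ii]
        W[:, ii] = tmp * max(1.0 - l1_reg / nn, 0.0) / (norm_cols_X[ii] + l2_reg)
        if np.dot(W[:, ii], W[:, ii]) != 0.0:
            R -= np.outer(X[:, ii], W[:, ii])      # ger: put the new block back in

The only change in this hunk is that each d* BLAS call now goes through the fused-type alias chosen at the top of the function, so the same update runs in float32 or float64.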
@@ -818,7 +855,7 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
                 # XtA = np.dot(X.T, R) - l2_reg * W.T
                 for ii in range(n_features):
                     for jj in range(n_tasks):
-                        XtA[ii, jj] = ddot(
+                        XtA[ii, jj] = dot(
                             n_samples, X_ptr + ii * n_samples, 1,
                             &R[0, 0] + jj, n_tasks
                             ) - l2_reg * W[jj, ii]
@@ -827,15 +864,15 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
                 dual_norm_XtA = 0.0
                 for ii in range(n_features):
                     # np.sqrt(np.sum(XtA ** 2, axis=1))
-                    XtA_axis1norm = dnrm2(n_tasks, &XtA[0, 0] + ii * n_tasks, 1)
+                    XtA_axis1norm = nrm2(n_tasks, &XtA[0, 0] + ii * n_tasks, 1)
                     if XtA_axis1norm > dual_norm_XtA:
                         dual_norm_XtA = XtA_axis1norm

                 # TODO: use squared L2 norm directly
                 # R_norm = linalg.norm(R, ord='fro')
                 # w_norm = linalg.norm(W, ord='fro')
-                R_norm = dnrm2(n_samples * n_tasks, &R[0, 0], 1)
-                w_norm = dnrm2(n_features * n_tasks, W_ptr, 1)
+                R_norm = nrm2(n_samples * n_tasks, &R[0, 0], 1)
+                w_norm = nrm2(n_features * n_tasks, W_ptr, 1)
                 if (dual_norm_XtA > l1_reg):
                     const = l1_reg / dual_norm_XtA
                     A_norm = R_norm * const
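Continuing the NumPy sketch above, the two stopping-criterion hunks restate as (illustration only, same names; Frobenius norms as in the original comments):

    XtA = X.T @ R - l2_reg * W.T                       # shape (n_features, n_tasks)
    dual_norm_XtA = np.max(np.sqrt(np.sum(XtA ** 2, axis=1)))
    R_norm = np.linalg.norm(R, ord='fro')
    w_norm = np.linalg.norm(W, ord='fro')
    if dual_norm_XtA > l1_reg:
        const = l1_reg / dual_norm_XtA                 # rescale R to be dual-feasible
        A_norm = R_norm * const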
@@ -854,7 +891,7 @@ def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
                 l21_norm = 0.0
                 for ii in range(n_features):
                     # np.sqrt(np.sum(W ** 2, axis=0))
-                    l21_norm += dnrm2(n_tasks, W_ptr + n_tasks * ii, 1)
+                    l21_norm += nrm2(n_tasks, W_ptr + n_tasks * ii, 1)

                 gap += l1_reg * l21_norm - const * ry_sum + \
                       0.5 * l2_reg * (1 + const ** 2) * (w_norm ** 2)
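The terms accumulated into the gap here correspond to the following (assuming, from its name, that ry_sum is the elementwise inner product of R and Y, computed outside this hunk):

$$
\text{gap} \mathrel{+}= \texttt{l1\_reg}\,\lVert W\rVert_{21}
\;-\; \text{const}\,\langle R, Y\rangle
\;+\; \tfrac{1}{2}\,\texttt{l2\_reg}\,\bigl(1+\text{const}^{2}\bigr)\,\lVert W\rVert_{F}^{2} .
$$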