scikit-learn
diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py
Lines changed: 93 additions & 121 deletions
diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
Lines changed: 3 additions & 2 deletions
@@ -309,19 +309,28 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
     return code
 
 
-def _update_dict(dictionary, Y, code, verbose=False, return_r2=False,
-                 random_state=None):
+def _proj_l2(v):
+    """Project v onto the l2 unit ball in-place: rescale if ||v||_2 > 1.
+    """
+    vv = np.dot(v, v)  # squared l2 norm when v is 1-D
+    if vv > 1.:
+        v /= sqrt(vv)  # in-place, so a view passed by the caller is updated
+    return v
+
+
+def _update_dict(dictionary, B, A, verbose=False, return_r2=False,
+                 random_state=None, online=False):
     """Update the dense dictionary factor in place.
 
     Parameters
     ----------
     dictionary : array of shape (n_features, n_components)
         Value of the dictionary at the previous iteration.
 
-    Y : array of shape (n_features, n_samples)
+    B : array of shape (n_features, n_components)
         Data matrix.
 
-    code : array of shape (n_components, n_samples)
+    A : array of shape (n_components, n_components)
         Sparse coding of the data against which to optimize the dictionary.
 
     verbose:
@@ -343,35 +352,32 @@ def _update_dict(dictionary, Y, code, verbose=False, return_r2=False,
         Updated dictionary.
 
     """
-    n_components = len(code)
-    n_samples = Y.shape[0]
+    n_features, n_components = B.shape
     random_state = check_random_state(random_state)
     # Residuals, computed 'in-place' for efficiency
-    R = -np.dot(dictionary, code)
-    R += Y
+    R = -np.dot(dictionary, A)
+    R += B
     R = np.asfortranarray(R)
-    ger, = linalg.get_blas_funcs(('ger',), (dictionary, code))
+    ger, = linalg.get_blas_funcs(('ger',), (dictionary, A))
     for k in range(n_components):
         # R <- 1.0 * U_k * V_k^T + R
-        R = ger(1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)
-        dictionary[:, k] = np.dot(R, code[k, :].T)
+        R = ger(1.0, dictionary[:, k], A[k, :], a=R, overwrite_a=True)
+        dictionary[:, k] = np.dot(R, A[k, :])
         # Scale k'th atom
-        atom_norm_square = np.dot(dictionary[:, k], dictionary[:, k])
-        if atom_norm_square < 1e-20:
+        if A[k, k] < 1e-20:
             if verbose == 1:
                 sys.stdout.write("+")
                 sys.stdout.flush()
             elif verbose:
-            dictionary[:, k] = random_state.randn(n_samples)
+            dictionary[:, k] = random_state.randn(n_features)
             # Setting corresponding coefs to 0
-            code[k, :] = 0.0
-            dictionary[:, k] /= sqrt(np.dot(dictionary[:, k],
-                                            dictionary[:, k]))
+            A[k, :] = 0.
         else:
-            dictionary[:, k] /= sqrt(atom_norm_square)
-            # R <- -1.0 * U_k * V_k^T + R
-            R = ger(-1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)
+            dictionary[:, k] /= A[k, k]
+        _proj_l2(dictionary[:, k])
+        # R <- -1.0 * U_k * V_k^T + R
+        R = ger(-1.0, dictionary[:, k], A[k, :], a=R, overwrite_a=True)
     if return_r2:
         R **= 2
         # R is fortran-ordered. For numpy version < 1.6, sum does not
@@ -472,98 +478,39 @@ def dict_learning(X, n_components, alpha, max_iter=100, tol=1e-8,
     SparsePCA
     MiniBatchSparsePCA
     """
-    if method not in ('lars', 'cd'):
-        raise ValueError('Coding method %r not supported as a fit algorithm.'
-                         % method)
-    method = 'lasso_' + method
+    return dict_learning_online(
+        X, n_components=n_components, alpha=alpha, n_iter=max_iter,
+        return_code=True, dict_init=dict_init, callback=callback,
+        batch_size=len(X), verbose=verbose, shuffle=False,
+        return_n_iter=return_n_iter, n_jobs=n_jobs, method=method,
+        return_inner_stats=False, tol=tol)
 
-    t0 = time.time()
-    # Avoid integer division problems
-    alpha = float(alpha)
-    random_state = check_random_state(random_state)
 
-    if n_jobs == -1:
-        n_jobs = cpu_count()
+def _compute_residuals_from_code(X, V, U):
+    """Computes ||X.T - VU||_F^2 directly.
 
-    # Init the code and the dictionary with SVD of Y
-    if code_init is not None and dict_init is not None:
-        code = np.array(code_init, order='F')
-        # Don't copy V, it will happen below
-        dictionary = dict_init
-    else:
-        code, S, dictionary = linalg.svd(X, full_matrices=False)
-        dictionary = S[:, np.newaxis] * dictionary
-    r = len(dictionary)
-    if n_components <= r:  # True even if n_components=None
-        code = code[:, :n_components]
-        dictionary = dictionary[:n_components, :]
-    else:
-        code = np.c_[code, np.zeros((len(code), n_components - r))]
-        dictionary = np.r_[dictionary,
-                           np.zeros((n_components - r, dictionary.shape[1]))]
-
-    # Fortran-order dict, as we are going to access its row vectors
-    dictionary = np.array(dictionary, order='F')
-
-    residuals = 0
-
-    errors = []
-    current_cost = np.nan
-
-    if verbose == 1:
-        print('[dict_learning]', end=' ')
-
-    # If max_iter is 0, number of iterations returned should be zero
-    ii = -1
-
-    for ii in range(max_iter):
-        dt = (time.time() - t0)
-        if verbose == 1:
-            sys.stdout.write(".")
-            sys.stdout.flush()
-        elif verbose:
-            print("Iteration % 3i "
-                  "(elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)"
-                  % (ii, dt, dt / 60, current_cost))
-
-        # Update code
-        code = sparse_encode(X, dictionary, algorithm=method, alpha=alpha,
-                             init=code, n_jobs=n_jobs)
-        # Update dictionary
-        dictionary, residuals = _update_dict(dictionary.T, X.T, code.T,
-                                             verbose=verbose, return_r2=True,
-                                             random_state=random_state)
-        dictionary = dictionary.T
-
-        # Cost function
-        current_cost = 0.5 * residuals + alpha * np.sum(np.abs(code))
-        errors.append(current_cost)
-
-        if ii > 0:
-            dE = errors[-2] - errors[-1]
-            # assert(dE >= -tol * errors[-1])
-            if dE < tol * errors[-1]:
-                if verbose == 1:
-                    # A line return
-                    print("")
-                elif verbose:
-                    print("--- Convergence reached after %d iterations" % ii)
-                break
-        if ii % 5 == 0 and callback is not None:
-            callback(locals())
-
-    if return_n_iter:
-        return code, dictionary, errors, ii + 1
-    else:
-        return code, dictionary, errors
+    Parameters
+    ----------
+    X : ndarray, shape (n_samples, n_features)
+        The input data.
+    V : ndarray, shape (n_features, n_components)
+        The dictionary.
+    U : ndarray, shape (n_components, n_samples)
+        The codes.
+    """
+    residuals = V.dot(U)
+    residuals -= X.T
+    residuals **= 2
+    residuals = np.sum(residuals)
+    return residuals
 
 
 def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
                          return_code=True, dict_init=None, callback=None,
-                         batch_size=3, verbose=False, shuffle=True, n_jobs=1,
-                         method='lars', iter_offset=0, random_state=None,
-                         return_inner_stats=False, inner_stats=None,
-                         return_n_iter=False):
+                         batch_size=None, verbose=False, shuffle=True,
+                         n_jobs=1, method='lars', iter_offset=0, tol=0.,
+                         random_state=None, return_inner_stats=False,
+                         inner_stats=None, return_n_iter=False):
     """Solves a dictionary learning matrix factorization problem online.
 
     Finds the best dictionary and the corresponding sparse code for
@@ -711,6 +658,9 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
                              copy=False)
     X_train = check_array(X_train, order='C', dtype=np.float64, copy=False)
 
+    if batch_size is None:
+        batch_size = n_samples
+    online = batch_size < n_samples
     batches = gen_batches(n_samples, batch_size)
     batches = itertools.cycle(batches)
 
     # If n_iter is zero, we need to return zero.
     ii = iter_offset - 1
 
+    err = 0.
+    errors = []
     for ii, batch in zip(range(iter_offset, iter_offset + n_iter), batches):
         this_X = X_train[batch]
         dt = (time.time() - t0)
@@ -741,26 +693,46 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
                                   alpha=alpha, n_jobs=n_jobs).T
 
         # Update the auxiliary variables
-        if ii < batch_size - 1:
-            theta = float((ii + 1) * batch_size)
+        if online:
+            if ii < batch_size - 1:
+                theta = float((ii + 1) * batch_size)
+            else:
+                theta = float(batch_size ** 2 + ii + 1 - batch_size)
+            beta = (theta + 1 - batch_size) / (theta + 1)
         else:
-            theta = float(batch_size ** 2 + ii + 1 - batch_size)
-        beta = (theta + 1 - batch_size) / (theta + 1)
-
+            beta = 0.
         A *= beta
         A += np.dot(this_code, this_code.T)
         B *= beta
         B += np.dot(this_X.T, this_code.T)
 
         # Update dictionary
         dictionary = _update_dict(dictionary, B, A, verbose=verbose,
-                                  random_state=random_state)
-        # XXX: Can the residuals be of any use?
-
-        # Maybe we need a stopping criteria based on the amount of
-        # modification in the dictionary
-        if callback is not None:
-            callback(locals())
+                                  random_state=random_state, online=True,
+                                  return_r2=False)
+
+        # Check convergence
+        if not online and callback is None:
+            residuals = _compute_residuals_from_code(this_X, dictionary,
+                                                     this_code)
+            err = .5 * residuals + alpha * np.sum(np.abs(this_code))
+            errors.append(err)
+            if len(errors) > 1:
+                dE = errors[-2] - errors[-1]
+                # assert(dE >= -tol * errors[-1])
+                if np.abs(dE) < tol * errors[-1]:
+                    if verbose == 1:
+                        # A line return
+                        print("")
+                    elif verbose:
+                        print(
+                            "--- Convergence reached after %d iterations" % ii)
+                    break
+        elif callback is not None:
+            # Maybe we need a stopping criteria based on the amount of
+            # modification in the dictionary
+            if not callback(locals()):
+                break
 
     if return_inner_stats:
         if return_n_iter:
@@ -778,14 +750,14 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
             dt = (time.time() - t0)
             print('done (total time: % 3is, % 4.1fmn)' % (dt, dt / 60))
         if return_n_iter:
-            return code, dictionary.T, ii - iter_offset + 1
+            return code, dictionary.T, errors, ii - iter_offset + 1
         else:
-            return code, dictionary.T
+            return code, dictionary.T, errors
 
     if return_n_iter:
-        return dictionary.T, ii - iter_offset + 1
+        return dictionary.T, errors, ii - iter_offset + 1
     else:
-        return dictionary.T
+        return dictionary.T, errors
 
 
 class SparseCodingMixin(TransformerMixin):
 
@@ -13,6 +13,7 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.testing import TempMemmap
+from sklearn.utils.testing import assert_almost_equal
 
 from sklearn.decomposition import DictionaryLearning
 from sklearn.decomposition import MiniBatchDictionaryLearning
@@ -128,8 +129,8 @@ def test_dict_learning_split():
 def test_dict_learning_online_shapes():
     rng = np.random.RandomState(0)
     n_components = 8
-    code, dictionary = dict_learning_online(X, n_components=n_components,
-                                            alpha=1, random_state=rng)
+    code, dictionary, _ = dict_learning_online(X, n_components=n_components,
+                                               alpha=1, random_state=rng)
     assert_equal(code.shape, (n_samples, n_components))
     assert_equal(dictionary.shape, (n_components, n_features))
     assert_equal(np.dot(code, dictionary).shape, X.shape)