@@ -154,7 +154,7 @@ def _initialize_nmf(X, n_components, variant=None, eps=1e-6,
     return W, H


-def _nls_subproblem(V, W, H_init, tol, max_iter):
+def _nls_subproblem(V, W, H_init, tol, max_iter, sigma=0.01, beta=0.1):
     """Non-negative least square solver

     Solves a non-negative least squares subproblem using the
@@ -175,6 +175,19 @@ def _nls_subproblem(V, W, H_init, tol, max_iter):
     max_iter : int
         Maximum number of iterations before timing out.

+    sigma : float
+        Constant used in the sufficient decrease condition checked by the line
+        search. Smaller values lead to a looser sufficient decrease condition,
+        thus reducing the time taken by the line search, but potentially
+        increasing the number of iterations of the projected gradient procedure.
+        0.01 is a commonly used value in the optimization literature.
+
+    beta : float
+        Factor by which the step size is decreased (resp. increased) until
+        (resp. as long as) the sufficient decrease condition is satisfied.
+        Larger values allow finding a better step size but lead to a longer
+        line search. 0.1 is a commonly used value in the optimization literature.
+
     Returns
     -------
     H : array-like
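The two new keyword arguments control the Armijo-style backtracking line search inside the projected gradient solver. As a rough illustration only (not the code from this patch), a simplified backtracking step showing just the shrinking branch of the search could look like the following; backtracking_step and its arguments are hypothetical names used for the sketch:

import numpy as np

def backtracking_step(H, grad, WtW, alpha=1.0, sigma=0.01, beta=0.1, max_inner=20):
    # Hypothetical helper (not part of the patch) showing how sigma and beta
    # interact: sigma appears in the sufficient decrease test, beta is the
    # factor by which the step size alpha is shrunk when the test fails.
    for _ in range(max_inner):
        # Projected gradient step: move against the gradient, clip at zero.
        Hn = np.maximum(H - alpha * grad, 0)
        d = Hn - H
        gradd = np.sum(grad * d)
        dQd = np.sum(np.dot(WtW, d) * d)
        # Sufficient decrease test; for the quadratic NLS objective this is
        # exactly f(H + d) - f(H) <= sigma * <grad, d>.
        if (1 - sigma) * gradd + 0.5 * dQd < 0:
            return Hn, alpha
        # Test failed: shrink the step size and try again.
        alpha *= beta
    return H, alpha

Per the docstring above, the full procedure also grows the step size as long as the condition keeps holding, so a smaller sigma loosens the test and shortens the inner loop, while a beta closer to 1 changes alpha more gradually and tries more candidate steps.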
@@ -186,6 +199,14 @@ def _nls_subproblem(V, W, H_init, tol, max_iter):
     n_iter : int
         The number of iterations done by the algorithm.

+    Reference
+    ---------
+
+    C.-J. Lin. Projected gradient methods
+    for non-negative matrix factorization. Neural
+    Computation, 19(2007), 2756-2779.
+    http://www.csie.ntu.edu.tw/~cjlin/nmf/
+
     """
     if (H_init < 0).any():
         raise ValueError("Negative values in H_init passed to NLS solver.")
@@ -196,7 +217,6 @@ def _nls_subproblem(V, W, H_init, tol, max_iter):

     # values justified in the paper
     alpha = 1
-    beta = 0.1
     for n_iter in range(1, max_iter + 1):
         grad = np.dot(WtW, H) - WtV
         proj_gradient = norm(grad[np.logical_or(grad < 0, H > 0)])
@@ -211,8 +231,7 @@ def _nls_subproblem(V, W, H_init, tol, max_iter):
             d = Hn - H
             gradd = np.sum(grad * d)
             dQd = np.sum(np.dot(WtW, d) * d)
-            # magic numbers whoa
-            suff_decr = 0.99 * gradd + 0.5 * dQd < 0
+            suff_decr = (1 - sigma) * gradd + 0.5 * dQd < 0
             if inner_iter == 1:
                 decr_alpha = not suff_decr
                 Hp = H
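The replaced magic number 0.99 was simply 1 - sigma with sigma hard-coded to 0.01. For the quadratic NLS objective f(H) = 0.5 * ||V - W H||_F^2, the decrease produced by a step d is exactly gradd + 0.5 * dQd, so the test (1 - sigma) * gradd + 0.5 * dQd < 0 is the sufficient decrease condition f(H + d) - f(H) < sigma * gradd from Lin's paper. A small standalone check of that identity, using made-up data purely for illustration:

import numpy as np

rng = np.random.RandomState(0)
V, W, H = rng.rand(6, 4), rng.rand(6, 3), rng.rand(3, 4)
WtW, WtV = np.dot(W.T, W), np.dot(W.T, V)
grad = np.dot(WtW, H) - WtV

def f(M):
    # Quadratic NLS objective 0.5 * ||V - W M||_F^2
    return 0.5 * np.linalg.norm(V - np.dot(W, M)) ** 2

# One projected gradient step with an arbitrary step size.
d = np.maximum(H - 0.5 * grad, 0) - H
gradd = np.sum(grad * d)
dQd = np.sum(np.dot(WtW, d) * d)

# Exact identity for a quadratic: f(H + d) - f(H) == gradd + 0.5 * dQd,
# hence (1 - sigma) * gradd + 0.5 * dQd < 0 iff f(H + d) - f(H) < sigma * gradd.
assert np.allclose(f(H + d) - f(H), gradd + 0.5 * dQd)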
@@ -321,8 +340,8 @@ class ProjectedGradientNMF(BaseEstimator, TransformerMixin):
     >>> model.reconstruction_err_ #doctest: +ELLIPSIS
     0.513...

-    Notes
-    -----
+    References
+    ----------
     This implements

     C.-J. Lin. Projected gradient methods
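For reference, a hypothetical call showing the extended solver signature; it assumes _nls_subproblem can be imported from scikit-learn's NMF module as it stood around this commit (adjust the import path as needed) and uses random data purely for illustration:

import numpy as np
# Assumption: the private solver lives in the NMF module touched by this
# commit; the exact import path may differ depending on the version.
from sklearn.decomposition.nmf import _nls_subproblem

rng = np.random.RandomState(42)
V = rng.rand(10, 5)       # data matrix to factor
W = rng.rand(10, 3)       # fixed factor
H_init = rng.rand(3, 5)   # starting point for the NLS subproblem

out = _nls_subproblem(V, W, H_init, tol=1e-4, max_iter=200,
                      sigma=0.01, beta=0.1)
H, n_iter = out[0], out[-1]   # solution and iteration count, per the docstring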