8000 ENH micro-optimize NMF inner loop · Felixhawk/scikit-learn@02d9726 · GitHub
[go: up one dir, main page]

Skip to content

Commit 02d9726

Browse files
committed
ENH micro-optimize NMF inner loop
Also fixed the random seed on the example.
1 parent d208c13 commit 02d9726

File tree

3 files changed

+9
-19
lines changed

3 files changed

+9
-19
lines changed

examples/applications/topics_extraction_with_nmf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
# Fit the NMF model
5151
print("Fitting the NMF model with n_samples=%d and n_features=%d..."
5252
% (n_samples, n_features))
53-
nmf = NMF(n_components=n_topics).fit(tfidf)
53+
nmf = NMF(n_components=n_topics, random_state=1).fit(tfidf)
5454
print("done in %0.3fs." % (time() - t0))
5555

5656
feature_names = vectorizer.get_feature_names()

sklearn/decomposition/nmf.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def _initialize_nmf(X, n_components, variant=None, eps=1e-6,
159159
return W, H
160160

161161

162-
def _nls_subproblem(V, W, H_init, tol, max_iter, sigma=0.01, beta=0.1):
162+
def _nls_subproblem(V, W, H, tol, max_iter, sigma=0.01, beta=0.1):
163163
"""Non-negative least square solver
164164
165165
Solves a non-negative least squares subproblem using the
@@ -171,7 +171,7 @@ def _nls_subproblem(V, W, H_init, tol, max_iter, sigma=0.01, beta=0.1):
171171
V, W : array-like
172172
Constant matrices.
173173
174-
H_init : array-like
174+
H : array-like
175175
Initial guess for the solution.
176176
177177
tol : float
@@ -214,11 +214,7 @@ def _nls_subproblem(V, W, H_init, tol, max_iter, sigma=0.01, beta=0.1):
214214
http://www.csie.ntu.edu.tw/~cjlin/nmf/
215215
216216
"""
217-
if (H_init < 0).any():
218-
raise ValueError("Negative values in H_init passed to NLS solver.")
219-
220-
H = H_init
221-
WtV = safe_sparse_dot(W.T, V, dense_output=True)
217+
WtV = safe_sparse_dot(W.T, V)
222218
WtW = np.dot(W.T, W)
223219

224220
# values justified in the paper
@@ -228,13 +224,12 @@ def _nls_subproblem(V, W, H_init, tol, max_iter, sigma=0.01, beta=0.1):
228224

229225
# The following multiplication with a boolean array is more than twice
230226
# as fast as indexing into grad.
231-
proj_gradient = norm(grad * np.logical_or(grad < 0, H > 0))
232-
if proj_gradient < tol:
227+
if norm(grad * np.logical_or(grad < 0, H > 0)) < tol:
233228
break
234229

235230
Hp = H
236231

237-
for inner_iter in range(1, 20):
232+
for inner_iter in range(19):
238233
# Gradient step.
239234
Hn = H - alpha * grad
240235
# Projection step.
@@ -243,7 +238,7 @@ def _nls_subproblem(V, W, H_init, tol, max_iter, sigma=0.01, beta=0.1):
243238
gradd = np.dot(grad.ravel(), d.ravel())
244239
dQd = np.dot(np.dot(WtW, d).ravel(), d.ravel())
245240
suff_decr = (1 - sigma) * gradd + 0.5 * dQd < 0
246-
if inner_iter == 1:
241+
if inner_iter == 0:
247242
decr_alpha = not suff_decr
248243

249244
if decr_alpha:
@@ -581,6 +576,8 @@ def transform(self, X):
581576
"""
582577
X, = check_arrays(X, sparse_format='csc')
583578
Wt = np.zeros((self.n_components_, X.shape[0]))
579+
check_non_negative(X, "ProjectedGradientNMF.transform")
580+
584581
if sp.issparse(X):
585582
Wt, _, _ = _nls_subproblem(X.T, self.components_.T, Wt,
586583
tol=self.tol,

sklearn/decomposition/tests/test_nmf.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,6 @@ def test_projgrad_nmf_fit_close():
8282
assert_less(pnmf.fit(X).reconstruction_err_, 0.05)
8383

8484

85-
@raises(ValueError)
86-
def test_nls_nn_input():
87-
"""Test NLS solver's behaviour on negative input"""
88-
A = np.ones((2, 2))
89-
nmf._nls_subproblem(A, A, -A, 0.001, 20)
90-
91-
9285
def test_nls_nn_output():
9386
"""Test that NLS solver doesn't return negative values"""
9487
A = np.arange(1, 5).reshape(1, -1)

0 commit comments

Comments (0)