COSMIT friendlier output from faster NMF benchmark · Felixhawk/scikit-learn@d208c13 · GitHub
[go: up one dir, main page]

Skip to content

Commit d208c13

Browse files
committed
COSMIT friendlier output from faster NMF benchmark
Tolerance lowered to make it run in reasonable time; see scikit-learn#2537.
1 parent c14a619 commit d208c13

File tree

1 file changed

+21
-15
lines changed

1 file changed

+21
-15
lines changed

benchmarks/bench_plot_nmf.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44

55
from __future__ import print_function
66

7+
from collections import defaultdict
78
import gc
89
from time import time
10+
911
import numpy as np
10-
from collections import defaultdict
12+
from scipy.linalg import norm
1113

1214
from sklearn.decomposition.nmf import NMF, _initialize_nmf
1315
from sklearn.datasets.samples_generator import make_low_rank_matrix
@@ -27,7 +29,7 @@ def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
2729
r : integer
2830
number of latent features
2931
max_iter : integer, optional
30-
maximum number of iterations (default: 10000)
32+
maximum number of iterations (default: 1000)
3133
tol : double
3234
tolerance threshold for early exit (when the update factor is within
3335
tol of 1., the function exits)
@@ -62,25 +64,29 @@ def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
6264
H *= updateH
6365
updateW = np.dot(V, H.T) / (np.dot(W, np.dot(H, H.T)) + eps)
6466
W *= updateW
65-
if True or (i % 10) == 0:
67+
if i % 10 == 0:
6668
max_update = max(updateW.max(), updateH.max())
6769
if abs(1. - max_update) < tol:
6870
break
6971
return W, H
7072

7173

72-
def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
74+
def report(error, time):
75+
print("Frobenius loss: %.5f" % error)
76+
print("Took: %.2fs" % time)
77+
print()
78+
79+
80+
def benchmark(samples_range, features_range, rank=50, tolerance=1e-5):
7381
it = 0
7482
timeset = defaultdict(lambda: [])
7583
err = defaultdict(lambda: [])
7684

7785
max_it = len(samples_range) * len(features_range)
7886
for n_samples in samples_range:
7987
for n_features in features_range:
80-
it += 1
81-
print('====================')
82-
print('Iteration %03d of %03d' % (it, max_it))
83-
print('====================')
88+
print("%2d samples, %2d features" % (n_samples, n_features))
89+
print('=======================')
8490
X = np.abs(make_low_rank_matrix(n_samples, n_features,
8591
effective_rank=rank, tail_strength=0.2))
8692

@@ -91,7 +97,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
9197
tend = time() - tstart
9298
timeset['nndsvd-nmf'].append(tend)
9399
err['nndsvd-nmf'].append(m.reconstruction_err_)
94-
print(m.reconstruction_err_, tend)
100+
report(m.reconstruction_err_, tend)
95101

96102
gc.collect()
97103
print("benchmarking nndsvda-nmf: ")
@@ -101,7 +107,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
101107
tend = time() - tstart
102108
timeset['nndsvda-nmf'].append(tend)
103109
err['nndsvda-nmf'].append(m.reconstruction_err_)
104-
print(m.reconstruction_err_, tend)
110+
report(m.reconstruction_err_, tend)
105111

106112
gc.collect()
107113
print("benchmarking nndsvdar-nmf: ")
@@ -111,7 +117,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
111117
tend = time() - tstart
112118
timeset['nndsvdar-nmf'].append(tend)
113119
err['nndsvdar-nmf'].append(m.reconstruction_err_)
114-
print(m.reconstruction_err_, tend)
120+
report(m.reconstruction_err_, tend)
115121

116122
gc.collect()
117123
print("benchmarking random-nmf")
@@ -121,7 +127,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
121127
tend = time() - tstart
122128
timeset['random-nmf'].append(tend)
123129
err['random-nmf'].append(m.reconstruction_err_)
124-
print(m.reconstruction_err_, tend)
130+
report(m.reconstruction_err_, tend)
125131

126132
gc.collect()
127133
print("benchmarking alt-random-nmf")
@@ -130,7 +136,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
130136
tend = time() - tstart
131137
timeset['alt-random-nmf'].append(tend)
132138
err['alt-random-nmf'].append(np.linalg.norm(X - np.dot(W, H)))
133-
print(np.linalg.norm(X - np.dot(W, H)), tend)
139+
report(norm(X - np.dot(W, H)), tend)
134140

135141
return timeset, err
136142

@@ -142,10 +148,10 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
142148

143149
samples_range = np.linspace(50, 500, 3).astype(np.int)
144150
features_range = np.linspace(50, 500, 3).astype(np.int)
145-
timeset, err = compute_bench(samples_range, features_range)
151+
timeset, err = benchmark(samples_range, features_range)
146152

147153
for i, results in enumerate((timeset, err)):
148-
fig = plt.figure('scikit-learn Non-Negative Matrix Factorization benchmkar results')
154+
fig = plt.figure('scikit-learn Non-Negative Matrix Factorization benchmark results')
149155
ax = fig.gca(projection='3d')
150156
for c, (label, timings) in zip('rbgcm', sorted(results.iteritems())):
151157
X, Y = np.meshgrid(samples_range, features_range)

0 commit comments

Comments (0)
0