4
4
5
5
from __future__ import print_function
6
6
7
+ from collections import defaultdict
7
8
import gc
8
9
from time import time
10
+
9
11
import numpy as np
10
- from collections import defaultdict
12
+ from scipy . linalg import norm
11
13
12
14
from sklearn .decomposition .nmf import NMF , _initialize_nmf
13
15
from sklearn .datasets .samples_generator import make_low_rank_matrix
@@ -27,7 +29,7 @@ def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
27
29
r : integer
28
30
number of latent features
29
31
max_iter : integer, optional
30
- maximum number of iterations (default: 10000 )
32
+ maximum number of iterations (default: 1000 )
31
33
tol : double
32
34
tolerance threshold for early exit (when the update factor is within
33
35
tol of 1., the function exits)
@@ -62,25 +64,29 @@ def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
62
64
H *= updateH
63
65
updateW = np .dot (V , H .T ) / (np .dot (W , np .dot (H , H .T )) + eps )
64
66
W *= updateW
65
- if True or ( i % 10 ) == 0 :
67
+ if i % 10 == 0 :
66
68
max_update = max (updateW .max (), updateH .max ())
67
69
if abs (1. - max_update ) < tol :
68
70
break
69
71
return W , H
70
72
71
73
72
- def compute_bench (samples_range , features_range , rank = 50 , tolerance = 1e-7 ):
74
def report(error, time):
    """Print one benchmark result: reconstruction loss and elapsed time.

    Parameters
    ----------
    error : float
        Loss value to display; formatted with five decimal places under the
        label "Frobenius loss".
    time : float
        Elapsed time to display; formatted with two decimal places followed
        by an "s" suffix.

    Returns
    -------
    None
        The function only writes to standard output.
    """
    # NOTE: the parameter name ``time`` shadows the module-level ``time``
    # function inside this body. It is kept so that keyword callers keep
    # working; the function itself never needs to call ``time()``.
    print("Frobenius loss: %.5f" % error)
    print("Took: %.2fs" % time)
    # Trailing blank line visually separates consecutive reports.
    print()
78
+
79
+
80
+ def benchmark (samples_range , features_range , rank = 50 , tolerance = 1e-5 ):
73
81
it = 0
74
82
timeset = defaultdict (lambda : [])
75
83
err = defaultdict (lambda : [])
76
84
77
85
max_it = len (samples_range ) * len (features_range )
78
86
for n_samples in samples_range :
79
87
for n_features in features_range :
80
- it += 1
81
- print ('====================' )
82
- print ('Iteration %03d of %03d' % (it , max_it ))
83
- print ('====================' )
88
+ print ("%2d samples, %2d features" % (n_samples , n_features ))
89
+ print ('=======================' )
84
90
X = np .abs (make_low_rank_matrix (n_samples , n_features ,
85
91
effective_rank = rank , tail_strength = 0.2 ))
86
92
@@ -91,7 +97,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
91
97
tend = time () - tstart
92
98
timeset ['nndsvd-nmf' ].append (tend )
93
99
err ['nndsvd-nmf' ].append (m .reconstruction_err_ )
94
- print (m .reconstruction_err_ , tend )
100
+ report (m .reconstruction_err_ , tend )
95
101
96
102
gc .collect ()
97
103
print ("benchmarking nndsvda-nmf: " )
@@ -101,7 +107,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
101
107
tend = time () - tstart
102
108
timeset ['nndsvda-nmf' ].append (tend )
103
109
err ['nndsvda-nmf' ].append (m .reconstruction_err_ )
104
- print (m .reconstruction_err_ , tend )
110
+ report (m .reconstruction_err_ , tend )
105
111
106
112
gc .collect ()
107
113
print ("benchmarking nndsvdar-nmf: " )
@@ -111,7 +117,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
111
117
tend = time () - tstart
112
118
timeset ['nndsvdar-nmf' ].append (tend )
113
119
err ['nndsvdar-nmf' ].append (m .reconstruction_err_ )
114
- print (m .reconstruction_err_ , tend )
120
+ report (m .reconstruction_err_ , tend )
115
121
116
122
gc .collect ()
117
123
print ("benchmarking random-nmf" )
@@ -121,7 +127,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
121
127
tend = time () - tstart
122
128
timeset ['random-nmf' ].append (tend )
123
129
err ['random-nmf' ].append (m .reconstruction_err_ )
124
- print (m .reconstruction_err_ , tend )
130
+ report (m .reconstruction_err_ , tend )
125
131
126
132
gc .collect ()
127
133
print ("benchmarking alt-random-nmf" )
@@ -130,7 +136,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
130
136
tend = time () - tstart
131
137
timeset ['alt-random-nmf' ].append (tend )
132
138
err ['alt-random-nmf' ].append (np .linalg .norm (X - np .dot (W , H )))
133
- print ( np . linalg . norm (X - np .dot (W , H )), tend )
139
+ report ( norm (X - np .dot (W , H )), tend )
134
140
135
141
return timeset , err
136
142
@@ -142,10 +148,10 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
142
148
143
149
samples_range = np .linspace (50 , 500 , 3 ).astype (np .int )
144
150
features_range = np .linspace (50 , 500 , 3 ).astype (np .int )
145
- timeset , err = compute_bench (samples_range , features_range )
151
+ timeset , err = benchmark (samples_range , features_range )
146
152
147
153
for i , results in enumerate ((timeset , err )):
148
- fig = plt .figure ('scikit-learn Non-Negative Matrix Factorization benchmkar results' )
154
+ fig = plt .figure ('scikit-learn Non-Negative Matrix Factorization benchmark results' )
149
155
ax = fig .gca (projection = '3d' )
150
156
for c , (label , timings ) in zip ('rbgcm' , sorted (results .iteritems ())):
151
157
X , Y = np .meshgrid (samples_range , features_range )
0 commit comments