16
16
from sklearn .externals .six .moves import xrange
17
17
18
18
19
- def alt_nnmf (V , r , max_iter = 1000 , tol = 1e-3 , R = None ):
19
+ def alt_nnmf (V , r , max_iter = 1000 , tol = 1e-3 , init = 'random' ):
20
20
'''
21
21
A, S = alt_nnmf(V, r, max_iter=1000, tol=1e-3, init='random')
22
22
@@ -33,8 +33,8 @@ def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
33
33
tol : double
34
34
tolerance threshold for early exit (when the update factor is within
35
35
tol of 1., the function exits)
36
- R : integer, optional
37
- random seed
36
+ init : string
37
+ Method used to initialize the procedure (default: 'random'); forwarded to _initialize_nmf.
38
38
39
39
Returns
40
40
-------
@@ -52,12 +52,7 @@ def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
52
52
# Nomenclature in the function follows Lee & Seung
53
53
eps = 1e-5
54
54
n , m = V .shape
55
- if R == "svd" :
56
- W , H = _initialize_nmf (V , r )
57
- elif R is None :
58
- R = np .random .mtrand ._rand
59
- W = np .abs (R .standard_normal ((n , r )))
60
- H = np .abs (R .standard_normal ((r , m )))
55
+ W , H = _initialize_nmf (V , r , init , random_state = 0 )
61
56
62
57
for i in xrange (max_iter ):
63
58
updateH = np .dot (W .T , V ) / (np .dot (np .dot (W .T , W ), H ) + eps )
@@ -78,17 +73,15 @@ def report(error, time):
78
73
79
74
80
75
def benchmark (samples_range , features_range , rank = 50 , tolerance = 1e-5 ):
81
- it = 0
82
76
timeset = defaultdict (lambda : [])
83
77
err = defaultdict (lambda : [])
84
78
85
- max_it = len (samples_range ) * len (features_range )
86
79
for n_samples in samples_range :
87
80
for n_features in features_range :
88
81
print ("%2d samples, %2d features" % (n_samples , n_features ))
89
82
print ('=======================' )
90
83
X = np .abs (make_low_rank_matrix (n_samples , n_features ,
91
- effective_rank = rank , tail_strength = 0.2 ))
84
+ effective_rank = rank , tail_strength = 0.2 ))
92
85
93
86
gc .collect ()
94
87
print ("benchmarking nndsvd-nmf: " )
@@ -122,7 +115,7 @@ def benchmark(samples_range, features_range, rank=50, tolerance=1e-5):
122
115
gc .collect ()
123
116
print ("benchmarking random-nmf" )
124
117
tstart = time ()
125
- m = NMF (n_components = 30 , init = None , max_iter = 1000 ,
118
+ m = NMF (n_components = 30 , init = 'random' , max_iter = 1000 ,
126
119
tol = tolerance ).fit (X )
127
120
tend = time () - tstart
128
121
timeset ['random-nmf' ].append (tend )
@@ -132,7 +125,7 @@ def benchmark(samples_range, features_range, rank=50, tolerance=1e-5):
132
125
gc .collect ()
133
126
print ("benchmarking alt-random-nmf" )
134
127
tstart = time ()
135
- W , H = alt_nnmf (X , r = 30 , R = None , tol = tolerance )
128
+ W , H = alt_nnmf (X , r = 30 , init = 'random' , tol = tolerance )
136
129
tend = time () - tstart
137
130
timeset ['alt-random-nmf' ].append (tend )
138
131
err ['alt-random-nmf' ].append (np .linalg .norm (X - np .dot (W , H )))
@@ -151,7 +144,8 @@ def benchmark(samples_range, features_range, rank=50, tolerance=1e-5):
151
144
timeset , err = benchmark (samples_range , features_range )
152
145
153
146
for i , results in enumerate ((timeset , err )):
154
- fig = plt .figure ('scikit-learn Non-Negative Matrix Factorization benchmark results' )
147
+ fig = plt .figure ('scikit-learn Non-Negative Matrix Factorization'
148
+ ' benchmark results' )
155
149
ax = fig .gca (projection = '3d' )
156
150
for c , (label , timings ) in zip ('rbgcm' , sorted (results .iteritems ())):
157
151
X , Y = np .meshgrid (samples_range , features_range )
0 commit comments