@@ -147,8 +147,8 @@ def safe_sparse_dot(a, b, dense_output=False):
147
147
148
148
def randomized_range_finder (A , size , n_iter ,
149
149
power_iteration_normalizer = 'auto' ,
150
- subtract_mean = False ,
151
- random_state = None ):
150
+ random_state = None ,
151
+ subtract_mean = False ):
152
152
"""Computes an orthonormal matrix whose range approximates the range of A.
153
153
154
154
Parameters
@@ -172,20 +172,20 @@ def randomized_range_finder(A, size, n_iter,
172
172
173
173
.. versionadded:: 0.18
174
174
175
- subtract_mean : bool
176
- Whether the mean of `A` should be subtracted after each multiplication
177
- by the `A` matrix. This is equivalent to multiplying matrices by a
178
- centered `A` without ever having to explicitly center. This is
179
- especially useful for performing PCA on large sparse matrices, so they
180
- do not need to be centered.
181
-
182
175
random_state : int, RandomState instance or None, optional (default=None)
183
176
The seed of the pseudo random number generator to use when shuffling
184
177
the data. If int, random_state is the seed used by the random number
185
178
generator; If RandomState instance, random_state is the random number
186
179
generator; If None, the random number generator is the RandomState
187
180
instance used by `np.random`.
188
181
182
+ subtract_mean : bool
183
+ Whether the mean of `A` should be subtracted after each multiplication
184
+ by the `A` matrix. This is equivalent to multiplying matrices by a
185
+ centered `A` without ever having to explicitly center. This is
186
+ especially useful for performing PCA on large sparse matrices, so they
187
+ do not need to be centered.
188
+
189
189
Returns
190
190
-------
191
191
Q : 2D array
@@ -219,39 +219,45 @@ def randomized_range_finder(A, size, n_iter,
219
219
else :
220
220
power_iteration_normalizer = 'LU'
221
221
222
+ # Prepare functions that will multiply `Q` with `A`
222
223
if subtract_mean :
223
224
c = A .mean (axis = 0 ).reshape ((1 , - 1 ))
224
- applyA = lambda X : safe_sparse_dot (A , X ) - safe_sparse_dot (c , X )
225
- applyAT = lambda X : safe_sparse_dot (A .T , X ) - \
226
- safe_sparse_dot (c .T , Q .sum (axis = 0 ).reshape ((1 , - 1 )))
225
6377
+
226
+ def _apply_A (X ):
227
+ return safe_sparse_dot (A , X ) - safe_sparse_dot (c , X )
228
+
229
+ def _apply_AT (X ):
230
+ return safe_sparse_dot (A .T , X ) - \
231
+ safe_sparse_dot (c .T , Q .sum (axis = 0 ).reshape ((1 , - 1 )))
227
232
else :
228
- applyA = lambda X : safe_sparse_dot ( A , X )
229
- applyAT = lambda X : safe_sparse_dot (A . T , X )
233
+ def _apply_A ( X ):
234
+ return safe_sparse_dot (A , X )
230
235
231
- Q = applyA (Q )
236
+ def _apply_AT (X ):
237
+ return safe_sparse_dot (A .T , X )
232
238
233
239
# Perform power iterations with Q to further 'imprint' the top
234
240
# singular vectors of A in Q
235
241
for i in range (n_iter ):
236
242
if power_iteration_normalizer == 'none' :
237
- Q = applyAT (Q )
238
- Q = applyA (Q )
243
+ Q = _apply_A (Q )
244
+ Q = _apply_AT (Q )
239
245
elif power_iteration_normalizer == 'LU' :
240
- Q , _ = linalg .lu (applyAT (Q ), permute_l = True )
241
- Q , _ = linalg .lu (applyA (Q ), permute_l = True )
246
+ Q , _ = linalg .lu (_apply_A (Q ), permute_l = True )
247
+ Q , _ = linalg .lu (_apply_AT (Q ), permute_l = True )
242
248
elif power_iteration_normalizer == 'QR' :
243
- Q , _ = linalg .qr (applyAT (Q ), mode = 'economic' )
244
- Q , _ = linalg .qr (applyA (Q ), mode = 'economic' )
249
+ Q , _ = linalg .qr (_apply_A (Q ), mode = 'economic' )
250
+ Q , _ = linalg .qr (_apply_AT (Q ), mode = 'economic' )
245
251
246
252
# Sample the range of A by linear projection of Q
247
253
# Extract an orthonormal basis
248
- Q , _ = linalg .qr (Q , mode = 'economic' )
254
+ Q , _ = linalg .qr (_apply_A ( Q ) , mode = 'economic' )
249
255
return Q
250
256
251
257
252
258
def randomized_svd (M , n_components , n_oversamples = 10 , n_iter = 'auto' ,
253
259
power_iteration_normalizer = 'auto' , transpose = 'auto' ,
254
- flip_sign = True , subtract_mean = False , random_state = 0 ):
260
+ flip_sign = True , random_state = 0 , subtract_mean = False ):
255
261
"""Computes a truncated randomized SVD
256
262
257
263
Parameters
@@ -302,20 +308,20 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto',
302
308
set to `True`, the sign ambiguity is resolved by making the largest
303
309
loadings for each component in the left singular vectors positive.
304
310
305
- subtract_mean : bool
306
- Whether the mean of `A` should be subtracted after each multiplication
307
- by the `A` matrix. This is equivalent to multiplying matrices by a
308
- centered `A` without ever having to explicitly center. This is
309
- especially useful for performing PCA on large sparse matrices, so they
310
- do not need to be centered.
311
-
312
311
random_state : int, RandomState instance or None, optional (default=0)
313
312
The seed of the pseudo random number generator to use when shuffling
314
313
the data. If int, random_state is the seed used by the random number
315
314
generator; If RandomState instance, random_state is the random number
316
315
generator; If None, the random number generator is the RandomState
317
316
instance used by `np.random`.
318
317
318
+ subtract_mean : bool
319
+ Whether the mean of `A` should be subtracted after each multiplication
320
+ by the `A` matrix. This is equivalent to multiplying matrices by a
321
+ centered `A` without ever having to explicitly center. This is
322
+ especially useful for performing PCA on large sparse matrices, so they
323
+ do not need to be centered.
324
+
319
325
Notes
320
326
-----
321
327
This algorithm finds a (usually very good) approximate truncated
@@ -359,14 +365,9 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto',
359
365
# this implementation is a bit faster with smaller shape[1]
360
366
M = M .T
361
367
362
- Q = randomized_range_finder (
363
- M ,
364
- size = n_random ,
365
- n_iter = n_iter ,
366
- power_iteration_normalizer = power_iteration_normalizer ,
367
- subtract_mean = subtract_mean ,
368
- random_state = random_state ,
369
- )
368
+ Q = randomized_range_finder (M , n_random , n_iter ,
369
+ power_iteration_normalizer , random_state ,
370
+ subtract_mean )
370
371
371
372
# project M to the (k + p) dimensional space using the basis vectors
372
373
B = safe_sparse_dot (Q .T , M )
0 commit comments