@@ -147,6 +147,7 @@ def safe_sparse_dot(a, b, dense_output=False):
147147
148148def randomized_range_finder (A , size , n_iter ,
149149 power_iteration_normalizer = 'auto' ,
150+ subtract_mean = False ,
150151 random_state = None ):
151152 """Computes an orthonormal matrix whose range approximates the range of A.
152153
@@ -171,6 +172,13 @@ def randomized_range_finder(A, size, n_iter,
171172
172173 .. versionadded:: 0.18
173174
175+ subtract_mean : bool
176+ Whether the mean of `A` should be subtracted after each multiplication
177+ by the `A` matrix. This is equivalent to multiplying matrices by a
178+ centered `A` without ever having to explicitly center. This is
179+ especially useful for performing PCA on large sparse matrices, so they
180+ do not need to be centered.
181+
6D3F
174182 random_state : int, RandomState instance or None, optional (default=None)
175183 The seed of the pseudo random number generator to use when shuffling
176184 the data. If int, random_state is the seed used by the random number
@@ -211,28 +219,39 @@ def randomized_range_finder(A, size, n_iter,
211219 else :
212220 power_iteration_normalizer = 'LU'
213221
222+ if subtract_mean :
223+ c = np .mean (A , axis = 0 ).reshape ((1 , - 1 ))
224+ applyA = lambda X : safe_sparse_dot (A , X ) - safe_sparse_dot (c , X )
225+ applyAT = lambda X : safe_sparse_dot (A .T , X ) - \
226+ safe_sparse_dot (c .T , Q .sum (axis = 0 ).reshape ((1 , - 1 )))
227+ else :
228+ applyA = lambda X : safe_sparse_dot (A , X )
229+ applyAT = lambda X : safe_sparse_dot (A .T , X )
230+
231+ Q = applyA (Q )
232+
214233 # Perform power iterations with Q to further 'imprint' the top
215234 # singular vectors of A in Q
216235 for i in range (n_iter ):
217236 if power_iteration_normalizer == 'none' :
218- Q = safe_sparse_dot ( A , Q )
219- Q = safe_sparse_dot ( A . T , Q )
237+ Q = applyAT ( Q )
238+ Q = applyA ( Q )
220239 elif power_iteration_normalizer == 'LU' :
221- Q , _ = linalg .lu (safe_sparse_dot ( A , Q ), permute_l = True )
222- Q , _ = linalg .lu (safe_sparse_dot ( A . T , Q ), permute_l = True )
240+ Q , _ = linalg .lu (applyAT ( Q ), permute_l = True )
241+ Q , _ = linalg .lu (applyA ( Q ), permute_l = True )
223242 elif power_iteration_normalizer == 'QR' :
224- Q , _ = linalg .qr (safe_sparse_dot ( A , Q ), mode = 'economic' )
225- Q , _ = linalg .qr (safe_sparse_dot ( A . T , Q ), mode = 'economic' )
243+ Q , _ = linalg .qr (applyAT ( Q ), mode = 'economic' )
244+ Q , _ = linalg .qr (applyA ( Q ), mode = 'economic' )
226245
227246 # Sample the range of A using by linear projection of Q
228247 # Extract an orthonormal basis
229- Q , _ = linalg .qr (safe_sparse_dot ( A , Q ) , mode = 'economic' )
248+ Q , _ = linalg .qr (Q , mode = 'economic' )
230249 return Q
231250
232251
2
D30D
33252def randomized_svd (M , n_components , n_oversamples = 10 , n_iter = 'auto' ,
234253 power_iteration_normalizer = 'auto' , transpose = 'auto' ,
235- flip_sign = True , random_state = 0 ):
254+ flip_sign = True , subtract_mean = False , random_state = 0 ):
236255 """Computes a truncated randomized SVD
237256
238257 Parameters
@@ -283,6 +302,13 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto',
283302 set to `True`, the sign ambiguity is resolved by making the largest
284303 loadings for each component in the left singular vectors positive.
285304
305+ subtract_mean : bool
306+ Whether the mean of `A` should be subtracted after each multiplication
307+ by the `A` matrix. This is equivalent to multiplying matrices by a
308+ centered `A` without ever having to explicitly center. This is
309+ especially useful for performing PCA on large sparse matrices, so they
310+ do not need to be centered.
311+
286312 random_state : int, RandomState instance or None, optional (default=None)
287313 The seed of the pseudo random number generator to use when shuffling
288314 the data. If int, random_state is the seed used by the random number
@@ -333,11 +359,20 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto',
333359 # this implementation is a bit faster with smaller shape[1]
334360 M = M .T
335361
336- Q = randomized_range_finder (M , n_random , n_iter ,
337- power_iteration_normalizer , random_state )
362+ Q = randomized_range_finder (
363+ M ,
364+ size = n_random ,
365+ n_iter = n_iter ,
366+ power_iteration_normalizer = power_iteration_normalizer ,
367+ subtract_mean = subtract_mean ,
368+ random_state = random_state ,
369+ )
338370
339371 # project M to the (k + p) dimensional space using the basis vectors
340372 B = safe_sparse_dot (Q .T , M )
373+ if subtract_mean :
374+ c = M .mean (axis = 0 ).reshape ((1 , - 1 ))
375+ B -= np .dot (c .T , Q .sum (axis = 0 ).reshape ((1 , - 1 ))).T
341376
342377 # compute the SVD on the thin matrix: (k + p) wide
343378 Uhat , s , V = linalg .svd (B , full_matrices = False )
0 commit comments