@@ -226,11 +226,9 @@ def my_g(x):
226226 K is 'None'.
227227
228228 W : array, shape (n_components, n_components)
229- Estimated un-mixing matrix.
230- The mixing matrix can be obtained by::
231-
232- w = np.dot(W, K.T)
233- A = w.T * (w * w.T).I
229+ The square matrix that unmixes the data after whitening.
230+ The mixing matrix is the pseudo-inverse of the matrix ``W K``
231+ if K is not None, otherwise it is the inverse of W.
234232
235233 S : array, shape (n_samples, n_components) | None
236234 Estimated source matrix
@@ -252,6 +250,10 @@ def my_g(x):
252250 contain the independent components and A is a linear mixing
253251 matrix. In short ICA attempts to `un-mix' the data by estimating an
254252 un-mixing matrix W where ``S = W K X.``
253+ While FastICA was proposed to estimate as many sources
254+ as features, it is possible to estimate fewer by setting
255+ n_components < n_features. In this case K is not a square matrix
256+ and the estimated A is the pseudo-inverse of ``W K``.
255257
256258 This implementation was originally made for data of shape
257259 [n_features, n_samples]. Now the input is transposed
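As a minimal illustration of the relation described in the hunk above, the mixing matrix can be recovered from the values returned by the public ``fastica`` function; the toy data and variable names below are illustrative only, not part of the patch::

    import numpy as np
    from scipy import linalg
    from sklearn.decomposition import fastica

    rng = np.random.RandomState(0)
    S_true = rng.laplace(size=(1000, 2))        # non-Gaussian sources
    A_true = rng.randn(3, 2)                    # ground-truth mixing matrix
    X = np.dot(S_true, A_true.T)                # observations, shape (1000, 3)

    # with whiten=True (the default), K has shape (n_components, n_features)
    K, W, S = fastica(X, n_components=2, random_state=0)

    # the estimated mixing matrix is the pseudo-inverse of W K;
    # when n_components == n_features this reduces to a plain inverse
    A_est = linalg.pinv(np.dot(W, K))           # shape (n_features, n_components)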
@@ -264,120 +266,37 @@ def my_g(x):
264266 pp. 411-430*
265267
266268 """
267-    random_state = check_random_state(random_state)
268-    fun_args = {} if fun_args is None else fun_args
269-    # make interface compatible with other decompositions
270-    # a copy is required only for non whitened data
271-    X = check_array(X, copy=whiten, dtype=FLOAT_DTYPES,
272-                    ensure_min_samples=2).T
273-
274-    alpha = fun_args.get('alpha', 1.0)
275-    if not 1 <= alpha <= 2:
276-        raise ValueError('alpha must be in [1,2]')
277-
278-    if fun == 'logcosh':
279-        g = _logcosh
280-    elif fun == 'exp':
281-        g = _exp
282-    elif fun == 'cube':
283-        g = _cube
284-    elif callable(fun):
285-        def g(x, fun_args):
286-            return fun(x, **fun_args)
287-    else:
288-        exc = ValueError if isinstance(fun, str) else TypeError
289-        raise exc("Unknown function %r;"
290-                  " should be one of 'logcosh', 'exp', 'cube' or callable"
291-                  % fun)
292-
293-    n, p = X.shape
294-
295-    if not whiten and n_components is not None:
296-        n_components = None
297-        warnings.warn('Ignoring n_components with whiten=False.')
298-
299-    if n_components is None:
300-        n_components = min(n, p)
301-    if (n_components > min(n, p)):
302-        n_components = min(n, p)
303-        warnings.warn('n_components is too large: it will be set to %s' % n_components)
304-
305-    if whiten:
306-        # Centering the columns (ie the variables)
307-        X_mean = X.mean(axis=-1)
308-        X -= X_mean[:, np.newaxis]
309-
310-        # Whitening and preprocessing by PCA
311-        u, d, _ = linalg.svd(X, full_matrices=False)
312-
313-        del _
314-        K = (u / d).T[:n_components]  # see (6.33) p.140
315-        del u, d
316-        X1 = np.dot(K, X)
317-        # see (13.6) p.267 Here X1 is white and data
318-        # in X has been projected onto a subspace by PCA
319-        X1 *= np.sqrt(p)
320-    else:
321-        # X must be casted to floats to avoid typing issues with numpy
322-        # 2.0 and the line below
323-        X1 = as_float_array(X, copy=False)  # copy has been taken care of
324269
325-    if w_init is None:
326-        w_init = np.asarray(random_state.normal(size=(n_components,
327-                            n_components)), dtype=X1.dtype)
328-
329-    else:
330-        w_init = np.asarray(w_init)
331-        if w_init.shape != (n_components, n_components):
332-            raise ValueError('w_init has invalid shape -- should be %(shape)s'
333-                             % {'shape': (n_components, n_components)})
334-
335-    kwargs = {'tol': tol,
336-              'g': g,
337-              'fun_args': fun_args,
338-              'max_iter': max_iter,
339-              'w_init': w_init}
340-
341-    if algorithm == 'parallel':
342-        W, n_iter = _ica_par(X1, **kwargs)
343-    elif algorithm == 'deflation':
344-        W, n_iter = _ica_def(X1, **kwargs)
345-    else:
346-        raise ValueError('Invalid algorithm: must be either `parallel` or'
347-                         ' `deflation`.')
348-    del X1
270+    est = FastICA(n_components=n_components, algorithm=algorithm,
271+                  whiten=whiten, fun=fun, fun_args=fun_args,
272+                  max_iter=max_iter, tol=tol, w_init=w_init,
273+                  random_state=random_state)
274+    sources = est._fit(X, compute_sources=compute_sources)
349275
350276    if whiten:
351-        if compute_sources:
352-            S = np.dot(np.dot(W, K), X).T
353-        else:
354-            S = None
355277        if return_X_mean:
356278            if return_n_iter:
357-                return K, W, S, X_mean, n_iter
279+                return (est.whitening_, est._unmixing, sources, est.mean_,
280+                        est.n_iter_)
358281            else:
359-                return K, W, S, X_mean
282+                return est.whitening_, est._unmixing, sources, est.mean_
360283        else:
361284            if return_n_iter:
362-                return K, W, S, n_iter
285+                return est.whitening_, est._unmixing, sources, est.n_iter_
363286            else:
364-                return K, W, S
287+                return est.whitening_, est._unmixing, sources
365288
366289    else:
367-        if compute_sources:
368-            S = np.dot(W, X).T
369-        else:
370-            S = None
371290        if return_X_mean:
372291            if return_n_iter:
373-                return None, W, S, None, n_iter
292+                return None, est._unmixing, sources, None, est.n_iter_
374293            else:
375-                return None, W, S, None
294+                return None, est._unmixing, sources, None
376295        else:
377296            if return_n_iter:
378-                return None, W, S, n_iter
297+                return None, est._unmixing, sources, est.n_iter_
379298            else:
380-                return None, W, S
299+                return None, est._unmixing, sources
381300
382301
383302 class FastICA(TransformerMixin, BaseEstimator):
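With this refactoring ``fastica`` simply builds a ``FastICA`` estimator and delegates to its ``_fit`` method, so the function and the estimator should produce identical results for the same inputs and ``random_state``. A small sketch of that equivalence on illustrative toy data::

    import numpy as np
    from sklearn.decomposition import FastICA, fastica

    rng = np.random.RandomState(0)
    X = np.dot(rng.laplace(size=(1000, 2)), rng.randn(3, 2).T)

    # function API: returns the whitening matrix, the rotation and the sources
    K, W, S_fun = fastica(X, n_components=2, random_state=0)

    # estimator API: the same computation, with the results kept as attributes
    est = FastICA(n_components=2, random_state=0)
    S_est = est.fit_transform(X)

    np.testing.assert_allclose(S_fun, S_est)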
@@ -431,10 +350,14 @@ def my_g(x):
431350 Attributes
432351 ----------
433352 components_ : 2D array, shape (n_components, n_features)
434- The unmixing matrix.
353+ The linear operator to apply to the data to get the independent
354+ sources. This is equal to the unmixing matrix when ``whiten`` is
355+ False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when
356+ ``whiten`` is True.
435357
436358 mixing_ : array, shape (n_features, n_components)
437- The mixing matrix.
359+ The pseudo-inverse of ``components_``. It is the linear operator
360+ that maps independent sources to the data.
438361
439362 mean_ : array, shape(n_features)
440363 The mean over features. Only set if `self.whiten` is True.
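The attribute descriptions above imply a simple reconstruction identity: ``mixing_`` is the pseudo-inverse of ``components_``, so mapping the recovered sources through ``mixing_`` and adding back ``mean_`` reproduces the input whenever the data truly lies in an ``n_components``-dimensional subspace. A short check on such toy data (illustrative only)::

    import numpy as np
    from scipy import linalg
    from sklearn.decomposition import FastICA

    rng = np.random.RandomState(0)
    X = np.dot(rng.laplace(size=(1000, 2)), rng.randn(3, 2).T)

    est = FastICA(n_components=2, random_state=0)
    S = est.fit_transform(X)

    # mixing_ is defined as the pseudo-inverse of components_
    np.testing.assert_allclose(est.mixing_, linalg.pinv(est.components_))

    # reconstruct the observations from the recovered sources
    X_rec = np.dot(S, est.mixing_.T) + est.mean_
    np.testing.assert_allclose(X_rec, X, atol=1e-10)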
@@ -502,26 +425,121 @@ def _fit(self, X, compute_sources=False):
502425 X_new : array-like, shape (n_samples, n_components)
503426 """
504427        fun_args = {} if self.fun_args is None else self.fun_args
505-        whitening, unmixing, sources, X_mean, self.n_iter_ = fastica(
506-            X=X, n_components=self.n_components, algorithm=self.algorithm,
507-            whiten=self.whiten, fun=self.fun, fun_args=fun_args,
508-            max_iter=self.max_iter, tol=self.tol, w_init=self.w_init,
509-            random_state=self.random_state, return_X_mean=True,
510-            compute_sources=compute_sources, return_n_iter=True)
428+        random_state = check_random_state(self.random_state)
429+
430+        # make interface compatible with other decompositions
431+        # a copy is required only for non whitened data
432+        X = check_array(X, copy=self.whiten, dtype=FLOAT_DTYPES,
433+                        ensure_min_samples=2).T
434+
435+        alpha = fun_args.get('alpha', 1.0)
436+        if not 1 <= alpha <= 2:
437+            raise ValueError('alpha must be in [1,2]')
438+
439+        if self.fun == 'logcosh':
440+            g = _logcosh
441+        elif self.fun == 'exp':
442+            g = _exp
443+        elif self.fun == 'cube':
444+            g = _cube
445+        elif callable(self.fun):
446+            def g(x, fun_args):
447+                return self.fun(x, **fun_args)
448+        else:
449+            exc = ValueError if isinstance(self.fun, str) else TypeError
450+            raise exc(
451+                "Unknown function %r;"
452+                " should be one of 'logcosh', 'exp', 'cube' or callable"
453+                % self.fun
454+            )
455+
456+        n_samples, n_features = X.shape
457+
458+        n_components = self.n_components
459+        if not self.whiten and n_components is not None:
460+            n_components = None
461+            warnings.warn('Ignoring n_components with whiten=False.')
462+
463+        if n_components is None:
464+            n_components = min(n_samples, n_features)
465+        if (n_components > min(n_samples, n_features)):
466+            n_components = min(n_samples, n_features)
467+            warnings.warn(
468+                'n_components is too large: it will be set to %s'
469+                % n_components
470+            )
471+
472+        if self.whiten:
473+            # Centering the columns (ie the variables)
474+            X_mean = X.mean(axis=-1)
475+            X -= X_mean[:, np.newaxis]
476+
477+            # Whitening and preprocessing by PCA
478+            u, d, _ = linalg.svd(X, full_matrices=False)
479+
480+            del _
481+            K = (u / d).T[:n_components]  # see (6.33) p.140
482+            del u, d
483+            X1 = np.dot(K, X)
484+            # see (13.6) p.267 Here X1 is white and data
485+            # in X has been projected onto a subspace by PCA
486+            X1 *= np.sqrt(n_features)
487+        else:
488+            # X must be casted to floats to avoid typing issues with numpy
489+            # 2.0 and the line below
490+            X1 = as_float_array(X, copy=False)  # copy has been taken care of
491+
492+        w_init = self.w_init
493+        if w_init is None:
494+            w_init = np.asarray(random_state.normal(
495+                size=(n_components, n_components)), dtype=X1.dtype)
496+
497+        else:
498+            w_init = np.asarray(w_init)
499+            if w_init.shape != (n_components, n_components):
500+                raise ValueError(
501+                    'w_init has invalid shape -- should be %(shape)s'
502+                    % {'shape': (n_components, n_components)})
503+
504+        kwargs = {'tol': self.tol,
505+                  'g': g,
506+                  'fun_args': fun_args,
507+                  'max_iter': self.max_iter,
508+                  'w_init': w_init}
509+
510+        if self.algorithm == 'parallel':
511+            W, n_iter = _ica_par(X1, **kwargs)
512+        elif self.algorithm == 'deflation':
513+            W, n_iter = _ica_def(X1, **kwargs)
514+        else:
515+            raise ValueError('Invalid algorithm: must be either `parallel` or'
516+                             ' `deflation`.')
517+        del X1
518+
519+        if compute_sources:
520+            if self.whiten:
521+                S = np.dot(np.dot(W, K), X).T
522+            else:
523+                S = np.dot(W, X).T
524+        else:
525+            S = None
526+
527+        self.n_iter_ = n_iter
511528
512529        if self.whiten:
513-            self.components_ = np.dot(unmixing, whitening)
530+            self.components_ = np.dot(W, K)
514531            self.mean_ = X_mean
515-            self.whitening_ = whitening
532+            self.whitening_ = K
516533        else:
517-            self.components_ = unmixing
534+            self.components_ = W
518535
519536        self.mixing_ = linalg.pinv(self.components_)
537+        self._unmixing = W
520538
521539        if compute_sources:
522-            self.__sources = sources
540+            self.__sources = S
523541
524-        return sources
542+        return S
525543
526544    def fit_transform(self, X, y=None):
527545        """Fit the model and recover the sources from X.
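For reference, the whitening branch added to ``_fit`` above is plain PCA whitening: center each feature, take an SVD, keep the top ``n_components`` directions and rescale. The standalone sketch below mirrors those lines on illustrative data (already transposed to (n_features, n_samples), as in the method) and checks that the projected data is white::

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    n_components = 2
    X = rng.laplace(size=(3, 1000))             # (n_features, n_samples) toy data

    X_mean = X.mean(axis=-1)
    X -= X_mean[:, np.newaxis]                  # center each feature (row)

    u, d, _ = linalg.svd(X, full_matrices=False)
    K = (u / d).T[:n_components]                # whitening matrix, (n_components, n_features)
    X1 = np.dot(K, X) * np.sqrt(X.shape[1])     # whitened, PCA-projected data

    # rows of X1 are uncorrelated with unit variance
    np.testing.assert_allclose(np.dot(X1, X1.T) / X.shape[1],
                               np.eye(n_components), atol=1e-10)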