@@ -226,11 +226,9 @@ def my_g(x):
        K is 'None'.

    W : array, shape (n_components, n_components)
-        Estimated un-mixing matrix.
-        The mixing matrix can be obtained by::
-
-            w = np.dot(W, K.T)
-            A = w.T * (w * w.T).I
+        The square matrix that unmixes the data after whitening.
+        The mixing matrix is the pseudo-inverse of matrix ``W K``
+        if K is not None, else it is the inverse of W.

    S : array, shape (n_samples, n_components) | None
        Estimated source matrix
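
The new wording says the mixing matrix is the pseudo-inverse of ``W K``. A minimal sketch of what that means in practice, assuming the ``fastica`` signature documented in this file (whitening on, sources returned); the names ``S_true``, ``A_true`` and ``A_est`` are illustrative only, and later scikit-learn releases changed the ``whiten`` options, so exact scaling may differ there:

```python
import numpy as np
from scipy import linalg
from sklearn.decomposition import fastica

rng = np.random.RandomState(0)
S_true = rng.laplace(size=(1000, 3))   # independent sources
A_true = rng.randn(3, 3)               # ground-truth mixing matrix
X = np.dot(S_true, A_true.T)           # observations, shape (n_samples, n_features)

# With whiten=True, fastica returns the whitening matrix K, the square
# un-mixing matrix W and the estimated sources S.
K, W, S = fastica(X, whiten=True, random_state=0)

# The estimated mixing matrix is the pseudo-inverse of W K ...
A_est = linalg.pinv(np.dot(W, K))
# ... so the estimated sources reproduce the centered data through A_est.
X_centered = X - X.mean(axis=0)
print(np.allclose(np.dot(S, A_est.T), X_centered))  # True
```

When ``whiten=False``, K is None and the same role is played by ``linalg.pinv(W)``, which here is simply the inverse since W is square.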
@@ -252,6 +250,10 @@ def my_g(x):
    contain the independent components and A is a linear mixing
    matrix. In short ICA attempts to `un-mix' the data by estimating an
    un-mixing matrix W where ``S = W K X.``
+    While FastICA was proposed to estimate as many sources
+    as features, it is possible to estimate less by setting
+    n_components < n_features. In this case K is not a square matrix
+    and the estimated A is the pseudo-inverse of ``W K``.

    This implementation was originally made for data of shape
    [n_features, n_samples]. Now the input is transposed
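
To illustrate the added note about ``n_components < n_features``: K comes back rectangular and the pseudo-inverse of ``W K`` maps the few estimated sources back to all observed features. A small usage sketch with made-up data (shapes follow the Returns section above):

```python
import numpy as np
from scipy import linalg
from sklearn.decomposition import fastica

rng = np.random.RandomState(0)
# Four observed channels driven by only two independent sources, plus noise.
S_true = rng.laplace(size=(2000, 2))
A_true = rng.randn(4, 2)
X = np.dot(S_true, A_true.T) + 0.01 * rng.randn(2000, 4)

K, W, S = fastica(X, n_components=2, random_state=0)

print(K.shape)  # (2, 4) -- rectangular: projects 4 features onto 2 components
print(W.shape)  # (2, 2) -- square un-mixing matrix in the whitened space
print(S.shape)  # (2000, 2)

# The estimated mixing matrix A is the pseudo-inverse of W K.
A_est = linalg.pinv(np.dot(W, K))
print(A_est.shape)  # (4, 2): maps the 2 sources back to the 4 features
```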
@@ -264,120 +266,37 @@ def my_g(x):
    pp. 411-430*

    """
-    random_state = check_random_state(random_state)
-    fun_args = {} if fun_args is None else fun_args
-    # make interface compatible with other decompositions
-    # a copy is required only for non whitened data
-    X = check_array(X, copy=whiten, dtype=FLOAT_DTYPES,
-                    ensure_min_samples=2).T
-
-    alpha = fun_args.get('alpha', 1.0)
-    if not 1 <= alpha <= 2:
-        raise ValueError('alpha must be in [1,2]')
-
-    if fun == 'logcosh':
-        g = _logcosh
-    elif fun == 'exp':
-        g = _exp
-    elif fun == 'cube':
-        g = _cube
-    elif callable(fun):
-        def g(x, fun_args):
-            return fun(x, **fun_args)
-    else:
-        exc = ValueError if isinstance(fun, str) else TypeError
-        raise exc("Unknown function %r;"
-                  " should be one of 'logcosh', 'exp', 'cube' or callable"
-                  % fun)
-
-    n, p = X.shape
-
-    if not whiten and n_components is not None:
-        n_components = None
-        warnings.warn('Ignoring n_components with whiten=False.')
-
-    if n_components is None:
-        n_components = min(n, p)
-    if (n_components > min(n, p)):
-        n_components = min(n, p)
-        warnings.warn('n_components is too large: it will be set to %s' % n_components)
-
-    if whiten:
-        # Centering the columns (ie the variables)
-        X_mean = X.mean(axis=-1)
-        X -= X_mean[:, np.newaxis]
-
-        # Whitening and preprocessing by PCA
-        u, d, _ = linalg.svd(X, full_matrices=False)
-
-        del _
-        K = (u / d).T[:n_components]  # see (6.33) p.140
-        del u, d
-        X1 = np.dot(K, X)
-        # see (13.6) p.267 Here X1 is white and data
-        # in X has been projected onto a subspace by PCA
-        X1 *= np.sqrt(p)
-    else:
-        # X must be casted to floats to avoid typing issues with numpy
-        # 2.0 and the line below
-        X1 = as_float_array(X, copy=False)  # copy has been taken care of

-    if w_init is None:
-        w_init = np.asarray(random_state.normal(size=(n_components,
-                            n_components)), dtype=X1.dtype)
-
-    else:
-        w_init = np.asarray(w_init)
-        if w_init.shape != (n_components, n_components):
-            raise ValueError('w_init has invalid shape -- should be %(shape)s'
-                             % {'shape': (n_components, n_components)})
-
-    kwargs = {'tol': tol,
-              'g': g,
-              'fun_args': fun_args,
-              'max_iter': max_iter,
-              'w_init': w_init}
-
-    if algorithm == 'parallel':
-        W, n_iter = _ica_par(X1, **kwargs)
-    elif algorithm == 'deflation':
-        W, n_iter = _ica_def(X1, **kwargs)
-    else:
-        raise ValueError('Invalid algorithm: must be either `parallel` or'
-                         ' `deflation`.')
-    del X1
+    est = FastICA(n_components=n_components, algorithm=algorithm,
+                  whiten=whiten, fun=fun, fun_args=fun_args,
+                  max_iter=max_iter, tol=tol, w_init=w_init,
+                  random_state=random_state)
+    sources = est._fit(X, compute_sources=compute_sources)

    if whiten:
-        if compute_sources:
-            S = np.dot(np.dot(W, K), X).T
-        else:
-            S = None
        if return_X_mean:
            if return_n_iter:
-                return K, W, S, X_mean, n_iter
+                return (est.whitening_, est._unmixing, sources, est.mean_,
+                        est.n_iter_)
            else:
-                return K, W, S, X_mean
+                return est.whitening_, est._unmixing, sources, est.mean_
        else:
            if return_n_iter:
-                return K, W, S, n_iter
+                return est.whitening_, est._unmixing, sources, est.n_iter_
            else:
-                return K, W, S
+                return est.whitening_, est._unmixing, sources

    else:
-        if compute_sources:
-            S = np.dot(W, X).T
-        else:
-            S = None
        if return_X_mean:
            if return_n_iter:
-                return None, W, S, None, n_iter
+                return None, est._unmixing, sources, None, est.n_iter_
            else:
-                return None, W, S, None
+                return None, est._unmixing, sources, None
        else:
            if return_n_iter:
-                return None, W, S, n_iter
+                return None, est._unmixing, sources, est.n_iter_
            else:
-                return None, W, S
+                return None, est._unmixing, sources


class FastICA(TransformerMixin, BaseEstimator):
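
With this hunk, ``fastica`` becomes a thin wrapper that builds a ``FastICA`` estimator and returns its fitted attributes. A quick equivalence sketch (not part of the test suite; it assumes the refactor above, under which both interfaces run the same code for the same ``random_state``):

```python
import numpy as np
from sklearn.decomposition import FastICA, fastica

rng = np.random.RandomState(0)
X = np.dot(rng.laplace(size=(500, 3)), rng.randn(3, 3).T)

# Function interface: whitening matrix, un-mixing matrix and sources.
K, W, S = fastica(X, random_state=0)

# Estimator interface: the same quantities end up as fitted attributes.
est = FastICA(random_state=0)
S_est = est.fit_transform(X)

print(np.allclose(K, est.whitening_))              # True
print(np.allclose(np.dot(W, K), est.components_))  # True
print(np.allclose(S, S_est))                       # True
```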
@@ -431,10 +350,14 @@ def my_g(x):
    Attributes
    ----------
    components_ : 2D array, shape (n_components, n_features)
-        The unmixing matrix.
+        The linear operator to apply to the data to get the independent
+        sources. This is equal to the unmixing matrix when ``whiten`` is
+        False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when
+        ``whiten`` is True.

    mixing_ : array, shape (n_features, n_components)
-        The mixing matrix.
+        The pseudo-inverse of ``components_``. It is the linear operator
+        that maps independent sources to the data.

    mean_ : array, shape(n_features)
        The mean over features. Only set if `self.whiten` is True.
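
The reworded attribute docs can be checked directly: ``mixing_`` is the pseudo-inverse of ``components_``, and together they give a lossless transform / inverse_transform round trip in the full-rank case. A short sanity sketch on illustrative data:

```python
import numpy as np
from scipy import linalg
from sklearn.decomposition import FastICA

rng = np.random.RandomState(0)
X = np.dot(rng.laplace(size=(500, 3)), rng.randn(3, 3).T)

ica = FastICA(n_components=3, whiten=True, random_state=0).fit(X)

# mixing_ is documented as the pseudo-inverse of components_.
print(np.allclose(ica.mixing_, linalg.pinv(ica.components_)))  # True

# components_ maps (centered) data to sources; mixing_ maps them back.
S = ica.transform(X)
X_back = ica.inverse_transform(S)
print(np.allclose(X, X_back))  # True
```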
@@ -502,26 +425,121 @@ def _fit(self, X, compute_sources=False):
        X_new : array-like, shape (n_samples, n_components)
        """
        fun_args = {} if self.fun_args is None else self.fun_args
-        whitening, unmixing, sources, X_mean, self.n_iter_ = fastica(
-            X=X, n_components=self.n_components, algorithm=self.algorithm,
-            whiten=self.whiten, fun=self.fun, fun_args=fun_args,
-            max_iter=self.max_iter, tol=self.tol, w_init=self.w_init,
-            random_state=self.random_state, return_X_mean=True,
-            compute_sources=compute_sources, return_n_iter=True)
+        random_state = check_random_state(self.random_state)
+
+        # make interface compatible with other decompositions
+        # a copy is required only for non whitened data
+        X = check_array(X, copy=self.whiten, dtype=FLOAT_DTYPES,
+                        ensure_min_samples=2).T
+
+        alpha = fun_args.get('alpha', 1.0)
+        if not 1 <= alpha <= 2:
+            raise ValueError('alpha must be in [1,2]')
+
+        if self.fun == 'logcosh':
+            g = _logcosh
+        elif self.fun == 'exp':
+            g = _exp
+        elif self.fun == 'cube':
+            g = _cube
+        elif callable(self.fun):
+            def g(x, fun_args):
+                return self.fun(x, **fun_args)
+        else:
+            exc = ValueError if isinstance(self.fun, str) else TypeError
+            raise exc(
+                "Unknown function %r;"
+                " should be one of 'logcosh', 'exp', 'cube' or callable"
+                % self.fun
+            )
+
+        n_samples, n_features = X.shape
+
+        n_components = self.n_components
+        if not self.whiten and n_components is not None:
+            n_components = None
+            warnings.warn('Ignoring n_components with whiten=False.')
+
+        if n_components is None:
+            n_components = min(n_samples, n_features)
+        if (n_components > min(n_samples, n_features)):
+            n_components = min(n_samples, n_features)
+            warnings.warn(
+                'n_components is too large: it will be set to %s'
+                % n_components
+            )
+
+        if self.whiten:
+            # Centering the columns (ie the variables)
+            X_mean = X.mean(axis=-1)
+            X -= X_mean[:, np.newaxis]
+
+            # Whitening and preprocessing by PCA
+            u, d, _ = linalg.svd(X, full_matrices=False)
+
+            del _
+            K = (u / d).T[:n_components]  # see (6.33) p.140
+            del u, d
+            X1 = np.dot(K, X)
+            # see (13.6) p.267 Here X1 is white and data
+            # in X has been projected onto a subspace by PCA
+            X1 *= np.sqrt(n_features)
+        else:
+            # X must be casted to floats to avoid typing issues with numpy
+            # 2.0 and the line below
+            X1 = as_float_array(X, copy=False)  # copy has been taken care of
+
+        w_init = self.w_init
+        if w_init is None:
+            w_init = np.asarray(random_state.normal(
+                size=(n_components, n_components)), dtype=X1.dtype)
+
+        else:
+            w_init = np.asarray(w_init)
+            if w_init.shape != (n_components, n_components):
+                raise ValueError(
+                    'w_init has invalid shape -- should be %(shape)s'
+                    % {'shape': (n_components, n_components)})
+
+        kwargs = {'tol': self.tol,
+                  'g': g,
+                  'fun_args': fun_args,
+                  'max_iter': self.max_iter,
+                  'w_init': w_init}
+
+        if self.algorithm == 'parallel':
+            W, n_iter = _ica_par(X1, **kwargs)
+        elif self.algorithm == 'deflation':
+            W, n_iter = _ica_def(X1, **kwargs)
+        else:
+            raise ValueError('Invalid algorithm: must be either `parallel` or'
+                             ' `deflation`.')
+        del X1
+
+        if compute_sources:
+            if self.whiten:
+                S = np.dot(np.dot(W, K), X).T
+            else:
+                S = np.dot(W, X).T
+        else:
+            S = None
+
+        self.n_iter_ = n_iter

        if self.whiten:
-            self.components_ = np.dot(unmixing, whitening)
+            self.components_ = np.dot(W, K)
            self.mean_ = X_mean
-            self.whitening_ = whitening
+            self.whitening_ = K
        else:
-            self.components_ = unmixing
+            self.components_ = W

        self.mixing_ = linalg.pinv(self.components_)
+        self._unmixing = W

        if compute_sources:
-            self.__sources = sources
+            self.__sources = S

-        return sources
+        return S

    def fit_transform(self, X, y=None):
        """Fit the model and recover the sources from X.