@@ -309,19 +309,28 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
309
309
return code
310
310
311
311
312
- def _update_dict (dictionary , Y , code , verbose = False , return_r2 = False ,
313
- random_state = None ):
312
+ def _proj_l2 (v ):
313
+ """Projects v unto l2 unit ball in-place.
314
+ """
315
+ vv = np .dot (v , v )
316
+ if vv > 1. :
317
+ v /= sqrt (vv )
318
+ return v
319
+
320
+
321
+ def _update_dict (dictionary , B , A , verbose = False , return_r2 = False ,
322
+ random_state = None , online = False ):
314
323
"""Update the dense dictionary factor in place.
315
324
316
325
Parameters
317
326
----------
318
327
dictionary : array of shape (n_features, n_components)
319
328
Value of the dictionary at the previous iteration.
320
329
321
- Y : array of shape (n_features, n_samples )
330
+ B : array of shape (n_features, n_components )
322
331
Data matrix.
323
332
324
- code : array of shape (n_components, n_samples )
333
+ A : array of shape (n_components, n_components )
325
334
Sparse coding of the data against which to optimize the dictionary.
326
335
327
336
verbose:
@@ -343,35 +352,32 @@ def _update_dict(dictionary, Y, code, verbose=False, return_r2=False,
343
352
Updated dictionary.
344
353
345
354
"""
346
- n_components = len (code )
347
- n_samples = Y .shape [0 ]
355
+ n_features , n_components = B .shape
348
356
random_state = check_random_state (random_state )
349
357
# Residuals, computed 'in-place' for efficiency
350
- R = - np .dot (dictionary , code )
351
- R += Y
358
+ R = - np .dot (dictionary , A )
359
+ R += B
352
360
R = np .asfortranarray (R )
353
- ger , = linalg .get_blas_funcs (('ger' ,), (dictionary , code ))
361
+ ger , = linalg .get_blas_funcs (('ger' ,), (dictionary , A ))
354
362
for k in range (n_components ):
355
363
# R <- 1.0 * U_k * V_k^T + R
356
- R = ger (1.0 , dictionary [:, k ], code [k , :], a = R , overwrite_a = True )
357
- dictionary [:, k ] = np .dot (R , code [k , :]. T )
364
+ R = ger (1.0 , dictionary [:, k ], A [k , :], a = R , overwrite_a = True )
365
+ dictionary [:, k ] = np .dot (R , A [k , :])
358
366
# Scale k'th atom
359
- atom_norm_square = np .dot (dictionary [:, k ], dictionary [:, k ])
360
- if atom_norm_square < 1e-20 :
367
+ if A [k , k ] < 1e-20 :
361
368
if verbose == 1 :
362
369
sys .stdout .write ("+" )
363
370
sys .stdout .flush ()
364
371
elif verbose :
365
372
print ("Adding new random atom" )
366
- dictionary [:, k ] = random_state .randn (n_samples )
373
+ dictionary [:, k ] = random_state .randn (n_features )
367
374
# Setting corresponding coefs to 0
368
- code [k , :] = 0.0
369
- dictionary [:, k ] /= sqrt (np .dot (dictionary [:, k ],
370
- dictionary [:, k ]))
375
+ A [k , :] = 0.
371
376
else :
372
- dictionary [:, k ] /= sqrt (atom_norm_square )
373
- # R <- -1.0 * U_k * V_k^T + R
374
- R = ger (- 1.0 , dictionary [:, k ], code [k , :], a = R , overwrite_a = True )
377
+ dictionary [:, k ] /= A [k , k ]
378
+ _proj_l2 (dictionary [:, k ])
379
+ # R <- -1.0 * U_k * V_k^T + R
380
+ R = ger (- 1.0 , dictionary [:, k ], A [k , :], a = R , overwrite_a = True )
375
381
if return_r2 :
376
382
R **= 2
377
383
# R is fortran-ordered. For numpy version < 1.6, sum does not
@@ -472,98 +478,39 @@ def dict_learning(X, n_components, alpha, max_iter=100, tol=1e-8,
472
478
SparsePCA
473
479
MiniBatchSparsePCA
474
480
"""
475
- if method not in ('lars' , 'cd' ):
476
- raise ValueError ('Coding method %r not supported as a fit algorithm.'
477
- % method )
478
- method = 'lasso_' + method
481
+ return dict_learning_online (
482
+ X , n_components = n_components , alpha = alpha , n_iter = max_iter ,
483
+ return_code = True , dict_init = dict_init , callback = callback ,
484
+ batch_size = len (X ), verbose = verbose , shuffle = False ,
485
+ return_n_iter = return_n_iter , n_jobs = n_jobs , method = method ,
486
+ return_inner_stats = False , tol = tol )
479
487
480
- t0 = time .time ()
481
- # Avoid integer division problems
482
- alpha = float (alpha )
483
- random_state = check_random_state (random_state )
484
488
485
- if n_jobs == - 1 :
486
- n_jobs = cpu_count ()
489
+ def _compute_residuals_from_code ( X , V , U ) :
490
+ """Computes ||X - UV||_F^2 directly.
487
491
488
- # Init the code and the dictionary with SVD of Y
489
- if code_init is not None and dict_init is not None :
490
- code = np .array (code_init , order = 'F' )
491
- # Don't copy V, it will happen below
492
- dictionary = dict_init
493
- else :
494
- code , S , dictionary = linalg .svd (X , full_matrices = False )
495
- dictionary = S [:, np .newaxis ] * dictionary
496
- r = len (dictionary )
497
- if n_components <= r : # True even if n_components=None
498
- code = code [:, :n_components ]
499
- dictionary = dictionary [:n_components , :]
500
- else :
501
- code = np .c_ [code , np .zeros ((len (code ), n_components - r ))]
502
- dictionary = np .r_ [dictionary ,
503
- np .zeros ((n_components - r , dictionary .shape [1 ]))]
504
-
505
- # Fortran-order dict, as we are going to access its row vectors
506
- dictionary = np .array (dictionary , order = 'F' )
507
-
508
- residuals = 0
509
-
510
- errors = []
511
- current_cost = np .nan
512
-
513
- if verbose == 1 :
514
- print ('[dict_learning]' , end = ' ' )
515
-
516
- # If max_iter is 0, number of iterations returned should be zero
517
- ii = - 1
518
-
519
- for ii in range (max_iter ):
520
- dt = (time .time () - t0 )
521
- if verbose == 1 :
522
- sys .stdout .write ("." )
523
- sys .stdout .flush ()
524
- elif verbose :
525
- print ("Iteration % 3i "
526
- "(elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)"
527
- % (ii , dt , dt / 60 , current_cost ))
528
-
529
- # Update code
530
- code = sparse_encode (X , dictionary , algorithm = method , alpha = alpha ,
531
- init = code , n_jobs = n_jobs )
532
- # Update dictionary
533
- dictionary , residuals = _update_dict (dictionary .T , X .T , code .T ,
534
- verbose = verbose , return_r2 = True ,
535
- random_state = random_state )
536
- dictionary = dictionary .T
537
-
538
- # Cost function
539
- current_cost = 0.5 * residuals + alpha * np .sum (np .abs (code ))
540
- errors .append (current_cost )
541
-
542
- if ii > 0 :
543
- dE = errors [- 2 ] - errors [- 1 ]
544
- # assert(dE >= -tol * errors[-1])
545
- if dE < tol * errors [- 1 ]:
546
- if verbose == 1 :
547
- # A line return
548
- print ("" )
549
- elif verbose :
550
- print ("--- Convergence reached after %d iterations" % ii )
551
- break
552
- if ii % 5 == 0 and callback is not None :
553
- callback (locals ())
554
-
555
- if return_n_iter :
556
- return code , dictionary , errors , ii + 1
557
- else :
558
- return code , dictionary , errors
492
+ Parameters
493
+ ==========
494
+ X: ndarray, shape (n_samples, n_features)
495
+ The input data.
496
+ V: ndarray, shape (n_features, n_components)
497
+ The dictionary.
498
+ U: ndarray, shape (n_samples, n_components)
499
+ The codes.
500
+ """
501
+ residuals = V .dot (U )
502
+ residuals -= X .T
503
+ residuals **= 2
504
+ residuals = np .sum (residuals )
505
+ return residuals
559
506
560
507
561
508
def dict_learning_online (X , n_components = 2 , alpha = 1 , n_iter = 100 ,
562
509
return_code = True , dict_init = None , callback = None ,
563
- batch_size = 3 , verbose = False , shuffle = True , n_jobs = 1 ,
564
- method = 'lars' , iter_offset = 0 , random_state = None ,
565
- return_inner_stats = False , inner_stats = None ,
566
- return_n_iter = False ):
510
+ batch_size = None , verbose = False , shuffle = True ,
511
+ n_jobs = 1 , method = 'lars' , iter_offset = 0 , tol = 0. ,
512
+ random_state = None , return_inner_stats = False ,
513
+ inner_stats = None , return_n_iter = False ):
567
514
"""Solves a dictionary learning matrix factorization problem online.
568
515
569
516
Finds the best dictionary and the corresponding sparse code for
@@ -711,6 +658,9 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
711
658
copy = False )
712
659
X_train = check_array (X_train , order = 'C' , dtype = np .float64 , copy = False )
713
660
661
+ if batch_size is None :
662
+ batch_size = n_samples
663
+ online = batch_size < n_samples
714
664
batches = gen_batches (n_samples , batch_size )
715
665
batches = itertools .cycle (batches )
716
666
@@ -726,6 +676,8 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
726
676
# If n_iter is zero, we need to return zero.
727
677
ii = iter_offset - 1
728
678
679
+ err = 0.
680
+ errors = []
729
681
for ii , batch in zip (range (iter_offset , iter_offset + n_iter ), batches ):
730
682
this_X = X_train [batch ]
731
683
dt = (time .time () - t0 )
@@ -741,26 +693,46 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
741
693
alpha = alpha , n_jobs = n_jobs ).T
742
694
743
695
# Update the auxiliary variables
744
- if ii < batch_size - 1 :
745
- theta = float ((ii + 1 ) * batch_size )
696
+ if online :
697
+ if ii < batch_size - 1 :
698
+ theta = float ((ii + 1 ) * batch_size )
699
+ else :
700
+ theta = float (batch_size ** 2 + ii + 1 - batch_size )
701
+ beta = (theta + 1 - batch_size ) / (theta + 1 )
746
702
else :
747
- theta = float (batch_size ** 2 + ii + 1 - batch_size )
748
- beta = (theta + 1 - batch_size ) / (theta + 1 )
749
-
703
+ beta = 0.
750
704
A *= beta
751
705
A += np .dot (this_code , this_code .T )
752
706
B *= beta
753
707
B += np .dot (this_X .T , this_code .T )
754
708
755
709
# Update dictionary
756
710
dictionary = _update_dict (dictionary , B , A , verbose = verbose ,
757
- random_state = random_state )
758
- # XXX: Can the residuals be of any use?
759
-
760
- # Maybe we need a stopping criteria based on the amount of
761
- # modification in the dictionary
762
- if callback is not None :
763
- callback (locals ())
711
+ random_state = random_state , online = True ,
712
+ return_r2 = False )
713
+
714
+ # Check convergence
715
+ if not online and callback is None :
716
+ residuals = _compute_residuals_from_code (this_X , dictionary ,
717
+ this_code )
718
+ err = .5 * residuals + alpha * np .sum (np .abs (this_code ))
719
+ errors .append (err )
720
+ if len (errors ) > 1 :
721
+ dE = errors [- 2 ] - errors [- 1 ]
722
+ # assert(dE >= -tol * errors[-1])
723
+ if np .abs (dE ) < tol * errors [- 1 ]:
724
+ if verbose == 1 :
725
+ # A line return
726
+ print ("" )
727
+ elif verbose :
728
+ print (
729
+ "--- Convergence reached after %d iterations" % ii )
730
+ break
731
+ elif callback is not None :
732
+ # Maybe we need a stopping criteria based on the amount of
733
+ # modification in the dictionary
734
+ if not callback (locals ()):
735
+ break
764
736
765
737
if return_inner_stats :
766
738
if return_n_iter :
@@ -778,14 +750,14 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
778
750
dt = (time .time () - t0 )
779
751
print ('done (total time: % 3is, % 4.1fmn)' % (dt , dt / 60 ))
780
752
if return_n_iter :
781
- return code , dictionary .T , ii - iter_offset + 1
753
+ return code , dictionary .T , errors , ii - iter_offset + 1
782
754
else :
783
- return code , dictionary .T
755
+ return code , dictionary .T , errors
784
756
785
757
if return_n_iter :
786
- return dictionary .T , ii - iter_offset + 1
758
+ return dictionary .T , errors , ii - iter_offset + 1
787
759
else :
788
- return dictionary .T
760
+ return dictionary .T , errors
789
761
790
762
791
763
class SparseCodingMixin (TransformerMixin ):
0 commit comments