13
13
from collections import defaultdict
14
14
import matplotlib .pyplot as plt
15
15
from sklearn .datasets import fetch_lfw_people
16
- from sklearn .decomposition import IncrementalPCA , RandomizedPCA , PCA
16
+ from sklearn .decomposition import IncrementalPCA , PCA
17
17
18
18
19
19
def plot_results (X , y , label ):
@@ -37,7 +37,6 @@ def plot_feature_times(all_times, batch_size, all_components, data):
37
37
plot_results (all_components , all_times ['pca' ], label = "PCA" )
38
38
plot_results (all_components , all_times ['ipca' ],
39
39
label = "IncrementalPCA, bsize=%i" % batch_size )
40
- plot_results (all_components , all_times ['rpca' ], label = "RandomizedPCA" )
41
40
plt .legend (loc = "upper left" )
42
41
plt .suptitle ("Algorithm runtime vs. n_components\n \
43
42
LFW, size %i x %i" % data .shape )
@@ -50,7 +49,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
50
49
plot_results (all_components , all_errors ['pca' ], label = "PCA" )
51
50
plot_results (all_components , all_errors ['ipca' ],
52
51
label = "IncrementalPCA, bsize=%i" % batch_size )
53
- plot_results (all_components , all_errors ['rpca' ], label = "RandomizedPCA" )
54
52
plt .legend (loc = "lower left" )
55
53
plt .suptitle ("Algorithm error vs. n_components\n "
56
54
"LFW, size %i x %i" % data .shape )
@@ -61,7 +59,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
61
59
def plot_batch_times (all_times , n_features , all_batch_sizes , data ):
62
60
plt .figure ()
63
61
plot_results (all_batch_sizes , all_times ['pca' ], label = "PCA" )
64
- plot_results (all_batch_sizes , all_times ['rpca' ], label = "RandomizedPCA" )
65
62
plot_results (all_batch_sizes , all_times ['ipca' ], label = "IncrementalPCA" )
66
63
plt .legend (loc = "lower left" )
67
64
plt .suptitle ("Algorithm runtime vs. batch_size for n_components %i\n \
@@ -92,11 +89,9 @@ def fixed_batch_size_comparison(data):
92
89
all_errors = defaultdict (list )
93
90
for n_components in all_features :
94
91
pca = PCA (n_components = n_components )
95
- rpca = RandomizedPCA (n_components = n_components , random_state = 1999 )
96
92
ipca = IncrementalPCA (n_components = n_components , batch_size = batch_size )
97
93
results_dict = {k : benchmark (est , data ) for k , est in [('pca' , pca ),
98
- ('ipca' , ipca ),
99
- ('rpca' , rpca )]}
94
+ ('ipca' , ipca )]}
100
95
101
96
for k in sorted (results_dict .keys ()):
102
97
all_times [k ].append (results_dict [k ]['time' ])
@@ -116,7 +111,8 @@ def variable_batch_size_comparison(data):
116
111
all_times = defaultdict (list )
117
112
all_errors = defaultdict (list )
118
113
pca = PCA (n_components = n_components )
119
- rpca = RandomizedPCA (n_components = n_components , random_state = 1999 )
114
+ rpca = PCA (n_components = n_components , svd_solver = 'randomized' ,
115
+ random_state = 1999 )
120
116
results_dict = {k : benchmark (est , data ) for k , est in [('pca' , pca ),
121
117
('rpca' , rpca )]}
122
118
@@ -138,8 +134,6 @@ def variable_batch_size_comparison(data):
138
134
all_errors ['ipca' ].append (results_dict ['ipca' ]['error' ])
139
135
140
136
plot_batch_times (all_times , n_components , batch_sizes , data )
141
- # RandomizedPCA error is always worse (approx 100x) than other PCA
142
- # tests
143
137
plot_batch_errors (all_errors , n_components , batch_sizes , data )
144
138
145
139
faces = fetch_lfw_people (resize = .2 , min_faces_per_person = 5 )
0 commit comments