[MRG] MNT Remove code deprecated in 0.18 by massich · Pull Request #10094 · scikit-learn/scikit-learn · GitHub

[MRG] MNT Remove code deprecated in 0.18 #10094


Closed · wants to merge 14 commits
15 changes: 3 additions & 12 deletions benchmarks/bench_plot_incremental_pca.py
@@ -13,7 +13,7 @@
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import IncrementalPCA, RandomizedPCA, PCA
from sklearn.decomposition import IncrementalPCA, PCA


def plot_results(X, y, label):
@@ -37,7 +37,6 @@ def plot_feature_times(all_times, batch_size, all_components, data):
plot_results(all_components, all_times['pca'], label="PCA")
plot_results(all_components, all_times['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_times['rpca'], label="RandomizedPCA")
plt.legend(loc="upper left")
plt.suptitle("Algorithm runtime vs. n_components\n \
LFW, size %i x %i" % data.shape)
@@ -50,7 +49,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
plot_results(all_components, all_errors['pca'], label="PCA")
plot_results(all_components, all_errors['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_errors['rpca'], label="RandomizedPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm error vs. n_components\n"
"LFW, size %i x %i" % data.shape)
@@ -61,7 +59,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
def plot_batch_times(all_times, n_features, all_batch_sizes, data):
plt.figure()
plot_results(all_batch_sizes, all_times['pca'], label="PCA")
plot_results(all_batch_sizes, all_times['rpca'], label="RandomizedPCA")
plot_results(all_batch_sizes, all_times['ipca'], label="IncrementalPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm runtime vs. batch_size for n_components %i\n \
@@ -92,11 +89,9 @@ def fixed_batch_size_comparison(data):
all_errors = defaultdict(list)
for n_components in all_features:
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('ipca', ipca),
('rpca', rpca)]}
('ipca', ipca)]}

for k in sorted(results_dict.keys()):
all_times[k].append(results_dict[k]['time'])
@@ -116,9 +111,7 @@ def variable_batch_size_comparison(data):
all_times = defaultdict(list)
all_errors = defaultdict(list)
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
Review comment (Member):
Should we be reporting PCA(svd_solver='randomized') instead?

results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('rpca', rpca)]}
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca)]}

# Create flat baselines to compare the variation over batch size
all_times['pca'].extend([results_dict['pca']['time']] *
Expand All @@ -138,8 +131,6 @@ def variable_batch_size_comparison(data):
all_errors['ipca'].append(results_dict['ipca']['error'])

plot_batch_times(all_times, n_components, batch_sizes, data)
# RandomizedPCA error is always worse (approx 100x) than other PCA
# tests
plot_batch_errors(all_errors, n_components, batch_sizes, data)

faces = fetch_lfw_people(resize=.2, min_faces_per_person=5)
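Regarding the review comment above: a minimal sketch (not part of this diff) of how the randomized solver could still be benchmarked through the PCA estimator itself; the component count below is illustrative only.

from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA

data = fetch_lfw_people(resize=.2, min_faces_per_person=5).data
# svd_solver='randomized' is the PCA option that replaces the deprecated RandomizedPCA estimator
rpca = PCA(n_components=32, svd_solver='randomized', random_state=1999)
rpca.fit(data)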
2 changes: 1 addition & 1 deletion doc/modules/model_evaluation.rst
@@ -104,7 +104,7 @@ Usage examples:
>>> model = svm.SVC()
>>> cross_val_score(model, X, y, scoring='wrong_choice')
Traceback (most recent call last):
ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score']
ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mean_absolute_error', 'mean_squared_error', 'median_absolute_error', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score']

.. note::

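As an aside on this doc change: the list of valid scoring strings shown in the error message can also be obtained programmatically; a minimal sketch, assuming the sklearn.metrics.SCORERS mapping is available in this version.

>>> from sklearn.metrics import SCORERS
>>> sorted(SCORERS.keys())  # doctest: +ELLIPSIS
['accuracy', 'adjusted_mutual_info_score', ...]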
5 changes: 2 additions & 3 deletions doc/modules/preprocessing.rst
@@ -235,9 +235,8 @@ data.
independently, since a downstream model can further make some assumption
on the linear independence of the features.

To address this issue you can use :class:`sklearn.decomposition.PCA`
or :class:`sklearn.decomposition.RandomizedPCA` with ``whiten=True``
to further remove the linear correlation across features.
To address this issue you can use :class:`sklearn.decomposition.PCA` with
``whiten=True`` to further remove the linear correlation across features.

.. topic:: Scaling a 1D array

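A minimal sketch of the recommendation in the updated paragraph, using illustrative random data:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
# whiten=True rescales the projected components to unit variance,
# removing linear correlation across the output features
X_whitened = PCA(n_components=3, whiten=True).fit_transform(X)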
@@ -75,7 +75,7 @@

# Choose cross-validation techniques for the inner and outer loops,
# independently of the dataset.
# E.g "LabelKFold", "LeaveOneOut", "LeaveOneLabelOut", etc.
# E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

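For reference, a minimal sketch of the group-aware splitter now named in the updated comment; the group labels below are illustrative only.

import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(20).reshape(10, 2)
y = np.arange(10)
groups = np.repeat([0, 1, 2, 3, 4], 2)  # one group label per sample
for train_idx, test_idx in GroupKFold(n_splits=5).split(X, y, groups):
    pass  # samples from the same group never appear in both train and test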
13 changes: 6 additions & 7 deletions sklearn/__init__.py
@@ -135,15 +135,14 @@ def config_context(**new_config):
__check_build # avoid flakes unused variable error

__all__ = ['calibration', 'cluster', 'covariance', 'cross_decomposition',
'cross_validation', 'datasets', 'decomposition', 'dummy',
'ensemble', 'exceptions', 'externals', 'feature_extraction',
'feature_selection', 'gaussian_process', 'grid_search',
'isotonic', 'kernel_approximation', 'kernel_ridge',
'learning_curve', 'linear_model', 'manifold', 'metrics',
'datasets', 'decomposition', 'dummy', 'ensemble', 'exceptions',
'externals', 'feature_extraction', 'feature_selection',
'gaussian_process', 'isotonic', 'kernel_approximation',
'kernel_ridge', 'linear_model', 'manifold', 'metrics',
'mixture', 'model_selection', 'multiclass', 'multioutput',
'naive_bayes', 'neighbors', 'neural_network', 'pipeline',
'preprocessing', 'random_projection', 'semi_supervised',
'svm', 'tree', 'discriminant_analysis',
'preprocessing', 'random_projection', 'semi_supervised', 'svm',
'tree', 'discriminant_analysis',
# Non-modules:
'clone']
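The entries removed from __all__ correspond to modules deprecated in 0.18; their functionality lives in sklearn.model_selection. For example:

# Previously (deprecated in 0.18, removed by this PR):
#   from sklearn.cross_validation import cross_val_score
#   from sklearn.grid_search import GridSearchCV
#   from sklearn.learning_curve import learning_curve
from sklearn.model_selection import cross_val_score, GridSearchCV, learning_curve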
