[MRG+3] Collapsing PCA and RandomizedPCA by giorgiop · Pull Request #5299 · scikit-learn/scikit-learn
[MRG+3] Collapsing PCA and RandomizedPCA #5299


Merged · 1 commit · Mar 10, 2016
41 changes: 25 additions & 16 deletions doc/modules/pipeline.rst
@@ -38,13 +38,15 @@ is an estimator object::
>>> from sklearn.svm import SVC
>>> from sklearn.decomposition import PCA
>>> estimators = [('reduce_dim', PCA()), ('svm', SVC())]
>>> clf = Pipeline(estimators)
>>> clf # doctest: +NORMALIZE_WHITESPACE
-Pipeline(steps=[('reduce_dim', PCA(copy=True, n_components=None,
-whiten=False)), ('svm', SVC(C=1.0, cache_size=200, class_weight=None,
-coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
-kernel='rbf', max_iter=-1, probability=False, random_state=None,
-shrinking=True, tol=0.001, verbose=False))])
+Pipeline(steps=[('reduce_dim', PCA(copy=True, iterated_power=4,
+n_components=None, random_state=None, svd_solver='auto', tol=0.0,
+whiten=False)), ('svm', SVC(C=1.0, cache_size=200, class_weight=None,
+coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
+kernel='rbf', max_iter=-1, probability=False, random_state=None,
+shrinking=True, tol=0.001, verbose=False))])


The utility function :func:`make_pipeline` is a shorthand
for constructing pipelines;
@@ -63,23 +65,27 @@ filling in the names automatically::
The estimators of a pipeline are stored as a list in the ``steps`` attribute::

>>> clf.steps[0]
-('reduce_dim', PCA(copy=True, n_components=None, whiten=False))
+('reduce_dim', PCA(copy=True, iterated_power=4, n_components=None, random_state=None,
+svd_solver='auto', tol=0.0, whiten=False))

and as a ``dict`` in ``named_steps``::

>>> clf.named_steps['reduce_dim']
-PCA(copy=True, n_components=None, whiten=False)
+PCA(copy=True, iterated_power=4, n_components=None, random_state=None,
+svd_solver='auto', tol=0.0, whiten=False)

Parameters of the estimators in the pipeline can be accessed using the
``<estimator>__<parameter>`` syntax::

>>> clf.set_params(svm__C=10) # doctest: +NORMALIZE_WHITESPACE
-Pipeline(steps=[('reduce_dim', PCA(copy=True, n_components=None,
+Pipeline(steps=[('reduce_dim', PCA(copy=True, iterated_power=4,
+n_components=None, random_state=None, svd_solver='auto', tol=0.0,
whiten=False)), ('svm', SVC(C=10, cache_size=200, class_weight=None,
coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
kernel='rbf', max_iter=-1, probability=False, random_state=None,
shrinking=True, tol=0.001, verbose=False))])


This is particularly important for doing grid searches::

>>> from sklearn.model_selection import GridSearchCV
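
For illustration, a minimal sketch of such a search over the pipeline defined
above (hypothetical parameter values; assumes the ``model_selection`` API used
in this diff)::

    >>> from sklearn.datasets import load_iris
    >>> X, y = load_iris(return_X_y=True)
    >>> param_grid = dict(reduce_dim__n_components=[2, 3],
    ...                   svm__C=[0.1, 1, 10])
    >>> grid_search = GridSearchCV(clf, param_grid=param_grid)
    >>> grid_search.fit(X, y)  # doctest: +SKIP
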
@@ -150,19 +156,22 @@ and ``value`` is an estimator object::
>>> from sklearn.decomposition import PCA
>>> from sklearn.decomposition import KernelPCA
>>> estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())]
>>> combined = FeatureUnion(estimators)
>>> combined # doctest: +NORMALIZE_WHITESPACE
-FeatureUnion(n_jobs=1, transformer_list=[('linear_pca', PCA(copy=True,
-n_components=None, whiten=False)), ('kernel_pca', KernelPCA(alpha=1.0,
-coef0=1, degree=3, eigen_solver='auto', fit_inverse_transform=False,
-gamma=None, kernel='linear', kernel_params=None, max_iter=None,
-n_components=None, n_jobs=1, random_state=None, remove_zero_eig=False, tol=0))],
+FeatureUnion(n_jobs=1, transformer_list=[('linear_pca', PCA(copy=True,
> Member: line too long?

> Contributor Author: This one should be in the limits.

+iterated_power=4, n_components=None, random_state=None,
+svd_solver='auto', tol=0.0, whiten=False)), ('kernel_pca',
+KernelPCA(alpha=1.0, coef0=1, degree=3, eigen_solver='auto',
+fit_inverse_transform=False, gamma=None, kernel='linear',
+kernel_params=None, max_iter=None, n_components=None, n_jobs=1,
+random_state=None, remove_zero_eig=False, tol=0))],
transformer_weights=None)


Like pipelines, feature unions have a shorthand constructor called
:func:`make_union` that does not require explicit naming of the components.
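
For instance, a sketch equivalent to the union built above (the generated step
names are assumed to be the lowercased class names)::

    >>> from sklearn.pipeline import make_union
    >>> union = make_union(PCA(), KernelPCA())
    >>> [name for name, _ in union.transformer_list]
    ['pca', 'kernelpca']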


.. topic:: Examples:

* :ref:`example_feature_stacker.py`
3 changes: 2 additions & 1 deletion doc/tutorial/statistical_inference/unsupervised_learning.rst
@@ -275,7 +275,8 @@ data by projecting on a principal subspace.
>>> from sklearn import decomposition
>>> pca = decomposition.PCA()
>>> pca.fit(X)
-PCA(copy=True, n_components=None, whiten=False)
+PCA(copy=True, iterated_power=4, n_components=None, random_state=None,
+svd_solver='auto', tol=0.0, whiten=False)
>>> print(pca.explained_variance_) # doctest: +SKIP
[ 2.18565811e+00 1.19346747e+00 8.43026679e-32]

36 changes: 28 additions & 8 deletions doc/whats_new.rst
@@ -22,6 +22,15 @@ New features
:class:`feature_selection.SelectPercentile` as score functions.
By `Andrea Bravi`_ and `Nikolay Mayorov`_.

- Class :class:`decomposition.RandomizedPCA` is now factored into
  :class:`decomposition.PCA`; its behavior is available by passing the
  parameter ``svd_solver='randomized'``. The default ``n_iter`` for
  ``'randomized'`` has changed to 4. The old behavior of PCA is recovered with
  ``svd_solver='full'``. An additional solver calls ``arpack`` and performs a
  truncated (non-randomized) SVD. By default, the best solver is selected
  depending on the size of the input and the number of components requested.
  (`#5299 <https://github.com/scikit-learn/scikit-learn/pull/5299>`_) by `Giorgio Patrini`_.
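
  A minimal sketch of the collapsed interface (solver names as described in
  this entry)::

      import numpy as np
      from sklearn.decomposition import PCA

      X = np.random.RandomState(0).randn(200, 50)
      pca_auto = PCA(n_components=5).fit(X)  # solver picked from the data shape
      pca_rand = PCA(n_components=5, svd_solver='randomized',
                     random_state=0).fit(X)  # old RandomizedPCA behavior
      pca_full = PCA(n_components=5, svd_solver='full').fit(X)  # old PCA behavior
      pca_arpack = PCA(n_components=5, svd_solver='arpack',
                       random_state=0).fit(X)  # truncated, non-randomized SVD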

- The Gaussian Process module has been reimplemented and now offers classification
and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
@@ -114,17 +123,26 @@ Bug fixes
- :class:`StratifiedKFold` now raises error if all n_labels for individual classes is less than n_folds.
(`#6182 <https://github.com/scikit-learn/scikit-learn/pull/6182>`_) by `Devashish Deshpande`_.

-- :class:`RandomizedPCA` default number of `iterated_power` is 2 instead of 3.
-  This is a speed up with a minor precision decrease. (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_.
+- :class:`RandomizedPCA` default number of `iterated_power` is 4 instead of 3.
+  (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_.

-- :func:`randomized_svd` performs 2 power iterations by default, instead of 0.
-  In practice this is often enough for obtaining a good approximation of the
-  true eigenvalues/vectors in the presence of noise. (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_.
+- :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead of 0.
+  In practice this is enough for obtaining a good approximation of the
+  true eigenvalues/vectors in the presence of noise. When `n_components` is
+  small (< .1 * min(X.shape)), `n_iter` is set to 7, unless the user specifies
+  a higher number. This improves precision with few components.
+  (`#5299 <https://github.com/scikit-learn/scikit-learn/pull/5299>`_) by `Giorgio Patrini`_.
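
  For instance (a sketch; the small-``n_components`` heuristic applies when
  ``n_iter`` is left at its default)::

      import numpy as np
      from sklearn.utils.extmath import randomized_svd

      A = np.random.RandomState(42).randn(100, 80)
      # n_components=5 < .1 * min(A.shape) = 8, so 7 power iterations are run
      U, s, Vt = randomized_svd(A, n_components=5, random_state=42)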

-- :func:`randomized_range_finder` is more numerically stable when many
+- :func:`utils.extmath.randomized_range_finder` is more numerically stable when many
  power iterations are requested, since it applies LU normalization by default.
  If `n_iter < 2` numerical issues are unlikely, thus no normalization is applied.
-  Other normalization options are available: 'none', 'LU' and 'QR'. (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_.
+  Other normalization options are available: 'none', 'LU' and 'QR'.
+  (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_.
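
  A sketch, assuming the option is exposed as ``power_iteration_normalizer``::

      import numpy as np
      from sklearn.utils.extmath import randomized_range_finder

      A = np.random.RandomState(0).randn(500, 100)
      # approximate an orthonormal basis of the range of A; LU normalization
      # keeps the power iterations numerically stable when n_iter is large
      Q = randomized_range_finder(A, size=10, n_iter=10,
                                  power_iteration_normalizer='LU',
                                  random_state=0)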

- The whiten/non-whiten inconsistency between the components of
  :class:`decomposition.PCA` and :class:`decomposition.RandomizedPCA` (now
  factored into PCA, see New features above) is fixed: `components_` are
  stored with no whitening.
  (`#5299 <https://github.com/scikit-learn/scikit-learn/pull/5299>`_) by `Giorgio Patrini`_.
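
  A sketch of the invariant this fix establishes::

      import numpy as np
      from sklearn.decomposition import PCA

      X = np.random.RandomState(0).randn(100, 10)
      pca = PCA(n_components=3, whiten=False).fit(X)
      pca_w = PCA(n_components=3, whiten=True).fit(X)
      # components_ hold the unwhitened basis in both cases
      assert np.allclose(pca.components_, pca_w.components_)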

- Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
Laplacian matrix was incorrectly set to 1. (`#4995 <https://github.com/scikit-learn/scikit-learn/pull/4995>`_) By `Peter Fischer`_.
@@ -213,7 +231,8 @@ Changelog

New features
............
-- All the Scaler classes but :class:`RobustScaler` can be fitted online by
+
+- All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by
> Member: Maybe add a newline after the ``.........``. This seems to be what is
> done in most cases in whats_new.rst. Not sure whether it has any impact on the
> rendered html, probably not.

calling `partial_fit`. By `Giorgio Patrini`_.
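
  A sketch of the online-fitting pattern this enables::

      import numpy as np
      from sklearn.preprocessing import StandardScaler

      scaler = StandardScaler()
      X = np.random.RandomState(0).randn(1000, 5)
      for chunk in np.array_split(X, 10):
          scaler.partial_fit(chunk)  # running mean/variance updated per batch
      X_scaled = scaler.transform(X[:3])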

- The new class :class:`ensemble.VotingClassifier` implements a
@@ -445,6 +464,7 @@ Enhancements

Bug fixes
.........

- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse
multi-label output. By `Andreas Müller`_.

10 changes: 6 additions & 4 deletions examples/decomposition/plot_pca_vs_fa_model_selection.py
@@ -23,7 +23,6 @@
by Thomas P. Minka is also compared.

"""
-print(__doc__)

> Member: Convention is for all examples to print(__doc__) at the beginning.
> Please add this back.

> Member: Nevermind, it's below.

> Contributor Author: My editor was complaining to see something before the imports :)

# Authors: Alexandre Gramfort
# Denis A. Engemann
@@ -38,6 +37,8 @@
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

+print(__doc__)

###############################################################################
# Create the data

@@ -61,7 +62,7 @@


def compute_scores(X):
-pca = PCA()
+pca = PCA(svd_solver='full')
fa = FactorAnalysis()

pca_scores, fa_scores = [], []
@@ -90,7 +91,7 @@ def lw_score(X):
n_components_pca = n_components[np.argmax(pca_scores)]
n_components_fa = n_components[np.argmax(fa_scores)]

-pca = PCA(n_components='mle')
+pca = PCA(svd_solver='full', n_components='mle')
pca.fit(X)
n_components_pca_mle = pca.n_components_

@@ -105,7 +106,8 @@
plt.axvline(n_components_pca, color='b',
label='PCA CV: %d' % n_components_pca, linestyle='--')
plt.axvline(n_components_fa, color='r',
-label='FactorAnalysis CV: %d' % n_components_fa, linestyle='--')
+label='FactorAnalysis CV: %d' % n_components_fa,
+linestyle='--')
plt.axvline(n_components_pca_mle, color='k',
label='PCA MLE: %d' % n_components_pca_mle, linestyle='--')
