From 54041979093d006d1b7b325bd4a24cdd90b95be5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 31 Aug 2016 14:40:51 -0400 Subject: [PATCH 1/2] fix warning and behavior in randomized_svd wrt power iterations --- sklearn/utils/extmath.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index ab67c89e09525..8fe8f87860969 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -25,7 +25,7 @@ from ..externals.six.moves import xrange from .sparsefuncs_fast import csr_row_norms from .validation import check_array -from ..exceptions import NonBLASDotWarning +from ..exceptions import NonBLASDotWarning, ChangedBehaviorWarning def norm(x): @@ -351,10 +351,14 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter=None, if n_iter is None: # Checks if the number of iterations is explicitely specified - n_iter = 4 - n_iter_specified = False - else: - n_iter_specified = True + # Adjust n_iter. 7 was found a good compromise for PCA. See #5299 + if n_components < .1 * min(M.shape) and n_iter < 7: + n_iter = 7 + warnings.warn("The default number of power iterations is increased from 4" + "to 7 in version 0.18 to achieve higher precision.", + ChangedBehaviorWarning) + else: + n_iter = 4 if transpose == 'auto': transpose = n_samples < n_features @@ -362,13 +366,6 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter=None, # this implementation is a bit faster with smaller shape[1] M = M.T - # Adjust n_iter. 7 was found a good compromise for PCA. 
See #5299 - if n_components < .1 * min(M.shape) and n_iter < 7: - if n_iter_specified: - warnings.warn("The number of power iterations is increased to " - "7 to achieve higher precision.") - n_iter = 7 - Q = randomized_range_finder(M, n_random, n_iter, power_iteration_normalizer, random_state) From 970ace98fbfb9c49f1d3d80a47bc7b09e55c3482 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 31 Aug 2016 16:46:03 -0400 Subject: [PATCH 2/2] change default iterated_power to auto. --- sklearn/decomposition/pca.py | 14 +++++++------- sklearn/pipeline.py | 2 +- sklearn/utils/extmath.py | 22 ++++++++-------------- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index b4643e6a8c58f..881a4a593cfd2 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -166,7 +166,7 @@ class PCA(_BasePCA): .. versionadded:: 0.18.0 - iterated_power : int >= 0, optional (default 4) + iterated_power : int >= 0, or 'auto', (default 'auto') Number of iterations for the power method computed by svd_solver == 'randomized'. @@ -240,21 +240,21 @@ class PCA(_BasePCA): >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) >>> pca = PCA(n_components=2) >>> pca.fit(X) - PCA(copy=True, iterated_power=4, n_components=2, random_state=None, + PCA(copy=True, iterated_power='auto', n_components=2, random_state=None, svd_solver='auto', tol=0.0, whiten=False) >>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS [ 0.99244... 0.00755...] >>> pca = PCA(n_components=2, svd_solver='full') >>> pca.fit(X) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - PCA(copy=True, iterated_power=4, n_components=2, random_state=None, + PCA(copy=True, iterated_power='auto', n_components=2, random_state=None, svd_solver='full', tol=0.0, whiten=False) >>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS [ 0.99244... 0.00755...] 
>>> pca = PCA(n_components=1, svd_solver='arpack') >>> pca.fit(X) - PCA(copy=True, iterated_power=4, n_components=1, random_state=None, + PCA(copy=True, iterated_power='auto', n_components=1, random_state=None, svd_solver='arpack', tol=0.0, whiten=False) >>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS [ 0.99244...] @@ -268,7 +268,7 @@ class PCA(_BasePCA): """ def __init__(self, n_components=None, copy=True, whiten=False, - svd_solver='auto', tol=0.0, iterated_power=4, + svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None): self.n_components = n_components self.copy = copy @@ -535,8 +535,8 @@ class RandomizedPCA(BaseEstimator, TransformerMixin): fit(X).transform(X) will not yield the expected results, use fit_transform(X) instead. - iterated_power : int, optional - Number of iterations for the power method. 2 by default. + iterated_power : int, default=2 + Number of iterations for the power method. .. versionchanged:: 0.18 diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 91e4fef0ec4d8..6c98ea70efad8 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -796,7 +796,7 @@ def make_union(*transformers): >>> make_union(PCA(), TruncatedSVD()) # doctest: +NORMALIZE_WHITESPACE FeatureUnion(n_jobs=1, transformer_list=[('pca', - PCA(copy=True, iterated_power=4, + PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False)), ('truncatedsvd', diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 8fe8f87860969..be349e1bc73bf 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -25,7 +25,7 @@ from ..externals.six.moves import xrange from .sparsefuncs_fast import csr_row_norms from .validation import check_array -from ..exceptions import NonBLASDotWarning, ChangedBehaviorWarning +from ..exceptions import NonBLASDotWarning def norm(x): @@ -267,7 +267,7 @@ def randomized_range_finder(A, size, n_iter, return Q -def randomized_svd(M, 
n_components, n_oversamples=10, n_iter=None, +def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto', power_iteration_normalizer='auto', transpose='auto', flip_sign=True, random_state=0): """Computes a truncated randomized SVD @@ -287,11 +287,11 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter=None, number can improve speed but can negatively impact the quality of approximation of singular vectors and singular values. - n_iter: int (default is 4) + n_iter: int or 'auto' (default is 'auto') Number of power iterations. It can be used to deal with very noisy - problems. When `n_components` is small (< .1 * min(X.shape)) `n_iter` - is set to 7, unless the user specifies a higher number. This improves - precision with few components. + problems. When 'auto', it is set to 4, unless `n_components` is small + (< .1 * min(X.shape)), in which case `n_iter` is set to 7. + This improves precision with few components. .. versionchanged:: 0.18 @@ -349,16 +349,10 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter=None, n_random = n_components + n_oversamples n_samples, n_features = M.shape - if n_iter is None: + if n_iter == 'auto': # Checks if the number of iterations is explicitely specified # Adjust n_iter. 7 was found a good compromise for PCA. See #5299 - if n_components < .1 * min(M.shape) and n_iter < 7: - n_iter = 7 - warnings.warn("The default number of power iterations is increased from 4" - "to 7 in version 0.18 to achieve higher precision.", - ChangedBehaviorWarning) - else: - n_iter = 4 + n_iter = 7 if n_components < .1 * min(M.shape) else 4 if transpose == 'auto': transpose = n_samples < n_features