[MRG+1] Rename CV params n_{folds,iter} to n_splits by yenchenlin · Pull Request #7187 · scikit-learn/scikit-learn · GitHub
[MRG+1] Rename CV params n_{folds,iter} to n_splits #7187

Merged · 17 commits · Aug 16, 2016
12 changes: 6 additions & 6 deletions doc/modules/cross_validation.rst
@@ -137,7 +137,7 @@ validation iterator instead, for instance::

>>> from sklearn.model_selection import ShuffleSplit
>>> n_samples = iris.data.shape[0]
- >>> cv = ShuffleSplit(n_iter=3, test_size=0.3, random_state=0)
+ >>> cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
>>> cross_val_score(clf, iris.data, iris.target, cv=cv)
... # doctest: +ELLIPSIS
array([ 0.97..., 0.97..., 1. ])
@@ -224,7 +224,7 @@ Example of 2-fold cross-validation on a dataset with 4 samples::
>>> from sklearn.model_selection import KFold

>>> X = ["a", "b", "c", "d"]
- >>> kf = KFold(n_folds=2)
+ >>> kf = KFold(n_splits=2)
>>> for train, test in kf.split(X):
... print("%s %s" % (train, test))
[2 3] [0 1]
@@ -253,7 +253,7 @@ two slightly unbalanced classes::

>>> X = np.ones(10)
>>> y = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
- >>> skf = StratifiedKFold(n_folds=3)
+ >>> skf = StratifiedKFold(n_splits=3)
>>> for train, test in skf.split(X, y):
... print("%s %s" % (train, test))
[2 3 6 7 8 9] [0 1 4 5]
@@ -278,7 +278,7 @@ Imagine you have three subjects, each with an associated number from 1 to 3::
>>> y = ["a", "b", "b", "b", "c", "c", "c", "d", "d", "d"]
>>> labels = [1, 1, 1, 2, 2, 2, 3, 3, 3, 3]

- >>> lkf = LabelKFold(n_folds=3)
+ >>> lkf = LabelKFold(n_splits=3)
>>> for train, test in lkf.split(X, y, labels):
... print("%s %s" % (train, test))
[0 1 2 3 4 5] [6 7 8 9]
@@ -454,7 +454,7 @@ Here is a usage example::

>>> from sklearn.model_selection import ShuffleSplit
>>> X = np.arange(5)
- >>> ss = ShuffleSplit(n_iter=3, test_size=0.25,
+ >>> ss = ShuffleSplit(n_splits=3, test_size=0.25,
... random_state=0)
>>> for train_index, test_index in ss.split(X):
... print("%s %s" % (train_index, test_index))
@@ -485,7 +485,7 @@ Here is a usage example::
>>> X = [0.1, 0.2, 2.2, 2.4, 2.3, 4.55, 5.8, 0.001]
>>> y = ["a", "b", "b", "b", "c", "c", "c", "a"]
>>> labels = [1, 1, 2, 2, 3, 3, 4, 4]
- >>> lss = LabelShuffleSplit(n_iter=4, test_size=0.5, random_state=0)
+ >>> lss = LabelShuffleSplit(n_splits=4, test_size=0.5, random_state=0)
>>> for train, test in lss.split(X, y, labels):
... print("%s %s" % (train, test))
...
8 changes: 4 additions & 4 deletions doc/tutorial/statistical_inference/model_selection.rst
@@ -61,7 +61,7 @@ This example shows an example usage of the ``split`` method.

>>> from sklearn.model_selection import KFold, cross_val_score
>>> X = ["a", "a", "b", "c", "c", "c"]
- >>> k_fold = KFold(n_folds=3)
+ >>> k_fold = KFold(n_splits=3)
>>> for train_indices, test_indices in k_fold.split(X):
... print('Train: %s | test: %s' % (train_indices, test_indices))
Train: [2 3 4 5] | test: [0 1]
@@ -70,7 +70,7 @@ This example shows an example usage of the ``split`` method.

The cross-validation can then be performed easily::

- >>> kfold = KFold(n_folds=3)
+ >>> k_fold = KFold(n_splits=3)
>>> [svc.fit(X_digits[train], y_digits[train]).score(X_digits[test], y_digits[test])
... for train, test in k_fold.split(X_digits)]
[0.93489148580968284, 0.95659432387312182, 0.93989983305509184]
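
For comparison, the loop above can be collapsed into a single call to
``cross_val_score`` (an editorial sketch, assuming the ``svc``, ``X_digits``
and ``y_digits`` objects defined earlier in this tutorial; the helper is
already imported above)::

    >>> cross_val_score(svc, X_digits, y_digits, cv=k_fold)
    ... # doctest: +ELLIPSIS
    array([ 0.93...,  0.95...,  0.93...])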
@@ -106,11 +106,11 @@ scoring method.

*

-   - :class:`KFold` **(n_folds, shuffle, random_state)**
+   - :class:`KFold` **(n_splits, shuffle, random_state)**

- :class:`StratifiedKFold` **(n_splits, shuffle, random_state)**

-   - :class:`LabelKFold` **(n_folds, shuffle, random_state)**
+   - :class:`LabelKFold` **(n_splits, shuffle, random_state)**


*
17 changes: 17 additions & 0 deletions doc/whats_new.rst
@@ -62,6 +62,17 @@ Model Selection Enhancements and API Changes
the corresponding parameter is not applicable. Additionally a list of all
the parameter dicts are stored at ``results_['params']``.

+ - **Parameters ``n_folds`` and ``n_iter`` renamed to ``n_splits``**
+
+   Some parameter names have changed:

[Review thread on the line above:]

Member: You could remove this line I think. You already have a heading...

Member: I think this line is good.

raghavrv (Member, Aug 16, 2016): I thought after the below suggested change, this is not needed...

Member: Perhaps...? I'm unconvinced that this series of comments markedly contributes to the quality of the text in terms of convention, clarity or information structure.


+   The ``n_folds`` parameter in :class:`model_selection.KFold`,
+   :class:`model_selection.LabelKFold`, and
+   :class:`model_selection.StratifiedKFold` has been renamed to ``n_splits``.
+   The ``n_iter`` parameter in :class:`model_selection.ShuffleSplit`,
+   :class:`model_selection.LabelShuffleSplit`,
+   and :class:`model_selection.StratifiedShuffleSplit` has been renamed
+   to ``n_splits``.
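
To make the change concrete, here is a minimal sketch of the renamed
keywords (an editorial illustration assuming scikit-learn 0.18, which this
PR targets; only the parameter names change, not the splitting behaviour)::

    >>> from sklearn.model_selection import KFold, ShuffleSplit
    >>> kf = KFold(n_splits=5)              # formerly KFold(n_folds=5)
    >>> ss = ShuffleSplit(n_splits=5,       # formerly ShuffleSplit(n_iter=5)
    ...                   test_size=0.2, random_state=0)
    >>> kf.get_n_splits(), ss.get_n_splits()
    (5, 5)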


New features
............
Expand Down Expand Up @@ -353,6 +364,12 @@ API changes summary
(`#6697 <https://github.com/scikit-learn/scikit-learn/pull/6697>`_) by
`Raghav R V`_.

+ - The parameters ``n_iter`` and ``n_folds`` of the old CV splitters are
+   replaced by a single parameter ``n_splits``, which provides a consistent
+   and unambiguous interface for the number of train-test splits.
+   (`#7187 <https://github.com/scikit-learn/scikit-learn/pull/7187>`_)
+   by `YenChen Lin`_.
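
One practical effect of the shared name: downstream code can treat any
splitter uniformly. A hedged sketch (the ``describe`` helper below is
hypothetical, not a scikit-learn API)::

    >>> from sklearn.model_selection import KFold, StratifiedKFold, ShuffleSplit
    >>> def describe(cv):
    ...     # every splitter now reports its split count the same way
    ...     return type(cv).__name__, cv.get_n_splits()
    >>> [describe(cv) for cv in (KFold(n_splits=4),
    ...                          StratifiedKFold(n_splits=4),
    ...                          ShuffleSplit(n_splits=4))]
    [('KFold', 4), ('StratifiedKFold', 4), ('ShuffleSplit', 4)]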


.. currentmodule:: sklearn

6 changes: 3 additions & 3 deletions examples/ensemble/plot_gradient_boosting_oob.py
@@ -74,14 +74,14 @@ def heldout_score(clf, X_test, y_test):
return score


- def cv_estimate(n_folds=3):
-     cv = KFold(n_folds=n_folds)
+ def cv_estimate(n_splits=3):
+     cv = KFold(n_splits=n_splits)
cv_clf = ensemble.GradientBoostingClassifier(**params)
val_scores = np.zeros((n_estimators,), dtype=np.float64)
for train, test in cv.split(X_train, y_train):
cv_clf.fit(X_train[train], y_train[train])
val_scores += heldout_score(cv_clf, X_train[test], y_train[test])
-     val_scores /= n_folds
+     val_scores /= n_splits
return val_scores


2 changes: 1 addition & 1 deletion examples/mixture/plot_gmm_covariances.py
@@ -69,7 +69,7 @@ def make_ellipses(gmm, ax):

# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
- skf = StratifiedKFold(n_folds=4)
+ skf = StratifiedKFold(n_splits=4)
# Only take the first fold.
train_index, test_index = next(iter(skf.split(iris.data, iris.target)))

4 changes: 2 additions & 2 deletions examples/model_selection/plot_learning_curve.py
@@ -101,14 +101,14 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
title = "Learning Curves (Naive Bayes)"
# Cross validation with 100 iterations to get smoother mean test and train
# score curves, each time with 20% data randomly selected as a validation set.
- cv = ShuffleSplit(n_iter=100, test_size=0.2, random_state=0)
+ cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

estimator = GaussianNB()
plot_learning_curve(estimator, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=4)

title = "Learning Curves (SVM, RBF kernel, $\gamma=0.001$)"
# SVC is more expensive so we do a lower number of CV iterations:
- cv = ShuffleSplit(n_iter=10, test_size=0.2, random_state=0)
+ cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
estimator = SVC(gamma=0.001)
plot_learning_curve(estimator, title, X, y, (0.7, 1.01), cv=cv, n_jobs=4)

2 changes: 1 addition & 1 deletion examples/model_selection/plot_roc_crossval.py
@@ -58,7 +58,7 @@
# Classification and ROC analysis

# Run classifier with cross-validation and plot ROC curves
- cv = StratifiedKFold(n_folds=6)
+ cv = StratifiedKFold(n_splits=6)
classifier = svm.SVC(kernel='linear', probability=True,
random_state=random_state)

4 changes: 2 additions & 2 deletions examples/svm/plot_rbf_parameters.py
@@ -59,7 +59,7 @@

We should also note that small differences in scores result from the random
splits of the cross-validation procedure. Those spurious variations can be
- smoothed out by increasing the number of CV iterations ``n_iter`` at the
+ smoothed out by increasing the number of CV iterations ``n_splits`` at the
expense of compute time. Increasing the number of ``C_range`` and
``gamma_range`` steps will increase the resolution of the hyper-parameter heat
map.
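
To see that trade-off numerically, one could compare the score spread for a
small and a large ``n_splits`` (a rough editorial sketch, not part of this
example script; iris stands in for any classification dataset)::

    from sklearn.datasets import load_iris
    from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)
    for n in (5, 50):
        cv = StratifiedShuffleSplit(n_splits=n, test_size=0.2, random_state=42)
        scores = cross_val_score(SVC(gamma=0.001), X, y, cv=cv)
        # more splits -> smoother (lower-variance) estimate, more fits to pay for
        print(n, scores.mean().round(3), scores.std().round(3))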
@@ -128,7 +128,7 @@ def __call__(self, value, clip=None):
C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)
param_grid = dict(gamma=gamma_range, C=C_range)
- cv = StratifiedShuffleSplit(n_iter=5, test_size=0.2, random_state=42)
+ cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
grid.fit(X, y)

4 changes: 2 additions & 2 deletions examples/svm/plot_svm_scale_c.py
@@ -128,8 +128,8 @@
# To get nice curve, we need a large number of iterations to
# reduce the variance
grid = GridSearchCV(clf, refit=False, param_grid=param_grid,
-     cv=ShuffleSplit(train_size=train_size, n_iter=250,
-                     random_state=1))
+     cv=ShuffleSplit(train_size=train_size,
+                     n_splits=250, random_state=1))
grid.fit(X, y)
scores = grid.results_['test_mean_score']
