diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index d2d542cb791b6..96baa81e8db27 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -137,7 +137,7 @@ validation iterator instead, for instance:: >>> from sklearn.model_selection import ShuffleSplit >>> n_samples = iris.data.shape[0] - >>> cv = ShuffleSplit(n_iter=3, test_size=0.3, random_state=0) + >>> cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0) >>> cross_val_score(clf, iris.data, iris.target, cv=cv) ... # doctest: +ELLIPSIS array([ 0.97..., 0.97..., 1. ]) @@ -224,7 +224,7 @@ Example of 2-fold cross-validation on a dataset with 4 samples:: >>> from sklearn.model_selection import KFold >>> X = ["a", "b", "c", "d"] - >>> kf = KFold(n_folds=2) + >>> kf = KFold(n_splits=2) >>> for train, test in kf.split(X): ... print("%s %s" % (train, test)) [2 3] [0 1] @@ -253,7 +253,7 @@ two slightly unbalanced classes:: >>> X = np.ones(10) >>> y = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1] - >>> skf = StratifiedKFold(n_folds=3) + >>> skf = StratifiedKFold(n_splits=3) >>> for train, test in skf.split(X, y): ... print("%s %s" % (train, test)) [2 3 6 7 8 9] [0 1 4 5] @@ -278,7 +278,7 @@ Imagine you have three subjects, each with an associated number from 1 to 3:: >>> y = ["a", "b", "b", "b", "c", "c", "c", "d", "d", "d"] >>> labels = [1, 1, 1, 2, 2, 2, 3, 3, 3, 3] - >>> lkf = LabelKFold(n_folds=3) + >>> lkf = LabelKFold(n_splits=3) >>> for train, test in lkf.split(X, y, labels): ... print("%s %s" % (train, test)) [0 1 2 3 4 5] [6 7 8 9] @@ -454,7 +454,7 @@ Here is a usage example:: >>> from sklearn.model_selection import ShuffleSplit >>> X = np.arange(5) - >>> ss = ShuffleSplit(n_iter=3, test_size=0.25, + >>> ss = ShuffleSplit(n_splits=3, test_size=0.25, ... random_state=0) >>> for train_index, test_index in ss.split(X): ... print("%s %s" % (train_index, test_index)) @@ -485,7 +485,7 @@ Here is a usage example:: >>> X = [0.1, 0.2, 2.2, 2.4, 2.3, 4.55, 5.8, 0.001] >>> y = ["a", "b", "b", "b", "c", "c", "c", "a"] >>> labels = [1, 1, 2, 2, 3, 3, 4, 4] - >>> lss = LabelShuffleSplit(n_iter=4, test_size=0.5, random_state=0) + >>> lss = LabelShuffleSplit(n_splits=4, test_size=0.5, random_state=0) >>> for train, test in lss.split(X, y, labels): ... print("%s %s" % (train, test)) ... diff --git a/doc/tutorial/statistical_inference/model_selection.rst b/doc/tutorial/statistical_inference/model_selection.rst index e61749b00c191..475e1c5e5b385 100644 --- a/doc/tutorial/statistical_inference/model_selection.rst +++ b/doc/tutorial/statistical_inference/model_selection.rst @@ -61,7 +61,7 @@ This example shows an example usage of the ``split`` method. >>> from sklearn.model_selection import KFold, cross_val_score >>> X = ["a", "a", "b", "c", "c", "c"] - >>> k_fold = KFold(n_folds=3) + >>> k_fold = KFold(n_splits=3) >>> for train_indices, test_indices in k_fold.split(X): ... print('Train: %s | test: %s' % (train_indices, test_indices)) Train: [2 3 4 5] | test: [0 1] @@ -70,7 +70,7 @@ This example shows an example usage of the ``split`` method. The cross-validation can then be performed easily:: - >>> kfold = KFold(n_folds=3) + >>> kfold = KFold(n_splits=3) >>> [svc.fit(X_digits[train], y_digits[train]).score(X_digits[test], y_digits[test]) ... for train, test in k_fold.split(X_digits)] [0.93489148580968284, 0.95659432387312182, 0.93989983305509184] @@ -106,11 +106,11 @@ scoring method. 
* - - :class:`KFold` **(n_folds, shuffle, random_state)** + - :class:`KFold` **(n_splits, shuffle, random_state)** - :class:`StratifiedKFold` **(n_iter, test_size, train_size, random_state)** - - :class:`LabelKFold` **(n_folds, shuffle, random_state)** + - :class:`LabelKFold` **(n_splits, shuffle, random_state)** * diff --git a/doc/whats_new.rst b/doc/whats_new.rst index eef97a178e7bb..5b804c218596c 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -62,6 +62,17 @@ Model Selection Enhancements and API Changes the corresponding parameter is not applicable. Additionally a list of all the parameter dicts are stored at ``results_['params']``. + - **Parameters ``n_folds`` and ``n_iter`` renamed to ``n_splits``** + + Some parameter names have changed: + The ``n_folds`` parameter in :class:`model_selection.KFold`, + :class:`model_selection.LabelKFold`, and + :class:`model_selection.StratifiedKFold` is now renamed to ``n_splits``. + The ``n_iter`` parameter in :class:`model_selection.ShuffleSplit`, + :class:`model_selection.LabelShuffleSplit`, + and :class:`model_selection.StratifiedShuffleSplit` is now renamed + to ``n_splits``. + New features ............ @@ -353,6 +364,12 @@ API changes summary (`#6697 `_) by `Raghav R V`_. + - The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced + by the new parameter ``n_splits`` since it can provide a consistent + and unambiguous interface to represent the number of train-test splits. + (`#7187 `_) + by `YenChen Lin`_. + .. currentmodule:: sklearn diff --git a/examples/ensemble/plot_gradient_boosting_oob.py b/examples/ensemble/plot_gradient_boosting_oob.py index 39e623f261cca..dfae1ad9b8a98 100644 --- a/examples/ensemble/plot_gradient_boosting_oob.py +++ b/examples/ensemble/plot_gradient_boosting_oob.py @@ -74,14 +74,14 @@ def heldout_score(clf, X_test, y_test): return score -def cv_estimate(n_folds=3): - cv = KFold(n_folds=n_folds) +def cv_estimate(n_splits=3): + cv = KFold(n_splits=n_splits) cv_clf = ensemble.GradientBoostingClassifier(**params) val_scores = np.zeros((n_estimators,), dtype=np.float64) for train, test in cv.split(X_train, y_train): cv_clf.fit(X_train[train], y_train[train]) val_scores += heldout_score(cv_clf, X_train[test], y_train[test]) - val_scores /= n_folds + val_scores /= n_splits return val_scores diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index dbd5be50f93e1..2b4cd88642a98 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -69,7 +69,7 @@ def make_ellipses(gmm, ax): # Break up the dataset into non-overlapping training (75%) and testing # (25%) sets. -skf = StratifiedKFold(n_folds=4) +skf = StratifiedKFold(n_splits=4) # Only take the first fold. train_index, test_index = next(iter(skf.split(iris.data, iris.target))) diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index 505ceb827338a..cb8cd87a78030 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -101,14 +101,14 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, title = "Learning Curves (Naive Bayes)" # Cross validation with 100 iterations to get smoother mean test and train # score curves, each time with 20% data randomly selected as a validation set. 
-cv = ShuffleSplit(n_iter=100, test_size=0.2, random_state=0) +cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0) estimator = GaussianNB() plot_learning_curve(estimator, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=4) title = "Learning Curves (SVM, RBF kernel, $\gamma=0.001$)" # SVC is more expensive so we do a lower number of CV iterations: -cv = ShuffleSplit(n_iter=10, test_size=0.2, random_state=0) +cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0) estimator = SVC(gamma=0.001) plot_learning_curve(estimator, title, X, y, (0.7, 1.01), cv=cv, n_jobs=4) diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index b208fc3d69079..6678dcb1af8b3 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -58,7 +58,7 @@ # Classification and ROC analysis # Run classifier with cross-validation and plot ROC curves -cv = StratifiedKFold(n_folds=6) +cv = StratifiedKFold(n_splits=6) classifier = svm.SVC(kernel='linear', probability=True, random_state=random_state) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index abbac81b18a0b..b71d6b22dc7c4 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -59,7 +59,7 @@ We should also note that small differences in scores results from the random splits of the cross-validation procedure. Those spurious variations can be -smoothed out by increasing the number of CV iterations ``n_iter`` at the +smoothed out by increasing the number of CV iterations ``n_splits`` at the expense of compute time. Increasing the value number of ``C_range`` and ``gamma_range`` steps will increase the resolution of the hyper-parameter heat map. @@ -128,7 +128,7 @@ def __call__(self, value, clip=None): C_range = np.logspace(-2, 10, 13) gamma_range = np.logspace(-9, 3, 13) param_grid = dict(gamma=gamma_range, C=C_range) -cv = StratifiedShuffleSplit(n_iter=5, test_size=0.2, random_state=42) +cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv) grid.fit(X, y) diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py index 996a0190e943a..09934c2f5d859 100644 --- a/examples/svm/plot_svm_scale_c.py +++ b/examples/svm/plot_svm_scale_c.py @@ -128,8 +128,8 @@ # To get nice curve, we need a large number of iterations to # reduce the variance grid = GridSearchCV(clf, refit=False, param_grid=param_grid, - cv=ShuffleSplit(train_size=train_size, n_iter=250, - random_state=1)) + cv=ShuffleSplit(train_size=train_size, + n_splits=250, random_state=1)) grid.fit(X, y) scores = grid.results_['test_mean_score'] diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 165d746d163ad..f378acfee1ed6 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -122,7 +122,7 @@ class LeaveOneOut(BaseCrossValidator): sample is used once as a test set (singleton) while the remaining samples form the training set. - Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_folds=n)`` and + Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and ``LeavePOut(p=1)`` where ``n`` is the number of samples. Due to the high number of test sets (which is the same as the @@ -197,7 +197,7 @@ class LeavePOut(BaseCrossValidator): samples form the training set in each iteration. 
Note: ``LeavePOut(p)`` is NOT equivalent to - ``KFold(n_folds=n_samples // p)`` which creates non-overlapping test sets. + ``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets. Due to the high number of iterations which grows combinatorically with the number of samples this cross-validation method can be very costly. For @@ -264,24 +264,24 @@ class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)): """Base class for KFold, LabelKFold, and StratifiedKFold""" @abstractmethod - def __init__(self, n_folds, shuffle, random_state): - if not isinstance(n_folds, numbers.Integral): + def __init__(self, n_splits, shuffle, random_state): + if not isinstance(n_splits, numbers.Integral): raise ValueError('The number of folds must be of Integral type. ' '%s of type %s was passed.' - % (n_folds, type(n_folds))) - n_folds = int(n_folds) + % (n_splits, type(n_splits))) + n_splits = int(n_splits) - if n_folds <= 1: + if n_splits <= 1: raise ValueError( "k-fold cross-validation requires at least one" - " train/test split by setting n_folds=2 or more," - " got n_folds={0}.".format(n_folds)) + " train/test split by setting n_splits=2 or more," + " got n_splits={0}.".format(n_splits)) if not isinstance(shuffle, bool): raise TypeError("shuffle must be True or False;" " got {0}".format(shuffle)) - self.n_folds = n_folds + self.n_splits = n_splits self.shuffle = shuffle self.random_state = random_state @@ -311,10 +311,10 @@ def split(self, X, y=None, labels=None): """ X, y, labels = indexable(X, y, labels) n_samples = _num_samples(X) - if self.n_folds > n_samples: + if self.n_splits > n_samples: raise ValueError( - ("Cannot have number of folds n_folds={0} greater" - " than the number of samples: {1}.").format(self.n_folds, + ("Cannot have number of splits n_splits={0} greater" + " than the number of samples: {1}.").format(self.n_splits, n_samples)) for train, test in super(_BaseKFold, self).split(X, y, labels): @@ -339,7 +339,7 @@ def get_n_splits(self, X=None, y=None, labels=None): n_splits : int Returns the number of splitting iterations in the cross-validator. """ - return self.n_folds + return self.n_splits class KFold(_BaseKFold): @@ -355,7 +355,7 @@ class KFold(_BaseKFold): Parameters ---------- - n_folds : int, default=3 + n_splits : int, default=3 Number of folds. Must be at least 2. shuffle : boolean, optional @@ -370,11 +370,11 @@ class KFold(_BaseKFold): >>> from sklearn.model_selection import KFold >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> y = np.array([1, 2, 3, 4]) - >>> kf = KFold(n_folds=2) + >>> kf = KFold(n_splits=2) >>> kf.get_n_splits(X) 2 >>> print(kf) # doctest: +NORMALIZE_WHITESPACE - KFold(n_folds=2, random_state=None, shuffle=False) + KFold(n_splits=2, random_state=None, shuffle=False) >>> for train_index, test_index in kf.split(X): ... print("TRAIN:", train_index, "TEST:", test_index) ... X_train, X_test = X[train_index], X[test_index] @@ -384,9 +384,9 @@ class KFold(_BaseKFold): Notes ----- - The first ``n_samples % n_folds`` folds have size - ``n_samples // n_folds + 1``, other folds have size - ``n_samples // n_folds``, where ``n_samples`` is the number of samples. + The first ``n_samples % n_splits`` folds have size + ``n_samples // n_splits + 1``, other folds have size + ``n_samples // n_splits``, where ``n_samples`` is the number of samples. See also -------- @@ -398,9 +398,9 @@ class KFold(_BaseKFold): LabelKFold: K-fold iterator variant with non-overlapping labels. 
""" - def __init__(self, n_folds=3, shuffle=False, + def __init__(self, n_splits=3, shuffle=False, random_state=None): - super(KFold, self).__init__(n_folds, shuffle, random_state) + super(KFold, self).__init__(n_splits, shuffle, random_state) def _iter_test_indices(self, X, y=None, labels=None): n_samples = _num_samples(X) @@ -408,9 +408,9 @@ def _iter_test_indices(self, X, y=None, labels=None): if self.shuffle: check_random_state(self.random_state).shuffle(indices) - n_folds = self.n_folds - fold_sizes = (n_samples // n_folds) * np.ones(n_folds, dtype=np.int) - fold_sizes[:n_samples % n_folds] += 1 + n_splits = self.n_splits + fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) + fold_sizes[:n_samples % n_splits] += 1 current = 0 for fold_size in fold_sizes: start, stop = current, current + fold_size @@ -429,7 +429,7 @@ class LabelKFold(_BaseKFold): Parameters ---------- - n_folds : int, default=3 + n_splits : int, default=3 Number of folds. Must be at least 2. Examples @@ -438,11 +438,11 @@ class LabelKFold(_BaseKFold): >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) >>> y = np.array([1, 2, 3, 4]) >>> labels = np.array([0, 0, 2, 2]) - >>> label_kfold = LabelKFold(n_folds=2) + >>> label_kfold = LabelKFold(n_splits=2) >>> label_kfold.get_n_splits(X, y, labels) 2 >>> print(label_kfold) - LabelKFold(n_folds=2) + LabelKFold(n_splits=2) >>> for train_index, test_index in label_kfold.split(X, y, labels): ... print("TRAIN:", train_index, "TEST:", test_index) ... X_train, X_test = X[train_index], X[test_index] @@ -464,8 +464,8 @@ class LabelKFold(_BaseKFold): For splitting the data according to explicit domain-specific stratification of the dataset. """ - def __init__(self, n_folds=3): - super(LabelKFold, self).__init__(n_folds, shuffle=False, + def __init__(self, n_splits=3): + super(LabelKFold, self).__init__(n_splits, shuffle=False, random_state=None) def _iter_test_indices(self, X, y, labels): @@ -475,10 +475,10 @@ def _iter_test_indices(self, X, y, labels): unique_labels, labels = np.unique(labels, return_inverse=True) n_labels = len(unique_labels) - if self.n_folds > n_labels: - raise ValueError("Cannot have number of folds n_folds=%d greater" + if self.n_splits > n_labels: + raise ValueError("Cannot have number of splits n_splits=%d greater" " than the number of labels: %d." - % (self.n_folds, n_labels)) + % (self.n_splits, n_labels)) # Weight labels by their number of occurrences n_samples_per_label = np.bincount(labels) @@ -488,7 +488,7 @@ def _iter_test_indices(self, X, y, labels): n_samples_per_label = n_samples_per_label[indices] # Total weight of each fold - n_samples_per_fold = np.zeros(self.n_folds) + n_samples_per_fold = np.zeros(self.n_splits) # Mapping from label index to fold index label_to_fold = np.zeros(len(unique_labels)) @@ -501,7 +501,7 @@ def _iter_test_indices(self, X, y, labels): indices = label_to_fold[labels] - for f in range(self.n_folds): + for f in range(self.n_splits): yield np.where(indices == f)[0] @@ -518,7 +518,7 @@ class StratifiedKFold(_BaseKFold): Parameters ---------- - n_folds : int, default=3 + n_splits : int, default=3 Number of folds. Must be at least 2. 
shuffle : boolean, optional @@ -534,11 +534,11 @@ class StratifiedKFold(_BaseKFold): >>> from sklearn.model_selection import StratifiedKFold >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> y = np.array([0, 0, 1, 1]) - >>> skf = StratifiedKFold(n_folds=2) + >>> skf = StratifiedKFold(n_splits=2) >>> skf.get_n_splits(X, y) 2 >>> print(skf) # doctest: +NORMALIZE_WHITESPACE - StratifiedKFold(n_folds=2, random_state=None, shuffle=False) + StratifiedKFold(n_splits=2, random_state=None, shuffle=False) >>> for train_index, test_index in skf.split(X, y): ... print("TRAIN:", train_index, "TEST:", test_index) ... X_train, X_test = X[train_index], X[test_index] @@ -548,13 +548,13 @@ class StratifiedKFold(_BaseKFold): Notes ----- - All the folds have size ``trunc(n_samples / n_folds)``, the last one has + All the folds have size ``trunc(n_samples / n_splits)``, the last one has the complementary. """ - def __init__(self, n_folds=3, shuffle=False, random_state=None): - super(StratifiedKFold, self).__init__(n_folds, shuffle, random_state) + def __init__(self, n_splits=3, shuffle=False, random_state=None): + super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state) def _make_test_folds(self, X, y=None, labels=None): if self.shuffle: @@ -566,26 +566,26 @@ def _make_test_folds(self, X, y=None, labels=None): unique_y, y_inversed = np.unique(y, return_inverse=True) y_counts = bincount(y_inversed) min_labels = np.min(y_counts) - if np.all(self.n_folds > y_counts): + if np.all(self.n_splits > y_counts): raise ValueError("All the n_labels for individual classes" - " are less than %d folds." - % (self.n_folds)) - if self.n_folds > min_labels: + " are less than n_splits=%d." + % (self.n_splits)) + if self.n_splits > min_labels: warnings.warn(("The least populated class in y has only %d" " members, which is too few. The minimum" " number of labels for any class cannot" - " be less than n_folds=%d." - % (min_labels, self.n_folds)), Warning) + " be less than n_splits=%d." + % (min_labels, self.n_splits)), Warning) # pre-assign each sample to a test fold index using individual KFold # splitting strategies for each class so as to respect the balance of # classes # NOTE: Passing the data corresponding to ith class say X[y==class_i] # will break when the data is not 100% stratifiable for all classes. 
- # So we pass np.zeroes(max(c, n_folds)) as data to the KFold + # So we pass np.zeroes(max(c, n_splits)) as data to the KFold per_cls_cvs = [ - KFold(self.n_folds, shuffle=self.shuffle, - random_state=rng).split(np.zeros(max(count, self.n_folds))) + KFold(self.n_splits, shuffle=self.shuffle, + random_state=rng).split(np.zeros(max(count, self.n_splits))) for count in y_counts] test_folds = np.zeros(n_samples, dtype=np.int) @@ -593,7 +593,7 @@ def _make_test_folds(self, X, y=None, labels=None): for cls, (_, test_split) in zip(unique_y, per_cls_splits): cls_test_folds = test_folds[y == cls] # the test split can be too big because we used - # KFold(...).split(X[:max(c, n_folds)]) when data is not 100% + # KFold(...).split(X[:max(c, n_splits)]) when data is not 100% # stratifiable for all the classes # (we use a warning instead of raising an exception) # If this is the case, let's trim it: @@ -605,7 +605,7 @@ def _make_test_folds(self, X, y=None, labels=None): def _iter_test_masks(self, X, y=None, labels=None): test_folds = self._make_test_folds(X, y) - for i in range(self.n_folds): + for i in range(self.n_splits): yield test_folds == i def split(self, X, y, labels=None): @@ -634,6 +634,7 @@ def split(self, X, y, labels=None): """ return super(StratifiedKFold, self).split(X, y, labels) + class LeaveOneLabelOut(BaseCrossValidator): """Leave One Label Out cross-validator @@ -803,10 +804,10 @@ def get_n_splits(self, X, y, labels): class BaseShuffleSplit(with_metaclass(ABCMeta)): """Base class for ShuffleSplit and StratifiedShuffleSplit""" - def __init__(self, n_iter=10, test_size=0.1, train_size=None, + def __init__(self, n_splits=10, test_size=0.1, train_size=None, random_state=None): _validate_shuffle_split_init(test_size, train_size) - self.n_iter = n_iter + self.n_splits = n_splits self.test_size = test_size self.train_size = train_size self.random_state = random_state @@ -862,7 +863,7 @@ def get_n_splits(self, X=None, y=None, labels=None): n_splits : int Returns the number of splitting iterations in the cross-validator. """ - return self.n_iter + return self.n_splits def __repr__(self): return _build_repr(self) @@ -881,7 +882,7 @@ class ShuffleSplit(BaseShuffleSplit): Parameters ---------- - n_iter : int (default 10) + n_splits : int (default 10) Number of re-shuffling & splitting iterations. test_size : float, int, or None, default 0.1 @@ -904,18 +905,18 @@ class ShuffleSplit(BaseShuffleSplit): >>> from sklearn.model_selection import ShuffleSplit >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) >>> y = np.array([1, 2, 1, 2]) - >>> rs = ShuffleSplit(n_iter=3, test_size=.25, random_state=0) + >>> rs = ShuffleSplit(n_splits=3, test_size=.25, random_state=0) >>> rs.get_n_splits(X) 3 >>> print(rs) - ShuffleSplit(n_iter=3, random_state=0, test_size=0.25, train_size=None) + ShuffleSplit(n_splits=3, random_state=0, test_size=0.25, train_size=None) >>> for train_index, test_index in rs.split(X): ... print("TRAIN:", train_index, "TEST:", test_index) ... # doctest: +ELLIPSIS TRAIN: [3 1 0] TEST: [2] TRAIN: [2 1 3] TEST: [0] TRAIN: [0 2 1] TEST: [3] - >>> rs = ShuffleSplit(n_iter=3, train_size=0.5, test_size=.25, + >>> rs = ShuffleSplit(n_splits=3, train_size=0.5, test_size=.25, ... random_state=0) >>> for train_index, test_index in rs.split(X): ... 
print("TRAIN:", train_index, "TEST:", test_index) @@ -930,7 +931,7 @@ def _iter_indices(self, X, y=None, labels=None): n_train, n_test = _validate_shuffle_split(n_samples, self.test_size, self.train_size) rng = check_random_state(self.random_state) - for i in range(self.n_iter): + for i in range(self.n_splits): # random partition permutation = rng.permutation(n_samples) ind_test = permutation[:n_test] @@ -955,7 +956,7 @@ class LabelShuffleSplit(ShuffleSplit): For example, a less computationally intensive alternative to ``LeavePLabelOut(p=10)`` would be - ``LabelShuffleSplit(test_size=10, n_iter=100)``. + ``LabelShuffleSplit(test_size=10, n_splits=100)``. Note: The parameters ``test_size`` and ``train_size`` refer to labels, and not to samples, as in ShuffleSplit. @@ -963,7 +964,7 @@ class LabelShuffleSplit(ShuffleSplit): Parameters ---------- - n_iter : int (default 5) + n_splits : int (default 5) Number of re-shuffling & splitting iterations. test_size : float (default 0.2), int, or None @@ -982,10 +983,10 @@ class LabelShuffleSplit(ShuffleSplit): Pseudo-random number generator state used for random sampling. ''' - def __init__(self, n_iter=5, test_size=0.2, train_size=None, + def __init__(self, n_splits=5, test_size=0.2, train_size=None, random_state=None): super(LabelShuffleSplit, self).__init__( - n_iter=n_iter, + n_splits=n_splits, test_size=test_size, train_size=train_size, random_state=random_state) @@ -1022,7 +1023,7 @@ class StratifiedShuffleSplit(BaseShuffleSplit): Parameters ---------- - n_iter : int (default 10) + n_splits : int (default 10) Number of re-shuffling & splitting iterations. test_size : float (default 0.1), int, or None @@ -1045,11 +1046,11 @@ class StratifiedShuffleSplit(BaseShuffleSplit): >>> from sklearn.model_selection import StratifiedShuffleSplit >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> y = np.array([0, 0, 1, 1]) - >>> sss = StratifiedShuffleSplit(n_iter=3, test_size=0.5, random_state=0) + >>> sss = StratifiedShuffleSplit(n_splits=3, test_size=0.5, random_state=0) >>> sss.get_n_splits(X, y) 3 >>> print(sss) # doctest: +ELLIPSIS - StratifiedShuffleSplit(n_iter=3, random_state=0, ...) + StratifiedShuffleSplit(n_splits=3, random_state=0, ...) >>> for train_index, test_index in sss.split(X, y): ... print("TRAIN:", train_index, "TEST:", test_index) ... 
X_train, X_test = X[train_index], X[test_index] @@ -1059,10 +1060,10 @@ class StratifiedShuffleSplit(BaseShuffleSplit): TRAIN: [0 2] TEST: [3 1] """ - def __init__(self, n_iter=10, test_size=0.1, train_size=None, + def __init__(self, n_splits=10, test_size=0.1, train_size=None, random_state=None): super(StratifiedShuffleSplit, self).__init__( - n_iter, test_size, train_size, random_state) + n_splits, test_size, train_size, random_state) def _iter_indices(self, X, y, labels=None): n_samples = _num_samples(X) @@ -1093,7 +1094,7 @@ def _iter_indices(self, X, y, labels=None): t_i = np.minimum(class_counts - n_i, np.round(n_test * p_i).astype(int)) - for _ in range(self.n_iter): + for _ in range(self.n_splits): train = [] test = [] diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index c3365bd3a7e60..75e0d5f71cb40 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -454,7 +454,7 @@ def test_X_as_list(): y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(check_X=lambda x: isinstance(x, list)) - cv = KFold(n_folds=3) + cv = KFold(n_splits=3) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv) grid_search.fit(X.tolist(), y).score(X, y) assert_true(hasattr(grid_search, "results_")) @@ -466,7 +466,7 @@ def test_y_as_list(): y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(check_y=lambda x: isinstance(x, list)) - cv = KFold(n_folds=3) + cv = KFold(n_splits=3) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv) grid_search.fit(X, y.tolist()).score(X, y) assert_true(hasattr(grid_search, "results_")) @@ -597,14 +597,14 @@ def test_grid_search_results(): X, y = make_classification(n_samples=50, n_features=4, random_state=42) - n_folds = 3 + n_splits = 3 n_grid_points = 6 params = [dict(kernel=['rbf', ], C=[1, 10], gamma=[0.1, 1]), dict(kernel=['poly', ], degree=[1, 2])] - grid_search = GridSearchCV(SVC(), cv=n_folds, iid=False, + grid_search = GridSearchCV(SVC(), cv=n_splits, iid=False, param_grid=params) grid_search.fit(X, y) - grid_search_iid = GridSearchCV(SVC(), cv=n_folds, iid=True, + grid_search_iid = GridSearchCV(SVC(), cv=n_splits, iid=True, param_grid=params) grid_search_iid.fit(X, y) @@ -645,14 +645,15 @@ def test_random_search_results(): # scipy.stats dists now supports `seed` but we still support scipy 0.12 # which doesn't support the seed. Hence the assertions in the test for # random_search alone should not depend on randomization. 
- n_folds = 3 + n_splits = 3 n_search_iter = 30 params = dict(C=expon(scale=10), gamma=expon(scale=0.1)) - random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_folds, + random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, + cv=n_splits, iid=False, param_distributions=params) random_search.fit(X, y) random_search_iid = RandomizedSearchCV(SVC(), n_iter=n_search_iter, - cv=n_folds, iid=True, + cv=n_splits, iid=True, param_distributions=params) random_search_iid.fit(X, y) @@ -779,22 +780,22 @@ def test_search_results_none_param(): def test_grid_search_correct_score_results(): # test that correct scores are used - n_folds = 3 + n_splits = 3 clf = LinearSVC(random_state=0) X, y = make_blobs(random_state=0, centers=2) Cs = [.1, 1, 10] for score in ['f1', 'roc_auc']: - grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score, cv=n_folds) + grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score, cv=n_splits) results = grid_search.fit(X, y).results_ # Test scorer names result_keys = list(results.keys()) expected_keys = (("test_mean_score", "test_rank_score") + tuple("test_split%d_score" % cv_i - for cv_i in range(n_folds))) + for cv_i in range(n_splits))) assert_true(all(in1d(expected_keys, result_keys))) - cv = StratifiedKFold(n_folds=n_folds) + cv = StratifiedKFold(n_splits=n_splits) n_splits = grid_search.n_splits_ for candidate_i, C in enumerate(Cs): clf.set_params(C=C) diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 69749f8e4c0aa..89b227efcee94 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -132,9 +132,9 @@ def get_params(self, deep=False): def test_cross_validator_with_default_params(): n_samples = 4 n_unique_labels = 4 - n_folds = 2 + n_splits = 2 p = 2 - n_iter = 10 # (the default value) + n_shuffle_splits = 10 # (the default value) X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) X_1d = np.array([1, 2, 3, 4]) @@ -142,8 +142,8 @@ def test_cross_validator_with_default_params(): labels = np.array([1, 2, 3, 4]) loo = LeaveOneOut() lpo = LeavePOut(p) - kf = KFold(n_folds) - skf = StratifiedKFold(n_folds) + kf = KFold(n_splits) + skf = StratifiedKFold(n_splits) lolo = LeaveOneLabelOut() lopo = LeavePLabelOut(p) ss = ShuffleSplit(random_state=0) @@ -151,23 +151,24 @@ def test_cross_validator_with_default_params(): loo_repr = "LeaveOneOut()" lpo_repr = "LeavePOut(p=2)" - kf_repr = "KFold(n_folds=2, random_state=None, shuffle=False)" - skf_repr = "StratifiedKFold(n_folds=2, random_state=None, shuffle=False)" + kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)" + skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)" lolo_repr = "LeaveOneLabelOut()" lopo_repr = "LeavePLabelOut(n_labels=2)" - ss_repr = ("ShuffleSplit(n_iter=10, random_state=0, test_size=0.1, " + ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, test_size=0.1, " "train_size=None)") ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))" - n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds, - n_unique_labels, comb(n_unique_labels, p), n_iter, 2] + n_splits_expected = [n_samples, comb(n_samples, p), n_splits, n_splits, + n_unique_labels, comb(n_unique_labels, p), + n_shuffle_splits, 2] for i, (cv, cv_repr) in enumerate(zip( [loo, lpo, kf, skf, lolo, lopo, ss, ps], [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr, ss_repr, ps_repr])): # Test if get_n_splits works correctly - assert_equal(n_splits[i], cv.get_n_splits(X, y, labels)) 
+ assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, labels)) # Test if the cross-validator works as expected even if # the data is 1d @@ -194,13 +195,13 @@ def check_valid_split(train, test, n_samples=None): assert_equal(train.union(test), set(range(n_samples))) -def check_cv_coverage(cv, X, y, labels, expected_n_iter=None): +def check_cv_coverage(cv, X, y, labels, expected_n_splits=None): n_samples = _num_samples(X) # Check that a all the samples appear at least once in a test fold - if expected_n_iter is not None: - assert_equal(cv.get_n_splits(X, y, labels), expected_n_iter) + if expected_n_splits is not None: + assert_equal(cv.get_n_splits(X, y, labels), expected_n_splits) else: - expected_n_iter = cv.get_n_splits(X, y, labels) + expected_n_splits = cv.get_n_splits(X, y, labels) collected_test_samples = set() iterations = 0 @@ -210,7 +211,7 @@ def check_cv_coverage(cv, X, y, labels, expected_n_iter=None): collected_test_samples.update(test) # Check that the accumulated test samples cover the whole dataset - assert_equal(iterations, expected_n_iter) + assert_equal(iterations, expected_n_splits) if n_samples is not None: assert_equal(collected_test_samples, set(range(n_samples))) @@ -234,10 +235,10 @@ def test_kfold_valueerrors(): # side of the split at each split with warnings.catch_warnings(): warnings.simplefilter("ignore") - check_cv_coverage(skf_3, X2, y, labels=None, expected_n_iter=3) + check_cv_coverage(skf_3, X2, y, labels=None, expected_n_splits=3) # Check that errors are raised if all n_labels for individual - # classes are less than n_folds. + # classes are less than n_splits. y = np.array([3, 3, -1, -1, 2]) assert_raises(ValueError, next, skf_3.split(X2, y)) @@ -252,27 +253,27 @@ def test_kfold_valueerrors(): assert_raise_message(ValueError, error_string, StratifiedKFold, 1) - # When n_folds is not integer: + # When n_splits is not integer: assert_raises(ValueError, KFold, 1.5) assert_raises(ValueError, KFold, 2.0) assert_raises(ValueError, StratifiedKFold, 1.5) assert_raises(ValueError, StratifiedKFold, 2.0) # When shuffle is not a bool: - assert_raises(TypeError, KFold, n_folds=4, shuffle=None) + assert_raises(TypeError, KFold, n_splits=4, shuffle=None) def test_kfold_indices(): # Check all indices are returned in the test folds X1 = np.ones(18) kf = KFold(3) - check_cv_coverage(kf, X1, y=None, labels=None, expected_n_iter=3) + check_cv_coverage(kf, X1, y=None, labels=None, expected_n_splits=3) # Check all indices are returned in the test folds even when equal-sized # folds are not possible X2 = np.ones(17) kf = KFold(3) - check_cv_coverage(kf, X2, y=None, labels=None, expected_n_iter=3) + check_cv_coverage(kf, X2, y=None, labels=None, expected_n_splits=3) # Check if get_n_splits returns the number of folds assert_equal(5, KFold(5).get_n_splits(X2)) @@ -441,7 +442,7 @@ def test_shuffle_stratifiedkfold(): for (_, test0), (_, test1) in zip(kf0.split(X_40, y), kf1.split(X_40, y)): assert_not_equal(set(test0), set(test1)) - check_cv_coverage(kf0, X_40, y, labels=None, expected_n_iter=5) + check_cv_coverage(kf0, X_40, y, labels=None, expected_n_splits=5) def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 @@ -456,9 +457,9 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 X, y = digits.data[:600], digits.target[:600] model = SVC(C=10, gamma=0.005) - n_folds = 3 + n_splits = 3 - cv = KFold(n_folds=n_folds, shuffle=False) + cv = KFold(n_splits=n_splits, shuffle=False) mean_score = cross_val_score(model, X, y, cv=cv).mean() 
assert_greater(0.92, mean_score) assert_greater(mean_score, 0.80) @@ -467,11 +468,11 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 # overfitting of the model with regards to the writing style of the authors # by yielding a seriously overestimated score: - cv = KFold(n_folds, shuffle=True, random_state=0) + cv = KFold(n_splits, shuffle=True, random_state=0) mean_score = cross_val_score(model, X, y, cv=cv).mean() assert_greater(mean_score, 0.92) - cv = KFold(n_folds, shuffle=True, random_state=1) + cv = KFold(n_splits, shuffle=True, random_state=1) mean_score = cross_val_score(model, X, y, cv=cv).mean() assert_greater(mean_score, 0.92) @@ -482,7 +483,7 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 # the estimated mean score is close to the score measured with # non-shuffled KFold - cv = StratifiedKFold(n_folds) + cv = StratifiedKFold(n_splits) mean_score = cross_val_score(model, X, y, cv=cv).mean() assert_greater(0.93, mean_score) assert_greater(mean_score, 0.80) @@ -562,7 +563,7 @@ def test_stratified_shuffle_split_even(): # Test the StratifiedShuffleSplit, indices are drawn with a # equal chance n_folds = 5 - n_iter = 1000 + n_splits = 1000 def assert_counts_are_ok(idx_counts, p): # Here we test that the distribution of the counts @@ -577,19 +578,19 @@ def assert_counts_are_ok(idx_counts, p): for n_samples in (6, 22): labels = np.array((n_samples // 2) * [0, 1]) - splits = StratifiedShuffleSplit(n_iter=n_iter, + splits = StratifiedShuffleSplit(n_splits=n_splits, test_size=1. / n_folds, random_state=0) train_counts = [0] * n_samples test_counts = [0] * n_samples - n_splits = 0 + n_splits_actual = 0 for train, test in splits.split(X=np.ones(n_samples), y=labels): - n_splits += 1 + n_splits_actual += 1 for counter, ids in [(train_counts, train), (test_counts, test)]: for id in ids: counter[id] += 1 - assert_equal(n_splits, n_iter) + assert_equal(n_splits_actual, n_splits) n_train, n_test = _validate_shuffle_split(n_samples, test_size=1./n_folds, @@ -616,10 +617,10 @@ def test_stratified_shuffle_split_overlap_train_test_bug(): y = [0, 1, 2, 3] * 3 + [4, 5] * 5 X = np.ones_like(y) - splits = StratifiedShuffleSplit(n_iter=1, - test_size=0.5, random_state=0) + sss = StratifiedShuffleSplit(n_splits=1, + test_size=0.5, random_state=0) - train, test = next(iter(splits.split(X=X, y=y))) + train, test = next(iter(sss.split(X=X, y=y))) assert_array_equal(np.intersect1d(train, test), []) @@ -653,15 +654,15 @@ def test_label_shuffle_split(): for l in labels: X = y = np.ones(len(l)) - n_iter = 6 + n_splits = 6 test_size = 1./3 - slo = LabelShuffleSplit(n_iter, test_size=test_size, random_state=0) + slo = LabelShuffleSplit(n_splits, test_size=test_size, random_state=0) # Make sure the repr works repr(slo) # Test that the length is correct - assert_equal(slo.get_n_splits(X, y, labels=l), n_iter) + assert_equal(slo.get_n_splits(X, y, labels=l), n_splits) l_unique = np.unique(l) @@ -906,7 +907,7 @@ def test_label_kfold(): # Parameters of the test n_labels = 15 n_samples = 1000 - n_folds = 5 + n_splits = 5 X = y = np.ones(n_samples) @@ -914,12 +915,12 @@ def test_label_kfold(): tolerance = 0.05 * n_samples # 5 percent error allowed labels = rng.randint(0, n_labels, n_samples) - ideal_n_labels_per_fold = n_samples // n_folds + ideal_n_labels_per_fold = n_samples // n_splits len(np.unique(labels)) # Get the test fold indices from the test set indices of each fold folds = np.zeros(n_samples) - lkf = LabelKFold(n_folds=n_folds) + lkf = 
LabelKFold(n_splits=n_splits) for i, (_, test) in enumerate(lkf.split(X, y, labels)): folds[test] = i @@ -949,9 +950,9 @@ def test_label_kfold(): n_labels = len(np.unique(labels)) n_samples = len(labels) - n_folds = 5 + n_splits = 5 tolerance = 0.05 * n_samples # 5 percent error allowed - ideal_n_labels_per_fold = n_samples // n_folds + ideal_n_labels_per_fold = n_samples // n_splits X = y = np.ones(n_samples) @@ -980,8 +981,8 @@ def test_label_kfold(): # Should fail if there are more folds than labels labels = np.array([1, 1, 1, 2, 2]) X = y = np.ones(len(labels)) - assert_raises_regexp(ValueError, "Cannot have number of folds.*greater", - next, LabelKFold(n_folds=3).split(X, y, labels)) + assert_raises_regexp(ValueError, "Cannot have number of splits.*greater", + next, LabelKFold(n_splits=3).split(X, y, labels)) def test_nested_cv(): @@ -992,7 +993,7 @@ def test_nested_cv(): labels = rng.randint(0, 5, 15) cvs = [LeaveOneLabelOut(), LeaveOneOut(), LabelKFold(), StratifiedKFold(), - StratifiedShuffleSplit(n_iter=3, random_state=0)] + StratifiedShuffleSplit(n_splits=3, random_state=0)] for inner_cv, outer_cv in combinations_with_replacement(cvs, 2): gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]}, diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 2e694fd45e59a..62a86000562f6 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -136,7 +136,8 @@ def _is_training_data(self, X): X = np.ones((10, 2)) X_sparse = coo_matrix(X) y = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) -# The number of samples per class needs to be > n_folds, for StratifiedKFold(3) +# The number of samples per class needs to be > n_splits, +# for StratifiedKFold(n_splits=3) y2 = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3]) @@ -701,7 +702,7 @@ def test_learning_curve_with_boolean_indices(): n_redundant=0, n_classes=2, n_clusters_per_class=1, random_state=0) estimator = MockImprovingEstimator(20) - cv = KFold(n_folds=3) + cv = KFold(n_splits=3) train_sizes, train_scores, test_scores = learning_curve( estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10)) assert_array_equal(train_sizes, np.linspace(2, 20, 10))
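
For reference, a minimal usage sketch of the renamed parameter (illustrative only, not part of the patch): after this change the fold-based splitters that previously took ``n_folds`` and the shuffle-based splitters that previously took ``n_iter`` both accept ``n_splits``, and ``get_n_splits`` reports that value. All names used below (``KFold``, ``ShuffleSplit``, ``n_splits``, ``test_size``, ``random_state``, ``split``, ``get_n_splits``) appear in the patch itself.

    import numpy as np
    from sklearn.model_selection import KFold, ShuffleSplit

    X = np.arange(10).reshape(5, 2)

    # Previously KFold(n_folds=2); the fold-based splitters now take n_splits.
    kf = KFold(n_splits=2)
    for train, test in kf.split(X):
        print("KFold train/test:", train, test)

    # Previously ShuffleSplit(n_iter=3); the shuffle-based splitters also take n_splits.
    ss = ShuffleSplit(n_splits=3, test_size=0.4, random_state=0)
    for train, test in ss.split(X):
        print("ShuffleSplit train/test:", train, test)

    # get_n_splits returns the configured number of splits for both families.
    print(kf.get_n_splits(X), ss.get_n_splits(X))

A single shared ``n_splits`` name is what allows ``get_n_splits`` to be defined uniformly in ``_BaseKFold`` and ``BaseShuffleSplit`` (both simply return ``self.n_splits``), rather than dispatching on ``n_folds`` versus ``n_iter``.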