Added fit and score times for learning_curve (#13938) · scikit-learn/scikit-learn@b28aadf · GitHub

Commit b28aadf

H4dr1en authored and NicolasHug committed
Added fit and score times for learning_curve (#13938)
1 parent 36b688e commit b28aadf

File tree

4 files changed: +127 -47 lines changed


doc/whats_new/v0.22.rst

Lines changed: 8 additions & 0 deletions
@@ -63,6 +63,14 @@ Changelog
   of the maximization procedure in :term:`fit`.
   :pr:`13618` by :user:`Yoshihiro Uchida <c56pony>`.
 
+:mod:`sklearn.model_selection`
+..............................
+
+- |Enhancement| :class:`model_selection.learning_curve` now accepts parameter
+  ``return_times`` which can be used to retrieve computation times in order to
+  plot model scalability (see learning_curve example).
+  :pr:`13938` by :user:`Hadrien Reboul <H4dr1en>`.
+
 :mod:`sklearn.pipeline`
 .......................
 
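
For context, here is a minimal usage sketch of the new flag described in the entry
above; the estimator, dataset and cv value are arbitrary placeholders and not part
of this commit:

    import numpy as np
    from sklearn.datasets import load_digits
    from sklearn.naive_bayes import GaussianNB
    from sklearn.model_selection import learning_curve

    X, y = load_digits(return_X_y=True)

    # With return_times=True, two extra arrays are returned, both of shape
    # (n_ticks, n_cv_folds): per-fold fit times and score times in seconds.
    train_sizes, train_scores, test_scores, fit_times, score_times = \
        learning_curve(GaussianNB(), X, y, cv=5,
                       train_sizes=np.linspace(0.1, 1.0, 5),
                       return_times=True)

    print(fit_times.shape)  # (5, 5): 5 train-size ticks x 5 CV folds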

examples/model_selection/plot_learning_curve.py

Lines changed: 71 additions & 34 deletions
@@ -2,16 +2,18 @@
 ========================
 Plotting Learning Curves
 ========================
-
-On the left side the learning curve of a naive Bayes classifier is shown for
-the digits dataset. Note that the training score and the cross-validation score
-are both not very good at the end. However, the shape of the curve can be found
-in more complex datasets very often: the training score is very high at the
-beginning and decreases and the cross-validation score is very low at the
-beginning and increases. On the right side we see the learning curve of an SVM
-with RBF kernel. We can see clearly that the training score is still around
-the maximum and the validation score could be increased with more training
-samples.
+In the first column, first row the learning curve of a naive Bayes classifier
+is shown for the digits dataset. Note that the training score and the
+cross-validation score are both not very good at the end. However, the shape
+of the curve can be found in more complex datasets very often: the training
+score is very high at the beginning and decreases and the cross-validation
+score is very low at the beginning and increases. In the second column, first
+row we see the learning curve of an SVM with RBF kernel. We can see clearly
+that the training score is still around the maximum and the validation score
+could be increased with more training samples. The plots in the second row
+show the times required by the models to train with various sizes of the
+training dataset. The plots in the third row show how the score of the
+models changes with the time required to fit them.
 """
 print(__doc__)
 

@@ -24,10 +26,11 @@
 from sklearn.model_selection import ShuffleSplit
 
 
-def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
+def plot_learning_curve(estimator, title, X, y, axes=None, ylim=None, cv=None,
                         n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5)):
     """
-    Generate a simple plot of the test and training learning curve.
+    Generate 3 plots: the test and training learning curve, the training
+    samples vs fit times curve, the fit times vs score curve.
 
     Parameters
     ----------
@@ -45,6 +48,9 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
         Target relative to X for classification or regression;
         None for unsupervised learning.
 
+    axes : array of 3 axes, optional (default=None)
+        Axes to use for plotting the curves.
+
     ylim : tuple, shape (ymin, ymax), optional
         Defines minimum and maximum yvalues plotted.
 
@@ -79,34 +85,63 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
         be big enough to contain at least one sample from each class.
         (default: np.linspace(0.1, 1.0, 5))
     """
-    plt.figure()
-    plt.title(title)
+    if axes is None:
+        _, axes = plt.subplots(1, 3, figsize=(20, 5))
+
+    axes[0].set_title(title)
     if ylim is not None:
-        plt.ylim(*ylim)
-    plt.xlabel("Training examples")
-    plt.ylabel("Score")
-    train_sizes, train_scores, test_scores = learning_curve(
-        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
+        axes[0].set_ylim(*ylim)
+    axes[0].set_xlabel("Training examples")
+    axes[0].set_ylabel("Score")
+
+    train_sizes, train_scores, test_scores, fit_times, _ = \
+        learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs,
+                       train_sizes=train_sizes,
+                       return_times=True)
     train_scores_mean = np.mean(train_scores, axis=1)
     train_scores_std = np.std(train_scores, axis=1)
     test_scores_mean = np.mean(test_scores, axis=1)
     test_scores_std = np.std(test_scores, axis=1)
-    plt.grid()
-
-    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
-                     train_scores_mean + train_scores_std, alpha=0.1,
-                     color="r")
-    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
-                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
-    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
-             label="Training score")
-    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
-             label="Cross-validation score")
-
-    plt.legend(loc="best")
+    fit_times_mean = np.mean(fit_times, axis=1)
+    fit_times_std = np.std(fit_times, axis=1)
+
+    # Plot learning curve
+    axes[0].grid()
+    axes[0].fill_between(train_sizes, train_scores_mean - train_scores_std,
+                         train_scores_mean + train_scores_std, alpha=0.1,
+                         color="r")
+    axes[0].fill_between(train_sizes, test_scores_mean - test_scores_std,
+                         test_scores_mean + test_scores_std, alpha=0.1,
+                         color="g")
+    axes[0].plot(train_sizes, train_scores_mean, 'o-', color="r",
+                 label="Training score")
+    axes[0].plot(train_sizes, test_scores_mean, 'o-', color="g",
+                 label="Cross-validation score")
+    axes[0].legend(loc="best")
+
+    # Plot n_samples vs fit_times
+    axes[1].grid()
+    axes[1].plot(train_sizes, fit_times_mean, 'o-')
+    axes[1].fill_between(train_sizes, fit_times_mean - fit_times_std,
+                         fit_times_mean + fit_times_std, alpha=0.1)
+    axes[1].set_xlabel("Training examples")
+    axes[1].set_ylabel("fit_times")
+    axes[1].set_title("Scalability of the model")
+
+    # Plot fit_time vs score
+    axes[2].grid()
+    axes[2].plot(fit_times_mean, test_scores_mean, 'o-')
+    axes[2].fill_between(fit_times_mean, test_scores_mean - test_scores_std,
+                         test_scores_mean + test_scores_std, alpha=0.1)
+    axes[2].set_xlabel("fit_times")
+    axes[2].set_ylabel("Score")
+    axes[2].set_title("Performance of the model")
+
     return plt
 
 
+fig, axes = plt.subplots(3, 2, figsize=(10, 15))
+
 digits = load_digits()
 X, y = digits.data, digits.target
 

@@ -117,12 +152,14 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
 cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
 
 estimator = GaussianNB()
-plot_learning_curve(estimator, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=4)
+plot_learning_curve(estimator, title, X, y, axes=axes[:, 0], ylim=(0.7, 1.01),
+                    cv=cv, n_jobs=4)
 
 title = r"Learning Curves (SVM, RBF kernel, $\gamma=0.001$)"
 # SVC is more expensive so we do a lower number of CV iterations:
 cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
 estimator = SVC(gamma=0.001)
-plot_learning_curve(estimator, title, X, y, (0.7, 1.01), cv=cv, n_jobs=4)
+plot_learning_curve(estimator, title, X, y, axes=axes[:, 1], ylim=(0.7, 1.01),
+                    cv=cv, n_jobs=4)
 
 plt.show()
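
The new axes parameter also makes the helper reusable outside the 3x2 grid built
in this example. Below is a rough sketch of calling it with a single column of
axes; it assumes plot_learning_curve is defined as in the diff above, and the
estimator, splitter and figure size are illustrative only:

    import matplotlib.pyplot as plt
    from sklearn.datasets import load_digits
    from sklearn.naive_bayes import GaussianNB
    from sklearn.model_selection import ShuffleSplit

    X, y = load_digits(return_X_y=True)
    cv = ShuffleSplit(n_splits=20, test_size=0.2, random_state=0)

    # Three stacked axes: learning curve, scalability, performance.
    fig, axes = plt.subplots(3, 1, figsize=(6, 15))
    plot_learning_curve(GaussianNB(), "Learning Curves (Naive Bayes)", X, y,
                        axes=axes, ylim=(0.7, 1.01), cv=cv, n_jobs=4)
    plt.show()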

sklearn/model_selection/_validation.py

Lines changed: 37 additions & 9 deletions
@@ -1094,7 +1094,7 @@ def learning_curve(estimator, X, y, groups=None,
                    train_sizes=np.linspace(0.1, 1.0, 5), cv=None,
                    scoring=None, exploit_incremental_learning=False,
                    n_jobs=None, pre_dispatch="all", verbose=0, shuffle=False,
-                   random_state=None, error_score=np.nan):
+                   random_state=None, error_score=np.nan, return_times=False):
     """Learning curve.
 
     Determines cross-validated training and test scores for different training
@@ -1193,6 +1193,9 @@ def learning_curve(estimator, X, y, groups=None,
         If a numeric value is given, FitFailedWarning is raised. This parameter
         does not affect the refit step, which will always raise the error.
 
+    return_times : boolean, optional (default: False)
+        Whether to return the fit and score times.
+
     Returns
     -------
     train_sizes_abs : array, shape (n_unique_ticks,), dtype int
@@ -1206,6 +1209,12 @@ def learning_curve(estimator, X, y, groups=None,
     test_scores : array, shape (n_ticks, n_cv_folds)
         Scores on test set.
 
+    fit_times : array, shape (n_ticks, n_cv_folds)
+        Times spent for fitting in seconds.
+
+    score_times : array, shape (n_ticks, n_cv_folds)
+        Times spent for scoring in seconds.
+
     Notes
     -----
     See :ref:`examples/model_selection/plot_learning_curve.py
@@ -1243,7 +1252,7 @@ def learning_curve(estimator, X, y, groups=None,
         classes = np.unique(y) if is_classifier(estimator) else None
         out = parallel(delayed(_incremental_fit_estimator)(
             clone(estimator), X, y, classes, train, test, train_sizes_abs,
-            scorer, verbose) for train, test in cv_iter)
+            scorer, verbose, return_times) for train, test in cv_iter)
     else:
         train_test_proportions = []
         for train, test in cv_iter:
@@ -1253,15 +1262,21 @@ def learning_curve(estimator, X, y, groups=None,
         out = parallel(delayed(_fit_and_score)(
             clone(estimator), X, y, scorer, train, test, verbose,
             parameters=None, fit_params=None, return_train_score=True,
-            error_score=error_score)
+            error_score=error_score, return_times=return_times)
             for train, test in train_test_proportions)
         out = np.array(out)
         n_cv_folds = out.shape[0] // n_unique_ticks
-        out = out.reshape(n_cv_folds, n_unique_ticks, 2)
+        dim = 4 if return_times else 2
+        out = out.reshape(n_cv_folds, n_unique_ticks, dim)
 
     out = np.asarray(out).transpose((2, 1, 0))
 
-    return train_sizes_abs, out[0], out[1]
+    ret = train_sizes_abs, out[0], out[1]
+
+    if return_times:
+        ret = ret + (out[2], out[3])
+
+    return ret
 
 
 def _translate_train_sizes(train_sizes, n_max_training_samples):
@@ -1324,24 +1339,37 @@ def _translate_train_sizes(train_sizes, n_max_training_samples):
 
 
 def _incremental_fit_estimator(estimator, X, y, classes, train, test,
-                               train_sizes, scorer, verbose):
+                               train_sizes, scorer, verbose, return_times):
     """Train estimator on training subsets incrementally and compute scores."""
-    train_scores, test_scores = [], []
+    train_scores, test_scores, fit_times, score_times = [], [], [], []
     partitions = zip(train_sizes, np.split(train, train_sizes)[:-1])
     for n_train_samples, partial_train in partitions:
         train_subset = train[:n_train_samples]
         X_train, y_train = _safe_split(estimator, X, y, train_subset)
         X_partial_train, y_partial_train = _safe_split(estimator, X, y,
                                                        partial_train)
         X_test, y_test = _safe_split(estimator, X, y, test, train_subset)
+        start_fit = time.time()
         if y_partial_train is None:
             estimator.partial_fit(X_partial_train, classes=classes)
         else:
             estimator.partial_fit(X_partial_train, y_partial_train,
                                   classes=classes)
-        train_scores.append(_score(estimator, X_train, y_train, scorer))
+        fit_time = time.time() - start_fit
+        fit_times.append(fit_time)
+
+        start_score = time.time()
+
         test_scores.append(_score(estimator, X_test, y_test, scorer))
-    return np.array((train_scores, test_scores)).T
+        train_scores.append(_score(estimator, X_train, y_train, scorer))
+
+        score_time = time.time() - start_score
+        score_times.append(score_time)
+
+    ret = ((train_scores, test_scores, fit_times, score_times)
+           if return_times else (train_scores, test_scores))
+
+    return np.array(ret).T
 
 
 def validation_curve(estimator, X, y, param_name, param_range, groups=None,
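
The reshape/transpose logic in the non-incremental branch is easier to follow with
toy shapes: each (fold, tick) result from _fit_and_score carries either 2 values
(train score, test score) or 4 (plus fit and score times), and transposing to
(dim, n_ticks, n_cv_folds) lets one (n_ticks, n_cv_folds) array per quantity be
peeled off. A standalone NumPy sketch with made-up dimensions (not taken from the
commit):

    import numpy as np

    n_cv_folds, n_unique_ticks = 2, 3
    return_times = True
    dim = 4 if return_times else 2

    # Fake flat output: one row per (fold, tick) pair, `dim` values each,
    # ordered as [train_score, test_score, fit_time, score_time].
    out = np.arange(n_cv_folds * n_unique_ticks * dim, dtype=float)
    out = out.reshape(n_cv_folds, n_unique_ticks, dim)

    out = out.transpose((2, 1, 0))  # -> (dim, n_ticks, n_cv_folds)
    train_scores, test_scores, fit_times, score_times = out
    print(fit_times.shape)          # (3, 2)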

sklearn/model_selection/tests/test_validation.py

Lines changed: 11 additions & 4 deletions
@@ -991,20 +991,27 @@ def test_learning_curve():
     estimator = MockImprovingEstimator(n_samples * ((n_splits - 1) / n_splits))
     for shuffle_train in [False, True]:
         with warnings.catch_warnings(record=True) as w:
-            train_sizes, train_scores, test_scores = learning_curve(
-                estimator, X, y, cv=KFold(n_splits=n_splits),
-                train_sizes=np.linspace(0.1, 1.0, 10),
-                shuffle=shuffle_train)
+            train_sizes, train_scores, test_scores, fit_times, score_times = \
+                learning_curve(estimator, X, y, cv=KFold(n_splits=n_splits),
+                               train_sizes=np.linspace(0.1, 1.0, 10),
+                               shuffle=shuffle_train, return_times=True)
         if len(w) > 0:
             raise RuntimeError("Unexpected warning: %r" % w[0].message)
         assert_equal(train_scores.shape, (10, 3))
         assert_equal(test_scores.shape, (10, 3))
+        assert_equal(fit_times.shape, (10, 3))
+        assert_equal(score_times.shape, (10, 3))
         assert_array_equal(train_sizes, np.linspace(2, 20, 10))
         assert_array_almost_equal(train_scores.mean(axis=1),
                                   np.linspace(1.9, 1.0, 10))
         assert_array_almost_equal(test_scores.mean(axis=1),
                                   np.linspace(0.1, 1.0, 10))
 
+        # Cannot use assert_array_almost_equal for fit and score times because
+        # the values are hardware-dependent
+        assert_equal(fit_times.dtype, "float64")
+        assert_equal(score_times.dtype, "float64")
+
         # Test a custom cv splitter that can iterate only once
         with warnings.catch_warnings(record=True) as w:
             train_sizes2, train_scores2, test_scores2 = learning_curve(
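
The dtype checks above stand in for value comparisons because wall-clock timings
vary across machines. If stricter checks were ever wanted, a hypothetical helper
(not part of this commit) could still assert structural invariants:

    import numpy as np

    def check_timing_arrays(fit_times, score_times, expected_shape=(10, 3)):
        # Timings are hardware-dependent, so only verify shape, dtype and sign.
        assert fit_times.shape == expected_shape
        assert score_times.shape == expected_shape
        assert fit_times.dtype == np.float64
        assert score_times.dtype == np.float64
        assert np.all(fit_times >= 0)
        assert np.all(score_times >= 0)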
