ENH Adds _MultimetricScorer for Optimized Scoring (#14593) · scikit-learn/scikit-learn@fbb2c7c · GitHub

Commit fbb2c7c

thomasjpfan authored and jnothman committed
ENH Adds _MultimetricScorer for Optimized Scoring (#14593)
1 parent 66b0f5f commit fbb2c7c

File tree

4 files changed: +278 -72 lines changed

doc/whats_new/v0.22.rst

Lines changed: 5 additions & 0 deletions

@@ -348,6 +348,11 @@ Changelog
 - |Enhancement| :class:`model_selection.RandomizedSearchCV` now accepts lists
   of parameter distributions. :pr:`14549` by `Andreas Müller`_.
 
+- |Efficiency| Improved performance of multimetric scoring in
+  :func:`model_selection.cross_validate`,
+  :class:`model_selection.GridSearchCV`, and
+  :class:`model_selection.RandomizedSearchCV`. :pr:`14593` by `Thomas Fan`_.
+
 - |Fix| Reimplemented :class:`model_selection.StratifiedKFold` to fix an issue
   where one test set could be `n_classes` larger than another. Test sets should
   now be near-equally sized. :pr:`14704` by `Joel Nothman`_.
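
For context, this is the user-facing path the changelog entry refers to. A minimal sketch follows; the dataset and estimator are illustrative, and `KNeighborsClassifier` is chosen because it has no `decision_function`, so both `roc_auc` and `neg_log_loss` fall back to `predict_proba`, which the new `_MultimetricScorer` computes once per fold instead of once per metric:

```python
# Illustrative only: multimetric cross-validation that benefits from the
# shared-prediction scorer. The estimator has no decision_function, so
# roc_auc and neg_log_loss both need predict_proba, now computed once per fold.
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(random_state=0)
clf = KNeighborsClassifier(n_neighbors=5)

results = cross_validate(clf, X, y, cv=5,
                         scoring=['roc_auc', 'neg_log_loss'])
print(results['test_roc_auc'], results['test_neg_log_loss'])
```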

sklearn/metrics/scorer.py

Lines changed: 124 additions & 11 deletions

@@ -18,8 +18,9 @@
 # Arnaud Joly <arnaud.v.joly@gmail.com>
 # License: Simplified BSD
 
-from abc import ABCMeta
 from collections.abc import Iterable
+from functools import partial
+from collections import Counter
 
 import numpy as np
 
@@ -44,7 +45,82 @@
 from ..base import is_regressor
 
 
-class _BaseScorer(metaclass=ABCMeta):
+def _cached_call(cache, estimator, method, *args, **kwargs):
+    """Call estimator with method and args and kwargs."""
+    if cache is None:
+        return getattr(estimator, method)(*args, **kwargs)
+
+    try:
+        return cache[method]
+    except KeyError:
+        result = getattr(estimator, method)(*args, **kwargs)
+        cache[method] = result
+        return result
+
+
+class _MultimetricScorer:
+    """Callable for multimetric scoring used to avoid repeated calls
+    to `predict_proba`, `predict`, and `decision_function`.
+
+    `_MultimetricScorer` will return a dictionary of scores corresponding to
+    the scorers in the dictionary. Note that `_MultimetricScorer` can be
+    created with a dictionary with one key (i.e. only one actual scorer).
+
+    Parameters
+    ----------
+    scorers : dict
+        Dictionary mapping names to callable scorers.
+    """
+    def __init__(self, **scorers):
+        self._scorers = scorers
+
+    def __call__(self, estimator, *args, **kwargs):
+        """Evaluate predicted target values."""
+        scores = {}
+        cache = {} if self._use_cache(estimator) else None
+        cached_call = partial(_cached_call, cache)
+
+        for name, scorer in self._scorers.items():
+            if isinstance(scorer, _BaseScorer):
+                score = scorer._score(cached_call, estimator,
+                                      *args, **kwargs)
+            else:
+                score = scorer(estimator, *args, **kwargs)
+            scores[name] = score
+        return scores
+
+    def _use_cache(self, estimator):
+        """Return True if using a cache is beneficial.
+
+        Caching may be beneficial when one of these conditions holds:
+          - `_ProbaScorer` will be called twice.
+          - `_PredictScorer` will be called twice.
+          - `_ThresholdScorer` will be called twice.
+          - `_ThresholdScorer` and `_PredictScorer` are called and
+            estimator is a regressor.
+          - `_ThresholdScorer` and `_ProbaScorer` are called and
+            estimator does not have a `decision_function` attribute.
+
+        """
+        if len(self._scorers) == 1:  # Only one scorer
+            return False
+
+        counter = Counter([type(v) for v in self._scorers.values()])
+
+        if any(counter[known_type] > 1 for known_type in
+               [_PredictScorer, _ProbaScorer, _ThresholdScorer]):
+            return True
+
+        if counter[_ThresholdScorer]:
+            if is_regressor(estimator) and counter[_PredictScorer]:
+                return True
+            elif (counter[_ProbaScorer] and
+                  not hasattr(estimator, "decision_function")):
+                return True
+        return False
+
+
+class _BaseScorer:
     def __init__(self, score_func, sign, kwargs):
         self._kwargs = kwargs
         self._score_func = score_func
@@ -58,17 +134,47 @@ def __repr__(self):
                    "" if self._sign > 0 else ", greater_is_better=False",
                    self._factory_args(), kwargs_string))
 
+    def __call__(self, estimator, X, y_true, sample_weight=None):
+        """Evaluate predicted target values for X relative to y_true.
+
+        Parameters
+        ----------
+        estimator : object
+            Trained estimator to use for scoring. Must have a predict_proba
+            method; the output of that is used to compute the score.
+
+        X : array-like or sparse matrix
+            Test data that will be fed to estimator.predict.
+
+        y_true : array-like
+            Gold standard target values for X.
+
+        sample_weight : array-like, optional (default=None)
+            Sample weights.
+
+        Returns
+        -------
+        score : float
+            Score function applied to prediction of estimator on X.
+        """
+        return self._score(partial(_cached_call, None), estimator, X, y_true,
+                           sample_weight=sample_weight)
+
     def _factory_args(self):
         """Return non-default make_scorer arguments for repr."""
         return ""
 
 
 class _PredictScorer(_BaseScorer):
-    def __call__(self, estimator, X, y_true, sample_weight=None):
+    def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
         """Evaluate predicted target values for X relative to y_true.
 
         Parameters
         ----------
+        method_caller : callable
+            Returns predictions given an estimator, method name, and other
+            arguments, potentially caching results.
+
         estimator : object
             Trained estimator to use for scoring. Must have a predict_proba
             method; the output of that is used to compute the score.
@@ -87,8 +193,7 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
         score : float
             Score function applied to prediction of estimator on X.
         """
-
-        y_pred = estimator.predict(X)
+        y_pred = method_caller(estimator, "predict", X)
         if sample_weight is not None:
             return self._sign * self._score_func(y_true, y_pred,
                                                  sample_weight=sample_weight,
@@ -99,11 +204,15 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
 
 
 class _ProbaScorer(_BaseScorer):
-    def __call__(self, clf, X, y, sample_weight=None):
+    def _score(self, method_caller, clf, X, y, sample_weight=None):
         """Evaluate predicted probabilities for X relative to y_true.
 
         Parameters
         ----------
+        method_caller : callable
+            Returns predictions given an estimator, method name, and other
+            arguments, potentially caching results.
+
         clf : object
             Trained classifier to use for scoring. Must have a predict_proba
             method; the output of that is used to compute the score.
@@ -124,7 +233,7 @@ def __call__(self, clf, X, y, sample_weight=None):
             Score function applied to prediction of estimator on X.
         """
         y_type = type_of_target(y)
-        y_pred = clf.predict_proba(X)
+        y_pred = method_caller(clf, "predict_proba", X)
         if y_type == "binary":
             if y_pred.shape[1] == 2:
                 y_pred = y_pred[:, 1]
@@ -145,11 +254,15 @@ def _factory_args(self):
 
 
 class _ThresholdScorer(_BaseScorer):
-    def __call__(self, clf, X, y, sample_weight=None):
+    def _score(self, method_caller, clf, X, y, sample_weight=None):
         """Evaluate decision function output for X relative to y_true.
 
         Parameters
        ----------
+        method_caller : callable
+            Returns predictions given an estimator, method name, and other
+            arguments, potentially caching results.
+
         clf : object
             Trained classifier to use for scoring. Must have either a
             decision_function method or a predict_proba method; the output of
@@ -176,17 +289,17 @@ def __call__(self, clf, X, y, sample_weight=None):
             raise ValueError("{0} format is not supported".format(y_type))
 
         if is_regressor(clf):
-            y_pred = clf.predict(X)
+            y_pred = method_caller(clf, "predict", X)
         else:
             try:
-                y_pred = clf.decision_function(X)
+                y_pred = method_caller(clf, "decision_function", X)
 
                 # For multi-output multi-class estimator
                 if isinstance(y_pred, list):
                     y_pred = np.vstack([p for p in y_pred]).T
 
             except (NotImplementedError, AttributeError):
-                y_pred = clf.predict_proba(X)
+                y_pred = method_caller(clf, "predict_proba", X)
 
         if y_type == "binary":
             if y_pred.shape[1] == 2:
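
To make the caching mechanism above concrete, here is a self-contained sketch of the pattern: `_cached_call` is copied from the diff, while `DummyEstimator` and the driver lines are illustrative only, not part of the commit. The cache is keyed by method name alone, which is safe here because every scorer within one `_MultimetricScorer` call scores the same `X`:

```python
from functools import partial


def _cached_call(cache, estimator, method, *args, **kwargs):
    """Call estimator with method and args and kwargs (as in the diff above)."""
    if cache is None:
        return getattr(estimator, method)(*args, **kwargs)

    try:
        return cache[method]
    except KeyError:
        result = getattr(estimator, method)(*args, **kwargs)
        cache[method] = result
        return result


class DummyEstimator:
    """Illustrative estimator that counts how often predict runs."""
    def __init__(self):
        self.n_predict_calls = 0

    def predict(self, X):
        self.n_predict_calls += 1
        return [0 for _ in X]


est = DummyEstimator()
cache = {}                            # shared by all scorers in one call
call = partial(_cached_call, cache)   # the same binding _MultimetricScorer builds

call(est, "predict", [[1], [2]])      # computes and caches the prediction
call(est, "predict", [[1], [2]])      # served from the cache
assert est.n_predict_calls == 1
```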

sklearn/metrics/tests/test_score_objects.py

Lines changed: 113 additions & 1 deletion

@@ -3,11 +3,13 @@
 import shutil
 import os
 import numbers
+from unittest.mock import Mock
 
 import numpy as np
 import pytest
 import joblib
 
+from numpy.testing import assert_allclose
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import ignore_warnings
@@ -18,10 +20,11 @@
                             jaccard_score)
 from sklearn.metrics import cluster as cluster_module
 from sklearn.metrics.scorer import (check_scoring, _PredictScorer,
-                                    _passthrough_scorer)
+                                    _passthrough_scorer, _MultimetricScorer)
 from sklearn.metrics import accuracy_score
 from sklearn.metrics.scorer import _check_multimetric_scoring
 from sklearn.metrics import make_scorer, get_scorer, SCORERS
+from sklearn.neighbors import KNeighborsClassifier
 from sklearn.svm import LinearSVC
 from sklearn.pipeline import make_pipeline
 from sklearn.cluster import KMeans
@@ -546,3 +549,112 @@ def test_scoring_is_not_metric():
         check_scoring(Ridge(), r2_score)
     with pytest.raises(ValueError, match='make_scorer'):
         check_scoring(KMeans(), cluster_module.adjusted_rand_score)
+
+
+@pytest.mark.parametrize(
+    ("scorers,expected_predict_count,"
+     "expected_predict_proba_count,expected_decision_func_count"),
+    [({'a1': 'accuracy', 'a2': 'accuracy',
+       'll1': 'neg_log_loss', 'll2': 'neg_log_loss',
+       'ra1': 'roc_auc', 'ra2': 'roc_auc'}, 1, 1, 1),
+     (['roc_auc', 'accuracy'], 1, 0, 1),
+     (['neg_log_loss', 'accuracy'], 1, 1, 0)])
+def test_multimetric_scorer_calls_method_once(scorers, expected_predict_count,
+                                              expected_predict_proba_count,
+                                              expected_decision_func_count):
+    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])
+
+    mock_est = Mock()
+    fit_func = Mock(return_value=mock_est)
+    predict_func = Mock(return_value=y)
+
+    pos_proba = np.random.rand(X.shape[0])
+    proba = np.c_[1 - pos_proba, pos_proba]
+    predict_proba_func = Mock(return_value=proba)
+    decision_function_func = Mock(return_value=pos_proba)
+
+    mock_est.fit = fit_func
+    mock_est.predict = predict_func
+    mock_est.predict_proba = predict_proba_func
+    mock_est.decision_function = decision_function_func
+
+    scorer_dict, _ = _check_multimetric_scoring(LogisticRegression(), scorers)
+    multi_scorer = _MultimetricScorer(**scorer_dict)
+    results = multi_scorer(mock_est, X, y)
+
+    assert set(scorers) == set(results)  # compare dict keys
+
+    assert predict_func.call_count == expected_predict_count
+    assert predict_proba_func.call_count == expected_predict_proba_count
+    assert decision_function_func.call_count == expected_decision_func_count
+
+
+def test_multimetric_scorer_calls_method_once_classifier_no_decision():
+    predict_proba_call_cnt = 0
+
+    class MockKNeighborsClassifier(KNeighborsClassifier):
+        def predict_proba(self, X):
+            nonlocal predict_proba_call_cnt
+            predict_proba_call_cnt += 1
+            return super().predict_proba(X)
+
+    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])
+
+    # no decision function
+    clf = MockKNeighborsClassifier(n_neighbors=1)
+    clf.fit(X, y)
+
+    scorers = ['roc_auc', 'neg_log_loss']
+    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
+    scorer = _MultimetricScorer(**scorer_dict)
+    scorer(clf, X, y)
+
+    assert predict_proba_call_cnt == 1
+
+
+def test_multimetric_scorer_calls_method_once_regressor_threshold():
+    predict_called_cnt = 0
+
+    class MockDecisionTreeRegressor(DecisionTreeRegressor):
+        def predict(self, X):
+            nonlocal predict_called_cnt
+            predict_called_cnt += 1
+            return super().predict(X)
+
+    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])
+
+    # no decision function
+    clf = MockDecisionTreeRegressor()
+    clf.fit(X, y)
+
+    scorers = {'neg_mse': 'neg_mean_squared_error', 'r2': 'roc_auc'}
+    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
+    scorer = _MultimetricScorer(**scorer_dict)
+    scorer(clf, X, y)
+
+    assert predict_called_cnt == 1
+
+
+def test_multimetric_scorer_sanity_check():
+    # scoring dictionary returned is the same as calling each scorer separately
+    scorers = {'a1': 'accuracy', 'a2': 'accuracy',
+               'll1': 'neg_log_loss', 'll2': 'neg_log_loss',
+               'ra1': 'roc_auc', 'ra2': 'roc_auc'}
+
+    X, y = make_classification(random_state=0)
+
+    clf = DecisionTreeClassifier()
+    clf.fit(X, y)
+
+    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
+    multi_scorer = _MultimetricScorer(**scorer_dict)
+
+    result = multi_scorer(clf, X, y)
+
+    separate_scores = {
+        name: get_scorer(name)(clf, X, y)
+        for name in ['accuracy', 'neg_log_loss', 'roc_auc']}
+
+    for key, value in result.items():
+        score_name = scorers[key]
+        assert_allclose(value, separate_scores[score_name])
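
The search estimators named in the changelog entry use the same machinery. As a final hedged sketch, a multimetric grid search where both metrics are `_PredictScorer`s, so each candidate/fold pair now calls `predict` once for the pair rather than once per metric; the parameter grid and estimator are illustrative:

```python
# Illustrative only: multimetric grid search sharing predict across metrics.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

X, y = make_classification(random_state=0)

search = GridSearchCV(
    LogisticRegression(),
    param_grid={'C': [0.1, 1.0, 10.0]},
    scoring={'acc': 'accuracy', 'bal_acc': 'balanced_accuracy'},
    refit='acc',   # a refit metric must be named for multimetric search
    cv=3,
)
search.fit(X, y)
print(search.best_params_, search.cv_results_['mean_test_bal_acc'])
```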
