diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index f2962df4a74b8..07583e4491c3a 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -80,6 +80,9 @@ Changelog plot model scalability (see learning_curve example). :pr:`13938` by :user:`Hadrien Reboul `. +- |Efficiency| Multimetric scoring now caches predictions to avoid repeated + calls. :pr:`14261` by `Thomas Fan`_. + :mod:`sklearn.pipeline` ....................... diff --git a/sklearn/ensemble/voting.py b/sklearn/ensemble/voting.py index 0b01340d4f1af..a028d35d138e6 100644 --- a/sklearn/ensemble/voting.py +++ b/sklearn/ensemble/voting.py @@ -337,6 +337,10 @@ def predict_proba(self): """ return self._predict_proba + @predict_proba.setter + def predict_proba(self, func): + self._predict_proba = func + def transform(self, X): """Return class labels or probabilities for X for each estimator. diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 625bdb5bdc3f9..9ac2b7f9241df 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -988,6 +988,10 @@ def predict_proba(self): self._check_proba() return self._predict_proba + @predict_proba.setter + def predict_proba(self, func): + self._predict_proba = func + def _predict_proba(self, X): check_is_fitted(self, "t_") @@ -1056,6 +1060,10 @@ def predict_log_proba(self): self._check_proba() return self._predict_log_proba + @predict_log_proba.setter + def predict_log_proba(self, func): + self._predict_log_proba = func + def _predict_log_proba(self, X): return np.log(self.predict_proba(X)) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index c5cf9e8ee59b6..6c37df7077887 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -14,6 +14,8 @@ import numbers import time from traceback import format_exception_only +from contextlib import contextmanager +from functools import partial import numpy as np import scipy.sparse as sp @@ -24,7 +26,8 @@ _message_with_time) from ..utils.validation import _is_arraylike, _num_samples from ..utils.metaestimators import _safe_split -from ..metrics.scorer import check_scoring, _check_multimetric_scoring +from ..metrics.scorer import (check_scoring, _check_multimetric_scoring, + _BaseScorer) from ..exceptions import FitFailedWarning from ._split import check_cv from ..preprocessing import LabelEncoder @@ -596,15 +599,43 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False): return score +@contextmanager +def _cache_estimator(estimator): + + def _call_func(X, *args, name=None, func=None, cache=None, **kwargs): + try: + return cache[name, id(X)] + except KeyError: + result = func(X, *args, **kwargs) + cache[name, id(X)] = result + return result + + cache = {} + names = ['predict', 'predict_proba', 'decision_function', 'score'] + cache_funcs = {name: getattr(estimator, name) for name in names if + hasattr(estimator, name)} + + # patch methods + for name, func in cache_funcs.items(): + setattr(estimator, name, + partial(_call_func, name=name, func=func, cache=cache)) + yield estimator + + # place methods back + for name, func in cache_funcs.items(): + setattr(estimator, name, func) + + def _multimetric_score(estimator, X_test, y_test, scorers): """Return a dict of score for multimetric scoring""" scores = {} for name, scorer in scorers.items(): - if y_test is None: - score = scorer(estimator, X_test) - else: - score = scorer(estimator, 
X_test, y_test) + with _cache_estimator(estimator) as cached_estimator: + if y_test is None: + score = scorer(cached_estimator, X_test) + else: + score = scorer(cached_estimator, X_test, y_test) if hasattr(score, 'item'): try: diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 6fa2e4fee5ed7..2c383f13a706d 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -5,6 +5,7 @@ import tempfile import os from time import sleep +from unittest.mock import Mock import pytest import numpy as np @@ -76,7 +77,7 @@ from sklearn.model_selection.tests.common import OneTimeSplitter from sklearn.model_selection import GridSearchCV - +from sklearn.model_selection._validation import _cache_estimator try: WindowsError @@ -1718,3 +1719,22 @@ def two_params_scorer(estimator, X_test): fit_and_score_args = [None, None, None, two_params_scorer] assert_raise_message(ValueError, error_message, _score, *fit_and_score_args) + + +@pytest.mark.parametrize('func_name', + ['predict', 'predict_proba', + 'decision_function', 'score']) +def test_cached_estimator(func_name): + mock_est = Mock() + mock_func = getattr(mock_est, func_name) + mock_func.return_value = 42 + X = np.array([[1]]) + + with _cache_estimator(mock_est) as cached_est: + # call func twice + func = getattr(cached_est, func_name) + assert func(X) == 42 + assert func(X) == 42 + + # only called once + assert mock_func.call_count == 1 diff --git a/sklearn/neighbors/lof.py b/sklearn/neighbors/lof.py index a58997502be91..2fc40fdceafc4 100644 --- a/sklearn/neighbors/lof.py +++ b/sklearn/neighbors/lof.py @@ -296,6 +296,10 @@ def predict(self): return self._predict + @predict.setter + def predict(self, func): + self._predict = func + def _predict(self, X=None): """Predict the labels (1 inlier, -1 outlier) of X according to LOF. @@ -363,6 +367,10 @@ def decision_function(self): return self._decision_function + @decision_function.setter + def decision_function(self, func): + self._decision_function = func + def _decision_function(self, X): """Shifted opposite of the Local Outlier Factor of X. 
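Aside (not part of the diff): a minimal sketch of how the _cache_estimator context manager added above is expected to behave once this patch is applied. The estimator, dataset and parameters below are only illustrative choices, not taken from the changeset; the point is that inside the with block repeated calls on the same input object are served from the cache keyed on (method name, id(X)), and the original bound methods are restored on exit.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection._validation import _cache_estimator

X, y = load_iris(return_X_y=True)
est = LogisticRegression(solver='lbfgs', multi_class='auto').fit(X, y)

with _cache_estimator(est) as cached_est:
    # First call computes the probabilities and stores them under
    # ('predict_proba', id(X)).
    first = cached_est.predict_proba(X)
    # Second call with the same array object returns the cached result,
    # so the underlying estimator method runs only once.
    second = cached_est.predict_proba(X)
    assert first is second

# Outside the context manager the original methods are back in place,
# so every call recomputes its result.
assert est.predict_proba(X) is not est.predict_proba(X)

This mirrors the unit test in test_validation.py above, which asserts call_count == 1 on a mocked estimator for each cached method name.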
diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index e5325ecda69f0..c8b2fc5f8e489 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -664,6 +664,10 @@ def partial_fit(self): % self.solver) return self._partial_fit + @partial_fit.setter + def partial_fit(self, func): + self._partial_fit = func + def _partial_fit(self, X, y): return self._fit(X, y, incremental=True) @@ -1040,6 +1044,10 @@ def partial_fit(self): % self.solver) return self._partial_fit + @partial_fit.setter + def partial_fit(self, func): + self._partial_fit = func + def _partial_fit(self, X, y, classes=None): if _check_partial_fit_first_call(self, classes): self._label_binarizer = LabelBinarizer() diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 4a50ee479f030..428c6d96b5787 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -614,6 +614,10 @@ def predict_proba(self): self._check_proba() return self._predict_proba + @predict_proba.setter + def predict_proba(self, func): + self._predict_proba = func + def _predict_proba(self, X): X = self._validate_for_predict(X) if self.probA_.size == 0 or self.probB_.size == 0: @@ -653,6 +657,10 @@ def predict_log_proba(self): self._check_proba() return self._predict_log_proba + @predict_log_proba.setter + def predict_log_proba(self, func): + self._predict_log_proba = func + def _predict_log_proba(self, X): return np.log(self.predict_proba(X))
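Aside (not part of the diff): the repeated @<name>.setter additions across voting.py, stochastic_gradient.py, lof.py, multilayer_perceptron.py and svm/base.py are needed because these estimators expose predict, predict_proba, decision_function or partial_fit as read-only properties, while _cache_estimator patches and later restores methods with plain setattr. The sketch below uses made-up class names to show the failure mode the setters prevent.

class ReadOnlyProba:
    # Mirrors the pre-patch pattern: a property with no setter.
    @property
    def predict_proba(self):
        return self._predict_proba

    def _predict_proba(self, X):
        return X


class SettableProba:
    # Mirrors the post-patch pattern: the same property plus a setter
    # that redirects the underlying callable.
    @property
    def predict_proba(self):
        return self._predict_proba

    @predict_proba.setter
    def predict_proba(self, func):
        self._predict_proba = func

    def _predict_proba(self, X):
        return X


try:
    ReadOnlyProba().predict_proba = lambda X: 'cached'
except AttributeError:
    # Without a setter, assigning to the property raises
    # "AttributeError: can't set attribute", so _cache_estimator
    # could not patch the method on such estimators.
    pass

patched = SettableProba()
patched.predict_proba = lambda X: 'cached'   # allowed by the setter
assert patched.predict_proba([0]) == 'cached'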