[WIP] ENH Adds caching to multimetric scoring with a wrapper class by thomasjpfan · Pull Request #14261 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[WIP] ENH Adds caching to multimetric scoring with a wrapper class #14261

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats_new/v0.22.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ Changelog
plot model scalability (see learning_curve example).
:pr:`13938` by :user:`Hadrien Reboul <H4dr1en>`.

- |Efficiency| Multimetric scoring now caches predictions, avoiding repeated
calls to the same prediction method on the same data. :pr:`14261` by `Thomas Fan`_.

:mod:`sklearn.pipeline`
.......................

Expand Down
4 changes: 4 additions & 0 deletions sklearn/ensemble/voting.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,10 @@ def predict_proba(self):
"""
return self._predict_proba

@predict_proba.setter
def predict_proba(self, func):
    # Setter lets external code (e.g. the multimetric-scoring cache)
    # temporarily swap in a memoized callable; the property above
    # returns whatever is stored in ``_predict_proba``.
    self._predict_proba = func

def transform(self, X):
"""Return class labels or probabilities for X for each estimator.

Expand Down
8 changes: 8 additions & 0 deletions sklearn/linear_model/stochastic_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,10 @@ def predict_proba(self):
self._check_proba()
return self._predict_proba

@predict_proba.setter
def predict_proba(self, func):
    # Setter enables temporary monkey-patching of the prediction
    # method (used by scorer caching); the property delegates to
    # ``_predict_proba`` after its availability check.
    self._predict_proba = func

def _predict_proba(self, X):
check_is_fitted(self, "t_")

Expand Down Expand Up @@ -1056,6 +1060,10 @@ def predict_log_proba(self):
self._check_proba()
return self._predict_log_proba

@predict_log_proba.setter
def predict_log_proba(self, func):
    # Mirrors the predict_proba setter: allows callers to replace the
    # underlying implementation stored in ``_predict_log_proba``.
    self._predict_log_proba = func

def _predict_log_proba(self, X):
    """Return the log of probability estimates for X.

    Delegates to ``self.predict_proba`` so that any patching/caching of
    that method is honored.  NOTE(review): zero probabilities would
    produce ``-inf`` here — presumably acceptable upstream; confirm.
    """
    return np.log(self.predict_proba(X))

Expand Down
41 changes: 36 additions & 5 deletions sklearn/model_selection/_validation.py
8000
Original file line numberDiff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import numbers
import time
from traceback import format_exception_only
from contextlib import contextmanager
from functools import partial

import numpy as np
import scipy.sparse as sp
Expand All @@ -24,7 +26,8 @@
_message_with_time)
from ..utils.validation import _is_arraylike, _num_samples
from ..utils.metaestimators import _safe_split
from ..metrics.scorer import check_scoring, _check_multimetric_scoring
from ..metrics.scorer import (check_scoring, _check_multimetric_scoring,
_BaseScorer)
from ..exceptions import FitFailedWarning
from ._split import check_cv
from ..preprocessing import LabelEncoder
Expand Down Expand Up @@ -596,15 +599,43 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
return score


@contextmanager
def _cache_estimator(estimator):

def _call_func(X, *args, name=None, func=None, cache=None, **kwargs):
try:
return cache[name, id(X)]
except KeyError:
result = func(X, *args, **kwargs)
cache[name, id(X)] = result
return result

cache = {}
names = ['predict', 'predict_proba', 'decision_function', 'score']
cache_funcs = {name: getattr(estimator, name) for name in names if
Copy link
Member
8000

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe name it orig_funcs

hasattr(estimator, name)}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might as well also check that it is callable??


# patch methods
for name, func in cache_funcs.items():
setattr(estimator, name,
partial(_call_func, name=name, func=func, cache=cache))
yield estimator

# place methods back
for name, func in cache_funcs.items():
setattr(estimator, name, func)


def _multimetric_score(estimator, X_test, y_test, scorers):
"""Return a dict of score for multimetric scoring"""
scores = {}

for name, scorer in scorers.items():
if y_test is None:
score = scorer(estimator, X_test)
else:
score = scorer(estimator, X_test, y_test)
with _cache_estimator(estimator) as cached_estimator:
if y_test is None:
score = scorer(cached_estimator, X_test)
else:
score = scorer(cached_estimator, X_test, y_test)

if hasattr(score, 'item'):
try:
Expand Down
22 changes: 21 additions & 1 deletion sklearn/model_selection/tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import tempfile
import os
from time import sleep
from unittest.mock import Mock

import pytest
import numpy as np
Expand Down Expand Up @@ -76,7 +77,7 @@

from sklearn.model_selection.tests.common import OneTimeSplitter
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection._validation import _cache_estimator

try:
WindowsError
Expand Down Expand Up @@ -1718,3 +1719,22 @@ def two_params_scorer(estimator, X_test):
fit_and_score_args = [None, None, None, two_params_scorer]
assert_raise_message(ValueError, error_message,
_score, *fit_and_score_args)


@pytest.mark.parametrize('func_name',
                         ['predict', 'predict_proba',
                          'decision_function', 'score'])
def test_cached_estimator(func_name):
    """Each cacheable method is invoked once per distinct X inside the cm."""
    mock_est = Mock()
    mock_func = getattr(mock_est, func_name)
    mock_func.return_value = 42
    X = np.array([[1]])

    with _cache_estimator(mock_est) as cached_est:
        # call func twice with the same X: second call must be a cache hit
        func = getattr(cached_est, func_name)
        assert func(X) == 42
        assert func(X) == 42

    # only called once — the second call was served from the cache
    assert mock_func.call_count == 1
8 changes: 8 additions & 0 deletions sklearn/neighbors/lof.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,10 @@ def predict(self):

return self._predict

@predict.setter
def predict(self, func):
    # Setter allows temporary replacement of the prediction callable
    # (e.g. by the multimetric-scoring cache); the ``predict`` property
    # returns whatever ``_predict`` holds.
    # (Fix: removed scrape artifact "F438" that broke the decorator line.)
    self._predict = func

def _predict(self, X=None):
"""Predict the labels (1 inlier, -1 outlier) of X according to LOF.

Expand Down Expand Up @@ -363,6 +367,10 @@ def decision_function(self):

return self._decision_function

@decision_function.setter
def decision_function(self, func):
    # Setter lets external code (scorer caching) temporarily replace the
    # implementation backing the ``decision_function`` property.
    self._decision_function = func

def _decision_function(self, X):
"""Shifted opposite of the Local Outlier Factor of X.

Expand Down
8 changes: 8 additions & 0 deletions sklearn/neural_network/multilayer_perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,10 @@ def partial_fit(self):
% self.solver)
return self._partial_fit

@partial_fit.setter
def partial_fit(self, func):
    # Setter permits replacing the incremental-fit callable stored in
    # ``_partial_fit``; the property performs the solver check first.
    self._partial_fit = func

def _partial_fit(self, X, y):
return self._fit(X, y, incremental=True)

Expand Down Expand Up @@ -1040,6 +1044,10 @@ def partial_fit(self):
% self.solver)
return self._partial_fit

@partial_fit.setter
def partial_fit(self, func):
    # Same pattern as the regressor: make the property assignable so
    # ``_partial_fit`` can be swapped out (e.g. for caching wrappers).
    self._partial_fit = func

def _partial_fit(self, X, y, classes=None):
if _check_partial_fit_first_call(self, classes):
self._label_binarizer = LabelBinarizer()
Expand Down
8 changes: 8 additions & 0 deletions sklearn/svm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,10 @@ def predict_proba(self):
self._check_proba()
return self._predict_proba

@predict_proba.setter
def predict_proba(self, func):
    # Setter enables temporary monkey-patching of the probability
    # method; the property runs ``_check_proba`` before returning it.
    self._predict_proba = func

def _predict_proba(self, X):
X = self._validate_for_predict(X)
if self.probA_.size == 0 or self.probB_.size == 0:
Expand Down Expand Up @@ -653,6 +657,10 @@ def predict_log_proba(self):
self._check_proba()
return self._predict_log_proba

@predict_log_proba.setter
def predict_log_proba(self, func):
    # Mirrors the predict_proba setter: allows the callable behind the
    # property to be replaced (used by scorer caching).
    self._predict_log_proba = func

def _predict_log_proba(self, X):
    """Return the log of probability estimates for X.

    Delegates to ``self.predict_proba`` so that any patching/caching of
    that method is honored.  NOTE(review): zero probabilities would
    produce ``-inf`` here — presumably acceptable upstream; confirm.
    """
    return np.log(self.predict_proba(X))

Expand Down
0