8000 [MRG] Multimetric GridSearch - Memoize prediction results (and address some previous comments) by raghavrv · Pull Request #9326 · scikit-learn/scikit-learn · GitHub

Closed
wants to merge 8 commits
42 changes: 42 additions & 0 deletions sklearn/model_selection/_validation.py
@@ -23,9 +23,11 @@
from ..utils import indexable, check_random_state, safe_indexing
from ..utils.validation import _is_arraylike, _num_samples
from ..utils.metaestimators import _safe_split
from ..utils.metaestimators import if_delegate_has_method
from ..externals.joblib import Parallel, delayed, logger
from ..externals.six.moves import zip
from ..metrics.scorer import check_scoring, _check_multimetric_scoring
from ..metrics.scorer import _passthrough_scorer
from ..exceptions import FitFailedWarning
from ._split import check_cv
from ..preprocessing import LabelEncoder
@@ -521,10 +523,50 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
return score


class _MemoizedPredictEstimator:
def __init__(self, estimator):
self.estimator = estimator

def fit(self, X, y):
self.estimator.fit(X, y)

@if_delegate_has_method(delegate='estimator')
def predict(self, X):
if not hasattr(self, '_predictions'):
self._predictions = self.estimator.predict(X)
return self._predictions

@if_delegate_has_method(delegate='estimator')
def decision_function(self, X):
if not hasattr(self, '_decisions'):
self._decisions = self.estimator.decision_function(X)
return self._decisions

Review comment (Member):

No predict_proba or predict_log_proba? Are you sure these call decision_function internally? Probabilistic non-linear predictors will be among those most benefiting from memoization, and I don't think they tend to implement decision_function.
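For illustration only (not part of the diff), a quick check of this point with two common probabilistic estimators; both expose predict_proba but neither implements decision_function:

from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

for est in (RandomForestClassifier(), GaussianNB()):
    # Both provide probability estimates but no decision_function,
    # so probability-based scorers cannot fall back on it.
    print(type(est).__name__,
          hasattr(est, 'predict_proba'),      # True for both
          hasattr(est, 'decision_function'))  # False for both

The predict_proba and predict_log_proba wrappers added below cover exactly these cases.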

@if_delegate_has_method(delegate='estimator')
def predict_proba(self, X):
if not hasattr(self, '_probs'):
self._probs = self.estimator.predict_proba(X)
return self._probs

@if_delegate_has_method(delegate='estimator')
def predict_log_proba(self, X):
if not hasattr(self, '_log_probs'):
self._log_probs = self.estimator.predict_log_proba(X)
return self._log_probs

@if_delegate_has_method(delegate='estimator')
def score(self, *args, **kwargs):
return self.estimator.score(*args, **kwargs)


def _multimetric_score(estimator, X_test, y_test, scorers):
"""Return a dict of score for multimetric scoring"""
scores = {}

    # Wrap the estimator in _MemoizedPredictEstimator so that repeated
    # prediction calls across scorers are computed only once.
    # If the estimator has its own score method, wrapping does no harm
    # because score is delegated to it.
estimator = _MemoizedPredictEstimator(estimator)

for name, scorer in scorers.items():
if y_test is None:
score = scorer(estimator, X_test)
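Stepping outside the diff for a moment, a rough usage sketch of the idea (assuming this branch, since _MemoizedPredictEstimator is private to sklearn.model_selection._validation): when two predict-based scorers are applied to the same wrapped estimator and the same X, only the first scorer triggers an actual call to the underlying predict.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.scorer import _check_multimetric_scoring
from sklearn.model_selection._validation import _MemoizedPredictEstimator

X, y = make_classification(n_samples=200, random_state=0)
est = LogisticRegression().fit(X, y)

# Both scorers call predict(); the wrapper caches the first result,
# so the second scorer reuses it instead of re-predicting.
scorers, _ = _check_multimetric_scoring(est, ['accuracy', 'f1'])
wrapped = _MemoizedPredictEstimator(est)
scores = {name: scorer(wrapped, X, y) for name, scorer in scorers.items()}

Note that the cache is not keyed on X, which is fine here because _multimetric_score always scores a single (X_test, y_test) split.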
24 changes: 24 additions & 0 deletions sklearn/model_selection/tests/test_validation.py
@@ -38,6 +38,7 @@
from sklearn.model_selection import learning_curve
from sklearn.model_selection import validation_curve
from sklearn.model_selection._validation import _check_is_permutation
from sklearn.model_selection._validation import _multimetric_score

from sklearn.datasets import make_regression
from sklearn.datasets import load_boston
@@ -50,6 +51,7 @@
from sklearn.metrics import precision_score
from sklearn.metrics import r2_score
from sklearn.metrics.scorer import check_scoring
from sklearn.metrics.scorer import _check_multimetric_scoring

from sklearn.linear_model import Ridge, LogisticRegression
from sklearn.linear_model import PassiveAggressiveClassifier
@@ -219,6 +221,17 @@ def get_params(self, deep=False):
return {'a': self.a, 'allow_nd': self.allow_nd}


class CountCallPredictedEstimator:
    def __init__(self):
        self._n_predict_calls = 0
        self._rng = np.random.RandomState(0)

    def fit(self, X, y):
        return self

    def predict(self, X):
        self._n_predict_calls += 1
        return self._rng.randint(0, 2, size=X.shape[0])


# XXX: use 2D array, since 1D X is being detected as a single sample in
# check_consistent_length
X = np.ones((10, 2))
@@ -1299,3 +1312,14 @@ def test_permutation_test_score_pandas():
check_series = lambda x: isinstance(x, TargetType)
clf = CheckingClassifier(check_X=check_df, check_y=check_series)
permutation_test_score(clf, X_df, y_ser)


def test_multiscore_memoizing():
# Check if memoizing works as expected in _multimetric_score
X, y = make_classification(n_samples=1000, random_state=0)
estimator = CountCallPredictedEstimator()
scorers, _ = _check_multimetric_scoring(estimator,
['neg_mean_squared_error',
'neg_median_absolute_error'])
    scores = _multimetric_score(estimator, X, y, scorers=scorers)
    # Both scorers only need predict, so the memoizing wrapper inside
    # _multimetric_score should leave the underlying estimator with a
    # single predict call.
    assert estimator._n_predict_calls == 1