ENH: Add support for Multimetric BayesSearchCV by QuentinSoubeyran · Pull Request #1062 · scikit-optimize/scikit-optimize · GitHub
This repository was archived by the owner on Feb 28, 2024. It is now read-only.

ENH: Add support for Multimetric BayesSearchCV #1062

Open
wants to merge 4 commits into master
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,6 +1,9 @@
*.py[co]
*~
.ipynb_checkpoints/
venv
.venv
.python-version
# created by installing scikit-learn from git
src/*
build/
@@ -19,3 +22,6 @@ doc/auto_examples

# vim users
.*.swp

# vscode users
.vscode
6 changes: 6 additions & 0 deletions doc/whats_new/v0.9.rst
@@ -26,3 +26,9 @@ Version 0.9.0
for scikit-learn >= 1.0. :pr:`1063`
- Minor documentation improvements.
- Various small bugs and fixes.
:pr:`988`

Version 0.9.1
=============
- |Feature| Add support for multimetric scoring to :obj:`skopt.searchcv.BayesSearchCV`.
:pr:`1062`
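
As a rough illustration of the feature announced in this entry (not part of the patch itself; the dataset, search space and iteration count below are arbitrary placeholders), a multi-metric search with this change applied might look like:

from sklearn.datasets import make_classification
from sklearn.svm import SVC
from skopt import BayesSearchCV
from skopt.space import Real

X, y = make_classification(n_classes=2, random_state=0)
opt = BayesSearchCV(
    SVC(),
    {"C": Real(1e-3, 1e3, prior="log-uniform")},
    scoring=["accuracy", "f1"],   # multiple metrics are now accepted
    refit="f1",                   # the metric that guides the optimization
    n_iter=8,
)
opt.fit(X, y)
print(opt.cv_results_["mean_test_accuracy"])
print(opt.cv_results_["mean_test_f1"])
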
138 changes: 113 additions & 25 deletions skopt/searchcv.py
@@ -1,28 +1,25 @@
import warnings

try:
from collections.abc import Sized
except ImportError:
from collections import Sized

import numpy as np
from scipy.stats import rankdata

from sklearn.model_selection._search import BaseSearchCV
from sklearn.utils import check_random_state

from sklearn.utils.validation import check_is_fitted
try:
from sklearn.metrics import check_scoring
except ImportError:
from sklearn.metrics.scorer import check_scoring

from . import Optimizer
from .utils import point_asdict, dimensions_aslist, eval_callbacks
from .space import check_dimension
from .callbacks import check_callback


def _get_score_names(cv_results, *, kind="test"):
prefix = f"mean_{kind}_"
return {key[len(prefix):]
for key in cv_results.keys()
if key.startswith(prefix)}
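# Illustration (not part of the patch): for a multimetric cv_results dict such
# as {"mean_test_accuracy": [...], "mean_test_f1": [...], "std_test_f1": [...]},
# this helper returns {"accuracy", "f1"}; with kind="train" it inspects the
# "mean_train_" keys instead.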

class BayesSearchCV(BaseSearchCV):
"""Bayesian optimization over hyper parameters.

@@ -78,11 +75,24 @@ class BayesSearchCV(BaseSearchCV):
``{'base_estimator': 'RF'}`` would use a Random Forest surrogate
instead of the default Gaussian Process.

scoring : string, callable or None, default=None
A string (see model evaluation documentation) or
a scorer callable object / function with signature
``scorer(estimator, X, y)``.
If ``None``, the ``score`` method of the estimator is used.
scoring : str, callable, list, tuple or dict, default=None
Strategy to evaluate the performance of the cross-validated model on
the test set. If ``None``, the ``score`` method of the estimator is
used.

If `scoring` represents a single score, one can use:

- a single string (see :ref:`scoring_parameter`);
- a callable (see :ref:`scoring`) that returns a single value.

If `scoring` represents multiple scores, one can use:

- a list or tuple of unique strings;
- a callable returning a dictionary where the keys are the metric
names and the values are the metric scores;
- a dictionary with metric names as keys and callables as values.

Callables must have the signature ``scorer(estimator, X, y=None)``.

fit_params : dict, optional
Parameters to pass to the fit method.
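
For reference, a hedged sketch of the multi-metric ``scoring`` forms described in the docstring above (the metric names and the callable are illustrative, not taken from the patch):

scoring_list = ["accuracy", "f1"]                # list of unique scorer strings
scoring_dict = {"acc": "accuracy", "f1": "f1"}   # metric name -> scorer

def scoring_callable(estimator, X, y=None):      # callable returning a dict of scores
    y_pred = estimator.predict(X)
    return {"acc": float((y_pred == y).mean())}
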
@@ -124,10 +134,14 @@ class BayesSearchCV(BaseSearchCV):
either binary or multiclass, :class:`StratifiedKFold` is used. In all
other cases, :class:`KFold` is used.

refit : boolean, default=True
refit : bool, str, default=True
Refit the best estimator with the entire dataset.
If "False", it is impossible to make predictions using
this RandomizedSearchCV instance after fitting.
this BayesSearchCV instance after fitting.

For multiple metric evaluation, this needs to be a `str` denoting the
scorer used to direct the optimization process and to select the best
parameters for refitting the estimator at the end.

verbose : integer
Controls the verbosity: the higher, the more messages.
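
A sketch of the constraint spelled out above (the estimator and search space are placeholders): with several metrics, leaving ``refit`` at its default of ``True`` is expected to be rejected during ``fit``, since no single scorer is designated to drive the optimization.

from sklearn.svm import SVC
from skopt import BayesSearchCV
from skopt.space import Real

opt = BayesSearchCV(
    SVC(),
    {"C": Real(1e-3, 1e3, prior="log-uniform")},
    scoring=["accuracy", "f1"],  # multiple metrics ...
    refit=True,                  # ... but no single scorer named
)
# opt.fit(X, y)  # expected to raise ValueError (see _check_refit_for_multimetric)
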
@@ -258,13 +272,21 @@ class BayesSearchCV(BaseSearchCV):
n_splits_ : int
The number of cross-validation splits (folds/iterations).

refit_time_ : float
Seconds used for refitting the best model on the whole dataset.

This is present only if ``refit`` is not False.

multimetric_ : bool
Whether or not the scorers compute several metrics.

Notes
-----
The parameters selected are those that maximize the score of the held-out
data, according to the scoring parameter.

If `n_jobs` was set to a value higher than one, the data is copied for each
parameter setting(and not `n_jobs` times). This is done for efficiency
parameter setting (and not `n_jobs` times). This is done for efficiency
reasons if individual jobs take very little time, but may raise errors if
the dataset is large and not enough memory is available. A workaround in
this case is to set `pre_dispatch`. Then, the memory is copied only
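
Picking up the ``refit_time_`` and ``multimetric_`` attributes documented above, a fitted multi-metric search (such as ``opt`` from the earlier sketch) would expose them roughly as:

print(opt.multimetric_)  # True when several metrics were computed
print(opt.refit_time_)   # seconds spent refitting on the whole dataset
print(opt.best_score_)   # mean CV score of the refit metric for the best params
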
@@ -393,7 +415,8 @@ def _make_optimizer(self, params_space):

return optimizer

def _step(self, search_space, optimizer, evaluate_candidates, n_points=1):
def _step(self, search_space, optimizer, score_name,
evaluate_candidates, n_points=1):
"""Generate n_jobs parameters and evaluate them in parallel.
"""
# get parameter values to evaluate
@@ -406,10 +429,38 @@ def _step(self, search_space, optimizer, evaluate_candidates, n_points=1):
params_dict = [point_asdict(search_space, p) for p in params]

all_results = evaluate_candidates(params_dict)

# if self.scoring is a callable, we have to wait until here
# to get the score name
if score_name is None:
score_names = _get_score_names(all_results)
if len(score_names) > 1:
# multimetric case
# early check to fail before lengthy computations, as
# BaseSearchCV only performs this check *after* _run_search
self._check_refit_for_multimetric(score_names)
score_name = f"mean_test_{self.refit}"
elif len(score_names) == 1:
# single metric, or a callable self.scoring returning a dict
# with a single value
# In both cases, we just use the score that is available
score_name = f"mean_test_{score_names.pop()}"
else:
# failsafe, shouldn't happen
raise ValueError(
"No score was detected after fitting. This is probably "
"due to a callable 'scoring' returning an empty dict."
)

# Feed the point and objective value back into optimizer
# Optimizer minimizes objective, hence provide negative score
local_results = all_results["mean_test_score"][-len(params):]
return optimizer.tell(params, [-score for score in local_results])
local_results = all_results[score_name][-len(params):]
# also return score_name so the caller can cache it when self.scoring is
# a callable; this avoids re-deriving it from cv_results on every step
return (
optimizer.tell(params, [-score for score in local_results]),
score_name
)

@property
def total_iterations(self):
@@ -463,14 +514,24 @@ def fit(self, X, y=None, *, groups=None, callback=None, **fit_params):
else:
self.optimizer_kwargs_ = dict(self.optimizer_kwargs)

if callable(self.refit):
raise ValueError("BayesSearchCV doesn't support a callable refit, "
"as it doesn't define an implicit score to "
"optimize")

super().fit(X=X, y=y, groups=groups, **fit_params)

# BaseSearchCV never ranked train scores,
# but apparently we used to ship this (back-compat)
if self.return_train_score:
self.cv_results_["rank_train_score"] = \
rankdata(-np.array(self.cv_results_["mean_train_score"]),
method='min').astype(int)
for score in _get_score_names(self.cv_results_, kind="train"):
self.cv_results_[f"rank_train_{score}"] = (
rankdata(
-np.array(self.cv_results_[f"mean_train_{score}"]),
method='min'
)
.astype(int)
)
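# e.g. with return_train_score=True and scoring=["accuracy", "f1"], the loop
# above adds "rank_train_accuracy" and "rank_train_f1" to cv_results_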
return self

def _run_search(self, evaluate_candidates):
@@ -484,6 +545,16 @@ def _run_search(self, evaluate_candidates):
random_state = check_random_state(self.random_state)
self.optimizer_kwargs_['random_state'] = random_state

# Adapted from BaseSearchCV fit() method
if callable(self.scoring):
# will be determined later
score_name = None
elif self.scoring is None or isinstance(self.scoring, str):
score_name = "mean_test_score"
else:
# proper checking took place before in BaseSearchCV.fit()
score_name = f"mean_test_{self.refit}"

# Instantiate optimizers for all the search spaces.
optimizers = []
for search_space in search_spaces:
@@ -509,12 +580,29 @@
# when n_iter < n_points points left for evaluation
n_points_adjusted = min(n_iter, n_points)

optim_result = self._step(
search_space, optimizer,
optim_result, score_name = self._step(
search_space, optimizer, score_name,
evaluate_candidates, n_points=n_points_adjusted
)
n_iter -= n_points

if eval_callbacks(callbacks, optim_result):
break
self._optim_results.append(optim_result)

def _check_refit_for_multimetric(self, scores):
"""Check `refit` is compatible with `scores` and valid"""
# override parent method to exclude False and callables
multimetric_refit_msg = (
"For multi-metric scoring, the 'refit' parameter must be set to a "
"scorer key, used to guide the bayesian optimization process "
"and refit an estimator with the best parameter settings on the "
"whole dataset (making the best_* attributes available for that "
f" metric). {self.refit!r} was passed."
)

is_refit_valid = (isinstance(self.refit, str) and
self.refit in scores)

if not is_refit_valid:
raise ValueError(multimetric_refit_msg)
98 changes: 98 additions & 0 deletions skopt/tests/test_searchcv.py
@@ -4,13 +4,16 @@

import pytest

import sklearn as skl
from sklearn.datasets import load_iris, make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import clone
from sklearn.base import BaseEstimator
from packaging.version import parse as parse_version
from scipy.stats import rankdata
import numpy as np
from numpy.testing import assert_array_equal
@@ -463,3 +466,98 @@ def score(self, X, y):
)

model.fit(X, y)


def test_searchcv_multimetric_scoring():
# test that multi-metric scoring works as intended
# for BayesSearchCV
random_state = 42

X, y = make_classification(n_classes=2, random_state=random_state)
X_train, X_test, y_train, y_test = train_test_split(
X, y, train_size=0.75, random_state=0
)
# test iterable scoring
opt = BayesSearchCV(
SVC(random_state=random_state),
{
'C': Real(1e-6, 1e+6, prior='log-uniform'),
'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
'degree': Integer(1, 8),
'kernel': Categorical(['linear', 'poly', 'rbf']),
},
scoring=["accuracy", "f1"],
refit="f1",
n_iter=11,
random_state=random_state
)
opt.fit(X_train, y_train)
y_pred = opt.predict(X_test)
assert f1_score(y_test, y_pred) > 0.9
assert (
len(opt.cv_results_["mean_test_accuracy"])
== len(opt.cv_results_["mean_test_f1"])
)

# test dict scoring
opt = BayesSearchCV(
SVC(random_state=random_state),
{
'C': Real(1e-6, 1e+6, prior='log-uniform'),
'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
'degree': Integer(1, 8),
'kernel': Categorical(['linear', 'poly', 'rbf']),
},
scoring={
"f1": "f1",
"accuracy": "accuracy",
},
refit="f1",
n_iter=11,
random_state=random_state
)
opt.fit(X_train, y_train)
y_pred = opt.predict(X_test)
assert f1_score(y_test, y_pred) > 0.9
assert (
len(opt.cv_results_["mean_test_accuracy"])
== len(opt.cv_results_["mean_test_f1"])
)


@pytest.mark.skipif(parse_version(skl.__version__) < parse_version("0.24"),
reason="requires sklearn>=0.24")
def test_searchcv_multimetric_callable_scoring():
random_state = 42

X, y = make_classification(n_classes=2, random_state=random_state)
X_train, X_test, y_train, y_test = train_test_split(
X, y, train_size=0.75, random_state=0
)

# sample code taken from scikit-learn
def confusion_matrix_score(clf, X, y):
y_pred = clf.predict(X)
cm = confusion_matrix(y, y_pred)
return {'tn': cm[0, 0], 'fp': cm[0, 1],
'fn': cm[1, 0], 'tp': cm[1, 1]}

opt = BayesSearchCV(
SVC(random_state=random_state),
{
'C': Real(1e-6, 1e+6, prior='log-uniform'),
'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
'degree': Integer(1, 8),
'kernel': Categorical(['linear', 'poly', 'rbf']),
},
scoring=confusion_matrix_score,
refit="tp",
n_iter=11,
random_state=random_state
)
opt.fit(X_train, y_train)
assert confusion_matrix_score(opt, X_test, y_test)["tp"] > 0.9
assert (
len(opt.cv_results_["mean_test_tp"])
== len(opt.cv_results_["mean_test_fp"])
)