ENH add new common test: `test_negative_sample_weight_support` and `allow_negative_sample_weight` tag #21132


16 changes: 16 additions & 0 deletions doc/whats_new/v1.1.rst
@@ -76,6 +76,22 @@ Changelog
Setting a transformer to "passthrough" will pass the features unchanged.
:pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.

:mod:`sklearn.tests`
.......................

- |Enhancement| Added a common test, :func:`test_negative_sample_weight_support`,
  which fits every estimator in `_tested_estimators()` whose `fit` accepts a
  `sample_weight` parameter using negative values in `sample_weight`. Added a
  new estimator tag for the test, `"allow_negative_sample_weight"` (default
  `True`). If an estimator supports negative sample weights, fitting with
  negative values in `sample_weight` must not raise an error; otherwise a
  specific `ValueError` has to be raised. The following estimators previously
  raised other, misleading errors and have been updated:
  :class:`BayesianRidge`, :class:`KernelRidge`,
  :class:`Ridge`, :class:`RidgeClassifier`,
  :class:`ElasticNetCV`, :class:`LassoCV`.
  :pr:`21132` by :user:`András Simon <simonandras>`.
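To make the new contract concrete, here is a minimal, hypothetical sketch (not part of this diff; the class name is invented) of how an estimator opts out of the default tag and produces the expected error:

```python
from sklearn.base import BaseEstimator
from sklearn.utils.validation import _check_sample_weight


class NonNegativeWeightEstimator(BaseEstimator):
    """Hypothetical estimator that rejects negative sample weights."""

    def fit(self, X, y=None, sample_weight=None):
        if sample_weight is not None:
            # Raises ValueError("Negative values in data passed to
            # `sample_weight`") when any weight is negative.
            sample_weight = _check_sample_weight(
                sample_weight, X, only_non_negative=True
            )
        return self

    def _more_tags(self):
        # Opt out of the default (True) declared in _DEFAULT_TAGS.
        return {"allow_negative_sample_weight": False}
```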

Code and Documentation Contributors
-----------------------------------

14 changes: 14 additions & 0 deletions sklearn/ensemble/_stacking.py
@@ -575,6 +575,13 @@ def _sk_visual_block_(self):
final_estimator = self.final_estimator
return super()._sk_visual_block_(final_estimator)

    def _more_tags(self):
        # Negative weights are allowed only if every base estimator allows them.
        for est in self.estimators:
            if not est[1]._get_tags()["allow_negative_sample_weight"]:
                return {"allow_negative_sample_weight": False}
        return {"allow_negative_sample_weight": True}


class StackingRegressor(RegressorMixin, _BaseStacking):
"""Stack of estimators with a final regressor.
@@ -778,3 +785,10 @@ def _sk_visual_block_(self):
else:
final_estimator = self.final_estimator
return super()._sk_visual_block_(final_estimator)

    def _more_tags(self):
        # Negative weights are allowed only if every base estimator allows them.
        for est in self.estimators:
            if not est[1]._get_tags()["allow_negative_sample_weight"]:
                return {"allow_negative_sample_weight": False}
        return {"allow_negative_sample_weight": True}
14 changes: 14 additions & 0 deletions sklearn/ensemble/_voting.py
@@ -416,6 +416,13 @@ class labels predicted by each classifier.
else:
return self._predict(X)

    def _more_tags(self):
        # Negative weights are allowed only if every base estimator allows them.
        for est in self.estimators:
            if not est[1]._get_tags()["allow_negative_sample_weight"]:
                return {"allow_negative_sample_weight": False}
        return {"allow_negative_sample_weight": True}


class VotingRegressor(RegressorMixin, _BaseVoting):
"""Prediction voting regressor for unfitted estimators.
@@ -562,3 +569,10 @@ def transform(self, X):
"""
check_is_fitted(self)
return self._predict(X)

    def _more_tags(self):
        # Negative weights are allowed only if every base estimator allows them.
        for est in self.estimators:
            if not est[1]._get_tags()["allow_negative_sample_weight"]:
                return {"allow_negative_sample_weight": False}
        return {"allow_negative_sample_weight": True}
3 changes: 3 additions & 0 deletions sklearn/ensemble/_weight_boosting.py
@@ -288,6 +288,9 @@ def feature_importances_(self):
"feature_importances_ attribute"
) from e

def _more_tags(self):
return {"allow_negative_sample_weight": False}


def _samme_proba(estimator, n_classes, X):
"""Calculate algorithm 4, step 2, equation c) of Zhu et al [1].
9 changes: 7 additions & 2 deletions sklearn/kernel_ridge.py
@@ -155,7 +155,10 @@ def _get_kernel(self, X, Y=None):
return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)

    def _more_tags(self):
-        return {"pairwise": self.kernel == "precomputed"}
+        return {
+            "pairwise": self.kernel == "precomputed",
+            "allow_negative_sample_weight": False,
+        }

# TODO: Remove in 1.1
# mypy error: Decorated property not supported
@@ -192,7 +195,9 @@ def fit(self, X, y, sample_weight=None):
X, y, accept_sparse=("csr", "csc"), multi_output=True, y_numeric=True
)
        if sample_weight is not None and not isinstance(sample_weight, float):
-            sample_weight = _check_sample_weight(sample_weight, X)
+            sample_weight = _check_sample_weight(
+                sample_weight, X, only_non_negative=True
+            )

K = self._get_kernel(X)
alpha = np.atleast_1d(self.alpha)
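A quick check of the new behavior (a sketch, assuming this branch): `KernelRidge` now fails fast with a consistent message instead of a misleading downstream solver error.

```python
import numpy as np

from sklearn.kernel_ridge import KernelRidge

X = np.arange(10.0).reshape(-1, 1)
y = np.arange(10.0)
sample_weight = np.ones(10)
sample_weight[0] = -1.0  # a single negative weight

try:
    KernelRidge().fit(X, y, sample_weight=sample_weight)
except ValueError as exc:
    print(exc)  # Negative values in data passed to `sample_weight`
```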
3 changes: 3 additions & 0 deletions sklearn/linear_model/_base.py
@@ -724,6 +724,9 @@ def rmatvec(b):
self._set_intercept(X_offset, y_offset, X_scale)
return self

def _more_tags(self):
return {"allow_negative_sample_weight": False}


def _check_precomputed_gram_matrix(
X, precompute, X_offset, X_scale, rtol=1e-7, atol=1e-5
7 changes: 6 additions & 1 deletion sklearn/linear_model/_bayes.py
@@ -240,7 +240,9 @@ def fit(self, X, y, sample_weight=None):
X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True)

        if sample_weight is not None:
-            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
+            sample_weight = _check_sample_weight(
+                sample_weight, X, dtype=X.dtype, only_non_negative=True
+            )

X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
X,
@@ -424,6 +426,9 @@ def _log_marginal_likelihood(

return score

def _more_tags(self):
return {"allow_negative_sample_weight": False}


###############################################################################
# ARD (Automatic Relevance Determination) regression
7 changes: 5 additions & 2 deletions sklearn/linear_model/_coordinate_descent.py
@@ -1583,7 +1583,9 @@ def fit(self, X, y, sample_weight=None):
        if sample_weight is not None:
            if sparse.issparse(X):
                raise ValueError("Sample weights do not (yet) support sparse matrices.")
-            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
+            sample_weight = _check_sample_weight(
+                sample_weight, X, dtype=X.dtype, only_non_negative=True
+            )

model = self._get_estimator()

@@ -1731,7 +1733,8 @@ def _more_tags(self):
"check_sample_weights_invariance": (
"zero sample_weight is not equivalent to removing samples"
),
}
},
"allow_negative_sample_weight": False,
}


5 changes: 4 additions & 1 deletion sklearn/linear_model/_ransac.py
@@ -615,5 +615,8 @@ def _more_tags(self):
"check_sample_weights_invariance": (
"zero sample_weight is not equivalent to removing samples"
),
}
},
"allow_negative_sample_weight": self.base_estimator._get_tags()[
"allow_negative_sample_weight"
],
}
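Here the tag simply mirrors the base estimator (a sketch, assuming this branch, where `LinearRegression` gets the tag set to `False` via `_base.py` above); `MultiOutputClassifier`/`MultiOutputRegressor` below delegate the same way through `self.estimator`:

```python
from sklearn.linear_model import LinearRegression, RANSACRegressor

ransac = RANSACRegressor(base_estimator=LinearRegression())
# Mirrors LinearRegression's tag on this branch.
print(ransac._get_tags()["allow_negative_sample_weight"])  # False
```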
15 changes: 14 additions & 1 deletion sklearn/linear_model/_ridge.py
@@ -749,7 +749,9 @@ def fit(self, X, y, sample_weight=None):
solver = self.solver

        if sample_weight is not None:
-            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
+            sample_weight = _check_sample_weight(
+                sample_weight, X, dtype=X.dtype, only_non_negative=True
+            )

# when X is sparse we only remove offset from y
X, y, X_offset, y_offset, X_scale = self._preprocess_data(
@@ -807,6 +809,9 @@ def fit(self, X, y, sample_weight=None):

return self

def _more_tags(self):
return {"allow_negative_sample_weight": False}


class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
"""Linear least squares with l2 regularization.
@@ -1953,6 +1958,11 @@ def fit(self, X, y, sample_weight=None):
cross-validation takes the sample weights into account when computing
the validation score.
"""
if sample_weight is not None:
sample_weight = _check_sample_weight(
sample_weight, X, only_non_negative=True
)

cv = self.cv
if cv is None:
estimator = _RidgeGCV(
@@ -2001,6 +2011,9 @@

return self

def _more_tags(self):
return {"allow_negative_sample_weight": False}


class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
"""Ridge regression with built-in cross-validation.
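With these changes (a sketch, assuming this branch), `Ridge`, `RidgeClassifier`, and the CV variants all reject negative weights up front:

```python
import numpy as np

from sklearn.linear_model import Ridge

X = np.ones((4, 2))
y = np.array([1.0, 1.0, 2.0, 2.0])
sample_weight = np.array([1.0, -1.0, 1.0, 1.0])

try:
    Ridge().fit(X, y, sample_weight=sample_weight)
except ValueError as exc:
    print(exc)  # Negative values in data passed to `sample_weight`
```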
7 changes: 6 additions & 1 deletion sklearn/multioutput.py
@@ -238,7 +238,12 @@ def predict(self, X):
return np.asarray(y).T

    def _more_tags(self):
-        return {"multioutput_only": True}
+        return {
+            "multioutput_only": True,
+            "allow_negative_sample_weight": self.estimator._get_tags()[
+                "allow_negative_sample_weight"
+            ],
+        }


class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):
3 changes: 2 additions & 1 deletion sklearn/neighbors/_kde.py
@@ -327,5 +327,6 @@ def _more_tags(self):
"check_sample_weights_invariance": (
"sample_weight must have positive values"
),
}
},
"allow_negative_sample_weight": False,
}
26 changes: 26 additions & 0 deletions sklearn/tests/test_common.py
@@ -37,13 +37,15 @@
from sklearn.pipeline import make_pipeline

from sklearn.utils import IS_PYPY
from sklearn.utils.validation import has_fit_parameter
from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags
from sklearn.utils._testing import (
SkipTest,
set_random_state,
)
from sklearn.utils.estimator_checks import (
_construct_instance,
_create_data_for_fit,
_set_checking_parameters,
_get_check_estimator_ids,
check_class_weight_balanced_linear_classifier,
@@ -331,6 +333,30 @@ def test_check_n_features_in_after_fitting(estimator):
check_n_features_in_after_fitting(estimator.__class__.__name__, estimator)


@pytest.mark.parametrize(
"estimator", _tested_estimators(), ids=_get_check_estimator_ids
)
def test_negative_sample_weight_support(estimator):
"""
In `_tested_estimators()`, where `fit` has `sample_weight` parameter
all (`1darray`, `2darray`) `X_types` tags are supported in
`_create_data_for_fit`. If new estimators will be added to this
list with other `X_types` tags, then the `_create_data_for_fit`
function have to be updated with the new cases.
"""

if has_fit_parameter(estimator, "sample_weight"):
data = _create_data_for_fit(estimator=estimator, create_sample_weight=True)
# sample_weight was np.ones(30)
data["sample_weight"][0] = -1
if not estimator._get_tags()["allow_negative_sample_weight"]:
err_msg = "Negative values in data passed to `sample_weight`"
with pytest.raises(ValueError, match=err_msg):
estimator.fit(**data)
else:
estimator.fit(**data)
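For reference, `has_fit_parameter` is an existing public helper that gates the check above; a quick illustration:

```python
from sklearn.svm import SVC
from sklearn.utils.validation import has_fit_parameter

print(has_fit_parameter(SVC(), "sample_weight"))  # True
```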


# NOTE: When running `check_dataframe_column_names_consistency` on a meta-estimator that
# delegates validation to a base estimator, the check is testing that the base estimator
# is checking for column name consistency.
1 change: 1 addition & 0 deletions sklearn/utils/_tags.py
@@ -19,6 +19,7 @@
"preserves_dtype": [np.float64],
"requires_y": False,
"pairwise": False,
"allow_negative_sample_weight": True,
}
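Estimators that do not override `_more_tags` pick the default up from here, e.g. (a sketch, assuming this branch):

```python
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._tags import _safe_tags

# DecisionTreeClassifier does not override the tag, so the default
# (True) from _DEFAULT_TAGS applies.
print(_safe_tags(DecisionTreeClassifier(), key="allow_negative_sample_weight"))
```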


42 changes: 42 additions & 0 deletions sklearn/utils/estimator_checks.py
@@ -45,6 +45,7 @@
from ..metrics import accuracy_score, adjusted_rand_score, f1_score
from ..random_projection import BaseRandomProjection
from ..feature_selection import SelectKBest
from ..multioutput import MultiOutputClassifier, MultiOutputRegressor
from ..pipeline import make_pipeline
from ..exceptions import DataConversionWarning
from ..exceptions import NotFittedError
@@ -398,6 +399,47 @@ def _construct_instance(Estimator):
return estimator


def _create_data_for_fit(estimator, create_sample_weight=False):
    """
    Supported `X_types` tags: `1darray`, `2darray`.

    Parameters
    ----------
    estimator : estimator instance generated by `_construct_instance`.

    create_sample_weight : bool, whether or not to add a `sample_weight`
        entry to the result dict.

    Returns
    -------
    data : dict, such that `estimator.fit(**data)` can be used for testing.
    """
instance_tags = estimator._get_tags()

data = {}
if "2darray" in instance_tags["X_types"]:
data["X"] = np.ones((30, 2))
elif "1darray" in instance_tags["X_types"]:
data["X"] = np.ones((30, 1))
else:
raise ValueError("X_type not supported")

if instance_tags["requires_y"]:
        # two classes
        if isinstance(estimator, MultiOutputClassifier):
            # MultiOutputClassifier.fit names its target argument `Y` (capitalized)
data["Y"] = np.concatenate((np.ones((15, 2)), 2 * np.ones((15, 2))))
elif isinstance(estimator, MultiOutputRegressor):
data["y"] = np.concatenate((np.ones((15, 2)), 2 * np.ones((15, 2))))
else:
data["y"] = np.concatenate((np.ones(15), 2 * np.ones(15)))

if create_sample_weight:
data["sample_weight"] = np.ones(30)

return data
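Example use of the helper (a sketch, assuming this branch):

```python
from sklearn.linear_model import Ridge
from sklearn.utils.estimator_checks import _create_data_for_fit

data = _create_data_for_fit(Ridge(), create_sample_weight=True)
print(sorted(data))  # ['X', 'sample_weight', 'y']
Ridge().fit(**data)  # the dict splats straight into fit
```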


def _maybe_mark_xfail(estimator, check, pytest):
# Mark (estimator, check) pairs as XFAIL if needed (see conditions in
# _should_be_skipped_or_marked())