diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
index 7a5e92f5f960d..baa980c74b61f 100644
--- a/doc/whats_new/v1.2.rst
+++ b/doc/whats_new/v1.2.rst
@@ -282,6 +282,18 @@ Changelog
   :pr:`10805` by :user:`Mathias Andersen ` and
   :pr:`23471` by :user:`Meekail Zain `
 
+:mod:`sklearn.naive_bayes`
+..........................
+
+- |Enhancement| A new parameter `force_alpha` was added to
+  :class:`naive_bayes.BernoulliNB`, :class:`naive_bayes.ComplementNB`,
+  :class:`naive_bayes.CategoricalNB`, and :class:`naive_bayes.MultinomialNB`,
+  allowing users to set the smoothing parameter `alpha` to a very small
+  number, greater than or equal to 0, which was previously rounded up to
+  `1e-10` automatically.
+  :pr:`16747` by :user:`arka204`,
+  :pr:`18805` by :user:`hongshaoyang`,
+  :pr:`22269` by :user:`Meekail Zain `.
+
 Code and Documentation Contributors
 -----------------------------------
 
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index f75c9435aadd1..ff8b0f1454301 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -30,7 +30,7 @@
 from .utils.multiclass import _check_partial_fit_first_call
 from .utils.validation import check_is_fitted, check_non_negative
 from .utils.validation import _check_sample_weight
-from .utils._param_validation import Interval
+from .utils._param_validation import Interval, Hidden, StrOptions
 
 __all__ = [
     "BernoulliNB",
@@ -549,12 +549,14 @@ class _BaseDiscreteNB(_BaseNB):
         "alpha": [Interval(Real, 0, None, closed="left"), "array-like"],
         "fit_prior": ["boolean"],
         "class_prior": ["array-like", None],
+        "force_alpha": ["boolean", Hidden(StrOptions({"warn"}))],
     }
 
-    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
+    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None, force_alpha="warn"):
         self.alpha = alpha
         self.fit_prior = fit_prior
         self.class_prior = class_prior
+        self.force_alpha = force_alpha
 
     @abstractmethod
     def _count(self, X, Y):
@@ -622,6 +624,7 @@ def _check_alpha(self):
         alpha = (
             np.asarray(self.alpha) if not isinstance(self.alpha, Real) else self.alpha
         )
+        alpha_min = np.min(alpha)
         if isinstance(alpha, np.ndarray):
             if not alpha.shape[0] == self.n_features_in_:
                 raise ValueError(
@@ -629,15 +632,26 @@ def _check_alpha(self):
                     f"Got {alpha.shape[0]} elements instead of {self.n_features_in_}."
                 )
             # check that all alpha are positive
-            if np.min(alpha) < 0:
+            if alpha_min < 0:
                 raise ValueError("All values in alpha must be greater than 0.")
-        alpha_min = 1e-10
-        if np.min(alpha) < alpha_min:
+        alpha_lower_bound = 1e-10
+        # TODO(1.4): Replace w/ deprecation of self.force_alpha
+        # See gh #22269
+        _force_alpha = self.force_alpha
+        if _force_alpha == "warn" and alpha_min < alpha_lower_bound:
+            _force_alpha = False
+            warnings.warn(
+                "The default value for `force_alpha` will change to `True` in 1.4. To"
+                " suppress this warning, manually set the value of `force_alpha`.",
+                FutureWarning,
+            )
+        if alpha_min < alpha_lower_bound and not _force_alpha:
             warnings.warn(
                 "alpha too small will result in numeric errors, setting alpha ="
-                f" {alpha_min:.1e}"
+                f" {alpha_lower_bound:.1e}. Use `force_alpha=True` to keep alpha"
+                " unchanged."
             )
-            return np.maximum(alpha, alpha_min)
+            return np.maximum(alpha, alpha_lower_bound)
         return alpha
 
     def partial_fit(self, X, y, classes=None, sample_weight=None):
@@ -812,7 +826,16 @@ class MultinomialNB(_BaseDiscreteNB):
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -881,15 +904,22 @@ class MultinomialNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import MultinomialNB
-    >>> clf = MultinomialNB()
+    >>> clf = MultinomialNB(force_alpha=True)
     >>> clf.fit(X, y)
-    MultinomialNB()
+    MultinomialNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
 
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self, *, alpha=1.0, force_alpha="warn", fit_prior=True, class_prior=None
+    ):
+        super().__init__(
+            alpha=alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+            force_alpha=force_alpha,
+        )
 
     def _more_tags(self):
         return {"requires_positive_X": True}
@@ -928,7 +958,17 @@ class ComplementNB(_BaseDiscreteNB):
     Parameters
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
-        Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
+        Additive (Laplace/Lidstone) smoothing parameter
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Only used in edge case with a single class in the training set.
@@ -1005,9 +1045,9 @@ class ComplementNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import ComplementNB
-    >>> clf = ComplementNB()
+    >>> clf = ComplementNB(force_alpha=True)
     >>> clf.fit(X, y)
-    ComplementNB()
+    ComplementNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1017,8 +1057,21 @@ class ComplementNB(_BaseDiscreteNB):
         "norm": ["boolean"],
     }
 
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None, norm=False):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        fit_prior=True,
+        class_prior=None,
+        norm=False,
+    ):
+        super().__init__(
+            alpha=alpha,
+            force_alpha=force_alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+        )
         self.norm = norm
 
     def _more_tags(self):
@@ -1064,7 +1117,16 @@ class BernoulliNB(_BaseDiscreteNB):
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     binarize : float or None, default=0.0
         Threshold for binarizing (mapping to booleans) of sample features.
@@ -1144,9 +1206,9 @@ class BernoulliNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> Y = np.array([1, 2, 3, 4, 4, 5])
     >>> from sklearn.naive_bayes import BernoulliNB
-    >>> clf = BernoulliNB()
+    >>> clf = BernoulliNB(force_alpha=True)
     >>> clf.fit(X, Y)
-    BernoulliNB()
+    BernoulliNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1156,8 +1218,21 @@ class BernoulliNB(_BaseDiscreteNB):
         "binarize": [None, Interval(Real, 0, None, closed="left")],
     }
 
-    def __init__(self, *, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        binarize=0.0,
+        fit_prior=True,
+        class_prior=None,
+    ):
+        super().__init__(
+            alpha=alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+            force_alpha=force_alpha,
+        )
         self.binarize = binarize
 
     def _check_X(self, X):
@@ -1219,7 +1294,16 @@ class CategoricalNB(_BaseDiscreteNB):
     ----------
     alpha : float, default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -1301,9 +1385,9 @@ class CategoricalNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import CategoricalNB
-    >>> clf = CategoricalNB()
+    >>> clf = CategoricalNB(force_alpha=True)
     >>> clf.fit(X, y)
-    CategoricalNB()
+    CategoricalNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1319,9 +1403,20 @@ class CategoricalNB(_BaseDiscreteNB):
     }
 
     def __init__(
-        self, *, alpha=1.0, fit_prior=True, class_prior=None, min_categories=None
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        fit_prior=True,
+        class_prior=None,
+        min_categories=None,
     ):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+        super().__init__(
+            alpha=alpha,
+            force_alpha=force_alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+        )
         self.min_categories = min_categories
 
     def fit(self, X, y, sample_weight=None):
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index cb404b4c73199..216d2b0fc8d96 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -71,7 +71,7 @@ def test_calibration(data, method, ensemble):
     X_test, y_test = X[n_samples:], y[n_samples:]
 
     # Naive-Bayes
-    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
+    clf = MultinomialNB(force_alpha=True).fit(X_train, y_train, sample_weight=sw_train)
     prob_pos_clf = clf.predict_proba(X_test)[:, 1]
 
     cal_clf = CalibratedClassifierCV(clf, cv=y.size + 1, ensemble=ensemble)
@@ -322,7 +322,7 @@ def test_calibration_prefit():
     X_test, y_test = X[2 * n_samples :], y[2 * n_samples :]
 
     # Naive-Bayes
-    clf = MultinomialNB()
+    clf = MultinomialNB(force_alpha=True)
 
     # Check error if clf not prefit
     unfit_clf = CalibratedClassifierCV(clf, cv="prefit")
     with pytest.raises(NotFittedError):
diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index 2515b297aa153..519bd94bfa218 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -268,6 +268,14 @@ def test_fit_docstring_attributes(name, Estimator):
         est.set_params(n_init="auto")
 
     # TODO(1.4): TO BE REMOVED for 1.4 (avoid FutureWarning)
+    if Estimator.__name__ in (
+        "MultinomialNB",
+        "ComplementNB",
+        "BernoulliNB",
+        "CategoricalNB",
+    ):
+        est.set_params(force_alpha=True)
+
     if Estimator.__name__ == "QuantileRegressor":
         solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"
         est.set_params(solver=solver)
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index 8f12c5ac90354..342d76d357263 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -41,6 +41,9 @@
 from sklearn import datasets
 from sklearn.datasets import load_breast_cancer
 
+msg = "The default value for `force_alpha` will change"
+pytestmark = pytest.mark.filterwarnings(f"ignore:{msg}:FutureWarning")
+
 iris = datasets.load_iris()
 rng = np.random.RandomState(0)
 perm = rng.permutation(iris.target.size)
diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py
index 38fcddf78bbe4..6a46162680373 100644
--- a/sklearn/tests/test_naive_bayes.py
+++ b/sklearn/tests/test_naive_bayes.py
@@ -24,6 +24,8 @@
 DISCRETE_NAIVE_BAYES_CLASSES = [BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB]
 ALL_NAIVE_BAYES_CLASSES = DISCRETE_NAIVE_BAYES_CLASSES + [GaussianNB]
 
+msg = "The default value for `force_alpha` will change"
+pytestmark = pytest.mark.filterwarnings(f"ignore:{msg}:FutureWarning")
 
 # Data is just 6 separable points in the plane
 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
@@ -923,6 +925,63 @@ def test_n_features_deprecation(Estimator):
         est.n_features_
 
 
+# TODO(1.4): Remove
+@pytest.mark.parametrize("Estimator", DISCRETE_NAIVE_BAYES_CLASSES)
+@pytest.mark.parametrize("alpha", [1, [0.1, 1e-11], 1e-12])
+def test_force_alpha_deprecation(Estimator, alpha):
+    if Estimator is CategoricalNB and isinstance(alpha, list):
+        pytest.skip("CategoricalNB does not support array-like alpha values.")
+    X = np.array([[1, 2], [3, 4]])
+    y = np.array([1, 0])
+    alpha_min = 1e-10
+    msg = "The default value for `force_alpha` will change to `True`"
+    est = Estimator(alpha=alpha)
+    est_force = Estimator(alpha=alpha, force_alpha=True)
+    if np.min(alpha) < alpha_min:
+        with pytest.warns(FutureWarning, match=msg):
+            est.fit(X, y)
+    else:
+        est.fit(X, y)
+    est_force.fit(X, y)
+
+
+def test_check_alpha():
+    """An alpha below _ALPHA_MIN must only be kept as provided
+    if force_alpha is True; otherwise it is clipped to _ALPHA_MIN.
+
+    Non-regression test for:
+    https://github.com/scikit-learn/scikit-learn/issues/10772
+    """
+    _ALPHA_MIN = 1e-10
+    b = BernoulliNB(alpha=0, force_alpha=True)
+    assert b._check_alpha() == 0
+
+    alphas = np.array([0.0, 1.0])
+
+    b = BernoulliNB(alpha=alphas, force_alpha=True)
+    # Set `n_features_in_` manually so that `_check_alpha` does not raise
+    b.n_features_in_ = alphas.shape[0]
+    assert_array_equal(b._check_alpha(), alphas)
+
+    msg = (
+        "alpha too small will result in numeric errors, setting alpha = %.1e"
+        % _ALPHA_MIN
+    )
+    b = BernoulliNB(alpha=0, force_alpha=False)
+    with pytest.warns(UserWarning, match=msg):
+        assert b._check_alpha() == _ALPHA_MIN
+
+    b = BernoulliNB(alpha=0)
+    with pytest.warns(UserWarning, match=msg):
+        assert b._check_alpha() == _ALPHA_MIN
+
+    b = BernoulliNB(alpha=alphas, force_alpha=False)
+    # Set `n_features_in_` manually so that `_check_alpha` does not raise
+    b.n_features_in_ = alphas.shape[0]
+    with pytest.warns(UserWarning, match=msg):
+        assert_array_equal(b._check_alpha(), np.array([_ALPHA_MIN, 1.0]))
+
+
 @pytest.mark.parametrize("Estimator", ALL_NAIVE_BAYES_CLASSES)
 def test_predict_joint_proba(Estimator):
     est = Estimator().fit(X2, y2)
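
Usage note (not part of the patch): below is a minimal sketch of the three `force_alpha` states this diff introduces, assuming a scikit-learn 1.2 build with the patch applied. `force_alpha=True` keeps a tiny alpha unchanged, `force_alpha=False` clips it up to `1e-10` with a `UserWarning`, and the hidden default `"warn"` behaves like `False` while additionally emitting the `FutureWarning` about the default flipping to `True` in 1.4.

import warnings

import numpy as np

from sklearn.naive_bayes import MultinomialNB

rng = np.random.RandomState(1)
X = rng.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])

# force_alpha=True: alpha=1e-12 is used as-is (may cause numerical errors);
# no warning is emitted.
MultinomialNB(alpha=1e-12, force_alpha=True).fit(X, y)

# force_alpha=False: alpha is clipped up to 1e-10 and a UserWarning is raised.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    MultinomialNB(alpha=1e-12, force_alpha=False).fit(X, y)
assert any("alpha too small" in str(w.message) for w in caught)

# Default force_alpha="warn": same clipping as False, plus a FutureWarning
# announcing that the default becomes True in 1.4.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    MultinomialNB(alpha=1e-12).fit(X, y)
assert any(issubclass(w.category, FutureWarning) for w in caught)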