[MRG] Adding variable alphaCorrection to classes in naive_bayes.py. by arka204 · Pull Request #16747 · scikit-learn/scikit-learn

Closed
11 changes: 10 additions & 1 deletion doc/whats_new/v0.24.rst
@@ -96,14 +96,23 @@ Changelog
 :pr:`17478` by :user:`Teon Brooks <teonbrooks>` and
 :user:`Mohamed Maskani <maskani-moh>`.

+:mod:`sklearn.naive_bayes`
+..........................
+
+- |Fix| A new parameter `force_alpha` was added to :class:`BernoulliNB` and
+  :class:`MultinomialNB`, allowing users to set `alpha` to a very small
+  number, greater than or equal to 0, which was previously changed
+  automatically to `_ALPHA_MIN`.
+  :pr:`16747` by :user:`arka204`.
+
 :mod:`sklearn.preprocessing`
 ............................

 - |Enhancement| Add ``unit_variance`` to :class:`preprocessing.RobustScaler`,
   which scales output data such that normally distributed features have a
   variance of 1. :pr:`17193` by :user:`Lucy Liu <lucyleeow>` and
   :user:`Mabel Villalba <mabelvj>`.

 :mod:`sklearn.tree`
 ...................
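To illustrate the entry above, here is a minimal sketch of the default versus new opt-in behavior (assumes a scikit-learn build with this patch applied):

import warnings
from sklearn.naive_bayes import BernoulliNB

X = [[0, 1], [1, 0], [1, 1], [0, 0]]
y = [0, 1, 1, 0]

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Default behavior (unchanged): a too-small alpha is clipped to _ALPHA_MIN.
    BernoulliNB(alpha=0).fit(X, y)
    # New opt-in behavior: a tiny alpha is kept as given, with a warning.
    BernoulliNB(alpha=1e-12, force_alpha=True).fit(X, y)

for w in caught:
    print(w.message)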
58 changes: 45 additions & 13 deletions sklearn/naive_bayes.py
@@ -504,9 +504,14 @@ def _check_alpha(self):
             raise ValueError("alpha should be a scalar or a numpy array "
                              "with shape [n_features]")
         if np.min(self.alpha) < _ALPHA_MIN:
-            warnings.warn('alpha too small will result in numeric errors, '
-                          'setting alpha = %.1e' % _ALPHA_MIN)
-            return np.maximum(self.alpha, _ALPHA_MIN)
+            if self.force_alpha:
+                warnings.warn('alpha too small will result in numeric errors, '
+                              'force_alpha was set to True, '
+                              'proceeding without changing alpha.')
+            else:
+                warnings.warn('alpha too small will result in numeric errors, '
+                              'setting alpha = %.1e' % _ALPHA_MIN)
+                return np.maximum(self.alpha, _ALPHA_MIN)
         return self.alpha

     def partial_fit(self, X, y, classes=None, sample_weight=None):
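In isolation, the new branching amounts to the following standalone sketch (written here for illustration only; `_ALPHA_MIN` and the warning texts are copied from the diff above, and the free function stands in for the method):

import warnings
import numpy as np

_ALPHA_MIN = 1e-10


def check_alpha(alpha, force_alpha=False):
    # Mirror of the patched _check_alpha: clip alpha up to _ALPHA_MIN
    # unless force_alpha=True, in which case warn and keep alpha as given.
    if np.min(alpha) < _ALPHA_MIN:
        if force_alpha:
            warnings.warn('alpha too small will result in numeric errors, '
                          'force_alpha was set to True, '
                          'proceeding without changing alpha.')
        else:
            warnings.warn('alpha too small will result in numeric errors, '
                          'setting alpha = %.1e' % _ALPHA_MIN)
            return np.maximum(alpha, _ALPHA_MIN)
    return alpha


print(check_alpha(np.array([0.0]), force_alpha=True))   # [0.]
print(check_alpha(np.array([0.0])))                     # [1.e-10]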
@@ -677,7 +682,12 @@ class MultinomialNB(_BaseDiscreteNB):
     ----------
     alpha : float, default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is too close to 0, alpha is set to _ALPHA_MIN.
+        If True, warn about potential numeric errors and proceed with
+        alpha unchanged.

     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -746,8 +756,10 @@ class MultinomialNB(_BaseDiscreteNB):
     """

     @_deprecate_positional_args
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):
+    def __init__(self, *, alpha=1.0, force_alpha=False,
+                 fit_prior=True, class_prior=None):
         self.alpha = alpha
+        self.force_alpha = force_alpha
         self.fit_prior = fit_prior
         self.class_prior = class_prior
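With the patch applied, MultinomialNB usage could look like this sketch (toy data modeled on the class docstring example):

import numpy as np
from sklearn.naive_bayes import MultinomialNB

rng = np.random.RandomState(0)
X = rng.randint(5, size=(6, 10))  # small matrix of feature counts
y = np.array([1, 2, 3, 4, 5, 6])

# Keep alpha exactly at 1e-12 instead of having it clipped to _ALPHA_MIN;
# a UserWarning about potential numeric errors is still emitted.
clf = MultinomialNB(alpha=1e-12, force_alpha=True)
clf.fit(X, y)
print(clf.predict(X[2:3]))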

@@ -788,7 +800,13 @@ class ComplementNB(_BaseDiscreteNB):
     Parameters
     ----------
     alpha : float, default=1.0
-        Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
+        Additive (Laplace/Lidstone) smoothing parameter
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is too close to 0, alpha is set to _ALPHA_MIN.
+        If True, warn about potential numeric errors and proceed with
+        alpha unchanged.

     fit_prior : bool, default=True
         Only used in edge case with a single class in the training set.
@@ -851,9 +869,10 @@ class ComplementNB(_BaseDiscreteNB):
     """

     @_deprecate_positional_args
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None,
-                 norm=False):
+    def __init__(self, *, alpha=1.0, force_alpha=False, fit_prior=True,
+                 class_prior=None, norm=False):
         self.alpha = alpha
+        self.force_alpha = force_alpha
         self.fit_prior = fit_prior
         self.class_prior = class_prior
         self.norm = norm
@@ -901,7 +920,12 @@ class BernoulliNB(_BaseDiscreteNB):
     ----------
     alpha : float, default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is too close to 0, alpha is set to _ALPHA_MIN.
+        If True, warn about potential numeric errors and proceed with
+        alpha unchanged.

     binarize : float or None, default=0.0
         Threshold for binarizing (mapping to booleans) of sample features.
@@ -966,9 +990,10 @@ class BernoulliNB(_BaseDiscreteNB):
     """

     @_deprecate_positional_args
-    def __init__(self, *, alpha=1.0, binarize=.0, fit_prior=True,
-                 class_prior=None):
+    def __init__(self, *, alpha=1.0, force_alpha=False, binarize=.0,
+                 fit_prior=True, class_prior=None):
         self.alpha = alpha
+        self.force_alpha = force_alpha
         self.binarize = binarize
         self.fit_prior = fit_prior
         self.class_prior = class_prior
@@ -1028,7 +1053,12 @@ class CategoricalNB(_BaseDiscreteNB):
     ----------
     alpha : float, default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is too close to 0, alpha is set to _ALPHA_MIN.
+        If True, warn about potential numeric errors and proceed with
+        alpha unchanged.

     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -1078,8 +1108,10 @@ class CategoricalNB(_BaseDiscreteNB):
     """

     @_deprecate_positional_args
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):
+    def __init__(self, *, alpha=1.0, force_alpha=False, fit_prior=True,
+                 class_prior=None):
         self.alpha = alpha
+        self.force_alpha = force_alpha
         self.fit_prior = fit_prior
         self.class_prior = class_prior
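Since all four estimators inherit _check_alpha from _BaseDiscreteNB, the flag behaves the same everywhere; a quick smoke check (again assuming a patched build):

from sklearn.naive_bayes import (BernoulliNB, CategoricalNB, ComplementNB,
                                 MultinomialNB)

for Estimator in (BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB):
    est = Estimator(alpha=0, force_alpha=True)
    # The constructor stores the flag unmodified; _check_alpha consults it
    # at fit time.
    print(Estimator.__name__, est.alpha, est.force_alpha)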

19 changes: 19 additions & 0 deletions sklearn/tests/test_naive_bayes.py
Review comment from a Member, anchored at `def test_check_alpha():`:
please also test that appropriate warnings are raised in each case (use pytest.warns)

@@ -752,6 +752,25 @@ def test_alpha():
                          X, y, classes=[0, 1])


+def test_check_alpha():
+    # Test force_alpha if alpha < _ALPHA_MIN
+    _ALPHA_MIN = 1e-10  # const
+    msg1 = ('alpha too small will result in numeric errors, '
+            'force_alpha was set to True, '
+            'proceeding without changing alpha.')
+    msg2 = ('alpha too small will result in numeric errors, '
+            'setting alpha = %.1e' % _ALPHA_MIN)
+    b = BernoulliNB(alpha=0, force_alpha=True)
+    with pytest.warns(UserWarning, match=msg1):
+        assert b._check_alpha() == 0
+    b = BernoulliNB(alpha=0, force_alpha=False)
+    with pytest.warns(UserWarning, match=msg2):
+        assert b._check_alpha() == _ALPHA_MIN
+    b = BernoulliNB(alpha=0)
+    with pytest.warns(UserWarning, match=msg2):
+        assert b._check_alpha() == _ALPHA_MIN
+
+
 def test_alpha_vector():
     X = np.array([[1, 0], [1, 1]])
     y = np.array([0, 1])
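A parametrized sketch along the lines of that review comment (hypothetical, not part of this diff) could exercise the warnings for every affected class:

import pytest
from sklearn.naive_bayes import (BernoulliNB, CategoricalNB, ComplementNB,
                                 MultinomialNB)

_ALPHA_MIN = 1e-10


@pytest.mark.parametrize(
    "Estimator", [BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB])
def test_check_alpha_warns(Estimator):
    # force_alpha=True: alpha is kept at 0, but a warning is still raised.
    with pytest.warns(UserWarning, match='proceeding without changing alpha'):
        assert Estimator(alpha=0, force_alpha=True)._check_alpha() == 0
    # Default: alpha is clipped up to _ALPHA_MIN, with a warning.
    with pytest.warns(UserWarning, match='setting alpha'):
        assert Estimator(alpha=0)._check_alpha() == _ALPHA_MIN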