[MRG] Fix MultinomialNB and BernoulliNB alpha=0 bug by yl565 · Pull Request #7477 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[MRG] Fix MultinomialNB and BernoulliNB alpha=0 bug #7477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions sklearn/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# (parts based on earlier work by Mathieu Blondel)
#
# License: BSD 3 clause
import warnings

from abc import ABCMeta, abstractmethod

Expand Down Expand Up @@ -603,6 +604,9 @@ def _get_intercept(self):
intercept_ = property(_get_intercept)


_ALPHA_MIN = 1e-10


class MultinomialNB(BaseDiscreteNB):
"""
Naive Bayes classifier for multinomial models
Expand Down Expand Up @@ -680,10 +684,21 @@ class MultinomialNB(BaseDiscreteNB):
"""

def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
self.alpha = alpha
self._alpha = alpha
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this violates scikit-learn API. Please check the setting of alpha in fit. Also, only clip it when it is used, and don't change the value of self.alpha.

self.fit_prior = fit_prior
self.class_prior = class_prior

@property
def alpha(self):
    """Validated smoothing parameter.

    Raises ValueError when the stored value is negative; values below
    ``_ALPHA_MIN`` are clipped up to ``_ALPHA_MIN`` (with a warning) to
    avoid numeric errors such as log(0) during fitting.
    """
    value = self._alpha
    if value < 0:
        raise ValueError('Smoothing parameter alpha = %e. '
                         'alpha must be >= 0!' % value)
    if value >= _ALPHA_MIN:
        return value
    # Too close to zero: clip to the safe minimum instead of returning it.
    warnings.warn('alpha too small will result in numeric errors, '
                  'setting alpha = %e' % _ALPHA_MIN)
    return _ALPHA_MIN

def _count(self, X, Y):
"""Count and smooth feature occurrences."""
if np.any((X.data if issparse(X) else X) < 0):
Expand Down Expand Up @@ -781,11 +796,22 @@ class BernoulliNB(BaseDiscreteNB):

def __init__(self, alpha=1.0, binarize=.0, fit_prior=True,
class_prior=None):
self.alpha = alpha
self._alpha = alpha
self.binarize = binarize
self.fit_prior = fit_prior
self.class_prior = class_prior

@property
def alpha(self):
    """Validated smoothing parameter.

    A negative stored value raises ValueError. A value smaller than
    ``_ALPHA_MIN`` triggers a warning and ``_ALPHA_MIN`` is returned in
    its place, preventing numeric errors (e.g. log of zero) downstream.
    """
    if self._alpha < 0:
        raise ValueError('Smoothing parameter alpha = %e. '
                         'alpha must be >= 0!' % self._alpha)
    if self._alpha < _ALPHA_MIN:
        # Clip tiny alphas up to the minimum rather than using them raw.
        warnings.warn('alpha too small will result in numeric errors, '
                      'setting alpha = %e' % _ALPHA_MIN)
        return _ALPHA_MIN
    return self._alpha

def _count(self, X, Y):
"""Count and smooth feature occurrences."""
if self.binarize is not None:
Expand Down
29 changes: 28 additions & 1 deletion sklearn/tests/test_naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def test_feature_log_prob_bnb():
denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

# Check manual estimate matches
assert_array_equal(clf.feature_log_prob_, (num - denom))
assert_array_almost_equal(clf.feature_log_prob_, (num - denom))


def test_bnb():
Expand Down Expand Up @@ -536,3 +536,30 @@ def test_naive_bayes_scale_invariance():
for f in [1E-10, 1, 1E10]]
assert_array_equal(labels[0], labels[1])
assert_array_equal(labels[1], labels[2])


def test_alpha_zero():
    # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
    X_dense = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    expected_bnb = np.array([[1, 0], [0, 1]])
    expected_mnb = np.array([[2. / 3, 1. / 3], [0, 1]])

    # Exercise both the dense and the sparse input paths.
    for X in (X_dense, scipy.sparse.csr_matrix(X_dense)):
        clf = BernoulliNB(alpha=0.)
        clf.fit(X, y)
        assert_array_almost_equal(clf.predict_proba(X), expected_bnb)

        clf = MultinomialNB(alpha=0.)
        clf.fit(X, y)
        assert_array_almost_equal(clf.predict_proba(X), expected_mnb)
0