From d3bb0ec102a7b7d5949a5bb35c77190286dfc641 Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Fri, 23 Sep 2016 13:22:56 -0400 Subject: [PATCH 1/8] Fix #5814 --- sklearn/naive_bayes.py | 39 ++++++++++++++++++------------- sklearn/tests/test_naive_bayes.py | 17 +++++++++++++- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 6b0623843cec1..a8d7758f69b00 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -35,6 +35,14 @@ __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB'] +def _inf_replace(x): + return np.clip(x, np.nan_to_num(-np.inf), np.nan_to_num(np.inf)) + + +def _safe_log(x): + return _inf_replace(np.log(x)) + + class BaseNB(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)): """Abstract base class for naive Bayes estimators""" @@ -424,8 +432,8 @@ def _joint_log_likelihood(self, X): X = check_array(X) joint_log_likelihood = [] for i in range(np.size(self.classes_)): - jointi = np.log(self.class_prior_[i]) - n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.sigma_[i, :])) + jointi = _safe_log(self.class_prior_[i]) + n_ij = - 0.5 * np.sum(_safe_log(2. * np.pi * self.sigma_[i, :])) n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1) joint_log_likelihood.append(jointi + n_ij) @@ -449,13 +457,13 @@ def _update_class_log_prior(self, class_prior=None): if len(class_prior) != n_classes: raise ValueError("Number of priors must match number of" " classes.") - self.class_log_prior_ = np.log(class_prior) + self.class_log_prior_ = _safe_log(class_prior) elif self.fit_prior: # empirical prior, with sample_weight taken into account - self.class_log_prior_ = (np.log(self.class_count_) - - np.log(self.class_count_.sum())) + self.class_log_prior_ = (_safe_log(self.class_count_) - + _safe_log(self.class_count_.sum())) else: - self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes) + self.class_log_prior_ = np.zeros(n_classes) - _safe_log(n_classes) def partial_fit(self, X, y, classes=None, sample_weight=None): """Incremental fit on a batch of samples. 
@@ -696,8 +704,8 @@ def _update_feature_log_prob(self): smoothed_fc = self.feature_count_ + self.alpha smoothed_cc = smoothed_fc.sum(axis=1) - self.feature_log_prob_ = (np.log(smoothed_fc) - - np.log(smoothed_cc.reshape(-1, 1))) + self.feature_log_prob_ = (_safe_log(smoothed_fc) - + _safe_log(smoothed_cc.reshape(-1, 1))) def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" @@ -705,7 +713,7 @@ def _joint_log_likelihood(self, X): X = check_array(X, accept_sparse='csr') return (safe_sparse_dot(X, self.feature_log_prob_.T) + - self.class_log_prior_) + self.class_log_prior_) class BernoulliNB(BaseDiscreteNB): @@ -798,8 +806,8 @@ def _update_feature_log_prob(self): smoothed_fc = self.feature_count_ + self.alpha smoothed_cc = self.class_count_ + self.alpha * 2 - self.feature_log_prob_ = (np.log(smoothed_fc) - - np.log(smoothed_cc.reshape(-1, 1))) + self.feature_log_prob_ = _safe_log(smoothed_fc / + smoothed_cc.reshape(-1, 1)) def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" @@ -816,10 +824,9 @@ def _joint_log_likelihood(self, X): if n_features_X != n_features: raise ValueError("Expected input with %d features, got %d instead" % (n_features, n_features_X)) - - neg_prob = np.log(1 - np.exp(self.feature_log_prob_)) - # Compute neg_prob · (1 - X).T as ∑neg_prob - X · neg_prob - jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T) - jll += self.class_log_prior_ + neg_prob.sum(axis=1) + p = np.exp(self.feature_log_prob_).T + jll = (safe_sparse_dot(X, _safe_log(p)) + + safe_sparse_dot(1 - X, _safe_log(1 - p))) + jll += self.class_log_prior_ return jll diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index be278ed884fda..faa27af8f2768 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -480,7 +480,7 @@ def test_feature_log_prob_bnb(): denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T # Check manual estimate matches - assert_array_equal(clf.feature_log_prob_, (num - denom)) + assert_array_almost_equal(clf.feature_log_prob_, (num - denom)) def test_bnb(): @@ -536,3 +536,18 @@ def test_naive_bayes_scale_invariance(): for f in [1E-10, 1, 1E10]] assert_array_equal(labels[0], labels[1]) assert_array_equal(labels[1], labels[2]) + + +def test_alpha_zero(): + # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case + X = np.array([[1, 0], [1, 1]]) + y = np.array([0, 1]) + nb = MultinomialNB(alpha=0.) + nb.fit(X, y) + prob = np.array([[2/3, 1/3], [0, 1]]) + assert_array_almost_equal(nb.predict_proba(X), prob) + + nb = BernoulliNB(alpha=0.) 
+ nb.fit(X, y) + prob = np.array([[1, 0], [0, 1]]) + assert_array_almost_equal(nb.predict_proba(X), prob) From 14a9bafad6085d9ea2fd5d3064bc0614e14ffa78 Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Fri, 23 Sep 2016 13:35:13 -0400 Subject: [PATCH 2/8] Fix pep8 in naive_bayes.py:716 --- sklearn/naive_bayes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index a8d7758f69b00..285f83a81a162 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -713,7 +713,7 @@ def _joint_log_likelihood(self, X): X = check_array(X, accept_sparse='csr') return (safe_sparse_dot(X, self.feature_log_prob_.T) + - self.class_log_prior_) + self.class_log_prior_) class BernoulliNB(BaseDiscreteNB): @@ -806,8 +806,8 @@ def _update_feature_log_prob(self): smoothed_fc = self.feature_count_ + self.alpha smoothed_cc = self.class_count_ + self.alpha * 2 - self.feature_log_prob_ = _safe_log(smoothed_fc / - smoothed_cc.reshape(-1, 1)) + self.feature_log_prob_ = (_safe_log(smoothed_fc) - + _safe_log(smoothed_cc.reshape(-1, 1))) def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" From 1468ab57480411875be361586f3683197e1594a9 Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Fri, 23 Sep 2016 19:39:11 -0400 Subject: [PATCH 3/8] Fix sparse matrix incompatibility --- sklearn/naive_bayes.py | 26 +++++++++++++++++--------- sklearn/tests/test_naive_bayes.py | 12 ++++++++++++ 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 285f83a81a162..4bb02654a52c3 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -35,12 +35,19 @@ __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB'] -def _inf_replace(x): - return np.clip(x, np.nan_to_num(-np.inf), np.nan_to_num(np.inf)) +def _safe_log(x): + """ + Setting log(0) = -inf as -th where `th` is a very larger number. + This is for avoid summation errors, e.g. np.log(0) - np.log(0) = nan, which + may happen while calculating dot product of log probability matrices for + joint likelihood estimation -def _safe_log(x): - return _inf_replace(np.log(x)) + Note a too large `th` will cause overflow during dot product, which will + also result in wrong estimation of join likelihood + """ + th = 1e30 + return np.clip(np.log(x), -th, th) class BaseNB(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)): @@ -824,9 +831,10 @@ def _joint_log_likelihood(self, X): if n_features_X != n_features: raise ValueError("Expected input with %d features, got %d instead" % (n_features, n_features_X)) - p = np.exp(self.feature_log_prob_).T - jll = (safe_sparse_dot(X, _safe_log(p)) + - safe_sparse_dot(1 - X, _safe_log(1 - p))) - jll += self.class_log_prior_ - return jll + # Compute X*logp + (1-X)*log(1-p) as X*[logp - log(1-p)] + ∑log(1-p) + # for sparse array support + logp = self.feature_log_prob_.T + log1_p = _safe_log(1 - np.exp(logp)) + return (safe_sparse_dot(X, logp - log1_p) + log1_p.sum(axis=0) + + self.class_log_prior_) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index faa27af8f2768..214bf16e2ba3c 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -542,12 +542,24 @@ def test_alpha_zero(): # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case X = np.array([[1, 0], [1, 1]]) y = np.array([0, 1]) + nb = BernoulliNB(alpha=0.) 
+ nb.fit(X, y) + prob = np.array([[1, 0], [0, 1]]) + assert_array_almost_equal(nb.predict_proba(X), prob) + nb = MultinomialNB(alpha=0.) nb.fit(X, y) prob = np.array([[2/3, 1/3], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) + # Test sparse X + X = scipy.sparse.csr_matrix(X) nb = BernoulliNB(alpha=0.) nb.fit(X, y) prob = np.array([[1, 0], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) + + nb = MultinomialNB(alpha=0.) + nb.fit(X, y) + prob = np.array([[2/3, 1/3], [0, 1]]) + assert_array_almost_equal(nb.predict_proba(X), prob) From ae281a4b1b53617e163bcf937ab84025a2d72c60 Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Fri, 23 Sep 2016 21:30:49 -0400 Subject: [PATCH 4/8] Fix python 2.7 problem in test_naive_bayes --- sklearn/tests/test_naive_bayes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 214bf16e2ba3c..893d885bde529 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -549,7 +549,7 @@ def test_alpha_zero(): nb = MultinomialNB(alpha=0.) nb.fit(X, y) - prob = np.array([[2/3, 1/3], [0, 1]]) + prob = np.array([[2./3, 1./3], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) # Test sparse X @@ -561,5 +561,5 @@ def test_alpha_zero(): nb = MultinomialNB(alpha=0.) nb.fit(X, y) - prob = np.array([[2/3, 1/3], [0, 1]]) + prob = np.array([[2./3, 1./3], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) From 0200f8c4f25de1e18aaae4666ace9a627635e9ce Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Fri, 23 Sep 2016 22:52:05 -0400 Subject: [PATCH 5/8] Make sure the values are probabilities before log transform --- sklearn/naive_bayes.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 4bb02654a52c3..c60f44e8c0425 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -35,11 +35,16 @@ __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB'] -def _safe_log(x): +def _safe_logprob(p): """ Setting log(0) = -inf as -th where `th` is a very larger number. - This is for avoid summation errors, e.g. np.log(0) - np.log(0) = nan, which + Parameters + ---------- + p : numpy.array + Probabilities values. Must be within [0, 1] range. + + This avoids summation errors, e.g. np.log(0) - np.log(0) = nan, which may happen while calculating dot product of log probability matrices for joint likelihood estimation @@ -47,7 +52,11 @@ def _safe_log(x): also result in wrong estimation of join likelihood """ th = 1e30 - return np.clip(np.log(x), -th, th) + p = np.asarray(p) + if (p > 1).any() or (p < 0).any(): + raise ValueError('Input `p` must be within [0, 1] range!') + + return np.clip(np.log(p), -th, 0) class BaseNB(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)): @@ -439,8 +448,8 @@ def _joint_log_likelihood(self, X): X = check_array(X) joint_log_likelihood = [] for i in range(np.size(self.classes_)): - jointi = _safe_log(self.class_prior_[i]) - n_ij = - 0.5 * np.sum(_safe_log(2. * np.pi * self.sigma_[i, :])) + jointi = _safe_logprob(self.class_prior_[i]) + n_ij = - 0.5 * np.sum(np.log(2. 
* np.pi * self.sigma_[i, :])) n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1) joint_log_likelihood.append(jointi + n_ij) @@ -464,13 +473,13 @@ def _update_class_log_prior(self, class_prior=None): if len(class_prior) != n_classes: raise ValueError("Number of priors must match number of" " classes.") - self.class_log_prior_ = _safe_log(class_prior) + self.class_log_prior_ = _safe_logprob(class_prior) elif self.fit_prior: # empirical prior, with sample_weight taken into account - self.class_log_prior_ = (_safe_log(self.class_count_) - - _safe_log(self.class_count_.sum())) + self.class_log_prior_ = _safe_logprob(self.class_count_ / + self.class_count_.sum()) else: - self.class_log_prior_ = np.zeros(n_classes) - _safe_log(n_classes) + self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes) def partial_fit(self, X, y, classes=None, sample_weight=None): """Incremental fit on a batch of samples. @@ -711,8 +720,8 @@ def _update_feature_log_prob(self): smoothed_fc = self.feature_count_ + self.alpha smoothed_cc = smoothed_fc.sum(axis=1) - self.feature_log_prob_ = (_safe_log(smoothed_fc) - - _safe_log(smoothed_cc.reshape(-1, 1))) + self.feature_log_prob_ = _safe_logprob(smoothed_fc / + smoothed_cc.reshape(-1, 1)) def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" @@ -813,8 +822,8 @@ def _update_feature_log_prob(self): smoothed_fc = self.feature_count_ + self.alpha smoothed_cc = self.class_count_ + self.alpha * 2 - self.feature_log_prob_ = (_safe_log(smoothed_fc) - - _safe_log(smoothed_cc.reshape(-1, 1))) + self.feature_log_prob_ = _safe_logprob(smoothed_fc / + smoothed_cc.reshape(-1, 1)) def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" @@ -835,6 +844,6 @@ def _joint_log_likelihood(self, X): # Compute X*logp + (1-X)*log(1-p) as X*[logp - log(1-p)] + ∑log(1-p) # for sparse array support logp = self.feature_log_prob_.T - log1_p = _safe_log(1 - np.exp(logp)) + log1_p = _safe_logprob(1 - np.exp(logp)) return (safe_sparse_dot(X, logp - log1_p) + log1_p.sum(axis=0) + self.class_log_prior_) From 4fdc5d75fa579289aed969fbcc3f15f48451fb46 Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Fri, 23 Sep 2016 22:58:58 -0400 Subject: [PATCH 6/8] Improve docstring of `_safe_logprob` --- sklearn/naive_bayes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index c60f44e8c0425..93e1bb6f16afe 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -41,15 +41,15 @@ def _safe_logprob(p): Parameters ---------- - p : numpy.array + p : array-like or scalar Probabilities values. Must be within [0, 1] range. This avoids summation errors, e.g. 
np.log(0) - np.log(0) = nan, which may happen while calculating dot product of log probability matrices for joint likelihood estimation - Note a too large `th` will cause overflow during dot product, which will - also result in wrong estimation of join likelihood + Note that `th` too large will cause overflow during dot product and wrongly + estimate the joint likelihood """ th = 1e30 p = np.asarray(p) From 830317d9e6ddc0b44bbf7b1cba8e36b17e87f7b5 Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Mon, 24 Oct 2016 08:53:48 -0400 Subject: [PATCH 7/8] Clip alpha solution --- sklearn/naive_bayes.py | 82 +++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 93e1bb6f16afe..a1b3306588e2c 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -15,6 +15,7 @@ # (parts based on earlier work by Mathieu Blondel) # # License: BSD 3 clause +import warnings from abc import ABCMeta, abstractmethod @@ -35,30 +36,6 @@ __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB'] -def _safe_logprob(p): - """ - Setting log(0) = -inf as -th where `th` is a very larger number. - - Parameters - ---------- - p : array-like or scalar - Probabilities values. Must be within [0, 1] range. - - This avoids summation errors, e.g. np.log(0) - np.log(0) = nan, which - may happen while calculating dot product of log probability matrices for - joint likelihood estimation - - Note that `th` too large will cause overflow during dot product and wrongly - estimate the joint likelihood - """ - th = 1e30 - p = np.asarray(p) - if (p > 1).any() or (p < 0).any(): - raise ValueError('Input `p` must be within [0, 1] range!') - - return np.clip(np.log(p), -th, 0) - - class BaseNB(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)): """Abstract base class for naive Bayes estimators""" @@ -448,7 +425,7 @@ def _joint_log_likelihood(self, X): X = check_array(X) joint_log_likelihood = [] for i in range(np.size(self.classes_)): - jointi = _safe_logprob(self.class_prior_[i]) + jointi = np.log(self.class_prior_[i]) n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.sigma_[i, :])) n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1) @@ -473,11 +450,11 @@ def _update_class_log_prior(self, class_prior=None): if len(class_prior) != n_classes: raise ValueError("Number of priors must match number of" " classes.") - self.class_log_prior_ = _safe_logprob(class_prior) + self.class_log_prior_ = np.log(class_prior) elif self.fit_prior: # empirical prior, with sample_weight taken into account - self.class_log_prior_ = _safe_logprob(self.class_count_ / - self.class_count_.sum()) + self.class_log_prior_ = (np.log(self.class_count_) - + np.log(self.class_count_.sum())) else: self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes) @@ -627,6 +604,9 @@ def _get_intercept(self): intercept_ = property(_get_intercept) +_ALPHA_MIN = 1e-10 + + class MultinomialNB(BaseDiscreteNB): """ Naive Bayes classifier for multinomial models @@ -704,10 +684,21 @@ class MultinomialNB(BaseDiscreteNB): """ def __init__(self, alpha=1.0, fit_prior=True, class_prior=None): - self.alpha = alpha + self._alpha = alpha self.fit_prior = fit_prior self.class_prior = class_prior + @property + def alpha(self): + if self._alpha < 0: + raise ValueError('Smoothing parameter alpha = %e. ' + 'alpha must be >= 0!' 
% self._alpha) + if self._alpha == 0: + warnings.warn('alpha = 0 will result in numeric errors, setting' + ' alpha = %e' % _ALPHA_MIN) + return _ALPHA_MIN + return self._alpha + def _count(self, X, Y): """Count and smooth feature occurrences.""" if np.any((X.data if issparse(X) else X) < 0): @@ -720,8 +711,8 @@ def _update_feature_log_prob(self): smoothed_fc = self.feature_count_ + self.alpha smoothed_cc = smoothed_fc.sum(axis=1) - self.feature_log_prob_ = _safe_logprob(smoothed_fc / - smoothed_cc.reshape(-1, 1)) + self.feature_log_prob_ = (np.log(smoothed_fc) - + np.log(smoothed_cc.reshape(-1, 1))) def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" @@ -805,11 +796,22 @@ class BernoulliNB(BaseDiscreteNB): def __init__(self, alpha=1.0, binarize=.0, fit_prior=True, class_prior=None): - self.alpha = alpha + self._alpha = alpha self.binarize = binarize self.fit_prior = fit_prior self.class_prior = class_prior + @property + def alpha(self): + if self._alpha < 0: + raise ValueError('Smoothing parameter alpha = %e. ' + 'alpha must be >= 0!' % self._alpha) + if self._alpha == 0: + warnings.warn('alpha = 0 will result in numeric errors, setting' + ' alpha = %e' % _ALPHA_MIN) + return _ALPHA_MIN + return self._alpha + def _count(self, X, Y): """Count and smooth feature occurrences.""" if self.binarize is not None: @@ -822,8 +824,8 @@ def _update_feature_log_prob(self): smoothed_fc = self.feature_count_ + self.alpha smoothed_cc = self.class_count_ + self.alpha * 2 - self.feature_log_prob_ = _safe_logprob(smoothed_fc / - smoothed_cc.reshape(-1, 1)) + self.feature_log_prob_ = (np.log(smoothed_fc) - + np.log(smoothed_cc.reshape(-1, 1))) def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" @@ -841,9 +843,9 @@ def _joint_log_likelihood(self, X): raise ValueError("Expected input with %d features, got %d instead" % (n_features, n_features_X)) - # Compute X*logp + (1-X)*log(1-p) as X*[logp - log(1-p)] + ∑log(1-p) - # for sparse array support - logp = self.feature_log_prob_.T - log1_p = _safe_logprob(1 - np.exp(logp)) - return (safe_sparse_dot(X, logp - log1_p) + log1_p.sum(axis=0) + - self.class_log_prior_) + neg_prob = np.log(1 - np.exp(self.feature_log_prob_)) + # Compute neg_prob · (1 - X).T as ∑neg_prob - X · neg_prob + jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T) + jll += self.class_log_prior_ + neg_prob.sum(axis=1) + + return jll From 36a9f51cdf81436c1923622f3909baeea62bf1d8 Mon Sep 17 00:00:00 2001 From: Yichuan Liu Date: Mon, 24 Oct 2016 08:58:02 -0400 Subject: [PATCH 8/8] Clip alpha solution --- sklearn/naive_bayes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index a1b3306588e2c..931cafc2d6f86 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -693,9 +693,9 @@ def alpha(self): if self._alpha < 0: raise ValueError('Smoothing parameter alpha = %e. ' 'alpha must be >= 0!' % self._alpha) - if self._alpha == 0: - warnings.warn('alpha = 0 will result in numeric errors, setting' - ' alpha = %e' % _ALPHA_MIN) + if self._alpha < _ALPHA_MIN: + warnings.warn('alpha too small will result in numeric errors, ' + 'setting alpha = %e' % _ALPHA_MIN) return _ALPHA_MIN return self._alpha @@ -806,9 +806,9 @@ def alpha(self): if self._alpha < 0: raise ValueError('Smoothing parameter alpha = %e. ' 'alpha must be >= 0!' 
% self._alpha) - if self._alpha == 0: - warnings.warn('alpha = 0 will result in numeric errors, setting' - ' alpha = %e' % _ALPHA_MIN) + if self._alpha < _ALPHA_MIN: + warnings.warn('alpha too small will result in numeric errors, ' + 'setting alpha = %e' % _ALPHA_MIN) return _ALPHA_MIN return self._alpha
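
For reference, below is a minimal numpy sketch of the failure mode behind #5814 and of the clipping strategy the last two commits settle on. It is an illustration only: the toy counts mirror the `test_alpha_zero` fixture, `bernoulli_jll` re-derives the same quantities the BernoulliNB hunks compute, and every name except `_ALPHA_MIN` is made up for the example rather than taken from scikit-learn.

    import numpy as np

    _ALPHA_MIN = 1e-10  # the floor introduced by the patch; everything else here is illustrative

    # Toy data from test_alpha_zero: one sample per class; feature 2 never fires in class 0.
    X = np.array([[1., 0.],
                  [1., 1.]])
    feature_count = X.copy()                        # per-class counts of x_i = 1
    class_count = np.array([1., 1.])
    class_log_prior = np.log(np.array([0.5, 0.5]))

    def bernoulli_jll(alpha):
        """Joint log-likelihood, mirroring what the BernoulliNB hunks compute."""
        smoothed_fc = feature_count + alpha
        smoothed_cc = class_count + alpha * 2
        flp = np.log(smoothed_fc) - np.log(smoothed_cc.reshape(-1, 1))   # log P(x_i=1 | y)
        neg_prob = np.log(1 - np.exp(flp))                               # log P(x_i=0 | y)
        # X*log(p) + (1-X)*log(1-p), rewritten as X*(log(p) - log(1-p)) + sum(log(1-p))
        # so that only a dot product with X is needed (this is what keeps sparse X supported).
        return X @ (flp - neg_prob).T + class_log_prior + neg_prob.sum(axis=1)

    with np.errstate(divide='ignore', invalid='ignore'):
        print(bernoulli_jll(0.0))         # contains nan: log(0) terms meet and cancel badly
    print(bernoulli_jll(_ALPHA_MIN))      # finite; argmax per row gives the expected labels [0, 1]

Clipping `alpha` keeps every smoothed probability strictly inside (0, 1), so no logarithm ever reaches -inf and the `0 * inf` and `inf - inf` cases above never arise; that is why the final two commits drop the `_safe_log` / `_safe_logprob` helpers in favour of flooring `alpha` at `_ALPHA_MIN` with a warning.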