From 9fe075d2cc493b679a7e3a85f07defd51fad84c7 Mon Sep 17 00:00:00 2001 From: Harold Fox Date: Fri, 15 Jun 2018 10:50:37 -0400 Subject: [PATCH 1/2] wip --- sklearn/neighbors/kde.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/neighbors/kde.py b/sklearn/neighbors/kde.py index 3cfdbc63042b7..2da82aea52c3d 100644 --- a/sklearn/neighbors/kde.py +++ b/sklearn/neighbors/kde.py @@ -144,7 +144,7 @@ def score_samples(self, X): Returns ------- density : ndarray, shape (n_samples,) - The array of log(density) evaluations. + The array of log(density) evaluations. These are normalized to be probability densities, so values will be low for high-dimensional data. """ # The returned density is normalized to the number of points. # For it to be a probability, we must scale it. For this reason @@ -159,7 +159,7 @@ def score_samples(self, X): return log_density def score(self, X, y=None): - """Compute the total log probability under the model. + """Compute the total log probability density under the model. Parameters ---------- @@ -170,7 +170,7 @@ def score(self, X, y=None): Returns ------- logprob : float - Total log-likelihood of the data in X. + Total log-likelihood of the data in X. This is normalized to be a probability density, so the value will be low for high-dimensional data. """ return np.sum(self.score_samples(X)) From d63d4b7036d5528f5b1ee51c3f0ff4959045aafd Mon Sep 17 00:00:00 2001 From: Harold Fox Date: Fri, 15 Jun 2018 11:00:50 -0400 Subject: [PATCH 2/2] wip --- sklearn/neighbors/kde.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/neighbors/kde.py b/sklearn/neighbors/kde.py index 2da82aea52c3d..b4d48627824fb 100644 --- a/sklearn/neighbors/kde.py +++ b/sklearn/neighbors/kde.py @@ -144,7 +144,9 @@ def score_samples(self, X): Returns ------- density : ndarray, shape (n_samples,) - The array of log(density) evaluations. These are normalized to be probability densities, so values will be low for high-dimensional data. + The array of log(density) evaluations. These are normalized to be + probability densities, so values will be low for high-dimensional + data. """ # The returned density is normalized to the number of points. # For it to be a probability, we must scale it. For this reason @@ -170,7 +172,9 @@ def score(self, X, y=None): Returns ------- logprob : float - Total log-likelihood of the data in X. This is normalized to be a probability density, so the value will be low for high-dimensional data. + Total log-likelihood of the data in X. This is normalized to be a + probability density, so the value will be low for high-dimensional + data. """ return np.sum(self.score_samples(X))