diff --git a/sklearn/neighbors/kde.py b/sklearn/neighbors/kde.py index 3cfdbc63042b7..b4d48627824fb 100644 --- a/sklearn/neighbors/kde.py +++ b/sklearn/neighbors/kde.py @@ -144,7 +144,9 @@ def score_samples(self, X): Returns ------- density : ndarray, shape (n_samples,) - The array of log(density) evaluations. + The array of log(density) evaluations. These are normalized to be + probability densities, so values will be low for high-dimensional + data. """ # The returned density is normalized to the number of points. # For it to be a probability, we must scale it. For this reason @@ -159,7 +161,7 @@ def score_samples(self, X): return log_density def score(self, X, y=None): - """Compute the total log probability under the model. + """Compute the total log probability density under the model. Parameters ---------- @@ -170,7 +172,9 @@ def score(self, X, y=None): Returns ------- logprob : float - Total log-likelihood of the data in X. + Total log-likelihood of the data in X. This is normalized to be a + probability density, so the value will be low for high-dimensional + data. """ return np.sum(self.score_samples(X))