From 2a9d3f7c2a458d7b88d5d0e232a83717e2505332 Mon Sep 17 00:00:00 2001 From: Kendrick Shaw Date: Fri, 4 Jan 2019 17:44:00 +0000 Subject: [PATCH 1/2] Fix calibration_curve docstring for empty bins The code for calibration_curve will remove bins that are empty, thus the number of bins in the return value may be smaller than n_bins. This commit fixes the docstring to document this (existing) behavior. --- sklearn/calibration.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index b563cdee143cb..0382a9b7e8ac6 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -543,15 +543,17 @@ def calibration_curve(y_true, y_prob, normalize=False, n_bins=5): onto 0 and the largest one onto 1. n_bins : int - Number of bins. A bigger number requires more data. + Number of bins. A bigger number requires more data. Bins with no data + points (i.e. without corresponding values in y_prob) will not be + returned, thus there may be fewer than n_bins in the return value. Returns ------- - prob_true : array, shape (n_bins,) - The true probability in each bin (fraction of positives). + prob_true : array, shape (n_non_empty_bins,) + The true probability in each non-empty bin (fraction of positives). - prob_pred : array, shape (n_bins,) - The mean predicted probability in each bin. + prob_pred : array, shape (n_non_empty_bins,) + The mean predicted probability in each non-empty bin. References ---------- From ef3a16b9f5b450a27fb9b104f97fed5e54880845 Mon Sep 17 00:00:00 2001 From: Kendrick Shaw Date: Fri, 11 Jan 2019 16:08:13 +0000 Subject: [PATCH 2/2] Improve clarity of docstring for calibration_curve Changed `shape (n_non_empty_bins,)` to `shape (n_bins,) or smaller` based on reviewer feedback on the pull request. 
--- sklearn/calibration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0382a9b7e8ac6..9de7cb93d1322 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -549,11 +549,11 @@ def calibration_curve(y_true, y_prob, normalize=False, n_bins=5): Returns ------- - prob_true : array, shape (n_non_empty_bins,) - The true probability in each non-empty bin (fraction of positives). + prob_true : array, shape (n_bins,) or smaller + The true probability in each bin (fraction of positives). - prob_pred : array, shape (n_non_empty_bins,) - The mean predicted probability in each non-empty bin. + prob_pred : array, shape (n_bins,) or smaller + The mean predicted probability in each bin. References ----------