FIX Ignore zero sample weights in precision recall curve (#18328) · thomasjpfan/scikit-learn@d4696c0 · GitHub

Commit d4696c0

albertvillanova and Alonso Silva Allende authored and committed
FIX Ignore zero sample weights in precision recall curve (scikit-learn#18328)
Co-authored-by: Alonso Silva Allende <alonsosilva@gmail.com>
1 parent 2f5fdea commit d4696c0

4 files changed: +89 -66 lines changed

doc/whats_new/v1.0.rst

Lines changed: 9 additions & 3 deletions
@@ -241,6 +241,12 @@ Changelog
   are integral.
   :pr:`9843` by :user:`Jon Crall <Erotemic>`.
 
+- |Fix| Samples with zero `sample_weight` values do not affect the results
+  from :func:`metrics.det_curve`, :func:`metrics.precision_recall_curve`
+  and :func:`metrics.roc_curve`.
+  :pr:`18328` by :user:`Albert Villanova del Moral <albertvillanova>` and
+  :user:`Alonso Silva Allende <alonsosilvaallende>`.
+
 :mod:`sklearn.model_selection`
 ..............................

@@ -325,9 +331,9 @@ Changelog
   :pr:`19459` by :user:`Cindy Bezuidenhout <cinbez>` and
   :user:`Clifford Akai-Nettey<cliffordEmmanuel>`.
 
-- |Fix| Fixed a bug in :func:`utils.sparsefuncs.mean_variance_axis` where the
-  precision of the computed variance was very poor when the real variance is
-  exactly zero. :pr:`19766` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
+- |Fix| Fixed a bug in :func:`utils.sparsefuncs.mean_variance_axis` where the
+  precision of the computed variance was very poor when the real variance is
+  exactly zero. :pr:`19766` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
 
 Code and Documentation Contributors
 -----------------------------------

(The removed and re-added lines above render identically; the original hunk changed only whitespace.)
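
As a quick illustration of the behavior the new changelog entry promises, a minimal sketch using only the public API (the toy arrays are illustrative, not taken from the test suite):

    from numpy.testing import assert_allclose
    from sklearn.metrics import precision_recall_curve

    y_true = [0, 0, 1, 1, 1]
    y_score = [0.1, 0.2, 0.3, 0.4, 0.5]

    # A sample carrying zero weight ...
    with_zero = precision_recall_curve(
        y_true, y_score, sample_weight=[1, 1, 1, 1, 0])
    # ... must yield the same curve as dropping that sample outright.
    dropped = precision_recall_curve(
        y_true[:-1], y_score[:-1], sample_weight=[1, 1, 1, 1])

    for arr_a, arr_b in zip(with_zero, dropped):
        assert_allclose(arr_a, arr_b)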

sklearn/metrics/_ranking.py

Lines changed: 14 additions & 5 deletions
@@ -27,6 +27,7 @@
 
 from ..utils import assert_all_finite
 from ..utils import check_consistent_length
+from ..utils.validation import _check_sample_weight
 from ..utils import column_or_1d, check_array
 from ..utils.multiclass import type_of_target
 from ..utils.extmath import stable_cumsum
@@ -291,14 +292,14 @@ def det_curve(y_true, y_score, pos_label=None, sample_weight=None):
     >>> thresholds
     array([0.35, 0.4 , 0.8 ])
     """
-    if len(np.unique(y_true)) != 2:
-        raise ValueError("Only one class present in y_true. Detection error "
-                         "tradeoff curve is not defined in that case.")
-
     fps, tps, thresholds = _binary_clf_curve(
         y_true, y_score, pos_label=pos_label, sample_weight=sample_weight
     )
 
+    if len(np.unique(y_true)) != 2:
+        raise ValueError("Only one class present in y_true. Detection error "
+                         "tradeoff curve is not defined in that case.")
+
     fns = tps[-1] - tps
     p_count = tps[-1]
     n_count = fps[-1]
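
With this reordering, `_binary_clf_curve` validates the inputs first, so format errors (e.g. multiclass input) surface before the single-class check; the error itself is unchanged. A quick sketch of that error:

    from sklearn.metrics import det_curve

    try:
        det_curve([1, 1, 1], [0.1, 0.5, 0.9])  # only one class in y_true
    except ValueError as exc:
        print(exc)
    # Only one class present in y_true. Detection error tradeoff curve is
    # not defined in that case.
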
@@ -696,8 +697,14 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):
     assert_all_finite(y_true)
     assert_all_finite(y_score)
 
+    # Filter out zero-weighted samples, as they should not impact the result
     if sample_weight is not None:
         sample_weight = column_or_1d(sample_weight)
+        sample_weight = _check_sample_weight(sample_weight, y_true)
+        nonzero_weight_mask = sample_weight != 0
+        y_true = y_true[nonzero_weight_mask]
+        y_score = y_score[nonzero_weight_mask]
+        sample_weight = sample_weight[nonzero_weight_mask]
 
     pos_label = _check_pos_label_consistency(pos_label, y_true)
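
The heart of the fix is a boolean mask over the weights. A standalone sketch of the same idiom in plain NumPy (names mirror the diff; `_check_sample_weight` is approximated by `np.asarray` because this sketch runs outside scikit-learn):

    import numpy as np

    y_true = np.array([0, 0, 1, 1, 1])
    y_score = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    sample_weight = np.asarray([1.0, 1.0, 1.0, 0.5, 0.0])

    # Keep only samples with nonzero weight; zero-weight samples must not
    # move the thresholds of the resulting curve.
    nonzero_weight_mask = sample_weight != 0
    y_true = y_true[nonzero_weight_mask]
    y_score = y_score[nonzero_weight_mask]
    sample_weight = sample_weight[nonzero_weight_mask]

    print(y_true)         # [0 0 1 1]
    print(sample_weight)  # [1.  1.  1.  0.5]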

@@ -759,7 +766,9 @@ def precision_recall_curve(y_true, probas_pred, *, pos_label=None,
         pos_label should be explicitly given.
 
     probas_pred : ndarray of shape (n_samples,)
-        Estimated probabilities or output of a decision function.
+        Target scores, can either be probability estimates of the positive
+        class, or non-thresholded measure of decisions (as returned by
+        `decision_function` on some classifiers).
 
     pos_label : int or str, default=None
         The label of the positive class.
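
The reworded `probas_pred` description reflects what the function accepts: probability estimates of the positive class or raw decision scores. A sketch (the estimator and data here are illustrative choices, not from the diff):

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import precision_recall_curve

    X, y = make_classification(random_state=0)
    clf = LogisticRegression(random_state=0).fit(X, y)

    # Probabilities of the positive class ...
    p1, r1, _ = precision_recall_curve(y, clf.predict_proba(X)[:, 1])
    # ... or non-thresholded decision scores. The sigmoid is monotone, so
    # the precision/recall pairs coincide; only the thresholds differ.
    p2, r2, _ = precision_recall_curve(y, clf.decision_function(X))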

sklearn/metrics/tests/test_ranking.py

Lines changed: 64 additions & 57 deletions
@@ -41,6 +41,13 @@
 ###############################################################################
 # Utilities for testing
 
+CURVE_FUNCS = [
+    det_curve,
+    precision_recall_curve,
+    roc_curve,
+]
+
+
 def make_prediction(dataset=None, binary=False):
     """Make some classification predictions on a toy dataset using a SVC
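
`CURVE_FUNCS` gives the suite one list to parametrize over whenever a property should hold for all three curve functions. A hypothetical test (not part of this diff) showing the pattern:

    import pytest
    from sklearn.metrics import det_curve, precision_recall_curve, roc_curve

    CURVE_FUNCS = [det_curve, precision_recall_curve, roc_curve]

    @pytest.mark.parametrize("curve_func", CURVE_FUNCS)
    def test_returns_three_arrays(curve_func):
        # Each curve function returns a 3-tuple of 1d arrays.
        out = curve_func([0, 1, 1, 0], [0.1, 0.4, 0.35, 0.8])
        assert len(out) == 3
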
@@ -73,16 +80,16 @@ def make_prediction(dataset=None, binary=False):
 
     # run classifier, get class probabilities and label predictions
     clf = svm.SVC(kernel='linear', probability=True, random_state=0)
-    probas_pred = clf.fit(X[:half], y[:half]).predict_proba(X[half:])
+    y_score = clf.fit(X[:half], y[:half]).predict_proba(X[half:])
 
     if binary:
         # only interested in probabilities of the positive case
         # XXX: do we really want a special API for the binary case?
-        probas_pred = probas_pred[:, 1]
+        y_score = y_score[:, 1]
 
     y_pred = clf.predict(X[half:])
     y_true = y[half:]
-    return y_true, y_pred, probas_pred
+    return y_true, y_pred, y_score
 
 
 ###############################################################################
@@ -183,14 +190,14 @@ def _partial_roc(y_true, y_predict, max_fpr):
 @pytest.mark.parametrize('drop', [True, False])
 def test_roc_curve(drop):
     # Test Area under Receiver Operating Characteristic (ROC) curve
-    y_true, _, probas_pred = make_prediction(binary=True)
-    expected_auc = _auc(y_true, probas_pred)
+    y_true, _, y_score = make_prediction(binary=True)
+    expected_auc = _auc(y_true, y_score)
 
-    fpr, tpr, thresholds = roc_curve(y_true, probas_pred,
+    fpr, tpr, thresholds = roc_curve(y_true, y_score,
                                      drop_intermediate=drop)
     roc_auc = auc(fpr, tpr)
     assert_array_almost_equal(roc_auc, expected_auc, decimal=2)
-    assert_almost_equal(roc_auc, roc_auc_score(y_true, probas_pred))
+    assert_almost_equal(roc_auc, roc_auc_score(y_true, y_score))
     assert fpr.shape == tpr.shape
     assert fpr.shape == thresholds.shape

@@ -211,13 +218,13 @@ def test_roc_curve_end_points():
 def test_roc_returns_consistency():
     # Test whether the returned threshold matches up with tpr
     # make small toy dataset
-    y_true, _, probas_pred = make_prediction(binary=True)
-    fpr, tpr, thresholds = roc_curve(y_true, probas_pred)
+    y_true, _, y_score = make_prediction(binary=True)
+    fpr, tpr, thresholds = roc_curve(y_true, y_score)
 
     # use the given thresholds to determine the tpr
     tpr_correct = []
     for t in thresholds:
-        tp = np.sum((probas_pred >= t) & y_true)
+        tp = np.sum((y_score >= t) & y_true)
         p = np.sum(y_true)
         tpr_correct.append(1.0 * tp / p)

@@ -229,17 +236,17 @@ def test_roc_returns_consistency():
 
 def test_roc_curve_multi():
     # roc_curve not applicable for multi-class problems
-    y_true, _, probas_pred = make_prediction(binary=False)
+    y_true, _, y_score = make_prediction(binary=False)
 
     with pytest.raises(ValueError):
-        roc_curve(y_true, probas_pred)
+        roc_curve(y_true, y_score)
 
 
 def test_roc_curve_confidence():
     # roc_curve for confidence scores
-    y_true, _, probas_pred = make_prediction(binary=True)
+    y_true, _, y_score = make_prediction(binary=True)
 
-    fpr, tpr, thresholds = roc_curve(y_true, probas_pred - 0.5)
+    fpr, tpr, thresholds = roc_curve(y_true, y_score - 0.5)
     roc_auc = auc(fpr, tpr)
     assert_array_almost_equal(roc_auc, 0.90, decimal=2)
     assert fpr.shape == tpr.shape
@@ -248,7 +255,7 @@ def test_roc_curve_confidence():
 
 def test_roc_curve_hard():
     # roc_curve for hard decisions
-    y_true, pred, probas_pred = make_prediction(binary=True)
+    y_true, pred, y_score = make_prediction(binary=True)
 
     # always predict one
     trivial_pred = np.ones(y_true.shape)
@@ -668,23 +675,17 @@ def test_auc_score_non_binary_class():
         roc_auc_score(y_true, y_pred)
 
 
-def test_binary_clf_curve_multiclass_error():
+@pytest.mark.parametrize("curve_func", CURVE_FUNCS)
+def test_binary_clf_curve_multiclass_error(curve_func):
     rng = check_random_state(404)
     y_true = rng.randint(0, 3, size=10)
     y_pred = rng.rand(10)
     msg = "multiclass format is not supported"
-
     with pytest.raises(ValueError, match=msg):
-        precision_recall_curve(y_true, y_pred)
-
-    with pytest.raises(ValueError, match=msg):
-        roc_curve(y_true, y_pred)
+        curve_func(y_true, y_pred)
 
 
-@pytest.mark.parametrize("curve_func", [
-    precision_recall_curve,
-    roc_curve,
-])
+@pytest.mark.parametrize("curve_func", CURVE_FUNCS)
 def test_binary_clf_curve_implicit_pos_label(curve_func):
     # Check that using string class labels raises an informative
     # error for any supported string dtype:
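
Parametrizing over `CURVE_FUNCS` checks that every curve function rejects multiclass targets with the same message. The same check by hand, as a sketch with `roc_curve` standing in for any of the three:

    import numpy as np
    from sklearn.metrics import roc_curve

    rng = np.random.RandomState(404)
    y_true = rng.randint(0, 3, size=10)  # three classes
    y_pred = rng.rand(10)

    try:
        roc_curve(y_true, y_pred)
    except ValueError as exc:
        print(exc)  # multiclass format is not supported
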
@@ -693,10 +694,10 @@ def test_binary_clf_curve_implicit_pos_label(curve_func):
            "value in {0, 1} or {-1, 1} or pass pos_label "
            "explicitly.")
     with pytest.raises(ValueError, match=msg):
-        roc_curve(np.array(["a", "b"], dtype='<U1'), [0., 1.])
+        curve_func(np.array(["a", "b"], dtype='<U1'), [0., 1.])
 
     with pytest.raises(ValueError, match=msg):
-        roc_curve(np.array(["a", "b"], dtype=object), [0., 1.])
+        curve_func(np.array(["a", "b"], dtype=object), [0., 1.])
 
     # The error message is slightly different for bytes-encoded
     # class labels, but otherwise the behavior is the same:
@@ -705,25 +706,39 @@ def test_binary_clf_curve_implicit_pos_label(curve_func):
            "value in {0, 1} or {-1, 1} or pass pos_label "
            "explicitly.")
     with pytest.raises(ValueError, match=msg):
-        roc_curve(np.array([b"a", b"b"], dtype='<S1'), [0., 1.])
+        curve_func(np.array([b"a", b"b"], dtype='<S1'), [0., 1.])
 
     # Check that it is possible to use floating point class labels
     # that are interpreted similarly to integer class labels:
     y_pred = [0., 1., 0.2, 0.42]
-    int_curve = roc_curve([0, 1, 1, 0], y_pred)
-    float_curve = roc_curve([0., 1., 1., 0.], y_pred)
+    int_curve = curve_func([0, 1, 1, 0], y_pred)
+    float_curve = curve_func([0., 1., 1., 0.], y_pred)
     for int_curve_part, float_curve_part in zip(int_curve, float_curve):
         np.testing.assert_allclose(int_curve_part, float_curve_part)
 
 
+@pytest.mark.parametrize("curve_func", CURVE_FUNCS)
+def test_binary_clf_curve_zero_sample_weight(curve_func):
+    y_true = [0, 0, 1, 1, 1]
+    y_score = [0.1, 0.2, 0.3, 0.4, 0.5]
+    sample_weight = [1, 1, 1, 0.5, 0]
+
+    result_1 = curve_func(y_true, y_score, sample_weight=sample_weight)
+    result_2 = curve_func(y_true[:-1], y_score[:-1],
+                          sample_weight=sample_weight[:-1])
+
+    for arr_1, arr_2 in zip(result_1, result_2):
+        assert_allclose(arr_1, arr_2)
+
+
 def test_precision_recall_curve():
-    y_true, _, probas_pred = make_prediction(binary=True)
-    _test_precision_recall_curve(y_true, probas_pred)
+    y_true, _, y_score = make_prediction(binary=True)
+    _test_precision_recall_curve(y_true, y_score)
 
     # Use {-1, 1} for labels; make sure original labels aren't modified
     y_true[np.where(y_true == 0)] = -1
     y_true_copy = y_true.copy()
-    _test_precision_recall_curve(y_true, probas_pred)
+    _test_precision_recall_curve(y_true, y_score)
     assert_array_equal(y_true_copy, y_true)
 
     labels = [1, 0, 0, 1]
@@ -736,31 +751,24 @@ def test_precision_recall_curve():
     assert p.size == t.size + 1
 
 
-def _test_precision_recall_curve(y_true, probas_pred):
+def _test_precision_recall_curve(y_true, y_score):
     # Test Precision-Recall and aread under PR curve
-    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
-    precision_recall_auc = _average_precision_slow(y_true, probas_pred)
+    p, r, thresholds = precision_recall_curve(y_true, y_score)
+    precision_recall_auc = _average_precision_slow(y_true, y_score)
     assert_array_almost_equal(precision_recall_auc, 0.859, 3)
     assert_array_almost_equal(precision_recall_auc,
-                              average_precision_score(y_true, probas_pred))
+                              average_precision_score(y_true, y_score))
     # `_average_precision` is not very precise in case of 0.5 ties: be tolerant
-    assert_almost_equal(_average_precision(y_true, probas_pred),
+    assert_almost_equal(_average_precision(y_true, y_score),
                         precision_recall_auc, decimal=2)
     assert p.size == r.size
     assert p.size == thresholds.size + 1
     # Smoke test in the case of proba having only one value
-    p, r, thresholds = precision_recall_curve(y_true,
-                                              np.zeros_like(probas_pred))
+    p, r, thresholds = precision_recall_curve(y_true, np.zeros_like(y_score))
     assert p.size == r.size
     assert p.size == thresholds.size + 1
 
 
-def test_precision_recall_curve_errors():
-    # Contains non-binary labels
-    with pytest.raises(ValueError):
-        precision_recall_curve([0, 1, 2], [[0.0], [1.0], [1.0]])
-
-
 def test_precision_recall_curve_toydata():
     with np.errstate(all="raise"):
         # Binary classification
@@ -913,20 +921,20 @@ def test_score_scale_invariance():
     # This test was expanded (added scaled_down) in response to github
     # issue #3864 (and others), where overly aggressive rounding was causing
     # problems for users with very small y_score values
-    y_true, _, probas_pred = make_prediction(binary=True)
+    y_true, _, y_score = make_prediction(binary=True)
 
-    roc_auc = roc_auc_score(y_true, probas_pred)
-    roc_auc_scaled_up = roc_auc_score(y_true, 100 * probas_pred)
-    roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * probas_pred)
-    roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10)
+    roc_auc = roc_auc_score(y_true, y_score)
+    roc_auc_scaled_up = roc_auc_score(y_true, 100 * y_score)
+    roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * y_score)
+    roc_auc_shifted = roc_auc_score(y_true, y_score - 10)
     assert roc_auc == roc_auc_scaled_up
     assert roc_auc == roc_auc_scaled_down
     assert roc_auc == roc_auc_shifted
 
-    pr_auc = average_precision_score(y_true, probas_pred)
-    pr_auc_scaled_up = average_precision_score(y_true, 100 * probas_pred)
-    pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * probas_pred)
-    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
+    pr_auc = average_precision_score(y_true, y_score)
+    pr_auc_scaled_up = average_precision_score(y_true, 100 * y_score)
+    pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * y_score)
+    pr_auc_shifted = average_precision_score(y_true, y_score - 10)
     assert pr_auc == pr_auc_scaled_up
     assert pr_auc == pr_auc_scaled_down
    assert pr_auc == pr_auc_shifted
@@ -954,8 +962,7 @@ def test_score_scale_invariance():
     ([1, 0, 1], [0.5, 0.75, 1], [1, 1, 0], [0, 0.5, 0.5]),
     ([1, 0, 1], [0.25, 0.5, 0.75], [1, 1, 0], [0, 0.5, 0.5]),
 ])
-def test_det_curve_toydata(y_true, y_score,
-                           expected_fpr, expected_fnr):
+def test_det_curve_toydata(y_true, y_score, expected_fpr, expected_fnr):
     # Check on a batch of small examples.
     fpr, fnr, _ = det_curve(y_true, y_score)

sklearn/utils/validation.py

Lines changed: 2 additions & 1 deletion
@@ -1346,7 +1346,7 @@ def _check_sample_weight(sample_weight, X, dtype=None, copy=False):
     X : {ndarray, list, sparse matrix}
         Input data.
 
-    dtype: dtype, default=None
+    dtype : dtype, default=None
         dtype of the validated `sample_weight`.
         If None, and the input `sample_weight` is an array, the dtype of the
         input is preserved; otherwise an array with the default numpy dtype
@@ -1383,6 +1383,7 @@ def _check_sample_weight(sample_weight, X, dtype=None, copy=False):
     if sample_weight.shape != (n_samples,):
         raise ValueError("sample_weight.shape == {}, expected {}!"
                          .format(sample_weight.shape, (n_samples,)))
+
     return sample_weight
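
`_check_sample_weight` is a private helper, so treat this as a sketch of the contract the patch relies on rather than a stable API: `None` expands to unit weights, and an array is validated against `n_samples` and cast to float.

    import numpy as np
    from sklearn.utils.validation import _check_sample_weight

    X = np.zeros((4, 2))

    print(_check_sample_weight(None, X))            # [1. 1. 1. 1.]
    print(_check_sample_weight([1, 1, 0.5, 0], X))  # [1.  1.  0.5 0. ]

    try:
        _check_sample_weight([1, 1, 1], X)          # wrong length
    except ValueError as exc:
        print(exc)  # sample_weight.shape == (3,), expected (4,)!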

0 commit comments