From 9dbfbc85ef43487e6e032f9b772ebd9ae8c8c161 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 15:07:06 -0700 Subject: [PATCH 01/18] added a function with confusion matrix derived metrics (fpr, tpr, tnr, fnr) --- sklearn/metrics/__init__.py | 1 + sklearn/metrics/_classification.py | 203 +++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 8bcb047ec8161..b9d7ec3bd0a9e 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -33,6 +33,7 @@ from ._classification import zero_one_loss from ._classification import brier_score_loss from ._classification import multilabel_confusion_matrix +from ._classification import tpr_fpr_tnr_fnr_scores from . import cluster from .cluster import adjusted_mutual_info_score diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2ceccca65203e..d4b0f4c7347b6 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1504,8 +1504,211 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, return precision, recall, f_score, true_sum +<<<<<<< HEAD @_deprecate_positional_args def precision_score(y_true, y_pred, *, labels=None, pos_label=1, +======= +def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=None, + warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None,zero_division="warn"): + """Compute TPR, FPR, TNR, FNR for each class + + The TPR is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of + true positives and ``fn`` the number of false negatives. + + The FPR is the ratio ``fp / (tn + fp)`` where ``tn`` is the number of + true negatives and ``fp`` the number of false positives. + + The TNR is the ratio ``tn / (tn + fp)`` where ``tn`` is the number of + true negatives and ``fp`` the number of false positives. + + The FNR is the ratio ``fn / (tp + fn)`` where ``tp`` is the number of + true positives and ``fn`` the number of false negatives. + + If ``pos_label is None`` and in binary classification, this function + returns the average precision, recall and F-measure if ``average`` + is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``. + + Parameters + ---------- + y_true : 1d array-like, or label indicator array / sparse matrix + Ground truth (correct) target values. + + y_pred : 1d array-like, or label indicator array / sparse matrix + Estimated targets as returned by a classifier. + + labels : list, optional + The set of labels to include when ``average != 'binary'``, and their + order if ``average is None``. Labels present in the data can be + excluded, for example to calculate a multiclass average ignoring a + majority negative class, while labels not present in the data will + result in 0 components in a macro average. For multilabel targets, + labels are column indices. By default, all labels in ``y_true`` and + ``y_pred`` are used in sorted order. + + pos_label : str or int, 1 by default + The class to report if ``average='binary'`` and the data is binary. + If the data are multiclass or multilabel, this will be ignored; + setting ``labels=[pos_label]`` and ``average != 'binary'`` will report + scores for that label only. + + average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ + 'weighted'] + If ``None``, the scores for each class are returned. 
Otherwise, this + determines the type of averaging performed on the data: + + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + + warn_for : tuple or set, for internal use + This determines which warnings will be made in the case that this + function is being used to return only one of its metrics. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. + + zero_division : "warn", 0 or 1, default="warn" + Sets the value to return when there is a zero division: + - recall: when there are no positive labels + - precision: when there are no positive predictions + - f-score: both + + If set to "warn", this acts as 0, but warnings are also raised. + + Returns + ------- + tpr : float (if average is not None) or array of float, shape =\ + [n_unique_labels] + + fpr : float (if average is not None) or array of float, , shape =\ + [n_unique_labels] + + tnr : float (if average is not None) or array of float, shape =\ + [n_unique_labels] + + fnr : float (if average is not None) or array of float, shape =\ + [n_unique_labels] + The number of occurrences of each label in ``y_true``. + + References + ---------- + .. [1] `Wikipedia entry for confusion matrix + `_ + + .. [2] `Discriminative Methods for Multi-labeled Classification Advances + in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu + Godbole, Sunita Sarawagi + `_ + + Examples + -------- + >>> import numpy as np + >>> from sklearn.metrics import precision_recall_fscore_support + >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) + >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') + (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') + (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') + (0.22..., 0.33..., 0.26..., None) + + It is possible to compute per-label fpr, fnr, tnr, tpr and + supports instead of averaging: + + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, + ... labels=['pig', 'dog', 'cat']) + (array([0, 0, 1]), array([0, 0, 0]), + array([0, 0, 0]), array([1, 1, 0])) + + Notes + ----- + When ``true positive + false negative == 0``, TPR, FNR are not undefined; + When ``true negative + false positive == 0``, FPR, TNR are not undefined. + In such cases, by default the metric will be set to 0, as will f-score, + and ``UndefinedMetricWarning`` will be raised. This behavior can be + modified with ``zero_division``. 
+ """ + _check_zero_division(zero_division) + + labels = _check_set_wise_labels(y_true, y_pred, average, labels, + pos_label) + + # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum ### + samplewise = average == 'samples' + MCM = multilabel_confusion_matrix(y_true, y_pred, + sample_weight=sample_weight, + labels=labels, samplewise=samplewise) + tn_sum = MCM[:, 0, 0] + fp_sum = MCM[:, 0, 1] + fn_sum = MCM[:, 1, 0] + tp_sum = MCM[:, 1, 1] + pred_sum = tp_sum + MCM[:, 0, 1] + neg_sum = tn_sum+fp_sum + pos_sum = fn_sum+tp_sum + + + if average == 'micro': + fp_sum = np.array([fp_sum.sum()]) + tn_sum = np.array([tn_sum.sum()]) + fn_sum = np.array([fn_sum.sum()]) + neg_sum = np.array([neg_sum.sum()]) + pos_sum = np.array([pos_sum.sum()]) + + # Divide, and on zero-division, set scores and/or warn according to + # zero_division: + fpr = _prf_divide(fp_sum, neg_sum, 'fpr', + 'negatives', average, warn_for, zero_division) + tnr = _prf_divide(tn_sum, neg_sum, 'tnr', + 'negatives', average, warn_for, zero_division) + fnr = _prf_divide(fn_sum, pos_sum, 'fnr', + 'positives', average, warn_for, zero_division) + tpr = _prf_divide(tp_sum, pos_sum, 'tpr', + 'positives', average, warn_for, zero_division) + + # Average the results + if average == 'weighted': + weights = pos_sum + if weights.sum() == 0: + zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 + # precision is zero_division if there are no positive predictions + # recall is zero_division if there are no positive labels + # fscore is zero_division if all labels AND predictions are + # negative + return (zero_division_value if pred_sum.sum() == 0 else 0, + zero_division_value, + zero_division_value if pred_sum.sum() == 0 else 0) + + elif average == 'samples': + weights = sample_weight + else: + weights = None + + if average is not None: + assert average != 'binary' or len(fpr) == 1 + fpr = np.average(fpr, weights=weights) + tnr = np.average(tnr, weights=weights) + fnr = np.average(fnr, weights=weights) + tpr = np.average(tpr, weights=weights) + return tpr, fpr, tnr, fnr + + +def precision_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight=None, zero_division="warn"): """Compute the precision From 64a5a7b2fd6069726ca54fe2c2472f450c7fcbe2 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 16:07:58 -0700 Subject: [PATCH 02/18] changed the true postive sum in the function --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index d4b0f4c7347b6..43a246c24f36e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1662,14 +1662,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum - if average == 'micro': + tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) tn_sum = np.array([tn_sum.sum()]) fn_sum = np.array([fn_sum.sum()]) neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) - + pred_sum = np.array([pred_sum.sum()]) # Divide, and on zero-division, set scores and/or warn according to # zero_division: fpr = _prf_divide(fp_sum, neg_sum, 'fpr', From 523eaa094051bdd1ef69f72c693648cf79062813 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 16:30:02 -0700 Subject: [PATCH 03/18] add print --- sklearn/metrics/_classification.py | 13 ++++++++----- 1 file changed, 8 
insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 43a246c24f36e..a06d4d5b39ced 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1661,7 +1661,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum - + print('before micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) if average == 'micro': tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) @@ -1670,17 +1670,19 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) + print('after micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) # Divide, and on zero-division, set scores and/or warn according to # zero_division: + print('before divide',tpr,fpr,tnr,fnr) + tpr = _prf_divide(tp_sum, pos_sum, 'tpr', + 'positives', average, warn_for, zero_division) fpr = _prf_divide(fp_sum, neg_sum, 'fpr', 'negatives', average, warn_for, zero_division) tnr = _prf_divide(tn_sum, neg_sum, 'tnr', 'negatives', average, warn_for, zero_division) fnr = _prf_divide(fn_sum, pos_sum, 'fnr', 'positives', average, warn_for, zero_division) - tpr = _prf_divide(tp_sum, pos_sum, 'tpr', - 'positives', average, warn_for, zero_division) - + print('after divide',tpr,fpr,tnr,fnr) # Average the results if average == 'weighted': weights = pos_sum @@ -1698,13 +1700,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non weights = sample_weight else: weights = None - + print('before avg', tpr, fpr, tnr, fnr,weights) if average is not None: assert average != 'binary' or len(fpr) == 1 fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) fnr = np.average(fnr, weights=weights) tpr = np.average(tpr, weights=weights) + print('after avg', tpr, fpr, tnr, fnr) return tpr, fpr, tnr, fnr From b9772166aeb75430e20ae2fe17b293acc503d365 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 16:53:37 -0700 Subject: [PATCH 04/18] remove one print --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index a06d4d5b39ced..62de8cbe91597 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1673,7 +1673,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non print('after micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) # Divide, and on zero-division, set scores and/or warn according to # zero_division: - print('before divide',tpr,fpr,tnr,fnr) + #print('before divide',tpr,fpr,tnr,fnr) tpr = _prf_divide(tp_sum, pos_sum, 'tpr', 'positives', average, warn_for, zero_division) fpr = _prf_divide(fp_sum, neg_sum, 'fpr', From 5a061ef1f2eb95ff5180852cffd2f71389146c2d Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:20:58 -0700 Subject: [PATCH 05/18] remove print statements --- sklearn/metrics/_classification.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 62de8cbe91597..48f4ae38d6ac0 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1661,7 +1661,6 @@ def 
tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum - print('before micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) if average == 'micro': tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) @@ -1670,10 +1669,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) - print('after micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) # Divide, and on zero-division, set scores and/or warn according to # zero_division: - #print('before divide',tpr,fpr,tnr,fnr) tpr = _prf_divide(tp_sum, pos_sum, 'tpr', 'positives', average, warn_for, zero_division) fpr = _prf_divide(fp_sum, neg_sum, 'fpr', @@ -1682,7 +1679,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non 'negatives', average, warn_for, zero_division) fnr = _prf_divide(fn_sum, pos_sum, 'fnr', 'positives', average, warn_for, zero_division) - print('after divide',tpr,fpr,tnr,fnr) # Average the results if average == 'weighted': weights = pos_sum @@ -1700,14 +1696,12 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non weights = sample_weight else: weights = None - print('before avg', tpr, fpr, tnr, fnr,weights) if average is not None: assert average != 'binary' or len(fpr) == 1 fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) fnr = np.average(fnr, weights=weights) tpr = np.average(tpr, weights=weights) - print('after avg', tpr, fpr, tnr, fnr) return tpr, fpr, tnr, fnr From 64939770f6e61e39580af48e068776a3b85c6e86 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:24:07 -0700 Subject: [PATCH 06/18] add coauthors. 
Co-authored-by: samskruthi padigepati Co-authored-by: Divya Dhar --- sklearn/metrics/_classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 48f4ae38d6ac0..b93fa3157e54d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1661,6 +1661,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum + if average == 'micro': tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) @@ -1669,6 +1670,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) + # Divide, and on zero-division, set scores and/or warn according to # zero_division: tpr = _prf_divide(tp_sum, pos_sum, 'tpr', @@ -2656,4 +2658,4 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): else: pos_label = y_true.max() y_true = np.array(y_true == pos_label, int) - return np.average((y_true - y_prob) ** 2, weights=sample_weight) + return np.average((y_true - y_prob) ** 2, weights=sample_weight) \ No newline at end of file From 141fa4ae28dc815586ba9336c893bfa7fd813615 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:43:29 -0700 Subject: [PATCH 07/18] fix doc string outputs Co-authored-by: samskruthi padigepati Co-authored-by: Divya Dhar --- sklearn/metrics/_classification.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b93fa3157e54d..8efbd884694f8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1622,19 +1622,18 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') - (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') - (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') - (0.22..., 0.33..., 0.26..., None) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) It is possible to compute per-label fpr, fnr, tnr, tpr and supports instead of averaging: >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, ... 
labels=['pig', 'dog', 'cat']) - (array([0, 0, 1]), array([0, 0, 0]), - array([0, 0, 0]), array([1, 1, 0])) + (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) Notes ----- From 9615ae881f790a2fd79cf9790d99e96d4ed830aa Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:53:36 -0700 Subject: [PATCH 08/18] pep8 test Co-authored-by: Divya Dhar Co-authored-by: samskruthi padigepati --- sklearn/metrics/_classification.py | 46 ++++++++++++++++-------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8efbd884694f8..802d0b78630bb 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1504,12 +1504,10 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, return precision, recall, f_score, true_sum -<<<<<<< HEAD -@_deprecate_positional_args -def precision_score(y_true, y_pred, *, labels=None, pos_label=1, -======= -def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=None, - warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None,zero_division="warn"): +def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, + average=None, warn_for=('tpr', 'fpr', + 'tnr', 'fnr'), + sample_weight=None, zero_division="warn"): """Compute TPR, FPR, TNR, FNR for each class The TPR is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of @@ -1622,18 +1620,22 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, + 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, + 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, + 0.6666666666666666) It is possible to compute per-label fpr, fnr, tnr, tpr and supports instead of averaging: >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, ... 
labels=['pig', 'dog', 'cat']) - (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) + (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), + array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) Notes ----- @@ -1645,8 +1647,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non """ _check_zero_division(zero_division) - labels = _check_set_wise_labels(y_true, y_pred, average, labels, - pos_label) + labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum ### samplewise = average == 'samples' @@ -1672,14 +1673,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non # Divide, and on zero-division, set scores and/or warn according to # zero_division: - tpr = _prf_divide(tp_sum, pos_sum, 'tpr', - 'positives', average, warn_for, zero_division) - fpr = _prf_divide(fp_sum, neg_sum, 'fpr', - 'negatives', average, warn_for, zero_division) - tnr = _prf_divide(tn_sum, neg_sum, 'tnr', - 'negatives', average, warn_for, zero_division) - fnr = _prf_divide(fn_sum, pos_sum, 'fnr', - 'positives', average, warn_for, zero_division) + tpr = _prf_divide(tp_sum, pos_sum, 'tpr', 'positives', + average, warn_for, zero_division) + fpr = _prf_divide(fp_sum, neg_sum, 'fpr', 'negatives', + average, warn_for, zero_division) + tnr = _prf_divide(tn_sum, neg_sum, 'tnr', 'negatives', + average, warn_for, zero_division) + fnr = _prf_divide(fn_sum, pos_sum, 'fnr', 'positives', + average, warn_for, zero_division) # Average the results if average == 'weighted': weights = pos_sum @@ -2338,7 +2339,8 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) + or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the @@ -2657,4 +2659,4 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): else: pos_label = y_true.max() y_true = np.array(y_true == pos_label, int) - return np.average((y_true - y_prob) ** 2, weights=sample_weight) \ No newline at end of file + return np.average((y_true - y_prob) ** 2, weights=sample_weight) From 79e156215a02fa8fe91d412be602dccbaf64a089 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sun, 3 Nov 2019 01:27:19 -0700 Subject: [PATCH 09/18] trivial Co-authored-by: samskruthi padigepati --- sklearn/metrics/_classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 802d0b78630bb..2433145c6d040 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1654,6 +1654,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, labels=labels, samplewise=samplewise) + tn_sum = MCM[:, 0, 0] fp_sum = MCM[:, 0, 1] fn_sum = MCM[:, 1, 0] From 8f21052163957fa06b188ea20f2f5c1a90ebb5ea Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sun, 3 Nov 2019 03:39:37 -0800 Subject: [PATCH 10/18] remove imported but unused flake8 --- sklearn/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index b9d7ec3bd0a9e..08e0c7844973e 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -153,6 +153,7 @@ 'SCORERS', 'silhouette_samples', 'silhouette_score', + 'tpr_fpr_tnr_fnr_scores', 'v_measure_score', 'zero_one_loss', 'brier_score_loss', From 3ffd83095b67be2df3a7657b98e7692e77603543 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sun, 3 Nov 2019 13:28:18 -0800 Subject: [PATCH 11/18] to trigger test --- sklearn/metrics/_classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2433145c6d040..802d0b78630bb 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1654,7 +1654,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, labels=labels, samplewise=samplewise) - tn_sum = MCM[:, 0, 0] fp_sum = MCM[:, 0, 1] fn_sum = MCM[:, 1, 0] From fb73c6e86cd6afc7e39653ce5778259d180601a8 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Mon, 18 May 2020 22:07:46 +0800 Subject: [PATCH 12/18] Take over PR #15522 Modify doc and add deprecation to position arg. 
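With the ``*`` in the signature and ``_deprecate_positional_args`` applied, the
parameters after ``y_pred`` become keyword-only. A minimal sketch of the intended
call pattern (this assumes the branch in this PR; ``tpr_fpr_tnr_fnr_scores`` is
not part of a released scikit-learn), reusing the toy labels from the docstring:

    >>> import numpy as np
    >>> from sklearn.metrics import tpr_fpr_tnr_fnr_scores
    >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
    >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
    >>> tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro')
    >>> round(float(tpr), 3), round(float(fpr), 3)
    (0.333, 0.333)

A positional call such as ``tpr_fpr_tnr_fnr_scores(y_true, y_pred, None, 1,
'macro')`` should instead emit a ``FutureWarning`` for the duration of the
deprecation window, matching how the other classification metrics were migrated.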
--- sklearn/metrics/_classification.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 802d0b78630bb..dd95b04fb6d2e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1504,7 +1504,8 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, return precision, recall, f_score, true_sum -def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, +@_deprecate_positional_args +def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, average=None, warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None, zero_division="warn"): @@ -1523,8 +1524,9 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, true positives and ``fn`` the number of false negatives. If ``pos_label is None`` and in binary classification, this function - returns the average precision, recall and F-measure if ``average`` - is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``. + returns the true positive rate, false positive rate, true negative rate + and false negative rate if ``average`` is one of ``'micro'``, ``'macro'``, + ``'weighted'`` or ``'samples'``. Parameters ---------- @@ -1566,8 +1568,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This - alters 'macro' to account for label imbalance; it can result in an - F-score that is not between precision and recall. + alters 'macro' to account for label imbalance. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from @@ -1593,7 +1594,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, tpr : float (if average is not None) or array of float, shape =\ [n_unique_labels] - fpr : float (if average is not None) or array of float, , shape =\ + fpr : float (if average is not None) or array of float, shape =\ [n_unique_labels] tnr : float (if average is not None) or array of float, shape =\ @@ -1616,7 +1617,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, Examples -------- >>> import numpy as np - >>> from sklearn.metrics import precision_recall_fscore_support >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') @@ -1707,7 +1707,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, return tpr, fpr, tnr, fnr -def precision_score(y_true, y_pred, labels=None, pos_label=1, +@_deprecate_positional_args +def precision_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', sample_weight=None, zero_division="warn"): """Compute the precision @@ -2339,8 +2340,7 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) - or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the From c780053a4809f961c40350317ab56a97cb9df377 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Mon, 18 May 2020 23:31:41 +0800 Subject: [PATCH 13/18] Modify doc and zero-division in the weighted average. Add test for binary classification. (Modify some lines to pass flake8) --- sklearn/metrics/_classification.py | 31 ++++++++++---------- sklearn/metrics/tests/test_classification.py | 31 +++++++++++++++++--- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index dd95b04fb6d2e..62669dbd5970d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1509,7 +1509,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, average=None, warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None, zero_division="warn"): - """Compute TPR, FPR, TNR, FNR for each class + """Compute True Positive Rate (TPR), False Positive Rate (FPR),\ + True Negative Rate (TNR), False Negative Rate (FNR) for each class The TPR is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of true positives and ``fn`` the number of false negatives. @@ -1583,9 +1584,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, zero_division : "warn", 0 or 1, default="warn" Sets the value to return when there is a zero division: - - recall: when there are no positive labels - - precision: when there are no positive predictions - - f-score: both + - tpr, fnr: when there are no positive labels + - fpr, tnr: when there are no negative labels If set to "warn", this acts as 0, but warnings are also raised. @@ -1649,7 +1649,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) - # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum ### + # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum samplewise = average == 'samples' MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, @@ -1659,8 +1659,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, fn_sum = MCM[:, 1, 0] tp_sum = MCM[:, 1, 1] pred_sum = tp_sum + MCM[:, 0, 1] - neg_sum = tn_sum+fp_sum - pos_sum = fn_sum+tp_sum + neg_sum = tn_sum + fp_sum + pos_sum = fn_sum + tp_sum if average == 'micro': tp_sum = np.array([tp_sum.sum()]) @@ -1686,18 +1686,18 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, weights = pos_sum if weights.sum() == 0: zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 - # precision is zero_division if there are no positive predictions - # recall is zero_division if there are no positive labels - # fscore is zero_division if all labels AND predictions are - # negative - return (zero_division_value if pred_sum.sum() == 0 else 0, - zero_division_value, - zero_division_value if pred_sum.sum() == 0 else 0) + # TPR and FNR is zero_division if there are no positive labels + # FPR and TNR is zero_division if there are no negative labels + return (zero_division_value if pos_sum == 0 else 0, + zero_division_value if neg_sum == 0 else 0, + zero_division_value if neg_sum == 0 else 0, + zero_division_value if pos_sum == 0 else 0) elif average == 'samples': weights = sample_weight else: weights = None + if average is not None: assert average != 'binary' or len(fpr) == 1 fpr = np.average(fpr, weights=weights) 
@@ -2340,7 +2340,8 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) \ + or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 1f959d95ce844..d118e72dd0cad 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -42,6 +42,7 @@ from sklearn.metrics import precision_recall_fscore_support from sklearn.metrics import precision_score from sklearn.metrics import recall_score +from sklearn.metrics import tpr_fpr_tnr_fnr_scores from sklearn.metrics import zero_one_loss from sklearn.metrics import brier_score_loss from sklearn.metrics import multilabel_confusion_matrix @@ -213,6 +214,26 @@ def test_precision_recall_f1_score_binary(): (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2) +def test_tpr_fpr_tnr_fnr_scores_binary(): + # Test TPR, FPR, TNR, FNR Score for binary classification task + y_true, y_pred, _ = make_prediction(binary=True) + + # detailed measures for each class + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) + assert_array_almost_equal(tpr, [0.88, 0.68], 2) + assert_array_almost_equal(fpr, [0.32, 0.12], 2) + assert_array_almost_equal(tnr, [0.68, 0.88], 2) + assert_array_almost_equal(fnr, [0.12, 0.32], 2) + + tn, fp, fn, tp = assert_no_warnings( + confusion_matrix, y_true, y_pred + ).ravel() + assert_array_almost_equal(tp / (tp + fn), 0.68, 2) + assert_array_almost_equal(fp / (tn + fp), 0.12, 2) + assert_array_almost_equal(tn / (tn + fp), 0.88, 2) + assert_array_almost_equal(fn / (tp + fn), 0.32, 2) + + @ignore_warnings def test_precision_recall_f_binary_single_class(): # Test precision, recall and F-scores behave with a single positive or @@ -2064,8 +2085,9 @@ def test_hinge_loss_multiclass(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + assert ( + hinge_loss(y_true, pred_decision) == dummy_hinge_loss + ) def test_hinge_loss_multiclass_missing_labels_with_labels_none(): @@ -2101,8 +2123,9 @@ def test_hinge_loss_multiclass_with_missing_labels(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert (hinge_loss(y_true, pred_decision, labels=labels) == - dummy_hinge_loss) + assert ( + hinge_loss(y_true, pred_decision, labels=labels) == dummy_hinge_loss + ) def test_hinge_loss_multiclass_invariance_lists(): From 408c2dbf67032cee899a565ed66c96bf9ffb923b Mon Sep 17 00:00:00 2001 From: haochunchang Date: Tue, 19 May 2020 22:39:32 +0800 Subject: [PATCH 14/18] Add tests for binary, multiclass and empty prediction. 
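The expected values in these tests can be cross-checked by hand. As a small
illustration (using the toy labels from the docstring example rather than
``make_prediction``, so the arithmetic stays easy to follow), the four rates can
be recomputed directly from ``multilabel_confusion_matrix``, the same quantity
the implementation starts from:

    >>> import numpy as np
    >>> from sklearn.metrics import multilabel_confusion_matrix
    >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
    >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
    >>> MCM = multilabel_confusion_matrix(y_true, y_pred,
    ...                                   labels=['pig', 'dog', 'cat'])
    >>> tn, fp, fn, tp = MCM[:, 0, 0], MCM[:, 0, 1], MCM[:, 1, 0], MCM[:, 1, 1]
    >>> tp / (tp + fn)   # TPR
    array([0., 0., 1.])
    >>> fp / (tn + fp)   # FPR
    array([0.25, 0.5 , 0.25])
    >>> tn / (tn + fp)   # TNR
    array([0.75, 0.5 , 0.75])
    >>> fn / (tp + fn)   # FNR
    array([1., 1., 0.])

which matches the per-label output documented for
``tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, labels=['pig', 'dog', 'cat'])``.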
--- sklearn/metrics/tests/test_classification.py | 159 ++++++++++++++++--- 1 file changed, 139 insertions(+), 20 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index d118e72dd0cad..4b0dfd9ff6ec1 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -214,26 +214,6 @@ def test_precision_recall_f1_score_binary(): (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2) -def test_tpr_fpr_tnr_fnr_scores_binary(): - # Test TPR, FPR, TNR, FNR Score for binary classification task - y_true, y_pred, _ = make_prediction(binary=True) - - # detailed measures for each class - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) - assert_array_almost_equal(tpr, [0.88, 0.68], 2) - assert_array_almost_equal(fpr, [0.32, 0.12], 2) - assert_array_almost_equal(tnr, [0.68, 0.88], 2) - assert_array_almost_equal(fnr, [0.12, 0.32], 2) - - tn, fp, fn, tp = assert_no_warnings( - confusion_matrix, y_true, y_pred - ).ravel() - assert_array_almost_equal(tp / (tp + fn), 0.68, 2) - assert_array_almost_equal(fp / (tn + fp), 0.12, 2) - assert_array_almost_equal(tn / (tn + fp), 0.88, 2) - assert_array_almost_equal(fn / (tp + fn), 0.32, 2) - - @ignore_warnings def test_precision_recall_f_binary_single_class(): # Test precision, recall and F-scores behave with a single positive or @@ -328,6 +308,145 @@ def test_precision_recall_f_ignored_labels(): recall_all(average=average)) +def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): + # Test TPR, FPR, TNR, FNR Score for binary classification task + y_true, y_pred, _ = make_prediction(binary=True) + + # compute scores with default labels introspection + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average=None + ) + assert_array_almost_equal(tprs, [0.88, 0.68], 2) + assert_array_almost_equal(fprs, [0.32, 0.12], 2) + assert_array_almost_equal(tnrs, [0.68, 0.88], 2) + assert_array_almost_equal(fnrs, [0.12, 0.32], 2) + + tn, fp, fn, tp = assert_no_warnings( + confusion_matrix, y_true, y_pred + ).ravel() + assert_array_almost_equal(tp / (tp + fn), 0.68, 2) + assert_array_almost_equal(fp / (tn + fp), 0.12, 2) + assert_array_almost_equal(tn / (tn + fp), 0.88, 2) + assert_array_almost_equal(fn / (tp + fn), 0.32, 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='macro' + ) + assert tpr == np.mean(tprs) + assert fpr == np.mean(fprs) + assert tnr == np.mean(tnrs) + assert fnr == np.mean(fnrs) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='weighted' + ) + support = np.bincount(y_true) + assert tpr == np.average(tprs, weights=support) + assert fpr == np.average(fprs, weights=support) + assert tnr == np.average(tnrs, weights=support) + assert fnr == np.average(fnrs, weights=support) + + +def test_tpr_fpr_tnr_fnr_scores_multiclass(): + # Test TPR, FPR, TNR, FNR Score for multiclass classification task + y_true, y_pred, _ = make_prediction(binary=False) + + # compute scores with default labels introspection + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average=None + ) + assert_array_almost_equal(tprs, [0.79, 0.1, 0.9], 2) + assert_array_almost_equal(fprs, [0.08, 0.14, 0.45], 2) + assert_array_almost_equal(tnrs, [0.92, 0.86, 0.55], 2) + assert_array_almost_equal(fnrs, [0.21, 0.9, 0.1], 2) + + # averaging tests + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='micro' + ) + assert_array_almost_equal(tpr, 0.53, 2) + 
assert_array_almost_equal(fpr, 0.23, 2) + assert_array_almost_equal(tnr, 0.77, 2) + assert_array_almost_equal(fnr, 0.47, 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='macro' + ) + assert_array_almost_equal(tpr, 0.6, 2) + assert_array_almost_equal(fpr, 0.22, 2) + assert_array_almost_equal(tnr, 0.78, 2) + assert_array_almost_equal(fnr, 0.4, 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='weighted' + ) + assert_array_almost_equal(tpr, 0.53, 2) + assert_array_almost_equal(fpr, 0.2, 2) + assert_array_almost_equal(tnr, 0.8, 2) + assert_array_almost_equal(fnr, 0.47, 2) + + with pytest.raises(ValueError): + tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="samples") + + # same prediction but with and explicit label ordering + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, labels=[0, 2, 1], average=None + ) + assert_array_almost_equal(tpr, [0.79, 0.9, 0.1], 2) + assert_array_almost_equal(fpr, [0.08, 0.45, 0.14], 2) + assert_array_almost_equal(tnr, [0.92, 0.55, 0.86], 2) + assert_array_almost_equal(fnr, [0.21, 0.1, 0.9], 2) + + +@ignore_warnings +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): + y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) + y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) + + zero_division = 1.0 if zero_division == 1.0 else 0.0 + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average=None, + zero_division=zero_division) + assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division], 2) + assert_array_almost_equal(fpr, [0.0, 0.0, 0.0, 1 / 3.0], 2) + assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) + assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division], 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="macro", + zero_division=zero_division) + assert_almost_equal(tpr, 0.625 if zero_division else 0.375) + assert_almost_equal(fpr, 1 / 3.0 / 4.0) + assert_almost_equal(tnr, 0.91666, 5) + assert_almost_equal(fnr, 0.625 if zero_division else 0.375) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="micro", + zero_division=zero_division) + assert_almost_equal(tpr, 0.5) + assert_almost_equal(fpr, 0.125) + assert_almost_equal(tnr, 0.875) + assert_almost_equal(fnr, 0.5) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="weighted", + zero_division=zero_division) + assert_almost_equal(tpr, 0.5) + assert_almost_equal(fpr, 0) + assert_almost_equal(tnr, 1.0) + assert_almost_equal(fnr, 0.5) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="samples", + sample_weight=[1, 1, 2], + zero_division=zero_division) + assert_almost_equal(tpr, 0.5) + assert_almost_equal(fpr, 0.08333, 5) + assert_almost_equal(tnr, 0.91666, 5) + assert_almost_equal(fnr, 0.5) + + def test_average_precision_score_score_non_binary_class(): # Test that average_precision_score function returns an error when trying # to compute average_precision_score for multiclass task. From 4adfe2e7b43215a54cbb682c31bd3cadb8d559a8 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Wed, 20 May 2020 09:06:12 +0800 Subject: [PATCH 15/18] Add tpr_fpr_tnr_fnr_scores to test_common.py. 
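For context: the common tests exercise each registered callable essentially as
``metric(y_true, y_pred)``, which is why the averaged variants are registered
through ``functools.partial`` with ``average`` pre-bound. A short sketch of that
pattern (assuming this branch; ``macro_scores`` is just a local name used for
illustration):

    >>> from functools import partial
    >>> from sklearn.metrics import tpr_fpr_tnr_fnr_scores
    >>> macro_scores = partial(tpr_fpr_tnr_fnr_scores, average='macro')
    >>> # macro_scores(y_true, y_pred) is now equivalent to
    >>> # tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro')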
--- sklearn/metrics/_classification.py | 8 ++++---- sklearn/metrics/tests/test_common.py | 29 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 62669dbd5970d..0ba8596a37b81 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1688,10 +1688,10 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 # TPR and FNR is zero_division if there are no positive labels # FPR and TNR is zero_division if there are no negative labels - return (zero_division_value if pos_sum == 0 else 0, - zero_division_value if neg_sum == 0 else 0, - zero_division_value if neg_sum == 0 else 0, - zero_division_value if pos_sum == 0 else 0) + return (zero_division_value if pos_sum.sum() == 0 else 0, + zero_division_value if neg_sum.sum() == 0 else 0, + zero_division_value if neg_sum.sum() == 0 else 0, + zero_division_value if pos_sum.sum() == 0 else 0) elif average == 'samples': weights = sample_weight diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 7301d21a35f39..c54a984c5556b 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -53,6 +53,7 @@ from sklearn.metrics import recall_score from sklearn.metrics import roc_auc_score from sklearn.metrics import roc_curve +from sklearn.metrics import tpr_fpr_tnr_fnr_scores from sklearn.metrics import zero_one_loss from sklearn.metrics import ndcg_score from sklearn.metrics import dcg_score @@ -140,6 +141,9 @@ "f2_score": partial(fbeta_score, beta=2), "f0.5_score": partial(fbeta_score, beta=0.5), "matthews_corrcoef_score": matthews_corrcoef, + "tpr_fpr_tnr_fnr_scores": tpr_fpr_tnr_fnr_scores, + "binary_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="binary"), "weighted_f0.5_score": partial(fbeta_score, average="weighted", beta=0.5), "weighted_f1_score": partial(f1_score, average="weighted"), @@ -147,6 +151,8 @@ "weighted_precision_score": partial(precision_score, average="weighted"), "weighted_recall_score": partial(recall_score, average="weighted"), "weighted_jaccard_score": partial(jaccard_score, average="weighted"), + "weighted_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="weighted"), "micro_f0.5_score": partial(fbeta_score, average="micro", beta=0.5), "micro_f1_score": partial(f1_score, average="micro"), @@ -154,6 +160,8 @@ "micro_precision_score": partial(precision_score, average="micro"), "micro_recall_score": partial(recall_score, average="micro"), "micro_jaccard_score": partial(jaccard_score, average="micro"), + "micro_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), "macro_f1_score": partial(f1_score, average="macro"), @@ -161,6 +169,8 @@ "macro_precision_score": partial(precision_score, average="macro"), "macro_recall_score": partial(recall_score, average="macro"), "macro_jaccard_score": partial(jaccard_score, average="macro"), + "macro_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), "samples_f1_score": partial(f1_score, average="samples"), @@ -168,6 +178,8 @@ "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), 
"samples_jaccard_score": partial(jaccard_score, average="samples"), + "samples_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="samples"), "cohen_kappa_score": cohen_kappa_score, } @@ -264,6 +276,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_precision_score", "samples_recall_score", "samples_jaccard_score", + "samples_tpr_fpr_tnr_fnr_scores", "coverage_error", "unnormalized_multilabel_confusion_matrix_sample", "label_ranking_loss", @@ -282,6 +295,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "roc_auc_score", "weighted_roc_auc", + "tpr_fpr_tnr_fnr_scores", "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -295,6 +309,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f1_score", "f2_score", "f0.5_score", + "binary_tpr_fpr_tnr_fnr_scores", # curves "roc_curve", @@ -326,6 +341,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", "jaccard_score", + "tpr_fpr_tnr_fnr_scores", "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -354,17 +370,21 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", "jaccard_score", + "tpr_fpr_tnr_fnr_scores", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", + "weighted_tpr_fpr_tnr_fnr_scores", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", "micro_jaccard_score", + "micro_tpr_fpr_tnr_fnr_scores", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "macro_jaccard_score", + "macro_tpr_fpr_tnr_fnr_scores", "unnormalized_multilabel_confusion_matrix", "unnormalized_multilabel_confusion_matrix_sample", @@ -406,20 +426,24 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", + "weighted_tpr_fpr_tnr_fnr_scores", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "macro_jaccard_score", + "macro_tpr_fpr_tnr_fnr_scores", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", "micro_jaccard_score", + "micro_tpr_fpr_tnr_fnr_scores", "unnormalized_multilabel_confusion_matrix", "samples_f0.5_score", "samples_f1_score", "samples_f2_score", "samples_precision_score", "samples_recall_score", "samples_jaccard_score", + "samples_tpr_fpr_tnr_fnr_scores", } # Regression metrics with "multioutput-continuous" format support @@ -444,6 +468,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): # P = R = F = accuracy in multiclass case "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", + "micro_tpr_fpr_tnr_fnr_scores", "matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error", "median_absolute_error", "max_error", @@ -465,6 +490,10 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "precision_score", "recall_score", "f2_score", "f0.5_score", + "tpr_fpr_tnr_fnr_scores", + "weighted_tpr_fpr_tnr_fnr_scores", + "macro_tpr_fpr_tnr_fnr_scores", + "weighted_f0.5_score", "weighted_f1_score", 
"weighted_f2_score", "weighted_precision_score", "weighted_jaccard_score", "unnormalized_multilabel_confusion_matrix", From 53d6fd2f5c9e9c81a8cc7dd4639be87d4c00ec01 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Wed, 20 May 2020 10:32:52 +0800 Subject: [PATCH 16/18] Remove pred_sum variable --- sklearn/metrics/_classification.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 0ba8596a37b81..8514e932263be 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1649,7 +1649,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) - # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum + # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pos_sum, neg_sum samplewise = average == 'samples' MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, @@ -1658,7 +1658,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, fp_sum = MCM[:, 0, 1] fn_sum = MCM[:, 1, 0] tp_sum = MCM[:, 1, 1] - pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum + fp_sum pos_sum = fn_sum + tp_sum @@ -1669,7 +1668,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, fn_sum = np.array([fn_sum.sum()]) neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) - pred_sum = np.array([pred_sum.sum()]) # Divide, and on zero-division, set scores and/or warn according to # zero_division: From a5b526234f2842a953a58537e0162b70a7ca6717 Mon Sep 17 00:00:00 2001 From: Hao Chun Chang Date: Tue, 29 Sep 2020 22:16:17 +0800 Subject: [PATCH 17/18] Fix linting --- sklearn/metrics/tests/test_classification.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a312268ff3f6b..38619cfde94d6 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2258,14 +2258,9 @@ def test_hinge_loss_multiclass(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) -<<<<<<< HEAD assert ( hinge_loss(y_true, pred_decision) == dummy_hinge_loss ) -======= - assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) ->>>>>>> 13bccedeb02fa650a247a8ab6420bf9d44df3424 def test_hinge_loss_multiclass_missing_labels_with_labels_none(): From f74fc1066358e0ca83ba6576c16ee21cad65bba7 Mon Sep 17 00:00:00 2001 From: Hao Chun Chang Date: Mon, 5 Oct 2020 22:35:12 +0800 Subject: [PATCH 18/18] Fix parameter documentation --- sklearn/metrics/_classification.py | 36 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 843a4f2e02b80..62eb4b42c189f 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1565,13 +1565,15 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, Parameters ---------- - y_true : 1d array-like, or label indicator array / sparse matrix + y_true : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Ground truth (correct) target values. - y_pred : 1d array-like, or label indicator array / sparse matrix + y_pred : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Estimated targets as returned by a classifier. 
- labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -1580,14 +1582,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ - 'weighted'] + average : str, {None, 'binary', 'micro', 'macro', 'samples', 'weighted'}, \ + default=None If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -1616,7 +1618,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, sample_weight : array-like of shape (n_samples,), default=None Sample weights. - zero_division : "warn", 0 or 1, default="warn" + zero_division : str or int, {'warn', 0, 1}, default="warn" Sets the value to return when there is a zero division: - tpr, fnr: when there are no positive labels - fpr, tnr: when there are no negative labels @@ -1625,17 +1627,17 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, Returns ------- - tpr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + tpr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) - fpr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + fpr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) - tnr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + tnr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) - fnr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + fnr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) The number of occurrences of each label in ``y_true``. References @@ -1673,8 +1675,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, Notes ----- - When ``true positive + false negative == 0``, TPR, FNR are not undefined; - When ``true negative + false positive == 0``, FPR, TNR are not undefined. + When ``true positive + false negative == 0``, TPR, FNR are undefined; + When ``true negative + false positive == 0``, FPR, TNR are undefined. In such cases, by default the metric will be set to 0, as will f-score, and ``UndefinedMetricWarning`` will be raised. This behavior can be modified with ``zero_division``.
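To make the ``zero_division`` behaviour described in the Notes concrete, here is
a small sketch on a toy input (assuming this branch; the expected values are
given as comments rather than doctest output, so the exact array repr is not
pinned down):

    >>> import numpy as np
    >>> from sklearn.metrics import tpr_fpr_tnr_fnr_scores
    >>> y_true = np.array([0, 0, 0, 0])   # class 1 has no positive samples
    >>> y_pred = np.array([0, 0, 1, 0])   # and class 0 has no negative samples
    >>> tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(
    ...     y_true, y_pred, labels=[0, 1], average=None, zero_division=0)
    >>> # tpr -> [0.75, 0.  ]   TPR of class 1 falls back to zero_division
    >>> # fpr -> [0.  , 0.25]   FPR of class 0 falls back to zero_division
    >>> # tnr -> [0.  , 0.75]
    >>> # fnr -> [0.25, 0.  ]

With the default ``zero_division="warn"`` the same values are returned, but an
``UndefinedMetricWarning`` is raised for the undefined entries, as stated above.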