diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index a566d03ae1bbc..e5f2ad9524c9d 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -214,6 +214,16 @@ Changelog
 :class:`calibration.CalibratedClassifierCV` can now properly be used on
 prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre`
 
+:mod:`sklearn.metrics`
+......................
+
+- |Enhancement| :func:`metrics.confusion_matrix` can now return the confusion
+  matrix as a dict keyed by ``(true_label, predicted_label)`` tuples via the
+  new ``as_dict`` parameter.
+  :pr:`19190` by :user:`Shubham Shinde`,
+  :user:`Max Kinner`, :user:`Varun John` and
+  :user:`Vinayak Parab`.
+
 Code and Documentation Contributors
 -----------------------------------
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 708bde662e765..ac7882bd81086 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -212,7 +212,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
 
 @_deprecate_positional_args
 def confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None,
-                     normalize=None):
+                     normalize=None, as_dict=False):
     """Compute confusion matrix to evaluate the accuracy of a classification.
 
     By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`
@@ -249,14 +249,26 @@ def confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None,
         conditions or all the population. If None, confusion matrix will not be
         normalized.
 
+    as_dict : bool, default=False
+        If True, return the confusion matrix as a dict whose keys are
+        ``(true_label, predicted_label)`` tuples; it can easily be converted
+        into an unstacked pandas Series. See Examples.
+
     Returns
     -------
-    C : ndarray of shape (n_classes, n_classes)
-        Confusion matrix whose i-th row and j-th
+    C : ndarray of shape (n_classes, n_classes), or dict of length
+        (n_classes * n_classes)
+
+        Confusion matrix as an ndarray whose i-th row and j-th
         column entry indicates the number of samples with true label being
         i-th class and predicted label being j-th class.
 
+        Confusion matrix as a dict whose keys are
+        ``(true_label, predicted_label)`` tuples and whose values indicate
+        the number of samples with the corresponding true label and
+        predicted label.
+
     See Also
     --------
     ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix
@@ -282,12 +294,26 @@ def confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None,
            [0, 0, 1],
           [1, 0, 2]])
 
+    With the ``as_dict`` parameter set to True:
+
     >>> y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
     >>> y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
-    >>> confusion_matrix(y_true, y_pred, labels=["ant", "bird", "cat"])
-    array([[2, 0, 0],
-           [0, 0, 1],
-           [1, 0, 2]])
+    >>> cm = confusion_matrix(y_true, y_pred,
labels=["ant", "bird", "cat"], as_dict=True) + + {('ant', 'ant'): 2, ('bird', 'ant'): 0, ('cat', 'ant'): 1, + ('ant', 'bird'): 0, ('bird', 'bird'): 0, ('cat', 'bird'): 0, + ('ant', 'cat'): 0, ('bird', 'cat'): 1, ('cat', 'cat'): 2} + + Dict can be converted to unstacked series, + + >>> import pandas as pd + >>> pd.Series(cm).unstack() + ant bird cat + ant 2 0 0 + bird 0 0 1 + cat 1 0 2 + In the binary case, we can extract true positives, etc as follows: @@ -346,6 +372,15 @@ def confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None, shape=(n_labels, n_labels), dtype=dtype, ).toarray() + if as_dict: + label_list = labels.tolist() + cm_lol = cm.tolist() + cm_dict = {(str(label_list[j]), str(label_list[i])): cm_lol[j][i] + for i in range(0, len(label_list)) + for j in range(0, len(cm_lol))} + + return cm_dict + with np.errstate(all='ignore'): if normalize == 'true': cm = cm / cm.sum(axis=1, keepdims=True) @@ -1985,7 +2020,7 @@ class 2 1.00 0.67 0.80 3 if labels_given: warnings.warn( "labels size, {0}, does not match size of target_names, {1}" - .format(len(labels), len(target_names)) + .format(len(labels), len(target_names)) ) else: raise ValueError( @@ -2047,8 +2082,9 @@ class 2 1.00 0.67 0.80 3 else: if line_heading == 'accuracy': row_fmt_accuracy = '{:>{width}s} ' + \ - ' {:>9.{digits}}' * 2 + ' {:>9.{digits}f}' + \ - ' {:>9}\n' + ' {:>9.{digits}}' * 2 +\ + ' {:>9.{digits}f}' + \ + ' {:>9}\n' report += row_fmt_accuracy.format(line_heading, '', '', *avg[2:], width=width, digits=digits) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index c32e9c89ada47..0cec886cf2443 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1,4 +1,3 @@ - from functools import partial from itertools import product from itertools import chain @@ -429,7 +428,6 @@ def test(y_true, y_pred): cm = multilabel_confusion_matrix(y_true, y_pred) assert_array_equal(cm, [[[17, 8], [3, 22]], [[22, 3], [8, 17]]]) - test(y_true, y_pred) test([str(y) for y in y_true], [str(y) for y in y_pred]) @@ -590,6 +588,23 @@ def test_confusion_matrix_normalize_single_class(): assert not rec +def test_confusion_matrix_pprint(): + # Test pprint confusion matrix - binary classification case + y_true, y_pred, _ = make_prediction() + + def test(y_true, y_pred): + cm = confusion_matrix(y_true, y_pred, as_dict=True) + print(cm) + assert cm == {('0', '0'): 19, ('1', '0'): 4, ('2', '0'): 0, + ('0', '1'): 4, ('1', '1'): 3, ('2', '1'): 2, + ('0', '2'): 1, ('1', '2'): 24, ('2', '2'): 18} + + test(y_true, y_pred) + test([str(y) for y in y_true], + [str(y) for y in y_pred]) + + + def test_cohen_kappa(): # These label vectors reproduce the contingency matrix from Artstein and # Poesio (2008), Table 1: np.array([[20, 20], [10, 50]]).