diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 93d21a146619a..369b08270d35d 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -14,6 +14,7 @@
 from .ranking import roc_curve
 from .ranking import dcg_score
 from .ranking import ndcg_score
+from .ranking import gini
 
 from .classification import accuracy_score
 from .classification import classification_report
diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 9755732a4f910..34194c10d9d40 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -858,3 +858,34 @@ def ndcg_score(y_true, y_score, k=5):
         scores.append(actual / best)
 
     return np.mean(scores)
+
+
+def gini(y_true, y_score):
+    """Compute the Gini coefficient from prediction scores.
+
+    The Gini coefficient is a linear rescaling of the area under the ROC
+    curve, Gini = 2 * AUC - 1 [1]_, where AUC is the value returned by
+    ``roc_auc_score(y_true, y_score)``.
+
+    Parameters
+    ----------
+    y_true : array, shape = [n]
+        Actual target values.
+
+    y_score : array, shape = [n]
+        Probability estimates of the positive class.
+
+    Returns
+    -------
+    gini : float
+        Two times the ROC AUC, minus one.
+
+    References
+    ----------
+    .. [1] David J. Hand and Robert J. Till (2001).
+           A Simple Generalisation of the Area Under the ROC Curve for
+           Multiple Class Classification Problems. In Machine Learning, 45,
+           pp.171-186 (Kluwer Academic Publishers).
+
+    """
+    return 2 * roc_auc_score(y_true, y_score) - 1
diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py
index b1f01c1a18e1b..4a37dbee7b07c 100644
--- a/sklearn/metrics/scorer.py
+++ b/sklearn/metrics/scorer.py
@@ -514,7 +514,13 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
 log_loss_scorer = make_scorer(log_loss, greater_is_better=False,
                               needs_proba=True)
 log_loss_scorer._deprecation_msg = deprecation_msg
+
+# Gini is a rescaled ROC AUC, so the scorer must receive continuous decision
+# values (needs_threshold=True) instead of hard class predictions.
+from .ranking import gini  # noqa: E402
+gini_scorer = make_scorer(gini, greater_is_better=True,
+                          needs_threshold=True)
 
 
 # Clustering scores
 adjusted_rand_scorer = make_scorer(adjusted_rand_score)