ENH vectorize labeled ranking average precision by jnothman · Pull Request #3 · arjoly/scikit-learn · GitHub
Closed · wants to merge 7 commits
12 changes: 12 additions & 0 deletions doc/modules/classes.rst
@@ -787,6 +787,18 @@ details.
metrics.mean_squared_error
metrics.r2_score

Multilabel ranking metrics
--------------------------
See the :ref:`multilabel_ranking_metrics` section of the user guide for further
details.

.. autosummary::
:toctree: generated/
:template: function.rst

metrics.label_ranking_average_precision_score



Clustering metrics
------------------
46 changes: 46 additions & 0 deletions doc/modules/model_evaluation.rst
@@ -915,6 +915,52 @@ and with a list of labels format:
elimination with cross-validation.


.. _multilabel_ranking_metrics:

Multilabel ranking metrics
--------------------------

.. currentmodule:: sklearn.metrics

In multilabel ranking, the goal is to give high scores and a better rank to
the relevant labels. The :mod:`sklearn.metrics` module currently implements
label ranking average precision.

Label ranking average precision
...............................
The :func:`label_ranking_average_precision_score` function
implements label ranking average precision (AP), also simply called
average precision. For each sample and each relevant label :math:`r`, it
computes the ratio between the number of relevant labels with a score
greater than or equal to the score of :math:`r` and the rank of :math:`r`,
i.e. the total number of labels with a score greater than or equal to that
of :math:`r`; these ratios are then averaged over the relevant labels and
over the samples. Given a binary indicator matrix of the relevant labels
:math:`y \in \left\{0, 1\right\}^{n_\text{samples} \times n_\text{labels}}`
and the score associated with each label
:math:`\hat{f} \in \mathbb{R}^{n_\text{samples} \times n_\text{labels}}`,
the average precision is defined as

.. math::
AP(y, \hat{f}) = \frac{1}{n_{\text{samples}}}
\sum_{i=0}^{n_{\text{samples}} - 1} \frac{1}{|y_i|}
\sum_{j:y_{ij} = 1} \frac{|\mathcal{L}_{ij}|}{\text{rank}_{ij}}


with :math:`\mathcal{L}_{ij} = \left\{k: y_{ik} = 1, \hat{f}_{ik} \geq \hat{f}_{ij} \right\}`,
:math:`\text{rank}_{ij} = \left|\left\{k: \hat{f}_{ik} \geq \hat{f}_{ij} \right\}\right|`
and :math:`|\cdot|` denotes the cardinality (number of elements) of the set,
which corresponds to its :math:`\ell_0` norm.
The best value is one.

Here is a small example of usage of this function::

>>> import numpy as np
>>> from sklearn.metrics import label_ranking_average_precision_score
>>> y_true = np.array([[1, 0, 0], [0, 0, 1]])
>>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])
>>> label_ranking_average_precision_score(y_true, y_score) # doctest: +ELLIPSIS
0.416...
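
In this example, the first sample has a single relevant label (score 0.75) and
two labels score at least 0.75, so its contribution is :math:`1/2`. The second
sample's relevant label has the lowest score (0.1), which all three labels
match or exceed, giving :math:`1/3`. The reported value is the mean over
samples, :math:`(1/2 + 1/3) / 2 = 5/12 \approx 0.416`.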


.. _regression_metrics:

Regression metrics
2 changes: 2 additions & 0 deletions sklearn/metrics/__init__.py
@@ -15,6 +15,7 @@
hamming_loss,
hinge_loss,
jaccard_similarity_score,
label_ranking_average_precision_score,
log_loss,
matthews_corrcoef,
mean_squared_error,
@@ -72,6 +73,7 @@
'homogeneity_completeness_v_measure',
'homogeneity_score',
'jaccard_similarity_score',
'label_ranking_average_precision_score',
'log_loss',
'matthews_corrcoef',
'mean_squared_error',
63 changes: 63 additions & 0 deletions sklearn/metrics/metrics.py
@@ -22,6 +22,7 @@

import warnings
import numpy as np
from numpy.lib.stride_tricks import broadcast_arrays

from scipy.sparse import coo_matrix
from scipy.spatial.distance import hamming as sp_hamming
@@ -2165,6 +2166,68 @@ def hamming_loss(y_true, y_pred, classes=None):
raise ValueError("{0} is not supported".format(y_type))


def label_ranking_average_precision_score(y_true, y_score):
"""Compute ranking-based average precision

For each sample, ranking-based average precision averages, over each
relevant label r, the ratio between the number of relevant labels with a
score greater than or equal to the score of r and the number of labels
(relevant or not) with a score greater than or equal to the score of r.
The final score is obtained by averaging over the samples. A label with a
higher score is thus considered to have a better rank.

The best value is one.

This metric is used in multilabel ranking problems, where the goal
is to rank the labels associated with each sample.

Parameters
----------
y_true : array, shape = [n_samples, n_labels]
True binary labels in binary indicator format.

y_score : array, shape = [n_samples, n_labels]
Target scores, can either be probability estimates of the positive
class, confidence values, or binary decisions.

Returns
-------
score : float

Examples
--------
>>> import numpy as np
>>> from sklearn.metrics import label_ranking_average_precision_score
>>> y_true = np.array([[1, 0, 0], [0, 0, 1]])
>>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])
>>> label_ranking_average_precision_score(y_true, y_score) \
# doctest: +ELLIPSIS
0.416...

"""
y_true, y_score = check_arrays(y_true, y_score)

if y_true.shape != y_score.shape:
raise ValueError("y_true and y_score have different shape")

# Handle badly formatted arrays and the degenerate case with one label
y_type = type_of_target(y_true)
if (y_type != "multilabel-indicator"
and not (y_type == "binary" and y_true.ndim == 2)):
raise ValueError("{0} format is not supported".format(y_type))

# better[i, j, k]: does label j score at least as high as label k for
# sample i (within a small tolerance)?
better = (y_score[:, :, None] >= y_score[:, None, :] - 1e-5)

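# Mask out irrelevant labels: true_mask[i, j] is True when label j is not
# relevant for sample i; true_mask2 broadcasts it to the shape of `better`.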
true_mask = ~y_true.astype(bool)
true_mask2 = broadcast_arrays(better, true_mask[:, :, None])[-1]

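# L[i, k]: number of relevant labels with a score >= the score of label k.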
L = np.ma.masked_array(better, mask=true_mask2).sum(axis=1)
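# rank[i, k]: number of labels (relevant or not) with a score >= the score
# of label k; irrelevant labels k are masked out of the average.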
rank = np.ma.masked_array(better.sum(axis=1), mask=true_mask)
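# Per-sample mean of L / rank over the relevant labels; samples without any
# relevant label get a precision of 1 via filled(1).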
precision = np.divide(L, rank, dtype=float).mean(axis=-1).filled(1)
return precision.mean()


###############################################################################
# Regression metrics
###############################################################################
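
For intuition, the broadcasting above computes the same quantity as a naive
per-sample loop. The sketch below is for illustration only and is not part of
this pull request; it assumes dense NumPy arrays, ignores the small
floating-point tolerance used in the vectorized comparison, and the helper
name `_naive_lrap` is made up for this example.

import numpy as np

def _naive_lrap(y_true, y_score):
    # Reference (non-vectorized) label ranking average precision.
    n_samples, n_labels = y_true.shape
    sample_scores = []
    for i in range(n_samples):
        relevant = np.flatnonzero(y_true[i])
        if len(relevant) == 0:
            # A sample without relevant labels scores 1, as in the
            # vectorized version (filled(1)).
            sample_scores.append(1.0)
            continue
        total = 0.0
        for j in relevant:
            # rank: labels scoring at least as high as label j.
            rank = np.sum(y_score[i] >= y_score[i, j])
            # L: relevant labels scoring at least as high as label j.
            L = np.sum(y_score[i, relevant] >= y_score[i, j])
            total += L / float(rank)
        sample_scores.append(total / len(relevant))
    return np.mean(sample_scores)

With the y_true and y_score arrays from the docstring example, this returns
0.416..., matching label_ranking_average_precision_score.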