weighted metrics: fix sample_weight handling for average=samples · scikit-learn/scikit-learn@24c9340 · GitHub


Commit 24c9340

weighted metrics: fix sample_weight handling for average=samples
1 parent 1d5ba2a commit 24c9340

File tree

2 files changed: +58, -34 lines changed

sklearn/metrics/metrics.py

Lines changed: 29 additions & 13 deletions
@@ -472,34 +472,48 @@ def _average_binary_score(binary_metric, y_true, y_score, average,
 
     y_true, y_score = check_arrays(y_true, y_score)
 
+    not_average_axis = 1
+    score_weight = sample_weight
+    average_weight = None
+
     if average == "micro":
+        if score_weight is not None:
+            score_weight = np.repeat(score_weight, y_true.shape[1])
         y_true = y_true.ravel()
         y_score = y_score.ravel()
 
-    if average == 'weighted':
-        weights = np.sum(y_true, axis=0)
-        if weights.sum() == 0:
+    elif average == 'weighted':
+        if score_weight is not None:
+            average_weight = np.sum(np.multiply(
+                y_true, np.reshape(score_weight, (-1, 1))), axis=0)
+        else:
+            average_weight = np.sum(y_true, axis=0)
+        if average_weight.sum() == 0:
             return 0
-    else:
-        weights = None
+
+    elif average == 'samples':
+        # swap average_weight <-> score_weight
+        average_weight = score_weight
+        score_weight = None
+        not_average_axis = 0
 
     if y_true.ndim == 1:
         y_true = y_true.reshape((-1, 1))
 
     if y_score.ndim == 1:
         y_score = y_score.reshape((-1, 1))
 
-    not_average_axis = 0 if average == 'samples' else 1
     n_classes = y_score.shape[not_average_axis]
     score = np.zeros((n_classes,))
     for c in range(n_classes):
         y_true_c = y_true.take([c], axis=not_average_axis).ravel()
         y_score_c = y_score.take([c], axis=not_average_axis).ravel()
-        score[c] = binary_metric(y_true_c, y_score_c)
+        score[c] = binary_metric(y_true_c, y_score_c,
+                                 sample_weight=score_weight)
 
     # Average the results
     if average is not None:
-        return np.average(score, weights=weights)
+        return np.average(score, weights=average_weight)
     else:
         return score
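
Note: after this hunk, average='samples' computes each per-sample score without weighting and applies sample_weight only in the final averaging step. A minimal sketch of that intended behaviour, using average_precision_score on made-up multilabel indicator data (illustrative only, not part of the commit):

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([[1, 0, 0, 1],
                   [0, 1, 1, 1],
                   [1, 1, 0, 1]])
y_score = np.array([[0.9, 0.2, 0.1, 0.8],
                    [0.1, 0.7, 0.6, 0.9],
                    [0.8, 0.9, 0.3, 0.7]])
sample_weight = np.array([1.0, 2.0, 3.0])

# Per-sample scores are computed without any weighting ...
per_sample = np.array([average_precision_score(t, s)
                       for t, s in zip(y_true, y_score)])
# ... and sample_weight only enters the final averaging step.
expected = np.average(per_sample, weights=sample_weight)

actual = average_precision_score(y_true, y_score, average='samples',
                                 sample_weight=sample_weight)
assert np.isclose(actual, expected)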

@@ -1687,20 +1701,20 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
         y_pred = y_pred == 1
 
         if sample_weight is None:
-            sample_weight = 1
+            sum_weight = 1
             dtype = int
         else:
-            sample_weight = np.expand_dims(sample_weight, 1)
+            sum_weight = np.expand_dims(sample_weight, 1)
             dtype = float
 
         sum_axis = 1 if average == 'samples' else 0
         tp_sum = np.sum(
-            np.multiply(np.logical_and(y_true, y_pred), sample_weight),
+            np.multiply(np.logical_and(y_true, y_pred), sum_weight),
             axis=sum_axis)
         pred_sum = np.sum(
-            np.multiply(y_pred, sample_weight), axis=sum_axis, dtype=dtype)
+            np.multiply(y_pred, sum_weight), axis=sum_axis, dtype=dtype)
         true_sum = np.sum(
-            np.multiply(y_true, sample_weight), axis=sum_axis, dtype=dtype)
+            np.multiply(y_true, sum_weight), axis=sum_axis, dtype=dtype)
 
     elif average == 'samples':
         raise ValueError("Sample-based precision, recall, fscore is "
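
Note: the rename to sum_weight keeps the original sample_weight available for the later averaging step, while the weighted counts broadcast against the indicator matrices and are reduced along sum_axis. A small sketch with toy arrays (assumed values, not from the commit) of how that broadcasting behaves:

import numpy as np

y_true = np.array([[1, 0, 1],
                   [0, 1, 1]], dtype=bool)
y_pred = np.array([[1, 1, 0],
                   [0, 1, 1]], dtype=bool)
sample_weight = np.array([2.0, 3.0])

sum_weight = np.expand_dims(sample_weight, 1)   # shape (n_samples, 1)

# average != 'samples': reduce over the sample axis -> per-label counts
tp_per_label = np.sum(np.multiply(np.logical_and(y_true, y_pred), sum_weight),
                      axis=0)
# average == 'samples': reduce over the label axis -> per-sample counts
tp_per_sample = np.sum(np.multiply(np.logical_and(y_true, y_pred), sum_weight),
                       axis=1)

print(tp_per_label)    # [2. 3. 3.]
print(tp_per_sample)   # [2. 6.]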
@@ -1778,6 +1792,8 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
         weights = true_sum
         if weights.sum() == 0:
             return 0, 0, 0, None
+    elif average == 'samples':
+        weights = sample_weight
     else:
         weights = None
 
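
Note: with this hunk, average='samples' uses sample_weight as the weights of the final per-sample average. A rough check of that behaviour on made-up data (illustrative only, not part of the commit):

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

y_true = np.array([[1, 0, 1],
                   [0, 1, 1],
                   [1, 1, 0]])
y_pred = np.array([[1, 1, 0],
                   [0, 1, 1],
                   [1, 0, 1]])
w = np.array([1.0, 2.0, 3.0])

p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                             average='samples',
                                             sample_weight=w)

# Per-sample precision = tp / predicted positives for each row,
# then averaged across samples with sample_weight as the weights.
per_sample_precision = (np.logical_and(y_true, y_pred).sum(axis=1)
                        / y_pred.sum(axis=1))
assert np.isclose(p, np.average(per_sample_precision, weights=w))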

sklearn/metrics/tests/test_metrics.py

Lines changed: 29 additions & 21 deletions
@@ -352,6 +352,14 @@
     "samples_precision_score", "samples_recall_score",
 ]
 
+MULTILABEL_INDICATOR_METRICS_WITH_SAMPLE_WEIGHT = [
+    "average_precision_score",
+    "weighted_average_precision_score",
+    "micro_average_precision_score",
+    "macro_average_precision_score",
+    "samples_average_precision_score",
+]
+
 # Regression metrics that support multioutput and weighted samples
 MULTIOUTPUT_METRICS_WITH_SAMPLE_WEIGHT = [
     "mean_squared_error",
@@ -2565,7 +2573,7 @@ def test_averaging_multilabel_all_ones():
 @ignore_warnings
 def check_sample_weight_invariance(name, metric, y1, y2):
     rng = np.random.RandomState(0)
-    sample_weight = rng.randint(10, size=len(y1))
+    sample_weight = rng.randint(1, 10, size=len(y1))
 
     # check that unit weights gives the same score as no weight
     unweighted_score = metric(y1, y2, sample_weight=None)
@@ -2591,14 +2599,13 @@ def check_sample_weight_invariance(name, metric, y1, y2):
                        "not equal (%f != %f) for %s" % (
                            weighted_score, weighted_score_list, name))
 
-    if not name.startswith('samples'):
-        # check that integer weights is the same as repeated samples
-        repeat_weighted_score = metric(
-            np.repeat(y1, sample_weight, axis=0),
-            np.repeat(y2, sample_weight, axis=0), sample_weight=None)
-        assert_almost_equal(
-            weighted_score, repeat_weighted_score,
-            err_msg="Weighting %s is not equal to repeating samples" % name)
+    # check that integer weights is the same as repeated samples
+    repeat_weighted_score = metric(
+        np.repeat(y1, sample_weight, axis=0),
+        np.repeat(y2, sample_weight, axis=0), sample_weight=None)
+    assert_almost_equal(
+        weighted_score, repeat_weighted_score,
+        err_msg="Weighting %s is not equal to repeating samples" % name)
 
     if not name.startswith('unnormalized'):
         # check that the score is invariant under scaling of the weights by a
@@ -2612,33 +2619,34 @@ def check_sample_weight_invariance(name, metric, y1, y2):
 
 
 def test_sample_weight_invariance():
-    # generate some data
+    # binary
     y1, y2, _ = make_prediction(binary=True)
-
     for name in METRICS_WITH_SAMPLE_WEIGHT:
         metric = ALL_METRICS[name]
         yield check_sample_weight_invariance, name, metric, y1, y2
 
-    # multilabel
+    # multilabel sequence
     n_classes = 3
     n_samples = 10
-    _, y1_multilabel = make_multilabel_classification(
+    _, y1 = make_multilabel_classification(
         n_features=1, n_classes=n_classes,
         random_state=0, n_samples=n_samples)
-    _, y2_multilabel = make_multilabel_classification(
+    _, y2 = make_multilabel_classification(
         n_features=1, n_classes=n_classes,
         random_state=1, n_samples=n_samples)
-
     for name in MULTILABEL_METRICS_WITH_SAMPLE_WEIGHT:
         metric = ALL_METRICS[name]
-        yield (check_sample_weight_invariance,
-               name, metric, y1_multilabel, y2_multilabel)
+        yield (check_sample_weight_invariance, name, metric, y1, y2)
 
-    # multioutput
-    y1_multioutput = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
-    y2_multioutput = np.array([[0, 0, 1, 1], [1, 0, 1, 1], [1, 1, 0, 1]])
+    # multilabel indicator
+    y1 = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
+    y2 = np.array([[0, 0, 1, 1], [1, 0, 1, 1], [1, 1, 0, 1]])
+    for name in MULTILABEL_INDICATOR_METRICS_WITH_SAMPLE_WEIGHT:
+        metric = ALL_METRICS[name]
+        yield (check_sample_weight_invariance, name, metric, y1, y2)
 
+    # multioutput
     for name in MULTIOUTPUT_METRICS_WITH_SAMPLE_WEIGHT:
         metric = ALL_METRICS[name]
         yield (check_sample_weight_invariance,
-               name, metric, y1_multioutput, y2_multioutput)
+               name, metric, y1, y2)
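
Note: the repeated-samples check now also covers 'samples'-averaged metrics. A standalone illustration of the invariance being tested, on made-up data (not from the test file): integer sample weights should behave like repeating the corresponding rows.

import numpy as np
from sklearn.metrics import average_precision_score

rng = np.random.RandomState(0)
y_true = np.array([[1, 0, 0, 1],
                   [0, 1, 1, 1],
                   [1, 1, 0, 1]])
y_score = rng.rand(3, 4)
weights = np.array([2, 1, 3])

# weighting each sample by an integer ...
weighted = average_precision_score(y_true, y_score, average='samples',
                                   sample_weight=weights)
# ... should match repeating that sample the same number of times
repeated = average_precision_score(np.repeat(y_true, weights, axis=0),
                                   np.repeat(y_score, weights, axis=0),
                                   average='samples')
assert np.isclose(weighted, repeated)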

0 commit comments
