scikit-learn
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
Lines changed: 4 additions & 1 deletion
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
Lines changed: 15 additions & 3 deletions
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
Lines changed: 52 additions & 0 deletions
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
Lines changed: 41 additions & 27 deletions
@@ -73,6 +73,9 @@ Enhancements
    - :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier`
      now support ``partial_fit``. By `Asish Panda`_ and `Philipp Dowling`_.
 
+   - Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`.
+     By `Jatin Shah`_ and `Raghav R V`_.
+
 Bug fixes
 .........
 
@@ -3925,7 +3928,7 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 
 .. _Matteo Visconti di Oleggio Castello: http://www.mvdoc.me
 
-.. _Raghav R V: https://github.com/ragv
+.. _Raghav R V: https://github.com/rvraghav93
 
 .. _Trevor Stephens: http://trevorstephens.com/
 
 
@@ -398,7 +398,7 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True,
     return _weighted_sum(score, sample_weight, normalize)
 
 
-def matthews_corrcoef(y_true, y_pred):
+def matthews_corrcoef(y_true, y_pred, sample_weight=None):
     """Compute the Matthews correlation coefficient (MCC) for binary classes
 
     The Matthews correlation coefficient is used in machine learning as a
@@ -423,6 +423,9 @@ def matthews_corrcoef(y_true, y_pred):
     y_pred : array, shape = [n_samples]
         Estimated targets as returned by a classifier.
 
+    sample_weight : array-like of shape = [n_samples], default None
+        Sample weights.
+
     Returns
     -------
     mcc : float
@@ -457,8 +460,17 @@ def matthews_corrcoef(y_true, y_pred):
     lb.fit(np.hstack([y_true, y_pred]))
     y_true = lb.transform(y_true)
     y_pred = lb.transform(y_pred)
-    with np.errstate(invalid='ignore'):
-        mcc = np.corrcoef(y_true, y_pred)[0, 1]
+    mean_yt = np.average(y_true, weights=sample_weight)
+    mean_yp = np.average(y_pred, weights=sample_weight)
+
+    y_true_u_cent = y_true - mean_yt
+    y_pred_u_cent = y_pred - mean_yp
+
+    cov_ytyp = np.average(y_true_u_cent * y_pred_u_cent, weights=sample_weight)
+    var_yt = np.average(y_true_u_cent ** 2, weights=sample_weight)
+    var_yp = np.average(y_pred_u_cent ** 2, weights=sample_weight)
+
+    mcc = cov_ytyp / np.sqrt(var_yt * var_yp)
 
     if np.isnan(mcc):
         return 0.
 
@@ -331,6 +331,58 @@ def test_matthews_corrcoef_nan():
     assert_equal(matthews_corrcoef([0, 0], [0, 1]), 0.0)
 
 
+def test_matthews_corrcoef_against_numpy_corrcoef():
+    rng = np.random.RandomState(0)
+    y_true = rng.randint(0, 2, size=20)
+    y_pred = rng.randint(0, 2, size=20)
+
+    assert_almost_equal(matthews_corrcoef(y_true, y_pred),
+                        np.corrcoef(y_true, y_pred)[0, 1], 10)
+
+
+def test_matthews_corrcoef():
+    rng = np.random.RandomState(0)
+    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]
+
+    # corrcoef of same vectors must be 1
+    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)
+
+    # corrcoef, when the two vectors are opposites of each other, should be -1
+    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
+
+    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)
+    y_true_inv2 = label_binarize(y_true, ["a", "b"]) * -1
+    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)
+
+    # For the zero vector case the variance is 0, so the corrcoef is 0 / 0;
+    # computing it should emit a RuntimeWarning
+    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
+                               matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])
+
+    # But will output 0
+    assert_almost_equal(mcc, 0.)
+
+    # And also for any other vector with 0 variance
+    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
+                               matthews_corrcoef, y_true,
+                               rng.randint(-100, 100) * np.ones(20, dtype=int))
+
+    # But will output 0
+    assert_almost_equal(mcc, 0.)
+
+    # These two vectors have 0 correlation and hence mcc should be 0
+    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
+    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
+    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)
+
+    # Check that sample_weight is able to selectively exclude samples
+    mask = [1] * 10 + [0] * 10
+    # Now only the first half of the elements has a weight of 1 (the rest 0)
+    # and hence the mcc will not be a perfect 0 as in the previous case
+    assert_raises(AssertionError, assert_almost_equal,
+                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
+
+
 def test_precision_recall_f1_score_multiclass():
     # Test Precision Recall and F1 Score for multiclass classification task
     y_true, y_pred, _ = make_prediction(binary=False)
 
@@ -187,26 +187,41 @@
 # When you add a new metric or functionality, check if a general test
 # is already written.
 
-# Metric undefined with "binary" or "multiclass" input
-METRIC_UNDEFINED_MULTICLASS = [
-    "samples_f0.5_score", "samples_f1_score", "samples_f2_score",
-    "samples_precision_score", "samples_recall_score",
+# These metrics don't support binary inputs
+METRIC_UNDEFINED_BINARY = [
+    "samples_f0.5_score",
+    "samples_f1_score",
+    "samples_f2_score",
+    "samples_precision_score",
+    "samples_recall_score",
+    "coverage_error",
 
-    # Those metrics don't support multiclass outputs
-    "average_precision_score", "weighted_average_precision_score",
-    "micro_average_precision_score", "macro_average_precision_score",
+    "roc_auc_score",
+    "micro_roc_auc",
+    "weighted_roc_auc",
+    "macro_roc_auc",
+    "samples_roc_auc",
+
+    "average_precision_score",
+    "weighted_average_precision_score",
+    "micro_average_precision_score",
+    "macro_average_precision_score",
     "samples_average_precision_score",
 
+    "label_ranking_loss",
     "label_ranking_average_precision_score",
+]
 
-    "roc_auc_score", "micro_roc_auc", "weighted_roc_auc",
-    "macro_roc_auc",  "samples_roc_auc",
-
-    "coverage_error",
+# These metrics don't support multiclass inputs
+METRIC_UNDEFINED_MULTICLASS = [
     "brier_score_loss",
-    "label_ranking_loss",
+    "matthews_corrcoef_score",
 ]
 
+# Metrics undefined with "binary" or "multiclass" input
+METRIC_UNDEFINED_BINARY_MULTICLASS = set(METRIC_UNDEFINED_BINARY).union(
+    set(METRIC_UNDEFINED_MULTICLASS))
+
 # Metrics with an "average" argument
 METRICS_WITH_AVERAGING = [
     "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score"
@@ -346,7 +361,6 @@
 METRICS_WITHOUT_SAMPLE_WEIGHT = [
     "cohen_kappa_score",
     "confusion_matrix",
-    "matthews_corrcoef_score",
     "median_absolute_error",
 ]
 
@@ -359,10 +373,9 @@ def test_symmetry():
     y_pred = random_state.randint(0, 2, size=(20, ))
 
     # We shouldn't forget any metrics
-    assert_equal(set(SYMMETRIC_METRICS).union(NOT_SYMMETRIC_METRICS,
-                                              THRESHOLDED_METRICS,
-                                              METRIC_UNDEFINED_MULTICLASS),
-                 set(ALL_METRICS))
+    assert_equal(set(SYMMETRIC_METRICS).union(
+        NOT_SYMMETRIC_METRICS, THRESHOLDED_METRICS,
+        METRIC_UNDEFINED_BINARY_MULTICLASS), set(ALL_METRICS))
 
     assert_equal(
         set(SYMMETRIC_METRICS).intersection(set(NOT_SYMMETRIC_METRICS)),
@@ -390,7 +403,7 @@ def test_sample_order_invariance():
     y_true_shuffle, y_pred_shuffle = shuffle(y_true, y_pred, random_state=0)
 
     for name, metric in ALL_METRICS.items():
-        if name in METRIC_UNDEFINED_MULTICLASS:
+        if name in METRIC_UNDEFINED_BINARY_MULTICLASS:
             continue
 
         assert_almost_equal(metric(y_true, y_pred),
@@ -457,7 +470,7 @@ def test_format_invariance_with_1d_vectors():
     y2_row = np.reshape(y2_1d, (1, -1))
 
     for name, metric in ALL_METRICS.items():
-        if name in METRIC_UNDEFINED_MULTICLASS:
+        if name in METRIC_UNDEFINED_BINARY_MULTICLASS:
             continue
 
         measure = metric(y1, y2)
@@ -532,7 +545,7 @@ def test_invariance_string_vs_numbers_labels():
     labels_str = ["eggs", "spam"]
 
     for name, metric in CLASSIFICATION_METRICS.items():
-        if name in METRIC_UNDEFINED_MULTICLASS:
+        if name in METRIC_UNDEFINED_BINARY_MULTICLASS:
             continue
 
         measure_with_number = metric(y1, y2)
@@ -613,7 +626,8 @@ def check_single_sample_multioutput(name):
 
 def test_single_sample():
     for name in ALL_METRICS:
-        if name in METRIC_UNDEFINED_MULTICLASS or name in THRESHOLDED_METRICS:
+        if (name in METRIC_UNDEFINED_BINARY_MULTICLASS or
+                name in THRESHOLDED_METRICS):
             # Those metrics are not always defined with one sample
             # or in multiclass classification
             continue
@@ -915,9 +929,9 @@ def check_sample_weight_invariance(name, metric, y1, y2):
                                  sample_weight=sample_weight.tolist())
     assert_almost_equal(
         weighted_score, weighted_score_list,
-        err_msg="Weighted scores for array and list sample_weight input are "
-                "not equal (%f != %f) for %s" % (
-                    weighted_score, weighted_score_list, name))
+        err_msg=("Weighted scores for array and list "
+                 "sample_weight input are not equal (%f != %f) for %s") % (
+                     weighted_score, weighted_score_list, name))
 
     # check that integer weights is the same as repeated samples
     repeat_weighted_score = metric(
@@ -963,14 +977,14 @@ def check_sample_weight_invariance(name, metric, y1, y2):
 def test_sample_weight_invariance(n_samples=50):
     random_state = check_random_state(0)
 
-    # binary output
+    # binary
     random_state = check_random_state(0)
     y_true = random_state.randint(0, 2, size=(n_samples, ))
     y_pred = random_state.randint(0, 2, size=(n_samples, ))
     y_score = random_state.random_sample(size=(n_samples,))
     for name in ALL_METRICS:
         if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or
-                name in METRIC_UNDEFINED_MULTICLASS):
+                name in METRIC_UNDEFINED_BINARY):
             continue
         metric = ALL_METRICS[name]
         if name in THRESHOLDED_METRICS:
@@ -985,7 +999,7 @@ def test_sample_weight_invariance(n_samples=50):
     y_score = random_state.random_sample(size=(n_samples, 5))
     for name in ALL_METRICS:
         if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or
-                name in METRIC_UNDEFINED_MULTICLASS):
+                name in METRIC_UNDEFINED_BINARY_MULTICLASS):
             continue
         metric = ALL_METRICS[name]
         if name in THRESHOLDED_METRICS: