From a44d480f60f3c2873bcf58725066cbeed6ee4a06 Mon Sep 17 00:00:00 2001
From: Greg Stupp <gstupp@scripps.edu>
Date: Wed, 17 Aug 2016 18:24:05 -0700
Subject: [PATCH 1/4] use sparse contingency matrix for supervised cluster
 metrics; remove max_n_classes option

---
 sklearn/metrics/cluster/supervised.py         | 230 +++++++++---------
 .../metrics/cluster/tests/test_supervised.py  |  66 ++---
 2 files changed, 152 insertions(+), 144 deletions(-)

diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py
index 131c14b5078ca..6960707ac4a03 100644
--- a/sklearn/metrics/cluster/supervised.py
+++ b/sklearn/metrics/cluster/supervised.py
@@ -9,12 +9,14 @@
 #          Diego Molla <dmolla-aliod@gmail.com>
 #          Arnaud Fouchet <foucheta@gmail.com>
 #          Thierry Guillemot <thierry.guillemot.work@gmail.com>
+#          Gregory Stupp <stuppie@gmail.com>
 # License: BSD 3 clause
 
 from math import log
 
 from scipy.misc import comb
-from scipy.sparse import coo_matrix
+from scipy.sparse import coo_matrix, find
+from scipy.sparse.data import _data_matrix
 import numpy as np
 
 from .expected_mutual_info_fast import expected_mutual_information
@@ -46,7 +48,7 @@ def check_clusterings(labels_true, labels_pred):
     return labels_true, labels_pred
 
 
-def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000):
+def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False):
     """Build a contingency matrix describing the relationship between labels.
 
     Parameters
@@ -62,38 +64,36 @@ def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000):
         matrix. This helps to stop NaN propagation.
         If ``None``, nothing is adjusted.
 
-    max_n_classes : int, optional (default=5000)
-        Maximal number of classeses handled for contingency_matrix.
-        This help to avoid Memory error with regression target
-        for mutual_information.
+    sparse: boolean, optional.
+        If True, return a sparse continency matrix. If ``eps is not None``,
+        and ``sparse is True``, will throw ValueError.
 
     Returns
     -------
-    contingency: array, shape=[n_classes_true, n_classes_pred]
+    contingency: {array-like, sparse matrix}, shape=[n_classes_true, n_classes_pred]
         Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in
         true class :math:`i` and in predicted class :math:`j`. If
         ``eps is None``, the dtype of this array will be integer. If ``eps`` is
         given, the dtype will be float.
     """
+
+    if eps is not None and sparse:
+        raise ValueError("Cannot set 'eps' and return a sparse matrix")
+
     classes, class_idx = np.unique(labels_true, return_inverse=True)
     clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)
     n_classes = classes.shape[0]
     n_clusters = clusters.shape[0]
-    if n_classes > max_n_classes:
-        raise ValueError("Too many classes for a clustering metric. If you "
-                         "want to increase the limit, pass parameter "
-                         "max_n_classes to the scoring function")
-    if n_clusters > max_n_classes:
-        raise ValueError("Too many clusters for a clustering metric. If you "
-                         "want to increase the limit, pass parameter "
-                         "max_n_classes to the scoring function")
+
     # Using coo_matrix to accelerate simple histogram calculation,
     # i.e. bins are consecutive integers
     # Currently, coo_matrix is faster than histogram2d for simple cases
     contingency = coo_matrix((np.ones(class_idx.shape[0]),
                               (class_idx, cluster_idx)),
                              shape=(n_classes, n_clusters),
-                             dtype=np.int).toarray()
+                             dtype=np.int)
+    if not sparse:
+        contingency = contingency.toarray()
     if eps is not None:
         # don't use += as contingency is integer
         contingency = contingency + eps
@@ -102,7 +102,7 @@ def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000):
 
 # clustering measures
 
-def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000):
+def adjusted_rand_score(labels_true, labels_pred, contingency=None):
     """Rand index adjusted for chance.
 
     The Rand Index computes a similarity measure between two clusterings
@@ -134,10 +134,10 @@ def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000):
     labels_pred : array, shape = [n_samples]
         Cluster labels to evaluate
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the adjusted_rand_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
+    contingency: {None, sparse matrix}, shape = [n_classes_true, n_classes_pred]
+        A contingency matrix given by the :func:`contingency_matrix` function.
+        If value is ``None``, it will be computed, otherwise the given value is
+        used, with ``labels_true`` and ``labels_pred`` ignored.
 
     Returns
     -------
@@ -188,33 +188,49 @@ def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000):
     adjusted_mutual_info_score: Adjusted Mutual Information
 
     """
-    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
-    n_samples = labels_true.shape[0]
-    classes = np.unique(labels_true)
-    clusters = np.unique(labels_pred)
+    if contingency is None:
+        labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
+        n_samples = labels_true.shape[0]
+        n_classes = np.unique(labels_true).shape[0]
+        n_clusters = np.unique(labels_pred).shape[0]
+    elif isinstance(contingency, _data_matrix):  # scipy.sparse.data._data_matrix
+        n_samples = contingency.nnz
+        n_classes, n_clusters = contingency.shape
+    else:
+        raise ValueError("'contingency' must be a sparse matrix or None")
+
     # Special limit cases: no clustering since the data is not split;
     # or trivial clustering where each document is assigned a unique cluster.
     # These are perfect matches hence return 1.0.
-    if (classes.shape[0] == clusters.shape[0] == 1 or
-            classes.shape[0] == clusters.shape[0] == 0 or
-            classes.shape[0] == clusters.shape[0] == len(labels_true)):
+    if (n_classes == n_clusters == 1 or
+            n_classes == n_clusters == 0 or
+            n_classes == n_clusters == n_samples):
         return 1.0
 
-    contingency = contingency_matrix(labels_true, labels_pred,
-                                     max_n_classes=max_n_classes)
+    # Compute contingency matrix if we weren't given it
+    if contingency is None:
+        contingency = contingency_matrix(labels_true, labels_pred)
 
     # Compute the ARI using the contingency data
-    sum_comb_c = sum(comb2(n_c) for n_c in contingency.sum(axis=1))
-    sum_comb_k = sum(comb2(n_k) for n_k in contingency.sum(axis=0))
+    if isinstance(contingency, np.ndarray):
+        # For an array
+        sum_comb_c = sum(comb2(n_c) for n_c in contingency.sum(axis=1))
+        sum_comb_k = sum(comb2(n_k) for n_k in contingency.sum(axis=0))
+        sum_comb = sum(comb2(n_ij) for n_ij in contingency.flatten())
+    elif isinstance(contingency, _data_matrix):
+        # For a sparse matrix
+        sum_comb_c = sum(comb2(n_c) for n_c in np.array(contingency.sum(axis=1)))
+        sum_comb_k = sum(comb2(n_k) for n_k in np.array(contingency.sum(axis=0)).T)
+        sum_comb = sum(comb2(n_ij) for n_ij in find(contingency)[2])
+    else:
+        raise ValueError("Unsupported type for 'contingency': " + str(type(contingency)))
 
-    sum_comb = sum(comb2(n_ij) for n_ij in contingency.flatten())
     prod_comb = (sum_comb_c * sum_comb_k) / float(comb(n_samples, 2))
     mean_comb = (sum_comb_k + sum_comb_c) / 2.
-    return ((sum_comb - prod_comb) / (mean_comb - prod_comb))
+    return float((sum_comb - prod_comb) / (mean_comb - prod_comb))
 
 
-def homogeneity_completeness_v_measure(labels_true, labels_pred,
-                                       max_n_classes=5000):
+def homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=False):
     """Compute the homogeneity and completeness and V-Measure scores at once.
 
     Those metrics are based on normalized conditional entropy measures of
@@ -248,10 +264,9 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred,
     labels_pred : array, shape = [n_samples]
         cluster labels to evaluate
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the adjusted_rand_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
+    sparse: boolean, optional.
+        If True, intermediate calculation of the contingency matrix
+        will calculate a sparse continency matrix.
 
     Returns
     -------
@@ -278,8 +293,11 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred,
     entropy_C = entropy(labels_true)
     entropy_K = entropy(labels_pred)
 
-    MI = mutual_info_score(labels_true, labels_pred,
-                           max_n_classes=max_n_classes)
+    if sparse:
+        contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
+        MI = mutual_info_score(None, None, contingency=contingency)
+    else:
+        MI = mutual_info_score(labels_true, labels_pred)
 
     homogeneity = MI / (entropy_C) if entropy_C else 1.0
     completeness = MI / (entropy_K) if entropy_K else 1.0
@@ -293,7 +311,7 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred,
     return homogeneity, completeness, v_measure_score
 
 
-def homogeneity_score(labels_true, labels_pred, max_n_classes=5000):
+def homogeneity_score(labels_true, labels_pred, sparse=False):
     """Homogeneity metric of a cluster labeling given a ground truth.
 
     A clustering result satisfies homogeneity if all of its clusters
@@ -317,10 +335,9 @@ def homogeneity_score(labels_true, labels_pred, max_n_classes=5000):
     labels_pred : array, shape = [n_samples]
         cluster labels to evaluate
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the adjusted_rand_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
+    sparse: boolean, optional.
+        If True, intermediate calculation of the contingency matrix
+        will calculate a sparse continency matrix.
 
     Returns
     -------
@@ -369,11 +386,10 @@ def homogeneity_score(labels_true, labels_pred, max_n_classes=5000):
       0.0...
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred,
-                                              max_n_classes)[0]
+    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse)[0]
 
 
-def completeness_score(labels_true, labels_pred, max_n_classes=5000):
+def completeness_score(labels_true, labels_pred, sparse=False):
     """Completeness metric of a cluster labeling given a ground truth.
 
     A clustering result satisfies completeness if all the data points
@@ -397,10 +413,9 @@ def completeness_score(labels_true, labels_pred, max_n_classes=5000):
     labels_pred : array, shape = [n_samples]
         cluster labels to evaluate
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the adjusted_rand_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
+    sparse: boolean, optional.
+        If True, intermediate calculation of the contingency matrix
+        will calculate a sparse continency matrix.
 
     Returns
     -------
@@ -445,11 +460,10 @@ def completeness_score(labels_true, labels_pred, max_n_classes=5000):
       0.0
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred,
-                                              max_n_classes)[1]
+    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse)[1]
 
 
-def v_measure_score(labels_true, labels_pred, max_n_classes=5000):
+def v_measure_score(labels_true, labels_pred, sparse=False):
     """V-measure cluster labeling given a ground truth.
 
     This score is identical to :func:`normalized_mutual_info_score`.
@@ -477,10 +491,9 @@ def v_measure_score(labels_true, labels_pred, max_n_classes=5000):
     labels_pred : array, shape = [n_samples]
         cluster labels to evaluate
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the adjusted_rand_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
+    sparse: boolean, optional.
+        If True, intermediate calculation of the contingency matrix
+        will calculate a sparse continency matrix.
 
     Returns
     -------
@@ -546,12 +559,10 @@ def v_measure_score(labels_true, labels_pred, max_n_classes=5000):
       0.0...
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred,
-                                              max_n_classes)[2]
+    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse)[2]
 
 
-def mutual_info_score(labels_true, labels_pred, contingency=None,
-                      max_n_classes=5000):
+def mutual_info_score(labels_true, labels_pred, contingency=None):
     """Mutual Information between two clusterings.
 
     The Mutual Information is a measure of the similarity between two labels of
@@ -586,16 +597,11 @@ def mutual_info_score(labels_true, labels_pred, contingency=None,
     labels_pred : array, shape = [n_samples]
         A clustering of the data into disjoint subsets.
 
-    contingency: None or array, shape = [n_classes_true, n_classes_pred]
+    contingency: {None, array, sparse matrix}, shape = [n_classes_true, n_classes_pred]
         A contingency matrix given by the :func:`contingency_matrix` function.
         If value is ``None``, it will be computed, otherwise the given value is
         used, with ``labels_true`` and ``labels_pred`` ignored.
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the mutual_info_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
-
     Returns
     -------
     mi: float
@@ -608,27 +614,43 @@ def mutual_info_score(labels_true, labels_pred, contingency=None,
     """
     if contingency is None:
         labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
-        contingency = contingency_matrix(labels_true, labels_pred,
-                                         max_n_classes=max_n_classes)
-    contingency = np.array(contingency, dtype='float')
-    contingency_sum = np.sum(contingency)
-    pi = np.sum(contingency, axis=1)
-    pj = np.sum(contingency, axis=0)
-    outer = np.outer(pi, pj)
-    nnz = contingency != 0.0
-    # normalized contingency
-    contingency_nm = contingency[nnz]
-    log_contingency_nm = np.log(contingency_nm)
-    contingency_nm /= contingency_sum
-    # log(a / b) should be calculated as log(a) - log(b) for
-    # possible loss of precision
-    log_outer = -np.log(outer[nnz]) + log(pi.sum()) + log(pj.sum())
-    mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
-          contingency_nm * log_outer)
-    return mi.sum()
+        contingency = contingency_matrix(labels_true, labels_pred)
+    if isinstance(contingency, np.ndarray):
+        # For an array
+        contingency = np.array(contingency, dtype='float')
+        contingency_sum = np.sum(contingency)
+        pi = np.sum(contingency, axis=1)
+        pj = np.sum(contingency, axis=0)
+        outer = np.outer(pi, pj)
+        nnz = contingency != 0.0
+        # normalized contingency
+        contingency_nm = contingency[nnz]
+        log_contingency_nm = np.log(contingency_nm)
+        contingency_nm /= contingency_sum
+        # log(a / b) should be calculated as log(a) - log(b) for
+        # possible loss of precision
+        log_outer = -np.log(outer[nnz]) + log(pi.sum()) + log(pj.sum())
+        mi = (contingency_nm * (log_contingency_nm - log(contingency_sum))
+              + contingency_nm * log_outer)
+        return mi.sum()
+    elif isinstance(contingency, _data_matrix):
+        # For a sparse matrix
+        contingency_sum = contingency.sum()
+        pi = np.array(contingency.sum(axis=1))
+        pj = np.array(contingency.sum(axis=0)).T
+        nnzx, nnzy, nnz_val = find(contingency)
+        log_contingency_nm = np.log(nnz_val)
+        contingency_nm = nnz_val * 1.0 / contingency_sum  # python2 integer division...
+        # Don't need to calculate the full outer product. Just for the non-zero values
+        outer = np.array([pi[x] * pj[y] for x, y in zip(nnzx, nnzy)]).T
+        log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
+        mi = contingency_nm * (log_contingency_nm - log(contingency_sum)) + contingency_nm * log_outer
+        return mi.sum()
+    else:
+        raise ValueError("Unsupported type for 'contingency': " + str(type(contingency)))
 
 
-def adjusted_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
+def adjusted_mutual_info_score(labels_true, labels_pred):
     """Adjusted Mutual Information between two clusterings.
 
     Adjusted Mutual Information (AMI) is an adjustment of the Mutual
@@ -661,11 +683,6 @@ def adjusted_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     labels_pred : array, shape = [n_samples]
         A clustering of the data into disjoint subsets.
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the adjusted_rand_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
-
     Returns
     -------
     ami: float(upperlimited by 1.0)
@@ -716,8 +733,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     if (classes.shape[0] == clusters.shape[0] == 1 or
             classes.shape[0] == clusters.shape[0] == 0):
         return 1.0
-    contingency = contingency_matrix(labels_true, labels_pred,
-                                     max_n_classes=max_n_classes)
+    contingency = contingency_matrix(labels_true, labels_pred)
     contingency = np.array(contingency, dtype='float')
     # Calculate the MI for the two clusterings
     mi = mutual_info_score(labels_true, labels_pred,
@@ -730,7 +746,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     return ami
 
 
-def normalized_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
+def normalized_mutual_info_score(labels_true, labels_pred):
     """Normalized Mutual Information between two clusterings.
 
     Normalized Mutual Information (NMI) is an normalization of the Mutual
@@ -760,11 +776,6 @@ def normalized_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     labels_pred : array, shape = [n_samples]
         A clustering of the data into disjoint subsets.
 
-    max_n_classes: int, optional (default=5000)
-        Maximal number of classes handled by the adjusted_rand_score
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
-
     Returns
     -------
     nmi: float
@@ -803,8 +814,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     if (classes.shape[0] == clusters.shape[0] == 1 or
             classes.shape[0] == clusters.shape[0] == 0):
         return 1.0
-    contingency = contingency_matrix(labels_true, labels_pred,
-                                     max_n_classes=max_n_classes)
+    contingency = contingency_matrix(labels_true, labels_pred)
     contingency = np.array(contingency, dtype='float')
     # Calculate the MI for the two clusterings
     mi = mutual_info_score(labels_true, labels_pred,
@@ -816,7 +826,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     return nmi
 
 
-def fowlkes_mallows_score(labels_true, labels_pred, max_n_classes=5000):
+def fowlkes_mallows_score(labels_true, labels_pred):
     """Measure the similarity of two clusterings of a set of points.
 
     The Fowlkes-Mallows index (FMI) is defined as the geometric mean between of
@@ -845,11 +855,6 @@ def fowlkes_mallows_score(labels_true, labels_pred, max_n_classes=5000):
     labels_pred : array, shape = (``n_samples``, )
         A clustering of the data into disjoint subsets.
 
-    max_n_classes : int, optional (default=5000)
-        Maximal number of classes handled by the Fowlkes-Mallows
-        metric. Setting it too high can lead to MemoryError or OS
-        freeze
-
     Returns
     -------
     score : float
@@ -886,8 +891,7 @@ def fowlkes_mallows_score(labels_true, labels_pred, max_n_classes=5000):
     labels_true, labels_pred = check_clusterings(labels_true, labels_pred,)
     n_samples, = labels_true.shape
 
-    c = contingency_matrix(labels_true, labels_pred,
-                           max_n_classes=max_n_classes)
+    c = contingency_matrix(labels_true, labels_pred)
     tk = np.dot(c.ravel(), c.ravel()) - n_samples
     pk = np.sum(np.sum(c, axis=0) ** 2) - n_samples
     qk = np.sum(np.sum(c, axis=1) ** 2) - n_samples
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index 828c2c544574c..f345ee5615b0b 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -55,6 +55,22 @@ def test_perfect_matches():
         assert_equal(score_func([0., 1., 2.], [42., 7., 2.]), 1.0)
         assert_equal(score_func([0, 1, 2], [42, 7, 2]), 1.0)
 
+def test_homogeneity_completeness_v_measure_sparse():
+    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
+    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
+    h, c, v = homogeneity_completeness_v_measure(labels_a, labels_b)
+    h_s, c_s, v_s = homogeneity_completeness_v_measure(labels_a, labels_b, sparse = True)
+    assert_array_almost_equal([h, c, v],[h_s, c_s, v_s])
+
+""" Takes too long...
+def test_homogeneity_completeness_v_measure_large():
+    # This will fail without sparse matrices with any reasonable amount of RAM (<~1TB)
+    from random import randrange
+    labels_a = [randrange(100000) for x in range(1000000)]
+    labels_b = [randrange(100000) for x in range(1000000)]
+    h_s, c_s, v_s = homogeneity_completeness_v_measure(labels_a, labels_b, sparse = True)    
+    assert_raises(MemoryError, homogeneity_completeness_v_measure, labels_a, labels_b)
+"""
 
 def test_homogeneous_but_not_complete_labeling():
     # homogeneous but not complete clustering
@@ -183,19 +199,30 @@ def test_contingency_matrix():
     assert_array_almost_equal(C, C2 + .1)
 
 
+def test_contingency_matrix_sparse():
+    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
+    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
+    C = contingency_matrix(labels_a, labels_b)
+    C_sparse = contingency_matrix(labels_a, labels_b, sparse = True).toarray()
+    assert_array_almost_equal(C, C_sparse)
+    
+
+def test_adjusted_rand_score_sparse():
+    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
+    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
+    C_sparse = contingency_matrix(labels_a, labels_b, sparse = True)
+    assert_almost_equal(adjusted_rand_score(labels_a,labels_b), adjusted_rand_score(None, None, C_sparse))
+
+
 def test_exactly_zero_info_score():
     # Check numerical stability when information is exactly zero
     for i in np.logspace(1, 4, 4).astype(np.int):
         labels_a, labels_b = np.ones(i, dtype=np.int),\
             np.arange(i, dtype=np.int)
-        assert_equal(normalized_mutual_info_score(labels_a, labels_b,
-                                                  max_n_classes=1e4), 0.0)
-        assert_equal(v_measure_score(labels_a, labels_b,
-                                     max_n_classes=1e4), 0.0)
-        assert_equal(adjusted_mutual_info_score(labels_a, labels_b,
-                                                max_n_classes=1e4), 0.0)
-        assert_equal(normalized_mutual_info_score(labels_a, labels_b,
-                                                  max_n_classes=1e4), 0.0)
+        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
+        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
+        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
+        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
 
 
 def test_v_measure_and_mutual_information(seed=36):
@@ -209,29 +236,6 @@ def test_v_measure_and_mutual_information(seed=36):
                             (entropy(labels_a) + entropy(labels_b)), 0)
 
 
-def test_max_n_classes():
-    rng = np.random.RandomState(seed=0)
-    labels_true = rng.rand(53)
-    labels_pred = rng.rand(53)
-    labels_zero = np.zeros(53)
-    labels_true[:2] = 0
-    labels_zero[:3] = 1
-    labels_pred[:2] = 0
-    for score_func in score_funcs:
-        expected = ("Too many classes for a clustering metric. If you "
-                    "want to increase the limit, pass parameter "
-                    "max_n_classes to the scoring function")
-        assert_raise_message(ValueError, expected, score_func,
-                             labels_true, labels_pred,
-                             max_n_classes=50)
-        expected = ("Too many clusters for a clustering metric. If you "
-                    "want to increase the limit, pass parameter "
-                    "max_n_classes to the scoring function")
-        assert_raise_message(ValueError, expected, score_func,
-                             labels_zero, labels_pred,
-                             max_n_classes=50)
-
-
 def test_fowlkes_mallows_score():
     # General case
     score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],

From cc50125b095ae8b374508fda0e3e4054cfe44531 Mon Sep 17 00:00:00 2001
From: Greg Stupp <gstupp@scripps.edu>
Date: Wed, 17 Aug 2016 19:11:14 -0700
Subject: [PATCH 2/4] merge sparse and max_n_classes functionality

---
 sklearn/metrics/cluster/supervised.py         | 117 +++++++++++++-----
 .../metrics/cluster/tests/test_supervised.py  |  78 ++++++++----
 2 files changed, 139 insertions(+), 56 deletions(-)

diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py
index 6960707ac4a03..d8a3214ec3158 100644
--- a/sklearn/metrics/cluster/supervised.py
+++ b/sklearn/metrics/cluster/supervised.py
@@ -14,10 +14,10 @@
 
 from math import log
 
+import numpy as np
 from scipy.misc import comb
 from scipy.sparse import coo_matrix, find
 from scipy.sparse.data import _data_matrix
-import numpy as np
 
 from .expected_mutual_info_fast import expected_mutual_information
 from ...utils.fixes import bincount
@@ -48,7 +48,7 @@ def check_clusterings(labels_true, labels_pred):
     return labels_true, labels_pred
 
 
-def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False):
+def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000, sparse=False):
     """Build a contingency matrix describing the relationship between labels.
 
     Parameters
@@ -59,11 +59,16 @@ def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False):
     labels_pred : array, shape = [n_samples]
         Cluster labels to evaluate
 
-    eps: None or float
+    eps: None or float, optional.
         If a float, that value is added to all values in the contingency
         matrix. This helps to stop NaN propagation.
         If ``None``, nothing is adjusted.
 
+    max_n_classes : int, optional (default=5000)
+        Maximal number of classeses handled for contingency_matrix.
+        This help to avoid Memory error with regression target
+        for mutual_information.
+
     sparse: boolean, optional.
         If True, return a sparse continency matrix. If ``eps is not None``,
         and ``sparse is True``, will throw ValueError.
@@ -84,12 +89,18 @@ def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False):
     clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)
     n_classes = classes.shape[0]
     n_clusters = clusters.shape[0]
-
+    if not sparse and (n_classes > max_n_classes):
+        raise ValueError("Too many classes for a clustering metric. If you "
+                         "want to increase the limit, pass parameter "
+                         "max_n_classes to the scoring function")
+    if not sparse and (n_clusters > max_n_classes):
+        raise ValueError("Too many clusters for a clustering metric. If you "
+                         "want to increase the limit, pass parameter "
+                         "max_n_classes to the scoring function")
     # Using coo_matrix to accelerate simple histogram calculation,
     # i.e. bins are consecutive integers
     # Currently, coo_matrix is faster than histogram2d for simple cases
-    contingency = coo_matrix((np.ones(class_idx.shape[0]),
-                              (class_idx, cluster_idx)),
+    contingency = coo_matrix((np.ones(class_idx.shape[0]), (class_idx, cluster_idx)),
                              shape=(n_classes, n_clusters),
                              dtype=np.int)
     if not sparse:
@@ -102,7 +113,7 @@ def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False):
 
 # clustering measures
 
-def adjusted_rand_score(labels_true, labels_pred, contingency=None):
+def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000, contingency=None):
     """Rand index adjusted for chance.
 
     The Rand Index computes a similarity measure between two clusterings
@@ -134,6 +145,11 @@ def adjusted_rand_score(labels_true, labels_pred, contingency=None):
     labels_pred : array, shape = [n_samples]
         Cluster labels to evaluate
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by the adjusted_rand_score
+        metric. Setting it too high can lead to MemoryError or OS
+        freeze
+
     contingency: {None, sparse matrix}, shape = [n_classes_true, n_classes_pred]
         A contingency matrix given by the :func:`contingency_matrix` function.
         If value is ``None``, it will be computed, otherwise the given value is
@@ -203,13 +219,13 @@ def adjusted_rand_score(labels_true, labels_pred, contingency=None):
     # or trivial clustering where each document is assigned a unique cluster.
     # These are perfect matches hence return 1.0.
     if (n_classes == n_clusters == 1 or
-            n_classes == n_clusters == 0 or
-            n_classes == n_clusters == n_samples):
+                    n_classes == n_clusters == 0 or
+                    n_classes == n_clusters == n_samples):
         return 1.0
 
     # Compute contingency matrix if we weren't given it
     if contingency is None:
-        contingency = contingency_matrix(labels_true, labels_pred)
+        contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
 
     # Compute the ARI using the contingency data
     if isinstance(contingency, np.ndarray):
@@ -230,7 +246,7 @@ def adjusted_rand_score(labels_true, labels_pred, contingency=None):
     return float((sum_comb - prod_comb) / (mean_comb - prod_comb))
 
 
-def homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=False):
+def homogeneity_completeness_v_measure(labels_true, labels_pred, max_n_classes=5000, sparse=False):
     """Compute the homogeneity and completeness and V-Measure scores at once.
 
     Those metrics are based on normalized conditional entropy measures of
@@ -264,6 +280,11 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=False):
     labels_pred : array, shape = [n_samples]
         cluster labels to evaluate
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by
+        homogeneity_completeness_v_measure. Setting it too high can
+        lead to MemoryError or OS freeze. Ignored if ``sparse`` is True.
+
     sparse: boolean, optional.
         If True, intermediate calculation of the contingency matrix
         will calculate a sparse continency matrix.
@@ -297,7 +318,7 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=False):
         contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
         MI = mutual_info_score(None, None, contingency=contingency)
     else:
-        MI = mutual_info_score(labels_true, labels_pred)
+        MI = mutual_info_score(labels_true, labels_pred, max_n_classes=max_n_classes)
 
     homogeneity = MI / (entropy_C) if entropy_C else 1.0
     completeness = MI / (entropy_K) if entropy_K else 1.0
@@ -311,7 +332,7 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=False):
     return homogeneity, completeness, v_measure_score
 
 
-def homogeneity_score(labels_true, labels_pred, sparse=False):
+def homogeneity_score(labels_true, labels_pred, max_n_classes=5000, sparse=False):
     """Homogeneity metric of a cluster labeling given a ground truth.
 
     A clustering result satisfies homogeneity if all of its clusters
@@ -339,6 +360,11 @@ def homogeneity_score(labels_true, labels_pred, sparse=False):
         If True, intermediate calculation of the contingency matrix
         will calculate a sparse continency matrix.
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by the homogeneity_score
+        metric. Setting it too high can lead to MemoryError or OS
+        freeze.
+
     Returns
     -------
     homogeneity: float
@@ -386,10 +412,11 @@ def homogeneity_score(labels_true, labels_pred, sparse=False):
       0.0...
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse)[0]
+    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse,
+                                              max_n_classes=max_n_classes)[0]
 
 
-def completeness_score(labels_true, labels_pred, sparse=False):
+def completeness_score(labels_true, labels_pred, max_n_classes=5000, sparse=False):
     """Completeness metric of a cluster labeling given a ground truth.
 
     A clustering result satisfies completeness if all the data points
@@ -417,6 +444,11 @@ def completeness_score(labels_true, labels_pred, sparse=False):
         If True, intermediate calculation of the contingency matrix
         will calculate a sparse continency matrix.
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by the completeness_score
+        metric. Setting it too high can lead to MemoryError or OS
+        freeze.
+
     Returns
     -------
     completeness: float
@@ -460,10 +492,11 @@ def completeness_score(labels_true, labels_pred, sparse=False):
       0.0
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse)[1]
+    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse,
+                                              max_n_classes=max_n_classes)[1]
 
 
-def v_measure_score(labels_true, labels_pred, sparse=False):
+def v_measure_score(labels_true, labels_pred, max_n_classes=5000, sparse=False):
     """V-measure cluster labeling given a ground truth.
 
     This score is identical to :func:`normalized_mutual_info_score`.
@@ -495,6 +528,11 @@ def v_measure_score(labels_true, labels_pred, sparse=False):
         If True, intermediate calculation of the contingency matrix
         will calculate a sparse continency matrix.
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by the v_measure_score
+        metric. Setting it too high can lead to MemoryError or OS
+        freeze.
+
     Returns
     -------
     v_measure: float
@@ -559,10 +597,11 @@ def v_measure_score(labels_true, labels_pred, sparse=False):
       0.0...
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse)[2]
+    return homogeneity_completeness_v_measure(labels_true, labels_pred, max_n_classes=max_n_classes,
+                                              sparse=sparse)[2]
 
 
-def mutual_info_score(labels_true, labels_pred, contingency=None):
+def mutual_info_score(labels_true, labels_pred, contingency=None, max_n_classes=5000):
     """Mutual Information between two clusterings.
 
     The Mutual Information is a measure of the similarity between two labels of
@@ -602,6 +641,11 @@ def mutual_info_score(labels_true, labels_pred, contingency=None):
         If value is ``None``, it will be computed, otherwise the given value is
         used, with ``labels_true`` and ``labels_pred`` ignored.
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by the mutual_info_score
+        metric. Setting it too high can lead to MemoryError or OS
+        freeze
+
     Returns
     -------
     mi: float
@@ -614,7 +658,7 @@ def mutual_info_score(labels_true, labels_pred, contingency=None):
     """
     if contingency is None:
         labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
-        contingency = contingency_matrix(labels_true, labels_pred)
+        contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
     if isinstance(contingency, np.ndarray):
         # For an array
         contingency = np.array(contingency, dtype='float')
@@ -650,7 +694,7 @@ def mutual_info_score(labels_true, labels_pred, contingency=None):
         raise ValueError("Unsupported type for 'contingency': " + str(type(contingency)))
 
 
-def adjusted_mutual_info_score(labels_true, labels_pred):
+def adjusted_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     """Adjusted Mutual Information between two clusterings.
 
     Adjusted Mutual Information (AMI) is an adjustment of the Mutual
@@ -683,6 +727,11 @@ def adjusted_mutual_info_score(labels_true, labels_pred):
     labels_pred : array, shape = [n_samples]
         A clustering of the data into disjoint subsets.
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by the
+        adjusted_mutual_info_score metric. Setting it too high can
+        lead to MemoryError or OS freeze.
+
     Returns
     -------
     ami: float(upperlimited by 1.0)
@@ -731,9 +780,9 @@ def adjusted_mutual_info_score(labels_true, labels_pred):
     # Special limit cases: no clustering since the data is not split.
     # This is a perfect match hence return 1.0.
     if (classes.shape[0] == clusters.shape[0] == 1 or
-            classes.shape[0] == clusters.shape[0] == 0):
+                    classes.shape[0] == clusters.shape[0] == 0):
         return 1.0
-    contingency = contingency_matrix(labels_true, labels_pred)
+    contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
     contingency = np.array(contingency, dtype='float')
     # Calculate the MI for the two clusterings
     mi = mutual_info_score(labels_true, labels_pred,
@@ -746,7 +795,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred):
     return ami
 
 
-def normalized_mutual_info_score(labels_true, labels_pred):
+def normalized_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     """Normalized Mutual Information between two clusterings.
 
     Normalized Mutual Information (NMI) is an normalization of the Mutual
@@ -776,6 +825,11 @@ def normalized_mutual_info_score(labels_true, labels_pred):
     labels_pred : array, shape = [n_samples]
         A clustering of the data into disjoint subsets.
 
+    max_n_classes: int, optional (default=5000)
+        Maximal number of classes handled by the
+        normalized_mutual_info_score metric. Setting it too high can
+        lead to MemoryError or OS freeze.
+
     Returns
     -------
     nmi: float
@@ -812,9 +866,9 @@ def normalized_mutual_info_score(labels_true, labels_pred):
     # Special limit cases: no clustering since the data is not split.
     # This is a perfect match hence return 1.0.
     if (classes.shape[0] == clusters.shape[0] == 1 or
-            classes.shape[0] == clusters.shape[0] == 0):
+                    classes.shape[0] == clusters.shape[0] == 0):
         return 1.0
-    contingency = contingency_matrix(labels_true, labels_pred)
+    contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
     contingency = np.array(contingency, dtype='float')
     # Calculate the MI for the two clusterings
     mi = mutual_info_score(labels_true, labels_pred,
@@ -826,7 +880,7 @@ def normalized_mutual_info_score(labels_true, labels_pred):
     return nmi
 
 
-def fowlkes_mallows_score(labels_true, labels_pred):
+def fowlkes_mallows_score(labels_true, labels_pred, max_n_classes=5000):
     """Measure the similarity of two clusterings of a set of points.
 
     The Fowlkes-Mallows index (FMI) is defined as the geometric mean between of
@@ -855,6 +909,11 @@ def fowlkes_mallows_score(labels_true, labels_pred):
     labels_pred : array, shape = (``n_samples``, )
         A clustering of the data into disjoint subsets.
 
+    max_n_classes : int, optional (default=5000)
+        Maximal number of classes handled by the Fowlkes-Mallows
+        metric. Setting it too high can lead to MemoryError or OS
+        freeze
+
     Returns
     -------
     score : float
@@ -888,10 +947,10 @@ def fowlkes_mallows_score(labels_true, labels_pred):
     .. [2] `Wikipedia entry for the Fowlkes-Mallows Index
            <https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_
     """
-    labels_true, labels_pred = check_clusterings(labels_true, labels_pred,)
+    labels_true, labels_pred = check_clusterings(labels_true, labels_pred, )
     n_samples, = labels_true.shape
 
-    c = contingency_matrix(labels_true, labels_pred)
+    c = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
     tk = np.dot(c.ravel(), c.ravel()) - n_samples
     pk = np.sum(np.sum(c, axis=0) ** 2) - n_samples
     qk = np.sum(np.sum(c, axis=1) ** 2) - n_samples
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index f345ee5615b0b..367b9ce020de6 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -1,23 +1,21 @@
 import numpy as np
+from nose.tools import assert_almost_equal
+from nose.tools import assert_equal
+from numpy.testing import assert_array_almost_equal
 
+from sklearn.metrics.cluster import adjusted_mutual_info_score
 from sklearn.metrics.cluster import adjusted_rand_score
-from sklearn.metrics.cluster import homogeneity_score
 from sklearn.metrics.cluster import completeness_score
-from sklearn.metrics.cluster import v_measure_score
-from sklearn.metrics.cluster import homogeneity_completeness_v_measure
-from sklearn.metrics.cluster import adjusted_mutual_info_score
-from sklearn.metrics.cluster import normalized_mutual_info_score
-from sklearn.metrics.cluster import mutual_info_score
-from sklearn.metrics.cluster import expected_mutual_information
 from sklearn.metrics.cluster import contingency_matrix
-from sklearn.metrics.cluster import fowlkes_mallows_score
 from sklearn.metrics.cluster import entropy
-
+from sklearn.metrics.cluster import expected_mutual_information
+from sklearn.metrics.cluster import fowlkes_mallows_score
+from sklearn.metrics.cluster import homogeneity_completeness_v_measure
+from sklearn.metrics.cluster import homogeneity_score
+from sklearn.metrics.cluster import mutual_info_score
+from sklearn.metrics.cluster import normalized_mutual_info_score
+from sklearn.metrics.cluster import v_measure_score
 from sklearn.utils.testing import assert_raise_message
-from nose.tools import assert_almost_equal
-from nose.tools import assert_equal
-from numpy.testing import assert_array_almost_equal
-
 
 score_funcs = [
     adjusted_rand_score,
@@ -55,12 +53,14 @@ def test_perfect_matches():
         assert_equal(score_func([0., 1., 2.], [42., 7., 2.]), 1.0)
         assert_equal(score_func([0, 1, 2], [42, 7, 2]), 1.0)
 
+
 def test_homogeneity_completeness_v_measure_sparse():
     labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
     labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
     h, c, v = homogeneity_completeness_v_measure(labels_a, labels_b)
-    h_s, c_s, v_s = homogeneity_completeness_v_measure(labels_a, labels_b, sparse = True)
-    assert_array_almost_equal([h, c, v],[h_s, c_s, v_s])
+    h_s, c_s, v_s = homogeneity_completeness_v_measure(labels_a, labels_b, sparse=True)
+    assert_array_almost_equal([h, c, v], [h_s, c_s, v_s])
+
 
 """ Takes too long...
 def test_homogeneity_completeness_v_measure_large():
@@ -72,6 +72,7 @@ def test_homogeneity_completeness_v_measure_large():
     assert_raises(MemoryError, homogeneity_completeness_v_measure, labels_a, labels_b)
 """
 
+
 def test_homogeneous_but_not_complete_labeling():
     # homogeneous but not complete clustering
     h, c, v = homogeneity_completeness_v_measure(
@@ -203,39 +204,62 @@ def test_contingency_matrix_sparse():
     labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
     labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
     C = contingency_matrix(labels_a, labels_b)
-    C_sparse = contingency_matrix(labels_a, labels_b, sparse = True).toarray()
+    C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
     assert_array_almost_equal(C, C_sparse)
-    
+
 
 def test_adjusted_rand_score_sparse():
     labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
     labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
-    C_sparse = contingency_matrix(labels_a, labels_b, sparse = True)
-    assert_almost_equal(adjusted_rand_score(labels_a,labels_b), adjusted_rand_score(None, None, C_sparse))
+    C_sparse = contingency_matrix(labels_a, labels_b, sparse=True)
+    assert_almost_equal(adjusted_rand_score(labels_a, labels_b), adjusted_rand_score(None, None, contingency=C_sparse))
 
 
 def test_exactly_zero_info_score():
     # Check numerical stability when information is exactly zero
     for i in np.logspace(1, 4, 4).astype(np.int):
-        labels_a, labels_b = np.ones(i, dtype=np.int),\
-            np.arange(i, dtype=np.int)
-        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
-        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
-        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
-        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
+        labels_a, labels_b = np.ones(i, dtype=np.int), \
+                             np.arange(i, dtype=np.int)
+        assert_equal(normalized_mutual_info_score(labels_a, labels_b, max_n_classes=1e4), 0.0)
+        assert_equal(v_measure_score(labels_a, labels_b, max_n_classes=1e4), 0.0)
+        assert_equal(adjusted_mutual_info_score(labels_a, labels_b, max_n_classes=1e4), 0.0)
+        assert_equal(normalized_mutual_info_score(labels_a, labels_b, max_n_classes=1e4), 0.0)
 
 
 def test_v_measure_and_mutual_information(seed=36):
     # Check relation between v_measure, entropy and mutual information
     for i in np.logspace(1, 4, 4).astype(np.int):
         random_state = np.random.RandomState(seed)
-        labels_a, labels_b = random_state.randint(0, 10, i),\
-            random_state.randint(0, 10, i)
+        labels_a, labels_b = random_state.randint(0, 10, i), \
+                             random_state.randint(0, 10, i)
         assert_almost_equal(v_measure_score(labels_a, labels_b),
                             2.0 * mutual_info_score(labels_a, labels_b) /
                             (entropy(labels_a) + entropy(labels_b)), 0)
 
 
+def test_max_n_classes():
+    rng = np.random.RandomState(seed=0)
+    labels_true = rng.rand(53)
+    labels_pred = rng.rand(53)
+    labels_zero = np.zeros(53)
+    labels_true[:2] = 0
+    labels_zero[:3] = 1
+    labels_pred[:2] = 0
+    for score_func in score_funcs:
+        expected = ("Too many classes for a clustering metric. If you "
+                    "want to increase the limit, pass parameter "
+                    "max_n_classes to the scoring function")
+        assert_raise_message(ValueError, expected, score_func,
+                             labels_true, labels_pred,
+                             max_n_classes=50)
+        expected = ("Too many clusters for a clustering metric. If you "
+                    "want to increase the limit, pass parameter "
+                    "max_n_classes to the scoring function")
+        assert_raise_message(ValueError, expected, score_func,
+                             labels_zero, labels_pred,
+                             max_n_classes=50)
+
+
 def test_fowlkes_mallows_score():
     # General case
     score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],

From 8ec4d112f4ed03ced7c59a9498d19b7a52ee53b4 Mon Sep 17 00:00:00 2001
From: Greg Stupp <gstupp@scripps.edu>
Date: Wed, 17 Aug 2016 19:13:56 -0700
Subject: [PATCH 3/4] clarify docs

---
 sklearn/metrics/cluster/supervised.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py
index d8a3214ec3158..34c729647870b 100644
--- a/sklearn/metrics/cluster/supervised.py
+++ b/sklearn/metrics/cluster/supervised.py
@@ -67,7 +67,7 @@ def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000, s
     max_n_classes : int, optional (default=5000)
         Maximal number of classeses handled for contingency_matrix.
         This help to avoid Memory error with regression target
-        for mutual_information.
+        for mutual_information. If `sparse`, `max_n_classes` is ignored.
 
     sparse: boolean, optional.
         If True, return a sparse continency matrix. If ``eps is not None``,

From 630bfa10e1227037a99939b5730bc3cb96bc9f1f Mon Sep 17 00:00:00 2001
From: Greg Stupp <gstupp@scripps.edu>
Date: Thu, 18 Aug 2016 13:39:01 -0700
Subject: [PATCH 4/4] pep8

---
 sklearn/metrics/cluster/supervised.py | 83 +++++++++++++++++----------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py
index 34c729647870b..0f5d14207262c 100644
--- a/sklearn/metrics/cluster/supervised.py
+++ b/sklearn/metrics/cluster/supervised.py
@@ -48,7 +48,8 @@ def check_clusterings(labels_true, labels_pred):
     return labels_true, labels_pred
 
 
-def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000, sparse=False):
+def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000,
+                       sparse=False):
     """Build a contingency matrix describing the relationship between labels.
 
     Parameters
@@ -67,7 +68,8 @@ def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000, s
     max_n_classes : int, optional (default=5000)
         Maximal number of classeses handled for contingency_matrix.
         This help to avoid Memory error with regression target
-        for mutual_information. If `sparse`, `max_n_classes` is ignored.
+        for mutual_information. If ``sparse is True``,
+        `max_n_classes` is ignored.
 
     sparse: boolean, optional.
         If True, return a sparse continency matrix. If ``eps is not None``,
@@ -75,11 +77,11 @@ def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000, s
 
     Returns
     -------
-    contingency: {array-like, sparse matrix}, shape=[n_classes_true, n_classes_pred]
+    contingency: {array-like, sparse}, shape=[n_classes_true, n_classes_pred]
         Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in
         true class :math:`i` and in predicted class :math:`j`. If
         ``eps is None``, the dtype of this array will be integer. If ``eps`` is
-        given, the dtype will be float.
+        given, the dtype will be float. Will be sparse if ``sparse is True``
     """
 
     if eps is not None and sparse:
@@ -100,7 +102,8 @@ def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000, s
     # Using coo_matrix to accelerate simple histogram calculation,
     # i.e. bins are consecutive integers
     # Currently, coo_matrix is faster than histogram2d for simple cases
-    contingency = coo_matrix((np.ones(class_idx.shape[0]), (class_idx, cluster_idx)),
+    contingency = coo_matrix((np.ones(class_idx.shape[0]),
+                              (class_idx, cluster_idx)),
                              shape=(n_classes, n_clusters),
                              dtype=np.int)
     if not sparse:
@@ -113,7 +116,8 @@ def contingency_matrix(labels_true, labels_pred, eps=None, max_n_classes=5000, s
 
 # clustering measures
 
-def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000, contingency=None):
+def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000,
+                        contingency=None):
     """Rand index adjusted for chance.
 
     The Rand Index computes a similarity measure between two clusterings
@@ -209,7 +213,7 @@ def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000, contingenc
         n_samples = labels_true.shape[0]
         n_classes = np.unique(labels_true).shape[0]
         n_clusters = np.unique(labels_pred).shape[0]
-    elif isinstance(contingency, _data_matrix):  # scipy.sparse.data._data_matrix
+    elif isinstance(contingency, _data_matrix):
         n_samples = contingency.nnz
         n_classes, n_clusters = contingency.shape
     else:
@@ -225,7 +229,8 @@ def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000, contingenc
 
     # Compute contingency matrix if we weren't given it
     if contingency is None:
-        contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
+        contingency = contingency_matrix(labels_true, labels_pred,
+                                         max_n_classes=max_n_classes)
 
     # Compute the ARI using the contingency data
     if isinstance(contingency, np.ndarray):
@@ -235,18 +240,22 @@ def adjusted_rand_score(labels_true, labels_pred, max_n_classes=5000, contingenc
         sum_comb = sum(comb2(n_ij) for n_ij in contingency.flatten())
     elif isinstance(contingency, _data_matrix):
         # For a sparse matrix
-        sum_comb_c = sum(comb2(n_c) for n_c in np.array(contingency.sum(axis=1)))
-        sum_comb_k = sum(comb2(n_k) for n_k in np.array(contingency.sum(axis=0)).T)
+        sum_comb_c = sum(
+            comb2(n_c) for n_c in np.array(contingency.sum(axis=1)))
+        sum_comb_k = sum(
+            comb2(n_k) for n_k in np.array(contingency.sum(axis=0)).T)
         sum_comb = sum(comb2(n_ij) for n_ij in find(contingency)[2])
     else:
-        raise ValueError("Unsupported type for 'contingency': " + str(type(contingency)))
+        raise ValueError(
+            "Unsupported type for 'contingency': " + str(type(contingency)))
 
     prod_comb = (sum_comb_c * sum_comb_k) / float(comb(n_samples, 2))
     mean_comb = (sum_comb_k + sum_comb_c) / 2.
     return float((sum_comb - prod_comb) / (mean_comb - prod_comb))
 
 
-def homogeneity_completeness_v_measure(labels_true, labels_pred, max_n_classes=5000, sparse=False):
+def homogeneity_completeness_v_measure(labels_true, labels_pred,
+                                       max_n_classes=5000, sparse=False):
     """Compute the homogeneity and completeness and V-Measure scores at once.
 
     Those metrics are based on normalized conditional entropy measures of
@@ -318,7 +327,8 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, max_n_classes=5
         contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
         MI = mutual_info_score(None, None, contingency=contingency)
     else:
-        MI = mutual_info_score(labels_true, labels_pred, max_n_classes=max_n_classes)
+        MI = mutual_info_score(labels_true, labels_pred,
+                               max_n_classes=max_n_classes)
 
     homogeneity = MI / (entropy_C) if entropy_C else 1.0
     completeness = MI / (entropy_K) if entropy_K else 1.0
@@ -332,7 +342,8 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, max_n_classes=5
     return homogeneity, completeness, v_measure_score
 
 
-def homogeneity_score(labels_true, labels_pred, max_n_classes=5000, sparse=False):
+def homogeneity_score(labels_true, labels_pred, max_n_classes=5000,
+                      sparse=False):
     """Homogeneity metric of a cluster labeling given a ground truth.
 
     A clustering result satisfies homogeneity if all of its clusters
@@ -412,11 +423,13 @@ def homogeneity_score(labels_true, labels_pred, max_n_classes=5000, sparse=False
       0.0...
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse,
-                                              max_n_classes=max_n_classes)[0]
+    return \
+    homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse,
+                                       max_n_classes=max_n_classes)[0]
 
 
-def completeness_score(labels_true, labels_pred, max_n_classes=5000, sparse=False):
+def completeness_score(labels_true, labels_pred, max_n_classes=5000,
+                       sparse=False):
     """Completeness metric of a cluster labeling given a ground truth.
 
     A clustering result satisfies completeness if all the data points
@@ -492,8 +505,9 @@ def completeness_score(labels_true, labels_pred, max_n_classes=5000, sparse=Fals
       0.0
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse,
-                                              max_n_classes=max_n_classes)[1]
+    return \
+    homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=sparse,
+                                       max_n_classes=max_n_classes)[1]
 
 
 def v_measure_score(labels_true, labels_pred, max_n_classes=5000, sparse=False):
@@ -597,11 +611,13 @@ def v_measure_score(labels_true, labels_pred, max_n_classes=5000, sparse=False):
       0.0...
 
     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred, max_n_classes=max_n_classes,
+    return homogeneity_completeness_v_measure(labels_true, labels_pred,
+                                              max_n_classes=max_n_classes,
                                               sparse=sparse)[2]
 
 
-def mutual_info_score(labels_true, labels_pred, contingency=None, max_n_classes=5000):
+def mutual_info_score(labels_true, labels_pred, contingency=None,
+                      max_n_classes=5000):
     """Mutual Information between two clusterings.
 
     The Mutual Information is a measure of the similarity between two labels of
@@ -636,7 +652,8 @@ def mutual_info_score(labels_true, labels_pred, contingency=None, max_n_classes=
     labels_pred : array, shape = [n_samples]
         A clustering of the data into disjoint subsets.
 
-    contingency: {None, array, sparse matrix}, shape = [n_classes_true, n_classes_pred]
+    contingency: {None, array, sparse matrix},
+                shape = [n_classes_true, n_classes_pred]
         A contingency matrix given by the :func:`contingency_matrix` function.
         If value is ``None``, it will be computed, otherwise the given value is
         used, with ``labels_true`` and ``labels_pred`` ignored.
@@ -658,7 +675,8 @@ def mutual_info_score(labels_true, labels_pred, contingency=None, max_n_classes=
     """
     if contingency is None:
         labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
-        contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
+        contingency = contingency_matrix(labels_true, labels_pred,
+                                         max_n_classes=max_n_classes)
     if isinstance(contingency, np.ndarray):
         # For an array
         contingency = np.array(contingency, dtype='float')
@@ -684,14 +702,16 @@ def mutual_info_score(labels_true, labels_pred, contingency=None, max_n_classes=
         pj = np.array(contingency.sum(axis=0)).T
         nnzx, nnzy, nnz_val = find(contingency)
         log_contingency_nm = np.log(nnz_val)
-        contingency_nm = nnz_val * 1.0 / contingency_sum  # python2 integer division...
+        contingency_nm = nnz_val * 1.0 / contingency_sum
         # Don't need to calculate the full outer product. Just for the non-zero values
         outer = np.array([pi[x] * pj[y] for x, y in zip(nnzx, nnzy)]).T
         log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
-        mi = contingency_nm * (log_contingency_nm - log(contingency_sum)) + contingency_nm * log_outer
+        mi = contingency_nm * (log_contingency_nm - log(contingency_sum)) + \
+             contingency_nm * log_outer
         return mi.sum()
     else:
-        raise ValueError("Unsupported type for 'contingency': " + str(type(contingency)))
+        raise ValueError(
+            "Unsupported type for 'contingency': " + str(type(contingency)))
 
 
 def adjusted_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
@@ -780,9 +800,10 @@ def adjusted_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     # Special limit cases: no clustering since the data is not split.
     # This is a perfect match hence return 1.0.
     if (classes.shape[0] == clusters.shape[0] == 1 or
-                    classes.shape[0] == clusters.shape[0] == 0):
+                classes.shape[0] == clusters.shape[0] == 0):
         return 1.0
-    contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
+    contingency = contingency_matrix(labels_true, labels_pred,
+                                     max_n_classes=max_n_classes)
     contingency = np.array(contingency, dtype='float')
     # Calculate the MI for the two clusterings
     mi = mutual_info_score(labels_true, labels_pred,
@@ -868,7 +889,8 @@ def normalized_mutual_info_score(labels_true, labels_pred, max_n_classes=5000):
     if (classes.shape[0] == clusters.shape[0] == 1 or
                     classes.shape[0] == clusters.shape[0] == 0):
         return 1.0
-    contingency = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
+    contingency = contingency_matrix(labels_true, labels_pred,
+                                     max_n_classes=max_n_classes)
     contingency = np.array(contingency, dtype='float')
     # Calculate the MI for the two clusterings
     mi = mutual_info_score(labels_true, labels_pred,
@@ -950,7 +972,8 @@ def fowlkes_mallows_score(labels_true, labels_pred, max_n_classes=5000):
     labels_true, labels_pred = check_clusterings(labels_true, labels_pred, )
     n_samples, = labels_true.shape
 
-    c = contingency_matrix(labels_true, labels_pred, max_n_classes=max_n_classes)
+    c = contingency_matrix(labels_true, labels_pred,
+                           max_n_classes=max_n_classes)
     tk = np.dot(c.ravel(), c.ravel()) - n_samples
     pk = np.sum(np.sum(c, axis=0) ** 2) - n_samples
     qk = np.sum(np.sum(c, axis=1) ** 2) - n_samples