@@ -724,8 +724,8 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0):
724
724
def mutual_info_score (labels_true , labels_pred , * , contingency = None ):
725
725
"""Mutual Information between two clusterings.
726
726
727
- The Mutual Information is a measure of the similarity between two labels of
728
- the same data. Where :math:`|U_i|` is the number of the samples
727
+ The Mutual Information is a measure of the similarity between two labels
728
+ of the same data. Where :math:`|U_i|` is the number of the samples
729
729
in cluster :math:`U_i` and :math:`|V_j|` is the number of the
730
730
samples in cluster :math:`V_j`, the Mutual Information
731
731
between clusterings :math:`U` and :math:`V` is given as:
@@ -739,20 +739,23 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None):
739
739
a permutation of the class or cluster label values won't change the
740
740
score value in any way.
741
741
742
- This metric is furthermore symmetric: switching ``label_true`` with
743
- ``label_pred`` will return the same score value. This can be useful to
744
- measure the agreement of two independent label assignments strategies
745
- on the same dataset when the real ground truth is not known.
742
+ This metric is furthermore symmetric: switching :math:`U` (i.e.
743
+ ``labels_true``) with :math:`V` (i.e. ``labels_pred``) will return the
744
+ same score value. This can be useful to measure the agreement of two
745
+ independent label assignment strategies on the same dataset when the
746
+ real ground truth is not known.
746
747
747
748
Read more in the :ref:`User Guide <mutual_info_score>`.
748
749
749
750
Parameters
750
751
----------
751
752
labels_true : int array, shape = [n_samples]
752
- A clustering of the data into disjoint subsets.
753
+ A clustering of the data into disjoint subsets, called :math:`U` in
754
+ the above formula.
753
755
754
756
labels_pred : int array-like of shape (n_samples,)
755
- A clustering of the data into disjoint subsets.
757
+ A clustering of the data into disjoint subsets, called :math:`V` in
758
+ the above formula.
756
759
757
760
contingency : {ndarray, sparse matrix} of shape \
758
761
(n_classes_true, n_classes_pred), default=None
@@ -763,7 +766,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None):
763
766
Returns
764
767
-------
765
768
mi : float
766
- Mutual information, a non-negative value
769
+ Mutual information, a non-negative value, measured in nats using the
770
+ natural logarithm.
767
771
768
772
Notes
769
773
-----
@@ -829,10 +833,10 @@ def adjusted_mutual_info_score(
829
833
a permutation of the class or cluster label values won't change the
830
834
score value in any way.
831
835
832
- This metric is furthermore symmetric: switching `` label_true`` with
833
- ``label_pred`` will return the same score value. This can be useful to
834
- measure the agreement of two independent label assignments strategies
835
- on the same dataset when the real ground truth is not known.
836
+ This metric is furthermore symmetric: switching :math:`U` (``labels_true``)
837
+ with :math:`V` (``labels_pred``) will return the same score value. This can
838
+ be useful to measure the agreement of two independent label assignment
839
+ strategies on the same dataset when the real ground truth is not known.
836
840
837
841
Be mindful that this function is an order of magnitude slower than other
838
842
metrics, such as the Adjusted Rand Index.
@@ -842,10 +846,12 @@ def adjusted_mutual_info_score(
842
846
Parameters
843
847
----------
844
848
labels_true : int array, shape = [n_samples]
845
- A clustering of the data into disjoint subsets.
849
+ A clustering of the data into disjoint subsets, called :math:`U` in
850
+ the above formula.
846
851
847
852
labels_pred : int array-like of shape (n_samples,)
848
- A clustering of the data into disjoint subsets.
853
+ A clustering of the data into disjoint subsets, called :math:`V` in
854
+ the above formula.
849
855
850
856
average_method : str, default='arithmetic'
851
857
How to compute the normalizer in the denominator. Possible options
@@ -862,7 +868,8 @@ def adjusted_mutual_info_score(
862
868
ami : float (upper-limited by 1.0)
863
869
The AMI returns a value of 1 when the two partitions are identical
864
870
(i.e. perfectly matched). Random partitions (independent labellings) have
865
- an expected AMI around 0 on average hence can be negative.
871
+ an expected AMI around 0 on average hence can be negative. The value is
872
+ in adjusted nats (based on the natural logarithm).
866
873
867
874
See Also
868
875
--------
@@ -979,7 +986,8 @@ def normalized_mutual_info_score(
979
986
Returns
980
987
-------
981
988
nmi : float
982
- score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
989
+ Score between 0.0 and 1.0 in normalized nats (based on the natural
990
+ logarithm). 1.0 stands for perfectly complete labeling.
983
991
984
992
See Also
985
993
--------
0 commit comments