From 443dacc57b13c53093fbde6e6b5a353a6172700c Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Sat, 26 Nov 2016 12:00:59 -0600 Subject: [PATCH 1/5] cosine affinity cannot be used when X contains zero vectors --- sklearn/cluster/hierarchical.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 9c3e43e901bf5..809d683702dc9 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -379,6 +379,10 @@ def linkage_tree(X, connectivity=None, n_components=None, 'Unknown linkage option, linkage should be one ' 'of %s, but %s was given' % (linkage_choices.keys(), linkage)) + if np.sum(np.sum(np.abs(X), axis=1) == 0) > 0 and affinity == 'cosine': + raise ValueError( + 'Cosine affinity cannot be used when X contains zero vectors') + if connectivity is None: from scipy.cluster import hierarchy # imports PIL From cc76bbe2d0c6e70dbc0f4ae3df6a32c27f373253 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Sat, 26 Nov 2016 12:09:18 -0600 Subject: [PATCH 2/5] fixed issue with tabs spaces --- sklearn/cluster/hierarchical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 809d683702dc9..614eeac5fa889 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -379,7 +379,7 @@ def linkage_tree(X, connectivity=None, n_components=None, 'Unknown linkage option, linkage should be one ' 'of %s, but %s was given' % (linkage_choices.keys(), linkage)) - if np.sum(np.sum(np.abs(X), axis=1) == 0) > 0 and affinity == 'cosine': + if np.sum(np.sum(np.abs(X), axis=1) == 0) > 0 and affinity == 'cosine': raise ValueError( 'Cosine affinity cannot be used when X contains zero vectors') From ee0222e3bce53d980231cd2b1600d02d459bb6b5 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Sat, 26 Nov 2016 15:30:05 -0600 Subject: [PATCH 3/5] changed to np.any and created a test for this new ValueError --- sklearn/cluster/hierarchical.py | 2 +- sklearn/cluster/tests/test_hierarchical.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 614eeac5fa889..c75bd88c608b8 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -379,7 +379,7 @@ def linkage_tree(X, connectivity=None, n_components=None, 'Unknown linkage option, linkage should be one ' 'of %s, but %s was given' % (linkage_choices.keys(), linkage)) - if np.sum(np.sum(np.abs(X), axis=1) == 0) > 0 and affinity == 'cosine': + if np.sum(1 - np.any(X, axis=1)) > 0 and affinity == 'cosine': raise ValueError( 'Cosine affinity cannot be used when X contains zero vectors') diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index cbafac1bc355e..4da30b5b2321c 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -115,6 +115,14 @@ def test_height_linkage_tree(): assert_true(len(children) + n_leaves == n_nodes) +def test_zero_cosine_linkage_tree(): + # Check that zero vectors in X produce an error when + # 'cosine' affinity is used + X = np.array([[0, 1], + [0, 0]]) + assert_raises(ValueError, linkage_tree, X, affinity='cosine') + + def test_agglomerative_clustering(): # Check that we obtain the correct number of clusters with # agglomerative clustering. From a0c69b8b9994e808dc9e038906b284cfef6623a2 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Tue, 29 Nov 2016 20:51:48 -0600 Subject: [PATCH 4/5] use assert_raise_message and flipped order of if conditions --- sklearn/cluster/hierarchical.py | 2 +- sklearn/cluster/tests/test_hierarchical.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index c75bd88c608b8..5a5db151ad152 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -379,7 +379,7 @@ def linkage_tree(X, connectivity=None, n_components=None, 'Unknown linkage option, linkage should be one ' 'of %s, but %s was given' % (linkage_choices.keys(), linkage)) - if np.sum(1 - np.any(X, axis=1)) > 0 and affinity == 'cosine': + if affinity == 'cosine' and np.sum(1 - np.any(X, axis=1)) > 0: raise ValueError( 'Cosine affinity cannot be used when X contains zero vectors') diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 4da30b5b2321c..65c66f480bb88 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -120,7 +120,8 @@ def test_zero_cosine_linkage_tree(): # 'cosine' affinity is used X = np.array([[0, 1], [0, 0]]) - assert_raises(ValueError, linkage_tree, X, affinity='cosine') + msg = 'Cosine affinity cannot be used when X contains zero vectors' + assert_raise_message(ValueError, msg, linkage_tree, X, affinity='cosine') def test_agglomerative_clustering(): From 67f3fdf2eb4b92caa32518613c08f0d498020c95 Mon Sep 17 00:00:00 2001 From: mthorrell Date: Fri, 21 Jun 2019 08:46:47 -0400 Subject: [PATCH 5/5] fixed 0 row calculation --- sklearn/cluster/hierarchical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 5a5db151ad152..3ba6c4978063d 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -379,7 +379,7 @@ def linkage_tree(X, connectivity=None, n_components=None, 'Unknown linkage option, linkage should be one ' 'of %s, but %s was given' % (linkage_choices.keys(), linkage)) - if affinity == 'cosine' and np.sum(1 - np.any(X, axis=1)) > 0: + if affinity == 'cosine' and np.any(~np.any(X, axis=1)): raise ValueError( 'Cosine affinity cannot be used when X contains zero vectors')