# Patch summary (text before the first "diff --git" header is not part of
# any hunk):
#
# sklearn/cluster/hierarchical.py -- linkage_tree
#   Adds an input check that raises ValueError when affinity == 'cosine'
#   and X contains at least one all-zero row.  np.any(X, axis=1) is False
#   exactly for rows with no non-zero entry, so ~np.any(X, axis=1) flags
#   the zero vectors.  The check is placed ahead of the
#   `if connectivity is None` branch, so it runs before that branch is
#   chosen.
#   NOTE(review): the axis=1 reduction assumes X is 2-D at this point --
#   confirm against the part of linkage_tree not shown in this hunk.
#
# sklearn/cluster/tests/test_hierarchical.py -- test_zero_cosine_linkage_tree
#   New test: calls linkage_tree with X = [[0, 1], [0, 0]] and
#   affinity='cosine', and asserts that ValueError is raised with the
#   message introduced above.
#   NOTE(review): assert_raise_message must already be imported in the
#   test module; the import is not visible in this hunk -- confirm.
diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py
index 9c3e43e901bf5..3ba6c4978063d 100644
--- a/sklearn/cluster/hierarchical.py
+++ b/sklearn/cluster/hierarchical.py
@@ -379,6 +379,10 @@ def linkage_tree(X, connectivity=None, n_components=None,
             'Unknown linkage option, linkage should be one '
             'of %s, but %s was given' % (linkage_choices.keys(), linkage))
 
+    if affinity == 'cosine' and np.any(~np.any(X, axis=1)):
+        raise ValueError(
+            'Cosine affinity cannot be used when X contains zero vectors')
+
     if connectivity is None:
         from scipy.cluster import hierarchy  # imports PIL
 
diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py
index cbafac1bc355e..65c66f480bb88 100644
--- a/sklearn/cluster/tests/test_hierarchical.py
+++ b/sklearn/cluster/tests/test_hierarchical.py
@@ -115,6 +115,15 @@ def test_height_linkage_tree():
     assert_true(len(children) + n_leaves == n_nodes)
 
 
+def test_zero_cosine_linkage_tree():
+    # Check that zero vectors in X produce an error when
+    # 'cosine' affinity is used
+    X = np.array([[0, 1],
+                  [0, 0]])
+    msg = 'Cosine affinity cannot be used when X contains zero vectors'
+    assert_raise_message(ValueError, msg, linkage_tree, X, affinity='cosine')
+
+
 def test_agglomerative_clustering():
     # Check that we obtain the correct number of clusters with
     # agglomerative clustering.