Resolved merge conflicts · scikit-learn/scikit-learn@508516d · GitHub
[go: up one dir, main page]

Skip to content

Commit 508516d

Browse files
committed
Resolved merge conflicts
1 parent c098aa5 commit 508516d

File tree

2 files changed

+4
-43
lines changed

2 files changed

+4
-43
lines changed

sklearn/metrics/cluster/tests/test_unsupervised.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from sklearn import datasets
55
from sklearn.metrics.cluster.unsupervised import silhouette_score
6+
from sklearn.metrics.cluster.unsupervised import silhouette_sample
67
from sklearn.metrics import pairwise_distances
78
from sklearn.utils.testing import assert_false
89
from sklearn.utils.testing import assert_almost_equal

sklearn/metrics/cluster/unsupervised.py

Lines changed: 3 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -161,50 +161,8 @@ def silhouette_samples(X, labels, metric='euclidean', **kwds):
161161
<http://en.wikipedia.org/wiki/Silhouette_(clustering)>`_
162162
163163
"""
164-
<<<<<<< HEAD
165164
le = LabelEncoder()
166165
labels = le.fit_transform(labels)
167-
=======
168-
distances = pairwise_distances(X, metric=metric, **kwds)
169-
n = labels.shape[0]
170-
A = np.array([_intra_cluster_distance(distances[i], labels, i)
171-
for i in range(n)])
172-
B = np.array([_nearest_cluster_distance(distances[i], labels, i)
173-
for i in range(n)])
174-
sil_samples = (B - A) / np.maximum(A, B)
175-
# nan values are for clusters of size 1, and should be 0
176-
return np.nan_to_num(sil_samples)
177-
178-
179-
def _intra_cluster_distance(distances_row, labels, i):
180-
"""Calculate the mean intra-cluster distance for sample i.
181-
182-
Parameters
183-
----------
184-
distances_row : array, shape = [n_samples]
185-
Pairwise distance matrix between sample i and each sample.
186-
187-
labels : array, shape = [n_samples]
188-
label values for each sample
189-
190-
i : int
191-
Sample index being calculated. It is excluded from calculation and
192-
used to determine the current label
193-
194-
Returns
195-
-------
196-
a : float
197-
Mean intra-cluster distance for sample i
198-
"""
199-
mask = labels == labels[i]
200-
mask[i] = False
201-
if not np.any(mask):
202-
# cluster of size 1
203-
return 0
204-
a = np.mean(distances_row[mask])
205-
return a
206-
207-
>>>>>>> reverted the comment
208166

209167
distances = pairwise_distances(X, metric=metric, **kwds)
210168
unique_labels = le.classes_
@@ -242,4 +200,6 @@ def _intra_cluster_distance(distances_row, labels, i):
242200

243201
sil_samples = inter_clust_dists - intra_clust_dists
244202
sil_samples /= np.maximum(intra_clust_dists, inter_clust_dists)
245-
return np.nan_to_num(sil_samples)
203+
204+
# nan values are for clusters of size 1, and should be 0
205+
return np.nan_to_num(sil_samples)

0 commit comments

Comments
 (0)
0