-
-
Notifications
You must be signed in to change notification settings - Fork 26.1k
Closed
Description
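The following example runs OPTICS on a small dataset made of two clusters plus one obvious outlier at [100, 200].
The outlier is not detected correctly: it is assigned to cluster 1 instead of being labelled as noise (-1), while the point [3.74470102, -0.93463814], which belongs to the second cluster, is labelled as noise (-1).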
>>> from sklearn.cluster import OPTICS
>>> import numpy as np
>>> X = np.array([[1, 2], [2, 2], [2, 3],
... [80, 70], [80, 80], [250, 800]])
>>>
>>>
>>> np.random.seed(0)
>>> n_points_per_cluster = 10
>>>
>>> C1 = [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)
>>> C2 = [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)
>>> X = np.vstack((C1, C2, np.array([[100, 200]])))
>>>
>>> clustering = OPTICS(metric="euclidean").fit(X)
>>> np.hstack((X, clustering.labels_[:, np.newaxis]))
array([[ -3.58875812, -1.67987423, 0. ],
[ -4.21700961, -0.20728544, 0. ],
[ -3.50595361, -2.7818223 , 0. ],
[ -4.23992927, -2.12108577, 0. ],
[ -5.08257508, -1.6715212 , 0. ],
[ -4.88476514, -0.83658119, 0. ],
[ -4.39116982, -1.90265999, 0. ],
[ -4.64490941, -1.73306054, 0. ],
[ -3.80473674, -2.16412661, 0. ],
[ -4.74954584, -2.68327659, 0. ],
[ 3.74470102, -0.93463814, -1. ],
[ 4.08644362, -1.0742165 , 1. ],
[ 4.22697546, -1.14543657, 1. ],
[ 4.00457585, -1.01871839, 1. ],
[ 4.15327792, -0.85306412, 1. ],
[ 4.01549474, -0.96218375, 1. ],
[ 3.91122143, -1.19807965, 1. ],
[ 3.96520879, -0.9843651 , 1. ],
[ 4.12302907, -0.87976202, 1. ],
[ 3.96126732, -1.03023028, 1. ],
[100. , 200. , 1. ]])
>>> clustering # doctest: +NORMALIZE_WHITESPACE
OPTICS(algorithm='ball_tree', leaf_size=30, max_bound=inf, maxima_ratio=0.75,
metric='euclidean', metric_params=None, min_cluster_size_ratio=0.005,
min_maxima_ratio=0.001, min_samples=5, n_jobs=1, p=2,
rejection_ratio=0.7, significant_min=0.003, similarity_threshold=0.4)