@@ -259,3 +259,37 @@ def test_weighted_dbscan():
259
259
assert_array_equal (core1 , core5 )
260
260
assert_array_equal (label1 , label5 )
261
261
assert_array_equal (label1 , est .labels_ )
262
+
263
+
264
def test_dbscan_core_samples_toy():
    # Check the core sample indices and labels returned by dbscan on a tiny
    # one-dimensional dataset, for each neighbor-search algorithm, as
    # min_samples grows from 1 to 4 with eps fixed at 1.
    X = [[0], [2], [3], [4], [6], [8], [10]]
    n_samples = len(X)

    # Each scenario: (min_samples, expected core indices, expected labels).
    scenarios = [
        # Degenerate case: every sample is a core sample, either with its
        # own cluster or including other close core samples.
        (1, np.arange(n_samples), [0, 1, 1, 1, 2, 3, 4]),
        # Only the 3 samples from the denser area are core samples. All
        # other points are isolated and considered noise.
        (2, [1, 2, 3], [-1, 0, 0, 0, -1, -1, -1]),
        # Only the sample in the middle of the dense area is core. Its two
        # neighbors are edge samples. Remaining samples are noise.
        (3, [2], [-1, 0, 0, 0, -1, -1, -1]),
        # It's no longer possible to extract core samples with eps=1:
        # everything is noise.
        (4, [], -np.ones(n_samples)),
    ]

    for algorithm in ('brute', 'kd_tree', 'ball_tree'):
        for min_pts, expected_core, expected_labels in scenarios:
            core_samples, labels = dbscan(
                X, algorithm=algorithm, eps=1, min_samples=min_pts)
            assert_array_equal(core_samples, expected_core)
            assert_array_equal(labels, expected_labels)
0 commit comments