scikit-learn · jnothman · Feb 23, 2017 · Feb 13, 2017 · Feb 15, 2017 · Feb 15, 2017
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -149,6 +149,10 @@ Enhancements
 
 Bug fixes
 .........
+   - Fixed a bug where :class:`sklearn.cluster.DBSCAN` gives an incorrect
+     result when the input is a precomputed sparse matrix whose initial
+     rows are all zero.
+     :issue:`8306` by :user:`Akshay Gupta <Akshay0724>`
 
    - Fixed a bug where :func:`sklearn.datasets.make_moons` gives an
      incorrect result when ``n_samples`` is odd.

diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py
@@ -124,7 +124,8 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
         X.sum_duplicates()  # XXX: modifies X's internals in-place
         X_mask = X.data <= eps
         masked_indices = astype(X.indices, np.intp, copy=False)[X_mask]
-        masked_indptr = np.cumsum(X_mask)[X.indptr[1:] - 1]
+        masked_indptr = np.concatenate(([0], np.cumsum(X_mask)))[X.indptr[1:]]
+
         # insert the diagonal: a point is its own neighbor, but 0 distance
         # means absence from sparse matrix data
         masked_indices = np.insert(masked_indices, masked_indptr,

diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py
@@ -350,3 +350,20 @@ def test_dbscan_precomputed_metric_with_degenerate_input_arrays():
     X = np.zeros((10, 10))
     labels = DBSCAN(eps=0.5, metric='precomputed').fit(X).labels_
     assert_equal(len(set(labels)), 1)
+
+
+def test_dbscan_precomputed_metric_with_initial_rows_zero():
+    # sample matrix whose first two rows are all zero
+    ar = np.array([
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0],
+        [0.0, 0.0, 0.1, 0.1, 0.0, 0.0, 0.3],
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1],
+        [0.0, 0.0, 0.0, 0.0, 0.3, 0.1, 0.0]
+    ])
+    matrix = sparse.csr_matrix(ar)
+    labels = DBSCAN(eps=0.2, metric='precomputed',
+                    min_samples=2).fit(matrix).labels_
+    assert_array_equal(labels, [-1, -1,  0,  0,  0,  1,  1])