scikit-learn
diff --git a/‎doc/whats_new/v0.20.rst
Lines changed: 7 additions & 3 deletions b/‎doc/whats_new/v0.20.rst
Lines changed: 7 additions & 3 deletions
diff --git a/‎doc/whats_new/v0.21.rst
Lines changed: 5 additions & 1 deletion b/‎doc/whats_new/v0.21.rst
Lines changed: 5 additions & 1 deletion
diff --git a/‎sklearn/cluster/dbscan_.py
Lines changed: 8 additions & 6 deletions b/‎sklearn/cluster/dbscan_.py
Lines changed: 8 additions & 6 deletions
diff --git a/‎sklearn/cluster/tests/test_dbscan.py
Lines changed: 19 additions & 2 deletions b/‎sklearn/cluster/tests/test_dbscan.py
Lines changed: 19 additions & 2 deletions
@@ -47,6 +47,10 @@ Changelog
   avoid pickling errors caused by the serialization of their methods.
   :issue:`12171` by :user:`Thomas Moreau <tomMoral>`
 
+- |Fix| Fixed a bug in :class:`cluster.DBSCAN` with a precomputed sparse
+  neighbors graph, which would add explicit zeros on the diagonal even when
+  already present. :issue:`12105` by `Tom Dupre la Tour`_.
+
 .. _changes_0_20:
 
 Version 0.20.0
@@ -663,7 +667,7 @@ Support for Python 3.3 has been officially dropped.
 
 - |Feature| :func:`metrics.classification_report` now reports all applicable averages on
   the given data, including micro, macro and weighted average as well as samples
-  average for multilabel data. :issue:`11679` by :user:`Alexander Pacha <apacha>`. 
+  average for multilabel data. :issue:`11679` by :user:`Alexander Pacha <apacha>`.
 
 - |Feature| :func:`metrics.average_precision_score` now supports binary
   ``y_true`` other than ``{0, 1}`` or ``{-1, 1}`` through ``pos_label``
@@ -917,7 +921,7 @@ Support for Python 3.3 has been officially dropped.
   keyword arguments on to the pipeline's last estimator, enabling the use of
   parameters such as ``return_std`` in a pipeline with caution.
   :issue:`9304` by :user:`Breno Freitas <brenolf>`.
-  
+
 - |API| :class:`pipeline.FeatureUnion` now supports ``'drop'`` as a transformer
   to drop features. :issue:`11144` by :user:`thomasjpfan`.
 
@@ -1039,7 +1043,7 @@ Support for Python 3.3 has been officially dropped.
 - |API| The NaN marker for the missing values has been changed
   between the :class:`preprocessing.Imputer` and the
   :class:`impute.SimpleImputer`.
-  ``missing_values='NaN'`` should now be
+  ``missing_values='NaN'`` should now be
   ``missing_values=np.nan``. :issue:`11211` by
   :user:`Jeremie du Boisberranger <jeremiedbb>`.
 
 
@@ -17,7 +17,7 @@ parameters, may produce different models from the previous version. This often
 occurs due to changes in the modelling logic (bug fixes or enhancements), or in
 random sampling procedures.
 
-- please add class and reason here (see version 0.20 what's new)
+- :class:`cluster.DBSCAN` (bug fix)
 
 Details are listed in the changelog below.
 
@@ -48,6 +48,10 @@ Support for Python 3.4 and below has been officially dropped.
   to set and that scales better, by :user:`Shane <espg>` and
   :user:`Adrin Jalali <adrinjalali>`.
 
+- |Fix| Fixed a bug in :class:`cluster.DBSCAN` with a precomputed sparse
+  neighbors graph, which would add explicit zeros on the diagonal even when
+  already present. :issue:`12105` by `Tom Dupre la Tour`_.
+
 Multiple modules
 ................
 
 
@@ -14,6 +14,7 @@
 
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_array, check_consistent_length
+from ..utils.testing import ignore_warnings
 from ..neighbors import NearestNeighbors
 
 from ._dbscan_inner import dbscan_inner
@@ -142,15 +143,16 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
     if metric == 'precomputed' and sparse.issparse(X):
         neighborhoods = np.empty(X.shape[0], dtype=object)
         X.sum_duplicates()  # XXX: modifies X's internals in-place
+
+        # set the diagonal to explicit values, as a point is its own neighbor
+        with ignore_warnings():
+            X.setdiag(X.diagonal())  # XXX: modifies X's internals in-place
+
         X_mask = X.data <= eps
         masked_indices = X.indices.astype(np.intp, copy=False)[X_mask]
-        masked_indptr = np.concatenate(([0], np.cumsum(X_mask)))[X.indptr[1:]]
+        masked_indptr = np.concatenate(([0], np.cumsum(X_mask)))
+        masked_indptr = masked_indptr[X.indptr[1:-1]]
 
-        # insert the diagonal: a point is its own neighbor, but 0 distance
-        # means absence from sparse matrix data
-        masked_indices = np.insert(masked_indices, masked_indptr,
-                                   np.arange(X.shape[0]))
-        masked_indptr = masked_indptr[:-1] + np.arange(1, X.shape[0])
         # split into rows
         neighborhoods[:] = np.split(masked_indices, masked_indptr)
     else:
 
@@ -81,10 +81,12 @@ def test_dbscan_sparse():
     assert_array_equal(labels_dense, labels_sparse)
 
 
-def test_dbscan_sparse_precomputed():
+@pytest.mark.parametrize('include_self', [False, True])
+def test_dbscan_sparse_precomputed(include_self):
     D = pairwise_distances(X)
     nn = NearestNeighbors(radius=.9).fit(X)
-    D_sparse = nn.radius_neighbors_graph(mode='distance')
+    X_ = X if include_self else None
+    D_sparse = nn.radius_neighbors_graph(X=X_, mode='distance')
     # Ensure it is sparse not merely on diagonals:
     assert D_sparse.nnz < D.shape[0] * (D.shape[0] - 1)
     core_sparse, labels_sparse = dbscan(D_sparse,
@@ -97,6 +99,21 @@ def test_dbscan_sparse_precomputed():
     assert_array_equal(labels_dense, labels_sparse)
 
 
+@pytest.mark.parametrize('use_sparse', [True, False])
+@pytest.mark.parametrize('metric', ['precomputed', 'minkowski'])
+def test_dbscan_input_not_modified(use_sparse, metric):
+    # test that the input is not modified by dbscan
+    X = np.random.RandomState(0).rand(10, 10)
+    X = sparse.csr_matrix(X) if use_sparse else X
+    X_copy = X.copy()
+    dbscan(X, metric=metric)
+
+    if use_sparse:
+        assert_array_equal(X.toarray(), X_copy.toarray())
+    else:
+        assert_array_equal(X, X_copy)
+
+
 def test_dbscan_no_core_samples():
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10)