diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 2f359ca87463f..92ef22d1b1228 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -155,6 +155,10 @@ Support for Python 3.4 and below has been officially dropped.
   and now it returns NaN and raises :class:`exceptions.UndefinedMetricWarning`.
   :issue:`12855` by :user:`Pawel Sendyk <psendyk>.`
 
+- |Efficiency| The pairwise manhattan distances with sparse input now uses the
+  BLAS shipped with scipy instead of the bundled BLAS. :issue:`12732` by
+  :user:`Jérémie du Boisberranger <jeremiedbb>`
+
 :mod:`sklearn.model_selection`
 ..............................
 
diff --git a/sklearn/metrics/pairwise_fast.pyx b/sklearn/metrics/pairwise_fast.pyx
index 4d7ad411fa20a..76ab64d9cd987 100644
--- a/sklearn/metrics/pairwise_fast.pyx
+++ b/sklearn/metrics/pairwise_fast.pyx
@@ -12,9 +12,7 @@ import numpy as np
 cimport numpy as np
 from cython cimport floating
 
-
-cdef extern from "cblas.h":
-    double cblas_dasum(int, const double *, int) nogil
+from ..utils._cython_blas cimport _asum
 
 
 np.import_array()
@@ -67,4 +65,4 @@ def _sparse_manhattan(floating[::1] X_data, int[:] X_indices, int[:] X_indptr,
                 for j in range(Y_indptr[iy], Y_indptr[iy + 1]):
                     row[Y_indices[j]] -= Y_data[j]
 
-                D[ix, iy] = cblas_dasum(n_features, &row[0], 1)
+                D[ix, iy] = _asum(n_features, &row[0], 1)
diff --git a/sklearn/metrics/setup.py b/sklearn/metrics/setup.py
index d9a10d1df3290..97175456220cd 100644
--- a/sklearn/metrics/setup.py
+++ b/sklearn/metrics/setup.py
@@ -1,33 +1,26 @@
 import os
-import os.path
 
-import numpy
 from numpy.distutils.misc_util import Configuration
 
-from sklearn._build_utils import get_blas_info
-
 
 def configuration(parent_package="", top_path=None):
     config = Configuration("metrics", parent_package, top_path)
 
-    cblas_libs, blas_info = get_blas_info()
+    libraries = []
     if os.name == 'posix':
-        cblas_libs.append('m')
+        libraries.append('m')
 
     config.add_subpackage('cluster')
+
     config.add_extension("pairwise_fast",
                          sources=["pairwise_fast.pyx"],
-                         include_dirs=[os.path.join('..', 'src', 'cblas'),
-                                       numpy.get_include(),
-                                       blas_info.pop('include_dirs', [])],
-                         libraries=cblas_libs,
-                         extra_compile_args=blas_info.pop('extra_compile_args',
-                                                          []),
-                         **blas_info)
+                         libraries=libraries)
+
     config.add_subpackage('tests')
 
     return config
 
+
 if __name__ == "__main__":
     from numpy.distutils.core import setup
     setup(**configuration().todict())
diff --git a/sklearn/utils/_cython_blas.pxd b/sklearn/utils/_cython_blas.pxd
new file mode 100644
index 0000000000000..4d82c7b1aaf13
--- /dev/null
+++ b/sklearn/utils/_cython_blas.pxd
@@ -0,0 +1,39 @@
+# cython: language_level=3
+
+from cython cimport floating
+
+
+cpdef enum BLAS_Order:
+    RowMajor  # C contiguous
+    ColMajor  # Fortran contiguous
+
+
+cpdef enum BLAS_Trans:
+    NoTrans = 110  # correspond to 'n'
+    Trans = 116    # correspond to 't'
+
+
+# BLAS Level 1 ################################################################
+cdef floating _dot(int, floating*, int, floating*, int) nogil
+
+cdef floating _asum(int, floating*, int) nogil
+
+cdef void _axpy(int, floating, floating*, int, floating*, int) nogil
+
+cdef floating _nrm2(int, floating*, int) nogil
+
+cdef void _copy(int, floating*, int, floating*, int) nogil
+
+cdef void _scal(int, floating, floating*, int) nogil
+
+# BLAS Level 2 ################################################################
+cdef void _gemv(BLAS_Order, BLAS_Trans, int, int, floating, floating*, int,
+                floating*, int, floating, floating*, int) nogil
+
+cdef void _ger(BLAS_Order, int, int, floating, floating*, int, floating*, int,
+               floating*, int) nogil
+
+# BLASLevel 3 ################################################################
+cdef void _gemm(BLAS_Order, BLAS_Trans, BLAS_Trans, int, int, int, floating,
+                floating*, int, floating*, int, floating, floating*,
+                int) nogil
diff --git a/sklearn/utils/_cython_blas.pyx b/sklearn/utils/_cython_blas.pyx
new file mode 100644
index 0000000000000..7585105227f9f
--- /dev/null
+++ b/sklearn/utils/_cython_blas.pyx
@@ -0,0 +1,198 @@
+from cython cimport floating
+
+from scipy.linalg.cython_blas cimport sdot, ddot
+from scipy.linalg.cython_blas cimport sasum, dasum
+from scipy.linalg.cython_blas cimport saxpy, daxpy
+from scipy.linalg.cython_blas cimport snrm2, dnrm2
+from scipy.linalg.cython_blas cimport scopy, dcopy
+from scipy.linalg.cython_blas cimport sscal, dscal
+from scipy.linalg.cython_blas cimport sgemv, dgemv
+from scipy.linalg.cython_blas cimport sger, dger
+from scipy.linalg.cython_blas cimport sgemm, dgemm
+
+
+################
+# BLAS Level 1 #
+################
+
+cdef floating _dot(int n, floating *x, int incx,
+                   floating *y, int incy) nogil:
+    """x.T.y"""
+    if floating is float:
+        return sdot(&n, x, &incx, y, &incy)
+    else:
+        return ddot(&n, x, &incx, y, &incy)
+
+
+cpdef _dot_memview(floating[::1] x, floating[::1] y):
+    return _dot(x.shape[0], &x[0], 1, &y[0], 1)
+
+
+cdef floating _asum(int n, floating *x, int incx) nogil:
+    """sum(|x_i|)"""
+    if floating is float:
+        return sasum(&n, x, &incx)
+    else:
+        return dasum(&n, x, &incx)
+
+
+cpdef _asum_memview(floating[::1] x):
+    return _asum(x.shape[0], &x[0], 1)
+
+
+cdef void _axpy(int n, floating alpha, floating *x, int incx,
+                floating *y, int incy) nogil:
+    """y := alpha * x + y"""
+    if floating is float:
+        saxpy(&n, &alpha, x, &incx, y, &incy)
+    else:
+        daxpy(&n, &alpha, x, &incx, y, &incy)
+
+
+cpdef _axpy_memview(floating alpha, floating[::1] x, floating[::1] y):
+    _axpy(x.shape[0], alpha, &x[0], 1, &y[0], 1)
+
+
+cdef floating _nrm2(int n, floating *x, int incx) nogil:
+    """sqrt(sum((x_i)^2))"""
+    if floating is float:
+        return snrm2(&n, x, &incx)
+    else:
+        return dnrm2(&n, x, &incx)
+
+
+cpdef _nrm2_memview(floating[::1] x):
+    return _nrm2(x.shape[0], &x[0], 1)
+
+
+cdef void _copy(int n, floating *x, int incx, floating *y, int incy) nogil:
+    """y := x"""
+    if floating is float:
+        scopy(&n, x, &incx, y, &incy)
+    else:
+        dcopy(&n, x, &incx, y, &incy)
+
+
+cpdef _copy_memview(floating[::1] x, floating[::1] y):
+    _copy(x.shape[0], &x[0], 1, &y[0], 1)
+
+
+cdef void _scal(int n, floating alpha, floating *x, int incx) nogil:
+    """x := alpha * x"""
+    if floating is float:
+        sscal(&n, &alpha, x, &incx)
+    else:
+        dscal(&n, &alpha, x, &incx)
+
+
+cpdef _scal_memview(floating alpha, floating[::1] x):
+    _scal(x.shape[0], alpha, &x[0], 1)
+
+
+################
+# BLAS Level 2 #
+################
+
+cdef void _gemv(BLAS_Order order, BLAS_Trans ta, int m, int n, floating alpha,
+                floating *A, int lda, floating *x, int incx,
+                floating beta, floating *y, int incy) nogil:
+    """y := alpha * op(A).x + beta * y"""
+    cdef char ta_ = ta
+    if order == RowMajor:
+        ta_ = NoTrans if ta == Trans else Trans
+        if floating is float:
+            sgemv(&ta_, &n, &m, &alpha, A, &lda, x, &incx, &beta, y, &incy)
+        else:
+            dgemv(&ta_, &n, &m, &alpha, A, &lda, x, &incx, &beta, y, &incy)
+    else:
+        if floating is float:
+            sgemv(&ta_, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy)
+        else:
+            dgemv(&ta_, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy)
+
+
+cpdef _gemv_memview(BLAS_Trans ta, floating alpha, floating[:, :] A,
+                    floating[::1] x, floating beta, floating[::1] y):
+    cdef:
+        int m = A.shape[0]
+        int n = A.shape[1]
+        BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
+        int lda = m if order == ColMajor else n
+
+    _gemv(order, ta, m, n, alpha, &A[0, 0], lda, &x[0], 1, beta, &y[0], 1)
+
+
+cdef void _ger(BLAS_Order order, int m, int n, floating alpha, floating *x,
+               int incx, floating *y, int incy, floating *A, int lda) nogil:
+    """A := alpha * x.y.T + A"""
+    if order == RowMajor:
+        if floating is float:
+            sger(&n, &m, &alpha, y, &incy, x, &incx, A, &lda)
+        else:
+            dger(&n, &m, &alpha, y, &incy, x, &incx, A, &lda)
+    else:
+        if floating is float:
+            sger(&m, &n, &alpha, x, &incx, y, &incy, A, &lda)
+        else:
+            dger(&m, &n, &alpha, x, &incx, y, &incy, A, &lda)
+
+
+cpdef _ger_memview(floating alpha, floating[::1] x, floating[::] y,
+                   floating[:, :] A):
+    cdef:
+        int m = A.shape[0]
+        int n = A.shape[1]
+        BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
+        int lda = m if order == ColMajor else n
+
+    _ger(order, m, n, alpha, &x[0], 1, &y[0], 1, &A[0, 0], lda)
+
+
+################
+# BLAS Level 3 #
+################
+
+cdef void _gemm(BLAS_Order order, BLAS_Trans ta, BLAS_Trans tb, int m, int n,
+                int k, floating alpha, floating *A, int lda, floating *B,
+                int ldb, floating beta, floating *C, int ldc) nogil:
+    """C := alpha * op(A).op(B) + beta * C"""
+    cdef:
+        char ta_ = ta
+        char tb_ = tb
+    if order == RowMajor:
+        if floating is float:
+            sgemm(&tb_, &ta_, &n, &m, &k, &alpha, B,
+                  &ldb, A, &lda, &beta, C, &ldc)
+        else:
+            dgemm(&tb_, &ta_, &n, &m, &k, &alpha, B,
+                  &ldb, A, &lda, &beta, C, &ldc)
+    else:
+        if floating is float:
+            sgemm(&ta_, &tb_, &m, &n, &k, &alpha, A,
+                  &lda, B, &ldb, &beta, C, &ldc)
+        else:
+            dgemm(&ta_, &tb_, &m, &n, &k, &alpha, A,
+                  &lda, B, &ldb, &beta, C, &ldc)
+
+
+cpdef _gemm_memview(BLAS_Trans ta, BLAS_Trans tb, floating alpha,
+                    floating[:, :] A, floating[:, :] B, floating beta,
+                    floating[:, :] C):
+    cdef:
+        int m = A.shape[0] if ta == NoTrans else A.shape[1]
+        int n = B.shape[1] if tb == NoTrans else B.shape[0]
+        int k = A.shape[1] if ta == NoTrans else A.shape[0]
+        int lda, ldb, ldc
+        BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
+
+    if order == RowMajor:
+        lda = k if ta == NoTrans else m
+        ldb = n if tb == NoTrans else k
+        ldc = n
+    else:
+        lda = m if ta == NoTrans else k
+        ldb = k if tb == NoTrans else n
+        ldc = m
+
+    _gemm(order, ta, tb, m, n, k, alpha, &A[0, 0],
+          lda, &B[0, 0], ldb, beta, &C[0, 0], ldc)
diff --git a/sklearn/utils/setup.py b/sklearn/utils/setup.py
index 13d772a5a53b7..97aeb602408c4 100644
--- a/sklearn/utils/setup.py
+++ b/sklearn/utils/setup.py
@@ -24,14 +24,17 @@ def configuration(parent_package='', top_path=None):
     config.add_extension('sparsefuncs_fast', sources=['sparsefuncs_fast.pyx'],
                          libraries=libraries)
 
+    config.add_extension('_cython_blas',
+                         sources=['_cython_blas.pyx'],
+                         libraries=libraries)
+
     config.add_extension('arrayfuncs',
                          sources=['arrayfuncs.pyx'],
                          depends=[join('src', 'cholesky_delete.h')],
                          libraries=cblas_libs,
                          include_dirs=cblas_includes,
                          extra_compile_args=cblas_compile_args,
-                         **blas_info
-                         )
+                         **blas_info)
 
     config.add_extension('murmurhash',
                          sources=['murmurhash.pyx', join(
diff --git a/sklearn/utils/tests/test_cython_blas.py b/sklearn/utils/tests/test_cython_blas.py
new file mode 100644
index 0000000000000..0305e5a5476dc
--- /dev/null
+++ b/sklearn/utils/tests/test_cython_blas.py
@@ -0,0 +1,178 @@
+import pytest
+import cython
+
+import numpy as np
+
+from sklearn.utils.testing import assert_allclose
+
+from sklearn.utils._cython_blas import _dot_memview
+from sklearn.utils._cython_blas import _asum_memview
+from sklearn.utils._cython_blas import _axpy_memview
+from sklearn.utils._cython_blas import _nrm2_memview
+from sklearn.utils._cython_blas import _copy_memview
+from sklearn.utils._cython_blas import _scal_memview
+from sklearn.utils._cython_blas import _gemv_memview
+from sklearn.utils._cython_blas import _ger_memview
+from sklearn.utils._cython_blas import _gemm_memview
+from sklearn.utils._cython_blas import RowMajor, ColMajor
+from sklearn.utils._cython_blas import Trans, NoTrans
+
+
+NUMPY_TO_CYTHON = {np.float32: cython.float, np.float64: cython.double}
+RTOL = {np.float32: 1e-6, np.float64: 1e-12}
+ORDER = {RowMajor: 'C', ColMajor: 'F'}
+
+
+def _no_op(x):
+    return x
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_dot(dtype):
+    dot = _dot_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    x = rng.random_sample(10).astype(dtype, copy=False)
+    y = rng.random_sample(10).astype(dtype, copy=False)
+
+    expected = x.dot(y)
+    actual = dot(x, y)
+
+    assert_allclose(actual, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_asum(dtype):
+    asum = _asum_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    x = rng.random_sample(10).astype(dtype, copy=False)
+
+    expected = np.abs(x).sum()
+    actual = asum(x)
+
+    assert_allclose(actual, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_axpy(dtype):
+    axpy = _axpy_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    x = rng.random_sample(10).astype(dtype, copy=False)
+    y = rng.random_sample(10).astype(dtype, copy=False)
+    alpha = 2.5
+
+    expected = alpha * x + y
+    axpy(alpha, x, y)
+
+    assert_allclose(y, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_nrm2(dtype):
+    nrm2 = _nrm2_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    x = rng.random_sample(10).astype(dtype, copy=False)
+
+    expected = np.linalg.norm(x)
+    actual = nrm2(x)
+
+    assert_allclose(actual, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_copy(dtype):
+    copy = _copy_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    x = rng.random_sample(10).astype(dtype, copy=False)
+    y = np.empty_like(x)
+
+    expected = x.copy()
+    copy(x, y)
+
+    assert_allclose(y, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_scal(dtype):
+    scal = _scal_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    x = rng.random_sample(10).astype(dtype, copy=False)
+    alpha = 2.5
+
+    expected = alpha * x
+    scal(alpha, x)
+
+    assert_allclose(x, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("opA, transA",
+                         [(_no_op, NoTrans), (np.transpose, Trans)],
+                         ids=["NoTrans", "Trans"])
+@pytest.mark.parametrize("order", [RowMajor, ColMajor],
+                         ids=["RowMajor", "ColMajor"])
+def test_gemv(dtype, opA, transA, order):
+    gemv = _gemv_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    A = np.asarray(opA(rng.random_sample((20, 10)).astype(dtype, copy=False)),
+                   order=ORDER[order])
+    x = rng.random_sample(10).astype(dtype, copy=False)
+    y = rng.random_sample(20).astype(dtype, copy=False)
+    alpha, beta = 2.5, -0.5
+
+    expected = alpha * opA(A).dot(x) + beta * y
+    gemv(transA, alpha, A, x, beta, y)
+
+    assert_allclose(y, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("order", [RowMajor, ColMajor],
+                         ids=["RowMajor", "ColMajor"])
+def test_ger(dtype, order):
+    ger = _ger_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    x = rng.random_sample(10).astype(dtype, copy=False)
+    y = rng.random_sample(20).astype(dtype, copy=False)
+    A = np.asarray(rng.random_sample((10, 20)).astype(dtype, copy=False),
+                   order=ORDER[order])
+    alpha = 2.5
+
+    expected = alpha * np.outer(x, y) + A
+    ger(alpha, x, y, A)
+
+    assert_allclose(A, expected, rtol=RTOL[dtype])
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("opB, transB",
+                         [(_no_op, NoTrans), (np.transpose, Trans)],
+                         ids=["NoTrans", "Trans"])
+@pytest.mark.parametrize("opA, transA",
+                         [(_no_op, NoTrans), (np.transpose, Trans)],
+                         ids=["NoTrans", "Trans"])
+@pytest.mark.parametrize("order", [RowMajor, ColMajor],
+                         ids=["RowMajor", "ColMajor"])
+def test_gemm(dtype, opA, transA, opB, transB, order):
+    gemm = _gemm_memview[NUMPY_TO_CYTHON[dtype]]
+
+    rng = np.random.RandomState(0)
+    A = np.asarray(opA(rng.random_sample((30, 10)).astype(dtype, copy=False)),
+                   order=ORDER[order])
+    B = np.asarray(opB(rng.random_sample((10, 20)).astype(dtype, copy=False)),
+                   order=ORDER[order])
+    C = np.asarray(rng.random_sample((30, 20)).astype(dtype, copy=False),
+                   order=ORDER[order])
+    alpha, beta = 2.5, -0.5
+
+    expected = alpha * opA(A).dot(opB(B)) + beta * C
+    gemm(transA, transB, alpha, A, B, beta, C)
+
+    assert_allclose(C, expected, rtol=RTOL[dtype])