scikit-learn
diff --git a/‎doc/whats_new/v0.23.rst
Lines changed: 4 additions & 0 deletions b/‎doc/whats_new/v0.23.rst
Lines changed: 4 additions & 0 deletions
diff --git a/‎sklearn/linear_model/tests/test_base.py
Lines changed: 17 additions & 3 deletions b/‎sklearn/linear_model/tests/test_base.py
Lines changed: 17 additions & 3 deletions
diff --git a/‎sklearn/utils/tests/test_validation.py
Lines changed: 19 additions & 0 deletions b/‎sklearn/utils/tests/test_validation.py
Lines changed: 19 additions & 0 deletions
diff --git a/‎sklearn/utils/validation.py
Lines changed: 9 additions & 2 deletions b/‎sklearn/utils/validation.py
Lines changed: 9 additions & 2 deletions
@@ -413,6 +413,10 @@ Changelog
   pandas sparse DataFrame.
   :pr:`16021` by :user:`Rushabh Vasani <rushabh-v>`.
 
+- |Enhancement| :func:`utils.validation.check_array` now constructs a sparse
+  matrix from a pandas DataFrame that contains only `SparseArray`s.
+  :pr:`16728` by `Thomas Fan`_.
+
 :mod:`sklearn.cluster`
 ......................
 
 
@@ -212,16 +212,30 @@ def test_linear_regression_pd_sparse_dataframe_warning():
     # restrict the pd versions < '0.24.0' as they have a bug in is_sparse func
     if LooseVersion(pd.__version__) < '0.24.0':
         pytest.skip("pandas 0.24+ required.")
-    df = pd.DataFrame()
-    for col in range(4):
+
+    # Warning is raised only when some (not all) of the columns are sparse
+    df = pd.DataFrame({'0': np.random.randn(10)})
+    for col in range(1, 4):
         arr = np.random.randn(10)
         arr[:8] = 0
-        df[str(col)] = pd.arrays.SparseArray(arr, fill_value=0)
+        # all columns but the first column are sparse
+        if col != 0:
+            arr = pd.arrays.SparseArray(arr, fill_value=0)
+        df[str(col)] = arr
+
     msg = "pandas.DataFrame with sparse columns found."
     with pytest.warns(UserWarning, match=msg):
         reg = LinearRegression()
         reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
 
+    # does not warn when the whole dataframe is sparse
+    df['0'] = pd.arrays.SparseArray(df['0'], fill_value=0)
+    assert hasattr(df, "sparse")
+
+    with pytest.warns(None) as record:
+        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
+    assert not record
+
 
 def test_preprocess_data():
     n_samples = 200
 
@@ -1153,3 +1153,22 @@ def test_check_fit_params(indices):
         result['sparse-col'],
         _safe_indexing(fit_params['sparse-col'], indices_)
     )
+
+
+@pytest.mark.parametrize('sp_format', [True, 'csr', 'csc', 'coo', 'bsr'])
+def test_check_sparse_pandas_sp_format(sp_format):
+    # check_array turns a pandas DataFrame holding only sparse columns into
+    # a sparse matrix whose format follows the accept_sparse argument
+    pd = pytest.importorskip("pandas")
+    sp_mat = _sparse_random_matrix(10, 3)
+
+    sdf = pd.DataFrame.sparse.from_spmatrix(sp_mat)
+    result = check_array(sdf, accept_sparse=sp_format)
+
+    if sp_format is True:
+        # accept_sparse=True keeps the coo output of DataFrame.sparse.to_coo
+        sp_format = 'coo'
+
+    assert sp.issparse(result)
+    assert result.format == sp_format
+    assert_allclose_dense_sparse(sp_mat, result)
@@ -451,10 +451,12 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
     # DataFrame), and store them. If not, store None.
     dtypes_orig = None
     if hasattr(array, "dtypes") and hasattr(array.dtypes, '__array__'):
-        # throw warning if pandas dataframe is sparse
+        # throw warning if columns are sparse. If all columns are sparse, then
+        # array.sparse exists and sparsity will be preserved (later).
         with suppress(ImportError):
             from pandas.api.types import is_sparse
-            if array.dtypes.apply(is_sparse).any():
+            if (not hasattr(array, 'sparse') and
+                    array.dtypes.apply(is_sparse).any()):
                 warnings.warn(
                     "pandas.DataFrame with sparse columns found."
                     "It will be converted to a dense numpy array."
@@ -498,6 +500,11 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
         estimator_name = "Estimator"
     context = " by %s" % estimator_name if estimator is not None else ""
 
+    # When all dataframe columns are sparse, convert to a sparse array
+    if hasattr(array, 'sparse') and array.ndim > 1:
+        # DataFrame.sparse only supports `to_coo`
+        array = array.sparse.to_coo()
+
     if sp.issparse(array):
         _ensure_no_complex_data(array)
         array = _ensure_sparse_format(array, accept_sparse=accept_sparse,