Revert "[MRG] Fast PolynomialFeatures on CSR matrices (#12197)" · xhluca/scikit-learn@c84752f · GitHub
[go: up one dir, main page]

Skip to content

Commit c84752f

Browse files
author
Xing
committed
Revert "[MRG] Fast PolynomialFeatures on CSR matrices (scikit-learn#12197)"
This reverts commit 49aab3d.
1 parent 3ca8795 commit c84752f

File tree

6 files changed

+30
-420
lines changed

6 files changed

+30
-420
lines changed

benchmarks/bench_feature_expansions.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

doc/whats_new/v0.21.rst

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,6 @@ Support for Python 3.4 and below has been officially dropped.
5555
of calculating it every time on the fly.
5656
:issue:`12116` by :user:`Ekaterina Krivich <kiote>` and `Joel Nothman`_.
5757

58-
- |Efficiency| :class:`preprocessing.PolynomialFeatures` now supports compressed
59-
sparse row (CSR) matrices as input for degrees 2 and 3. This is typically much
60-
faster than the dense case as it scales with matrix density and expansion degree
61-
(on the order of density^degree), and is much, much faster than the compressed
62-
sparse column (CSC) case. :issue:`12197` by :user:`Andrew Nystrom <awnystrom>`.
63-
6458
- |Efficiency| |API| Speed improvement in :class:`preprocessing.PolynomialFeatures`,
6559
in the dense case. Also added a new parameter ``order`` which controls output
6660
order for further speed performances. :issue:`12251` by `Tom Dupre la Tour`_.

sklearn/preprocessing/_csr_polynomial_expansion.pyx

Lines changed: 0 additions & 156 deletions
This file was deleted.

sklearn/preprocessing/data.py

Lines changed: 22 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,9 @@
3333
from ..utils.validation import (check_is_fitted, check_random_state,
3434
FLOAT_DTYPES)
3535

36-
from ._csr_polynomial_expansion import _csr_polynomial_expansion
37-
3836
from ._encoders import OneHotEncoder
3937

38+
4039
BOUNDS_THRESHOLD = 1e-7
4140

4241

@@ -1444,71 +1443,41 @@ def transform(self, X):
14441443
----------
14451444
X : array-like or sparse matrix, shape [n_samples, n_features]
14461445
The data to transform, row by row.
1447-
Sparse input should preferably be in CSR format (for speed),
1448-
but must be in CSC format if the degree is 4 or higher.
1449-
1450-
If the input matrix is in CSR format and the expansion is of
1451-
degree 2 or 3, the method described in the work "Leveraging
1452-
Sparsity to Speed Up Polynomial Feature Expansions of CSR
1453-
Matrices Using K-Simplex Numbers" by Andrew Nystrom and
1454-
John Hughes is used, which is much faster than the method
1455-
used on CSC input.
1446+
Sparse input should preferably be in CSC format.
14561447
14571448
Returns
14581449
-------
1459-
XP : np.ndarray or CSR/CSC sparse matrix, shape [n_samples, NP]
1450+
XP : np.ndarray or CSC sparse matrix, shape [n_samples, NP]
14601451
The matrix of features, where NP is the number of polynomial
14611452
features generated from the combination of inputs.
14621453
"""
14631454
check_is_fitted(self, ['n_input_features_', 'n_output_features_'])
14641455

1465-
X = check_array(X, order='F', dtype=FLOAT_DTYPES,
1466-
accept_sparse=('csr', 'csc'))
1467-
1456+
X = check_array(X, order='F', dtype=FLOAT_DTYPES, accept_sparse='csc')
14681457
n_samples, n_features = X.shape
14691458

14701459
if n_features != self.n_input_features_:
14711460
raise ValueError("X shape does not match training shape")
14721461

1473-
if sparse.isspmatrix_csr(X):
1474-
if self.degree > 3:
1475-
return self.transform(X.tocsc()).tocsr()
1476-
to_stack = []
1477-
if self.include_bias:
1478-
to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))
1479-
to_stack.append(X)
1480-
for deg in range(2, self.degree+1):
1481-
Xp_next = _csr_polynomial_expansion(X.data, X.indices,
1482-
X.indptr, X.shape[1],
1483-
self.interaction_only,
1484-
deg)
1485-
if Xp_next is None:
1486-
break
1487-
to_stack.append(Xp_next)
1488-
XP = sparse.hstack(to_stack, format='csr')
1489-
elif sparse.isspmatrix_csc(X) and self.degree < 4:
1490-
return self.transform(X.tocsr()).tocsc()
1462+
combinations = self._combinations(n_features, self.degree,
1463+
self.interaction_only,
1464+
self.include_bias)
1465+
if sparse.isspmatrix(X):
1466+
columns = []
1467+
for comb in combinations:
1468+
if comb:
1469+
out_col = 1
1470+
for col_idx in comb:
1471+
out_col = X[:, col_idx].multiply(out_col)
1472+
columns.append(out_col)
1473+
else:
1474+
columns.append(sparse.csc_matrix(np.ones((X.shape[0], 1))))
1475+
XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
14911476
else:
1492-
combinations = self._combinations(n_features, self.degree,
1493-
self.interaction_only,
1494-
self.include_bias)
1495-
if sparse.isspmatrix(X):
1496-
columns = []
1497-
for comb in combinations:
1498-
if comb:
1499-
out_col = 1
1500-
for col_idx in comb:
1501-
out_col = X[:, col_idx].multiply(out_col)
1502-
columns.append(out_col)
1503-
else:
1504-
bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))
1505-
columns.append(bias)
1506-
XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
1507-
else:
1508-
XP = np.empty((n_samples, self.n_output_features_),
1509-
dtype=X.dtype, order=self.order)
1510-
for i, comb in enumerate(combinations):
1511-
XP[:, i] = X[:, comb].prod(1)
1477+
XP = np.empty((n_samples, self.n_output_features_), dtype=X.dtype,
1478+
order=self.order)
1479+
for i, comb in enumerate(combinations):
1480+
XP[:, i] = X[:, comb].prod(1)
15121481

15131482
return XP
15141483

sklearn/preprocessing/setup.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments
 (0)
0