|
33 | 33 | from ..utils.validation import (check_is_fitted, check_random_state,
|
34 | 34 | FLOAT_DTYPES)
|
35 | 35 |
|
36 |
| -from ._csr_polynomial_expansion import _csr_polynomial_expansion |
37 |
| - |
38 | 36 | from ._encoders import OneHotEncoder
|
39 | 37 |
|
| 38 | + |
40 | 39 | BOUNDS_THRESHOLD = 1e-7
|
41 | 40 |
|
42 | 41 |
|
@@ -1444,71 +1443,41 @@ def transform(self, X):
|
1444 | 1443 | ----------
|
1445 | 1444 | X : array-like or sparse matrix, shape [n_samples, n_features]
|
1446 | 1445 | The data to transform, row by row.
|
1447 |
| - Sparse input should preferably be in CSR format (for speed), |
1448 |
| - but must be in CSC format if the degree is 4 or higher. |
1449 |
| -
|
1450 |
| - If the input matrix is in CSR format and the expansion is of |
1451 |
| - degree 2 or 3, the method described in the work "Leveraging |
1452 |
| - Sparsity to Speed Up Polynomial Feature Expansions of CSR |
1453 |
| - Matrices Using K-Simplex Numbers" by Andrew Nystrom and |
1454 |
| - John Hughes is used, which is much faster than the method |
1455 |
| - used on CSC input. |
| 1446 | + Sparse input should preferably be in CSC format. |
1456 | 1447 |
|
1457 | 1448 | Returns
|
1458 | 1449 | -------
|
1459 |
| - XP : np.ndarray or CSR/CSC sparse matrix, shape [n_samples, NP] |
| 1450 | + XP : np.ndarray or CSC sparse matrix, shape [n_samples, NP] |
1460 | 1451 | The matrix of features, where NP is the number of polynomial
|
1461 | 1452 | features generated from the combination of inputs.
|
1462 | 1453 | """
|
1463 | 1454 | check_is_fitted(self, ['n_input_features_', 'n_output_features_'])
|
1464 | 1455 |
|
1465 |
| - X = check_array(X, order='F', dtype=FLOAT_DTYPES, |
1466 |
| - accept_sparse=('csr', 'csc')) |
1467 |
| - |
| 1456 | + X = check_array(X, order='F', dtype=FLOAT_DTYPES, accept_sparse='csc') |
1468 | 1457 | n_samples, n_features = X.shape
|
1469 | 1458 |
|
1470 | 1459 | if n_features != self.n_input_features_:
|
1471 | 1460 | raise ValueError("X shape does not match training shape")
|
1472 | 1461 |
|
1473 |
| - if sparse.isspmatrix_csr(X): |
1474 |
| - if self.degree > 3: |
1475 |
| - return self.transform(X.tocsc()).tocsr() |
1476 |
| - to_stack = [] |
1477 |
| - if self.include_bias: |
1478 |
| - to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype)) |
1479 |
| - to_stack.append(X) |
1480 |
| - for deg in range(2, self.degree+1): |
1481 |
| - Xp_next = _csr_polynomial_expansion(X.data, X.indices, |
1482 |
| - X.indptr, X.shape[1], |
1483 |
| - self.interaction_only, |
1484 |
| - deg) |
1485 |
| - if Xp_next is None: |
1486 |
| - break |
1487 |
| - to_stack.append(Xp_next) |
1488 |
| - XP = sparse.hstack(to_stack, format='csr') |
1489 |
| - elif sparse.isspmatrix_csc(X) and self.degree < 4: |
1490 |
| - return self.transform(X.tocsr()).tocsc() |
| 1462 | + combinations = self._combinations(n_features, self.degree, |
| 1463 | + self.interaction_only, |
| 1464 | + self.include_bias) |
| 1465 | + if sparse.isspmatrix(X): |
| 1466 | + columns = [] |
| 1467 | + for comb in combinations: |
| 1468 | + if comb: |
| 1469 | + out_col = 1 |
| 1470 | + for col_idx in comb: |
| 1471 | + out_col = X[:, col_idx].multiply(out_col) |
| 1472 | + columns.append(out_col) |
| 1473 | + else: |
| 1474 | + columns.append(sparse.csc_matrix(np.ones((X.shape[0], 1)))) |
| 1475 | + XP = sparse.hstack(columns, dtype=X.dtype).tocsc() |
1491 | 1476 | else:
|
1492 |
| - combinations = self._combinations(n_features, self.degree, |
1493 |
| - self.interaction_only, |
1494 |
| - self.include_bias) |
1495 |
| - if sparse.isspmatrix(X): |
1496 |
| - columns = [] |
1497 |
| - for comb in combinations: |
1498 |
| - if comb: |
1499 |
| - out_col = 1 |
1500 |
| - for col_idx in comb: |
1501 |
| - out_col = X[:, col_idx].multiply(out_col) |
1502 |
| - columns.append(out_col) |
1503 |
| - else: |
1504 |
| - bias = sparse.csc_matrix(np.ones((X.shape[0], 1))) |
1505 |
| - columns.append(bias) |
1506 |
| - XP = sparse.hstack(columns, dtype=X.dtype).tocsc() |
1507 |
| - else: |
1508 |
| - XP = np.empty((n_samples, self.n_output_features_), |
1509 |
| - dtype=X.dtype, order=self.order) |
1510 |
| - for i, comb in enumerate(combinations): |
1511 |
| - XP[:, i] = X[:, comb].prod(1) |
| 1477 | + XP = np.empty((n_samples, self.n_output_features_), dtype=X.dtype, |
| 1478 | + order=self.order) |
| 1479 | + for i, comb in enumerate(combinations): |
| 1480 | + XP[:, i] = X[:, comb].prod(1) |
1512 | 1481 |
|
1513 | 1482 | return XP
|
1514 | 1483 |
|
|
0 commit comments