[MRG] Fast PolynomialFeatures on dense arrays (#12251) · lithuak/scikit-learn@3e5777a · GitHub
[go: up one dir, main page]

Skip to content

Commit 3e5777a

Browse files
TomDLT authored and ogrisel committed
[MRG] Fast PolynomialFeatures on dense arrays (scikit-learn#12251)
1 parent da85815 commit 3e5777a

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

doc/whats_new/v0.21.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,21 @@ Support for Python 3.4 and below has been officially dropped.
4848
to set and that scales better, by :user:`Shane <espg>` and
4949
:user:`Adrin Jalali <adrinjalali>`.
5050

51+
:mod:`sklearn.preprocessing`
52+
............................
53+
54+
- |Efficiency| |API| Speed improvement in :class:`preprocessing.PolynomialFeatures`,
55+
in the dense case. Also added a new parameter ``order`` which controls output
56+
order for further speed performances. :issue:`12251` by `Tom Dupre la Tour`_.
57+
58+
:mod:`sklearn.datasets`
59+
............................
60+
5161
- |Fix| Fixed integer overflow in :func:`datasets.make_classification`
5262
for values of ``n_informative`` parameter larger than 64.
5363
:issue:`10811` by :user:`Roman Feldbauer <VarIr>`.
5464

65+
5566
Multiple modules
5667
................
5768

sklearn/preprocessing/data.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,6 +1323,12 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
13231323
all polynomial powers are zero (i.e. a column of ones - acts as an
13241324
intercept term in a linear model).
13251325
1326+
order : str in {'C', 'F'}, default 'C'
1327+
Order of output array in the dense case. 'F' order is faster to
1328+
compute, but may slow down subsequent estimators.
1329+
1330+
.. versionadded:: 0.21
1331+
13261332
Examples
13271333
--------
13281334
>>> X = np.arange(6).reshape(3, 2)
@@ -1363,10 +1369,12 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
13631369
See :ref:`examples/linear_model/plot_polynomial_interpolation.py
13641370
<sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`
13651371
"""
1366-
def __init__(self, degree=2, interaction_only=False, include_bias=True):
1372+
def __init__(self, degree=2, interaction_only=False, include_bias=True,
1373+
order='C'):
13671374
self.degree = degree
13681375
self.interaction_only = interaction_only
13691376
self.include_bias = include_bias
1377+
self.order = order
13701378

13711379
@staticmethod
13721380
def _combinations(n_features, degree, interaction_only, include_bias):
@@ -1454,7 +1462,7 @@ def transform(self, X):
14541462
"""
14551463
check_is_fitted(self, ['n_input_features_', 'n_output_features_'])
14561464

1457-
X = check_array(X, dtype=FLOAT_DTYPES, accept_sparse='csc')
1465+
X = check_array(X, order='F', dtype=FLOAT_DTYPES, accept_sparse='csc')
14581466
n_samples, n_features = X.shape
14591467

14601468
if n_features != self.n_input_features_:
@@ -1475,7 +1483,8 @@ def transform(self, X):
14751483
columns.append(sparse.csc_matrix(np.ones((X.shape[0], 1))))
14761484
XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
14771485
else:
1478-
XP = np.empty((n_samples, self.n_output_features_), dtype=X.dtype)
1486+
XP = np.empty((n_samples, self.n_output_features_), dtype=X.dtype,
1487+
order=self.order)
14791488
for i, comb in enumerate(combinations):
14801489
XP[:, i] = X[:, comb].prod(1)
14811490

sklearn/preprocessing/tests/test_data.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,17 @@ def test_polynomial_feature_names():
157157
feature_names)
158158

159159

160+
def test_polynomial_feature_array_order():
161+
X = np.arange(10).reshape(5, 2)
162+
163+
def is_c_contiguous(a):
164+
return np.isfortran(a.T)
165+
166+
assert is_c_contiguous(PolynomialFeatures().fit_transform(X))
167+
assert is_c_contiguous(PolynomialFeatures(order='C').fit_transform(X))
168+
assert np.isfortran(PolynomialFeatures(order='F').fit_transform(X))
169+
170+
160171
@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
161172
[(1, True, False, int),
162173
(2, True, False, int),

0 commit comments

Comments
 (0)
0