Merge pull request #6372 from amueller/poly_feature_names · raghavrv/scikit-learn@7895d38 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7895d38

Browse files
committed
Merge pull request scikit-learn#6372 from amueller/poly_feature_names
[MRG+2] add get_feature_names to PolynomialFeatures
2 parents b6adb92 + 8fb928d commit 7895d38

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

sklearn/preprocessing/data.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ..utils.extmath import row_norms
2121
from ..utils.extmath import _incremental_mean_and_var
2222
from ..utils.fixes import combinations_with_replacement as combinations_w_r
23+
from ..utils.fixes import bincount
2324
from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
2425
inplace_csr_row_normalize_l2)
2526
from ..utils.sparsefuncs import (inplace_column_scale,
@@ -1140,7 +1141,7 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
11401141
11411142
Attributes
11421143
----------
1143-
powers_ : array, shape (n_input_features, n_output_features)
1144+
powers_ : array, shape (n_output_features, n_input_features)
11441145
powers_[i, j] is the exponent of the jth input in the ith output.
11451146
11461147
n_input_features_ : int
@@ -1179,9 +1180,39 @@ def powers_(self):
11791180
combinations = self._combinations(self.n_input_features_, self.degree,
11801181
self.interaction_only,
11811182
self.include_bias)
1182-
return np.vstack(np.bincount(c, minlength=self.n_input_features_)
1183+
return np.vstack(bincount(c, minlength=self.n_input_features_)
11831184
for c in combinations)
11841185

1186+
def get_feature_names(self, input_features=None):
1187+
"""
1188+
Return feature names for output features
1189+
1190+
Parameters
1191+
----------
1192+
input_features : list of string, length n_features, optional
1193+
String names for input features if available. By default,
1194+
"x0", "x1", ... "xn_features" is used.
1195+
1196+
Returns
1197+
-------
1198+
output_feature_names : list of string, length n_output_features
1199+
1200+
"""
1201+
powers = self.powers_
1202+
if input_features is None:
1203+
input_features = ['x%d' % i for i in range(powers.shape[1])]
1204+
feature_names = []
1205+
for row in powers:
1206+
inds = np.where(row)[0]
1207+
if len(inds):
1208+
name = " ".join("%s^%d" % (input_features[ind], exp)
1209+
if exp != 1 else input_features[ind]
1210+
for ind, exp in zip(inds, row[inds]))
1211+
else:
1212+
name = "1"
1213+
feature_names.append(name)
1214+
return feature_names
1215+
11851216
def fit(self, X, y=None):
11861217
"""
11871218
Compute number of output features.

sklearn/preprocessing/tests/test_data.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy.linalg as la
1111
from scipy import sparse
1212
from distutils.version import LooseVersion
13+
from sklearn.externals.six import u
1314

1415
from sklearn.utils import gen_batches
1516

@@ -118,6 +119,30 @@ def test_polynomial_features():
118119
X_poly = interact.fit_transform(X)
119120
assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]])
120121

122+
assert_equal(interact.powers_.shape, (interact.n_output_features_,
123+
interact.n_input_features_))
124+
125+
126+
def test_polynomial_feature_names():
127+
X = np.arange(30).reshape(10, 3)
128+
poly = PolynomialFeatures(degree=2, include_bias=True).fit(X)
129+
feature_names = poly.get_feature_names()
130+
assert_array_equal(['1', 'x0', 'x1', 'x2', 'x0^2', 'x0 x1',
131+
'x0 x2', 'x1^2', 'x1 x2', 'x2^2'],
132+
feature_names)
133+
134+
poly = PolynomialFeatures(degree=3, include_bias=False).fit(X)
135+
feature_names = poly.get_feature_names(["a", "b", "c"])
136+
assert_array_equal(['a', 'b', 'c', 'a^2', 'a b', 'a c', 'b^2',
137+
'b c', 'c^2', 'a^3', 'a^2 b', 'a^2 c',
138+
'a b^2', 'a b c', 'a c^2', 'b^3', 'b^2 c',
139+
'b c^2', 'c^3'], feature_names)
140+
# test some unicode
141+
poly = PolynomialFeatures(degree=1, include_bias=True).fit(X)
142+
feature_names = poly.get_feature_names([u"\u0001F40D", u"\u262E", u"\u05D0"])
143+
assert_array_equal([u"1", u"\u0001F40D", u"\u262E", u"\u05D0"],
144+
feature_names)
145+
121146

122147
def test_standard_scaler_1d():
123148
# Test scaling of dataset along single axis

0 commit comments

Comments (0)