8000 Add docs + doctests · scikit-learn/scikit-learn@5f6f7af · GitHub
[go: up one dir, main page]

Skip to content

Commit 5f6f7af

Browse files
committed
Add docs + doctests
1 parent e46ed34 commit 5f6f7af

File tree

3 files changed

+39
-2
lines changed

3 files changed

+39
-2
lines changed

doc/modules/pipeline.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ and ``value`` is an estimator object::
152152
>>> estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())]
153153
>>> combined = FeatureUnion(estimators)
154154
>>> combined # doctest: +NORMALIZE_WHITESPACE
155-
FeatureUnion(n_jobs=1, transformer_list=[('linear_pca', PCA(copy=True,
155+
FeatureUnion(fields=None, n_jobs=1, transformer_list=[('linear_pca', PCA(copy=True,
156156
n_components=None, whiten=False)), ('kernel_pca', KernelPCA(alpha=1.0,
157157
coef0=1, degree=3, eigen_solver='auto', fit_inverse_transform=False,
158158
gamma=None, kernel='linear', kernel_params=None, max_iter=None,

sklearn/pipeline.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,23 @@ class FeatureUnion(BaseEstimator, TransformerMixin):
308308
Multiplicative weights for features per transformer.
309309
Keys are transformer names, values the weights.
310310
311+
fields : list, optional
312+
If given, it must have the same length as the transformer list.
313+
The i-th transformer will receive X[fields[i]]. This allows selecting a
314+
subset of features to be processed by each transformer. If None, all
315+
features are passed on.
316+
317+
Examples
318+
--------
319+
>>> from sklearn.preprocessing import Normalizer
320+
>>> union = FeatureUnion([("norm1", Normalizer(norm='l1')), \
321+
("norm2", Normalizer(norm='l1'))], \
322+
fields=['subset1', 'subset2'])
323+
>>> X = {'subset1': [[0., 1.], [2., 2.]], 'subset2': [[1., 1.], [0., 1.]]}
324+
>>> union.fit_transform(X) # doctest: +NORMALIZE_WHITESPACE
325+
array([[ 0. , 1. , 0.5, 0.5],
326+
[ 0.5, 0.5, 0. , 1. ]])
327+
311328
"""
312329
def __init__(self, transformer_list, n_jobs=1, transformer_weights=None,
313330
fields=None):
@@ -446,7 +463,7 @@ def make_union(*transformers):
446463
--------
447464
>>> from sklearn.decomposition import PCA, TruncatedSVD
448465
>>> make_union(PCA(), TruncatedSVD()) # doctest: +NORMALIZE_WHITESPACE
449-
FeatureUnion(n_jobs=1,
466+
FeatureUnion(fields=None, n_jobs=1,
450467
transformer_list=[('pca', PCA(copy=True, n_components=None,
451468
whiten=False)),
452469
('truncatedsvd',

sklearn/tests/test_pipeline.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,3 +394,23 @@ def test_classes_property():
394394
assert_raises(AttributeError, getattr, clf, "classes_")
395395
clf.fit(X, y)
396396
assert_array_equal(clf.classes_, np.unique(y))
397+
398+
399+
def test_fields():
400+
# dictionary
401+
X_dict = {'first': [[0], [1], [2]],
402+
'second': [[2], [4], [6]]}
403+
# recarray
404+
X_recarray = np.recarray((3, 1),
405+
dtype=[('first', np.int), ('second', np.int)])
406+
X_recarray['first'] = X_dict['first']
407+
X_recarray['second'] = X_dict['second']
408+
409+
for X in [X_dict, X_recarray]:
410+
first_feat = FeatureUnion([('trans', TransfT())], fields=['first'])
411+
second_feat = FeatureUnion([('trans', TransfT())], fields=['second'])
412+
both = FeatureUnion([('trans', TransfT()), ('trans', TransfT())],
413+
fields=['first', 'second'])
414+
assert_array_equal(first_feat.fit_transform(X), X['first'])
415+
assert_array_equal(second_feat.fit_transform(X), X['second'])
416+
assert_array_equal(both.fit_transform(X), np.hstack([X['first'], X['second']]))

0 commit comments

Comments
 (0)
0