[MRG+1] - Voting classifier flatten transform (Continuation) (#9188) · musically-ut/scikit-learn@6f70202

Commit 6f70202

herilalaina authored and amueller committed
[MRG+1] - Voting classifier flatten transform (Continuation) (scikit-learn#9188)
* flatten_transform parameter added to VotingClassifier
* Regression test added
* What's new section added
* flake8 fix
* Improve test and docstring
* Add what's new entry
* default value flatten_transform
* Add test for warning msg
* Fix bug in assert_warns_message
* Move warn msg into transform
* Add deprecation warning
* Merge warning
* Change warn msg
* Move what's new content into Trees and ensembles
* Fixes minor bug
* update what's new
* update test
1 parent d96a462 commit 6f70202

File tree: 3 files changed, +75 additions, -6 deletions

doc/whats_new.rst

Lines changed: 5 additions & 0 deletions

@@ -171,6 +171,11 @@ Trees and ensembles
 
 - :func:`tree.export_graphviz` now shows configurable number of decimal
   places. :issue:`8698` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier`
+  to change the output shape of the ``transform`` method to 2-dimensional.
+  :issue:`7794` by :user:`Ibraim Ganiev <olologin>` and
+  :user:`Herilalaina Rakotoarison <herilalaina>`.
 
 Linear, kernelized and related models
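For orientation, here is a minimal sketch of the behavior the entry above describes, using the same toy data as the new test below. It assumes a scikit-learn build that contains this commit:

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB

    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    estimators = [('lr', LogisticRegression()),
                  ('rf', RandomForestClassifier()),
                  ('gnb', GaussianNB())]

    # Old behavior: one (n_samples, n_classes) probability block per classifier.
    eclf = VotingClassifier(estimators, voting='soft',
                            flatten_transform=False).fit(X, y)
    print(eclf.transform(X).shape)  # (3, 4, 2)

    # New opt-in behavior: the blocks are stacked side by side into 2-D.
    eclf = VotingClassifier(estimators, voting='soft',
                            flatten_transform=True).fit(X, y)
    print(eclf.transform(X).shape)  # (4, 6)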

sklearn/ensemble/tests/test_voting_classifier.py

Lines changed: 37 additions & 1 deletion

@@ -4,6 +4,7 @@
 from sklearn.utils.testing import assert_almost_equal, assert_array_equal
 from sklearn.utils.testing import assert_equal, assert_true, assert_false
 from sklearn.utils.testing import assert_raise_message
+from sklearn.utils.testing import assert_warns_message
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
 from sklearn.naive_bayes import GaussianNB

@@ -223,7 +224,7 @@ def test_gridsearch():
     grid.fit(iris.data, iris.target)
 
 
-def test_parallel_predict():
+def test_parallel_fit():
     """Check parallel backend of VotingClassifier on toy dataset."""
     clf1 = LogisticRegression(random_state=123)
     clf2 = RandomForestClassifier(random_state=123)

@@ -364,3 +365,38 @@ def test_estimator_weights_format():
     eclf1.fit(X, y)
     eclf2.fit(X, y)
     assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
+
+
+def test_transform():
+    """Check transform method of VotingClassifier on toy dataset."""
+    clf1 = LogisticRegression(random_state=123)
+    clf2 = RandomForestClassifier(random_state=123)
+    clf3 = GaussianNB()
+    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
+    y = np.array([1, 1, 2, 2])
+
+    eclf1 = VotingClassifier(estimators=[
+        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
+        voting='soft').fit(X, y)
+    eclf2 = VotingClassifier(estimators=[
+        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
+        voting='soft',
+        flatten_transform=True).fit(X, y)
+    eclf3 = VotingClassifier(estimators=[
+        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
+        voting='soft',
+        flatten_transform=False).fit(X, y)
+
+    warn_msg = ("'flatten_transform' default value will be "
+                "changed to True in 0.21. "
+                "To silence this warning you may "
+                "explicitly set flatten_transform=False.")
+    res = assert_warns_message(DeprecationWarning, warn_msg,
+                               eclf1.transform, X)
+    assert_array_equal(res.shape, (3, 4, 2))
+    assert_array_equal(eclf2.transform(X).shape, (4, 6))
+    assert_array_equal(eclf3.transform(X).shape, (3, 4, 2))
+    assert_array_equal(res.swapaxes(0, 1).reshape((4, 6)),
+                       eclf2.transform(X))
+    assert_array_equal(eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)),
+                       eclf2.transform(X))
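The last two assertions in test_transform lean on a pure-NumPy identity: horizontally stacking the per-classifier probability blocks gives the same array as swapping the classifier and sample axes and then reshaping. A standalone sketch of just that identity (the shape (3, 4, 2) mirrors the test's 3 classifiers, 4 samples, and 2 classes):

    import numpy as np

    # Stand-in for _collect_probas output: (n_classifiers, n_samples, n_classes).
    probas = np.arange(3 * 4 * 2, dtype=float).reshape(3, 4, 2)

    # np.hstack concatenates the three (4, 2) blocks along the column axis...
    flat_hstack = np.hstack(probas)                      # shape (4, 6)

    # ...laying out columns classifier-major, exactly like swapaxes + reshape.
    flat_reshape = probas.swapaxes(0, 1).reshape((4, 6))

    assert np.array_equal(flat_hstack, flat_reshape)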

sklearn/ensemble/voting_classifier.py

Lines changed: 33 additions & 5 deletions

@@ -12,6 +12,7 @@
 # License: BSD 3 clause
 
 import numpy as np
+import warnings
 
 from ..base import ClassifierMixin
 from ..base import TransformerMixin

@@ -61,6 +62,13 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin):
         The number of jobs to run in parallel for ``fit``.
         If -1, then the number of jobs is set to the number of cores.
 
+    flatten_transform : bool, optional (default=None)
+        Affects the shape of the transform output only when voting='soft'.
+        If voting='soft' and flatten_transform=True, the transform method
+        returns a matrix with shape (n_samples, n_classifiers * n_classes).
+        If flatten_transform=False, it returns
+        (n_classifiers, n_samples, n_classes).
+
     Attributes
     ----------
     estimators_ : list of classifiers

@@ -94,18 +102,23 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin):
     [1 1 1 2 2 2]
     >>> eclf3 = VotingClassifier(estimators=[
     ...    ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
-    ...    voting='soft', weights=[2,1,1])
+    ...    voting='soft', weights=[2,1,1],
+    ...    flatten_transform=True)
     >>> eclf3 = eclf3.fit(X, y)
     >>> print(eclf3.predict(X))
     [1 1 1 2 2 2]
+    >>> print(eclf3.transform(X).shape)
+    (6, 6)
     >>>
     """
 
-    def __init__(self, estimators, voting='hard', weights=None, n_jobs=1):
+    def __init__(self, estimators, voting='hard', weights=None, n_jobs=1,
+                 flatten_transform=None):
         self.estimators = estimators
         self.voting = voting
         self.weights = weights
         self.n_jobs = n_jobs
+        self.flatten_transform = flatten_transform
 
     @property
     def named_estimators(self):

@@ -163,6 +176,7 @@ def fit(self, X, y, sample_weight=None):
         if n_isnone == len(self.estimators):
             raise ValueError('All estimators are None. At least one is '
                              'required to be a classifier!')
+
         self.le_ = LabelEncoder().fit(y)
         self.classes_ = self.le_.classes_
         self.estimators_ = []

@@ -256,16 +270,30 @@ def transform(self, X):
 
         Returns
         -------
-        If `voting='soft'`:
-          array-like = [n_classifiers, n_samples, n_classes]
+        If `voting='soft'` and `flatten_transform=True`:
+          array-like = (n_samples, n_classifiers * n_classes),
+          otherwise array-like = (n_classifiers, n_samples, n_classes).
            Class probabilities calculated by each classifier.
         If `voting='hard'`:
          array-like = [n_samples, n_classifiers]
            Class labels predicted by each classifier.
         """
         check_is_fitted(self, 'estimators_')
 
         if self.voting == 'soft':
-            return self._collect_probas(X)
+            probas = self._collect_probas(X)
+            if self.flatten_transform is None:
+                warnings.warn("'flatten_transform' default value will be "
+                              "changed to True in 0.21. "
+                              "To silence this warning you may "
+                              "explicitly set flatten_transform=False.",
+                              DeprecationWarning)
+                return probas
+            elif not self.flatten_transform:
+                return probas
+            else:
+                return np.hstack(probas)
 
         else:
             return self._predict(X)
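Putting the pieces together, the deprecation path can be exercised end to end. A sketch, again assuming a build that contains this commit (the catch_warnings block is only there to capture the DeprecationWarning for inspection):

    import warnings
    import numpy as np
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB

    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    eclf = VotingClassifier([('lr', LogisticRegression()),
                             ('gnb', GaussianNB())],
                            voting='soft').fit(X, y)

    # flatten_transform was left at None, so transform() keeps the old 3-D
    # output but warns that the default flips to True in 0.21.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        out = eclf.transform(X)
    print(out.shape)           # (2, 4, 2): (n_classifiers, n_samples, n_classes)
    print(caught[0].category)  # <class 'DeprecationWarning'>

With voting='hard' the parameter is ignored and transform() still returns the (n_samples, n_classifiers) matrix of predicted labels, as before.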
