FEA VotingRegressor (#12513) · koenvandevelde/scikit-learn@65df9a7 · GitHub

Commit 65df9a7

Ramil Nugmanov authored and koenvandevelde committed
FEA VotingRegressor (scikit-learn#12513)
1 parent 77c66f2 commit 65df9a7

7 files changed: +394 -102 lines changed

doc/modules/classes.rst

Lines changed: 1 addition & 0 deletions
@@ -421,6 +421,7 @@ Samples generator
    ensemble.RandomForestRegressor
    ensemble.RandomTreesEmbedding
    ensemble.VotingClassifier
+   ensemble.VotingRegressor
 
 .. autosummary::
    :toctree: generated/

doc/modules/ensemble.rst

Lines changed: 44 additions & 1 deletion
@@ -927,7 +927,7 @@ averaged.
 Voting Classifier
 ========================
 
-The idea behind the :class:`VotingClassifier` is to combine
+The idea behind the `VotingClassifier` is to combine
 conceptually different machine learning classifiers and use a majority vote
 or the average predicted probabilities (soft vote) to predict the class labels.
 Such a classifier can be useful for a set of equally well performing models
@@ -1084,3 +1084,46 @@ Optionally, weights can be provided for the individual classifiers::
 
    >>> eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    ...                         voting='soft', weights=[2, 5, 1])
+
+
+.. _voting_regressor:
+
+Voting Regressor
+================
+
+The idea behind the `VotingRegressor` is to combine conceptually
+different machine learning regressors and return the average predicted values.
+Such a regressor can be useful for a set of equally well performing models
+in order to balance out their individual weaknesses.
+
+Usage
+.....
+
+The following example shows how to fit the VotingRegressor::
+
+   >>> from sklearn import datasets
+   >>> from sklearn.ensemble import GradientBoostingRegressor
+   >>> from sklearn.ensemble import RandomForestRegressor
+   >>> from sklearn.linear_model import LinearRegression
+   >>> from sklearn.ensemble import VotingRegressor
+
+   >>> # Loading some example data
+   >>> boston = datasets.load_boston()
+   >>> X = boston.data
+   >>> y = boston.target
+
+   >>> # Training regressors
+   >>> reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
+   >>> reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
+   >>> reg3 = LinearRegression()
+   >>> ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
+   >>> ereg = ereg.fit(X, y)
+
+.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_regressor_001.png
+   :target: ../auto_examples/ensemble/plot_voting_regressor.html
+   :align: center
+   :scale: 75%
+
+.. topic:: Examples:
+
+   * :ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py`
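
As a cross-check on the docs above, here is a minimal sketch (not part of the
commit; the choice of base estimators and weights is arbitrary) of the identity
that `VotingRegressor.predict` implements, and that the new
`test_weights_regressor` test further down asserts: the prediction is the
(optionally weighted) average of the fitted estimators' predictions.

    import numpy as np
    from sklearn.datasets import load_boston
    from sklearn.dummy import DummyRegressor
    from sklearn.ensemble import VotingRegressor

    # Two deliberately simple base regressors make the average easy to verify.
    X, y = load_boston(return_X_y=True)
    reg1 = DummyRegressor(strategy='mean').fit(X, y)
    reg2 = DummyRegressor(strategy='median').fit(X, y)
    ereg = VotingRegressor([('mean', reg1), ('median', reg2)],
                           weights=[1, 3]).fit(X, y)

    # The ensemble prediction equals the weighted average of member predictions.
    manual = np.average([reg1.predict(X), reg2.predict(X)], axis=0,
                        weights=[1, 3])
    assert np.allclose(ereg.predict(X), manual)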

doc/whats_new/v0.21.rst

Lines changed: 6 additions & 0 deletions
@@ -230,6 +230,12 @@ Support for Python 3.4 and below has been officially dropped.
    gradient boosting model has been trained with sample weights.
    :issue:`13193` by :user:`Samuel O. Ronsin <samronsin>`.
 
+- |Feature| Add :class:`ensemble.VotingRegressor`
+  which provides an equivalent of :class:`ensemble.VotingClassifier`
+  for regression problems.
+  :issue:`12513` by :user:`Ramil Nugmanov <stsouko>` and
+  :user:`Mohamed Ali Jamaoui <mohamed-ali>`.
+
 :mod:`sklearn.externals`
 ........................

examples/ensemble/plot_voting_regressor.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+"""
+=================================================
+Plot individual and voting regression predictions
+=================================================
+
+Plot individual and averaged regression predictions for the Boston dataset.
+
+First, three exemplary regressors are initialized (`GradientBoostingRegressor`,
+`RandomForestRegressor`, and `LinearRegression`) and used to initialize a
+`VotingRegressor`.
+
+The red starred dots are the averaged predictions.
+
+"""
+print(__doc__)
+
+import matplotlib.pyplot as plt
+
+from sklearn import datasets
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import VotingRegressor
+
+# Loading some example data
+boston = datasets.load_boston()
+X = boston.data
+y = boston.target
+
+# Training regressors
+reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
+reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
+reg3 = LinearRegression()
+ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
+reg1.fit(X, y)
+reg2.fit(X, y)
+reg3.fit(X, y)
+ereg.fit(X, y)
+
+xt = X[:20]
+
+plt.figure()
+plt.plot(reg1.predict(xt), 'gd', label='GradientBoostingRegressor')
+plt.plot(reg2.predict(xt), 'b^', label='RandomForestRegressor')
+plt.plot(reg3.predict(xt), 'ys', label='LinearRegression')
+plt.plot(ereg.predict(xt), 'r*', label='VotingRegressor')
+plt.tick_params(axis='x', which='both', bottom=False, top=False,
+                labelbottom=False)
+plt.ylabel('predicted')
+plt.xlabel('training samples')
+plt.legend(loc="best")
+plt.title('Comparison of individual predictions with averaged')
+plt.show()
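
To tie the figure back to the claim that voting balances out individual
weaknesses, the script above could be extended with a cross-validated
comparison; this is a sketch, not part of the commit, and cv=5 is an arbitrary
choice (regressors are scored with R^2 by default):

    from sklearn.model_selection import cross_val_score

    # Continues from the script above; cross_val_score clones and refits each
    # estimator internally, so the earlier fits are not reused.
    for name, est in [('gb', reg1), ('rf', reg2), ('lr', reg3), ('vote', ereg)]:
        scores = cross_val_score(est, X, y, cv=5)
        print('%s: %.3f' % (name, scores.mean()))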

sklearn/ensemble/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -16,7 +16,8 @@
 from .weight_boosting import AdaBoostRegressor
 from .gradient_boosting import GradientBoostingClassifier
 from .gradient_boosting import GradientBoostingRegressor
-from .voting_classifier import VotingClassifier
+from .voting import VotingClassifier
+from .voting import VotingRegressor
 
 from . import bagging
 from . import forest
@@ -30,6 +31,6 @@
            "ExtraTreesRegressor", "BaggingClassifier",
            "BaggingRegressor", "IsolationForest", "GradientBoostingClassifier",
            "GradientBoostingRegressor", "AdaBoostClassifier",
-           "AdaBoostRegressor", "VotingClassifier",
+           "AdaBoostRegressor", "VotingClassifier", "VotingRegressor",
            "bagging", "forest", "gradient_boosting",
            "partial_dependence", "weight_boosting"]

sklearn/ensemble/tests/test_voting_classifier.py renamed to sklearn/ensemble/tests/test_voting.py

Lines changed: 54 additions & 9 deletions
@@ -1,4 +1,4 @@
-"""Testing for the VotingClassifier"""
+"""Testing for the VotingClassifier and VotingRegressor"""
 
 import pytest
 import numpy as np
@@ -11,21 +11,25 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.naive_bayes import GaussianNB
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import VotingClassifier
+from sklearn.ensemble import VotingClassifier, VotingRegressor
 from sklearn.model_selection import GridSearchCV
 from sklearn import datasets
-from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import cross_val_score, train_test_split
 from sklearn.datasets import make_multilabel_classification
 from sklearn.svm import SVC
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.dummy import DummyRegressor
 
 
-# Load the iris dataset and randomly permute it
+# Load datasets
 iris = datasets.load_iris()
 X, y = iris.data[:, 1:3], iris.target
 
+boston = datasets.load_boston()
+X_r, y_r = boston.data, boston.target
+
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 @pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
@@ -42,7 +46,7 @@ def test_estimator_init():
     assert_raise_message(ValueError, msg, eclf.fit, X, y)
 
     eclf = VotingClassifier(estimators=[('lr', clf)], weights=[1, 2])
-    msg = ('Number of classifiers and weights must be equal'
+    msg = ('Number of `estimators` and weights must be equal'
            '; got 2 weights, 1 estimators')
     assert_raise_message(ValueError, msg, eclf.fit, X, y)

@@ -76,9 +80,19 @@ def test_notfitted():
     eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                         ('lr2', LogisticRegression())],
                             voting='soft')
-    msg = ("This VotingClassifier instance is not fitted yet. Call \'fit\'"
+    ereg = VotingRegressor([('dr', DummyRegressor())])
+    msg = ("This %s instance is not fitted yet. Call \'fit\'"
            " with appropriate arguments before using this method.")
-    assert_raise_message(NotFittedError, msg, eclf.predict_proba, X)
+    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
+                         eclf.predict, X)
+    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
+                         eclf.predict_proba, X)
+    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
+                         eclf.transform, X)
+    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
+                         ereg.predict, X_r)
+    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
+                         ereg.transform, X_r)
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
@@ -125,6 +139,38 @@ def test_weights_iris():
     assert_almost_equal(scores.mean(), 0.93, decimal=2)
 
 
+def test_weights_regressor():
+    """Check weighted average regression prediction on boston dataset."""
+    reg1 = DummyRegressor(strategy='mean')
+    reg2 = DummyRegressor(strategy='median')
+    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
+    ereg = VotingRegressor([('mean', reg1), ('median', reg2),
+                            ('quantile', reg3)], weights=[1, 2, 10])
+
+    X_r_train, X_r_test, y_r_train, y_r_test = \
+        train_test_split(X_r, y_r, test_size=.25)
+
+    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
+    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
+    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
+    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)
+
+    avg = np.average(np.asarray([reg1_pred, reg2_pred, reg3_pred]), axis=0,
+                     weights=[1, 2, 10])
+    assert_almost_equal(ereg_pred, avg, decimal=2)
+
+    ereg_weights_none = VotingRegressor([('mean', reg1), ('median', reg2),
+                                         ('quantile', reg3)], weights=None)
+    ereg_weights_equal = VotingRegressor([('mean', reg1), ('median', reg2),
+                                          ('quantile', reg3)],
+                                         weights=[1, 1, 1])
+    ereg_weights_none.fit(X_r_train, y_r_train)
+    ereg_weights_equal.fit(X_r_train, y_r_train)
+    ereg_none_pred = ereg_weights_none.predict(X_r_test)
+    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
+    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
+
+
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 @pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore:The default value of n_estimators')
@@ -382,8 +428,7 @@ def test_set_estimator_none():
     eclf2.set_params(voting='soft').fit(X, y)
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
     assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
-    msg = ('All estimators are None. At least one is required'
-           ' to be a classifier!')
+    msg = 'All estimators are None. At least one is required!'
     assert_raise_message(
         ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)
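
The behaviour exercised by the updated `test_notfitted` can also be stated as a
small standalone test; this sketch uses plain `pytest.raises` rather than the
commit's `assert_raise_message` helper:

    import pytest
    from sklearn.dummy import DummyRegressor
    from sklearn.ensemble import VotingRegressor
    from sklearn.exceptions import NotFittedError

    def test_unfitted_voting_regressor_raises():
        # Calling predict before fit must raise NotFittedError.
        ereg = VotingRegressor([('dr', DummyRegressor())])
        with pytest.raises(NotFittedError):
            ereg.predict([[0.0]])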
