[MRG] FIX run test for meta-estimator having estimators keyword by glemaitre · Pull Request #14305 · scikit-learn/scikit-learn
[MRG] FIX run test for meta-estimator having estimators keyword #14305

Merged
14 commits merged on Jul 29, 2019
9 changes: 9 additions & 0 deletions doc/whats_new/v0.22.rst
@@ -115,6 +115,15 @@ Changelog
preserve the class balance of the original training set. :pr:`14194`
by :user:`Johann Faouzi <johannfaouzi>`.

- |Fix| Run :func:`utils.estimator_checks.check_estimator` by default on both
:class:`ensemble.VotingClassifier` and :class:`ensemble.VotingRegressor`. This
fixes shape-consistency issues during `predict`, which previously failed when
the underlying estimators did not output arrays of consistent dimensions.
Note that this should eventually be replaced by a refactoring of the common
tests.
:pr:`14305` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Efficiency| In :func:`impute.MissingIndicator.fit_transform`, the
`_get_missing_features_info` function is now called only once.
:pr:`14356` by :user:
21 changes: 21 additions & 0 deletions sklearn/ensemble/tests/test_voting.py
@@ -6,13 +6,17 @@
from sklearn.utils.testing import assert_almost_equal, assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.estimator_checks import check_no_attributes_set_in_init
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import datasets
from sklearn.model_selection import cross_val_score, train_test_split
@@ -508,3 +512,20 @@ def test_none_estimator_with_weights(X, y, voter, drop):
    voter.fit(X, y, sample_weight=np.ones(y.shape))
    y_pred = voter.predict(X)
    assert y_pred.shape == y.shape


@pytest.mark.parametrize(
    "estimator",
    [VotingRegressor(
        estimators=[('lr', LinearRegression()),
                    ('tree', DecisionTreeRegressor(random_state=0))]),
     VotingClassifier(
         estimators=[('lr', LogisticRegression(random_state=0)),
                     ('tree', DecisionTreeClassifier(random_state=0))])],
    ids=['VotingRegressor', 'VotingClassifier']
)
def test_check_estimators_voting_estimator(estimator):
    # FIXME: to be removed when meta-estimators can specify their own
    # testing parameters (for required parameters).
    check_estimator(estimator)
    check_no_attributes_set_in_init(estimator.__class__.__name__, estimator)
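
For readers less familiar with the common checks, here is a minimal standalone sketch (not part of the diff; the configuration simply mirrors the parametrization above) of what this test runs. check_estimator executes scikit-learn's common API checks against a configured estimator instance and raises an exception as soon as any check fails; check_no_attributes_set_in_init additionally verifies that __init__ only stores the constructor parameters and sets no other attributes.

from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils.estimator_checks import check_estimator

# Run the common API checks on a fully configured meta-estimator instance;
# finishing silently means every check passed.
voter = VotingRegressor(
    estimators=[('lr', LinearRegression()),
                ('tree', DecisionTreeRegressor(random_state=0))])
check_estimator(voter)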
11 changes: 8 additions & 3 deletions sklearn/ensemble/voting.py
@@ -13,19 +13,22 @@
#
# License: BSD 3 clause

import numpy as np
from abc import abstractmethod

import numpy as np

from joblib import Parallel, delayed

from ..base import ClassifierMixin
from ..base import RegressorMixin
from ..base import TransformerMixin
from ..base import clone
from ..preprocessing import LabelEncoder
from ..utils import Bunch
from ..utils.validation import check_is_fitted
from ..utils.metaestimators import _BaseComposition
from ..utils import Bunch
from ..utils.multiclass import check_classification_targets
from ..utils.validation import column_or_1d


def _parallel_fit_estimator(estimator, X, y, sample_weight=None):
@@ -67,7 +70,7 @@ def _weights_not_none(self):

    def _predict(self, X):
        """Collect results from clf.predict calls. """
        return np.asarray([clf.predict(X) for clf in self.estimators_]).T
        return np.asarray([est.predict(X) for est in self.estimators_]).T

    @abstractmethod
    def fit(self, X, y, sample_weight=None):
@@ -264,6 +267,7 @@ def fit(self, X, y, sample_weight=None):
        -------
        self : object
        """
        check_classification_targets(y)
        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
            raise NotImplementedError('Multilabel and multi-output'
                                      ' classification is not supported.')
@@ -454,6 +458,7 @@ def fit(self, X, y, sample_weight=None):
        -------
        self : object
        """
        y = column_or_1d(y, warn=True)
        return super().fit(X, y, sample_weight)

    def predict(self, X):
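
The column_or_1d(y, warn=True) call added above is what resolves the shape inconsistency mentioned in the changelog entry. A hedged illustration (not part of the PR; the estimators and data below are placeholders): when y is passed as an (n_samples, 1) column, some underlying regressors keep that 2-D shape in predict while others squeeze it to 1-D, so the np.asarray([...]).T stacking in _predict no longer yields the expected (n_samples, n_estimators) array. Raveling y up front keeps every sub-estimator's output 1-D.

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils.validation import column_or_1d

X = np.arange(20, dtype=float).reshape(-1, 1)
y_col = np.arange(20, dtype=float).reshape(-1, 1)  # y as an (n_samples, 1) column

# Fitting on the 2-D column directly: the two regressors disagree on output shape.
print(LinearRegression().fit(X, y_col).predict(X).shape)                      # (20, 1)
print(DecisionTreeRegressor(random_state=0).fit(X, y_col).predict(X).shape)   # (20,)

# With the validation added here, y is raveled (emitting a DataConversionWarning)
# before reaching the sub-estimators, so every prediction comes back 1-D.
y = column_or_1d(y_col, warn=True)  # shape (20,)
print(LinearRegression().fit(X, y).predict(X).shape)                          # (20,)

Similarly, the check_classification_targets(y) call added to VotingClassifier.fit rejects target types the classifier cannot handle (for example continuous y) with an informative error, which is one of the behaviours the common estimator checks expect.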
2 changes: 1 addition & 1 deletion sklearn/tests/test_common.py
@@ -23,9 +23,9 @@
from sklearn.base import RegressorMixin
from sklearn.cluster.bicluster import BiclusterMixin

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model.base import LinearClassifierMixin
from sklearn.linear_model import Ridge
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.utils import IS_PYPY
from sklearn.utils.estimator_checks import (
_yield_all_checks,
1 change: 0 additions & 1 deletion sklearn/utils/estimator_checks.py
@@ -30,7 +30,6 @@
from ..discriminant_analysis import LinearDiscriminantAnalysis
from ..linear_model import Ridge


from ..base import (clone, ClusterMixin, is_classifier, is_regressor,
                    _DEFAULT_TAGS, RegressorMixin, is_outlier_detector)
