[MRG+1] Fixes #7578 added check_decision_proba_consistency in estimator_checks by shubham0704 · Pull Request #8253 · scikit-learn/scikit-learn · GitHub

Merged
merged 32 commits on Mar 7, 2017

Commits
70a74c6
[WEP] added check_rank_corr in estimator_checks
Jan 31, 2017
7a53901
[WIP] linted, add check_rank_corr to estimator_checks.py
Feb 1, 2017
a29a1de
Merge branch 'master' of https://github.com/scikit-learn/scikit-learn
Feb 1, 2017
d94c892
[WIP] Fixes #7578 advised changes added for check_decision_proba_cons…
Feb 2, 2017
45fc811
[WIP] Fixes #7578 made recommended changes
Feb 2, 2017
1be0ddb
[WIP] minor changes
Feb 2, 2017
16ae6cd
Merge remote-tracking branch 'upstream/master'
Feb 2, 2017
b8c4b86
done nosetests and removed errors
Feb 3, 2017
942ebaa
[WIP] Fixes #7578 reinitiating ci-tests
Feb 3, 2017
32c6d56
Merge remote-tracking branch 'upstream/master'
Feb 4, 2017
890a335
tried some changes to travis
Feb 4, 2017
793e495
travis changes
Feb 4, 2017
1ba275c
removed travis changes
Feb 4, 2017
ed1fce7
[WIP] Fixes #8289 added get_max_squared_sum
Feb 4, 2017
9a9ebec
[WIP] Fixes #8289 minor changes
Feb 4, 2017
594a91c
--allow-empty
Feb 5, 2017
f1c98c3
Fixes #7578 made recommended changes
Feb 6, 2017
2fa494f
made recommended changes
Feb 13, 2017
5e35a4d
made AssertionError from Fail to Skip
Feb 13, 2017
20d552d
Merge remote-tracking branch 'upstream/master'
Feb 14, 2017
ff556ef
made recommended changes
Feb 14, 2017
5c5a923
made recommended changes
Feb 15, 2017
86a087b
made recommended changes
Feb 15, 2017
b6f3238
Merge remote-tracking branch 'upstream/master'
Feb 16, 2017
7d74887
made recommended changes
Feb 16, 2017
495f514
rounded results
Feb 16, 2017
f593502
made recommended changes
Feb 22, 2017
f4bd859
made recommended changes
Feb 22, 2017
10f440d
minor changes
Feb 22, 2017
aec7ce2
added proper indent
Feb 22, 2017
15deeff
added entry in whatsnew
Feb 22, 2017
ca5793c
Trigger
Feb 22, 2017
7 changes: 7 additions & 0 deletions doc/whats_new.rst
@@ -246,6 +246,13 @@ API changes summary
selection classes to be used with tools such as
:func:`sklearn.model_selection.cross_val_predict`.
:issue:`2879` by :user:`Stephen Hoover <stephen-hoover>`.

- Estimators with both methods ``decision_function`` and ``predict_proba``
are now required to have a monotonic relation between them. The
method ``check_decision_proba_consistency`` has been added in
**sklearn.utils.estimator_checks** to check their consistency.
:issue:`7578` by :user:`Shubham Bhardwaj <shubham0704>`


.. _changes_0_18_1:

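To make the new requirement concrete, here is a standalone sketch that checks the same property by hand. It is not part of this PR's diff; LogisticRegression and the toy blobs are arbitrary illustrative choices. The point is that an estimator exposing both methods must rank test samples identically by predict_proba and by decision_function.

# Standalone illustration (not part of this PR's diff): LogisticRegression is
# an arbitrary example of an estimator that exposes both methods.
import numpy as np
from scipy.stats import rankdata
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression

X, y = make_blobs(n_samples=100, centers=[(2, 2), (4, 4)],
                  cluster_std=1.0, random_state=0)
X_test = np.random.RandomState(0).randn(20, 2) + 4

clf = LogisticRegression().fit(X, y)
proba = clf.predict_proba(X_test)[:, 1]    # probability of the positive class
decision = clf.decision_function(X_test)   # signed distance from the boundary

# A monotonic relation means both outputs rank the test points identically.
np.testing.assert_array_equal(rankdata(proba), rankdata(decision))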
44 changes: 33 additions & 11 deletions sklearn/utils/estimator_checks.py
@@ -6,9 +6,9 @@
import traceback
import pickle
from copy import deepcopy

import numpy as np
from scipy import sparse
from scipy.stats import rankdata
import struct

from sklearn.externals.six.moves import zip
@@ -113,10 +113,10 @@ def _yield_classifier_checks(name, Classifier):
# basic consistency testing
yield check_classifiers_train
yield check_classifiers_regression_target
if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"]
if (name not in
["MultinomialNB", "LabelPropagation", "LabelSpreading"] and
# TODO some complication with -1 label
and name not in ["DecisionTreeClassifier",
"ExtraTreeClassifier"]):
name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]):
# We don't raise a warning in these classifiers, as
# the column y interface is used by the forests.

@@ -127,6 +127,8 @@ def _yield_classifier_checks(name, Classifier):
yield check_class_weight_classifiers

yield check_non_transformer_estimators_n_iter
# test if predict_proba is a monotonic transformation of decision_function
yield check_decision_proba_consistency


@ignore_warnings(category=DeprecationWarning)
@@ -269,8 +271,7 @@ def set_testing_parameters(estimator):
# set parameters to speed up some estimators and
# avoid deprecated behaviour
params = estimator.get_params()
if ("n_iter" in params
and estimator.__class__.__name__ != "TSNE"):
if ("n_iter" in params and estimator.__class__.__name__ != "TSNE"):
estimator.set_params(n_iter=5)
if "max_iter" in params:
warnings.simplefilter("ignore", ConvergenceWarning)
@@ -1112,8 +1113,7 @@ def check_classifiers_train(name, Classifier):
assert_equal(decision.shape, (n_samples,))
dec_pred = (decision.ravel() > 0).astype(np.int)
assert_array_equal(dec_pred, y_pred)
-if (n_classes is 3
-and not isinstance(classifier, BaseLibSVM)):
+if (n_classes is 3 and not isinstance(classifier, BaseLibSVM)):
# 1on1 of LibSVM works differently
assert_equal(decision.shape, (n_samples, n_classes))
assert_array_equal(np.argmax(decision, axis=1), y_pred)
@@ -1574,9 +1574,9 @@ def check_parameters_default_constructible(name, Estimator):
try:
def param_filter(p):
"""Identify hyper parameters of an estimator"""
-return (p.name != 'self'
-and p.kind != p.VAR_KEYWORD
-and p.kind != p.VAR_POSITIONAL)
+return (p.name != 'self' and
[Review comment from a member, on the line above]
For next time, try not to change things that are not related to your PR. This adds noise into the diff and makes it harder for the review to be efficient.

[Reply from the author]
Sure @lesteve. Thanks a lot.
+p.kind != p.VAR_KEYWORD and
+p.kind != p.VAR_POSITIONAL)

init_params = [p for p in signature(init).parameters.values()
if param_filter(p)]
@@ -1721,3 +1721,25 @@ def check_classifiers_regression_target(name, Estimator):
e = Estimator()
msg = 'Unknown label type: '
assert_raises_regex(ValueError, msg, e.fit, X, y)


@ignore_warnings(category=DeprecationWarning)
def check_decision_proba_consistency(name, Estimator):
    # Check whether an estimator having both decision_function and
    # predict_proba methods has outputs with perfect rank correlation.

    centers = [(2, 2), (4, 4)]
    X, y = make_blobs(n_samples=100, random_state=0, n_features=4,
                      centers=centers, cluster_std=1.0, shuffle=True)
    X_test = np.random.randn(20, 2) + 4
    estimator = Estimator()

    set_testing_parameters(estimator)

    if (hasattr(estimator, "decision_function") and
            hasattr(estimator, "predict_proba")):

        estimator.fit(X, y)
        a = estimator.predict_proba(X_test)[:, 1]
        b = estimator.decision_function(X_test)
        assert_array_equal(rankdata(a), rankdata(b))
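For reference, the check can also be invoked by hand. The sketch below assumes the signature added in this PR, where estimator checks receive a name and the estimator class (the check instantiates the class itself and returns silently if either method is missing); LogisticRegression is again just an illustrative choice.

from sklearn.linear_model import LogisticRegression
from sklearn.utils.estimator_checks import check_decision_proba_consistency

# Passes for any classifier whose predict_proba is a monotonic transformation
# of its decision_function; raises an AssertionError otherwise.
check_decision_proba_consistency("LogisticRegression", LogisticRegression)

In the common-test machinery the check is not called directly: it is yielded from _yield_classifier_checks, so check_estimator runs it for every classifier it exercises.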