From 70a74c6bf935828acc0581b1f97240d299f52f63 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 1 Feb 2017 01:54:13 +0530 Subject: [PATCH 01/27] [WEP] added check_rank_corr in estimator_checks --- sklearn/utils/estimator_checks.py | 43 +++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index b5a59631c5c45..68580b8324c47 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -9,6 +9,7 @@ import numpy as np from scipy import sparse +from scipy.stats import stats import struct from sklearn.externals.six.moves import zip @@ -52,6 +53,7 @@ from sklearn.utils.validation import has_fit_parameter from sklearn.preprocessing import StandardScaler from sklearn.datasets import load_iris, load_boston, make_blobs +from sklearn.datasets import make_multilabel_classification BOSTON = None @@ -114,7 +116,7 @@ def _yield_classifier_checks(name, Classifier): yield check_classifiers_regression_target if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"] # TODO some complication with -1 label - and name not in ["DecisionTreeClassifier", + and name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]): # We don't raise a warning in these classifiers, as # the column y interface is used by the forests. 
@@ -161,6 +163,7 @@ def _yield_regressor_checks(name, Regressor): yield check_regressors_no_decision_function yield check_supervised_y_2d yield check_supervised_y_no_nan + yield check_rank_corr if name != 'CCA': # check that the regressor handles int input yield check_regressors_int @@ -268,8 +271,7 @@ def set_testing_parameters(estimator): # set parameters to speed up some estimators and # avoid deprecated behaviour params = estimator.get_params() - if ("n_iter" in params - and estimator.__class__.__name__ != "TSNE"): + if ("n_iter" in params and estimator.__class__.__name__ != "TSNE"): estimator.set_params(n_iter=5) if "max_iter" in params: warnings.simplefilter("ignore", ConvergenceWarning) @@ -1096,8 +1098,7 @@ def check_classifiers_train(name, Classifier): assert_equal(decision.shape, (n_samples,)) dec_pred = (decision.ravel() > 0).astype(np.int) assert_array_equal(dec_pred, y_pred) - if (n_classes is 3 - and not isinstance(classifier, BaseLibSVM)): + if (n_classes is 3 and not isinstance(classifier, BaseLibSVM)): # 1on1 of LibSVM works differently assert_equal(decision.shape, (n_samples, n_classes)) assert_array_equal(np.argmax(decision, axis=1), y_pred) @@ -1705,3 +1706,35 @@ def check_classifiers_regression_target(name, Estimator): e = Estimator() msg = 'Unknown label type: ' assert_raises_regex(ValueError, msg, e.fit, X, y) + + +@ignore_warnings(category=DeprecationWarning) +def check_rank_corr(name, Estimator): + """ + Check whether an estimator having both decision_function and + predict_proba methods has outputs with perfect rank correlation. 
+ """ + + X, Y = make_multilabel_classification(n_classes=2, n_labels=1, + allow_unlabeled=True, + random_state=1) + + estimator = Estimator() + + set_testing_parameters(estimator) + + if hasattr(estimator, "decision_function"): + + if hasattr(estimator, "predict_proba"): + try: + estimator.fit(X, Y) + a = estimator.predict_proba([i for i in range(20)]) + b = estimator.decision_function([i for i in range(20)]) + assert_equal(a.shape, b.shape) + assert_equal(stats.spearmanr(a[0], b[0]).correlation, 1) + + except ValueError: + pass + + else: + pass From 7a53901fc834ac6d4a42e8ccdac917238973c80b Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 1 Feb 2017 21:28:13 +0530 Subject: [PATCH 02/27] [WIP] linted, add check_rank_corr to estimator_checks.py --- sklearn/utils/estimator_checks.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 68580b8324c47..fec84d02137a3 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -6,7 +6,6 @@ import traceback import pickle from copy import deepcopy - import numpy as np from scipy import sparse from scipy.stats import stats @@ -114,12 +113,12 @@ def _yield_classifier_checks(name, Classifier): # basic consistency testing yield check_classifiers_train yield check_classifiers_regression_target - if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"] + if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"]): # TODO some complication with -1 label - and name not in ["DecisionTreeClassifier", - "ExtraTreeClassifier"]): + if (name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]): # We don't raise a warning in these classifiers, as # the column y interface is used by the forests. 
+ pass yield check_supervised_y_2d # test if NotFittedError is raised @@ -1559,9 +1558,8 @@ def check_parameters_default_constructible(name, Estimator): try: def param_filter(p): """Identify hyper parameters of an estimator""" - return (p.name != 'self' - and p.kind != p.VAR_KEYWORD - and p.kind != p.VAR_POSITIONAL) + return (p.name != 'self' and p.kind != p.VAR_KEYWORD and + p.kind != p.VAR_POSITIONAL) init_params = [p for p in signature(init).parameters.values() if param_filter(p)] From d94c89257b81640051972d02dcdab8c4778dece4 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Thu, 2 Feb 2017 11:50:02 +0530 Subject: [PATCH 03/27] [WIP] Fixes #7578 advised changes added for check_decision_proba_consistency --- sklearn/utils/estimator_checks.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index fec84d02137a3..0f31c2d386e0a 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -8,7 +8,7 @@ from copy import deepcopy import numpy as np from scipy import sparse -from scipy.stats import stats +from scipy.stats import rankdata import struct from sklearn.externals.six.moves import zip @@ -46,6 +46,7 @@ from sklearn.exceptions import DataConversionWarning from sklearn.exceptions import SkipTestWarning from sklearn.model_selection import train_test_split +from sklearn.multiclass import OneVsRestClassifier from sklearn.utils import shuffle from sklearn.utils.fixes import signature @@ -162,7 +163,7 @@ def _yield_regressor_checks(name, Regressor): yield check_regressors_no_decision_function yield check_supervised_y_2d yield check_supervised_y_no_nan - yield check_rank_corr + yield check_decision_proba_consistency if name != 'CCA': # check that the regressor handles int input yield check_regressors_int @@ -1707,7 +1708,7 @@ def check_classifiers_regression_target(name, Estimator): @ignore_warnings(category=DeprecationWarning) -def 
check_rank_corr(name, Estimator): +def check_decision_proba_consistency(name, Estimator): """ Check whether an estimator having both decision_function and predict_proba methods has outputs with perfect rank correlation. @@ -1725,11 +1726,12 @@ def check_rank_corr(name, Estimator): if hasattr(estimator, "predict_proba"): try: - estimator.fit(X, Y) - a = estimator.predict_proba([i for i in range(20)]) - b = estimator.decision_function([i for i in range(20)]) - assert_equal(a.shape, b.shape) - assert_equal(stats.spearmanr(a[0], b[0]).correlation, 1) + classif = OneVsRestClassifier(estimator) + classif.fit(X, Y) + a = classif.predict_proba([i for i in range(20)]) + b = classif.decision_function([i for i in range(20)]) + assert_equal( + rankdata(a, method='average'), rankdata(b, method='average')) except ValueError: pass From 45fc8117aa1facb842cc4db7c4347bb22154587a Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Thu, 2 Feb 2017 15:19:59 +0530 Subject: [PATCH 04/27] [WIP] Fixes #7578 made recommended changes --- sklearn/utils/estimator_checks.py | 32 ++++++++++--------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 0f31c2d386e0a..63b62522e3027 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -46,14 +46,12 @@ from sklearn.exceptions import DataConversionWarning from sklearn.exceptions import SkipTestWarning from sklearn.model_selection import train_test_split -from sklearn.multiclass import OneVsRestClassifier from sklearn.utils import shuffle from sklearn.utils.fixes import signature from sklearn.utils.validation import has_fit_parameter from sklearn.preprocessing import StandardScaler from sklearn.datasets import load_iris, load_boston, make_blobs -from sklearn.datasets import make_multilabel_classification BOSTON = None @@ -1713,28 +1711,18 @@ def check_decision_proba_consistency(name, Estimator): Check whether an estimator 
having both decision_function and predict_proba methods has outputs with perfect rank correlation. """ - - X, Y = make_multilabel_classification(n_classes=2, n_labels=1, - allow_unlabeled=True, - random_state=1) - + rnd = np.random.RandomState(0) + X_train = (3*rnd.uniform(size=(10, 4))).astype(int) + y = X[:, 0] estimator = Estimator() set_testing_parameters(estimator) - if hasattr(estimator, "decision_function"): + if (hasattr(estimator, "decision_function") and + hasattr(estimator, "predict_proba")): - if hasattr(estimator, "predict_proba"): - try: - classif = OneVsRestClassifier(estimator) - classif.fit(X, Y) - a = classif.predict_proba([i for i in range(20)]) - b = classif.decision_function([i for i in range(20)]) - assert_equal( - rankdata(a, method='average'), rankdata(b, method='average')) - - except ValueError: - pass - - else: - pass + estimator.fit(X_train, y) + X_test = (3*rnd.uniform(size=(5, 4))).astype(int) + a = estimator.predict_proba(X_test) + b = estimator.decision_function(X_test) + assert_array_equal(rankdata(a), rankdata(b)) From 1be0ddb142f87ca346fe4f006da089fbc00e49fb Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Thu, 2 Feb 2017 15:23:59 +0530 Subject: [PATCH 05/27] [WIP] minor changes --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 63b62522e3027..e5a330a88eb77 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1713,7 +1713,7 @@ def check_decision_proba_consistency(name, Estimator): """ rnd = np.random.RandomState(0) X_train = (3*rnd.uniform(size=(10, 4))).astype(int) - y = X[:, 0] + y = X_train[:, 0] estimator = Estimator() set_testing_parameters(estimator) From b8c4b8693d51069fdefd80f2e042e4833a06eeda Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Fri, 3 Feb 2017 16:19:54 +0530 Subject: [PATCH 06/27] done nosetests and removed errors --- 
sklearn/utils/estimator_checks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index e5a330a88eb77..a5579e8de7080 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -56,8 +56,9 @@ BOSTON = None CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'] -MULTI_OUTPUT = ['CCA', 'DecisionTreeRegressor', 'ElasticNet', - 'ExtraTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcess', +MULTI_OUTPUT = ['CCA', 'DecisionTreeClassifier', 'DecisionTreeRegressor', + 'ElasticNet', 'ExtraTreeClassifier', 'ExtraTreeRegressor', + 'ExtraTreesRegressor', 'GaussianProcess', 'GaussianProcessRegressor', 'KNeighborsRegressor', 'KernelRidge', 'Lars', 'Lasso', 'LassoLars', 'LinearRegression', 'MultiTaskElasticNet', From 942ebaaf28ddf370577eec3fa35aa8718ad2555f Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Fri, 3 Feb 2017 23:49:17 +0530 Subject: [PATCH 07/27] [WIP] Fixes #7578 reinitiating ci-tests --- build_tools/travis/travis.log | 86 ++++++++++++++++++++++++++++++++++ travis.log | 87 +++++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 build_tools/travis/travis.log create mode 100644 travis.log diff --git a/build_tools/travis/travis.log b/build_tools/travis/travis.log new file mode 100644 index 0000000000000..cce76e3d9b2a0 --- /dev/null +++ b/build_tools/travis/travis.log @@ -0,0 +1,86 @@ +Command line: +"travis show master" + + + ________ __ + / | / | + ########/ ______ ______ __ __ ##/ _______ + ## | / \ / \ / \ / | / | / | + ## | /###### | ###### | ## \ /##/ ## | /#######/ + ## | ## | ##/ / ## | ## /##/ ## | ## \ + ## | ## | /####### | ## ##/ ## | ###### | + ## | ## | ## ## | ###/ ## | / ##/ + ##/ ##/ #######/ #/ ##/ #######/ + + TRajectory Analyzer and VISualizer - Open-source freeware under GNU GPL v3 + + Copyright (c) Martin Brehm (2009-2015) + Martin Thomas (2012-2015) + Barbara Kirchner 
(2009-2015) + University of Leipzig / University of Bonn. + + http://www.travis-analyzer.de + + Please cite: + M. Brehm and B. Kirchner, J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. + + There is absolutely no warranty on any results obtained from TRAVIS. + + # Running on master-Inspiron-5548 at Fri Feb 3 23:44:08 2017 (PID 4740). + # Running in /home/master/Documents/GitHub/scikit-learn/build_tools/travis + # Source code version: Nov 07 2015. + # Compiled at Nov 17 2015 23:12:21. + # Compiler version: 5.2.1 20151028 + # Target platform: Linux + # Compile flags: DEBUG_ARRAYS + # Machine: int=4b, long=8b, addr=8b, 0xA0B0C0D0=D0,C0,B0,A0. + # User home: /home/master + # Exe path: /usr/bin/travis + # Input from terminal, Output to terminal + + >>> Please use a color scheme with dark background or specify "-nocolor"! <<< + + Loading configuration from /home/master/.travis.conf ... + +Unknown parameter: "show". + + List of supported command line options: + + -p Loads position data from the specified trajectory file. + The file format may be *.xyz, *.pdb, *.lmp (Lammps), HISTORY (DLPOLY), or *.prmtop/*.mdcrd (Amber). + -i Reads input from the specified text file. + + -config Load the specified configuration file. + -stream Treats input trajectory as a stream (e.g. named pipe): No fseek, etc. + -showconf Shows a tree structure of the configuration file. + -writeconf Writes the default configuration file, including all defines values. + + -verbose Show detailed information about what's going on. + -nocolor Executes TRAVIS in monochrome mode. + -dimcolor Uses dim instead of bright colors. + + -credits Display a list of persons who contributed to TRAVIS. + -help, -? Shows this help. + + If only one argument is specified, it is assumed to be the name of a trajectory file. + If argument is specified at all, TRAVIS asks for the trajectory file to open. 
+ + + Note: To show a list of all persons who contributed to TRAVIS, + please add "-credits" to your command line arguments, or set the + variable "SHOWCREDITS" to "TRUE" in your travis.conf file. + + Source code from other projects used in TRAVIS: + - lmfit from Joachim Wuttke + - kiss_fft from Mark Borgerding + - voro++ from Chris Rycroft + + http://www.travis-analyzer.de + + Please cite: + + * "TRAVIS - A Free Analyzer and Visualizer for Monte Carlo and Molecular Dynamics Trajectories", + M. Brehm, B. Kirchner; J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. + +*** The End *** + diff --git a/travis.log b/travis.log new file mode 100644 index 0000000000000..3213313b604c2 --- /dev/null +++ b/travis.log @@ -0,0 +1,87 @@ +Command line: +"travis show master" + + + ________ __ + / | / | + ########/ ______ ______ __ __ ##/ _______ + ## | / \ / \ / \ / | / | / | + ## | /###### | ###### | ## \ /##/ ## | /#######/ + ## | ## | ##/ / ## | ## /##/ ## | ## \ + ## | ## | /####### | ## ##/ ## | ###### | + ## | ## | ## ## | ###/ ## | / ##/ + ##/ ##/ #######/ #/ ##/ #######/ + + TRajectory Analyzer and VISualizer - Open-source freeware under GNU GPL v3 + + Copyright (c) Martin Brehm (2009-2015) + Martin Thomas (2012-2015) + Barbara Kirchner (2009-2015) + University of Leipzig / University of Bonn. + + http://www.travis-analyzer.de + + Please cite: + M. Brehm and B. Kirchner, J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. + + There is absolutely no warranty on any results obtained from TRAVIS. + + # Running on master-Inspiron-5548 at Fri Feb 3 23:40:52 2017 (PID 4708). + # Running in /home/master/Documents/GitHub/scikit-learn + # Source code version: Nov 07 2015. + # Compiled at Nov 17 2015 23:12:21. + # Compiler version: 5.2.1 20151028 + # Target platform: Linux + # Compile flags: DEBUG_ARRAYS + # Machine: int=4b, long=8b, addr=8b, 0xA0B0C0D0=D0,C0,B0,A0. 
+ # User home: /home/master + # Exe path: /usr/bin/travis + # Input from terminal, Output to terminal + + >>> Please use a color scheme with dark background or specify "-nocolor"! <<< + + No configuration file found. + Writing default configuration to /home/master/.travis.conf ... + +Unknown parameter: "show". + + List of supported command line options: + + -p Loads position data from the specified trajectory file. + The file format may be *.xyz, *.pdb, *.lmp (Lammps), HISTORY (DLPOLY), or *.prmtop/*.mdcrd (Amber). + -i Reads input from the specified text file. + + -config Load the specified configuration file. + -stream Treats input trajectory as a stream (e.g. named pipe): No fseek, etc. + -showconf Shows a tree structure of the configuration file. + -writeconf Writes the default configuration file, including all defines values. + + -verbose Show detailed information about what's going on. + -nocolor Executes TRAVIS in monochrome mode. + -dimcolor Uses dim instead of bright colors. + + -credits Display a list of persons who contributed to TRAVIS. + -help, -? Shows this help. + + If only one argument is specified, it is assumed to be the name of a trajectory file. + If argument is specified at all, TRAVIS asks for the trajectory file to open. + + + Note: To show a list of all persons who contributed to TRAVIS, + please add "-credits" to your command line arguments, or set the + variable "SHOWCREDITS" to "TRUE" in your travis.conf file. + + Source code from other projects used in TRAVIS: + - lmfit from Joachim Wuttke + - kiss_fft from Mark Borgerding + - voro++ from Chris Rycroft + + http://www.travis-analyzer.de + + Please cite: + + * "TRAVIS - A Free Analyzer and Visualizer for Monte Carlo and Molecular Dynamics Trajectories", + M. Brehm, B. Kirchner; J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. 
+ +*** The End *** + From 890a335a0cec406a5796773fe758fe4ddcdb08ab Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Sat, 4 Feb 2017 10:01:50 +0530 Subject: [PATCH 08/27] tried some changes to travis --- build_tools/travis/test_script.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index 6ab342b932cf1..908cc65dbd1f3 100755 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -7,6 +7,7 @@ # License: 3-clause BSD set -e +Cache-Control: no-cache, no-store, must-revalidate, proxy-revalidate python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" From 793e495fe63e267339e1f5784b58daaa11edd93a Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Sat, 4 Feb 2017 10:20:38 +0530 Subject: [PATCH 09/27] travis changes --- build_tools/travis/install.sh | 4 +++- build_tools/travis/test_script.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index bc9f13f80d96f..b92e85676f5af 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -21,7 +21,9 @@ export CXX=g++ echo 'List files from cached directories' echo 'pip:' ls $HOME/.cache/pip - +cache : + before_cache: + - rm -f $HOME/.cache/pip/log/debug.log if [[ "$DISTRIB" == "conda" ]]; then # Deactivate the travis-provided virtual environment and setup a diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index 908cc65dbd1f3..568d1289fa244 100755 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -7,7 +7,7 @@ # License: 3-clause BSD set -e -Cache-Control: no-cache, no-store, must-revalidate, proxy-revalidate + python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" From 1ba275c358cef97cb55c85fbd3b402fe4233016d Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Sat, 4 Feb 2017 10:35:17 +0530 Subject: [PATCH 10/27] 
removed travis changes --- build_tools/travis/install.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index b92e85676f5af..bc9f13f80d96f 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -21,9 +21,7 @@ export CXX=g++ echo 'List files from cached directories' echo 'pip:' ls $HOME/.cache/pip -cache : - before_cache: - - rm -f $HOME/.cache/pip/log/debug.log + if [[ "$DISTRIB" == "conda" ]]; then # Deactivate the travis-provided virtual environment and setup a From ed1fce7c352008e77f48b948f779440c5e4125ac Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Sat, 4 Feb 2017 14:46:35 +0530 Subject: [PATCH 11/27] [WIP] Fixes #8289 added get_max_squared_sum --- sklearn/linear_model/sag_fast.pyx | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/linear_model/sag_fast.pyx b/sklearn/linear_model/sag_fast.pyx index f4dce9c749161..485a1b9401a5b 100644 --- a/sklearn/linear_model/sag_fast.pyx +++ b/sklearn/linear_model/sag_fast.pyx @@ -561,3 +561,13 @@ cdef void predict_sample(double* x_data_ptr, int* x_ind_ptr, int xnnz, x_data_ptr[j]) prediction[class_ind] = wscale * innerprod + intercept[class_ind] + +cdef get_max_squared_sum(X): + """ + Computes the summation of square of individual elements along each row + finds the maximum and return it + Parameters: + ----------- + X : np.ndarray + """ + return np.sum(X ** 2, axis=1).max() From 9a9ebec050b493bc90624133fe6c6559c1d07920 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Sat, 4 Feb 2017 16:08:51 +0530 Subject: [PATCH 12/27] [WIP] Fixes #8289 minor changes --- sklearn/linear_model/sag_fast.pyx | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/sag_fast.pyx b/sklearn/linear_model/sag_fast.pyx index 485a1b9401a5b..96beb347a59cd 100644 --- a/sklearn/linear_model/sag_fast.pyx +++ b/sklearn/linear_model/sag_fast.pyx @@ -562,12 +562,12 @@ cdef void 
predict_sample(double* x_data_ptr, int* x_ind_ptr, int xnnz, prediction[class_ind] = wscale * innerprod + intercept[class_ind] -cdef get_max_squared_sum(X): - """ - Computes the summation of square of individual elements along each row - finds the maximum and return it - Parameters: - ----------- - X : np.ndarray - """ - return np.sum(X ** 2, axis=1).max() +def get_max_squared_sum(X): + """ + Computes the summation of square of individual elements along each row + finds the maximum and return it + Parameters: + ----------- + X : np.ndarray + """ + return np.sum(X ** 2, axis=1).max() From 594a91c06827eec69d638f565a3384317713ffd3 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Sun, 5 Feb 2017 20:35:45 +0530 Subject: [PATCH 13/27] --allow-empty Trigger --- build_tools/travis/travis.log | 86 ----------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 build_tools/travis/travis.log diff --git a/build_tools/travis/travis.log b/build_tools/travis/travis.log deleted file mode 100644 index cce76e3d9b2a0..0000000000000 --- a/build_tools/travis/travis.log +++ /dev/null @@ -1,86 +0,0 @@ -Command line: -"travis show master" - - - ________ __ - / | / | - ########/ ______ ______ __ __ ##/ _______ - ## | / \ / \ / \ / | / | / | - ## | /###### | ###### | ## \ /##/ ## | /#######/ - ## | ## | ##/ / ## | ## /##/ ## | ## \ - ## | ## | /####### | ## ##/ ## | ###### | - ## | ## | ## ## | ###/ ## | / ##/ - ##/ ##/ #######/ #/ ##/ #######/ - - TRajectory Analyzer and VISualizer - Open-source freeware under GNU GPL v3 - - Copyright (c) Martin Brehm (2009-2015) - Martin Thomas (2012-2015) - Barbara Kirchner (2009-2015) - University of Leipzig / University of Bonn. - - http://www.travis-analyzer.de - - Please cite: - M. Brehm and B. Kirchner, J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. - - There is absolutely no warranty on any results obtained from TRAVIS. - - # Running on master-Inspiron-5548 at Fri Feb 3 23:44:08 2017 (PID 4740). 
- # Running in /home/master/Documents/GitHub/scikit-learn/build_tools/travis - # Source code version: Nov 07 2015. - # Compiled at Nov 17 2015 23:12:21. - # Compiler version: 5.2.1 20151028 - # Target platform: Linux - # Compile flags: DEBUG_ARRAYS - # Machine: int=4b, long=8b, addr=8b, 0xA0B0C0D0=D0,C0,B0,A0. - # User home: /home/master - # Exe path: /usr/bin/travis - # Input from terminal, Output to terminal - - >>> Please use a color scheme with dark background or specify "-nocolor"! <<< - - Loading configuration from /home/master/.travis.conf ... - -Unknown parameter: "show". - - List of supported command line options: - - -p Loads position data from the specified trajectory file. - The file format may be *.xyz, *.pdb, *.lmp (Lammps), HISTORY (DLPOLY), or *.prmtop/*.mdcrd (Amber). - -i Reads input from the specified text file. - - -config Load the specified configuration file. - -stream Treats input trajectory as a stream (e.g. named pipe): No fseek, etc. - -showconf Shows a tree structure of the configuration file. - -writeconf Writes the default configuration file, including all defines values. - - -verbose Show detailed information about what's going on. - -nocolor Executes TRAVIS in monochrome mode. - -dimcolor Uses dim instead of bright colors. - - -credits Display a list of persons who contributed to TRAVIS. - -help, -? Shows this help. - - If only one argument is specified, it is assumed to be the name of a trajectory file. - If argument is specified at all, TRAVIS asks for the trajectory file to open. - - - Note: To show a list of all persons who contributed to TRAVIS, - please add "-credits" to your command line arguments, or set the - variable "SHOWCREDITS" to "TRUE" in your travis.conf file. 
- - Source code from other projects used in TRAVIS: - - lmfit from Joachim Wuttke - - kiss_fft from Mark Borgerding - - voro++ from Chris Rycroft - - http://www.travis-analyzer.de - - Please cite: - - * "TRAVIS - A Free Analyzer and Visualizer for Monte Carlo and Molecular Dynamics Trajectories", - M. Brehm, B. Kirchner; J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. - -*** The End *** - From f1c98c350bcab26fae24541a3277e2e3600d84e8 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Mon, 6 Feb 2017 12:41:54 +0530 Subject: [PATCH 14/27] Fixes #7578 made recommended changes --- sklearn/linear_model/sag_fast.pyx | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sklearn/linear_model/sag_fast.pyx b/sklearn/linear_model/sag_fast.pyx index 96beb347a59cd..f4dce9c749161 100644 --- a/sklearn/linear_model/sag_fast.pyx +++ b/sklearn/linear_model/sag_fast.pyx @@ -561,13 +561,3 @@ cdef void predict_sample(double* x_data_ptr, int* x_ind_ptr, int xnnz, x_data_ptr[j]) prediction[class_ind] = wscale * innerprod + intercept[class_ind] - -def get_max_squared_sum(X): - """ - Computes the summation of square of individual elements along each row - finds the maximum and return it - Parameters: - ----------- - X : np.ndarray - """ - return np.sum(X ** 2, axis=1).max() From 2fa494f14d6dce839e28a85d1ae9d39abc607728 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Mon, 13 Feb 2017 17:54:42 +0530 Subject: [PATCH 15/27] made recommended changes --- build_tools/travis/test_script.sh | 1 - sklearn/utils/estimator_checks.py | 17 +++--- travis.log | 87 ------------------------------- 3 files changed, 9 insertions(+), 96 deletions(-) delete mode 100644 travis.log diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index 568d1289fa244..6ab342b932cf1 100755 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -8,7 +8,6 @@ set -e - python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" python -c "import 
scipy; print('scipy %s' % scipy.__version__)" diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index ab09789082dc9..8be67c0719507 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -114,9 +114,9 @@ def _yield_classifier_checks(name, Classifier): # basic consistency testing yield check_classifiers_train yield check_classifiers_regression_target - if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"]): + if name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"]: # TODO some complication with -1 label - if (name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]): + if name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]: # We don't raise a warning in these classifiers, as # the column y interface is used by the forests. pass @@ -128,6 +128,8 @@ def _yield_classifier_checks(name, Classifier): yield check_class_weight_classifiers yield check_non_transformer_estimators_n_iter + # test if predict_proba is a monotonic transformation of decision_function + yield check_decision_proba_consistency @ignore_warnings(category=DeprecationWarning) @@ -163,7 +165,6 @@ def _yield_regressor_checks(name, Regressor): yield check_regressors_no_decision_function yield check_supervised_y_2d yield check_supervised_y_no_nan - yield check_decision_proba_consistency if name != 'CCA': # check that the regressor handles int input yield check_regressors_int @@ -1574,7 +1575,8 @@ def check_parameters_default_constructible(name, Estimator): try: def param_filter(p): """Identify hyper parameters of an estimator""" - return (p.name != 'self' and p.kind != p.VAR_KEYWORD and + return (p.name != 'self' and + p.kind != p.VAR_KEYWORD and p.kind != p.VAR_POSITIONAL) init_params = [p for p in signature(init).parameters.values() @@ -1724,10 +1726,9 @@ def check_classifiers_regression_target(name, Estimator): @ignore_warnings(category=DeprecationWarning) def 
check_decision_proba_consistency(name, Estimator): - """ - Check whether an estimator having both decision_function and - predict_proba methods has outputs with perfect rank correlation. - """ + # Check whether an estimator having both decision_function and + # predict_proba methods has outputs with perfect rank correlation. + rnd = np.random.RandomState(0) X_train = (3*rnd.uniform(size=(10, 4))).astype(int) y = X_train[:, 0] diff --git a/travis.log b/travis.log deleted file mode 100644 index 3213313b604c2..0000000000000 --- a/travis.log +++ /dev/null @@ -1,87 +0,0 @@ -Command line: -"travis show master" - - - ________ __ - / | / | - ########/ ______ ______ __ __ ##/ _______ - ## | / \ / \ / \ / | / | / | - ## | /###### | ###### | ## \ /##/ ## | /#######/ - ## | ## | ##/ / ## | ## /##/ ## | ## \ - ## | ## | /####### | ## ##/ ## | ###### | - ## | ## | ## ## | ###/ ## | / ##/ - ##/ ##/ #######/ #/ ##/ #######/ - - TRajectory Analyzer and VISualizer - Open-source freeware under GNU GPL v3 - - Copyright (c) Martin Brehm (2009-2015) - Martin Thomas (2012-2015) - Barbara Kirchner (2009-2015) - University of Leipzig / University of Bonn. - - http://www.travis-analyzer.de - - Please cite: - M. Brehm and B. Kirchner, J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. - - There is absolutely no warranty on any results obtained from TRAVIS. - - # Running on master-Inspiron-5548 at Fri Feb 3 23:40:52 2017 (PID 4708). - # Running in /home/master/Documents/GitHub/scikit-learn - # Source code version: Nov 07 2015. - # Compiled at Nov 17 2015 23:12:21. - # Compiler version: 5.2.1 20151028 - # Target platform: Linux - # Compile flags: DEBUG_ARRAYS - # Machine: int=4b, long=8b, addr=8b, 0xA0B0C0D0=D0,C0,B0,A0. - # User home: /home/master - # Exe path: /usr/bin/travis - # Input from terminal, Output to terminal - - >>> Please use a color scheme with dark background or specify "-nocolor"! <<< - - No configuration file found. 
- Writing default configuration to /home/master/.travis.conf ... - -Unknown parameter: "show". - - List of supported command line options: - - -p Loads position data from the specified trajectory file. - The file format may be *.xyz, *.pdb, *.lmp (Lammps), HISTORY (DLPOLY), or *.prmtop/*.mdcrd (Amber). - -i Reads input from the specified text file. - - -config Load the specified configuration file. - -stream Treats input trajectory as a stream (e.g. named pipe): No fseek, etc. - -showconf Shows a tree structure of the configuration file. - -writeconf Writes the default configuration file, including all defines values. - - -verbose Show detailed information about what's going on. - -nocolor Executes TRAVIS in monochrome mode. - -dimcolor Uses dim instead of bright colors. - - -credits Display a list of persons who contributed to TRAVIS. - -help, -? Shows this help. - - If only one argument is specified, it is assumed to be the name of a trajectory file. - If argument is specified at all, TRAVIS asks for the trajectory file to open. - - - Note: To show a list of all persons who contributed to TRAVIS, - please add "-credits" to your command line arguments, or set the - variable "SHOWCREDITS" to "TRUE" in your travis.conf file. - - Source code from other projects used in TRAVIS: - - lmfit from Joachim Wuttke - - kiss_fft from Mark Borgerding - - voro++ from Chris Rycroft - - http://www.travis-analyzer.de - - Please cite: - - * "TRAVIS - A Free Analyzer and Visualizer for Monte Carlo and Molecular Dynamics Trajectories", - M. Brehm, B. Kirchner; J. Chem. Inf. Model. 2011, 51 (8), pp 2007-2023. 
- -*** The End *** - From 5e35a4dbdca9b530b4e56ac6317a500198a86f81 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Mon, 13 Feb 2017 23:39:09 +0530 Subject: [PATCH 16/27] made AssertionError from Fail to Skip --- sklearn/utils/estimator_checks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 8be67c0719507..7eb717f108131 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1743,4 +1743,7 @@ def check_decision_proba_consistency(name, Estimator): X_test = (3*rnd.uniform(size=(5, 4))).astype(int) a = estimator.predict_proba(X_test) b = estimator.decision_function(X_test) - assert_array_equal(rankdata(a), rankdata(b)) + try: + assert_array_equal(rankdata(a), rankdata(b)) + except AssertionError as e: + raise SkipTest(e) From ff556ef240cd9e4c2b9d0e4375eb05a7eea36019 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Tue, 14 Feb 2017 16:44:52 +0530 Subject: [PATCH 17/27] made recommended changes --- sklearn/utils/estimator_checks.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 7eb717f108131..1e548a221ea09 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1730,7 +1730,7 @@ def check_decision_proba_consistency(name, Estimator): # predict_proba methods has outputs with perfect rank correlation. 
rnd = np.random.RandomState(0) - X_train = (3*rnd.uniform(size=(10, 4))).astype(int) + X_train = (2*rnd.uniform(size=(10, 4))).astype(int) y = X_train[:, 0] estimator = Estimator() @@ -1740,10 +1740,7 @@ def check_decision_proba_consistency(name, Estimator): hasattr(estimator, "predict_proba")): estimator.fit(X_train, y) - X_test = (3*rnd.uniform(size=(5, 4))).astype(int) - a = estimator.predict_proba(X_test) + X_test = (2*rnd.uniform(size=(5, 4))).astype(int) + a = estimator.predict_proba(X_test)[:, 1] b = estimator.decision_function(X_test) - try: - assert_array_equal(rankdata(a), rankdata(b)) - except AssertionError as e: - raise SkipTest(e) + assert_array_equal(rankdata(a), rankdata(b)) From 5c5a923cbe6f7eaeaff42d0731bccc76dd124316 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 15 Feb 2017 07:43:22 +0530 Subject: [PATCH 18/27] made recommended changes --- sklearn/utils/estimator_checks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 1e548a221ea09..8552c788dcc29 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1730,8 +1730,8 @@ def check_decision_proba_consistency(name, Estimator): # predict_proba methods has outputs with perfect rank correlation. 
rnd = np.random.RandomState(0) - X_train = (2*rnd.uniform(size=(10, 4))).astype(int) - y = X_train[:, 0] + X_train = np.random.randint(2, size=(10, 4)) + y = np.random.randint(2, size=10) estimator = Estimator() set_testing_parameters(estimator) @@ -1740,7 +1740,7 @@ def check_decision_proba_consistency(name, Estimator): hasattr(estimator, "predict_proba")): estimator.fit(X_train, y) - X_test = (2*rnd.uniform(size=(5, 4))).astype(int) + X_test = np.random.randint(2, size=(5, 4)) a = estimator.predict_proba(X_test)[:, 1] b = estimator.decision_function(X_test) assert_array_equal(rankdata(a), rankdata(b)) From 86a087b9f2bda324f29f5590d0478f4684ebee91 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 15 Feb 2017 12:07:29 +0530 Subject: [PATCH 19/27] made recommended changes --- sklearn/utils/estimator_checks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 8552c788dcc29..481e356563039 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1730,8 +1730,8 @@ def check_decision_proba_consistency(name, Estimator): # predict_proba methods has outputs with perfect rank correlation. 
rnd = np.random.RandomState(0) - X_train = np.random.randint(2, size=(10, 4)) - y = np.random.randint(2, size=10) + X_train = rnd.randint(2, size=(10, 4)) + y = rnd.randint(2, size=10) estimator = Estimator() set_testing_parameters(estimator) @@ -1740,7 +1740,7 @@ def check_decision_proba_consistency(name, Estimator): hasattr(estimator, "predict_proba")): estimator.fit(X_train, y) - X_test = np.random.randint(2, size=(5, 4)) + X_test = rnd.randint(2, size=(10, 4)) a = estimator.predict_proba(X_test)[:, 1] b = estimator.decision_function(X_test) assert_array_equal(rankdata(a), rankdata(b)) From 7d7488722e3d4aa76f280d473408504a30749daf Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Thu, 16 Feb 2017 19:22:23 +0530 Subject: [PATCH 20/27] made recommended changes --- sklearn/utils/estimator_checks.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 481e356563039..3cb511b1ac516 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1729,9 +1729,12 @@ def check_decision_proba_consistency(name, Estimator): # Check whether an estimator having both decision_function and # predict_proba methods has outputs with perfect rank correlation. 
- rnd = np.random.RandomState(0) - X_train = rnd.randint(2, size=(10, 4)) - y = rnd.randint(2, size=10) + centers = [(2, 2), (4, 4)] + X, y = make_blobs(n_samples=100, random_state=0, n_features=4, + centers=centers, cluster_std=1.0, shuffle=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, + random_state=0) + estimator = Estimator() set_testing_parameters(estimator) @@ -1739,8 +1742,7 @@ def check_decision_proba_consistency(name, Estimator): if (hasattr(estimator, "decision_function") and hasattr(estimator, "predict_proba")): - estimator.fit(X_train, y) - X_test = rnd.randint(2, size=(10, 4)) + estimator.fit(X_train, y_train) a = estimator.predict_proba(X_test)[:, 1] b = estimator.decision_function(X_test) assert_array_equal(rankdata(a), rankdata(b)) From 495f51437cf870721cb71d1272f2a7acbfd2aced Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Thu, 16 Feb 2017 21:11:49 +0530 Subject: [PATCH 21/27] rounded results --- sklearn/utils/estimator_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 3cb511b1ac516..412d4e0bdea26 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1743,6 +1743,6 @@ def check_decision_proba_consistency(name, Estimator): hasattr(estimator, "predict_proba")): estimator.fit(X_train, y_train) - a = estimator.predict_proba(X_test)[:, 1] - b = estimator.decision_function(X_test) + a = np.around(estimator.predict_proba(X_test)[:, 1], decimals=7) + b = np.around(estimator.decision_function(X_test), decimals=7) assert_array_equal(rankdata(a), rankdata(b)) From f59350247541003d18d1e8f624e48fc27f13c7ad Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 22 Feb 2017 06:39:50 +0530 Subject: [PATCH 22/27] made recommended changes --- sklearn/utils/estimator_checks.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git 
a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 412d4e0bdea26..23c9b649e5955 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -56,9 +56,8 @@ BOSTON = None CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'] -MULTI_OUTPUT = ['CCA', 'DecisionTreeClassifier', 'DecisionTreeRegressor', - 'ElasticNet', 'ExtraTreeClassifier', 'ExtraTreeRegressor', - 'ExtraTreesRegressor', 'GaussianProcess', +MULTI_OUTPUT = ['CCA', 'DecisionTreeRegressor', 'ElasticNet', + 'ExtraTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcess', 'GaussianProcessRegressor', 'KNeighborsRegressor', 'KernelRidge', 'Lars', 'Lasso', 'LassoLars', 'LinearRegression', 'MultiTaskElasticNet', @@ -114,12 +113,11 @@ def _yield_classifier_checks(name, Classifier): # basic consistency testing yield check_classifiers_train yield check_classifiers_regression_target - if name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"]: + if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"] # TODO some complication with -1 label - if name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]: + and name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]): # We don't raise a warning in these classifiers, as # the column y interface is used by the forests. 
- pass yield check_supervised_y_2d # test if NotFittedError is raised @@ -1732,9 +1730,7 @@ def check_decision_proba_consistency(name, Estimator): centers = [(2, 2), (4, 4)] X, y = make_blobs(n_samples=100, random_state=0, n_features=4, centers=centers, cluster_std=1.0, shuffle=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, - random_state=0) - + X_test = np.random.randn(20, 2)+4 estimator = Estimator() set_testing_parameters(estimator) @@ -1743,6 +1739,6 @@ def check_decision_proba_consistency(name, Estimator): hasattr(estimator, "predict_proba")): estimator.fit(X_train, y_train) - a = np.around(estimator.predict_proba(X_test)[:, 1], decimals=7) - b = np.around(estimator.decision_function(X_test), decimals=7) + a = estimator.predict_proba(X_test)[:, 1] + b = estimator.decision_function(X_test) assert_array_equal(rankdata(a), rankdata(b)) From f4bd859db008a1b59c58ff10920d698d79cbc9a8 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 22 Feb 2017 06:55:10 +0530 Subject: [PATCH 23/27] made recommended changes --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 23c9b649e5955..08ccce3e786c2 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1730,7 +1730,7 @@ def check_decision_proba_consistency(name, Estimator): centers = [(2, 2), (4, 4)] X, y = make_blobs(n_samples=100, random_state=0, n_features=4, centers=centers, cluster_std=1.0, shuffle=True) - X_test = np.random.randn(20, 2)+4 + X_test = np.random.randn(20, 2) + 4 estimator = Estimator() set_testing_parameters(estimator) From 10f440dc530266d8ae0fa43f4882ef8f6770ebe5 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 22 Feb 2017 07:00:16 +0530 Subject: [PATCH 24/27] minor changes --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 08ccce3e786c2..b1eacbd7ed494 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1738,7 +1738,7 @@ def check_decision_proba_consistency(name, Estimator): if (hasattr(estimator, "decision_function") and hasattr(estimator, "predict_proba")): - estimator.fit(X_train, y_train) + estimator.fit(X, y) a = estimator.predict_proba(X_test)[:, 1] b = estimator.decision_function(X_test) assert_array_equal(rankdata(a), rankdata(b)) From aec7ce24ce890fb7b2174a00847796d11243b11b Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 22 Feb 2017 07:21:07 +0530 Subject: [PATCH 25/27] added proper indent --- sklearn/utils/estimator_checks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index b1eacbd7ed494..5c790e4f65221 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -113,9 +113,10 @@ def _yield_classifier_checks(name, Classifier): # basic consistency testing yield check_classifiers_train yield check_classifiers_regression_target - if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"] + if (name not in + ["MultinomialNB", "LabelPropagation", "LabelSpreading"] and # TODO some complication with -1 label - and name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]): + name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]): # We don't raise a warning in these classifiers, as # the column y interface is used by the forests. 
From 15deefff60d640eb38cd53dad613a9dbcdfc1eb4 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 22 Feb 2017 15:09:53 +0530 Subject: [PATCH 26/27] added entry in whatsnew --- doc/whats_new.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 5c78c817031b6..681e4b851e942 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -246,6 +246,13 @@ API changes summary selection classes to be used with tools such as :func:`sklearn.model_selection.cross_val_predict`. :issue:`2879` by :user:`Stephen Hoover `. + + - Estimators with both methods ``decision_function`` and ``predict_proba`` + are now required to have a monotonic relation between them. The + function ``check_decision_proba_consistency`` has been added to + :mod:`sklearn.utils.estimator_checks` to check their consistency. + :issue:`7578` by :user:`Shubham Bhardwaj <shubham0704>`. + .. _changes_0_18_1: From ca5793ce80bd5a151714ef7b6ebf9c30f30f6bc8 Mon Sep 17 00:00:00 2001 From: shubham0704 Date: Wed, 22 Feb 2017 19:06:10 +0530 Subject: [PATCH 27/27] Trigger