From 0a256ddda58442d7df0eaef96f6c510be467af99 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 28 May 2020 16:15:28 +0200 Subject: [PATCH 1/2] amend doc string --- sklearn/model_selection/_validation.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index dd204ad4a57d0..ff42d21fd810d 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -965,9 +965,16 @@ def _check_is_permutation(indices, n_samples): def permutation_test_score(estimator, X, y, *, groups=None, cv=None, n_permutations=100, n_jobs=None, random_state=0, verbose=0, scoring=None): - """Evaluate the significance of a cross-validated score with permutations + """Evaluates the significance of a cross-validated score using permutations - Read more in the :ref:`User Guide <cross_validation>`. + Permutes targest to generate 'randomized data' and compute the empirical + p-value against the null hypothesis that features and targets are + independent. + + The p-value represents the fraction of randomized data sets where the + estimator performed as well or better than in the original data. A small + p-value suggests that there is a real dependency between features and + targets which has been used by the estimator to give good predictions. Parameters ---------- @@ -1054,10 +1061,10 @@ def permutation_test_score(estimator, X, y, *, groups=None, cv=None, ----- This function implements Test 1 in: - Ojala and Garriga. Permutation Tests for Studying Classifier - Performance. The Journal of Machine Learning Research (2010) - vol. 11 - `[pdf] <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_. + Ojala and Garriga. `Permutation Tests for Studying Classifier + Performance + <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_. The + Journal of Machine Learning Research (2010) vol. 
11 """ X, y, groups = indexable(X, y, groups) From cde33d14f53dc84e15a8d25ee890601136c9be2e Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 28 May 2020 16:25:24 +0200 Subject: [PATCH 2/2] explain high p val --- sklearn/model_selection/_validation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index ff42d21fd810d..f7735a883ab79 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -967,7 +967,7 @@ def permutation_test_score(estimator, X, y, *, groups=None, cv=None, verbose=0, scoring=None): """Evaluates the significance of a cross-validated score using permutations - Permutes targest to generate 'randomized data' and compute the empirical + Permutes targets to generate 'randomized data' and compute the empirical p-value against the null hypothesis that features and targets are independent. @@ -975,6 +975,9 @@ def permutation_test_score(estimator, X, y, *, groups=None, cv=None, estimator performed as well or better than in the original data. A small p-value suggests that there is a real dependency between features and targets which has been used by the estimator to give good predictions. + A large p-value may be due to a lack of real dependency between features + and targets or to the estimator not being able to use the dependency to + give good predictions. Parameters ----------