From b21724ec9b4b523f8303d171c482ae41a29e663a Mon Sep 17 00:00:00 2001 From: Lee Reeves Date: Fri, 10 Mar 2017 02:22:32 -0700 Subject: [PATCH 1/2] DOC improved explanation of p-value from permutation_test_score (#8379 and #8564). --- sklearn/cross_validation.py | 13 ++++++++----- sklearn/model_selection/_validation.py | 13 ++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 03c74b88f5f28..12a800eebc340 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1905,11 +1905,14 @@ def permutation_test_score(estimator, X, y, cv=None, The scores obtained for each permutations. pvalue : float - The returned value equals p-value if `scoring` returns bigger - numbers for better scores (e.g., accuracy_score). If `scoring` is - rather a loss function (i.e. when lower is better such as with - `mean_squared_error`) then this is actually the complement of the - p-value: 1 - p-value. + The p-value, which approximates the probability the score would be + obtained by chance. This is calculated as: + + `(C + 1) / (n_permutations + 1)` + + Where C is the number of permutations whose score >= the true score. + + The best possible p-value is 1/(n_permutations + 1), the worst is 1.0. Notes ----- diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index ab18d9035b4d2..c2e41b9f8c125 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -596,11 +596,14 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, The scores obtained for each permutations. pvalue : float - The returned value equals p-value if `scoring` returns bigger - numbers for better scores (e.g., accuracy_score). If `scoring` is - rather a loss function (i.e. when lower is better such as with - `mean_squared_error`) then this is actually the complement of the - p-value: 1 - p-value. + The p-value, which approximates the probability the score would be + obtained by chance. This is calculated as: + + `(C + 1) / (n_permutations + 1)` + + Where C is the number of permutations whose score >= the true score. + + The best possible p-value is 1/(n_permutations + 1), the worst is 1.0. Notes ----- From a097a15bfa7cc77b13bc94b59d31ec900807a2c3 Mon Sep 17 00:00:00 2001 From: Lee Reeves Date: Sun, 12 Mar 2017 15:00:41 -0700 Subject: [PATCH 2/2] DOC add missing word 'that' in docstring --- sklearn/cross_validation.py | 4 ++-- sklearn/model_selection/_validation.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 12a800eebc340..ff327a25e4924 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1905,8 +1905,8 @@ def permutation_test_score(estimator, X, y, cv=None, The scores obtained for each permutations. pvalue : float - The p-value, which approximates the probability the score would be - obtained by chance. This is calculated as: + The p-value, which approximates the probability that the score would + be obtained by chance. This is calculated as: `(C + 1) / (n_permutations + 1)` diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index c2e41b9f8c125..e65720b709555 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -596,8 +596,8 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, The scores obtained for each permutations. pvalue : float - The p-value, which approximates the probability the score would be - obtained by chance. This is calculated as: + The p-value, which approximates the probability that the score would + be obtained by chance. This is calculated as: `(C + 1) / (n_permutations + 1)`