 """
-=================================================================
-Test with permutations the significance of a classification score
-=================================================================
+Moved example
+==============
 
-This example demonstrates the use of
-:func:`~sklearn.model_selection.permutation_test_score` to evaluate the
-significance of a cross-validated score using permutations.
-"""
-
-# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
-#          Lucy Liu
-# License: BSD 3 clause
-#
-# Dataset
-# -------
-#
-# We will use the :ref:`iris_dataset`, which consists of measurements taken
-# from 3 types of irises.
-
-from sklearn.datasets import load_iris
-
-iris = load_iris()
-X = iris.data
-y = iris.target
-
-# %%
-# We will also generate some random feature data (i.e., 2200 features),
-# uncorrelated with the class labels in the iris dataset.
-
-import numpy as np
-
-n_uncorrelated_features = 2200
-rng = np.random.RandomState(seed=0)
-# Use same number of samples as in iris and 2200 features
-X_rand = rng.normal(size=(X.shape[0], n_uncorrelated_features))
-
-# %%
-# Permutation test score
-# ----------------------
-#
-# Next, we calculate the
-# :func:`~sklearn.model_selection.permutation_test_score` using the original
-# iris dataset, whose features strongly predict the labels, and using the
-# randomly generated features with the iris labels, where there should be
-# no dependency between features and labels. We use the
-# :class:`~sklearn.svm.SVC` classifier and :ref:`accuracy_score` to evaluate
-# the model at each round.
-#
-# :func:`~sklearn.model_selection.permutation_test_score` generates a null
-# distribution by calculating the accuracy of the classifier
-# on 1000 different permutations of the dataset, where features
-# remain the same but labels undergo different permutations. This is the
-# distribution for the null hypothesis, which states there is no dependency
-# between the features and labels. An empirical p-value is then calculated as
-# the percentage of permutations for which the score obtained is greater
-# than the score obtained using the original data.
-
-from sklearn.svm import SVC
-from sklearn.model_selection import StratifiedKFold
-from sklearn.model_selection import permutation_test_score
-
-clf = SVC(kernel='linear', random_state=7)
-cv = StratifiedKFold(2, shuffle=True, random_state=0)
-
-score_iris, perm_scores_iris, pvalue_iris = permutation_test_score(
-    clf, X, y, scoring="accuracy", cv=cv, n_permutations=1000)
-
-score_rand, perm_scores_rand, pvalue_rand = permutation_test_score(
-    clf, X_rand, y, scoring="accuracy", cv=cv, n_permutations=1000)
-
-# %%
-# Original data
-# ^^^^^^^^^^^^^
-#
-# Below we plot a histogram of the permutation scores (the null
-# distribution). The red line indicates the score obtained by the classifier
-# on the original data. The score is much better than those obtained by
-# using permuted data and the p-value is thus very low. This indicates that
-# there is a low likelihood that this good score would be obtained by chance
-# alone. It provides evidence that the iris dataset contains real dependency
-# between features and labels and the classifier was able to utilize this
-# to obtain good results.
-
-import matplotlib.pyplot as plt
-
-fig, ax = plt.subplots()
-
-ax.hist(perm_scores_iris, bins=20, density=True)
-ax.axvline(score_iris, ls='--', color='r')
-score_label = (f"Score on original\ndata: {score_iris:.2f}\n"
-               f"(p-value: {pvalue_iris:.3f})")
-ax.text(0.7, 260, score_label, fontsize=12)
-ax.set_xlabel("Accuracy score")
-_ = ax.set_ylabel("Probability")
-
-# %%
-# Random data
-# ^^^^^^^^^^^
-#
-# Below we plot the null distribution for the randomized data. The permutation
-# scores are similar to those obtained using the original iris dataset
-# because the permutation always destroys any feature-label dependency present.
-# The score obtained on the original randomized data in this case, though, is
-# very poor. This results in a large p-value, confirming that there was no
-# feature-label dependency in the original data.
+This example was moved to
+:ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py`
 
-fig, ax = plt.subplots()
+You will be redirected to the new example in 5 seconds.
 
-ax.hist(perm_scores_rand, bins=20, density=True)
-ax.set_xlim(0.13)
-ax.axvline(score_rand, ls='--', color='r')
-score_label = (f"Score on original\ndata: {score_rand:.2f}\n"
-               f"(p-value: {pvalue_rand:.3f})")
-ax.text(0.14, 125, score_label, fontsize=12)
-ax.set_xlabel("Accuracy score")
-ax.set_ylabel("Probability")
-plt.show()
+.. meta::
+    :http-equiv=refresh: 5; ../model_selection/plot_permutation_tests_for_classification.html
 
-# %%
-# Another possible reason for obtaining a high p-value is that the classifier
-# was not able to use the structure in the data. In this case, the p-value
-# would only be low for classifiers that are able to utilize the dependency
-# present. In our case above, where the data is random, all classifiers would
-# have a high p-value as there is no structure present in the data.
-#
-# Finally, note that this test has been shown to produce low p-values even
-# if there is only weak structure in the data [1]_.
-#
-# .. topic:: References:
-#
-#    .. [1] Ojala and Garriga. `Permutation Tests for Studying Classifier
-#           Performance
-#           <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_. The
-#           Journal of Machine Learning Research (2010) vol. 11
-#
+"""  # noqa