@@ -6,7 +6,7 @@
 from numpy.testing import assert_allclose
 from scipy import sparse

-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, clone
 from sklearn.dummy import DummyClassifier
 from sklearn.model_selection import LeaveOneOut, train_test_split

@@ -784,3 +784,97 @@ def test_calibration_display_ref_line(pyplot, iris_data_binary):

 labels = viz2.ax_.get_legend_handles_labels()[1]
 assert labels.count("Perfectly calibrated") == 1
+
+
+@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
+@pytest.mark.parametrize("ensemble", [True, False])
+def test_calibrated_classifier_cv_double_sample_weights_equivalence(method, ensemble):
+    """Check that passing the dataset `X` repeated twice is equivalent to
+    passing a `sample_weight` with a factor of 2."""
+    X, y = load_iris(return_X_y=True)
+    # Scale the data to avoid any convergence issue
+    X = StandardScaler().fit_transform(X)
+    # Only use 2 classes
+    X, y = X[:100], y[:100]
+    sample_weight = np.ones_like(y) * 2
+
+    # Interlace the data such that a 2-fold cross-validation will be
+    # equivalent to using the original dataset with a sample weight of 2
+    X_twice = np.zeros((X.shape[0] * 2, X.shape[1]), dtype=X.dtype)
+    X_twice[::2, :] = X
+    X_twice[1::2, :] = X
+    y_twice = np.zeros(y.shape[0] * 2, dtype=y.dtype)
+    y_twice[::2] = y
+    y_twice[1::2] = y
+
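+    # Duplicates are adjacent in `X_twice`, so the stratified 2-fold
+    # boundaries fall between duplicate pairs: every CV training split should
+    # contain exactly two copies of each sample that the weighted fit sees
+    # with a weight of 2.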
+    base_estimator = LogisticRegression()
+    calibrated_clf_without_weights = CalibratedClassifierCV(
+        base_estimator,
+        method=method,
+        ensemble=ensemble,
+        cv=2,
+    )
+    calibrated_clf_with_weights = clone(calibrated_clf_without_weights)
+
+    calibrated_clf_with_weights.fit(X, y, sample_weight=sample_weight)
+    calibrated_clf_without_weights.fit(X_twice, y_twice)
+
+    # Check that the underlying fitted estimators have the same coefficients
+    for est_with_weights, est_without_weights in zip(
+        calibrated_clf_with_weights.calibrated_classifiers_,
+        calibrated_clf_without_weights.calibrated_classifiers_,
+    ):
+        assert_allclose(
+            est_with_weights.base_estimator.coef_,
+            est_without_weights.base_estimator.coef_,
+        )
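+    # With `ensemble=True` there is one calibrated classifier per CV fold;
+    # with `ensemble=False` a single one is refit on the full dataset, so the
+    # loop above covers both cases.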
+
+    # Check that the predictions are the same
+    y_pred_with_weights = calibrated_clf_with_weights.predict_proba(X)
+    y_pred_without_weights = calibrated_clf_without_weights.predict_proba(X)
+
+    assert_allclose(y_pred_with_weights, y_pred_without_weights)
+
+
+@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
+@pytest.mark.parametrize("ensemble", [True, False])
+def test_calibrated_classifier_cv_zeros_sample_weights_equivalence(method, ensemble):
+    """Check that removing some samples from the dataset `X` is equivalent
+    to passing those samples a `sample_weight` of 0."""
+    X, y = load_iris(return_X_y=True)
+    # Scale the data to avoid any convergence issue
+    X = StandardScaler().fit_transform(X)
+    # Only use 2 classes and select samples such that the 2-fold
+    # cross-validation split will lead to an equivalence with a
+    # `sample_weight` of 0
+    X = np.vstack((X[:40], X[50:90]))
+    y = np.hstack((y[:40], y[50:90]))
+    sample_weight = np.zeros_like(y)
+    sample_weight[::2] = 1
+
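+    # Every other sample gets a weight of 0, so fitting with these weights
+    # should be equivalent to fitting on `X[::2]`, `y[::2]` only, provided
+    # the zero-weighted samples are ignored at every stage of the pipeline.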
+    base_estimator = LogisticRegression()
+    calibrated_clf_without_weights = CalibratedClassifierCV(
+        base_estimator,
+        method=method,
+        ensemble=ensemble,
+        cv=2,
+    )
+    calibrated_clf_with_weights = clone(calibrated_clf_without_weights)
+
+    calibrated_clf_with_weights.fit(X, y, sample_weight=sample_weight)
+    calibrated_clf_without_weights.fit(X[::2], y[::2])
+
+    # Check that the underlying fitted estimators have the same coefficients
+    for est_with_weights, est_without_weights in zip(
+        calibrated_clf_with_weights.calibrated_classifiers_,
+        calibrated_clf_without_weights.calibrated_classifiers_,
+    ):
+        assert_allclose(
+            est_with_weights.base_estimator.coef_,
+            est_without_weights.base_estimator.coef_,
+        )
+
+    # Check that the predictions are the same
+    y_pred_with_weights = calibrated_clf_with_weights.predict_proba(X)
+    y_pred_without_weights = calibrated_clf_without_weights.predict_proba(X)
+
+    assert_allclose(y_pred_with_weights, y_pred_without_weights)
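
The equivalence these tests rely on can also be checked in isolation: for an estimator whose loss is a weighted sum over samples, an integer `sample_weight` of 2 yields the same objective as physically duplicating every row. A minimal standalone sketch of that property (illustrative only, not part of this commit):

```python
import numpy as np
from numpy.testing import assert_allclose

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)
X = StandardScaler().fit_transform(X)[:100]  # keep the first two classes, scaled
y = y[:100]

# Fit once with every sample weighted by 2 ...
clf_weighted = LogisticRegression().fit(X, y, sample_weight=2 * np.ones_like(y))
# ... and once with every sample physically duplicated.
clf_doubled = LogisticRegression().fit(np.repeat(X, 2, axis=0), np.repeat(y, 2))

# Both fits minimize the same penalized objective, so the coefficients
# should agree up to solver tolerance.
assert_allclose(clf_weighted.coef_, clf_doubled.coef_, rtol=1e-5)
```

The same reasoning with a weight of 0 underlies the second test: zero-weighted rows contribute nothing to the loss, so dropping them must not change the fit.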