|
51 | 51 | from sklearn.cluster import KMeans
|
52 | 52 |
|
53 | 53 | from sklearn.preprocessing import Imputer
|
| 54 | +from sklearn.preprocessing import LabelEncoder |
54 | 55 | from sklearn.pipeline import Pipeline
|
55 | 56 |
|
56 | 57 | from sklearn.externals.six.moves import cStringIO as StringIO
|
@@ -940,6 +941,79 @@ def test_cross_val_predict_with_method():
|
940 | 941 | cv=kfold)
|
941 | 942 | assert_array_almost_equal(expected_predictions, predictions)
|
942 | 943 |
|
| 944 | + # Test alternative representations of y |
| 945 | + predictions_y1 = cross_val_predict(est, X, y + 1, method=method, |
| 946 | + cv=kfold) |
| 947 | + assert_array_equal(predictions, predictions_y1) |
| 948 | + |
| 949 | + predictions_y2 = cross_val_predict(est, X, y - 2, method=method, |
| 950 | + cv=kfold) |
| 951 | + assert_array_equal(predictions, predictions_y2) |
| 952 | + |
| 953 | + predictions_ystr = cross_val_predict(est, X, y.astype('str'), |
| 954 | + method=method, cv=kfold) |
| 955 | + assert_array_equal(predictions, predictions_ystr) |
| 956 | + |
| 957 | + |
def get_expected_predictions(X, y, cv, classes, est, method):
    """Compute reference out-of-fold predictions with a naive loop.

    For each ``(train, test)`` pair yielded by ``cv.split(X, y)``, ``est`` is
    refit on the training rows and ``method`` is called on the test rows.
    The output is written into the columns given by ``est.classes_``, so
    columns for classes absent from a fold's training data stay at zero.

    Parameters
    ----------
    X : array
        Samples; must support indexing with the index arrays from ``cv``.
    y : array
        Targets; must support the same indexing.  Because ``est.classes_``
        is used directly as column indices, the labels are expected to be
        integers in ``range(classes)``.
    cv : object
        Splitter exposing ``split(X, y)``.
    classes : int
        Number of columns in the returned array.
    est : object
        Estimator exposing ``fit``, a ``classes_`` attribute after fitting,
        and the prediction method named by ``method``.
    method : str
        Name of the prediction method to call on ``est``.

    Returns
    -------
    expected_predictions : ndarray of shape (len(y), classes)
    """
    expected_predictions = np.zeros([len(y), classes])
    func = getattr(est, method)

    for train, test in cv.split(X, y):
        est.fit(X[train], y[train])
        fold_output = func(X[test])
        # Fill a per-fold buffer first to avoid 2 dimensional indexing
        # on the full result array.
        exp_pred_test = np.zeros((len(test), classes))
        # Compare by value: identity comparison (`is`) on strings only works
        # when both strings happen to be interned.
        if method == 'decision_function' and len(est.classes_) == 2:
            # With two fitted classes the decision_function output is
            # stored in the single column of the last class.
            exp_pred_test[:, est.classes_[-1]] = fold_output
        else:
            exp_pred_test[:, est.classes_] = fold_output
        expected_predictions[test] = exp_pred_test

    return expected_predictions
| 975 | + |
| 976 | + |
def test_cross_val_predict_class_subset():
    """Check cross_val_predict against a naive per-fold loop.

    For each of ``decision_function``, ``predict_proba`` and
    ``predict_log_proba``, the output of ``cross_val_predict`` is compared
    with ``get_expected_predictions`` on a 4-sample, 3-class problem, using
    3 and 4 KFold splits, and then with unordered labels.
    """
    X = np.arange(8).reshape(4, 2)
    y = np.array([0, 0, 1, 2])
    classes = 3

    kfold3 = KFold(n_splits=3)
    kfold4 = KFold(n_splits=4)

    le = LabelEncoder()

    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
    for method in methods:
        est = LogisticRegression()

        # Test with n_splits=3
        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold3)

        # Runs a naive loop (should be same as cross_val_predict):
        expected_predictions = get_expected_predictions(X, y, kfold3, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions)

        # Test with n_splits=4
        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold4)
        expected_predictions = get_expected_predictions(X, y, kfold4, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions)

        # Testing unordered labels.  Dedicated names are used so that `y`
        # is not rebound: every method in the loop must be checked against
        # the same original targets in the sections above.
        y_unordered = [1, 1, -4, 6]
        predictions = cross_val_predict(est, X, y_unordered, method=method,
                                        cv=kfold3)
        # The naive helper indexes columns by class label, so it needs the
        # labels encoded as consecutive integers.
        y_encoded = le.fit_transform(y_unordered)
        expected_predictions = get_expected_predictions(X, y_encoded, kfold3,
                                                        classes, est, method)
        assert_array_almost_equal(expected_predictions, predictions)
| 1016 | + |
943 | 1017 |
|
944 | 1018 | def test_score_memmap():
|
945 | 1019 | # Ensure a scalar score of memmap type is accepted
|
|
0 commit comments