-
-
Notifications
You must be signed in to change notification settings - Fork 25.8k
[MRG+1] Fix semi_supervised #9239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cb2bbed
cbde082
979d872
262f758
7e3a2ed
03311bf
9a68a41
c356a82
76310e5
ce15e24
e863810
356ccc2
1506b1b
5508a24
31b1be1
6d8cefc
6a8ba66
6ee6dd1
4d7433b
3783ef3
5cb7983
d5643d8
caf8655
ae23455
1f70682
2705d73
a56f985
37b0dee
8fd2ddd
77174e8
81c0964
8f21418
105bcf7
f8fa824
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,8 +3,12 @@ | |
import numpy as np | ||
|
||
from sklearn.utils.testing import assert_equal | ||
from sklearn.utils.testing import assert_warns | ||
from sklearn.utils.testing import assert_raises | ||
from sklearn.utils.testing import assert_no_warnings | ||
from sklearn.semi_supervised import label_propagation | ||
from sklearn.metrics.pairwise import rbf_kernel | ||
from sklearn.datasets import make_classification | ||
from numpy.testing import assert_array_almost_equal | ||
from numpy.testing import assert_array_equal | ||
|
||
|
@@ -59,3 +63,85 @@ def test_predict_proba(): | |
clf = estimator(**parameters).fit(samples, labels) | ||
assert_array_almost_equal(clf.predict_proba([[1., 1.]]), | ||
np.array([[0.5, 0.5]])) | ||
|
||
|
||
def test_alpha_deprecation():
    # Passing alpha=0 to LabelPropagation is deprecated: it must raise a
    # DeprecationWarning while still producing the same transduction as the
    # default (alpha unset) configuration.
    X, y = make_classification(n_samples=100)
    y[::3] = -1  # mark every third sample as unlabelled

    default_model = label_propagation.LabelPropagation(kernel='rbf', gamma=0.1)
    default_labels = assert_no_warnings(default_model.fit, X, y).transduction_

    zero_alpha = label_propagation.LabelPropagation(alpha=0, kernel='rbf',
                                                    gamma=0.1)
    deprecated_labels = assert_warns(DeprecationWarning,
                                     zero_alpha.fit, X, y).transduction_

    assert_array_equal(default_labels, deprecated_labels)
|
||
|
||
def test_label_spreading_closed_form():
    # Check the iterative LabelSpreading solution against the closed-form
    # expression from Zhou et al (2004): F* = (I - alpha * S)^-1 . Y,
    # row-normalized to a probability distribution.
    n_classes = 2
    X, y = make_classification(n_classes=n_classes, n_samples=200,
                               random_state=0)
    y[::3] = -1  # unlabel every third sample

    clf = label_propagation.LabelSpreading().fit(X, y)
    # adopting notation from Zhou et al (2004):
    S = clf._build_graph()

    # One-hot encode y with an extra trailing column for the unlabelled
    # marker (-1), then drop that column.
    Y = np.zeros((len(y), n_classes + 1))
    Y[np.arange(len(y)), y] = 1
    Y = Y[:, :-1]

    # Several alphas are kept on purpose: they probe the numerical stability
    # of the matrix inversion in the closed form.
    for alpha in (0.1, 0.3, 0.5, 0.7, 0.9):
        expected = np.linalg.inv(np.eye(len(S)) - alpha * S).dot(Y)
        expected /= expected.sum(axis=1)[:, np.newaxis]

        iterative = label_propagation.LabelSpreading(max_iter=10000,
                                                     alpha=alpha)
        iterative.fit(X, y)
        assert_array_almost_equal(expected, iterative.label_distributions_, 4)
|
||
|
||
def test_label_propagation_closed_form():
    # Compare the iterative LabelPropagation solution with the closed form
    # from Zhu et al (2002): Y_u = (I - T_uu)^-1 . T_ul . Y_l
    n_classes = 2
    X, y = make_classification(n_classes=n_classes, n_samples=200,
                               random_state=0)
    y[::3] = -1  # unlabel every third sample

    # One-hot encoding with a trailing column flagging unlabelled samples.
    Y = np.zeros((len(y), n_classes + 1))
    Y[np.arange(len(y)), y] = 1
    unlabelled_idx = Y[:, (-1,)].nonzero()[0]
    labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0]

    clf = label_propagation.LabelPropagation(max_iter=10000,
                                             gamma=0.1).fit(X, y)
    # adopting notation from Zhu et al 2002
    T_bar = clf._build_graph()
    Tuu = T_bar[np.meshgrid(unlabelled_idx, unlabelled_idx, indexing='ij')]
    Tul = T_bar[np.meshgrid(unlabelled_idx, labelled_idx, indexing='ij')]
    Y = Y[:, :-1]  # drop the unlabelled-marker column

    Y_l = Y[labelled_idx, :]
    Y_u = np.linalg.inv(np.eye(Tuu.shape[0]) - Tuu).dot(Tul).dot(Y_l)

    expected = Y.copy()
    expected[unlabelled_idx, :] = Y_u
    expected /= expected.sum(axis=1)[:, np.newaxis]

    assert_array_almost_equal(expected, clf.label_distributions_, 4)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this test taken from "Zhou et al (2004)" as well? If so, please add a comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really. Joel came up with this "sanity check" by calculating the same value in an iterative way (which is supposed to be numerically stable and should work for large datasets) and the closed-form way (which involves inversion of a matrix). The test just compares the two sets of values calculated. Nevertheless, the closed-form formula is indeed from Zhou et al (2004) and it may help any future maintainer. I'll add it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. |
||
|
||
|
||
def test_valid_alpha():
    # LabelSpreading requires alpha strictly inside (0, 1); anything else
    # (including None) must raise a ValueError at fit time.
    n_classes = 2
    X, y = make_classification(n_classes=n_classes, n_samples=200,
                               random_state=0)

    def fit_with(**params):
        return label_propagation.LabelSpreading(**params).fit(X, y)

    for bad_alpha in (-0.1, 0, 1, 1.1, None):
        assert_raises(ValueError, fit_with, alpha=bad_alpha)
|
||
|
||
def test_convergence_speed():
    # Non-regression test for #5774: LabelSpreading should need only a few
    # iterations on this tiny problem, not run up against max_iter.
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])

    model = label_propagation.LabelSpreading(kernel='rbf', max_iter=5000)
    model.fit(X, y)

    # this should converge quickly:
    assert model.n_iter_ < 10
    assert_array_equal(model.predict(X), [0, 1, 1])
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
2002, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LabelPropagation
's reference:LabelSpreading
's reference:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah ok sorry.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The names had me confused more than once as well. :)