-
-
Notifications
You must be signed in to change notification settings - Fork 25.9k
[MRG+1] Added sample weight parameter to linearsvc.fit, includes tests and documentation. #6939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
94dc0a6
e3aeb37
9764127
98c14b9
ae27e3c
f4ac81b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -655,6 +655,36 @@ def test_linearsvc_crammer_singer(): | |
assert_array_almost_equal(dec_func, cs_clf.decision_function(iris.data)) | ||
|
||
|
||
def test_linearsvc_fit_sampleweight():
    """Check that LinearSVC.fit honours the ``sample_weight`` argument.

    Two properties are exercised:

    1. Fitting with a weight of 1 for every sample gives the same
       predictions and (approximately) the same coefficients as fitting
       without any weights.
    2. Fitting with integer weights ``n_i`` gives the same predictions and
       (approximately) the same coefficients as fitting on a data set in
       which sample ``i`` is physically repeated ``n_i`` times.

    ``X``, ``Y`` and ``T`` are not defined here; presumably they are the
    module-level toy training samples, labels and test samples -- verify
    against the top of the test module.
    """
    n_samples = len(X)

    # --- Property 1: all-ones weights behave like sample_weight=None ---
    ones = np.ones(n_samples)
    unweighted = svm.LinearSVC(random_state=0)
    unweighted.fit(X, Y)
    unit_weighted = svm.LinearSVC(random_state=0)
    unit_weighted.fit(X, Y, sample_weight=ones)

    assert_array_equal(unit_weighted.predict(T), unweighted.predict(T))
    # NOTE(review): the original passed these tolerances positionally as
    # ``1, 0.0001``; assuming numpy's assert_allclose signature that is
    # rtol=1 (100 % relative error), which is very permissive -- confirm
    # this looseness is intended.
    assert_allclose(unweighted.coef_, unit_weighted.coef_,
                    rtol=1, atol=0.0001)

    # --- Property 2: integer weights behave like repeated samples ---
    # fit([X1, X2, X3], sample_weight=[n1, n2, n3]) should match fit(X)
    # where X holds X1 repeated n1 times, X2 repeated n2 times, and so on.
    rng = check_random_state(0)
    int_weights = rng.randint(0, 10, n_samples)

    weighted = svm.LinearSVC(random_state=0)
    weighted.fit(X, Y, sample_weight=int_weights)
    weighted_pred = weighted.predict(T)

    # Materialise the weights by duplicating rows; samples drawn with a
    # weight of 0 are dropped entirely by np.repeat.
    X_repeated = np.repeat(X, int_weights, axis=0)
    y_repeated = np.repeat(Y, int_weights, axis=0)
    repeated = svm.LinearSVC(random_state=0)
    repeated.fit(X_repeated, y_repeated)
    repeated_pred = repeated.predict(T)

    assert_array_equal(weighted_pred, repeated_pred)
    assert_allclose(weighted.coef_, repeated.coef_, rtol=1, atol=0.0001)
|
||
|
||
def test_crammer_singer_binary(): | ||
# Test Crammer-Singer formulation in the binary case | ||
X, y = make_classification(n_classes=2, random_state=0) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,7 +106,7 @@ def test_calibration(): | |
assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train) | ||
|
||
|
||
def test_sample_weight_warning(): | ||
def test_sample_weight(): | ||
n_samples = 100 | ||
X, y = make_classification(n_samples=2 * n_samples, n_features=6, | ||
random_state=42) | ||
|
@@ -119,12 +119,7 @@ def test_sample_weight_warning(): | |
for method in ['sigmoid', 'isotonic']: | ||
base_estimator = LinearSVC(random_state=42) | ||
calibrated_clf = CalibratedClassifierCV(base_estimator, method=method) | ||
# LinearSVC does not currently support sample weights but they | ||
# can still be used for the calibration step (with a warning) | ||
msg = "LinearSVC does not support sample_weight." | ||
assert_warns_message( | ||
UserWarning, msg, | ||
calibrated_clf.fit, X_train, y_train, sample_weight=sw_train) | ||
calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we remove this test altogether? It looks like it was added to test a warning that is not issued anymore. If it is still useful we should rename the test function to better reflect what it is testing. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps rename it to test_sample_weight? |
||
probs_with_sw = calibrated_clf.predict_proba(X_test) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How strange that I don't see where this was formerly raised. |
||
# As the weights are used for the calibration, they should still yield | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one is correct :).