[MRG+1] Added sample weight parameter to linearsvc.fit, includes tests and documentation. by imaculate · Pull Request #6939 · scikit-learn/scikit-learn


Merged · merged 6 commits on Jul 11, 2016
14 changes: 12 additions & 2 deletions sklearn/svm/classes.py
@@ -165,7 +165,7 @@ def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4,
self.penalty = penalty
self.loss = loss

- def fit(self, X, y):
+ def fit(self, X, y, sample_weight=None):
"""Fit the model according to the given training data.

Parameters
@@ -177,6 +177,11 @@ def fit(self, X, y):
y : array-like, shape = [n_samples]
Target vector relative to X

sample_weight : array-like, shape = [n_samples], optional
Array of weights that are assigned to individual
samples. If not provided,
then each sample is given unit weight.

Returns
-------
self : object
@@ -210,7 +215,7 @@ def fit(self, X, y):
X, y, self.C, self.fit_intercept, self.intercept_scaling,
self.class_weight, self.penalty, self.dual, self.verbose,
self.max_iter, self.tol, self.random_state, self.multi_class,
- self.loss)
+ self.loss, sample_weight=sample_weight)

if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)
@@ -341,6 +346,11 @@ def fit(self, X, y, sample_weight=None):
y : array-like, shape = [n_samples]
Target vector relative to X

sample_weight : array-like, shape = [n_samples], optional
Array of weights that are assigned to individual
samples. If not provided,
then each sample is given unit weight.
Member: This one is correct :).

Returns
-------
self : object
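For context, here is a minimal usage sketch of the new parameter; the data and weight values are hypothetical, made up for illustration:

import numpy as np
from sklearn.svm import LinearSVC

X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
y = np.array([0, 0, 1, 1])
weights = np.array([1., 1., 2., 0.5])  # hypothetical per-sample weights

clf = LinearSVC(random_state=0)
# sample_weight is the keyword this PR adds to LinearSVC.fit
clf.fit(X, y, sample_weight=weights)
print(clf.predict([[1.5, 1.5]]))

Passing sample_weight=None (the default) keeps the previous behaviour, where every sample gets unit weight.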
30 changes: 30 additions & 0 deletions sklearn/svm/tests/test_svm.py
@@ -655,6 +655,36 @@ def test_linearsvc_crammer_singer():
assert_array_almost_equal(dec_func, cs_clf.decision_function(iris.data))


def test_linearsvc_fit_sampleweight():
# check correct result when sample_weight is 1
n_samples = len(X)
unit_weight = np.ones(n_samples)
clf = svm.LinearSVC(random_state=0).fit(X, Y)
clf_unitweight = svm.LinearSVC(random_state=0).\
fit(X, Y, sample_weight=unit_weight)
Member: I actually meant just clf_unitweight.fit(...)

Member: But this is okay...

# check if same as sample_weight=None
assert_array_equal(clf_unitweight.predict(T), clf.predict(T))
assert_allclose(clf.coef_, clf_unitweight.coef_, 1, 0.0001)

# check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
# X = X1 repeated n1 times, X2 repeated n2 times and so forth

random_state = check_random_state(0)
random_weight = random_state.randint(0, 10, n_samples)
lsvc_unflat = svm.LinearSVC(random_state=0).\
fit(X, Y, sample_weight=random_weight)
pred1 = lsvc_unflat.predict(T)

X_flat = np.repeat(X, random_weight, axis=0)
y_flat = np.repeat(Y, random_weight, axis=0)
lsvc_flat = svm.LinearSVC(random_state=0).fit(X_flat, y_flat)
pred2 = lsvc_flat.predict(T)

assert_array_equal(pred1, pred2)
assert_allclose(lsvc_unflat.coef_, lsvc_flat.coef_, 1, 0.0001)
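
The equivalence checked above rests on np.repeat expanding each row by its integer weight, so an integer-weighted fit sees the same data as a fit on the duplicated samples. A standalone sketch of that expansion (arrays are hypothetical):

import numpy as np

X = np.array([[0, 1], [2, 3]])
w = np.array([2, 3])               # integer sample weights
X_flat = np.repeat(X, w, axis=0)   # row 0 twice, then row 1 three times
# X_flat is [[0, 1], [0, 1], [2, 3], [2, 3], [2, 3]]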


def test_crammer_singer_binary():
# Test Crammer-Singer formulation in the binary case
X, y = make_classification(n_classes=2, random_state=0)
9 changes: 2 additions & 7 deletions sklearn/tests/test_calibration.py
@@ -106,7 +106,7 @@ def test_calibration():
assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)


- def test_sample_weight_warning():
+ def test_sample_weight():
n_samples = 100
X, y = make_classification(n_samples=2 * n_samples, n_features=6,
random_state=42)
@@ -119,12 +119,7 @@ def test_sample_weight_warning():
for method in ['sigmoid', 'isotonic']:
base_estimator = LinearSVC(random_state=42)
calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
- # LinearSVC does not currently support sample weights but they
- # can still be used for the calibration step (with a warning)
- msg = "LinearSVC does not support sample_weight."
- assert_warns_message(
- UserWarning, msg,
- calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
+ calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
Member: Should we remove this test altogether? It looks like it was added to test a warning that is no longer issued. If it is still useful, we should rename the test function to better reflect what it is testing.

Contributor Author: Perhaps rename it to test_sample_weight?

probs_with_sw = calibrated_clf.predict_proba(X_test)

Member: How strange that I don't see where this was formerly raised.

# As the weights are used for the calibration, they should still yield
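With the warning gone, sample weights now flow through the calibration step into the base LinearSVC as well. A minimal sketch of the updated behaviour, assuming the CalibratedClassifierCV API of this era (variable names are illustrative):

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=6, random_state=42)
sample_weight = np.random.RandomState(42).uniform(0.5, 2.0, size=200)

base_estimator = LinearSVC(random_state=42)
calibrated_clf = CalibratedClassifierCV(base_estimator, method='sigmoid')
# sample_weight now reaches both the calibration step and LinearSVC.fit
calibrated_clf.fit(X, y, sample_weight=sample_weight)
probs = calibrated_clf.predict_proba(X)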