diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 0dd969aa215ed..1a309693d9d1c 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -165,7 +165,7 @@ def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4,
         self.penalty = penalty
         self.loss = loss
 
-    def fit(self, X, y):
+    def fit(self, X, y, sample_weight=None):
         """Fit the model according to the given training data.
 
         Parameters
@@ -177,6 +177,11 @@ def fit(self, X, y):
         y : array-like, shape = [n_samples]
             Target vector relative to X
 
+        sample_weight : array-like, shape = [n_samples], optional
+            Array of weights that are assigned to individual
+            samples. If not provided,
+            then each sample is given unit weight.
+
         Returns
         -------
         self : object
@@ -210,7 +215,7 @@ def fit(self, X, y):
             X, y, self.C, self.fit_intercept, self.intercept_scaling,
             self.class_weight, self.penalty, self.dual, self.verbose,
             self.max_iter, self.tol, self.random_state, self.multi_class,
-            self.loss)
+            self.loss, sample_weight=sample_weight)
 
         if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
             self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)
@@ -341,6 +346,11 @@ def fit(self, X, y, sample_weight=None):
         y : array-like, shape = [n_samples]
             Target vector relative to X
 
+        sample_weight : array-like, shape = [n_samples], optional
+            Array of weights that are assigned to individual
+            samples. If not provided,
+            then each sample is given unit weight.
+
         Returns
         -------
         self : object
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 6c25c6d9da10e..73ae3728662f3 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -655,6 +655,36 @@ def test_linearsvc_crammer_singer():
     assert_array_almost_equal(dec_func, cs_clf.decision_function(iris.data))
 
 
+def test_linearsvc_fit_sampleweight():
+    # check correct result when sample_weight is 1
+    n_samples = len(X)
+    unit_weight = np.ones(n_samples)
+    clf = svm.LinearSVC(random_state=0).fit(X, Y)
+    clf_unitweight = svm.LinearSVC(random_state=0).\
+        fit(X, Y, sample_weight=unit_weight)
+
+    # check if same as sample_weight=None
+    assert_array_equal(clf_unitweight.predict(T), clf.predict(T))
+    assert_allclose(clf.coef_, clf_unitweight.coef_, 1, 0.0001)
+
+    # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where
+    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
+
+    random_state = check_random_state(0)
+    random_weight = random_state.randint(0, 10, n_samples)
+    lsvc_unflat = svm.LinearSVC(random_state=0).\
+        fit(X, Y, sample_weight=random_weight)
+    pred1 = lsvc_unflat.predict(T)
+
+    X_flat = np.repeat(X, random_weight, axis=0)
+    y_flat = np.repeat(Y, random_weight, axis=0)
+    lsvc_flat = svm.LinearSVC(random_state=0).fit(X_flat, y_flat)
+    pred2 = lsvc_flat.predict(T)
+
+    assert_array_equal(pred1, pred2)
+    assert_allclose(lsvc_unflat.coef_, lsvc_flat.coef_, 1, 0.0001)
+
+
 def test_crammer_singer_binary():
     # Test Crammer-Singer formulation in the binary case
     X, y = make_classification(n_classes=2, random_state=0)
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 296d28a4ba94e..61cb51c67365d 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -106,7 +106,7 @@ def test_calibration():
     assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
 
 
-def test_sample_weight_warning():
+def test_sample_weight():
     n_samples = 100
     X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                                random_state=42)
@@ -119,12 +119,7 @@ def test_sample_weight_warning():
     for method in ['sigmoid', 'isotonic']:
         base_estimator = LinearSVC(random_state=42)
         calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
-        # LinearSVC does not currently support sample weights but they
-        # can still be used for the calibration step (with a warning)
-        msg = "LinearSVC does not support sample_weight."
-        assert_warns_message(
-            UserWarning, msg,
-            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
+        calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
         probs_with_sw = calibrated_clf.predict_proba(X_test)
 
         # As the weights are used for the calibration, they should still yield
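
Outside the patch itself, a minimal sketch of the behaviour the new `sample_weight` argument to `LinearSVC.fit` is meant to provide (the equivalence asserted in `test_linearsvc_fit_sampleweight`): weighting a sample by an integer k should act like repeating that sample k times. The toy arrays below are illustrative only and assume a scikit-learn build that includes this change.

```python
import numpy as np
from sklearn.svm import LinearSVC

# Illustrative toy data (not taken from the test suite).
X = np.array([[-2., -1.], [-1., -1.], [-1., -2.], [1., 1.], [1., 2.], [2., 1.]])
y = np.array([1, 1, 1, 2, 2, 2])
weights = np.array([1, 3, 1, 2, 1, 1])

# Fit once with integer sample weights ...
weighted = LinearSVC(random_state=0).fit(X, y, sample_weight=weights)
# ... and once on the "flattened" dataset where row i is repeated weights[i] times.
repeated = LinearSVC(random_state=0).fit(np.repeat(X, weights, axis=0),
                                         np.repeat(y, weights, axis=0))

T = np.array([[-1., -1.], [2., 2.], [3., 2.]])
print(weighted.predict(T))
print(repeated.predict(T))  # expected to match the weighted fit's predictions
```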
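The calibration test change relies on the same new behaviour: `CalibratedClassifierCV` can now forward `sample_weight` to a `LinearSVC` base estimator instead of emitting the old "LinearSVC does not support sample_weight." UserWarning and dropping the weights. A hedged sketch of that usage, with made-up data and the `base_estimator`-style constructor used in this test file:

```python
import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=6, random_state=42)
sw = np.random.RandomState(42).rand(len(y)) + 1.0  # strictly positive weights

calibrated = CalibratedClassifierCV(LinearSVC(random_state=42), method='sigmoid')
# With this patch the weights reach the LinearSVC fits as well as the
# calibration step, and no UserWarning is emitted.
calibrated.fit(X[:100], y[:100], sample_weight=sw[:100])
print(calibrated.predict_proba(X[100:110]))
```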