From 94dc0a68b90bac47e783e4257109d2e0ce19f289 Mon Sep 17 00:00:00 2001
From: imaculate <imaculatemosha@yahoo.com>
Date: Fri, 24 Jun 2016 19:28:37 +0200
Subject: [PATCH 1/6] Fresh branch for linearsvr_fit_sample_weight with weights
 and documentation

---
 sklearn/svm/classes.py        | 14 ++++++++++--
 sklearn/svm/tests/test_svm.py | 40 +++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 0dd969aa215ed..394ff782404fa 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -165,7 +165,7 @@ def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4,
         self.penalty = penalty
         self.loss = loss
 
-    def fit(self, X, y):
+    def fit(self, X, y, sample_weight=None):
         """Fit the model according to the given training data.
 
         Parameters
@@ -177,6 +177,11 @@ def fit(self, X, y):
         y : array-like, shape = [n_samples]
             Target vector relative to X
 
+        sample_weight : array-like, shape = [n_samples], optional
+                        Array of weights that are assigned to individual
+                        samples. If not provided,
+                        then each sample is given unit weight.
+
         Returns
         -------
         self : object
@@ -210,7 +215,7 @@ def fit(self, X, y):
             X, y, self.C, self.fit_intercept, self.intercept_scaling,
             self.class_weight, self.penalty, self.dual, self.verbose,
             self.max_iter, self.tol, self.random_state, self.multi_class,
-            self.loss)
+            self.loss, sample_weight=sample_weight)
 
         if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
             self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)
@@ -341,6 +346,11 @@ def fit(self, X, y, sample_weight=None):
         y : array-like, shape = [n_samples]
             Target vector relative to X
 
+        sample_weight : array-like, shape = [n_samples], optional
+                        Array of weights that are assigned to individual
+                        samples. If not provided,
+                        then each sample is given unit weight.
+
         Returns
         -------
         self : object
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 6c25c6d9da10e..afe0a71a34875 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -655,6 +655,46 @@ def test_linearsvc_crammer_singer():
     assert_array_almost_equal(dec_func, cs_clf.decision_function(iris.data))
 
 
+def test_linearsvc_fit_sampleweight():
+    # check correct result when sample_weight is 1
+    # check that SVR(kernel='linear') and LinearSVC() give
+    # comparable results
+
+    # Test basic routines using LinearSVC
+    n_samples = len(X)
+    unit_weight = np.ones(n_samples)
+    clf = svm.LinearSVC(random_state=0).fit(X, Y)
+    clf_unitweight = svm.LinearSVC(random_state=0).fit(X, Y,
+                                                  sample_weight=unit_weight)
+
+    # sanity check, by default should have intercept
+    assert_true(clf_unitweight.fit_intercept)
+    assert_array_almost_equal(clf_unitweight.intercept_, [0], decimal=3)
+
+    # check if same as sample_weight=None
+    assert_array_equal(clf_unitweight.predict(T), clf.predict(T))
+    assert_allclose(np.linalg.norm(clf.coef_),
+                    np.linalg.norm(clf_unitweight.coef_), 1, 0.0001)
+
+    # check that fit(X)  = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
+    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
+
+    random_state = check_random_state(0)
+    random_weight = random_state.randint(0, 10, n_samples)
+    lsvc_unflat = svm.LinearSVC(random_state=0).fit(X, Y,
+                                                    sample_weight=random_weight)
+    pred1 = lsvc_unflat.predict(T)
+
+    X_flat = np.repeat(X, random_weight, axis=0)
+    y_flat = np.repeat(Y, random_weight, axis=0)
+    lsvc_flat = svm.LinearSVC(random_state=0).fit(X_flat, y_flat)
+    pred2 = lsvc_flat.predict(T)
+
+    assert_array_equal(pred1, pred2)
+    assert_allclose(np.linalg.norm(lsvc_unflat.coef_),
+                    np.linalg.norm(lsvc_flat.coef_), 1, 0.0001)
+
+
 def test_crammer_singer_binary():
     # Test Crammer-Singer formulation in the binary case
     X, y = make_classification(n_classes=2, random_state=0)

From e3aeb37d2f7bc6d0e822e21a79835864d2da754a Mon Sep 17 00:00:00 2001
From: imaculate <imaculatemosha@yahoo.com>
Date: Tue, 28 Jun 2016 13:06:06 +0200
Subject: [PATCH 2/6] Fixed pep8 violations, changed calibration test to allow
 LinearSVC with sample_weight

---
 sklearn/svm/tests/test_svm.py     | 16 ++++------------
 sklearn/tests/test_calibration.py |  6 ------
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index afe0a71a34875..5612aaa994bbc 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -657,19 +657,11 @@ def test_linearsvc_crammer_singer():
 
 def test_linearsvc_fit_sampleweight():
     # check correct result when sample_weight is 1
-    # check that SVR(kernel='linear') and LinearSVC() give
-    # comparable results
-
-    # Test basic routines using LinearSVC
     n_samples = len(X)
     unit_weight = np.ones(n_samples)
     clf = svm.LinearSVC(random_state=0).fit(X, Y)
-    clf_unitweight = svm.LinearSVC(random_state=0).fit(X, Y,
-                                                  sample_weight=unit_weight)
-
-    # sanity check, by default should have intercept
-    assert_true(clf_unitweight.fit_intercept)
-    assert_array_almost_equal(clf_unitweight.intercept_, [0], decimal=3)
+    clf_unitweight = svm.LinearSVC(random_state=0).\
+        fit(X, Y, sample_weight=unit_weight)
 
     # check if same as sample_weight=None
     assert_array_equal(clf_unitweight.predict(T), clf.predict(T))
@@ -681,8 +673,8 @@ def test_linearsvc_fit_sampleweight():
 
     random_state = check_random_state(0)
     random_weight = random_state.randint(0, 10, n_samples)
-    lsvc_unflat = svm.LinearSVC(random_state=0).fit(X, Y,
-                                                    sample_weight=random_weight)
+    lsvc_unflat = svm.LinearSVC(random_state=0).\
+        fit(X, Y, sample_weight=random_weight)
     pred1 = lsvc_unflat.predict(T)
 
     X_flat = np.repeat(X, random_weight, axis=0)
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 296d28a4ba94e..a2bb47984c8b4 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -119,12 +119,6 @@ def test_sample_weight_warning():
     for method in ['sigmoid', 'isotonic']:
         base_estimator = LinearSVC(random_state=42)
         calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
-        # LinearSVC does not currently support sample weights but they
-        # can still be used for the calibration step (with a warning)
-        msg = "LinearSVC does not support sample_weight."
-        assert_warns_message(
-            UserWarning, msg,
-            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
         probs_with_sw = calibrated_clf.predict_proba(X_test)
 
         # As the weights are used for the calibration, they should still yield

From 976412730b545158df7f4f0560ca271b8cbee1b8 Mon Sep 17 00:00:00 2001
From: imaculate <imaculatemosha@yahoo.com>
Date: Tue, 28 Jun 2016 13:23:19 +0200
Subject: [PATCH 3/6] Changed test_calibration

---
 sklearn/tests/test_calibration.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index a2bb47984c8b4..763faf4df37f6 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -119,6 +119,7 @@ def test_sample_weight_warning():
     for method in ['sigmoid', 'isotonic']:
         base_estimator = LinearSVC(random_state=42)
         calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
+        calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
         probs_with_sw = calibrated_clf.predict_proba(X_test)
 
         # As the weights are used for the calibration, they should still yield

From 98c14b9edbf8779be5bd6ccc0fe7593bb9f0fb41 Mon Sep 17 00:00:00 2001
From: imaculate <imaculatemosha@yahoo.com>
Date: Tue, 28 Jun 2016 17:37:39 +0200
Subject: [PATCH 4/6] Corrected indentation for docstrings

---
 sklearn/svm/classes.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 394ff782404fa..f1a7923979b98 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -347,9 +347,9 @@ def fit(self, X, y, sample_weight=None):
             Target vector relative to X
 
         sample_weight : array-like, shape = [n_samples], optional
-                        Array of weights that are assigned to individual
-                        samples. If not provided,
-                        then each sample is given unit weight.
+            Array of weights that are assigned to individual
+            samples. If not provided,
+            then each sample is given unit weight.
 
         Returns
         -------

From ae27e3cc5d507e650b3467b643253447c825800e Mon Sep 17 00:00:00 2001
From: imaculate <imaculatemosha@yahoo.com>
Date: Fri, 1 Jul 2016 00:06:16 +0200
Subject: [PATCH 5/6] Fixed docstring, remove normalization of coefficients in
 tests

---
 sklearn/svm/classes.py        | 6 +++---
 sklearn/svm/tests/test_svm.py | 6 ++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index f1a7923979b98..1a309693d9d1c 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -178,9 +178,9 @@ def fit(self, X, y, sample_weight=None):
             Target vector relative to X
 
         sample_weight : array-like, shape = [n_samples], optional
-                        Array of weights that are assigned to individual
-                        samples. If not provided,
-                        then each sample is given unit weight.
+            Array of weights that are assigned to individual
+            samples. If not provided,
+            then each sample is given unit weight.
 
         Returns
         -------
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 5612aaa994bbc..73ae3728662f3 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -665,8 +665,7 @@ def test_linearsvc_fit_sampleweight():
 
     # check if same as sample_weight=None
     assert_array_equal(clf_unitweight.predict(T), clf.predict(T))
-    assert_allclose(np.linalg.norm(clf.coef_),
-                    np.linalg.norm(clf_unitweight.coef_), 1, 0.0001)
+    assert_allclose(clf.coef_, clf_unitweight.coef_, 1, 0.0001)
 
     # check that fit(X)  = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
     # X = X1 repeated n1 times, X2 repeated n2 times and so forth
@@ -683,8 +682,7 @@ def test_linearsvc_fit_sampleweight():
     pred2 = lsvc_flat.predict(T)
 
     assert_array_equal(pred1, pred2)
-    assert_allclose(np.linalg.norm(lsvc_unflat.coef_),
-                    np.linalg.norm(lsvc_flat.coef_), 1, 0.0001)
+    assert_allclose(lsvc_unflat.coef_, lsvc_flat.coef_, 1, 0.0001)
 
 
 def test_crammer_singer_binary():

From f4ac81bebefe580d5bf316ca7da6d1061e35e3ef Mon Sep 17 00:00:00 2001
From: imaculate <imaculatemosha@yahoo.com>
Date: Mon, 11 Jul 2016 13:35:19 +0200
Subject: [PATCH 6/6] Renamed test_sample_weight_warning to test_sample_weight

---
 sklearn/tests/test_calibration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 763faf4df37f6..61cb51c67365d 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -106,7 +106,7 @@ def test_calibration():
         assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
 
 
-def test_sample_weight_warning():
+def test_sample_weight():
     n_samples = 100
     X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                                random_state=42)