add class_weight to PA cls, remove from PA reg · scikit-learn/scikit-learn@718825d · GitHub
[go: up one dir, main page]

Skip to content

Commit 718825d

Browse files
add class_weight to PA cls, remove from PA reg
rebase on top of #4347 improve error message
1 parent ec2fd72 commit 718825d

File tree

3 files changed

+102
-6
lines changed

3 files changed

+102
-6
lines changed

doc/whats_new.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ Enhancements
6161
option, which has a simpler formula and interpretation.
6262
By Hanna Wallach and `Andreas Müller`_.
6363

64+
- Add ``class_weight`` parameter to automatically weight samples by class
65+
frequency for :class:`linear_model.PassiveAggressiveClassifier`. By
66+
`Trevor Stephens`_.
67+
6468
- Added backlinks from the API reference pages to the user guide. By
6569
`Andreas Müller`_.
6670

@@ -572,7 +576,7 @@ API changes summary
572576

573577
- The ``shuffle`` option of :class:`.linear_model.SGDClassifier`,
574578
:class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
575-
:class:`linear_model.PassiveAgressiveClassivier` and
579+
:class:`linear_model.PassiveAggressiveClassifier` and
576580
:class:`linear_model.PassiveAggressiveRegressor` now defaults to ``True``.
577581

578582
- :class:`cluster.DBSCAN` now uses a deterministic initialization. The

sklearn/linear_model/passive_aggressive.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,16 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
4949
When set to True, reuse the solution of the previous call to fit as
5050
initialization, otherwise, just erase the previous solution.
5151
52+
class_weight : dict, {class_label: weight} or "balanced" or None, optional
53+
Preset for the class_weight fit parameter.
54+
55+
Weights associated with classes. If not given, all classes
56+
are supposed to have weight one.
57+
58+
The "balanced" mode uses the values of y to automatically adjust
59+
weights inversely proportional to class frequencies in the input data
60+
as ``n_samples / (n_classes * np.bincount(y))``
61+
5262
Attributes
5363
----------
5464
coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\
@@ -71,9 +81,9 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
7181
K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
7282
7383
"""
74-
def __init__(self, C=1.0, fit_intercept=True,
75-
n_iter=5, shuffle=True, verbose=0, loss="hinge",
76-
n_jobs=1, random_state=None, warm_start=False):
84+
def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
85+
verbose=0, loss="hinge", n_jobs=1, random_state=None,
86+
warm_start=False, class_weight=None):
7787
BaseSGDClassifier.__init__(self,
7888
penalty=None,
7989
fit_intercept=fit_intercept,
@@ -83,6 +93,7 @@ def __init__(self, C=1.0, fit_intercept=True,
8393
random_state=random_state,
8494
eta0=1.0,
8595
warm_start=warm_start,
96+
class_weight=class_weight,
8697
n_jobs=n_jobs)
8798
self.C = C
8899
self.loss = loss
@@ -110,6 +121,16 @@ def partial_fit(self, X, y, classes=None):
110121
-------
111122
self : returns an instance of self.
112123
"""
124+
if self.class_weight == 'balanced':
125+
raise ValueError("class_weight 'balanced' is not supported for "
126+
"partial_fit. In order to use 'balanced' "
127+
"weights, from the sklearn.utils module use "
128+
"compute_class_weight('balanced', classes, y). "
129+
"In place of y you can us a large enough sample "
130+
"of the full training set target to properly "
131+
"estimate the class frequency distributions. "
132+
"Pass the resulting weights as the class_weight "
133+
"parameter.")
113134
lr = "pa1" if self.loss == "hinge" else "pa2"
114135
return self._partial_fit(X, y, alpha=1.0, C=self.C,
115136
loss="hinge", learning_rate=lr, n_iter=1,
@@ -209,8 +230,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
209230
"""
210231
def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
211232
verbose=0, loss="epsilon_insensitive",
212-
epsilon=DEFAULT_EPSILON, random_state=None, class_weight=None,
213-
warm_start=False):
233+
epsilon=DEFAULT_EPSILON, random_state=None, warm_start=False):
214234
BaseSGDRegressor.__init__(self,
215235
penalty=None,
216236
l1_ratio=0,

sklearn/linear_model/tests/test_passive_aggressive.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from sklearn.utils.testing import assert_less
55
from sklearn.utils.testing import assert_greater
66
from sklearn.utils.testing import assert_array_almost_equal, assert_array_equal
7+
from sklearn.utils.testing import assert_almost_equal
78
from sklearn.utils.testing import assert_raises
89

910
from sklearn.base import ClassifierMixin
@@ -125,6 +126,77 @@ def test_classifier_undefined_methods():
125126
assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
126127

127128

129+
def test_class_weights():
130+
# Test class weights.
131+
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
132+
[1.0, 1.0], [1.0, 0.0]])
133+
y2 = [1, 1, 1, -1, -1]
134+
135+
clf = PassiveAggressiveClassifier(C=0.1, n_iter=100, class_weight=None,
136+
random_state=100)
137+
clf.fit(X2, y2)
138+
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))
139+
140+
# we give a small weight to class 1
141+
clf = PassiveAggressiveClassifier(C=0.1, n_iter=100,
142+
class_weight={1: 0.001},
143+
random_state=100)
144+
clf.fit(X2, y2)
145+
146+
# now the hyperplane should rotate clock-wise and
147+
# the prediction on this point should shift
148+
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))
149+
150+
151+
def test_partial_fit_weight_class_balanced():
152+
# partial_fit with class_weight='balanced' not supported
153+
clf = PassiveAggressiveClassifier(class_weight="balanced")
154+
assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y))
155+
156+
157+
def test_equal_class_weight():
158+
X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
159+
y2 = [0, 0, 1, 1]
160+
clf = PassiveAggressiveClassifier(C=0.1, n_iter=1000, class_weight=None)
161+
clf.fit(X2, y2)
162+
163+
# Already balanced, so "balanced" weights should have no effect
164+
clf_balanced = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
165+
class_weight="balanced")
166+
clf_balanced.fit(X2, y2)
167+
168+
clf_weighted = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
169+
class_weight={0: 0.5, 1: 0.5})
170+
clf_weighted.fit(X2, y2)
171+
172+
# should be similar up to some epsilon due to learning rate schedule
173+
assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)
174+
assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2)
175+
176+
177+
def test_wrong_class_weight_label():
178+
# ValueError due to wrong class_weight label.
179+
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
180+
[1.0, 1.0], [1.0, 0.0]])
181+
y2 = [1, 1, 1, -1, -1]
182+
183+
clf = PassiveAggressiveClassifier(class_weight={0: 0.5})
184+
assert_raises(ValueError, clf.fit, X2, y2)
185+
186+
187+
def test_wrong_class_weight_format():
188+
# ValueError due to wrong class_weight argument type.
189+
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
190+
[1.0, 1.0], [1.0, 0.0]])
191+
y2 = [1, 1, 1, -1, -1]
192+
193+
clf = PassiveAggressiveClassifier(class_weight=[0.5])
194+
assert_raises(ValueError, clf.fit, X2, y2)
195+
196+
clf = PassiveAggressiveClassifier(class_weight="the larch")
197+
assert_raises(ValueError, clf.fit, X2, y2)
198+
199+
128200
def test_regressor_mse():
129201
y_bin = y.copy()
130202
y_bin[y != 1] = -1

0 commit comments

Comments
 (0)
0