|
34 | 34 | from sklearn.datasets import fetch_20newsgroups
|
35 | 35 | from sklearn.feature_extraction.text import TfidfVectorizer
|
36 | 36 | from sklearn.feature_extraction.text import HashingVectorizer
|
| 37 | +from sklearn.feature_selection import SelectFromModel |
37 | 38 | from sklearn.feature_selection import SelectKBest, chi2
|
38 | 39 | from sklearn.linear_model import RidgeClassifier
|
39 | 40 | from sklearn.pipeline import Pipeline
|
@@ -259,8 +260,8 @@ def benchmark(clf):
|
259 | 260 | print('=' * 80)
|
260 | 261 | print("%s penalty" % penalty.upper())
|
261 | 262 | # Train Liblinear model
|
262 |
| - results.append(benchmark(LinearSVC(loss='l2', penalty=penalty, |
263 |
| - dual=False, tol=1e-3))) |
| 263 | + results.append(benchmark(LinearSVC(penalty=penalty, dual=False, |
| 264 | + tol=1e-3))) |
264 | 265 |
|
265 | 266 | # Train SGD model
|
266 | 267 | results.append(benchmark(SGDClassifier(alpha=.0001, n_iter=50,
|
@@ -288,9 +289,9 @@ def benchmark(clf):
|
288 | 289 | # The smaller C, the stronger the regularization.
|
289 | 290 | # The more regularization, the more sparsity.
|
290 | 291 | results.append(benchmark(Pipeline([
|
291 |
| - ('feature_selection', LinearSVC(penalty="l1", dual=False, tol=1e-3)), |
292 |
| - ('classification', LinearSVC()) |
293 |
| -]))) |
| 292 | + ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False, |
| 293 | + tol=1e-3))), |
| 294 | + ('classification', LinearSVC(penalty="l2"))]))) |
294 | 295 |
|
295 | 296 | # make some plots
|
296 | 297 |
|
|
0 commit comments