DOC additional fixes to 20 newsgroups to prevent TypeError (#8204)

BasilBeirouti · jnothman · commit 3dc8d2f83bfb · 2017-01-16T00:09:44.000+11:00
* removed stray space in '__main__ '

* fixed pipeline bug causing TypeError and removed incorrect keyword argument

pipeline was not created properly (both classes had .predict) causing
TypeError to be thrown.

Also removed incorrect keyword argument loss=“l2” for LinearSVC
diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
@@ -34,6 +34,7 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.feature_extraction.text import HashingVectorizer
+from sklearn.feature_selection import SelectFromModel
 from sklearn.feature_selection import SelectKBest, chi2
 from sklearn.linear_model import RidgeClassifier
 from sklearn.pipeline import Pipeline
@@ -259,8 +260,8 @@ def benchmark(clf):
     print('=' * 80)
     print("%s penalty" % penalty.upper())
     # Train Liblinear model
-    results.append(benchmark(LinearSVC(loss='l2', penalty=penalty,
-                                            dual=False, tol=1e-3)))
+    results.append(benchmark(LinearSVC(penalty=penalty, dual=False,
+                                       tol=1e-3)))
 
     # Train SGD model
     results.append(benchmark(SGDClassifier(alpha=.0001, n_iter=50,
@@ -288,9 +289,9 @@ def benchmark(clf):
 # The smaller C, the stronger the regularization.
 # The more regularization, the more sparsity.
 results.append(benchmark(Pipeline([
-  ('feature_selection', LinearSVC(penalty="l1", dual=False, tol=1e-3)),
-  ('classification', LinearSVC())
-])))
+  ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False,
+                                                  tol=1e-3))),
+  ('classification', LinearSVC(penalty="l2"))])))
 
 # make some plots