From 3b901903fa6de0fb76ec68cfcc9ca49c7d6cda06 Mon Sep 17 00:00:00 2001
From: Basil Beirouti
Date: Sun, 15 Jan 2017 02:21:37 -0600
Subject: [PATCH 1/2] removed stray space in '__main__ '

---
 examples/text/document_classification_20newsgroups.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
index 20e8f16b0d9d3..23656b56f2bfc 100644
--- a/examples/text/document_classification_20newsgroups.py
+++ b/examples/text/document_classification_20newsgroups.py
@@ -85,7 +85,7 @@
 
 
 def is_interactive():
-    return not hasattr(sys.modules['__main__ '], '__file__')
+    return not hasattr(sys.modules['__main__'], '__file__')
 
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]

From d30dccd80fb04153c5efc5fa3991bb4e6214cead Mon Sep 17 00:00:00 2001
From: Basil Beirouti
Date: Sun, 15 Jan 2017 03:00:48 -0600
Subject: [PATCH 2/2] fixed pipeline bug causing TypeError and removed incorrect keyword argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pipeline was not created properly (both classes had .predict) causing
TypeError to be thrown. Also removed incorrect keyword argument loss=“l2”
for LinearSVC
---
 examples/text/document_classification_20newsgroups.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
index 23656b56f2bfc..f34bbd10cbe55 100644
--- a/examples/text/document_classification_20newsgroups.py
+++ b/examples/text/document_classification_20newsgroups.py
@@ -34,6 +34,7 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.feature_extraction.text import HashingVectorizer
+from sklearn.feature_selection import SelectFromModel
 from sklearn.feature_selection import SelectKBest, chi2
 from sklearn.linear_model import RidgeClassifier
 from sklearn.pipeline import Pipeline
@@ -259,8 +260,8 @@ def benchmark(clf):
     print('=' * 80)
     print("%s penalty" % penalty.upper())
     # Train Liblinear model
-    results.append(benchmark(LinearSVC(loss='l2', penalty=penalty,
-                                       dual=False, tol=1e-3)))
+    results.append(benchmark(LinearSVC(penalty=penalty, dual=False,
+                                       tol=1e-3)))
 
     # Train SGD model
     results.append(benchmark(SGDClassifier(alpha=.0001, n_iter=50,
@@ -288,9 +289,9 @@ def benchmark(clf):
 # The smaller C, the stronger the regularization.
 # The more regularization, the more sparsity.
 results.append(benchmark(Pipeline([
-  ('feature_selection', LinearSVC(penalty="l1", dual=False, tol=1e-3)),
-  ('classification', LinearSVC())
-])))
+  ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False,
+                                                  tol=1e-3))),
+  ('classification', LinearSVC(penalty="l2"))])))
 
 
 # make some plots
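
For context, here is a minimal, self-contained sketch (not part of the patch itself) of the corrected pipeline pattern: SelectFromModel wraps the L1-penalized LinearSVC so the first step exposes fit/transform instead of predict, and the final LinearSVC supplies predict() for the whole Pipeline. The two newsgroup categories are an arbitrary choice to keep the example small and quick to run.

    # Sketch of the pipeline fixed by PATCH 2/2; category choice is illustrative.
    from sklearn.datasets import fetch_20newsgroups
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.feature_selection import SelectFromModel
    from sklearn.pipeline import Pipeline
    from sklearn.svm import LinearSVC

    data = fetch_20newsgroups(subset='train',
                              categories=['sci.space', 'rec.autos'])
    X = TfidfVectorizer().fit_transform(data.data)
    y = data.target

    clf = Pipeline([
        # The L1 penalty drives many coefficients to exactly zero;
        # SelectFromModel keeps only the features with non-zero weights.
        ('feature_selection', SelectFromModel(LinearSVC(penalty="l1",
                                                        dual=False,
                                                        tol=1e-3))),
        # The final estimator provides predict(), so the Pipeline does too.
        ('classification', LinearSVC(penalty="l2")),
    ])
    clf.fit(X, y)
    print(clf.score(X, y))

Using the bare L1 LinearSVC as the first step, as the example did before this patch, fails because a Pipeline requires every intermediate step to be a transformer; wrapping it in SelectFromModel is what makes it usable for feature selection.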