From 3b901903fa6de0fb76ec68cfcc9ca49c7d6cda06 Mon Sep 17 00:00:00 2001
From: Basil Beirouti
Date: Sun, 15 Jan 2017 02:21:37 -0600
Subject: [PATCH 1/2] removed stray space in '__main__ '

---
 examples/text/document_classification_20newsgroups.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
index 20e8f16b0d9d3..23656b56f2bfc 100644
--- a/examples/text/document_classification_20newsgroups.py
+++ b/examples/text/document_classification_20newsgroups.py
@@ -85,7 +85,7 @@
 
 
 def is_interactive():
-    return not hasattr(sys.modules['__main__ '], '__file__')
+    return not hasattr(sys.modules['__main__'], '__file__')
 
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]

From d30dccd80fb04153c5efc5fa3991bb4e6214cead Mon Sep 17 00:00:00 2001
From: Basil Beirouti
Date: Sun, 15 Jan 2017 03:00:48 -0600
Subject: [PATCH 2/2] fixed pipeline bug causing TypeError and removed incorrect keyword argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pipeline was not created properly (both classes had .predict) causing
TypeError to be thrown. Also removed incorrect keyword argument loss=“l2”
for LinearSVC
---
 examples/text/document_classification_20newsgroups.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
index 23656b56f2bfc..f34bbd10cbe55 100644
--- a/examples/text/document_classification_20newsgroups.py
+++ b/examples/text/document_classification_20newsgroups.py
@@ -34,6 +34,7 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.feature_extraction.text import HashingVectorizer
+from sklearn.feature_selection import SelectFromModel
 from sklearn.feature_selection import SelectKBest, chi2
 from sklearn.linear_model import RidgeClassifier
 from sklearn.pipeline import Pipeline
@@ -259,8 +260,8 @@ def benchmark(clf):
     print('=' * 80)
     print("%s penalty" % penalty.upper())
     # Train Liblinear model
-    results.append(benchmark(LinearSVC(loss='l2', penalty=penalty,
-                                       dual=False, tol=1e-3)))
+    results.append(benchmark(LinearSVC(penalty=penalty, dual=False,
+                                       tol=1e-3)))
 
     # Train SGD model
     results.append(benchmark(SGDClassifier(alpha=.0001, n_iter=50,
@@ -288,9 +289,9 @@ def benchmark(clf):
 # The smaller C, the stronger the regularization.
 # The more regularization, the more sparsity.
 results.append(benchmark(Pipeline([
-  ('feature_selection', LinearSVC(penalty="l1", dual=False, tol=1e-3)),
-  ('classification', LinearSVC())
-])))
+  ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False,
+                                                  tol=1e-3))),
+  ('classification', LinearSVC(penalty="l2"))])))
 
 
 # make some plots
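
For context, here is a minimal, self-contained sketch (not part of the patch itself) of the corrected pipeline pattern: SelectFromModel wraps the L1-penalized LinearSVC so the first step exposes fit/transform instead of predict, and the final LinearSVC supplies predict() for the whole Pipeline. The two newsgroup categories are an arbitrary choice to keep the example small and quick to run.

    # Sketch of the pipeline fixed by PATCH 2/2; category choice is illustrative.
    from sklearn.datasets import fetch_20newsgroups
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.feature_selection import SelectFromModel
    from sklearn.pipeline import Pipeline
    from sklearn.svm import LinearSVC

    data = fetch_20newsgroups(subset='train',
                              categories=['sci.space', 'rec.autos'])
    X = TfidfVectorizer().fit_transform(data.data)
    y = data.target

    clf = Pipeline([
        # The L1 penalty drives many coefficients to exactly zero;
        # SelectFromModel keeps only the features with non-zero weights.
        ('feature_selection', SelectFromModel(LinearSVC(penalty="l1",
                                                        dual=False,
                                                        tol=1e-3))),
        # The final estimator provides predict(), so the Pipeline does too.
        ('classification', LinearSVC(penalty="l2")),
    ])
    clf.fit(X, y)
    print(clf.score(X, y))

Using the bare L1 LinearSVC as the first step, as the example did before this patch, fails because a Pipeline requires every intermediate step to be a transformer; wrapping it in SelectFromModel is what makes it usable for feature selection.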