From 1bec8e3937f199ee2fb2aa7426e73a0e63f03fac Mon Sep 17 00:00:00 2001
From: hashcode55
Date: Sun, 19 Jun 2016 02:53:34 +0530
Subject: [PATCH 1/3] first commit

---
 doc/datasets/twenty_newsgroups.rst             | 8 ++++----
 examples/model_selection/grid_search_digits.py | 2 +-
 sklearn/metrics/tests/test_classification.py   | 8 ++++----
 sklearn/svm/tests/test_svm.py                  | 4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index 01c2a53ff77e5..e0e845a04f539 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -132,8 +132,8 @@ which is fast to train and achieves a decent F-score::
   >>> clf = MultinomialNB(alpha=.01)
   >>> clf.fit(vectors, newsgroups_train.target)
   >>> pred = clf.predict(vectors_test)
-  >>> metrics.f1_score(newsgroups_test.target, pred, average='weighted')
-  0.88251152461278892
+  >>> metrics.f1_score(newsgroups_test.target, pred, average='macro')
+  0.88213592402729568
 
 (The example :ref:`example_text_document_classification_20newsgroups.py` shuffles
 the training and test data, instead of segmenting by time, and in that case
@@ -182,8 +182,8 @@ blocks, and quotation blocks respectively.
   ...                                      categories=categories)
   >>> vectors_test = vectorizer.transform(newsgroups_test.data)
   >>> pred = clf.predict(vectors_test)
-  >>> metrics.f1_score(pred, newsgroups_test.target, average='weighted')
-  0.78409163025839435
+  >>> metrics.f1_score(pred, newsgroups_test.target, average='macro')
+  0.77310350681274775
 
 This classifier lost over a lot of its F-score, just because we removed
 metadata that has little to do with topic classification.
diff --git a/examples/model_selection/grid_search_digits.py b/examples/model_selection/grid_search_digits.py
index 40ed573247efd..13755b0bc8c10 100644
--- a/examples/model_selection/grid_search_digits.py
+++ b/examples/model_selection/grid_search_digits.py
@@ -51,7 +51,7 @@
     print()
 
     clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5,
-                       scoring='%s_weighted' % score)
+                       scoring='%s_macro' % score)
     clf.fit(X_train, y_train)
 
     print("Best parameters set found on development set:")
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index f28e6cc77093b..2794948ce93d5 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -469,7 +469,7 @@ def test_precision_recall_f1_score_multiclass_pos_label_none():
     # compute scores with default labels introspection
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                  pos_label=None,
-                                                 average='weighted')
+                                                 average='binary')
 
 
 def test_zero_precision_recall():
@@ -482,10 +482,10 @@ def test_zero_precision_recall():
         y_pred = np.array([2, 0, 1, 1, 2, 0])
 
         assert_almost_equal(precision_score(y_true, y_pred,
-                                            average='weighted'), 0.0, 2)
-        assert_almost_equal(recall_score(y_true, y_pred, average='weighted'),
+                                            average='macro'), 0.0, 2)
+        assert_almost_equal(recall_score(y_true, y_pred, average='macro'),
                             0.0, 2)
-        assert_almost_equal(f1_score(y_true, y_pred, average='weighted'),
+        assert_almost_equal(f1_score(y_true, y_pred, average='macro'),
                             0.0, 2)
 
     finally:
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 670180695e452..df9f6f988c0c5 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -439,9 +439,9 @@ def test_auto_weight():
         y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
         clf.set_params(class_weight='balanced')
         y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],).predict(X)
-        assert_true(metrics.f1_score(y, y_pred, average='weighted')
+        assert_true(metrics.f1_score(y, y_pred, average='macro')
                     <= metrics.f1_score(y, y_pred_balanced,
-                                        average='weighted'))
+                                        average='macro'))
 
 
 def test_bad_input():

From 488f7d609c556f7cd1333bdee2f9615f5d290439 Mon Sep 17 00:00:00 2001
From: hashcode55
Date: Sun, 19 Jun 2016 14:40:48 +0530
Subject: [PATCH 2/3] changed binary average back to macro

---
 doc/datasets/twenty_newsgroups.rst           | 4 ++--
 sklearn/metrics/tests/test_classification.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index e0e845a04f539..55fe227682190 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -197,8 +197,8 @@ It loses even more if we also strip this metadata from the training data:
   >>> clf.fit(vectors, newsgroups_train.target)
   >>> vectors_test = vectorizer.transform(newsgroups_test.data)
   >>> pred = clf.predict(vectors_test)
-  >>> metrics.f1_score(newsgroups_test.target, pred, average='weighted')
-  0.73160869205141166
+  >>> metrics.f1_score(newsgroups_test.target, pred, average='macro')
+  0.65437545099490202
 
 Some other classifiers cope better with this harder version of the task. Try
 running :ref:`example_model_selection_grid_search_text_feature_extraction.py` with and without
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 2794948ce93d5..5f93333e585cf 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -469,7 +469,7 @@ def test_precision_recall_f1_score_multiclass_pos_label_none():
     # compute scores with default labels introspection
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                  pos_label=None,
-                                                 average='binary')
+                                                 average='macro')
 
 
 def test_zero_precision_recall():

From 7d6c6297c146b99d0e75c3cd4efdf641bad9d14a Mon Sep 17 00:00:00 2001
From: hashcode55
Date: Sun, 19 Jun 2016 14:54:18 +0530
Subject: [PATCH 3/3] changed binomialNB to multinomialNB

---
 doc/datasets/twenty_newsgroups.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index 55fe227682190..2850b244eb12b 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -193,12 +193,12 @@ It loses even more if we also strip this metadata from the training data:
   ...                                      remove=('headers', 'footers', 'quotes'),
   ...                                      categories=categories)
   >>> vectors = vectorizer.fit_transform(newsgroups_train.data)
-  >>> clf = BernoulliNB(alpha=.01)
+  >>> clf = MultinomialNB(alpha=.01)
   >>> clf.fit(vectors, newsgroups_train.target)
   >>> vectors_test = vectorizer.transform(newsgroups_test.data)
   >>> pred = clf.predict(vectors_test)
   >>> metrics.f1_score(newsgroups_test.target, pred, average='macro')
-  0.65437545099490202
+  0.76995175184521725
 
 Some other classifiers cope better with this harder version of the task. Try
 running :ref:`example_model_selection_grid_search_text_feature_extraction.py` with and without
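
For reference (not part of the patch itself): every change above swaps support-weighted averaging for macro averaging in ``sklearn.metrics.f1_score`` and the related scorers. A minimal sketch of what that switch means on a made-up imbalanced multiclass problem; the arrays below are illustrative assumptions, not values from the patch:

    # Illustrative toy data, not taken from the patch.
    import numpy as np
    from sklearn.metrics import f1_score

    y_true = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2])
    y_pred = np.array([0, 0, 0, 0, 0, 0, 1, 0, 0, 2])

    # 'macro': unweighted mean of per-class F1; the poorly predicted
    # minority classes (1 and 2) pull the score down.
    print(f1_score(y_true, y_pred, average='macro'))

    # 'weighted': per-class F1 averaged by class support; dominated by the
    # well-predicted majority class 0, so the score comes out higher here.
    print(f1_score(y_true, y_pred, average='weighted'))

Because macro averaging weights every class equally regardless of support, the doctest outputs in the documentation change along with the ``average`` argument.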