From e9492b7ec2d341d4bb8309b4116f33b78377295a Mon Sep 17 00:00:00 2001 From: dsquareindia Date: Sun, 24 Jan 2016 21:56:50 +0530 Subject: [PATCH] LabelBinarizer single label case now works for sparse and dense case --- sklearn/preprocessing/label.py | 11 +++++++---- sklearn/preprocessing/tests/test_label.py | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index 1aaeacd1785ef..e571d3f44be7f 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -472,10 +472,13 @@ def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False): classes = np.asarray(classes) if y_type == "binary": - if len(classes) == 1: - Y = np.zeros((len(y), 1), dtype=np.int) - Y += neg_label - return Y + if n_classes == 1: + if sparse_output: + return sp.csr_matrix((n_samples, 1), dtype=int) + else: + Y = np.zeros((len(y), 1), dtype=np.int) + Y += neg_label + return Y elif len(classes) >= 3: y_type = "multiclass" diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index 406989a4a2d15..baf1cfbc8bddd 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -11,6 +11,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_equal +from sklearn.utils.testing import assert_true from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import ignore_warnings @@ -35,16 +36,25 @@ def toarray(a): def test_label_binarizer(): - lb = LabelBinarizer() - # one-class case defaults to negative label + # For dense case: inp = ["pos", "pos", "pos", "pos"] + lb = LabelBinarizer(sparse_output=False) expected = np.array([[0, 0, 0, 0]]).T got = lb.fit_transform(inp) assert_array_equal(lb.classes_, ["pos"]) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp) + # For sparse case: + lb = LabelBinarizer(sparse_output=True) + got = lb.fit_transform(inp) + assert_true(issparse(got)) + assert_array_equal(lb.classes_, ["pos"]) + assert_array_equal(expected, got.toarray()) + assert_array_equal(lb.inverse_transform(got.toarray()), inp) + + lb = LabelBinarizer(sparse_output=False) # two-class case inp = ["neg", "pos", "pos", "neg"] expected = np.array([[0, 1, 1, 0]]).T