diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index cb9a2f527b40f..ce62373f780f9 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -105,9 +105,12 @@ def fit(self, y): ------- self : returns an instance of self. """ - y = column_or_1d(y, warn=True) + # y = column_or_1d(y, warn=True) _check_numpy_unicode_bug(y) - self.classes_ = np.unique(y) + if 'multioutput' in type_of_target(y): + self.classes_ = [np.unique(y[:, k]) for k in range(y.shape[1])] + else: + self.classes_ = np.unique(y) return self def fit_transform(self, y): @@ -122,9 +125,17 @@ def fit_transform(self, y): ------- y : array-like of shape [n_samples] """ - y = column_or_1d(y, warn=True) + # y = column_or_1d(y, warn=True) _check_numpy_unicode_bug(y) - self.classes_, y = np.unique(y, return_inverse=True) + if 'multioutput' in type_of_target(y): + self.classes_ = [] + y_enc = np.empty(y.shape, dtype=np.int) + for k in range(y_enc.shape[1]): + classes, y_enc[:, k] = np.unique(y[:, k], return_inverse=True) + self.classes_.append(classes) + y = y_enc + else: + self.classes_, y = np.unique(y, return_inverse=True) return y def transform(self, y): @@ -141,12 +152,18 @@ def transform(self, y): """ self._check_fitted() - classes = np.unique(y) - _check_numpy_unicode_bug(classes) - if len(np.intersect1d(classes, self.classes_)) < len(classes): - diff = np.setdiff1d(classes, self.classes_) - raise ValueError("y contains new labels: %s" % str(diff)) - return np.searchsorted(self.classes_, y) + if 'multioutput' in type_of_target(y): + y_enc = np.empty(y.shape, dtype=np.int) + for k in range(y.shape[1]): + y_enc[:, k] = np.searchsorted(self.classes_[k], y[:, k]) + return y_enc + else: + classes = np.unique(y) + _check_numpy_unicode_bug(classes) + if len(np.intersect1d(classes, self.classes_)) < len(classes): + diff = np.setdiff1d(classes, self.classes_) + raise ValueError("y contains new labels: %s" % str(diff)) + return np.searchsorted(self.classes_, y) def inverse_transform(self, y): """Transform labels back to original encoding. @@ -668,9 +685,6 @@ class MultiLabelBinarizer(BaseEstimator, TransformerMixin): classes : array-like of shape [n_classes] (optional) Indicates an ordering for the class labels - sparse_output : boolean (default: False), - Set to true if output binary array is desired in CSR sparse format - Attributes ---------- classes_ : array of labels diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index f523870ffd2fd..bdaa62fa53a2e 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -229,6 +229,79 @@ def test_label_encoder(): assert_raises(ValueError, le.transform, [0, 6]) +def test_label_encoder_multioutput(): + """Test LabelEncoder's with multioutput target data""" + le = LabelEncoder() + y = np.array([[1,2,3,4,0], + [1,0,0,0,0], + [1,1,1,1,1], + [1,-1,2,0,1], + [1,0,1,1,0], + [1,2,3,4,5]]) + classes = [[1],[-1,0,1,2],[0,1,2,3],[0,1,4],[0,1,5]] + y_enc = np.array([[0,3,3,2,0], + [0,1,0,0,0], + [0,2,1,1,1], + [0,0,2,0,1], + [0,1,1,1,0], + [0,3,3,2,2]]) + + # Test fit + le.fit(y) + for i in range(y.shape[1]): + assert_array_equal(le.classes_[i], classes[i]) + + # Test transform + assert_array_equal(le.transform(y), y_enc) + + # Test inverse transform + assert_array_equal(le.inverse_transform(y_enc), y) + + # Test fit transform + le = LabelEncoder() + ret = le.fit_transform(y) + assert_array_equal(ret, y_enc) + + # Test unseeen label error + assert_raises(ValueError, le.transform, [0, 6]) + +def test_label_encoder_sparse_multioutput(): + """Test LabelEncoder's with multioutput target data in sparse formats""" + """Test LabelEncoder's with multioutput target data""" + le = LabelEncoder() + y = sp.csc_matrix(np.array([[0,2,3,4,0], + [0,0,0,0,0], + [0,1,1,1,0], + [0,-1,2,0,1], + [0,0,1,1,0], + [0,2,3,4,5]])) + classes = [[0],[-1,0,1,2],[0,1,2,3],[0,1,4],[0,1,5]] + y = sp.csc_matrix(np.array([[0,3,3,2,0], + [0,2,0,0,0], + [0,2,1,1,0], + [0,0,2,0,1], + [0,1,1,1,0], + [0,3,3,2,2]])) + + # Test fit + le.fit(y) + for i in range(y.shape[1]): + assert_array_equal(le.classes_[i], classes[i]) + + # Test transform + assert_array_equal(le.transform(y).toarray(), y_enc) + + # Test inverse transform + assert_array_equal(le.inverse_transform(y_enc).toarray(), y) + + # Test fit transform + le = LabelEncoder() + ret = le.fit_transform(y) + assert_array_equal(ret.toarray(), y_enc) + + # Test unseeen label error + assert_raises(ValueError, le.transform, [0, 6]) + def test_label_encoder_fit_transform(): """Test fit_transform""" le = LabelEncoder()