Revert "FIX incorrect error when OneHotEncoder.transform called prior… · xhluca/scikit-learn@7edaffa · GitHub
[go: up one dir, main page]

Skip to content

Commit 7edaffa

Browse files
author
Xing
authored
Revert "FIX incorrect error when OneHotEncoder.transform called prior to fit (scikit-learn#12443)"
This reverts commit 6d389ba.
1 parent 056e574 commit 7edaffa

File tree

2 files changed

+3
-31
lines changed

2 files changed

+3
-31
lines changed

sklearn/preprocessing/_encoders.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
from .base import _transform_selected
2222
from .label import _encode, _encode_check_unknown
2323

24+
2425
range = six.moves.range
2526

27+
2628
__all__ = [
2729
'OneHotEncoder',
2830
'OrdinalEncoder'
@@ -381,12 +383,6 @@ def _handle_deprecations(self, X):
381383
"The 'categorical_features' keyword is deprecated in "
382384
"version 0.20 and will be removed in 0.22. You can "
383385
"use the ColumnTransformer instead.", DeprecationWarning)
384-
# Set categories_ to empty list if no categorical columns exist
385-
n_features = X.shape[1]
386-
sel = np.zeros(n_features, dtype=bool)
387-
sel[np.asarray(self.categorical_features)] = True
388-
if sum(sel) == 0:
389-
self.categories_ = []
390386
self._legacy_mode = True
391387
self._categorical_features = self.categorical_features
392388
else:
@@ -595,7 +591,6 @@ def transform(self, X):
595591
X_out : sparse matrix if sparse=True else a 2-d array
596592
Transformed input.
597593
"""
598-
check_is_fitted(self, 'categories_')
599594
if self._legacy_mode:
600595
return _transform_selected(X, self._legacy_transform, self.dtype,
601596
self._categorical_features,
@@ -688,7 +683,7 @@ def get_feature_names(self, input_features=None):
688683
cats = self.categories_
689684
if input_features is None:
690685
input_features = ['x%d' % i for i in range(len(cats))]
691-
elif len(input_features) != len(self.categories_):
686+
elif(len(input_features) != len(self.categories_)):
692687
raise ValueError(
693688
"input_features should have length equal to number of "
694689
"features ({}), got {}".format(len(self.categories_),

sklearn/preprocessing/tests/test_encoders.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from scipy import sparse
88
import pytest
99

10-
from sklearn.exceptions import NotFittedError
1110
from sklearn.utils.testing import assert_array_equal
1211
from sklearn.utils.testing import assert_equal
1312
from sklearn.utils.testing import assert_raises
@@ -251,28 +250,6 @@ def test_one_hot_encoder_handle_unknown():
251250
assert_raises(ValueError, oh.fit, X)
252251

253252

254-
def test_one_hot_encoder_not_fitted():
255-
X = np.array([['a'], ['b']])
256-
enc = OneHotEncoder(categories=['a', 'b'])
257-
msg = ("This OneHotEncoder instance is not fitted yet. "
258-
"Call 'fit' with appropriate arguments before using this method.")
259-
with pytest.raises(NotFittedError, match=msg):
260-
enc.transform(X)
261-
262-
263-
def test_one_hot_encoder_no_categorical_features():
264-
X = np.array([[3, 2, 1], [0, 1, 1]], dtype='float64')
265-
266-
cat = [False, False, False]
267-
enc = OneHotEncoder(categorical_features=cat)
268-
with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
269-
X_tr = enc.fit_transform(X)
270-
expected_features = np.array(list(), dtype='object')
271-
assert_array_equal(X, X_tr)
272-
assert_array_equal(enc.get_feature_names(), expected_features)
273-
assert enc.categories_ == []
274-
275-
276253
@pytest.mark.parametrize("output_dtype", [np.int32, np.float32, np.float64])
277254
@pytest.mark.parametrize("input_dtype", [np.int32, np.float32, np.float64])
278255
def test_one_hot_encoder_dtype(input_dtype, output_dtype):

0 commit comments

Comments
 (0)
0