10000 FIX deprecation handling in OneHotEncoder for categorical_features + … · scikit-learn/scikit-learn@fce73db · GitHub
[go: up one dir, main page]

Skip to content

Commit fce73db

Browse files
jorisvandenbossche authored and jnothman committed
FIX deprecation handling in OneHotEncoder for categorical_features + handle_unknown='ignore' case (#12923)
1 parent 7ee321f commit fce73db

File tree

3 files changed

+37
-2
lines changed

3 files changed

+37
-2
lines changed

doc/whats_new/v0.20.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,27 @@
22

33
.. currentmodule:: sklearn
44

5+
.. _changes_0_20_3:
6+
7+
Version 0.20.3
8+
==============
9+
10+
**??, 2019**
11+
12+
This is a bug-fix release with some minor documentation improvements and
13+
enhancements to features released in 0.20.0.
14+
15+
Changelog
16+
---------
17+
18+
:mod:`sklearn.preprocessing`
19+
............................
20+
21+
- |Fix| Fixed a bug in :class:`preprocessing.OneHotEncoder` where the
22+
deprecation of ``categorical_features`` was handled incorrectly in
23+
combination with ``handle_unknown='ignore'``.
24+
:issue:`12881` by `Joris Van den Bossche`_.
25+
526
.. _changes_0_20_2:
627

728
Version 0.20.2

sklearn/preprocessing/_encoders.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,9 @@ def _handle_deprecations(self, X):
331331
self._legacy_mode = True
332332

333333
else: # n_values = 'auto'
334+
# n_values can also be None (default to catch usage), so set
335+
# _n_values to 'auto' explicitly
336+
self._n_values = 'auto'
334337
if self.handle_unknown == 'ignore':
335338
# no change in behaviour, no need to raise deprecation warning
336339
self._legacy_mode = False
@@ -366,7 +369,6 @@ def _handle_deprecations(self, X):
366369
)
367370
warnings.warn(msg, FutureWarning)
368371
self._legacy_mode = True
369-
self._n_values = 'auto'
370372

371373
# if user specified categorical_features -> always use legacy mode
372374
if self.categorical_features is not None:
@@ -452,7 +454,7 @@ def _legacy_fit_transform(self, X):
452454
except (ValueError, TypeError):
453455
raise TypeError("Wrong type for parameter `n_values`. Expected"
454456
" 'auto', int or array of ints, got %r"
455-
% type(X))
457+
% type(self._n_values))
456458
if n_values.ndim < 1 or n_values.shape[0] != X.shape[1]:
457459
raise ValueError("Shape mismatch: if n_values is an array,"
458460
" it has to be of shape (n_features,).")

sklearn/preprocessing/tests/test_encoders.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,18 @@ def test_one_hot_encoder_categorical_features():
226226
assert_raises(ValueError, oh.fit, X)
227227

228228

229+
def test_one_hot_encoder_categorical_features_ignore_unknown():
230+
# GH12881 bug in combination of categorical_features with ignore
231+
X = np.array([[1, 2, 3], [4, 5, 6], [2, 3, 2]]).T
232+
oh = OneHotEncoder(categorical_features=[2], handle_unknown='ignore')
233+
234+
with ignore_warnings(category=DeprecationWarning):
235+
res = oh.fit_transform(X)
236+
237+
expected = np.array([[1, 0, 1], [0, 1, 0], [1, 2, 3], [4, 5, 6]]).T
238+
assert_array_equal(res.toarray(), expected)
239+
240+
229241
def test_one_hot_encoder_handle_unknown():
230242
X = np.array([[0, 2, 1], [1, 0, 3], [1, 0, 2]])
231243
X2 = np.array([[4, 1, 1]])

0 commit comments

Comments
 (0)
0