2
2
import pytest
3
3
from numpy .testing import assert_array_equal
4
4
5
- from sklearn .utils ._encode import _unique , _encode
5
+ from sklearn .utils ._encode import _unique
6
+ from sklearn .utils ._encode import _encode
7
+ from sklearn .utils ._encode import _encode_check_unknown
6
8
7
9
8
10
@pytest .mark .parametrize (
@@ -21,7 +23,7 @@ def test_encode_util(values, expected):
21
23
assert_array_equal (encoded , np .array ([1 , 0 , 2 , 0 , 2 ]))
22
24
23
25
24
- def test_encode_check_unknown ():
26
+ def test_encode_with_check_unknown ():
25
27
# test for the check_unknown parameter of _encode()
26
28
uniques = np .array ([1 , 2 , 3 ])
27
29
values = np .array ([1 , 2 , 3 , 4 ])
@@ -40,3 +42,20 @@ def test_encode_check_unknown():
40
42
with pytest .raises (ValueError ,
41
43
match = 'y contains previously unseen labels' ):
42
44
_encode (values , uniques = uniques , check_unknown = False )
45
+
46
+
47
@pytest.mark.parametrize("values, uniques, expected_diff", [
    (np.array([1, 2, 3, 4]), np.array([1, 2, 3]), [4]),
    (np.array(['a', 'b', 'c', 'd'], dtype=object),
     np.array(['a', 'b', 'c'], dtype=object),
     np.array(['d'])),
])
def test_encode_check_unknown(values, uniques, expected_diff):
    """Check both return forms of ``_encode_check_unknown``.

    Parameters
    ----------
    values : ndarray
        Input values; in each parametrized case the last element is the
        only one that does not appear in ``uniques``.
    uniques : ndarray
        Reference values that ``values`` is checked against.
    expected_diff : array-like
        What the helper is expected to report as the difference
        (presumably the entries of ``values`` missing from ``uniques``;
        confirm against the helper's own documentation).
    """
    # Default call: a single return value, the diff.
    diff = _encode_check_unknown(values, uniques)
    assert_array_equal(diff, expected_diff)

    # With return_mask=True the helper returns a (diff, mask) pair.
    diff, valid_mask = _encode_check_unknown(values, uniques, return_mask=True)
    assert_array_equal(diff, expected_diff)
    # The expected mask is False only at the last position, the one entry
    # of ``values`` that is absent from ``uniques`` in both cases.
    assert_array_equal(valid_mask, [True, True, True, False])
0 commit comments