@@ -574,24 +574,6 @@ def test_ordinal_encoder_inverse():
574
574
enc .inverse_transform (X_tr )
575
575
576
576
577
- @pytest .mark .parametrize ("X" , [np .array ([[1 , np .nan ]]).T ,
578
- np .array ([['a' , np .nan ]], dtype = object ).T ],
579
- ids = ['numeric' , 'object' ])
580
- def test_ordinal_encoder_raise_missing (X ):
581
- ohe = OrdinalEncoder ()
582
-
583
- with pytest .raises (ValueError , match = "Input contains NaN" ):
584
- ohe .fit (X )
585
-
586
- with pytest .raises (ValueError , match = "Input contains NaN" ):
587
- ohe .fit_transform (X )
588
-
589
- ohe .fit (X [:1 , :])
590
-
591
- with pytest .raises (ValueError , match = "Input contains NaN" ):
592
- ohe .transform (X )
593
-
594
-
595
577
def test_ordinal_encoder_handle_unknowns_string ():
596
578
enc = OrdinalEncoder (handle_unknown = 'use_encoded_value' , unknown_value = - 2 )
597
579
X_fit = np .array ([['a' , 'x' ], ['b' , 'y' ], ['c' , 'z' ]], dtype = object )
@@ -930,3 +912,122 @@ def test_ohe_missing_value_support_pandas_categorical(pd_nan_type):
930
912
assert len (ohe .categories_ ) == 1
931
913
assert_array_equal (ohe .categories_ [0 ][:- 1 ], ['a' , 'b' , 'c' ])
932
914
assert np .isnan (ohe .categories_ [0 ][- 1 ])
915
+
916
+
917
def test_ordinal_encoder_passthrough_missing_values_float_errors_dtype():
    """Check that fitting NaN data with ``dtype=np.int32`` raises.

    The encoder is fitted on a single float column that contains
    ``np.nan``; ``fit`` is expected to raise a ``ValueError`` whose message
    matches the pattern below.
    """
    encoder = OrdinalEncoder(dtype=np.int32)
    # One feature (column vector) whose first entry is missing.
    data = np.array([[np.nan, 3.0, 1.0, 3.0]]).T

    expected_error = (
        r"There are missing values in features \[0\]. For OrdinalEncoder "
        "to passthrough missing values, the dtype parameter must be a "
        "float"
    )
    with pytest.raises(ValueError, match=expected_error):
        encoder.fit(data)
928
+
929
+
930
def test_ordinal_encoder_passthrough_missing_values_float():
    """Check the fit / transform / inverse round trip on floats with NaN.

    The learned categories are expected to list NaN last, NaN is expected
    to stay NaN in the encoded output, and ``inverse_transform`` is
    expected to give back the original column.
    """
    column = np.array([[np.nan, 3.0, 1.0, 3.0]], dtype=np.float64).T
    encoder = OrdinalEncoder().fit(column)

    learned = encoder.categories_
    assert len(learned) == 1
    assert_allclose(learned[0], [1.0, 3.0, np.nan])

    # 1.0 -> 0.0, 3.0 -> 1.0, NaN is left as NaN.
    encoded = encoder.transform(column)
    assert_allclose(encoded, [[np.nan], [1.0], [0.0], [1.0]])

    decoded = encoder.inverse_transform(encoded)
    assert_allclose(decoded, column)
944
+
945
+
946
@pytest.mark.parametrize('pd_nan_type', ['pd.NA', 'np.nan'])
def test_ordinal_encoder_missing_value_support_pandas_categorical(pd_nan_type):
    """Check encoding of a pandas categorical column with a missing entry.

    Parameters
    ----------
    pd_nan_type : {'pd.NA', 'np.nan'}
        Which missing-value marker is placed in the categorical column.
    """
    uses_pd_na = pd_nan_type == 'pd.NA'
    # pd.NA is in pandas 1.0, so that case needs a minimum version.
    pd = pytest.importorskip('pandas',
                             minversion="1.0" if uses_pd_na else None)
    missing = pd.NA if uses_pd_na else np.nan

    # Single categorical feature with the missing entry in the third row.
    raw = pd.Series(['c', 'a', missing, 'b', 'a'], dtype='category')
    frame = pd.DataFrame({'col1': raw})

    encoder = OrdinalEncoder().fit(frame)
    learned = encoder.categories_
    assert len(learned) == 1
    assert_array_equal(learned[0][:3], ['a', 'b', 'c'])
    assert np.isnan(learned[0][-1])

    encoded = encoder.transform(frame)
    assert_allclose(encoded, [[2.0], [0.0], [np.nan], [1.0], [0.0]])

    # The round trip restores the labels; the missing row comes back as NaN.
    decoded = encoder.inverse_transform(encoded)
    assert decoded.shape == (5, 1)
    assert_array_equal(decoded[:2, 0], ['c', 'a'])
    assert_array_equal(decoded[3:, 0], ['b', 'a'])
    assert np.isnan(decoded[2, 0])
977
+
978
+
979
# NOTE: this parametrization previously listed the object-dtype case twice,
# byte for byte, under the ids 'object-None-missing-value' and
# 'object-nan-missing_value'. Both copies used np.nan, so the duplicate added
# no coverage and its id was misleading; it is listed once here.
@pytest.mark.parametrize("X, X2, cats, cat_dtype", [
    (np.array([['a', np.nan]], dtype=object).T,
     np.array([['a', 'b']], dtype=object).T,
     [np.array(['a', np.nan, 'd'], dtype=object)], np.object_),
    (np.array([[2.0, np.nan]], dtype=np.float64).T,
     np.array([[3.0]], dtype=np.float64).T,
     [np.array([2.0, 4.0, np.nan])], np.float64),
], ids=['object-nan-missing-value', 'numeric-missing-value'])
def test_ordinal_encoder_specified_categories_missing_passthrough(
        X, X2, cats, cat_dtype):
    """Test NaN passthrough when the categories are specified by the user.

    Parameters
    ----------
    X : ndarray
        Column containing one known category followed by NaN.
    X2 : ndarray
        Column containing a value that is not listed in ``cats``.
    cats : list of ndarray
        User-specified categories, including NaN.
    cat_dtype : dtype
        Expected dtype of the fitted ``categories_[0]``.
    """
    oe = OrdinalEncoder(categories=cats)
    # The known category is encoded as 0 and NaN stays NaN.
    exp = np.array([[0.], [np.nan]])
    assert_array_equal(oe.fit_transform(X), exp)
    # manually specified categories should keep the expected dtype
    assert oe.categories_[0].dtype == cat_dtype

    # when specifying categories manually, unknown categories should already
    # raise when fitting
    oe = OrdinalEncoder(categories=cats)
    with pytest.raises(ValueError, match="Found unknown categories"):
        oe.fit(X2)
1006
+
1007
+
1008
@pytest.mark.parametrize(
    "X, expected_X_trans, X_test",
    [
        (
            np.array([[1.0, np.nan, 3.0]]).T,
            np.array([[0.0, np.nan, 1.0]]).T,
            np.array([[4.0]]),
        ),
        (
            np.array([[1.0, 4.0, 3.0]]).T,
            np.array([[0.0, 2.0, 1.0]]).T,
            np.array([[np.nan]]),
        ),
        (
            np.array([['c', np.nan, 'b']], dtype=object).T,
            np.array([[1.0, np.nan, 0.0]]).T,
            np.array([['d']], dtype=object),
        ),
        (
            np.array([['c', 'a', 'b']], dtype=object).T,
            np.array([[2.0, 0.0, 1.0]]).T,
            np.array([[np.nan]], dtype=object),
        ),
    ],
)
def test_ordinal_encoder_handle_missing_and_unknown(
    X, expected_X_trans, X_test
):
    """Check how missing values interact with ``handle_unknown``.

    Each case fits on one column and then transforms a single value that
    was absent during fit (a new category, or NaN when the training data
    had none). That value is expected to be encoded as ``unknown_value``.
    """
    encoder = OrdinalEncoder(
        handle_unknown="use_encoded_value", unknown_value=-1
    )

    # NaN seen during fit is expected to stay NaN in the training encoding.
    fitted_codes = encoder.fit_transform(X)
    assert_allclose(fitted_codes, expected_X_trans)

    unseen_codes = encoder.transform(X_test)
    assert_allclose(unseen_codes, [[-1.0]])
0 commit comments