@@ -1744,8 +1744,13 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
1744
1744
1745
1745
Parameters
1746
1746
----------
1747
- values : 'auto', int, list of ints, or list of lists of objects
1748
- - 'auto' : determine set of values from training data.
1747
+ values : 'auto', 'seen', int, list of ints, or list of lists of objects
1748
+ - 'auto' : determine set of values from training data. If the input
1749
+ is an int array, the valid values are ``range(max + 1)`` of the
1750
+ training data. For all other inputs, only values observed
1751
+ during `fit` are considered valid values for each feature.
1752
+ - 'seen' : only values observed during `fit` are considered valid
1753
+ values for each feature.
1749
1754
- int : values are in ``range(values)`` for all features
1750
1755
- list of ints : values for feature ``i`` are in ``range(values[i])``
1751
1756
- list of lists : values for feature ``i`` are in ``values[i]``
@@ -1828,7 +1833,8 @@ def fit(self, X, y=None):
1828
1833
self
1829
1834
"""
1830
1835
1831
- X = check_array (X , dtype = np .object , accept_sparse = 'csc' , copy = self .copy )
1836
+ X = check_array (X , dtype = np .object , accept_sparse = 'csc' ,
1837
+ copy = self .copy )
1832
1838
n_samples , n_features = X .shape
1833
1839
1834
1840
_apply_selected (X , self ._fit , dtype = self .dtype ,
@@ -1873,6 +1879,8 @@ def _fit(self, X):
1873
1879
for i in range (n_features ):
1874
1880
le = self .label_encoders_ [i ]
1875
1881
if self .values == 'auto' :
1882
+ le .fit (np .arange (1 + np .max (X [:, i ])))
1883
+ elif self .values == 'seen' :
1876
1884
le .fit (X [:, i ])
1877
1885
elif isinstance (self .values , numbers .Integral ):
1878
1886
if (np .max (X , axis = 0 ) >= self .values ).any ():
0 commit comments