docstring changes and minor optimizations · scikit-learn/scikit-learn@faebd86 · GitHub
[go: up one dir, main page]

Skip to content

Commit faebd86

Browse files
docstring changes and minor optimizations
1 parent b9fad64 commit faebd86

File tree

1 file changed

+8
-7
lines changed

1 file changed

+8
-7
lines changed

sklearn/preprocessing/data.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1694,7 +1694,7 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
16941694
16951695
Parameters
16961696
----------
1697-
values : 'auto', 'seen', int, list of ints, or list of lists of objects
1697+
values : 'auto', int, list of ints, or list of lists of objects
16981698
- 'auto' : determine set of values from training data. See the
16991699
documentation of `handle_unknown` for which values are considered
17001700
acceptable.
@@ -1731,11 +1731,11 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
17311731
Attributes
17321732
----------
17331733
feature_index_range_ : array, shape [n_features, 2]
1734-
`feature_index_range_[i]` specifies the range of column indices
1735-
occupied by the feature `i` in the one-hot encoded array.
1734+
``feature_index_range_[i]`` specifies the range of column indices
1735+
occupied by the input feature `i` in the one-hot encoded array.
17361736
17371737
one_hot_feature_index_ : array, shape [n_features_new]
1738-
`one_hot_feature_index_[i]` specifies which feature of the input
1738+
``one_hot_feature_index_[i]`` specifies which feature of the input
17391739
is encoded by column `i` in the one-hot encoded array.
17401740
17411741
Examples
@@ -1820,7 +1820,7 @@ def fit(self, X, y=None):
18201820
le = self._label_encoders[cat_index]
18211821
end = start + len(le.classes_)
18221822
self.feature_index_range_[i] = start, end
1823-
start += len(le.classes_)
1823+
start = end
18241824
cat_index += 1
18251825

18261826
indices = np.arange(start, start + n_features - num_cat)
@@ -1844,7 +1844,8 @@ def fit(self, X, y=None):
18441844
def _fit(self, X):
18451845
"Assumes `X` contains only categorical features."
18461846

1847-
X = check_array(X, dtype=np.object)
1847+
if not np.issubdtype(X.dtype.type, np.integer):
1848+
X = check_array(X, dtype=np.object)
18481849
n_samples, n_features = X.shape
18491850

18501851
self._n_features = n_features
@@ -1854,7 +1855,7 @@ def _fit(self, X):
18541855

18551856
if self.n_values is not None:
18561857
warnings.warn('The parameter `n_values` is deprecated, use the'
1857-
'parameter `classes_` instead and specify the '
1858+
'parameter `values` instead and specify the '
18581859
'expected values for each feature')
18591860

18601861
if isinstance(self.n_values, numbers.Integral):

0 commit comments

Comments
 (0)
0