don't change self.n_values in OneHotEncoder.fit by amueller · Pull Request #12286 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

don't change self.n_values in OneHotEncoder.fit #12286

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions sklearn/preprocessing/_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,8 @@ def n_values_(self):
return self._n_values_

def _handle_deprecations(self, X):

# internal version of the attributes to handle deprecations
self._n_values = self.n_values
self._categories = getattr(self, '_categories', None)
self._categorical_features = getattr(self, '_categorical_features',
None)
Expand Down Expand Up @@ -362,7 +362,7 @@ def _handle_deprecations(self, X):
)
warnings.warn(msg, FutureWarning)
self._legacy_mode = True
self.n_values = 'auto'
self._n_values = 'auto'

# if user specified categorical_features -> always use legacy mode
if self.categorical_features is not None:
Expand Down Expand Up @@ -427,18 +427,18 @@ def _legacy_fit_transform(self, X):
"be able to use arbitrary integer values as "
"category identifiers.")
n_samples, n_features = X.shape
if (isinstance(self.n_values, six.string_types) and
self.n_values == 'auto'):
if (isinstance(self._n_values, six.string_types) and
self._n_values == 'auto'):
n_values = np.max(X, axis=0) + 1
elif isinstance(self.n_values, numbers.Integral):
if (np.max(X, axis=0) >= self.n_values).any():
elif isinstance(self._n_values, numbers.Integral):
if (np.max(X, axis=0) >= self._n_values).any():
raise ValueError("Feature out of bounds for n_values=%d"
% self.n_values)
% self._n_values)
n_values = np.empty(n_features, dtype=np.int)
n_values.fill(self.n_values)
n_values.fill(self._n_values)
else:
try:
n_values = np.asarray(self.n_values, dtype=int)
n_values = np.asarray(self._n_values, dtype=int)
except (ValueError, TypeError):
raise TypeError("Wrong type for parameter `n_values`. Expected"
" 'auto', int or array of ints, got %r"
Expand All @@ -462,8 +462,8 @@ def _legacy_fit_transform(self, X):
shape=(n_samples, indices[-1]),
dtype=self.dtype).tocsr()

if (isinstance(self.n_values, six.string_types) and
self.n_values == 'auto'):
if (isinstance(self._n_values, six.string_types) and
self._n_values == 'auto'):
mask = np.array(out.sum(axis=0)).ravel() != 0
active_features = np.where(mask)[0]
out = out[:, active_features]
Expand Down Expand Up @@ -542,8 +542,8 @@ def _legacy_transform(self, X):
out = sparse.coo_matrix((data, (row_indices, column_indices)),
shape=(n_samples, indices[-1]),
dtype=self.dtype).tocsr()
if (isinstance(self.n_values, six.string_types) and
self.n_values == 'auto'):
if (isinstance(self._n_values, six.string_types) and
self._n_values == 'auto'):
out = out[:, self._active_features_]

return out if self.sparse else out.toarray()
Expand Down
0