[WIP] MNT Use isinstance instead of dtype.kind check for scalar validation. by massich · Pull Request #10017 · scikit-learn/scikit-learn

Closed
Changes from all commits
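This changeset replaces ad-hoc scalar type checks (module-local INTEGER_TYPES tuples and np.asarray(x).dtype.kind probing) with shared SCALAR_INTEGER_TYPES / SCALAR_FLOATING_TYPES tuples imported from sklearn.utils.validation. The definition of those tuples is not part of the hunks shown below; a minimal sketch consistent with the INTEGER_TYPES constants this PR deletes from nmf.py and iforest.py would be:

# Hypothetical sketch of the constants imported below; the actual
# definitions in sklearn/utils/validation.py are not shown in this diff.
import numbers
import numpy as np

# mirrors the removed INTEGER_TYPES = (numbers.Integral, np.integer)
SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer)
# assumed analogue for floats; accepts Python and numpy float scalars
SCALAR_FLOATING_TYPES = (float, np.floating)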
8 changes: 5 additions & 3 deletions benchmarks/bench_plot_nmf.py
@@ -16,12 +16,13 @@
 import matplotlib.pyplot as plt
 import pandas

+from sklearn.utils.validation import SCALAR_INTEGER_TYPES
 from sklearn.utils.testing import ignore_warnings
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.decomposition.nmf import NMF
 from sklearn.decomposition.nmf import _initialize_nmf
 from sklearn.decomposition.nmf import _beta_divergence
-from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
+from sklearn.decomposition.nmf import _check_init
 from sklearn.externals.joblib import Memory
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.extmath import safe_sparse_dot, squared_norm
@@ -237,11 +238,12 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
         if n_components is None:
             n_components = n_features

-        if (not isinstance(n_components, INTEGER_TYPES) or
+        if (not isinstance(n_components, SCALAR_INTEGER_TYPES) or
                 n_components <= 0):
             raise ValueError("Number of components must be a positive integer;"
                              " got (n_components=%r)" % n_components)
-        if not isinstance(self.max_iter, INTEGER_TYPES) or self.max_iter < 0:
+        if (not isinstance(self.max_iter, SCALAR_INTEGER_TYPES) or
+                self.max_iter < 0):
             raise ValueError("Maximum number of iterations must be a positive "
                              "integer; got (max_iter=%r)" % self.max_iter)
         if not isinstance(self.tol, numbers.Number) or self.tol < 0:
7 changes: 3 additions & 4 deletions sklearn/decomposition/nmf.py
@@ -22,13 +22,12 @@
 from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
 from ..utils.extmath import safe_min
 from ..utils.validation import check_is_fitted, check_non_negative
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..exceptions import ConvergenceWarning
 from .cdnmf_fast import _update_cdnmf_fast

 EPSILON = np.finfo(np.float32).eps

-INTEGER_TYPES = (numbers.Integral, np.integer)
-

 def norm(x):
     """Dot product-based Euclidean norm implementation
@@ -984,10 +983,10 @@ def non_negative_factorization(X, W=None, H=None, n_components=None,
     if n_components is None:
         n_components = n_features

-    if not isinstance(n_components, INTEGER_TYPES) or n_components <= 0:
+    if not isinstance(n_components, SCALAR_INTEGER_TYPES) or n_components <= 0:
         raise ValueError("Number of components must be a positive integer;"
                          " got (n_components=%r)" % n_components)
-    if not isinstance(max_iter, INTEGER_TYPES) or max_iter < 0:
+    if not isinstance(max_iter, SCALAR_INTEGER_TYPES) or max_iter < 0:
         raise ValueError("Maximum number of iterations must be a positive "
                          "integer; got (max_iter=%r)" % max_iter)
     if not isinstance(tol, numbers.Number) or tol < 0:
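For illustration, the rewritten n_components guard can be exercised standalone; this is a sketch using the assumed constant from above, not code from the diff:

import numbers
import numpy as np

SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer)  # assumed definition

def check_n_components(n_components):
    # Same pattern as the non_negative_factorization check above.
    if not isinstance(n_components, SCALAR_INTEGER_TYPES) or n_components <= 0:
        raise ValueError("Number of components must be a positive integer;"
                         " got (n_components=%r)" % n_components)

check_n_components(5)            # accepted: Python int
check_n_components(np.int32(5))  # accepted: numpy integer scalar
try:
    check_n_components(5.0)      # rejected: floats are not integral
except ValueError as exc:
    print(exc)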
7 changes: 3 additions & 4 deletions sklearn/decomposition/pca.py
@@ -11,7 +11,6 @@
 # License: BSD 3 clause

 from math import log, sqrt
-import numbers

 import numpy as np
 from scipy import linalg
@@ -28,7 +27,7 @@
 from ..utils import check_array
 from ..utils.extmath import fast_logdet, randomized_svd, svd_flip
 from ..utils.extmath import stable_cumsum
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, SCALAR_INTEGER_TYPES


 def _assess_dimension_(spectrum, rank, n_samples, n_features):
@@ -423,7 +422,7 @@ def _fit_full(self, X, n_components):
                              "svd_solver='full'"
                              % (n_components, min(n_samples, n_features)))
         elif n_components >= 1:
-            if not isinstance(n_components, (numbers.Integral, np.integer)):
+            if not isinstance(n_components, SCALAR_INTEGER_TYPES):
                 raise ValueError("n_components=%r must be of type int "
                                  "when greater than or equal to 1, "
                                  "was of type=%r"
@@ -488,7 +487,7 @@ def _fit_truncated(self, X, n_components, svd_solver):
                              "svd_solver='%s'"
                              % (n_components, min(n_samples, n_features),
                                 svd_solver))
-        elif not isinstance(n_components, (numbers.Integral, np.integer)):
+        elif not isinstance(n_components, SCALAR_INTEGER_TYPES):
             raise ValueError("n_components=%r must be of type int "
                              "when greater than or equal to 1, was of type=%r"
                              % (n_components, type(n_components)))
7 changes: 3 additions & 4 deletions sklearn/ensemble/bagging.py
@@ -6,7 +6,6 @@
 from __future__ import division

 import itertools
-import numbers
 import numpy as np
 from warnings import warn
 from abc import ABCMeta, abstractmethod
@@ -20,6 +19,7 @@
 from ..utils import check_random_state, check_X_y, check_array, column_or_1d
 from ..utils.random import sample_without_replacement
 from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..utils import indices_to_mask, check_consistent_length
 from ..utils.metaestimators import if_delegate_has_method
 from ..utils.multiclass import check_classification_targets
@@ -299,7 +299,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         # Validate max_samples
         if max_samples is None:
             max_samples = self.max_samples
-        elif not isinstance(max_samples, (numbers.Integral, np.integer)):
+        elif not isinstance(max_samples, SCALAR_INTEGER_TYPES):
             max_samples = int(max_samples * X.shape[0])

         if not (0 < max_samples <= X.shape[0]):
@@ -309,7 +309,7 @@
         self._max_samples = max_samples

         # Validate max_features
-        if isinstance(self.max_features, (numbers.Integral, np.integer)):
+        if isinstance(self.max_features, SCALAR_INTEGER_TYPES):
             max_features = self.max_features
         else:  # float
             max_features = int(self.max_features * self.n_features_)
@@ -578,7 +578,6 @@ def _validate_estimator(self):
     def _set_oob_score(self, X, y):
         n_samples = y.shape[0]
         n_classes_ = self.n_classes_
-        classes_ = self.classes_

         predictions = np.zeros((n_samples, n_classes_))

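In bagging the same tuple doubles as a dispatcher: an integral max_samples/max_features is an absolute count, anything else is treated as a fraction. A standalone sketch of that branch (assumed constant, simplified from _fit above):

import numbers
import numpy as np

SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer)  # assumed definition

def resolve_max_samples(max_samples, n_samples):
    # Integral -> absolute count; otherwise interpret as a fraction.
    if not isinstance(max_samples, SCALAR_INTEGER_TYPES):
        max_samples = int(max_samples * n_samples)
    if not 0 < max_samples <= n_samples:
        raise ValueError("max_samples must be in (0, n_samples]")
    return max_samples

print(resolve_max_samples(64, 100))    # 64 (count)
print(resolve_max_samples(0.64, 100))  # 64 (fraction)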
4 changes: 2 additions & 2 deletions sklearn/ensemble/base.py
@@ -6,12 +6,12 @@
 # License: BSD 3 clause

 import numpy as np
-import numbers

 from ..base import clone
 from ..base import BaseEstimator
 from ..base import MetaEstimatorMixin
 from ..utils import _get_n_jobs, check_random_state
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..externals import six
 from abc import ABCMeta, abstractmethod

@@ -100,7 +100,7 @@ def __init__(self, base_estimator, n_estimators=10,
     def _validate_estimator(self, default=None):
         """Check the estimator and the n_estimator attribute, set the
         `base_estimator_` attribute."""
-        if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):
+        if not isinstance(self.n_estimators, SCALAR_INTEGER_TYPES):
             raise ValueError("n_estimators must be an integer, "
                              "got {0}.".format(type(self.n_estimators)))

8 changes: 4 additions & 4 deletions sklearn/ensemble/gradient_boosting.py
@@ -35,7 +35,6 @@
 from ._gradient_boosting import predict_stage
 from ._gradient_boosting import _random_sample_mask

-import numbers
 import numpy as np

 from scipy import stats
@@ -59,6 +58,7 @@
 from ..utils.fixes import logsumexp
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import check_is_fitted
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..utils.multiclass import check_classification_targets
 from ..exceptions import NotFittedError

@@ -870,7 +870,7 @@ def _check_params(self):
                              "or 'log2'." % self.max_features)
         elif self.max_features is None:
             max_features = self.n_features_
-        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
+        elif isinstance(self.max_features, SCALAR_INTEGER_TYPES):
             max_features = self.max_features
         else:  # float
             if 0. < self.max_features <= 1.:
@@ -881,8 +881,8 @@

         self.max_features_ = max_features

-        if not isinstance(self.n_iter_no_change,
-                          (numbers.Integral, np.integer, type(None))):
+        if not (isinstance(self.n_iter_no_change, SCALAR_INTEGER_TYPES) or
+                self.n_iter_no_change is None):
             raise ValueError("n_iter_no_change should either be None or an "
                              "integer. %r was passed"
                              % self.n_iter_no_change)
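The n_iter_no_change rewrite drops type(None) from the isinstance tuple in favour of an explicit `is None` test; both accept the same values, the new form just states the optional case directly. A quick check (assumed constant):

import numbers
import numpy as np

SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer)  # assumed definition

for value in (None, 10, np.int64(10), 1.5):
    old_style = isinstance(value, SCALAR_INTEGER_TYPES + (type(None),))
    new_style = isinstance(value, SCALAR_INTEGER_TYPES) or value is None
    print(value, old_style, new_style)  # identical results; 1.5 fails both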
11 changes: 5 additions & 6 deletions sklearn/ensemble/iforest.py
@@ -11,17 +11,15 @@

 from scipy.sparse import issparse

-import numbers
 from ..externals import six
 from ..tree import ExtraTreeRegressor
 from ..utils import check_random_state, check_array
+from ..utils.validation import SCALAR_INTEGER_TYPES

 from .bagging import BaseBagging

 __all__ = ["IsolationForest"]

-INTEGER_TYPES = (numbers.Integral, np.integer)
-

 class IsolationForest(BaseBagging):
     """Isolation Forest Algorithm
@@ -179,7 +177,7 @@ def fit(self, X, y=None, sample_weight=None):
                              'Valid choices are: "auto", int or'
                              'float' % self.max_samples)

-        elif isinstance(self.max_samples, INTEGER_TYPES):
+        elif isinstance(self.max_samples, SCALAR_INTEGER_TYPES):
             if self.max_samples > n_samples:
                 warn("max_samples (%s) is greater than the "
                      "total number of samples (%s). max_samples "
@@ -278,7 +276,8 @@ def decision_function(self, X):

         depths += _average_path_length(n_samples_leaf)

-        scores = 2 ** (-depths.mean(axis=1) / _average_path_length(self.max_samples_))
+        scores = 2 ** (-depths.mean(axis=1) /
+                       _average_path_length(self.max_samples_))

         # Take the opposite of the scores as bigger is better (here less
         # abnormal) and add 0.5 (this value plays a special role as described
@@ -301,7 +300,7 @@ def _average_path_length(n_samples_leaf):
     average_path_length : array, same shape as n_samples_leaf

     """
-    if isinstance(n_samples_leaf, INTEGER_TYPES):
+    if isinstance(n_samples_leaf, SCALAR_INTEGER_TYPES):
         if n_samples_leaf <= 1:
             return 1.
         else:
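_average_path_length uses the isinstance check to dispatch between a scalar fast path and array input. A simplified standalone sketch of that shape-dispatch (assumed constant; not the exact iforest implementation):

import numbers
import numpy as np

SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer)  # assumed definition

def average_path_length(n_samples_leaf):
    # Scalar path: closed form c(n) from the isolation forest paper.
    if isinstance(n_samples_leaf, SCALAR_INTEGER_TYPES):
        if n_samples_leaf <= 1:
            return 1.
        return (2. * (np.log(n_samples_leaf - 1.) + np.euler_gamma) -
                2. * (n_samples_leaf - 1.) / n_samples_leaf)
    # Array path: apply the scalar rule element-wise.
    return np.array([average_path_length(int(n))
                     for n in np.asarray(n_samples_leaf).ravel()])

print(average_path_length(10))                 # scalar branch
print(average_path_length(np.array([2, 10])))  # array branch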
4 changes: 2 additions & 2 deletions sklearn/feature_extraction/hashing.py
@@ -1,14 +1,14 @@
 # Author: Lars Buitinck
 # License: BSD 3 clause

-import numbers
 import warnings

 import numpy as np
 import scipy.sparse as sp

 from . import _hashing
 from ..base import BaseEstimator, TransformerMixin
+from ..utils.validation import SCALAR_INTEGER_TYPES


 def _iteritems(d):
@@ -103,7 +103,7 @@ def __init__(self, n_features=(2 ** 20), input_type="dict",
     def _validate_params(n_features, input_type):
         # strangely, np.int16 instances are not instances of Integral,
         # while np.int64 instances are...
-        if not isinstance(n_features, (numbers.Integral, np.integer)):
+        if not isinstance(n_features, SCALAR_INTEGER_TYPES):
             raise TypeError("n_features must be integral, got %r (%s)."
                             % (n_features, type(n_features)))
         elif n_features < 1 or n_features >= 2 ** 31:
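The comment kept in this hunk is the reason the shared tuple has two members: whether a given numpy integer scalar type registers with the numbers.Integral ABC has historically depended on the numpy version and platform. Checking both covers either case; a quick probe (output varies by environment):

import numbers
import numpy as np

for t in (np.int16, np.int32, np.int64):
    x = t(1)
    print(t.__name__,
          isinstance(x, numbers.Integral),  # ABC registration may vary
          isinstance(x, np.integer))        # True for all numpy ints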
30 changes: 16 additions & 14 deletions sklearn/model_selection/_split.py
@@ -17,14 +17,14 @@
 from itertools import chain, combinations
 from collections import Iterable
 from math import ceil, floor
-import numbers
 from abc import ABCMeta, abstractmethod

 import numpy as np

 from ..utils import indexable, check_random_state, safe_indexing
 from ..utils.validation import _num_samples, column_or_1d
 from ..utils.validation import check_array
+from ..utils.validation import SCALAR_INTEGER_TYPES, SCALAR_FLOATING_TYPES
 from ..utils.multiclass import type_of_target
 from ..externals.six import with_metaclass
 from ..externals.six.moves import zip
@@ -271,7 +271,7 @@ class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)):

     @abstractmethod
     def __init__(self, n_splits, shuffle, random_state):
-        if not isinstance(n_splits, numbers.Integral):
+        if not isinstance(n_splits, SCALAR_INTEGER_TYPES):
             raise ValueError('The number of folds must be of Integral type. '
                              '%s of type %s was passed.'
                              % (n_splits, type(n_splits)))
@@ -989,7 +989,7 @@ class _RepeatedSplits(with_metaclass(ABCMeta)):
     and shuffle.
     """
     def __init__(self, cv, n_repeats=10, random_state=None, **cvargs):
-        if not isinstance(n_repeats, (np.integer, numbers.Integral)):
+        if not isinstance(n_repeats, SCALAR_INTEGER_TYPES):
             raise ValueError("Number of repetitions must be of Integral type.")

         if n_repeats <= 0:
@@ -1643,27 +1643,27 @@ def _validate_shuffle_split_init(test_size, train_size):
         raise ValueError('test_size and train_size can not both be None')

     if test_size is not None:
-        if np.asarray(test_size).dtype.kind == 'f':
+        if isinstance(test_size, SCALAR_FLOATING_TYPES):
             if test_size >= 1.:
                 raise ValueError(
                     'test_size=%f should be smaller '
                     'than 1.0 or be an integer' % test_size)
-        elif np.asarray(test_size).dtype.kind != 'i':
+        elif not isinstance(test_size, SCALAR_INTEGER_TYPES):
             # int values are checked during split based on the input
             raise ValueError("Invalid value for test_size: %r" % test_size)

     if train_size is not None:
-        if np.asarray(train_size).dtype.kind == 'f':
+        if isinstance(train_size, SCALAR_FLOATING_TYPES):
             if train_size >= 1.:
                 raise ValueError("train_size=%f should be smaller "
                                  "than 1.0 or be an integer" % train_size)
-            elif (np.asarray(test_size).dtype.kind == 'f' and
+            elif (isinstance(test_size, SCALAR_FLOATING_TYPES) and
                     (train_size + test_size) > 1.):
                 raise ValueError('The sum of test_size and train_size = %f, '
                                  'should be smaller than 1.0. Reduce '
                                  'test_size and/or train_size.' %
                                  (train_size + test_size))
-        elif np.asarray(train_size).dtype.kind != 'i':
+        elif not isinstance(train_size, SCALAR_INTEGER_TYPES):
             # int values are checked during split based on the input
             raise ValueError("Invalid value for train_size: %r" % train_size)

@@ -1672,30 +1672,32 @@ def _validate_shuffle_split(n_samples, test_size, train_size):
     """
     Validation helper to check if the test/test sizes are meaningful wrt to the
     size of the data (n_samples)

     test_size, defaults to 0.1
     """
     if (test_size is not None and
-            np.asarray(test_size).dtype.kind == 'i' and
+            isinstance(test_size, SCALAR_INTEGER_TYPES) and
             test_size >= n_samples):
         raise ValueError('test_size=%d should be smaller than the number of '
                          'samples %d' % (test_size, n_samples))

     if (train_size is not None and
-            np.asarray(train_size).dtype.kind == 'i' and
+            isinstance(train_size, SCALAR_INTEGER_TYPES) and
             train_size >= n_samples):
         raise ValueError("train_size=%d should be smaller than the number of"
                          " samples %d" % (train_size, n_samples))

+    if test_size == "default":
+        test_size = 0.1

-    if np.asarray(test_size).dtype.kind == 'f':
+    if isinstance(test_size, SCALAR_FLOATING_TYPES):
         n_test = ceil(test_size * n_samples)
-    elif np.asarray(test_size).dtype.kind == 'i':
+    elif isinstance(test_size, SCALAR_INTEGER_TYPES):
         n_test = float(test_size)

     if train_size is None:
         n_train = n_samples - n_test
-    elif np.asarray(train_size).dtype.kind == 'f':
+    elif isinstance(train_size, SCALAR_FLOATING_TYPES):
         n_train = floor(train_size * n_samples)
     else:
         n_train = float(train_size)
@@ -1900,7 +1902,7 @@ def check_cv(cv=3, y=None, classifier=False):
     if cv is None:
         cv = 3

-    if isinstance(cv, numbers.Integral):
+    if isinstance(cv, SCALAR_INTEGER_TYPES):
         if (classifier and (y is not None) and
                 (type_of_target(y) in ('binary', 'multiclass'))):
             return StratifiedKFold(cv)
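Note that the _split.py hunks are not a pure refactor: np.asarray(x).dtype.kind probing and isinstance classify a few inputs differently. In particular, a Python bool has dtype kind 'b' but is an instance of numbers.Integral, and a whole ndarray has a numeric dtype kind but is not a scalar at all. A quick comparison under the assumed constants:

import numbers
import numpy as np

SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer)  # assumed
SCALAR_FLOATING_TYPES = (float, np.floating)           # assumed
SCALAR_TYPES = SCALAR_INTEGER_TYPES + SCALAR_FLOATING_TYPES

for value in (3, np.int64(3), 0.5, np.float64(0.5),
              True, np.array([0.5])):
    by_kind = np.asarray(value).dtype.kind in 'if'
    by_isinstance = isinstance(value, SCALAR_TYPES)
    print(repr(value), by_kind, by_isinstance)
# True:            kind 'b' -> rejected; bool subclasses int -> accepted
# np.array([0.5]): kind 'f' -> accepted; not a scalar -> rejected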