From 01e776e63fd522a2834f931854e084c92ec33314 Mon Sep 17 00:00:00 2001
From: Raghav R V
Date: Mon, 12 Sep 2016 11:55:45 +0200
Subject: [PATCH 1/6] MNT Use isinstance(..., float/numbers.Integral)

---
 sklearn/model_selection/_split.py           | 26 +++++++++++----------
 sklearn/model_selection/tests/test_split.py |  7 +++++-
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 24d9423b22278..32c0af6e2fec4 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -271,7 +271,7 @@ class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)):
 
     @abstractmethod
     def __init__(self, n_splits, shuffle, random_state):
-        if not isinstance(n_splits, numbers.Integral):
+        if not isinstance(n_splits, (numbers.Integral, np.integer)):
             raise ValueError('The number of folds must be of Integral type. '
                              '%s of type %s was passed.'
                              % (n_splits, type(n_splits)))
@@ -1643,27 +1643,27 @@ def _validate_shuffle_split_init(test_size, train_size):
         raise ValueError('test_size and train_size can not both be None')
 
     if test_size is not None:
-        if np.asarray(test_size).dtype.kind == 'f':
+        if isinstance(test_size, (float, np.floating)):
             if test_size >= 1.:
                 raise ValueError(
                     'test_size=%f should be smaller '
                     'than 1.0 or be an integer' % test_size)
-        elif np.asarray(test_size).dtype.kind != 'i':
+        elif not isinstance(test_size, (numbers.Integral, np.integer)):
             # int values are checked during split based on the input
             raise ValueError("Invalid value for test_size: %r" % test_size)
 
     if train_size is not None:
-        if np.asarray(train_size).dtype.kind == 'f':
+        if isinstance(train_size, (float, np.floating)):
             if train_size >= 1.:
                 raise ValueError("train_size=%f should be smaller "
                                  "than 1.0 or be an integer" % train_size)
-            elif (np.asarray(test_size).dtype.kind == 'f' and
+            elif (isinstance(test_size, (float, np.floating)) and
                     (train_size + test_size) > 1.):
                 raise ValueError('The sum of test_size and train_size = %f, '
                                  'should be smaller than 1.0. Reduce '
                                  'test_size and/or train_size.'
                                 % (train_size + test_size))
-        elif np.asarray(train_size).dtype.kind != 'i':
+        elif not isinstance(train_size, (numbers.Integral, np.integer)):
             # int values are checked during split based on the input
             raise ValueError("Invalid value for train_size: %r" % train_size)
 
@@ -1672,15 +1672,17 @@ def _validate_shuffle_split(n_samples, test_size, train_size):
     """
     Validation helper to check if the test/test sizes are meaningful wrt to
     the size of the data (n_samples)
+
+    test_size, defaults to 0.1
     """
     if (test_size is not None and
-            np.asarray(test_size).dtype.kind == 'i' and
+            isinstance(test_size, (numbers.Integral, np.integer)) and
             test_size >= n_samples):
         raise ValueError('test_size=%d should be smaller than the number of '
                          'samples %d' % (test_size, n_samples))
 
     if (train_size is not None and
-            np.asarray(train_size).dtype.kind == 'i' and
+            isinstance(train_size, (numbers.Integral, np.integer)) and
             train_size >= n_samples):
         raise ValueError("train_size=%d should be smaller than the number of"
                          " samples %d" % (train_size, n_samples))
@@ -1688,14 +1690,14 @@ def _validate_shuffle_split(n_samples, test_size, train_size):
     if test_size == "default":
         test_size = 0.1
 
-    if np.asarray(test_size).dtype.kind == 'f':
+    if isinstance(test_size, (float, np.floating)):
         n_test = ceil(test_size * n_samples)
-    elif np.asarray(test_size).dtype.kind == 'i':
+    elif isinstance(test_size, (numbers.Integral, np.integer)):
         n_test = float(test_size)
 
     if train_size is None:
         n_train = n_samples - n_test
-    elif np.asarray(train_size).dtype.kind == 'f':
+    elif isinstance(train_size, (float, np.floating)):
         n_train = floor(train_size * n_samples)
     else:
         n_train = float(train_size)
@@ -1900,7 +1902,7 @@ def check_cv(cv=3, y=None, classifier=False):
     if cv is None:
         cv = 3
 
-    if isinstance(cv, numbers.Integral):
+    if isinstance(cv, (numbers.Integral, np.integer)):
         if (classifier and (y is not None) and
                 (type_of_target(y) in ('binary', 'multiclass'))):
             return StratifiedKFold(cv)
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index 3f54aaf3c66fc..439e8a2f230df 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -543,15 +543,20 @@ def test_kfold_can_detect_dependent_samples_on_digits():  # see #2372
 
 def test_shuffle_split():
+    # Use numpy float as input
+    ss0 = ShuffleSplit(test_size=np.float16(0.2), random_state=0).split(X)
     ss1 = ShuffleSplit(test_size=0.2, random_state=0).split(X)
     ss2 = ShuffleSplit(test_size=2, random_state=0).split(X)
+    # Use numpy int as input
     ss3 = ShuffleSplit(test_size=np.int32(2), random_state=0).split(X)
     for typ in six.integer_types:
         ss4 = ShuffleSplit(test_size=typ(2), random_state=0).split(X)
-    for t1, t2, t3, t4 in zip(ss1, ss2, ss3, ss4):
+    for t0, t1, t2, t3, t4 in zip(ss0, ss1, ss2, ss3, ss4):
+        assert_array_equal(t0[0], t1[0])
         assert_array_equal(t1[0], t2[0])
         assert_array_equal(t2[0], t3[0])
         assert_array_equal(t3[0], t4[0])
+        assert_array_equal(t0[1], t1[1])
         assert_array_equal(t1[1], t2[1])
         assert_array_equal(t2[1], t3[1])
         assert_array_equal(t3[1], t4[1])
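The motivation for PATCH 1 can be reproduced in a couple of lines: the old dtype-kind test also accepts array-likes, while the isinstance pair accepts exactly the scalar types the validators mean to allow. A minimal sketch of the distinction (plain Python plus NumPy; the helper name is ours, not part of the patch):

    import numbers
    import numpy as np

    def is_scalar_integer(x):
        # Mirrors the check introduced above: Python ints via
        # numbers.Integral, NumPy integer scalars via np.integer.
        return isinstance(x, (numbers.Integral, np.integer))

    print(np.asarray([1, 2]).dtype.kind == 'i')  # True -- a list would pass!
    print(is_scalar_integer([1, 2]))             # False
    print(is_scalar_integer(np.int32(2)))        # True
    print(is_scalar_integer(2))                  # True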
From f838fa91eef3f79faf976d15d77c4eb7579b174c Mon Sep 17 00:00:00 2001
From: Joan Massich
Date: Thu, 26 Oct 2017 18:32:05 +0200
Subject: [PATCH 2/6] factorize the reference class to compare to

---
 sklearn/model_selection/_split.py | 28 ++++++++++++++--------------
 sklearn/utils/validation.py       |  2 ++
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 32c0af6e2fec4..c98a71ed5b6af 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -17,7 +17,6 @@
 from itertools import chain, combinations
 from collections import Iterable
 from math import ceil, floor
-import numbers
 from abc import ABCMeta, abstractmethod
 
 import numpy as np
@@ -25,6 +24,7 @@
 from ..utils import indexable, check_random_state, safe_indexing
 from ..utils.validation import _num_samples, column_or_1d
 from ..utils.validation import check_array
+from ..utils.validation import integer_types, floating_types
 from ..utils.multiclass import type_of_target
 from ..externals.six import with_metaclass
 from ..externals.six.moves import zip
@@ -271,7 +271,7 @@ class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)):
 
     @abstractmethod
     def __init__(self, n_splits, shuffle, random_state):
-        if not isinstance(n_splits, (numbers.Integral, np.integer)):
+        if not isinstance(n_splits, integer_types):
             raise ValueError('The number of folds must be of Integral type. '
                              '%s of type %s was passed.'
                              % (n_splits, type(n_splits)))
@@ -989,7 +989,7 @@ class _RepeatedSplits(with_metaclass(ABCMeta)):
     and shuffle.
     """
     def __init__(self, cv, n_repeats=10, random_state=None, **cvargs):
-        if not isinstance(n_repeats, (np.integer, numbers.Integral)):
+        if not isinstance(n_repeats, integer_types):
             raise ValueError("Number of repetitions must be of Integral type.")
 
         if n_repeats <= 0:
@@ -1643,27 +1643,27 @@ def _validate_shuffle_split_init(test_size, train_size):
         raise ValueError('test_size and train_size can not both be None')
 
     if test_size is not None:
-        if isinstance(test_size, (float, np.floating)):
+        if isinstance(test_size, floating_types):
             if test_size >= 1.:
                 raise ValueError(
                     'test_size=%f should be smaller '
                     'than 1.0 or be an integer' % test_size)
-        elif not isinstance(test_size, (numbers.Integral, np.integer)):
+        elif not isinstance(test_size, integer_types):
             # int values are checked during split based on the input
             raise ValueError("Invalid value for test_size: %r" % test_size)
 
     if train_size is not None:
-        if isinstance(train_size, (float, np.floating)):
+        if isinstance(train_size, floating_types):
             if train_size >= 1.:
                 raise ValueError("train_size=%f should be smaller "
                                  "than 1.0 or be an integer" % train_size)
-            elif (isinstance(test_size, (float, np.floating)) and
+            elif (isinstance(test_size, floating_types) and
                     (train_size + test_size) > 1.):
                 raise ValueError('The sum of test_size and train_size = %f, '
                                  'should be smaller than 1.0. Reduce '
                                  'test_size and/or train_size.'
                                 % (train_size + test_size))
-        elif not isinstance(train_size, (numbers.Integral, np.integer)):
+        elif not isinstance(train_size, integer_types):
             # int values are checked during split based on the input
             raise ValueError("Invalid value for train_size: %r" % train_size)
 
@@ -1676,13 +1676,13 @@ def _validate_shuffle_split(n_samples, test_size, train_size):
     test_size, defaults to 0.1
     """
     if (test_size is not None and
-            isinstance(test_size, (numbers.Integral, np.integer)) and
+            isinstance(test_size, integer_types) and
             test_size >= n_samples):
         raise ValueError('test_size=%d should be smaller than the number of '
                          'samples %d' % (test_size, n_samples))
 
     if (train_size is not None and
-            isinstance(train_size, (numbers.Integral, np.integer)) and
+            isinstance(train_size, integer_types) and
             train_size >= n_samples):
         raise ValueError("train_size=%d should be smaller than the number of"
                          " samples %d" % (train_size, n_samples))
@@ -1690,14 +1690,14 @@ def _validate_shuffle_split(n_samples, test_size, train_size):
     if test_size == "default":
         test_size = 0.1
 
-    if isinstance(test_size, (float, np.floating)):
+    if isinstance(test_size, floating_types):
         n_test = ceil(test_size * n_samples)
-    elif isinstance(test_size, (numbers.Integral, np.integer)):
+    elif isinstance(test_size, integer_types):
         n_test = float(test_size)
 
     if train_size is None:
         n_train = n_samples - n_test
-    elif isinstance(train_size, (float, np.floating)):
+    elif isinstance(train_size, floating_types):
         n_train = floor(train_size * n_samples)
     else:
         n_train = float(train_size)
@@ -1902,7 +1902,7 @@ def check_cv(cv=3, y=None, classifier=False):
     if cv is None:
         cv = 3
 
-    if isinstance(cv, (numbers.Integral, np.integer)):
+    if isinstance(cv, integer_types):
         if (classifier and (y is not None) and
                 (type_of_target(y) in ('binary', 'multiclass'))):
             return StratifiedKFold(cv)
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index b3538a7925892..d2382f21aafdd 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -24,6 +24,8 @@
 
 from ..externals.joblib import Memory
 
+integer_types = (numbers.Integral, np.integer)
+floating_types = (float, np.floating)
 FLOAT_DTYPES = (np.float64, np.float32, np.float16)
 
 # Silenced by default to reduce verbosity. Turn on at runtime for
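Factoring the two tuples into sklearn/utils/validation.py means every validator now compares against a single definition instead of restating the pair inline. A minimal standalone sketch of the pattern (the function name validate_n_splits is illustrative, not part of the patch):

    import numbers
    import numpy as np

    # Shared definitions, as introduced in sklearn/utils/validation.py above.
    integer_types = (numbers.Integral, np.integer)
    floating_types = (float, np.floating)

    def validate_n_splits(n_splits):
        # Any validator can reuse the shared tuple.
        if not isinstance(n_splits, integer_types):
            raise ValueError('The number of folds must be of Integral type. '
                             '%s of type %s was passed.'
                             % (n_splits, type(n_splits)))

    validate_n_splits(np.int64(5))  # accepted
    validate_n_splits(5)            # accepted
    # validate_n_splits(5.0)        # raises ValueError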
From 1f1aeb804d947ec9cc6954b8dad3349a70fa6d92 Mon Sep 17 00:00:00 2001
From: Joan Massich
Date: Fri, 3 Nov 2017 16:13:16 +0100
Subject: [PATCH 3/6] [PEP8] fix long line

---
 sklearn/ensemble/iforest.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
index 216d2c4f78631..d4430946048b9 100644
--- a/sklearn/ensemble/iforest.py
+++ b/sklearn/ensemble/iforest.py
@@ -278,7 +278,8 @@ def decision_function(self, X):
 
         depths += _average_path_length(n_samples_leaf)
 
-        scores = 2 ** (-depths.mean(axis=1) / _average_path_length(self.max_samples_))
+        scores = 2 ** (-depths.mean(axis=1) /
+                       _average_path_length(self.max_samples_))
 
         # Take the opposite of the scores as bigger is better (here less
         # abnormal) and add 0.5 (this value plays a special role as described
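The line wrapped above computes the usual Isolation Forest anomaly score s(x, n) = 2 ** (-E[h(x)] / c(n)), where E[h(x)] is a sample's mean path depth across the trees and c(n) is the average path length of an unsuccessful binary-search-tree lookup. A rough standalone sketch of that formula, using the c(n) approximation from the Isolation Forest paper and toy depth values (not sklearn's exact helper):

    import numpy as np

    def average_path_length(n):
        # c(n) = 2 * H(n - 1) - 2 * (n - 1) / n, with H(i) ~ ln(i) + gamma.
        if n <= 1:
            return 1.0
        return 2.0 * (np.log(n - 1.0) + np.euler_gamma) - 2.0 * (n - 1.0) / n

    depths = np.array([[3.0, 4.0, 2.5],    # isolated quickly -> anomalous
                       [9.0, 8.5, 9.5]])   # deep paths -> ordinary
    scores = 2 ** (-depths.mean(axis=1) / average_path_length(256))
    print(scores)  # the first (shallow) sample gets the larger score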
From 6a1b5eca58ded14f00fb0bdb295ef8ccacaf3a3a Mon Sep 17 00:00:00 2001
From: Joan Massich
Date: Fri, 3 Nov 2017 16:34:09 +0100
Subject: [PATCH 4/6] [PEP8] clean up unused variable

---
 sklearn/ensemble/bagging.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
index 7ea3030bdf120..05e1e77e177f1 100644
--- a/sklearn/ensemble/bagging.py
+++ b/sklearn/ensemble/bagging.py
@@ -578,7 +578,6 @@ def _validate_estimator(self):
 
     def _set_oob_score(self, X, y):
         n_samples = y.shape[0]
         n_classes_ = self.n_classes_
-        classes_ = self.classes_
 
         predictions = np.zeros((n_samples, n_classes_))
 

From f8b262868c46504a2e4f752e6f13afabf9850840 Mon Sep 17 00:00:00 2001
From: Joan Massich
Date: Fri, 3 Nov 2017 17:09:28 +0100
Subject: [PATCH 5/6] Change (numbers.Integral, np.integer) or INTEGER_TYPES
 to SCALAR_INTEGER_TYPES

---
 benchmarks/bench_plot_nmf.py          |  8 +++++---
 sklearn/decomposition/nmf.py          |  7 +++----
 sklearn/decomposition/pca.py          |  7 +++----
 sklearn/ensemble/bagging.py           |  6 +++---
 sklearn/ensemble/base.py              |  4 ++--
 sklearn/ensemble/gradient_boosting.py |  8 ++++----
 sklearn/ensemble/iforest.py           |  8 +++-----
 sklearn/feature_extraction/hashing.py |  4 ++--
 sklearn/model_selection/_split.py     | 18 +++++++++---------
 sklearn/tree/tree.py                  | 11 +++++------
 sklearn/utils/validation.py           |  4 ++--
 11 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py
index c48977a49a725..f936bacadcb9a 100644
--- a/benchmarks/bench_plot_nmf.py
+++ b/benchmarks/bench_plot_nmf.py
@@ -16,12 +16,13 @@
 import matplotlib.pyplot as plt
 import pandas
 
+from sklearn.utils.validation import SCALAR_INTEGER_TYPES
 from sklearn.utils.testing import ignore_warnings
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.decomposition.nmf import NMF
 from sklearn.decomposition.nmf import _initialize_nmf
 from sklearn.decomposition.nmf import _beta_divergence
-from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
+from sklearn.decomposition.nmf import _check_init
 from sklearn.externals.joblib import Memory
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.extmath import safe_sparse_dot, squared_norm
@@ -237,11 +238,12 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
         if n_components is None:
             n_components = n_features
 
-        if (not isinstance(n_components, INTEGER_TYPES) or
+        if (not isinstance(n_components, SCALAR_INTEGER_TYPES) or
                 n_components <= 0):
             raise ValueError("Number of components must be a positive integer;"
                              " got (n_components=%r)" % n_components)
-        if not isinstance(self.max_iter, INTEGER_TYPES) or self.max_iter < 0:
+        if (not isinstance(self.max_iter, SCALAR_INTEGER_TYPES) or
+                self.max_iter < 0):
             raise ValueError("Maximum number of iterations must be a positive "
                              "integer; got (max_iter=%r)" % self.max_iter)
         if not isinstance(self.tol, numbers.Number) or self.tol < 0:
diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py
index 8b3830470921b..7705b615dcabd 100644
--- a/sklearn/decomposition/nmf.py
+++ b/sklearn/decomposition/nmf.py
@@ -22,13 +22,12 @@
 from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
 from ..utils.extmath import safe_min
 from ..utils.validation import check_is_fitted, check_non_negative
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..exceptions import ConvergenceWarning
 from .cdnmf_fast import _update_cdnmf_fast
 
 EPSILON = np.finfo(np.float32).eps
 
-INTEGER_TYPES = (numbers.Integral, np.integer)
-
 
 def norm(x):
     """Dot product-based Euclidean norm implementation
@@ -984,10 +983,10 @@ def non_negative_factorization(X, W=None, H=None, n_components=None,
 
     if n_components is None:
         n_components = n_features
-    if not isinstance(n_components, INTEGER_TYPES) or n_components <= 0:
+    if not isinstance(n_components, SCALAR_INTEGER_TYPES) or n_components <= 0:
         raise ValueError("Number of components must be a positive integer;"
                          " got (n_components=%r)" % n_components)
-    if not isinstance(max_iter, INTEGER_TYPES) or max_iter < 0:
+    if not isinstance(max_iter, SCALAR_INTEGER_TYPES) or max_iter < 0:
         raise ValueError("Maximum number of iterations must be a positive "
                          "integer; got (max_iter=%r)" % max_iter)
     if not isinstance(tol, numbers.Number) or tol < 0:
diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 2b715b7e06824..a9675947cb04b 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -11,7 +11,6 @@
 # License: BSD 3 clause
 
 from math import log, sqrt
-import numbers
 
 import numpy as np
 from scipy import linalg
@@ -28,7 +27,7 @@
 from ..utils import check_array
 from ..utils.extmath import fast_logdet, randomized_svd, svd_flip
 from ..utils.extmath import stable_cumsum
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, SCALAR_INTEGER_TYPES
 
 
 def _assess_dimension_(spectrum, rank, n_samples, n_features):
@@ -423,7 +422,7 @@ def _fit_full(self, X, n_components):
                              "svd_solver='full'"
                              % (n_components, min(n_samples, n_features)))
         elif n_components >= 1:
-            if not isinstance(n_components, (numbers.Integral, np.integer)):
+            if not isinstance(n_components, SCALAR_INTEGER_TYPES):
                 raise ValueError("n_components=%r must be of type int "
                                  "when greater than or equal to 1, "
                                  "was of type=%r"
@@ -488,7 +487,7 @@ def _fit_truncated(self, X, n_components, svd_solver):
                              "svd_solver='%s'"
                              % (n_components, min(n_samples, n_features),
                                 svd_solver))
-        elif not isinstance(n_components, (numbers.Integral, np.integer)):
+        elif not isinstance(n_components, SCALAR_INTEGER_TYPES):
             raise ValueError("n_components=%r must be of type int "
                              "when greater than or equal to 1, was of type=%r"
                              % (n_components, type(n_components)))
diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
index 05e1e77e177f1..2b4651f04e841 100644
--- a/sklearn/ensemble/bagging.py
+++ b/sklearn/ensemble/bagging.py
@@ -6,7 +6,6 @@
 from __future__ import division
 
 import itertools
-import numbers
 import numpy as np
 from warnings import warn
 from abc import ABCMeta, abstractmethod
@@ -20,6 +19,7 @@
 from ..utils import check_random_state, check_X_y, check_array, column_or_1d
 from ..utils.random import sample_without_replacement
 from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..utils import indices_to_mask, check_consistent_length
 from ..utils.metaestimators import if_delegate_has_method
 from ..utils.multiclass import check_classification_targets
@@ -299,7 +299,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         # Validate max_samples
         if max_samples is None:
             max_samples = self.max_samples
-        elif not isinstance(max_samples, (numbers.Integral, np.integer)):
+        elif not isinstance(max_samples, SCALAR_INTEGER_TYPES):
             max_samples = int(max_samples * X.shape[0])
 
         if not (0 < max_samples <= X.shape[0]):
@@ -309,7 +309,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         self._max_samples = max_samples
 
         # Validate max_features
-        if isinstance(self.max_features, (numbers.Integral, np.integer)):
+        if isinstance(self.max_features, SCALAR_INTEGER_TYPES):
             max_features = self.max_features
         else:  # float
             max_features = int(self.max_features * self.n_features_)
diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py
index 2477cc1c21c7d..c64cfc9f1e561 100644
--- a/sklearn/ensemble/base.py
+++ b/sklearn/ensemble/base.py
@@ -6,12 +6,12 @@
 # License: BSD 3 clause
 
 import numpy as np
-import numbers
 
 from ..base import clone
 from ..base import BaseEstimator
 from ..base import MetaEstimatorMixin
 from ..utils import _get_n_jobs, check_random_state
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..externals import six
 from abc import ABCMeta, abstractmethod
 
@@ -100,7 +100,7 @@ def __init__(self, base_estimator, n_estimators=10,
     def _validate_estimator(self, default=None):
         """Check the estimator and the n_estimator attribute, set the
         `base_estimator_` attribute."""
-        if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):
+        if not isinstance(self.n_estimators, SCALAR_INTEGER_TYPES):
             raise ValueError("n_estimators must be an integer, "
                              "got {0}.".format(type(self.n_estimators)))
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 2c155f11c6282..795d33ffb7c0b 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -35,7 +35,6 @@
 from ._gradient_boosting import predict_stage
 from ._gradient_boosting import _random_sample_mask
 
-import numbers
 import numpy as np
 
 from scipy import stats
@@ -59,6 +58,7 @@
 from ..utils.fixes import logsumexp
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import check_is_fitted
+from ..utils.validation import SCALAR_INTEGER_TYPES
 from ..utils.multiclass import check_classification_targets
 from ..exceptions import NotFittedError
 
@@ -870,7 +870,7 @@ def _check_params(self):
                              "or 'log2'." % self.max_features)
         elif self.max_features is None:
             max_features = self.n_features_
-        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
+        elif isinstance(self.max_features, SCALAR_INTEGER_TYPES):
             max_features = self.max_features
         else:  # float
             if 0. < self.max_features <= 1.:
@@ -881,8 +881,8 @@ def _check_params(self):
 
         self.max_features_ = max_features
 
-        if not isinstance(self.n_iter_no_change,
-                          (numbers.Integral, np.integer, type(None))):
+        if not (isinstance(self.n_iter_no_change, SCALAR_INTEGER_TYPES) or
+                self.n_iter_no_change is None):
             raise ValueError("n_iter_no_change should either be None or an "
                              "integer. %r was passed"
                             % self.n_iter_no_change)
diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
index d4430946048b9..7bb52273d8c1f 100644
--- a/sklearn/ensemble/iforest.py
+++ b/sklearn/ensemble/iforest.py
@@ -11,17 +11,15 @@
 
 from scipy.sparse import issparse
 
-import numbers
 from ..externals import six
 from ..tree import ExtraTreeRegressor
 from ..utils import check_random_state, check_array
+from ..utils.validation import SCALAR_INTEGER_TYPES
 
 from .bagging import BaseBagging
 
 __all__ = ["IsolationForest"]
 
-INTEGER_TYPES = (numbers.Integral, np.integer)
-
 
 class IsolationForest(BaseBagging):
     """Isolation Forest Algorithm
@@ -179,7 +177,7 @@ def fit(self, X, y=None, sample_weight=None):
                                  'Valid choices are: "auto", int or'
                                  'float' % self.max_samples)
 
-        elif isinstance(self.max_samples, INTEGER_TYPES):
+        elif isinstance(self.max_samples, SCALAR_INTEGER_TYPES):
             if self.max_samples > n_samples:
                 warn("max_samples (%s) is greater than the "
                      "total number of samples (%s). max_samples "
@@ -302,7 +300,7 @@ def _average_path_length(n_samples_leaf):
     average_path_length : array, same shape as n_samples_leaf
 
     """
-    if isinstance(n_samples_leaf, INTEGER_TYPES):
+    if isinstance(n_samples_leaf, SCALAR_INTEGER_TYPES):
         if n_samples_leaf <= 1:
             return 1.
         else:
diff --git a/sklearn/feature_extraction/hashing.py b/sklearn/feature_extraction/hashing.py
index d586e6302e540..3d4ccc4838bc0 100644
--- a/sklearn/feature_extraction/hashing.py
+++ b/sklearn/feature_extraction/hashing.py
@@ -1,7 +1,6 @@
 # Author: Lars Buitinck
 # License: BSD 3 clause
 
-import numbers
 import warnings
 
 import numpy as np
@@ -9,6 +8,7 @@
 
 from . import _hashing
 from ..base import BaseEstimator, TransformerMixin
+from ..utils.validation import SCALAR_INTEGER_TYPES
 
 
 def _iteritems(d):
@@ -103,7 +103,7 @@ def __init__(self, n_features=(2 ** 20), input_type="dict",
     def _validate_params(n_features, input_type):
         # strangely, np.int16 instances are not instances of Integral,
         # while np.int64 instances are...
-        if not isinstance(n_features, (numbers.Integral, np.integer)):
+        if not isinstance(n_features, SCALAR_INTEGER_TYPES):
             raise TypeError("n_features must be integral, got %r (%s)."
                             % (n_features, type(n_features)))
         elif n_features < 1 or n_features >= 2 ** 31:
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index c98a71ed5b6af..da7cb0a373b25 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -24,7 +24,7 @@
 from ..utils import indexable, check_random_state, safe_indexing
 from ..utils.validation import _num_samples, column_or_1d
 from ..utils.validation import check_array
-from ..utils.validation import integer_types, floating_types
+from ..utils.validation import SCALAR_INTEGER_TYPES, floating_types
 from ..utils.multiclass import type_of_target
 from ..externals.six import with_metaclass
 from ..externals.six.moves import zip
@@ -271,7 +271,7 @@ class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)):
 
     @abstractmethod
     def __init__(self, n_splits, shuffle, random_state):
-        if not isinstance(n_splits, integer_types):
+        if not isinstance(n_splits, SCALAR_INTEGER_TYPES):
             raise ValueError('The number of folds must be of Integral type. '
                              '%s of type %s was passed.'
                              % (n_splits, type(n_splits)))
@@ -989,7 +989,7 @@ class _RepeatedSplits(with_metaclass(ABCMeta)):
     and shuffle.
""" def __init__(self, cv, n_repeats=10, random_state=None, **cvargs): - if not isinstance(n_repeats, integer_types): + if not isinstance(n_repeats, SCALAR_INTEGER_TYPES): raise ValueError("Number of repetitions must be of Integral type.") if n_repeats <= 0: @@ -1648,7 +1648,7 @@ def _validate_shuffle_split_init(test_size, train_size): raise ValueError( 'test_size=%f should be smaller ' 'than 1.0 or be an integer' % test_size) - elif not isinstance(test_size, integer_types): + elif not isinstance(test_size, SCALAR_INTEGER_TYPES): # int values are checked during split based on the input raise ValueError("Invalid value for test_size: %r" % test_size) @@ -1663,7 +1663,7 @@ def _validate_shuffle_split_init(test_size, train_size): 'should be smaller than 1.0. Reduce ' 'test_size and/or train_size.' % (train_size + test_size)) - elif not isinstance(train_size, integer_types): + elif not isinstance(train_size, SCALAR_INTEGER_TYPES): # int values are checked during split based on the input raise ValueError("Invalid value for train_size: %r" % train_size) @@ -1676,13 +1676,13 @@ def _validate_shuffle_split(n_samples, test_size, train_size): test_size, defaults to 0.1 """ if (test_size is not None and - isinstance(test_size, integer_types) and + isinstance(test_size, SCALAR_INTEGER_TYPES) and test_size >= n_samples): raise ValueError('test_size=%d should be smaller than the number of ' 'samples %d' % (test_size, n_samples)) if (train_size is not None and - isinstance(train_size, integer_types) and + isinstance(train_size, SCALAR_INTEGER_TYPES) and train_size >= n_samples): raise ValueError("train_size=%d should be smaller than the number of" " samples %d" % (train_size, n_samples)) @@ -1692,7 +1692,7 @@ def _validate_shuffle_split(n_samples, test_size, train_size): if isinstance(test_size, floating_types): n_test = ceil(test_size * n_samples) - elif isinstance(test_size, integer_types): + elif isinstance(test_size, SCALAR_INTEGER_TYPES): n_test = float(test_size) if train_size is None: @@ -1902,7 +1902,7 @@ def check_cv(cv=3, y=None, classifier=False): if cv is None: cv = 3 - if isinstance(cv, integer_types): + if isinstance(cv, SCALAR_INTEGER_TYPES): if (classifier and (y is not None) and (type_of_target(y) in ('binary', 'multiclass'))): return StratifiedKFold(cv) diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 789ffb8b61cac..a5a53e78532e2 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -17,7 +17,6 @@ from __future__ import division -import numbers import warnings from abc import ABCMeta from abc import abstractmethod @@ -35,7 +34,7 @@ from ..utils import check_random_state from ..utils import compute_sample_weight from ..utils.multiclass import check_classification_targets -from ..utils.validation import check_is_fitted +from ..utils.validation import check_is_fitted, SCALAR_INTEGER_TYPES from ._criterion import Criterion from ._splitter import Splitter @@ -173,7 +172,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, max_leaf_nodes = (-1 if self.max_leaf_nodes is None else self.max_leaf_nodes) - if isinstance(self.min_samples_leaf, (numbers.Integral, np.integer)): + if isinstance(self.min_samples_leaf, SCALAR_INTEGER_TYPES): if not 1 <= self.min_samples_leaf: raise ValueError("min_samples_leaf must be at least 1 " "or in (0, 0.5], got %s" @@ -186,7 +185,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, % self.min_samples_leaf) min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples)) - if isinstance(self.min_samples_split, 
-        if isinstance(self.min_samples_split, (numbers.Integral, np.integer)):
+        if isinstance(self.min_samples_split, SCALAR_INTEGER_TYPES):
             if not 2 <= self.min_samples_split:
                 raise ValueError("min_samples_split must be an integer "
                                  "greater than 1 or a float in (0.0, 1.0]; "
@@ -220,7 +219,7 @@ def fit(self, X, y, sample_weight=None, check_input=True,
                              'values are "auto", "sqrt" or "log2".')
         elif self.max_features is None:
             max_features = self.n_features_
-        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
+        elif isinstance(self.max_features, SCALAR_INTEGER_TYPES):
             max_features = self.max_features
         else:  # float
             if self.max_features > 0.0:
@@ -240,7 +239,7 @@ def fit(self, X, y, sample_weight=None, check_input=True,
             raise ValueError("max_depth must be greater than zero. ")
         if not (0 < max_features <= self.n_features_):
             raise ValueError("max_features must be in (0, n_features]")
-        if not isinstance(max_leaf_nodes, (numbers.Integral, np.integer)):
+        if not isinstance(max_leaf_nodes, SCALAR_INTEGER_TYPES):
             raise ValueError("max_leaf_nodes must be integral number but was "
                              "%r" % max_leaf_nodes)
         if -1 < max_leaf_nodes < 2:
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index d2382f21aafdd..93dfe70f4f6b8 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -24,7 +24,7 @@
 
 from ..externals.joblib import Memory
 
-integer_types = (numbers.Integral, np.integer)
+SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer)
 floating_types = (float, np.floating)
 FLOAT_DTYPES = (np.float64, np.float32, np.float16)
 
@@ -662,7 +662,7 @@ def check_random_state(seed):
     """
     if seed is None or seed is np.random:
         return np.random.mtrand._rand
-    if isinstance(seed, (numbers.Integral, np.integer)):
+    if isinstance(seed, SCALAR_INTEGER_TYPES):
         return np.random.RandomState(seed)
     if isinstance(seed, np.random.RandomState):
         return seed
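The comment preserved in hashing.py above records why both members of SCALAR_INTEGER_TYPES matter: under the NumPy versions this code targets, not every NumPy integer scalar was registered with the numbers.Integral ABC, whereas all of them are instances of np.integer. A quick probe (results vary by NumPy version; on recent releases all three also pass the Integral check):

    import numbers
    import numpy as np

    for scalar in (np.int16(1), np.int32(1), np.int64(1)):
        print(type(scalar).__name__,
              isinstance(scalar, numbers.Integral),  # may be False on old NumPy
              isinstance(scalar, np.integer))        # True for all of them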
integer" % train_size) - elif (isinstance(test_size, floating_types) and + elif (isinstance(test_size, SCALAR_FLOATING_TYPES) and (train_size + test_size) > 1.): raise ValueError('The sum of test_size and train_size = %f, ' 'should be smaller than 1.0. Reduce ' @@ -1690,14 +1690,14 @@ def _validate_shuffle_split(n_samples, test_size, train_size): if test_size == "default": test_size = 0.1 - if isinstance(test_size, floating_types): + if isinstance(test_size, SCALAR_FLOATING_TYPES): n_test = ceil(test_size * n_samples) elif isinstance(test_size, SCALAR_INTEGER_TYPES): n_test = float(test_size) if train_size is None: n_train = n_samples - n_test - elif isinstance(train_size, floating_types): + elif isinstance(train_size, SCALAR_FLOATING_TYPES): n_train = floor(train_size * n_samples) else: n_train = float(train_size) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 93dfe70f4f6b8..f9c4461a5fb99 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -25,7 +25,7 @@ SCALAR_INTEGER_TYPES = (numbers.Integral, np.integer) -floating_types = (float, np.floating) +SCALAR_FLOATING_TYPES = (float, np.floating) FLOAT_DTYPES = (np.float64, np.float32, np.float16) # Silenced by default to reduce verbosity. Turn on at runtime for