From 1650d8d55292cc0a478ab618bc276d69d19c64e5 Mon Sep 17 00:00:00 2001 From: Frans Larsson Date: Fri, 31 Jul 2020 18:01:04 +0200 Subject: [PATCH 1/3] update docstring --- sklearn/utils/validation.py | 97 +++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 3c9a662264132..ee437892f4b1b 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -33,7 +33,7 @@ def _deprecate_positional_args(f): - """Decorator for methods that issues warnings for positional arguments + """Decorator for methods that issues warnings for positional arguments. Using the keyword-only argument syntax in pep 3102, arguments after the * will issue a warning when passed as a positional argument. @@ -41,7 +41,7 @@ def _deprecate_positional_args(f): Parameters ---------- f : function - function to check arguments on + function to check arguments on. """ sig = signature(f) kwonly_args = [] @@ -109,9 +109,9 @@ def assert_all_finite(X, *, allow_nan=False): Parameters ---------- - X : array or sparse matrix + X : {ndarray, sparse matrix} - allow_nan : bool + allow_nan : bool, default=False """ _assert_all_finite(X.data if sp.issparse(X) else X, allow_nan) @@ -132,7 +132,7 @@ def as_float_array(X, *, copy=True, force_all_finite=True): If True, a copy of X will be created. If False, a copy may still be returned if X's dtype is not a floating point type. - force_all_finite : boolean or 'allow-nan', default=True + force_all_finite : bool or 'allow-nan', default=True Whether to raise an error on np.inf, np.nan, pd.NA in X. The possibilities are: @@ -150,7 +150,7 @@ def as_float_array(X, *, copy=True, force_all_finite=True): Returns ------- XT : {array, sparse matrix} - An array of type float + An array of type float. 
""" if isinstance(X, np.matrix) or (not isinstance(X, np.ndarray) and not sp.issparse(X)): @@ -170,7 +170,7 @@ def as_float_array(X, *, copy=True, force_all_finite=True): def _is_arraylike(x): - """Returns whether the input is array-like""" + """Returns whether the input is array-like.""" return (hasattr(x, '__len__') or hasattr(x, 'shape') or hasattr(x, '__array__')) @@ -284,7 +284,7 @@ def indexable(*iterables): Parameters ---------- - *iterables : lists, dataframes, arrays, sparse matrices + *iterables : {lists, dataframes, arrays, sparse matrices} List of objects to ensure sliceability. """ result = [_make_indexable(X) for X in iterables] @@ -303,21 +303,21 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, spmatrix : scipy sparse matrix Input to validate and convert. - accept_sparse : string, boolean or list/tuple of strings + accept_sparse : str, bool or list/tuple of str String[s] representing allowed sparse matrix formats ('csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error. - dtype : string, type or None + dtype : str, type or None Data type of result. If None, the dtype of the input is preserved. - copy : boolean + copy : bool Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. - force_all_finite : boolean or 'allow-nan', default=True + force_all_finite : bool or 'allow-nan' Whether to raise an error on np.inf, np.nan, pd.NA in X. The possibilities are: @@ -410,7 +410,7 @@ def check_array(array, accept_sparse=False, *, accept_large_sparse=True, array : object Input object to check / convert. 
- accept_sparse : string, boolean or list/tuple of strings, default=False + accept_sparse : str, bool or list/tuple of str, default=False String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input @@ -424,24 +424,24 @@ def check_array(array, accept_sparse=False, *, accept_large_sparse=True, .. versionadded:: 0.20 - dtype : string, type, list of types or None, default="numeric" + dtype : 'numeric', type, list of type or None, default='numeric' Data type of result. If None, the dtype of the input is preserved. If "numeric", dtype is preserved unless array.dtype is object. If dtype is a list of types, conversion on the first type is only performed if the dtype of the input is not in the list. - order : 'F', 'C' or None, default=None + order : {'F', 'C'} or None, default=None Whether an array will be forced to be fortran or c-style. When order is None (default), then if copy=False, nothing is ensured about the memory layout of the output array; otherwise (copy=True) the memory layout of the returned array is kept as close as possible to the original array. - copy : boolean, default=False + copy : bool, default=False Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. - force_all_finite : boolean or 'allow-nan', default=True + force_all_finite : bool or 'allow-nan', default=True Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: @@ -456,10 +456,10 @@ def check_array(array, accept_sparse=False, *, accept_large_sparse=True, .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan` - ensure_2d : boolean, default=True + ensure_2d : bool, default=True Whether to raise a value error if array is not 2D. - allow_nd : boolean, default=False + allow_nd : bool, default=False Whether to allow array.ndim > 2. 
ensure_min_samples : int, default=1 @@ -701,13 +701,13 @@ def check_X_y(X, y, accept_sparse=False, *, accept_large_sparse=True, Parameters ---------- - X : nd-array, list or sparse matrix + X : {ndarray, list, sparse matrix} Input data. - y : nd-array, list or sparse matrix + y : {ndarray, list, sparse matrix} Labels. - accept_sparse : string, boolean or list of string, default=False + accept_sparse : str, bool or list of str, default=False String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input @@ -721,20 +721,20 @@ def check_X_y(X, y, accept_sparse=False, *, accept_large_sparse=True, .. versionadded:: 0.20 - dtype : string, type, list of types or None, default="numeric" + dtype : 'numeric', type, list of type or None, default='numeric' Data type of result. If None, the dtype of the input is preserved. If "numeric", dtype is preserved unless array.dtype is object. If dtype is a list of types, conversion on the first type is only performed if the dtype of the input is not in the list. - order : 'F', 'C' or None, default=None + order : {'F', 'C'} or None, default=None Whether an array will be forced to be fortran or c-style. - copy : boolean, default=False + copy : bool, default=False Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. - force_all_finite : boolean or 'allow-nan', default=True + force_all_finite : bool or 'allow-nan', default=True Whether to raise an error on np.inf, np.nan, pd.NA in X. This parameter does not influence whether y can have np.inf, np.nan, pd.NA values. The possibilities are: @@ -750,13 +750,13 @@ def check_X_y(X, y, accept_sparse=False, *, accept_large_sparse=True, .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan` - ensure_2d : boolean, default=True + ensure_2d : bool, default=True Whether to raise a value error if X is not 2D. 
- allow_nd : boolean, default=False + allow_nd : bool, default=False Whether to allow X.ndim > 2. - multi_output : boolean, default=False + multi_output : bool, default=False Whether to allow 2D y (array or sparse matrix). If false, y will be validated as a vector. y cannot have np.nan or np.inf values if multi_output=True. @@ -772,7 +772,7 @@ def check_X_y(X, y, accept_sparse=False, *, accept_large_sparse=True, is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check. - y_numeric : boolean, default=False + y_numeric : bool, default=False Whether to ensure that y has a numeric type. If dtype of y is object, it is converted to float64. Should only be used for regression algorithms. @@ -815,18 +815,18 @@ def check_X_y(X, y, accept_sparse=False, *, accept_large_sparse=True, @_deprecate_positional_args def column_or_1d(y, *, warn=False): - """ Ravel column or 1d numpy array, else raises an error + """Ravel column or 1d numpy array, else raises an error. Parameters ---------- y : array-like - warn : boolean, default False + warn : bool, default=False To control display of warnings. Returns ------- - y : array + y : ndarray """ y = np.asarray(y) @@ -851,7 +851,7 @@ def check_random_state(seed): Parameters ---------- - seed : None | int | instance of RandomState + seed : None, int or instance of RandomState If seed is None, return the RandomState singleton used by np.random. If seed is an int, return a new RandomState instance seeded with seed. If seed is already a RandomState instance, return it. @@ -905,19 +905,22 @@ def check_symmetric(array, *, tol=1E-10, raise_warning=True, Parameters ---------- - array : nd-array or sparse matrix + array : {ndarray, sparse matrix} Input object to check / convert. Must be two-dimensional and square, otherwise a ValueError will be raised. - tol : float + + tol : float, default=1e-10 Absolute tolerance for equivalence of arrays. Default = 1E-10. 
- raise_warning : boolean, default=True + + raise_warning : bool, default=True If True then raise a warning if conversion is required. - raise_exception : boolean, default=False + + raise_exception : bool, default=False If True then raise an exception if array is not symmetric. Returns ------- - array_sym : ndarray or sparse matrix + array_sym : {ndarray, sparse matrix} Symmetrized version of the input array, i.e. the average of array and array.transpose(). If sparse, then duplicate entries are first summed and zeros are eliminated. @@ -975,7 +978,7 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): attribute that ends with a underscore and does not start with double underscore. - msg : string + msg : str, default=None The default error message is, "This %(name)s instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." @@ -1024,10 +1027,10 @@ def check_non_negative(X, whom): Parameters ---------- - X : array-like or sparse matrix + X : {array-like, sparse matrix} Input data. - whom : string + whom : str Who passed X to this function. """ # avoid X.min() on sparse matrix since it also sorts the indices @@ -1263,7 +1266,7 @@ def _check_sample_weight(sample_weight, X, dtype=None): sample_weight : {ndarray, Number or None}, shape (n_samples,) Input sample weights. - X : nd-array, list or sparse matrix + X : {ndarray, list, sparse matrix} Input data. dtype: dtype @@ -1275,7 +1278,7 @@ def _check_sample_weight(sample_weight, X, dtype=None): Returns ------- - sample_weight : ndarray, shape (n_samples,) + sample_weight : ndarray of shape (n_samples,) Validated sample weight. It is guaranteed to be "C" contiguous. """ n_samples = _num_samples(X) @@ -1311,14 +1314,14 @@ def _allclose_dense_sparse(x, y, rtol=1e-7, atol=1e-9): Parameters ---------- - x : array-like or sparse matrix + x : {array-like, sparse matrix} First array to compare. 
- y : array-like or sparse matrix + y : {array-like, sparse matrix} Second array to compare. rtol : float, default=1e-7 - relative tolerance; see numpy.allclose + relative tolerance; see numpy.allclose. atol : float, default=1e-9 absolute tolerance; see numpy.allclose. Note that the default here is From d24976f66228e6b5d535c7fa6d8fe7ce5f7ed817 Mon Sep 17 00:00:00 2001 From: Frans Larsson Date: Fri, 31 Jul 2020 19:39:52 +0200 Subject: [PATCH 2/3] minor updates to docstring --- sklearn/utils/validation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index ee437892f4b1b..dc2dcab124bfc 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -149,7 +149,7 @@ def as_float_array(X, *, copy=True, force_all_finite=True): Returns ------- - XT : {array, sparse matrix} + XT : {ndarray, sparse matrix} An array of type float. """ if isinstance(X, np.matrix) or (not isinstance(X, np.ndarray) @@ -263,7 +263,7 @@ def _make_indexable(iterable): Parameters ---------- - iterable : {list, dataframe, array, sparse} or None + iterable : {list, dataframe, ndarray, sparse} or None Object to be converted to an indexable iterable. """ if sp.issparse(iterable): @@ -284,7 +284,7 @@ def indexable(*iterables): Parameters ---------- - *iterables : {lists, dataframes, arrays, sparse matrices} + *iterables : {lists, dataframes, ndarrays, sparse matrices} List of objects to ensure sliceability. """ result = [_make_indexable(X) for X in iterables] @@ -300,7 +300,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, Parameters ---------- - spmatrix : scipy sparse matrix + spmatrix : sparse matrix Input to validate and convert. accept_sparse : str, bool or list/tuple of str @@ -334,7 +334,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, Returns ------- - spmatrix_converted : scipy sparse matrix. + spmatrix_converted : sparse matrix. 
Matrix that is ensured to have an allowed type. """ if dtype is None: @@ -967,7 +967,7 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): Parameters ---------- - estimator : estimator instance. + estimator : estimator instance estimator instance for which the check is performed. attributes : str, list or tuple of str, default=None From d9b6a01747f5db5507a6b7af100be13f4ec5be8c Mon Sep 17 00:00:00 2001 From: Frans Larsson Date: Sat, 1 Aug 2020 13:57:21 +0200 Subject: [PATCH 3/3] Update docstring according to review Co-authored-by: Guillaume Lemaitre --- sklearn/utils/validation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index dc2dcab124bfc..5714c55419c06 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -40,8 +40,8 @@ def _deprecate_positional_args(f): Parameters ---------- - f : function - function to check arguments on. + f : callable + Function to check arguments on. """ sig = signature(f) kwonly_args = [] @@ -263,7 +263,7 @@ def _make_indexable(iterable): Parameters ---------- - iterable : {list, dataframe, ndarray, sparse} or None + iterable : {list, dataframe, ndarray, sparse matrix} or None Object to be converted to an indexable iterable. """ if sp.issparse(iterable): @@ -1321,7 +1321,7 @@ def _allclose_dense_sparse(x, y, rtol=1e-7, atol=1e-9): Second array to compare. rtol : float, default=1e-7 - relative tolerance; see numpy.allclose. + Relative tolerance; see numpy.allclose. atol : float, default=1e-9 absolute tolerance; see numpy.allclose. Note that the default here is