diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index b5f6fd22f9c33..dab3c92d654bb 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -141,10 +141,10 @@ def load_files( Parameters ---------- - container_path : str or unicode + container_path : str Path to the main folder holding one subfolder per category - description : str or unicode, default=None + description : str, default=None A paragraph describing the characteristic of the dataset: its source, reference, etc. diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py index 34a936e51cbb2..59ff356e90838 100644 --- a/sklearn/datasets/_california_housing.py +++ b/sklearn/datasets/_california_housing.py @@ -102,7 +102,7 @@ def fetch_california_housing( If ``as_frame`` is True, ``target`` is a pandas object. feature_names : list of length 8 Array of ordered feature names used in the dataset. - DESCR : string + DESCR : str Description of the California housing dataset. frame : pandas DataFrame Only present when `as_frame=True`. DataFrame with ``data`` and diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index fb7d603bfc0ff..0af8c8635bc85 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -301,7 +301,7 @@ def fetch_lfw_people( target : numpy array of shape (13233,) Labels associated to each face image. Those labels range from 0-5748 and correspond to the person IDs. - DESCR : string + DESCR : str Description of the Labeled Faces in the Wild (LFW) dataset. (data, target) : tuple if ``return_X_y`` is True @@ -486,7 +486,7 @@ def fetch_lfw_pairs( target : numpy array of shape (2200,). Shape depends on ``subset``. Labels associated to each pair of images. The two label values being different persons or the same person. - DESCR : string + DESCR : str Description of the Labeled Faces in the Wild (LFW) dataset. """ diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py index 48e258b4e8512..6a7d9dcc1936c 100644 --- a/sklearn/datasets/_svmlight_format_io.py +++ b/sklearn/datasets/_svmlight_format_io.py @@ -446,7 +446,7 @@ def dump_svmlight_file( integer or float, or array-like objects of integer or float for multilabel classifications. - f : string or file-like in binary mode + f : str or file-like in binary mode If string, specifies the path that will contain the data. If file-like, data will be written to f. f should be opened in binary mode. @@ -455,7 +455,7 @@ def dump_svmlight_file( Whether column indices should be written zero-based (True) or one-based (False). - comment : string, default=None + comment : str, default=None Comment to insert at the top of the file. This should be either a Unicode string, which will be encoded as UTF-8, or an ASCII byte string. @@ -478,7 +478,6 @@ def dump_svmlight_file( # Convert comment string to list of lines in UTF-8. # If a byte string is passed, then check whether it's ASCII; # if a user wants to get fancy, they'll have to decode themselves. - # Avoid mention of str and unicode types for Python 3.x compat. if isinstance(comment, bytes): comment.decode("ascii") # just for the exception else: diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py index 24046367c69c6..ef0ce6b99a25e 100644 --- a/sklearn/datasets/_twenty_newsgroups.py +++ b/sklearn/datasets/_twenty_newsgroups.py @@ -184,7 +184,7 @@ def fetch_20newsgroups( Select the dataset to load: 'train' for the training set, 'test' for the test set, 'all' for both, with shuffled ordering. - categories : array-like, dtype=str or unicode, default=None + categories : array-like, dtype=str, default=None If None (default), load all the categories. If not None, list of category names to load (other categories ignored). diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 8dd743813fa27..c2b60977c89d1 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -156,7 +156,7 @@ def strip_accents_ascii(s): Parameters ---------- - s : string + s : str The string to strip See Also @@ -175,7 +175,7 @@ def strip_tags(s): Parameters ---------- - s : string + s : str The string to strip """ return re.compile(r"<([^>]+)>", flags=re.UNICODE).sub(" ", s) @@ -204,7 +204,7 @@ def decode(self, doc): Parameters ---------- - doc : str + doc : bytes or str The string to decode. Returns @@ -620,7 +620,7 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have - an direct ASCII mapping. + a direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) does nothing. diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 8b5102ecdd403..841ed6a1c1cc4 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -66,7 +66,7 @@ def _deprecate_normalize(normalize, default, estimator_name): default : bool, default normalize value used by the estimator - estimator_name : string, + estimator_name : str name of the linear estimator which calls this function. The name will be used for writing the deprecation warnings diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index 23ba7c77d85ac..3ae077f4331cc 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -392,7 +392,7 @@ def fit_binary( C : float Maximum step size for passive aggressive - learning_rate : string + learning_rate : str The learning rate. Accepted values are 'constant', 'optimal', 'invscaling', 'pa1' and 'pa2'. diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py index 5640848b1a9d4..dd0258f600ccc 100644 --- a/sklearn/metrics/_base.py +++ b/sklearn/metrics/_base.py @@ -32,7 +32,7 @@ def _average_binary_score(binary_metric, y_true, y_score, average, sample_weight Target scores, can either be probability estimates of the positive class, confidence values, or binary decisions. - average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted'] + average : {None, 'micro', 'macro', 'samples', 'weighted'}, default='macro' If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 7237fa53fda25..b4316053c0f74 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2009,7 +2009,7 @@ def classification_report( Returns ------- - report : string / dict + report : str or dict Text summary of the precision, recall, F1 score for each class. Dictionary returned if output_dict is True. Dictionary has the following structure:: diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index d40903899c187..bbe9699859ded 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -28,7 +28,7 @@ def _check_shape(param, param_shape, name): param_shape : tuple - name : string + name : str """ param = np.array(param) if param.shape != param_shape: diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 850adfdd6d47f..f4bb194e1e33d 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -108,7 +108,7 @@ def _check_precisions(precisions, covariance_type, n_components, n_features): 'diag' : shape of (n_components, n_features) 'spherical' : shape of (n_components,) - covariance_type : string + covariance_type : str n_components : int Number of components. diff --git a/sklearn/neighbors/_dist_metrics.pyx b/sklearn/neighbors/_dist_metrics.pyx index 240a7a3f7d14d..db93263ee8eda 100644 --- a/sklearn/neighbors/_dist_metrics.pyx +++ b/sklearn/neighbors/_dist_metrics.pyx @@ -235,7 +235,7 @@ cdef class DistanceMetric: Parameters ---------- - metric : string or class name + metric : str or class name The distance metric to use **kwargs additional arguments will be passed to the requested metric diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py index 0429dc00c2322..dcc07d25af5fd 100644 --- a/sklearn/preprocessing/tests/test_encoders.py +++ b/sklearn/preprocessing/tests/test_encoders.py @@ -800,8 +800,8 @@ def test_encoder_dtypes(): for X in [ np.array([[1, 2], [3, 4]], dtype="int64"), np.array([[1, 2], [3, 4]], dtype="float64"), - np.array([["a", "b"], ["c", "d"]]), # unicode dtype - np.array([[b"a", b"b"], [b"c", b"d"]]), # string dtype + np.array([["a", "b"], ["c", "d"]]), # str dtype + np.array([[b"a", b"b"], [b"c", b"d"]]), # bytes dtype np.array([[1, "a"], [3, "b"]], dtype="object"), ]: enc.fit(X) diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py index 18f98d36871b9..dc50ee70f05f0 100644 --- a/sklearn/tree/_export.py +++ b/sklearn/tree/_export.py @@ -839,7 +839,7 @@ def export_graphviz( Returns ------- - dot_data : string + dot_data : str String representation of the input tree in GraphViz dot format. Only returned if ``out_file`` is None. @@ -961,7 +961,7 @@ def export_text( Returns ------- - report : string + report : str Text summary of all the rules in the decision tree. Examples diff --git a/sklearn/utils/graph.py b/sklearn/utils/graph.py index 8eacb17e628c3..020227ba001a9 100644 --- a/sklearn/utils/graph.py +++ b/sklearn/utils/graph.py @@ -92,7 +92,7 @@ def graph_shortest_path(dist_matrix, directed=True, method="auto"): if False, then find the shortest path on an undirected graph: the algorithm can progress from a point to its neighbors and vice versa. - method : string ['auto'|'FW'|'D'] + method : {'auto', 'FW', 'D'}, default='auto' method to use. Options are 'auto' : attempt to choose the best method for the current problem 'FW' : Floyd-Warshall algorithm. O[N^3] diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index bd43eeba2a3dd..5d71d28c5ffab 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -205,7 +205,7 @@ def if_delegate_has_method(delegate): Parameters ---------- - delegate : string, list of strings or tuple of strings + delegate : str, list of str or tuple of str Name of the sub-estimator that can be accessed as an attribute of the base object. If a list or a tuple of names are provided, the first sub-estimator that is an attribute of the base object will be used.