diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index b5f6fd22f9c33..dab3c92d654bb 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -141,10 +141,10 @@ def load_files(
 
     Parameters
     ----------
-    container_path : str or unicode
+    container_path : str
         Path to the main folder holding one subfolder per category
 
-    description : str or unicode, default=None
+    description : str, default=None
-        A paragraph describing the characteristic of the dataset: its source,
+        A paragraph describing the characteristics of the dataset: its source,
         reference, etc.
 
diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py
index 34a936e51cbb2..59ff356e90838 100644
--- a/sklearn/datasets/_california_housing.py
+++ b/sklearn/datasets/_california_housing.py
@@ -102,7 +102,7 @@ def fetch_california_housing(
             If ``as_frame`` is True, ``target`` is a pandas object.
         feature_names : list of length 8
             Array of ordered feature names used in the dataset.
-        DESCR : string
+        DESCR : str
             Description of the California housing dataset.
         frame : pandas DataFrame
             Only present when `as_frame=True`. DataFrame with ``data`` and
diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py
index fb7d603bfc0ff..0af8c8635bc85 100644
--- a/sklearn/datasets/_lfw.py
+++ b/sklearn/datasets/_lfw.py
@@ -301,7 +301,7 @@ def fetch_lfw_people(
         target : numpy array of shape (13233,)
             Labels associated to each face image.
             Those labels range from 0-5748 and correspond to the person IDs.
-        DESCR : string
+        DESCR : str
             Description of the Labeled Faces in the Wild (LFW) dataset.
 
     (data, target) : tuple if ``return_X_y`` is True
@@ -486,7 +486,7 @@ def fetch_lfw_pairs(
         target : numpy array of shape (2200,). Shape depends on ``subset``.
             Labels associated to each pair of images.
             The two label values being different persons or the same person.
-        DESCR : string
+        DESCR : str
             Description of the Labeled Faces in the Wild (LFW) dataset.
 
     """
diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py
index 48e258b4e8512..6a7d9dcc1936c 100644
--- a/sklearn/datasets/_svmlight_format_io.py
+++ b/sklearn/datasets/_svmlight_format_io.py
@@ -446,7 +446,7 @@ def dump_svmlight_file(
         integer or float, or array-like objects of integer or float for
         multilabel classifications.
 
-    f : string or file-like in binary mode
+    f : str or file-like in binary mode
         If string, specifies the path that will contain the data.
         If file-like, data will be written to f. f should be opened in binary
         mode.
@@ -455,7 +455,7 @@ def dump_svmlight_file(
         Whether column indices should be written zero-based (True) or one-based
         (False).
 
-    comment : string, default=None
+    comment : str, default=None
         Comment to insert at the top of the file. This should be either a
         Unicode string, which will be encoded as UTF-8, or an ASCII byte
         string.
@@ -478,7 +478,6 @@ def dump_svmlight_file(
         # Convert comment string to list of lines in UTF-8.
         # If a byte string is passed, then check whether it's ASCII;
         # if a user wants to get fancy, they'll have to decode themselves.
-        # Avoid mention of str and unicode types for Python 3.x compat.
         if isinstance(comment, bytes):
             comment.decode("ascii")  # just for the exception
         else:
diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py
index 24046367c69c6..ef0ce6b99a25e 100644
--- a/sklearn/datasets/_twenty_newsgroups.py
+++ b/sklearn/datasets/_twenty_newsgroups.py
@@ -184,7 +184,7 @@ def fetch_20newsgroups(
         Select the dataset to load: 'train' for the training set, 'test'
         for the test set, 'all' for both, with shuffled ordering.
 
-    categories : array-like, dtype=str or unicode, default=None
+    categories : array-like, dtype=str, default=None
         If None (default), load all the categories.
         If not None, list of category names to load (other categories
         ignored).
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 8dd743813fa27..c2b60977c89d1 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -156,7 +156,7 @@ def strip_accents_ascii(s):
 
     Parameters
     ----------
-    s : string
+    s : str
         The string to strip
 
     See Also
@@ -175,7 +175,7 @@ def strip_tags(s):
 
     Parameters
     ----------
-    s : string
+    s : str
         The string to strip
     """
     return re.compile(r"<([^>]+)>", flags=re.UNICODE).sub(" ", s)
@@ -204,7 +204,7 @@ def decode(self, doc):
 
         Parameters
         ----------
-        doc : str
+        doc : bytes or str
             The string to decode.
 
         Returns
@@ -620,7 +620,7 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator):
         Remove accents and perform other character normalization
         during the preprocessing step.
         'ascii' is a fast method that only works on characters that have
-        an direct ASCII mapping.
+        a direct ASCII mapping.
         'unicode' is a slightly slower method that works on any characters.
         None (default) does nothing.
 
diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 8b5102ecdd403..841ed6a1c1cc4 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -66,7 +66,7 @@ def _deprecate_normalize(normalize, default, estimator_name):
-    default : bool,
+    default : bool
         default normalize value used by the estimator
 
-    estimator_name : string,
+    estimator_name : str
         name of the linear estimator which calls this function.
         The name will be used for writing the deprecation warnings
 
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 23ba7c77d85ac..3ae077f4331cc 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -392,7 +392,7 @@ def fit_binary(
     C : float
         Maximum step size for passive aggressive
 
-    learning_rate : string
+    learning_rate : str
         The learning rate. Accepted values are 'constant', 'optimal',
         'invscaling', 'pa1' and 'pa2'.
 
diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py
index 5640848b1a9d4..dd0258f600ccc 100644
--- a/sklearn/metrics/_base.py
+++ b/sklearn/metrics/_base.py
@@ -32,7 +32,7 @@ def _average_binary_score(binary_metric, y_true, y_score, average, sample_weight
         Target scores, can either be probability estimates of the positive
         class, confidence values, or binary decisions.
 
-    average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
+    average : {None, 'micro', 'macro', 'samples', 'weighted'}, default='macro'
         If ``None``, the scores for each class are returned. Otherwise,
         this determines the type of averaging performed on the data:
 
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 7237fa53fda25..b4316053c0f74 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -2009,7 +2009,7 @@ def classification_report(
 
     Returns
     -------
-    report : string / dict
+    report : str or dict
         Text summary of the precision, recall, F1 score for each class.
         Dictionary returned if output_dict is True. Dictionary has the
         following structure::
diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index d40903899c187..bbe9699859ded 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -28,7 +28,7 @@ def _check_shape(param, param_shape, name):
 
     param_shape : tuple
 
-    name : string
+    name : str
     """
     param = np.array(param)
     if param.shape != param_shape:
diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py
index 850adfdd6d47f..f4bb194e1e33d 100644
--- a/sklearn/mixture/_gaussian_mixture.py
+++ b/sklearn/mixture/_gaussian_mixture.py
@@ -108,7 +108,7 @@ def _check_precisions(precisions, covariance_type, n_components, n_features):
         'diag' : shape of (n_components, n_features)
         'spherical' : shape of (n_components,)
 
-    covariance_type : string
+    covariance_type : str
 
     n_components : int
         Number of components.
diff --git a/sklearn/neighbors/_dist_metrics.pyx b/sklearn/neighbors/_dist_metrics.pyx
index 240a7a3f7d14d..db93263ee8eda 100644
--- a/sklearn/neighbors/_dist_metrics.pyx
+++ b/sklearn/neighbors/_dist_metrics.pyx
@@ -235,7 +235,7 @@ cdef class DistanceMetric:
 
         Parameters
         ----------
-        metric : string or class name
+        metric : str or class name
             The distance metric to use
         **kwargs
             additional arguments will be passed to the requested metric
diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py
index 0429dc00c2322..dcc07d25af5fd 100644
--- a/sklearn/preprocessing/tests/test_encoders.py
+++ b/sklearn/preprocessing/tests/test_encoders.py
@@ -800,8 +800,8 @@ def test_encoder_dtypes():
     for X in [
         np.array([[1, 2], [3, 4]], dtype="int64"),
         np.array([[1, 2], [3, 4]], dtype="float64"),
-        np.array([["a", "b"], ["c", "d"]]),  # unicode dtype
-        np.array([[b"a", b"b"], [b"c", b"d"]]),  # string dtype
+        np.array([["a", "b"], ["c", "d"]]),  # str dtype
+        np.array([[b"a", b"b"], [b"c", b"d"]]),  # bytes dtype
         np.array([[1, "a"], [3, "b"]], dtype="object"),
     ]:
         enc.fit(X)
diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py
index 18f98d36871b9..dc50ee70f05f0 100644
--- a/sklearn/tree/_export.py
+++ b/sklearn/tree/_export.py
@@ -839,7 +839,7 @@ def export_graphviz(
 
     Returns
     -------
-    dot_data : string
+    dot_data : str
         String representation of the input tree in GraphViz dot format.
         Only returned if ``out_file`` is None.
 
@@ -961,7 +961,7 @@ def export_text(
 
     Returns
     -------
-    report : string
+    report : str
         Text summary of all the rules in the decision tree.
 
     Examples
diff --git a/sklearn/utils/graph.py b/sklearn/utils/graph.py
index 8eacb17e628c3..020227ba001a9 100644
--- a/sklearn/utils/graph.py
+++ b/sklearn/utils/graph.py
@@ -92,7 +92,7 @@ def graph_shortest_path(dist_matrix, directed=True, method="auto"):
         if False, then find the shortest path on an undirected graph: the
         algorithm can progress from a point to its neighbors and vice versa.
 
-    method : string ['auto'|'FW'|'D']
+    method : {'auto', 'FW', 'D'}, default='auto'
         method to use.  Options are
         'auto' : attempt to choose the best method for the current problem
         'FW' : Floyd-Warshall algorithm.  O[N^3]
diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py
index bd43eeba2a3dd..5d71d28c5ffab 100644
--- a/sklearn/utils/metaestimators.py
+++ b/sklearn/utils/metaestimators.py
@@ -205,7 +205,7 @@ def if_delegate_has_method(delegate):
 
     Parameters
     ----------
-    delegate : string, list of strings or tuple of strings
+    delegate : str, list of str or tuple of str
         Name of the sub-estimator that can be accessed as an attribute of the
         base object. If a list or a tuple of names are provided, the first
         sub-estimator that is an attribute of the base object will be used.