diff --git a/doc/modules/classification_threshold.rst b/doc/modules/classification_threshold.rst
index ec0963d9da9a2..ee7028f469b5f 100644
--- a/doc/modules/classification_threshold.rst
+++ b/doc/modules/classification_threshold.rst
@@ -38,8 +38,8 @@ probability estimates :math:`P(y|X)` and class labels::
     >>> classifier.predict_proba(X[:4])
     array([[0.94     , 0.06     ],
            [0.94     , 0.06     ],
-           [0.0416..., 0.9583...],
-           [0.0416..., 0.9583...]])
+           [0.0416, 0.9583],
+           [0.0416, 0.9583]])
     >>> classifier.predict(X[:4])
     array([0, 0, 1, 1])
 
@@ -112,10 +112,10 @@ a meaningful metric for their use case.
         >>> base_model = LogisticRegression()
         >>> model = TunedThresholdClassifierCV(base_model, scoring=scorer)
         >>> scorer(model.fit(X, y), X, y)
-        0.88...
+        0.88
         >>> # compare it with the internal score found by cross-validation
         >>> model.best_score_
-        np.float64(0.86...)
+        np.float64(0.86)
 
 Important notes regarding the internal cross-validation
 -------------------------------------------------------
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 6489d8f245201..cdf8421a103e3 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -1310,32 +1310,32 @@ ignoring permutations::
   >>> labels_true = [0, 0, 0, 1, 1, 1]
   >>> labels_pred = [0, 0, 1, 1, 2, 2]
   >>> metrics.rand_score(labels_true, labels_pred)
-  0.66...
+  0.66
 
 The Rand index does not ensure to obtain a value close to 0.0 for a
 random labelling. The adjusted Rand index **corrects for chance** and
 will give such a baseline.
 
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
-  0.24...
+  0.24
 
 As with all clustering metrics, one can permute 0 and 1 in the predicted
 labels, rename 2 to 3, and get the same score::
 
   >>> labels_pred = [1, 1, 0, 0, 3, 3]
   >>> metrics.rand_score(labels_true, labels_pred)
-  0.66...
+  0.66
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
-  0.24...
+  0.24
 
 Furthermore, both :func:`rand_score` and :func:`adjusted_rand_score` are
 **symmetric**: swapping the argument does not change the scores. They can
 thus be used as **consensus measures**::
 
   >>> metrics.rand_score(labels_pred, labels_true)
-  0.66...
+  0.66
   >>> metrics.adjusted_rand_score(labels_pred, labels_true)
-  0.24...
+  0.24
 
 Perfect labeling is scored 1.0::
 
@@ -1353,9 +1353,9 @@ will not necessarily be close to zero::
   >>> labels_true = [0, 0, 0, 0, 0, 0, 1, 1]
   >>> labels_pred = [0, 1, 2, 3, 4, 5, 5, 6]
   >>> metrics.rand_score(labels_true, labels_pred)
-  0.39...
+  0.39
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
-  -0.07...
+  -0.072
 
 
 .. topic:: Advantages:
@@ -1466,21 +1466,21 @@ proposed more recently and is **normalized against chance**::
   >>> labels_pred = [0, 0, 1, 1, 2, 2]
 
   >>> metrics.adjusted_mutual_info_score(labels_true, labels_pred)  # doctest: +SKIP
-  0.22504...
+  0.22504
 
 One can permute 0 and 1 in the predicted labels, rename 2 to 3 and get
 the same score::
 
   >>> labels_pred = [1, 1, 0, 0, 3, 3]
   >>> metrics.adjusted_mutual_info_score(labels_true, labels_pred)  # doctest: +SKIP
-  0.22504...
+  0.22504
 
 All, :func:`mutual_info_score`, :func:`adjusted_mutual_info_score` and
 :func:`normalized_mutual_info_score` are symmetric: swapping the argument does
 not change the score. Thus they can be used as a **consensus measure**::
 
   >>> metrics.adjusted_mutual_info_score(labels_pred, labels_true)  # doctest: +SKIP
-  0.22504...
+  0.22504
 
 Perfect labeling is scored 1.0::
 
@@ -1494,14 +1494,14 @@ Perfect labeling is scored 1.0::
 This is not true for ``mutual_info_score``, which is therefore harder to judge::
 
   >>> metrics.mutual_info_score(labels_true, labels_pred)  # doctest: +SKIP
-  0.69...
+  0.69
 
 Bad (e.g. independent labelings) have non-positive scores::
 
   >>> labels_true = [0, 1, 2, 0, 3, 4, 5, 1]
   >>> labels_pred = [1, 1, 0, 0, 2, 2, 2, 2]
   >>> metrics.adjusted_mutual_info_score(labels_true, labels_pred)  # doctest: +SKIP
-  -0.10526...
+  -0.10526
 
 
 .. topic:: Advantages:
@@ -1649,16 +1649,16 @@ We can turn those concept as scores :func:`homogeneity_score` and
   >>> labels_pred = [0, 0, 1, 1, 2, 2]
 
   >>> metrics.homogeneity_score(labels_true, labels_pred)
-  0.66...
+  0.67
 
   >>> metrics.completeness_score(labels_true, labels_pred)
-  0.42...
+  0.42
 
 Their harmonic mean called **V-measure** is computed by
 :func:`v_measure_score`::
 
   >>> metrics.v_measure_score(labels_true, labels_pred)
-  0.51...
+  0.516
 
 This function's formula is as follows:
 
@@ -1667,12 +1667,12 @@ This function's formula is as follows:
 `beta` defaults to a value of 1.0, but for using a value less than 1 for beta::
 
   >>> metrics.v_measure_score(labels_true, labels_pred, beta=0.6)
-  0.54...
+  0.547
 
 more weight will be attributed to homogeneity, and using a value greater than 1::
 
   >>> metrics.v_measure_score(labels_true, labels_pred, beta=1.8)
-  0.48...
+  0.48
 
 more weight will be attributed to completeness.
 
@@ -1683,14 +1683,14 @@ Homogeneity, completeness and V-measure can be computed at once using
 :func:`homogeneity_completeness_v_measure` as follows::
 
   >>> metrics.homogeneity_completeness_v_measure(labels_true, labels_pred)
-  (0.66..., 0.42..., 0.51...)
+  (0.67, 0.42, 0.52)
 
 The following clustering assignment is slightly better, since it is
 homogeneous but not complete::
 
   >>> labels_pred = [0, 0, 0, 1, 2, 2]
   >>> metrics.homogeneity_completeness_v_measure(labels_true, labels_pred)
-  (1.0, 0.68..., 0.81...)
+  (1.0, 0.68, 0.81)
 
 .. note::
 
@@ -1820,7 +1820,7 @@ between two clusters.
   >>> labels_pred = [0, 0, 1, 1, 2, 2]
 
   >>> metrics.fowlkes_mallows_score(labels_true, labels_pred)
-  0.47140...
+  0.47140
 
 One can permute 0 and 1 in the predicted labels, rename 2 to 3 and get
 the same score::
@@ -1828,7 +1828,7 @@ the same score::
   >>> labels_pred = [1, 1, 0, 0, 3, 3]
 
   >>> metrics.fowlkes_mallows_score(labels_true, labels_pred)
-  0.47140...
+  0.47140
 
 Perfect labeling is scored 1.0::
 
@@ -1917,7 +1917,7 @@ cluster analysis.
   >>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X)
   >>> labels = kmeans_model.labels_
   >>> metrics.silhouette_score(X, labels, metric='euclidean')
-  0.55...
+  0.55
 
 .. topic:: Advantages:
 
@@ -1974,7 +1974,7 @@ cluster analysis:
   >>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X)
   >>> labels = kmeans_model.labels_
   >>> metrics.calinski_harabasz_score(X, labels)
-  561.59...
+  561.59
 
 
 .. topic:: Advantages:
@@ -2048,7 +2048,7 @@ cluster analysis as follows:
   >>> kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
   >>> labels = kmeans.labels_
   >>> davies_bouldin_score(X, labels)
-  0.666...
+  0.666
 
 
 .. topic:: Advantages:
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 3db1104602a5d..3ef0d94236aa6 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -504,10 +504,10 @@ on data type or column name::
   ...       OneHotEncoder(),
   ...       make_column_selector(pattern='city', dtype_include=object))])
   >>> ct.fit_transform(X)
-  array([[ 0.904...,  0.      ,  1. ,  0. ,  0. ],
-         [-1.507...,  1.414...,  1. ,  0. ,  0. ],
-         [-0.301...,  0.      ,  0. ,  1. ,  0. ],
-         [ 0.904..., -1.414...,  0. ,  0. ,  1. ]])
+  array([[ 0.904,  0.      ,  1. ,  0. ,  0. ],
+         [-1.507,  1.414,  1. ,  0. ,  0. ],
+         [-0.301,  0.      ,  0. ,  1. ,  0. ],
+         [ 0.904, -1.414,  0. ,  0. ,  1. ]])
 
 Strings can reference columns if the input is a DataFrame, integers are always
 interpreted as the positional columns.
@@ -571,9 +571,9 @@ will use the column names to select the columns::
   >>> X_new = pd.DataFrame({"expert_rating": [5, 6, 1],
   ...                       "ignored_new_col": [1.2, 0.3, -0.1]})
   >>> ct.transform(X_new)
-  array([[ 0.9...],
-         [ 2.1...],
-         [-3.9...]])
+  array([[ 0.9],
+         [ 2.1],
+         [-3.9]])
 
 .. _visualizing_composite_estimators:
 
diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 84a6c1a985a3d..bfdee6c8a043d 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -55,7 +55,7 @@ data for testing (evaluating) our classifier::
 
   >>> clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
   >>> clf.score(X_test, y_test)
-  0.96...
+  0.96
 
 When evaluating different settings ("hyperparameters") for estimators,
 such as the ``C`` setting that must be manually set for an SVM,
@@ -120,7 +120,7 @@ time)::
   >>> clf = svm.SVC(kernel='linear', C=1, random_state=42)
   >>> scores = cross_val_score(clf, X, y, cv=5)
   >>> scores
-  array([0.96..., 1. , 0.96..., 0.96..., 1. ])
+  array([0.96, 1. , 0.96, 0.96, 1. ])
 
 The mean score and the standard deviation are hence given by::
 
@@ -135,7 +135,7 @@ scoring parameter::
   >>> scores = cross_val_score(
   ...     clf, X, y, cv=5, scoring='f1_macro')
   >>> scores
-  array([0.96..., 1.  ..., 0.96..., 0.96..., 1.        ])
+  array([0.96, 1., 0.96, 0.96, 1.])
 
 See :ref:`scoring_parameter` for details.
 In the case of the Iris dataset, the samples are balanced across target
@@ -153,7 +153,7 @@ validation iterator instead, for instance::
   >>> n_samples = X.shape[0]
   >>> cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
   >>> cross_val_score(clf, X, y, cv=cv)
-  array([0.977..., 0.977..., 1.  ..., 0.955..., 1.        ])
+  array([0.977, 0.977, 1., 0.955, 1.])
 
 Another option is to use an iterable yielding (train, test) splits as arrays of
 indices, for example::
@@ -168,7 +168,7 @@ indices, for example::
   ...
   >>> custom_cv = custom_cv_2folds(X)
   >>> cross_val_score(clf, X, y, cv=custom_cv)
-  array([1.        , 0.973...])
+  array([1.        , 0.973])
 
 .. dropdown:: Data transformation with held-out data
 
@@ -185,7 +185,7 @@ indices, for example::
     >>> clf = svm.SVC(C=1).fit(X_train_transformed, y_train)
     >>> X_test_transformed = scaler.transform(X_test)
     >>> clf.score(X_test_transformed, y_test)
-    0.9333...
+    0.9333
 
   A :class:`Pipeline <sklearn.pipeline.Pipeline>` makes it easier to compose
   estimators, providing this behavior under cross-validation::
@@ -193,7 +193,7 @@ indices, for example::
     >>> from sklearn.pipeline import make_pipeline
     >>> clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1))
     >>> cross_val_score(clf, X, y, cv=cv)
-    array([0.977..., 0.933..., 0.955..., 0.933..., 0.977...])
+    array([0.977, 0.933, 0.955, 0.933, 0.977])
 
   See :ref:`combining_estimators`.
 
@@ -237,7 +237,7 @@ predefined scorer names::
     >>> sorted(scores.keys())
     ['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro']
     >>> scores['test_recall_macro']
-    array([0.96..., 1.  ..., 0.96..., 0.96..., 1.        ])
+    array([0.96, 1., 0.96, 0.96, 1.])
 
 Or as a dict mapping scorer name to a predefined or custom scoring function::
 
@@ -250,7 +250,7 @@ Or as a dict mapping scorer name to a predefined or custom scoring function::
     ['fit_time', 'score_time', 'test_prec_macro', 'test_rec_macro',
      'train_prec_macro', 'train_rec_macro']
     >>> scores['train_rec_macro']
-    array([0.97..., 0.97..., 0.99..., 0.98..., 0.98...])
+    array([0.97, 0.97, 0.99, 0.98, 0.98])
 
 Here is an example of ``cross_validate`` using a single metric::
 
diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index b336a25d8048d..f0f14c60e4867 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -241,7 +241,7 @@ The following toy example demonstrates that samples with a sample weight of zero
     >>> gb.predict([[1, 0]])
     array([1])
     >>> gb.predict_proba([[1, 0]])[0, 1]
-    np.float64(0.999...)
+    np.float64(0.999)
 
 As you can see, the `[1, 0]` is comfortably classified as `1` since the first
 two samples are ignored due to their sample weights.
@@ -513,7 +513,7 @@ parameters of these estimators are `n_estimators` and `learning_rate`.
       >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
       ...     max_depth=1, random_state=0).fit(X_train, y_train)
       >>> clf.score(X_test, y_test)
-      0.913...
+      0.913
 
   The number of weak learners (i.e. regression trees) is controlled by the
   parameter ``n_estimators``; :ref:`The size of each tree
@@ -556,7 +556,7 @@ parameters of these estimators are `n_estimators` and `learning_rate`.
       ...     loss='squared_error'
       ... ).fit(X_train, y_train)
       >>> mean_squared_error(y_test, est.predict(X_test))
-      5.00...
+      5.00
 
   The figure below shows the results of applying :class:`GradientBoostingRegressor`
   with least squares loss and 500 base learners to the diabetes dataset
@@ -604,11 +604,11 @@ fitted model.
   ... )
   >>> est = est.fit(X_train, y_train)  # fit with 100 trees
   >>> mean_squared_error(y_test, est.predict(X_test))
-  5.00...
+  5.00
   >>> _ = est.set_params(n_estimators=200, warm_start=True)  # set warm_start and increase num of trees
   >>> _ = est.fit(X_train, y_train) # fit additional 100 trees to est
   >>> mean_squared_error(y_test, est.predict(X_test))
-  3.84...
+  3.84
 
 .. _gradient_boosting_tree_size:
 
@@ -900,7 +900,8 @@ accessed via the ``feature_importances_`` property::
     >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
     ...     max_depth=1, random_state=0).fit(X, y)
     >>> clf.feature_importances_
-    array([0.10..., 0.10..., 0.11..., ...
+    array([0.107, 0.105, 0.113, 0.0987, 0.0947,
+           0.107, 0.0916, 0.0972, 0.0958, 0.0906])
 
 Note that this computation of feature importance is based on entropy, and it
 is distinct from :func:`sklearn.inspection.permutation_importance` which is
@@ -1035,13 +1036,13 @@ in bias::
     ...     random_state=0)
     >>> scores = cross_val_score(clf, X, y, cv=5)
     >>> scores.mean()
-    np.float64(0.98...)
+    np.float64(0.98)
 
     >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None,
     ...     min_samples_split=2, random_state=0)
     >>> scores = cross_val_score(clf, X, y, cv=5)
     >>> scores.mean()
-    np.float64(0.999...)
+    np.float64(0.999)
 
     >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None,
     ...     min_samples_split=2, random_state=0)
@@ -1578,11 +1579,11 @@ Note that it is also possible to get the output of the stacked
 `estimators` using the `transform` method::
 
   >>> reg.transform(X_test[:5])
-  array([[142..., 138..., 146...],
-         [179..., 182..., 151...],
-         [139..., 132..., 158...],
-         [286..., 292..., 225...],
-         [126..., 124..., 164...]])
+  array([[142, 138, 146],
+         [179, 182, 151],
+         [139, 132, 158],
+         [286, 292, 225],
+         [126, 124, 164]])
 
 In practice, a stacking predictor predicts as good as the best predictor of the
 base layer and even sometimes outperforms it by combining the different
@@ -1684,7 +1685,7 @@ learners::
     >>> clf = AdaBoostClassifier(n_estimators=100)
     >>> scores = cross_val_score(clf, X, y, cv=5)
     >>> scores.mean()
-    np.float64(0.9...)
+    np.float64(0.95)
 
 The number of weak learners is controlled by the parameter ``n_estimators``. The
 ``learning_rate`` parameter controls the contribution of the weak learners in
diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst
index 1f2e18dfc31b2..42bcf18e1d572 100644
--- a/doc/modules/feature_extraction.rst
+++ b/doc/modules/feature_extraction.rst
@@ -583,7 +583,7 @@ Again please see the :ref:`reference documentation
   attribute::
 
     >>> transformer.idf_
-    array([1. ..., 2.25..., 1.84...])
+    array([1., 2.25, 1.84])
 
   As tf-idf is very often used for text features, there is also another
   class called :class:`TfidfVectorizer` that combines all the options of
diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst
index aff37f466521c..ffee801f34ccc 100644
--- a/doc/modules/feature_selection.rst
+++ b/doc/modules/feature_selection.rst
@@ -262,7 +262,7 @@ meta-transformer)::
   >>> clf = ExtraTreesClassifier(n_estimators=50)
   >>> clf = clf.fit(X, y)
   >>> clf.feature_importances_  # doctest: +SKIP
-  array([ 0.04...,  0.05...,  0.4...,  0.4...])
+  array([ 0.04,  0.05,  0.4,  0.4])
   >>> model = SelectFromModel(clf, prefit=True)
   >>> X_new = model.transform(X)
   >>> X_new.shape               # doctest: +SKIP
diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst
index d26492402274f..59367b647dd58 100644
--- a/doc/modules/impute.rst
+++ b/doc/modules/impute.rst
@@ -50,7 +50,7 @@ that contain the missing values::
     >>> X = [[np.nan, 2], [6, np.nan], [7, 6]]
     >>> print(imp.transform(X))
     [[4.          2.        ]
-     [6.          3.666...]
+     [6.          3.666]
      [7.          6.        ]]
 
 The :class:`SimpleImputer` class also supports sparse matrices::
diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst
index 77c627d189f2a..6dca0a29af7cb 100644
--- a/doc/modules/learning_curve.rst
+++ b/doc/modules/learning_curve.rst
@@ -83,13 +83,13 @@ The function :func:`validation_curve` can help in this case::
   ...     SVC(kernel="linear"), X, y, param_name="C", param_range=np.logspace(-7, 3, 3),
   ... )
   >>> train_scores
-  array([[0.90..., 0.94..., 0.91..., 0.89..., 0.92...],
-         [0.9... , 0.92..., 0.93..., 0.92..., 0.93...],
-         [0.97..., 1...   , 0.98..., 0.97..., 0.99...]])
+  array([[0.90, 0.94, 0.91, 0.89, 0.92],
+         [0.9 , 0.92, 0.93, 0.92, 0.93],
+         [0.97, 1.  , 0.98, 0.97, 0.99]])
   >>> valid_scores
-  array([[0.9..., 0.9... , 0.9... , 0.96..., 0.9... ],
-         [0.9..., 0.83..., 0.96..., 0.96..., 0.93...],
-         [1.... , 0.93..., 1....  , 1....  , 0.9... ]])
+  array([[0.9, 0.9 , 0.9 , 0.96, 0.9 ],
+         [0.9, 0.83, 0.96, 0.96, 0.93],
+         [1. , 0.93, 1.  , 1.  , 0.9 ]])
 
 If you intend to plot the validation curves only, the class
 :class:`~sklearn.model_selection.ValidationCurveDisplay` is more direct than
@@ -154,13 +154,13 @@ average scores on the validation sets)::
   >>> train_sizes
   array([ 50, 80, 110])
   >>> train_scores
-  array([[0.98..., 0.98 , 0.98..., 0.98..., 0.98...],
-         [0.98..., 1.   , 0.98..., 0.98..., 0.98...],
-         [0.98..., 1.   , 0.98..., 0.98..., 0.99...]])
+  array([[0.98, 0.98 , 0.98, 0.98, 0.98],
+         [0.98, 1.   , 0.98, 0.98, 0.98],
+         [0.98, 1.   , 0.98, 0.98, 0.99]])
   >>> valid_scores
-  array([[1. ,  0.93...,  1. ,  1. ,  0.96...],
-         [1. ,  0.96...,  1. ,  1. ,  0.96...],
-         [1. ,  0.96...,  1. ,  1. ,  0.96...]])
+  array([[1. ,  0.93,  1. ,  1. ,  0.96],
+         [1. ,  0.96,  1. ,  1. ,  0.96],
+         [1. ,  0.96,  1. ,  1. ,  0.96]])
 
 If you intend to plot the learning curves only, the class
 :class:`~sklearn.model_selection.LearningCurveDisplay` will be easier to use.
diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
index 2a06bc5d1ff91..69a2bf9b7f477 100644
--- a/doc/modules/linear_model.rst
+++ b/doc/modules/linear_model.rst
@@ -126,7 +126,7 @@ its ``coef_`` member::
     >>> reg.coef_
     array([0.34545455, 0.34545455])
     >>> reg.intercept_
-    np.float64(0.13636...)
+    np.float64(0.13636)
 
 Note that the class :class:`Ridge` allows for the user to specify that the
 solver be automatically chosen by setting `solver="auto"`. When this option
@@ -627,7 +627,7 @@ function of the norm of its coefficients.
    >>> reg.fit([[0, 0], [1, 1]], [0, 1])
    LassoLars(alpha=0.1)
    >>> reg.coef_
-   array([0.6..., 0.        ])
+   array([0.6, 0.        ])
 
 .. rubric:: Examples
 
@@ -1282,9 +1282,9 @@ Usage example::
     >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2])
     TweedieRegressor(alpha=0.5, link='log', power=1)
     >>> reg.coef_
-    array([0.2463..., 0.4337...])
+    array([0.2463, 0.4337])
     >>> reg.intercept_
-    np.float64(-0.7638...)
+    np.float64(-0.7638)
 
 
 .. rubric:: Examples
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 672ed48f9c0d3..cf168295a6024 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -268,7 +268,7 @@ Usage examples:
     >>> X, y = datasets.load_iris(return_X_y=True)
     >>> clf = svm.SVC(random_state=0)
     >>> cross_val_score(clf, X, y, cv=5, scoring='recall_macro')
-    array([0.96..., 0.96..., 0.96..., 0.93..., 1.        ])
+    array([0.96, 0.96, 0.96, 0.93, 1.        ])
 
 .. note::
 
@@ -389,9 +389,9 @@ You can create your own custom scorer object using
       >>> clf = DummyClassifier(strategy='most_frequent', random_state=0)
       >>> clf = clf.fit(X, y)
       >>> my_custom_loss_func(y, clf.predict(X))
-      0.69...
+      0.69
       >>> score(clf, X, y)
-      -0.69...
+      -0.69
 
 .. dropdown:: Custom scorer objects from scratch
 
@@ -1091,15 +1091,15 @@ Here are some small examples in binary classification::
   >>> metrics.recall_score(y_true, y_pred)
   0.5
   >>> metrics.f1_score(y_true, y_pred)
-  0.66...
+  0.66
   >>> metrics.fbeta_score(y_true, y_pred, beta=0.5)
-  0.83...
+  0.83
   >>> metrics.fbeta_score(y_true, y_pred, beta=1)
-  0.66...
+  0.66
   >>> metrics.fbeta_score(y_true, y_pred, beta=2)
-  0.55...
+  0.55
   >>> metrics.precision_recall_fscore_support(y_true, y_pred, beta=0.5)
-  (array([0.66..., 1.        ]), array([1. , 0.5]), array([0.71..., 0.83...]), array([2, 2]))
+  (array([0.66, 1.        ]), array([1. , 0.5]), array([0.71, 0.83]), array([2, 2]))
 
 
   >>> import numpy as np
@@ -1109,13 +1109,13 @@ Here are some small examples in binary classification::
   >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
   >>> precision, recall, threshold = precision_recall_curve(y_true, y_scores)
   >>> precision
-  array([0.5       , 0.66..., 0.5       , 1.        , 1.        ])
+  array([0.5       , 0.66, 0.5       , 1.        , 1.        ])
   >>> recall
   array([1. , 1. , 0.5, 0.5, 0. ])
   >>> threshold
   array([0.1 , 0.35, 0.4 , 0.8 ])
   >>> average_precision_score(y_true, y_scores)
-  0.83...
+  0.83
 
 
 
@@ -1178,15 +1178,15 @@ Then the metrics are defined as:
   >>> y_true = [0, 1, 2, 0, 1, 2]
   >>> y_pred = [0, 2, 1, 0, 0, 1]
   >>> metrics.precision_score(y_true, y_pred, average='macro')
-  0.22...
+  0.22
   >>> metrics.recall_score(y_true, y_pred, average='micro')
-  0.33...
+  0.33
   >>> metrics.f1_score(y_true, y_pred, average='weighted')
-  0.26...
+  0.267
   >>> metrics.fbeta_score(y_true, y_pred, average='macro', beta=0.5)
-  0.23...
+  0.238
   >>> metrics.precision_recall_fscore_support(y_true, y_pred, beta=0.5, average=None)
-  (array([0.66..., 0.        , 0.        ]), array([1., 0., 0.]), array([0.71..., 0.        , 0.        ]), array([2, 2, 2]...))
+  (array([0.667, 0., 0.]), array([1., 0., 0.]), array([0.714, 0., 0.]), array([2, 2, 2]))
 
 For multiclass classification with a "negative class", it is possible to exclude some labels:
 
@@ -1197,7 +1197,7 @@ For multiclass classification with a "negative class", it is possible to exclude
 Similarly, labels not present in the data sample may be accounted for in macro-averaging.
 
   >>> metrics.precision_score(y_true, y_pred, labels=[0, 1, 2, 3], average='macro')
-  0.166...
+  0.166
 
 .. rubric:: References
 
@@ -1234,7 +1234,7 @@ In the binary case::
   >>> y_pred = np.array([[1, 1, 1],
   ...                    [1, 0, 0]])
   >>> jaccard_score(y_true[0], y_pred[0])
-  0.6666...
+  0.6666
 
 In the 2D comparison case (e.g. image similarity):
 
@@ -1244,9 +1244,9 @@ In the 2D comparison case (e.g. image similarity):
 In the multilabel case with binary label indicators::
 
   >>> jaccard_score(y_true, y_pred, average='samples')
-  0.5833...
+  0.5833
   >>> jaccard_score(y_true, y_pred, average='macro')
-  0.6666...
+  0.6666
   >>> jaccard_score(y_true, y_pred, average=None)
   array([0.5, 0.5, 1. ])
 
@@ -1256,11 +1256,11 @@ multilabel problem::
   >>> y_pred = [0, 2, 1, 2]
   >>> y_true = [0, 1, 2, 2]
   >>> jaccard_score(y_true, y_pred, average=None)
-  array([1. , 0. , 0.33...])
+  array([1. , 0. , 0.33])
   >>> jaccard_score(y_true, y_pred, average='macro')
-  0.44...
+  0.44
   >>> jaccard_score(y_true, y_pred, average='micro')
-  0.33...
+  0.33
 
 .. _hinge_loss:
 
@@ -1313,9 +1313,9 @@ with a svm classifier in a binary class problem::
   LinearSVC(random_state=0)
   >>> pred_decision = est.decision_function([[-2], [3], [0.5]])
   >>> pred_decision
-  array([-2.18...,  2.36...,  0.09...])
+  array([-2.18,  2.36,  0.09])
   >>> hinge_loss([-1, 1, 1], pred_decision)
-  0.3...
+  0.3
 
 Here is an example demonstrating the use of the :func:`hinge_loss` function
 with a svm classifier in a multiclass problem::
@@ -1329,7 +1329,7 @@ with a svm classifier in a multiclass problem::
   >>> pred_decision = est.decision_function([[-1], [2], [3]])
   >>> y_true = [0, 2, 3]
   >>> hinge_loss(y_true, pred_decision, labels=labels)
-  0.56...
+  0.56
 
 .. _log_loss:
 
@@ -1379,7 +1379,7 @@ method.
     >>> y_true = [0, 0, 1, 1]
     >>> y_pred = [[.9, .1], [.8, .2], [.3, .7], [.01, .99]]
     >>> log_loss(y_true, y_pred)
-    0.1738...
+    0.1738
 
 The first ``[.9, .1]`` in ``y_pred`` denotes 90% probability that the first
 sample has label 0.  The log loss is non-negative.
@@ -1445,7 +1445,7 @@ function:
     >>> y_true = [+1, +1, +1, -1]
     >>> y_pred = [+1, -1, +1, +1]
     >>> matthews_corrcoef(y_true, y_pred)
-    -0.33...
+    -0.33
 
 .. rubric:: References
 
@@ -1640,12 +1640,12 @@ We can use the probability estimates corresponding to `clf.classes_[1]`.
 
   >>> y_score = clf.predict_proba(X)[:, 1]
   >>> roc_auc_score(y, y_score)
-  0.99...
+  0.99
 
 Otherwise, we can use the non-thresholded decision values
 
   >>> roc_auc_score(y, clf.decision_function(X))
-  0.99...
+  0.99
 
 .. _roc_auc_multiclass:
 
@@ -1732,7 +1732,7 @@ class with the greater label for each output.
   >>> clf = MultiOutputClassifier(inner_clf).fit(X, y)
   >>> y_score = np.transpose([y_pred[:, 1] for y_pred in clf.predict_proba(X)])
   >>> roc_auc_score(y, y_score, average=None)
-  array([0.82..., 0.85..., 0.93..., 0.86..., 0.94...])
+  array([0.828, 0.851, 0.94, 0.87, 0.95])
 
 And the decision values do not require such processing.
 
@@ -1740,7 +1740,7 @@ And the decision values do not require such processing.
   >>> clf = RidgeClassifierCV().fit(X, y)
   >>> y_score = clf.decision_function(X)
   >>> roc_auc_score(y, y_score, average=None)
-  array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])
+  array([0.82, 0.85, 0.93, 0.87, 0.94])
 
 .. rubric:: Examples
 
@@ -1980,7 +1980,7 @@ two above definitions to follow.
     ...    [[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.2, 0.2, 0.6]],
     ...    labels=["eggs", "ham", "spam"],
     ... )
-    0.146...
+    0.146
 
 The Brier score can be used to assess how well a classifier is calibrated.
 However, a lower Brier score loss does not always mean a better calibration.
@@ -2199,7 +2199,7 @@ of 0.0.
     ...     [0.01, 0.01, 0.98],
     ... ]
     >>> d2_log_loss_score(y_true, y_pred)
-    0.981...
+    0.981
     >>> y_true = [1, 2, 3]
     >>> y_pred = [
     ...     [0.1, 0.6, 0.3],
@@ -2207,7 +2207,7 @@ of 0.0.
     ...     [0.4, 0.5, 0.1],
     ... ]
     >>> d2_log_loss_score(y_true, y_pred)
-    -0.552...
+    -0.552
 
 
 .. _multilabel_ranking_metrics:
@@ -2306,7 +2306,7 @@ Here is a small example of usage of this function::
     >>> y_true = np.array([[1, 0, 0], [0, 0, 1]])
     >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])
     >>> label_ranking_average_precision_score(y_true, y_score)
-    0.416...
+    0.416
 
 .. _label_ranking_loss:
 
@@ -2341,7 +2341,7 @@ Here is a small example of usage of this function::
     >>> y_true = np.array([[1, 0, 0], [0, 0, 1]])
     >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])
     >>> label_ranking_loss(y_true, y_score)
-    0.75...
+    0.75
     >>> # With the following prediction, we have perfect and minimal loss
     >>> y_score = np.array([[1.0, 0.1, 0.2], [0.1, 0.2, 0.9]])
     >>> label_ranking_loss(y_true, y_score)
@@ -2499,19 +2499,19 @@ Here is a small example of usage of the :func:`r2_score` function::
   >>> y_true = [3, -0.5, 2, 7]
   >>> y_pred = [2.5, 0.0, 2, 8]
   >>> r2_score(y_true, y_pred)
-  0.948...
+  0.948
   >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
   >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
   >>> r2_score(y_true, y_pred, multioutput='variance_weighted')
-  0.938...
+  0.938
   >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
   >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
   >>> r2_score(y_true, y_pred, multioutput='uniform_average')
-  0.936...
+  0.936
   >>> r2_score(y_true, y_pred, multioutput='raw_values')
-  array([0.965..., 0.908...])
+  array([0.965, 0.908])
   >>> r2_score(y_true, y_pred, multioutput=[0.3, 0.7])
-  0.925...
+  0.925
   >>> y_true = [-2, -2, -2]
   >>> y_pred = [-2, -2, -2]
   >>> r2_score(y_true, y_pred)
@@ -2563,7 +2563,7 @@ Here is a small example of usage of the :func:`mean_absolute_error` function::
   >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
   array([0.5, 1. ])
   >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
-  0.85...
+  0.85
 
 .. _mean_squared_error:
 
@@ -2594,7 +2594,7 @@ function::
   >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
   >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
   >>> mean_squared_error(y_true, y_pred)
-  0.7083...
+  0.7083
 
 .. rubric:: Examples
 
@@ -2636,11 +2636,11 @@ function::
   >>> y_true = [3, 5, 2.5, 7]
   >>> y_pred = [2.5, 5, 4, 8]
   >>> mean_squared_log_error(y_true, y_pred)
-  0.039...
+  0.0397
   >>> y_true = [[0.5, 1], [1, 2], [7, 6]]
   >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
   >>> mean_squared_log_error(y_true, y_pred)
-  0.044...
+  0.044
 
 The root mean squared logarithmic error (RMSLE) is available through the
 :func:`root_mean_squared_log_error` function.
@@ -2674,7 +2674,7 @@ function::
   >>> y_true = [1, 10, 1e6]
   >>> y_pred = [0.9, 15, 1.2e6]
   >>> mean_absolute_percentage_error(y_true, y_pred)
-  0.2666...
+  0.2667
 
 In above example, if we had used `mean_absolute_error`, it would have ignored
 the small magnitude values and only reflected the error in prediction of highest
@@ -2802,13 +2802,13 @@ function::
     >>> y_true = [3, -0.5, 2, 7]
     >>> y_pred = [2.5, 0.0, 2, 8]
     >>> explained_variance_score(y_true, y_pred)
-    0.957...
+    0.957
     >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
     >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
     >>> explained_variance_score(y_true, y_pred, multioutput='raw_values')
-    array([0.967..., 1.        ])
+    array([0.967, 1.        ])
     >>> explained_variance_score(y_true, y_pred, multioutput=[0.3, 0.7])
-    0.990...
+    0.990
     >>> y_true = [-2, -2, -2]
     >>> y_pred = [-2, -2, -2]
     >>> explained_variance_score(y_true, y_pred)
@@ -2880,16 +2880,16 @@ prediction difference of the second point,::
 If we increase ``power`` to 1,::
 
     >>> mean_tweedie_deviance([1.0], [1.5], power=1)
-    0.18...
+    0.189
     >>> mean_tweedie_deviance([100.], [150.], power=1)
-    18.9...
+    18.9
 
 the difference in errors decreases. Finally, by setting, ``power=2``::
 
     >>> mean_tweedie_deviance([1.0], [1.5], power=2)
-    0.14...
+    0.144
     >>> mean_tweedie_deviance([100.], [150.], power=2)
-    0.14...
+    0.144
 
 we would get identical errors. The deviance when ``power=2`` is thus only
 sensitive to relative errors.
@@ -2916,13 +2916,13 @@ Here is a small example of usage of the :func:`mean_pinball_loss` function::
   >>> from sklearn.metrics import mean_pinball_loss
   >>> y_true = [1, 2, 3]
   >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)
-  0.03...
+  0.033
   >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)
-  0.3...
+  0.3
   >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)
-  0.3...
+  0.3
   >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)
-  0.03...
+  0.033
   >>> mean_pinball_loss(y_true, y_true, alpha=0.1)
   0.0
   >>> mean_pinball_loss(y_true, y_true, alpha=0.9)
@@ -2947,7 +2947,7 @@ quantile regressor via cross-validation:
   ...     random_state=0,
   ... )
   >>> cross_val_score(estimator, X, y, cv=5, scoring=mean_pinball_loss_95p)
-  array([13.6..., 9.7..., 23.3..., 9.5..., 10.4...])
+  array([13.6, 9.7, 23.3, 9.5, 10.4])
 
 It is also possible to build scorer objects for hyper-parameter tuning. The
 sign of the loss must be switched to ensure that greater means better as
@@ -3034,7 +3034,7 @@ of 0.0.
     >>> y_true = [3, -0.5, 2, 7]
     >>> y_pred = [2.5, 0.0, 2, 8]
     >>> d2_absolute_error_score(y_true, y_pred)
-    0.764...
+    0.764
     >>> y_true = [1, 2, 3]
     >>> y_pred = [1, 2, 3]
     >>> d2_absolute_error_score(y_true, y_pred)
@@ -3172,19 +3172,19 @@ Next, let's compare the accuracy of ``SVC`` and ``most_frequent``::
   >>> from sklearn.svm import SVC
   >>> clf = SVC(kernel='linear', C=1).fit(X_train, y_train)
   >>> clf.score(X_test, y_test)
-  0.63...
+  0.63
   >>> clf = DummyClassifier(strategy='most_frequent', random_state=0)
   >>> clf.fit(X_train, y_train)
   DummyClassifier(random_state=0, strategy='most_frequent')
   >>> clf.score(X_test, y_test)
-  0.57...
+  0.579
 
 We see that ``SVC`` doesn't do much better than a dummy classifier. Now, let's
 change the kernel::
 
   >>> clf = SVC(kernel='rbf', C=1).fit(X_train, y_train)
   >>> clf.score(X_test, y_test)
-  0.94...
+  0.94
 
 We see that the accuracy was boosted to almost 100%.  A cross validation
 strategy is recommended for a better estimate of the accuracy, if it
diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst
index 1c0802f0ac92f..13611b7f52775 100644
--- a/doc/modules/neural_networks_supervised.rst
+++ b/doc/modules/neural_networks_supervised.rst
@@ -116,8 +116,8 @@ classification, it minimizes the Cross-Entropy loss function, giving a vector
 of probability estimates :math:`P(y|x)` per sample :math:`x`::
 
     >>> clf.predict_proba([[2., 2.], [1., 2.]])
-    array([[1.967...e-04, 9.998...-01],
-           [1.967...e-04, 9.998...-01]])
+    array([[1.967e-04, 9.998e-01],
+           [1.967e-04, 9.998e-01]])
 
 :class:`MLPClassifier` supports multi-class classification by
 applying `Softmax <https://en.wikipedia.org/wiki/Softmax_activation_function>`_
diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst
index 2c7f7af1fe130..69dff95518c41 100644
--- a/doc/modules/preprocessing.rst
+++ b/doc/modules/preprocessing.rst
@@ -57,16 +57,16 @@ dataset::
   StandardScaler()
 
   >>> scaler.mean_
-  array([1. ..., 0. ..., 0.33...])
+  array([1., 0., 0.33])
 
   >>> scaler.scale_
-  array([0.81..., 0.81..., 1.24...])
+  array([0.81, 0.81, 1.24])
 
   >>> X_scaled = scaler.transform(X_train)
   >>> X_scaled
-  array([[ 0.  ..., -1.22...,  1.33...],
-         [ 1.22...,  0.  ..., -0.26...],
-         [-1.22...,  1.22..., -1.06...]])
+  array([[ 0.  , -1.22,  1.33 ],
+         [ 1.22,  0.  , -0.267],
+         [-1.22,  1.22, -1.06 ]])
 
 ..
         >>> import numpy as np
@@ -147,10 +147,10 @@ It is possible to introspect the scaler attributes to find about the exact
 nature of the transformation learned on the training data::
 
   >>> min_max_scaler.scale_
-  array([0.5       , 0.5       , 0.33...])
+  array([0.5       , 0.5       , 0.33])
 
   >>> min_max_scaler.min_
-  array([0.        , 0.5       , 0.33...])
+  array([0.        , 0.5       , 0.33])
 
 If :class:`MinMaxScaler` is given an explicit ``feature_range=(min, max)`` the
 full formula is::
@@ -351,7 +351,7 @@ previously defined::
 
   >>> np.percentile(X_train_trans[:, 0], [0, 25, 50, 75, 100])
   ... # doctest: +SKIP
-  array([ 0.00... ,  0.24...,  0.49...,  0.73...,  0.99... ])
+  array([ 0.00 ,  0.24,  0.49,  0.73,  0.99 ])
 
 This can be confirmed on an independent testing set with similar remarks::
 
@@ -360,7 +360,7 @@ This can be confirmed on an independent testing set with similar remarks::
   array([ 4.4  ,  5.125,  5.75 ,  6.175,  7.3  ])
   >>> np.percentile(X_test_trans[:, 0], [0, 25, 50, 75, 100])
   ... # doctest: +SKIP
-  array([ 0.01...,  0.25...,  0.46...,  0.60... ,  0.94...])
+  array([ 0.01,  0.25,  0.46,  0.60 ,  0.94])
 
 Mapping to a Gaussian distribution
 ----------------------------------
@@ -401,13 +401,13 @@ the Yeo-Johnson transform and the Box-Cox transform.
     >>> pt = preprocessing.PowerTransformer(method='box-cox', standardize=False)
     >>> X_lognormal = np.random.RandomState(616).lognormal(size=(3, 3))
     >>> X_lognormal
-    array([[1.28..., 1.18..., 0.84...],
-          [0.94..., 1.60..., 0.38...],
-          [1.35..., 0.21..., 1.09...]])
+    array([[1.28, 1.18 , 0.84 ],
+           [0.94, 1.60 , 0.388],
+           [1.35, 0.217, 1.09 ]])
     >>> pt.fit_transform(X_lognormal)
-    array([[ 0.49...,  0.17..., -0.15...],
-          [-0.05...,  0.58..., -0.57...],
-          [ 0.69..., -0.84...,  0.10...]])
+    array([[ 0.49 ,  0.179, -0.156],
+           [-0.051,  0.589, -0.576],
+           [ 0.69 , -0.849,  0.101]])
 
   While the above example sets the `standardize` option to `False`,
   :class:`PowerTransformer` will apply zero-mean, unit-variance normalization
@@ -470,9 +470,9 @@ operation on a single array-like dataset, either using the ``l1``, ``l2``, or
   >>> X_normalized = preprocessing.normalize(X, norm='l2')
 
   >>> X_normalized
-  array([[ 0.40..., -0.40...,  0.81...],
-         [ 1.  ...,  0.  ...,  0.  ...],
-         [ 0.  ...,  0.70..., -0.70...]])
+  array([[ 0.408, -0.408,  0.816],
+         [ 1.   ,  0.   ,  0.   ],
+         [ 0.   ,  0.707, -0.707]])
 
 The ``preprocessing`` module further provides a utility class
 :class:`Normalizer` that implements the same operation using the
@@ -490,12 +490,12 @@ This class is hence suitable for use in the early steps of a
 The normalizer instance can then be used on sample vectors as any transformer::
 
   >>> normalizer.transform(X)
-  array([[ 0.40..., -0.40...,  0.81...],
-         [ 1.  ...,  0.  ...,  0.  ...],
-         [ 0.  ...,  0.70..., -0.70...]])
+  array([[ 0.408, -0.408,  0.816],
+         [ 1.   ,  0.   ,  0.   ],
+         [ 0.   ,  0.707, -0.707]])
 
   >>> normalizer.transform([[-1.,  1., 0.]])
-  array([[-0.70...,  0.70...,  0.  ...]])
+  array([[-0.707,  0.707,  0.]])
 
 
 Note: L2 normalization is also known as spatial sign preprocessing.
diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst
index b97c6d135dcfe..103ae205387e3 100644
--- a/doc/modules/sgd.rst
+++ b/doc/modules/sgd.rst
@@ -91,12 +91,12 @@ SGD fits a linear model to the training data. The ``coef_`` attribute holds
 the model parameters::
 
     >>> clf.coef_
-    array([[9.9..., 9.9...]])
+    array([[9.9, 9.9]])
 
 The ``intercept_`` attribute holds the intercept (aka offset or bias)::
 
     >>> clf.intercept_
-    array([-9.9...])
+    array([-9.9])
 
 Whether or not the model should use an intercept, i.e. a biased
 hyperplane, is controlled by the parameter ``fit_intercept``.
@@ -106,7 +106,7 @@ the coefficients and the input sample, plus the intercept) is given by
 :meth:`SGDClassifier.decision_function`::
 
     >>> clf.decision_function([[2., 2.]])
-    array([29.6...])
+    array([29.6])
 
 The concrete loss function can be set via the ``loss``
 parameter. :class:`SGDClassifier` supports the following loss functions:
@@ -131,7 +131,7 @@ Using ``loss="log_loss"`` or ``loss="modified_huber"`` enables the
 
     >>> clf = SGDClassifier(loss="log_loss", max_iter=5).fit(X, y)
     >>> clf.predict_proba([[1., 1.]]) # doctest: +SKIP
-    array([[0.00..., 0.99...]])
+    array([[0.00, 0.99]])
 
 The concrete penalty can be set via the ``penalty`` parameter.
 SGD supports the following penalties:
diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 70337f8c82be4..5b2bca2edfcc0 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -225,11 +225,11 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator)
     >>> len(calibrated_clf.calibrated_classifiers_)
     3
     >>> calibrated_clf.predict_proba(X)[:5, :]
-    array([[0.110..., 0.889...],
-           [0.072..., 0.927...],
-           [0.928..., 0.071...],
-           [0.928..., 0.071...],
-           [0.071..., 0.928...]])
+    array([[0.110, 0.889],
+           [0.072, 0.927],
+           [0.928, 0.072],
+           [0.928, 0.072],
+           [0.072, 0.928]])
     >>> from sklearn.model_selection import train_test_split
     >>> X, y = make_classification(n_samples=100, n_features=2,
     ...                            n_redundant=0, random_state=42)
@@ -246,7 +246,7 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator)
     >>> len(calibrated_clf.calibrated_classifiers_)
     1
     >>> calibrated_clf.predict_proba([[-0.5, 0.5]])
-    array([[0.936..., 0.063...]])
+    array([[0.936, 0.063]])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py
index c122692cd0c2a..1ba4409d14698 100644
--- a/sklearn/cluster/_mean_shift.py
+++ b/sklearn/cluster/_mean_shift.py
@@ -82,7 +82,7 @@ def estimate_bandwidth(X, *, quantile=0.3, n_samples=None, random_state=0, n_job
     >>> X = np.array([[1, 1], [2, 1], [1, 0],
     ...               [4, 7], [3, 5], [3, 6]])
     >>> estimate_bandwidth(X, quantile=0.5)
-    np.float64(1.61...)
+    np.float64(1.61)
     """
     X = check_array(X)
 
@@ -227,8 +227,8 @@ def mean_shift(
     ...               [4, 7], [3, 5], [3, 6]])
     >>> cluster_centers, labels = mean_shift(X, bandwidth=2)
     >>> cluster_centers
-    array([[3.33..., 6.     ],
-           [1.33..., 0.66...]])
+    array([[3.33, 6.     ],
+           [1.33, 0.66]])
     >>> labels
     array([1, 1, 1, 0, 0, 0])
     """
diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py
index 4b33f03f526fa..0cd32023de46c 100644
--- a/sklearn/cluster/_optics.py
+++ b/sklearn/cluster/_optics.py
@@ -585,10 +585,10 @@ def compute_optics_graph(
     >>> ordering
     array([0, 1, 2, 5, 3, 4])
     >>> core_distances
-    array([3.16..., 1.41..., 1.41..., 1.        , 1.        ,
-           4.12...])
+    array([3.16, 1.41, 1.41, 1.        , 1.        ,
+           4.12])
     >>> reachability
-    array([       inf, 3.16..., 1.41..., 4.12..., 1.        ,
+    array([       inf, 3.16, 1.41, 4.12, 1.        ,
            5.        ])
     >>> predecessor
     array([-1,  0,  1,  5,  3,  2])
diff --git a/sklearn/conftest.py b/sklearn/conftest.py
index 8907616bde5b0..d5255ead1ffdc 100644
--- a/sklearn/conftest.py
+++ b/sklearn/conftest.py
@@ -372,3 +372,4 @@ def print_changed_only_false():
 if dt_config is not None:
     # Strict mode to differentiate between 3.14 and np.float64(3.14)
     dt_config.strict_check = True
+    # dt_config.rtol = 0.01
diff --git a/sklearn/covariance/_elliptic_envelope.py b/sklearn/covariance/_elliptic_envelope.py
index 81ae86b4ad76e..71fb72ccd683d 100644
--- a/sklearn/covariance/_elliptic_envelope.py
+++ b/sklearn/covariance/_elliptic_envelope.py
@@ -135,10 +135,10 @@ class EllipticEnvelope(OutlierMixin, MinCovDet):
     ...              [3, 3]])
     array([ 1, -1])
     >>> cov.covariance_
-    array([[0.7411..., 0.2535...],
-           [0.2535..., 0.3053...]])
+    array([[0.7411, 0.2535],
+           [0.2535, 0.3053]])
     >>> cov.location_
-    array([0.0813... , 0.0427...])
+    array([0.0813 , 0.0427])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py
index 955046fa37d4b..7c4db63b4e363 100644
--- a/sklearn/covariance/_empirical_covariance.py
+++ b/sklearn/covariance/_empirical_covariance.py
@@ -177,10 +177,10 @@ class EmpiricalCovariance(BaseEstimator):
     ...                             size=500)
     >>> cov = EmpiricalCovariance().fit(X)
     >>> cov.covariance_
-    array([[0.7569..., 0.2818...],
-           [0.2818..., 0.3928...]])
+    array([[0.7569, 0.2818],
+           [0.2818, 0.3928]])
     >>> cov.location_
-    array([0.0622..., 0.0193...])
+    array([0.0622, 0.0193])
     """
 
     # X_test should have been called X
diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py
index b3f653de64149..e94663120216d 100644
--- a/sklearn/covariance/_graph_lasso.py
+++ b/sklearn/covariance/_graph_lasso.py
@@ -334,9 +334,9 @@ def graphical_lasso(
     >>> emp_cov = empirical_covariance(X, assume_centered=True)
     >>> emp_cov, _ = graphical_lasso(emp_cov, alpha=0.05)
     >>> emp_cov
-    array([[ 1.68...,  0.21..., -0.20...],
-           [ 0.21...,  0.22..., -0.08...],
-           [-0.20..., -0.08...,  0.23...]])
+    array([[ 1.687 ,  0.212 , -0.209 ],
+           [ 0.212 ,  0.221 , -0.0817],
+           [-0.209 , -0.0817,  0.232 ]])
     """
     model = GraphicalLasso(
         alpha=alpha,
diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py
index 559401f7bbc5b..f386879e693fb 100644
--- a/sklearn/covariance/_robust_covariance.py
+++ b/sklearn/covariance/_robust_covariance.py
@@ -697,10 +697,10 @@ class MinCovDet(EmpiricalCovariance):
     ...                                   size=500)
     >>> cov = MinCovDet(random_state=0).fit(X)
     >>> cov.covariance_
-    array([[0.7411..., 0.2535...],
-           [0.2535..., 0.3053...]])
+    array([[0.7411, 0.2535],
+           [0.2535, 0.3053]])
     >>> cov.location_
-    array([0.0813... , 0.0427...])
+    array([0.0813 , 0.0427])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index d3197e1b2e6fe..99d6f70f57d6e 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -142,8 +142,8 @@ def shrunk_covariance(emp_cov, shrinkage=0.1):
     >>> rng = np.random.RandomState(0)
     >>> X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=500)
     >>> shrunk_covariance(empirical_covariance(X))
-    array([[0.73..., 0.25...],
-           [0.25..., 0.41...]])
+    array([[0.739, 0.254],
+           [0.254, 0.411]])
     """
     emp_cov = check_array(emp_cov, allow_nd=True)
     n_features = emp_cov.shape[-1]
@@ -234,10 +234,10 @@ class ShrunkCovariance(EmpiricalCovariance):
     ...                                   size=500)
     >>> cov = ShrunkCovariance().fit(X)
     >>> cov.covariance_
-    array([[0.7387..., 0.2536...],
-           [0.2536..., 0.4110...]])
+    array([[0.7387, 0.2536],
+           [0.2536, 0.4110]])
     >>> cov.location_
-    array([0.0622..., 0.0193...])
+    array([0.0622, 0.0193])
     """
 
     _parameter_constraints: dict = {
@@ -336,7 +336,7 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
     >>> X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=50)
     >>> shrinkage_coefficient = ledoit_wolf_shrinkage(X)
     >>> shrinkage_coefficient
-    np.float64(0.23...)
+    np.float64(0.23)
     """
     X = check_array(X)
     # for only one feature, the result is the same whatever the shrinkage
@@ -450,10 +450,10 @@ def ledoit_wolf(X, *, assume_centered=False, block_size=1000):
     >>> X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=50)
     >>> covariance, shrinkage = ledoit_wolf(X)
     >>> covariance
-    array([[0.44..., 0.16...],
-           [0.16..., 0.80...]])
+    array([[0.44, 0.16],
+           [0.16, 0.80]])
     >>> shrinkage
-    np.float64(0.23...)
+    np.float64(0.23)
     """
     estimator = LedoitWolf(
         assume_centered=assume_centered,
@@ -559,10 +559,10 @@ class LedoitWolf(EmpiricalCovariance):
     ...                                   size=50)
     >>> cov = LedoitWolf().fit(X)
     >>> cov.covariance_
-    array([[0.4406..., 0.1616...],
-           [0.1616..., 0.8022...]])
+    array([[0.4406, 0.1616],
+           [0.1616, 0.8022]])
     >>> cov.location_
-    array([ 0.0595... , -0.0075...])
+    array([ 0.0595 , -0.0075])
 
     See also :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py`
     and :ref:`sphx_glr_auto_examples_covariance_plot_lw_vs_oas.py`
@@ -674,10 +674,10 @@ def oas(X, *, assume_centered=False):
     >>> X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=500)
     >>> shrunk_cov, shrinkage = oas(X)
     >>> shrunk_cov
-    array([[0.7533..., 0.2763...],
-           [0.2763..., 0.3964...]])
+    array([[0.7533, 0.2763],
+           [0.2763, 0.3964]])
     >>> shrinkage
-    np.float64(0.0195...)
+    np.float64(0.0195)
     """
     estimator = OAS(
         assume_centered=assume_centered,
@@ -777,13 +777,13 @@ class OAS(EmpiricalCovariance):
     ...                             size=500)
     >>> oas = OAS().fit(X)
     >>> oas.covariance_
-    array([[0.7533..., 0.2763...],
-           [0.2763..., 0.3964...]])
+    array([[0.7533, 0.2763],
+           [0.2763, 0.3964]])
     >>> oas.precision_
-    array([[ 1.7833..., -1.2431... ],
-           [-1.2431...,  3.3889...]])
+    array([[ 1.7833, -1.2431 ],
+           [-1.2431,  3.3889]])
     >>> oas.shrinkage_
-    np.float64(0.0195...)
+    np.float64(0.0195)
 
     See also :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py`
     and :ref:`sphx_glr_auto_examples_covariance_plot_lw_vs_oas.py`
diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py
index 04810675f66a4..e2d80422e7df7 100644
--- a/sklearn/datasets/_samples_generator.py
+++ b/sklearn/datasets/_samples_generator.py
@@ -739,13 +739,13 @@ def make_regression(
     >>> from sklearn.datasets import make_regression
     >>> X, y = make_regression(n_samples=5, n_features=2, noise=1, random_state=42)
     >>> X
-    array([[ 0.4967..., -0.1382... ],
-        [ 0.6476...,  1.523...],
-        [-0.2341..., -0.2341...],
-        [-0.4694...,  0.5425...],
-        [ 1.579...,  0.7674...]])
+    array([[ 0.4967, -0.1382],
+           [ 0.6476,  1.523 ],
+           [-0.2341, -0.2341],
+           [-0.4694,  0.5425],
+           [ 1.579 ,  0.7674]])
     >>> y
-    array([  6.737...,  37.79..., -10.27...,   0.4017...,   42.22...])
+    array([  6.737,  37.79, -10.27,   0.4017,   42.22])
     """
     n_informative = min(n_features, n_informative)
     generator = check_random_state(random_state)
@@ -1228,7 +1228,7 @@ def make_friedman1(n_samples=100, n_features=10, *, noise=0.0, random_state=None
     >>> y.shape
     (100,)
     >>> list(y[:3])
-    [np.float64(16.8...), np.float64(5.8...), np.float64(9.4...)]
+    [np.float64(16.8), np.float64(5.87), np.float64(9.46)]
     """
     generator = check_random_state(random_state)
 
@@ -1310,7 +1310,7 @@ def make_friedman2(n_samples=100, *, noise=0.0, random_state=None):
     >>> y.shape
     (100,)
     >>> list(y[:3])
-    [np.float64(1229.4...), np.float64(27.0...), np.float64(65.6...)]
+    [np.float64(1229.4), np.float64(27.0), np.float64(65.6)]
     """
     generator = check_random_state(random_state)
 
@@ -1394,7 +1394,7 @@ def make_friedman3(n_samples=100, *, noise=0.0, random_state=None):
     >>> y.shape
     (100,)
     >>> list(y[:3])
-    [np.float64(1.5...), np.float64(0.9...), np.float64(0.4...)]
+    [np.float64(1.54), np.float64(0.956), np.float64(0.414)]
     """
     generator = check_random_state(random_state)
 
@@ -1718,8 +1718,8 @@ def make_spd_matrix(n_dim, *, random_state=None):
     --------
     >>> from sklearn.datasets import make_spd_matrix
     >>> make_spd_matrix(n_dim=2, random_state=42)
-    array([[2.09..., 0.34...],
-           [0.34..., 0.21...]])
+    array([[2.093, 0.346],
+           [0.346, 0.218]])
     """
     generator = check_random_state(random_state)
 
diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
index 2e724c856b967..ae40e28e9f013 100644
--- a/sklearn/decomposition/_dict_learning.py
+++ b/sklearn/decomposition/_dict_learning.py
@@ -842,7 +842,7 @@ def dict_learning_online(
     We can check the level of sparsity of `U`:
 
     >>> np.mean(U == 0)
-    np.float64(0.53...)
+    np.float64(0.53)
 
     We can compare the average squared euclidean norm of the reconstruction
     error of the sparse coded signal relative to the squared euclidean norm of
@@ -850,7 +850,7 @@ def dict_learning_online(
 
     >>> X_hat = U @ V
     >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
-    np.float64(0.05...)
+    np.float64(0.053)
     """
     transform_algorithm = "lasso_" + method
 
@@ -1033,7 +1033,7 @@ def dict_learning(
     We can check the level of sparsity of `U`:
 
     >>> np.mean(U == 0)
-    np.float64(0.6...)
+    np.float64(0.62)
 
     We can compare the average squared euclidean norm of the reconstruction
     error of the sparse coded signal relative to the squared euclidean norm of
@@ -1041,7 +1041,7 @@ def dict_learning(
 
     >>> X_hat = U @ V
     >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
-    np.float64(0.01...)
+    np.float64(0.0192)
     """
     estimator = DictionaryLearning(
         n_components=n_components,
@@ -1587,7 +1587,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
     We can check the level of sparsity of `X_transformed`:
 
     >>> np.mean(X_transformed == 0)
-    np.float64(0.52...)
+    np.float64(0.527)
 
     We can compare the average squared euclidean norm of the reconstruction
     error of the sparse coded signal relative to the squared euclidean norm of
@@ -1595,7 +1595,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
 
     >>> X_hat = X_transformed @ dict_learner.components_
     >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
-    np.float64(0.05...)
+    np.float64(0.056)
     """
 
     _parameter_constraints: dict = {
@@ -1954,7 +1954,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):
 
     >>> X_hat = X_transformed @ dict_learner.components_
     >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
-    np.float64(0.052...)
+    np.float64(0.052)
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 41b0ac5394be1..1b0d21d5d38be 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -353,25 +353,25 @@ class PCA(_BasePCA):
     >>> pca.fit(X)
     PCA(n_components=2)
     >>> print(pca.explained_variance_ratio_)
-    [0.9924... 0.0075...]
+    [0.9924 0.0075]
     >>> print(pca.singular_values_)
-    [6.30061... 0.54980...]
+    [6.30061 0.54980]
 
     >>> pca = PCA(n_components=2, svd_solver='full')
     >>> pca.fit(X)
     PCA(n_components=2, svd_solver='full')
     >>> print(pca.explained_variance_ratio_)
-    [0.9924... 0.00755...]
+    [0.9924 0.00755]
     >>> print(pca.singular_values_)
-    [6.30061... 0.54980...]
+    [6.30061 0.54980]
 
     >>> pca = PCA(n_components=1, svd_solver='arpack')
     >>> pca.fit(X)
     PCA(n_components=1, svd_solver='arpack')
     >>> print(pca.explained_variance_ratio_)
-    [0.99244...]
+    [0.99244]
     >>> print(pca.singular_values_)
-    [6.30061...]
+    [6.30061]
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py
index d32874cb54616..2717230c9df92 100644
--- a/sklearn/decomposition/_sparse_pca.py
+++ b/sklearn/decomposition/_sparse_pca.py
@@ -267,7 +267,7 @@ class SparsePCA(_BaseSparsePCA):
     (200, 5)
     >>> # most values in the components_ are zero (sparsity)
     >>> np.mean(transformer.components_ == 0)
-    np.float64(0.9666...)
+    np.float64(0.9666)
     """
 
     _parameter_constraints: dict = {
@@ -469,7 +469,7 @@ class MiniBatchSparsePCA(_BaseSparsePCA):
     (200, 5)
     >>> # most values in the components_ are zero (sparsity)
     >>> np.mean(transformer.components_ == 0)
-    np.float64(0.9...)
+    np.float64(0.9)
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index 26127b2b522fd..6165aba4e8db6 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -151,11 +151,11 @@ class to data once, then keep the instance around to do transformations.
     >>> svd.fit(X)
     TruncatedSVD(n_components=5, n_iter=7, random_state=42)
     >>> print(svd.explained_variance_ratio_)
-    [0.0157... 0.0512... 0.0499... 0.0479... 0.0453...]
+    [0.0157 0.0512 0.0499 0.0479 0.0453]
     >>> print(svd.explained_variance_ratio_.sum())
-    0.2102...
+    0.2102
     >>> print(svd.singular_values_)
-    [35.2410...  4.5981...   4.5420...  4.4486...  4.3288...]
+    [35.2410  4.5981   4.5420  4.4486  4.3288]
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
index 94c89b9841ef8..34b613b15281a 100644
--- a/sklearn/ensemble/_bagging.py
+++ b/sklearn/ensemble/_bagging.py
@@ -1348,7 +1348,7 @@ class BaggingRegressor(RegressorMixin, BaseBagging):
     >>> regr = BaggingRegressor(estimator=SVR(),
     ...                         n_estimators=10, random_state=0).fit(X, y)
     >>> regr.predict([[0, 0, 0, 0]])
-    array([-2.8720...])
+    array([-2.8720])
     """
 
     def __init__(
diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 8bfbfe640aead..55c8e79e062df 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -1454,7 +1454,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
     ...     max_depth=1, random_state=0).fit(X_train, y_train)
     >>> clf.score(X_test, y_test)
-    0.913...
+    0.913
     """
 
     _parameter_constraints: dict = {
@@ -2052,7 +2052,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     >>> reg.fit(X_train, y_train)
     GradientBoostingRegressor(random_state=0)
     >>> reg.predict(X_test[1:2])
-    array([-61...])
+    array([-61.1])
     >>> reg.score(X_test, y_test)
     0.4...
 
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index d72e5806bbae0..e7e670dd869b6 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -622,7 +622,7 @@ class VotingRegressor(RegressorMixin, _BaseVoting):
     >>> y = np.array([2, 6, 12, 20, 30, 42])
     >>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
     >>> print(er.fit(X, y).predict(X))
-    [ 6.8...  8.4... 12.5... 17.8... 26...  34...]
+    [ 6.8  8.4 12.5 17.8 26  34]
 
     In the following example, we drop the `'lr'` estimator with
     :meth:`~VotingRegressor.set_params` and fit the remaining two estimators:
diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py
index 494d78b9ff63d..37c6468a5ebf6 100644
--- a/sklearn/ensemble/_weight_boosting.py
+++ b/sklearn/ensemble/_weight_boosting.py
@@ -476,7 +476,7 @@ class AdaBoostClassifier(
     >>> clf.predict([[0, 0, 0, 0]])
     array([1])
     >>> clf.score(X, y)
-    0.96...
+    0.96
 
     For a detailed example of using AdaBoost to fit a sequence of DecisionTrees
     as weaklearners, please refer to
@@ -973,9 +973,9 @@ class AdaBoostRegressor(_RoutingNotSupportedMixin, RegressorMixin, BaseWeightBoo
     >>> regr.fit(X, y)
     AdaBoostRegressor(n_estimators=100, random_state=0)
     >>> regr.predict([[0, 0, 0, 0]])
-    array([4.7972...])
+    array([4.7972])
     >>> regr.score(X, y)
-    0.9771...
+    0.9771
 
     For a detailed example of utilizing :class:`~sklearn.ensemble.AdaBoostRegressor`
     to fit a sequence of decision trees as weak learners, please refer to
diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index d73b53eea647e..f4f4f0dcecae1 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -204,9 +204,9 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
     >>> y = [0, 1, 0, 1]
     >>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)
     >>> selector.estimator_.coef_
-    array([[-0.3252...,  0.8345...,  0.4976...]])
+    array([[-0.3252,  0.8345,  0.4976]])
     >>> selector.threshold_
-    np.float64(0.55249...)
+    np.float64(0.55249)
     >>> selector.get_support()
     array([False,  True, False])
     >>> selector.transform(X)
diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py
index ede6fa9a21c34..aef9097879fca 100644
--- a/sklearn/feature_selection/_mutual_info.py
+++ b/sklearn/feature_selection/_mutual_info.py
@@ -436,7 +436,7 @@ def mutual_info_regression(
     ...     n_samples=50, n_features=3, n_informative=1, noise=1e-4, random_state=42
     ... )
     >>> mutual_info_regression(X, y)
-    array([0.1..., 2.6...  , 0.0...])
+    array([0.117, 2.645, 0.0287])
     """
     return _estimate_mi(
         X,
@@ -564,8 +564,8 @@ def mutual_info_classif(
     ...     shuffle=False, random_state=42
     ... )
     >>> mutual_info_classif(X, y)
-    array([0.58..., 0.10..., 0.19..., 0.09... , 0.        ,
-           0.     , 0.     , 0.     , 0.      , 0.        ])
+    array([0.589, 0.107, 0.196, 0.0968 , 0.,
+           0.   , 0.   , 0.   , 0.     , 0.])
     """
     check_classification_targets(y)
     return _estimate_mi(
diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py
index fe07b48f4fc2e..7671a7ad7921d 100644
--- a/sklearn/feature_selection/_univariate_selection.py
+++ b/sklearn/feature_selection/_univariate_selection.py
@@ -158,13 +158,13 @@ def f_classif(X, y):
     ... )
     >>> f_statistic, p_values = f_classif(X, y)
     >>> f_statistic
-    array([2.2...e+02, 7.0...e-01, 1.6...e+00, 9.3...e-01,
-           5.4...e+00, 3.2...e-01, 4.7...e-02, 5.7...e-01,
-           7.5...e-01, 8.9...e-02])
+    array([2.21e+02, 7.02e-01, 1.70e+00, 9.31e-01,
+           5.41e+00, 3.25e-01, 4.71e-02, 5.72e-01,
+           7.54e-01, 8.90e-02])
     >>> p_values
-    array([7.1...e-27, 4.0...e-01, 1.9...e-01, 3.3...e-01,
-           2.2...e-02, 5.7...e-01, 8.2...e-01, 4.5...e-01,
-           3.8...e-01, 7.6...e-01])
+    array([7.14e-27, 4.04e-01, 1.96e-01, 3.37e-01,
+           2.21e-02, 5.70e-01, 8.29e-01, 4.51e-01,
+           3.87e-01, 7.66e-01])
     """
     X, y = check_X_y(X, y, accept_sparse=["csr", "csc", "coo"])
     args = [X[safe_mask(X, y == k)] for k in np.unique(y)]
@@ -253,9 +253,9 @@ def chi2(X, y):
     >>> y = np.array([1, 1, 0, 0, 2, 2])
     >>> chi2_stats, p_values = chi2(X, y)
     >>> chi2_stats
-    array([15.3...,  6.5       ,  8.9...])
+    array([15.3,  6.5       ,  8.9])
     >>> p_values
-    array([0.0004..., 0.0387..., 0.0116... ])
+    array([0.000456, 0.0387, 0.0116 ])
     """
 
     # XXX: we might want to do some of the following in logspace instead for
@@ -359,7 +359,7 @@ def r_regression(X, y, *, center=True, force_finite=True):
     ...     n_samples=50, n_features=3, n_informative=1, noise=1e-4, random_state=42
     ... )
     >>> r_regression(X, y)
-    array([-0.15...,  1.        , -0.22...])
+    array([-0.157,  1.        , -0.229])
     """
     X, y = check_X_y(X, y, accept_sparse=["csr", "csc", "coo"], dtype=np.float64)
     n_samples = X.shape[0]
@@ -492,9 +492,9 @@ def f_regression(X, y, *, center=True, force_finite=True):
     ... )
     >>> f_statistic, p_values = f_regression(X, y)
     >>> f_statistic
-    array([1.2...+00, 2.6...+13, 2.6...+00])
+    array([1.21, 2.67e13, 2.66])
     >>> p_values
-    array([2.7..., 1.5..., 1.0...])
+    array([0.276, 1.54e-283, 0.11])
     """
     correlation_coefficient = r_regression(
         X, y, center=center, force_finite=force_finite
diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py
index 208d6cb12a16c..d56e7735be787 100644
--- a/sklearn/gaussian_process/_gpr.py
+++ b/sklearn/gaussian_process/_gpr.py
@@ -186,7 +186,7 @@ def optimizer(obj_func, initial_theta, bounds):
     >>> gpr.score(X, y)
     0.3680...
     >>> gpr.predict(X[:2,:], return_std=True)
-    (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
+    (array([653.0, 592.1]), array([316.6, 316.6]))
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index b5b9d56a20612..4a0a6ec667be4 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -1024,9 +1024,9 @@ class Exponentiation(Kernel):
     >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
     ...         random_state=0).fit(X, y)
     >>> gpr.score(X, y)
-    0.419...
+    0.419
     >>> gpr.predict(X[:1,:], return_std=True)
-    (array([635.5...]), array([0.559...]))
+    (array([635.5]), array([0.559]))
     """
 
     def __init__(self, kernel, exponent):
@@ -1223,9 +1223,9 @@ class ConstantKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
     >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
     ...         random_state=0).fit(X, y)
     >>> gpr.score(X, y)
-    0.3696...
+    0.3696
     >>> gpr.predict(X[:1,:], return_std=True)
-    (array([606.1...]), array([0.24...]))
+    (array([606.1]), array([0.248]))
     """
 
     def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):
@@ -1353,9 +1353,9 @@ class WhiteKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
     >>> gpr = GaussianProcessRegressor(kernel=kernel,
     ...         random_state=0).fit(X, y)
     >>> gpr.score(X, y)
-    0.3680...
+    0.3680
     >>> gpr.predict(X[:2,:], return_std=True)
-    (array([653.0..., 592.1... ]), array([316.6..., 316.6...]))
+    (array([653.0, 592.1]), array([316.6, 316.6]))
     """
 
     def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):
@@ -1497,10 +1497,10 @@ class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
     >>> gpc = GaussianProcessClassifier(kernel=kernel,
     ...         random_state=0).fit(X, y)
     >>> gpc.score(X, y)
-    0.9866...
+    0.9866
     >>> gpc.predict_proba(X[:2,:])
-    array([[0.8354..., 0.03228..., 0.1322...],
-           [0.7906..., 0.0652..., 0.1441...]])
+    array([[0.8354, 0.03228, 0.1322],
+           [0.7906, 0.0652, 0.1441]])
     """
 
     def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):
@@ -1667,10 +1667,10 @@ class Matern(RBF):
     >>> gpc = GaussianProcessClassifier(kernel=kernel,
     ...         random_state=0).fit(X, y)
     >>> gpc.score(X, y)
-    0.9866...
+    0.9866
     >>> gpc.predict_proba(X[:2,:])
-    array([[0.8513..., 0.0368..., 0.1117...],
-            [0.8086..., 0.0693..., 0.1220...]])
+    array([[0.8513, 0.0368, 0.1117],
+           [0.8086, 0.0693, 0.1220]])
     """
 
     def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5):
@@ -1850,10 +1850,10 @@ class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
     >>> gpc = GaussianProcessClassifier(kernel=kernel,
     ...         random_state=0).fit(X, y)
     >>> gpc.score(X, y)
-    0.9733...
+    0.9733
     >>> gpc.predict_proba(X[:2,:])
-    array([[0.8881..., 0.0566..., 0.05518...],
-            [0.8678..., 0.0707... , 0.0614...]])
+    array([[0.8881, 0.0566, 0.05518],
+           [0.8678, 0.0707, 0.0614]])
     """
 
     def __init__(
@@ -1999,9 +1999,9 @@ class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
     >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
     ...         random_state=0).fit(X, y)
     >>> gpr.score(X, y)
-    0.0144...
+    0.0144
     >>> gpr.predict(X[:2,:], return_std=True)
-    (array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))
+    (array([425.6, 457.5]), array([0.3894, 0.3467]))
     """
 
     def __init__(
@@ -2146,9 +2146,9 @@ class DotProduct(Kernel):
     >>> gpr = GaussianProcessRegressor(kernel=kernel,
     ...         random_state=0).fit(X, y)
     >>> gpr.score(X, y)
-    0.3680...
+    0.3680
     >>> gpr.predict(X[:2,:], return_std=True)
-    (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
+    (array([653.0, 592.1]), array([316.6, 316.6]))
     """
 
     def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):
@@ -2296,10 +2296,10 @@ class PairwiseKernel(Kernel):
     >>> gpc = GaussianProcessClassifier(kernel=kernel,
     ...         random_state=0).fit(X, y)
     >>> gpc.score(X, y)
-    0.9733...
+    0.9733
     >>> gpc.predict_proba(X[:2,:])
-    array([[0.8880..., 0.05663..., 0.05532...],
-           [0.8676..., 0.07073..., 0.06165...]])
+    array([[0.8880, 0.05663, 0.05532],
+           [0.8676, 0.07073, 0.06165]])
     """
 
     def __init__(
diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py
index 86723c8245d44..ddae5373c5460 100644
--- a/sklearn/impute/_iterative.py
+++ b/sklearn/impute/_iterative.py
@@ -281,9 +281,9 @@ class IterativeImputer(_BaseImputer):
     IterativeImputer(random_state=0)
     >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]
     >>> imp_mean.transform(X)
-    array([[ 6.9584...,  2.       ,  3.        ],
-           [ 4.       ,  2.6000...,  6.        ],
-           [10.       ,  4.9999...,  9.        ]])
+    array([[ 6.9584,  2.       ,  3.        ],
+           [ 4.       ,  2.6000,  6.        ],
+           [10.       ,  4.9999,  9.        ]])
 
     For a more detailed example see
     :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py` or
diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py
index 4d75daa8b95ae..ad352c45cc03b 100644
--- a/sklearn/inspection/_partial_dependence.py
+++ b/sklearn/inspection/_partial_dependence.py
@@ -572,7 +572,7 @@ def partial_dependence(
     >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
     >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
     ...                    grid_resolution=2) # doctest: +SKIP
-    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])
+    (array([[-4.52,  4.52]]), [array([ 0.,  1.])])
     """
     check_is_fitted(estimator)
 
diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py
index 4ee3a0ca3cb74..451062fbe272e 100644
--- a/sklearn/inspection/_permutation_importance.py
+++ b/sklearn/inspection/_permutation_importance.py
@@ -262,9 +262,9 @@ def permutation_importance(
     >>> result = permutation_importance(clf, X, y, n_repeats=10,
     ...                                 random_state=0)
     >>> result.importances_mean
-    array([0.4666..., 0.       , 0.       ])
+    array([0.4666, 0.       , 0.       ])
     >>> result.importances_std
-    array([0.2211..., 0.       , 0.       ])
+    array([0.2211, 0.       , 0.       ])
     """
     if not hasattr(X, "iloc"):
         X = check_array(X, ensure_all_finite="allow-nan", dtype=None)
diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py
index 451d0544f672d..2f2c56ae5d13c 100644
--- a/sklearn/isotonic.py
+++ b/sklearn/isotonic.py
@@ -151,8 +151,8 @@ def isotonic_regression(
     --------
     >>> from sklearn.isotonic import isotonic_regression
     >>> isotonic_regression([5, 3, 1, 2, 8, 10, 7, 9, 6, 4])
-    array([2.75   , 2.75   , 2.75   , 2.75   , 7.33...,
-           7.33..., 7.33..., 7.33..., 7.33..., 7.33...])
+    array([2.75   , 2.75   , 2.75   , 2.75   , 7.33,
+           7.33, 7.33, 7.33, 7.33, 7.33])
     """
     y = check_array(y, ensure_2d=False, input_name="y", dtype=[np.float64, np.float32])
     if sp_base_version >= parse_version("1.12.0"):
@@ -271,7 +271,7 @@ class IsotonicRegression(RegressorMixin, TransformerMixin, BaseEstimator):
     >>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)
     >>> iso_reg = IsotonicRegression().fit(X, y)
     >>> iso_reg.predict([.1, .2])
-    array([1.8628..., 3.7256...])
+    array([1.8628, 3.7256])
     """
 
     # T should have been called X
diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 1c9ab10531177..c059e3fa84310 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -559,7 +559,7 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
     >>> reg.coef_
     array([1., 2.])
     >>> reg.intercept_
-    np.float64(3.0...)
+    np.float64(3.0)
     >>> reg.predict(np.array([[3, 5]]))
     array([16.])
     """
diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index c0c14cbb12f32..62096133ada2f 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -535,16 +535,16 @@ def enet_path(
     ...    n_samples=100, n_features=5, n_informative=2, coef=True, random_state=0
     ... )
     >>> true_coef
-    array([ 0.        ,  0.        ,  0.        , 97.9..., 45.7...])
+    array([ 0.        ,  0.        ,  0.        , 97.9, 45.7])
     >>> alphas, estimated_coef, _ = enet_path(X, y, n_alphas=3)
     >>> alphas.shape
     (3,)
     >>> estimated_coef
-     array([[ 0.        ,  0.78...,  0.56...],
-            [ 0.        ,  1.12...,  0.61...],
-            [-0.        , -2.12..., -1.12...],
-            [ 0.        , 23.04..., 88.93...],
-            [ 0.        , 10.63..., 41.56...]])
+    array([[ 0.,  0.787,  0.568],
+           [ 0.,  1.120,  0.620],
+           [-0., -2.129, -1.128],
+           [ 0., 23.046, 88.939],
+           [ 0., 10.637, 41.566]])
     """
     X_offset_param = params.pop("X_offset", None)
     X_scale_param = params.pop("X_scale", None)
@@ -872,9 +872,9 @@ class ElasticNet(MultiOutputMixin, RegressorMixin, LinearModel):
     >>> print(regr.coef_)
     [18.83816048 64.55968825]
     >>> print(regr.intercept_)
-    1.451...
+    1.451
     >>> print(regr.predict([[0, 0]]))
-    [1.451...]
+    [1.451]
     """
 
     # "check_input" is used for optimisation and isn't something to be passed
@@ -1303,7 +1303,7 @@ class Lasso(ElasticNet):
     >>> print(clf.coef_)
     [0.85 0.  ]
     >>> print(clf.intercept_)
-    0.15...
+    0.15
     """
 
     _parameter_constraints: dict = {
@@ -2093,9 +2093,9 @@ class LassoCV(RegressorMixin, LinearModelCV):
     >>> X, y = make_regression(noise=4, random_state=0)
     >>> reg = LassoCV(cv=5, random_state=0).fit(X, y)
     >>> reg.score(X, y)
-    0.9993...
+    0.9993
     >>> reg.predict(X[:1,])
-    array([-78.4951...])
+    array([-78.4951])
     """
 
     path = staticmethod(lasso_path)
@@ -2375,11 +2375,11 @@ class ElasticNetCV(RegressorMixin, LinearModelCV):
     >>> regr.fit(X, y)
     ElasticNetCV(cv=5, random_state=0)
     >>> print(regr.alpha_)
-    0.199...
+    0.199
     >>> print(regr.intercept_)
-    0.398...
+    0.398
     >>> print(regr.predict([[0, 0]]))
-    [0.398...]
+    [0.398]
     """
 
     _parameter_constraints: dict = {
@@ -3305,11 +3305,11 @@ class MultiTaskLassoCV(RegressorMixin, LinearModelCV):
     >>> X, y = make_regression(n_targets=2, noise=4, random_state=0)
     >>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)
     >>> r2_score(y, reg.predict(X))
-    0.9994...
+    0.9994
     >>> reg.alpha_
-    np.float64(0.5713...)
+    np.float64(0.5713)
     >>> reg.predict(X[:1,])
-    array([[153.7971...,  94.9015...]])
+    array([[153.7971,  94.9015]])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py
index fc31f9825d2e5..c9e10c6378bac 100644
--- a/sklearn/linear_model/_glm/glm.py
+++ b/sklearn/linear_model/_glm/glm.py
@@ -558,13 +558,13 @@ class PoissonRegressor(_GeneralizedLinearRegressor):
     >>> clf.fit(X, y)
     PoissonRegressor()
     >>> clf.score(X, y)
-    np.float64(0.990...)
+    np.float64(0.990)
     >>> clf.coef_
-    array([0.121..., 0.158...])
+    array([0.121, 0.158])
     >>> clf.intercept_
-    np.float64(2.088...)
+    np.float64(2.088)
     >>> clf.predict([[1, 1], [3, 4]])
-    array([10.676..., 21.875...])
+    array([10.676, 21.875])
     """
 
     _parameter_constraints: dict = {
@@ -690,13 +690,13 @@ class GammaRegressor(_GeneralizedLinearRegressor):
     >>> clf.fit(X, y)
     GammaRegressor()
     >>> clf.score(X, y)
-    np.float64(0.773...)
+    np.float64(0.773)
     >>> clf.coef_
-    array([0.072..., 0.066...])
+    array([0.073, 0.067])
     >>> clf.intercept_
-    np.float64(2.896...)
+    np.float64(2.896)
     >>> clf.predict([[1, 0], [2, 8]])
-    array([19.483..., 35.795...])
+    array([19.483, 35.795])
     """
 
     _parameter_constraints: dict = {
@@ -852,13 +852,13 @@ class TweedieRegressor(_GeneralizedLinearRegressor):
     >>> clf.fit(X, y)
     TweedieRegressor()
     >>> clf.score(X, y)
-    np.float64(0.839...)
+    np.float64(0.839)
     >>> clf.coef_
-    array([0.599..., 0.299...])
+    array([0.599, 0.299])
     >>> clf.intercept_
-    np.float64(1.600...)
+    np.float64(1.600)
     >>> clf.predict([[1, 1], [3, 4]])
-    array([2.500..., 4.599...])
+    array([2.500, 4.599])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py
index 598d208df535c..51f24035a3c83 100644
--- a/sklearn/linear_model/_huber.py
+++ b/sklearn/linear_model/_huber.py
@@ -235,9 +235,9 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
     >>> y[:4] = rng.uniform(10, 20, 4)
     >>> huber = HuberRegressor().fit(X, y)
     >>> huber.score(X, y)
-    -7.284...
+    -7.284
     >>> huber.predict(X[:1,])
-    array([806.7200...])
+    array([806.7200])
     >>> linear = LinearRegression().fit(X, y)
     >>> print("True coefficients:", coef)
     True coefficients: [20.4923...  34.1698...]
diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py
index abbd3837bcf43..4bffe5f6e8c0d 100644
--- a/sklearn/linear_model/_least_angle.py
+++ b/sklearn/linear_model/_least_angle.py
@@ -197,7 +197,7 @@ def lars_path(
     ...    n_samples=100, n_features=5, n_informative=2, coef=True, random_state=0
     ... )
     >>> true_coef
-    array([ 0.        ,  0.        ,  0.        , 97.9..., 45.7...])
+    array([ 0.        ,  0.        ,  0.        , 97.9, 45.7])
     >>> alphas, _, estimated_coef = lars_path(X, y)
     >>> alphas.shape
     (3,)
@@ -205,8 +205,8 @@ def lars_path(
     array([[ 0.     ,  0.     ,  0.     ],
            [ 0.     ,  0.     ,  0.     ],
            [ 0.     ,  0.     ,  0.     ],
-           [ 0.     , 46.96..., 97.99...],
-           [ 0.     ,  0.     , 45.70...]])
+           [ 0.     , 46.96, 97.99],
+           [ 0.     ,  0.     , 45.70]])
     """
     if X is None and Gram is not None:
         raise ValueError(
@@ -378,7 +378,7 @@ def lars_path_gram(
     ...    n_samples=100, n_features=5, n_informative=2, coef=True, random_state=0
     ... )
     >>> true_coef
-    array([ 0.        ,  0.        ,  0.        , 97.9..., 45.7...])
+    array([ 0.        ,  0.        ,  0.        , 97.9, 45.7])
     >>> alphas, _, estimated_coef = lars_path_gram(X.T @ y, X.T @ X, n_samples=100)
     >>> alphas.shape
     (3,)
@@ -386,8 +386,8 @@ def lars_path_gram(
     array([[ 0.     ,  0.     ,  0.     ],
            [ 0.     ,  0.     ,  0.     ],
            [ 0.     ,  0.     ,  0.     ],
-           [ 0.     , 46.96..., 97.99...],
-           [ 0.     ,  0.     , 45.70...]])
+           [ 0.     , 46.96, 97.99],
+           [ 0.     ,  0.     , 45.70]])
     """
     return _lars_path_solver(
         X=None,
@@ -1024,7 +1024,7 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
     >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
     Lars(n_nonzero_coefs=1)
     >>> print(reg.coef_)
-    [ 0. -1.11...]
+    [ 0. -1.11]
     """
 
     _parameter_constraints: dict = {
@@ -1345,7 +1345,7 @@ class LassoLars(Lars):
     >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
     LassoLars(alpha=0.01)
     >>> print(reg.coef_)
-    [ 0.         -0.955...]
+    [ 0.         -0.955]
     """
 
     _parameter_constraints: dict = {
@@ -1642,11 +1642,11 @@ class LarsCV(Lars):
     >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
     >>> reg = LarsCV(cv=5).fit(X, y)
     >>> reg.score(X, y)
-    0.9996...
+    0.9996
     >>> reg.alpha_
-    np.float64(0.2961...)
+    np.float64(0.2961)
     >>> reg.predict(X[:1,])
-    array([154.3996...])
+    array([154.3996])
     """
 
     _parameter_constraints: dict = {
@@ -1984,11 +1984,11 @@ class LassoLarsCV(LarsCV):
     >>> X, y = make_regression(noise=4.0, random_state=0)
     >>> reg = LassoLarsCV(cv=5).fit(X, y)
     >>> reg.score(X, y)
-    0.9993...
+    0.9993
     >>> reg.alpha_
-    np.float64(0.3972...)
+    np.float64(0.3972)
     >>> reg.predict(X[:1,])
-    array([-78.4831...])
+    array([-78.4831])
     """
 
     _parameter_constraints = {
@@ -2177,7 +2177,7 @@ class LassoLarsIC(LassoLars):
     >>> reg.fit(X, y)
     LassoLarsIC(criterion='bic')
     >>> print(reg.coef_)
-    [ 0.  -1.11...]
+    [ 0.  -1.11]
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index 94e180ba54238..89a17b7fffe0d 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -1107,10 +1107,10 @@ class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
     >>> clf.predict(X[:2, :])
     array([0, 0])
     >>> clf.predict_proba(X[:2, :])
-    array([[9.8...e-01, 1.8...e-02, 1.4...e-08],
-           [9.7...e-01, 2.8...e-02, ...e-08]])
+    array([[9.82e-01, 1.82e-02, 1.44e-08],
+           [9.72e-01, 2.82e-02, 3.02e-08]])
     >>> clf.score(X, y)
-    0.97...
+    0.97
 
     For a comparison of the LogisticRegression with other classifiers see:
     :ref:`sphx_glr_auto_examples_classification_plot_classification_probability.py`.
diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py
index aad9d1184fb8f..2f4dbac2d7634 100644
--- a/sklearn/linear_model/_omp.py
+++ b/sklearn/linear_model/_omp.py
@@ -397,7 +397,7 @@ def orthogonal_mp(
     >>> coef.shape
     (100,)
     >>> X[:1,] @ coef
-    array([-78.68...])
+    array([-78.68])
     """
     X = check_array(X, order="F", copy=copy_X)
     copy_X = False
@@ -575,7 +575,7 @@ def orthogonal_mp_gram(
     >>> coef.shape
     (100,)
     >>> X[:1,] @ coef
-    array([-78.68...])
+    array([-78.68])
     """
     Gram = check_array(Gram, order="F", copy=copy_Gram)
     Xy = np.asarray(Xy)
@@ -727,9 +727,9 @@ class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
     >>> X, y = make_regression(noise=4, random_state=0)
     >>> reg = OrthogonalMatchingPursuit().fit(X, y)
     >>> reg.score(X, y)
-    0.9991...
+    0.9991
     >>> reg.predict(X[:1,])
-    array([-78.3854...])
+    array([-78.3854])
     """
 
     _parameter_constraints: dict = {
@@ -994,11 +994,11 @@ class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):
     ...                        noise=4, random_state=0)
     >>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
     >>> reg.score(X, y)
-    0.9991...
+    0.9991
     >>> reg.n_nonzero_coefs_
     np.int64(10)
     >>> reg.predict(X[:1,])
-    array([-78.3854...])
+    array([-78.3854])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index 30e5b4ff39613..c18065436dc35 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -249,9 +249,9 @@ class RANSACRegressor(
     ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
     >>> reg = RANSACRegressor(random_state=0).fit(X, y)
     >>> reg.score(X, y)
-    0.9885...
+    0.9885
     >>> reg.predict(X[:1,])
-    array([-31.9417...])
+    array([-31.9417])
 
     For a more detailed example, see
     :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py`
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 27bc81c095d7b..0a55291a70ace 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -568,11 +568,12 @@ def ridge_regression(
     >>> rng = np.random.RandomState(0)
     >>> X = rng.randn(100, 4)
     >>> y = 2.0 * X[:, 0] - 1.0 * X[:, 1] + 0.1 * rng.standard_normal(100)
-    >>> coef, intercept = ridge_regression(X, y, alpha=1.0, return_intercept=True)
-    >>> list(coef)
-    [np.float64(1.9...), np.float64(-1.0...), np.float64(-0.0...), np.float64(-0.0...)]
+    >>> coef, intercept = ridge_regression(X, y, alpha=1.0, return_intercept=True,
+    ...                                    random_state=0)
+    >>> coef
+    array([ 1.97, -1., -2.69e-3, -9.27e-4])
     >>> intercept
-    np.float64(-0.0...)
+    np.float64(-0.0012)
     """
     return _ridge_regression(
         X,
diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py
index 88afc17fcf5ff..4b25145a8ca55 100644
--- a/sklearn/linear_model/_theil_sen.py
+++ b/sklearn/linear_model/_theil_sen.py
@@ -320,9 +320,9 @@ class TheilSenRegressor(RegressorMixin, LinearModel):
     ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
     >>> reg = TheilSenRegressor(random_state=0).fit(X, y)
     >>> reg.score(X, y)
-    0.9884...
+    0.9884
     >>> reg.predict(X[:1,])
-    array([-31.5871...])
+    array([-31.5871])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index f7898b2018e52..cae227ac7edb8 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1036,7 +1036,7 @@ def jaccard_score(
     In the binary case:
 
     >>> jaccard_score(y_true[0], y_pred[0])
-    0.6666...
+    0.6666
 
     In the 2D comparison case (e.g. image similarity):
 
@@ -1046,9 +1046,9 @@ def jaccard_score(
     In the multilabel case:
 
     >>> jaccard_score(y_true, y_pred, average='samples')
-    0.5833...
+    0.5833
     >>> jaccard_score(y_true, y_pred, average='macro')
-    0.6666...
+    0.6666
     >>> jaccard_score(y_true, y_pred, average=None)
     array([0.5, 0.5, 1. ])
 
@@ -1057,7 +1057,7 @@ def jaccard_score(
     >>> y_pred = [0, 2, 1, 2]
     >>> y_true = [0, 1, 2, 2]
     >>> jaccard_score(y_true, y_pred, average=None)
-    array([1. , 0. , 0.33...])
+    array([1. , 0. , 0.33])
     """
     labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
     samplewise = average == "samples"
@@ -1167,7 +1167,7 @@ def matthews_corrcoef(y_true, y_pred, *, sample_weight=None):
     >>> y_true = [+1, +1, +1, -1]
     >>> y_pred = [+1, -1, +1, +1]
     >>> matthews_corrcoef(y_true, y_pred)
-    -0.33...
+    -0.33
     """
     y_true, y_pred = attach_unique(y_true, y_pred)
     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
@@ -1437,11 +1437,11 @@ def f1_score(
     >>> y_true = [0, 1, 2, 0, 1, 2]
     >>> y_pred = [0, 2, 1, 0, 0, 1]
     >>> f1_score(y_true, y_pred, average='macro')
-    0.26...
+    0.267
     >>> f1_score(y_true, y_pred, average='micro')
-    0.33...
+    0.33
     >>> f1_score(y_true, y_pred, average='weighted')
-    0.26...
+    0.267
     >>> f1_score(y_true, y_pred, average=None)
     array([0.8, 0. , 0. ])
 
@@ -1641,17 +1641,17 @@ def fbeta_score(
     >>> y_true = [0, 1, 2, 0, 1, 2]
     >>> y_pred = [0, 2, 1, 0, 0, 1]
     >>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)
-    0.23...
+    0.238
     >>> fbeta_score(y_true, y_pred, average='micro', beta=0.5)
-    0.33...
+    0.33
     >>> fbeta_score(y_true, y_pred, average='weighted', beta=0.5)
-    0.23...
+    0.238
     >>> fbeta_score(y_true, y_pred, average=None, beta=0.5)
-    array([0.71..., 0.        , 0.        ])
+    array([0.71, 0.        , 0.        ])
     >>> y_pred_empty = [0, 0, 0, 0, 0, 0]
     >>> fbeta_score(y_true, y_pred_empty,
     ...             average="macro", zero_division=np.nan, beta=0.5)
-    0.12...
+    0.128
     """
 
     _, _, f, _ = precision_recall_fscore_support(
@@ -1951,18 +1951,18 @@ def precision_recall_fscore_support(
     >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
     >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
     >>> precision_recall_fscore_support(y_true, y_pred, average='macro')
-    (0.22..., 0.33..., 0.26..., None)
+    (0.222, 0.333, 0.267, None)
     >>> precision_recall_fscore_support(y_true, y_pred, average='micro')
-    (0.33..., 0.33..., 0.33..., None)
+    (0.33, 0.33, 0.33, None)
     >>> precision_recall_fscore_support(y_true, y_pred, average='weighted')
-    (0.22..., 0.33..., 0.26..., None)
+    (0.222, 0.333, 0.267, None)
 
     It is possible to compute per-label precisions, recalls, F1-scores and
     supports instead of averaging:
 
     >>> precision_recall_fscore_support(y_true, y_pred, average=None,
     ... labels=['pig', 'dog', 'cat'])
-    (array([0.        , 0.        , 0.66...]),
+    (array([0.        , 0.        , 0.66]),
      array([0., 0., 1.]), array([0. , 0. , 0.8]),
      array([2, 2, 2]))
     """
@@ -2184,7 +2184,7 @@ class are present in `y_true`): both likelihood ratios are undefined.
     >>> y_true = np.array(["non-cat", "cat", "non-cat", "cat", "non-cat"])
     >>> y_pred = np.array(["cat", "cat", "non-cat", "non-cat", "non-cat"])
     >>> class_likelihood_ratios(y_true, y_pred, replace_undefined_by=1.0)
-    (1.33..., 0.66...)
+    (1.33, 0.66)
     >>> y_true = np.array(["non-zebra", "zebra", "non-zebra", "zebra", "non-zebra"])
     >>> y_pred = np.array(["zebra", "zebra", "non-zebra", "non-zebra", "non-zebra"])
     >>> class_likelihood_ratios(y_true, y_pred, replace_undefined_by=1.0)
@@ -2499,20 +2499,20 @@ def precision_score(
     >>> y_true = [0, 1, 2, 0, 1, 2]
     >>> y_pred = [0, 2, 1, 0, 0, 1]
     >>> precision_score(y_true, y_pred, average='macro')
-    0.22...
+    0.22
     >>> precision_score(y_true, y_pred, average='micro')
-    0.33...
+    0.33
     >>> precision_score(y_true, y_pred, average='weighted')
-    0.22...
+    0.22
     >>> precision_score(y_true, y_pred, average=None)
-    array([0.66..., 0.        , 0.        ])
+    array([0.66, 0.        , 0.        ])
     >>> y_pred = [0, 0, 0, 0, 0, 0]
     >>> precision_score(y_true, y_pred, average=None)
-    array([0.33..., 0.        , 0.        ])
+    array([0.33, 0.        , 0.        ])
     >>> precision_score(y_true, y_pred, average=None, zero_division=1)
-    array([0.33..., 1.        , 1.        ])
+    array([0.33, 1.        , 1.        ])
     >>> precision_score(y_true, y_pred, average=None, zero_division=np.nan)
-    array([0.33...,        nan,        nan])
+    array([0.33,        nan,        nan])
 
     >>> # multilabel classification
     >>> y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]
@@ -2681,11 +2681,11 @@ def recall_score(
     >>> y_true = [0, 1, 2, 0, 1, 2]
     >>> y_pred = [0, 2, 1, 0, 0, 1]
     >>> recall_score(y_true, y_pred, average='macro')
-    0.33...
+    0.33
     >>> recall_score(y_true, y_pred, average='micro')
-    0.33...
+    0.33
     >>> recall_score(y_true, y_pred, average='weighted')
-    0.33...
+    0.33
     >>> recall_score(y_true, y_pred, average=None)
     array([1., 0., 0.])
     >>> y_true = [0, 0, 0, 0, 0, 0]
@@ -3234,7 +3234,7 @@ def log_loss(y_true, y_pred, *, normalize=True, sample_weight=None, labels=None)
     >>> from sklearn.metrics import log_loss
     >>> log_loss(["spam", "ham", "ham", "spam"],
     ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
-    0.21616...
+    0.21616
     """
     transformed_labels, y_pred = _validate_multiclass_probabilistic_prediction(
         y_true, y_pred, sample_weight, labels
@@ -3320,9 +3320,9 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None):
     LinearSVC(random_state=0)
     >>> pred_decision = est.decision_function([[-2], [3], [0.5]])
     >>> pred_decision
-    array([-2.18...,  2.36...,  0.09...])
+    array([-2.18,  2.36,  0.09])
     >>> hinge_loss([-1, 1, 1], pred_decision)
-    0.30...
+    0.30
 
     In the multiclass case:
 
@@ -3336,7 +3336,7 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None):
     >>> pred_decision = est.decision_function([[-1], [2], [3]])
     >>> y_true = [0, 2, 3]
     >>> hinge_loss(y_true, pred_decision, labels=labels)
-    0.56...
+    0.56
     """
     check_consistent_length(y_true, pred_decision, sample_weight)
     pred_decision = check_array(pred_decision, ensure_2d=False)
@@ -3584,21 +3584,21 @@ def brier_score_loss(
     >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"])
     >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])
     >>> brier_score_loss(y_true, y_prob)
-    0.037...
+    0.0375
     >>> brier_score_loss(y_true, 1-y_prob, pos_label=0)
-    0.037...
+    0.0375
     >>> brier_score_loss(y_true_categorical, y_prob, pos_label="ham")
-    0.037...
+    0.0375
     >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)
     0.0
     >>> brier_score_loss(y_true, y_prob, scale_by_half=False)
-    0.074...
+    0.075
     >>> brier_score_loss(
     ...    ["eggs", "ham", "spam"],
     ...    [[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.2, 0.2, 0.6]],
     ...    labels=["eggs", "ham", "spam"]
     ... )
-    0.146...
+    0.146
     """
     y_proba = check_array(
         y_proba, ensure_2d=False, dtype=[np.float64, np.float32, np.float16]
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 560fd81076914..d4fba69440f13 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -203,7 +203,7 @@ def average_precision_score(
     >>> y_true = np.array([0, 0, 1, 1])
     >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
     >>> average_precision_score(y_true, y_scores)
-    0.83...
+    0.83
     >>> y_true = np.array([0, 0, 1, 1, 2, 2])
     >>> y_scores = np.array([
     ...     [0.7, 0.2, 0.1],
@@ -214,7 +214,7 @@ def average_precision_score(
     ...     [0.1, 0.2, 0.7],
     ... ])
     >>> average_precision_score(y_true, y_scores)
-    0.77...
+    0.77
     """
 
     def _binary_uninterpolated_average_precision(
@@ -624,9 +624,9 @@ class scores must correspond to the order of ``labels``,
     >>> X, y = load_breast_cancer(return_X_y=True)
     >>> clf = LogisticRegression(solver="newton-cholesky", random_state=0).fit(X, y)
     >>> roc_auc_score(y, clf.predict_proba(X)[:, 1])
-    0.99...
+    0.99
     >>> roc_auc_score(y, clf.decision_function(X))
-    0.99...
+    0.99
 
     Multiclass case:
 
@@ -634,7 +634,7 @@ class scores must correspond to the order of ``labels``,
     >>> X, y = load_iris(return_X_y=True)
     >>> clf = LogisticRegression(solver="newton-cholesky").fit(X, y)
     >>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')
-    0.99...
+    0.99
 
     Multilabel case:
 
@@ -649,11 +649,11 @@ class scores must correspond to the order of ``labels``,
     >>> # extract the positive columns for each output
     >>> y_score = np.transpose([score[:, 1] for score in y_score])
     >>> roc_auc_score(y, y_score, average=None)
-    array([0.82..., 0.85..., 0.93..., 0.86..., 0.94...])
+    array([0.828, 0.852, 0.94, 0.869, 0.95])
     >>> from sklearn.linear_model import RidgeClassifierCV
     >>> clf = RidgeClassifierCV().fit(X, y)
     >>> roc_auc_score(y, clf.decision_function(X), average=None)
-    array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])
+    array([0.82, 0.847, 0.93, 0.872, 0.944])
     """
 
     y_type = type_of_target(y_true, input_name="y_true")
@@ -1257,7 +1257,7 @@ def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None
     >>> y_true = np.array([[1, 0, 0], [0, 0, 1]])
     >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])
     >>> label_ranking_average_precision_score(y_true, y_score)
-    0.416...
+    0.416
     """
     check_consistent_length(y_true, y_score, sample_weight)
     y_true = check_array(y_true, ensure_2d=False, accept_sparse="csr")
@@ -1441,7 +1441,7 @@ def label_ranking_loss(y_true, y_score, *, sample_weight=None):
     >>> y_true = [[1, 0, 0], [0, 0, 1]]
     >>> y_score = [[0.75, 0.5, 1], [1, 0.2, 0.1]]
     >>> label_ranking_loss(y_true, y_score)
-    0.75...
+    0.75
     """
     y_true = check_array(y_true, ensure_2d=False, accept_sparse="csr")
     y_score = check_array(y_score, ensure_2d=False)
@@ -1697,10 +1697,10 @@ def dcg_score(
     >>> # we predict scores for the answers
     >>> scores = np.asarray([[.1, .2, .3, 4, 70]])
     >>> dcg_score(true_relevance, scores)
-    9.49...
+    9.49
     >>> # we can set k to truncate the sum; only top k answers contribute
     >>> dcg_score(true_relevance, scores, k=2)
-    5.63...
+    5.63
     >>> # now we have some ties in our prediction
     >>> scores = np.asarray([[1, 0, 0, 0, 1]])
     >>> # by default ties are averaged, so here we get the average true
@@ -1859,13 +1859,13 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False
     >>> # we predict some scores (relevance) for the answers
     >>> scores = np.asarray([[.1, .2, .3, 4, 70]])
     >>> ndcg_score(true_relevance, scores)
-    0.69...
+    0.69
     >>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])
     >>> ndcg_score(true_relevance, scores)
-    0.49...
+    0.49
     >>> # we can set k to truncate the sum; only top k answers contribute.
     >>> ndcg_score(true_relevance, scores, k=4)
-    0.35...
+    0.35
     >>> # the normalization takes k into account so a perfect answer
     >>> # would still get 1.0
     >>> ndcg_score(true_relevance, true_relevance, k=4)
@@ -1875,7 +1875,7 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False
     >>> # by default ties are averaged, so here we get the average (normalized)
     >>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75
     >>> ndcg_score(true_relevance, scores, k=1)
-    0.75...
+    0.75
     >>> # we can choose to ignore ties for faster results, but only
     >>> # if we know there aren't ties in our scores, otherwise we get
     >>> # wrong results:
diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index b46c76f9feba6..ccc11d752adba 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -324,7 +324,7 @@ def rand_score(labels_true, labels_pred):
     are complete but may not always be pure, hence penalized:
 
       >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])
-      0.83...
+      0.83
     """
     contingency = pair_confusion_matrix(labels_true, labels_pred)
     numerator = contingency.diagonal().sum()
@@ -417,13 +417,13 @@ def adjusted_rand_score(labels_true, labels_pred):
     are complete but may not always be pure, hence penalized::
 
       >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])
-      0.57...
+      0.57
 
     ARI is symmetric, so labelings that have pure clusters with members
     coming from the same classes but unnecessary splits are penalized::
 
       >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])
-      0.57...
+      0.57
 
     If classes members are completely split across different clusters, the
     assignment is totally incomplete, hence the ARI is very low::
@@ -523,7 +523,7 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0):
     >>> from sklearn.metrics import homogeneity_completeness_v_measure
     >>> y_true, y_pred = [0, 0, 1, 1, 2, 2], [0, 0, 1, 2, 2, 2]
     >>> homogeneity_completeness_v_measure(y_true, y_pred)
-    (0.71..., 0.77..., 0.73...)
+    (0.71, 0.771, 0.74)
     """
     labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
 
@@ -691,7 +691,7 @@ def completeness_score(labels_true, labels_pred):
       >>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))
       1.0
       >>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))
-      0.999...
+      0.999
 
     If classes members are split across different clusters, the
     assignment cannot be complete::
@@ -780,30 +780,30 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0):
     are complete but not homogeneous, hence penalized::
 
       >>> print("%.6f" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))
-      0.8...
+      0.8
       >>> print("%.6f" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))
-      0.66...
+      0.67
 
     Labelings that have pure clusters with members coming from the same
     classes are homogeneous but un-necessary splits harm completeness
     and thus penalize V-measure as well::
 
       >>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))
-      0.8...
+      0.8
       >>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))
-      0.66...
+      0.67
 
     If classes members are completely split across different clusters,
     the assignment is totally incomplete, hence the V-Measure is null::
 
       >>> print("%.6f" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))
-      0.0...
+      0.0
 
     Clusters that include samples from totally different classes totally
     destroy the homogeneity of the labeling, hence::
 
       >>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))
-      0.0...
+      0.0
     """
     return homogeneity_completeness_v_measure(labels_true, labels_pred, beta=beta)[2]
 
@@ -880,7 +880,7 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None):
     >>> labels_true = [0, 1, 1, 0, 1, 0]
     >>> labels_pred = [0, 1, 0, 0, 1, 1]
     >>> mutual_info_score(labels_true, labels_pred)
-    0.056...
+    0.0566
     """
     if contingency is None:
         labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index fa90dedb06da7..f0e6cee65bc28 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -1158,8 +1158,8 @@ def cosine_distances(X, Y=None):
     >>> X = [[0, 0, 0], [1, 1, 1]]
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> cosine_distances(X, Y)
-    array([[1.     , 1.     ],
-           [0.42..., 0.18...]])
+    array([[1.   , 1.   ],
+           [0.422, 0.183]])
     """
     xp, _ = get_namespace(X, Y)
 
@@ -1291,7 +1291,7 @@ def paired_cosine_distances(X, Y):
     >>> X = [[0, 0, 0], [1, 1, 1]]
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> paired_cosine_distances(X, Y)
-    array([0.5       , 0.18...])
+    array([0.5       , 0.184])
     """
     X, Y = check_paired_arrays(X, Y)
     return 0.5 * row_norms(normalize(X) - normalize(Y), squared=True)
@@ -1476,7 +1476,7 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> polynomial_kernel(X, Y, degree=2)
     array([[1.     , 1.     ],
-           [1.77..., 2.77...]])
+           [1.77, 2.77]])
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -1536,8 +1536,8 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):
     >>> X = [[0, 0, 0], [1, 1, 1]]
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> sigmoid_kernel(X, Y)
-    array([[0.76..., 0.76...],
-           [0.87..., 0.93...]])
+    array([[0.76, 0.76],
+           [0.87, 0.93]])
     """
     xp, _ = get_namespace(X, Y)
     X, Y = check_pairwise_arrays(X, Y)
@@ -1597,8 +1597,8 @@ def rbf_kernel(X, Y=None, gamma=None):
     >>> X = [[0, 0, 0], [1, 1, 1]]
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> rbf_kernel(X, Y)
-    array([[0.71..., 0.51...],
-           [0.51..., 0.71...]])
+    array([[0.71, 0.51],
+           [0.51, 0.71]])
     """
     xp, _ = get_namespace(X, Y)
     X, Y = check_pairwise_arrays(X, Y)
@@ -1660,8 +1660,8 @@ def laplacian_kernel(X, Y=None, gamma=None):
     >>> X = [[0, 0, 0], [1, 1, 1]]
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> laplacian_kernel(X, Y)
-    array([[0.71..., 0.51...],
-           [0.51..., 0.71...]])
+    array([[0.71, 0.51],
+           [0.51, 0.71]])
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -1722,8 +1722,8 @@ def cosine_similarity(X, Y=None, dense_output=True):
     >>> X = [[0, 0, 0], [1, 1, 1]]
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> cosine_similarity(X, Y)
-    array([[0.     , 0.     ],
-           [0.57..., 0.81...]])
+    array([[0.   , 0.   ],
+           [0.577, 0.816]])
     """
     X, Y = check_pairwise_arrays(X, Y)
 
@@ -1884,8 +1884,8 @@ def chi2_kernel(X, Y=None, gamma=1.0):
     >>> X = [[0, 0, 0], [1, 1, 1]]
     >>> Y = [[1, 0, 0], [1, 1, 0]]
     >>> chi2_kernel(X, Y)
-    array([[0.36..., 0.13...],
-           [0.13..., 0.36...]])
+    array([[0.368, 0.135],
+           [0.135, 0.368]])
     """
     xp, _ = get_namespace(X, Y)
     K = additive_chi2_kernel(X, Y)
@@ -2166,11 +2166,11 @@ def pairwise_distances_chunked(
     >>> X = np.random.RandomState(0).rand(5, 3)
     >>> D_chunk = next(pairwise_distances_chunked(X))
     >>> D_chunk
-    array([[0.  ..., 0.29..., 0.41..., 0.19..., 0.57...],
-           [0.29..., 0.  ..., 0.57..., 0.41..., 0.76...],
-           [0.41..., 0.57..., 0.  ..., 0.44..., 0.90...],
-           [0.19..., 0.41..., 0.44..., 0.  ..., 0.51...],
-           [0.57..., 0.76..., 0.90..., 0.51..., 0.  ...]])
+    array([[0.   , 0.295, 0.417, 0.197, 0.572],
+           [0.295, 0.   , 0.576, 0.419, 0.764],
+           [0.417, 0.576, 0.   , 0.449, 0.903],
+           [0.197, 0.419, 0.449, 0.   , 0.512],
+           [0.572, 0.764, 0.903, 0.512, 0.   ]])
 
     Retrieve all neighbors and average distance within radius r:
 
@@ -2184,7 +2184,7 @@ def pairwise_distances_chunked(
     >>> neigh
     [array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]
     >>> avg_dist
-    array([0.039..., 0.        , 0.        , 0.039..., 0.        ])
+    array([0.039, 0.        , 0.        , 0.039, 0.        ])
 
     Where r is defined per sample, we need to make use of ``start``:
 
diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py
index 466035332eaee..57220186faf61 100644
--- a/sklearn/mixture/_bayesian_mixture.py
+++ b/sklearn/mixture/_bayesian_mixture.py
@@ -342,8 +342,8 @@ class BayesianGaussianMixture(BaseMixture):
     >>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])
     >>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)
     >>> bgm.means_
-    array([[2.49... , 2.29...],
-           [8.45..., 4.52... ]])
+    array([[2.49, 2.29],
+           [8.45, 4.52]])
     >>> bgm.predict([[0, 0], [9, 3]])
     array([0, 1])
     """
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 869e2dcaf57e4..61dbd7c1b1d80 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -1936,7 +1936,7 @@ class RandomizedSearchCV(BaseSearchCV):
     >>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)
     >>> search = clf.fit(iris.data, iris.target)
     >>> search.best_params_
-    {'C': np.float64(2...), 'penalty': 'l1'}
+    {'C': np.float64(2.2), 'penalty': 'l1'}
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 5275cab66b3f7..e9aa7dc77f4c6 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -335,7 +335,7 @@ def cross_validate(
     ...                         scoring=('r2', 'neg_mean_squared_error'),
     ...                         return_train_score=True)
     >>> print(scores['test_neg_mean_squared_error'])
-    [-3635.5... -3573.3... -6114.7...]
+    [-3635.5 -3573.3 -6114.7]
     >>> print(scores['train_r2'])
     [0.28009951 0.3908844  0.22784907]
     """
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index 48b9fbd3bdf9a..08b0c95c94558 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -406,7 +406,7 @@ class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):
     >>> X, y = load_linnerud(return_X_y=True)
     >>> regr = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
     >>> regr.predict(X[[0]])
-    array([[176..., 35..., 57...]])
+    array([[176, 35.1, 57.1]])
     """
 
     def __init__(self, estimator, *, n_jobs=None):
@@ -1018,9 +1018,9 @@ class labels for each estimator in the chain.
            [1., 0., 0.],
            [0., 1., 0.]])
     >>> chain.predict_proba(X_test)
-    array([[0.8387..., 0.9431..., 0.4576...],
-           [0.8878..., 0.3684..., 0.2640...],
-           [0.0321..., 0.9935..., 0.0626...]])
+    array([[0.8387, 0.9431, 0.4576],
+           [0.8878, 0.3684, 0.2640],
+           [0.0321, 0.9935, 0.0626]])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py
index 6ef690eb8bbe4..c70b83cb1d3bd 100644
--- a/sklearn/neighbors/_classification.py
+++ b/sklearn/neighbors/_classification.py
@@ -182,7 +182,7 @@ class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase):
     >>> print(neigh.predict([[1.1]]))
     [0]
     >>> print(neigh.predict_proba([[0.9]]))
-    [[0.666... 0.333...]]
+    [[0.666 0.333]]
     """
 
     _parameter_constraints: dict = {**NeighborsBase._parameter_constraints}
diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py
index c05a4f60773b0..d9f00be42570e 100644
--- a/sklearn/neighbors/_lof.py
+++ b/sklearn/neighbors/_lof.py
@@ -179,7 +179,7 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase):
     >>> clf.fit_predict(X)
     array([ 1,  1, -1,  1])
     >>> clf.negative_outlier_factor_
-    array([ -0.9821...,  -1.0370..., -73.3697...,  -0.9821...])
+    array([ -0.9821,  -1.0370, -73.3697,  -0.9821])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index d18f873e8a0db..a8a00fe3b4ac5 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -1143,7 +1143,7 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron):
     ...                                                     random_state=1)
     >>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
     >>> clf.predict_proba(X_test[:1])
-    array([[0.038..., 0.961...]])
+    array([[0.0383, 0.961]])
     >>> clf.predict(X_test[:5, :])
     array([1, 0, 1, 0, 1])
     >>> clf.score(X_test, y_test)
@@ -1662,9 +1662,9 @@ class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron):
     >>> regr.fit(X_train, y_train)
     MLPRegressor(max_iter=2000, random_state=1, tol=0.1)
     >>> regr.predict(X_test[:2])
-    array([  28..., -290...])
+    array([  28.98, -291])
     >>> regr.score(X_test, y_test)
-    0.98...
+    0.98
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 9a61d06664da7..f3fbf1e3b3299 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -1648,12 +1648,12 @@ class FeatureUnion(TransformerMixin, _BaseComposition):
     ...                       ("svd", TruncatedSVD(n_components=2))])
     >>> X = [[0., 1., 3], [2., 2., 5]]
     >>> union.fit_transform(X)
-    array([[-1.5       ,  3.0..., -0.8...],
-           [ 1.5       ,  5.7...,  0.4...]])
+    array([[-1.5       ,  3.04, -0.872],
+           [ 1.5       ,  5.72,  0.463]])
     >>> # An estimator's parameter can be set using '__' syntax
     >>> union.set_params(svd__n_components=1).fit_transform(X)
-    array([[-1.5       ,  3.0...],
-           [ 1.5       ,  5.7...]])
+    array([[-1.5       ,  3.04],
+           [ 1.5       ,  5.72]])
 
     For a more detailed example of usage, see
     :ref:`sphx_glr_auto_examples_compose_plot_feature_union.py`.
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 1349374a61ea8..fe138cda73803 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -218,8 +218,8 @@ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
     array([[-1.,  1.,  1.],
            [ 1., -1., -1.]])
     >>> scale(X, axis=1)  # scaling each row independently
-    array([[-1.37...,  0.39...,  0.98...],
-           [-1.22...,  0.     ,  1.22...]])
+    array([[-1.37,  0.39,  0.98],
+           [-1.22,  0.  ,  1.22]])
     """
     X = check_array(
         X,
@@ -1966,8 +1966,8 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False):
     array([[-0.4,  0.2,  0.4],
            [-0.5,  0. ,  0.5]])
     >>> normalize(X, norm="l2")  # L2 normalization each row independently
-    array([[-0.66...,  0.33...,  0.66...],
-           [-0.70...,  0.     ,  0.70...]])
+    array([[-0.67, 0.33, 0.67],
+           [-0.71, 0.  , 0.71]])
     """
     if axis == 0:
         sparse_format = "csc"
@@ -3275,11 +3275,11 @@ class PowerTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     >>> print(pt.fit(data))
     PowerTransformer()
     >>> print(pt.lambdas_)
-    [ 1.386... -3.100...]
+    [ 1.386 -3.100]
     >>> print(pt.transform(data))
-    [[-1.316... -0.707...]
-     [ 0.209... -0.707...]
-     [ 1.106...  1.414...]]
+    [[-1.316 -0.707]
+     [ 0.209 -0.707]
+     [ 1.106  1.414]]
     """
 
     _parameter_constraints: dict = {
@@ -3686,9 +3686,9 @@ def power_transform(X, method="yeo-johnson", *, standardize=True, copy=True):
     >>> from sklearn.preprocessing import power_transform
     >>> data = [[1, 2], [3, 2], [4, 5]]
     >>> print(power_transform(data, method='box-cox'))
-    [[-1.332... -0.707...]
-     [ 0.256... -0.707...]
-     [ 1.076...  1.414...]]
+    [[-1.332 -0.707]
+     [ 0.256 -0.707]
+     [ 1.076  1.414]]
 
     .. warning:: Risk of data leak.
         Do not use :func:`~sklearn.preprocessing.power_transform` unless you
diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index 0363f8c5b6120..3503fead2ba59 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -142,8 +142,8 @@ class FunctionTransformer(TransformerMixin, BaseEstimator):
     >>> transformer = FunctionTransformer(np.log1p)
     >>> X = np.array([[0, 1], [2, 3]])
     >>> transformer.transform(X)
-    array([[0.       , 0.6931...],
-           [1.0986..., 1.3862...]])
+    array([[0.       , 0.6931],
+           [1.0986, 1.3862]])
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py
index dc328dc5cf5db..77b404e3e39e9 100644
--- a/sklearn/preprocessing/_target_encoder.py
+++ b/sklearn/preprocessing/_target_encoder.py
@@ -175,15 +175,15 @@ class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder):
     >>> # encodings:
     >>> enc_high_smooth = TargetEncoder(smooth=5000.0).fit(X, y)
     >>> enc_high_smooth.target_mean_
-    np.float64(44...)
+    np.float64(44.3)
     >>> enc_high_smooth.encodings_
-    [array([44..., 44..., 44...])]
+    [array([44.1, 44.4, 44.3])]
 
     >>> # On the other hand, a low `smooth` parameter puts more weight on target
     >>> # conditioned on the value of the categorical:
     >>> enc_low_smooth = TargetEncoder(smooth=1.0).fit(X, y)
     >>> enc_low_smooth.encodings_
-    [array([20..., 80..., 43...])]
+    [array([21, 80.8, 43.2])]
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py
index 81d32719a10ff..f98b11365dd3b 100644
--- a/sklearn/random_projection.py
+++ b/sklearn/random_projection.py
@@ -746,7 +746,7 @@ class SparseRandomProjection(BaseRandomProjection):
     (25, 2759)
     >>> # very few components are non-zero
     >>> np.mean(transformer.components_ != 0)
-    np.float64(0.0182...)
+    np.float64(0.0182)
     """
 
     _parameter_constraints: dict = {
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 8f243937bccf1..277da42893eaf 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -225,10 +225,10 @@ class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
                     ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])
 
     >>> print(clf.named_steps['linearsvc'].coef_)
-    [[0.141...   0.526... 0.679... 0.493...]]
+    [[0.141 0.526 0.679 0.493]]
 
     >>> print(clf.named_steps['linearsvc'].intercept_)
-    [0.1693...]
+    [0.1693]
     >>> print(clf.predict([[0, 0, 0, 0]]))
     [1]
     """
@@ -496,11 +496,11 @@ class LinearSVR(RegressorMixin, LinearModel):
                     ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])
 
     >>> print(regr.named_steps['linearsvr'].coef_)
-    [18.582... 27.023... 44.357... 64.522...]
+    [18.582 27.023 44.357 64.522]
     >>> print(regr.named_steps['linearsvr'].intercept_)
-    [-4...]
+    [-4.]
     >>> print(regr.predict([[0, 0, 0, 0]]))
-    [-2.384...]
+    [-2.384]
     """
 
     _parameter_constraints: dict = {
@@ -1662,7 +1662,7 @@ class OneClassSVM(OutlierMixin, BaseLibSVM):
     >>> clf.predict(X)
     array([-1,  1,  1,  1, -1])
     >>> clf.score_samples(X)
-    array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])
+    array([1.7798, 2.0547, 2.0556, 2.0561, 1.7332])
 
     For a more extended example,
     see :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`
diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py
index ec814f088d1d9..8536ccf0d6f6b 100644
--- a/sklearn/tree/_classes.py
+++ b/sklearn/tree/_classes.py
@@ -942,8 +942,8 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree):
     >>> cross_val_score(clf, iris.data, iris.target, cv=10)
     ...                             # doctest: +SKIP
     ...
-    array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,
-            0.93...,  0.93...,  1.     ,  0.93...,  1.      ])
+    array([ 1.     ,  0.93,  0.86,  0.93,  0.93,
+            0.93,  0.93,  1.     ,  0.93,  1.      ])
     """
 
     # "check_input" is used for optimisation and isn't something to be passed
@@ -1324,8 +1324,8 @@ class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree):
     >>> cross_val_score(regressor, X, y, cv=10)
     ...                    # doctest: +SKIP
     ...
-    array([-0.39..., -0.46...,  0.02...,  0.06..., -0.50...,
-           0.16...,  0.11..., -0.73..., -0.30..., -0.00...])
+    array([-0.39, -0.46,  0.02,  0.06, -0.50,
+           0.16,  0.11, -0.73, -0.30, -0.00])
     """
 
     # "check_input" is used for optimisation and isn't something to be passed
@@ -1689,7 +1689,7 @@ class ExtraTreeClassifier(DecisionTreeClassifier):
     >>> cls = BaggingClassifier(extra_tree, random_state=0).fit(
     ...    X_train, y_train)
     >>> cls.score(X_test, y_test)
-    0.8947...
+    0.8947
     """
 
     def __init__(
@@ -1950,7 +1950,7 @@ class ExtraTreeRegressor(DecisionTreeRegressor):
     >>> reg = BaggingRegressor(extra_tree, random_state=0).fit(
     ...     X_train, y_train)
     >>> reg.score(X_test, y_test)
-    0.33...
+    0.33
     """
 
     def __init__(
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 535505e77c010..b98a7747c28aa 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -269,9 +269,9 @@ def randomized_range_finder(
     >>> from sklearn.utils.extmath import randomized_range_finder
     >>> A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
     >>> randomized_range_finder(A, size=2, n_iter=2, random_state=42)
-    array([[-0.21...,  0.88...],
-           [-0.52...,  0.24...],
-           [-0.82..., -0.38...]])
+    array([[-0.214,  0.887],
+           [-0.521,  0.249],
+           [-0.826, -0.388]])
     """
     A = check_array(A, accept_sparse=True)
 
diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py
index a9f2c14035b80..00e359bf79547 100644
--- a/sklearn/utils/sparsefuncs.py
+++ b/sklearn/utils/sparsefuncs.py
@@ -251,7 +251,7 @@ def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n, weights=Non
     >>> sparsefuncs.incr_mean_variance_axis(
     ...     csr, axis=0, last_mean=np.zeros(3), last_var=np.zeros(3), last_n=2
     ... )
-    (array([1.3..., 0.1..., 1.1...]), array([8.8..., 0.1..., 3.4...]),
+    (array([1.33, 0.167, 1.17]), array([8.88, 0.139, 3.47]),
     array([6., 6., 6.]))
     """
     _raise_error_wrong_axis(axis)