ENH add support for sample_weight in KBinsDiscretizer(strategy="quantile") by DeaMariaLeon · Pull Request #1 · DeaMariaLeon/scikit-learn · GitHub

ENH add support for sample_weight in KBinsDiscretizer(strategy="quantile") #1


Closed · wants to merge 47 commits

Changes from all commits

Commits (47)
3522d2d
Adding sample_weight parameter support in fit method of KBinsDiscretizer
Seladus Dec 21, 2021
4366bdc
Adding support for sample_weights in the case of an array-like n_bins
Seladus Dec 21, 2021
17e429b
Adding test cases for sample_weights parameter in fit method in KBins…
Seladus Dec 21, 2021
fe6076d
Black formatting and clarifications in documentation
Seladus Dec 21, 2021
e7d5003
Minor fix for PEP8 compatibility
Seladus Dec 21, 2021
85fe2b7
Adding fix to correct misunderstanding of the task and to make better…
Seladus Dec 23, 2021
83a0bba
Adding parameter copy=True in check for sample_weights + formatting w…
Seladus Dec 23, 2021
524cb73
Removing unused imports
Seladus Dec 23, 2021
dbe2eb0
Adding entry to the changelog
Seladus Dec 23, 2021
6bff3a3
Adding dtype in bin edges construction
Seladus Dec 23, 2021
ea5f446
Update sklearn/preprocessing/_discretization.py
Seladus Dec 23, 2021
e7e9eee
Application of suggestions : sooner check for valid strategy + interl…
Seladus Dec 23, 2021
e78c263
Movins subsample check for other strategy than quantile in its own if…
Seladus Dec 24, 2021
1a417f4
Change for linter
Seladus Dec 24, 2021
b5677e3
Merge branch 'main' into support_sample_weight_in_kbinsdiscretizer
Seladus Dec 31, 2021
b16321c
Update sklearn/preprocessing/_discretization.py
Seladus Jan 5, 2022
0d3919b
Update _discretization.py
Seladus Jan 5, 2022
30edabc
Adding TODO comment in tests and removing uselesss call to np.array
Seladus Jan 6, 2022
b600a0d
Changes for linter
Seladus Jan 6, 2022
aee7f59
DOC changed n_iter to max_iter to resolve a deprecation warning (#24837)
star1327p Nov 5, 2022
353b05b
DOC moved legend to upper center (#24847)
star1327p Nov 7, 2022
2e481f1
DOC fix class reference in changelog 1.1 (#24850)
GaelVaroquaux Nov 7, 2022
3e47fa9
DOC Improve interaction constraint doc for HistGradientBoosting* (#24…
betatim Nov 8, 2022
80f9247
Merge branch 'main' into deas-kbins
DeaMariaLeon Nov 8, 2022
5c573bd
Revert pre-commit file change
DeaMariaLeon Nov 8, 2022
f8986ee
FIX adapt epsilon value depending of the dtype of the input (#24354)
Safikh Nov 10, 2022
a923f9e
FIX Calls super().__init_subclass__ in _SetOutputMixin (#24863)
thomasjpfan Nov 10, 2022
539cd6c
DOC Improve narrative of DBSCAN example narrative (#24874)
ArturoAmorQ Nov 10, 2022
13b5b61
ENH Allow 0 < p < 1 for Minkowski distance for `algorithm="brute"` in…
RudreshVeerkhare Nov 10, 2022
64432e1
DOC Fix FutureWarning in 'examples/bicluster/plot_bicluster_newsgroup…
kianelbo Nov 10, 2022
e947074
FIX Make sure that set_output is keyword only everywhere (#24890)
thomasjpfan Nov 11, 2022
c98b910
DOC Fix FutureWarning in 'applications/plot_cyclical_feature_engineer…
Ti-Ion Nov 12, 2022
2718d9b
DOC remove FutureWarning in cluster/plot_bisect_kmeans.py (#24891)
aditya-anulekh Nov 12, 2022
4c96d7c
DOC remove FutureWarning in plot_colour_quantization example (#24893)
GeorgiaMayDay Nov 12, 2022
cf04165
DOC Fix FutureWarning in ensemble/plot_gradient_boosting_categorical …
DhanshreeA Nov 12, 2022
7ec1bfc
CI Update to python 3.11 docker windows image (#24900)
cmarmo Nov 12, 2022
84a7a7a
ENH Specify categorical features with feature names in HGBDT (#24889)
ogrisel Nov 13, 2022
980ded1
DOC add additional pointers in Forest regarding how to use `warm_star…
Nov 13, 2022
49aae1c
DOC improve `GammaRegressor` docstring (#24789)
Badr-MOUFAD Nov 13, 2022
70442b9
DOC Fix FutureWarning in decomposition/plot_ica_blind_source_separati…
MaximSmolskiy Nov 13, 2022
1bf4ebd
DOC Fix FutureWarning in plot_linear_model_coefficient_interpretation…
SarahRemus Nov 14, 2022
bf573bd
DOC Fix FutureWarning in manifold/plot_compare_methods.py (#24909)
MaximSmolskiy Nov 14, 2022
dbde1da
DOC Fix FutureWarning in cluster/plot_dict_face_patches.html (#24910)
Ti-Ion Nov 14, 2022
c4d3a69
DOC fix spelling in `DecisionBoundaryDisplay` docstring (#24921)
glemaitre Nov 15, 2022
bfcb5a4
DOC Fix -- make OPTICS plots consistent (#24926)
espg Nov 15, 2022
9498a89
removed test_invalid_n_bins() & trailing spaces to test_discretizatio…
DeaMariaLeon Nov 15, 2022
7ab4801
Finishing PR 22048 - updating my branch with upstream
DeaMariaLeon Nov 15, 2022
6 changes: 0 additions & 6 deletions build_tools/github/build_minimal_windows_image.sh
@@ -14,12 +14,6 @@ cp $WHEEL_PATH $WHEEL_NAME
# Dot the Python version for identifying the base Docker image
PYTHON_VERSION=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2})

# TODO: Remove when 3.11 images will be available for
# windows (for now the docker image is tagged as 3.11-rc)
if [[ "$PYTHON_VERSION" == "3.11" ]]; then
PYTHON_VERSION=$(echo ${PYTHON_VERSION}-rc)
fi

# Build a minimal Windows Docker image for testing the wheels
docker build --build-arg PYTHON_VERSION=$PYTHON_VERSION \
--build-arg WHEEL_NAME=$WHEEL_NAME \
5 changes: 4 additions & 1 deletion doc/developers/develop.rst
@@ -670,7 +670,10 @@ when defining a custom subclass::
...

The default value for `auto_wrap_output_keys` is `("transform",)`, which automatically
wraps `fit_transform` and `transform`.
wraps `fit_transform` and `transform`. The `TransformerMixin` uses the
`__init_subclass__` mechanism to consume `auto_wrap_output_keys` and pass all other
keyword arguments to its super class. A super class's `__init_subclass__` should
**not** depend on `auto_wrap_output_keys`.

For transformers that return multiple arrays in `transform`, auto wrapping will
only wrap the first array and not alter the other arrays.
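As a brief illustration of the paragraph above, a minimal sketch (not part of this diff) of a custom transformer that opts out of automatic output wrapping; the class and data here are hypothetical:

    from sklearn.base import BaseEstimator, TransformerMixin

    # Passing auto_wrap_output_keys=None at class definition time is consumed
    # by TransformerMixin.__init_subclass__ and disables wrapping of the
    # `transform` / `fit_transform` outputs; any other class keywords are
    # forwarded to the remaining super classes.
    class PassthroughTransformer(
        TransformerMixin, BaseEstimator, auto_wrap_output_keys=None
    ):
        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X  # returned as-is, never wrapped into a DataFrame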
2 changes: 1 addition & 1 deletion doc/whats_new/v1.1.rst
@@ -400,7 +400,7 @@ Changelog
:mod:`sklearn.cluster`
......................

- |MajorFeature| :class:`BisectingKMeans` introducing Bisecting K-Means algorithm
- |MajorFeature| :class:`cluster.BisectingKMeans` introducing Bisecting K-Means algorithm
:pr:`20031` by :user:`Michal Krawczyk <michalkrawczyk>`,
:user:`Tom Dupre la Tour <TomDLT>`
and :user:`Jérémie du Boisberranger <jeremiedbb>`.
25 changes: 25 additions & 0 deletions doc/whats_new/v1.2.rst
@@ -54,6 +54,10 @@ random sampling procedures.
scores will all be set to the maximum possible rank.
:pr:`24543` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Enhancement| The default of `eps` in :func:`metrics.log_loss` will change
from `1e-15` to `"auto"`, which defaults to `np.finfo(y_pred.dtype).eps`.
:pr:`24354` by :user:`Safiuddin Khaja <Safikh>` and :user:`gsiisg <gsiisg>`.

Changes impacting all modules
-----------------------------

@@ -295,6 +299,11 @@ Changelog
- |Efficiency| Improve runtime performance of :class:`ensemble.IsolationForest`
by avoiding data copies. :pr:`23252` by :user:`Zhehao Liu <MaxwellLZH>`.

- |Enhancement| Make it possible to pass the `categorical_features` parameter
of :class:`ensemble.HistGradientBoostingClassifier` and
:class:`ensemble.HistGradientBoostingRegressor` as feature names.
:pr:`24889` by :user:`Olivier Grisel <ogrisel>`.
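As an aside on the `categorical_features` entry above, a minimal sketch (not part of this diff); note that in this release the categorical values themselves must still be ordinal-encoded as small non-negative integers, only their selection by column name is new:

    import pandas as pd
    from sklearn.ensemble import HistGradientBoostingClassifier

    X = pd.DataFrame(
        {
            "color": [0, 1, 0, 2],  # e.g. red=0, blue=1, green=2
            "size": [1.0, 2.5, 3.0, 0.5],
        }
    )
    y = [0, 1, 0, 1]

    # The categorical column is selected by its feature name instead of an
    # index or boolean mask.
    clf = HistGradientBoostingClassifier(categorical_features=["color"])
    clf.fit(X, y)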

- |Enhancement| :class:`ensemble.StackingClassifier` now supports
multilabel-indicator target
:pr:`24146` by :user:`Nicolas Peretti <nicoperetti>`,
@@ -457,6 +466,12 @@ Changelog
:pr:`22710` by :user:`Conroy Trinh <trinhcon>` and
:pr:`23461` by :user:`Meekail Zain <micky774>`.

- |Enhancement| Adds an `"auto"` option to `eps` in :func:`metrics.log_loss`.
This option will automatically set the `eps` value depending on the data
type of `y_pred`. In addition, the default value of `eps` is changed from
`1e-15` to the new `"auto"` option.
:pr:`24354` by :user:`Safiuddin Khaja <Safikh>` and :user:`gsiisg <gsiisg>`.

- |FIX| :func:`metrics.log_loss` with `eps=0` now returns a correct value of 0 or
`np.inf` instead of `nan` for predictions at the boundaries (0 or 1). It also accepts
integer input.
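To make the two `log_loss` entries above concrete, a minimal sketch (not part of this diff), assuming the release this changelog describes:

    import numpy as np
    from sklearn.metrics import log_loss

    y_true = [0, 1, 1]
    y_pred = np.array([[1.0, 0.0], [0.1, 0.9], [0.2, 0.8]], dtype=np.float32)

    # eps="auto" (the new default) clips probabilities with the machine
    # epsilon of y_pred's dtype, here np.finfo(np.float32).eps, instead of a
    # fixed 1e-15.
    print(log_loss(y_true, y_pred, eps="auto"))

    # eps=0 disables clipping: predictions exactly at the 0/1 boundary now
    # yield 0 or np.inf rather than nan.
    print(log_loss(y_true, y_pred, eps=0))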
@@ -520,6 +535,11 @@ Changelog
:pr:`10468` by :user:`Ruben <icfly2>` and :pr:`22993` by
:user:`Jovan Stojanovic <jovan-stojanovic>`.

- |Enhancement| :class:`neighbors.NeighborsBase` now accepts
Minkowski semi-metric (i.e. when :math:`0 < p < 1` for
`metric="minkowski"`) for `algorithm="auto"` or `algorithm="brute"`.
:pr:`24750` by :user:`Rudresh Veerkhare <RudreshVeerkhare>`.
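A minimal sketch (not part of this diff) of the Minkowski semi-metric entry above:

    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    rng = np.random.RandomState(0)
    X = rng.rand(20, 3)

    # For 0 < p < 1 the Minkowski "distance" is only a semi-metric (the
    # triangle inequality does not hold), which brute-force search can still
    # handle.
    nn = NearestNeighbors(n_neighbors=3, algorithm="brute", metric="minkowski", p=0.5)
    nn.fit(X)
    distances, indices = nn.kneighbors(X[:2])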

- |Efficiency| :class:`neighbors.NearestCentroid` is faster and requires
less memory as it better leverages CPUs' caches to compute predictions.
:pr:`24645` by :user:`Olivier Grisel <ogrisel>`.
@@ -564,6 +584,11 @@ Changelog
- |Fix| :class:`preprocessing.LabelEncoder` correctly encodes NaNs in `transform`.
:pr:`22629` by `Thomas Fan`_.

- |Enhancement| Added support for `sample_weight` in :class:`preprocessing.KBinsDiscretizer`.
This allows specifying weights for each sample to be used while
fitting. The option is only available when `strategy` is set to `quantile`.
:pr:`22048` by :user:`Seladus <seladus>`.
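A minimal sketch (not part of this diff) of the feature this PR adds; `sample_weight` is only honoured with `strategy="quantile"`:

    import numpy as np
    from sklearn.preprocessing import KBinsDiscretizer

    X = np.array([[0.0], [1.0], [2.0], [3.0], [10.0]])
    # Up-weighting the last sample pulls the weighted quantiles (and hence
    # the bin edges) towards its value.
    sample_weight = np.array([1.0, 1.0, 1.0, 1.0, 5.0])

    est = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="quantile")
    est.fit(X, sample_weight=sample_weight)
    print(est.bin_edges_)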

:mod:`sklearn.svm`
..................

12 changes: 8 additions & 4 deletions examples/applications/plot_cyclical_feature_engineering.py
@@ -37,7 +37,7 @@


fig, ax = plt.subplots(figsize=(12, 4))
average_week_demand = df.groupby(["weekday", "hour"]).mean()["count"]
average_week_demand = df.groupby(["weekday", "hour"])["count"].mean()
average_week_demand.plot(ax=ax)
_ = ax.set(
title="Average hourly bike demand during the week",
@@ -209,11 +209,15 @@
("categorical", ordinal_encoder, categorical_columns),
],
remainder="passthrough",
# Use short feature names to make it easier to specify the categorical
# variables in the HistGradientBoostingRegressor in the next
# step of the pipeline.
verbose_feature_names_out=False,
),
HistGradientBoostingRegressor(
categorical_features=range(4),
categorical_features=categorical_columns,
),
)
).set_output(transform="pandas")

# %%
#
@@ -263,7 +267,7 @@ def evaluate(model, X, y, cv):
import numpy as np


one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)
one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
alphas = np.logspace(-6, 6, 25)
naive_linear_pipeline = make_pipeline(
ColumnTransformer(
2 changes: 1 addition & 1 deletion examples/applications/plot_model_complexity_influence.py
@@ -266,7 +266,7 @@ def plot_influence(conf, mse_values, prediction_times, complexities):
ax2.tick_params(axis="y", colors=line2.get_color())

plt.legend(
(line1, line2), ("prediction error", "prediction latency"), loc="upper right"
(line1, line2), ("prediction error", "prediction latency"), loc="upper center"
)

plt.title(
4 changes: 3 additions & 1 deletion examples/bicluster/plot_bicluster_newsgroups.py
@@ -81,7 +81,9 @@ def build_tokenizer(self):
cocluster = SpectralCoclustering(
n_clusters=len(categories), svd_method="arpack", random_state=0
)
kmeans = MiniBatchKMeans(n_clusters=len(categories), batch_size=20000, random_state=0)
kmeans = MiniBatchKMeans(
n_clusters=len(categories), batch_size=20000, random_state=0, n_init=3
)

print("Vectorizing...")
X = vectorizer.fit_transform(newsgroups.data)
2 changes: 1 addition & 1 deletion examples/cluster/plot_bisect_kmeans.py
@@ -44,7 +44,7 @@

for i, (algorithm_name, Algorithm) in enumerate(clustering_algorithms.items()):
for j, n_clusters in enumerate(n_clusters_list):
algo = Algorithm(n_clusters=n_clusters, random_state=random_state)
algo = Algorithm(n_clusters=n_clusters, random_state=random_state, n_init=3)
algo.fit(X)
centers = algo.cluster_centers_

4 changes: 3 additions & 1 deletion examples/cluster/plot_color_quantization.py
@@ -52,7 +52,9 @@
print("Fitting model on a small sub-sample of the data")
t0 = time()
image_array_sample = shuffle(image_array, random_state=0, n_samples=1_000)
kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(image_array_sample)
kmeans = KMeans(n_clusters=n_colors, n_init="auto", random_state=0).fit(
image_array_sample
)
print(f"done in {time() - t0:0.3f}s.")

# Get labels for all points
87 changes: 62 additions & 25 deletions examples/cluster/plot_dbscan.py
@@ -1,39 +1,53 @@
# -*- coding: utf-8 -*-
"""
===================================
Demo of DBSCAN clustering algorithm
===================================

DBSCAN (Density-Based Spatial Clustering of Applications with Noise)
finds core samples of high density and expands clusters from them.
This algorithm is good for data which contains clusters of similar density.
DBSCAN (Density-Based Spatial Clustering of Applications with Noise) finds core
samples in regions of high density and expands clusters from them. This
algorithm is good for data which contains clusters of similar density.

See the :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py` example
for a demo of different clustering algorithms on 2D datasets.

"""

import numpy as np
# %%
# Data generation
# ---------------
#
# We use :class:`~sklearn.datasets.make_blobs` to create 3 synthetic clusters.

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler


# %%
# Generate sample data
# --------------------
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(
n_samples=750, centers=centers, cluster_std=0.4, random_state=0
)

X = StandardScaler().fit_transform(X)

# %%
# We can visualize the resulting data:

import matplotlib.pyplot as plt

plt.scatter(X[:, 0], X[:, 1])
plt.show()

# %%
# Compute DBSCAN
# --------------
#
# One can access the labels assigned by :class:`~sklearn.cluster.DBSCAN` using
# the `labels_` attribute. Noisy samples are given the label :math:`-1`.

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics

db = DBSCAN(eps=0.3, min_samples=10).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
@@ -42,23 +56,46 @@

print("Estimated number of clusters: %d" % n_clusters_)
print("Estimated number of noise points: %d" % n_noise_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))

# %%
# Clustering algorithms are fundamentally unsupervised learning methods.
# However, since :class:`~sklearn.datasets.make_blobs` gives access to the true
# labels of the synthetic clusters, it is possible to use evaluation metrics
# that leverage this "supervised" ground truth information to quantify the
# quality of the resulting clusters. Examples of such metrics are the
# homogeneity, completeness, V-measure, Rand-Index, Adjusted Rand-Index and
# Adjusted Mutual Information (AMI).
#
# If the ground truth labels are not known, evaluation can only be performed
# using the model results itself. In that case, the Silhouette Coefficient comes
# in handy.
#
# For more information, see the
# :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`
# example or the :ref:`clustering_evaluation` module.

print(f"Homogeneity: {metrics.homogeneity_score(labels_true, labels):.3f}")
print(f"Completeness: {metrics.completeness_score(labels_true, labels):.3f}")
print(f"V-measure: {metrics.v_measure_score(labels_true, labels):.3f}")
print(f"Adjusted Rand Index: {metrics.adjusted_rand_score(labels_true, labels):.3f}")
print(
"Adjusted Mutual Information: %0.3f"
% metrics.adjusted_mutual_info_score(labels_true, labels)
"Adjusted Mutual Information:"
f" {metrics.adjusted_mutual_info_score(labels_true, labels):.3f}"
)
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))
print(f"Silhouette Coefficient: {metrics.silhouette_score(X, labels):.3f}")

# %%
# Plot result
# -----------
import matplotlib.pyplot as plt
# Plot results
# ------------
#
# Core samples (large dots) and non-core samples (small dots) are color-coded
# according to the assigned cluster. Samples tagged as noise are represented in
# black.

# Black removed and is used for noise instead.
unique_labels = set(labels)
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
if k == -1:
@@ -87,5 +124,5 @@
markersize=6,
)

plt.title("Estimated number of clusters: %d" % n_clusters_)
plt.title(f"Estimated number of clusters: {n_clusters_}")
plt.show()
2 changes: 1 addition & 1 deletion examples/cluster/plot_dict_face_patches.py
@@ -42,7 +42,7 @@

print("Learning the dictionary... ")
rng = np.random.RandomState(0)
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True, n_init=3)
patch_size = (20, 20)

buffer = []
6 changes: 3 additions & 3 deletions examples/cluster/plot_optics.py
@@ -88,10 +88,10 @@
ax2.set_title("Automatic Clustering\nOPTICS")

# DBSCAN at 0.5
colors = ["g", "greenyellow", "olive", "r", "b", "c"]
for klass, color in zip(range(0, 6), colors):
colors = ["g.", "r.", "b.", "c."]
for klass, color in zip(range(0, 4), colors):
Xk = X[labels_050 == klass]
ax3.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3, marker=".")
ax3.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax3.plot(X[labels_050 == -1, 0], X[labels_050 == -1, 1], "k+", alpha=0.1)
ax3.set_title("Clustering at 0.5 epsilon cut\nDBSCAN")

10 changes: 5 additions & 5 deletions examples/decomposition/plot_faces_decomposition.py
@@ -153,7 +153,7 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):

# %%
batch_pca_estimator = decomposition.MiniBatchSparsePCA(
n_components=n_components, alpha=0.1, n_iter=100, batch_size=3, random_state=rng
n_components=n_components, alpha=0.1, max_iter=100, batch_size=3, random_state=rng
)
batch_pca_estimator.fit(faces_centered)
plot_gallery(
@@ -171,7 +171,7 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):

# %%
batch_dict_estimator = decomposition.MiniBatchDictionaryLearning(
n_components=n_components, alpha=0.1, n_iter=50, batch_size=3, random_state=rng
n_components=n_components, alpha=0.1, max_iter=50, batch_size=3, random_state=rng
)
batch_dict_estimator.fit(faces_centered)
plot_gallery("Dictionary learning", batch_dict_estimator.components_[:n_components])
@@ -272,7 +272,7 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
dict_pos_dict_estimator = decomposition.MiniBatchDictionaryLearning(
n_components=n_components,
alpha=0.1,
n_iter=50,
max_iter=50,
batch_size=3,
random_state=rng,
positive_dict=True,
@@ -294,7 +294,7 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
dict_pos_code_estimator = decomposition.MiniBatchDictionaryLearning(
n_components=n_components,
alpha=0.1,
n_iter=50,
max_iter=50,
batch_size=3,
fit_algorithm="cd",
random_state=rng,
@@ -318,7 +318,7 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
dict_pos_estimator = decomposition.MiniBatchDictionaryLearning(
n_components=n_components,
alpha=0.1,
n_iter=50,
max_iter=50,
batch_size=3,
fit_algorithm="cd",
random_state=rng,
2 changes: 1 addition & 1 deletion examples/decomposition/plot_ica_blind_source_separation.py
@@ -44,7 +44,7 @@
from sklearn.decomposition import FastICA, PCA

# Compute ICA
ica = FastICA(n_components=3)
ica = FastICA(n_components=3, whiten="arbitrary-variance")
S_ = ica.fit_transform(X) # Reconstruct signals
A_ = ica.mixing_ # Get estimated mixing matrix
