scikit-learn
diff --git a/.circleci/config.yml
Lines changed: 3 additions & 1 deletion b/.circleci/config.yml
Lines changed: 3 additions & 1 deletion
diff --git a/build_tools/generate_authors_table.py
Lines changed: 16 additions & 3 deletions b/build_tools/generate_authors_table.py
Lines changed: 16 additions & 3 deletions
diff --git a/doc/about.rst
Lines changed: 10 additions & 0 deletions b/doc/about.rst
Lines changed: 10 additions & 0 deletions
diff --git a/doc/communication_team.rst
Lines changed: 16 additions & 0 deletions b/doc/communication_team.rst
Lines changed: 16 additions & 0 deletions
diff --git a/doc/governance.rst
Lines changed: 17 additions & 0 deletions b/doc/governance.rst
Lines changed: 17 additions & 0 deletions
diff --git a/doc/whats_new/v1.0.rst
Lines changed: 9 additions & 0 deletions b/doc/whats_new/v1.0.rst
Lines changed: 9 additions & 0 deletions
diff --git a/doc/whats_new/v1.1.rst
Lines changed: 4 additions & 7 deletions b/doc/whats_new/v1.1.rst
Lines changed: 4 additions & 7 deletions
diff --git a/examples/cluster/plot_birch_vs_minibatchkmeans.py
Lines changed: 13 additions & 5 deletions b/examples/cluster/plot_birch_vs_minibatchkmeans.py
Lines changed: 13 additions & 5 deletions
diff --git a/sklearn/calibration.py
Lines changed: 2 additions & 2 deletions b/sklearn/calibration.py
Lines changed: 2 additions & 2 deletions
diff --git a/sklearn/datasets/tests/test_base.py
Lines changed: 4 additions & 1 deletion b/sklearn/datasets/tests/test_base.py
Lines changed: 4 additions & 1 deletion
diff --git a/sklearn/metrics/_dist_metrics.pyx
Lines changed: 6 additions & 6 deletions b/sklearn/metrics/_dist_metrics.pyx
Lines changed: 6 additions & 6 deletions
diff --git a/sklearn/metrics/tests/test_dist_metrics.py
Lines changed: 23 additions & 1 deletion b/sklearn/metrics/tests/test_dist_metrics.py
Lines changed: 23 additions & 1 deletion
@@ -59,7 +59,9 @@ jobs:
       - MATPLOTLIB_VERSION: 'latest'
       - CYTHON_VERSION: 'latest'
       - SCIKIT_IMAGE_VERSION: 'latest'
-      - SPHINX_VERSION: 'min'
+      # Bump the sphinx version from time to time. Avoid latest sphinx version
+      # that tends to break things slightly too often
+      - SPHINX_VERSION: 4.2.0
       - PANDAS_VERSION: 'latest'
       - SPHINX_GALLERY_VERSION: 'latest'
       - NUMPYDOC_VERSION: 'latest'
 
@@ -43,7 +43,14 @@ def get_contributors():
     # get core devs and triage team
     core_devs = []
     triage_team = []
-    for team_id, lst in zip((11523, 3593183), (core_devs, triage_team)):
+    comm_team = []
+    core_devs_id = 11523
+    triage_team_id = 3593183
+    comm_team_id = 5368696
+    for team_id, lst in zip(
+        (core_devs_id, triage_team_id, comm_team_id),
+        (core_devs, triage_team, comm_team),
+    ):
         for page in [1, 2]:  # 30 per page
             reply = get(f"https://api.github.com/teams/{team_id}/members?page={page}")
             lst.extend(reply.json())
@@ -59,6 +66,7 @@ def get_contributors():
     # keep only the logins
     core_devs = set(c["login"] for c in core_devs)
     triage_team = set(c["login"] for c in triage_team)
+    comm_team = set(c["login"] for c in comm_team)
     members = set(c["login"] for c in members)
 
     # add missing contributors with GitHub accounts
@@ -75,13 +83,15 @@ def get_contributors():
     core_devs = [get_profile(login) for login in core_devs]
     emeritus = [get_profile(login) for login in emeritus]
     triage_team = [get_profile(login) for login in triage_team]
+    comm_team = [get_profile(login) for login in comm_team]
 
     # sort by last name
     core_devs = sorted(core_devs, key=key)
     emeritus = sorted(emeritus, key=key)
     triage_team = sorted(triage_team, key=key)
+    comm_team = sorted(comm_team, key=key)
 
-    return core_devs, emeritus, triage_team
+    return core_devs, emeritus, triage_team, comm_team
 
 
 def get_profile(login):
@@ -145,7 +155,7 @@ def generate_list(contributors):
 
 if __name__ == "__main__":
 
-    core_devs, emeritus, triage_team = get_contributors()
+    core_devs, emeritus, triage_team, comm_team = get_contributors()
 
     with open(REPO_FOLDER / "doc" / "authors.rst", "w+") as rst_file:
         rst_file.write(generate_table(core_devs))
@@ -155,3 +165,6 @@ def generate_list(contributors):
 
     with open(REPO_FOLDER / "doc" / "triage_team.rst", "w+") as rst_file:
         rst_file.write(generate_table(triage_team))
+
+    with open(REPO_FOLDER / "doc" / "communication_team.rst", "w+") as rst_file:
+        rst_file.write(generate_table(comm_team))
@@ -18,6 +18,7 @@ been leading the development.
 
 Governance
 ----------
+
 The decision making process and governance structure of scikit-learn is laid
 out in the :ref:`governance document <governance>`.
 
@@ -47,6 +48,15 @@ maintenance:
 
 .. include:: triage_team.rst
 
+Communication Team
+------------------
+
+The following people help with :ref:`communication around scikit-learn
+<communication_team>`.
+
+.. include:: communication_team.rst
+
+
 Emeritus Core Developers
 ------------------------
 
 
@@ -0,0 +1,16 @@
+.. raw :: html
+
+    <!-- Generated by generate_authors_table.py -->
+    <div class="sk-authors-container">
+    <style>
+      img.avatar {border-radius: 10px;}
+    </style>
+    <div>
+    <a href='https://github.com/reshamas'><img src='https://avatars.githubusercontent.com/u/2507232?v=4' class='avatar' /></a> <br />
+    <p>Reshama Shaikh</p>
+    </div>
+    <div>
+    <a href='https://github.com/laurburke'><img src='https://avatars.githubusercontent.com/u/35973528?v=4' class='avatar' /></a> <br />
+    <p>Lauren Burke</p>
+    </div>
+    </div>
@@ -48,8 +48,25 @@ Every new triager will be announced in the mailing list.
 Triagers are welcome to participate in `monthly core developer meetings
 <https://github.com/scikit-learn/administrative/tree/master/meeting_notes>`_.
 
+.. _communication_team:
+
+Communication team
+------------------
+
+Members of the communication team help with outreach and communication
+for scikit-learn. The goal of the team is to develop public awareness of
+scikit-learn, of its features and usage, as well as branding.
+
+For this, they can operate the scikit-learn accounts on various social
+networks and produce materials.
+
+Every new communicator will be announced in the mailing list.
+Communicators are welcome to participate in `monthly core developer meetings
+<https://github.com/scikit-learn/administrative/tree/master/meeting_notes>`_.
+
 Core developers
 ---------------
+
 Core developers are community members who have shown that they are dedicated to
 the continued development of the project through ongoing engagement with the
 community. They have shown they can be trusted to maintain scikit-learn with
 
@@ -28,6 +28,15 @@ Changelog
   and :class:`decomposition.MiniBatchSparsePCA` to be convex and match the referenced
   article. :pr:`19210` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
 
+:mod:`sklearn.metrics`
+......................
+
+- |Fix| All :class:`sklearn.metrics.DistanceMetric` subclasses now correctly support
+  read-only buffer attributes.
+  This fixes a regression introduced in 1.0.0 with respect to 0.24.2.
+  :pr:`21694` by :user:`Julien Jerphanion <jjerphan>`.
+
+
 :mod:`sklearn.preprocessing`
 ............................
 
 
@@ -92,6 +92,10 @@ Changelog
 - |Fix| :class:`decomposition.FastICA` now validates input parameters in `fit` instead of `__init__`.
   :pr:`21432` by :user:`Hannah Bohle <hhnnhh>` and :user:`Maren Westermann <marenwestermann>`.
 
+- |Fix| :class:`decomposition.KernelPCA` now validates input parameters in
+  `fit` instead of `__init__`.
+  :pr:`21567` by :user:`Maggie Chege <MaggieChege>`.
+
 - |API| Adds :term:`get_feature_names_out` to all transformers in the
   :mod:`~sklearn.decomposition` module:
   :class:`~sklearn.decomposition.DictionaryLearning`,
@@ -186,13 +190,6 @@ Changelog
   instead of `__init__`.
   :pr:`21434` by :user:`Krum Arnaudov <krumeto>`.
 
-
-:mod:`sklearn.decomposition.KernelPCA`
-......................................
-- |Fix| :class:`decomposition.KernelPCA` now validates input parameters in
-  `fit` instead of `__init__`.
-  :pr:`21567` by :user:`Maggie Chege <MaggieChege>`.
-
 :mod:`sklearn.svm`
 ..................
 
 
@@ -5,9 +5,16 @@
 
 This example compares the timing of BIRCH (with and without the global
 clustering step) and MiniBatchKMeans on a synthetic dataset having
-100,000 samples and 2 features generated using make_blobs.
+25,000 samples and 2 features generated using make_blobs.
 
-If ``n_clusters`` is set to None, the data is reduced from 100,000
+Both ``MiniBatchKMeans`` and ``BIRCH`` are very scalable algorithms and could
+run efficiently on hundreds of thousands or even millions of data points. We
+chose to limit the dataset size of this example in the interest of keeping
+our Continuous Integration resource usage reasonable, but the interested
+reader might enjoy editing this script to rerun it with a larger value for
+``n_samples``.
+
+If ``n_clusters`` is set to None, the data is reduced from 25,000
 samples to a set of 158 clusters. This can be viewed as a preprocessing
 step before the final (global) clustering step that further reduces these
 158 clusters to 100 clusters.
@@ -18,6 +25,7 @@
 #          Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
 # License: BSD 3 clause
 
+from joblib import cpu_count
 from itertools import cycle
 from time import time
 import numpy as np
@@ -32,10 +40,10 @@
 xx = np.linspace(-22, 22, 10)
 yy = np.linspace(-22, 22, 10)
 xx, yy = np.meshgrid(xx, yy)
-n_centres = np.hstack((np.ravel(xx)[:, np.newaxis], np.ravel(yy)[:, np.newaxis]))
+n_centers = np.hstack((np.ravel(xx)[:, np.newaxis], np.ravel(yy)[:, np.newaxis]))
 
 # Generate blobs to do a comparison between MiniBatchKMeans and BIRCH.
-X, y = make_blobs(n_samples=100000, centers=n_centres, random_state=0)
+X, y = make_blobs(n_samples=25000, centers=n_centers, random_state=0)
 
 # Use all colors that matplotlib provides by default.
 colors_ = cycle(colors.cnames.keys())
@@ -78,7 +86,7 @@
 mbk = MiniBatchKMeans(
     init="k-means++",
     n_clusters=100,
-    batch_size=100,
+    batch_size=256 * cpu_count(),
     n_init=10,
     max_no_improvement=10,
     verbose=0,
 
@@ -1099,8 +1099,8 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
             ax.plot([0, 1], [0, 1], "k:", label=ref_line_label)
         self.line_ = ax.plot(self.prob_pred, self.prob_true, "s-", **line_kwargs)[0]
 
-        if "label" in line_kwargs:
-            ax.legend(loc="lower right")
+        # We always have to show the legend for at least the reference line
+        ax.legend(loc="lower right")
 
         xlabel = f"Mean predicted probability {info_pos_label}"
         ylabel = f"Fraction of positives {info_pos_label}"
 
@@ -341,7 +341,10 @@ def test_load_boston_alternative():
     boston_sklearn = load_boston()
 
     data_url = "http://lib.stat.cmu.edu/datasets/boston"
-    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
+    try:
+        raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
+    except ConnectionError as e:
+        pytest.xfail(f"The dataset can't be downloaded. Got exception: {e}")
     data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
     target = raw_df.values[1::2, 2]
 
 
@@ -29,7 +29,7 @@ cdef DTYPE_t INF = np.inf
 
 from ..utils._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t, DTYPECODE
 from ..utils._typedefs import DTYPE, ITYPE
-
+from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
 
 ######################################################################
 # newObj function
@@ -214,8 +214,8 @@ cdef class DistanceMetric:
         set state for pickling
         """
         self.p = state[0]
-        self.vec = state[1]
-        self.mat = state[2]
+        self.vec = ReadonlyArrayWrapper(state[1])
+        self.mat = ReadonlyArrayWrapper(state[2])
         if self.__class__.__name__ == "PyFuncDistance":
             self.func = state[3]
             self.kwargs = state[4]
@@ -444,7 +444,7 @@ cdef class SEuclideanDistance(DistanceMetric):
        D(x, y) = \sqrt{ \sum_i \frac{ (x_i - y_i) ^ 2}{V_i} }
     """
     def __init__(self, V):
-        self.vec = np.asarray(V, dtype=DTYPE)
+        self.vec = ReadonlyArrayWrapper(np.asarray(V, dtype=DTYPE))
         self.size = self.vec.shape[0]
         self.p = 2
 
@@ -605,7 +605,7 @@ cdef class WMinkowskiDistance(DistanceMetric):
             raise ValueError("WMinkowskiDistance requires finite p. "
                              "For p=inf, use ChebyshevDistance.")
         self.p = p
-        self.vec = np.asarray(w, dtype=DTYPE)
+        self.vec = ReadonlyArrayWrapper(np.asarray(w, dtype=DTYPE))
         self.size = self.vec.shape[0]
 
     def _validate_data(self, X):
@@ -665,7 +665,7 @@ cdef class MahalanobisDistance(DistanceMetric):
         if VI.ndim != 2 or VI.shape[0] != VI.shape[1]:
             raise ValueError("V/VI must be square")
 
-        self.mat = np.asarray(VI, dtype=float, order='C')
+        self.mat = ReadonlyArrayWrapper(np.asarray(VI, dtype=float, order='C'))
 
         self.size = self.mat.shape[0]
 
 
@@ -158,11 +158,16 @@ def check_pdist_bool(metric, D_true):
     assert_array_almost_equal(D12, D_true)
 
 
+@pytest.mark.parametrize("use_read_only_kwargs", [True, False])
 @pytest.mark.parametrize("metric", METRICS_DEFAULT_PARAMS)
-def test_pickle(metric):
+def test_pickle(use_read_only_kwargs, metric):
     argdict = METRICS_DEFAULT_PARAMS[metric]
     keys = argdict.keys()
     for vals in itertools.product(*argdict.values()):
+        if use_read_only_kwargs:
+            for val in vals:
+                if isinstance(val, np.ndarray):
+                    val.setflags(write=False)
         kwargs = dict(zip(keys, vals))
         check_pickle(metric, kwargs)
 
@@ -242,3 +247,20 @@ def custom_metric(x, y):
     pyfunc = DistanceMetric.get_metric("pyfunc", func=custom_metric)
     eucl = DistanceMetric.get_metric("euclidean")
     assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
+
+
+def test_readonly_kwargs():
+    # Non-regression test for:
+    # https://github.com/scikit-learn/scikit-learn/issues/21685
+
+    rng = check_random_state(0)
+
+    weights = rng.rand(100)
+    VI = rng.rand(10, 10)
+    weights.setflags(write=False)
+    VI.setflags(write=False)
+
+    # These distance metrics have to support read-only buffers.
+    DistanceMetric.get_metric("seuclidean", V=weights)
+    DistanceMetric.get_metric("wminkowski", p=1, w=weights)
+    DistanceMetric.get_metric("mahalanobis", VI=VI)