punndcoder28
diff --git a/‎build_tools/circle/doc_environment.yml
Lines changed: 1 addition & 0 deletions b/‎build_tools/circle/doc_environment.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎build_tools/circle/doc_linux-64_conda.lock
Lines changed: 3 additions & 1 deletion b/‎build_tools/circle/doc_linux-64_conda.lock
Lines changed: 3 additions & 1 deletion
diff --git a/‎build_tools/circle/doc_min_dependencies_environment.yml
Lines changed: 1 addition & 0 deletions b/‎build_tools/circle/doc_min_dependencies_environment.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎build_tools/circle/doc_min_dependencies_linux-64_conda.lock
Lines changed: 2 additions & 1 deletion b/‎build_tools/circle/doc_min_dependencies_linux-64_conda.lock
Lines changed: 2 additions & 1 deletion
diff --git a/‎build_tools/update_environments_and_lock_files.py
Lines changed: 3 additions & 0 deletions b/‎build_tools/update_environments_and_lock_files.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎examples/release_highlights/plot_release_highlights_1_4_0.py
Lines changed: 206 additions & 0 deletions b/‎examples/release_highlights/plot_release_highlights_1_4_0.py
Lines changed: 206 additions & 0 deletions
@@ -28,6 +28,7 @@ dependencies:
   - numpydoc
   - sphinx-prompt
   - plotly
+  - polars
   - pooch
   - sphinxext-opengraph
   - pip
 
@@ -1,6 +1,6 @@
 # Generated by conda-lock.
 # platform: linux-64
-# input_hash: 74e9e451b651d0b84d1c066a106b93d1a0f711e6aa6c5a48d2169af2e01f4d90
+# input_hash: 0d62c56444fc81a1e285d3657990a983d2c40ceb6fb44130975b4e8e72626137
 @EXPLICIT
 https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.11.17-hbcca054_0.conda#01ffc8d36f9eba0ce0b3c1955fa780ee
@@ -178,6 +178,7 @@ https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.c
 https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
 https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py39hd1e30aa_1.conda#cbe186eefb0bcd91e8f47c3908489874
+https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.9.0-pyha770c72_0.conda#a92a6440c3fe7052d63244f3aba2a4a7
 https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8
 https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda#1cdea58981c5cbc17b51973bcaddcea7
 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73
@@ -225,6 +226,7 @@ https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2023.9.18-py39hf9b8f
 https://conda.anaconda.org/conda-forge/noarch/imageio-2.33.1-pyh8c1a49c_0.conda#1c34d58ac469a34e7e96832861368bce
 https://conda.anaconda.org/conda-forge/linux-64/pandas-2.1.4-py39hddac248_0.conda#dcfd2f15c6f8f0bbf234412b18a2a5d0
 https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.4-pyhd8ed1ab_0.conda#1184267eddebb57e47f8e1419c225595
+https://conda.anaconda.org/conda-forge/linux-64/polars-0.19.19-py39h90d8ae4_0.conda#9cefe0d7ce9208c3afbbac29951aff59
 https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.0-pyhd8ed1ab_0.conda#134b2b57b7865d2316a7cce1915a51ed
 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e
 https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h44dd56e_1.conda#d037c20e3da2e85f03ebd20ad480c359
 
@@ -28,6 +28,7 @@ dependencies:
   - numpydoc=1.2.0  # min
   - sphinx-prompt=1.3.0  # min
   - plotly=5.14.0  # min
+  - polars=0.19.12  # min
   - pooch
   - pip
   - pip:
 
@@ -1,6 +1,6 @@
 # Generated by conda-lock.
 # platform: linux-64
-# input_hash: 35f943b65f19232746bf1ac103664d9fa08c9fce0bcc39d7ee2ecf873d996bff
+# input_hash: 63e92fdc759dcf030bf7e6d4a5d86bec102c98562cfb7ebd4d3d4991c895678b
 @EXPLICIT
 https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.11.17-hbcca054_0.conda#01ffc8d36f9eba0ce0b3c1955fa780ee
@@ -208,6 +208,7 @@ https://conda.anaconda.org/conda-forge/noarch/imageio-2.33.1-pyh8c1a49c_0.conda#
 https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.3.4-py39h2fa2bec_0.tar.bz2#9ec0b2186fab9121c54f4844f93ee5b7
 https://conda.anaconda.org/conda-forge/linux-64/pandas-1.1.5-py39hde0f152_0.tar.bz2#79fc4b5b3a865b90dd3701cecf1ad33c
 https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.4-pyhd8ed1ab_0.conda#1184267eddebb57e47f8e1419c225595
+https://conda.anaconda.org/conda-forge/linux-64/polars-0.19.12-py39h90d8ae4_0.conda#191828961c95f8d59fa2b86a590f9905
 https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.0-pyhd8ed1ab_0.conda#134b2b57b7865d2316a7cce1915a51ed
 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e
 https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.3.0-py39hd257fcd_1.tar.bz2#c4b698994b2d8d2e659ae02202e6abe4
 
@@ -276,6 +276,7 @@ def remove_from(alist, to_remove):
             "numpydoc",
             "sphinx-prompt",
             "plotly",
+            "polars",
             "pooch",
         ],
         "pip_dependencies": ["sphinxext-opengraph"],
@@ -294,6 +295,7 @@ def remove_from(alist, to_remove):
             "sphinx-prompt": "min",
             "sphinxext-opengraph": "min",
             "plotly": "min",
+            "polars": "min",
         },
     },
     {
@@ -312,6 +314,7 @@ def remove_from(alist, to_remove):
             "numpydoc",
             "sphinx-prompt",
             "plotly",
+            "polars",
             "pooch",
             "sphinxext-opengraph",
         ],
 
@@ -0,0 +1,206 @@
+# ruff: noqa
+"""
+=======================================
+Release Highlights for scikit-learn 1.4
+=======================================
+
+.. currentmodule:: sklearn
+
+We are pleased to announce the release of scikit-learn 1.4! Many bug fixes
+and improvements were added, as well as some new key features. We detail
+below a few of the major features of this release. **For an exhaustive list of
+all the changes**, please refer to the :ref:`release notes <changes_1_4>`.
+
+To install the latest version (with pip)::
+
+    pip install --upgrade scikit-learn
+
+or with conda::
+
+    conda install -c conda-forge scikit-learn
+
+"""
+
+# %%
+# HistGradientBoosting Natively Supports Categorical DTypes in DataFrames
+# -----------------------------------------------------------------------
+# :class:`ensemble.HistGradientBoostingClassifier` and
+# :class:`ensemble.HistGradientBoostingRegressor` now directly support dataframes with
+# categorical features.  Here we have a dataset with a mixture of
+# categorical and numerical features:
+from sklearn.datasets import fetch_openml
+
+X_adult, y_adult = fetch_openml("adult", version=2, return_X_y=True)
+
+# Remove redundant and non-feature columns
+X_adult = X_adult.drop(["education-num", "fnlwgt"], axis="columns")
+X_adult.dtypes
+
+# %%
+# By setting `categorical_features="from_dtype"`, the gradient boosting classifier
+# treats the columns with categorical dtypes as categorical features in the
+# algorithm:
+from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import roc_auc_score
+
+X_train, X_test, y_train, y_test = train_test_split(X_adult, y_adult, random_state=0)
+hist = HistGradientBoostingClassifier(categorical_features="from_dtype")
+
+hist.fit(X_train, y_train)
+y_decision = hist.decision_function(X_test)
+print(f"ROC AUC score is {roc_auc_score(y_test, y_decision)}")
+
+# %%
+# Polars output in `set_output`
+# -----------------------------
+# scikit-learn's transformers now support polars output with the `set_output` API.
+import polars as pl
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.compose import ColumnTransformer
+
+df = pl.DataFrame(
+    {"height": [120, 140, 150, 110, 100], "pet": ["dog", "cat", "dog", "cat", "cat"]}
+)
+preprocessor = ColumnTransformer(
+    [
+        ("numerical", StandardScaler(), ["height"]),
+        ("categorical", OneHotEncoder(sparse_output=False), ["pet"]),
+    ],
+    verbose_feature_names_out=False,
+)
+preprocessor.set_output(transform="polars")
+
+df_out = preprocessor.fit_transform(df)
+print(f"Output type: {type(df_out)}")
+
+# %%
+# Missing value support for Random Forest
+# ---------------------------------------
+# The classes :class:`ensemble.RandomForestClassifier` and
+# :class:`ensemble.RandomForestRegressor` now support missing values. When training
+# every individual tree, the splitter evaluates each potential threshold with the
+# missing values going to the left and right nodes. More details in the
+# :ref:`User Guide <tree_missing_value_support>`.
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+
+X = np.array([0, 1, 6, np.nan]).reshape(-1, 1)
+y = [0, 0, 1, 1]
+
+forest = RandomForestClassifier(random_state=0).fit(X, y)
+forest.predict(X)
+
+# %%
+# Add support for monotonic constraints in tree-based models
+# ----------------------------------------------------------
+# While we added support for monotonic constraints in histogram-based gradient boosting
+# in scikit-learn 0.23, we now support this feature for all other tree-based models
+# such as trees, random forests, extra-trees, and exact gradient boosting. Here, we
+# show this feature for random forest on a regression problem.
+import matplotlib.pyplot as plt
+from sklearn.inspection import PartialDependenceDisplay
+from sklearn.ensemble import RandomForestRegressor
+
+n_samples = 500
+rng = np.random.RandomState(0)
+X = rng.randn(n_samples, 2)
+noise = rng.normal(loc=0.0, scale=0.01, size=n_samples)
+y = 5 * X[:, 0] + np.sin(10 * np.pi * X[:, 0]) - noise
+
+rf_no_cst = RandomForestRegressor().fit(X, y)
+rf_cst = RandomForestRegressor(monotonic_cst=[1, 0]).fit(X, y)
+
+disp = PartialDependenceDisplay.from_estimator(
+    rf_no_cst,
+    X,
+    features=[0],
+    feature_names=["feature 0"],
+    line_kw={"linewidth": 4, "label": "unconstrained", "color": "tab:blue"},
+)
+PartialDependenceDisplay.from_estimator(
+    rf_cst,
+    X,
+    features=[0],
+    line_kw={"linewidth": 4, "label": "constrained", "color": "tab:orange"},
+    ax=disp.axes_,
+)
+disp.axes_[0, 0].plot(
+    X[:, 0], y, "o", alpha=0.5, zorder=-1, label="samples", color="tab:green"
+)
+disp.axes_[0, 0].set_ylim(-3, 3)
+disp.axes_[0, 0].set_xlim(-1, 1)
+disp.axes_[0, 0].legend()
+plt.show()
+
+# %%
+# Enriched estimator displays
+# ---------------------------
+# Estimator displays have been enriched: if we look at `forest`, defined above:
+forest
+
+# %%
+# One can access the documentation of the estimator by clicking on the icon "?" on
+# the top right corner of the diagram.
+#
+# In addition, the display changes color, from orange to blue, when the estimator is
+# fitted. You can also get this information by hovering on the icon "i".
+from sklearn.base import clone
+
+clone(forest)  # the clone is not fitted
+
+# %%
+# Metadata Routing Support
+# ------------------------
+# Many meta-estimators and cross-validation routines now support metadata
+# routing, which are listed in the :ref:`user guide
+# <metadata_routing_models>`. For instance, this is how you can do a nested
+# cross-validation with sample weights and :class:`~model_selection.GroupKFold`:
+import sklearn
+from sklearn.metrics import get_scorer
+from sklearn.datasets import make_regression
+from sklearn.linear_model import Lasso
+from sklearn.model_selection import GridSearchCV, cross_validate, GroupKFold
+
+# For now, metadata routing is disabled by default and needs to be explicitly
+# enabled.
+sklearn.set_config(enable_metadata_routing=True)
+
+n_samples = 100
+X, y = make_regression(n_samples=n_samples, n_features=5, noise=0.5)
+rng = np.random.RandomState(7)
+groups = rng.randint(0, 10, size=n_samples)
+sample_weights = rng.rand(n_samples)
+estimator = Lasso().set_fit_request(sample_weight=True)
+hyperparameter_grid = {"alpha": [0.1, 0.5, 1.0, 2.0]}
+scoring_inner_cv = get_scorer("neg_mean_squared_error").set_score_request(
+    sample_weight=True
+)
+inner_cv = GroupKFold(n_splits=5)
+
+grid_search = GridSearchCV(
+    estimator=estimator,
+    param_grid=hyperparameter_grid,
+    cv=inner_cv,
+    scoring=scoring_inner_cv,
+)
+
+outer_cv = GroupKFold(n_splits=5)
+scorers = {
+    "mse": get_scorer("neg_mean_squared_error").set_score_request(sample_weight=True)
+}
+results = cross_validate(
+    grid_search,
+    X,
+    y,
+    cv=outer_cv,
+    scoring=scorers,
+    return_estimator=True,
+    params={"sample_weight": sample_weights, "groups": groups},
+)
+print("cv error on test sets:", results["test_mse"])
+
+# Setting the flag to the default `False` to avoid interference with other
+# scripts.
+sklearn.set_config(enable_metadata_routing=False)