ENH FEA add interaction constraints to HGBT by lorentzenchr · Pull Request #21020 · scikit-learn/scikit-learn · GitHub

Merged

Changes from all commits (72 commits)

244c409
DOC add attribues to TreeGrower
lorentzenchr Sep 12, 2021
b31eea0
ENH add interaction_cst
lorentzenchr Sep 12, 2021
d9b273a
use a set in _get_allowed_features
lorentzenchr Sep 12, 2021
1cc1cb5
complete overhaul
lorentzenchr Sep 13, 2021
7baf695
TST test_split_interaction_constraints
lorentzenchr Sep 14, 2021
8aced52
DOC add is_leaf to Attributes section
lorentzenchr Sep 14, 2021
9a9862c
DOC improve interaction_cst_idx
lorentzenchr Sep 14, 2021
ed31a7e
FIX fix logic
lorentzenchr Sep 14, 2021
f2a0679
TST add test_grower_interaction_constraints
lorentzenchr Sep 15, 2021
eb1e255
CLN make allowed_features an instance variable
lorentzenchr Sep 16, 2021
ec48945
TST restructure test_grower_interaction_constraints
lorentzenchr Sep 17, 2021
1ed28d2
CLN improve logic
lorentzenchr Sep 17, 2021
c7c8c3f
TST improve test
lorentzenchr Sep 17, 2021
764cdf5
DOC add docstring for interaction_cst
lorentzenchr Sep 18, 2021
5a26f6e
ENH add validation of interaction_cst
lorentzenchr Sep 18, 2021
eb75a30
TST test input validation
lorentzenchr Sep 18, 2021
3ea3829
DEBUG uncomment if condition
lorentzenchr Sep 18, 2021
0570f61
address review comments
lorentzenchr Sep 21, 2021
16fc0b8
Revert "DEBUG uncomment if condition"
lorentzenchr Sep 21, 2021
2b7e1e2
TST increase max_depth and n_samples in test_grower_interaction_const…
lorentzenchr Sep 21, 2021
ead3b0c
ENH add default group to interaction constraints
lorentzenchr Sep 21, 2021
5a35ab7
TST test_check_interaction_cst
lorentzenchr Sep 21, 2021
c93d3f0
DOC udpate docstring of interaction_cst with default group
lorentzenchr Sep 21, 2021
5092f6b
DOC add whatsnew
lorentzenchr Sep 22, 2021
a18b5ee
DEBUG
lorentzenchr Sep 23, 2021
6a02058
TST make test_split_interaction_constraints more tighter
lorentzenchr Sep 28, 2021
aa21d16
better comments and less typos
lorentzenchr Sep 28, 2021
63191c0
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Sep 28, 2021
299f31b
Revert "DEBUG"
lorentzenchr Sep 28, 2021
9ec7b04
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Oct 22, 2021
4c9e1a3
DOC address review comments for docstrings
lorentzenchr Oct 22, 2021
c09ba91
TST reviewer suggestion for improved grower test
lorentzenchr Oct 22, 2021
ba78cb9
TST check interaction constraints numerically
lorentzenchr Oct 22, 2021
c7a6ebe
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Oct 24, 2021
c8a3a30
EXA add interaction constraints to partial dependence
lorentzenchr Oct 24, 2021
3b6703a
CLN colon in example
lorentzenchr Oct 24, 2021
255646a
CLN fix whatsnew
lorentzenchr Oct 24, 2021
7100600
TST better error messages
lorentzenchr Oct 25, 2021
31c6c3e
EXA add 1D ice plots to see parallel lines
lorentzenchr Oct 25, 2021
bd62aea
TST rely more on default values
lorentzenchr Oct 25, 2021
ec66be7
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Nov 15, 2021
eed05ac
DOC add blank lines in whats_new
lorentzenchr Nov 15, 2021
d66f40a
DOC remove 1.0.1 entry in whats_new 1.1
lorentzenchr Nov 15, 2021
fb9f0b1
Merge branch 'main' of https://github.com/scikit-learn/scikit-learn i…
lesteve Apr 6, 2022
0fe1227
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Apr 11, 2022
22ecd8d
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Jun 28, 2022
ee86a77
CLN fix merge with parameter validation
lorentzenchr Jun 28, 2022
13b0aaf
DOC move whatsnew to v1.2
lorentzenchr Jun 29, 2022
1c75630
CLN move missing docstring additions to other PR
lorentzenchr Aug 16, 2022
b9d880b
DOC add user guide entry
lorentzenchr Aug 17, 2022
10023c0
MNT change versionadded to 1.2
lorentzenchr Aug 17, 2022
4265d23
DOC use code-block:: text
lorentzenchr Aug 17, 2022
a7559b1
DOC allowed_features has dtype uint32
lorentzenchr Aug 17, 2022
3bacb79
DOC remove None from interaction_cst_indices
lorentzenchr Aug 22, 2022
cf4eb15
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Aug 25, 2022
6653a4e
DOC fix typo
lorentzenchr Aug 25, 2022
8d02553
DOC try none to switch off language highlightning
lorentzenchr Aug 25, 2022
4989a26
DOC address features by numbers
lorentzenchr Aug 25, 2022
4a90e0f
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Aug 30, 2022
61b1e06
address reviewer comments
lorentzenchr Sep 6, 2022
38caedb
DOC add note about LightGBM logic
lorentzenchr Sep 6, 2022
45d178d
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Sep 6, 2022
9667937
DOC fix typo
lorentzenchr Sep 7, 2022
ca270f5
Merge branch 'main' into hgbt_interaction_constraints
lorentzenchr Sep 26, 2022
9fb3e55
CLN better comment on test construction
lorentzenchr Sep 26, 2022
5240d9f
EXA review comments
lorentzenchr Oct 9, 2022
295aeee
ENH improvements from Thomas review comments
lorentzenchr Oct 9, 2022
9560ea7
CLN Julien's review comments
lorentzenchr Oct 10, 2022
28c4578
TST fix test_grower_interaction_constraints
lorentzenchr Oct 10, 2022
4d4b80a
DOC add reference Mayer 2022
lorentzenchr Oct 11, 2022
461cd6a
CLN remove if node.is_leaf in for loop
lorentzenchr Oct 11, 2022
e0e8220
CLN fix test_grower_interaction_constraints
lorentzenchr Oct 11, 2022
46 changes: 42 additions & 4 deletions doc/modules/ensemble.rst
@@ -317,9 +317,9 @@ to the prediction function.

.. topic:: References

.. [L2014] G. Louppe,
"Understanding Random Forests: From Theory to Practice",
PhD Thesis, U. of Liege, 2014.
.. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to
Practice" <1407.7502>`,
PhD Thesis, U. of Liege, 2014.

.. _random_trees_embedding:

@@ -711,7 +711,7 @@ space.
accurate enough: the tree can only output integer values. As a result, the
leaves values of the tree :math:`h_m` are modified once the tree is
fitted, such that the leaves values minimize the loss :math:`L_m`. The
update is loss-dependent: for the absolute error loss, the value of
update is loss-dependent: for the absolute error loss, the value of
a leaf is updated to the median of the samples in that leaf.

Classification
@@ -1174,6 +1174,44 @@ Also, monotonic constraints are not supported for multiclass classification.

* :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py`

.. _interaction_cst_hgbt:

Interaction constraints
-----------------------

A priori, histogram-based gradient boosting trees are allowed to use any
feature to split a node into child nodes. This creates so-called interactions
between features, i.e. the use of different features as splits along a branch.
Sometimes one wants to restrict the possible interactions, see [Mayer2022]_.
This can be done with the parameter ``interaction_cst``, which specifies the
groups of feature indices that are allowed to interact.
For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]``
forbids all interactions.
The constraints ``[{0, 1}, {1, 2}]`` specify two groups of possibly
interacting features. Features 0 and 1 may interact with each other, as may
features 1 and 2; features 0 and 2, however, are forbidden to interact.
The following depicts a tree and the possible splits of the tree:

.. code-block:: none

      1      <- Both constraint groups could be applied from now on
     / \
    1   2    <- Left split still fulfills both constraint groups.
   / \ / \      Right split at feature 2 has only group {1, 2} from now on.

LightGBM uses the same logic for overlapping groups.
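The group-tracking rule depicted above can be sketched as a small standalone function. This is a toy illustration, not the grower's actual code; the helper names `child_groups` and `allowed_features` are invented here. Each child node keeps only the constraint groups that contain the feature its parent split on, and a node may split on the union of its remaining groups.

```python
def child_groups(parent_groups, split_feature, constraints):
    # A child keeps only the groups that contain the feature just split on.
    return [i for i in parent_groups if split_feature in constraints[i]]


def allowed_features(groups, constraints):
    # A node may split on any feature in the union of its remaining groups.
    out = set()
    for i in groups:
        out |= constraints[i]
    return out


constraints = [{0, 1}, {1, 2}]
root = [0, 1]                               # both groups apply at the root
left = child_groups(root, 1, constraints)   # split on 1: both groups survive
right = child_groups(root, 2, constraints)  # split on 2: only {1, 2} survives
print(allowed_features(left, constraints))  # {0, 1, 2}
print(allowed_features(right, constraints)) # {1, 2}
```

This reproduces the diagram: after a split on feature 2, only features 1 and 2 remain available along that branch.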

Note that features not listed in ``interaction_cst`` are automatically
assigned an interaction group of their own. Again with 3 features, this
means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``.

.. topic:: References

.. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio.
2022. :doi:`Machine Learning Applications to Land and Structure Valuation
<10.3390/jrfm15050193>`.
Journal of Risk and Financial Management 15, no. 5: 193

Low-level parallelism
---------------------

6 changes: 6 additions & 0 deletions doc/whats_new/v1.2.rst
@@ -242,6 +242,12 @@ Changelog
:mod:`sklearn.ensemble`
.......................

- |Feature| :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
:class:`~sklearn.ensemble.HistGradientBoostingRegressor` now support
interaction constraints via the argument `interaction_cst` of their
constructors.
:pr:`21020` by :user:`Christian Lorentzen <lorentzenchr>`.

- |Feature| Adds `class_weight` to :class:`ensemble.HistGradientBoostingClassifier`.
:pr:`22014` by `Thomas Fan`_.

79 changes: 79 additions & 0 deletions examples/inspection/plot_partial_dependence.py
@@ -255,6 +255,85 @@
# house age, whereas for values less than two there is a strong dependence on
# age.
#
# Interaction constraints
# .......................
#
# The histogram gradient boosters have an interesting option to constrain
# possible interactions among features. In the following, we do not allow any
# interactions and thus render the model as a version of a tree-based boosted
# generalized additive model (GAM). This makes the model more interpretable
# as the effect of each feature can be investigated independently of all others.
#
# We train the :class:`~sklearn.ensemble.HistGradientBoostingRegressor` again,
# now with `interaction_cst`, where we pass for each feature a list containing
# only its own index, i.e. `[[0], [1], [2], ...]`.

print("Training interaction constraint HistGradientBoostingRegressor...")
tic = time()
est_no_interactions = HistGradientBoostingRegressor(
interaction_cst=[[i] for i in range(X_train.shape[1])]
)
est_no_interactions.fit(X_train, y_train)
print(f"done in {time() - tic:.3f}s")

# %%
# The easiest way to show the effect of forbidden interactions is again the
# ICE plots.

print("Computing partial dependence plots...")
tic = time()
display = PartialDependenceDisplay.from_estimator(
est_no_interactions,
X_train,
["MedInc", "AveOccup", "HouseAge", "AveRooms"],
kind="both",
subsample=50,
n_jobs=3,
grid_resolution=20,
random_state=0,
ice_lines_kw={"color": "tab:blue", "alpha": 0.2, "linewidth": 0.5},
pd_line_kw={"color": "tab:orange", "linestyle": "--"},
)

print(f"done in {time() - tic:.3f}s")
display.figure_.suptitle(
"Partial dependence of house value with Gradient Boosting\n"
"and no interactions allowed"
)
display.figure_.subplots_adjust(wspace=0.4, hspace=0.3)

# %%
# All 4 plots show parallel ICE lines, meaning there is no interaction in the
# model.
# Let us also have a look at the corresponding 2D-plot.

print("Computing partial dependence plots...")
tic = time()
_, ax = plt.subplots(ncols=3, figsize=(9, 4))
display = PartialDependenceDisplay.from_estimator(
est_no_interactions,
X_train,
["AveOccup", "HouseAge", ("AveOccup", "HouseAge")],
kind="average",
n_jobs=3,
grid_resolution=20,
ax=ax,
)
print(f"done in {time() - tic:.3f}s")
display.figure_.suptitle(
"Partial dependence of house value with Gradient Boosting\n"
"and no interactions allowed"
)
display.figure_.subplots_adjust(wspace=0.4, hspace=0.3)

# %%
# Although the 2D plot shows much less interaction compared with the 2D plot
# from above, it is much harder to conclude that there is no interaction at
# all. This might be a consequence of the discrete predictions of trees in
# combination with the numerical precision of partial dependence.
# We also observe that the univariate dependence plots have slightly changed,
# as the model tries to compensate for the forbidden interactions.
#
# 3D interaction plots
# --------------------
#
80 changes: 80 additions & 0 deletions sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -2,6 +2,7 @@
# Author: Nicolas Hug

from abc import ABC, abstractmethod
from collections.abc import Iterable
from functools import partial
from numbers import Real, Integral
import warnings
@@ -91,6 +92,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC):
"min_samples_leaf": [Interval(Integral, 1, None, closed="left")],
"l2_regularization": [Interval(Real, 0, None, closed="left")],
"monotonic_cst": ["array-like", None],
"interaction_cst": [Iterable, None],
"n_iter_no_change": [Interval(Integral, 1, None, closed="left")],
"validation_fraction": [
Interval(Real, 0, 1, closed="neither"),
@@ -121,6 +123,7 @@ def __init__(
max_bins,
categorical_features,
monotonic_cst,
interaction_cst,
warm_start,
early_stopping,
scoring,
@@ -139,6 +142,7 @@
self.l2_regularization = l2_regularization
self.max_bins = max_bins
self.monotonic_cst = monotonic_cst
self.interaction_cst = interaction_cst
self.categorical_features = categorical_features
self.warm_start = warm_start
self.early_stopping = early_stopping
@@ -252,6 +256,42 @@ def _check_categories(self, X):

return is_categorical, known_categories

def _check_interaction_cst(self, n_features):
"""Check and validate interaction constraints."""
if self.interaction_cst is None:
return None

if not (
isinstance(self.interaction_cst, Iterable)
and all(isinstance(x, Iterable) for x in self.interaction_cst)
):
raise ValueError(
"Interaction constraints must be None or an iterable of iterables, "
f"got: {self.interaction_cst!r}."
)

invalid_indices = [
x
for cst_set in self.interaction_cst
for x in cst_set
if not (isinstance(x, Integral) and 0 <= x < n_features)
]
if invalid_indices:
raise ValueError(
"Interaction constraints must consist of integer indices in [0,"
f" n_features - 1] = [0, {n_features - 1}], specifying the position of"
f" features, got invalid indices: {invalid_indices!r}"
)

constraints = [set(group) for group in self.interaction_cst]

# Add all not listed features as own group by default.
rest = set(range(n_features)) - set().union(*constraints)
if len(rest) > 0:
constraints.append(rest)

return constraints
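The default-group behavior of this helper can be illustrated with a standalone sketch; `normalize_interaction_cst` below is a toy reimplementation for illustration, not the method itself, and it omits the input validation:

```python
def normalize_interaction_cst(interaction_cst, n_features):
    """Toy version of the normalization above: listed groups become sets,
    and all unlisted features are collected into one extra group."""
    if interaction_cst is None:
        return None
    constraints = [set(group) for group in interaction_cst]
    # Add all not listed features as their own group by default.
    rest = set(range(n_features)) - set().union(*constraints)
    if rest:
        constraints.append(rest)
    return constraints


print(normalize_interaction_cst([{0}], 3))     # [{0}, {1, 2}]
print(normalize_interaction_cst([{0, 1}], 5))  # [{0, 1}, {2, 3, 4}]
```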

def fit(self, X, y, sample_weight=None):
"""Fit the gradient boosting model.

@@ -308,6 +348,9 @@ def fit(self, X, y, sample_weight=None):

self.is_categorical_, known_categories = self._check_categories(X)

# Encode constraints into a list of sets of feature indices (integers).
interaction_cst = self._check_interaction_cst(self._n_features)

# we need this stateful variable to tell raw_predict() that it was
# called from fit() (this current method), and that the data it has
# received is pre-binned.
@@ -595,6 +638,7 @@ def fit(self, X, y, sample_weight=None):
has_missing_values=has_missing_values,
is_categorical=self.is_categorical_,
monotonic_cst=self.monotonic_cst,
interaction_cst=interaction_cst,
max_leaf_nodes=self.max_leaf_nodes,
max_depth=self.max_depth,
min_samples_leaf=self.min_samples_leaf,
@@ -1191,6 +1235,22 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):

.. versionadded:: 0.23

interaction_cst : iterable of iterables of int, default=None
Review thread:

Member: So specifying a GAM means `interaction_cst=[{i} for i in range(X.shape[1])]`, and doing all pairwise interactions would be `interaction_cst=[{i} for i in range(X.shape[1])] + [{i, j} for i, j in itertools.combinations(range(X.shape[1]), 2)]` or something like that?

Member: Talking with @ogrisel and @adrinjalali, maybe an option would be to have string special cases for univariate and bivariate, and then this would be a good first step :)

Contributor: @amueller: In my view, this is not necessary. These two cases are actually less important than one might think when working with interaction constraints. In practice you would simply limit the number of terminal nodes (2 resp. 3) instead of using constraints. The interesting cases are asymmetric ones, e.g. some variables are forced to act additively and others not. There, the intended interface is actually very convenient.

amueller (Member, Oct 23, 2021):

> In practice you would simply limit the number of terminal nodes (2 resp. 3) instead of using constraints

Why? That's not equivalent at all, is it?

> These two cases are actually less important than one might think when working with interaction constraints.

I think the motivation for me is interpretable models, and you can get interpretable models that are using deeper trees, which is not the same as boosting stumps as far as I can see.

Member: Hm, I just realized that this interface doesn't allow restricting to interactions of two features, right? Passing all tuples can still result in trees using more than 2 features, right?

mayer79 (Contributor, Oct 23, 2021): The resulting trees are not identical, but the resulting model structure is, in the sense that the interaction constraints are fulfilled in both cases.

mayer79 (Contributor, Oct 23, 2021): Regarding the pairwise case: if Christian implemented it correctly (I don't doubt!), then each branch in each tree will use only features of one constraint set. As such, each tree prediction will use only two features. But the tree will usually use three features.

Member Author:

> Talking with @ogrisel and @adrinjalali maybe an option would be to have string special cases for univariate and bivariate, and then this would be a good first step :)

+1. I had the same thought 😏 Maybe we can do that in a follow-up PR? All pairwise interactions would just be `interaction_cst = list(itertools.combinations(range(n_features), 2))`.

Member:

> As such, each tree prediction will use only two features. But the tree will use many.

Interesting. We need to check how other libraries do it. I wonder if this is a significantly different inductive bias compared to having each tree work only with a small subset of features.

> In practice you would simply limit the number of terminal nodes (2 resp. 3) instead of using constraints.

I think allowing deep trees (or deep branches) with a large number of splits but on a small subset of the features (typically 1 or 2) is an interesting inductive bias (similar to GAMs): it allows for decision functions with complex non-linear feature-wise parts but very decoupled inter-feature decisions. Relying on sequential decision stumps via more iterations of the gradient boosting algorithm is probably quite different from an inductive-bias point of view.

Member:

> The resulting trees are not identical, but the resulting model structure is in the sense that the interaction constraints are fulfilled in both cases.

The constraint is fulfilled but the models are not equivalent in any way, right? Aka what @ogrisel said, it's quite a different model.

> But the tree will usually use three features.

One of the reasons people restrict to pairwise interactions is so that the full model can be visualized. That's much harder with three features. There is no way to achieve trees that are on pairs of features with this PR, right?


Specify interaction constraints, i.e. sets of features which can
only interact with each other in child nodes splits.

Each iterable materializes a constraint by the set of indices of
the features that are allowed to interact with each other.
If there are more features than specified in these constraints,
they are treated as if they were specified as an additional set.

For instance, with 5 features in total, `interaction_cst=[{0, 1}]`
is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,
and specifies that each branch of a tree will either only split
on features 0 and 1 or only split on features 2, 3 and 4.

.. versionadded:: 1.2

warm_start : bool, default=False
When set to ``True``, reuse the solution of the previous call to fit
and add more estimators to the ensemble. For results to be valid, the
@@ -1315,6 +1375,7 @@ def __init__(
max_bins=255,
categorical_features=None,
monotonic_cst=None,
interaction_cst=None,
warm_start=False,
early_stopping="auto",
scoring="loss",
@@ -1334,6 +1395,7 @@
l2_regularization=l2_regularization,
max_bins=max_bins,
monotonic_cst=monotonic_cst,
interaction_cst=interaction_cst,
categorical_features=categorical_features,
early_stopping=early_stopping,
warm_start=warm_start,
@@ -1505,6 +1567,22 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):

.. versionadded:: 0.23

interaction_cst : iterable of iterables of int, default=None
Specify interaction constraints, i.e. sets of features which can
only interact with each other in child nodes splits.

Each iterable materializes a constraint by the set of indices of
the features that are allowed to interact with each other.
If there are more features than specified in these constraints,
they are treated as if they were specified as an additional set.

For instance, with 5 features in total, `interaction_cst=[{0, 1}]`
is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,
and specifies that each branch of a tree will either only split
on features 0 and 1 or only split on features 2, 3 and 4.

.. versionadded:: 1.2

warm_start : bool, default=False
When set to ``True``, reuse the solution of the previous call to fit
and add more estimators to the ensemble. For results to be valid, the
@@ -1653,6 +1731,7 @@ def __init__(
max_bins=255,
categorical_features=None,
monotonic_cst=None,
interaction_cst=None,
warm_start=False,
early_stopping="auto",
scoring="loss",
@@ -1674,6 +1753,7 @@
max_bins=max_bins,
categorical_features=categorical_features,
monotonic_cst=monotonic_cst,
interaction_cst=interaction_cst,
warm_start=warm_start,
early_stopping=early_stopping,
scoring=scoring,