From 272e75fde2558f7bb0037fda06b75c3a10c05479 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Wed, 15 Dec 2021 00:58:45 +0100
Subject: [PATCH 01/20] callback API

---
 sklearn/__init__.py                           |   1 +
 sklearn/base.py                               | 136 +++++++++
 sklearn/callback/__init__.py                  |  25 ++
 sklearn/callback/_base.py                     | 126 ++++++++
 sklearn/callback/_computation_tree.py         | 268 ++++++++++++++++++
 sklearn/callback/_convergence_monitor.py      | 118 ++++++++
 sklearn/callback/_early_stopping.py           |  48 ++++
 sklearn/callback/_progressbar.py              | 257 +++++++++++++++++
 sklearn/callback/_snapshot.py                 |  82 ++++++
 sklearn/callback/_text_verbose.py             |  44 +++
 .../callback/tests/test_computation_tree.py   |  98 +++++++
 sklearn/decomposition/_nmf.py                 |  95 ++++++-
 sklearn/linear_model/_logistic.py             |  62 +++-
 sklearn/linear_model/_sag.py                  |   4 +
 sklearn/linear_model/_sag_fast.pyx.tp         |  21 +-
 sklearn/pipeline.py                           |  30 +-
 sklearn/utils/optimize.py                     |  21 +-
 17 files changed, 1416 insertions(+), 20 deletions(-)
 create mode 100644 sklearn/callback/__init__.py
 create mode 100644 sklearn/callback/_base.py
 create mode 100644 sklearn/callback/_computation_tree.py
 create mode 100644 sklearn/callback/_convergence_monitor.py
 create mode 100644 sklearn/callback/_early_stopping.py
 create mode 100644 sklearn/callback/_progressbar.py
 create mode 100644 sklearn/callback/_snapshot.py
 create mode 100644 sklearn/callback/_text_verbose.py
 create mode 100644 sklearn/callback/tests/test_computation_tree.py

diff --git a/sklearn/__init__.py b/sklearn/__init__.py
index 77ee28271bfaf..0e667babf1cee 100644
--- a/sklearn/__init__.py
+++ b/sklearn/__init__.py
@@ -84,6 +84,7 @@
 __all__ = [
     "calibration",
+    "callback",
     "cluster",
     "covariance",
     "cross_decomposition",
diff --git a/sklearn/base.py b/sklearn/base.py
index 06e9a63630923..4f6b63cb2add1 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -9,6 +9,7 @@
 import platform
 import inspect
 import re
+import pickle
 
 import numpy as np
 
@@ -28,6 +29,9 @@
 from .utils.validation import check_is_fitted
 from .utils._estimator_html_repr import estimator_html_repr
 from .utils.validation import _get_feature_names
+from .callback import BaseCallback
+from .callback import AutoPropagatedMixin
+from .callback import ComputationTree
 
 
 def clone(estimator, *, safe=True):
@@ -84,6 +88,10 @@ def clone(estimator, *, safe=True):
     new_object = klass(**new_object_params)
     params_set = new_object.get_params(deep=False)
 
+    # copy callbacks
+    if hasattr(estimator, "_callbacks"):
+        new_object._callbacks = clone(estimator._callbacks, safe=False)
+
     # quick sanity check of the parameters of the clone
     for name in new_object_params:
         param1 = new_object_params[name]
@@ -597,6 +605,134 @@ def _validate_data(
 
         return out
 
+    def _set_callbacks(self, callbacks):
+        """Set callbacks for the estimator.
+
+        Parameters
+        ----------
+        callbacks : callback or list of callbacks
+            The callbacks to set.
+        """
+        if not isinstance(callbacks, list):
+            callbacks = [callbacks]
+
+        if not all(isinstance(callback, BaseCallback) for callback in callbacks):
+            raise TypeError("callbacks must be subclasses of BaseCallback.")
+
+        self._callbacks = callbacks
+
+    # XXX should be a method of MetaEstimatorMixin but this mixin can't handle all
+    # meta-estimators.
+    def _propagate_callbacks(self, sub_estimator, parent_node):
+        """Propagate the auto-propagated callbacks to a sub-estimator
+
+        Parameters
+        ----------
+        sub_estimator : estimator instance
+            The sub-estimator to propagate the callbacks to.
+
+        parent_node : ComputationNode instance
+            The computation node in this estimator to set as parent_node to the
+            computation tree of the sub-estimator. It must be the node where the fit
+            method of the sub-estimator is called.
+        """
+        if not hasattr(self, "_callbacks"):
+            return
+
+        if hasattr(sub_estimator, "_callbacks") and any(
+            isinstance(callback, AutoPropagatedMixin)
+            for callback in sub_estimator._callbacks
+        ):
+            bad_callbacks = [
+                callback.__class__.__name__
+                for callback in sub_estimator._callbacks
+                if isinstance(callback, AutoPropagatedMixin)
+            ]
+            raise TypeError(
+                f"The sub-estimators ({sub_estimator.__class__.__name__}) of a"
+                f" meta-estimator ({self.__class__.__name__}) can't have"
+                f" auto-propagated callbacks ({bad_callbacks})."
+                " Set them directly on the meta-estimator."
+            )
+
+        propagated_callbacks = [
+            callback
+            for callback in self._callbacks
+            if isinstance(callback, AutoPropagatedMixin)
+        ]
+
+        if not propagated_callbacks:
+            return
+
+        sub_estimator._parent_node = parent_node
+
+        if not hasattr(sub_estimator, "_callbacks"):
+            sub_estimator._callbacks = propagated_callbacks
+        else:
+            sub_estimator._callbacks.extend(propagated_callbacks)
+
+    def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None):
+        """Evaluate the on_fit_begin method of the callbacks
+
+        The computation tree is also built at this point.
+
+        This method should be called after all data and parameter validation.
+
+        Parameters
+        ----------
+        levels : list of dict
+            A description of the nested levels of computation of the estimator to build
+            the computation tree. It's a list of dicts with "descr" and "max_iter" keys.
+
+        X : ndarray or sparse matrix, default=None
+            The training data.
+
+        y : ndarray, default=None
+            The target.
+
+        Returns
+        -------
+        root : ComputationNode instance
+            The root of the computation tree.
+        """
+        self._computation_tree = ComputationTree(
+            estimator_name=self.__class__.__name__,
+            levels=levels,
+            parent_node=getattr(self, "_parent_node", None),
+        )
+
+        if hasattr(self, "_callbacks"):
+            file_path = self._computation_tree.tree_dir / "computation_tree.pkl"
+            with open(file_path, "wb") as f:
+                pickle.dump(self._computation_tree, f)
+
+            for callback in self._callbacks:
+                is_propagated = hasattr(self, "_parent_node") and isinstance(
+                    callback, AutoPropagatedMixin
+                )
+                if not is_propagated:
+                    # Only call the on_fit_begin method of callbacks that are not
+                    # propagated from a meta-estimator.
+                    callback.on_fit_begin(estimator=self, X=X, y=y)
+
+        return self._computation_tree.root
+
+    def _eval_callbacks_on_fit_end(self):
+        """Evaluate the on_fit_end method of the callbacks"""
+        if not hasattr(self, "_callbacks"):
+            return
+
+        self._computation_tree._tree_status[0] = True
+
+        for callback in self._callbacks:
+            is_propagated = isinstance(callback, AutoPropagatedMixin) and hasattr(
+                self, "_parent_node"
+            )
+            if not is_propagated:
+                # Only call the on_fit_end method of callbacks that are not
+                # propagated from a meta-estimator.
+                callback.on_fit_end()
+
     @property
     def _repr_html_(self):
         """HTML representation of estimator.
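
Taken together, the hooks added to `BaseEstimator` above are meant to be driven from an estimator's `fit` roughly as in the sketch below. This is a minimal illustration, not part of the patch: `TrivialEstimator` and its two-level `levels` description are made up, and only the plumbing introduced in this diff (`_set_callbacks`, `_eval_callbacks_on_fit_begin`, `_eval_callbacks_on_fit_iter_end`, `_eval_callbacks_on_fit_end`) is assumed.

    import numpy as np

    from sklearn.base import BaseEstimator
    from sklearn.callback import ProgressBar
    from sklearn.callback._base import _eval_callbacks_on_fit_iter_end

    class TrivialEstimator(BaseEstimator):
        def __init__(self, max_iter=10):
            self.max_iter = max_iter

        def fit(self, X, y=None):
            # Builds the computation tree (one child of the root per iteration)
            # and calls on_fit_begin on each registered callback.
            root = self._eval_callbacks_on_fit_begin(
                levels=[
                    {"descr": "fit", "max_iter": self.max_iter},
                    {"descr": "iter", "max_iter": None},
                ],
                X=X,
                y=y,
            )

            for i in range(self.max_iter):
                # Marks the node as done; returns True if a callback asks to stop.
                if _eval_callbacks_on_fit_iter_end(estimator=self, node=root.children[i]):
                    break

            self.n_iter_ = i + 1
            self._eval_callbacks_on_fit_end()
            return self

    est = TrivialEstimator()
    est._set_callbacks(ProgressBar())  # any BaseCallback subclass
    est.fit(np.zeros((10, 2)))

The same pattern, including a meta-estimator that calls `_propagate_callbacks` on its sub-estimators, appears in the `test_callbacks.py` file added by PATCH 03 at the end of this series.
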
diff --git a/sklearn/callback/__init__.py b/sklearn/callback/__init__.py
new file mode 100644
index 0000000000000..1f0f3f7215a18
--- /dev/null
+++ b/sklearn/callback/__init__.py
@@ -0,0 +1,25 @@
+# License: BSD 3 clause
+
+from ._base import AutoPropagatedMixin
+from ._base import BaseCallback
+from ._computation_tree import ComputationNode
+from ._computation_tree import ComputationTree
+from ._computation_tree import load_computation_tree
+from ._convergence_monitor import ConvergenceMonitor
+from ._early_stopping import EarlyStopping
+from ._progressbar import ProgressBar
+from ._snapshot import Snapshot
+from ._text_verbose import TextVerbose
+
+__all__ = [
+    "AutoPropagatedMixin",
+    "BaseCallback",
+    "ComputationNode",
+    "ComputationTree",
+    "load_computation_tree",
+    "ConvergenceMonitor",
+    "EarlyStopping",
+    "ProgressBar",
+    "Snapshot",
+    "TextVerbose",
+]
diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py
new file mode 100644
index 0000000000000..604a450336610
--- /dev/null
+++ b/sklearn/callback/_base.py
@@ -0,0 +1,126 @@
+# License: BSD 3 clause
+
+from abc import ABC, abstractmethod
+
+
+# Not a method of BaseEstimator because it might be called from an external function
+def _eval_callbacks_on_fit_iter_end(**kwargs):
+    """Evaluate the on_fit_iter_end method of the callbacks
+
+    This function should be called at the end of each computation node.
+
+    Parameters
+    ----------
+    kwargs : dict
+        Arguments passed to the callback.
+
+    Returns
+    -------
+    stop : bool
+        Whether or not to stop the fit at this node.
+    """
+    estimator = kwargs.get("estimator")
+    node = kwargs.get("node")
+
+    if not hasattr(estimator, "_callbacks") or node is None:
+        return False
+
+    estimator._computation_tree._tree_status[node.tree_status_idx] = True
+
+    # stopping_criterion and reconstruction_attributes can be costly to compute. They
+    # are passed as lambdas for lazy evaluation. We only actually compute them if a
+    # callback requests it.
+    if any(
+        getattr(callback, "request_stopping_criterion", False)
+        for callback in estimator._callbacks
+    ):
+        kwarg = kwargs.pop("stopping_criterion", lambda: None)()
+        kwargs["stopping_criterion"] = kwarg
+
+    if any(
+        getattr(callback, "request_reconstruction_attributes", False)
+        for callback in estimator._callbacks
+    ):
+        kwarg = kwargs.pop("reconstruction_attributes", lambda: None)()
+        kwargs["reconstruction_attributes"] = kwarg
+
+    return any(callback.on_fit_iter_end(**kwargs) for callback in estimator._callbacks)
+
+
+class BaseCallback(ABC):
+    """Abstract class for the callbacks"""
+
+    @abstractmethod
+    def on_fit_begin(self, estimator, *, X=None, y=None):
+        """Method called at the beginning of the fit method of the estimator
+
+        Parameters
+        ----------
+        estimator : estimator instance
+            The estimator the callback is set on.
+
+        X : ndarray or sparse matrix, default=None
+            The training data.
+
+        y : ndarray, default=None
+            The target.
+        """
+        pass
+
+    @abstractmethod
+    def on_fit_end(self):
+        """Method called at the end of the fit method of the estimator"""
+        pass
+
+    @abstractmethod
+    def on_fit_iter_end(self, estimator, node, **kwargs):
+        """Method called at the end of each computation node of the estimator
+
+        Parameters
+        ----------
+        estimator : estimator instance
+            The caller estimator. It might differ from the estimator passed to the
+            `on_fit_begin` method for auto-propagated callbacks.
+
+        node : ComputationNode instance
+            The caller computation node.
+
+        kwargs : dict
+            Arguments passed to the callback.
Possible keys are + + - stopping_criterion: float + Usually iterations stop when `stopping_criterion <= tol`. + This is only provided at the innermost level of iterations. + + - tol: float + Tolerance for the stopping criterion. + This is only provided at the innermost level of iterations. + + - reconstruction_attributes: dict + Necessary attributes to construct an estimator (by copying this + estimator and setting these as attributes) which will behave as if + the fit stopped at this node. + This is only provided at the outermost level of iterations. + + - fit_state: dict + Model specific quantities updated during fit. This is not meant to be + used by generic callbacks but by a callback designed for a specific + estimator instead. + + Returns + ------- + stop : bool or None + Whether or not to stop the current level of iterations at this node. + """ + pass + + +class AutoPropagatedMixin: + """Mixin for auto-propagated callbacks + + An auto-propagated callback (from a meta-estimator to its sub-estimators) must be + set on the meta-estimator. Its `on_fit_begin` and `on_fit_end` methods will only be + called at the beginning and end of the fit method of the meta-estimator, while its + `on_fit_iter_end` method will be called at each computation node of the + meta-estimator and its sub-estimators. + """ + + pass diff --git a/sklearn/callback/_computation_tree.py b/sklearn/callback/_computation_tree.py new file mode 100644 index 0000000000000..edd3c8f1f657f --- /dev/null +++ b/sklearn/callback/_computation_tree.py @@ -0,0 +1,268 @@ +# License: BSD 3 clause + +from tempfile import mkdtemp +from pathlib import Path +import pickle +import os + +import numpy as np + + +class ComputationNode: + """A node in a ComputationTree + + Parameters + ---------- + computation_tree : ComputationTree instance + The computation tree it belongs to. + + parent : ComputationNode instance, default=None + The parent node. None means this is the root. + + max_iter : int, default=None + The number of its children. None means it's a leaf. + + description : str, default=None + A description of this computation node. None means it's a leaf. + + tree_status_idx : int, default=0 + The index of the status of this node in the `tree_status` array of its + computation tree. + + idx : int, default=0 + The index of this node in the children list of its parent. + + Attributes + ---------- + children : list + The list of its children nodes. For a leaf, it's an empty list + + depth : int + The depth of this node in its computation tree. The root has a depth of 0. + """ + + def __init__( + self, + computation_tree, + parent=None, + max_iter=None, + description=None, + tree_status_idx=0, + idx=0, + ): + self.computation_tree = computation_tree + self.parent = parent + self.max_iter = max_iter + self.description = description + self.tree_status_idx = tree_status_idx + self.idx = idx + self.children = [] + self.depth = 0 if self.parent is None else self.parent.depth + 1 + + def get_ancestors(self, include_ancestor_trees=True): + """Get the list of all nodes in the path from the node to the root + + Parameters + ---------- + include_ancestor_trees : bool, default=True + If True, propagate to the tree of the `parent_node` of this tree if it + exists and so on. + + Returns + ------- + ancestors : list + The list of ancestors of this node (included). 
+ """ + node = self + ancestors = [node] + + while node.parent is not None: + node = node.parent + ancestors.append(node) + + if include_ancestor_trees: + node_parent_tree = node.computation_tree.parent_node + if node_parent_tree is not None: + ancestors.extend(node_parent_tree.get_ancestors()) + + return ancestors + + +class ComputationTree: + """Data structure to store the computation tree of an estimator + + Parameters + ---------- + estimator_name : str + The name of the estimator. + + levels : list of dict + A description of the nested levels of computation of the estimator to build the + tree. It's a list of dict with "descr" and "max_iter" keys. + + parent_node : ComputationNode, default=None + The node where the estimator is used in the computation tree of a + meta-estimator. This node is not set to be the parent of the root of this tree. + + Attributes + ---------- + depth : int + The depth of the tree. It corresponds to the depth of its deepest leaf. + + root : ComputationNode instance + The root of the computation tree. + + tree_dir : pathlib.Path instance + The path of the directory where the computation tree is dumped during the fit of + its estimator. If it has a parent tree, this is a sub-directory of the + `tree_dir` of its parent. + """ + + def __init__(self, estimator_name, levels, *, parent_node=None): + self.estimator_name = estimator_name + self.parent_node = parent_node + + self.depth = len(levels) - 1 + self.root, self.n_nodes = self._build_tree(levels) + + parent_tree_dir = ( + None + if self.parent_node is None + else self.parent_node.computation_tree.tree_dir + ) + if parent_tree_dir is None: + self.tree_dir = Path(mkdtemp()) + else: + # This tree has a parent tree. Place it in a subdir of its parent dir + # and give it a name that allows from the parent tree to find the sub dir + # of the sub tree of a given leaf. 
+ self.tree_dir = parent_tree_dir / str(parent_node.tree_status_idx) + self.tree_dir.mkdir() + self._filename = self.tree_dir / "tree_status.memmap" + + self._set_tree_status(mode="w+") + self._tree_status[:] = False + + def _build_tree(self, levels): + """Build the computation tree from the description of the levels""" + root = ComputationNode( + computation_tree=self, + max_iter=levels[0]["max_iter"], + description=levels[0]["descr"], + ) + + n_nodes = self._recursive_build_tree(root, levels) + + return root, n_nodes + + def _recursive_build_tree(self, parent, levels, n_nodes=1): + """Recursively build the tree from the root the leaves""" + if parent.depth == self.depth: + return n_nodes + + for i in range(parent.max_iter): + children_max_iter = levels[parent.depth + 1]["max_iter"] + description = levels[parent.depth + 1]["descr"] + + node = ComputationNode( + computation_tree=self, + parent=parent, + max_iter=children_max_iter, + description=description, + tree_status_idx=n_nodes, + idx=i, + ) + parent.children.append(node) + + n_nodes = self._recursive_build_tree(node, levels, n_nodes + 1) + + return n_nodes + + def _set_tree_status(self, mode): + """Create a memory-map to the tree_status array stored on the disk""" + # This has to be done each time we unpickle the tree + self._tree_status = np.memmap( + self._filename, dtype=bool, mode=mode, shape=(self.n_nodes,) + ) + + def get_progress(self, node): + """Return the number of finished child nodes of this node""" + if self._tree_status[node.tree_status_idx]: + return node.max_iter + + # Since the children of a node are not ordered (to account for parallel + # execution), we can't rely on the highest index for which the status is True. + return sum( + [self._tree_status[child.tree_status_idx] for child in node.children] + ) + + def iterate(self, include_leaves=False): + """Return an iterable over the nodes of the computation tree + + Nodes are discovered in a depth first search manner. + + Parameters + ---------- + include_leaves : bool + Whether or not to include the leaves of the tree in the iterable + + Returns + ------- + nodes_list : list + A list of the nodes of the computation tree. 
+ """ + return self._recursive_iterate(include_leaves=include_leaves) + + def _recursive_iterate(self, node=None, include_leaves=False, node_list=None): + """Recursively constructs the iterable""" + # TODO make it a generator + if node is None: + node = self.root + node_list = [] + + if node.children or include_leaves: + node_list.append(node) + + for child in node.children: + self._recursive_iterate(child, include_leaves, node_list) + + return node_list + + def __repr__(self): + res = ( + f"[{self.estimator_name}] {self.root.description} : progress " + f"{self.get_progress(self.root)} / {self.root.max_iter}\n" + ) + for node in self.iterate(include_leaves=False): + if node is not self.root: + res += ( + f"{' ' * node.depth}{node.description} {node.idx}: progress " + f"{self.get_progress(node)} / {node.max_iter}\n" + ) + return res + + +def load_computation_tree(directory): + """load the computation tree of a directory + + Parameters + ---------- + directory : pathlib.Path instance + The directory where the computation tree is dumped + + Returns + ------- + computation_tree : ComputationTree instance + The loaded computation tree + """ + file_path = directory / "computation_tree.pkl" + if not file_path.exists() or not os.path.getsize(file_path) > 0: + # Do not try to load the tree when it's created but not yet written + return + + with open(file_path, "rb") as f: + computation_tree = pickle.load(f) + + computation_tree._set_tree_status(mode="r") + + return computation_tree diff --git a/sklearn/callback/_convergence_monitor.py b/sklearn/callback/_convergence_monitor.py new file mode 100644 index 0000000000000..9f53d657cc75a --- /dev/null +++ b/sklearn/callback/_convergence_monitor.py @@ -0,0 +1,118 @@ +# License: BSD 3 clause + +from copy import copy +from pathlib import Path +from tempfile import mkdtemp +import time + +import matplotlib.pyplot as plt +import pandas as pd + +from . import BaseCallback + + +class ConvergenceMonitor(BaseCallback): + """Monitor model convergence. + + Parameters + ---------- + monitor : + + X_val : ndarray, default=None + Validation data + + y_val : ndarray, default=None + Validation target + + Attributes + ---------- + data : pandas.DataFrame + The monitored quantities at each iteration. 
+ """ + + request_reconstruction_attributes = True + + def __init__(self, *, monitor="objective_function", X_val=None, y_val=None): + self.X_val = X_val + self.y_val = y_val + self._data_file = Path(mkdtemp()) / "convergence_monitor.csv" + + def on_fit_begin(self, estimator, *, X=None, y=None): + self.estimator = estimator + self.X_train = X + self.y_train = y + self._start_time = {} + + def on_fit_iter_end(self, *, node, **kwargs): + if node.depth != node.computation_tree.depth: + return + + reconstruction_attributes = kwargs.get("reconstruction_attributes", None) + if reconstruction_attributes is None: + return + + new_estimator = copy(self.estimator) + for key, val in reconstruction_attributes.items(): + setattr(new_estimator, key, val) + + if node.idx == 0: + self._start_time[node.parent] = time.perf_counter() + curr_time = 0 + else: + curr_time = time.perf_counter() - self._start_time[node.parent] + + obj_train, *_ = new_estimator.objective_function(self.X_train, self.y_train, normalize=True) + if self.X_val is not None: + obj_val, *_ = new_estimator.objective_function(self.X_val, self.y_val, normalize=True) + else: + obj_val = None + + ancestors = node.get_ancestors()[:0:-1] + ancestors_desc = [ + f"{n.computation_tree.estimator_name}-{n.description}" for n in ancestors + ] + ancestors_idx = [f"{n.idx}" for n in ancestors] + + if not self._data_file.exists(): + with open(self._data_file, "w") as f: + f.write( + f"{','.join(ancestors_desc)},iteration,time,obj_train,obj_val\n" + ) + + with open(self._data_file, "a") as f: + f.write( + f"{','.join(ancestors_idx)},{node.idx},{curr_time},{obj_train},{obj_val}\n" + ) + + def on_fit_end(self): + pass + + def get_data(self): + if not hasattr(self, "data"): + self.data = pd.read_csv(self._data_file) + return self.data + + def plot(self, x="iteration"): + data = self.get_data() + + # all columns but iteration, time, obj_train, obj_val + group_by_columns = list(data.columns[:-4]) + groups = data.groupby(group_by_columns) + + for key in groups.groups.keys(): + group = groups.get_group(key) + fig, ax = plt.subplots() + + ax.plot(group[x], group["obj_train"], label="obj_train") + if self.X_val is not None: + ax.plot(group[x], group["obj_val"], label="obj_val") + + if x == "iteration": + x_label = "Number of iterations" + elif x == "time": + x_label = "Time (s)" + ax.set_xlabel(x_label) + ax.set_ylabel("objective function") + + ax.legend() + plt.show() diff --git a/sklearn/callback/_early_stopping.py b/sklearn/callback/_early_stopping.py new file mode 100644 index 0000000000000..44a0108e04b26 --- /dev/null +++ b/sklearn/callback/_early_stopping.py @@ -0,0 +1,48 @@ +# License: BSD 3 clause + +from . 
import BaseCallback + + +class EarlyStopping(BaseCallback): + def __init__( + self, + X_val=None, + y_val=None, + monitor="objective_function", + max_no_improvement=10, + tol=1e-2, + ): + self.X_val = X_val + self.y_val = y_val + self.monitor = monitor + self.max_no_improvement = max_no_improvement + self.tol = tol + + def on_fit_begin(self, estimator, X=None, y=None): + self.estimator = estimator + self._no_improvement = {} + self._last_monitored = {} + + def on_fit_iter_end(self, *, node, **kwargs): + if node.depth != self.estimator._computation_tree.depth: + return + + if self.monitor == "objective_function": + objective_function = kwargs.get("objective_function", None) + monitored, *_ = objective_function(self.X_val) + elif self.monitor == "TODO": + pass + + if node.parent not in self._last_monitored or monitored < self._last_monitored[ + node.parent + ] * (1 - self.tol): + self._no_improvement[node.parent] = 0 + self._last_monitored[node.parent] = monitored + else: + self._no_improvement[node.parent] += 1 + + if self._no_improvement[node.parent] >= self.max_no_improvement: + return True + + def on_fit_end(self): + pass diff --git a/sklearn/callback/_progressbar.py b/sklearn/callback/_progressbar.py new file mode 100644 index 0000000000000..ae11e67d59f57 --- /dev/null +++ b/sklearn/callback/_progressbar.py @@ -0,0 +1,257 @@ +# License: BSD 3 clause + +from copy import copy +import pickle +from threading import Thread, Event + +import numpy as np +from tqdm import tqdm +from rich.progress import Progress +from rich.progress import BarColumn, TimeRemainingColumn, TextColumn +from rich.style import Style + +from . import BaseCallback +from . import AutoPropagatedMixin +from . import load_computation_tree + + +class ProgressBar(BaseCallback, AutoPropagatedMixin): + """Callback that displays progress bars for each iterative steps of the estimator + + Parameters + ---------- + backend: {"rich"}, default="rich" + The backend for the progress bars display. + + max_depth_show : int, default=None + The maximum nested level of progress bars to display. + + max_depth_keep : int, default=None + The maximum nested level of progress bars to keep displayed when they are + finished. + """ + + def __init__(self, backend="rich", max_depth_show=None, max_depth_keep=None): + self.backend = backend + if max_depth_show is not None and max_depth_show < 0: + raise ValueError(f"max_depth_show should be >= 0.") + if max_depth_keep is not None and max_depth_keep < 0: + raise ValueError(f"max_depth_keep should be >= 0.") + self.max_depth_show = max_depth_show + self.max_depth_keep = max_depth_keep + + def on_fit_begin(self, estimator, X=None, y=None): + self._stop_event = Event() + + if self.backend == "rich": + self.progress_monitor = _RichProgressMonitor( + estimator=estimator, + event=self._stop_event, + max_depth_show=self.max_depth_show, + max_depth_keep=self.max_depth_keep, + ) + else: + raise ValueError(f"backend should be 'rich', got {self.backend} instead.") + + self.progress_monitor.start() + + def on_fit_iter_end(self, *, estimator, node, **kwargs): + pass + + def on_fit_end(self): + self._stop_event.set() + self.progress_monitor.join() + + def __getstate__(self): + state = self.__dict__.copy() + if "_stop_event" in state: + del state["_stop_event"] + if "progress_monitor" in state: + del state["progress_monitor"] + return state + + +# Custom Progress class to allow showing the tasks in a given order (given by setting +# the _ordered_tasks attribute). 
In particular it allows to dynamically create and +# insert tasks between existing tasks. +class _Progress(Progress): + def get_renderables(self): + table = self.make_tasks_table(getattr(self, "_ordered_tasks", [])) + yield table + + +class _RichProgressMonitor(Thread): + """Thread monitoring the progress of an estimator with rich based display + + The display is a list of nested rich tasks using rich.Progress. There is one for + each node in the computation tree of the estimator and in the computation trees of + estimators used in the estimator. + + Parameters + ---------- + estimator : estimator instance + The estimator to monitor + + event : threading.Event instance + This thread will run until event is set. + + max_depth_show : int, default=None + The maximum nested level of progress bars to display. + + max_depth_keep : int, default=None + The maximum nested level of progress bars to keep displayed when they are + finished. + """ + + def __init__(self, estimator, event, max_depth_show=None, max_depth_keep=None): + Thread.__init__(self) + self.estimator = estimator + self.event = event + self.max_depth_show = max_depth_show + self.max_depth_keep = max_depth_keep + + # _computation_trees is a dict `directory: tuple` where + # - tuple[0] is the computation tree of the directory + # - tuple[1] is a dict `node.tree_status_idx: task_id` + self._computation_trees = {} + + def run(self): + with _Progress( + TextColumn("[progress.description]{task.description}"), + BarColumn( + complete_style=Style(color="dark_orange"), + finished_style=Style(color="cyan"), + ), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeRemainingColumn(), + auto_refresh=False, + ) as progress_ctx: + self._progress_ctx = progress_ctx + + while not self.event.wait(0.05): + self._recursive_update_tasks() + self._progress_ctx.refresh() + + self._recursive_update_tasks() + self._progress_ctx.refresh() + + def _recursive_update_tasks(self, this_dir=None, depth=0): + """Recursively loop through directories and init or update tasks + + Parameters + ---------- + this_dir : pathlib.Path instance + The directory to + + depth : int + The current depth + """ + if self.max_depth_show is not None and depth > self.max_depth_show: + # Fast exit if this dir is deeper than what we want to show anyway + return + + if this_dir is None: + this_dir = self.estimator._computation_tree.tree_dir + # _ordered_tasks holds the list of the tasks in the order we want them to + # be displayed. 
+ self._progress_ctx._ordered_tasks = [] + + if this_dir not in self._computation_trees: + # First time we discover this directory -> store the computation tree + # If the computation tree is not readable yet, skip and try again next time + computation_tree = load_computation_tree(this_dir) + if computation_tree is None: + return + + self._computation_trees[this_dir] = (computation_tree, {}) + + computation_tree, task_ids = self._computation_trees[this_dir] + + for node in computation_tree.iterate(include_leaves=True): + if node.children: + # node is not a leaf, create or update its task + if node.tree_status_idx not in task_ids: + visible = True + if ( + self.max_depth_show is not None + and depth + node.depth > self.max_depth_show + ): + # If this node is deeper than what we want to show, we create + # the task anyway but make it not visible + visible = False + + task_ids[node.tree_status_idx] = self._progress_ctx.add_task( + self._format_task_description(node, computation_tree, depth), + total=node.max_iter, + visible=visible, + ) + + task_id = task_ids[node.tree_status_idx] + task = self._progress_ctx.tasks[task_id] + self._progress_ctx._ordered_tasks.append(task) + + parent_task = self._get_parent_task(node, computation_tree, task_ids) + if parent_task is not None and parent_task.finished: + # If the task of the parent node is finished, make this task + # finished. It can happen if some computations are stopped + # before reaching max_iter. + visible = True + if ( + self.max_depth_keep is not None + and depth + node.depth > self.max_depth_keep + ): + # If this node is deeper than what we want to keep in the output + # make it not visible + visible = False + self._progress_ctx.update( + task_id, completed=node.max_iter, visible=visible, refresh=False + ) + else: + node_progress = computation_tree.get_progress(node) + if node_progress != task.completed: + self._progress_ctx.update( + task_id, completed=node_progress, refresh=False + ) + else: + # node is a leaf, look for tasks of its sub computation tree before + # going to the next node + child_dir = this_dir / str(node.tree_status_idx) + if child_dir.exists(): + self._recursive_update_tasks( + child_dir, depth + computation_tree.depth + ) + + def _format_task_description(self, node, computation_tree, depth): + """Return a formatted description for the task of the node""" + colors = ["red", "green", "blue", "yellow"] + + indent = f"{' ' * (depth + node.depth)}" + style = f"[{colors[(depth + node.depth)%len(colors)]}]" + + description = f"{computation_tree.estimator_name} - {node.description}" + if node.parent is None and computation_tree.parent_node is not None: + description = ( + f"{computation_tree.parent_node.description} {computation_tree.parent_node.idx} |" + f" {description}" + ) + if node.parent is not None: + description = f"{description} {node.idx}" + + return f"{style}{indent}{description}" + + def _get_parent_task(self, node, computation_tree, task_ids): + """Get the task of the parent node""" + if node.parent is not None: + # node is not the root, return the task of its parent + task_id = task_ids[node.parent.tree_status_idx] + return self._progress_ctx.tasks[task_id] + if computation_tree.parent_node is not None: + # node is the root, return the task of the parent of the parent_node of + # its computation tree + parent_dir = computation_tree.parent_node.computation_tree.tree_dir + _, parent_tree_task_ids = self._computation_trees[parent_dir] + task_id = parent_tree_task_ids[ + 
computation_tree.parent_node.parent.tree_status_idx + ] + return self._progress_ctx._tasks[task_id] + return diff --git a/sklearn/callback/_snapshot.py b/sklearn/callback/_snapshot.py new file mode 100644 index 0000000000000..231eafc8cbb9e --- /dev/null +++ b/sklearn/callback/_snapshot.py @@ -0,0 +1,82 @@ +# License: BSD 3 clause + +from copy import copy +from datetime import datetime +from pathlib import Path +import pickle + +import numpy as np + +from . import BaseCallback + + +class Snapshot(BaseCallback): + """Take regular snapshots of an estimator + + Parameters + ---------- + keep_last_n : int or None, default=1 + Only the last `keep_last_n` snapshots are kept on the disk. None means all + snapshots are kept. + + base_dir : str or pathlib.Path instance, default=None + The directory where the snapshots should be stored. If None, they are stored in + the current directory. + + Attributes + ---------- + directory : pathlib.Path instance + The directory where the snapshots are saved. It's a sub-directory of `base_dir`. + """ + + request_reconstruction_attributes = True + + def __init__(self, keep_last_n=1, base_dir=None): + self.keep_last_n = keep_last_n + if keep_last_n is not None and keep_last_n <= 0: + raise ValueError( + "keep_last_n must be a positive integer, got" + f" {self.keep_last_n} instead." + ) + + self.base_dir = Path("." if base_dir is None else base_dir) + + def on_fit_begin(self, estimator, X=None, y=None): + self.estimator = estimator + + # Use a hash in the name of this directory to avoid name collision if several + # clones of this estimator are fitted in parallel in a meta-estimator for + # instance. + dir_name = ( + "snapshots_" + f"{self.estimator.__class__.__name__}_" + f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')}_" + f"{hash(self.estimator._computation_tree)}" + ) + + self.directory = self.base_dir / dir_name + self.directory.mkdir() + + def on_fit_iter_end(self, *, node, **kwargs): + reconstruction_attributes = kwargs.get("reconstruction_attributes", None) + if reconstruction_attributes is None: + return + + new_estimator = copy(self.estimator) + for key, val in reconstruction_attributes.items(): + setattr(new_estimator, key, val) + + file_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')}.pkl" + file_path = self.directory / file_name + + with open(file_path, "wb") as f: + pickle.dump(new_estimator, f) + + if self.keep_last_n is not None: + for snapshot in sorted(self.directory.iterdir())[: -self.keep_last_n]: + snapshot.unlink(missing_ok=True) + + def on_fit_end(self): + if self.keep_last_n is not None: + for snapshot in sorted(self.directory.iterdir())[: -self.keep_last_n]: + snapshot.unlink() diff --git a/sklearn/callback/_text_verbose.py b/sklearn/callback/_text_verbose.py new file mode 100644 index 0000000000000..b857ff592c87c --- /dev/null +++ b/sklearn/callback/_text_verbose.py @@ -0,0 +1,44 @@ +# License: BSD 3 clause + +import time + +from . import BaseCallback +from . 
import AutoPropagatedMixin
+
+
+class TextVerbose(BaseCallback, AutoPropagatedMixin):
+    request_stopping_criterion = True
+
+    def __init__(self, min_time_between_calls=0):
+        self.min_time_between_calls = min_time_between_calls
+
+    def on_fit_begin(self, estimator, X=None, y=None):
+        self.estimator = estimator
+        self._start_time = time.perf_counter()
+
+    def on_fit_iter_end(self, *, node, **kwargs):
+        if node.depth != node.computation_tree.depth:
+            return
+
+        stopping_criterion = kwargs.get("stopping_criterion", None)
+        tol = kwargs.get("tol", None)
+
+        current_time = time.perf_counter() - self._start_time
+
+        s = f"{node.description} {node.idx}"
+        parent = node.parent
+        while parent is not None and parent.parent is not None:
+            s = f"{parent.description} {parent.idx} - {s}"
+            parent = parent.parent
+
+        msg = (
+            f"[{parent.computation_tree.estimator_name}] {s} | time {current_time:.5f}s"
+        )
+
+        if stopping_criterion is not None and tol is not None:
+            msg += f" | stopping_criterion={stopping_criterion:.3E} | tol={tol:.3E}"
+
+        print(msg)
+
+    def on_fit_end(self):
+        pass
diff --git a/sklearn/callback/tests/test_computation_tree.py b/sklearn/callback/tests/test_computation_tree.py
new file mode 100644
index 0000000000000..b726177a342ec
--- /dev/null
+++ b/sklearn/callback/tests/test_computation_tree.py
@@ -0,0 +1,98 @@
+# License: BSD 3 clause
+
+import numpy as np
+import pytest
+
+from sklearn.callback import ComputationTree
+from sklearn.callback import ComputationNode
+from sklearn.callback import load_computation_tree
+
+
+levels = [
+    {"descr": "level0", "max_iter": 3},
+    {"descr": "level1", "max_iter": 5},
+    {"descr": "level2", "max_iter": 7},
+    {"descr": "level3", "max_iter": None},
+]
+
+
+def test_computation_tree():
+    # Check the construction of the computation tree
+    computation_tree = ComputationTree(estimator_name="estimator", levels=levels)
+    assert computation_tree.estimator_name == "estimator"
+
+    root = computation_tree.root
+    assert root.parent is None
+    assert root.idx == 0
+
+    assert len(root.children) == root.max_iter == 3
+    assert [node.idx for node in root.children] == list(range(3))
+
+    for node1 in root.children:
+        assert len(node1.children) == 5
+        assert [n.idx for n in node1.children] == list(range(5))
+
+        for node2 in node1.children:
+            assert len(node2.children) == 7
+            assert [n.idx for n in node2.children] == list(range(7))
+
+            for node3 in node2.children:
+                assert not node3.children
+
+
+def test_n_nodes():
+    # Check that the number of nodes in a computation tree corresponds to what we
+    # expect from the level descriptions
+    computation_tree = ComputationTree(estimator_name="", levels=levels)
+
+    max_iter_per_level = [level["max_iter"] for level in levels[:-1]]
+    expected_n_nodes = 1 + np.sum(np.cumprod(max_iter_per_level))
+
+    assert computation_tree.n_nodes == expected_n_nodes
+    assert len(computation_tree.iterate(include_leaves=True)) == expected_n_nodes
+    assert computation_tree._tree_status.shape == (expected_n_nodes,)
+
+
+def test_tree_status_idx():
+    # Check that each node has a unique index in the _tree_status array and that their
+    # order corresponds to the order given by a depth first search.
+ computation_tree = ComputationTree(estimator_name="", levels=levels) + + indexes = [ + node.tree_status_idx for node in computation_tree.iterate(include_leaves=True) + ] + assert indexes == list(range(computation_tree.n_nodes)) + + +def test_get_ancestors(): + # Check that the ancestor search excludes the root and can propagate to parent trees + parent_levels = [ + {"descr": "parent_level0", "max_iter": 2}, + {"descr": "parent_level1", "max_iter": 4}, + {"descr": "parent_level2", "max_iter": None}, + ] + + parent_computation_tree = ComputationTree( + estimator_name="parent_estimator", levels=parent_levels + ) + parent_node = parent_computation_tree.root.children[0].children[2] + + computation_tree = ComputationTree( + estimator_name="estimator", levels=levels, parent_node=parent_node + ) + node = computation_tree.root.children[1].children[3].children[5] + + ancestors = node.get_ancestors(include_ancestor_trees=False) + assert ancestors == [node, node.parent, node.parent.parent] + assert [n.idx for n in ancestors] == [5, 3, 1] + assert computation_tree.root not in ancestors + + ancestors = node.get_ancestors(include_ancestor_trees=True) + assert ancestors == [ + node, + node.parent, + node.parent.parent, + parent_node, + parent_node.parent, + ] + assert [n.idx for n in ancestors] == [5, 3, 1, 2, 0] diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index cc1451be54567..f53f33c6b804a 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -6,6 +6,7 @@ # Tom Dupre la Tour # License: BSD 3 clause +from functools import partial import numbers import numpy as np import scipy.sparse as sp @@ -23,6 +24,7 @@ check_is_fitted, check_non_negative, ) +from ..callback._base import _eval_callbacks_on_fit_iter_end EPSILON = np.finfo(np.float32).eps @@ -424,6 +426,8 @@ def _fit_coordinate_descent( verbose=0, shuffle=False, random_state=None, + estimator=None, + parent_node=None, ): """Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent @@ -500,7 +504,9 @@ def _fit_coordinate_descent( rng = check_random_state(random_state) - for n_iter in range(1, max_iter + 1): + nodes = parent_node.children if parent_node is not None else [None] * max_iter + + for n_iter, node in enumerate(nodes, 1): violation = 0.0 # Update W @@ -519,6 +525,21 @@ def _fit_coordinate_descent( if violation_init == 0: break + if _eval_callbacks_on_fit_iter_end( + estimator=estimator, + node=node, + stopping_criterion=lambda: violation / violation_init, + tol=tol, + fit_state={"H": Ht.T, "W": W}, + reconstruction_attributes=lambda: { + "n_components_": Ht.T.shape[0], + "components_": H, + "n_iter_": n_iter, + "reconstruction_err_": _beta_divergence(X, W, Ht.T, 2, True), + }, + ): + break + if verbose: print("violation:", violation / violation_init) @@ -731,6 +752,8 @@ def _fit_multiplicative_update( l2_reg_H=0, update_H=True, verbose=0, + estimator=None, + parent_node=None, ): """Compute Non-negative Matrix Factorization with Multiplicative Update. 
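
Both NMF solvers above pass `stopping_criterion` and `reconstruction_attributes` as lambdas; `_eval_callbacks_on_fit_iter_end` only evaluates them when at least one registered callback sets the corresponding `request_*` class attribute. A minimal consumer of that mechanism could look like the following sketch (`CriterionLogger` is hypothetical and only shown to illustrate the opt-in contract defined in `_base.py`):

    from sklearn.callback import BaseCallback

    class CriterionLogger(BaseCallback):
        # Opt in: the estimator will evaluate the stopping_criterion lambda
        # before dispatching the call to on_fit_iter_end.
        request_stopping_criterion = True

        def on_fit_begin(self, estimator, *, X=None, y=None):
            pass

        def on_fit_iter_end(self, estimator, node, **kwargs):
            criterion = kwargs.get("stopping_criterion")
            tol = kwargs.get("tol")
            if criterion is not None and tol is not None:
                print(f"node {node.idx}: stopping_criterion={criterion:.3e}, tol={tol:.1e}")
            return False  # never request early stopping

        def on_fit_end(self):
            pass
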
@@ -815,8 +838,10 @@ def _fit_multiplicative_update( error_at_init = _beta_divergence(X, W, H, beta_loss, square_root=True) previous_error = error_at_init + nodes = parent_node.children if parent_node is not None else [None] * max_iter + H_sum, HHt, XHt = None, None, None - for n_iter in range(1, max_iter + 1): + for n_iter, node in enumerate(nodes, 1): # update W # H_sum, HHt and XHt are saved and reused if not update_H delta_W, H_sum, HHt, XHt = _multiplicative_update_w( @@ -842,6 +867,27 @@ def _fit_multiplicative_update( if beta_loss <= 1: H[H < np.finfo(np.float64).eps] = 0.0 + if _eval_callbacks_on_fit_iter_end( + estimator=estimator, + node=node, + stopping_criterion=lambda: ( + ( + previous_error + - _beta_divergence(X, W, H, beta_loss, square_root=True) + ) + / error_at_init + ), + tol=tol, + fit_state={"H": H, "W": W}, + reconstruction_attributes=lambda: { + "n_components_": H.shape[0], + "components_": H, + "n_iter_": n_iter, + "reconstruction_err_": _beta_divergence(X, W, H, 2, True), + }, + ): + break + # test convergence criterion every 10 iterations if tol > 0 and n_iter % 10 == 0: error = _beta_divergence(X, W, H, beta_loss, square_root=True) @@ -1538,20 +1584,27 @@ def fit_transform(self, X, y=None, W=None, H=None): X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32] ) - with config_context(assume_finite=True): - W, H, n_iter = self._fit_transform(X, W=W, H=H) - - self.reconstruction_err_ = _beta_divergence( - X, W, H, self._beta_loss, square_root=True + root = self._eval_callbacks_on_fit_begin( + levels=[ + {"descr": "fit", "max_iter": self.max_iter}, + {"descr": "iter", "max_iter": None}, + ], + X=X, ) + W, H, n_iter = self._fit_transform(X, W=W, H=H, parent_node=root) + self.n_components_ = H.shape[0] self.components_ = H self.n_iter_ = n_iter + self._eval_callbacks_on_fit_end() + return W - def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): + def _fit_transform( + self, X, y=None, W=None, H=None, update_H=True, parent_node=None + ): """Learn a NMF model for the data X and returns the transformed data. Parameters @@ -1618,6 +1671,8 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): verbose=self.verbose, shuffle=self.shuffle, random_state=self.random_state, + estimator=self, + parent_node=parent_node, ) elif self.solver == "mu": W, H, n_iter = _fit_multiplicative_update( @@ -1633,6 +1688,8 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): l2_reg_H, update_H=update_H, verbose=self.verbose, + estimator=self, + parent_node=parent_node, ) else: raise ValueError("Invalid solver parameter '%s'." 
% self.solver) @@ -1713,6 +1770,28 @@ def inverse_transform(self, W): check_is_fitted(self) return np.dot(W, self.components_) + def objective_function(self, X, y=None, *, W=None, H=None, normalize=False): + if W is None: + W = self.transform(X) + if H is None: + H = self.components_ + + data_fit = _beta_divergence(X, W, H, self._beta_loss) + + l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._scale_regularization(X) + penalization = ( + l1_reg_W * W.sum() + + l1_reg_H * H.sum() + + l2_reg_W * (W ** 2).sum() + + l2_reg_H * (H ** 2).sum() + ) + + if normalize: + data_fit /= X.shape[0] + penalization /= X.shape[0] + + return data_fit + penalization, data_fit, penalization + @property def _n_features_out(self): """Number of transformed output features.""" diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 08e71edbc69ab..82063f36d0434 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -33,6 +33,7 @@ from ..utils.fixes import delayed from ..model_selection import check_cv from ..metrics import get_scorer +from ..callback._base import _eval_callbacks_on_fit_iter_end _LOGISTIC_SOLVER_CONVERGENCE_MSG = ( @@ -505,6 +506,8 @@ def _logistic_regression_path( max_squared_sum=None, sample_weight=None, l1_ratio=None, + estimator=None, + parent_node=None, ): """Compute a Logistic Regression model for a list of regularization parameters. @@ -796,13 +799,20 @@ def grad(x, *args): hess = _logistic_grad_hess warm_start_sag = {"coef": np.expand_dims(w0, axis=1)} + # Distinguish between LogReg and LogRegCV + if parent_node is not None: + nodes = [parent_node] if len(Cs) == 1 else parent_node.children + else: + nodes = [None] * len(Cs) + coefs = list() n_iter = np.zeros(len(Cs), dtype=np.int32) - for i, C in enumerate(Cs): + for i, (C, node) in enumerate(zip(Cs, nodes)): if solver == "lbfgs": iprint = [-1, 50, 1, 100, 101][ np.searchsorted(np.array([0, 1, 2, 3]), verbose) ] + children = iter(node.children) if node is not None else None opt_res = optimize.minimize( func, w0, @@ -810,6 +820,10 @@ def grad(x, *args): jac=True, args=(X, target, 1.0 / C, sample_weight), options={"iprint": iprint, "gtol": tol, "maxiter": max_iter}, + callback=lambda xk: _eval_callbacks_on_fit_iter_end( + estimator=estimator, + node=next(children) if children is not None else None, + ), ) n_iter_i = _check_optimize_result( solver, @@ -821,7 +835,15 @@ def grad(x, *args): elif solver == "newton-cg": args = (X, target, 1.0 / C, sample_weight) w0, n_iter_i = _newton_cg( - hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol + hess, + func, + grad, + w0, + args=args, + maxiter=max_iter, + tol=tol, + estimator=estimator, + parent_node=node, ) elif solver == "liblinear": coef_, intercept_, n_iter_i, = _fit_liblinear( @@ -876,6 +898,8 @@ def grad(x, *args): max_squared_sum, warm_start_sag, is_saga=(solver == "saga"), + estimator=estimator, + parent_node=node, ) else: @@ -893,8 +917,20 @@ def grad(x, *args): else: coefs.append(w0.copy()) + if len(Cs) > 1: + _eval_callbacks_on_fit_iter_end( + estimator=estimator, + node=node, + ) + n_iter[i] = n_iter_i + if multi_class == "ovr": + _eval_callbacks_on_fit_iter_end( + estimator=estimator, + node=parent_node, + ) + return np.array(coefs), np.array(Cs), n_iter @@ -1578,6 +1614,22 @@ def fit(self, X, y, sample_weight=None): if warm_start_coef is None: warm_start_coef = [None] * n_classes + if len(classes_) == 1: + levels = [ + {"descr": "fit", "max_iter": self.max_iter}, + {"descr": "iter", "max_iter": None}, + ] + else: 
+ levels = [ + {"descr": "fit", "max_iter": len(classes_)}, + {"descr": "class", "max_iter": self.max_iter}, + {"descr": "iter", "max_iter": None}, + ] + root = self._eval_callbacks_on_fit_begin(levels=levels, X=X, y=y) + + # distinguish between multinomial and ovr + nodes = [root] if len(classes_) == 1 else root.children + path_func = delayed(_logistic_regression_path) # The SAG solver releases the GIL so it's more efficient to use @@ -1610,8 +1662,10 @@ def fit(self, X, y, sample_weight=None): penalty=penalty, max_squared_sum=max_squared_sum, sample_weight=sample_weight, + estimator=self, + parent_node=node, ) - for class_, warm_start_coef_ in zip(classes_, warm_start_coef) + for class_, warm_start_coef_, node in zip(classes_, warm_start_coef, nodes) ) fold_coefs_, _, n_iter_ = zip(*fold_coefs_) @@ -1632,6 +1686,8 @@ def fit(self, X, y, sample_weight=None): else: self.intercept_ = np.zeros(n_classes) + self._eval_callbacks_on_fit_end() + return self def predict_proba(self, X): diff --git a/sklearn/linear_model/_sag.py b/sklearn/linear_model/_sag.py index 48dcd7aef8ad3..7307ca76c4408 100644 --- a/sklearn/linear_model/_sag.py +++ b/sklearn/linear_model/_sag.py @@ -101,6 +101,8 @@ def sag_solver( max_squared_sum=None, warm_start_mem=None, is_saga=False, + estimator=None, + parent_node=None, ): """SAG solver for Ridge and LogisticRegression. @@ -346,6 +348,8 @@ def sag_solver( intercept_decay, is_saga, verbose, + estimator=estimator, + parent_node=parent_node, ) if n_iter_ == max_iter: diff --git a/sklearn/linear_model/_sag_fast.pyx.tp b/sklearn/linear_model/_sag_fast.pyx.tp index 756a048eea999..8144c98df3012 100644 --- a/sklearn/linear_model/_sag_fast.pyx.tp +++ b/sklearn/linear_model/_sag_fast.pyx.tp @@ -47,6 +47,7 @@ from ._sgd_fast cimport LossFunction from ._sgd_fast cimport Log, SquaredLoss from ..utils._seq_dataset cimport SequentialDataset32, SequentialDataset64 +from ..callback._base import _eval_callbacks_on_fit_iter_end from libc.stdio cimport printf @@ -231,7 +232,9 @@ def sag{{name_suffix}}(SequentialDataset{{name_suffix}} dataset, np.ndarray[{{c_type}}, ndim=1, mode='c'] intercept_sum_gradient_init, double intercept_decay, bint saga, - bint verbose): + bint verbose, + estimator, + parent_node): """Stochastic Average Gradient (SAG) and SAGA solvers. Used in Ridge and LogisticRegression. 
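
As an aside on the `levels` declared in `LogisticRegression.fit` above: the computation tree built from such a description has one node per class and one leaf per solver iteration, so its size is 1 (the root) plus the cumulative product of `max_iter` over the non-leaf levels. This is exactly what `test_n_nodes` checks. A quick sanity check, with made-up numbers:

    import numpy as np

    from sklearn.callback import ComputationTree

    levels = [
        {"descr": "fit", "max_iter": 3},      # e.g. 3 classes in the ovr case
        {"descr": "class", "max_iter": 10},   # e.g. max_iter=10 per class
        {"descr": "iter", "max_iter": None},  # leaves, one per solver iteration
    ]
    tree = ComputationTree(estimator_name="LogisticRegression", levels=levels)

    max_iter_per_level = [level["max_iter"] for level in levels[:-1]]
    # 1 root + 3 class nodes + 3 * 10 iteration leaves = 34
    assert tree.n_nodes == 1 + np.sum(np.cumprod(max_iter_per_level))
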
@@ -515,6 +518,22 @@ def sag{{name_suffix}}(SequentialDataset{{name_suffix}} dataset, fabs(weights[idx] - previous_weights[idx])) previous_weights[idx] = weights[idx] + + with gil: + if _eval_callbacks_on_fit_iter_end( + estimator=estimator, + node=parent_node.children[n_iter] if parent_node is not None else None, + stopping_criterion = ( + lambda: max_change / max_weight + if max_weight != 0 + else 0 + if max_weight == max_change == 0 + else np.inf + ), + tol=tol, + ): + break + if ((max_weight != 0 and max_change / max_weight <= tol) or max_weight == 0 and max_change == 0): if verbose: diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 6134b6318c838..47553d07ac169 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -26,12 +26,13 @@ ) from .utils.deprecation import deprecated from .utils._tags import _safe_tags +from .utils.metaestimators import _BaseComposition from .utils.validation import check_memory from .utils.validation import check_is_fitted from .utils.fixes import delayed from .exceptions import NotFittedError +from .callback._base import _eval_callbacks_on_fit_iter_end -from .utils.metaestimators import _BaseComposition __all__ = ["Pipeline", "FeatureUnion", "make_pipeline", "make_union"] @@ -318,15 +319,24 @@ def _fit(self, X, y=None, **fit_params_steps): # Setup the memory memory = check_memory(self.memory) + root = self._eval_callbacks_on_fit_begin( + levels=[ + {"descr": "fit", "max_iter": len(self.steps)}, + {"descr": "step", "max_iter": None}, + ], + X=X, + y=y, + ) + fit_transform_one_cached = memory.cache(_fit_transform_one) - for (step_idx, name, transformer) in self._iter( + for (step_idx, name, transformer), node in zip(self._iter( with_final=False, filter_passthrough=False - ): + ), root.children[:-1]): if transformer is None or transformer == "passthrough": + _eval_callbacks_on_fit_iter_end(estimator=self, node=node) with _print_elapsed_time("Pipeline", self._log_message(step_idx)): continue - if hasattr(memory, "location"): # joblib >= 0.12 if memory.location is None: @@ -346,6 +356,7 @@ def _fit(self, X, y=None, **fit_params_steps): else: cloned_transformer = clone(transformer) # Fit or load from cache the current transformer + self._propagate_callbacks(cloned_transformer, parent_node=node) X, fitted_transformer = fit_transform_one_cached( cloned_transformer, X, @@ -359,6 +370,9 @@ def _fit(self, X, y=None, **fit_params_steps): # transformer. This is necessary when loading the transformer # from the cache. self.steps[step_idx] = (name, fitted_transformer) + + _eval_callbacks_on_fit_iter_end(estimator=self, node=node) + return X def fit(self, X, y=None, **fit_params): @@ -388,12 +402,20 @@ def fit(self, X, y=None, **fit_params): Pipeline with fitted steps. 
""" fit_params_steps = self._check_fit_params(**fit_params) + Xt = self._fit(X, y, **fit_params_steps) with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)): if self._final_estimator != "passthrough": + node = self._computation_tree.root.children[-1] + self._propagate_callbacks(self._final_estimator, parent_node=node) + fit_params_last_step = fit_params_steps[self.steps[-1][0]] self._final_estimator.fit(Xt, y, **fit_params_last_step) + _eval_callbacks_on_fit_iter_end(estimator=self, node=node) + + self._eval_callbacks_on_fit_end() + return self def fit_transform(self, X, y=None, **fit_params): diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index bd2ac8bdfd27d..2e3b6eb1c125b 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -18,6 +18,7 @@ from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 from ..exceptions import ConvergenceWarning +from ..callback._base import _eval_callbacks_on_fit_iter_end class _LineSearchError(RuntimeError): @@ -120,6 +121,8 @@ def _newton_cg( maxinner=200, line_search=True, warn=True, + estimator=None, + parent_node=None, ): """ Minimization of scalar function of one or more variables using the @@ -168,20 +171,31 @@ def _newton_cg( """ x0 = np.asarray(x0).flatten() xk = x0 - k = 0 if line_search: old_fval = func(x0, *args) old_old_fval = None + nodes = parent_node.children if parent_node is not None else [None] * maxiter + # Outer loop: our Newton iteration - while k < maxiter: + for k, node in enumerate(nodes, 1): # Compute a search direction pk by applying the CG method to # del2 f(xk) p = - fgrad f(xk) starting from 0. fgrad, fhess_p = grad_hess(xk, *args) absgrad = np.abs(fgrad) - if np.max(absgrad) <= tol: + max_absgrad = np.max(absgrad) + + if _eval_callbacks_on_fit_iter_end( + estimator=estimator, + node=node, + stopping_criterion=lambda: max_absgrad, + tol=tol, + ): + break + + if max_absgrad <= tol: break maggrad = np.sum(absgrad) @@ -204,7 +218,6 @@ def _newton_cg( break xk = xk + alphak * xsupi # upcast if necessary - k += 1 if warn and k >= maxiter: warnings.warn( From 584bdf72f1dfa969eceb0b7ab3ffbba5cfcf6aea Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 17 Dec 2021 18:18:35 +0100 Subject: [PATCH 02/20] cln nmf and test reconstruction attributes --- sklearn/decomposition/_nmf.py | 18 +++++++-------- sklearn/decomposition/tests/test_nmf.py | 29 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index f53f33c6b804a..4fa46dd2cb12c 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -6,7 +6,6 @@ # Tom Dupre la Tour # License: BSD 3 clause -from functools import partial import numbers import numpy as np import scipy.sparse as sp @@ -504,9 +503,7 @@ def _fit_coordinate_descent( rng = check_random_state(random_state) - nodes = parent_node.children if parent_node is not None else [None] * max_iter - - for n_iter, node in enumerate(nodes, 1): + for n_iter in range(1, max_iter + 1): violation = 0.0 # Update W @@ -527,7 +524,7 @@ def _fit_coordinate_descent( if _eval_callbacks_on_fit_iter_end( estimator=estimator, - node=node, + node=parent_node.children[n_iter - 1] if parent_node is not None else None, stopping_criterion=lambda: violation / violation_init, tol=tol, fit_state={"H": Ht.T, "W": W}, @@ -838,10 +835,8 @@ def _fit_multiplicative_update( error_at_init = _beta_divergence(X, W, H, beta_loss, square_root=True) 
previous_error = error_at_init - nodes = parent_node.children if parent_node is not None else [None] * max_iter - H_sum, HHt, XHt = None, None, None - for n_iter, node in enumerate(nodes, 1): + for n_iter in range(1, max_iter + 1): # update W # H_sum, HHt and XHt are saved and reused if not update_H delta_W, H_sum, HHt, XHt = _multiplicative_update_w( @@ -869,7 +864,7 @@ def _fit_multiplicative_update( if _eval_callbacks_on_fit_iter_end( estimator=estimator, - node=node, + node=parent_node.children[n_iter - 1] if parent_node is not None else None, stopping_criterion=lambda: ( ( previous_error @@ -883,7 +878,7 @@ def _fit_multiplicative_update( "n_components_": H.shape[0], "components_": H, "n_iter_": n_iter, - "reconstruction_err_": _beta_divergence(X, W, H, 2, True), + "reconstruction_err_": _beta_divergence(X, W, H, beta_loss, True), }, ): break @@ -1594,6 +1589,9 @@ def fit_transform(self, X, y=None, W=None, H=None): W, H, n_iter = self._fit_transform(X, W=W, H=H, parent_node=root) + self.reconstruction_err_ = _beta_divergence( + X, W, H, self._beta_loss, square_root=True + ) self.n_components_ = H.shape[0] self.components_ = H self.n_iter_ = n_iter diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index c95b7ceb737db..7a58b64d6464d 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -1,4 +1,6 @@ +import pickle import re +import tempfile import numpy as np import scipy.sparse as sp @@ -18,6 +20,7 @@ from sklearn.utils.extmath import squared_norm from sklearn.base import clone from sklearn.exceptions import ConvergenceWarning +from sklearn.callback import Snapshot @pytest.mark.parametrize("solver", ["cd", "mu"]) @@ -719,3 +722,29 @@ def test_feature_names_out(): names = nmf.get_feature_names_out() assert_array_equal([f"nmf{i}" for i in range(3)], names) + + +@pytest.mark.parametrize("solver, beta_loss", [("mu", 0), ("mu", 2), ("cd", 2)]) +def test_nmf_callback_reconstruction_attributes(solver, beta_loss): + # Check that the reconstruction attributes passed to the callback allow to make + # a new estimator as if the fit ended when the callback is called. 
+    X = np.random.RandomState(0).random_sample((100, 100))
+
+    nmf = NMF(n_components=3, solver=solver, beta_loss=beta_loss, random_state=0)
+    nmf.fit(X)
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        callback = Snapshot(base_dir=tmp_dir)
+        nmf._set_callbacks(callback)
+        nmf.fit(X)
+
+        # load model from last iteration
+        snapshot = sorted(callback.directory.iterdir())[-1]
+        with open(snapshot, "rb") as f:
+            loaded_nmf = pickle.load(f)
+
+        # The model loaded from the last iteration is the same as the original model
+        assert nmf.n_iter_ == loaded_nmf.n_iter_
+        assert_allclose(nmf.components_, loaded_nmf.components_)
+        assert_allclose(nmf.reconstruction_err_, loaded_nmf.reconstruction_err_)
+        assert_allclose(nmf.transform(X), loaded_nmf.transform(X))

From bb32ff3bbcd798f1cd2e204c2437dc38359a36a0 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Mon, 20 Dec 2021 19:12:27 +0100
Subject: [PATCH 03/20] cln snapshot + test snapshot + uuid for computation
 tree

---
 sklearn/callback/_computation_tree.py    |  10 +-
 sklearn/callback/_snapshot.py            |  46 ++++-----
 sklearn/callback/tests/test_callbacks.py | 120 +++++++++++++++++++++++
 sklearn/decomposition/tests/test_nmf.py  |   9 +-
 4 files changed, 151 insertions(+), 34 deletions(-)
 create mode 100644 sklearn/callback/tests/test_callbacks.py

diff --git a/sklearn/callback/_computation_tree.py b/sklearn/callback/_computation_tree.py
index edd3c8f1f657f..161891ca32004 100644
--- a/sklearn/callback/_computation_tree.py
+++ b/sklearn/callback/_computation_tree.py
@@ -1,9 +1,10 @@
 # License: BSD 3 clause

-from tempfile import mkdtemp
+import os
 from pathlib import Path
 import pickle
-import os
+from tempfile import mkdtemp
+from uuid import uuid4

 import numpy as np

@@ -116,6 +117,9 @@ class ComputationTree:
         The path of the directory where the computation tree is dumped during the fit
         of its estimator. If it has a parent tree, this is a sub-directory of the
         `tree_dir` of its parent.
+
+    uid : uuid.UUID
+        Unique identifier for a ComputationTree instance.
     """

     def __init__(self, estimator_name, levels, *, parent_node=None):
@@ -125,6 +129,8 @@ def __init__(self, estimator_name, levels, *, parent_node=None):
         self.depth = len(levels) - 1
         self.root, self.n_nodes = self._build_tree(levels)

+        self.uid = uuid4()
+
         parent_tree_dir = (
             None
             if self.parent_node is None
diff --git a/sklearn/callback/_snapshot.py b/sklearn/callback/_snapshot.py
index 231eafc8cbb9e..99a1bcc0ce68a 100644
--- a/sklearn/callback/_snapshot.py
+++ b/sklearn/callback/_snapshot.py
@@ -22,11 +22,6 @@ class Snapshot(BaseCallback):
     base_dir : str or pathlib.Path instance, default=None
         The directory where the snapshots should be stored. If None, they are stored in
         the current directory.
-
-    Attributes
-    ----------
-    directory : pathlib.Path instance
-        The directory where the snapshots are saved. It's a sub-directory of `base_dir`.
     """

     request_reconstruction_attributes = True
@@ -42,41 +37,36 @@ def __init__(self, keep_last_n=1, base_dir=None):
         self.base_dir = Path("." if base_dir is None else base_dir)

     def on_fit_begin(self, estimator, X=None, y=None):
-        self.estimator = estimator
-
-        # Use a hash in the name of this directory to avoid name collision if several
-        # clones of this estimator are fitted in parallel in a meta-estimator for
-        # instance.
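The uuid introduced above replaces the hash-based directory naming removed below. A sketch of the resulting layout, with an illustrative estimator name and uid value:

    from uuid import uuid4

    uid = uuid4()  # stored once per fit as ComputationTree.uid
    subdir = f"snapshots_NMF_{uid}"
    # e.g. 'snapshots_NMF_9f1c...'; one directory per computation tree, so
    # clones fitted in parallel by a meta-estimator cannot collide.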
- dir_name = ( - "snapshots_" - f"{self.estimator.__class__.__name__}_" - f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')}_" - f"{hash(self.estimator._computation_tree)}" - ) - - self.directory = self.base_dir / dir_name - self.directory.mkdir() + subdir = self._get_subdir(estimator._computation_tree) + subdir.mkdir() - def on_fit_iter_end(self, *, node, **kwargs): + def on_fit_iter_end(self, *, estimator, node, **kwargs): reconstruction_attributes = kwargs.get("reconstruction_attributes", None) if reconstruction_attributes is None: return - new_estimator = copy(self.estimator) + new_estimator = copy(estimator) for key, val in reconstruction_attributes.items(): setattr(new_estimator, key, val) - file_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')}.pkl" - file_path = self.directory / file_name + subdir = self._get_subdir(node.computation_tree) + snapshot_filename = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')}.pkl" - with open(file_path, "wb") as f: + with open(subdir / snapshot_filename, "wb") as f: pickle.dump(new_estimator, f) if self.keep_last_n is not None: - for snapshot in sorted(self.directory.iterdir())[: -self.keep_last_n]: + for snapshot in sorted(subdir.iterdir())[: -self.keep_last_n]: snapshot.unlink(missing_ok=True) def on_fit_end(self): - if self.keep_last_n is not None: - for snapshot in sorted(self.directory.iterdir())[: -self.keep_last_n]: - snapshot.unlink() + pass + + def _get_subdir(self, computation_tree): + """Return the sub directory containing the snapshots of the estimator""" + subdir = ( + self.base_dir + / f"snapshots_{computation_tree.estimator_name}_{str(computation_tree.uid)}" + ) + + return subdir diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py new file mode 100644 index 0000000000000..17dafc616f457 --- /dev/null +++ b/sklearn/callback/tests/test_callbacks.py @@ -0,0 +1,120 @@ +# License: BSD 3 clause + +import pickle +import pytest +import tempfile +from time import sleep + +from joblib import Parallel, delayed + +from sklearn.base import BaseEstimator, clone +from sklearn.callback import Snapshot +from sklearn.callback._base import _eval_callbacks_on_fit_iter_end +from sklearn.datasets import make_classification + + +class Estimator(BaseEstimator): + def __init__(self, max_iter=20): + self.max_iter = max_iter + + def fit(self, X, y): + root = self._eval_callbacks_on_fit_begin( + levels=[ + {"descr": "fit", "max_iter": self.max_iter}, + {"descr": "iter", "max_iter": None}, + ], + X=X, + y=y, + ) + + for i in range(self.max_iter): + if _eval_callbacks_on_fit_iter_end( + estimator=self, + node=root.children[i], + reconstruction_attributes=lambda: {"n_iter_": i + 1}, + ): + break + + self.n_iter_ = i + 1 + + self._eval_callbacks_on_fit_end() + + return self + + +class MetaEstimator(BaseEstimator): + def __init__( + self, estimator, n_outer=4, n_inner=3, n_jobs=None, prefer="processes" + ): + self.estimator = estimator + self.n_outer = n_outer + self.n_inner = n_inner + self.n_jobs = n_jobs + self.prefer = prefer + + def fit(self, X, y): + root = self._eval_callbacks_on_fit_begin( + levels=[ + {"descr": "fit", "max_iter": self.n_outer}, + {"descr": "outer", "max_iter": self.n_inner}, + {"descr": "inner", "max_iter": None}, + ], + X=X, + y=y, + ) + + res = Parallel(n_jobs=self.n_jobs, prefer=self.prefer)( + delayed(self._func)(self.estimator, X, y, node, i) + for i, node in enumerate(root.children) + ) + + self._eval_callbacks_on_fit_end() + + return self + + def _func(self, estimator, X, y, 
parent_node, i): + for j, node in enumerate(parent_node.children): + est = clone(estimator) + self._propagate_callbacks(est, parent_node=node) + est.fit(X, y) + + _eval_callbacks_on_fit_iter_end(estimator=self, node=node) + + _eval_callbacks_on_fit_iter_end(estimator=self, node=parent_node) + + return + + +@pytest.mark.parametrize("n_jobs", (1, 2)) +@pytest.mark.parametrize("prefer", ("threads", "processes")) +def test_snapshot_meta_estimator(n_jobs, prefer): + # Test for the Snapshot callback + X, y = make_classification() + estimator = Estimator(max_iter=20) + + with tempfile.TemporaryDirectory() as tmp_dir: + keep_last_n = 5 + callback = Snapshot(keep_last_n=keep_last_n, base_dir=tmp_dir) + estimator._set_callbacks(callback) + metaestimator = MetaEstimator( + estimator=estimator, n_outer=4, n_inner=3, n_jobs=n_jobs, prefer=prefer + ) + + metaestimator.fit(X, y) + + # There's a subdir of base_dir for each clone of estimator fitted in + # metaestimator. There are n_outer * n_inner such clones + snapshot_dirs = list(callback.base_dir.iterdir()) + assert len(snapshot_dirs) == metaestimator.n_outer * metaestimator.n_inner + + for snapshot_dir in snapshot_dirs: + snapshots = sorted(snapshot_dir.iterdir()) + assert len(snapshots) == keep_last_n + + for i, snapshot in enumerate(snapshots): + with open(snapshot, "rb") as f: + loaded_estimator = pickle.load(f) + + # We kept last 5 snapshots out of 20 iterations. + # This one is the 16 + i-th. + assert loaded_estimator.n_iter_ == 16 + i diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index 7a58b64d6464d..a1ef1e90792af 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -728,9 +728,9 @@ def test_feature_names_out(): def test_nmf_callback_reconstruction_attributes(solver, beta_loss): # Check that the reconstruction attributes passed to the callback allow to make # a new estimator as if the fit ended when the callback is called. 
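Outside the test suite, the workflow being verified looks roughly as follows; a sketch assuming the private `_set_callbacks` API introduced in this series (not a public interface):

    import pickle
    import tempfile
    from pathlib import Path

    import numpy as np
    from sklearn.callback import Snapshot
    from sklearn.decomposition import NMF

    X = np.random.RandomState(0).random_sample((100, 20))
    nmf = NMF(n_components=5, random_state=0)

    with tempfile.TemporaryDirectory() as tmp_dir:
        nmf._set_callbacks(Snapshot(keep_last_n=3, base_dir=tmp_dir))
        nmf.fit(X)

        snapshot_dir = next(Path(tmp_dir).iterdir())        # snapshots_NMF_<uid>
        last_snapshot = sorted(snapshot_dir.iterdir())[-1]  # timestamps sort in fit order
        with open(last_snapshot, "rb") as f:
            partial_nmf = pickle.load(f)                    # fitted as of that iteration

        partial_nmf.transform(X)  # usable without refitting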
- X = np.random.RandomState(0).random_sample((100, 100)) + X = np.random.RandomState(0).random_sample((100, 20)) - nmf = NMF(n_components=3, solver=solver, beta_loss=beta_loss, random_state=0) + nmf = NMF(n_components=5, solver=solver, beta_loss=beta_loss, random_state=0) nmf.fit(X) with tempfile.TemporaryDirectory() as tmp_dir: @@ -739,11 +739,12 @@ def test_nmf_callback_reconstruction_attributes(solver, beta_loss): nmf.fit(X) # load model from last iteration - snapshot = sorted(callback.directory.iterdir())[-1] + snapshot_dir = next(callback.base_dir.iterdir()) + snapshot = sorted(snapshot_dir.iterdir())[-1] with open(snapshot, "rb") as f: loaded_nmf = pickle.load(f) - # The model loaded from the last iteration is the same as the original model + # The model saved during the last iteration is the same as the original model assert nmf.n_iter_ == loaded_nmf.n_iter_ assert_allclose(nmf.components_, loaded_nmf.components_) assert_allclose(nmf.reconstruction_err_, loaded_nmf.reconstruction_err_) From 7a1825db4c9d2a3a7170235fd95fdd7747c3ff96 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 31 Dec 2021 17:20:55 +0100 Subject: [PATCH 04/20] cln --- sklearn/base.py | 14 +++++++++++ sklearn/callback/_base.py | 16 ++++++------ sklearn/callback/_snapshot.py | 11 +++------ sklearn/callback/tests/test_callbacks.py | 6 ++++- sklearn/decomposition/_nmf.py | 31 +++++++++++++++--------- sklearn/linear_model/_logistic.py | 11 +++------ sklearn/utils/optimize.py | 8 +++--- 7 files changed, 57 insertions(+), 40 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 4f6b63cb2add1..7823e61f63c1e 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -733,6 +733,20 @@ def _eval_callbacks_on_fit_end(self): # propagated from a meta-estimator. callback.on_fit_end() + def _from_reconstruction_attributes(self, *, reconstruction_attributes): + """ + + Parameters + ---------- + reconstruction_attributes : callable + The necessary fitted attributes to create a working fitted estimator from + this instance. + """ + new_estimator = copy.copy(self) + for key, val in reconstruction_attributes().items(): + setattr(new_estimator, key, val) + return new_estimator + @property def _repr_html_(self): """HTML representation of estimator. diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py index 604a450336610..a473f172fd575 100644 --- a/sklearn/callback/_base.py +++ b/sklearn/callback/_base.py @@ -38,11 +38,11 @@ def _eval_callbacks_on_fit_iter_end(**kwargs): kwargs["stopping_criterion"] = kwarg if any( - getattr(callback, "request_reconstruction_attributes", False) + getattr(callback, "request_from_reconstruction_attributes", False) for callback in estimator._callbacks ): - kwarg = kwargs.pop("reconstruction_attributes", lambda: None)() - kwargs["reconstruction_attributes"] = kwarg + kwarg = kwargs.pop("from_reconstruction_attributes", lambda: None)() + kwargs["from_reconstruction_attributes"] = kwarg return any(callback.on_fit_iter_end(**kwargs) for callback in estimator._callbacks) @@ -94,11 +94,11 @@ def on_fit_iter_end(self, estimator, node, **kwargs): Tolerance for the stopping criterion. This is only provided at the innermost level of iterations. - - reconstruction_attributes: dict - Necessary attributes to construct an estimator (by copying this - estimator and setting these as attributes) which will behave as if - the fit stopped at this node. - This is only provided at the outermost level of iterations. 
+ - from_reconstruction_attributes: estimator instance + A ready to predict, transform, etc ... estimator as if the fit stopped + at this node. Usually it's a copy of the caller estimator with the + necessary attributes set but it can sometimes be an instance of another + class (e.g. LogisticRegressionCV -> LogisticRegression) - fit_state: dict Model specific quantities updated during fit. This is not meant to be diff --git a/sklearn/callback/_snapshot.py b/sklearn/callback/_snapshot.py index 99a1bcc0ce68a..cbf200336c749 100644 --- a/sklearn/callback/_snapshot.py +++ b/sklearn/callback/_snapshot.py @@ -1,6 +1,5 @@ # License: BSD 3 clause -from copy import copy from datetime import datetime from pathlib import Path import pickle @@ -24,7 +23,7 @@ class Snapshot(BaseCallback): the current directory. """ - request_reconstruction_attributes = True + request_from_reconstruction_attributes = True def __init__(self, keep_last_n=1, base_dir=None): self.keep_last_n = keep_last_n @@ -41,14 +40,10 @@ def on_fit_begin(self, estimator, X=None, y=None): subdir.mkdir() def on_fit_iter_end(self, *, estimator, node, **kwargs): - reconstruction_attributes = kwargs.get("reconstruction_attributes", None) - if reconstruction_attributes is None: + new_estimator = kwargs.get("from_reconstruction_attributes", None) + if new_estimator is None: return - new_estimator = copy(estimator) - for key, val in reconstruction_attributes.items(): - setattr(new_estimator, key, val) - subdir = self._get_subdir(node.computation_tree) snapshot_filename = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')}.pkl" diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py index 17dafc616f457..c43241f469f8c 100644 --- a/sklearn/callback/tests/test_callbacks.py +++ b/sklearn/callback/tests/test_callbacks.py @@ -1,5 +1,6 @@ # License: BSD 3 clause +from functools import partial import pickle import pytest import tempfile @@ -31,7 +32,10 @@ def fit(self, X, y): if _eval_callbacks_on_fit_iter_end( estimator=self, node=root.children[i], - reconstruction_attributes=lambda: {"n_iter_": i + 1}, + from_reconstruction_attributes=partial( + self._from_reconstruction_attributes, + reconstruction_attributes=lambda : {"n_iter_": i + 1}, + ) ): break diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 4fa46dd2cb12c..154dbb3db6532 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -6,6 +6,7 @@ # Tom Dupre la Tour # License: BSD 3 clause +from functools import partial import numbers import numpy as np import scipy.sparse as sp @@ -528,12 +529,15 @@ def _fit_coordinate_descent( stopping_criterion=lambda: violation / violation_init, tol=tol, fit_state={"H": Ht.T, "W": W}, - reconstruction_attributes=lambda: { - "n_components_": Ht.T.shape[0], - "components_": H, - "n_iter_": n_iter, - "reconstruction_err_": _beta_divergence(X, W, Ht.T, 2, True), - }, + from_reconstruction_attributes=partial( + estimator._from_reconstruction_attributes, + reconstruction_attributes=lambda : { + "n_components_": Ht.T.shape[0], + "components_": H, + "n_iter_": n_iter, + "reconstruction_err_": _beta_divergence(X, W, Ht.T, 2, True), + } + ), ): break @@ -874,12 +878,15 @@ def _fit_multiplicative_update( ), tol=tol, fit_state={"H": H, "W": W}, - reconstruction_attributes=lambda: { - "n_components_": H.shape[0], - "components_": H, - "n_iter_": n_iter, - "reconstruction_err_": _beta_divergence(X, W, H, beta_loss, True), - }, + from_reconstruction_attributes=partial( + 
estimator._from_reconstruction_attributes, + reconstruction_attributes=lambda { + "n_components_": H.shape[0], + "components_": H, + "n_iter_": n_iter, + "reconstruction_err_": _beta_divergence(X, W, H, beta_loss, True), + } + ), ): break diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 82063f36d0434..540f1a656c077 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -799,15 +799,12 @@ def grad(x, *args): hess = _logistic_grad_hess warm_start_sag = {"coef": np.expand_dims(w0, axis=1)} - # Distinguish between LogReg and LogRegCV - if parent_node is not None: - nodes = [parent_node] if len(Cs) == 1 else parent_node.children - else: - nodes = [None] * len(Cs) - coefs = list() n_iter = np.zeros(len(Cs), dtype=np.int32) - for i, (C, node) in enumerate(zip(Cs, nodes)): + for i, C in enumerate(Cs): + # Distinguish between LogReg and LogRegCV + node = None if parent_node is None else parent_node if len(Cs) == 1 else parent_node.children + if solver == "lbfgs": iprint = [-1, 50, 1, 100, 101][ np.searchsorted(np.array([0, 1, 2, 3]), verbose) diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index 2e3b6eb1c125b..b634f457bd287 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -171,15 +171,14 @@ def _newton_cg( """ x0 = np.asarray(x0).flatten() xk = x0 + k = 0 if line_search: old_fval = func(x0, *args) old_old_fval = None - nodes = parent_node.children if parent_node is not None else [None] * maxiter - # Outer loop: our Newton iteration - for k, node in enumerate(nodes, 1): + while k < maxiter: # Compute a search direction pk by applying the CG method to # del2 f(xk) p = - fgrad f(xk) starting from 0. fgrad, fhess_p = grad_hess(xk, *args) @@ -189,7 +188,7 @@ def _newton_cg( if _eval_callbacks_on_fit_iter_end( estimator=estimator, - node=node, + node=None if parent_node is None else parent_node.children[k], stopping_criterion=lambda: max_absgrad, tol=tol, ): @@ -218,6 +217,7 @@ def _newton_cg( break xk = xk + alphak * xsupi # upcast if necessary + k += 1 if warn and k >= maxiter: warnings.warn( From 3e3b25f3d5202a3a56a7fdfcc22e373a538a30bd Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 31 Dec 2021 17:25:57 +0100 Subject: [PATCH 05/20] black --- sklearn/callback/tests/test_callbacks.py | 4 ++-- sklearn/decomposition/_nmf.py | 8 ++++---- sklearn/decomposition/tests/test_nmf.py | 2 +- sklearn/linear_model/_logistic.py | 8 +++++++- sklearn/pipeline.py | 5 +++-- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py index c43241f469f8c..1f5fcf6bdd3c4 100644 --- a/sklearn/callback/tests/test_callbacks.py +++ b/sklearn/callback/tests/test_callbacks.py @@ -34,8 +34,8 @@ def fit(self, X, y): node=root.children[i], from_reconstruction_attributes=partial( self._from_reconstruction_attributes, - reconstruction_attributes=lambda : {"n_iter_": i + 1}, - ) + reconstruction_attributes=lambda: {"n_iter_": i + 1}, + ), ): break diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 154dbb3db6532..f63146dc11250 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -531,12 +531,12 @@ def _fit_coordinate_descent( fit_state={"H": Ht.T, "W": W}, from_reconstruction_attributes=partial( estimator._from_reconstruction_attributes, - reconstruction_attributes=lambda : { + reconstruction_attributes=lambda: { "n_components_": Ht.T.shape[0], 
"components_": H, "n_iter_": n_iter, "reconstruction_err_": _beta_divergence(X, W, Ht.T, 2, True), - } + }, ), ): break @@ -880,12 +880,12 @@ def _fit_multiplicative_update( fit_state={"H": H, "W": W}, from_reconstruction_attributes=partial( estimator._from_reconstruction_attributes, - reconstruction_attributes=lambda { + reconstruction_attributes=lambda: { "n_components_": H.shape[0], "components_": H, "n_iter_": n_iter, "reconstruction_err_": _beta_divergence(X, W, H, beta_loss, True), - } + }, ), ): break diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index a1ef1e90792af..c84ee43175df4 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -746,6 +746,6 @@ def test_nmf_callback_reconstruction_attributes(solver, beta_loss): # The model saved during the last iteration is the same as the original model assert nmf.n_iter_ == loaded_nmf.n_iter_ - assert_allclose(nmf.components_, loaded_nmf.components_) + assert_allclose(nmf.components_, loaded_nmf.components_) assert_allclose(nmf.reconstruction_err_, loaded_nmf.reconstruction_err_) assert_allclose(nmf.transform(X), loaded_nmf.transform(X)) diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 540f1a656c077..1d4bbc815bb3d 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -803,7 +803,13 @@ def grad(x, *args): n_iter = np.zeros(len(Cs), dtype=np.int32) for i, C in enumerate(Cs): # Distinguish between LogReg and LogRegCV - node = None if parent_node is None else parent_node if len(Cs) == 1 else parent_node.children + node = ( + None + if parent_node is None + else parent_node + if len(Cs) == 1 + else parent_node.children + ) if solver == "lbfgs": iprint = [-1, 50, 1, 100, 101][ diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 47553d07ac169..657ba79307ce3 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -330,9 +330,10 @@ def _fit(self, X, y=None, **fit_params_steps): fit_transform_one_cached = memory.cache(_fit_transform_one) - for (step_idx, name, transformer), node in zip(self._iter( + for (step_idx, name, transformer), in self._iter( with_final=False, filter_passthrough=False - ), root.children[:-1]): + ): + node = root.children[step_idx] if transformer is None or transformer == "passthrough": _eval_callbacks_on_fit_iter_end(estimator=self, node=node) with _print_elapsed_time("Pipeline", self._log_message(step_idx)): From 26dbb6954c4155daf2ce7b09b91911379bf4705f Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 31 Dec 2021 17:33:47 +0100 Subject: [PATCH 06/20] lint --- sklearn/base.py | 4 ++-- sklearn/callback/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 7823e61f63c1e..c14a5d314a502 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -617,7 +617,7 @@ def _set_callbacks(self, callbacks): callbacks = [callbacks] if not all(isinstance(callback, BaseCallback) for callback in callbacks): - raise TypeError(f"callbacks must be subclasses of BaseCallback.") + raise TypeError("callbacks must be subclasses of BaseCallback.") self._callbacks = callbacks @@ -734,7 +734,7 @@ def _eval_callbacks_on_fit_end(self): callback.on_fit_end() def _from_reconstruction_attributes(self, *, reconstruction_attributes): - """ + """Return a as if fitted copy of this estimator Parameters ---------- diff --git a/sklearn/callback/__init__.py b/sklearn/callback/__init__.py index 
1f0f3f7215a18..c8d5ea0bf0606 100644 --- a/sklearn/callback/__init__.py +++ b/sklearn/callback/__init__.py @@ -13,7 +13,7 @@ __all__ = [ "AutoPropagatedMixin", - "Basecallback", + "BaseCallback", "ComputationNode", "ComputationTree", "load_computation_tree", From eb7b8246d5fc1e770cc0d5d98b1f6130d6fba461 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Mon, 14 Feb 2022 16:02:42 +0100 Subject: [PATCH 07/20] wip --- sklearn/callback/_convergence_monitor.py | 35 ++++++++++++++---------- sklearn/pipeline.py | 2 +- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/sklearn/callback/_convergence_monitor.py b/sklearn/callback/_convergence_monitor.py index 9f53d657cc75a..ac04335e04661 100644 --- a/sklearn/callback/_convergence_monitor.py +++ b/sklearn/callback/_convergence_monitor.py @@ -3,20 +3,21 @@ from copy import copy from pathlib import Path from tempfile import mkdtemp -import time import matplotlib.pyplot as plt import pandas as pd from . import BaseCallback +# import ..metrics as metrics + class ConvergenceMonitor(BaseCallback): """Monitor model convergence. Parameters ---------- - monitor : + monitor : X_val : ndarray, default=None Validation data @@ -33,37 +34,41 @@ class ConvergenceMonitor(BaseCallback): request_reconstruction_attributes = True def __init__(self, *, monitor="objective_function", X_val=None, y_val=None): + if monitor == "objective_function": + self._monitor = "objective_function" + else: + self._monitor = getattr(metrics, monitor, None) + if self._monitor is None: + raise ValueError(f"unknown metric {monitor}") + self.X_val = X_val self.y_val = y_val + self._data_file = Path(mkdtemp()) / "convergence_monitor.csv" def on_fit_begin(self, estimator, *, X=None, y=None): self.estimator = estimator self.X_train = X self.y_train = y - self._start_time = {} - - def on_fit_iter_end(self, *, node, **kwargs): - if node.depth != node.computation_tree.depth: - return + def on_fit_iter_end(self, *, estimator, node, **kwargs): reconstruction_attributes = kwargs.get("reconstruction_attributes", None) if reconstruction_attributes is None: return - new_estimator = copy(self.estimator) + new_estimator = copy(estimator) for key, val in reconstruction_attributes.items(): setattr(new_estimator, key, val) - if node.idx == 0: - self._start_time[node.parent] = time.perf_counter() - curr_time = 0 - else: - curr_time = time.perf_counter() - self._start_time[node.parent] + # if self._monitor = - obj_train, *_ = new_estimator.objective_function(self.X_train, self.y_train, normalize=True) + obj_train, *_ = new_estimator.objective_function( + self.X_train, self.y_train, normalize=True + ) if self.X_val is not None: - obj_val, *_ = new_estimator.objective_function(self.X_val, self.y_val, normalize=True) + obj_val, *_ = new_estimator.objective_function( + self.X_val, self.y_val, normalize=True + ) else: obj_val = None diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 657ba79307ce3..96a4738a9196a 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -330,7 +330,7 @@ def _fit(self, X, y=None, **fit_params_steps): fit_transform_one_cached = memory.cache(_fit_transform_one) - for (step_idx, name, transformer), in self._iter( + for (step_idx, name, transformer) in self._iter( with_final=False, filter_passthrough=False ): node = root.children[step_idx] From f78442ebc9895210f34905c96e6ca7fd4d2b6e3a Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Wed, 23 Feb 2022 14:46:17 +0100 Subject: [PATCH 08/20] class --- sklearn/model_selection/_search.py | 73 
+++++++++++++++++++++++--- sklearn/model_selection/_validation.py | 8 +++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 5ceb71569b932..fc16eefe8070f 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -15,6 +15,7 @@ from collections.abc import Mapping, Sequence, Iterable from functools import partial, reduce from itertools import product +from itertools import cycle import numbers import operator import time @@ -23,6 +24,7 @@ import numpy as np from numpy.ma import MaskedArray from scipy.stats import rankdata +from joblib import Parallel from ..base import BaseEstimator, is_classifier, clone from ..base import MetaEstimatorMixin @@ -33,7 +35,6 @@ from ._validation import _normalize_score_results from ._validation import _warn_or_raise_about_fit_failures from ..exceptions import NotFittedError -from joblib import Parallel from ..utils import check_random_state from ..utils.random import sample_without_replacement from ..utils._tags import _safe_tags @@ -783,7 +784,7 @@ def fit(self, X, y=None, *, groups=None, **fit_params): X, y, groups = indexable(X, y, groups) fit_params = _check_fit_params(X, fit_params) - cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator)) + cv_orig = self._checked_cv_orig n_splits = cv_orig.get_n_splits(X, y, groups) base_estimator = clone(self.estimator) @@ -806,7 +807,7 @@ def fit(self, X, y=None, *, groups=None, **fit_params): all_out = [] all_more_results = defaultdict(list) - def evaluate_candidates(candidate_params, cv=None, more_results=None): + def evaluate_candidates(candidate_params, cv=None, more_results=None, parent_node=None): cv = cv or cv_orig candidate_params = list(candidate_params) n_candidates = len(candidate_params) @@ -819,6 +820,11 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): ) ) + if parent_node is not None: + nodes = parent_node.children + else: + nodes = cycle([None]) + out = parallel( delayed(_fit_and_score)( clone(base_estimator), @@ -830,10 +836,11 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): split_progress=(split_idx, n_splits), candidate_progress=(cand_idx, n_candidates), **fit_and_score_kwargs, + caller=self, + node=node, ) - for (cand_idx, parameters), (split_idx, (train, test)) in product( - enumerate(candidate_params), enumerate(cv.split(X, y, groups)) - ) + for ((cand_idx, parameters), (split_idx, (train, test))), node in zip(product( + enumerate(candidate_params), enumerate(cv.split(X, y, groups))), nodes) ) if len(out) < 1: @@ -1370,10 +1377,60 @@ def __init__( ) self.param_grid = param_grid + def fit(self, X, y=None, *, groups=None, **fit_params): + """Run fit with all sets of parameters. + + Parameters + ---------- + + X : array-like of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples, n_output) or (n_samples,), default=None + Target relative to X for classification or regression; + None for unsupervised learning. + + groups : array-like of shape (n_samples,), default=None + Group labels for the samples used while splitting the dataset into + train/test set. Only used in conjunction with a "Group" :term:`cv` + instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). + + **fit_params : dict of str -> object + Parameters passed to the `fit` method of the estimator. 
+ + If a fit parameter is an array-like whose length is equal to + `num_samples` then it will be split across CV groups along with `X` + and `y`. For example, the :term:`sample_weight` parameter is split + because `len(sample_weights) = len(X)`. + + Returns + ------- + self : object + Instance of fitted estimator. + """ + self._param_grid = ParameterGrid(self.param_grid) + + self._checked_cv_orig = check_cv( + self.cv, y, classifier=is_classifier(self.estimator) + ) + n_splits = self._checked_cv_orig.get_n_splits(X, y, groups) + + self._eval_callbacks_on_fit_begin( + levels=[ + {"descr": "fit", "max_iter": len(self._param_grid) * n_splits}, + {"descr": "param - fold", "max_iter": None}, + ], + X=X, + y=y, + ) + super().fit(X, y=y, groups=groups, **fit_params) + + self._eval_callbacks_on_fit_end() + def _run_search(self, evaluate_candidates): """Search all candidates in param_grid""" - evaluate_candidates(ParameterGrid(self.param_grid)) - + evaluate_candidates(self._param_grid, parent_node=self._computation_tree.root) class RandomizedSearchCV(BaseSearchCV): """Randomized search on hyper parameters. diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 927fe7a2cc452..6bf61bf246302 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -33,6 +33,7 @@ from ..exceptions import FitFailedWarning from ._split import check_cv from ..preprocessing import LabelEncoder +from ..callback._base import _eval_callbacks_on_fit_iter_end __all__ = [ @@ -547,6 +548,8 @@ def _fit_and_score( split_progress=None, candidate_progress=None, error_score=np.nan, + caller=None, + node=None, ): """Fit estimator and compute scores for a given dataset split. @@ -673,6 +676,9 @@ def _fit_and_score( cloned_parameters[k] = clone(v, safe=False) estimator = estimator.set_params(**cloned_parameters) + + if caller is not None: + caller._propagate_callbacks(estimator, parent_node=node) start_time = time.time() @@ -736,6 +742,8 @@ def _fit_and_score( end_msg += result_msg print(end_msg) + _eval_callbacks_on_fit_iter_end(estimator=caller, node=node) + result["test_scores"] = test_scores if return_train_score: result["train_scores"] = train_scores From 34bab15d7feb1dc191df80d731f03c5454244011 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Wed, 23 Feb 2022 17:57:36 +0100 Subject: [PATCH 09/20] more tests --- sklearn/base.py | 47 +++++---- sklearn/callback/_computation_tree.py | 8 +- .../test_base_estimator_callback_methods.py | 95 +++++++++++++++++++ sklearn/callback/tests/test_callbacks.py | 89 ++--------------- .../callback/tests/test_computation_tree.py | 33 ++++--- 5 files changed, 161 insertions(+), 111 deletions(-) create mode 100644 sklearn/callback/tests/test_base_estimator_callback_methods.py diff --git a/sklearn/base.py b/sklearn/base.py index cf3459267d13a..c542332280a07 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -616,6 +616,11 @@ def _set_callbacks(self, callbacks): ---------- callbacks : callback or list of callbacks the callbacks to set. + + Returns + ------- + self : estimator instance + The estimator instance itself. """ if not isinstance(callbacks, list): callbacks = [callbacks] @@ -625,9 +630,11 @@ def _set_callbacks(self, callbacks): self._callbacks = callbacks + return self + # XXX should be a method of MetaEstimatorMixin but this mixin can't handle all # meta-estimators. 
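For reference, the contract that `_propagate_callbacks` (reworked below) gives a meta-estimator; a minimal sketch of a per-fit helper, modeled on the `MetaEstimator` test helper from this series (the helper name `_fit_one` is hypothetical):

    from sklearn.base import clone

    def _fit_one(meta_estimator, estimator, X, y, node):
        # `node` is the ComputationNode of the meta-estimator where this
        # sub-fit happens; it becomes the parent of the clone's own tree.
        est = clone(estimator)
        meta_estimator._propagate_callbacks(est, parent_node=node)
        est.fit(X, y)
        return est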
- def _propagate_callbacks(self, sub_estimator, parent_node): + def _propagate_callbacks(self, sub_estimator, *, parent_node): """Propagate the auto-propagated callbacks to a sub-estimator Parameters @@ -640,9 +647,6 @@ def _propagate_callbacks(self, sub_estimator, parent_node): computation tree of the sub-estimator. It must be the node where the fit method of the sub-estimator is called. """ - if not hasattr(self, "_callbacks"): - return - if hasattr(sub_estimator, "_callbacks") and any( isinstance(callback, AutoPropagatedMixin) for callback in sub_estimator._callbacks @@ -659,6 +663,9 @@ def _propagate_callbacks(self, sub_estimator, parent_node): " Set them directly on the meta-estimator." ) + if not hasattr(self, "_callbacks"): + return + propagated_callbacks = [ callback for callback in self._callbacks @@ -668,7 +675,7 @@ def _propagate_callbacks(self, sub_estimator, parent_node): if not propagated_callbacks: return - sub_estimator._parent_node = parent_node + sub_estimator._parent_ct_node = parent_node if not hasattr(sub_estimator, "_callbacks"): sub_estimator._callbacks = propagated_callbacks @@ -702,7 +709,7 @@ def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None): self._computation_tree = ComputationTree( estimator_name=self.__class__.__name__, levels=levels, - parent_node=getattr(self, "_parent_node", None), + parent_node=getattr(self, "_parent_ct_node", None), ) if hasattr(self, "_callbacks"): @@ -710,13 +717,13 @@ def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None): with open(file_path, "wb") as f: pickle.dump(self._computation_tree, f) + # Only call the on_fit_begin method of callbacks that are not + # propagated from a meta-estimator. for callback in self._callbacks: - is_propagated = hasattr(self, "_parent_node") and isinstance( + is_propagated = hasattr(self, "_parent_ct_node") and isinstance( callback, AutoPropagatedMixin ) if not is_propagated: - # Only call the on_fit_begin method of callbacks that are not - # propagated from a meta-estimator. callback.on_fit_begin(estimator=self, X=X, y=y) return self._computation_tree.root @@ -728,25 +735,33 @@ def _eval_callbacks_on_fit_end(self): self._computation_tree._tree_status[0] = True + # Only call the on_fit_end method of callbacks that are not + # propagated from a meta-estimator. for callback in self._callbacks: is_propagated = isinstance(callback, AutoPropagatedMixin) and hasattr( - self, "_parent_node" + self, "_parent_ct_node" ) if not is_propagated: - # Only call the on_fit_end method of callbacks that are not - # propagated from a meta-estimator. callback.on_fit_end() def _from_reconstruction_attributes(self, *, reconstruction_attributes): - """Return a as if fitted copy of this estimator + """Return an as if fitted copy of this estimator Parameters ---------- reconstruction_attributes : callable - The necessary fitted attributes to create a working fitted estimator from - this instance. + A callable that has no arguments and returns the necessary fitted attributes + to create a working fitted estimator from this instance. + + Using a callable allows lazy evaluation of the potentially costly + reconstruction attributes. + + Returns + ------- + fitted_estimator : estimator instance + The fitted copy of this estimator. """ - new_estimator = copy.copy(self) + new_estimator = copy.copy(self) # XXX deepcopy ? 
for key, val in reconstruction_attributes().items(): setattr(new_estimator, key, val) return new_estimator diff --git a/sklearn/callback/_computation_tree.py b/sklearn/callback/_computation_tree.py index 161891ca32004..a69a8788e26c5 100644 --- a/sklearn/callback/_computation_tree.py +++ b/sklearn/callback/_computation_tree.py @@ -88,6 +88,12 @@ def get_ancestors(self, include_ancestor_trees=True): return ancestors + def __repr__(self): + return ( + f"ComputationNode(description={self.description}, " + f"depth={self.depth}, idx={self.idx})" + ) + class ComputationTree: """Data structure to store the computation tree of an estimator @@ -221,7 +227,7 @@ def iterate(self, include_leaves=False): def _recursive_iterate(self, node=None, include_leaves=False, node_list=None): """Recursively constructs the iterable""" - # TODO make it a generator + # TODO make it an iterator ? if node is None: node = self.root node_list = [] diff --git a/sklearn/callback/tests/test_base_estimator_callback_methods.py b/sklearn/callback/tests/test_base_estimator_callback_methods.py new file mode 100644 index 0000000000000..676f0a5cfdd0e --- /dev/null +++ b/sklearn/callback/tests/test_base_estimator_callback_methods.py @@ -0,0 +1,95 @@ +# License: BSD 3 clause + +from pathlib import Path +import pytest + +from sklearn.callback.tests._utils import TestingCallback +from sklearn.callback.tests._utils import TestingAutoPropagatedCallback +from sklearn.callback.tests._utils import NotValidCallback +from sklearn.callback.tests._utils import Estimator +from sklearn.callback.tests._utils import MetaEstimator + + +@pytest.mark.parametrize("callbacks", + [ + TestingCallback(), + [TestingCallback()], + [TestingCallback(), TestingAutoPropagatedCallback()], + ] +) +def test_set_callbacks(callbacks): + """Sanity check for the _set_callbacks method""" + estimator = Estimator() + + set_callbacks_return = estimator._set_callbacks(callbacks) + assert hasattr(estimator, "_callbacks") + assert estimator._callbacks in (callbacks, [callbacks]) + assert set_callbacks_return is estimator + + +@pytest.mark.parametrize("callbacks", [None, NotValidCallback()]) +def test_set_callbacks_error(callbacks): + """Check the error message when not passing a valid callback to _set_callbacks""" + estimator = Estimator() + + with pytest.raises(TypeError, match="callbacks must be subclasses of BaseCallback"): + estimator._set_callbacks(callbacks) + + +def test_propagate_callbacks(): + """Sanity check for the _propagate_callbacks method""" + not_propagated_callback = TestingCallback() + propagated_callback = TestingAutoPropagatedCallback() + + estimator = Estimator() + estimator._set_callbacks([not_propagated_callback, propagated_callback]) + + sub_estimator = Estimator() + estimator._propagate_callbacks(sub_estimator, parent_node=None) + + assert hasattr(sub_estimator, "_parent_ct_node") + assert not_propagated_callback not in sub_estimator._callbacks + assert propagated_callback in sub_estimator._callbacks + + +def test_propagate_callback_no_callback(): + """Check that no callback is propagated if there's no callback""" + estimator = Estimator() + sub_estimator = Estimator() + estimator._propagate_callbacks(sub_estimator, parent_node=None) + + assert not hasattr(estimator, "_callbacks") + assert not hasattr(sub_estimator, "_callbacks") + + +def test_auto_propagated_callbacks(): + """Check that it's not possible to set an auto-propagated callback on the + sub-estimator of a meta-estimator. 
+ """ + estimator = Estimator() + estimator._set_callbacks(TestingAutoPropagatedCallback()) + + meta_estimator = MetaEstimator(estimator=estimator) + + match = ( + r"sub-estimators .*of a meta-estimator .*can't have auto-propagated callbacks" + ) + with pytest.raises(TypeError, match=match): + meta_estimator.fit(X=None, y=None) + + +def test_eval_callbacks_on_fit_begin(): + """Check that _eval_callbacks_on_fit_begin creates and dumps the computation tree""" + estimator = Estimator()._set_callbacks(TestingCallback()) + assert not hasattr(estimator, "_computation_tree") + + levels = [ + {"descr": "fit", "max_iter": 10}, + {"descr": "iter", "max_iter": None}, + ] + ct_root = estimator._eval_callbacks_on_fit_begin(levels=levels) + assert hasattr(estimator, "_computation_tree") + assert ct_root is estimator._computation_tree.root + + ct_pickle = Path(estimator._computation_tree.tree_dir) / "computation_tree.pkl" + assert ct_pickle.exists() diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py index 1f5fcf6bdd3c4..a87cdbcbf3199 100644 --- a/sklearn/callback/tests/test_callbacks.py +++ b/sklearn/callback/tests/test_callbacks.py @@ -1,99 +1,24 @@ # License: BSD 3 clause -from functools import partial import pickle import pytest import tempfile -from time import sleep -from joblib import Parallel, delayed +import numpy as np -from sklearn.base import BaseEstimator, clone from sklearn.callback import Snapshot -from sklearn.callback._base import _eval_callbacks_on_fit_iter_end -from sklearn.datasets import make_classification +from sklearn.callback.tests._utils import Estimator +from sklearn.callback.tests._utils import MetaEstimator -class Estimator(BaseEstimator): - def __init__(self, max_iter=20): - self.max_iter = max_iter - - def fit(self, X, y): - root = self._eval_callbacks_on_fit_begin( - levels=[ - {"descr": "fit", "max_iter": self.max_iter}, - {"descr": "iter", "max_iter": None}, - ], - X=X, - y=y, - ) - - for i in range(self.max_iter): - if _eval_callbacks_on_fit_iter_end( - estimator=self, - node=root.children[i], - from_reconstruction_attributes=partial( - self._from_reconstruction_attributes, - reconstruction_attributes=lambda: {"n_iter_": i + 1}, - ), - ): - break - - self.n_iter_ = i + 1 - - self._eval_callbacks_on_fit_end() - - return self - - -class MetaEstimator(BaseEstimator): - def __init__( - self, estimator, n_outer=4, n_inner=3, n_jobs=None, prefer="processes" - ): - self.estimator = estimator - self.n_outer = n_outer - self.n_inner = n_inner - self.n_jobs = n_jobs - self.prefer = prefer - - def fit(self, X, y): - root = self._eval_callbacks_on_fit_begin( - levels=[ - {"descr": "fit", "max_iter": self.n_outer}, - {"descr": "outer", "max_iter": self.n_inner}, - {"descr": "inner", "max_iter": None}, - ], - X=X, - y=y, - ) - - res = Parallel(n_jobs=self.n_jobs, prefer=self.prefer)( - delayed(self._func)(self.estimator, X, y, node, i) - for i, node in enumerate(root.children) - ) - - self._eval_callbacks_on_fit_end() - - return self - - def _func(self, estimator, X, y, parent_node, i): - for j, node in enumerate(parent_node.children): - est = clone(estimator) - self._propagate_callbacks(est, parent_node=node) - est.fit(X, y) - - _eval_callbacks_on_fit_iter_end(estimator=self, node=node) - - _eval_callbacks_on_fit_iter_end(estimator=self, node=parent_node) - - return +X = np.zeros((100, 3)) +y = np.zeros(100, dtype=int) @pytest.mark.parametrize("n_jobs", (1, 2)) @pytest.mark.parametrize("prefer", ("threads", "processes")) def 
test_snapshot_meta_estimator(n_jobs, prefer):
-    # Test for the Snapshot callback
-    X, y = make_classification()
+    """Test for the Snapshot callback"""
     estimator = Estimator(max_iter=20)

     with tempfile.TemporaryDirectory() as tmp_dir:
@@ -122,3 +47,5 @@ def test_snapshot_meta_estimator(n_jobs, prefer):
         # We kept last 5 snapshots out of 20 iterations.
         # This one is the 16 + i-th.
         assert loaded_estimator.n_iter_ == 16 + i
+
+
diff --git a/sklearn/callback/tests/test_computation_tree.py b/sklearn/callback/tests/test_computation_tree.py
index b726177a342ec..902175b71a250 100644
--- a/sklearn/callback/tests/test_computation_tree.py
+++ b/sklearn/callback/tests/test_computation_tree.py
@@ -1,11 +1,8 @@
 # License: BSD 3 clause

 import numpy as np
-import pytest

 from sklearn.callback import ComputationTree
-from sklearn.callback import ComputationNode
-from sklearn.callback import load_computation_tree


 levels = [
@@ -17,7 +14,7 @@


 def test_computation_tree():
-    # Check the construction of the computation tree
+    """Check the construction of the computation tree"""
     computation_tree = ComputationTree(estimator_name="estimator", levels=levels)

     assert computation_tree.estimator_name == "estimator"
@@ -41,8 +38,9 @@ def test_computation_tree():


 def test_n_nodes():
-    # Check that the number of nodes in a computation tree corresponds to what we expect
-    # from the level descriptions
+    """Check that the number of nodes in a computation tree corresponds to what we expect
+    from the level descriptions
+    """
     computation_tree = ComputationTree(estimator_name="", levels=levels)

     max_iter_per_level = [level["max_iter"] for level in levels[:-1]]
@@ -54,8 +52,9 @@ def test_n_nodes():


 def test_tree_status_idx():
-    # Check that each node has a unique index in the _tree_status array and that their
-    # order corresponds to the order given by a depth first search.
+    """Check that each node has a unique index in the _tree_status array and that their
+    order corresponds to the order given by a depth first search.
+    """
     computation_tree = ComputationTree(estimator_name="", levels=levels)

     indexes = [
@@ -65,7 +64,7 @@ def test_tree_status_idx():


 def test_get_ancestors():
-    # Check that the ancestor search excludes the root and can propagate to parent trees
+    """Check the ancestor search and its propagation to parent trees"""
     parent_levels = [
         {"descr": "parent_level0", "max_iter": 2},
         {"descr": "parent_level1", "max_iter": 4},
@@ -76,23 +75,31 @@
         estimator_name="parent_estimator", levels=parent_levels
     )
     parent_node = parent_computation_tree.root.children[0].children[2]
+    # indices of each node (in its parent's children) in this chain are 0, 0, 2.
+    # (root is always 0).
+    expected_parent_indices = [2, 0, 0]

     computation_tree = ComputationTree(
         estimator_name="estimator", levels=levels, parent_node=parent_node
     )
     node = computation_tree.root.children[1].children[3].children[5]
+    expected_node_indices = [5, 3, 1, 0]

     ancestors = node.get_ancestors(include_ancestor_trees=False)
-    assert ancestors == [node, node.parent, node.parent.parent]
-    assert [n.idx for n in ancestors] == [5, 3, 1]
-    assert computation_tree.root not in ancestors
+    assert ancestors == [
+        node, node.parent, node.parent.parent, node.parent.parent.parent
+    ]
+    assert [n.idx for n in ancestors] == expected_node_indices
+    assert computation_tree.root in ancestors

     ancestors = node.get_ancestors(include_ancestor_trees=True)
     assert ancestors == [
         node,
         node.parent,
         node.parent.parent,
+        node.parent.parent.parent,
         parent_node,
         parent_node.parent,
+        parent_node.parent.parent,
     ]
-    assert [n.idx for n in ancestors] == [5, 3, 1, 2, 0]
+    assert [n.idx for n in ancestors] == expected_node_indices + expected_parent_indices

From 596a58ef39815701346c47769d2bb14ab7814da9 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Wed, 23 Feb 2022 17:57:49 +0100
Subject: [PATCH 10/20] cln

---
 sklearn/pipeline.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index c845f684a7945..433b9e4d57c56 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -344,8 +344,10 @@ def _fit(self, X, y=None, **fit_params_steps):
                 cloned_transformer = transformer
             else:
                 cloned_transformer = clone(transformer)
-            # Fit or load from cache the current transformer
+
             self._propagate_callbacks(cloned_transformer, parent_node=node)
+
+            # Fit or load from cache the current transformer
             X, fitted_transformer = fit_transform_one_cached(
                 cloned_transformer,
                 X,

From 4f9363cf7ec622bab3cd320c9fcadc128ffcbb47 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Mon, 12 Sep 2022 18:29:10 +0200
Subject: [PATCH 11/20] wip

---
 sklearn/callback/_base.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py
index a473f172fd575..0e11acd4f54ef 100644
--- a/sklearn/callback/_base.py
+++ b/sklearn/callback/_base.py
@@ -3,7 +3,8 @@
 from abc import ABC, abstractmethod


-# Not a method of BaseEstimator because it might be called from an extern function
+# Not a method of BaseEstimator because it might not be directly called from fit but
+# by a non-method function called by fit
 def _eval_callbacks_on_fit_iter_end(**kwargs):
     """Evaluate the on_fit_iter_end method of the callbacks

@@ -54,6 +55,8 @@ class BaseCallback(ABC):
     def on_fit_begin(self, estimator, *, X=None, y=None):
         """Method called at the beginning of the fit method of the estimator

+        Only called by the estimator this callback was set on, not by estimators it was auto-propagated to.
+
         Parameters
         ----------
         estimator: estimator instance
@@ -105,6 +108,11 @@ class (e.g. LogisticRegressionCV -> LogisticRegression)
             used by generic callbacks but by a callback designed for a specific
             estimator instead.

+        - extra_verbose: dict
+            Model specific quantities meant for verbose output. This is not meant to be
+            used by generic callbacks but by a callback designed for a specific
+            estimator instead.
+ Returns ------- stop : bool or None From 35c5284239faf6f962d8d3f66436889da0020291 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 16 Sep 2022 11:05:15 +0200 Subject: [PATCH 12/20] wip --- sklearn/base.py | 8 ++ sklearn/callback/_base.py | 11 ++ sklearn/callback/tests/_utils.py | 109 ++++++++++++++++++ .../test_base_estimator_callback_methods.py | 29 +++++ sklearn/callback/tests/test_callbacks.py | 17 +++ 5 files changed, 174 insertions(+) create mode 100644 sklearn/callback/tests/_utils.py diff --git a/sklearn/base.py b/sklearn/base.py index 11de1ecfb1fd2..e8938f1c134e8 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -10,6 +10,8 @@ import inspect import re import pickle +from shutil import rmtree +from functools import partial import numpy as np @@ -32,6 +34,7 @@ from .callback import BaseCallback from .callback import AutoPropagatedMixin from .callback import ComputationTree +from .callback._base import CallbackContext def clone(estimator, *, safe=True): @@ -678,6 +681,11 @@ def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None): ) if hasattr(self, "_callbacks"): + # + #if self._computation_tree.parent_node is None: + CallbackContext(self._callbacks, finalizer=partial(rmtree, ignore_errors=True), finalizer_args=self._computation_tree.tree_dir) + + # file_path = self._computation_tree.tree_dir / "computation_tree.pkl" with open(file_path, "wb") as f: pickle.dump(self._computation_tree, f) diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py index 0e11acd4f54ef..ea0b28be5f937 100644 --- a/sklearn/callback/_base.py +++ b/sklearn/callback/_base.py @@ -1,6 +1,7 @@ # License: BSD 3 clause from abc import ABC, abstractmethod +import weakref # Not a method of BaseEstimator because it might not be directly called from fit but @@ -120,6 +121,9 @@ class (e.g. 
LogisticRegressionCV -> LogisticRegression)
         """
         pass
 
+    def _set_context(self, context):
+        self._callback_context = context
+
 
 class AutoPropagatedMixin:
     """Mixin for auto-propagated callbacks
@@ -132,3 +136,10 @@ class AutoPropagatedMixin:
     """
 
     pass
+
+
+class CallbackContext:
+    def __init__(self, callbacks, finalizer, finalizer_args):
+        for callback in callbacks:
+            callback._set_context(self)
+        weakref.finalize(self, finalizer, finalizer_args)
diff --git a/sklearn/callback/tests/_utils.py b/sklearn/callback/tests/_utils.py
new file mode 100644
index 0000000000000..84e94fce16e7c
--- /dev/null
+++ b/sklearn/callback/tests/_utils.py
@@ -0,0 +1,109 @@
+from functools import partial
+
+from joblib.parallel import Parallel, delayed
+
+from sklearn.base import BaseEstimator, clone
+from sklearn.callback import BaseCallback
+from sklearn.callback import AutoPropagatedMixin
+from sklearn.callback._base import _eval_callbacks_on_fit_iter_end
+
+
+class TestingCallback(BaseCallback):
+    def on_fit_begin(self, estimator, *, X=None, y=None):
+        pass
+
+    def on_fit_end(self):
+        pass
+
+    def on_fit_iter_end(self, estimator, node, **kwargs):
+        pass
+
+
+class TestingAutoPropagatedCallback(TestingCallback, AutoPropagatedMixin):
+    pass
+
+
+class NotValidCallback:
+    def on_fit_begin(self, estimator, *, X=None, y=None):
+        pass
+
+    def on_fit_end(self):
+        pass
+
+    def on_fit_iter_end(self, estimator, node, **kwargs):
+        pass
+
+
+class Estimator(BaseEstimator):
+    def __init__(self, max_iter=20):
+        self.max_iter = max_iter
+
+    def fit(self, X, y):
+        root = self._eval_callbacks_on_fit_begin(
+            levels=[
+                {"descr": "fit", "max_iter": self.max_iter},
+                {"descr": "iter", "max_iter": None},
+            ],
+            X=X,
+            y=y,
+        )
+
+        for i in range(self.max_iter):
+            if _eval_callbacks_on_fit_iter_end(
+                estimator=self,
+                node=root.children[i],
+                from_reconstruction_attributes=partial(
+                    self._from_reconstruction_attributes,
+                    reconstruction_attributes=lambda: {"n_iter_": i + 1},
+                ),
+            ):
+                break
+
+        self.n_iter_ = i + 1
+
+        self._eval_callbacks_on_fit_end()
+
+        return self
+
+
+class MetaEstimator(BaseEstimator):
+    def __init__(
+        self, estimator, n_outer=4, n_inner=3, n_jobs=None, prefer="processes"
+    ):
+        self.estimator = estimator
+        self.n_outer = n_outer
+        self.n_inner = n_inner
+        self.n_jobs = n_jobs
+        self.prefer = prefer
+
+    def fit(self, X, y):
+        root = self._eval_callbacks_on_fit_begin(
+            levels=[
+                {"descr": "fit", "max_iter": self.n_outer},
+                {"descr": "outer", "max_iter": self.n_inner},
+                {"descr": "inner", "max_iter": None},
+            ],
+            X=X,
+            y=y,
+        )
+
+        res = Parallel(n_jobs=self.n_jobs, prefer=self.prefer)(
+            delayed(self._func)(self.estimator, X, y, node, i)
+            for i, node in enumerate(root.children)
+        )
+
+        self._eval_callbacks_on_fit_end()
+
+        return self
+
+    def _func(self, estimator, X, y, parent_node, i):
+        for j, node in enumerate(parent_node.children):
+            est = clone(estimator)
+            self._propagate_callbacks(est, parent_node=node)
+            est.fit(X, y)
+
+            _eval_callbacks_on_fit_iter_end(estimator=self, node=node)
+
+        _eval_callbacks_on_fit_iter_end(estimator=self, node=parent_node)
+
+        return
\ No newline at end of file
diff --git a/sklearn/callback/tests/test_base_estimator_callback_methods.py b/sklearn/callback/tests/test_base_estimator_callback_methods.py
index 676f0a5cfdd0e..ea750abbcf890 100644
--- a/sklearn/callback/tests/test_base_estimator_callback_methods.py
+++ b/sklearn/callback/tests/test_base_estimator_callback_methods.py
@@ -9,6 +9,8 @@
 from sklearn.callback.tests._utils import Estimator
 from sklearn.callback.tests._utils import MetaEstimator
 
+from sklearn.callback import ProgressBar
+
 
 @pytest.mark.parametrize("callbacks",
     [
@@ -93,3 +95,30 @@ def test_eval_callbacks_on_fit_begin():
 
     ct_pickle = Path(estimator._computation_tree.tree_dir) / "computation_tree.pkl"
     assert ct_pickle.exists()
+
+
+def test_callback_context_finalize():
+    """Check that the folder containing the computation tree of the estimator is
+    deleted when there are no reference left to its callbacks.
+    """
+    callback = TestingCallback()
+
+    # estimator is not fitted, its computation tree is not built yet
+    est = Estimator()._set_callbacks(callbacks=callback)
+    assert not hasattr(est, "_computation_tree")
+
+    # estimator is fitted, a folder has been created to hold its computation tree
+    est.fit(X=None, y=None)
+    assert hasattr(est, "_computation_tree")
+    tree_dir = est._computation_tree.tree_dir
+    assert tree_dir.is_dir()
+
+    # there is no more reference to the estimator, but there is still a reference to the
+    # callback which might need to access the computation tree
+    del est
+    assert tree_dir.is_dir()
+
+    # there is no more reference to the callback, the computation tree folder must be
+    # deleted
+    del callback
+    assert not tree_dir.is_dir()
diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py
index a87cdbcbf3199..bd76325e0af28 100644
--- a/sklearn/callback/tests/test_callbacks.py
+++ b/sklearn/callback/tests/test_callbacks.py
@@ -6,7 +6,11 @@
 
 import numpy as np
 
+from sklearn.callback import ConvergenceMonitor
+from sklearn.callback import EarlyStopping
+from sklearn.callback import ProgressBar
 from sklearn.callback import Snapshot
+from sklearn.callback import TextVerbose
 
 from sklearn.callback.tests._utils import Estimator
 from sklearn.callback.tests._utils import MetaEstimator
@@ -15,6 +19,19 @@
 y = np.zeros(100, dtype=int)
 
 
+@pytest.mark.parametrize("Callback", [ConvergenceMonitor, EarlyStopping, ProgressBar, Snapshot, TextVerbose,])
+def test_callback_doesnt_hold_ref_to_estimator(Callback):
+    callback = Callback()
+    est = Estimator()._set_callbacks(callbacks=callback)
+    est.fit(X, y)
+
+    tree_dir = est._computation_tree.tree_dir
+
+    del est
+    del callback
+    assert not tree_dir.is_dir()
+
+
 @pytest.mark.parametrize("n_jobs", (1, 2))
 @pytest.mark.parametrize("prefer", ("threads", "processes"))
 def test_snapshot_meta_estimator(n_jobs, prefer):

From 115e1840122e542fc940c2dbec056273e6440965 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Fri, 16 Sep 2022 17:48:42 +0200
Subject: [PATCH 13/20] wip

---
 sklearn/base.py                          | 15 ++++++++-------
 sklearn/callback/_base.py                |  5 ++++-
 sklearn/callback/_progressbar.py         |  4 ++--
 sklearn/callback/tests/test_callbacks.py | 16 +++++++++-------
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index e8938f1c134e8..14da63c1b9cb2 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -3,6 +3,7 @@
 # Author: Gael Varoquaux
 # License: BSD 3 clause
 
+from codecs import ignore_errors
 import copy
 import warnings
 from collections import defaultdict
@@ -645,10 +646,11 @@ def _propagate_callbacks(self, sub_estimator, *, parent_node):
 
         sub_estimator._parent_ct_node = parent_node
 
-        if not hasattr(sub_estimator, "_callbacks"):
-            sub_estimator._callbacks = propagated_callbacks
-        else:
-            sub_estimator._callbacks.extend(propagated_callbacks)
+        sub_estimator._set_callbacks(getattr(sub_estimator, "_callbacks", []) + propagated_callbacks)
 
     def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None):
         """Evaluate the on_fit_begin method of the callbacks
@@ -681,11 +683,10 @@ def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None):
         )
 
         if hasattr(self, "_callbacks"):
-            #
-            #if self._computation_tree.parent_node is None:
+            #
+            CallbackContext(self._callbacks, finalizer=partial(rmtree, ignore_errors=True), finalizer_args=self._computation_tree.tree_dir)
-            #
+            #
             file_path = self._computation_tree.tree_dir / "computation_tree.pkl"
             with open(file_path, "wb") as f:
                 pickle.dump(self._computation_tree, f)
diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py
index ea0b28be5f937..045065801cbbd 100644
--- a/sklearn/callback/_base.py
+++ b/sklearn/callback/_base.py
@@ -122,7 +122,10 @@ class (e.g. LogisticRegressionCV -> LogisticRegression)
         pass
 
     def _set_context(self, context):
-        self._callback_context = context
+        if not hasattr(self, "_callback_contexts"):
+            self._callback_contexts = []
+
+        self._callback_contexts.append(context)
 
 
 class AutoPropagatedMixin:
diff --git a/sklearn/callback/_progressbar.py b/sklearn/callback/_progressbar.py
index ae11e67d59f57..fd7201de5c918 100644
--- a/sklearn/callback/_progressbar.py
+++ b/sklearn/callback/_progressbar.py
@@ -105,7 +105,7 @@ class _RichProgressMonitor(Thread):
 
     def __init__(self, estimator, event, max_depth_show=None, max_depth_keep=None):
         Thread.__init__(self)
-        self.estimator = estimator
+        self.computation_tree = estimator._computation_tree
         self.event = event
         self.max_depth_show = max_depth_show
         self.max_depth_keep = max_depth_keep
@@ -151,7 +151,7 @@ def _recursive_update_tasks(self, this_dir=None, depth=0):
             return
 
         if this_dir is None:
-            this_dir = self.estimator._computation_tree.tree_dir
+            this_dir = self.computation_tree.tree_dir
 
         # _ordered_tasks holds the list of the tasks in the order we want them to
         # be displayed.
         self._progress_ctx._ordered_tasks = []
diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py
index bd76325e0af28..2a457d354077e 100644
--- a/sklearn/callback/tests/test_callbacks.py
+++ b/sklearn/callback/tests/test_callbacks.py
@@ -2,6 +2,7 @@
 import pickle
 
 import pytest
+import sys
 import tempfile
 
 import numpy as np
@@ -22,14 +23,15 @@
 @pytest.mark.parametrize("Callback", [ConvergenceMonitor, EarlyStopping, ProgressBar, Snapshot, TextVerbose,])
 def test_callback_doesnt_hold_ref_to_estimator(Callback):
     callback = Callback()
-    est = Estimator()._set_callbacks(callbacks=callback)
+    est = Estimator()
+    callback_refcount = sys.getrefcount(callback)
+    est_refcount = sys.getrefcount(est)
+
+    est._set_callbacks(callbacks=callback)
     est.fit(X, y)
-
-    tree_dir = est._computation_tree.tree_dir
-
-    del est
-    del callback
-    assert not tree_dir.is_dir()
+    # estimator has a ref on the callback but the callback has no ref to the estimator
+    assert sys.getrefcount(est) == est_refcount
+    assert sys.getrefcount(callback) == callback_refcount + 1
 
 
 @pytest.mark.parametrize("n_jobs", (1, 2))

From bdb49901b778faf670e08a8025e8e0d727466608 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Wed, 21 Sep 2022 12:11:49 +0200
Subject: [PATCH 14/20] wip

---
 sklearn/base.py                  |  7 +++++++
 sklearn/callback/_base.py        | 21 +++++++++++++++++++++
 sklearn/callback/_progressbar.py |  3 +++
 sklearn/decomposition/_nmf.py    |  5 +++++
 sklearn/pipeline.py              |  2 ++
 5 files changed, 38 insertions(+)

diff --git a/sklearn/base.py b/sklearn/base.py
index 14da63c1b9cb2..78dfa06178bc7 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -718,6 +718,13 @@ def _eval_callbacks_on_fit_end(self):
             if not is_propagated:
                 callback.on_fit_end()
 
+    def _eval_callbacks_on_fit_exception(self):
+        if not hasattr(self, "_callbacks"):
+            return
+
+        for callback in self._callbacks:
+            callback.on_fit_exception()
+
     def _from_reconstruction_attributes(self, *, reconstruction_attributes):
         """Return an as if fitted copy of this estimator
 
diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py
index 045065801cbbd..c78fb3c773b61 100644
--- a/sklearn/callback/_base.py
+++ b/sklearn/callback/_base.py
@@ -1,6 +1,7 @@
 # License: BSD 3 clause
 
 from abc import ABC, abstractmethod
+from functools import wraps
 import weakref
 
 
@@ -121,6 +122,10 @@ class (e.g. LogisticRegressionCV -> LogisticRegression)
         """
         pass
 
+    @abstractmethod
+    def on_fit_exception(self):
+        pass
+
     def _set_context(self, context):
         if not hasattr(self, "_callback_contexts"):
             self._callback_contexts = []
@@ -146,3 +151,19 @@ def __init__(self, callbacks, finalizer, finalizer_args):
         for callback in callbacks:
             callback._set_context(self)
         weakref.finalize(self, finalizer, finalizer_args)
+
+
+def callback_aware(fit_method):
+    """Decorator ...
+ """ + @wraps(fit_method) + def inner(self, *args, **kwargs): + try: + return fit_method(self, *args, **kwargs) + except BaseException: + self._eval_callbacks_on_fit_exception() + raise + finally: + self._eval_callbacks_on_fit_end() + + return inner diff --git a/sklearn/callback/_progressbar.py b/sklearn/callback/_progressbar.py index fd7201de5c918..713ab995e169a 100644 --- a/sklearn/callback/_progressbar.py +++ b/sklearn/callback/_progressbar.py @@ -62,6 +62,9 @@ def on_fit_end(self): self._stop_event.set() self.progress_monitor.join() + def on_fit_exception(self): + pass + def __getstate__(self): state = self.__dict__.copy() if "_stop_event" in state: diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index f54fd2d18e690..759946afc8c5a 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -9,6 +9,7 @@ from abc import ABC from functools import partial from numbers import Integral, Real +from subprocess import call import numpy as np import scipy.sparse as sp import time @@ -34,6 +35,7 @@ validate_params, ) from ..callback._base import _eval_callbacks_on_fit_iter_end +from ..callback._base import callback_aware EPSILON = np.finfo(np.float32).eps @@ -869,6 +871,8 @@ def _fit_multiplicative_update( H_sum, HHt, XHt = None, None, None for n_iter in range(1, max_iter + 1): + if n_iter == 30: + raise ValueError("eh ouais") # update W # H_sum, HHt and XHt are saved and reused if not update_H W, H_sum, HHt, XHt = _multiplicative_update_w( @@ -1726,6 +1730,7 @@ def _check_params(self, X): return self + @callback_aware def fit_transform(self, X, y=None, W=None, H=None): """Learn a NMF model for the data X and returns the transformed data. diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index b93d412020bef..81f4500726ff3 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -32,6 +32,7 @@ from .utils.fixes import delayed from .exceptions import NotFittedError from .callback._base import _eval_callbacks_on_fit_iter_end +from .callback._base import callback_aware __all__ = ["Pipeline", "FeatureUnion", "make_pipeline", "make_union"] @@ -366,6 +367,7 @@ def _fit(self, X, y=None, **fit_params_steps): return X + @callback_aware def fit(self, X, y=None, **fit_params): """Fit the model. From 7a43c306b6f57b847b3f7348905c47d2786757fc Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 23 Sep 2022 18:15:57 +0200 Subject: [PATCH 15/20] wip --- sklearn/base.py | 26 +++------ sklearn/callback/__init__.py | 2 - sklearn/callback/_base.py | 49 +++++++++------- sklearn/callback/_computation_tree.py | 5 ++ sklearn/callback/_progressbar.py | 84 +++++++++++++++++++++------ sklearn/callback/_snapshot.py | 2 - sklearn/callback/_text_verbose.py | 5 +- sklearn/callback/tests/_utils.py | 6 +- sklearn/decomposition/_nmf.py | 2 - 9 files changed, 112 insertions(+), 69 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 78dfa06178bc7..9b4e659d8647a 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -33,7 +33,6 @@ from .utils._estimator_html_repr import estimator_html_repr from .utils._param_validation import validate_parameter_constraints from .callback import BaseCallback -from .callback import AutoPropagatedMixin from .callback import ComputationTree from .callback._base import CallbackContext @@ -617,13 +616,12 @@ def _propagate_callbacks(self, sub_estimator, *, parent_node): method of the sub-estimator is called. 
""" if hasattr(sub_estimator, "_callbacks") and any( - isinstance(callback, AutoPropagatedMixin) - for callback in sub_estimator._callbacks + callback.auto_propagate for callback in sub_estimator._callbacks ): bad_callbacks = [ callback.__class__.__name__ for callback in sub_estimator._callbacks - if isinstance(callback, AutoPropagatedMixin) + if callback.auto_propagate ] raise TypeError( f"The sub-estimators ({sub_estimator.__class__.__name__}) of a" @@ -638,7 +636,7 @@ def _propagate_callbacks(self, sub_estimator, *, parent_node): propagated_callbacks = [ callback for callback in self._callbacks - if isinstance(callback, AutoPropagatedMixin) + if callback.auto_propagate ] if not propagated_callbacks: @@ -646,11 +644,9 @@ def _propagate_callbacks(self, sub_estimator, *, parent_node): sub_estimator._parent_ct_node = parent_node - # if not hasattr(sub_estimator, "_callbacks"): - # sub_estimator._callbacks = propagated_callbacks - # else: - # sub_estimator._callbacks.extend(propagated_callbacks) - sub_estimator._set_callbacks(getattr(sub_estimator, "_callbacks", []) + propagated_callbacks) + sub_estimator._set_callbacks( + getattr(sub_estimator, "_callbacks", []) + propagated_callbacks + ) def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None): """Evaluate the on_fit_begin method of the callbacks @@ -694,10 +690,7 @@ def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None): # Only call the on_fit_begin method of callbacks that are not # propagated from a meta-estimator. for callback in self._callbacks: - is_propagated = hasattr(self, "_parent_ct_node") and isinstance( - callback, AutoPropagatedMixin - ) - if not is_propagated: + if not callback._is_propagated(estimator=self): callback.on_fit_begin(estimator=self, X=X, y=y) return self._computation_tree.root @@ -712,10 +705,7 @@ def _eval_callbacks_on_fit_end(self): # Only call the on_fit_end method of callbacks that are not # propagated from a meta-estimator. for callback in self._callbacks: - is_propagated = isinstance(callback, AutoPropagatedMixin) and hasattr( - self, "_parent_ct_node" - ) - if not is_propagated: + if not callback._is_propagated(estimator=self): callback.on_fit_end() def _eval_callbacks_on_fit_exception(self): diff --git a/sklearn/callback/__init__.py b/sklearn/callback/__init__.py index c8d5ea0bf0606..9767411b6c934 100644 --- a/sklearn/callback/__init__.py +++ b/sklearn/callback/__init__.py @@ -1,6 +1,5 @@ # License: BSD 3 clause -from ._base import AutoPropagatedMixin from ._base import BaseCallback from ._computation_tree import ComputationNode from ._computation_tree import ComputationTree @@ -12,7 +11,6 @@ from ._text_verbose import TextVerbose __all__ = [ - "AutoPropagatedMixin", "BaseCallback", "ComputationNode", "ComputationTree", diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py index c78fb3c773b61..65b24dc85e9bb 100644 --- a/sklearn/callback/_base.py +++ b/sklearn/callback/_base.py @@ -33,17 +33,11 @@ def _eval_callbacks_on_fit_iter_end(**kwargs): # stopping_criterion and reconstruction_attributes can be costly to compute. They # are passed as lambdas for lazy evaluation. We only actually compute them if a # callback requests it. 
-    if any(
-        getattr(callback, "request_stopping_criterion", False)
-        for callback in estimator._callbacks
-    ):
+    if any(cb.request_stopping_criterion for cb in estimator._callbacks):
         kwarg = kwargs.pop("stopping_criterion", lambda: None)()
         kwargs["stopping_criterion"] = kwarg
 
-    if any(
-        getattr(callback, "request_from_reconstruction_attributes", False)
-        for callback in estimator._callbacks
-    ):
+    if any(cb.request_from_reconstruction_attributes for cb in estimator._callbacks):
         kwarg = kwargs.pop("from_reconstruction_attributes", lambda: None)()
         kwargs["from_reconstruction_attributes"] = kwarg
 
@@ -126,6 +120,32 @@ class (e.g. LogisticRegressionCV -> LogisticRegression)
     def on_fit_exception(self):
         pass
 
+    @property
+    def auto_propagate(self):
+        """Whether or not this callback should be propagated to sub-estimators.
+
+        An auto-propagated callback (from a meta-estimator to its sub-estimators) must
+        be set on the meta-estimator. Its `on_fit_begin` and `on_fit_end` methods will
+        only be called at the beginning and end of the fit method of the meta-estimator,
+        while its `on_fit_iter_end` method will be called at each computation node of
+        the meta-estimator and its sub-estimators.
+        """
+        return False
+
+    def _is_propagated(self, estimator):
+        """Check if this callback attached to estimator has been propagated from a
+        meta-estimator.
+        """
+        return self.auto_propagate and hasattr(estimator, "_parent_ct_node")
+
+    @property
+    def request_stopping_criterion(self):
+        return False
+
+    @property
+    def request_from_reconstruction_attributes(self):
+        return False
+
     def _set_context(self, context):
         if not hasattr(self, "_callback_contexts"):
             self._callback_contexts = []
@@ -133,19 +153,6 @@ def _set_context(self, context):
         self._callback_contexts.append(context)
 
 
-class AutoPropagatedMixin:
-    """Mixin for auto-propagated callbacks
-
-    An auto-propagated callback (from a meta-estimator to its sub-estimators) must be
-    set on the meta-estimator. Its `on_fit_begin` and `on_fit_end` methods will only be
-    called at the beginning and end of the fit method of the meta-estimator, while its
-    `on_fit_iter_end` method will be called at each computation node of the
-    meta-estimator and its sub-estimators.
-    """
-
-    pass
-
-
 class CallbackContext:
     def __init__(self, callbacks, finalizer, finalizer_args):
         for callback in callbacks:
diff --git a/sklearn/callback/_computation_tree.py b/sklearn/callback/_computation_tree.py
index a69a8788e26c5..a6eb739580446 100644
--- a/sklearn/callback/_computation_tree.py
+++ b/sklearn/callback/_computation_tree.py
@@ -208,6 +208,11 @@ def get_progress(self, node):
             [self._tree_status[child.tree_status_idx] for child in node.children]
         )
 
+    def get_child_computation_tree_dir(self, node):
+        if node.children:
+            raise ValueError("node is not a leaf")
+        return self.tree_dir / str(node.tree_status_idx)
+
     def iterate(self, include_leaves=False):
         """Return an iterable over the nodes of the computation tree
 
diff --git a/sklearn/callback/_progressbar.py b/sklearn/callback/_progressbar.py
index 713ab995e169a..1de13c87f2a8f 100644
--- a/sklearn/callback/_progressbar.py
+++ b/sklearn/callback/_progressbar.py
@@ -1,21 +1,30 @@
 # License: BSD 3 clause
 
-from copy import copy
-import pickle
+import importlib
 from threading import Thread, Event
 
-import numpy as np
-from tqdm import tqdm
-from rich.progress import Progress
-from rich.progress import BarColumn, TimeRemainingColumn, TextColumn
-from rich.style import Style
-
 from . import BaseCallback
-from . import AutoPropagatedMixin
 from . import load_computation_tree
 
 
-class ProgressBar(BaseCallback, AutoPropagatedMixin):
+def _check_backend_support(backend, caller_name):
+    """Raise ImportError with detailed error message if backend is not installed.
+
+    Parameters
+    ----------
+    backend : {"rich", "tqdm"}
+        The requested backend.
+
+    caller_name : str
+        The name of the caller that requires the backend.
+    """
+    try:
+        importlib.import_module(backend)  # noqa
+    except ImportError as e:
+        raise ImportError(f"{caller_name} requires {backend} installed.") from e
+
+
+class ProgressBar(BaseCallback):
     """Callback that displays progress bars for each iterative steps of the estimator
 
     Parameters
@@ -31,13 +40,20 @@ class ProgressBar(BaseCallback):
         finished.
     """
 
+    auto_propagate = True
+
     def __init__(self, backend="rich", max_depth_show=None, max_depth_keep=None):
+        if backend not in ("rich", "tqdm"):
+            raise ValueError(f"backend should be 'rich' or 'tqdm', got {self.backend} instead.")
+        _check_backend_support(backend, caller_name="Progressbar")
         self.backend = backend
+
         if max_depth_show is not None and max_depth_show < 0:
             raise ValueError(f"max_depth_show should be >= 0.")
+        self.max_depth_show = max_depth_show
+
         if max_depth_keep is not None and max_depth_keep < 0:
             raise ValueError(f"max_depth_keep should be >= 0.")
-        self.max_depth_show = max_depth_show
         self.max_depth_keep = max_depth_keep
 
     def on_fit_begin(self, estimator, X=None, y=None):
@@ -50,8 +66,11 @@ def on_fit_begin(self, estimator, X=None, y=None):
                 max_depth_show=self.max_depth_show,
                 max_depth_keep=self.max_depth_keep,
             )
-        else:
-            raise ValueError(f"backend should be 'rich', got {self.backend} instead.")
+        elif self.backend == "tqdm":
+            self.progress_monitor = _TqdmProgressMonitor(
+                estimator=estimator,
+                event=self._stop_event,
+            )
 
         self.progress_monitor.start()
 
@@ -77,10 +96,15 @@ def __getstate__(self):
 # Custom Progress class to allow showing the tasks in a given order (given by setting
 # the _ordered_tasks attribute). In particular it allows to dynamically create and
 # insert tasks between existing tasks.
-class _Progress(Progress):
-    def get_renderables(self):
-        table = self.make_tasks_table(getattr(self, "_ordered_tasks", []))
-        yield table
+
+try:
+    from rich.progress import Progress
+    class _Progress(Progress):
+        def get_renderables(self):
+            table = self.make_tasks_table(getattr(self, "_ordered_tasks", []))
+            yield table
+except ImportError:
+    pass
 
 
 class _RichProgressMonitor(Thread):
@@ -119,6 +143,9 @@ def __init__(self, estimator, event, max_depth_show=None, max_depth_keep=None):
         self._computation_trees = {}
 
     def run(self):
+        from rich.progress import BarColumn, TimeRemainingColumn, TextColumn
+        from rich.style import Style
+
         with _Progress(
             TextColumn("[progress.description]{task.description}"),
             BarColumn(
@@ -218,7 +245,8 @@ def _recursive_update_tasks(self, this_dir=None, depth=0):
             else:
                 # node is a leaf, look for tasks of its sub computation tree before
                 # going to the next node
-                child_dir = this_dir / str(node.tree_status_idx)
+                child_dir = computation_tree.get_child_computation_tree_dir(node)
+                # child_dir = this_dir / str(node.tree_status_idx)
                 if child_dir.exists():
                     self._recursive_update_tasks(
                         child_dir, depth + computation_tree.depth
@@ -258,3 +286,23 @@ def _get_parent_task(self, node, computation_tree, task_ids):
             ]
             return self._progress_ctx._tasks[task_id]
         return
+
+
+class _TqdmProgressMonitor(Thread):
+    def __init__(self, estimator, event):
+        Thread.__init__(self)
+        self.computation_tree = estimator._computation_tree
+        self.event = event
+
+    def run(self):
+        from tqdm import tqdm
+
+        root = self.computation_tree.root
+
+        with tqdm(total=len(root.children)) as pbar:
+            while not self.event.wait(0.05):
+                node_progress = self.computation_tree.get_progress(root)
+                if node_progress != pbar.total:
+                    pbar.update(node_progress - pbar.n)
+
+            pbar.update(pbar.total - pbar.n)
diff --git a/sklearn/callback/_snapshot.py b/sklearn/callback/_snapshot.py
index cbf200336c749..238bc29cf8543 100644
--- a/sklearn/callback/_snapshot.py
+++ b/sklearn/callback/_snapshot.py
@@ -4,8 +4,6 @@
 from pathlib import Path
 import pickle
 
-import numpy as np
-
 from . import BaseCallback
 
 
diff --git a/sklearn/callback/_text_verbose.py b/sklearn/callback/_text_verbose.py
index b857ff592c87c..0064ec97f2052 100644
--- a/sklearn/callback/_text_verbose.py
+++ b/sklearn/callback/_text_verbose.py
@@ -3,10 +3,11 @@
 import time
 
 from . import BaseCallback
-from . import AutoPropagatedMixin
 
 
-class TextVerbose(BaseCallback, AutoPropagatedMixin):
+class TextVerbose(BaseCallback):
+
+    auto_propagate = True
     request_stopping_criterion = True
 
     def __init__(self, min_time_between_calls=0):
diff --git a/sklearn/callback/tests/_utils.py b/sklearn/callback/tests/_utils.py
index 84e94fce16e7c..888a5649c19bb 100644
--- a/sklearn/callback/tests/_utils.py
+++ b/sklearn/callback/tests/_utils.py
@@ -4,7 +4,6 @@
 
 from sklearn.base import BaseEstimator, clone
 from sklearn.callback import BaseCallback
-from sklearn.callback import AutoPropagatedMixin
 from sklearn.callback._base import _eval_callbacks_on_fit_iter_end
 
 
@@ -19,9 +18,8 @@ def on_fit_iter_end(self, estimator, node, **kwargs):
         pass
 
 
-class TestingAutoPropagatedCallback(TestingCallback, AutoPropagatedMixin):
-    pass
-
+class TestingAutoPropagatedCallback(TestingCallback):
+    auto_propagate = True
 
 class NotValidCallback:
     def on_fit_begin(self, estimator, *, X=None, y=None):
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 7d0ae56f09c31..75185df49de5f 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -847,8 +847,6 @@ def _fit_multiplicative_update(
     H_sum, HHt, XHt = None, None, None
 
     for n_iter in range(1, max_iter + 1):
-        if n_iter == 30:
-            raise ValueError("eh ouais")
         # update W
         # H_sum, HHt and XHt are saved and reused if not update_H
         W, H_sum, HHt, XHt = _multiplicative_update_w(

From a218068ef0a6627e1e1436b6f7fd6c02186b1dd8 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Thu, 13 Oct 2022 09:55:20 +0200
Subject: [PATCH 16/20] wip

---
 sklearn/base.py                                       | 7 -------
 sklearn/callback/_base.py                             | 7 -------
 sklearn/callback/_early_stopping.py                   | 8 +++++++-
 sklearn/callback/_progressbar.py                      | 5 +----
 .../tests/test_base_estimator_callback_methods.py     | 2 --
 sklearn/callback/tests/test_callbacks.py              | 2 --
 6 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 9b4e659d8647a..687c1a9954ab8 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -708,13 +708,6 @@ def _eval_callbacks_on_fit_end(self):
             if not callback._is_propagated(estimator=self):
                 callback.on_fit_end()
 
-    def _eval_callbacks_on_fit_exception(self):
-        if not hasattr(self, "_callbacks"):
-            return
-
-        for callback in self._callbacks:
-            callback.on_fit_exception()
-
     def _from_reconstruction_attributes(self, *, reconstruction_attributes):
         """Return an as if fitted copy of this estimator
 
diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py
index 65b24dc85e9bb..96cc1619651dd 100644
--- a/sklearn/callback/_base.py
+++ b/sklearn/callback/_base.py
@@ -116,10 +116,6 @@ class (e.g. LogisticRegressionCV -> LogisticRegression)
         """
         pass
 
-    @abstractmethod
-    def on_fit_exception(self):
-        pass
-
     @property
     def auto_propagate(self):
         """Whether or not this callback should be propagated to sub-estimators.
@@ -167,9 +163,6 @@ def callback_aware(fit_method):
     def inner(self, *args, **kwargs):
         try:
             return fit_method(self, *args, **kwargs)
-        except BaseException:
-            self._eval_callbacks_on_fit_exception()
-            raise
         finally:
             self._eval_callbacks_on_fit_end()
 
diff --git a/sklearn/callback/_early_stopping.py b/sklearn/callback/_early_stopping.py
index 44a0108e04b26..dc45da6379a52 100644
--- a/sklearn/callback/_early_stopping.py
+++ b/sklearn/callback/_early_stopping.py
@@ -1,9 +1,13 @@
 # License: BSD 3 clause
 
+from urllib import request
 from . import BaseCallback
 
 
 class EarlyStopping(BaseCallback):
+
+    request_from_reconstruction_attributes = True
+
     def __init__(
         self,
         X_val=None,
@@ -23,7 +27,9 @@ def on_fit_begin(self, estimator, X=None, y=None):
         self._no_improvement = {}
         self._last_monitored = {}
 
-    def on_fit_iter_end(self, *, node, **kwargs):
+    def on_fit_iter_end(self, *, estimator, node, **kwargs):
+        new_estimator = kwargs.get("from_reconstruction_attributes", None)
+
         if node.depth != self.estimator._computation_tree.depth:
             return
 
diff --git a/sklearn/callback/_progressbar.py b/sklearn/callback/_progressbar.py
index 1de13c87f2a8f..bd371bc1c3a7c 100644
--- a/sklearn/callback/_progressbar.py
+++ b/sklearn/callback/_progressbar.py
@@ -29,7 +29,7 @@ class ProgressBar(BaseCallback):
 
     Parameters
     ----------
-    backend: {"rich"}, default="rich"
+    backend: {"rich", "tqdm"}, default="rich"
         The backend for the progress bars display.
 
     max_depth_show : int, default=None
@@ -81,9 +81,6 @@ def on_fit_end(self):
         self._stop_event.set()
         self.progress_monitor.join()
 
-    def on_fit_exception(self):
-        pass
-
     def __getstate__(self):
         state = self.__dict__.copy()
         if "_stop_event" in state:
diff --git a/sklearn/callback/tests/test_base_estimator_callback_methods.py b/sklearn/callback/tests/test_base_estimator_callback_methods.py
index ea750abbcf890..c77d88b68ce3d 100644
--- a/sklearn/callback/tests/test_base_estimator_callback_methods.py
+++ b/sklearn/callback/tests/test_base_estimator_callback_methods.py
@@ -9,8 +9,6 @@
 from sklearn.callback.tests._utils import Estimator
 from sklearn.callback.tests._utils import MetaEstimator
 
-from sklearn.callback import ProgressBar
-
 
 @pytest.mark.parametrize("callbacks",
     [
diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py
index 2a457d354077e..fb99003eb3b09 100644
--- a/sklearn/callback/tests/test_callbacks.py
+++ b/sklearn/callback/tests/test_callbacks.py
@@ -66,5 +66,3 @@ def test_snapshot_meta_estimator(n_jobs, prefer):
             # We kept last 5 snapshots out of 20 iterations.
             # This one is the 16 + i-th.
             assert loaded_estimator.n_iter_ == 16 + i
-
-

From f794694ce9fea32213503d926a314511bade5e8e Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Thu, 13 Oct 2022 16:27:27 +0200
Subject: [PATCH 17/20] update poor_score

---
 doc/developers/develop.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst
index 0e4b8258476da..3329649d20513 100644
--- a/doc/developers/develop.rst
+++ b/doc/developers/develop.rst
@@ -553,8 +553,9 @@ preserves_dtype (default=``[np.float64]``)
 
 poor_score (default=False)
     whether the estimator fails to provide a "reasonable" test-set score, which
-    currently for regression is an R2 of 0.5 on a subset of the boston housing
-    dataset, and for classification an accuracy of 0.83 on
+    currently for regression is an R2 of 0.5 on ``make_regression(n_samples=200,
+    n_features=10, n_informative=1, bias=5.0, noise=20, random_state=42)``, and
+    for classification an accuracy of 0.83 on
    ``make_blobs(n_samples=300, random_state=0)``.
    These datasets and values are based on current estimators in sklearn and
    might be replaced by something more systematic.
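[Editor's note: before the larger refactor in the next patch, here is a minimal usage
sketch of the API as it stands at this point in the series. It is not part of any
patch; it assumes this branch is installed and uses only names introduced in the
diffs above (`_set_callbacks` is still a private method, and `ProgressBar` accepts
`backend="rich"` or `"tqdm"` since patch 15):

    import numpy as np
    from sklearn.decomposition import NMF
    from sklearn.callback import ProgressBar

    # Any estimator instrumented in this branch works; NMF's coordinate-descent
    # and multiplicative-update solvers are wired up in the patches above.
    X = np.random.RandomState(0).rand(100, 20)  # non-negative data for NMF

    nmf = NMF(n_components=5, solver="mu", max_iter=200)
    # _set_callbacks returns the estimator itself, so calls can be chained.
    nmf._set_callbacks(ProgressBar(backend="tqdm"))
    nmf.fit(X)

Because `ProgressBar` sets `auto_propagate = True`, the same instance attached to a
meta-estimator (a `Pipeline`, or the `MetaEstimator` test helper) is forwarded to
every sub-estimator fit, whose computation tree is then hooked under the
meta-estimator's node.]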
From 37e569b13a7202aa79d7b5aa1b8d30de323139ca Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Wed, 21 Jun 2023 10:24:55 +0200
Subject: [PATCH 18/20] wip

---
 sklearn/base.py                                       | 60 +++++++++-----
 sklearn/callback/_base.py                             | 27 +++----
 sklearn/callback/_convergence_monitor.py              | 11 ++-
 sklearn/callback/_early_stopping.py                   | 65 +++++++++++-----
 sklearn/callback/_progressbar.py                      |  6 +-
 sklearn/callback/_text_verbose.py                     |  1 -
 sklearn/callback/tests/_utils.py                      | 13 +++-
 .../tests/test_base_estimator_callback_methods.py     |  7 +-
 sklearn/callback/tests/test_callbacks.py              | 13 +++-
 sklearn/callback/tests/test_computation_tree.py       |  5 +-
 sklearn/decomposition/_nmf.py                         | 78 ++++++++++++-------
 sklearn/linear_model/_logistic.py                     |  8 +-
 sklearn/model_selection/_search.py                    | 17 +++-
 sklearn/model_selection/_validation.py                |  2 +-
 sklearn/pipeline.py                                   |  4 +-
 15 files changed, 205 insertions(+), 112 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index feb0fb4e31a57..9c802b536f89d 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -3,7 +3,6 @@
 # Author: Gael Varoquaux
 # License: BSD 3 clause
 
-from codecs import ignore_errors
 import copy
 import functools
 import warnings
@@ -659,7 +658,7 @@ def _set_callbacks(self, callbacks):
         Returns
         -------
         self : estimator instance
-            The estimator instance itself. 
+            The estimator instance itself.
         """
         if not isinstance(callbacks, list):
             callbacks = [callbacks]
 
@@ -705,9 +704,7 @@ def _propagate_callbacks(self, sub_estimator, *, parent_node):
             return
 
         propagated_callbacks = [
-            callback
-            for callback in self._callbacks
-            if callback.auto_propagate
+            callback for callback in self._callbacks if callback.auto_propagate
         ]
 
         if not propagated_callbacks:
@@ -749,28 +746,50 @@ def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None):
             parent_node=getattr(self, "_parent_ct_node", None),
         )
 
-        if hasattr(self, "_callbacks"):
-            #
-            CallbackContext(self._callbacks, finalizer=partial(rmtree, ignore_errors=True), finalizer_args=self._computation_tree.tree_dir)
+        if not hasattr(self, "_callbacks"):
+            return self._computation_tree.root, X, y, None, None
+
+        X_val, y_val = None, None
 
-            #
-            file_path = self._computation_tree.tree_dir / "computation_tree.pkl"
-            with open(file_path, "wb") as f:
-                pickle.dump(self._computation_tree, f)
+        if any(callback.request_validation_split for callback in self._callbacks):
+            splitter = next(
+                callback.validation_split for callback in self._callbacks if hasattr(callback, "validation_split")
+            )
 
-            # Only call the on_fit_begin method of callbacks that are not
-            # propagated from a meta-estimator.
+            train, val = next(splitter.split(X))
+            if X is not None:
+                X, X_val = X[train], X[val]
+            if y is not None:
+                y, y_val = y[train], y[val]
+
+        #
+        CallbackContext(
+            self._callbacks,
+            finalizer=partial(rmtree, ignore_errors=True),
+            finalizer_args=self._computation_tree.tree_dir,
+        )
 
-            for callback in self._callbacks:
-                if not callback._is_propagated(estimator=self):
-                    callback.on_fit_begin(estimator=self, X=X, y=y)
+        #
+        file_path = self._computation_tree.tree_dir / "computation_tree.pkl"
+        with open(file_path, "wb") as f:
+            pickle.dump(self._computation_tree, f)
+
+        # Only call the on_fit_begin method of callbacks that are not
+        # propagated from a meta-estimator.
+        for callback in self._callbacks:
+            if not callback._is_propagated(estimator=self):
+                callback.on_fit_begin(estimator=self, X=X, y=y)
+
+        return self._computation_tree.root, X, y, X_val, y_val
 
     def _eval_callbacks_on_fit_end(self):
         """Evaluate the on_fit_end method of the callbacks"""
         if not hasattr(self, "_callbacks"):
             return
 
+        if not hasattr(self, "_computation_tree"):
+            return
+
         self._computation_tree._tree_status[0] = True
 
         # Only call the on_fit_end method of callbacks that are not
@@ -1309,7 +1328,10 @@ def wrapper(estimator, *args, **kwargs):
                 prefer_skip_nested_validation or global_skip_validation
             )
         ):
-            return fit_method(estimator, *args, **kwargs)
+            try:
+                return fit_method(estimator, *args, **kwargs)
+            finally:
+                estimator._eval_callbacks_on_fit_end()
 
     return wrapper
 
diff --git a/sklearn/callback/_base.py b/sklearn/callback/_base.py
index 96cc1619651dd..a07115f7e4e0c 100644
--- a/sklearn/callback/_base.py
+++ b/sklearn/callback/_base.py
@@ -30,9 +30,9 @@ def _eval_callbacks_on_fit_iter_end(**kwargs):
 
     estimator._computation_tree._tree_status[node.tree_status_idx] = True
 
-    # stopping_criterion and reconstruction_attributes can be costly to compute. They
-    # are passed as lambdas for lazy evaluation. We only actually compute them if a
-    # callback requests it.
+    # stopping_criterion and reconstruction_attributes can be costly to compute.
+    # They are passed as lambdas for lazy evaluation. We only actually
+    # compute them if a callback requests it.
     if any(cb.request_stopping_criterion for cb in estimator._callbacks):
         kwarg = kwargs.pop("stopping_criterion", lambda: None)()
         kwargs["stopping_criterion"] = kwarg
@@ -51,7 +51,7 @@ class BaseCallback(ABC):
     def on_fit_begin(self, estimator, *, X=None, y=None):
         """Method called at the beginning of the fit method of the estimator
 
-        Only called 
+        Only called
 
         Parameters
         ----------
@@ -141,11 +141,15 @@ def request_stopping_criterion(self):
     @property
     def request_from_reconstruction_attributes(self):
         return False
+
+    @property
+    def request_validation_split(self):
+        return False
 
     def _set_context(self, context):
         if not hasattr(self, "_callback_contexts"):
             self._callback_contexts = []
-            
+
         self._callback_contexts.append(context)
 
 
@@ -154,16 +158,3 @@ def __init__(self, callbacks, finalizer, finalizer_args):
         for callback in callbacks:
             callback._set_context(self)
         weakref.finalize(self, finalizer, finalizer_args)
-
-
-def callback_aware(fit_method):
-    """Decorator ...
- """ - @wraps(fit_method) - def inner(self, *args, **kwargs): - try: - return fit_method(self, *args, **kwargs) - finally: - self._eval_callbacks_on_fit_end() - - return inner diff --git a/sklearn/callback/_convergence_monitor.py b/sklearn/callback/_convergence_monitor.py index ac04335e04661..98fec496d6eb7 100644 --- a/sklearn/callback/_convergence_monitor.py +++ b/sklearn/callback/_convergence_monitor.py @@ -33,7 +33,13 @@ class ConvergenceMonitor(BaseCallback): request_reconstruction_attributes = True - def __init__(self, *, monitor="objective_function", X_val=None, y_val=None): + def __init__( + self, + *, + monitor="objective_function", + on="val", + higher_is_better=False, + ): if monitor == "objective_function": self._monitor = "objective_function" else: @@ -41,9 +47,6 @@ def __init__(self, *, monitor="objective_function", X_val=None, y_val=None): if self._monitor is None: raise ValueError(f"unknown metric {monitor}") - self.X_val = X_val - self.y_val = y_val - self._data_file = Path(mkdtemp()) / "convergence_monitor.csv" def on_fit_begin(self, estimator, *, X=None, y=None): diff --git a/sklearn/callback/_early_stopping.py b/sklearn/callback/_early_stopping.py index dc45da6379a52..6d408dda8c960 100644 --- a/sklearn/callback/_early_stopping.py +++ b/sklearn/callback/_early_stopping.py @@ -1,54 +1,77 @@ # License: BSD 3 clause -from urllib import request from . import BaseCallback class EarlyStopping(BaseCallback): - request_from_reconstruction_attributes = True def __init__( self, - X_val=None, - y_val=None, monitor="objective_function", + on="validation_set", + higher_is_better=False, + validation_split="auto", max_no_improvement=10, - tol=1e-2, + threshold=1e-2, ): - self.X_val = X_val - self.y_val = y_val + from ..model_selection import KFold + self.validation_split = validation_split + if validation_split == "auto": + self.validation_split = KFold(n_splits=5, shuffle=True, random_state=42) self.monitor = monitor + self.on = on + self.higher_is_better = higher_is_better self.max_no_improvement = max_no_improvement - self.tol = tol + self.threshold = threshold def on_fit_begin(self, estimator, X=None, y=None): - self.estimator = estimator self._no_improvement = {} self._last_monitored = {} + self.early_stopped_ = None def on_fit_iter_end(self, *, estimator, node, **kwargs): - new_estimator = kwargs.get("from_reconstruction_attributes", None) - - if node.depth != self.estimator._computation_tree.depth: + if node.depth != estimator._computation_tree.depth: return + reconstructed_estimator = kwargs.pop("from_reconstruction_attributes") + data = kwargs.pop("data") + + X = data["X_val"] if self.on == "validation_set" else data["X"] + y = data["y_val"] if self.on == "validation_set" else data["y"] + if self.monitor == "objective_function": - objective_function = kwargs.get("objective_function", None) - monitored, *_ = objective_function(self.X_val) - elif self.monitor == "TODO": - pass - - if node.parent not in self._last_monitored or monitored < self._last_monitored[ - node.parent - ] * (1 - self.tol): + new_monitored, *_ = reconstructed_estimator.objective_function(X, y, normalize=True) + elif callable(self.monitor): + new_monitored = self.monitor(reconstructed_estimator, X, y) + elif self.monitor is None or isinstance(self.monitor, str): + from ..metrics import check_scoring + scorer = check_scoring(estimator, self.monitor) + new_monitored = scorer(estimator, X, y) + + if self._score_improved(node, new_monitored): self._no_improvement[node.parent] = 0 - 
self._last_monitored[node.parent] = monitored + self._last_monitored[node.parent] = new_monitored else: self._no_improvement[node.parent] += 1 if self._no_improvement[node.parent] >= self.max_no_improvement: + self.early_stopped_ = node.idx + return True + + def _score_improved(self, node, new_monitored): + if node.parent not in self._last_monitored: return True + + last_monitored = self._last_monitored[node.parent] + if self.higher_is_better: + return new_monitored > last_monitored * (1 + self.threshold) + else: + return new_monitored < last_monitored * (1 - self.threshold) def on_fit_end(self): pass + + @property + def request_validation_split(self): + return self.on == "val" diff --git a/sklearn/callback/_progressbar.py b/sklearn/callback/_progressbar.py index bd371bc1c3a7c..738e8f897ce4a 100644 --- a/sklearn/callback/_progressbar.py +++ b/sklearn/callback/_progressbar.py @@ -44,7 +44,9 @@ class ProgressBar(BaseCallback): def __init__(self, backend="rich", max_depth_show=None, max_depth_keep=None): if backend not in ("rich", "tqdm"): - raise ValueError(f"backend should be 'rich' or 'tqdm', got {self.backend} instead.") + raise ValueError( + f"backend should be 'rich' or 'tqdm', got {self.backend} instead." + ) _check_backend_support(backend, caller_name="Progressbar") self.backend = backend @@ -96,10 +98,12 @@ def __getstate__(self): try: from rich.progress import Progress + class _Progress(Progress): def get_renderables(self): table = self.make_tasks_table(getattr(self, "_ordered_tasks", [])) yield table + except: pass diff --git a/sklearn/callback/_text_verbose.py b/sklearn/callback/_text_verbose.py index 0064ec97f2052..93f783a297d30 100644 --- a/sklearn/callback/_text_verbose.py +++ b/sklearn/callback/_text_verbose.py @@ -6,7 +6,6 @@ class TextVerbose(BaseCallback): - auto_propagate = True request_stopping_criterion = True diff --git a/sklearn/callback/tests/_utils.py b/sklearn/callback/tests/_utils.py index 888a5649c19bb..f61ffc4077dff 100644 --- a/sklearn/callback/tests/_utils.py +++ b/sklearn/callback/tests/_utils.py @@ -21,6 +21,7 @@ def on_fit_iter_end(self, estimator, node, **kwargs): class TestingAutoPropagatedCallback(TestingCallback): auto_propagate = True + class NotValidCallback: def on_fit_begin(self, estimator, *, X=None, y=None): pass @@ -37,7 +38,7 @@ def __init__(self, max_iter=20): self.max_iter = max_iter def fit(self, X, y): - root = self._eval_callbacks_on_fit_begin( + root, X, y, X_val, y_val = self._eval_callbacks_on_fit_begin( levels=[ {"descr": "fit", "max_iter": self.max_iter}, {"descr": "iter", "max_iter": None}, @@ -54,6 +55,7 @@ def fit(self, X, y): self._from_reconstruction_attributes, reconstruction_attributes=lambda: {"n_iter_": i + 1}, ), + data={"X": X, "y": y, "X_val": X_val, "y_val": y_val"}, ): break @@ -63,6 +65,9 @@ def fit(self, X, y): return self + def objective_function(self, X, y=None): + return 0, 0, 0 + class MetaEstimator(BaseEstimator): def __init__( @@ -75,7 +80,7 @@ def __init__( self.prefer = prefer def fit(self, X, y): - root = self._eval_callbacks_on_fit_begin( + root, *_ = self._eval_callbacks_on_fit_begin( levels=[ {"descr": "fit", "max_iter": self.n_outer}, {"descr": "outer", "max_iter": self.n_inner}, @@ -93,7 +98,7 @@ def fit(self, X, y): self._eval_callbacks_on_fit_end() return self - + def _func(self, estimator, X, y, parent_node, i): for j, node in enumerate(parent_node.children): est = clone(estimator) @@ -104,4 +109,4 @@ def _func(self, estimator, X, y, parent_node, i): _eval_callbacks_on_fit_iter_end(estimator=self, 
node=parent_node) - return \ No newline at end of file + return diff --git a/sklearn/callback/tests/test_base_estimator_callback_methods.py b/sklearn/callback/tests/test_base_estimator_callback_methods.py index c77d88b68ce3d..01669a5494dde 100644 --- a/sklearn/callback/tests/test_base_estimator_callback_methods.py +++ b/sklearn/callback/tests/test_base_estimator_callback_methods.py @@ -10,12 +10,13 @@ from sklearn.callback.tests._utils import MetaEstimator -@pytest.mark.parametrize("callbacks", +@pytest.mark.parametrize( + "callbacks", [ TestingCallback(), [TestingCallback()], [TestingCallback(), TestingAutoPropagatedCallback()], - ] + ], ) def test_set_callbacks(callbacks): """Sanity check for the _set_callbacks method""" @@ -49,7 +50,7 @@ def test_propagate_callbacks(): assert hasattr(sub_estimator, "_parent_ct_node") assert not_propagated_callback not in sub_estimator._callbacks - assert propagated_callback in sub_estimator._callbacks + assert propagated_callback in sub_estimator._callbacks def test_propagate_callback_no_callback(): diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py index fb99003eb3b09..aa79503545acb 100644 --- a/sklearn/callback/tests/test_callbacks.py +++ b/sklearn/callback/tests/test_callbacks.py @@ -20,13 +20,22 @@ y = np.zeros(100, dtype=int) -@pytest.mark.parametrize("Callback", [ConvergenceMonitor, EarlyStopping, ProgressBar, Snapshot, TextVerbose,]) +@pytest.mark.parametrize( + "Callback", + [ + ConvergenceMonitor, + EarlyStopping, + ProgressBar, + Snapshot, + TextVerbose, + ], +) def test_callback_doesnt_hold_ref_to_estimator(Callback): callback = Callback() est = Estimator() callback_refcount = sys.getrefcount(callback) est_refcount = sys.getrefcount(est) - + est._set_callbacks(callbacks=callback) est.fit(X, y) # estimator has a ref on the callback but the callback has no ref to the estimator diff --git a/sklearn/callback/tests/test_computation_tree.py b/sklearn/callback/tests/test_computation_tree.py index 902175b71a250..2fe3766eba489 100644 --- a/sklearn/callback/tests/test_computation_tree.py +++ b/sklearn/callback/tests/test_computation_tree.py @@ -87,7 +87,10 @@ def test_get_ancestors(): ancestors = node.get_ancestors(include_ancestor_trees=False) assert ancestors == [ - node, node.parent, node.parent.parent, node.parent.parent.parent + node, + node.parent, + node.parent.parent, + node.parent.parent.parent, ] assert [n.idx for n in ancestors] == expected_node_indices assert computation_tree.root in ancestors diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 003dbec919033..a3eac9c7e3468 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -36,7 +36,6 @@ ) from ..utils import metadata_routing from ..callback._base import _eval_callbacks_on_fit_iter_end -from ..callback._base import callback_aware EPSILON = np.finfo(np.float32).eps @@ -403,6 +402,7 @@ def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle, random_state): def _fit_coordinate_descent( X, + X_val, W, H, tol=1e-4, @@ -429,6 +429,9 @@ def _fit_coordinate_descent( X : array-like of shape (n_samples, n_features) Constant matrix. + X_val : array-like of shape (n_samples_val, n_features) + Constant validation matrix. + W : array-like of shape (n_samples, n_components) Initial guess for the solution. @@ -469,6 +472,12 @@ def _fit_coordinate_descent( results across multiple function calls. See :term:`Glossary `. 
+    estimator : estimator instance, default=None
+        The estimator calling this function. Used by callbacks.
+
+    parent_node : ComputationNode instance, default=None
+        The parent node of the current node. Used by callbacks.
+
     Returns
     -------
     W : ndarray of shape (n_samples, n_components)
@@ -490,6 +499,8 @@ def _fit_coordinate_descent(
     # so W and Ht are both in C order in memory
     Ht = check_array(H.T, order="C")
     X = check_array(X, accept_sparse="csr")
+    if X_val is not None:
+        X_val = check_array(X_val, accept_sparse="csr")
 
     rng = check_random_state(random_state)
 
@@ -527,6 +538,7 @@ def _fit_coordinate_descent(
                     "reconstruction_err_": _beta_divergence(X, W, Ht.T, 2, True),
                 },
             ),
+            data={"X": X, "y": None, "X_val": X_val, "y_val": None},
         ):
             break
 
@@ -748,6 +760,7 @@ def _multiplicative_update_h(
 
 def _fit_multiplicative_update(
     X,
+    X_val,
     W,
     H,
     beta_loss="frobenius",
@@ -773,6 +786,9 @@ def _fit_multiplicative_update(
     X : array-like of shape (n_samples, n_features)
         Constant input matrix.
 
+    X_val : array-like of shape (n_samples_val, n_features)
+        Constant validation matrix.
+
     W : array-like of shape (n_samples, n_components)
         Initial guess for the solution.
 
@@ -813,6 +829,12 @@ def _fit_multiplicative_update(
     verbose : int, default=0
         The verbosity level.
 
+    estimator : estimator instance, default=None
+        The estimator calling this function. Used by callbacks.
+
+    parent_node : ComputationNode instance, default=None
+        The parent node of the current node. Used by callbacks.
+
     Returns
     -------
     W : ndarray of shape (n_samples, n_components)
@@ -909,6 +931,7 @@ def _fit_multiplicative_update(
                     "reconstruction_err_": _beta_divergence(X, W, H, beta_loss, True),
                 },
             ),
+            data={"X": X, "y": None, "X_val": X_val, "y_val": None},
         ):
             break
 
@@ -1340,6 +1363,28 @@ def inverse_transform(self, Xt=None, W=None):
         check_is_fitted(self)
         return Xt @ self.components_
 
+    def objective_function(self, X, y=None, *, W=None, H=None, normalize=False):
+        if W is None:
+            W = self.transform(X)
+        if H is None:
+            H = self.components_
+
+        data_fit = _beta_divergence(X, W, H, self._beta_loss)
+
+        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)
+        penalization = (
+            l1_reg_W * W.sum()
+            + l1_reg_H * H.sum()
+            + l2_reg_W * (W**2).sum()
+            + l2_reg_H * (H**2).sum()
+        )
+
+        if normalize:
+            data_fit /= X.shape[0]
+            penalization /= X.shape[0]
+
+        return data_fit + penalization, data_fit, penalization
+
     @property
     def _n_features_out(self):
@@ -1617,7 +1662,6 @@ def _check_params(self, X):
 
         return self
 
-    @callback_aware
     @_fit_context(prefer_skip_nested_validation=True)
     def fit_transform(self, X, y=None, W=None, H=None):
         """Learn a NMF model for the data X and returns the transformed data.
@@ -1650,7 +1694,7 @@ def fit_transform(self, X, y=None, W=None, H=None):
             X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32]
         )
 
-        root = self._eval_callbacks_on_fit_begin(
+        root, X, _, X_val, _ = self._eval_callbacks_on_fit_begin(
             levels=[
                 {"descr": "fit", "max_iter": self.max_iter},
                 {"descr": "iter", "max_iter": None},
             ],
             X=X,
         )
 
-        W, H, n_iter = self._fit_transform(X, W=W, H=H, parent_node=root)
+        W, H, n_iter = self._fit_transform(X, X_val, W=W, H=H, parent_node=root)
 
         self.reconstruction_err_ = _beta_divergence(
             X, W, H, self._beta_loss, square_root=True
@@ -1672,7 +1716,7 @@ def fit_transform(self, X, y=None, W=None, H=None):
         return W
 
     def _fit_transform(
-        self, X, y=None, W=None, H=None, update_H=True, parent_node=None
+        self, X, X_val=None, W=None, H=None, update_H=True, parent_node=None
     ):
         """Learn a NMF model for the data X and returns the transformed data.
 
@@ -1733,6 +1777,7 @@ def _fit_transform(
         if self.solver == "cd":
             W, H, n_iter = _fit_coordinate_descent(
                 X,
+                X_val,
                 W,
                 H,
                 self.tol,
@@ -1751,6 +1796,7 @@ def _fit_transform(
         elif self.solver == "mu":
             W, H, n_iter, *_ = _fit_multiplicative_update(
                 X,
+                X_val,
                 W,
                 H,
                 self._beta_loss,
@@ -2439,28 +2485,6 @@ def partial_fit(self, X, y=None, W=None, H=None):
 
         return self
 
-    def objective_function(self, X, y=None, *, W=None, H=None, normalize=False):
-        if W is None:
-            W = self.transform(X)
-        if H is None:
-            H = self.components_
-
-        data_fit = _beta_divergence(X, W, H, self._beta_loss)
-
-        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._scale_regularization(X)
-        penalization = (
-            l1_reg_W * W.sum()
-            + l1_reg_H * H.sum()
-            + l2_reg_W * (W ** 2).sum()
-            + l2_reg_H * (H ** 2).sum()
-        )
-
-        if normalize:
-            data_fit /= X.shape[0]
-            penalization /= X.shape[0]
-
-        return data_fit + penalization, data_fit, penalization
-
     @property
     def _n_features_out(self):
         """Number of transformed output features."""
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index 732f3c25a93eb..b949a35b28d02 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -449,9 +449,7 @@ def _logistic_regression_path(
     node = (
         None
         if parent_node is None
-        else parent_node
-        if len(Cs) == 1
-        else parent_node.children
+        else parent_node if len(Cs) == 1 else parent_node.children
     )
 
     if solver == "lbfgs":
@@ -1324,7 +1322,9 @@ def fit(self, X, y, sample_weight=None):
             {"descr": "class", "max_iter": self.max_iter},
             {"descr": "iter", "max_iter": None},
         ]
-        root = self._eval_callbacks_on_fit_begin(levels=levels, X=X, y=y)
+        root, X, y, X_val, y_val = self._eval_callbacks_on_fit_begin(
+            levels=levels, X=X, y=y
+        )
 
         # distinguish between multinomial and ovr
         nodes = [root] if len(classes_) == 1 else root.children
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 9b9450cfee0ec..9ec5ce8414201 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -831,7 +831,9 @@ def fit(self, X, y=None, *, groups=None, **fit_params):
         all_out = []
         all_more_results = defaultdict(list)
 
-        def evaluate_candidates(candidate_params, cv=None, more_results=None, parent_node=None):
+        def evaluate_candidates(
+            candidate_params, cv=None, more_results=None, parent_node=None
+        ):
             cv = cv or cv_orig
             candidate_params = list(candidate_params)
             n_candidates = len(candidate_params)
@@ -863,8 +865,16 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None, parent_nod
                     caller=self,
                    node=node,
                )
-                for ((cand_idx, parameters), (split_idx, (train, test))), node in zip(product(
-                    enumerate(candidate_params), enumerate(cv.split(X, y, groups))), nodes)
+                for (
+                    (cand_idx, parameters),
+                    (split_idx, (train, test)),
+                ), node in zip(
+                    product(
+                        enumerate(candidate_params),
+                        enumerate(cv.split(X, y, groups)),
+                    ),
+                    nodes,
+                )
             )
 
             if len(out) < 1:
@@ -1477,6 +1487,7 @@ def _run_search(self, evaluate_candidates):
         """Search all candidates in param_grid"""
         evaluate_candidates(self._param_grid, parent_node=self._computation_tree.root)
 
+
 class RandomizedSearchCV(BaseSearchCV):
     """Randomized search on hyper parameters.
 
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index cb3563723027c..30fc160880d89 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -723,7 +723,7 @@ def _fit_and_score(
                 cloned_parameters[k] = clone(v, safe=False)
 
         estimator = estimator.set_params(**cloned_parameters)
-    
+
     if caller is not None:
         caller._propagate_callbacks(estimator, parent_node=node)
 
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 9002bfcb0d8ad..0eb02009ecf91 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -34,7 +34,6 @@
 from .utils.parallel import delayed, Parallel
 from .exceptions import NotFittedError
 from .callback._base import _eval_callbacks_on_fit_iter_end
-from .callback._base import callback_aware
 
 __all__ = ["Pipeline", "FeatureUnion", "make_pipeline", "make_union"]
 
@@ -357,7 +356,7 @@ def _fit(self, X, y=None, **fit_params_steps):
         # Setup the memory
         memory = check_memory(self.memory)
 
-        root = self._eval_callbacks_on_fit_begin(
+        root, *_ = self._eval_callbacks_on_fit_begin(
             levels=[
                 {"descr": "fit", "max_iter": len(self.steps)},
                 {"descr": "step", "max_iter": None},
@@ -405,7 +404,6 @@ def _fit(self, X, y=None, **fit_params_steps):
 
         return X
 
-    @callback_aware
     @_fit_context(
         # estimators in Pipeline.steps are not validated yet
         prefer_skip_nested_validation=False

From d7208facafece078b0c8e687dc066b432eac2cbc Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Thu, 29 Jun 2023 15:04:29 +0200
Subject: [PATCH 19/20] wip

---
 sklearn/base.py                                       |   2 +-
 sklearn/callback/__init__.py                          |   4 +-
 sklearn/callback/_convergence_monitor.py              | 126 ------------------
 sklearn/callback/_early_stopping.py                   |  16 +--
 sklearn/callback/_monitoring.py                       | 111 +++++++++++++++
 sklearn/callback/_text_verbose.py                     |   4 -
 sklearn/callback/tests/_utils.py                      |  18 +--
 .../tests/test_base_estimator_callback_methods.py     |   2 +-
 sklearn/callback/tests/test_callbacks.py              |   4 +-
 sklearn/decomposition/_nmf.py                         |   2 -
 .../gradient_boosting.py                              |  46 +++++++
 sklearn/linear_model/_logistic.py                     |   2 -
 sklearn/model_selection/_search.py                    |   2 -
 sklearn/pipeline.py                                   |   2 -
 14 files changed, 181 insertions(+), 112 deletions(-)
 delete mode 100644 sklearn/callback/_convergence_monitor.py
 create mode 100644 sklearn/callback/_monitoring.py

diff --git a/sklearn/base.py b/sklearn/base.py
index 9c802b536f89d..09c76277b986e 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -119,7 +119,7 @@ def _clone_parametrized(estimator, *, safe=True):
 
     # copy callbacks
     if hasattr(estimator, "_callbacks"):
-        new_object._callbacks = clone(estimator._callbacks, safe=False)
+        new_object._callbacks = estimator._callbacks
 
     # quick sanity check of the parameters of the clone
     for name in new_object_params:
diff --git a/sklearn/callback/__init__.py b/sklearn/callback/__init__.py
index 9767411b6c934..b74126e1ce327 100644
--- a/sklearn/callback/__init__.py
+++ b/sklearn/callback/__init__.py
@@ -4,7 +4,7 @@
 from ._computation_tree import ComputationNode
 from ._computation_tree import ComputationTree
 from ._computation_tree import load_computation_tree
-from ._convergence_monitor import ConvergenceMonitor
+from ._monitoring import Monitoring
 from ._early_stopping import EarlyStopping
 from ._progressbar import ProgressBar
 from ._snapshot import Snapshot
@@ -15,7 +15,7 @@
     "ComputationNode",
     "ComputationTree",
     "load_computation_tree",
-    "ConvergenceMonitor",
+    "Monitoring",
     "EarlyStopping",
     "ProgressBar",
     "Snapshot",
diff --git a/sklearn/callback/_convergence_monitor.py b/sklearn/callback/_convergence_monitor.py
deleted file mode 100644
index 98fec496d6eb7..0000000000000
--- a/sklearn/callback/_convergence_monitor.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# License: BSD 3 clause
-
-from copy import copy
-from pathlib import Path
-from tempfile import mkdtemp
-
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from . import BaseCallback
-
-# import ..metrics as metrics
-
-
-class ConvergenceMonitor(BaseCallback):
-    """Monitor model convergence.
-
-    Parameters
-    ----------
-    monitor :
-
-    X_val : ndarray, default=None
-        Validation data
-
-    y_val : ndarray, default=None
-        Validation target
-
-    Attributes
-    ----------
-    data : pandas.DataFrame
-        The monitored quantities at each iteration.
-    """
-
-    request_reconstruction_attributes = True
-
-    def __init__(
-        self,
-        *,
-        monitor="objective_function",
-        on="val",
-        higher_is_better=False,
-    ):
-        if monitor == "objective_function":
-            self._monitor = "objective_function"
-        else:
-            self._monitor = getattr(metrics, monitor, None)
-            if self._monitor is None:
-                raise ValueError(f"unknown metric {monitor}")
-
-        self._data_file = Path(mkdtemp()) / "convergence_monitor.csv"
-
-    def on_fit_begin(self, estimator, *, X=None, y=None):
-        self.estimator = estimator
-        self.X_train = X
-        self.y_train = y
-
-    def on_fit_iter_end(self, *, estimator, node, **kwargs):
-        reconstruction_attributes = kwargs.get("reconstruction_attributes", None)
-        if reconstruction_attributes is None:
-            return
-
-        new_estimator = copy(estimator)
-        for key, val in reconstruction_attributes.items():
-            setattr(new_estimator, key, val)
-
-        # if self._monitor =
-
-        obj_train, *_ = new_estimator.objective_function(
-            self.X_train, self.y_train, normalize=True
-        )
-        if self.X_val is not None:
-            obj_val, *_ = new_estimator.objective_function(
-                self.X_val, self.y_val, normalize=True
-            )
-        else:
-            obj_val = None
-
-        ancestors = node.get_ancestors()[:0:-1]
-        ancestors_desc = [
-            f"{n.computation_tree.estimator_name}-{n.description}" for n in ancestors
-        ]
-        ancestors_idx = [f"{n.idx}" for n in ancestors]
-
-        if not self._data_file.exists():
-            with open(self._data_file, "w") as f:
-                f.write(
-                    f"{','.join(ancestors_desc)},iteration,time,obj_train,obj_val\n"
-                )
-
-        with open(self._data_file, "a") as f:
-            f.write(
-                f"{','.join(ancestors_idx)},{node.idx},{curr_time},{obj_train},{obj_val}\n"
-            )
-
-    def on_fit_end(self):
-        pass
-
-    def get_data(self):
-        if not hasattr(self, "data"):
-            self.data = pd.read_csv(self._data_file)
-        return self.data
-
-    def plot(self, x="iteration"):
-        data = self.get_data()
-
-        # all columns but iteration, time, obj_train, obj_val
-        group_by_columns = list(data.columns[:-4])
-        groups = data.groupby(group_by_columns)
-
-        for key in groups.groups.keys():
-            group = groups.get_group(key)
-            fig, ax = plt.subplots()
-
-            ax.plot(group[x], group["obj_train"], label="obj_train")
-            if self.X_val is not None:
-                ax.plot(group[x], group["obj_val"], label="obj_val")
-
-            if x == "iteration":
-                x_label = "Number of iterations"
-            elif x == "time":
-                x_label = "Time (s)"
-            ax.set_xlabel(x_label)
-            ax.set_ylabel("objective function")
-
-            ax.legend()
-            plt.show()
diff --git a/sklearn/callback/_early_stopping.py b/sklearn/callback/_early_stopping.py
index 6d408dda8c960..1ad9ad8437d37 100644
--- a/sklearn/callback/_early_stopping.py
+++ b/sklearn/callback/_early_stopping.py
@@ -25,13 +25,14 @@ def __init__(
         self.max_no_improvement = max_no_improvement
         self.threshold = threshold
 
-    def on_fit_begin(self, estimator, X=None, y=None):
         self._no_improvement = {}
         self._last_monitored = {}
-        self.early_stopped_ = None
+
+    def on_fit_begin(self, estimator, X=None, y=None):
+        pass
 
     def on_fit_iter_end(self, *, estimator, node, **kwargs):
-        if node.depth != estimator._computation_tree.depth:
+        if node.depth != node.computation_tree.depth:
             return
 
@@ -46,8 +47,8 @@ def on_fit_iter_end(self, *, estimator, node, **kwargs):
         elif self.monitor is None or isinstance(self.monitor, str):
             from ..metrics import check_scoring
-            scorer = check_scoring(estimator, self.monitor)
-            new_monitored = scorer(estimator, X, y)
+            scorer = check_scoring(reconstructed_estimator, self.monitor)
+            new_monitored = scorer(reconstructed_estimator, X, y)
 
         if self._score_improved(node, new_monitored):
             self._no_improvement[node.parent] = 0
@@ -56,9 +57,8 @@ def on_fit_iter_end(self, *, estimator, node, **kwargs):
 
         if self._no_improvement[node.parent] >= self.max_no_improvement:
-            self.early_stopped_ = node.idx
             return True
-    
+
     def _score_improved(self, node, new_monitored):
@@ -74,4 +74,4 @@ def on_fit_end(self):
 
     @property
     def request_validation_split(self):
-        return self.on == "val"
+        return self.on == "validation_set"
diff --git a/sklearn/callback/_monitoring.py b/sklearn/callback/_monitoring.py
new file mode 100644
index 0000000000000..2eb7c8de5885f
--- /dev/null
+++ b/sklearn/callback/_monitoring.py
@@ -0,0 +1,111 @@
+# License: BSD 3 clause
+
+# import os
+from pathlib import Path
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+from tempfile import mkdtemp
+
+import matplotlib.pyplot as plt
+import pandas as pd
+
+from . import BaseCallback
+
+
+class Monitoring(BaseCallback):
+    """Monitor model convergence.
+
+    Parameters
+    ----------
+    monitor :
+
+    X_val : ndarray, default=None
+        Validation data
+
+    y_val : ndarray, default=None
+        Validation target
+
+    Attributes
+    ----------
+    data : pandas.DataFrame
+        The monitored quantities at each iteration.
+ """ + + request_from_reconstruction_attributes = True + + def __init__( + self, + *, + monitor="objective_function", + on="validation_set", + validation_split="auto", + ): + from ..model_selection import KFold + self.validation_split = validation_split + if validation_split == "auto": + self.validation_split = KFold(n_splits=5, shuffle=True, random_state=42) + self.monitor = monitor + self.on = on + + self._data_dir = TemporaryDirectory() + self._data_files = {} + + if isinstance(self.monitor, str): + self.monitor_name = self.monitor + elif callable(self.monitor): + self.monitor_name = self.monitor.__name__ + + def on_fit_begin(self, estimator, *, X=None, y=None): + fname = Path(self._data_dir.name) / f"{estimator._computation_tree.uid}.csv" + with open(fname, "w") as file: + file.write(f"iteration,{self.monitor_name}_train,{self.monitor_name}_val\n") + self._data_files[estimator._computation_tree] = fname + + def on_fit_iter_end(self, *, estimator, node, from_reconstruction_attributes, data, **kwargs): + if node.depth != node.computation_tree.depth: + return + + new_estimator = from_reconstruction_attributes + + X, y, X_val, y_val = data["X"], data["y"], data["X_val"], data["y_val"] + + if self.monitor == "objective_function": + new_monitored_train, *_ = new_estimator.objective_function(X, y, normalize=True) + if X_val is not None: + new_monitored_val, *_ = new_estimator.objective_function(X_val, y_val, normalize=True) + elif callable(self.monitor): + new_monitored_train = self.monitor(new_estimator, X, y) + if X_val is not None: + new_monitored_val = self.monitor(new_estimator, X_val, y_val) + elif self.monitor is None or isinstance(self.monitor, str): + from ..metrics import check_scoring + scorer = check_scoring(new_estimator, self.monitor) + new_monitored_train = scorer(new_estimator, X, y) + if X_val is not None: + new_monitored_val = scorer(new_estimator, X_val, y_val) + + if X_val is None: + new_monitored_val = None + + with open(self._data_files[node.computation_tree], "a") as f: + f.write(f"{node.idx},{new_monitored_train},{new_monitored_val}\n") + + def on_fit_end(self): + pass + + # @property + # def data(self): + + def plot(self): + data_files = [p for p in Path(self._data_dir.name).iterdir() if p.is_file()] + for f in data_files: + data = pd.read_csv(f) + fig, ax = plt.subplots() + ax.plot(data["iteration"], data[f"{self.monitor_name}_train"], label=f"train set") + if self.on != "train_set": + ax.plot(data["iteration"], data[f"{self.monitor_name}_val"], label=f"validation set") + + ax.set_xlabel("Number of iterations") + ax.set_ylabel(self.monitor_name) + + ax.legend() + plt.show() diff --git a/sklearn/callback/_text_verbose.py b/sklearn/callback/_text_verbose.py index 93f783a297d30..9773f1c8a6f51 100644 --- a/sklearn/callback/_text_verbose.py +++ b/sklearn/callback/_text_verbose.py @@ -9,11 +9,7 @@ class TextVerbose(BaseCallback): auto_propagate = True request_stopping_criterion = True - def __init__(self, min_time_between_calls=0): - self.min_time_between_calls = min_time_between_calls - def on_fit_begin(self, estimator, X=None, y=None): - self.estimator = estimator self._start_time = time.perf_counter() def on_fit_iter_end(self, *, node, **kwargs): diff --git a/sklearn/callback/tests/_utils.py b/sklearn/callback/tests/_utils.py index f61ffc4077dff..4144ba3ddf3f3 100644 --- a/sklearn/callback/tests/_utils.py +++ b/sklearn/callback/tests/_utils.py @@ -2,7 +2,7 @@ from joblib.parallel import Parallel, delayed -from sklearn.base import BaseEstimator, clone +from sklearn.base 
diff --git a/sklearn/callback/tests/_utils.py b/sklearn/callback/tests/_utils.py
index f61ffc4077dff..4144ba3ddf3f3 100644
--- a/sklearn/callback/tests/_utils.py
+++ b/sklearn/callback/tests/_utils.py
@@ -2,7 +2,7 @@
 
 from joblib.parallel import Parallel, delayed
 
-from sklearn.base import BaseEstimator, clone
+from sklearn.base import BaseEstimator, clone, _fit_context
 from sklearn.callback import BaseCallback
 from sklearn.callback._base import _eval_callbacks_on_fit_iter_end
 
@@ -34,9 +34,12 @@ def on_fit_iter_end(self, estimator, node, **kwargs):
 
 
 class Estimator(BaseEstimator):
+    _parameter_constraints = {}
+
     def __init__(self, max_iter=20):
         self.max_iter = max_iter
 
+    @_fit_context(prefer_skip_nested_validation=False)
     def fit(self, X, y):
         root, X, y, X_val, y_val = self._eval_callbacks_on_fit_begin(
             levels=[
@@ -55,21 +58,21 @@ def fit(self, X, y):
                     self._from_reconstruction_attributes,
                     reconstruction_attributes=lambda: {"n_iter_": i + 1},
                 ),
-                data={"X": X, "y": y, "X_val": X_val, "y_val": y_val"},
+                data={"X": X, "y": y, "X_val": X_val, "y_val": y_val},
             ):
                 break
 
         self.n_iter_ = i + 1
 
-        self._eval_callbacks_on_fit_end()
-
         return self
 
-    def objective_function(self, X, y=None):
+    def objective_function(self, X, y=None, normalize=False):
         return 0, 0, 0
 
 
 class MetaEstimator(BaseEstimator):
+    _parameter_constraints = {}
+
     def __init__(
         self, estimator, n_outer=4, n_inner=3, n_jobs=None, prefer="processes"
     ):
@@ -79,8 +82,9 @@ def __init__(
         self.n_jobs = n_jobs
         self.prefer = prefer
 
+    @_fit_context(prefer_skip_nested_validation=False)
     def fit(self, X, y):
-        root, *_ = self._eval_callbacks_on_fit_begin(
+        root, X, y, _, _ = self._eval_callbacks_on_fit_begin(
             levels=[
                 {"descr": "fit", "max_iter": self.n_outer},
                 {"descr": "outer", "max_iter": self.n_inner},
@@ -95,8 +99,6 @@ def fit(self, X, y):
             for i, node in enumerate(root.children)
         )
 
-        self._eval_callbacks_on_fit_end()
-
         return self
 
     def _func(self, estimator, X, y, parent_node, i):
diff --git a/sklearn/callback/tests/test_base_estimator_callback_methods.py b/sklearn/callback/tests/test_base_estimator_callback_methods.py
index 01669a5494dde..2f554101dcfa3 100644
--- a/sklearn/callback/tests/test_base_estimator_callback_methods.py
+++ b/sklearn/callback/tests/test_base_estimator_callback_methods.py
@@ -88,7 +88,7 @@ def test_eval_callbacks_on_fit_begin():
         {"descr": "fit", "max_iter": 10},
         {"descr": "iter", "max_iter": None},
     ]
-    ct_root = estimator._eval_callbacks_on_fit_begin(levels=levels)
+    ct_root, *_ = estimator._eval_callbacks_on_fit_begin(levels=levels)
 
     assert hasattr(estimator, "_computation_tree")
     assert ct_root is estimator._computation_tree.root
diff --git a/sklearn/callback/tests/test_callbacks.py b/sklearn/callback/tests/test_callbacks.py
index aa79503545acb..e453705b637e1 100644
--- a/sklearn/callback/tests/test_callbacks.py
+++ b/sklearn/callback/tests/test_callbacks.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from sklearn.callback import ConvergenceMonitor
+from sklearn.callback import Monitoring
 from sklearn.callback import EarlyStopping
 from sklearn.callback import ProgressBar
 from sklearn.callback import Snapshot
@@ -23,7 +23,7 @@
 @pytest.mark.parametrize(
     "Callback",
     [
-        ConvergenceMonitor,
+        Monitoring,
         EarlyStopping,
         ProgressBar,
         Snapshot,
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index a3eac9c7e3468..8cd485114ac9c 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1711,8 +1711,6 @@ def fit_transform(self, X, y=None, W=None, H=None):
         self.components_ = H
         self.n_iter_ = n_iter
 
-        self._eval_callbacks_on_fit_end()
-
         return W
 
     def _fit_transform(
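
[editor's note, not part of the patch] The hunks above establish the data
passed to _eval_callbacks_on_fit_iter_end: a data dict with "X", "y", "X_val"
and "y_val" keys, plus a lazy from_reconstruction_attributes factory that
yields a usable copy of the estimator mid-fit. A sketch of a callback
consuming both; it assumes, as Monitoring's signature suggests, that the
dispatcher materializes the factory before invoking callbacks that set
request_from_reconstruction_attributes:

    from sklearn.callback import BaseCallback

    class ValidationScore(BaseCallback):
        """Track estimator.score on the validation set at each iteration."""

        request_from_reconstruction_attributes = True

        def on_fit_begin(self, estimator, X=None, y=None):
            self.scores_ = []

        def on_fit_iter_end(
            self, *, estimator, node, from_reconstruction_attributes=None,
            data=None, **kwargs
        ):
            if from_reconstruction_attributes is None or data is None:
                return
            model = from_reconstruction_attributes
            if data.get("X_val") is not None:
                self.scores_.append(model.score(data["X_val"], data["y_val"]))

        def on_fit_end(self):
            pass
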
diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index e44b6428f8f4e..e5df230279b59 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -19,6 +19,7 @@
 )
 from ...base import BaseEstimator, RegressorMixin, ClassifierMixin, is_classifier
 from ...base import _fit_context
+from ...callback._base import _eval_callbacks_on_fit_iter_end
 from ...utils import check_random_state, resample, compute_sample_weight
 from ...utils.validation import (
     check_is_fitted,
@@ -462,6 +463,17 @@ def fit(self, X, y, sample_weight=None):
             X_train, y_train, sample_weight_train = X, y, sample_weight
             X_val = y_val = sample_weight_val = None
 
+        begin_at_stage = 0 if not (self._is_fitted() and self.warm_start) else self.n_iter_
+
+        root, X_train, y_train, X_val, y_val = self._eval_callbacks_on_fit_begin(
+            levels=[
+                {"descr": "fit", "max_iter": self.max_iter - begin_at_stage},
+                {"descr": "iter", "max_iter": None},
+            ],
+            X=X,
+            y=y,
+        )
+
         # Bin the data
         # For ease of use of the API, the user-facing GBDT classes accept the
         # parameter max_bins, which doesn't take into account the bin for
@@ -756,6 +768,26 @@ def fit(self, X, y, sample_weight=None):
             if should_early_stop:
                 break
 
+            if _eval_callbacks_on_fit_iter_end(
+                estimator=self,
+                node=root.children[iteration - begin_at_stage],
+                fit_state={},
+                from_reconstruction_attributes=partial(
+                    self._from_reconstruction_attributes,
+                    reconstruction_attributes=lambda: {
+                        "train_score_": np.asarray(self.train_score_),
+                        "validation_score_": np.asarray(self.validation_score_),
+                    },
+                ),
+                data={
+                    "X": X_binned_train,
+                    "y": y_train,
+                    "X_val": X_binned_val,
+                    "y_val": y_val
+                },
+            ):
+                break
+
         if self.verbose:
             duration = time() - fit_start_time
             n_total_leaves = sum(
@@ -794,8 +826,22 @@ def fit(self, X, y, sample_weight=None):
         self.train_score_ = np.asarray(self.train_score_)
         self.validation_score_ = np.asarray(self.validation_score_)
         del self._in_fit  # hard delete so we're sure it can't be used anymore
+
         return self
 
+    def objective_function(self, X, y, *, raw_predictions=None, normalize=False):
+        if raw_predictions is None:
+            raw_predictions = self._raw_predict(X)
+
+        loss = self._loss(
+            y_true=y,
+            raw_prediction=raw_predictions,
+        )
+        if normalize:
+            loss /= raw_predictions.shape[0]
+
+        return loss, loss, 0
+
     def _is_fitted(self):
         return len(getattr(self, "_predictors", [])) > 0
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index b949a35b28d02..dcaf6377dbe1f 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -1397,8 +1397,6 @@ def fit(self, X, y, sample_weight=None):
         else:
             self.intercept_ = np.zeros(n_classes)
 
-        self._eval_callbacks_on_fit_end()
-
         return self
 
     def predict_proba(self, X):
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 9ec5ce8414201..c6c718ca0d4fa 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -1481,8 +1481,6 @@ def fit(self, X, y=None, *, groups=None, **fit_params):
         )
         super().fit(X, y=y, groups=groups, **fit_params)
 
-        self._eval_callbacks_on_fit_end()
-
     def _run_search(self, evaluate_candidates):
         """Search all candidates in param_grid"""
         evaluate_candidates(self._param_grid, parent_node=self._computation_tree.root)
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 0eb02009ecf91..0d563cbb10c12 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -447,8 +447,6 @@ def fit(self, X, y=None, **fit_params):
 
             _eval_callbacks_on_fit_iter_end(estimator=self, node=node)
 
-        self._eval_callbacks_on_fit_end()
-
         return self
 
     def _can_fit_transform(self):

From b8ac1a5e86aeee791675aebd36758c23831a3efa Mon Sep 17 00:00:00 2001
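
[editor's note, not part of the patch] The objective_function methods added in
this series return a three-element tuple whose first element is the monitored
value, so callers unpack it as `obj, *_ = estimator.objective_function(X, y,
normalize=True)`, which is how Monitoring consumes it. With the hunks above,
histogram gradient boosting can be monitored end to end; a sketch against this
branch's private API:

    from sklearn.callback import Monitoring
    from sklearn.datasets import make_classification
    from sklearn.ensemble import HistGradientBoostingClassifier

    X, y = make_classification(n_samples=500, random_state=0)

    monitoring = Monitoring(monitor="objective_function", on="validation_set")
    hgbt = HistGradientBoostingClassifier(max_iter=50, random_state=0)
    hgbt._set_callbacks(monitoring)
    hgbt.fit(X, y)
    monitoring.plot()
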
b8ac1a5e86aeee791675aebd36758c23831a3efa Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Wed, 18 Oct 2023 12:10:21 +0200
Subject: [PATCH 20/20] cln

---
 sklearn/callback/_progressbar.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/callback/_progressbar.py b/sklearn/callback/_progressbar.py
index f802cc63b3b9a..f8ed251add34a 100644
--- a/sklearn/callback/_progressbar.py
+++ b/sklearn/callback/_progressbar.py
@@ -103,7 +103,7 @@ def get_renderables(self):
             table = self.make_tasks_table(getattr(self, "_ordered_tasks", []))
             yield table
 
-except:
+except ImportError:
     pass
 
 
@@ -262,7 +262,8 @@ def _format_task_description(self, node, computation_tree, depth):
         description = f"{computation_tree.estimator_name} - {node.description}"
         if node.parent is None and computation_tree.parent_node is not None:
             description = (
-                f"{computation_tree.parent_node.description} {computation_tree.parent_node.idx} |"
+                f"{computation_tree.parent_node.description} "
+                f"{computation_tree.parent_node.idx} |"
                 f" {description}"
             )
         if node.parent is not None:
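
[editor's note, not part of the patch] This last cleanup commit narrows a bare
`except:` to `except ImportError:` (the guarded block subclasses rich's
Progress renderer, so only a missing optional dependency should be silenced)
and splits an overlong f-string. A closing sketch; it assumes rich is
installed and, like the other auto-propagated callbacks in this series, that a
ProgressBar set on a meta-estimator propagates to its sub-estimators:

    from sklearn.callback import ProgressBar

    estimator._set_callbacks(ProgressBar())
    estimator.fit(X, y)  # one progress bar per level of the computation tree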