8000 Extended Module Tracker (#128508) · pytorch/pytorch@2e5366f · GitHub

Commit 2e5366f

sanketpurandare authored and pytorchmergebot committed
Extended Module Tracker (#128508)
This is an extension of [ModuleTracker](https://github.com/pytorch/pytorch/blob/main/torch/utils/module_tracker.py) with added features and bug fixes.

1. Allows installing user-defined hooks to be called in the pre-fw, post-fw, pre-bw and post-bw hooks of the ``ModTracker``.
2. Adds a function ``get_known_fqn`` that retrieves the fqn of the module as tracked by the ``ModTracker``.
3. Only registers the multi-grad hooks if we are in the forward pass. This is important because a module's pre-fw and post-fw hooks get called in the backward during AC (activation checkpointing), and we do not want to register multi-grad hooks in this case.
4. Sets the kwarg ``always_call=True`` for post-fw hooks, so that they are called after AC as well.

Pull Request resolved: #128508
Approved by: https://github.com/wanchaol
1 parent d50712e commit 2e5366f
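For reference, here is a minimal usage sketch of the API added in this commit (``register_user_hooks`` and ``get_known_fqn``). The toy ``Net`` module and the print statements are illustrative only and not part of the change:

    import torch
    from torch.distributed._tools.mod_tracker import ModTracker


    class Net(torch.nn.Module):  # hypothetical toy model, for illustration only
        def __init__(self):
            super().__init__()
            self.fc = torch.nn.Linear(8, 8)

        def forward(self, x):
            return self.fc(x).relu()


    mt = ModTracker()
    # Each hook receives the module instance, or None if it is no longer alive in backward.
    mt.register_user_hooks(
        pre_fw_hook=lambda m, inp: print("pre_fw ", mt.get_known_fqn(m), mt.is_bw),
        post_fw_hook=lambda m, inp, out: print("post_fw", mt.get_known_fqn(m), mt.is_bw),
        pre_bw_hook=lambda m, gout: print("pre_bw ", mt.get_known_fqn(m) if m else None, mt.is_bw),
        post_bw_hook=lambda m, gin: print("post_bw", mt.get_known_fqn(m) if m else None, mt.is_bw),
    )

    net = Net()
    with mt:
        net(torch.rand(4, 8, requires_grad=True)).sum().backward()

This prints one pre_fw/post_fw pair per module ("Net" and "Net.fc") during the forward and the corresponding pre_bw/post_bw pairs during the backward, mirroring what the new test below asserts.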

File tree

3 files changed, +373 -0 lines changed

Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
# Owner(s): ["module: unknown"]

from copy import copy

import torch
from torch.distributed._tools.mod_tracker import ModTracker
from torch.testing._internal.common_utils import run_tests, TestCase, xfailIfTorchDynamo


class TestModTracker(TestCase):
    # https://github.com/pytorch/pytorch/issues/127112
    @xfailIfTorchDynamo
    def test_module_hierarchy(self):
        seen_fw = []
        seen_bw = []

        class Foo(torch.nn.Module):
            def forward(self, x):
                x = x["a"].relu_()
                seen_fw.append((copy(tracker.parents), tracker.is_bw))
                x.register_hook(
                    lambda grad: seen_bw.append((copy(tracker.parents), tracker.is_bw))
                )
                return {"a": torch.mm(x, x)}

        class Mod(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.a = Foo()
                self.b = torch.nn.ModuleDict({"nest": Foo()})
                self.c = torch.nn.ModuleList([Foo()])

            def forward(self, x):
                x = self.c[0](x)
                return self.b["nest"](self.a(x))

        mod = Mod()

        with ModTracker() as tracker:
            mod({"a": torch.randn(10, 10, requires_grad=True).clone()})[
                "a"
            ].sum().backward()
            mod({"a": torch.randn(10, 10, requires_grad=True).clone()})[
                "a"
            ].sum().backward()

        self.assertEqual(
            seen_fw,
            [
                ({"Global", "Mod", "Mod.c.0"}, False),
                ({"Global", "Mod", "Mod.a"}, False),
                ({"Global", "Mod", "Mod.b.nest"}, False),
                ({"Global", "Mod", "Mod.c.0"}, False),
                ({"Global", "Mod", "Mod.a"}, False),
                ({"Global", "Mod", "Mod.b.nest"}, False),
            ],
        )

        self.assertEqual(
            seen_bw,
            [
                ({"Global", "Mod", "Mod.b.nest"}, True),
                ({"Global", "Mod", "Mod.a"}, True),
                ({"Global", "Mod", "Mod.c.0"}, True),
                ({"Global", "Mod", "Mod.b.nest"}, True),
                ({"Global", "Mod", "Mod.a"}, True),
                ({"Global", "Mod", "Mod.c.0"}, True),
            ],
        )

    def test_bw_detection(self):
        mod = torch.nn.Linear(2, 2)

        with ModTracker() as tracker:
            mod(torch.rand(2, requires_grad=True)).sum().backward()
            self.assertFalse(tracker.is_bw)
            self.assertEqual(tracker.parents, {"Global"})

    @xfailIfTorchDynamo
    def test_user_hooks(self):
        class Bar(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.foo = torch.nn.Linear(10, 10)

            def forward(self, x):
                return self.foo(x).relu_()

        mt = ModTracker()
        test_op = []

        def hook(mod, hook_name):
            mfqn = mt.get_known_fqn(mod) if mod is not None else None
            test_op.append((hook_name, mfqn, mfqn in mt.parents, mt.is_bw))

        mod = Bar()

        mt.register_user_hooks(
            lambda m, inp: hook(m, "pre_fw"),
            lambda m, inp, op: hook(m, "post_fw"),
            lambda m, gop: hook(m, "pre_bw"),
            lambda m, ginp: hook(m, "post_bw"),
        )
        with mt:
            mod(torch.rand(10, 10, requires_grad=True)).sum().backward()
        expected_op = [
            ("pre_fw", "Bar", True, False),
            ("pre_fw", "Bar.foo", True, False),
            ("post_fw", "Bar.foo", True, False),
            ("post_fw", "Bar", True, False),
            ("pre_bw", "Bar", True, True),
            ("pre_bw", "Bar.foo", True, True),
            ("post_bw", "Bar", True, True),
            ("post_bw", "Bar.foo", True, True),
        ]
        self.assertEqual(test_op, expected_op)

        with self.assertRaises(AssertionError):
            mt.register_user_hooks(lambda x, y: x, None, None, None)

        test_op.clear()
        with mt:
            loss = mod(torch.rand(10, 10, requires_grad=True)).sum()
            del mod
            loss.backward()
        expected_op = [
            ("pre_fw", "Bar", True, False),
            ("pre_fw", "Bar.foo", True, False),
            ("post_fw", "Bar.foo", True, False),
            ("post_fw", "Bar", True, False),
            ("pre_bw", None, False, True),
            ("pre_bw", None, False, True),
            ("post_bw", None, False, True),
            ("post_bw", None, False, True),
        ]
        self.assertEqual(test_op, expected_op)


if __name__ == "__main__":
    run_tests()
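The first test above snapshots ``tracker.parents`` from inside tensor hooks to attribute both forward and backward execution to module fqns. A stripped-down sketch of the same pattern (the ``Sequential`` model here is purely illustrative):

    from copy import copy

    import torch
    from torch.distributed._tools.mod_tracker import ModTracker

    # Illustrative model; any nn.Module hierarchy is tracked the same way.
    net = torch.nn.Sequential(
        torch.nn.Linear(8, 8), torch.nn.ReLU(), torch.nn.Linear(8, 1)
    )

    with ModTracker() as tracker:
        out = net(torch.rand(4, 8, requires_grad=True))
        # Snapshot which module fqns are live when this activation's grad is computed.
        out.register_hook(lambda grad: print(copy(tracker.parents), tracker.is_bw))
        out.sum().backward()

Copying ``tracker.parents`` matters because the set is mutated in place as modules are pushed and popped; the test uses ``copy`` for the same reason.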

torch/distributed/_tools/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -1 +1,2 @@
from .memory_tracker import MemoryTracker
from .mod_tracker import ModTracker
torch/distributed/_tools/mod_tracker.py

Lines changed: 232 additions & 0 deletions
@@ -0,0 +1,232 @@
# mypy: allow-untyped-defs
import warnings
import weakref
from typing import Callable, Optional, Set

import torch
from torch.autograd.graph import register_multi_grad_hook
from torch.nn.modules.module import (
    register_module_forward_hook,
    register_module_forward_pre_hook,
)
from torch.utils._pytree import tree_flatten


__all__ = ["ModTracker"]


class ModTracker:
    """
    ``ModTracker`` is a context manager that tracks the nn.Module hierarchy during execution
    so that other systems can query which Module is currently being executed (or whose backward
    is being executed).

    You can access the ``parents`` attribute on this context manager to get the set of all the
    Modules currently being executed via their fqn (fully qualified name, also used as the key within
    the state_dict).
    You can access the ``is_bw`` attribute to know if you are currently running in backward or not.

    Note that ``parents`` is never empty and always contains the "Global" key. The ``is_bw`` flag
    will remain ``True`` after the forward until another Module is executed. If you need it to be
    more accurate, please submit an issue requesting this. Adding a map from fqn to the module instance
    is possible but not done yet, please submit an issue requesting this if you need it.

    Example usage

    .. code-block:: python

        mod = torch.nn.Linear(2, 2)

        with ModTracker() as tracker:
            # Access anything during the forward pass
            def my_linear(m1, m2, bias):
                print(f"Current modules: {tracker.parents}")
                return torch.mm(m1, m2.t()) + bias

            torch.nn.functional.linear = my_linear

            mod(torch.rand(2, 2))

    """

    parents: Set[str]
    """
    A Set containing the fqn for each module currently running their forward
    """

    def __init__(self):
        self.parents = {"Global"}
        self._known_modules: weakref.WeakKeyDictionary = weakref.WeakKeyDictionary()
        self._seen_modules: weakref.WeakSet = weakref.WeakSet()
        self._has_callback = False
        self._user_pre_fw_hook = None
        self._user_post_fw_hook = None
        self._user_pre_bw_hook = None
        self._user_post_bw_hook = None

    def _maybe_set_engine_callback(self):
        # This assumes no concurrent calls to backward
        if self._has_callback:
            return

        def callback():
            self.parents = {"Global"}
            self._has_callback = False

        torch.autograd.Variable._execution_engine.queue_callback(callback)
        self._has_callback = True

    @property
    def is_bw(self):
        """
        A boolean marking if this is currently running during the backward pass or not
        """
        return torch._C._current_graph_task_id() != -1

    def get_known_fqn(self, mod):
        """
        Return the fqn for the given module if it is known to the ``ModTracker``, otherwise ``None``.
        """
        return self._known_modules.get(mod, None)

    def register_user_hooks(
        self,
        pre_fw_hook: Optional[Callable] = None,
        post_fw_hook: Optional[Callable] = None,
        pre_bw_hook: Optional[Callable] = None,
        post_bw_hook: Optional[Callable] = None,
    ):
        """
        Registers user-specified hooks to be called before/after the forward/backward pass for each
        module tracked by the ``ModTracker``. One or more can be ``None``.
        Args:
            pre_fw_hook (Callable, optional): A hook to be called before the forward pass for the
                module. It should have the following signature:
                pre_fw_hook (module, input) -> None
            post_fw_hook (Callable, optional): A hook to be called after the forward pass for the
                module. It should have the following signature:
                post_fw_hook (module, input, output) -> None
            pre_bw_hook (Callable, optional): A multi-grad hook to be called on all the outputs of
                the module that require gradients. It should have the following signature:
                pre_bw_hook (module, grad_output) -> None
            post_bw_hook (Callable, optional): A multi-grad hook to be called on all the inputs of
                the module that require gradients. It should have the following signature:
                post_bw_hook (module, grad_input) -> None
        Raises:
            AssertionError: If a new hook is provided when one is already registered.
        Note:
            If the module is not alive during the backward pass, the pre_bw_hook and post_bw_hook
            will receive None as the module argument.
            The module fqn will be present in the ``parents`` attribute when each of the hooks is called.
            Hooks are intended to be used as markers only; they should not modify the inputs/outputs.
        """

        def set_hook(hook, user_hook, hook_name):
            if hook is not None and user_hook is not None:
                raise AssertionError(
                    f"Only one {hook_name} can be registered at a time"
                    f" Clear the existing hook by calling ``clear_user_hooks`` before registering a new one"
                )
            return hook

        self._user_pre_fw_hook = set_hook(
            pre_fw_hook, self._user_pre_fw_hook, "pre_fw_hook"
        )
        self._user_post_fw_hook = set_hook(
            post_fw_hook, self._user_post_fw_hook, "post_fw_hook"
        )
        self._user_pre_bw_hook = set_hook(
            pre_bw_hook, self._user_pre_bw_hook, "pre_bw_hook"
        )
        self._user_post_bw_hook = set_hook(
            post_bw_hook, self._user_post_bw_hook, "post_bw_hook"
        )

    def clear_user_hooks(self):
        """
        Clears the user specified hooks registered with ``register_user_hooks``
        """
        self._user_pre_fw_hook = None
        self._user_post_fw_hook = None
        self._user_pre_bw_hook = None
        self._user_post_bw_hook = None

    def _get_mod_name(self, mod):
        if mod not in self._known_modules:
            self._known_modules[mod] = type(mod).__name__
        mod_name = self._known_modules[mod]
        if mod not in self._seen_modules:
            for name, submod in mod.named_children():
                self._known_modules[submod] = f"{mod_name}.{name}"
                self._get_mod_name(submod)
            self._seen_modules.add(mod)
        return mod_name

    def _get_append_fn(self, w_mod, name, is_bw):
        def fn(*args):
            if is_bw:
                self._maybe_set_engine_callback()
            if name in self.parents and not self.is_bw:

                def custom_formatwarning(msg, category, filename, lineno, line=None):
                    return f"{filename}:{lineno}: {category.__name__}: {msg} \n"

                warnings.formatwarning = custom_formatwarning
                warnings.warn(
                    "The module hierarchy tracking may be messed up."
                    " Please file a bug to PyTorch, if it is the case."
                )
            self.parents.add(name)

            if self._user_pre_bw_hook is not None and is_bw:
                self._user_pre_bw_hook(w_mod(), args)

        return fn

    def _get_pop_fn(self, w_mod, name, is_bw):
        def fn(*args):
            if self._user_post_bw_hook is not None and is_bw:
                self._user_post_bw_hook(w_mod(), args)

            if name in self.parents:
                self.parents.remove(name)
            elif not is_bw:
                # Due to some input/output not requiring gradients, we cannot enforce
                # proper nesting in backward
                raise RuntimeError(
                    "The Module hierarchy tracking is wrong. Report a bug to PyTorch"
                )

        return fn

    def _fw_pre_hook(self, mod, input):
        name = self._get_mod_name(mod)
        w_mod = weakref.ref(mod)
        self._get_append_fn(w_mod, name, False)()
        if self._user_pre_fw_hook is not None:
            self._user_pre_fw_hook(mod, input)
        args, _ = tree_flatten(input)
        tensors = [a for a in args if isinstance(a, torch.Tensor) and a.requires_grad]
        if not self.is_bw and tensors:
            register_multi_grad_hook(tensors, self._get_pop_fn(w_mod, name, True))

    def _fw_post_hook(self, mod, input, output):
        name = self._get_mod_name(mod)
        w_mod = weakref.ref(mod)
        if self._user_post_fw_hook is not None:
            self._user_post_fw_hook(mod, input, output)
        self._get_pop_fn(w_mod, name, False)()
        args, _ = tree_flatten(output)
        tensors = [a for a in args if isinstance(a, torch.Tensor) and a.requires_grad]
        if not self.is_bw and tensors:
            register_multi_grad_hook(tensors, self._get_append_fn(w_mod, name, True))

    def __enter__(self):
        self._fw_pre_handle = register_module_forward_pre_hook(self._fw_pre_hook)
        self._fw_post_handle = register_module_forward_hook(
            self._fw_post_hook, always_call=True
        )
        return self

    def __exit__(self, *args):
        self._fw_pre_handle.remove()
        self._fw_post_handle.remove()
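Points 3 and 4 of the commit message concern activation checkpointing: recomputation in the backward re-runs a module's forward, so its pre-fw/post-fw hooks fire again while ``is_bw`` is already ``True``, and ``always_call=True`` keeps the post-fw hook running for that recomputed forward. Below is a hedged sketch of that interaction, assuming ``torch.utils.checkpoint`` with ``use_reentrant=False``; the toy ``Linear`` and the prints are illustrative, and the exact hook ordering under checkpointing is not asserted by this commit's tests:

    import torch
    from torch.utils.checkpoint import checkpoint
    from torch.distributed._tools.mod_tracker import ModTracker

    mod = torch.nn.Linear(4, 4)  # illustrative module
    mt = ModTracker()
    # Expected to fire once with is_bw=False in the forward and again with is_bw=True
    # during the checkpointed recompute, where ModTracker skips re-registering
    # the multi-grad hooks.
    mt.register_user_hooks(
        pre_fw_hook=lambda m, inp: print(mt.get_known_fqn(m), "is_bw =", mt.is_bw)
    )

    with mt:
        out = checkpoint(mod, torch.rand(4, 4, requires_grad=True), use_reentrant=False)
        out.sum().backward()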

0 commit comments
