[NOT FOR MERGE] Re-implement #148962 for benchmarking · pytorch/pytorch@956f86d · GitHub

Commit 956f86d

[NOT FOR MERGE] Re-implement #148962 for benchmarking
ghstack-source-id: ac1a806
Pull Request resolved: #149961

1 parent d7bee66 · commit 956f86d

File tree

4 files changed: +23 −20 lines
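
Taken together, the four diffs re-implement #148962: inductor's freezing setting becomes tristate (on / off / unset), the benchmark harness and compile_fx_aot learn to resolve the unset case, and two small adjustments (generated-kernel whitespace in triton.py, a None-safe fake_mode patch in compile_fx.py) ride along.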

benchmarks/dynamo/common.py

+1 −1

@@ -2909,7 +2909,7 @@ def parse_args(args=None):
     )
     parser.add_argument("--cosine", action="store_true", help="use cosine similarity")
     parser.add_argument(
-        "--freezing", action="store_true", help="turn on freezing", default=False
+        "--freezing", action="store_true", help="turn on freezing", default=None
     )
     parser.add_argument(
         "--inductor-config",

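Changing the --freezing default from False to None lets the harness distinguish "flag omitted" from "explicitly off" and defer to the library-level tristate default (see the config.py hunk below). A minimal sketch of the argparse behavior, standalone rather than the actual harness:

import argparse

# store_true normally defaults to False; overriding the default with None
# preserves a third "unset" state that downstream config can resolve.
parser = argparse.ArgumentParser()
parser.add_argument("--freezing", action="store_true", default=None)

assert parser.parse_args([]).freezing is None               # omitted: unset
assert parser.parse_args(["--freezing"]).freezing is True   # explicitly on
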
torch/_inductor/codegen/triton.py

+3 −3

@@ -3689,12 +3689,12 @@ def add_constexpr_arg(arg_name):
             if (
                 len(non_constexpr_signature(signature)) == 4
             ):  # input, output and 2 args
-                tile_hint = "tile_hint=TileHint.SQUARE,"
+                tile_hint = " tile_hint=TileHint.SQUARE,"
             else:
-                tile_hint = "tile_hint=TileHint.DEFAULT,"
+                tile_hint = " tile_hint=TileHint.DEFAULT,"
         heuristics_line = f"""
 @triton_heuristics.{self._get_heuristic()}(
-    size_hints={size_hints!r}, {tile_hint}
+    size_hints={size_hints!r},{tile_hint}
     filename=__file__,
     triton_meta={triton_meta!r},
     inductor_meta={inductor_meta!r},

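Here the separating space moves out of the f-string template and into the tile_hint literals, so the rendered decorator line is unchanged whenever a hint is present; presumably this avoids a dangling trailing space on code paths (not shown in this hunk) where tile_hint ends up empty. A quick illustration of the rendering, with a hypothetical size_hints value:

size_hints = {"x": 16, "y": 16}
hint = " tile_hint=TileHint.SQUARE,"   # new form carries its own leading space

old = f"size_hints={size_hints!r}, {hint.lstrip()}"  # old: space in template
new = f"size_hints={size_hints!r},{hint}"            # new: space in the hint
assert old == new  # identical output when a hint is present

empty_old = f"size_hints={size_hints!r}, {''}"  # trailing space if hint empty
empty_new = f"size_hints={size_hints!r},{''}"   # clean
assert empty_old.endswith(" ") and not empty_new.endswith(" ")
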
torch/_inductor/compile_fx.py

+18 −15

@@ -13,7 +13,7 @@
 import warnings
 from abc import ABC, abstractmethod
 from collections import defaultdict
-from contextlib import AbstractContextManager
+from contextlib import AbstractContextManager, nullcontext
 from inspect import currentframe
 from itertools import count
 from typing import Any, Callable, Optional, TYPE_CHECKING, TypeVar, Union

@@ -1553,35 +1553,34 @@ def compile_fx_aot(
     model_: GraphModule,
     example_inputs_: list[InputType],
     inner_compile: _CompileFxCallable = compile_fx_inner,
-    config_patches: Optional[dict[str, str]] = None,
+    config_patches: Optional[dict[str, Any]] = None,
 ) -> Union[list[str], str]:
     assert isinstance(model_, GraphModule), model_

     # [See NOTE] Unwrapping subclasses AOT
     unwrap_tensor_subclass_parameters(model_)

-    config_patches: dict[str, Any] = (
-        {"cpp_wrapper": True}
-        if config_patches is None
-        else {**config_patches, "cpp_wrapper": True}
-    )
+    if config_patches is None:
+        config_patches = {}

-    output_path = config_patches.get(
-        "aot_inductor.output_path", config.aot_inductor.output_path
+    config_patches.update(
+        cpp_wrapper=True,
+        freezing=config.freezing
+        if config.freezing is not None
+        else not config.aot_inductor.use_runtime_constant_folding,
     )

-    if output_path:
+    if output_path := config_patches.get(
+        "aot_inductor.output_path", config.aot_inductor.output_path
+    ):
         assert not output_path.endswith(".pt2"), (
             "The output path for aot_compile should not have an extension with .pt2 "
             "this is for specifying the output path for the .so in AOTInductor. "
             "If you would like to package the AOTInductor generated files "
             "into a pt2, please call `torch._inductor.aoti_compile_and_package`."
         )
     else:
-        config_patches = {
-            **config_patches,
-            "aot_inductor.output_path": code_hash(model_.code),
-        }
+        config_patches["aot_inductor.output_path"] = code_hash(model_.code)

     extern_node_serializer = config_patches.pop("extern_node_serializer", None)
     saved_compile_id = model_.meta.get("dynamo_compile_id", None)

@@ -1687,7 +1686,11 @@ def fw_compiler_freezing(
     if tracing_context.fw_metadata:
         static_input_idxs += tracing_context.fw_metadata.static_input_indices

-    with mock.patch.object(fake_mode, "allow_non_fake_inputs", True):
+    with (
+        mock.patch.object(fake_mode, "allow_non_fake_inputs", True)
+        if fake_mode
+        else nullcontext()
+    ):
         optimized_function = inner_compile(
             opt_model,
             aot_example_inputs,
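
compile_fx_aot now folds cpp_wrapper and a resolved freezing value into config_patches in one update (the annotation also widens to dict[str, Any], matching actual usage, and the walrus form merely tightens the output_path lookup). With freezing tristate, an unset value defaults to the opposite of aot_inductor.use_runtime_constant_folding. A standalone restatement of that resolution rule, using mocked-up names rather than the real config module:

from typing import Optional

def resolve_aot_freezing(
    freezing: Optional[bool], use_runtime_constant_folding: bool
) -> bool:
    # An explicit setting always wins; otherwise enable freezing unless
    # runtime constant folding was requested instead.
    if freezing is not None:
        return freezing
    return not use_runtime_constant_folding

assert resolve_aot_freezing(None, False) is True    # AOT freezes by default
assert resolve_aot_freezing(None, True) is False    # defer to runtime folding
assert resolve_aot_freezing(False, True) is False   # explicit opt-out respected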

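The fw_compiler_freezing hunk guards against fake_mode being None: mock.patch.object would raise on a None target, so a no-op nullcontext is substituted. The pattern in isolation, with a toy class standing in for the real fake mode:

from contextlib import nullcontext
from unittest import mock

class FakeMode:
    allow_non_fake_inputs = False

for fake_mode in (FakeMode(), None):
    with (
        mock.patch.object(fake_mode, "allow_non_fake_inputs", True)
        if fake_mode
        else nullcontext()
    ):
        # Body runs in both cases; no AttributeError when fake_mode is None.
        pass
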
torch/_inductor/config.py

+1 −1

@@ -847,7 +847,7 @@ def decide_compile_threads() -> int:
 # Freezing will attempt to inline weights as constants in optimization
 # and run constant folding and other optimizations on them. After freezing, weights
 # can no longer be updated.
-freezing: bool = os.environ.get("TORCHINDUCTOR_FREEZING", "0") == "1"
+freezing: Optional[bool] = get_tristate_env("TORCHINDUCTOR_FREEZING")

 # Make freezing invalidate the eager Parameters of nn modules, to avoid memory overhead
 # of potentially keeping multiple copies of weights.
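
config.freezing is now Optional[bool]: True or False only when TORCHINDUCTOR_FREEZING is set explicitly, and None otherwise, which is what lets compile_fx_aot pick its own default above. get_tristate_env is an existing helper in torch/_inductor/config.py; the sketch below is an assumed reconstruction of its semantics, not its actual body:

import os
from typing import Optional

def get_tristate_env_sketch(name: str) -> Optional[bool]:
    # "1" -> True, "0" -> False, unset or anything else -> None
    value = os.environ.get(name)
    if value == "1":
        return True
    if value == "0":
        return False
    return None

os.environ.pop("TORCHINDUCTOR_FREEZING", None)
assert get_tristate_env_sketch("TORCHINDUCTOR_FREEZING") is None

os.environ["TORCHINDUCTOR_FREEZING"] = "1"
assert get_tristate_env_sketch("TORCHINDUCTOR_FREEZING") is True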
