@@ -1,5 +1,3 @@
- from __future__ import annotations
-
import base64
import copyreg
import dataclasses
@@ -25,6 +23,8 @@
import threading
import warnings
from bisect import bisect_right
+ from collections.abc import Generator, KeysView, Sequence
+ from concurrent.futures import Future
from copy import copy
from ctypes import c_void_p, CDLL, cdll
from datetime import timedelta
@@ -112,25 +112,22 @@
    )
else:

-     def log_global_cache_errors(*args: Any, **kwargs: Any) -> None:  # type: ignore[misc]
+     def log_global_cache_errors(*args: Any, **kwargs: Any) -> None:
        pass

-     def log_global_cache_stats(*args: Any, **kwargs: Any) -> None:  # type: ignore[misc]
+     def log_global_cache_stats(*args: Any, **kwargs: Any) -> None:
        pass

-     def log_global_cache_vals(*args: Any, **kwargs: Any) -> None:  # type: ignore[misc]
+     def log_global_cache_vals(*args: Any, **kwargs: Any) -> None:
        pass

-     def use_global_cache() -> bool:  # type: ignore[misc]
+     def use_global_cache() -> bool:
        return False


T = TypeVar("T")

if TYPE_CHECKING:
-     from collections.abc import Generator, KeysView, Sequence
-     from concurrent.futures import Future
-
    from .compile_fx import _CompileFxKwargs
    from .graph import GraphLowering
    from .ir import ChoiceCaller
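Note on the pattern above: with `from __future__ import annotations` removed, annotations are evaluated eagerly, so names that are imported only under `if TYPE_CHECKING:` have to appear as string literals in signatures, which is what the hunks below do. A minimal sketch of the idea, using a hypothetical `parse_price` helper rather than anything from this file:

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    from decimal import Decimal  # imported for the type checker only


def parse_price(raw: str) -> Optional["Decimal"]:
    # The quoted annotation is never evaluated at runtime, so the
    # TYPE_CHECKING-only import above is sufficient for mypy.
    from decimal import Decimal  # the real import happens at call time

    return Decimal(raw) if raw else None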
@@ -267,11 +264,11 @@ def get_global_cache(self) -> dict[str, Any]:

    def lookup(
        self,
-         choices: list[ChoiceCaller],
+         choices: list["ChoiceCaller"],
        op: str,
        inputs: str,
-         benchmark: Optional[Callable[[Any], dict[ChoiceCaller, float]]],
-     ) -> dict[ChoiceCaller, float]:
+         benchmark: Optional[Callable[[Any], dict["ChoiceCaller", float]]],
+     ) -> dict["ChoiceCaller", float]:
        """
        Check to see if we have benchmarked the given choice callers. For each
        choice caller:
@@ -617,7 +614,7 @@ def get_hash(self, obj: Any) -> str:
        serialized_data = self.dumps(obj)
        return sha256_hash(serialized_data)

-     def debug_lines(self, inp: FxGraphHashDetails) -> list[str]:
+     def debug_lines(self, inp: "FxGraphHashDetails") -> list[str]:
        """
        Get a printable string describing in more detail all the attributes
        comprising an object. Useful for debugging when one graph hashes
@@ -652,7 +649,7 @@ def get_str(obj: Any) -> str:


def build_code_hash(
-     roots: list[str] | None, prefix: str, hasher: hashlib._Hash
+     roots: Optional[list[str]], prefix: str, hasher: "hashlib._Hash"
) -> None:
    for lib in sorted(pkgutil.iter_modules(roots, prefix), key=lambda x: x.name):
        spec = lib.module_finder.find_spec(lib.name, None)
@@ -759,8 +756,8 @@ class FxGraphHashDetails:
    def __init__(
        self,
        gm: torch.fx.GraphModule,
-         example_inputs: Sequence[InputType],
-         fx_kwargs: _CompileFxKwargs,
+         example_inputs: Sequence["InputType"],
+         fx_kwargs: "_CompileFxKwargs",
        inputs_to_check: Sequence[int],
    ) -> None:
        self.gm = gm
@@ -877,8 +874,8 @@ def _get_custom_pass_detail(

def compiled_fx_graph_hash(
    gm: torch.fx.GraphModule,
-     example_inputs: Sequence[InputType],
-     fx_kwargs: _CompileFxKwargs,
+     example_inputs: Sequence["InputType"],
+     fx_kwargs: "_CompileFxKwargs",
    inputs_to_check: Sequence[int],
) -> tuple[str, list[str]]:
    """
@@ -931,14 +928,14 @@ class GuardedCache(Generic[T]):
    """

    @classmethod
-     def _get_tmp_dir_for_key(cls: type[GuardedCache[T]], _key: str) -> str:
+     def _get_tmp_dir_for_key(cls: type["GuardedCache[T]"], _key: str) -> str:
        raise NotImplementedError("Implement _get_tmp_dir_for_key on parent class")

    @classmethod
    def iterate_over_candidates(
-         cls: type[GuardedCache[T]],
+         cls: type["GuardedCache[T]"],
        local: bool,
-         remote_cache: Optional[RemoteCache[JsonDataTy]],
+         remote_cache: Optional["RemoteCache[JsonDataTy]"],
        key: str,
    ) -> Generator[tuple[T, bytes], None, None]:
        if local:
@@ -970,10 +967,10 @@ def iterate_over_candidates(

    @classmethod
    def find_guarded_entry(
-         cls: type[GuardedCache[T]],
+         cls: type["GuardedCache[T]"],
        key: str,
        local: bool,
-         remote_cache: Optional[RemoteCache[JsonDataTy]],
+         remote_cache: Optional["RemoteCache[JsonDataTy]"],
        evaluate_guards: Callable[[str, Union[list[int], list[torch.SymInt]]], bool],
        hints: list[int],
    ) -> tuple[Optional[T], Optional[bytes], dict[str, str]]:
@@ -1031,7 +1028,7 @@ def find_guarded_entry(

    @classmethod
    def _filter_backed_symints(
-         cls: type[GuardedCache[T]], inputs: Sequence[InputType]
+         cls: type["GuardedCache[T]"], inputs: Sequence["InputType"]
    ) -> list[torch.SymInt]:
        """
        Get the backed SymInt objects from the input list. Note that we can never
@@ -1040,7 +1037,7 @@ def _filter_backed_symints(
        return [s for s in inputs if isinstance(s, torch.SymInt) and has_hint(s)]

    @classmethod
-     def _get_shape_env(cls: type[GuardedCache[T]]) -> Optional[ShapeEnv]:
+     def _get_shape_env(cls: type["GuardedCache[T]"]) -> Optional[ShapeEnv]:
        """
        Helper to get the shape env from the tracing context.
        """
@@ -1088,7 +1085,7 @@ def _get_tmp_dir() -> str:
        return os.path.join(cache_dir(), "fxgraph")

    @classmethod
-     def _get_tmp_dir_for_key(cls: type[FxGraphCache], key: str) -> str:
+     def _get_tmp_dir_for_key(cls: type["FxGraphCache"], key: str) -> str:
        """
        Return the disk location for a given cache key.
        """
@@ -1098,7 +1095,7 @@ def _get_tmp_dir_for_key(cls: type[FxGraphCache], key: str) -> str:
    def cache_hit_post_compile(
        graph: CompiledFxGraph,
        cache_info: dict[str, Any],
-         constants: CompiledFxGraphConstants,
+         constants: "CompiledFxGraphConstants",
    ) -> tuple[Optional[CompiledFxGraph], dict[str, Any]]:
        """
        Cache specific post compile steps that need to run if we find a graph in the cache
@@ -1173,10 +1170,10 @@ def cache_hit_post_compile(
    @staticmethod
    def _lookup_graph(
        key: str,
-         example_inputs: Sequence[InputType],
+         example_inputs: Sequence["InputType"],
        local: bool,
-         remote_cache: Optional[RemoteCache[JsonDataTy]],
-         constants: CompiledFxGraphConstants,
+         remote_cache: Optional["RemoteCache[JsonDataTy]"],
+         constants: "CompiledFxGraphConstants",
        evaluate_guards: Optional[
            Callable[[str, Union[list[int], list[torch.SymInt]]], bool]
        ] = None,
@@ -1245,10 +1242,10 @@ def _write_to_local_cache(key: str, content: bytes) -> None:
    @staticmethod
    def _save_graph(
        key: str,
-         compiled_graph: OutputCode,
-         example_inputs: Sequence[InputType],
+         compiled_graph: "OutputCode",
+         example_inputs: Sequence["InputType"],
        local: bool,
-         remote_cache: Optional[RemoteCache[JsonDataTy]],
+         remote_cache: Optional["RemoteCache[JsonDataTy]"],
    ) -> None:
        """
        Store a serialized CompiledFxGraph on disk.
@@ -1361,8 +1358,8 @@ def _check_can_cache(gm: torch.fx.GraphModule) -> None:
    @staticmethod
    def prepare_key(
        gm: torch.fx.GraphModule,
-         example_inputs: Sequence[InputType],
-         fx_kwargs: _CompileFxKwargs,
+         example_inputs: Sequence["InputType"],
+         fx_kwargs: "_CompileFxKwargs",
        inputs_to_check: Sequence[int],
        remote: bool,
    ) -> tuple[Optional[tuple[str, list[str]]], dict[str, Any]]:
@@ -1396,7 +1393,7 @@ def prepare_key(
        return (key, debug_lines), {}

    @staticmethod
-     def get_remote_cache() -> Optional[RemoteCache[JsonDataTy]]:
+     def get_remote_cache() -> Optional["RemoteCache[JsonDataTy]"]:
        """
        Attempts to load the remote cache, returns None on error.
        """
@@ -1412,15 +1409,15 @@ def get_remote_cache() -> Optional[RemoteCache[JsonDataTy]]:
    def load_with_key(
        key: str,
        debug_lines: list[str],
-         example_inputs: Sequence[InputType],
+         example_inputs: Sequence["InputType"],
        local: bool,
-         remote_cache: Optional[RemoteCache[JsonDataTy]],
+         remote_cache: Optional["RemoteCache[JsonDataTy]"],
        is_backward: bool,
-         constants: CompiledFxGraphConstants,
+         constants: "CompiledFxGraphConstants",
        evaluate_guards: Optional[
            Callable[[str, Union[list[int], list[torch.SymInt]]], bool]
        ] = None,
-     ) -> tuple[Optional[CompiledFxGraph], dict[str, Any]]:
+     ) -> tuple[Optional["CompiledFxGraph"], dict[str, Any]]:
        """
        Lookup the graph with the given key, and return results and metadata.
        Doesn't do any logging on its own, because AOTAutograd handles a cache miss
@@ -1535,7 +1532,7 @@ class AotCodeCompiler:
    @classmethod
    def compile(
        cls,
-         graph: GraphLowering,
+         graph: "GraphLowering",
        wrapper_code: str,
        kernel_code: str,
        serialized_extern_kernel_nodes: Optional[str],
@@ -2252,7 +2249,7 @@ def _load_library(cls, path: str, key: str) -> Union[CDLL, ModuleType]:
            raise

    @classmethod
-     def _get_uncompiled_header(cls, device: str) -> str | None:
+     def _get_uncompiled_header(cls, device: str) -> Optional[str]:
        """
        Given a device type, returns the path to a CPP header file to be precompiled.
        Currently, this is only utilized by the cpp_wrapper classes.
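The return annotation switches from `str | None` to `Optional[str]`, presumably because the PEP 604 union syntax is only evaluable at runtime on Python 3.10+, whereas `typing.Optional` also works on older interpreters once annotations are no longer lazy. A small sketch with a hypothetical `locate_header` helper (not the method above):

from typing import Optional


def locate_header(name: str) -> Optional[str]:
    # Same meaning as `str | None` for type checkers, but the annotation
    # can still be evaluated as an expression on Python 3.9.
    return name if name.endswith(".h") else None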
@@ -2472,7 +2469,8 @@ def _load_library_inner(cls, path: str, key: str) -> ModuleType:
        assert spec is not None
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
-         spec.loader.exec_module(module)  # type: ignore[union-attr]
+         assert spec.loader is not None
+         spec.loader.exec_module(module)
        return module

    @classmethod
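The hunk above swaps a `# type: ignore[union-attr]` for an explicit `assert spec.loader is not None`, which narrows the Optional loader for mypy. A standalone sketch of the same pattern, as a hypothetical module-level helper rather than the classmethod itself:

import importlib.util
import sys
from types import ModuleType


def load_module_from_path(module_name: str, path: str) -> ModuleType:
    spec = importlib.util.spec_from_file_location(module_name, path)
    assert spec is not None
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    # spec.loader is typed Optional[Loader]; the assert narrows it, so the
    # next call needs no ignore comment.
    assert spec.loader is not None
    spec.loader.exec_module(module)
    return module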
@@ -2592,7 +2590,7 @@ class CppWrapperCodeCache(CppPythonBindingsCodeCache):
    )

    @classmethod
-     def _get_uncompiled_header(cls, device: str) -> str | None:
+     def _get_uncompiled_header(cls, device: str) -> Optional[str]:
        """
        Given a device type, returns the path to a CPP header file to be precompiled.
        Currently, this is only utilized by the cpp_wrapper classes.
@@ -2679,7 +2677,9 @@ class HalideCodeCache(CppPythonBindingsCodeCache):
    )

    @classmethod
-     def _codegen_buffer(cls, name: str, arg: HalideInputSpec, cuda: bool) -> list[str]:
+     def _codegen_buffer(
+         cls, name: str, arg: "HalideInputSpec", cuda: bool
+     ) -> list[str]:
        assert arg.shape is not None
        assert arg.stride is not None and len(arg.shape) == len(arg.stride)
        assert arg.offset is not None
@@ -2713,7 +2713,7 @@ def _codegen_buffer(cls, name: str, arg: HalideInputSpec, cuda: bool) -> list[st
        ]

    @classmethod
-     def _codegen_glue(cls, meta: HalideMeta, headerfile: object) -> str:
+     def _codegen_glue(cls, meta: "HalideMeta", headerfile: object) -> str:
        is_cuda = meta.is_cuda()
        assert is_cuda is ("user_context" in meta.target)
        assert "no_runtime" in meta.target
@@ -2821,7 +2821,7 @@ def find_header(name: str) -> str:

    @classmethod
    def generate_halide_async(
-         cls, meta: HalideMeta, source_code: str, submit_fn: Any = None
+         cls, meta: "HalideMeta", source_code: str, submit_fn: Any = None
    ) -> Callable[[], Any]:
        dirpath = Path(
            get_path(
@@ -2961,6 +2961,7 @@ def _worker_task_halide(lockfile: str, jobs: list[partial[Any]]) -> None:
                job()
    except subprocess.SubprocessError as e:
        if os.environ.get("HALIDE_REPRO") == "1":
+             cmd: list[Any]
            python, script, *cmd = getattr(e, "cmd", ("", "", ""))
            if os.path.basename(python).startswith("python"):
                code = open(script).read()
@@ -2971,7 +2972,9 @@ class Out:
                    def __repr__(self) -> str:
                        return "out"

-                 cmd[cmd.index("-o") + 1] = Out()  # type: ignore[call-overload]
+                 ci = cmd.index("-o")
+                 assert isinstance(ci, int)
+                 cmd[ci + 1] = Out()
                repl = textwrap.indent(
                    textwrap.dedent(
                        f"""\
@@ -3098,7 +3101,7 @@ def parse_stack_trace(stack_trace: str) -> list[dict[str, Any]]:


def _load_triton_kernel_from_source(
    kernel_name: str, source_code: str
- ) -> CachingAutotuner:
+ ) -> "CachingAutotuner":
    return getattr(PyCodeCache.load(source_code), kernel_name)

@@ -3557,7 +3560,7 @@ def __init__(
        self.result_fn = result_fn
        self.future = future

-     def result(self) -> Callable[..., Any]:  # type: ignore[override]
+     def result(self) -> Callable[..., Any]:
        return self.result_fn()


@@ -3566,7 +3569,7 @@ class StaticAutotunerFuture(CodeCacheFuture):
    A statically launchable CachingAutotuner, loaded from TritonBundler
    """

-     def __init__(self, static_autotuner: CachingAutotuner) -> None:
+     def __init__(self, static_autotuner: "CachingAutotuner") -> None:
        # Pickled version of CachingAutotuner
        self.static_autotuner = static_autotuner
        # This needs to be set in AsyncCompile.triton, in case
@@ -3575,10 +3578,10 @@ def __init__(self, static_autotuner: CachingAutotuner) -> None:
        # since it can be very large.
        self.reload_kernel_from_src: Optional[Callable[[], Any]] = None

-     def result(self) -> CachingAutotuner:
+     def result(self) -> "CachingAutotuner":
        assert self.reload_kernel_from_src is not None
        with dynamo_timed("StaticAutotunerFuture.warm_precompile"):
-             self.static_autotuner.precompile(  # type: ignore[union-attr]
+             self.static_autotuner.precompile(
                warm_cache_only=False,
                reload_kernel=self.reload_kernel_from_src,
                static_triton_bundle_key=None,  # no need to save again