WIP: Fix caching when output has unbacked · pytorch/pytorch@e59e26a · GitHub

Commit e59e26a

WIP: Fix caching when output has unbacked
ghstack-source-id: 4500c0a
Pull Request resolved: #153034
1 parent a28dcdb commit e59e26a

3 files changed: +101 -32 lines

torch/_subclasses/_fake_tensor_utils.py
Lines changed: 3 additions & 0 deletions

@@ -217,6 +217,7 @@ class _CacheKeyState:
     # We track the SymNodes so when we get the output we can see if it exactly
     # matches one of the inputs so we can uncache it properly.
     sym_node_lookup: dict[int, int]  # id(SymNode) -> index
+    known_symbols: set[sympy.Basic]

     # There are cases where we're asked to perform an op when we have no
     # ShapeEnv on the FakeTensorMode - but for SymNodes we MUST have a
@@ -226,6 +227,7 @@ class _CacheKeyState:

     def __init__(self, shape_env: Optional[ShapeEnv] = None) -> None:
         self.sym_node_lookup = {}
+        self.known_symbols = set()
         self.shape_env = shape_env

     def cache_on_shape_env(self) -> bool:
@@ -247,6 +249,7 @@ def convert_sym_int(self, result: list[object], arg: SymInt) -> None:
             result.append(_InputBackref(self.sym_node_lookup[node_id]))
         else:
             self.sym_node_lookup[node_id] = len(result)
+            self.known_symbols.add(arg.node.expr)
             if self.shape_env is None:
                 self.shape_env = arg.node.shape_env
             result.append(_PySymInputStub(arg))
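The new known_symbols field records, while the cache key is built, the sympy expression behind every SymInt input so the output can later be checked against it. Below is a minimal, self-contained sketch of that idea using plain sympy and hypothetical helper names (not the real _CacheKeyState API), simplified to free symbols rather than filtering to unbacked ones:

import sympy

# Hypothetical stand-in for _CacheKeyState.known_symbols.
known_symbols: set[sympy.Basic] = set()

def record_input(expr: sympy.Expr) -> None:
    # Analogue of convert_sym_int: remember the symbols each symbolic input contributes.
    known_symbols.update(expr.free_symbols)

def output_introduces_new_symbols(expr: sympy.Expr) -> bool:
    # Analogue of the output check: a free symbol never seen in the inputs
    # means the output cannot be reconstructed from the cache key alone.
    return bool(expr.free_symbols - known_symbols)

u0, u1 = sympy.symbols("u0 u1", integer=True)
record_input(u0 + 1)
assert not output_introduces_new_symbols(2 * u0)  # only input symbols: cacheable
assert output_introduces_new_symbols(u0 + u1)     # fresh u1: must bypass the cache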

torch/_subclasses/fake_tensor.py
Lines changed: 97 additions & 31 deletions

@@ -1164,7 +1164,7 @@ class DispatchCacheInfo:


 class FakeTensorMode(TorchDispatchMode):
-    cache: dict[_DispatchCacheKey, _DispatchCacheEntry] = {}
+    cache: dict[_DispatchCacheKey, Optional[_DispatchCacheEntry]] = {}
     cache_hits: int = 0
     cache_misses: int = 0
     cache_bypasses: dict[str, int] = defaultdict(int)
@@ -1418,33 +1418,65 @@ def _cached_dispatch_impl(
         Lookup a cache entry for the given arguments. If none exists, dispatch
         and cache the result (if the result is eligible for caching).
         """
-        output: object = _UNASSIGNED
         try:
             state = _CacheKeyState(self.shape_env)
             key = self._cache_key(state, func, args, kwargs)
-            if state.cache_on_shape_env():
-                assert state.shape_env is not None
-                cache = state.shape_env.fake_tensor_cache
-            else:
-                cache = FakeTensorMode.cache
-            entry = cache.get(key, None)
-            if entry is not None:
-                output = self._output_from_cache_entry(state, entry, key, func, args)
-                FakeTensorMode.cache_hits += 1
-                if self.cache_crosscheck_enabled:
-                    # For debugging / testing: Validate that the output synthesized
-                    # from the cache matches the output created by normal dispatch.
-                    with disable_fake_tensor_cache(self):
-                        self._crosscheck_cache_output(output, func, types, args, kwargs)
-            else:
-                self._validate_cache_key(func, args, kwargs)
-                output = self._dispatch_impl(func, types, args, kwargs)
-                entry = self._make_cache_entry(state, key, func, args, kwargs, output)
-                key.strip_shape_env()
-                cache[key] = entry
-                FakeTensorMode.cache_misses += 1
         except _BypassDispatchCache as e:
+            # We couldn't create the cache key at all
+            FakeTensorMode.cache_bypasses[e.reason] += 1
+            return self._dispatch_impl(func, types, args, kwargs)
+
+        if state.cache_on_shape_env():
+            assert state.shape_env is not None
+            cache = state.shape_env.fake_tensor_cache
+            set_cache_key = _set_cache_key_for_shape_env
+        else:
+            cache = FakeTensorMode.cache
+            set_cache_key = _set_cache_key
+        entry = cache.get(key, _UNASSIGNED)
+
+        if entry is None:
+            # This represents a negative cache entry - we already saw that the
+            # output is uncachable. Compute it from first principles.
+            return self._dispatch_impl(func, types, args, kwargs)
+
+        if entry is not _UNASSIGNED:
+            # We have a cache entry.
+            if TYPE_CHECKING:
+                assert isinstance(entry, _DispatchCacheEntry)
+            output = self._output_from_cache_entry(state, entry, key, func, args)
+            FakeTensorMode.cache_hits += 1
+            if self.cache_crosscheck_enabled:
+                # For debugging / testing: Validate that the output synthesized
+                # from the cache matches the output created by normal dispatch.
+                with disable_fake_tensor_cache(self):
+                    self._crosscheck_cache_output(output, func, types, args, kwargs)
+            return output
+
+        # We don't have a cache entry.
+        try:
+            self._validate_cache_key(func, args, kwargs)
+        except _BypassDispatchCache as e:
+            # We ran "extra" checks on the cache key and determined that it's no
+            # good. Record the reason and mark it so we don't bother validating
+            # again.
+            FakeTensorMode.cache_bypasses[e.reason] += 1
+            set_cache_key(cache, key, None)
+            return self._dispatch_impl(func, types, args, kwargs)
+
+        output = self._dispatch_impl(func, types, args, kwargs)
+        try:
+            entry = self._make_cache_entry(state, key, func, args, kwargs, output)
+        except _BypassDispatchCache as e:
+            # We had trouble making the cache entry. Record the reason and mark
+            # it - but note that at this point we DO have a valid output already
+            # so no reason to recompute it.
             FakeTensorMode.cache_bypasses[e.reason] += 1
+            set_cache_key(cache, key, None)
+            return output
+
+        set_cache_key(cache, key, entry)
+        FakeTensorMode.cache_misses += 1

         if output is _UNASSIGNED:
             output = self._dispatch_impl(func, types, args, kwargs)
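The rewritten lookup distinguishes three outcomes: the key was never seen (the _UNASSIGNED sentinel), the key was seen but its output is known to be uncachable (a negative entry stored as None), or a real entry exists. A condensed, standalone sketch of that pattern with hypothetical names, not the FakeTensorMode API:

from typing import Any, Callable, Optional

_UNSET = object()  # stand-in for the _UNASSIGNED sentinel

class Uncachable(Exception):
    """Stand-in for _BypassDispatchCache raised while building a cache entry."""

def cached_call(
    cache: dict[Any, Optional[Any]],
    key: Any,
    compute: Callable[[], Any],
    make_entry: Callable[[Any], Any],
    output_from_entry: Callable[[Any], Any],
) -> Any:
    entry = cache.get(key, _UNSET)
    if entry is None:
        # Negative entry: we already learned this key's output can't be cached.
        return compute()
    if entry is not _UNSET:
        # Positive entry: rebuild the output from the cached description.
        return output_from_entry(entry)
    output = compute()
    try:
        cache[key] = make_entry(output)   # may decide the output is uncachable
    except Uncachable:
        cache[key] = None                 # record the negative result, keep the output
    return output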
@@ -1634,17 +1666,15 @@ def _validate_output_for_cache_entry(
         kwargs: Mapping[str, object],
         output: Optional[FakeTensor],
     ) -> None:
-        from torch.fx.experimental.symbolic_shapes import has_free_unbacked_symbols
-
+        # Is this even possible?
         if isinstance(output, (int, type(None))):
             return

-        if isinstance(output, torch.SymInt):
-            if has_free_unbacked_symbols(output):
-                # This is unreachable but adding the check for extra safety in
-                # case we change code path in future.
-                raise _BypassDispatchCache("unbacked symbol in output")
-            return
+        if _has_unrepresented_unbacked_symbols(state, output):
+            # Unbacked symbols are fine - but only if they're also represented
+            # in the input. If there are any new unbacked symbols then we can't
+            # cache this output.
+            raise _BypassDispatchCache("unbacked symbol in output")

         # Some ops return tuples of Tensors, but it's rare, so avoid
         # the complexity of caching other types.
@@ -2886,6 +2916,25 @@ def from_tensor(
 _StoragePointer = object


+def _has_unrepresented_unbacked_symbols(
+    state: _CacheKeyState, output: Optional[FakeTensor]
+) -> bool:
+    from sympy.core.traversal import iterargs
+
+    from torch.fx.experimental.symbolic_shapes import _iterate_exprs
+    from torch.utils._sympy.symbol import symbol_is_type, SymT
+
+    for s in _iterate_exprs(output):
+        for arg in iterargs(s):
+            if arg.is_Symbol and symbol_is_type(
+                arg, (SymT.UNBACKED_INT, SymT.UNBACKED_FLOAT)
+            ):
+                if arg not in state.known_symbols:
+                    return True
+
+    return False
+
+
 # NB: returns fake tensors
 def run_fallback_kernel(
     fake_mode: FakeTensorMode,
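As a hedged usage note on the helper above: symbol_is_type classifies a sympy symbol by its name prefix, and unbacked SymInts conventionally surface as u0, u1, ... A small sketch under that naming assumption (the tuple form of the call mirrors the one used in the diff):

import sympy
from torch.utils._sympy.symbol import symbol_is_type, SymT

u0 = sympy.Symbol("u0", integer=True)  # unbacked-int style name
s0 = sympy.Symbol("s0", integer=True)  # backed size-style name

# Only the u-prefixed symbol counts as unbacked; the size symbol does not.
assert symbol_is_type(u0, (SymT.UNBACKED_INT, SymT.UNBACKED_FLOAT))
assert not symbol_is_type(s0, (SymT.UNBACKED_INT, SymT.UNBACKED_FLOAT))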
@@ -2951,6 +3000,23 @@ def map_out(e: T) -> Union[T, FakeTensor]:
     return pytree.tree_map(map_out, r)


+def _set_cache_key_for_shape_env(
+    cache: dict[_DispatchCacheKey, Optional[_DispatchCacheEntry]],
+    key: _DispatchCacheKey,
+    entry: Optional[_DispatchCacheEntry],
+) -> None:
+    key.strip_shape_env()
+    cache[key] = entry
+
+
+def _set_cache_key(
+    cache: dict[_DispatchCacheKey, Optional[_DispatchCacheEntry]],
+    key: _DispatchCacheKey,
+    entry: Optional[_DispatchCacheEntry],
+) -> None:
+    cache[key] = entry
+
+
 # Just for use to allow copying a module to fake tensors,
 # does not apply elsewhere
 class FakeCopyMode(TorchFunctionMode):

torch/fx/experimental/symbolic_shapes.py
Lines changed: 1 addition & 1 deletion

@@ -3307,7 +3307,7 @@ def __init__(
         # with the GC.
         self.fake_tensor_cache: dict[
             torch._subclasses.fake_tensor._DispatchCacheKey,
-            torch._subclasses.fake_tensor._DispatchCacheEntry,
+            Optional[torch._subclasses.fake_tensor._DispatchCacheEntry],
         ] = {}

     @contextlib.contextmanager