[Autograd] Improve error for leaf tensors as out argument to fallback… · pytorch/pytorch@34a28f0 · GitHub


Commit 34a28f0

peterbell10 authored and pytorchmergebot committed
[Autograd] Improve error for leaf tensors as out argument to fallback (#121089)
Closes #120988

Currently, operators that hit the autograd fallback call `check_inplace` on all mutated inputs, including out arguments. This leads to a slightly confusing error message:

```
RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.
```

Compared to functions that don't fall back, which raise:

```
RuntimeError: add(): functions with out=... arguments don't support automatic differentiation, but one of the arguments requires grad.
```

This changes the error message to make clear the issue is with the out argument, but does not tighten the check to outright ban out arguments that require grad. Instead, I use the same checks from `check_inplace`, which allow non-leaf tensors that require grad to pass without error.

Pull Request resolved: #121089
Approved by: https://github.com/lezcano, https://github.com/soulitzer
ghstack dependencies: #121142
1 parent eae9751 commit 34a28f0
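
For context, the second error quoted above is what a regular out= overload such as `torch.add` already raises when the out tensor requires grad; a minimal sketch (not part of this commit) reproducing it:

```
import torch

x = torch.rand(2)
y = torch.rand(2)
# A leaf tensor that requires grad, passed as the out= argument.
out = torch.empty(2, requires_grad=True)

try:
    torch.add(x, y, out=out)
except RuntimeError as e:
    print(e)
    # add(): functions with out=... arguments don't support automatic
    # differentiation, but one of the arguments requires grad.
```

After this change, operators that go through the autograd fallback report the same out=-specific wording instead of the generic leaf-Variable in-place error.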

File tree

4 files changed: +95 −14 lines

test/test_ops.py

Lines changed: 32 additions & 0 deletions
@@ -999,6 +999,38 @@ def _case_four_transform(t):
         with self.assertRaises(RuntimeError, msg=msg_fail):
             op_out(out=out)
 
+    @ops(
+        [op for op in op_db if op.supports_out and (op.supports_autograd or op.is_factory_function)],
+        dtypes=OpDTypes.supported,
+        allowed_dtypes=[torch.float, torch.cfloat]
+    )
+    def test_out_requires_grad_error(self, device, dtype, op):
+        sample = first_sample(self, op.sample_inputs(device, dtype))
+
+        # Call op to get prototype for out arguments
+        expect = op(sample.input, *sample.args, **sample.kwargs)
+        any_requires_grad = False
+
+        def set_requires_grad(x):
+            nonlocal any_requires_grad
+            if isinstance(x, torch.Tensor) and (
+                x.is_floating_point() or x.is_complex()
+            ):
+                any_requires_grad = True
+                x.requires_grad_(True)
+            return x
+
+        out = pytree.tree_map_(set_requires_grad, expect)
+        if not any_requires_grad:
+            # Skip ops without any floating point outputs, e.g. isnan
+            return
+
+        msg = (
+            "functions with out=... arguments don't support automatic "
+            "differentiation, but one of the arguments requires grad."
+        )
+        with self.assertRaises(RuntimeError, msg=msg):
+            op(sample.input, *sample.args, **sample.kwargs, out=out)
 
     @ops(filter(reduction_dtype_filter, ops_and_refs), dtypes=(torch.int16,))
     def test_out_integral_dtype(self, device, dtype, op):
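
The new test relies on `pytree.tree_map_` to flip `requires_grad` in place on every floating-point or complex output. A standalone sketch of that helper on a hypothetical tuple of outputs (illustrative, not taken from the test suite):

```
import torch
import torch.utils._pytree as pytree

expect = (torch.rand(2), torch.rand(2, dtype=torch.cfloat))
any_requires_grad = False

def set_requires_grad(x):
    global any_requires_grad
    if isinstance(x, torch.Tensor) and (x.is_floating_point() or x.is_complex()):
        any_requires_grad = True
        x.requires_grad_(True)  # mutate in place; tree_map_ returns the original tree
    return x

out = pytree.tree_map_(set_requires_grad, expect)
print(any_requires_grad)               # True
print([t.requires_grad for t in out])  # [True, True]
```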

torch/csrc/autograd/VariableTypeUtils.h

Lines changed: 41 additions & 12 deletions
@@ -31,31 +31,60 @@
 
 namespace torch {
 namespace autograd {
+enum class can_mutate_inplace_result {
+  success,
+  non_default_backward_view,
+  view_of_leaf,
+  is_leaf,
+};
 
 // The requires_grad argument is used to know if the inplace operation needs
 // gradient to be setup for it.
 // In particular, we can have tensor.requires_grad() != requires_grad when
 // writing a Tensor that requires gradients inplace into a Tensor that does not
 // require gradients: a = torch.rand(2) b = torch.rand(2, requires_grad=True)
 // a.copy_(b)
+inline can_mutate_inplace_result can_mutate_inplace(
+    const at::Tensor& tensor,
+    bool requires_grad) {
+  if (!requires_grad || !GradMode::is_enabled()) {
+    return can_mutate_inplace_result::success;
+  }
+  auto diff_view_meta = impl::get_view_autograd_meta(tensor);
+  if (diff_view_meta && diff_view_meta->has_bw_view()) {
+    if (diff_view_meta->get_creation_meta() != CreationMeta::DEFAULT) {
+      return can_mutate_inplace_result::non_default_backward_view;
+    }
+    if (tensor.requires_grad() && tensor._base().is_leaf()) {
+      return can_mutate_inplace_result::view_of_leaf;
+    }
+  }
+  if (tensor.requires_grad() && tensor.is_leaf()) {
+    return can_mutate_inplace_result::is_leaf;
+  }
+  return can_mutate_inplace_result::success;
+}
+
 inline void check_inplace(const at::Tensor& tensor, bool requires_grad) {
-  if (requires_grad && GradMode::is_enabled()) {
-    auto diff_view_meta = impl::get_view_autograd_meta(tensor);
-    if (diff_view_meta && diff_view_meta->has_bw_view()) {
-      // This can throw or warn
-      handle_view_on_rebase(diff_view_meta);
-      if (tensor.requires_grad() && tensor._base().is_leaf()) {
-        TORCH_CHECK(
-            false,
-            "a view of a leaf Variable that requires grad is being used in an in-place operation.");
-      }
+  switch (can_mutate_inplace(tensor, requires_grad)) {
+    case can_mutate_inplace_result::success:
+      return;
+    case can_mutate_inplace_result::non_default_backward_view: {
+      return handle_view_on_rebase(impl::get_view_autograd_meta(tensor));
     }
-    if (tensor.requires_grad() && tensor.is_leaf()) {
+    case can_mutate_inplace_result::view_of_leaf:
+      TORCH_CHECK(
+          false,
+          "a view of a leaf Variable that requires grad is being used in an in-place operation.");
+      break;
+
+    case can_mutate_inplace_result::is_leaf:
       TORCH_CHECK(
           false,
           "a leaf Variable that requires grad is being used in an in-place operation.");
-    }
+      break;
   }
+  TORCH_INTERNAL_ASSERT(false);
 }
 
 inline void check_inplace(at::ITensorListRef tensors, bool requires_grad) {
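
The enum cases correspond to familiar user-visible situations. A small Python sketch (illustrative only; the enum itself is internal to the C++ autograd layer) of which tensors the in-place checks reject:

```
import torch

leaf = torch.rand(2, requires_grad=True)   # is_leaf: in-place mutation is rejected
view_of_leaf = leaf.view(2)                # view_of_leaf: also rejected
non_leaf = leaf * 2                        # requires grad but is not a leaf: allowed

non_leaf.add_(1)  # fine, gradients can still flow back to `leaf`

for t in (leaf, view_of_leaf):
    try:
        t.add_(1)
    except RuntimeError as e:
        print(e)
# a leaf Variable that requires grad is being used in an in-place operation.
# a view of a leaf Variable that requires grad is being used in an in-place operation.
```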

torch/csrc/autograd/autograd_not_implemented_fallback.cpp

Lines changed: 16 additions & 1 deletion
@@ -299,11 +299,26 @@ static void autogradNotImplementedFallbackImpl(
       num_arguments);
 
   const bool any_requires_grad = !tensors_requiring_grad_on_stack.empty();
+  const bool has_out_arg = std::any_of(
+      schema.arguments().begin(),
+      schema.arguments().end(),
+      [](const c10::Argument& arg) { return arg.is_out(); });
 
   _foreach_tensor(
       [&](size_t _, size_t i, const at::Tensor& t) {
         if (schema.is_mutable({c10::SchemaArgType::input, i})) {
-          check_inplace(t, any_requires_grad);
+          if (has_out_arg) {
+            // Normally out argument overloads would not support any arguments
+            // that require grad. However, we loosen this check to maintain
+            // backward compatibility.
+            // See https://github.com/pytorch/pytorch/issues/120988
+            if (can_mutate_inplace(t, any_requires_grad) !=
+                can_mutate_inplace_result::success) {
+              throw_error_out_requires_grad(schema.name().c_str());
+            }
+          } else {
+            check_inplace(t, any_requires_grad);
+          }
         }
       },
       stack,
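
A rough Python model of the decision the fallback now makes for mutated out= arguments; the helper names and string results below are illustrative stand-ins for the C++ helpers above, not a PyTorch API:

```
import torch

def can_mutate_inplace(t: torch.Tensor, requires_grad: bool) -> str:
    # Simplified model of the C++ helper: only leaf tensors (or views of
    # leaves) that require grad are rejected; the non-default backward-view
    # case is omitted here for brevity.
    if not requires_grad or not torch.is_grad_enabled():
        return "success"
    if t._is_view() and t.requires_grad and t._base.is_leaf:
        return "view_of_leaf"
    if t.requires_grad and t.is_leaf:
        return "is_leaf"
    return "success"

def check_out_argument(t: torch.Tensor, any_requires_grad: bool, op_name: str) -> None:
    # Out arguments now raise the dedicated out=... error, but only for the
    # tensors that would also be rejected for an in-place mutation, so
    # non-leaf tensors that require grad keep working (backward compatibility).
    if can_mutate_inplace(t, any_requires_grad) != "success":
        raise RuntimeError(
            f"{op_name}(): functions with out=... arguments don't support "
            "automatic differentiation, but one of the arguments requires grad."
        )

leaf = torch.empty(2, requires_grad=True)
non_leaf = leaf.clone()
print(can_mutate_inplace(leaf, True))      # is_leaf
print(can_mutate_inplace(non_leaf, True))  # success
```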

torch/testing/_internal/common_methods_invocations.py

Lines changed: 6 additions & 1 deletion
@@ -14005,6 +14005,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1):
            operator_variant=None,
            inplace_operator_variant=None,
            check_batched_gradgrad=False,
+           supports_out=False,
            supports_forward_ad=True,
            supports_fwgrad_bwgrad=True,
            check_batched_forward_grad=False,
@@ -15567,7 +15568,11 @@ def reference_flatten(input, start_dim=0, end_dim=-1):
            sample_inputs_func=sample_inputs_split_with_sizes,
            supports_out=True,
            supports_forward_ad=True,
-           supports_fwgrad_bwgrad=True),
+           supports_fwgrad_bwgrad=True,
+           skips=(
+               # No error raised
+               DecorateInfo(unittest.expectedFailure, "TestCommon", "test_out_requires_grad_error"),
+           )),
     BinaryUfuncInfo('__radd__',
                     op=torch.Tensor.__radd__,
                     dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool),

0 commit comments
