pytorch · zeshengzong · Nov 20, 2024 · Nov 22, 2024 · Dec 13, 2024 · Dec 19, 2024
diff --git a/aten/src/ATen/native/Lerp.cpp b/aten/src/ATen/native/Lerp.cpp
@@ -16,10 +16,16 @@ TORCH_META_FUNC(lerp_Tensor)(
     const Tensor& self, const Tensor& end, const Tensor& weight) {
   TORCH_CHECK(self.dtype() == end.dtype(), "expected dtype ", self.dtype(),
               " for `end` but got dtype ", end.dtype());
-  TORCH_CHECK(self.dtype() == weight.dtype(), "expected dtype ", self.dtype(),
-              " for `weight` but got dtype ", weight.dtype());
+  bool promote_weight = weight.dim() == 0;
+  if (!promote_weight) {
+    TORCH_CHECK(self.dtype() == weight.dtype(), "expected dtype ", self.dtype(),
+                " for `weight` but got dtype ", weight.dtype());
+  }
   build(at::TensorIteratorConfig()
         .allow_cpu_scalars(true)
+        .promote_inputs_to_common_dtype(promote_weight)
+        .enforce_safe_casting_to_output(promote_weight)
+        .cast_common_dtype_to_outputs(promote_weight)
 if (config.enforce_safe_casting_to_output_ && op.is_output && op.current_dtype != common_dtype_) { 
   TORCH_CHECK(canCast(common_dtype_, op.current_dtype), 
               "result type ", common_dtype_, " can't be cast to the " 
               "desired output type ", op.current_dtype); 
 } 
 if (common_device == kCPU) { 
   // Casts to outputs by creating temporaries of the correct dtype (if needed) 
   // NB: we skip this on is_meta_, because the temporary allocation here is 
   // unnecessary if we aren't going to actually do the compute 
   if (config.cast_common_dtype_to_outputs_ && op.is_output && op.current_dtype != common_dtype_ && !is_meta_) { 
     TORCH_INTERNAL_ASSERT(op.tensor_base().defined()); 
     // Marker [Output original_tensor is set] 
     // NB: do NOT use set_output here, as the temporary is NOT a true output; 
     // op.tensor is the true output and it was pre-provided for us. 
     // TODO: The logic for cast_outputs will need to be handled by the 
     // structured kernels implementation.  What probably should happen 
     // is that we pass in the inferred dtype into the out kernel, and 
     // then after calling the out kernel, do the conversion (which 
     // is cast_outputs here), but integrating this with existing 
     // TensorIterator will take a little doing 
     op.exchange_tensor(c10::MaybeOwned<TensorBase>::owned( 
         at::empty_like(op.tensor(), 
                        op.tensor_base().options().dtype(common_dtype_), 
                        LEGACY_CONTIGUOUS_MEMORY_FORMAT))); 
     if (!names_.empty()) { 
       namedinference::propagate_names(op.tensor_base(), names_); 
     } 
     op.current_dtype = common_dtype_; 
     op.target_dtype = common_dtype_; 
   } 
 // Shared TensorIteratorConfig preset used by build_binary_op and
 // build_borrowing_binary_op. It turns on, in order: memory-overlap checking,
 // CPU scalar operands, promotion of inputs to a common dtype, casting of the
 // common dtype to outputs, and the safe-cast check on outputs.
 // The macro expands to a single chained expression, so callers can keep
 // appending .add_*() calls to it.
 // The final line intentionally carries no line continuation: a continuation
 // there would splice whatever line follows the macro into its definition.
 #define BINARY_OP_CONFIG()                              \
   TensorIteratorConfig()                                \
     .set_check_mem_overlap(true)                        \
     .allow_cpu_scalars(true)                            \
     .promote_inputs_to_common_dtype(true)               \
     .cast_common_dtype_to_outputs(true)                 \
     .enforce_safe_casting_to_output(true)
 // Configures this iterator for a binary elementwise operation.
 // Starts from BINARY_OP_CONFIG(), registers `out` as the output and `a`, `b`
 // as const inputs (in that order), then hands the config to build().
 // This variant uses the add_owned_* registration calls, whereas
 // build_borrowing_binary_op uses add_output / add_const_input.
 // NOTE(review): presumably the "owned" calls make the iterator keep its own
 // reference to each tensor, so the arguments need not outlive it - confirm
 // against TensorIteratorConfig.
 void TensorIteratorBase::build_binary_op(const TensorBase& out, const TensorBase& a, const TensorBase& b) { 
   build(BINARY_OP_CONFIG() 
       .add_owned_output(out) 
       .add_owned_const_input(a) 
       .add_owned_const_input(b)); 
 } 
 // Configures this iterator for a binary elementwise operation, same operand
 // order as build_binary_op: `out` is the output, `a` and `b` are const inputs.
 // Starts from BINARY_OP_CONFIG() and hands the finished config to build().
 // This variant registers operands with add_output / add_const_input rather
 // than the add_owned_* calls.
 // NOTE(review): presumably the iterator only borrows the tensors here, so the
 // caller must keep `out`, `a` and `b` alive while it is in use - confirm
 // against TensorIteratorConfig.
 void TensorIteratorBase::build_borrowing_binary_op( 
     const TensorBase& out, const TensorBase& a, const TensorBase& b) { 
   build(BINARY_OP_CONFIG() 
       .add_output(out) 
       .add_const_input(a) 
       .add_const_input(b)); 
 } 
 if (config.enforce_safe_casting_to_output_ && op.is_output && op.current_dtype != common_dtype_) { 
   TORCH_CHECK(canCast(common_dtype_, op.current_dtype), 
               "result type ", common_dtype_, " can't be cast to the " 
               "desired output type ", op.current_dtype); 
 } 
 if (common_device == kCPU) { 
   // Casts to outputs by creating temporaries of the correct dtype (if needed) 
   // NB: we skip this on is_meta_, because the temporary allocation here is 
   // unnecessary if we aren't going to actually do the compute 
   if (config.cast_common_dtype_to_outputs_ && op.is_output && op.current_dtype != common_dtype_ && !is_meta_) { 
     TORCH_INTERNAL_ASSERT(op.tensor_base().defined()); 
     // Marker [Output original_tensor is set] 
     // NB: do NOT use set_output here, as the temporary is NOT a true output; 
     // op.tensor is the true output and it was pre-provided for us. 
     // TODO: The logic for cast_outputs will need to be handled by the 
     // structured kernels implementation.  What probably should happen 
     // is that we pass in the inferred dtype into the out kernel, and 
     // then after calling the out kernel, do the conversion (which 
     // is cast_outputs here), but integrating this with existing 
     // TensorIterator will take a little doing 
     op.exchange_tensor(c10::MaybeOwned<TensorBase>::owned( 
         at::empty_like(op.tensor(), 
                        op.tensor_base().options().dtype(common_dtype_), 
                        LEGACY_CONTIGUOUS_MEMORY_FORMAT))); 
     if (!names_.empty()) { 
       namedinference::propagate_names(op.tensor_base(), names_); 
     } 
     op.current_dtype = common_dtype_; 
     op.target_dtype = common_dtype_; 
   } 
 // Shared TensorIteratorConfig preset used by build_binary_op and
 // build_borrowing_binary_op. It turns on, in order: memory-overlap checking,
 // CPU scalar operands, promotion of inputs to a common dtype, casting of the
 // common dtype to outputs, and the safe-cast check on outputs.
 // The macro expands to a single chained expression, so callers can keep
 // appending .add_*() calls to it.
 // The final line intentionally carries no line continuation, so the macro
 // does not depend on the following line being blank.
 #define BINARY_OP_CONFIG()                              \
   TensorIteratorConfig()                                \
     .set_check_mem_overlap(true)                        \
     .allow_cpu_scalars(true)                            \
     .promote_inputs_to_common_dtype(true)               \
     .cast_common_dtype_to_outputs(true)                 \
     .enforce_safe_casting_to_output(true)
  
 // Configures this iterator for a binary elementwise operation.
 // Starts from BINARY_OP_CONFIG(), registers `out` as the output and `a`, `b`
 // as const inputs (in that order), then hands the config to build().
 // This variant uses the add_owned_* registration calls, whereas
 // build_borrowing_binary_op uses add_output / add_const_input.
 // NOTE(review): presumably the "owned" calls make the iterator keep its own
 // reference to each tensor, so the arguments need not outlive it - confirm
 // against TensorIteratorConfig.
 void TensorIteratorBase::build_binary_op(const TensorBase& out, const TensorBase& a, const TensorBase& b) { 
   build(BINARY_OP_CONFIG() 
       .add_owned_output(out) 
       .add_owned_const_input(a) 
       .add_owned_const_input(b)); 
 } 
  
 // Configures this iterator for a binary elementwise operation, same operand
 // order as build_binary_op: `out` is the output, `a` and `b` are const inputs.
 // Starts from BINARY_OP_CONFIG() and hands the finished config to build().
 // This variant registers operands with add_output / add_const_input rather
 // than the add_owned_* calls.
 // NOTE(review): presumably the iterator only borrows the tensors here, so the
 // caller must keep `out`, `a` and `b` alive while it is in use - confirm
 // against TensorIteratorConfig.
 void TensorIteratorBase::build_borrowing_binary_op( 
     const TensorBase& out, const TensorBase& a, const TensorBase& b) { 
   build(BINARY_OP_CONFIG() 
       .add_output(out) 
       .add_const_input(a) 
       .add_const_input(b)); 
 } 
         .add_output(maybe_get_output())
         .add_const_input(self)
         .add_const_input(end)

@@ -3519,6 +3519,24 @@ def test_lerp_lowp_cpu(self, device, dtype):
                 expected = torch.lerp(xref, yref, wref).to(dtype)
                 self.assertEqual(actual, expected, atol=0.0, rtol=0.0)
 
+    # Checks that torch.lerp accepts a 0-dim float32 `weight` tensor for every
+    # `start`/`end` dtype listed in @dtypes (including ones that differ from
+    # float32) and that the result matches the explicit lerp formula.
+    @dtypes(torch.float, torch.double, torch.cfloat, torch.cdouble)
+    def test_lerp_weight_scalar_tensor_promotion(self, device, dtype):
+        start = make_tensor((5, 5), dtype=dtype, device=device, low=1, high=100)
+        end = make_tensor((5, 5), dtype=dtype, device=device, low=1, high=100)
+        # Shape () makes `weight` 0-dim; its dtype is float32 regardless of `dtype`.
+        weight = torch.rand((), dtype=torch.float, device=device)
+
+        actual = torch.lerp(start, end, weight)
+        # Reference value: cast the weight to the operand dtype, then apply
+        # start + weight * (end - start) directly.
+        expected = start + weight.to(dtype) * (end - start)
+        self.assertEqual(expected, actual)
+
+    # Checks that a non-0-dim `weight` (shape (5, 5), float32) whose dtype differs
+    # from `start`/`end` is rejected with an "expected dtype" RuntimeError.
+    # torch.float is absent from @dtypes because it would match the weight's
+    # dtype, so no mismatch would occur.
+    @dtypes(torch.double, torch.cfloat, torch.cdouble)
+    def test_lerp_weight_tensor_promotion_error(self, device, dtype):
+        start = make_tensor((5, 5), dtype=dtype, device=device, low=1, high=100)
+        end = make_tensor((5, 5), dtype=dtype, device=device, low=1, high=100)
+        weight = torch.rand((5, 5), dtype=torch.float, device=device)
+        with self.assertRaisesRegex(RuntimeError, "expected dtype"):
+            torch.lerp(start, end, weight)
+
     def _test_logaddexp(self, device, dtype, base2):
         if base2:
             ref_func = np.logaddexp2

diff --git a/torch/_meta_registrations.py b/torch/_meta_registrations.py
@@ -6971,10 +6971,11 @@ def lerp(start, end, weight):
     )
     args = [start, end]
     if isinstance(weight, TensorLike):
-        torch._check(
-            start.dtype == weight.dtype,
-            lambda: f"expected dtype {start.dtype} for `weight`, but got dtype {weight.dtype}",
-        )
+        if weight.ndim != 0:
+            torch._check(
+                start.dtype == weight.dtype,
+                lambda: f"expected dtype {start.dtype} for `weight`, but got dtype {weight.dtype}",
+            )
         args.append(weight)
     return elementwise_meta(
         *args, type_promotion=ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT