**Motivation**
GitHub issue #38655
Current PyTorch optimizer implementations are inefficient when a model has many small parameter tensors: every tensor gets its own kernel launch, and the launch overhead dominates the step. We need to reduce the number of kernels we launch.
A good reference is [NVIDIA's Apex](https://github.com/NVIDIA/apex).
To track progress, we will take PyTorch's DCGAN model with the Adam optimizer and, once the optimizer is reimplemented with tensor lists, benchmark its performance against the original model version, Apex's version with the original Adam optimizer, and Apex's FusedAdam optimizer.
**In this PR**
- Adding a `std::vector<Tensor> _foreach_add_(TensorList tensors, Scalar scalar)` API
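A minimal sketch of how the new op could be used, assuming it is exposed in Python as `torch._foreach_add_` (the Python binding name is an assumption; only the scalar variant is added in this PR):

```python
import torch

# A list of differently sized tensors, standing in for small optimizer
# parameters.
params = [torch.randn(100), torch.randn(200), torch.randn(50)]

# Per-tensor loop: one kernel launch for each tensor in the list.
for p in params:
    p.add_(1.0)

# Multi-tensor version: the whole list is handled with far fewer launches.
torch._foreach_add_(params, 1.0)
```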
**Tests**
Tested via unit tests
**Plan for the next PRs**
1. APIs
- Binary Ops for list with Scalar
- Binary Ops for list with list
- Unary Ops for list
2. Rewrite PyTorch optimizers to use for-each operators for the performance gains (see the sketch after this list).
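As a rough illustration of step 2, a plain SGD-style update over all parameters could collapse into a single multi-tensor call once the list-with-list binary ops exist. The `_foreach_add_` overload taking two tensor lists with an `alpha` argument is an assumption here, not part of this PR:

```python
import torch

lr = 0.1
params = [torch.randn(100) for _ in range(3)]
grads = [torch.randn_like(p) for p in params]

# Today: one kernel launch per parameter.
for p, g in zip(params, grads):
    p.add_(g, alpha=-lr)

# With a (planned, hypothetical) list-with-list binary op, the whole
# update becomes one multi-tensor launch:
torch._foreach_add_(params, grads, alpha=-lr)
```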
[ghstack-poisoned]