pytorch
diff --git a/‎aten/src/ATen/native/mps/operations/TensorCompare.mm
Lines changed: 28 additions & 27 deletions b/‎aten/src/ATen/native/mps/operations/TensorCompare.mm
Lines changed: 28 additions & 27 deletions
diff --git a/‎test/test_mps.py
Lines changed: 7 additions & 2 deletions b/‎test/test_mps.py
Lines changed: 7 additions & 2 deletions
diff --git a/‎torch/testing/_internal/common_methods_invocations.py
Lines changed: 9 additions & 0 deletions b/‎torch/testing/_internal/common_methods_invocations.py
Lines changed: 9 additions & 0 deletions
@@ -29,45 +29,42 @@
 
 static void clamp_mps_graph(CachedGraph* cachedGraph,
                             const Tensor& input_tensor,
-                            const Tensor& min_tensor,
-                            const Tensor& max_tensor) {
-  auto input_dtype = input_tensor.scalar_type();
-  auto min_dtype = cachedGraph->minTensor ? min_tensor.scalar_type() : input_dtype;
-  auto max_dtype = cachedGraph->maxTensor ? max_tensor.scalar_type() : input_dtype;
-
+                            const at::ScalarType min_type,
+                            const at::ScalarType max_type,
+                            const at::ScalarType result_type) {
   MPSGraph* mpsGraph = cachedGraph->graph();
 
   cachedGraph->inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, input_tensor);
 
   auto minTensor = cachedGraph->minTensor;
   auto maxTensor = cachedGraph->maxTensor;
+  auto inputTensor = cachedGraph->inputTensor;
 
-  if (input_dtype != min_dtype) {
-    minTensor = castMPSTensor(mpsGraph, cachedGraph->minTensor, input_dtype);
+  if (minTensor && min_type != result_type) {
+    minTensor = castMPSTensor(mpsGraph, minTensor, result_type);
+  }
+  if (maxTensor && max_type != result_type) {
+    maxTensor = castMPSTensor(mpsGraph, maxTensor, result_type);
   }
-  if (input_dtype != max_dtype) {
-    maxTensor = castMPSTensor(mpsGraph, cachedGraph->maxTensor, input_dtype);
+  if (input_tensor.scalar_type() != result_type) {
+    inputTensor = castMPSTensor(mpsGraph, inputTensor, result_type);
   }
-  if (c10::isIntegralType(input_dtype, /*includeBool=*/true)) {
+  if (c10::isIntegralType(result_type, /*includeBool=*/true)) {
     if (minTensor && maxTensor) {
-      cachedGraph->outputTensor = [mpsGraph clampWithTensor:cachedGraph->inputTensor
+      cachedGraph->outputTensor = [mpsGraph clampWithTensor:inputTensor
                                              minValueTensor:minTensor
                                              maxValueTensor:maxTensor
                                                        name:nil];
     } else if (maxTensor) {
-      cachedGraph->outputTensor = [mpsGraph minimumWithPrimaryTensor:cachedGraph->inputTensor
-                                                     secondaryTensor:maxTensor
-                                                                name:nil];
+      cachedGraph->outputTensor = [mpsGraph minimumWithPrimaryTensor:inputTensor secondaryTensor:maxTensor name:nil];
     } else if (minTensor) {
-      cachedGraph->outputTensor = [mpsGraph maximumWithPrimaryTensor:cachedGraph->inputTensor
-                                                     secondaryTensor:minTensor
-                                                                name:nil];
+      cachedGraph->outputTensor = [mpsGraph maximumWithPrimaryTensor:inputTensor secondaryTensor:minTensor name:nil];
     }
     return;
   }
   // clampWithTensor doesn't propagate NaN through so simulate it as composition of
   // maximumWithNaNPropagationWithPrimaryTensor and minimumWithNaNPropagationWithPrimaryTensor
-  auto outputTensor = cachedGraph->inputTensor;
+  auto outputTensor = inputTensor;
   if (minTensor) {
     outputTensor = [mpsGraph maximumWithNaNPropagationWithPrimaryTensor:outputTensor
                                                         secondaryTensor:minTensor
@@ -134,6 +131,8 @@ static void clamp_tensor_out_mps(const Tensor& input_t,
   if (output_t.numel() == 0)
     return;
 
+  auto result_type = output_t.scalar_type();
+
   IntArrayRef new_min_shape;
   IntArrayRef new_max_shape;
 
@@ -182,7 +181,7 @@ static void clamp_tensor_out_mps(const Tensor& input_t,
         ;
       }
 
-      clamp_mps_graph(newCachedGraph, input_t, min_opt_tensor, max_opt_tensor);
+      clamp_mps_graph(newCachedGraph, input_t, min_opt_tensor.scalar_type(), max_opt_tensor.scalar_type(), result_type);
     });
 
     bool gatherTensorData = true;
@@ -238,21 +237,23 @@ static void clamp_scalar_out_mps(const Tensor& input_t,
   if (output_t.numel() == 0)
     return;
 
+  auto result_type = output_t.scalar_type();
+
   @autoreleasepool {
     // the optional min/max refs could affect how we build the cached graph
     string key = op_name + (has_min ? ("_min:" + to_string(min_scalar)) : "") +
         (has_max ? ("_max:" + to_string(max_scalar)) : "") + "_scalar:" + getTensorsStringKey({input_t});
     auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
       if (has_min)
-        newCachedGraph->minTensor = [mpsGraph
-            constantWithScalar:min_scalar
-                         shape:(mps::getMPSShape(input_t))dataType:(mps::getMPSScalarType(input_t.scalar_type()))];
+        newCachedGraph->minTensor = [mpsGraph constantWithScalar:min_scalar
+                                                           shape:mps::getMPSShape(input_t)
+                                                        dataType:mps::getMPSScalarType(result_type)];
       if (has_max)
-        newCachedGraph->maxTensor = [mpsGraph
-            constantWithScalar:max_scalar
-                         shape:(mps::getMPSShape(input_t))dataType:(mps::getMPSScalarType(input_t.scalar_type()))];
+        newCachedGraph->maxTensor = [mpsGraph constantWithScalar:max_scalar
+                                                           shape:mps::getMPSShape(input_t)
+                                                        dataType:mps::getMPSScalarType(result_type)];
 
-      clamp_mps_graph(newCachedGraph, input_t, input_t, input_t);
+      clamp_mps_graph(newCachedGraph, input_t, result_type, result_type, result_type);
     });
 
     bool gatherTensorData = true;
 
@@ -12042,8 +12042,13 @@ def test_numpy_ref_mps(self, device, dtype, op):
         # does not support float64 Tensors.
         # A few ops are currently broken on their reference inputs, but not their sample inputs. These should
         # get patched up and this workaround removed.
-        broken_on_ref_inputs = op.name in ['clamp', 'where']
-        inputs = op.reference_inputs(device, dtype) if not broken_on_ref_inputs else op.sample_inputs(device, dtype)
+        broken_on_ref_inputs = op.name in ('where',)
+
+        # TODO: Enable per-sample seed setting and tweak tolerances / fix xfails
+        inputs = (
+            op.reference_inputs(device, dtype, set_seed=False) if not broken_on_ref_inputs
+            else op.sample_inputs(device, dtype, set_seed=False)
+        )
         for sample_input in inputs:
             self.compare_with_reference(op, op.ref, sample_input)
 
 
@@ -6223,13 +6223,17 @@ def error_inputs_flipud(op, device, **kwargs):
 
 def sample_inputs_clamp(op_info, device, dtype, requires_grad, **kwargs):
     make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None, requires_grad=requires_grad)
+    make_integral_arg = partial(make_tensor, dtype=torch.int32, device=device, low=None, high=None, requires_grad=False)
     shape = (S, M, S)
 
     yield SampleInput(make_arg(shape), args=(make_arg(shape), make_arg(shape)))
     yield SampleInput(make_arg(shape), args=(make_arg(shape[1:]), make_arg(shape[1:])))
     yield SampleInput(make_arg(shape), args=(make_arg((S, 1, S)),))
     yield SampleInput(make_arg(shape), args=(None, make_arg(shape)))
     yield SampleInput(make_arg(shape), args=(make_arg(shape), None))
+    # test type promotion
+    yield SampleInput(make_arg(shape), args=(make_integral_arg(shape), None))
+    yield SampleInput(make_arg(shape), args=(make_arg(shape), make_integral_arg(shape)))
 
 def reference_inputs_elementwise_ternary(op, device, dtype, requires_grad, *, sample_inputs_func, supports_scalars=False, **kwargs):
     yield from sample_inputs_func(op, device, dtype, requires_grad, **kwargs)
@@ -12666,6 +12670,11 @@ def reference_flatten(input, start_dim=0, end_dim=-1):
                             'TestNNCOpInfo',
                             'test_nnc_correctness',
                             dtypes=(torch.bool,)),
+               # MPS does not support float64, while numpy does internal computations in float64.
+               # See https://github.com/pytorch/pytorch/blob/3c1cf03fde145bdbe1f5ffb81765d076c10b4c04/test/test_ops.py#L260-L264
+               DecorateInfo(unittest.expectedFailure,
+                            'TestCommon',
+                            'test_numpy_ref_mps'),
            )),
     UnaryUfuncInfo('positive',
                    ref=np.positive,