[XLA] Emit FCMP_UNE (true if unordered) instead of FCMP_ONE (false if unordered)

tensorflower-gardener · tensorflower-gardener · commit 183e2c953a81 · 2017-05-11T22:36:36.000-07:00
for not_equal operation to be more compliant with TF and other languages.

PiperOrigin-RevId: 155835642
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -240,14 +240,18 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
       return ir_builder_->CreateFDiv(lhs_value, rhs_value);
     case HloOpcode::kRemainder:
       return ir_builder_->CreateFRem(lhs_value, rhs_value);
-
-    // The 'O' prefix on the LLVM ops means "ordered" compare where comparisons
-    // with NAN always return false.
+    // LLVM comparisons can be "unordered" (U) or "ordered" (O) -- ordered
+    // comparisons always return false when one of the operands is NaN, whereas
+    // unordered comparisons return true.
+    //
+    // We use ordered comparisons for everything except kNe, where we use an
+    // unordered comparison.  This makes x != y equivalent to !(x == y), and
+    // matches C++'s semantics.
     case HloOpcode::kEq:
       return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OEQ, lhs_value,
                                      rhs_value, ir_builder_);
     case HloOpcode::kNe:
-      return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_ONE, lhs_value,
+      return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_UNE, lhs_value,
                                      rhs_value, ir_builder_);
     case HloOpcode::kLt:
       return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OLT, lhs_value,
@@ -739,11 +743,11 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
           const HloInstruction* operand = hlo->operand(operand_idx);
           auto true_block = llvm_ir::CreateBasicBlock(
               exit_block, tensorflow::strings::StrCat(
-                              "concat_index_from_operand", operand_idx),
+                      "concat_index_from_operand", operand_idx),
               ir_builder_);
           auto false_block = llvm_ir::CreateBasicBlock(
               exit_block, tensorflow::strings::StrCat(
-                              "concat_index_not_from_operand", operand_idx),
+                      "concat_index_not_from_operand", operand_idx),
               ir_builder_);
           auto concat_dim_size =
               llvm::ConstantInt::get(source_index[concat_dim]->getType(),
diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
@@ -486,6 +486,18 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementS32s) {
   ComputeAndCompareR1<bool>(&builder, {}, {});
 }
 
+TEST_F(ArrayElementwiseOpTest, CompareNeF32s) {
+  // Disable fast-math because we're operating on NaNs.
+  SetFastMathDisabled(true);
+
+  ComputationBuilder builder(client_, TestName());
+  auto lhs = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, NAN, 6.0f});
+  auto rhs = builder.ConstantR1<float>({10.0f, 25.5f, 1.0f, 10.0f, NAN});
+  auto compare = builder.Ne(lhs, rhs);
+
+  ComputeAndCompareR1<bool>(&builder, {true, false, true, true, true}, {});
+}
+
 TEST_F(ArrayElementwiseOpTest, CompareNeS32s) {
   const int32 min = std::numeric_limits<int32>::min();
   const int32 max = std::numeric_limits<int32>::max();