@@ -240,14 +240,18 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
240
240
return ir_builder_->CreateFDiv (lhs_value, rhs_value);
241
241
case HloOpcode::kRemainder :
242
242
return ir_builder_->CreateFRem (lhs_value, rhs_value);
243
-
244
- // The 'O' prefix on the LLVM ops means "ordered" compare where comparisons
245
- // with NAN always return false.
243
+ // LLVM comparisons can be "unordered" (U) or "ordered" (O) -- ordered
244
+ // comparisons always return false when one of the operands is NaN, whereas
245
+ // unordered comparisons always return true in that case.
246
+ //
247
+ // We use ordered comparisons for everything except kNe, where we use an
248
+ // unordered comparison. This makes x != y equivalent to !(x == y), and
249
+ // matches C++'s semantics.
246
250
case HloOpcode::kEq :
247
251
return llvm_ir::EmitComparison (llvm::CmpInst::FCMP_OEQ, lhs_value,
248
252
rhs_value, ir_builder_);
249
253
case HloOpcode::kNe :
250
- return llvm_ir::EmitComparison (llvm::CmpInst::FCMP_ONE , lhs_value,
254
+ return llvm_ir::EmitComparison (llvm::CmpInst::FCMP_UNE , lhs_value,
251
255
rhs_value, ir_builder_);
252
256
case HloOpcode::kLt :
253
257
return llvm_ir::EmitComparison (llvm::CmpInst::FCMP_OLT, lhs_value,
@@ -739,11 +743,11 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
739
743
const HloInstruction* operand = hlo->operand (operand_idx);
740
744
auto true_block = llvm_ir::CreateBasicBlock (
741
745
exit_block, tensorflow::strings::StrCat (
742
- " concat_index_from_operand" , operand_idx),
746
+ " concat_index_from_operand" , operand_idx),
743
747
ir_builder_);
744
748
auto false_block = llvm_ir::CreateBasicBlock (
745
749
exit_block, tensorflow::strings::StrCat (
746
- " concat_index_not_from_operand" , operand_idx),
750
+ " concat_index_not_from_operand" , operand_idx),
747
751
ir_builder_);
748
752
auto concat_dim_size =
749
753
llvm::ConstantInt::get (source_index[concat_dim]->getType (),
0 commit comments