Merge commit for internal changes · staticfloat/tensorflow@613a015 · GitHub
[go: up one dir, main page]

Skip to content

Commit 613a015

Browse files
committed
Merge commit for internal changes
2 parents a33022c + 183e2c9 commit 613a015

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+2117
-916
lines changed

WORKSPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ http_archive(
55
sha256 = "4be8a887f6f38f883236e77bb25c2da10d506f2bf1a8e5d785c0f35574c74ca4",
66
strip_prefix = "rules_closure-aac19edc557aec9b603cd7ffe359401264ceff0d",
77
urls = [
8-
"http://bazel-mirror.storage.googleapis.com/github.com/bazelbuild/rules_closure/archive/aac19edc557aec9b603cd7ffe359401264ceff0d.tar.gz", # 2017-05-10
8+
"http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/aac19edc557aec9b603cd7ffe359401264ceff0d.tar.gz", # 2017-05-10
99
"https://github.com/bazelbuild/rules_closure/archive/aac19edc557aec9b603cd7ffe359401264ceff0d.tar.gz",
1010
],
1111
)

tensorflow/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,8 @@ filegroup(
351351
"//tensorflow/tensorboard/components/tf_globals_d3v4:all_files",
352352
"//tensorflow/tensorboard/components/tf_graph:all_files",
353353
"//tensorflow/tensorboard/components/tf_graph/demo:all_files",
354+
"//tensorflow/tensorboard/components/tf_graph_app:all_files",
355+
"//tensorflow/tensorboard/components/tf_graph_app/demo:all_files",
354356
"//tensorflow/tensorboard/components/tf_graph_board:all_files",
355357
"//tensorflow/tensorboard/components/tf_graph_board/demo:all_files",
356358
"//tensorflow/tensorboard/components/tf_graph_common:all_files",

tensorflow/compiler/xla/client/local_client.cc

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -253,14 +253,6 @@ StatusOr<std::unique_ptr<GlobalData>> LocalClient::AllocateBufferOnDevice(
253253
return std::unique_ptr<GlobalData>(new GlobalData(local_service_, handle));
254254
}
255255

256-
tensorflow::Status LocalClient::ResolveArguments(
257-
const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
258-
int device_ordinal,
259-
std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs) {
260-
return local_service_->ResolveArguments(arguments, device_ordinal,
261-
argument_ptrs);
262-
}
263-
264256
se::Platform* LocalClient::platform() const {
265257
return local_service_->backend().platform();
266258
}

tensorflow/compiler/xla/client/local_client.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,6 @@ class LocalClient : public Client {
158158
LocalClient(const LocalClient&) = delete;
159159
void operator=(const LocalClient&) = delete;
160160

161-
// For an array of arguments held on the local service, validate
162-
// that each is placed on the specified device_ordinal, and return
163-
// the DeviceMemoryBase corresponding to each argument.
164-
tensorflow::Status ResolveArguments(
165-
const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
166-
int device_ordinal,
167-
std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs);
168-
169161
// Return a handle to a buffer large enough to hold shape, allocated
170162
// on device_ordinal on the local service. If
171163
// allocate_space_for_deep_copy, the buffer is large enough to hold

tensorflow/compiler/xla/service/elemental_ir_emitter.cc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -240,14 +240,18 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
240240
return ir_builder_->CreateFDiv(lhs_value, rhs_value);
241241
case HloOpcode::kRemainder:
242242
return ir_builder_->CreateFRem(lhs_value, rhs_value);
243-
244-
// The 'O' prefix on the LLVM ops means "ordered" compare where comparisons
245-
// with NAN always return false.
243+
// LLVM comparisons can be "unordered" (U) or "ordered" (O) -- ordered
244+
// comparisons always return false when one of the operands is NaN, whereas
245+
// unordered comparisons return true.
246+
//
247+
// We use ordered comparisons for everything except kNe, where we use an
248+
// unordered comparison. This makes x != y equivalent to !(x == y), and
249+
// matches C++'s semantics.
246250
case HloOpcode::kEq:
247251
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OEQ, lhs_value,
248252
rhs_value, ir_builder_);
249253
case HloOpcode::kNe:
250-
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_ONE, lhs_value,
254+
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_UNE, lhs_value,
251255
rhs_value, ir_builder_);
252256
case HloOpcode::kLt:
253257
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OLT, lhs_value,
@@ -739,11 +743,11 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
739743
const HloInstruction* operand = hlo->operand(operand_idx);
740744
auto true_block = llvm_ir::CreateBasicBlock(
741745
exit_block, tensorflow::strings::StrCat(
742-
"concat_index_from_operand", operand_idx),
746+
"concat_index_from_operand", operand_idx),
743747
ir_builder_);
744748
auto false_block = llvm_ir::CreateBasicBlock(
745749
exit_block, tensorflow::strings::StrCat(
746-
"concat_index_not_from_operand", operand_idx),
750+
"concat_index_not_from_operand", operand_idx),
747751
ir_builder_);
748752
auto concat_dim_size =
749753
llvm::ConstantInt::get(source_index[concat_dim]->getType(),

tensorflow/compiler/xla/service/local_service.cc

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -77,21 +77,6 @@ LocalService::LocalService(std::unique_ptr<Backend> execute_backend,
7777
runs_in_client_process_ = true;
7878
}
7979

80-
tensorflow::Status LocalService::ResolveArguments(
81-
const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
82-
int device_ordinal,
83-
std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs) {
84-
TF_ASSIGN_OR_RETURN(std::vector<const Allocation*> arg_allocations,
85-
ResolveAndValidateArguments(
86-
arguments, execute_backend_.get(), device_ordinal));
87-
argument_ptrs->resize(arg_allocations.size());
88-
for (int i = 0; i < arguments.size(); ++i) {
89-
const Allocation& allocation = *arg_allocations[i];
90-
(*argument_ptrs)[i] = allocation.device_memory();
91-
}
92-
return tensorflow::Status::OK();
93-
}
94-
9580
namespace {
9681
// Returns the space required to allocate a shape. If
9782
// allocate_space_for_deep_copy the space includes all sub-buffers of

tensorflow/compiler/xla/service/local_service.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,6 @@ class LocalService : public Service {
4343
static StatusOr<std::unique_ptr<LocalService>> NewService(
4444
const ServiceOptions& options);
4545

46-
// For an array of arguments, validate that each is placed on the
47-
// specified device_ordinal, and return the DeviceMemoryBase
48-
// corresponding to each argument.
49-
tensorflow::Status ResolveArguments(
50-
const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
51-
int device_ordinal,
52-
std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs);
53-
5446
// Return a handle to a buffer large enough to hold shape, allocated
5547
// on device_ordinal. If allocate_space_for_deep_copy, the buffer is
5648
// large enough to hold all sub-buffers of a tuple shape, otherwise

tensorflow/compiler/xla/service_interface.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ limitations under the License.
2121

2222
namespace xla {
2323

24-
// Defines the interface for an XLA service.
24+
// Defines the interface for an XLA service on the client side. This service
25+
// helps abstract around the actual implementation of a service - the service
26+
// can be local (running in the same process), or remote - in which case an RPC
27+
// stub is used as the implementation.
2528
class ServiceInterface {
2629
public:
2730
ServiceInterface() {}

tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,18 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementS32s) {
486486
ComputeAndCompareR1<bool>(&builder, {}, {});
487487
}
488488

489+
TEST_F(ArrayElementwiseOpTest, CompareNeF32s) {
490+
// Disable fast-math because we're operating on NaNs.
491+
SetFastMathDisabled(true);
492+
493+
ComputationBuilder builder(client_, TestName());
494+
auto lhs = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, NAN, 6.0f});
495+
auto rhs = builder.ConstantR1<float>({10.0f, 25.5f, 1.0f, 10.0f, NAN});
496+
auto compare = builder.Ne(lhs, rhs);
497+
498+
ComputeAndCompareR1<bool>(&builder, {true, false, true, true, true}, {});
499+
}
500+
489501
TEST_F(ArrayElementwiseOpTest, CompareNeS32s) {
490502
const int32 min = std::numeric_limits<int32>::min();
491503
const int32 max = std::numeric_limits<int32>::max();

0 commit comments

Comments
 (0)
0