compnerd
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
Lines changed: 62 additions & 0 deletions
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
Lines changed: 1 addition & 0 deletions
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
Lines changed: 51 additions & 0 deletions
@@ -60,6 +60,13 @@ constexpr char kStopGradient[] = "StopGradient";
 constexpr char kPreventGradient[] = "PreventGradient";
 constexpr char kGather[] = "Gather";
 constexpr char kGatherV2[] = "GatherV2";
+// Op names of the Scatter* family. Each of them is registered to
+// OpLevelCostEstimator::PredictScatter in the estimator's constructor.
+constexpr char kScatterAdd[] = "ScatterAdd";
+constexpr char kScatterDiv[] = "ScatterDiv";
+constexpr char kScatterMax[] = "ScatterMax";
+constexpr char kScatterMin[] = "ScatterMin";
+constexpr char kScatterMul[] = "ScatterMul";
+constexpr char kScatterSub[] = "ScatterSub";
+constexpr char kScatterUpdate[] = "ScatterUpdate";
 constexpr char kSlice[] = "Slice";
 constexpr char kMaxPool[] = "MaxPool";
 constexpr char kMaxPoolGrad[] = "MaxPoolGrad";
@@ -275,6 +282,14 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
 
       {kGather, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)},
       {kGatherV2, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)},
+      {kScatterAdd, wrap(&OpLevelCostEstimator::PredictScatter)},
+      {kScatterDiv, wrap(&OpLevelCostEstimator::PredictScatter)},
+      {kScatterMax, wrap(&OpLevelCostEstimator::PredictScatter)},
+      {kScatterMin, wrap(&OpLevelCostEstimator::PredictScatter)},
+      {kScatterMul, wrap(&OpLevelCostEstimator::PredictScatter)},
+      {kScatterSub, wrap(&OpLevelCostEstimator::PredictScatter)},
+      {kScatterUpdate, wrap(&OpLevelCostEstimator::PredictScatter)},
+
       {kSlice, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)},
 
       {kPlaceholder, wrap(&OpLevelCostEstimator::PredictIdentity)},
@@ -1551,6 +1566,53 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice(
   return costs;
 }
 
+// Predicts the cost of the Scatter* ops (ScatterAdd, ScatterUpdate, ...).
+//
+// Only the slices of ref selected by the indices are accounted for, so the
+// operation count and the ref/output memory traffic scale with the number of
+// indices instead of with the full size of ref.
+//
+// Returns zero costs marked inaccurate if the op does not carry the three
+// inputs (ref, indices, updates) and the output that are read below.
+Costs OpLevelCostEstimator::PredictScatter(const OpContext& op_context) const {
+  // Scatter ops sparsely access a reference input and output tensor.
+  const auto& op_info = op_context.op_info;
+
+  // inputs(0), inputs(1), inputs(2) and outputs(0) are indexed unconditionally
+  // below; an op description without them must not reach those accesses.
+  constexpr int kNumInputsNeeded = 3;
+  if (op_info.inputs_size() < kNumInputsNeeded ||
+      op_info.outputs_size() == 0) {
+    Costs costs = Costs::ZeroCosts();
+    costs.inaccurate = true;
+    return costs;
+  }
+
+  bool found_unknown_shapes = false;
+
+  // input[0]: ref tensor that will be sparsely accessed
+  // input[1]: indices - A tensor of indices into the first dimension of ref.
+  // input[2]: updates where updates.shape = indices.shape + ref.shape[1:]
+  // See
+  // https://www.tensorflow.org/api_docs/python/tf/scatter_add and
+  // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/state_ops.cc#L146
+
+  const int64 num_indices =
+      CalculateTensorElementCount(op_info.inputs(1), &found_unknown_shapes);
+
+  // Elements of ref touched per index: the product of ref.shape[1:].
+  int64 num_elems_in_ref_per_index = 1;
+  auto ref_tensor_shape = MaybeGetMinimumShape(
+      op_info.inputs(0).shape(), op_info.inputs(0).shape().dim_size(),
+      &found_unknown_shapes);
+  for (int i = 1; i < ref_tensor_shape.dim().size(); ++i) {
+    num_elems_in_ref_per_index *= ref_tensor_shape.dim(i).size();
+  }
+  const int64 op_count = num_indices * num_elems_in_ref_per_index;
+
+  // Sparsely access ref so input size depends on the number of operations
+  const int64 ref_input_size =
+      op_count * DataTypeSize(BaseType(op_info.inputs(0).dtype()));
+  const int64 indices_input_size =
+      CalculateTensorSize(op_info.inputs(1), &found_unknown_shapes);
+  const int64 updates_input_size =
+      CalculateTensorSize(op_info.inputs(2), &found_unknown_shapes);
+
+  const double total_input_size =
+      ref_input_size + indices_input_size + updates_input_size;
+
+  // Sparsely access ref so output size depends on the number of operations
+  const double total_output_size =
+      op_count * DataTypeSize(BaseType(op_info.outputs(0).dtype()));
+
+  auto costs = PredictOpCountBasedCost(op_count, total_input_size,
+                                       total_output_size, op_info);
+  // Any shape that had to be guessed makes the whole estimate approximate.
+  costs.inaccurate = found_unknown_shapes;
+  costs.num_ops_with_unknown_shapes = found_unknown_shapes;
+
+  return costs;
+}
+
+
 Costs OpLevelCostEstimator::PredictFusedOp(
     const OpContext& op_context,
     const std::vector<OpContext>& fused_op_contexts) const {
 
@@ -141,6 +141,7 @@ class OpLevelCostEstimator {
   Costs PredictBatchMatMul(const OpContext& op_context) const;
   Costs PredictMetadata(const OpContext& op_context) const;
   Costs PredictGatherOrSlice(const OpContext& op_context) const;
+  // Cost of the Scatter* ops (ScatterAdd, ScatterUpdate, ...); the estimate is
+  // driven by the number of indices rather than by the full size of ref.
+  Costs PredictScatter(const OpContext& op_context) const;
   Costs PredictMaxPool(const OpContext& op_context) const;
   Costs PredictMaxPoolGrad(const OpContext& op_context) const;
   Costs PredictAvgPool(const OpContext& op_context) const;
 
@@ -612,6 +612,57 @@ TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) {
   EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
 }
 
+TEST_F(OpLevelCostEstimatorTest, TestScatterOps) {
+  // The same expectations are checked for every op name of the Scatter
+  // family.
+  const std::vector<string> scatter_op_names = {
+      "ScatterAdd", "ScatterDiv", "ScatterMax",   "ScatterMin",
+      "ScatterMul", "ScatterSub", "ScatterUpdate"};
+  for (const string& op_name : scatter_op_names) {
+    // Case 1: updates.shape = indices.shape + ref.shape[1:], INT64 indices.
+    {
+      OpContext ctx;
+      SetCpuDevice(&ctx.op_info);
+      ctx.op_info.set_op(op_name);
+      // ref has a huge first dimension; the expected values below reflect
+      // that it should not affect Scatter execution and memory costs.
+      DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &ctx.op_info);
+      DescribeArbitraryRankInput({16}, DT_INT64, &ctx.op_info);
+      DescribeArbitraryRankInput({16, 10}, DT_FLOAT, &ctx.op_info);
+      DescribeArbitraryRankOutput({10000000, 10}, DT_FLOAT, &ctx.op_info);
+
+      const auto predicted = estimator_.PredictCosts(ctx);
+      EXPECT_EQ(Costs::Duration(205), predicted.memory_time);
+      EXPECT_EQ(Costs::Duration(16), predicted.compute_time);
+      EXPECT_EQ(Costs::Duration(221), predicted.execution_time);
+      EXPECT_EQ(1, predicted.num_ops_total);
+      EXPECT_FALSE(predicted.inaccurate);
+      EXPECT_EQ(0, predicted.num_ops_with_unknown_shapes);
+    }
+
+    // Case 2: updates.shape = [] combined with INT32 indices.
+    {
+      OpContext ctx;
+      SetCpuDevice(&ctx.op_info);
+      ctx.op_info.set_op(op_name);
+      // Same huge ref as in case 1; again it should not affect Scatter
+      // execution and memory costs.
+      DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &ctx.op_info);
+      DescribeArbitraryRankInput({16}, DT_INT32, &ctx.op_info);
+      DescribeArbitraryRankInput({}, DT_FLOAT, &ctx.op_info);
+      DescribeArbitraryRankOutput({10000000, 10}, DT_FLOAT, &ctx.op_info);
+
+      const auto predicted = estimator_.PredictCosts(ctx);
+      EXPECT_EQ(Costs::Duration(135), predicted.memory_time);
+      EXPECT_EQ(Costs::Duration(16), predicted.compute_time);
+      EXPECT_EQ(Costs::Duration(151), predicted.execution_time);
+      EXPECT_EQ(1, predicted.num_ops_total);
+      EXPECT_FALSE(predicted.inaccurate);
+      EXPECT_EQ(0, predicted.num_ops_with_unknown_shapes);
+    }
+  }
+}
+
+
 TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) {
   auto cost = PredictCosts(DescribeBiasAdd(1000, 10));
   EXPECT_EQ(Costs::Duration(8400), cost.memory_time);