Lower to BatchFunctionOpWithDeviceOp · IBMZ-Linux-OSS-Python/tensorflow@7f32242 · GitHub

Commit 7f32242

deqiangc authored and tensorflower-gardener committed
Lower to BatchFunctionOpWithDeviceOp
PiperOrigin-RevId: 766370877
1 parent bae08f5 commit 7f32242

4 files changed: +63 −4 lines changed

tensorflow/compiler/mlir/tfrt/tests/mlrt/tpu_conversions.mlir

Lines changed: 19 additions & 0 deletions
@@ -166,3 +166,22 @@ func.func private @NopMapFnBody(%arg0: tensor<i32>, %arg1: tensor<i32>, %arg2: t
   %a = "tf.AddV2"(%arg2, %const) {__op_key = 3: i32}: (tensor<i32>, tensor<i32>) -> tensor<i32>
   return
 }
+
+
+// -----
+func.func @callee(%arg0: tensor<i32>, %arg1: tensor<i32>) -> (tensor<i32>) {
+  %1 = "tf.TPUCompileMlirAndExecute"(%arg0) {metadata = "metadata", mlir_module = "mlir_module", operandSegmentSizes = array<i32: 1, 0>, producer_name = "producer_name"} : (tensor<i32>) -> tensor<i32>
+  func.return %1: tensor<i32>
+}
+
+// CHECK-LABEL: func @batch_function
+func.func @batch_function(%arg0: tensor<i32>, %arg1: tensor<i32>) -> (tensor<i32>) {
+  // CHECK: [[device:%.*]] = tf_mlrt_tpu.get_tpu_host_device
+  // CHECK: [[batch_result_future:%.*]] = tf_mlrt.batch_function.device([[device]]) (%arg0, %arg1)
+  // CHECK: [[batch_result:%.*]] = tf_mlrt.await [[batch_result_future]]
+  // CHECK: return [[batch_result]]
+  %0 = "tf.BatchFunction"(%arg0, %arg1) {device = "/device:CPU:0", allowed_batch_sizes = [64], batch_timeout_micros = 1 : i64, batching_queue = "", container = "", f = @callee, max_batch_size = 256 : i64, num_batch_threads = 2 : i64, operandSegmentSizes = array<i32: 1, 1>, shared_name = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  func.return %0 : tensor<i32>
+}
+

tensorflow/compiler/mlir/tfrt/transforms/mlrt/BUILD

Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ cc_library(
         "//tensorflow/compiler/mlir/tfrt/ir/mlrt:tf_mlrt_tpu_ops",
         "@com_google_absl//absl/log:check",
         "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:FuncDialect",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:Support",
         "@llvm-project//mlir:TransformUtils",

tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc

Lines changed: 14 additions & 3 deletions
@@ -842,9 +842,20 @@ class BatchFunctionOpConversion
     llvm::SmallVector<mlir::Type, 4> result_types(
         op->getNumResults(), rewriter.getType<mlrt::compiler::FutureType>());
 
-    rewriter.replaceOpWithNewOp<tf_mlrt::BatchFunctionOp>(
-        op, result_types, adaptor.getOperands(), node_def.device(),
-        op.getFAttr(), node_def_text);
+    if (auto custom_device =
+            op->getAttrOfType<mlir::StringAttr>(kTfMlrtCustomDevice)) {
+      mlir::Value device =
+          CreateCustomDevice(op->getLoc(), custom_device.getValue(), rewriter);
+      if (!device) return op->emitWarning("Failed to create custom device.");
+
+      rewriter.replaceOpWithNewOp<tf_mlrt::BatchFunctionWithDeviceOp>(
+          op, result_types, device, adaptor.getOperands(), node_def.device(),
+          op.getFAttr(), node_def_text);
+    } else {
+      rewriter.replaceOpWithNewOp<tf_mlrt::BatchFunctionOp>(
+          op, result_types, adaptor.getOperands(), node_def.device(),
+          op.getFAttr(), node_def_text);
+    }
 
     return mlir::success();
   }
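
The conversion now branches on the kTfMlrtCustomDevice string attribute: when it is present, the op is lowered to tf_mlrt::BatchFunctionWithDeviceOp, which takes a materialized device handle as an extra operand; otherwise the existing tf_mlrt::BatchFunctionOp lowering is unchanged. CreateCustomDevice itself is not part of this diff. A minimal sketch of what it plausibly does, assuming kTpuHostDevice is the only recognized value and that the tf_mlrt_tpu.get_tpu_host_device op exercised by the test above is exposed as a C++ class named GetTpuHostDeviceOp with a location-only builder (both assumptions, not confirmed by this commit):

// Hedged sketch only, not the actual helper. GetTpuHostDeviceOp is a
// hypothetical class name for tf_mlrt_tpu.get_tpu_host_device.
mlir::Value CreateCustomDevice(mlir::Location loc, llvm::StringRef device_name,
                               mlir::ConversionPatternRewriter& rewriter) {
  if (device_name == kTpuHostDevice) {
    // Materialize the TPU host device handle consumed by
    // tf_mlrt.batch_function.device.
    return rewriter.create<tf_mlrt_tpu::GetTpuHostDeviceOp>(loc);
  }
  // Unknown custom device: return a null Value so the caller can warn.
  return mlir::Value();
}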

tensorflow/compiler/mlir/tfrt/transforms/mlrt/tpu_conversion_patterns.cc

Lines changed: 29 additions & 1 deletion
@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/compiler/mlir/tfrt/transforms/mlrt/tpu_conversion_patterns.h"
 
+#include "llvm/Support/Casting.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tfrt/ir/mlrt/mlrt_dialect.h"
 #include "tensorflow/compiler/mlir/tfrt/ir/mlrt/tf_mlrt_ops.h"
@@ -90,8 +92,18 @@ class TPUCompileMlirAndExecuteOpPreParallelizationConversion
           }
         }
       }
+      if (replaced_ops.empty()) {
+        auto caller_batch_ops = FindCallerBatchFunctionOps(op);
+        for (auto* batch_op : caller_batch_ops) {
+          mlir::ConversionPatternRewriter::InsertionGuard guard(rewriter);
+          rewriter.setInsertionPoint(batch_op);
+          mlir::Operation* batch_op_with_device = rewriter.clone(*batch_op);
+          batch_op_with_device->setAttr(kTfMlrtCustomDevice,
+                                        rewriter.getStringAttr(kTpuHostDevice));
+          rewriter.replaceOp(batch_op, batch_op_with_device->getResults());
+        }
+      }
     }
-
     auto compile_and_execute_op =
         rewriter.create<tf_mlrt::TFTPUCompileAndExecuteOp>(
             op.getLoc(), op.getResultTypes(), operands,
@@ -108,6 +120,22 @@ class TPUCompileMlirAndExecuteOpPreParallelizationConversion
 
  private:
   bool use_tpu_host_allocator_for_inputs_ = false;
+
+  llvm::SmallVector<mlir::Operation*, 4> FindCallerBatchFunctionOps(
+      mlir::Operation* op) const {
+    llvm::SmallVector<mlir::Operation*, 4> result;
+    if (auto func = llvm::dyn_cast<mlir::func::FuncOp>(op->getParentOp())) {
+      if (auto uses = func.getSymbolUses(func->getParentOp())) {
+        for (auto& use : uses.value()) {
+          auto* user = use.getUser();
+          if (auto batch_op = llvm::dyn_cast<mlir::TF::BatchFunctionOp>(user)) {
+            result.push_back(batch_op);
+          }
+        }
+      }
+    }
+    return result;
+  }
 };
 
 class TPUCompileMlirAndExecuteOpConversion
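
FindCallerBatchFunctionOps locates the tf.BatchFunction ops that call the function enclosing the TPUCompileMlirAndExecute op by enumerating symbol uses of that function within its parent. The same walk, reduced to a self-contained helper over standard MLIR APIs (generic over the user op, whereas the pattern above additionally filters for mlir::TF::BatchFunctionOp):

#include "llvm/ADT/SmallVector.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/SymbolTable.h"

// Collect every operation that references `func` by symbol within its
// parent op (typically the enclosing module). Returns an empty vector
// if the symbol uses cannot be enumerated.
llvm::SmallVector<mlir::Operation*, 4> CollectSymbolUsers(
    mlir::func::FuncOp func) {
  llvm::SmallVector<mlir::Operation*, 4> users;
  if (auto uses =
          mlir::SymbolTable::getSymbolUses(func, func->getParentOp())) {
    for (const mlir::SymbolTable::SymbolUse& use : *uses)
      users.push_back(use.getUser());
  }
  return users;
}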
