@@ -487,7 +487,11 @@ class TfrtGpuAsyncHostToDeviceTransferManager final
487
487
TF_CHECK_OK (transfer_manager->TransferLiteralToDeviceAsync (
488
8000
488
stream, literal, shaped_buffer));
489
489
490
- absl::Status status = stream->BlockHostUntilDone ();
490
+ absl::Status status;
491
+ {
492
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
493
+ status = stream->BlockHostUntilDone ();
494
+ }
491
495
VLOG (3 ) << " Finish transfer h2d for literal with shape "
492
496
<< literal.shape ().ToString () << " on device "
493
497
<< device_->DebugString () << " with status " << status;
@@ -596,7 +600,11 @@ class TfrtGpuAsyncHostToDeviceTransferManager final
596
600
TF_CHECK_OK (stream->Memcpy (&sub_buffer, host_data_ptr, transfer_size))
597
601
<< " Failed to copy data to GPU" ;
598
602
599
- absl::Status status = stream->BlockHostUntilDone ();
603
+ absl::Status status;
604
+ {
605
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
606
+ status = stream->BlockHostUntilDone ();
607
+ }
600
608
VLOG (3 ) << " H2D copy done: " << status;
601
609
CHECK_OK (status) << " Failed to block host until done" ;
602
610
}
@@ -907,12 +915,15 @@ SendDeviceMemoryFunction ConvertSendCallbacksToSendFunction(
907
915
}
908
916
909
917
// Wait for the data to be available on the host.
910
- absl::Status st = stream->BlockHostUntilDone ();
918
+ {
919
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
920
+ status = stream->BlockHostUntilDone ();
921
+ }
911
922
VLOG (3 ) << " D2H copy done. " << status;
912
- if (!st .ok ()) {
923
+ if (!status .ok ()) {
913
924
done_event.SetError (absl::InternalError (absl::StrFormat (
914
925
" failed to synchronize send operation with a stream: %s" ,
915
- st .message ())));
926
+ status .message ())));
916
927
return ;
917
928
}
918
929
@@ -2063,7 +2074,10 @@ absl::StatusOr<std::unique_ptr<PjRtBuffer>> TfrtGpuClient::BufferFromHostBuffer(
2063
2074
dst_definition_event.SetError (status);
2064
2075
return ;
2065
2076
}
2066
- status = stream->BlockHostUntilDone ();
2077
+ {
2078
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
2079
+ status = stream->BlockHostUntilDone ();
2080
+ }
2067
2081
VLOG (3 ) << " H2D copy done. " << status;
2068
2082
if (status.ok ()) {
2069
2083
copy_event.SetStateConcrete ();
@@ -2177,7 +2191,11 @@ TfrtGpuClient::BufferFromHostLiteral(const LiteralSlice& literal,
2177
2191
TF_CHECK_OK (transfer_manager->TransferLiteralToDeviceAsync (
2178
2192
stream, literal, shaped_buffer));
2179
2193
2180
- auto status = stream->BlockHostUntilDone ();
2194
+ absl::Status status;
2195
+ {
2196
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
2197
+ status = stream->BlockHostUntilDone ();
2198
+ }
2181
2199
CHECK_OK (status) << " Failed to block host until done" ;
2182
2200
VLOG (3 ) << " BufferFromHostLiteral done for device_buffer: "
2183
2201
<< device_buffer << " AsyncValue: " << av.get ();
@@ -2592,7 +2610,10 @@ absl::StatusOr<Shape> TfrtGpuBuffer::logical_on_device_shape() {
2592
2610
auto stream = device_->stream ();
2593
2611
TF_RETURN_IF_ERROR (
2594
2612
transfer_manager->ReadDynamicShapes (stream, &shaped_buffer, &ret_shape));
2595
- TF_RETURN_IF_ERROR (stream->BlockHostUntilDone ());
2613
+ {
2614
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
2615
+ TF_RETURN_IF_ERROR (stream->BlockHostUntilDone ());
2616
+ }
2596
2617
return ret_shape;
2597
2618
}
2598
2619
@@ -2870,7 +2891,11 @@ PjRtFuture<> TfrtGpuBuffer::ToLiteral(MutableLiteralBase* literal) {
2870
2891
byte_size))
2871
2892
<< " stream->Memcpy failed copying from GPU to host" ;
2872
2893
2873
- absl::Status status = stream->BlockHostUntilDone ();
2894
+ absl::Status status;
2895
+ {
2896
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
2897
+ status = stream->BlockHostUntilDone ();
2898
+ }
2874
2899
VLOG (3 ) << " D2H copy done. " << status;
2875
2900
if (!status.ok ()) {
2876
2901
VLOG (3 ) << " stream->BlockHostUntilDone failed: " << status;
@@ -3010,7 +3035,11 @@ PjRtFuture<> TfrtGpuBuffer::CopyRawToHostFuture(PjRtFuture<void*> dst,
3010
3035
<< host_ptr << " (" << transfer_size << " bytes)" ;
3011
3036
CHECK_OK (stream->Memcpy (host_ptr, *sub_buffer, transfer_size))
3012
3037
<< " stream->Memcpy failed copying from GPU to host" ;
3013
- absl::Status status = stream->BlockHostUntilDone ();
3038
+ absl::Status status;
3039
+ {
3040
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
3041
+ status = stream->BlockHostUntilDone ();
3042
+ }
3014
3043
VLOG (3 ) << " D2H copy done. " << status;
3015
3044
if (!status.ok ()) {
3016
3045
LOG (ERROR) << " stream->BlockHostUntilDone failed: " << status;
@@ -3204,7 +3233,10 @@ absl::StatusOr<std::unique_ptr<PjRtBuffer>> TfrtGpuBuffer::CopyToMemorySpace(
3204
3233
dst_definition_event.SetError (status);
3205
3234
return ;
3206
3235
}
3207
- status = stream->BlockHostUntilDone ();
3236
+ {
3237
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
3238
+ status = stream->BlockHostUntilDone ();
3239
+ }
3208
3240
if (status.ok ()) {
3209
3241
VLOG (3 ) << " D2D copy done. dst: " << dst.opaque ();
3210
3242
dst_definition_event.SetStateConcrete ();
@@ -3788,7 +3820,11 @@ absl::StatusOr<PjRtLoadedExecutable::Result> TfrtGpuExecutable::ExecuteHelper(
3788
3820
// has completed, so that the next execute_fn can start.
3789
3821
scheduled_event.SetStateConcrete ();
3790
3822
3791
- absl::Status status = stream->BlockHostUntilDone ();
3823
+ absl::Status status;
3824
+ {
3825
+ tsl::profiler::TraceMe traceme (" BlockHostUntilDone" );
3826
+ status = stream->BlockHostUntilDone ();
3827
+ }
3792
3828
if (!status.ok ()) {
3793
3829
LOG (ERROR) << " BlockHostUntilDone failed for executable "
3794
3830
<< executable_name << " on device "
0 commit comments