@@ -103,7 +103,19 @@ struct XPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
103
103
// Delete the event previously recorded.
104
104
if (xpu_event)
105
105
delete xpu_event;
106
+ #if SYCL_COMPILER_VERSION >= 20250000
107
+ if (flag == EventFlag::BACKEND_DEFAULT) {
108
+ // Use a profiling tag to record the event so that the timing feature is enabled.
109
+ xpu_event =
110
+ new sycl::event (sycl::ext::oneapi::experimental::submit_profiling_tag (
111
+ xpu_stream.queue ()));
112
+ } else {
113
+ xpu_event =
114
+ new sycl::event (xpu_stream.queue ().ext_oneapi_submit_barrier ());
115
+ }
116
+ #else
106
117
xpu_event = new sycl::event (xpu_stream.queue ().ext_oneapi_submit_barrier ());
118
+ #endif
107
119
*event = reinterpret_cast <void *>(xpu_event);
108
120
109
121
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace ();
@@ -140,6 +152,30 @@ struct XPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
140
152
event_command_status::complete;
141
153
}
142
8000
code>
154
155
+ double elapsedTime (
156
+ void * start_event,
157
+ void * end_event,
158
+ const DeviceIndex device_index) const override {
159
+ #if SYCL_COMPILER_VERSION < 20250000
160
+ TORCH_CHECK_NOT_IMPLEMENTED (
161
+ false ,
162
+ " elapsedTime requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer." );
163
+ #endif
164
+ TORCH_CHECK (
165
+ start_event && end_event,
166
+ " Both events must be recorded before calculating elapsed time." );
167
+ auto * xpu_start_event = reinterpret_cast <sycl::event*>(start_event);
168
+ auto * xpu_end_event = reinterpret_cast <sycl::event*>(end_event);
169
+
170
+ using namespace sycl ::info::event_profiling;
171
+ // Block until both of the recorded events are completed.
172
+ uint64_t end_time_ns = xpu_end_event->get_profiling_info <command_end>();
173
+ uint64_t start_time_ns = xpu_start_event->get_profiling_info <command_end>();
174
+ // Return the elapsed time in milliseconds.
175
+ return 1e-6 *
176
+ (static_cast <double >(end_time_ns) - static_cast <double >(start_time_ns));
177
+ }
178
+
143
179
// Stream-related functions
144
180
bool queryStream (const Stream& stream) const override {
145
181
const XPUStream xpu_stream{stream};
@@ -176,12 +212,6 @@ struct XPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
176
212
const XPUStream xpu_stream{stream};
177
213
XPUCachingAllocator::recordStream (data_ptr, xpu_stream);
178
214
}
179
-
180
- double elapsedTime (void * event1, void * event2, const DeviceIndex device_index)
181
- const override {
182
- TORCH_CHECK_NOT_IMPLEMENTED (
183
- false , " elapsedTime is not supported by XPU backend." );
184
- }
185
215
};
186
216
187
217
} // namespace c10::xpu::impl
0 commit comments