8000 Revert "Enable XPUEvent elapsed_time function (#134666)" (#140872) · pytorch/pytorch@62d2c5b · GitHub
[go: up one dir, main page]

Skip to content

Commit 62d2c5b

Browse files
guangyey authored and pytorchmergebot committed
Revert "Enable XPUEvent elapsed_time function (#134666)" (#140872)
# Motivation
This PR raises an internal UT failure on XPU. This reverts commit 4bbd6da.
# Additional Context
Refer to #140814.
Pull Request resolved: #140872
Approved by: https://github.com/EikanWang
1 parent 3d26c08 commit 62d2c5b

File tree

3 files changed

+21
-85
lines changed

3 files changed

+21
-85
lines changed

aten/src/ATen/xpu/XPUEvent.h

Lines changed: 9 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ struct TORCH_XPU_API XPUEvent {
8585
void record(const XPUStream& stream) {
8686
if (!isCreated()) {
8787
device_index_ = stream.device_index();
88-
assignEvent(stream.queue());
88+
event_ = std::make_unique<sycl::event>(
89+
stream.queue().ext_oneapi_submit_barrier());
8990
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
9091
if (C10_UNLIKELY(interp)) {
9192
(*interp)->trace_gpu_event_creation(
@@ -99,7 +100,9 @@ struct TORCH_XPU_API XPUEvent {
99100
" does not match recording stream's device ",
100101
stream.device_index(),
101102
".");
102-
reassignEvent(stream.queue());
103+
event_.reset();
104+
event_ = std::make_unique<sycl::event>(
105+
stream.queue().ext_oneapi_submit_barrier());
103106
}
104107
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
105108
if (C10_UNLIKELY(interp)) {
@@ -125,7 +128,7 @@ struct TORCH_XPU_API XPUEvent {
125128
}
126129
}
127130

128-
double elapsed_time(const XPUEvent& other) const {
131+
float elapsed_time(const XPUEvent& other) const {
129132
TORCH_CHECK(
130133
isCreated() && other.isCreated(),
131134
"Both events must be recorded before calculating elapsed time.");
@@ -135,20 +138,10 @@ struct TORCH_XPU_API XPUEvent {
135138
TORCH_CHECK(
136139
enable_timing_ && other.enable_timing_,
137140
"Both events must be created with argument 'enable_timing=True'.");
138-
139-
#if SYCL_COMPILER_VERSION < 20250000
141+
// TODO: provides the ability to time the execution of commands in a SYCL
142+
// queue without enabling profiling on the entire queue
140143
TORCH_CHECK_NOT_IMPLEMENTED(
141-
false,
142-
"elapsed_time of XPUEvent requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.");
143-
#endif
144-
145-
using namespace sycl::info::event_profiling;
146-
// Block until both of the recorded events are completed.
147-
uint64_t end_time_ns = other.event().get_profiling_info<command_end>();
148-
uint64_t start_time_ns = event().get_profiling_info<command_end>();
149-
// Return the elapsed time in milliseconds.
150-
return 1e-6 *
151-
(static_cast<double>(end_time_ns) - static_cast<double>(start_time_ns));
144+
false, "elapsed_time is not supported by XPUEvent.");
152145
}
153146

154147
void synchronize() const {
@@ -163,24 +156,6 @@ struct TORCH_XPU_API XPUEvent {
163156
}
164157

165158
private:
166-
void assignEvent(sycl::queue& queue) {
167-
#if SYCL_COMPILER_VERSION >= 20250000
168-
if (enable_timing_) {
169-
event_ = std::make_unique<sycl::event>(
170-
sycl::ext::oneapi::experimental::submit_profiling_tag(queue));
171-
} else {
172-
event_ = std::make_unique<sycl::event>(queue.ext_oneapi_submit_barrier());
173-
}
174-
#else
175-
event_ = std::make_unique<sycl::event>(queue.ext_oneapi_submit_barrier());
176-
#endif
177-
}
178-
179-
void reassignEvent(sycl::queue& queue) {
180-
event_.reset();
181-
assignEvent(queue);
182-
}
183-
184159
bool enable_timing_ = false;
185160
DeviceIndex device_index_ = -1;
186161
// Only need to track the last event, as events in an in-order queue are

c10/xpu/impl/XPUGuardImpl.h

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -140,30 +140,6 @@ struct XPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
140140
event_command_status::complete;
141141
}
142142

143-
double elapsedTime(
144-
void* start_event,
145-
void* end_event,
146-
const DeviceIndex device_index) const override {
147-
#if SYCL_COMPILER_VERSION < 20250000
148-
TORCH_CHECK_NOT_IMPLEMENTED(
149-
false,
150-
"elapsedTime requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.");
151-
#endif
152-
TORCH_CHECK(
153-
start_event && end_event,
154-
"Both events must be recorded before calculating elapsed time.");
155-
auto* xpu_start_event = reinterpret_cast<sycl::event*>(start_event);
156-
auto* xpu_end_event = reinterpret_cast<sycl::event*>(end_event);
157-
158-
using namespace sycl::info::event_profiling;
159-
// Block until both of the recorded events are completed.
160-
uint64_t end_time_ns = xpu_end_event->get_profiling_info<command_end>();
161-
uint64_t start_time_ns = xpu_start_event->get_profiling_info<command_end>();
162-
// Return the elapsed time in milliseconds.
163-
return 1e-6 *
164-
(static_cast<double>(end_time_ns) - static_cast<double>(start_time_ns));
165-
}
166-
167143
// Stream-related functions
168144
bool queryStream(const Stream& stream) const override {
169145
const XPUStream xpu_stream{stream};
@@ -200,6 +176,12 @@ struct XPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
200176
const XPUStream xpu_stream{stream};
201177
XPUCachingAllocator::recordStream(data_ptr, xpu_stream);
202178
}
179+
180+
double elapsedTime(void* event1, void* event2, const DeviceIndex device_index)
181+
const override {
182+
TORCH_CHECK_NOT_IMPLEMENTED(
183+
false, "elapsedTime is not supported by XPU backend.");
184+
}
203185
};
204186

205187
} // namespace c10::xpu::impl

test/test_xpu.py

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import subprocess
44
import sys
55
import tempfile
6-
import time
76
import unittest
87

98
import torch
@@ -236,21 +235,6 @@ def test_events(self):
236235
stream.record_event(event)
237236
event.synchronize()
238237
self.assertTrue(event.query())
239-
start_event = torch.xpu.Event(enable_timing=True)
240-
end_event = torch.xpu.Event(enable_timing=True)
241-
stream.record_event(start_event)
242-
time.sleep(0.1)
243-
stream.record_event(end_event)
244-
torch.xpu.synchronize()
245-
if int(torch.version.xpu) >= 20250000:
246-
self.assertGreater(start_event.elapsed_time(end_event), 0)
247-
self.assertLess(end_event.elapsed_time(start_event), 0)
248-
else:
249-
with self.assertRaisesRegex(
250-
NotImplementedError,
251-
"elapsed_time of XPUEvent requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.",
252-
):
253-
start_event.elapsed_time(end_event)
254238

255239
def test_generic_stream_event(self):
256240
stream = torch.Stream("xpu")
@@ -266,8 +250,8 @@ def test_generic_stream_event(self):
266250
self.assertEqual(stream.stream_id, xpu_stream.stream_id)
267251
self.assertNotEqual(stream.stream_id, torch.xpu.current_stream().stream_id)
268252

269-
event1 = torch.Event("xpu", enable_timing=True)
270-
event2 = torch.Event("xpu", enable_timing=True)
253+
event1 = torch.Event("xpu")
254+
event2 = torch.Event("xpu")
271255
self.assertEqual(event1.event_id, 0)
272256
a = torch.randn(1000)
273257
b = torch.randn(1000)
@@ -284,15 +268,10 @@ def test_generic_stream_event(self):
284268
self.assertTrue(event2.query())
285269
self.assertNotEqual(event1.event_id, event2.event_id)
286270
self.assertEqual(c_xpu.cpu(), a + b)
287-
if int(torch.version.xpu) >= 20250000:
288-
self.assertGreater(event1.elapsed_time(event2), 0)
289-
self.assertLess(event2.elapsed_time(event1), 0)
290-
else:
291-
with self.assertRaisesRegex(
292-
NotImplementedError,
293-
"elapsedTime requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.",
294-
):
295-
event1.elapsed_time(event2)
271+
with self.assertRaisesRegex(
272+
NotImplementedError, "elapsedTime is not supported by XPU backend."
273+
):
274+
event1.elapsed_time(event2)
296275
xpu_event = torch.xpu.Event()
297276
self.assertIsInstance(xpu_event, torch.Event)
298277
self.assertTrue(issubclass(type(xpu_event), torch.Event))

0 commit comments

Comments (0)