Upgrade to DLPack 1.0. · pytorch/pytorch@f64f877 · GitHub
Commit f64f877
Upgrade to DLPack 1.0.
This PR makes the necessary changes in order to upgrade PyTorch DLPack support to version 1.0. In summary, we add support for the following:

- Support both `DLManagedTensor` and `DLManagedTensorVersioned` when producing and consuming DLPack capsules
- New parameter for `__dlpack__` method: `max_version`
- Version checks:
  - Fallback to old implementation if no `max_version` or if version lower than 1.0
  - Check that the to-be-consumed capsule is of version up to 1.X

In order to accommodate these new specifications, this PR adds the following main changes:

- `torch._C._to_dlpack_versioned` Python API (Module.cpp): new Python API for creating a versioned DLPack capsule (called by `__dlpack__` method)
- `DLPackTraits<T>` class (DLConvertor.h): selects the correct capsule name depending on which DLPack tensor class is being used
- `toDLPackImpl<T>` function (DLConvertor.cpp): populates the common fields of both classes
- `fillVersion<T>` function (DLConvertor.cpp): populates the version field for `DLManagedTensorVersioned` (no-op for `DLManagedTensor`)
- `fromDLPackImpl<T>` function (tensor_new.cpp): common function for creating an `at::Tensor` for both classes, leaving the possible version check for its caller

ghstack-source-id: 3ca1169
Pull Request resolved: #145000
1 parent 3797143 commit f64f877
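In practice, the negotiation reads like the following minimal sketch — a hypothetical session assuming a build that includes this commit; the capsule names come from `DLPackTraits` below:

```python
import torch

t = torch.arange(4)

# A DLPack 1.0 consumer announces the highest spec version it understands;
# PyTorch then produces a DLManagedTensorVersioned capsule.
versioned = t.__dlpack__(max_version=(1, 0))  # capsule named "dltensor_versioned"

# A legacy consumer omits max_version (or asks for < 1.0), so PyTorch
# falls back to the old DLManagedTensor capsule.
legacy = t.__dlpack__()                       # capsule named "dltensor"
```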

File tree

9 files changed: +289 -71 lines changed

aten/src/ATen/DLConvertor.cpp

49 additions, 23 deletions

```diff
@@ -261,19 +261,38 @@ ScalarType toScalarType(const DLDataType& dtype) {
 }
 
 namespace {
+
+// The templated classes below are needed for supporting both:
+// - DLManagedTensor
+// - DLManagedTensorVersioned
+template <class T>
 struct ATenDLMTensor {
   Tensor handle;
-  DLManagedTensor tensor{};
+  T tensor{};
 };
-} // namespace
 
-static void deleter(DLManagedTensor* arg) {
-  delete static_cast<ATenDLMTensor*>(arg->manager_ctx);
+template <class T>
+void deleter(T* arg) {
+  delete static_cast<ATenDLMTensor<T>*>(arg->manager_ctx);
+}
+
+// Adds version information for DLManagedTensorVersioned.
+// This is a no-op for the other types.
+template <class T>
+void fillVersion(T* tensor) {}
+
+template <>
+void fillVersion<DLManagedTensorVersioned>(
+    DLManagedTensorVersioned* tensor) {
+  tensor->flags = 0;
+  tensor->version.major = DLPACK_MAJOR_VERSION;
+  tensor->version.minor = DLPACK_MINOR_VERSION;
 }
 
 // This function returns a shared_ptr to memory managed DLpack tensor
 // constructed out of ATen tensor
-DLManagedTensor* toDLPack(const Tensor& src) {
+template <class T>
+T* toDLPackImpl(const Tensor& src) {
   // create a new tensor with possibly normalized strides
   // gh-83069
   auto shape = src.sizes();
@@ -285,10 +304,10 @@ DLManagedTensor* toDLPack(const Tensor& src) {
   }
 
   auto view = src.as_strided(shape, strides, src.storage_offset());
-  ATenDLMTensor* atDLMTensor(new ATenDLMTensor);
+  ATenDLMTensor<T>* atDLMTensor(new ATenDLMTensor<T>);
   atDLMTensor->handle = view;
   atDLMTensor->tensor.manager_ctx = atDLMTensor;
-  atDLMTensor->tensor.deleter = &deleter;
+  atDLMTensor->tensor.deleter = &deleter<T>;
   atDLMTensor->tensor.dl_tensor.data = view.data_ptr();
   c10::DeviceIndex device_id = 0;
   if (src.is_cuda() || src.is_privateuseone()) {
@@ -300,33 +319,40 @@ DLManagedTensor* toDLPack(const Tensor& src) {
   atDLMTensor->tensor.dl_tensor.shape = view.sizes().data();
   atDLMTensor->tensor.dl_tensor.strides = view.strides().data();
   atDLMTensor->tensor.dl_tensor.byte_offset = 0;
+  fillVersion(&atDLMTensor->tensor);
+
   return &(atDLMTensor->tensor);
 }
 
-Tensor fromDLPack(DLManagedTensor* src) {
-  auto deleter = [src](void* self [[maybe_unused]]) {
-    if (src->deleter) {
-      src->deleter(src);
-    }
-  };
-  return fromDLPack(src, std::move(deleter));
+// Explicitly instantiate the template above for both classes.
+template DLManagedTensor* toDLPackImpl<DLManagedTensor>(const Tensor&);
+template DLManagedTensorVersioned* toDLPackImpl<DLManagedTensorVersioned>(const Tensor&);
+
+} // namespace
+
+DLManagedTensorVersioned* toDLPack(const Tensor& src) {
+  return toDLPackImpl<DLManagedTensorVersioned>(src);
+}
+
+DLManagedTensor* toDLPackUnversioned(const Tensor& src) {
+  return toDLPackImpl<DLManagedTensor>(src);
 }
 
-Tensor fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter) {
-  Device device = getATenDevice(src->dl_tensor.device, src->dl_tensor.data);
-  ScalarType stype = toScalarType(src->dl_tensor.dtype);
-  if (!src->dl_tensor.strides) {
+Tensor fromDLPack(DLTensor& dl_tensor, std::function<void(void*)> deleter) {
+  Device device = getATenDevice(dl_tensor.device, dl_tensor.data);
+  ScalarType stype = toScalarType(dl_tensor.dtype);
+  if (!dl_tensor.strides) {
     return at::from_blob(
-        src->dl_tensor.data,
-        IntArrayRef(src->dl_tensor.shape, src->dl_tensor.ndim),
+        dl_tensor.data,
+        IntArrayRef(dl_tensor.shape, dl_tensor.ndim),
        std::move(deleter),
        at::device(device).dtype(stype),
        {device});
  }
  return at::from_blob(
-      src->dl_tensor.data,
-      IntArrayRef(src->dl_tensor.shape, src->dl_tensor.ndim),
-      IntArrayRef(src->dl_tensor.strides, src->dl_tensor.ndim),
+      dl_tensor.data,
+      IntArrayRef(dl_tensor.shape, dl_tensor.ndim),
+      IntArrayRef(dl_tensor.strides, dl_tensor.ndim),
      deleter,
      at::device(device).dtype(stype),
      {device});
```
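Either way the capsule is produced, the export is zero-copy: `toDLPackImpl<T>` heap-allocates an `ATenDLMTensor<T>` whose `handle` keeps the source tensor alive, and `deleter<T>` frees that holder when the consumer is done. A round-trip sketch of that sharing, assuming a build with this change:

```python
import torch

t = torch.arange(6).reshape(2, 3)

# Export and re-import through a versioned capsule. No data is copied:
# the capsule's manager_ctx holds a reference that keeps t's storage alive.
u = torch.from_dlpack(t.__dlpack__(max_version=(1, 0)))

u[0, 0] = 42
assert t[0, 0].item() == 42  # u and t share the same memory
```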

aten/src/ATen/DLConvertor.h

24 additions, 3 deletions

```diff
@@ -10,11 +10,32 @@
 
 namespace at {
 
+// This trait class is used for retrieving the different PyCapsule names for
+// both DLPack tensor classes: `DLManagedTensor` and `DLManagedTensorVersioned`.
+//
+// Each specialization should contain the following 2 traits:
+// - `capsule`: actual name of the capsule
+// - `used`: name of the capsule after using it
+template <class T>
+struct DLPackTraits {};
+
+template<>
+struct DLPackTraits<DLManagedTensor> {
+  inline static const char* capsule = "dltensor";
+  inline static const char* used = "used_dltensor";
+};
+
+template<>
+struct DLPackTraits<DLManagedTensorVersioned> {
+  inline static const char* capsule = "dltensor_versioned";
+  inline static const char* used = "used_dltensor_versioned";
+};
+
 TORCH_API ScalarType toScalarType(const DLDataType& dtype);
-TORCH_API DLManagedTensor* toDLPack(const Tensor& src);
-TORCH_API Tensor fromDLPack(DLManagedTensor* src);
+TORCH_API DLManagedTensorVersioned* toDLPack(const Tensor& src);
+TORCH_API DLManagedTensor* toDLPackUnversioned(const Tensor& src);
 TORCH_API Tensor
-fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter);
+fromDLPack(DLTensor& dl_tensor, std::function<void(void*)> deleter);
 TORCH_API DLDataType getDLDataType(const Tensor& t);
 TORCH_API DLDevice getDLContext(const Tensor& tensor, const int64_t& device_id);
```
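The `used` trait encodes the DLPack convention that a capsule is renamed once it has been consumed, so it can never be imported twice. That contract can be observed from Python through the CPython capsule API; a sketch, assuming a build with this change:

```python
import ctypes
import torch

# PyCapsule_IsValid(capsule, name) returns 1 iff the capsule currently
# carries exactly that name.
pyapi = ctypes.pythonapi
pyapi.PyCapsule_IsValid.restype = ctypes.c_int
pyapi.PyCapsule_IsValid.argtypes = [ctypes.py_object, ctypes.c_char_p]

cap = torch.arange(3).__dlpack__(max_version=(1, 0))
assert pyapi.PyCapsule_IsValid(cap, b"dltensor_versioned")

# Consuming the capsule renames it to its `used` name.
t = torch.from_dlpack(cap)
assert pyapi.PyCapsule_IsValid(cap, b"used_dltensor_versioned")
```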

aten/src/ATen/dlpack.h

107 additions, 9 deletions

```diff
@@ -15,11 +15,11 @@
 #define DLPACK_EXTERN_C
 #endif
 
-/*! \brief The current version of dlpack */
-#define DLPACK_VERSION 80
+/*! \brief The current major version of dlpack */
+#define DLPACK_MAJOR_VERSION 1
 
-/*! \brief The current ABI version of dlpack */
-#define DLPACK_ABI_VERSION 1
+/*! \brief The current minor version of dlpack */
+#define DLPACK_MINOR_VERSION 0
 
 /*! \brief DLPACK_DLL prefix for windows */
 #ifdef _WIN32
@@ -40,6 +40,33 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
+
+/*!
+ * \brief The DLPack version.
+ *
+ * A change in major version indicates that we have changed the
+ * data layout of the ABI - DLManagedTensorVersioned.
+ *
+ * A change in minor version indicates that we have added new
+ * code, such as a new device type, but the ABI is kept the same.
+ *
+ * If an obtained DLPack tensor has a major version that disagrees
+ * with the version number specified in this header file
+ * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
+ * (and it is safe to do so). It is not safe to access any other fields
+ * as the memory layout will have changed.
+ *
+ * In the case of a minor version mismatch, the tensor can be safely used as
+ * long as the consumer knows how to interpret all fields. Minor version
+ * updates indicate the addition of enumeration values.
+ */
+typedef struct {
+  /*! \brief DLPack major version. */
+  uint32_t major;
+  /*! \brief DLPack minor version. */
+  uint32_t minor;
+} DLPackVersion;
+
 /*!
  * \brief The device type in DLDevice.
  */
@@ -91,7 +118,7 @@ typedef enum {
   kDLWebGPU = 15,
   /*! \brief Qualcomm Hexagon DSP */
   kDLHexagon = 16,
-  /*! \brief Microsoft AI Accelerator */
+  /*! \brief Microsoft MAIA devices */
   kDLMAIA = 17,
 } DLDeviceType;
 
@@ -190,6 +217,9 @@ typedef struct {
   * return size;
   * }
   * \endcode
+  *
+  * Note that if the tensor is of size zero, then the data pointer should be
+  * set to `NULL`.
   */
  void* data;
  /*! \brief The device of the tensor */
@@ -215,6 +245,13 @@ typedef struct {
 * not meant to transfer the tensor. When the borrowing framework doesn't need
 * the tensor, it should call the deleter to notify the host that the resource
 * is no longer needed.
+ *
+ * \note This data structure is used as Legacy DLManagedTensor
+ * in DLPack exchange and is deprecated after DLPack v0.8
+ * Use DLManagedTensorVersioned instead.
+ * This data structure may get renamed or deleted in future versions.
+ *
+ * \sa DLManagedTensorVersioned
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
@@ -223,13 +260,74 @@ typedef struct DLManagedTensor {
  * which DLManagedTensor is used in the framework. It can also be NULL.
  */
  void * manager_ctx;
-  /*! \brief Destructor signature void (*)(void*) - this should be called
-   * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
-   * if there is no way for the caller to provide a reasonable destructor.
-   * The destructors deletes the argument self as well.
+  /*!
+   * \brief Destructor - this should be called
+   * to destruct the manager_ctx which backs the DLManagedTensor. It can be
+   * NULL if there is no way for the caller to provide a reasonable destructor.
+   * The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;
+
+// bit masks used in in the DLManagedTensorVersioned
+
+/*! \brief bit mask to indicate that the tensor is read only. */
+#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
+
+/*!
+ * \brief bit mask to indicate that the tensor is a copy made by the producer.
+ *
+ * If set, the tensor is considered solely owned throughout its lifetime by the
+ * consumer, until the producer-provided deleter is invoked.
+ */
+#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
+
+/*!
+ * \brief A versioned and managed C Tensor object, manage memory of DLTensor.
+ *
+ * This data structure is intended to facilitate the borrowing of DLTensor by
+ * another framework. It is not meant to transfer the tensor. When the borrowing
+ * framework doesn't need the tensor, it should call the deleter to notify the
+ * host that the resource is no longer needed.
+ *
+ * \note This is the current standard DLPack exchange data structure.
+ */
+struct DLManagedTensorVersioned {
+  /*!
+   * \brief The API and ABI version of the current managed Tensor
+   */
+  DLPackVersion version;
+  /*!
+   * \brief the context of the original host framework.
+   *
+   * Stores DLManagedTensorVersioned is used in the
+   * framework. It can also be NULL.
+   */
+  void *manager_ctx;
+  /*!
+   * \brief Destructor.
+   *
+   * This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned.
+   * It can be NULL if there is no way for the caller to provide a reasonable
+   * destructor. The destructor deletes the argument self as well.
+   */
+  void (*deleter)(struct DLManagedTensorVersioned *self);
+  /*!
+   * \brief Additional bitmask flags information about the tensor.
+   *
+   * By default the flags should be set to 0.
+   *
+   * \note Future ABI changes should keep everything until this field
+   * stable, to ensure that deleter can be correctly called.
+   *
+   * \sa DLPACK_FLAG_BITMASK_READ_ONLY
+   * \sa DLPACK_FLAG_BITMASK_IS_COPIED
+   */
+  uint64_t flags;
+  /*! \brief DLTensor which is being memory managed */
+  DLTensor dl_tensor;
+};
+
 #ifdef __cplusplus
 } // DLPACK_EXTERN_C
 #endif
```
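The rule spelled out in the `DLPackVersion` comment reduces to a small consumer-side check; a minimal illustrative sketch (the `can_consume` helper is hypothetical, not part of this commit):

```python
# Version constants mirroring dlpack.h above.
DLPACK_MAJOR_VERSION = 1
DLPACK_MINOR_VERSION = 0

def can_consume(major: int, minor: int) -> bool:
    """Illustrative check against a DLManagedTensorVersioned's version field.

    A differing major version means the ABI layout changed: the consumer
    may only call the deleter. A larger minor version merely adds
    enumeration values, so the tensor itself remains safe to interpret.
    """
    return major == DLPACK_MAJOR_VERSION
```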

torch/_C/__init__.pyi.in

1 addition, 0 deletions

```diff
@@ -1230,6 +1230,7 @@ def _group_tensors_by_device_and_dtype(nested_tensorlists: List[List[Optional[Te
 # NB: There is no Capsule type in typing, see
 # https://code.activestate.com/lists/python-dev/139675/
 def _to_dlpack(data: Tensor) -> Any: ...  # THPModule_toDLPack
+def _to_dlpack_unversioned(data: Tensor) -> Any: ...  # THPModule_toDLPackUnversioned
 def _from_dlpack(data: Any) -> Tensor: ...  # THPModule_fromDLPack
 def _get_cpp_backtrace(
     frames_to_skip: _int,
```

torch/__init__.py

1 addition, 1 deletion

```diff
@@ -2234,7 +2234,7 @@ def compiled_with_cxx11_abi() -> builtins.bool:
     matrix_rank,
     solve,
 )
-from torch.utils.dlpack import from_dlpack, to_dlpack
+from torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned
 
 
 class _TorchCompileInductorWrapper:
```
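With the new export wired through, both entry points are reachable from the top-level namespace; a sketch of what each should hand back on a build with this change (capsule-name comments follow `DLPackTraits`):

```python
import torch

t = torch.arange(3)

# at::toDLPack now produces a DLManagedTensorVersioned...
versioned = torch.to_dlpack(t)             # capsule named "dltensor_versioned"

# ...while the newly exported unversioned variant keeps legacy behavior
# for consumers that have not adopted DLPack 1.0.
legacy = torch.to_dlpack_unversioned(t)    # capsule named "dltensor"
```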

torch/_tensor.py

16 additions, 4 deletions

```diff
@@ -1655,7 +1655,7 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):
 
     __torch_dispatch__ = _C._disabled_torch_dispatch_impl
 
-    def __dlpack__(self, stream=None):
+    def __dlpack__(self, stream=None, max_version=None):
         """
         Creates a DLpack `capsule https://data-apis.org/array-api/latest/design_topics/data_interchange.html#data-interchange`_
         of the current tensor to be exported to other libraries.
@@ -1672,6 +1672,11 @@ def __dlpack__(self, stream=None):
             both streams. If None or -1 is passed then no synchronization is performed.
             If 1 (on CUDA) or 0 (on ROCM) then the default stream is used for
             synchronization.
+
+            max_version (tuple[int, int] or None): An optional Python tuple with
+            2 integers, representing the maximum version the caller supports. If
+            None is passed, then PyTorch will fallback to DLPack 0.X, where versions
+            are not supported.
         """
         if has_torch_function_unary(self):
             return handle_torch_function(Tensor.__dlpack__, (self,), self, stream)
@@ -1722,7 +1727,14 @@ def __dlpack__(self, stream=None):
                 raise RuntimeError(
                     "Can't export to dlpack an XLA tensor that is not on CUDA."
                 )
+
+            # Does not support DLPack 1.0, yet.
             return xla_dlpack.to_dlpack(self)
+
+        if max_version is None or max_version[0] < 1:
+            # Fallback to the old, unversioned variant.
+            return torch.to_dlpack_unversioned(self)
+
         return torch.to_dlpack(self)
 
     def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]:
@@ -1737,9 +1749,9 @@ def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]:
         if torch_device_type == "cuda" and torch.version.hip is not None:
             device_type = DLDeviceType.kDLROCM
         elif torch_device_type == "cpu" and self.is_pinned():
-            device_type = DLDeviceType.kDLCPUPinned
+            device_type = DLDeviceType.kDLCUDAHost
         elif torch_device_type == "cuda":
-            device_type = DLDeviceType.kDLGPU
+            device_type = DLDeviceType.kDLCUDA
         elif torch_device_type == "cpu":
             device_type = DLDeviceType.kDLCPU
         elif torch_device_type == "xpu":
@@ -1755,7 +1767,7 @@ def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]:
             ):
                 raise ValueError(f"Unknown device type {torch_device_type} for Dlpack")
 
-            device_type = DLDeviceType.kDLGPU
+            device_type = DLDeviceType.kDLCUDA
         else:
             raise ValueError(f"Unknown device type {torch_device_type} for Dlpack")
         return (device_type, idx)
```
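The renamed enum members keep the same numeric values as their DLPack 0.X spellings (`kDLGPU` → `kDLCUDA` = 2, `kDLCPUPinned` → `kDLCUDAHost` = 3), so `__dlpack_device__` results are unchanged on the wire. A quick check, assuming a build with this change:

```python
import torch

# kDLCPU == 1; CPU tensors report device index 0.
assert torch.ones(2).__dlpack_device__() == (1, 0)

if torch.cuda.is_available():
    # kDLCUDA == 2 (formerly spelled kDLGPU; same value).
    assert torch.ones(2, device="cuda:0").__dlpack_device__() == (2, 0)
```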
