opencv
diff --git a/‎modules/core/include/opencv2/core/cuda.hpp
Lines changed: 30 additions & 0 deletions b/‎modules/core/include/opencv2/core/cuda.hpp
Lines changed: 30 additions & 0 deletions
diff --git a/‎modules/core/src/cuda_stream.cpp
Lines changed: 8 additions & 0 deletions b/‎modules/core/src/cuda_stream.cpp
Lines changed: 8 additions & 0 deletions
diff --git a/‎modules/python/test/test_cuda.py
Lines changed: 4 additions & 0 deletions b/‎modules/python/test/test_cuda.py
Lines changed: 4 additions & 0 deletions
@@ -567,6 +567,29 @@ The function does not reallocate memory if the matrix has proper attributes alre
  */
 CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
 
+/** @brief Python overload to create a GpuMat header over existing GPU memory.
+
+@param rows Row count.
+@param cols Column count.
+@param type Type of the matrix.
+@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
+@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to AUTO_STEP), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
+ */
+CV_EXPORTS_W inline GpuMat createGpuMat(int rows, int cols, int type, uint64 cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
+    return GpuMat(rows, cols, type, reinterpret_cast<void*>(static_cast<size_t>(cudaMemoryAddress)), step); // narrow via size_t so the integer-to-pointer cast is width-matched on 32-bit targets
+}
+
+/** @overload
+
+@param size 2D array size: Size(cols, rows). In the Size() constructor, the number of rows and the number of columns go in the reverse order.
+@param type Type of the matrix.
+@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
+@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to AUTO_STEP), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
+ */
+CV_EXPORTS_W inline GpuMat createGpuMat(Size size, int type, uint64 cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
+    return GpuMat(size, type, reinterpret_cast<void*>(static_cast<size_t>(cudaMemoryAddress)), step); // narrow via size_t so the integer-to-pointer cast is width-matched on 32-bit targets
+}
+
 /** @brief BufferPool for use with CUDA streams
 
 BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
@@ -921,6 +944,13 @@ class CV_EXPORTS_W Stream
     friend class DefaultDeviceInitializer;
 };
 
+
+/** @brief Python overload to create a Stream object from the address stored in an existing CUDA Runtime API stream pointer (cudaStream_t)
+
+@param cudaStreamMemoryAddress Memory address stored in a CUDA Runtime API stream pointer (cudaStream_t). The created Stream object does not perform any allocation or deallocation and simply wraps the existing raw CUDA Runtime API stream pointer.
+ */
+CV_EXPORTS_W Stream wrapStream(uint64 cudaStreamMemoryAddress);
+
 class CV_EXPORTS_W Event
 {
 public:
 
@@ -586,6 +586,14 @@ Stream cv::cuda::StreamAccessor::wrapStream(cudaStream_t stream)
 
 #endif
 
+// Builds a Stream from a cudaStream_t handle value passed as an integer.
+// Without HAVE_CUDA the argument is ignored and throw_no_cuda() is called instead.
+Stream cv::cuda::wrapStream(uint64 cudaStreamMemoryAddress) {
+#ifndef HAVE_CUDA
+    (void)cudaStreamMemoryAddress; // not referenced in this branch; avoids an unused-parameter warning
+    throw_no_cuda();
+#else
+    return cv::cuda::StreamAccessor::wrapStream(reinterpret_cast<cudaStream_t>(cudaStreamMemoryAddress));
+#endif
+}
+
 /////////////////////////////////////////////////////////////
 /// StackAllocator
 
 
@@ -40,8 +40,12 @@ def test_cuda_interop(self):
         cuMat = cv.cuda_GpuMat()
         cuMat.upload(npMat)
         self.assertTrue(cuMat.cudaPtr() != 0)
+        cuMatFromPtr = cv.cuda.createGpuMat(cuMat.size(),cuMat.type(),cuMat.cudaPtr(), cuMat.step)
+        self.assertTrue(cuMat.cudaPtr() == cuMatFromPtr.cudaPtr())
         stream = cv.cuda_Stream()
         self.assertTrue(stream.cudaPtr() != 0)
+        streamFromPtr = cv.cuda.wrapStream(stream.cudaPtr())
+        self.assertTrue(stream.cudaPtr() == streamFromPtr.cudaPtr())
         asyncstream = cv.cuda_Stream(1)  # cudaStreamNonBlocking
         self.assertTrue(asyncstream.cudaPtr() != 0)