Merge pull request #23371 from cudawarped:cuda_add_futher_python_interop · opencv/opencv@e3c5c09 · GitHub

Commit e3c5c09

Merge pull request #23371 from cudawarped:cuda_add_futher_python_interop
`cuda`: Add bindings to allow `GpuMat` and `Stream` objects to be initialized from memory allocated by other libraries
2 parents 2e9eb05 + 7539abe commit e3c5c09
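To make the intent concrete, here is a minimal Python sketch mirroring the test added in this PR; the NumPy upload is only scaffolding to obtain device memory and a stream, not part of the new API:

import numpy as np
import cv2 as cv

# Scaffolding only: obtain device memory and a stream owned by OpenCV itself.
npMat = (np.random.random((128, 128, 3)) * 255).astype(np.uint8)
cuMat = cv.cuda_GpuMat()
cuMat.upload(npMat)
stream = cv.cuda_Stream()

# New binding: build a GpuMat header over an existing device allocation (no copy, no ownership).
aliasMat = cv.cuda.createGpuMatFromCudaMemory(cuMat.size(), cuMat.type(), cuMat.cudaPtr(), cuMat.step)

# New binding: wrap an existing cudaStream_t address as a cv.cuda Stream (no allocation or deallocation).
aliasStream = cv.cuda.wrapStream(stream.cudaPtr())

assert aliasMat.cudaPtr() == cuMat.cudaPtr()
assert aliasStream.cudaPtr() == stream.cudaPtr()

Both bindings only create non-owning wrappers around the supplied addresses.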

File tree

3 files changed: +45 −0

modules/core/include/opencv2/core/cuda.hpp

Lines changed: 30 additions & 0 deletions
@@ -567,6 +567,29 @@ The function does not reallocate memory if the matrix has proper attributes alre
 */
 CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
 
+/** @brief Bindings overload to create a GpuMat from existing GPU memory.
+@param rows Row count.
+@param cols Column count.
+@param type Type of the matrix.
+@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
+@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
+@note Overload for generation of bindings only, not exported or intended for use internally from C++.
+*/
+CV_EXPORTS_W GpuMat inline createGpuMatFromCudaMemory(int rows, int cols, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
+    return GpuMat(rows, cols, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
+};
+
+/** @overload
+@param size 2D array size: Size(cols, rows). In the Size() constructor, the number of rows and the number of columns go in the reverse order.
+@param type Type of the matrix.
+@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
+@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
+@note Overload for generation of bindings only, not exported or intended for use internally from C++.
+*/
+CV_EXPORTS_W inline GpuMat createGpuMatFromCudaMemory(Size size, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
+    return GpuMat(size, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
+};
+
 /** @brief BufferPool for use with CUDA streams
 
 BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
@@ -921,6 +944,13 @@ class CV_EXPORTS_W Stream
     friend class DefaultDeviceInitializer;
 };
 
+
+/** @brief Bindings overload to create a Stream object from the address stored in an existing CUDA Runtime API stream pointer (cudaStream_t).
+@param cudaStreamMemoryAddress Memory address stored in a CUDA Runtime API stream pointer (cudaStream_t). The created Stream object does not perform any allocation or deallocation and simply wraps an existing raw CUDA Runtime API stream pointer.
+@note Overload for generation of bindings only, not exported or intended for use internally from C++.
+*/
+CV_EXPORTS_W Stream wrapStream(size_t cudaStreamMemoryAddress);
+
 class CV_EXPORTS_W Event
 {
 public:
modules/core/src/cuda_stream.cpp

Lines changed: 9 additions & 0 deletions
@@ -586,6 +586,15 @@ Stream cv::cuda::StreamAccessor::wrapStream(cudaStream_t stream)
 
 #endif
 
+Stream cv::cuda::wrapStream(size_t cudaStreamMemoryAddress) {
+#ifndef HAVE_CUDA
+    CV_UNUSED(cudaStreamMemoryAddress);
+    throw_no_cuda();
+#else
+    return cv::cuda::StreamAccessor::wrapStream(reinterpret_cast<cudaStream_t>(cudaStreamMemoryAddress));
+#endif
+}
+
 /////////////////////////////////////////////////////////////////
 /// StackAllocator
 
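Likewise, wrapStream lets Python code reuse a CUDA stream created elsewhere. The sketch below assumes CuPy provides the external stream; only its ptr attribute (the raw cudaStream_t value) is actually required:

import cv2 as cv
import cupy as cp  # assumption: CuPy supplies the external CUDA stream

# Create a stream outside OpenCV and wrap its raw cudaStream_t address.
cupy_stream = cp.cuda.Stream(non_blocking=True)
cv_stream = cv.cuda.wrapStream(cupy_stream.ptr)

# The wrapped Stream neither allocates nor frees anything; it can be passed to
# asynchronous cv.cuda calls, e.g. cv.cuda.resize(src_gpu, (320, 240), stream=cv_stream).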
modules/python/test/test_cuda.py

Lines changed: 6 additions & 0 deletions
@@ -40,8 +40,14 @@ def test_cuda_interop(self):
         cuMat = cv.cuda_GpuMat()
         cuMat.upload(npMat)
         self.assertTrue(cuMat.cudaPtr() != 0)
+        cuMatFromPtrSz = cv.cuda.createGpuMatFromCudaMemory(cuMat.size(), cuMat.type(), cuMat.cudaPtr(), cuMat.step)
+        self.assertTrue(cuMat.cudaPtr() == cuMatFromPtrSz.cudaPtr())
+        cuMatFromPtrRc = cv.cuda.createGpuMatFromCudaMemory(cuMat.size()[1], cuMat.size()[0], cuMat.type(), cuMat.cudaPtr(), cuMat.step)
+        self.assertTrue(cuMat.cudaPtr() == cuMatFromPtrRc.cudaPtr())
         stream = cv.cuda_Stream()
         self.assertTrue(stream.cudaPtr() != 0)
+        streamFromPtr = cv.cuda.wrapStream(stream.cudaPtr())
+        self.assertTrue(stream.cudaPtr() == streamFromPtr.cudaPtr())
         asyncstream = cv.cuda_Stream(1)  # cudaStreamNonBlocking
         self.assertTrue(asyncstream.cudaPtr() != 0)
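One caveat worth spelling out, as the new doc comments note: neither binding takes ownership, so the object that owns the wrapped memory or stream must stay alive for as long as the wrapper is used. A small illustration:

import numpy as np
import cv2 as cv

npMat = np.full((64, 64, 3), 7, dtype=np.uint8)
owner = cv.cuda_GpuMat()
owner.upload(npMat)

alias = cv.cuda.createGpuMatFromCudaMemory(owner.size(), owner.type(), owner.cudaPtr(), owner.step)

# alias is only a header pointing at owner's allocation. If owner is released
# (e.g. del owner) while alias is still in use, alias dangles; keep the owning
# object alive, or copy the data, before letting the owner go out of scope.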

0 commit comments
