-
Notifications
You must be signed in to change notification settings - Fork 341
GPU Support via OpenCL #1377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Pencilcaseman
wants to merge
23
commits into
rust-ndarray:master
Choose a base branch
from
Pencilcaseman:master
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
GPU Support via OpenCL #1377
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
28bdd02
Initial OpenCL things
Pencilcaseman aa19194
Update CI
Pencilcaseman 24bcf1d
Why is CI failing?
Pencilcaseman 931dde4
Remove some clippy warnings and fix some bugs
Pencilcaseman 67a0382
Fix
Pencilcaseman b03ff6f
Fix tests
Pencilcaseman 8dd39d1
Update bench config
Pencilcaseman 772bda4
Do not allow operations on mismatched devices
Pencilcaseman 4ac3f96
oops
Pencilcaseman 00e3955
Apparently strip isn't allowed in 1.57.0
Pencilcaseman 0642766
Bump MSRV
Pencilcaseman 1b5d086
Remove unnecessary unsafe block
Pencilcaseman f92fe0d
Fix issue
Pencilcaseman b04c533
Why did that not work... Bump MSRV again
Pencilcaseman 722c4f3
Update CI and tests. Decrease MSRV.
Pencilcaseman fdcd653
Update CI
Pencilcaseman d813d66
Further update CI
Pencilcaseman 2253f78
Version matching is hard
Pencilcaseman ed26986
Again, change MSRV
Pencilcaseman c4541f5
First OpenCL kernels!
Pencilcaseman 77bfad6
Clean up some code and update CI
Pencilcaseman 48c5ee2
Fix include order
Pencilcaseman 96aba35
Revert formatting changes :(
Pencilcaseman File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
First OpenCL kernels!
- Loading branch information
commit c4541f58c90f04952715063b020cace93d7ec869
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,8 +6,12 @@ | |
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
use std::mem::ManuallyDrop; | ||
use crate::dimension::DimMax; | ||
use crate::Device; | ||
use crate::Zip; | ||
use crate::Layout; | ||
use crate::OwnedRepr; | ||
use num_complex::Complex; | ||
|
||
/// Elements that can be used as direct operands in arithmetic with arrays. | ||
|
@@ -58,6 +62,16 @@ macro_rules! device_check_assert( | |
} | ||
); | ||
|
||
// Pick the expression $a for commutative and $b for ordered binop | ||
macro_rules! if_commutative { | ||
(Commute { $a:expr } or { $b:expr }) => { | ||
$a | ||
}; | ||
(Ordered { $a:expr } or { $b:expr }) => { | ||
$b | ||
}; | ||
} | ||
|
||
macro_rules! impl_binary_op( | ||
($rs_trait:ident, $operator:tt, $math_op:ident, $inplace_op:tt, $docstring:expr) => ( | ||
/// Perform elementwise | ||
|
@@ -186,7 +200,7 @@ where | |
/// **Panics** if broadcasting isn’t possible. | ||
impl<'a, A, B, S, S2, D, E> $rs_trait<&'a ArrayBase<S2, E>> for &'a ArrayBase<S, D> | ||
where | ||
A: Clone + $rs_trait<B, Output=A>, | ||
A: Clone + $rs_trait<B, Output=A> + std::fmt::Debug, | ||
B: Clone, | ||
S: Data<Elem=A>, | ||
S2: Data<Elem=B>, | ||
|
@@ -205,7 +219,97 @@ where | |
} else { | ||
self.broadcast_with(rhs).unwrap() | ||
}; | ||
Zip::from(lhs).and(rhs).map_collect(clone_opf(A::$math_op)) | ||
|
||
match self.device() { | ||
Device::Host => { | ||
Zip::from(lhs).and(rhs).map_collect(clone_opf(A::$math_op)) | ||
} | ||
|
||
#[cfg(feature = "opencl")] | ||
Device::OpenCL => { | ||
if lhs.raw_dim().ndim() == 0 && rhs.raw_dim().ndim() == 0 { | ||
// println!("Scalar"); | ||
todo!(); | ||
} else if lhs.layout_impl().is(Layout::CORDER | Layout::FORDER) && | ||
rhs.layout_impl().is(Layout::CORDER | Layout::FORDER) && | ||
lhs.layout_impl().matches(rhs.layout_impl()) { | ||
// println!("Contiguous"); | ||
|
||
static mut KERNEL_BUILT: bool = false; // todo: fix monomorphization issue | ||
|
||
let typename = match crate::opencl::rust_type_to_c_name::<A>() { | ||
Some(x) => x, | ||
None => panic!("The Rust type {} is not supported by the \ | ||
OpenCL backend", std::any::type_name::<A>()) | ||
}; | ||
|
||
let kernel_name = format!("binary_op_{}_{}", stringify!($math_op), typename); | ||
|
||
#[cold] | ||
if unsafe { !KERNEL_BUILT } { | ||
let kernel = crate::opencl::gen_contiguous_linear_kernel_3( | ||
&kernel_name, | ||
typename, | ||
stringify!($operator)); | ||
|
||
unsafe { | ||
hasty_::opencl::opencl_add_kernel(&kernel); | ||
KERNEL_BUILT = true; | ||
} | ||
} | ||
|
||
unsafe { | ||
let elements = self.len(); | ||
let self_ptr = self.as_ptr() as *mut std::ffi::c_void; | ||
let other_ptr = rhs.as_ptr() as *mut std::ffi::c_void; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm assuming this operation can be invalid here because they are only known to be on the same device by a debug assertion, not a hard assertion? |
||
let res_ptr = match hasty_::opencl::opencl_allocate( | ||
elements * std::mem::size_of::<A>(), | ||
hasty_::opencl::OpenCLMemoryType::ReadWrite | ||
) { | ||
Ok(buf) => buf, | ||
Err(e) => panic!("Failed to allocate OpenCL buffer. Exited with: {:?}", e) | ||
}; | ||
|
||
match hasty_::opencl::opencl_run_contiguous_linear_kernel_3( | ||
&kernel_name, | ||
elements, | ||
self_ptr, | ||
other_ptr, | ||
res_ptr, | ||
) { | ||
Ok(()) => { | ||
use std::ptr::NonNull; | ||
|
||
let ptr = NonNull::new(res_ptr as *mut A).unwrap(); | ||
let data = OwnedRepr::<A>::from_components( | ||
ptr, | ||
self.len(), | ||
self.len(), | ||
self.device(), | ||
); | ||
|
||
Self::Output::from_parts( | ||
data, | ||
ptr, | ||
<D as DimMax<E>>::Output::from_dimension(&self.raw_dim()).unwrap(), | ||
<D as DimMax<E>>::Output::from_dimension(&self.raw_strides()).unwrap(), | ||
) | ||
} | ||
Err(e) => panic!("Failed to run OpenCL kernel '{}'. \ | ||
Exited with code: {:?}", kernel_name, e), | ||
} | ||
} | ||
} else { | ||
println!("Strided"); | ||
todo!(); | ||
} | ||
} | ||
|
||
#[cfg(feature = "cuda")] | ||
Device::CUDA => { | ||
todo!(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
|
@@ -248,16 +352,6 @@ impl<'a, A, S, D, B> $rs_trait<B> for &'a ArrayBase<S, D> | |
); | ||
); | ||
|
||
// Pick the expression $a for commutative and $b for ordered binop | ||
macro_rules! if_commutative { | ||
(Commute { $a:expr } or { $b:expr }) => { | ||
$a | ||
}; | ||
(Ordered { $a:expr } or { $b:expr }) => { | ||
$b | ||
}; | ||
} | ||
|
||
macro_rules! impl_scalar_lhs_op { | ||
// $commutative flag. Reuse the self + scalar impl if we can. | ||
// We can do this safely since these are the primitive numeric types | ||
|
@@ -304,10 +398,11 @@ impl<'a, S, D> $trt<&'a ArrayBase<S, D>> for $scalar | |
} | ||
|
||
mod arithmetic_ops { | ||
use super::*; | ||
use std::ops::*; | ||
|
||
use crate::imp_prelude::*; | ||
|
||
use std::ops::*; | ||
use super::*; | ||
|
||
fn clone_opf<A: Clone, B: Clone, C>(f: impl Fn(A, B) -> C) -> impl FnMut(&A, &B) -> C { | ||
move |x, y| f(x.clone(), y.clone()) | ||
|
@@ -447,9 +542,10 @@ mod arithmetic_ops { | |
} | ||
|
||
mod assign_ops { | ||
use super::*; | ||
use crate::imp_prelude::*; | ||
|
||
use super::*; | ||
|
||
macro_rules! impl_assign_op { | ||
($trt:ident, $method:ident, $doc:expr) => { | ||
use std::ops::$trt; | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
pub(crate) fn rust_type_to_c_name<T>() -> Option<&'static str> { | ||
match std::any::type_name::<T>() { | ||
"f32" => Some("float"), | ||
"f64" => Some("double"), | ||
"i8" => Some("int8_t"), | ||
"i16" => Some("int16_t"), | ||
"i32" => Some("int32_t"), | ||
"i64" => Some("int64_t"), | ||
"u8" => Some("uint8_t"), | ||
"u16" => Some("uint16_t"), | ||
"u32" => Some("uint32_t"), | ||
"u64" | "usize" => Some("uint64_t"), | ||
_ => None, | ||
} | ||
} | ||
|
||
pub(crate) fn gen_contiguous_linear_kernel_3(kernel_name: &str, typename: &str, op: &str) -> String { | ||
format!( | ||
r#" | ||
#ifndef NDARRAY_INCLUDE_STDINT | ||
#define NDARRAY_INCLUDE_STDINT | ||
|
||
// We should probably verify that these are, in fact, correct | ||
typedef char int8_t; | ||
typedef short int16_t; | ||
typedef int int32_t; | ||
typedef long int64_t; | ||
typedef unsigned char uint8_t; | ||
typedef unsigned short uint16_t; | ||
typedef unsigned int uint32_t; | ||
typedef unsigned long uint64_t; | ||
#endif // NDARRAY_INCLUDE_STDINT | ||
|
||
__kernel void {kernel_name}(__global const {typename} *a, __global const {typename} *b, __global {typename} *c) {{ | ||
// Get id as 64-bit integer to avoid overflow | ||
uint64_t i = get_global_id(0); | ||
c[i] = a[i] {op} b[i]; | ||
}} | ||
"#, | ||
kernel_name = kernel_name, | ||
typename = typename, | ||
op = op, | ||
) | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Enforce at type level?