7
7
//
8
8
// ===----------------------------------------------------------------------===//
9
9
// RUN: dpct --rule-file=%S/../../tools/dpct/DpctOptRules/intel_specific_math.yaml --format-range=none -out-root %T/math_specific_UDR_test %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only
10
- // RUN: FileCheck --input-file %T/math_specific_UDR_test/math_specific_UDR_test.dp.cpp --match-full-lines %s
11
-
12
- // CHECK: #include <CL/sycl.hpp>
13
- // CHECK: #include <dpct/dpct.hpp>
14
-
15
- // CHECK: #include <sycl/ext/intel/math.hpp>
16
10
17
11
#include " cuda_fp16.h"
18
12
#include < iostream>
19
- // CHECK: void kernelFunc(double *deviceArray) {
20
- // CHECK: double &d0 = *deviceArray;
21
- // CHECK: d0 = sycl::ext::intel::math::erfinv(d0);
22
- // CHECK: d0 = sycl::ext::intel::math::cdfnorm(d0);
23
- // CHECK: }
13
+
24
14
// Double-precision kernel that serves as migration input for this test.
// It binds d0 to the value deviceArray points at and overwrites that value
// in place twice: first with erfinv(d0), then with normcdf of that result.
// No thread or block index is used, so every thread that runs this kernel
// reads and writes the same single element.
// The `// CHECK:` lines preceding this kernel expect the two calls to be
// rewritten to sycl::ext::intel::math::erfinv and
// sycl::ext::intel::math::cdfnorm, and FileCheck is invoked with
// --match-full-lines, so the statement text here should stay as written.
// NOTE(review): the bare numbers between the statements look like old/new
// line-number columns left over from a diff view rather than program
// text -- confirm against the real file.
__global__ void kernelFunc (double *deviceArray) {
25
15
// Alias the pointee so the assignments below write through the pointer.
double &d0 = *deviceArray;
26
16
// Step 1: replace the value with erfinv of itself.
d0 = erfinv (d0);
27
17
// Step 2: feed the step-1 result through normcdf and store it back.
d0 = normcdf (d0);
28
18
29
19
}
30
20
31
- // CHECK: void kernelFunc(float *deviceArray) {
32
- // CHECK: float &f0 = *deviceArray;
33
- // CHECK: f0 = sycl::ext::intel::math::erfinv(f0);
34
- // CHECK: f0 = sycl::ext::intel::math::cdfnorm(f0);
35
- // CHECK: }
36
21
// Single-precision overload of kernelFunc, also used as migration input.
// It binds f0 to the value deviceArray points at and overwrites that value
// in place twice: first with erfinvf(f0), then with normcdff of that result.
// No thread or block index is used, so every thread that runs this kernel
// reads and writes the same single element.
// The `// CHECK:` lines preceding this kernel expect the two calls to be
// rewritten to sycl::ext::intel::math::erfinv and
// sycl::ext::intel::math::cdfnorm (the float-suffixed names are dropped),
// and FileCheck is invoked with --match-full-lines, so the statement text
// here should stay as written.
// NOTE(review): the bare numbers between the statements look like old/new
// line-number columns left over from a diff view rather than program
// text -- confirm against the real file.
__global__ void kernelFunc (float *deviceArray) {
37
22
// Alias the pointee so the assignments below write through the pointer.
float &f0 = *deviceArray;
38
23
// Step 1: replace the value with erfinvf of itself.
f0 = erfinvf (f0);
39
24
// Step 2: feed the step-1 result through normcdff and store it back.
f0 = normcdff (f0);
40
25
41
26
}
42
27
43
-
44
- // CHECK: void testDouble() {
45
- // CHECK: dpct::device_ext &dev_ct1 = dpct::get_current_device();
46
- // CHECK: sycl::queue &q_ct1 = dev_ct1.default_queue();
47
- // CHECK: const unsigned int NUM = 1;
48
- // CHECK: const unsigned int bytes = NUM * sizeof(double);
49
- // CHECK: double *hostArrayDouble = (double *)malloc(bytes);
50
- // CHECK: memset(hostArrayDouble, 0, bytes);
51
- // CHECK: double *deviceArrayDouble;
52
- // CHECK: deviceArrayDouble = (double *)sycl::malloc_device(bytes, q_ct1);
53
- // CHECK: q_ct1.memcpy(deviceArrayDouble, hostArrayDouble, bytes).wait();
54
- // CHECK: q_ct1.parallel_for(
55
- // CHECK: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
56
- // CHECK: [=](sycl::nd_item<3> item_ct1) {
57
- // CHECK: kernelFunc(deviceArrayDouble);
58
- // CHECK: });
59
- // CHECK: q_ct1.memcpy(hostArrayDouble, deviceArrayDouble, bytes).wait();
60
- // CHECK: sycl::free(deviceArrayDouble, q_ct1);
61
- // CHECK: }
62
28
bool testDouble () {
63
29
double *hostArrayDouble = (double *)malloc (sizeof (double ));
64
30
*hostArrayDouble = 0.956841 ;
@@ -76,24 +42,6 @@ bool testDouble() {
76
42
return true ;
77
43
}
78
44
79
- // CHECK: void testFloat() {
80
- // CHECK: dpct::device_ext &dev_ct1 = dpct::get_current_device();
81
- // CHECK: sycl::queue &q_ct1 = dev_ct1.default_queue();
82
- // CHECK: const unsigned int NUM = 1;
83
- // CHECK: const unsigned int bytes = NUM * sizeof(float);
84
- // CHECK: float *hostArrayFloat = (float *)malloc(bytes);
85
- // CHECK: memset(hostArrayFloat, 0, bytes);
86
- // CHECK: float *deviceArrayFloat;
87
- // CHECK: deviceArrayFloat = (float *)sycl::malloc_device(bytes, q_ct1);
88
- // CHECK: q_ct1.memcpy(deviceArrayFloat, hostArrayFloat, bytes).wait();
89
- // CHECK: q_ct1.parallel_for(
90
- // CHECK: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
91
- // CHECK: [=](sycl::nd_item<3> item_ct1) {
92
- // CHECK: kernelFunc(deviceArrayFloat);
93
- // CHECK: });
94
- // CHECK: q_ct1.memcpy(hostArrayFloat, deviceArrayFloat, bytes).wait();
95
- // CHECK: sycl::free(deviceArrayFloat, q_ct1);
96
- // CHECK: }
97
45
bool testFloat () {
98
46
float *hostArrayFloat = (float *)malloc (sizeof (float ));
99
47
*hostArrayFloat = 0 .1568541541f ;
0 commit comments