Time parallel debug · hzyjerry/robotics-toolbox-python@d84dd98 · GitHub
[go: up one dir, main page]

Skip to content

Commit d84dd98

Browse files
committed
Time parallel debug
1 parent 475a31b commit d84dd98

File tree

2 files changed

+249
-114
lines changed

2 files changed

+249
-114
lines changed

roboticstoolbox/cuda/fknm.cu

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ __device__ void _eye(double *data);
1818
* T: double(N, 4, 4) the final transform matrix of all points (shared)
1919
* tool: double(N, 4, 4) the tool transform matrix of all points (shared)
2020
* nlinks_pt: long(N,): the number of links associated with each point (shared)
21-
* link_A: double(nlinks, 4, 4) the transformation matrix of all joints
22-
* link_axes: long(nlinks, ): axes of all links
23-
* link_isjoint: long(nlinks, ): 1/0 whether links are joints
21+
* link_A: double(max_nlinks, 4, 4) the transformation matrix of all joints
22+
* link_axes: long(max_nlinks, ): axes of all links
23+
* link_isjoint: long(max_nlinks, ): 1/0 whether links are joints
2424
* N: (int) number of points
2525
* njoints: (int) number of joints
2626
* out: (N, 6, njoints)
@@ -33,7 +33,6 @@ __global__ void _jacob0(double *T,
3333
long *link_axes,
3434
long *link_isjoint,
3535
int N,
36-
int nlinks,
3736
int njoints,
3837
double *out)
3938
{
@@ -63,7 +62,7 @@ __global__ void _jacob0(double *T,
6362
return;
6463
}
6564

66-
// int nlinks = nlinks_pt[tid];
65+
int nlinks = nlinks_pt[tid];
6766
// printf("Hello from tid %d nlinks %d\n", tid, nlinks);
6867
for (int i = 0; i < nlinks; i++) {
6968
// printf("Hello from tid %d link_i %d link_axis %ld isjoint %ld \n", tid, i, link_axes[i], link_isjoint[i]);
@@ -358,11 +357,11 @@ extern "C"{
358357
* T: double(N, 4, 4) the final transform matrix of all points (shared)
359358
* tool: double(N, 4, 4) the tool transform matrix of all points (shared)
360359
* nlinks_pt: long(N,): the number of links associated with each point (shared)
361-
* link_A: double(nlinks, 4, 4) the transformation matrix of all joints
362-
* link_axes: long(nlinks, ): axes of all links
363-
* link_isjoint: long(nlinks, ): 1/0 whether links are joints
360+
* link_A: double(max_nlinks, 4, 4) the transformation matrix of all joints
361+
* link_axes: long(max_nlinks, ): axes of all links
362+
* link_isjoint: long(max_nlinks, ): 1/0 whether links are joints
364363
* N: (int) number of points
365-
* nlinks: (int) max number of links on the path
364+
* max_nlinks: (int) max number of links on the path
366365
* njoints: (int) number of joints
367366
* out: (N, 6, njoints)
368367
*/
@@ -374,7 +373,7 @@ void jacob0(double *T,
374373
long *link_axes,
375374
long *link_isjoint,
376375
int N,
377-
int nlinks,
376+
int max_nlinks,
378377
int njoints,
379378
double *out)
380379
{
@@ -385,21 +384,21 @@ void jacob0(double *T,
385384
cudaMalloc((void**)&d_T, sizeof(double) * N * 16);
386385
cudaMalloc((void**)&d_tool, sizeof(double) * N * 16);
387386
cudaMalloc((void**)&d_etool, sizeof(double) * N * 16);
388-
cudaMalloc((void**)&d_link_A, sizeof(double) * nlinks * 16);
387+
cudaMalloc((void**)&d_link_A, sizeof(double) * max_nlinks * 16);
389388
cudaMalloc((void**)&d_nlinks_pt, sizeof(long) * N);
390-
cudaMalloc((void**)&d_link_axes, sizeof(long) * nlinks);
391-
cudaMalloc((void**)&d_link_isjoint, sizeof(long) * nlinks);
389+
cudaMalloc((void**)&d_link_axes, sizeof(long) * max_nlinks);
390+
cudaMalloc((void**)&d_link_isjoint, sizeof(long) * max_nlinks);
392391
cudaMalloc((void**)&d_out, sizeof(double) * N * 6 * njoints);
393392

394393

395394
// Transfer data from host to device memory
396395
cudaMemcpy(d_T, T, sizeof(double) * N * 16, cudaMemcpyHostToDevice);
397396
cudaMemcpy(d_tool, tool, sizeof(double) * N * 16, cudaMemcpyHostToDevice);
398397
cudaMemcpy(d_etool, etool, sizeof(double) * N * 16, cudaMemcpyHostToDevice);
399-
cudaMemcpy(d_link_A, link_A, sizeof(double) * nlinks * 16, cudaMemcpyHostToDevice);
398+
cudaMemcpy(d_link_A, link_A, sizeof(double) * max_nlinks * 16, cudaMemcpyHostToDevice);
400399
cudaMemcpy(d_nlinks_pt, nlinks_pt, sizeof(long) * N, cudaMemcpyHostToDevice);
401-
cudaMemcpy(d_link_axes, link_axes, sizeof(long) * nlinks, cudaMemcpyHostToDevice);
402-
cudaMemcpy(d_link_isjoint, link_isjoint, sizeof(long) * nlinks, cudaMemcpyHostToDevice);
400+
cudaMemcpy(d_link_axes, link_axes, sizeof(long) * max_nlinks, cudaMemcpyHostToDevice);
401+
cudaMemcpy(d_link_isjoint, link_isjoint, sizeof(long) * max_nlinks, cudaMemcpyHostToDevice);
403402
cudaMemcpy(d_out, out, sizeof(double) * N * 6 * njoints, cudaMemcpyHostToDevice);
404403

405404

@@ -414,7 +413,6 @@ void jacob0(double *T,
414413
d_link_axes,
415414
d_link_isjoint,
416415
N,
417-
nlinks,
418416
njoints,
419417
d_out);
420418

0 commit comments

Comments
 (0)
0