8000 CUDA-python communication · hzyjerry/robotics-toolbox-python@cb5b5f5 · GitHub
[go: up one dir, main page]

Skip to content

Commit cb5b5f5

Browse files
committed
CUDA-python communication
1 parent c6d3d5b commit cb5b5f5

File tree

2 files changed

+171
-44
lines changed

2 files changed

+171
-44
lines changed

roboticstoolbox/cuda/fknm.cu

Lines changed: 52 additions & 43 deletions
F438
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
#include <cuda.h>
66
#include <cuda_runtime.h>
77

8-
__device__ int _inv(float *m, float *invOut);
9-
__device__ void mult(float *A, float *B, float *C);
10-
__device__ void copy(float *A, float *B);
11-
__device__ void _eye(float *data);
8+
__device__ int _inv(double *m, double *invOut);
9+
__device__ void mult(double *A, double *B, double *C);
10+
__device__ void copy(double *A, double *B);
11+
__device__ void _eye(double *data);
1212

1313

1414

@@ -24,25 +24,25 @@ __device__ void _eye(float *data);
2424
* cdim: (int) number of joints
2525
* out: (N, 6, cdim)
2626
*/
27-
__global__ void _jacob0(float *T,
28-
float *tool,
29-
float *e_tool,
30-
float *link_A,
27+
__global__ void _jacob0(double *T,
28+
double *tool,
29+
double *e_tool,
30+
double *link_A,
3131
int *link_axes,
3232
int *link_isjoint,
3333
int N,
3434
int cdim,
35-
float *out)
35+
double *out)
3636
{
3737
int tid = blockIdx.x * blockDim.x + threadIdx.x;
38-
float *T_i, *tool_i;
39-
float *U, *temp, *etool_i;
40-
float *invU;
41-
float *link_iA;
42-
43-
cudaMalloc((void**)&U, sizeof(float) * 16);
44-
cudaMalloc((void**)&invU, sizeof(float) * 16);
45-
cudaMalloc((void**)&temp, sizeof(float) * 16);
38+
double *T_i, *tool_i;
39+
double *U, *temp, *etool_i;
40+
double *invU;
41+
double *link_iA;
42+
43+
cudaMalloc((void**)&U, sizeof(double) * 16);
44+
cudaMalloc((void**)&invU, sizeof(double) * 16);
45+
cudaMalloc((void**)&temp, sizeof(double) * 16);
4646
int j = 0;
4747

4848
T_i = &T[tid * 16];
@@ -65,7 +65,7 @@ __global__ void _jacob0(float *T,
6565
_inv(U, invU);
6666
mult(invU, T_i, temp);
6767

68-
float *out_tid = &out[tid + 16];
68+
double *out_tid = &out[tid + 16];
6969

7070
if (link_axes[i] == 0) {
7171
out_tid[0 * tid + j] = U[0 * 4 + 2] * temp[1 * 4 + 3] - U[0 * 4 + 1] * temp[2 * 4 + 3];
@@ -134,7 +134,7 @@ __global__ void _jacob0(float *T,
134134
}
135135

136136

137-
__device__ void _eye(float *data)
137+
__device__ void _eye(double *data)
138138
{
139139
data[0] = 1;
140140
data[1] = 0;
@@ -154,7 +154,7 @@ __device__ void _eye(float *data)
154154
data[15] = 1;
155155
}
156156

157-
__device__ void copy(float *A, float *B)
157+
__device__ void copy(double *A, double *B)
158158
{
159159
// copy A into B
160160
B[0] = A[0];
@@ -175,7 +175,7 @@ __device__ void copy(float *A, float *B)
175175
B[15] = A[15];
176176
}
177177

178-
__device__ void mult(float *A, float *B, float *C)
178+
__device__ void mult(double *A, double *B, double *C)
179179
{
180180
const int N = 4;
181181
int i, j, k;
@@ -195,10 +195,10 @@ __device__ void mult(float *A, float *B, float *C)
195195
}
196196
}
197197

198-
__device__ int _inv(float *m, float *invOut)
198+
__device__ int _inv(double *m, double *invOut)
199199
{
200-
float *inv;
201-
cudaMalloc((void**)&inv, sizeof(float) * 16);
200+
double *inv;
201+
cudaMalloc((void**)&inv, sizeof(double) * 16);
202202
double det;
203203
int i;
204204

@@ -329,6 +329,9 @@ __device__ int _inv(float *m, float *invOut)
329329
}
330330

331331

332+
333+
extern "C"{
334+
332335
/*
333336
* Params
334337
* T: (N, 4, 4) the final transform matrix of all points (shared)
@@ -340,42 +343,42 @@ __device__ int _inv(float *m, float *invOut)
340343
* cdim: (int) number of joints
341344
* out: (N, 6, cdim)
342345
*/
343-
void jacob0(float *T,
344-
float *tool,
345-
float *etool,
346-
float *link_A,
346+
void jacob0(double *T,
347+
double *tool,
348+
double *etool,
349+
double *link_A,
347350
int *link_axes,
348351
int *link_isjoint,
349352
int N,
350353
int cdim,
351-
float *out)
354+
double *out)
352355
// affine_T[N]
353356
// link_axes[cdim]
354357
// link_A[cdim]
355358
// link_isjoint[cdim]
356359
// out
357360
{
358-
float *d_T, *d_tool, *d_etool, *d_link_A;
361+
double *d_T, *d_tool, *d_etool, *d_link_A;
359362
int *d_link_axes, *d_link_isjoint;
360-
float *d_out;
363+
double *d_out;
361364

362-
cudaMalloc((void**)&d_T, sizeof(float) * N * 16);
363-
cudaMalloc((void**)&d_tool, sizeof(float) * N * 16);
364-
cudaMalloc((void**)&d_etool, sizeof(float) * N * 16);
365-
cudaMalloc((void**)&d_link_A, sizeof(float) * cdim * 16);
365+
cudaMalloc((void**)&d_T, sizeof(double) * N * 16);
366+
cudaMalloc((void**)&d_tool, sizeof(double) * N * 16);
367+
cudaMalloc((void**)&d_etool, sizeof(double) * N * 16);
368+
cudaMalloc((void**)&d_link_A, sizeof(double) * cdim * 16);
366369
cudaMalloc((void**)&d_link_axes, sizeof(int) * cdim);
367370
cudaMalloc((void**)&d_link_isjoint, sizeof(int) * cdim);
368-
cudaMalloc((void**)&d_out, sizeof(float) * 6 * cdim);
371+
cudaMalloc((void**)&d_out, sizeof(double) * 6 * cdim);
369372

370373

371374
// Transfer data from host to device memory
372-
cudaMemcpy(d_T, T, sizeof(float) * N * 16, cudaMemcpyHostToDevice);
373-
cudaMemcpy(d_tool, tool, sizeof(float) * N * 16, cudaMemcpyHostToDevice);
374-
cudaMemcpy(d_etool, etool, sizeof(float) * N * 16, cudaMemcpyHostToDevice);
375-
cudaMemcpy(d_link_A, link_A, sizeof(float) * cdim * 16, cudaMemcpyHostToDevice);
375+
cudaMemcpy(d_T, T, sizeof(double) * N * 16, cudaMemcpyHostToDevice);
376+
cudaMemcpy(d_tool, tool, sizeof(double) * N * 16, cudaMemcpyHostToDevice);
377+
cudaMemcpy(d_etool, etool, sizeof(double) * N * 16, cudaMemcpyHostToDevice);
378+
cudaMemcpy(d_link_A, link_A, sizeof(double) * cdim * 16, cudaMemcpyHostToDevice);
376379
cudaMemcpy(d_link_axes, link_axes, sizeof(int) * cdim, cudaMemcpyHostToDevice);
377380
cudaMemcpy(d_link_isjoint, link_isjoint, sizeof(int) * cdim, cudaMemcpyHostToDevice);
378-
cudaMemcpy(d_out, out, sizeof(float) * 6 * cdim, cudaMemcpyHostToDevice);
381+
cudaMemcpy(d_out, out, sizeof(double) * 6 * cdim, cudaMemcpyHostToDevice);
379382

380383

381384
int block_size = 256;
@@ -390,7 +393,10 @@ void jacob0(float *T,
390393
cdim,
391394
d_out);
392395

393-
cudaMemcpy(out, d_out, sizeof(float) * 6 * cdim, cudaMemcpyDeviceToHost);
396+
// memset(out, 1, N * 6 * cdim);
397+
// out[0] = 1;
398+
cudaMemcpy(out, d_out, sizeof(double) * 6 * cdim, cudaMemcpyDeviceToHost);
399+
printf("Out size %d %d %f %f %f %f %f", N, cdim, out[0], out[1], out[2], out[3], out[4]);
394400

395401
// Deallocate device memory
396402
cudaFree(d_T);
@@ -400,4 +406,7 @@ void jacob0(float *T,
400406
cudaFree(d_link_axes);
401407
cudaFree(d_link_isjoint);
402408
cudaFree(d_out);
403-
}
409+
}
410+
411+
412+
}//extern "C"

roboticstoolbox/examples/chomp.py

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,125 @@ def get_link_cost(robot, meshes, link, num=-1, parallel=True):
239239
"""
240240
vertice_xyz = get_vertices_xyz(robot, meshes, link, num, parallel)
241241

242+
def test_parallel():
243+
robot = rtb.models.DH.Panda() # load Mesh version (for collisions)
244+
T = robot.fkine(robot.qz) # forward kinematics
245+
T = SE3(0.7, 0.2, 0.1) * SE3.OA([0, 1, 0], [0, 0, -1])
246+
sol = robot.ikine_LM(T) # solve IK
247+
q_pickup = sol.q
248+
qtraj = rtb.jtraj(robot.qz, q_pickup, 50)
249+
robot = rtb.models.URDF.Panda() # load URDF version of the Panda
250+
meshes = {}
251+
for link in robot.links:
252+
if len(link.geometry) != 1:
253+
print(len(link.geometry))
254+
continue
255+
kwargs = trimesh.exchange.dae.load_collada(link.geometry[0].filename)
256+
# kwargs = trimesh.exchange.dae.load_collada(filename)
257+
mesh = trimesh.exchange.load.load_kwargs(kwargs)
258+
meshes[link.name] = mesh.dump(concatenate=True)
259+
print(link.name, mesh)
260+
261+
# Hyperparameters
262+
dt = 1
263+
nq = 50
264+
lmbda = 1000
265+
eta = 1000
266+
iters = 4
267+
num_pts = 5
268+
269+
270+
# Make cost field, starting & end points
271+
cdim = len(qtraj.q[0])
272+
xidim = cdim * nq
273+
AA = np.zeros((xidim, xidim))
274+
xi = np.zeros(xidim)
275+
robot._set_link_fk(qtraj.q[1])
276+
277+
link = robot.links[-2]
278+
k = link.jindex
279+
280+
281+
link_base = robot.fkine(robot.q, end=link) # x_current, (4, 4)
282+
# Non-parallel, use for loop
283+
mesh = meshes[link.name]
284+
285+
# for j in range(num_pts):
286+
# # For each point: compute Jacobian, compute cost, compute cost gradient
287+
288+
# pt_rel = mesh.vertices[j]
289+
# pt_tool = link_base @ SE3(pt_rel)
290+
# pt_pos = pt_tool.t
291+
292+
# JJ = robot.jacob0(qt, end=link, tool=SE3(pt_rel)) # (k, 6)
293+
# import pdb; pdb.set_trace()
294+
# xd = JJ.dot(qd[:k+1]) # x_delta
295+
# vel = np.linalg.norm(xd)
296+
297+
# xdn = xd / (vel + 1e-3) # speed normalized
298+
# xdd = JJ.dot(xidd[cdim * i: cdim * i + k + 1]) # second derivative of xi
299+
# prj = np.eye(6) - xdn[:, None].dot(xdn[:, None].T) # curvature vector (6, 6)
300+
# kappa = (prj.dot(xdd) / (vel ** 2 + 1e-3)) # (6,)
301+
302+
# cost = np.sum(pt_pos)
303+
# total_cost += cost / num_pts
304+
# # delta := negative gradient of obstacle cost in work space, (6, cdim)
305+
# delta = -1 * np.concatenate([[1, 1, 0], np.zeros(3)])
306+
# # for link in robot.links:
307+
# # cost = get_link_cost(robot, meshes, link)
308+
# delta_nabla_obs += JJ.T.dot(vel).dot(prj.dot(delta) - cost * kappa)
309+
310+
# nabla_obs[cdim * i: cdim * i + k + 1] += (delta_nabla_obs / num_pts)
311+
# Parallel, use cuda
312+
313+
fknm_=np.ctypeslib.load_library('roboticstoolbox/cuda/fknm','.')
314+
import ctypes as ct
315+
pts = np.array(meshes[link.name].vertices[:num_pts])
316+
e_pts = np.zeros((num_pts, 3))
317+
pts_tool = np.array(SE3(pts).A)
318+
pts_etool = np.array(SE3(e_pts).A)
319+
320+
link_As = []
321+
link_axes = []
322+
for link in robot.links:
323+
link_As.append(link.A().A)
324+
axis = None
325+
if not link.isjoint:
326+
axis = -1
327+
elif link._v.axis == "Rx":
328+
axis = 0
329+
elif link._v.axis == "Ry":
330+
axis = 1
331+
elif link._v.axis == "Rz":
332+
axis = 2
333+
elif link._v.axis == "tx":
334+
axis = 3
335+
elif link._v.axis == "ty":
336+
axis = 4
337+
elif link._v.axis == "tz":
338+
axis = 5
339+
else:
340+
raise NotImplementedError
341+
link_axes.append(axis)
342+
link_As = np.array(link_As)
343+
link_axes = np.array(link_axes, dtype=int)
344+
link_isjoint = np.array([l.isjoint for l in robot.links], dtype=int)
345+
jacob_out = np.ones((num_pts, 6, cdim))
346+
347+
fknm_.jacob0(pts.ctypes.data_as(ct.c_void_p),
348+
pts_tool.ctypes.data_as(ct.c_void_p),
349+
pts_etool.ctypes.data_as(ct.c_void_p),
350+
# link_As.ctypes.data_as(ct.c_void_p),
351+
ct.c_void_p(link_As.ctypes.data),
352+
link_axes.ctypes.data_as(ct.c_void_p),
353+
link_isjoint.ctypes.data_as(ct.c_void_p),
354+
ct.c_int(num_pts),
355+
ct.c_int(len(robot.links)),
356+
jacob_out.ctypes.data_as(ct.c_void_p))
357+
print(jacob_out)
358+
import pdb; pdb.set_trace()
242359

243360

244361
if __name__ == "__main__":
245-
chomp()
362+
# chomp()
363+
test_parallel()

0 commit comments

Comments
 (0)
0