// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"

/* All valid flags for IORING_OP_MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)

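/*
 * Per-request state for IORING_OP_MSG_RING. The SQE fields are decoded
 * into this structure at prep time: user_data/len describe the CQE to
 * post on the target ring, src_fd/dst_fd describe the file to pass for
 * IORING_MSG_SEND_FD, and cqe_flags shares storage with dst_fd since
 * only one of the two is meaningful for a given command.
 */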
struct io_msg {
	struct file *file;
	struct file *src_file;
	struct callback_head tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};

static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_lock_external_ctx(struct io_ring_ctx *octx,
				unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

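/*
 * If the target ring requires completions to be posted from its own
 * submitter task (ctx->task_complete is set), the sender cannot post the
 * CQE directly and must instead hand the message off via task_work.
 */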
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	return target_ctx->task_complete;
}

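/*
 * Runs in the target ring's task context: post the prepared CQE, then
 * free the dummy request and drop the ctx reference taken at post time.
 */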
static void io_msg_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw)
{
	struct io_kiocb *req = tw_req.req;
	struct io_ring_ctx *ctx = req->ctx;

	io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
	kfree_rcu(req, rcu_head);
	percpu_ref_put(&ctx->refs);
}

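/*
 * Queue a dummy NOP request on the target ring so the CQE is posted by
 * the target's own task via task_work. A reference on the target ctx is
 * held until io_msg_tw_complete() runs.
 */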
static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
			      int res, u32 cflags, u64 user_data)
{
	if (!READ_ONCE(ctx->submitter_task)) {
		kfree_rcu(req, rcu_head);
		return -EOWNERDEAD;
	}
	req->opcode = IORING_OP_NOP;
	req->cqe.user_data = user_data;
	io_req_set_res(req, res, cflags);
	percpu_ref_get(&ctx->refs);
	req->ctx = ctx;
	req->tctx = NULL;
	req->io_task_work.func = io_msg_tw_complete;
	io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE);
	return 0;
}

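/*
 * Allocate a throwaway request to carry the message data to the target
 * ring's task. Only used when io_msg_need_remote() says the target cannot
 * be posted to directly.
 */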
static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
			      struct io_msg *msg)
{
	struct io_kiocb *target;
	u32 flags = 0;

	target = kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (unlikely(!target))
		return -ENOMEM;

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	return io_msg_remote_post(target_ctx, target, msg->len, flags,
				  msg->user_data);
}

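/*
 * Post a CQE carrying the caller-supplied user_data/len on the target
 * ring. With IORING_MSG_RING_FLAGS_PASS, dst_fd is forwarded as the CQE
 * flags. For IOPOLL target rings, completions are posted under uring_lock,
 * so the target must be locked around io_post_aux_cqe().
 */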
static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
			      struct io_msg *msg, unsigned int issue_flags)
{
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_data_remote(target_ctx, msg);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
	}
	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
		ret = 0;
	if (target_ctx->flags & IORING_SETUP_IOPOLL)
		io_double_unlock_ctx(target_ctx);
	return ret;
}

static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	return __io_msg_ring_data(target_ctx, msg, issue_flags);
}

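/*
 * Look up the source file in the sender's fixed file table and take a
 * reference to it. REQ_F_NEED_CLEANUP ensures the reference is dropped
 * via io_msg_ring_cleanup() if the request never installs the file.
 */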
static int io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_rsrc_node *node;
	int ret = -EBADF;

	io_ring_submit_lock(ctx, issue_flags);
	node = io_rsrc_node_lookup(&ctx->file_table.data, msg->src_fd);
	if (node) {
		msg->src_file = io_slot_file(node);
		if (msg->src_file)
			get_file(msg->src_file);
		req->flags |= REQ_F_NEED_CLEANUP;
		ret = 0;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

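/*
 * Install the grabbed file into the target ring's fixed file table and,
 * unless IORING_MSG_RING_CQE_SKIP is set, post a CQE on the target to
 * notify it of the new slot.
 */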
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

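/*
 * task_work callback that runs in the target ring's submitter task. Skip
 * the installation if that task is already exiting, then queue completion
 * of the original request.
 */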
static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

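/*
 * The target ring completes only from its submitter task, so hand the
 * file installation off to that task via task_work. The sender's request
 * completes later from io_msg_tw_fd_complete().
 */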
static int io_msg_fd_remote(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, io_msg_tw_fd_complete);
	if (task_work_add(task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}

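/*
 * IORING_MSG_SEND_FD: pass a fixed file from the sender's table to a slot
 * in the target ring's table. Sending to the same ring is rejected, as is
 * a target that hasn't been enabled yet.
 */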
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!msg->src_file) {
		int ret = io_msg_grab_file(req, issue_flags);

		if (unlikely(ret))
			return ret;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_fd_remote(req);
	return io_msg_install_complete(req, issue_flags);
}

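/*
 * Decode the MSG_RING SQE. buf_index and personality are not supported,
 * and only flags in IORING_MSG_RING_MASK are accepted.
 */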
static int __io_msg_ring_prep(struct io_msg *msg, const struct io_uring_sqe *sqe)
{
	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}

int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_msg_ring_prep(io_kiocb_to_cmd(req, struct io_msg), sqe);
}

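/*
 * Issue handler for IORING_OP_MSG_RING: verify the target fd is an
 * io_uring instance and dispatch on the requested sub-command.
 */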
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

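/*
 * Synchronous MSG_RING without a backing io_kiocb: the target fd comes
 * straight from the SQE and is resolved and validated here before the
 * data message is posted.
 */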
int io_uring_sync_msg_ring(struct io_uring_sqe *sqe)
{
	struct io_msg io_msg = { };
	int ret;

	ret = __io_msg_ring_prep(&io_msg, sqe);
	if (unlikely(ret))
		return ret;

	/*
	 * Only data sending supported, not IORING_MSG_SEND_FD as that one
	 * doesn't make sense without a source ring to send files from.
	 */
	if (io_msg.cmd != IORING_MSG_DATA)
		return -EINVAL;

	CLASS(fd, f)(sqe->fd);
	if (fd_empty(f))
		return -EBADF;
	if (!io_is_uring_fops(fd_file(f)))
		return -EBADFD;
	return __io_msg_ring_data(fd_file(f)->private_data,
				  &io_msg, IO_URING_F_UNLOCKED);
}