| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/capability.h> |
| 3 | #include <linux/compat.h> |
| 4 | #include <linux/blkdev.h> |
| 5 | #include <linux/export.h> |
| 6 | #include <linux/gfp.h> |
| 7 | #include <linux/blkpg.h> |
| 8 | #include <linux/hdreg.h> |
| 9 | #include <linux/backing-dev.h> |
| 10 | #include <linux/fs.h> |
| 11 | #include <linux/blktrace_api.h> |
| 12 | #include <linux/pr.h> |
| 13 | #include <linux/uaccess.h> |
| 14 | #include <linux/pagemap.h> |
| 15 | #include <linux/io_uring/cmd.h> |
| 16 | #include <linux/blk-integrity.h> |
| 17 | #include <uapi/linux/blkdev.h> |
| 18 | #include "blk.h" |
| 19 | #include "blk-crypto-internal.h" |
| 20 | |
| 21 | static int blkpg_do_ioctl(struct block_device *bdev, |
| 22 | struct blkpg_partition __user *upart, int op) |
| 23 | { |
| 24 | struct gendisk *disk = bdev->bd_disk; |
| 25 | struct blkpg_partition p; |
| 26 | sector_t start, length, capacity, end; |
| 27 | |
| 28 | if (!capable(CAP_SYS_ADMIN)) |
| 29 | return -EACCES; |
| 30 | if (copy_from_user(to: &p, from: upart, n: sizeof(struct blkpg_partition))) |
| 31 | return -EFAULT; |
| 32 | if (bdev_is_partition(bdev)) |
| 33 | return -EINVAL; |
| 34 | |
| 35 | if (p.pno <= 0) |
| 36 | return -EINVAL; |
| 37 | |
| 38 | if (op == BLKPG_DEL_PARTITION) |
| 39 | return bdev_del_partition(disk, partno: p.pno); |
| 40 | |
| 41 | if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) |
| 42 | return -EINVAL; |
| 43 | /* Check that the partition is aligned to the block size */ |
| 44 | if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) |
| 45 | return -EINVAL; |
| 46 | |
| 47 | start = p.start >> SECTOR_SHIFT; |
| 48 | length = p.length >> SECTOR_SHIFT; |
| 49 | capacity = get_capacity(disk); |
| 50 | |
| 51 | if (check_add_overflow(start, length, &end)) |
| 52 | return -EINVAL; |
| 53 | |
| 54 | if (start >= capacity || end > capacity) |
| 55 | return -EINVAL; |
| 56 | |
| 57 | switch (op) { |
| 58 | case BLKPG_ADD_PARTITION: |
| 59 | return bdev_add_partition(disk, partno: p.pno, start, length); |
| 60 | case BLKPG_RESIZE_PARTITION: |
| 61 | return bdev_resize_partition(disk, partno: p.pno, start, length); |
| 62 | default: |
| 63 | return -EINVAL; |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | static int blkpg_ioctl(struct block_device *bdev, |
| 68 | struct blkpg_ioctl_arg __user *arg) |
| 69 | { |
| 70 | struct blkpg_partition __user *udata; |
| 71 | int op; |
| 72 | |
| 73 | if (get_user(op, &arg->op) || get_user(udata, &arg->data)) |
| 74 | return -EFAULT; |
| 75 | |
| 76 | return blkpg_do_ioctl(bdev, upart: udata, op); |
| 77 | } |
| 78 | |
| 79 | #ifdef CONFIG_COMPAT |
| 80 | struct compat_blkpg_ioctl_arg { |
| 81 | compat_int_t op; |
| 82 | compat_int_t flags; |
| 83 | compat_int_t datalen; |
| 84 | compat_caddr_t data; |
| 85 | }; |
| 86 | |
| 87 | static int compat_blkpg_ioctl(struct block_device *bdev, |
| 88 | struct compat_blkpg_ioctl_arg __user *arg) |
| 89 | { |
| 90 | compat_caddr_t udata; |
| 91 | int op; |
| 92 | |
| 93 | if (get_user(op, &arg->op) || get_user(udata, &arg->data)) |
| 94 | return -EFAULT; |
| 95 | |
| 96 | return blkpg_do_ioctl(bdev, upart: compat_ptr(uptr: udata), op); |
| 97 | } |
| 98 | #endif |
| 99 | |
| 100 | /* |
| 101 | * Check that [start, start + len) is a valid range from the block device's |
| 102 | * perspective, including verifying that it can be correctly translated into |
| 103 | * logical block addresses. |
| 104 | */ |
| 105 | static int blk_validate_byte_range(struct block_device *bdev, |
| 106 | uint64_t start, uint64_t len) |
| 107 | { |
| 108 | unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; |
| 109 | uint64_t end; |
| 110 | |
| 111 | if ((start | len) & bs_mask) |
| 112 | return -EINVAL; |
| 113 | if (!len) |
| 114 | return -EINVAL; |
| 115 | if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) |
| 116 | return -EINVAL; |
| 117 | |
| 118 | return 0; |
| 119 | } |
| 120 | |
| 121 | static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, |
| 122 | unsigned long arg) |
| 123 | { |
| 124 | uint64_t range[2], start, len; |
| 125 | struct bio *prev = NULL, *bio; |
| 126 | sector_t sector, nr_sects; |
| 127 | struct blk_plug plug; |
| 128 | int err; |
| 129 | |
| 130 | if (copy_from_user(to: range, from: (void __user *)arg, n: sizeof(range))) |
| 131 | return -EFAULT; |
| 132 | start = range[0]; |
| 133 | len = range[1]; |
| 134 | |
| 135 | if (!bdev_max_discard_sectors(bdev)) |
| 136 | return -EOPNOTSUPP; |
| 137 | |
| 138 | if (!(mode & BLK_OPEN_WRITE)) |
| 139 | return -EBADF; |
| 140 | if (bdev_read_only(bdev)) |
| 141 | return -EPERM; |
| 142 | err = blk_validate_byte_range(bdev, start, len); |
| 143 | if (err) |
| 144 | return err; |
| 145 | |
| 146 | inode_lock(inode: bdev->bd_mapping->host); |
| 147 | filemap_invalidate_lock(mapping: bdev->bd_mapping); |
| 148 | err = truncate_bdev_range(bdev, mode, lstart: start, lend: start + len - 1); |
| 149 | if (err) |
| 150 | goto fail; |
| 151 | |
| 152 | sector = start >> SECTOR_SHIFT; |
| 153 | nr_sects = len >> SECTOR_SHIFT; |
| 154 | |
| 155 | blk_start_plug(&plug); |
| 156 | while (1) { |
| 157 | if (fatal_signal_pending(current)) { |
| 158 | if (prev) |
| 159 | bio_await_chain(bio: prev); |
| 160 | err = -EINTR; |
| 161 | goto out_unplug; |
| 162 | } |
| 163 | bio = blk_alloc_discard_bio(bdev, sector: §or, nr_sects: &nr_sects, |
| 164 | GFP_KERNEL); |
| 165 | if (!bio) |
| 166 | break; |
| 167 | prev = bio_chain_and_submit(prev, new: bio); |
| 168 | } |
| 169 | if (prev) { |
| 170 | err = submit_bio_wait(bio: prev); |
| 171 | if (err == -EOPNOTSUPP) |
| 172 | err = 0; |
| 173 | bio_put(prev); |
| 174 | } |
| 175 | out_unplug: |
| 176 | blk_finish_plug(&plug); |
| 177 | fail: |
| 178 | filemap_invalidate_unlock(mapping: bdev->bd_mapping); |
| 179 | inode_unlock(inode: bdev->bd_mapping->host); |
| 180 | return err; |
| 181 | } |
| 182 | |
| 183 | static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, |
| 184 | void __user *argp) |
| 185 | { |
| 186 | uint64_t start, len, end; |
| 187 | uint64_t range[2]; |
| 188 | int err; |
| 189 | |
| 190 | if (!(mode & BLK_OPEN_WRITE)) |
| 191 | return -EBADF; |
| 192 | if (!bdev_max_secure_erase_sectors(bdev)) |
| 193 | return -EOPNOTSUPP; |
| 194 | if (copy_from_user(to: range, from: argp, n: sizeof(range))) |
| 195 | return -EFAULT; |
| 196 | |
| 197 | start = range[0]; |
| 198 | len = range[1]; |
| 199 | if ((start & 511) || (len & 511)) |
| 200 | return -EINVAL; |
| 201 | if (check_add_overflow(start, len, &end) || |
| 202 | end > bdev_nr_bytes(bdev)) |
| 203 | return -EINVAL; |
| 204 | |
| 205 | inode_lock(inode: bdev->bd_mapping->host); |
| 206 | filemap_invalidate_lock(mapping: bdev->bd_mapping); |
| 207 | err = truncate_bdev_range(bdev, mode, lstart: start, lend: end - 1); |
| 208 | if (!err) |
| 209 | err = blkdev_issue_secure_erase(bdev, sector: start >> 9, nr_sects: len >> 9, |
| 210 | GFP_KERNEL); |
| 211 | filemap_invalidate_unlock(mapping: bdev->bd_mapping); |
| 212 | inode_unlock(inode: bdev->bd_mapping->host); |
| 213 | return err; |
| 214 | } |
| 215 | |
| 216 | |
| 217 | static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, |
| 218 | unsigned long arg) |
| 219 | { |
| 220 | uint64_t range[2]; |
| 221 | uint64_t start, end, len; |
| 222 | int err; |
| 223 | |
| 224 | if (!(mode & BLK_OPEN_WRITE)) |
| 225 | return -EBADF; |
| 226 | |
| 227 | if (copy_from_user(to: range, from: (void __user *)arg, n: sizeof(range))) |
| 228 | return -EFAULT; |
| 229 | |
| 230 | start = range[0]; |
| 231 | len = range[1]; |
| 232 | end = start + len - 1; |
| 233 | |
| 234 | if (start & 511) |
| 235 | return -EINVAL; |
| 236 | if (len & 511) |
| 237 | return -EINVAL; |
| 238 | if (end >= (uint64_t)bdev_nr_bytes(bdev)) |
| 239 | return -EINVAL; |
| 240 | if (end < start) |
| 241 | return -EINVAL; |
| 242 | |
| 243 | /* Invalidate the page cache, including dirty pages */ |
| 244 | inode_lock(inode: bdev->bd_mapping->host); |
| 245 | filemap_invalidate_lock(mapping: bdev->bd_mapping); |
| 246 | err = truncate_bdev_range(bdev, mode, lstart: start, lend: end); |
| 247 | if (err) |
| 248 | goto fail; |
| 249 | |
| 250 | err = blkdev_issue_zeroout(bdev, sector: start >> 9, nr_sects: len >> 9, GFP_KERNEL, |
| 251 | BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); |
| 252 | |
| 253 | fail: |
| 254 | filemap_invalidate_unlock(mapping: bdev->bd_mapping); |
| 255 | inode_unlock(inode: bdev->bd_mapping->host); |
| 256 | return err; |
| 257 | } |
| 258 | |
| 259 | static int put_ushort(unsigned short __user *argp, unsigned short val) |
| 260 | { |
| 261 | return put_user(val, argp); |
| 262 | } |
| 263 | |
| 264 | static int put_int(int __user *argp, int val) |
| 265 | { |
| 266 | return put_user(val, argp); |
| 267 | } |
| 268 | |
| 269 | static int put_uint(unsigned int __user *argp, unsigned int val) |
| 270 | { |
| 271 | return put_user(val, argp); |
| 272 | } |
| 273 | |
| 274 | static int put_long(long __user *argp, long val) |
| 275 | { |
| 276 | return put_user(val, argp); |
| 277 | } |
| 278 | |
| 279 | static int put_ulong(unsigned long __user *argp, unsigned long val) |
| 280 | { |
| 281 | return put_user(val, argp); |
| 282 | } |
| 283 | |
| 284 | static int put_u64(u64 __user *argp, u64 val) |
| 285 | { |
| 286 | return put_user(val, argp); |
| 287 | } |
| 288 | |
| 289 | #ifdef CONFIG_COMPAT |
| 290 | static int compat_put_long(compat_long_t __user *argp, long val) |
| 291 | { |
| 292 | return put_user(val, argp); |
| 293 | } |
| 294 | |
| 295 | static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) |
| 296 | { |
| 297 | return put_user(val, argp); |
| 298 | } |
| 299 | #endif |
| 300 | |
| 301 | #ifdef CONFIG_COMPAT |
| 302 | /* |
| 303 | * This is the equivalent of compat_ptr_ioctl(), to be used by block |
| 304 | * drivers that implement only commands that are completely compatible |
| 305 | * between 32-bit and 64-bit user space |
| 306 | */ |
| 307 | int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, |
| 308 | unsigned cmd, unsigned long arg) |
| 309 | { |
| 310 | struct gendisk *disk = bdev->bd_disk; |
| 311 | |
| 312 | if (disk->fops->ioctl) |
| 313 | return disk->fops->ioctl(bdev, mode, cmd, |
| 314 | (unsigned long)compat_ptr(uptr: arg)); |
| 315 | |
| 316 | return -ENOIOCTLCMD; |
| 317 | } |
| 318 | EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); |
| 319 | #endif |
| 320 | |
| 321 | static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) |
| 322 | { |
| 323 | /* no sense to make reservations for partitions */ |
| 324 | if (bdev_is_partition(bdev)) |
| 325 | return false; |
| 326 | |
| 327 | if (capable(CAP_SYS_ADMIN)) |
| 328 | return true; |
| 329 | /* |
| 330 | * Only allow unprivileged reservations if the file descriptor is open |
| 331 | * for writing. |
| 332 | */ |
| 333 | return mode & BLK_OPEN_WRITE; |
| 334 | } |
| 335 | |
| 336 | static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, |
| 337 | struct pr_registration __user *arg) |
| 338 | { |
| 339 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| 340 | struct pr_registration reg; |
| 341 | |
| 342 | if (!blkdev_pr_allowed(bdev, mode)) |
| 343 | return -EPERM; |
| 344 | if (!ops || !ops->pr_register) |
| 345 | return -EOPNOTSUPP; |
| 346 | if (copy_from_user(to: ®, from: arg, n: sizeof(reg))) |
| 347 | return -EFAULT; |
| 348 | |
| 349 | if (reg.flags & ~PR_FL_IGNORE_KEY) |
| 350 | return -EOPNOTSUPP; |
| 351 | return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); |
| 352 | } |
| 353 | |
| 354 | static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, |
| 355 | struct pr_reservation __user *arg) |
| 356 | { |
| 357 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| 358 | struct pr_reservation rsv; |
| 359 | |
| 360 | if (!blkdev_pr_allowed(bdev, mode)) |
| 361 | return -EPERM; |
| 362 | if (!ops || !ops->pr_reserve) |
| 363 | return -EOPNOTSUPP; |
| 364 | if (copy_from_user(to: &rsv, from: arg, n: sizeof(rsv))) |
| 365 | return -EFAULT; |
| 366 | |
| 367 | if (rsv.flags & ~PR_FL_IGNORE_KEY) |
| 368 | return -EOPNOTSUPP; |
| 369 | return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); |
| 370 | } |
| 371 | |
| 372 | static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, |
| 373 | struct pr_reservation __user *arg) |
| 374 | { |
| 375 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| 376 | struct pr_reservation rsv; |
| 377 | |
| 378 | if (!blkdev_pr_allowed(bdev, mode)) |
| 379 | return -EPERM; |
| 380 | if (!ops || !ops->pr_release) |
| 381 | return -EOPNOTSUPP; |
| 382 | if (copy_from_user(to: &rsv, from: arg, n: sizeof(rsv))) |
| 383 | return -EFAULT; |
| 384 | |
| 385 | if (rsv.flags) |
| 386 | return -EOPNOTSUPP; |
| 387 | return ops->pr_release(bdev, rsv.key, rsv.type); |
| 388 | } |
| 389 | |
| 390 | static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, |
| 391 | struct pr_preempt __user *arg, bool abort) |
| 392 | { |
| 393 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| 394 | struct pr_preempt p; |
| 395 | |
| 396 | if (!blkdev_pr_allowed(bdev, mode)) |
| 397 | return -EPERM; |
| 398 | if (!ops || !ops->pr_preempt) |
| 399 | return -EOPNOTSUPP; |
| 400 | if (copy_from_user(to: &p, from: arg, n: sizeof(p))) |
| 401 | return -EFAULT; |
| 402 | |
| 403 | if (p.flags) |
| 404 | return -EOPNOTSUPP; |
| 405 | return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); |
| 406 | } |
| 407 | |
| 408 | static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, |
| 409 | struct pr_clear __user *arg) |
| 410 | { |
| 411 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| 412 | struct pr_clear c; |
| 413 | |
| 414 | if (!blkdev_pr_allowed(bdev, mode)) |
| 415 | return -EPERM; |
| 416 | if (!ops || !ops->pr_clear) |
| 417 | return -EOPNOTSUPP; |
| 418 | if (copy_from_user(to: &c, from: arg, n: sizeof(c))) |
| 419 | return -EFAULT; |
| 420 | |
| 421 | if (c.flags) |
| 422 | return -EOPNOTSUPP; |
| 423 | return ops->pr_clear(bdev, c.key); |
| 424 | } |
| 425 | |
| 426 | static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, |
| 427 | struct pr_read_keys __user *arg) |
| 428 | { |
| 429 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| 430 | struct pr_keys *keys_info; |
| 431 | struct pr_read_keys read_keys; |
| 432 | u64 __user *keys_ptr; |
| 433 | size_t keys_info_len; |
| 434 | size_t keys_copy_len; |
| 435 | int ret; |
| 436 | |
| 437 | if (!blkdev_pr_allowed(bdev, mode)) |
| 438 | return -EPERM; |
| 439 | if (!ops || !ops->pr_read_keys) |
| 440 | return -EOPNOTSUPP; |
| 441 | |
| 442 | if (copy_from_user(to: &read_keys, from: arg, n: sizeof(read_keys))) |
| 443 | return -EFAULT; |
| 444 | |
| 445 | if (read_keys.num_keys > PR_KEYS_MAX) |
| 446 | return -EINVAL; |
| 447 | |
| 448 | keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); |
| 449 | |
| 450 | keys_info = kvzalloc(keys_info_len, GFP_KERNEL); |
| 451 | if (!keys_info) |
| 452 | return -ENOMEM; |
| 453 | |
| 454 | keys_info->num_keys = read_keys.num_keys; |
| 455 | |
| 456 | ret = ops->pr_read_keys(bdev, keys_info); |
| 457 | if (ret) |
| 458 | goto out; |
| 459 | |
| 460 | /* Copy out individual keys */ |
| 461 | keys_ptr = u64_to_user_ptr(read_keys.keys_ptr); |
| 462 | keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) * |
| 463 | sizeof(keys_info->keys[0]); |
| 464 | |
| 465 | if (copy_to_user(to: keys_ptr, from: keys_info->keys, n: keys_copy_len)) { |
| 466 | ret = -EFAULT; |
| 467 | goto out; |
| 468 | } |
| 469 | |
| 470 | /* Copy out the arg struct */ |
| 471 | read_keys.generation = keys_info->generation; |
| 472 | read_keys.num_keys = keys_info->num_keys; |
| 473 | |
| 474 | if (copy_to_user(to: arg, from: &read_keys, n: sizeof(read_keys))) |
| 475 | ret = -EFAULT; |
| 476 | out: |
| 477 | kvfree(addr: keys_info); |
| 478 | return ret; |
| 479 | } |
| 480 | |
| 481 | static int blkdev_pr_read_reservation(struct block_device *bdev, |
| 482 | blk_mode_t mode, struct pr_read_reservation __user *arg) |
| 483 | { |
| 484 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| 485 | struct pr_held_reservation rsv = {}; |
| 486 | struct pr_read_reservation out = {}; |
| 487 | int ret; |
| 488 | |
| 489 | if (!blkdev_pr_allowed(bdev, mode)) |
| 490 | return -EPERM; |
| 491 | if (!ops || !ops->pr_read_reservation) |
| 492 | return -EOPNOTSUPP; |
| 493 | |
| 494 | ret = ops->pr_read_reservation(bdev, &rsv); |
| 495 | if (ret) |
| 496 | return ret; |
| 497 | |
| 498 | out.key = rsv.key; |
| 499 | out.generation = rsv.generation; |
| 500 | out.type = rsv.type; |
| 501 | |
| 502 | if (copy_to_user(to: arg, from: &out, n: sizeof(out))) |
| 503 | return -EFAULT; |
| 504 | return 0; |
| 505 | } |
| 506 | |
| 507 | static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, |
| 508 | unsigned long arg) |
| 509 | { |
| 510 | if (!capable(CAP_SYS_ADMIN)) |
| 511 | return -EACCES; |
| 512 | |
| 513 | mutex_lock(&bdev->bd_holder_lock); |
| 514 | if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) |
| 515 | bdev->bd_holder_ops->sync(bdev); |
| 516 | else { |
| 517 | mutex_unlock(lock: &bdev->bd_holder_lock); |
| 518 | sync_blockdev(bdev); |
| 519 | } |
| 520 | |
| 521 | invalidate_bdev(bdev); |
| 522 | return 0; |
| 523 | } |
| 524 | |
| 525 | static int blkdev_roset(struct block_device *bdev, unsigned cmd, |
| 526 | unsigned long arg) |
| 527 | { |
| 528 | int ret, n; |
| 529 | |
| 530 | if (!capable(CAP_SYS_ADMIN)) |
| 531 | return -EACCES; |
| 532 | |
| 533 | if (get_user(n, (int __user *)arg)) |
| 534 | return -EFAULT; |
| 535 | if (bdev->bd_disk->fops->set_read_only) { |
| 536 | ret = bdev->bd_disk->fops->set_read_only(bdev, n); |
| 537 | if (ret) |
| 538 | return ret; |
| 539 | } |
| 540 | if (n) |
| 541 | bdev_set_flag(bdev, BD_READ_ONLY); |
| 542 | else |
| 543 | bdev_clear_flag(bdev, BD_READ_ONLY); |
| 544 | return 0; |
| 545 | } |
| 546 | |
| 547 | static int blkdev_getgeo(struct block_device *bdev, |
| 548 | struct hd_geometry __user *argp) |
| 549 | { |
| 550 | struct gendisk *disk = bdev->bd_disk; |
| 551 | struct hd_geometry geo; |
| 552 | int ret; |
| 553 | |
| 554 | if (!argp) |
| 555 | return -EINVAL; |
| 556 | if (!disk->fops->getgeo) |
| 557 | return -ENOTTY; |
| 558 | |
| 559 | /* |
| 560 | * We need to set the startsect first, the driver may |
| 561 | * want to override it. |
| 562 | */ |
| 563 | memset(&geo, 0, sizeof(geo)); |
| 564 | geo.start = get_start_sect(bdev); |
| 565 | ret = disk->fops->getgeo(disk, &geo); |
| 566 | if (ret) |
| 567 | return ret; |
| 568 | if (copy_to_user(to: argp, from: &geo, n: sizeof(geo))) |
| 569 | return -EFAULT; |
| 570 | return 0; |
| 571 | } |
| 572 | |
| 573 | #ifdef CONFIG_COMPAT |
| 574 | struct compat_hd_geometry { |
| 575 | unsigned char heads; |
| 576 | unsigned char sectors; |
| 577 | unsigned short cylinders; |
| 578 | u32 start; |
| 579 | }; |
| 580 | |
| 581 | static int compat_hdio_getgeo(struct block_device *bdev, |
| 582 | struct compat_hd_geometry __user *ugeo) |
| 583 | { |
| 584 | struct gendisk *disk = bdev->bd_disk; |
| 585 | struct hd_geometry geo; |
| 586 | int ret; |
| 587 | |
| 588 | if (!ugeo) |
| 589 | return -EINVAL; |
| 590 | if (!disk->fops->getgeo) |
| 591 | return -ENOTTY; |
| 592 | |
| 593 | memset(&geo, 0, sizeof(geo)); |
| 594 | /* |
| 595 | * We need to set the startsect first, the driver may |
| 596 | * want to override it. |
| 597 | */ |
| 598 | geo.start = get_start_sect(bdev); |
| 599 | ret = disk->fops->getgeo(disk, &geo); |
| 600 | if (ret) |
| 601 | return ret; |
| 602 | |
| 603 | ret = copy_to_user(to: ugeo, from: &geo, n: 4); |
| 604 | ret |= put_user(geo.start, &ugeo->start); |
| 605 | if (ret) |
| 606 | ret = -EFAULT; |
| 607 | |
| 608 | return ret; |
| 609 | } |
| 610 | #endif |
| 611 | |
| 612 | /* set the logical block size */ |
| 613 | static int blkdev_bszset(struct file *file, blk_mode_t mode, |
| 614 | int __user *argp) |
| 615 | { |
| 616 | // this one might be file_inode(file)->i_rdev - a rare valid |
| 617 | // use of file_inode() for those. |
| 618 | dev_t dev = I_BDEV(inode: file->f_mapping->host)->bd_dev; |
| 619 | struct file *excl_file; |
| 620 | int ret, n; |
| 621 | |
| 622 | if (!capable(CAP_SYS_ADMIN)) |
| 623 | return -EACCES; |
| 624 | if (!argp) |
| 625 | return -EINVAL; |
| 626 | if (get_user(n, argp)) |
| 627 | return -EFAULT; |
| 628 | |
| 629 | if (mode & BLK_OPEN_EXCL) |
| 630 | return set_blocksize(file, size: n); |
| 631 | |
| 632 | excl_file = bdev_file_open_by_dev(dev, mode, holder: &dev, NULL); |
| 633 | if (IS_ERR(ptr: excl_file)) |
| 634 | return -EBUSY; |
| 635 | ret = set_blocksize(file: excl_file, size: n); |
| 636 | fput(excl_file); |
| 637 | return ret; |
| 638 | } |
| 639 | |
| 640 | /* |
| 641 | * Common commands that are handled the same way on native and compat |
| 642 | * user space. Note the separate arg/argp parameters that are needed |
| 643 | * to deal with the compat_ptr() conversion. |
| 644 | */ |
| 645 | static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, |
| 646 | unsigned int cmd, unsigned long arg, |
| 647 | void __user *argp) |
| 648 | { |
| 649 | unsigned int max_sectors; |
| 650 | |
| 651 | switch (cmd) { |
| 652 | case BLKFLSBUF: |
| 653 | return blkdev_flushbuf(bdev, cmd, arg); |
| 654 | case BLKROSET: |
| 655 | return blkdev_roset(bdev, cmd, arg); |
| 656 | case BLKDISCARD: |
| 657 | return blk_ioctl_discard(bdev, mode, arg); |
| 658 | case BLKSECDISCARD: |
| 659 | return blk_ioctl_secure_erase(bdev, mode, argp); |
| 660 | case BLKZEROOUT: |
| 661 | return blk_ioctl_zeroout(bdev, mode, arg); |
| 662 | case BLKGETDISKSEQ: |
| 663 | return put_u64(argp, val: bdev->bd_disk->diskseq); |
| 664 | case BLKREPORTZONE: |
| 665 | case BLKREPORTZONEV2: |
| 666 | return blkdev_report_zones_ioctl(bdev, cmd, arg); |
| 667 | case BLKRESETZONE: |
| 668 | case BLKOPENZONE: |
| 669 | case BLKCLOSEZONE: |
| 670 | case BLKFINISHZONE: |
| 671 | return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); |
| 672 | case BLKGETZONESZ: |
| 673 | return put_uint(argp, val: bdev_zone_sectors(bdev)); |
| 674 | case BLKGETNRZONES: |
| 675 | return put_uint(argp, val: bdev_nr_zones(bdev)); |
| 676 | case BLKROGET: |
| 677 | return put_int(argp, val: bdev_read_only(bdev) != 0); |
| 678 | case BLKSSZGET: /* get block device logical block size */ |
| 679 | return put_int(argp, val: bdev_logical_block_size(bdev)); |
| 680 | case BLKPBSZGET: /* get block device physical block size */ |
| 681 | return put_uint(argp, val: bdev_physical_block_size(bdev)); |
| 682 | case BLKIOMIN: |
| 683 | return put_uint(argp, val: bdev_io_min(bdev)); |
| 684 | case BLKIOOPT: |
| 685 | return put_uint(argp, val: bdev_io_opt(bdev)); |
| 686 | case BLKALIGNOFF: |
| 687 | return put_int(argp, val: bdev_alignment_offset(bdev)); |
| 688 | case BLKDISCARDZEROES: |
| 689 | return put_uint(argp, val: 0); |
| 690 | case BLKSECTGET: |
| 691 | max_sectors = min_t(unsigned int, USHRT_MAX, |
| 692 | queue_max_sectors(bdev_get_queue(bdev))); |
| 693 | return put_ushort(argp, val: max_sectors); |
| 694 | case BLKROTATIONAL: |
| 695 | return put_ushort(argp, val: !bdev_nonrot(bdev)); |
| 696 | case BLKRASET: |
| 697 | case BLKFRASET: |
| 698 | if(!capable(CAP_SYS_ADMIN)) |
| 699 | return -EACCES; |
| 700 | bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; |
| 701 | return 0; |
| 702 | case BLKRRPART: |
| 703 | if (!capable(CAP_SYS_ADMIN)) |
| 704 | return -EACCES; |
| 705 | if (bdev_is_partition(bdev)) |
| 706 | return -EINVAL; |
| 707 | return disk_scan_partitions(disk: bdev->bd_disk, |
| 708 | mode: mode | BLK_OPEN_STRICT_SCAN); |
| 709 | case BLKTRACESTART: |
| 710 | case BLKTRACESTOP: |
| 711 | case BLKTRACETEARDOWN: |
| 712 | return blk_trace_ioctl(bdev, cmd, argp); |
| 713 | case BLKCRYPTOIMPORTKEY: |
| 714 | case BLKCRYPTOGENERATEKEY: |
| 715 | case BLKCRYPTOPREPAREKEY: |
| 716 | return blk_crypto_ioctl(bdev, cmd, argp); |
| 717 | case IOC_PR_REGISTER: |
| 718 | return blkdev_pr_register(bdev, mode, arg: argp); |
| 719 | case IOC_PR_RESERVE: |
| 720 | return blkdev_pr_reserve(bdev, mode, arg: argp); |
| 721 | case IOC_PR_RELEASE: |
| 722 | return blkdev_pr_release(bdev, mode, arg: argp); |
| 723 | case IOC_PR_PREEMPT: |
| 724 | return blkdev_pr_preempt(bdev, mode, arg: argp, abort: false); |
| 725 | case IOC_PR_PREEMPT_ABORT: |
| 726 | return blkdev_pr_preempt(bdev, mode, arg: argp, abort: true); |
| 727 | case IOC_PR_CLEAR: |
| 728 | return blkdev_pr_clear(bdev, mode, arg: argp); |
| 729 | case IOC_PR_READ_KEYS: |
| 730 | return blkdev_pr_read_keys(bdev, mode, arg: argp); |
| 731 | case IOC_PR_READ_RESERVATION: |
| 732 | return blkdev_pr_read_reservation(bdev, mode, arg: argp); |
| 733 | default: |
| 734 | return blk_get_meta_cap(bdev, cmd, argp); |
| 735 | } |
| 736 | } |
| 737 | |
| 738 | /* |
| 739 | * Always keep this in sync with compat_blkdev_ioctl() |
| 740 | * to handle all incompatible commands in both functions. |
| 741 | * |
| 742 | * New commands must be compatible and go into blkdev_common_ioctl |
| 743 | */ |
| 744 | long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
| 745 | { |
| 746 | struct block_device *bdev = I_BDEV(inode: file->f_mapping->host); |
| 747 | void __user *argp = (void __user *)arg; |
| 748 | blk_mode_t mode = file_to_blk_mode(file); |
| 749 | int ret; |
| 750 | |
| 751 | switch (cmd) { |
| 752 | /* These need separate implementations for the data structure */ |
| 753 | case HDIO_GETGEO: |
| 754 | return blkdev_getgeo(bdev, argp); |
| 755 | case BLKPG: |
| 756 | return blkpg_ioctl(bdev, arg: argp); |
| 757 | |
| 758 | /* Compat mode returns 32-bit data instead of 'long' */ |
| 759 | case BLKRAGET: |
| 760 | case BLKFRAGET: |
| 761 | if (!argp) |
| 762 | return -EINVAL; |
| 763 | return put_long(argp, |
| 764 | val: (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); |
| 765 | case BLKGETSIZE: |
| 766 | if (bdev_nr_sectors(bdev) > ~0UL) |
| 767 | return -EFBIG; |
| 768 | return put_ulong(argp, val: bdev_nr_sectors(bdev)); |
| 769 | |
| 770 | /* The data is compatible, but the command number is different */ |
| 771 | case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ |
| 772 | return put_int(argp, val: block_size(bdev)); |
| 773 | case BLKBSZSET: |
| 774 | return blkdev_bszset(file, mode, argp); |
| 775 | case BLKGETSIZE64: |
| 776 | return put_u64(argp, val: bdev_nr_bytes(bdev)); |
| 777 | |
| 778 | /* Incompatible alignment on i386 */ |
| 779 | case BLKTRACESETUP: |
| 780 | case BLKTRACESETUP2: |
| 781 | return blk_trace_ioctl(bdev, cmd, argp); |
| 782 | default: |
| 783 | break; |
| 784 | } |
| 785 | |
| 786 | ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); |
| 787 | if (ret != -ENOIOCTLCMD) |
| 788 | return ret; |
| 789 | |
| 790 | if (!bdev->bd_disk->fops->ioctl) |
| 791 | return -ENOTTY; |
| 792 | return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); |
| 793 | } |
| 794 | |
| 795 | #ifdef CONFIG_COMPAT |
| 796 | |
| 797 | #define BLKBSZGET_32 _IOR(0x12, 112, int) |
| 798 | #define BLKBSZSET_32 _IOW(0x12, 113, int) |
| 799 | #define BLKGETSIZE64_32 _IOR(0x12, 114, int) |
| 800 | |
| 801 | /* Most of the generic ioctls are handled in the normal fallback path. |
| 802 | This assumes the blkdev's low level compat_ioctl always returns |
| 803 | ENOIOCTLCMD for unknown ioctls. */ |
| 804 | long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
| 805 | { |
| 806 | int ret; |
| 807 | void __user *argp = compat_ptr(uptr: arg); |
| 808 | struct block_device *bdev = I_BDEV(inode: file->f_mapping->host); |
| 809 | struct gendisk *disk = bdev->bd_disk; |
| 810 | blk_mode_t mode = file_to_blk_mode(file); |
| 811 | |
| 812 | switch (cmd) { |
| 813 | /* These need separate implementations for the data structure */ |
| 814 | case HDIO_GETGEO: |
| 815 | return compat_hdio_getgeo(bdev, ugeo: argp); |
| 816 | case BLKPG: |
| 817 | return compat_blkpg_ioctl(bdev, arg: argp); |
| 818 | |
| 819 | /* Compat mode returns 32-bit data instead of 'long' */ |
| 820 | case BLKRAGET: |
| 821 | case BLKFRAGET: |
| 822 | if (!argp) |
| 823 | return -EINVAL; |
| 824 | return compat_put_long(argp, |
| 825 | val: (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); |
| 826 | case BLKGETSIZE: |
| 827 | if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) |
| 828 | return -EFBIG; |
| 829 | return compat_put_ulong(argp, val: bdev_nr_sectors(bdev)); |
| 830 | |
| 831 | /* The data is compatible, but the command number is different */ |
| 832 | case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ |
| 833 | return put_int(argp, val: bdev_logical_block_size(bdev)); |
| 834 | case BLKBSZSET_32: |
| 835 | return blkdev_bszset(file, mode, argp); |
| 836 | case BLKGETSIZE64_32: |
| 837 | return put_u64(argp, val: bdev_nr_bytes(bdev)); |
| 838 | |
| 839 | /* Incompatible alignment on i386 */ |
| 840 | case BLKTRACESETUP32: |
| 841 | return blk_trace_ioctl(bdev, cmd, argp); |
| 842 | default: |
| 843 | break; |
| 844 | } |
| 845 | |
| 846 | ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); |
| 847 | if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) |
| 848 | ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); |
| 849 | |
| 850 | return ret; |
| 851 | } |
| 852 | #endif |
| 853 | |
/*
 * Per-command state kept in the io_uring command's private data area
 * (accessed through io_uring_cmd_to_pdu() in this file).
 */
struct blk_iou_cmd {
	int res;	/* result to complete the command with; 0 until an error is recorded */
	bool nowait;	/* set when the command was issued with IO_URING_F_NONBLOCK */
};
| 858 | |
| 859 | static void blk_cmd_complete(struct io_tw_req tw_req, io_tw_token_t tw) |
| 860 | { |
| 861 | struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); |
| 862 | struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); |
| 863 | |
| 864 | if (bic->res == -EAGAIN && bic->nowait) |
| 865 | io_uring_cmd_issue_blocking(ioucmd: cmd); |
| 866 | else |
| 867 | io_uring_cmd_done(ioucmd: cmd, ret: bic->res, |
| 868 | IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); |
| 869 | } |
| 870 | |
| 871 | static void bio_cmd_bio_end_io(struct bio *bio) |
| 872 | { |
| 873 | struct io_uring_cmd *cmd = bio->bi_private; |
| 874 | struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); |
| 875 | |
| 876 | if (unlikely(bio->bi_status) && !bic->res) |
| 877 | bic->res = blk_status_to_errno(status: bio->bi_status); |
| 878 | |
| 879 | io_uring_cmd_do_in_task_lazy(ioucmd: cmd, task_work_cb: blk_cmd_complete); |
| 880 | bio_put(bio); |
| 881 | } |
| 882 | |
| 883 | static int blkdev_cmd_discard(struct io_uring_cmd *cmd, |
| 884 | struct block_device *bdev, |
| 885 | uint64_t start, uint64_t len, bool nowait) |
| 886 | { |
| 887 | struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); |
| 888 | gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; |
| 889 | sector_t sector = start >> SECTOR_SHIFT; |
| 890 | sector_t nr_sects = len >> SECTOR_SHIFT; |
| 891 | struct bio *prev = NULL, *bio; |
| 892 | int err; |
| 893 | |
| 894 | if (!bdev_max_discard_sectors(bdev)) |
| 895 | return -EOPNOTSUPP; |
| 896 | if (!(file_to_blk_mode(file: cmd->file) & BLK_OPEN_WRITE)) |
| 897 | return -EBADF; |
| 898 | if (bdev_read_only(bdev)) |
| 899 | return -EPERM; |
| 900 | err = blk_validate_byte_range(bdev, start, len); |
| 901 | if (err) |
| 902 | return err; |
| 903 | |
| 904 | err = filemap_invalidate_pages(mapping: bdev->bd_mapping, pos: start, |
| 905 | end: start + len - 1, nowait); |
| 906 | if (err) |
| 907 | return err; |
| 908 | |
| 909 | while (true) { |
| 910 | bio = blk_alloc_discard_bio(bdev, sector: §or, nr_sects: &nr_sects, gfp_mask: gfp); |
| 911 | if (!bio) |
| 912 | break; |
| 913 | if (nowait) { |
| 914 | /* |
| 915 | * Don't allow multi-bio non-blocking submissions as |
| 916 | * subsequent bios may fail but we won't get a direct |
| 917 | * indication of that. Normally, the caller should |
| 918 | * retry from a blocking context. |
| 919 | */ |
| 920 | if (unlikely(nr_sects)) { |
| 921 | bio_put(bio); |
| 922 | return -EAGAIN; |
| 923 | } |
| 924 | bio->bi_opf |= REQ_NOWAIT; |
| 925 | } |
| 926 | |
| 927 | prev = bio_chain_and_submit(prev, new: bio); |
| 928 | } |
| 929 | if (unlikely(!prev)) |
| 930 | return -EAGAIN; |
| 931 | if (unlikely(nr_sects)) |
| 932 | bic->res = -EAGAIN; |
| 933 | |
| 934 | prev->bi_private = cmd; |
| 935 | prev->bi_end_io = bio_cmd_bio_end_io; |
| 936 | submit_bio(bio: prev); |
| 937 | return -EIOCBQUEUED; |
| 938 | } |
| 939 | |
| 940 | int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) |
| 941 | { |
| 942 | struct block_device *bdev = I_BDEV(inode: cmd->file->f_mapping->host); |
| 943 | struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); |
| 944 | const struct io_uring_sqe *sqe = cmd->sqe; |
| 945 | u32 cmd_op = cmd->cmd_op; |
| 946 | uint64_t start, len; |
| 947 | |
| 948 | if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || |
| 949 | sqe->rw_flags || sqe->file_index)) |
| 950 | return -EINVAL; |
| 951 | |
| 952 | bic->res = 0; |
| 953 | bic->nowait = issue_flags & IO_URING_F_NONBLOCK; |
| 954 | |
| 955 | start = READ_ONCE(sqe->addr); |
| 956 | len = READ_ONCE(sqe->addr3); |
| 957 | |
| 958 | switch (cmd_op) { |
| 959 | case BLOCK_URING_CMD_DISCARD: |
| 960 | return blkdev_cmd_discard(cmd, bdev, start, len, nowait: bic->nowait); |
| 961 | } |
| 962 | return -EINVAL; |
| 963 | } |
| 964 | |