// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic block layer helpers
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
{
	unsigned int discard_granularity = bdev_discard_granularity(bdev);
	sector_t granularity_aligned_sector;

	if (bdev_is_partition(bdev))
		sector += bdev->bd_start_sect;

	granularity_aligned_sector =
		round_up(sector, discard_granularity >> SECTOR_SHIFT);

	/*
	 * Make sure subsequent bios start aligned to the discard granularity if
	 * it needs to be split.
	 */
	if (granularity_aligned_sector != sector)
		return granularity_aligned_sector - sector;

	/*
	 * Align the bio size to the discard granularity to make splitting the bio
	 * at discard granularity boundaries easier in the driver if needed.
	 */
	return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
}
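
/*
 * Worked example for bio_discard_limit() above (the numbers are assumptions
 * chosen for illustration, not taken from any particular device): with a
 * discard granularity of 1 MiB (2048 sectors) and a starting sector of 2050
 * on a whole device, the next granularity boundary is sector 4096, so the
 * limit is 2046 sectors and the first bio only covers the misaligned head.
 * Once the offset is aligned, the limit becomes
 * round_down(UINT_MAX, 1 MiB) >> SECTOR_SHIFT, so full-size bios stay
 * granularity aligned.
 */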

struct bio *blk_alloc_discard_bio(struct block_device *bdev,
		sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
{
	sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
	struct bio *bio;

	if (!bio_sects)
		return NULL;

	bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
	if (!bio)
		return NULL;
	bio->bi_iter.bi_sector = *sector;
	bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
	*sector += bio_sects;
	*nr_sects -= bio_sects;
	/*
	 * We can loop for a long time in here if someone does full device
	 * discards (like mkfs). Be nice and allow us to schedule out to avoid
	 * softlocking if preempt is disabled.
	 */
	cond_resched();
	return bio;
}

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
	struct bio *bio;

	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
			gfp_mask)))
		*biop = bio_chain_and_submit(*biop, bio);
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
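
/*
 * Example use of __blkdev_issue_discard() (a minimal sketch; the caller
 * context, range and GFP_KERNEL choice are assumptions, not taken from this
 * file).  The caller owns the bio chain and must wait for and put the final
 * bio itself, typically under a plug, as blkdev_issue_discard() below does:
 *
 *	struct bio *bio = NULL;
 *	struct blk_plug plug;
 *	int ret;
 *
 *	blk_start_plug(&plug);
 *	ret = __blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL, &bio);
 *	if (!ret && bio) {
 *		ret = submit_bio_wait(bio);
 *		bio_put(bio);
 *	}
 *	blk_finish_plug(&plug);
 */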

/**
 * blkdev_issue_discard - queue a discard
 * @bdev: blockdev to issue discard for
 * @sector: start sector
 * @nr_sects: number of sectors to discard
 * @gfp_mask: memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret = 0;

	blk_start_plug(&plug);
	__blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
	if (bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
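
/*
 * Example (illustrative only; discarding the whole device with GFP_KERNEL is
 * an assumption for the sketch, roughly what a mkfs-style caller might do):
 *
 *	int err = blkdev_issue_discard(bdev, 0, bdev_nr_sectors(bdev),
 *				       GFP_KERNEL);
 *	if (err)
 *		pr_warn("discard failed: %d\n", err);
 */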

static sector_t bio_write_zeroes_limit(struct block_device *bdev)
{
	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;

	return min(bdev_write_zeroes_sectors(bdev),
		(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
}
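
/*
 * Example (assuming a 4096-byte logical block size): bs_mask is 7, so the
 * per-bio limit is the smaller of the device's WRITE ZEROES limit and
 * UINT_MAX >> SECTOR_SHIFT rounded down to a multiple of 8 sectors, keeping
 * every bio a whole number of logical blocks long.
 */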

/*
 * There is no reliable way for the SCSI subsystem to determine whether a
 * device supports a WRITE SAME operation without actually performing a write
 * to media. As a result, write_zeroes is enabled by default and will be
 * disabled if a zeroing operation subsequently fails. This means that this
 * queue limit is likely to change at runtime.
 */
static void __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags, sector_t limit)
{
	while (nr_sects) {
		unsigned int len = min(nr_sects, limit);
		struct bio *bio;

		if ((flags & BLKDEV_ZERO_KILLABLE) &&
		    fatal_signal_pending(current))
			break;

		bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		bio->bi_iter.bi_size = len << SECTOR_SHIFT;
		*biop = bio_chain_and_submit(*biop, bio);

		nr_sects -= len;
		sector += len;
		cond_resched();
	}
}

static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp, unsigned flags)
{
	sector_t limit = bio_write_zeroes_limit(bdev);
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret = 0;

	blk_start_plug(&plug);
	__blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
			flags, limit);
	if (bio) {
		if ((flags & BLKDEV_ZERO_KILLABLE) &&
		    fatal_signal_pending(current)) {
			bio_await_chain(bio);
			blk_finish_plug(&plug);
			return -EINTR;
		}
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	/*
	 * For some devices there is no non-destructive way to verify whether
	 * WRITE ZEROES is actually supported. These will clear the capability
	 * on an I/O error, in which case we'll turn any error into
	 * "not supported" here.
	 */
	if (ret && !bdev_write_zeroes_sectors(bdev))
		return -EOPNOTSUPP;
	return ret;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

	return min(pages, (sector_t)BIO_MAX_VECS);
}
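
/*
 * Example (assuming the common 4 KiB PAGE_SIZE): nr_sects = 1 maps to 1 page,
 * nr_sects = 9 maps to 2 pages, and anything beyond BIO_MAX_VECS * 8 sectors
 * is clamped to BIO_MAX_VECS pages.
 */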

static void __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned int flags)
{
	struct folio *zero_folio = largest_zero_folio();

	while (nr_sects) {
		unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
		struct bio *bio;

		if ((flags & BLKDEV_ZERO_KILLABLE) &&
		    fatal_signal_pending(current))
			break;

		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
		bio->bi_iter.bi_sector = sector;

		do {
			unsigned int len;

			len = min_t(sector_t, folio_size(zero_folio),
				nr_sects << SECTOR_SHIFT);
			if (!bio_add_folio(bio, zero_folio, len, 0))
				break;
			nr_sects -= len >> SECTOR_SHIFT;
			sector += len >> SECTOR_SHIFT;
		} while (nr_sects);

		*biop = bio_chain_and_submit(*biop, bio);
		cond_resched();
	}
}

static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp, unsigned flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret = 0;

	if (flags & BLKDEV_ZERO_NOFALLBACK)
		return -EOPNOTSUPP;

	blk_start_plug(&plug);
	__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
	if (bio) {
		if ((flags & BLKDEV_ZERO_KILLABLE) &&
		    fatal_signal_pending(current)) {
			bio_await_chain(bio);
			blk_finish_plug(&plug);
			return -EINTR;
		}
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}

/**
 * __blkdev_issue_zeroout - generate number of zero-filled write bios
 * @bdev: blockdev to write
 * @sector: start sector
 * @nr_sects: number of sectors to write
 * @gfp_mask: memory allocation flags (for bio_alloc)
 * @biop: pointer to anchor bio
 * @flags: controls detailed behavior
 *
 * Description:
 * Zero-fill a block range, either using hardware offload or by explicitly
 * writing zeroes to the device.
 *
 * If a device is using logical block provisioning, the underlying space will
 * not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 * If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 * -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	sector_t limit = bio_write_zeroes_limit(bdev);

	if (bdev_read_only(bdev))
		return -EPERM;

	if (limit) {
		__blkdev_issue_write_zeroes(bdev, sector, nr_sects,
				gfp_mask, biop, flags, limit);
	} else {
		if (flags & BLKDEV_ZERO_NOFALLBACK)
			return -EOPNOTSUPP;
		__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
				biop, flags);
	}
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
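
/*
 * Example use of __blkdev_issue_zeroout() (a sketch under an assumed caller
 * context; as with __blkdev_issue_discard() the caller completes the chain):
 *
 *	struct bio *bio = NULL;
 *	int ret;
 *
 *	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
 *				     &bio, BLKDEV_ZERO_NOUNMAP);
 *	if (!ret && bio) {
 *		ret = submit_bio_wait(bio);
 *		bio_put(bio);
 *	}
 */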

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev: blockdev to write
 * @sector: start sector
 * @nr_sects: number of sectors to write
 * @gfp_mask: memory allocation flags (for bio_alloc)
 * @flags: controls detailed behavior
 *
 * Description:
 * Zero-fill a block range, either using hardware offload or by explicitly
 * writing zeroes to the device. See __blkdev_issue_zeroout() for the
 * valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret;

	if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
		return -EINVAL;
	if (bdev_read_only(bdev))
		return -EPERM;

	if (bdev_write_zeroes_sectors(bdev)) {
		ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
				gfp_mask, flags);
		if (ret != -EOPNOTSUPP)
			return ret;
	}

	return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
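
/*
 * Example (illustrative; zeroing the first megabyte with GFP_KERNEL is an
 * assumption for the sketch).  With BLKDEV_ZERO_NOFALLBACK the call fails
 * with -EOPNOTSUPP instead of falling back to writing zero pages:
 *
 *	int err = blkdev_issue_zeroout(bdev, 0, SZ_1M >> SECTOR_SHIFT,
 *				       GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
 */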

int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp)
{
	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret = 0;

	/* make sure that "len << SECTOR_SHIFT" doesn't overflow */
	if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
		max_sectors = UINT_MAX >> SECTOR_SHIFT;
	max_sectors &= ~bs_mask;

	if (max_sectors == 0)
		return -EOPNOTSUPP;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;
	if (bdev_read_only(bdev))
		return -EPERM;

	blk_start_plug(&plug);
	while (nr_sects) {
		unsigned int len = min_t(sector_t, nr_sects, max_sectors);

		bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
		bio->bi_iter.bi_sector = sector;
		bio->bi_iter.bi_size = len << SECTOR_SHIFT;

		sector += len;
		nr_sects -= len;
		cond_resched();
	}
	if (bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_secure_erase);
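
/*
 * Example (a sketch; erasing the whole device is an assumption).  Unlike
 * discard, secure erase is rejected up front with -EOPNOTSUPP when the device
 * does not advertise bdev_max_secure_erase_sectors():
 *
 *	int err = blkdev_issue_secure_erase(bdev, 0, bdev_nr_sectors(bdev),
 *					    GFP_KERNEL);
 */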