// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/bitmap.h>

#include "dm-core.h"

#define DM_MSG_PREFIX "zone"

#define DM_ZONE_INVALID_WP_OFST	UINT_MAX

/*
 * For internal zone reports bypassing the top BIO submission path.
 */
static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t,
				  sector_t sector, unsigned int nr_zones,
				  report_zones_cb cb, void *data)
{
	struct gendisk *disk = md->disk;
	int ret;
	struct dm_report_zones_args args = {
		.next_sector = sector,
		.orig_data = data,
		.orig_cb = cb,
	};

	do {
		struct dm_target *tgt;

		tgt = dm_table_find_target(t, args.next_sector);
		if (WARN_ON_ONCE(!tgt->type->report_zones))
			return -EIO;

		args.tgt = tgt;
		ret = tgt->type->report_zones(tgt, &args,
					      nr_zones - args.zone_idx);
		if (ret < 0)
			return ret;
	} while (args.zone_idx < nr_zones &&
		 args.next_sector < get_capacity(disk));

	return args.zone_idx;
}
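
/*
 * Illustration (numbers are made up, not from the original source): on a
 * table stacking two zoned targets of 524288 sectors each, a report starting
 * at sector 0 first calls the first target's report_zones method. Each
 * reported zone advances args.next_sector, so once it crosses sector 524288
 * the loop looks up the second target and continues until nr_zones zones
 * have been reported or the device capacity is reached.
 */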

/*
 * User facing block device report zones operation for a DM device. This
 * calls the report_zones operation of each target in the device table.
 * Targets generally implement that operation using dm_report_zones().
 */
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct mapped_device *md = disk->private_data;
	struct dm_table *map;
	int srcu_idx, ret;

	if (dm_suspended_md(md))
		return -EAGAIN;

	map = dm_get_live_table(md, &srcu_idx);
	if (!map)
		return -EIO;

	ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);

	dm_put_live_table(md, srcu_idx);

	return ret;
}

static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx,
			      void *data)
{
	struct dm_report_zones_args *args = data;
	sector_t sector_diff = args->tgt->begin - args->start;

	/*
	 * Ignore zones beyond the target range.
	 */
	if (zone->start >= args->start + args->tgt->len)
		return 0;

	/*
	 * Remap the start sector and write pointer position of the zone
	 * to match its position in the target range.
	 */
	zone->start += sector_diff;
	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
		if (zone->cond == BLK_ZONE_COND_FULL)
			zone->wp = zone->start + zone->len;
		else if (zone->cond == BLK_ZONE_COND_EMPTY)
			zone->wp = zone->start;
		else
			zone->wp += sector_diff;
	}

	args->next_sector = zone->start + zone->len;
	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
}
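
/*
 * Worked example (illustrative numbers, not from the original source): for
 * a target mapped at begin = 2097152 on top of a device range starting at
 * start = 0, sector_diff is 2097152. A zone reported by the underlying
 * device at sector 0 is therefore exposed at sector 2097152 of the DM
 * device, and a partially written zone has its write pointer shifted by the
 * same amount, so the in-zone offset wp - start is preserved.
 */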

/*
 * Helper for drivers of zoned targets to implement struct target_type
 * report_zones operation.
 */
int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
		    struct dm_report_zones_args *args, unsigned int nr_zones)
{
	/*
	 * Set the target mapping start sector first so that
	 * dm_report_zones_cb() can correctly remap zone information.
	 */
	args->start = start;

	return blkdev_report_zones(bdev, sector, nr_zones,
				   dm_report_zones_cb, args);
}
EXPORT_SYMBOL_GPL(dm_report_zones);
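
/*
 * A minimal sketch of how a zoned target can use this helper from its own
 * report_zones method, assuming a dm-linear style target whose private
 * context holds the underlying device and the start offset of the mapping
 * (the struct and helper names below are illustrative, not from this file):
 *
 *	static int linear_report_zones(struct dm_target *ti,
 *			struct dm_report_zones_args *args,
 *			unsigned int nr_zones)
 *	{
 *		struct linear_c *lc = ti->private;
 *		sector_t sector = linear_map_sector(ti, args->next_sector);
 *
 *		return dm_report_zones(lc->dev->bdev, lc->start, sector,
 *				       args, nr_zones);
 *	}
 */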

bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
	struct request_queue *q = md->queue;

	if (!blk_queue_is_zoned(q))
		return false;

	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
	default:
		return false;
	}
}
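
/*
 * In other words, only plain data-carrying writes and write-zeroes qualify
 * as zone writes here: BIOs carrying a flush or FUA flag, BIOs with no
 * data, and all other operations (reads, zone append, zone management)
 * return false.
 */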

void dm_cleanup_zoned_dev(struct mapped_device *md)
{
	if (md->disk) {
		bitmap_free(md->disk->conv_zones_bitmap);
		md->disk->conv_zones_bitmap = NULL;
		bitmap_free(md->disk->seq_zones_wlock);
		md->disk->seq_zones_wlock = NULL;
	}

	kvfree(md->zwp_offset);
	md->zwp_offset = NULL;
	md->nr_zones = 0;
}

static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
	case BLK_ZONE_COND_CLOSED:
		return zone->wp - zone->start;
	case BLK_ZONE_COND_FULL:
		return zone->len;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_NOT_WP:
	case BLK_ZONE_COND_OFFLINE:
	case BLK_ZONE_COND_READONLY:
	default:
		/*
		 * Conventional, offline and read-only zones do not have a
		 * valid write pointer. Use 0, as for an empty zone.
		 */
		return 0;
	}
}
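
/*
 * Example (illustrative numbers): for a zone of 524288 sectors, an empty
 * zone yields offset 0, a full zone yields 524288, and an open zone written
 * up to sector zone->start + 1000 yields 1000. The returned offset is
 * always relative to the start of the zone, never an absolute sector.
 */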

static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
				 void *data)
{
	struct mapped_device *md = data;
	struct gendisk *disk = md->disk;

	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!disk->conv_zones_bitmap) {
			disk->conv_zones_bitmap = bitmap_zalloc(disk->nr_zones,
								GFP_NOIO);
			if (!disk->conv_zones_bitmap)
				return -ENOMEM;
		}
		set_bit(idx, disk->conv_zones_bitmap);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		if (!disk->seq_zones_wlock) {
			disk->seq_zones_wlock = bitmap_zalloc(disk->nr_zones,
							      GFP_NOIO);
			if (!disk->seq_zones_wlock)
				return -ENOMEM;
		}
		if (!md->zwp_offset) {
			md->zwp_offset =
				kvcalloc(disk->nr_zones, sizeof(unsigned int),
					 GFP_KERNEL);
			if (!md->zwp_offset)
				return -ENOMEM;
		}
		md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);

		break;
	default:
		DMERR("Invalid zone type 0x%x at sectors %llu",
		      (int)zone->type, zone->start);
		return -ENODEV;
	}

	return 0;
}

/*
 * Revalidate the zones of a mapped device to initialize the resources
 * necessary for zone append emulation. Note that we cannot simply use the
 * block layer blk_revalidate_disk_zones() function here as the mapped
 * device is suspended (this is called from __bind() context).
 */
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
	struct gendisk *disk = md->disk;
	unsigned int noio_flag;
	int ret;

	/*
	 * Check if something changed. If yes, clean up the current resources
	 * and reallocate everything.
	 */
	if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
		dm_cleanup_zoned_dev(md);
	if (md->nr_zones)
		return 0;

	/*
	 * Scan all zones to initialize everything. Ensure that all vmalloc
	 * operations in this context are done as if GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
				     dm_zone_revalidate_cb, md);
	memalloc_noio_restore(noio_flag);
	if (ret < 0)
		goto err;
	if (ret != disk->nr_zones) {
		ret = -EIO;
		goto err;
	}

	md->nr_zones = disk->nr_zones;

	return 0;

err:
	DMERR("Revalidate zones failed %d", ret);
	dm_cleanup_zoned_dev(md);
	return ret;
}

static int device_not_zone_append_capable(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	return !bdev_is_zoned(dev->bdev);
}

static bool dm_table_supports_zone_append(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->emulate_zone_append)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL))
			return false;
	}

	return true;
}
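
/*
 * Note the negated predicate: iterate_devices() returns a non-zero value as
 * soon as the callout returns true for one device, so passing a "device is
 * NOT zone append capable" test flags tables containing at least one
 * unsuitable device. A dm-linear style iterate_devices is roughly the
 * following (illustrative sketch, field names assumed):
 *
 *	static int linear_iterate_devices(struct dm_target *ti,
 *			iterate_devices_callout_fn fn, void *data)
 *	{
 *		struct linear_c *lc = ti->private;
 *
 *		return fn(ti, lc->dev, lc->start, ti->len, data);
 *	}
 */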

int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
{
	struct mapped_device *md = t->md;

	/*
	 * For a zoned target, the number of zones should be updated for the
	 * correct value to be exposed in sysfs queue/nr_zones.
	 */
	WARN_ON_ONCE(queue_is_mq(q));
	md->disk->nr_zones = bdev_nr_zones(md->disk->part0);

	/* Check if zone append is natively supported */
	if (dm_table_supports_zone_append(t)) {
		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
		dm_cleanup_zoned_dev(md);
		return 0;
	}

	/*
	 * Mark the mapped device as needing zone append emulation and
	 * initialize the emulation resources once the capacity is set.
	 */
	set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
	if (!get_capacity(md->disk))
		return 0;

	return dm_revalidate_zones(md, t);
}

static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
				       void *data)
{
	unsigned int *wp_offset = data;

	*wp_offset = dm_get_zone_wp_offset(zone);

	return 0;
}

static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
				    unsigned int *wp_ofst)
{
	sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
	unsigned int noio_flag;
	struct dm_table *t;
	int srcu_idx, ret;

	t = dm_get_live_table(md, &srcu_idx);
	if (!t)
		return -EIO;

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, sector, 1,
				     dm_update_zone_wp_offset_cb, wp_ofst);
	memalloc_noio_restore(noio_flag);

	dm_put_live_table(md, srcu_idx);

	if (ret != 1)
		return -EIO;

	return 0;
}
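
/*
 * Since zone sizes are uniform and a power of two, the start sector of zone
 * number zno is simply zno * zone_sectors; reporting a single zone from
 * that sector refreshes the cached write pointer offset for exactly that
 * zone.
 */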

struct orig_bio_details {
	enum req_op op;
	unsigned int nr_sectors;
};

/*
 * First phase of BIO mapping for targets with zone append emulation:
 * check all BIOs that change a zone write pointer and change zone
 * append operations into regular write operations.
 */
static bool dm_zone_map_bio_begin(struct mapped_device *md,
				  unsigned int zno, struct bio *clone)
{
	sector_t zsectors = bdev_zone_sectors(md->disk->part0);
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/*
	 * If the target zone is in an error state, recover by inspecting the
	 * zone to get its current write pointer position. Note that since the
	 * target zone is already locked, a BIO issuing context should never
	 * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
	 */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
		if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
			return false;
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
	}

	switch (bio_op(clone)) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
		return true;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		/* Writes must be aligned to the zone write pointer */
		if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
			return false;
		break;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Change zone append operations into non-mergeable regular
		 * writes directed at the current write pointer position of
		 * the target zone.
		 */
		clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
			(clone->bi_opf & (~REQ_OP_MASK));
		clone->bi_iter.bi_sector += zwp_offset;
		break;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return false;
	}

	/* Cannot write to a full zone */
	if (zwp_offset >= zsectors)
		return false;

	return true;
}
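
/*
 * Worked example (illustrative numbers): with 524288-sector zones, a zone
 * append BIO issued at zone start sector 1048576 while the cached write
 * pointer offset is 1000 is rewritten as a REQ_NOMERGE regular write at
 * sector 1049576. The write alignment check above relies on the same
 * arithmetic: bi_sector & (zsectors - 1) recovers the in-zone offset
 * because zone sizes are a power of two.
 */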

/*
 * Second phase of BIO mapping for targets with zone append emulation:
 * update the zone write pointer offset array to account for the additional
 * data written to a zone. Note that at this point, the remapped clone BIO
 * may already have completed, so we do not touch it.
 */
static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
					struct orig_bio_details *orig_bio_details,
					unsigned int nr_sectors)
{
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/* The clone BIO may already have been completed and failed */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
		return BLK_STS_IOERR;

	/* Update the zone wp offset */
	switch (orig_bio_details->op) {
	case REQ_OP_ZONE_RESET:
		WRITE_ONCE(md->zwp_offset[zno], 0);
		return BLK_STS_OK;
	case REQ_OP_ZONE_FINISH:
		WRITE_ONCE(md->zwp_offset[zno],
			   bdev_zone_sectors(md->disk->part0));
		return BLK_STS_OK;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Check that the target did not truncate the write operation
		 * emulating a zone append.
		 */
		if (nr_sectors != orig_bio_details->nr_sectors) {
			DMWARN_LIMIT("Truncated write for zone append");
			return BLK_STS_IOERR;
		}
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return BLK_STS_IOERR;
	}
}

static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
				struct bio *clone)
{
	if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
		return;

	wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
	bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
				  struct bio *clone)
{
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
	clear_bit_unlock(zno, disk->seq_zones_wlock);
	smp_mb__after_atomic();
	wake_up_bit(disk->seq_zones_wlock, zno);

	bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static bool dm_need_zone_wp_tracking(struct bio *bio)
{
	/*
	 * Special processing is not needed for operations that do not need
	 * the zone write lock, that is, all operations that target
	 * conventional zones and all operations that do not directly modify
	 * a sequential zone write pointer.
	 */
	if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
		return false;
	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
	case REQ_OP_ZONE_APPEND:
		return bio_zone_is_seq(bio);
	default:
		return false;
	}
}
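
/*
 * Put differently: reads, discards and empty flushes never take the zone
 * write lock; writes, write-zeroes, zone append, zone reset and zone finish
 * take it only when they target a sequential zone, since conventional zones
 * have no write pointer to protect.
 */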

/*
 * Special IO mapping for targets needing zone append emulation.
 */
int dm_zone_map_bio(struct dm_target_io *tio)
{
	struct dm_io *io = tio->io;
	struct dm_target *ti = tio->ti;
	struct mapped_device *md = io->md;
	struct bio *clone = &tio->clone;
	struct orig_bio_details orig_bio_details;
	unsigned int zno;
	blk_status_t sts;
	int r;

	/*
	 * IOs that do not change a zone write pointer do not need
	 * any additional special processing.
	 */
	if (!dm_need_zone_wp_tracking(clone))
		return ti->type->map(ti, clone);

	/* Lock the target zone */
	zno = bio_zone_no(clone);
	dm_zone_lock(md->disk, zno, clone);

	orig_bio_details.nr_sectors = bio_sectors(clone);
	orig_bio_details.op = bio_op(clone);

	/*
	 * Check that the bio and the target zone write pointer offset are
	 * both valid, and if the bio is a zone append, remap it to a write.
	 */
	if (!dm_zone_map_bio_begin(md, zno, clone)) {
		dm_zone_unlock(md->disk, zno, clone);
		return DM_MAPIO_KILL;
	}

	/* Let the target do its work */
	r = ti->type->map(ti, clone);
	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/*
		 * The target submitted the clone BIO. The target zone will
		 * be unlocked on completion of the clone.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		break;
	case DM_MAPIO_REMAPPED:
		/*
		 * The target only remapped the clone BIO. In case of error,
		 * unlock the target zone here as the clone will not be
		 * submitted.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		if (sts != BLK_STS_OK)
			dm_zone_unlock(md->disk, zno, clone);
		break;
	case DM_MAPIO_REQUEUE:
	case DM_MAPIO_KILL:
	default:
		dm_zone_unlock(md->disk, zno, clone);
		sts = BLK_STS_IOERR;
		break;
	}

	if (sts != BLK_STS_OK)
		return DM_MAPIO_KILL;

	return r;
}
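
/*
 * Summary of the emulation sequence implemented above: (1) take the
 * per-zone write lock, (2) dm_zone_map_bio_begin() validates the BIO
 * against the cached write pointer offset and converts zone appends to
 * regular writes, (3) the target maps the clone, and (4)
 * dm_zone_map_bio_end() advances the cached offset. The lock is released
 * either here on error or in dm_zone_endio() when the clone completes.
 */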

/*
 * IO completion callback called from clone_endio().
 */
void dm_zone_endio(struct dm_io *io, struct bio *clone)
{
	struct mapped_device *md = io->md;
	struct gendisk *disk = md->disk;
	struct bio *orig_bio = io->orig_bio;
	unsigned int zwp_offset;
	unsigned int zno;

	/*
	 * For targets that do not emulate zone append, we only need to
	 * handle native zone-append bios.
	 */
	if (!dm_emulate_zone_append(md)) {
		/*
		 * Get the offset within the zone of the written sector
		 * and add that to the original bio sector position.
		 */
		if (clone->bi_status == BLK_STS_OK &&
		    bio_op(clone) == REQ_OP_ZONE_APPEND) {
			sector_t mask =
				(sector_t)bdev_zone_sectors(disk->part0) - 1;

			orig_bio->bi_iter.bi_sector +=
				clone->bi_iter.bi_sector & mask;
		}

		return;
	}

	/*
	 * For targets that do emulate zone append, if the clone BIO does not
	 * own the target zone write lock, we have nothing to do.
	 */
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	zno = bio_zone_no(orig_bio);

	if (clone->bi_status != BLK_STS_OK) {
		/*
		 * BIOs that modify a zone write pointer may leave the zone
		 * in an unknown state in case of failure (e.g. the write
		 * pointer was only partially advanced). In this case, set
		 * the target zone write pointer as invalid unless it is
		 * already being updated.
		 */
		WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
	} else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
		/*
		 * Get the written sector for zone append operations that
		 * were emulated using regular write operations.
		 */
		zwp_offset = READ_ONCE(md->zwp_offset[zno]);
		if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
			WRITE_ONCE(md->zwp_offset[zno],
				   DM_ZONE_INVALID_WP_OFST);
		else
			orig_bio->bi_iter.bi_sector +=
				zwp_offset - bio_sectors(orig_bio);
	}

	dm_zone_unlock(disk, zno, clone);
}
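
/*
 * Example of the sector fixup (illustrative numbers): an emulated zone
 * append of 8 sectors that completed with the cached write pointer offset
 * now at 1008 must have started at in-zone offset 1000, so the original
 * BIO's sector is advanced by 1008 - 8 = 1000, reporting the actual write
 * position back to the issuer exactly as a native zone append would.
 */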