1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * SCSI Zoned Block commands |
4 | * |
5 | * Copyright (C) 2014-2015 SUSE Linux GmbH |
6 | * Written by: Hannes Reinecke <hare@suse.de> |
7 | * Modified by: Damien Le Moal <damien.lemoal@hgst.com> |
8 | * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com> |
9 | */ |
10 | |
11 | #include <linux/blkdev.h> |
12 | #include <linux/vmalloc.h> |
13 | #include <linux/sched/mm.h> |
14 | #include <linux/mutex.h> |
15 | |
16 | #include <asm/unaligned.h> |
17 | |
18 | #include <scsi/scsi.h> |
19 | #include <scsi/scsi_cmnd.h> |
20 | |
21 | #include "sd.h" |
22 | |
23 | #define CREATE_TRACE_POINTS |
24 | #include "sd_trace.h" |
25 | |
26 | /** |
27 | * sd_zbc_get_zone_wp_offset - Get zone write pointer offset. |
28 | * @zone: Zone for which to return the write pointer offset. |
29 | * |
30 | * Return: offset of the write pointer from the start of the zone. |
31 | */ |
32 | static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone) |
33 | { |
34 | if (zone->type == ZBC_ZONE_TYPE_CONV) |
35 | return 0; |
36 | |
37 | switch (zone->cond) { |
38 | case BLK_ZONE_COND_IMP_OPEN: |
39 | case BLK_ZONE_COND_EXP_OPEN: |
40 | case BLK_ZONE_COND_CLOSED: |
41 | return zone->wp - zone->start; |
42 | case BLK_ZONE_COND_FULL: |
43 | return zone->len; |
44 | case BLK_ZONE_COND_EMPTY: |
45 | case BLK_ZONE_COND_OFFLINE: |
46 | case BLK_ZONE_COND_READONLY: |
47 | default: |
48 | /* |
49 | * Offline and read-only zones do not have a valid |
50 | * write pointer. Use 0 as for an empty zone. |
51 | */ |
52 | return 0; |
53 | } |
54 | } |
55 | |
56 | /* Whether or not a SCSI zone descriptor describes a gap zone. */ |
57 | static bool sd_zbc_is_gap_zone(const u8 buf[64]) |
58 | { |
59 | return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP; |
60 | } |
61 | |
62 | /** |
63 | * sd_zbc_parse_report - Parse a SCSI zone descriptor |
64 | * @sdkp: SCSI disk pointer. |
65 | * @buf: SCSI zone descriptor. |
66 | * @idx: Index of the zone relative to the first zone reported by the current |
67 | * sd_zbc_report_zones() call. |
68 | * @cb: Callback function pointer. |
69 | * @data: Second argument passed to @cb. |
70 | * |
71 | * Return: Value returned by @cb. |
72 | * |
73 | * Convert a SCSI zone descriptor into struct blk_zone format. Additionally, |
74 | * call @cb(blk_zone, @data). |
75 | */ |
76 | static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64], |
77 | unsigned int idx, report_zones_cb cb, void *data) |
78 | { |
79 | struct scsi_device *sdp = sdkp->device; |
80 | struct blk_zone zone = { 0 }; |
81 | sector_t start_lba, gran; |
82 | int ret; |
83 | |
84 | if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf))) |
85 | return -EINVAL; |
86 | |
87 | zone.type = buf[0] & 0x0f; |
88 | zone.cond = (buf[1] >> 4) & 0xf; |
89 | if (buf[1] & 0x01) |
90 | zone.reset = 1; |
91 | if (buf[1] & 0x02) |
92 | zone.non_seq = 1; |
93 | |
94 | start_lba = get_unaligned_be64(p: &buf[16]); |
95 | zone.start = logical_to_sectors(sdev: sdp, blocks: start_lba); |
96 | zone.capacity = logical_to_sectors(sdev: sdp, blocks: get_unaligned_be64(p: &buf[8])); |
97 | zone.len = zone.capacity; |
98 | if (sdkp->zone_starting_lba_gran) { |
99 | gran = logical_to_sectors(sdev: sdp, blocks: sdkp->zone_starting_lba_gran); |
100 | if (zone.len > gran) { |
101 | sd_printk(KERN_ERR, sdkp, |
102 | "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n" , |
103 | start_lba, |
104 | sectors_to_logical(sdp, zone.capacity), |
105 | sectors_to_logical(sdp, zone.len), |
106 | sectors_to_logical(sdp, gran)); |
107 | return -EINVAL; |
108 | } |
109 | /* |
110 | * Use the starting LBA granularity instead of the zone length |
111 | * obtained from the REPORT ZONES command. |
112 | */ |
113 | zone.len = gran; |
114 | } |
115 | if (zone.cond == ZBC_ZONE_COND_FULL) |
116 | zone.wp = zone.start + zone.len; |
117 | else |
118 | zone.wp = logical_to_sectors(sdev: sdp, blocks: get_unaligned_be64(p: &buf[24])); |
119 | |
120 | ret = cb(&zone, idx, data); |
121 | if (ret) |
122 | return ret; |
123 | |
124 | if (sdkp->rev_wp_offset) |
125 | sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone: &zone); |
126 | |
127 | return 0; |
128 | } |
129 | |
130 | /** |
131 | * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. |
132 | * @sdkp: The target disk |
133 | * @buf: vmalloc-ed buffer to use for the reply |
134 | * @buflen: the buffer size |
135 | * @lba: Start LBA of the report |
136 | * @partial: Do partial report |
137 | * |
138 | * For internal use during device validation. |
139 | * Using partial=true can significantly speed up execution of a report zones |
140 | * command because the disk does not have to count all possible report matching |
141 | * zones and will only report the count of zones fitting in the command reply |
142 | * buffer. |
143 | */ |
144 | static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, |
145 | unsigned int buflen, sector_t lba, |
146 | bool partial) |
147 | { |
148 | struct scsi_device *sdp = sdkp->device; |
149 | const int timeout = sdp->request_queue->rq_timeout; |
150 | struct scsi_sense_hdr sshdr; |
151 | const struct scsi_exec_args exec_args = { |
152 | .sshdr = &sshdr, |
153 | }; |
154 | unsigned char cmd[16]; |
155 | unsigned int rep_len; |
156 | int result; |
157 | |
158 | memset(cmd, 0, 16); |
159 | cmd[0] = ZBC_IN; |
160 | cmd[1] = ZI_REPORT_ZONES; |
161 | put_unaligned_be64(val: lba, p: &cmd[2]); |
162 | put_unaligned_be32(val: buflen, p: &cmd[10]); |
163 | if (partial) |
164 | cmd[14] = ZBC_REPORT_ZONE_PARTIAL; |
165 | |
166 | result = scsi_execute_cmd(sdev: sdp, cmd, opf: REQ_OP_DRV_IN, buffer: buf, bufflen: buflen, |
167 | timeout, SD_MAX_RETRIES, args: &exec_args); |
168 | if (result) { |
169 | sd_printk(KERN_ERR, sdkp, |
170 | "REPORT ZONES start lba %llu failed\n" , lba); |
171 | sd_print_result(sdkp, msg: "REPORT ZONES" , result); |
172 | if (result > 0 && scsi_sense_valid(sshdr: &sshdr)) |
173 | sd_print_sense_hdr(sdkp, sshdr: &sshdr); |
174 | return -EIO; |
175 | } |
176 | |
177 | rep_len = get_unaligned_be32(p: &buf[0]); |
178 | if (rep_len < 64) { |
179 | sd_printk(KERN_ERR, sdkp, |
180 | "REPORT ZONES report invalid length %u\n" , |
181 | rep_len); |
182 | return -EIO; |
183 | } |
184 | |
185 | return 0; |
186 | } |
187 | |
188 | /** |
189 | * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply. |
190 | * @sdkp: The target disk |
191 | * @nr_zones: Maximum number of zones to report |
192 | * @buflen: Size of the buffer allocated |
193 | * |
194 | * Try to allocate a reply buffer for the number of requested zones. |
195 | * The size of the buffer allocated may be smaller than requested to |
196 | * satify the device constraint (max_hw_sectors, max_segments, etc). |
197 | * |
198 | * Return the address of the allocated buffer and update @buflen with |
199 | * the size of the allocated buffer. |
200 | */ |
201 | static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, |
202 | unsigned int nr_zones, size_t *buflen) |
203 | { |
204 | struct request_queue *q = sdkp->disk->queue; |
205 | size_t bufsize; |
206 | void *buf; |
207 | |
208 | /* |
209 | * Report zone buffer size should be at most 64B times the number of |
210 | * zones requested plus the 64B reply header, but should be aligned |
211 | * to SECTOR_SIZE for ATA devices. |
212 | * Make sure that this size does not exceed the hardware capabilities. |
213 | * Furthermore, since the report zone command cannot be split, make |
214 | * sure that the allocated buffer can always be mapped by limiting the |
215 | * number of pages allocated to the HBA max segments limit. |
216 | */ |
217 | nr_zones = min(nr_zones, sdkp->zone_info.nr_zones); |
218 | bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE); |
219 | bufsize = min_t(size_t, bufsize, |
220 | queue_max_hw_sectors(q) << SECTOR_SHIFT); |
221 | bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); |
222 | |
223 | while (bufsize >= SECTOR_SIZE) { |
224 | buf = __vmalloc(size: bufsize, |
225 | GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY); |
226 | if (buf) { |
227 | *buflen = bufsize; |
228 | return buf; |
229 | } |
230 | bufsize = rounddown(bufsize >> 1, SECTOR_SIZE); |
231 | } |
232 | |
233 | return NULL; |
234 | } |
235 | |
236 | /** |
237 | * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors. |
238 | * @sdkp: The target disk |
239 | */ |
240 | static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) |
241 | { |
242 | return logical_to_sectors(sdev: sdkp->device, blocks: sdkp->zone_info.zone_blocks); |
243 | } |
244 | |
245 | /** |
246 | * sd_zbc_report_zones - SCSI .report_zones() callback. |
247 | * @disk: Disk to report zones for. |
248 | * @sector: Start sector. |
249 | * @nr_zones: Maximum number of zones to report. |
250 | * @cb: Callback function called to report zone information. |
251 | * @data: Second argument passed to @cb. |
252 | * |
253 | * Called by the block layer to iterate over zone information. See also the |
254 | * disk->fops->report_zones() calls in block/blk-zoned.c. |
255 | */ |
256 | int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, |
257 | unsigned int nr_zones, report_zones_cb cb, void *data) |
258 | { |
259 | struct scsi_disk *sdkp = scsi_disk(disk); |
260 | sector_t lba = sectors_to_logical(sdev: sdkp->device, sector); |
261 | unsigned int nr, i; |
262 | unsigned char *buf; |
263 | u64 zone_length, start_lba; |
264 | size_t offset, buflen = 0; |
265 | int zone_idx = 0; |
266 | int ret; |
267 | |
268 | if (!sd_is_zoned(sdkp)) |
269 | /* Not a zoned device */ |
270 | return -EOPNOTSUPP; |
271 | |
272 | if (!sdkp->capacity) |
273 | /* Device gone or invalid */ |
274 | return -ENODEV; |
275 | |
276 | buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, buflen: &buflen); |
277 | if (!buf) |
278 | return -ENOMEM; |
279 | |
280 | while (zone_idx < nr_zones && lba < sdkp->capacity) { |
281 | ret = sd_zbc_do_report_zones(sdkp, buf, buflen, lba, partial: true); |
282 | if (ret) |
283 | goto out; |
284 | |
285 | offset = 0; |
286 | nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64); |
287 | if (!nr) |
288 | break; |
289 | |
290 | for (i = 0; i < nr && zone_idx < nr_zones; i++) { |
291 | offset += 64; |
292 | start_lba = get_unaligned_be64(p: &buf[offset + 16]); |
293 | zone_length = get_unaligned_be64(p: &buf[offset + 8]); |
294 | if ((zone_idx == 0 && |
295 | (lba < start_lba || |
296 | lba >= start_lba + zone_length)) || |
297 | (zone_idx > 0 && start_lba != lba) || |
298 | start_lba + zone_length < start_lba) { |
299 | sd_printk(KERN_ERR, sdkp, |
300 | "Zone %d at LBA %llu is invalid: %llu + %llu\n" , |
301 | zone_idx, lba, start_lba, zone_length); |
302 | ret = -EINVAL; |
303 | goto out; |
304 | } |
305 | lba = start_lba + zone_length; |
306 | if (sd_zbc_is_gap_zone(buf: &buf[offset])) { |
307 | if (sdkp->zone_starting_lba_gran) |
308 | continue; |
309 | sd_printk(KERN_ERR, sdkp, |
310 | "Gap zone without constant LBA offsets\n" ); |
311 | ret = -EINVAL; |
312 | goto out; |
313 | } |
314 | |
315 | ret = sd_zbc_parse_report(sdkp, buf: buf + offset, idx: zone_idx, |
316 | cb, data); |
317 | if (ret) |
318 | goto out; |
319 | |
320 | zone_idx++; |
321 | } |
322 | } |
323 | |
324 | ret = zone_idx; |
325 | out: |
326 | kvfree(addr: buf); |
327 | return ret; |
328 | } |
329 | |
330 | static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd) |
331 | { |
332 | struct request *rq = scsi_cmd_to_rq(scmd: cmd); |
333 | struct scsi_disk *sdkp = scsi_disk(disk: rq->q->disk); |
334 | sector_t sector = blk_rq_pos(rq); |
335 | |
336 | if (!sd_is_zoned(sdkp)) |
337 | /* Not a zoned device */ |
338 | return BLK_STS_IOERR; |
339 | |
340 | if (sdkp->device->changed) |
341 | return BLK_STS_IOERR; |
342 | |
343 | if (sector & (sd_zbc_zone_sectors(sdkp) - 1)) |
344 | /* Unaligned request */ |
345 | return BLK_STS_IOERR; |
346 | |
347 | return BLK_STS_OK; |
348 | } |
349 | |
/*
 * Special values of the cached zone write pointer offsets:
 * - SD_ZBC_INVALID_WP_OFST: the cached offset cannot be used and must be
 *   read again from the device.
 * - SD_ZBC_UPDATING_WP_OFST: an update of the cached offset has been
 *   scheduled and has not completed yet.
 */
#define SD_ZBC_INVALID_WP_OFST	(~0u)
#define SD_ZBC_UPDATING_WP_OFST	(SD_ZBC_INVALID_WP_OFST - 1)
352 | |
353 | static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx, |
354 | void *data) |
355 | { |
356 | struct scsi_disk *sdkp = data; |
357 | |
358 | lockdep_assert_held(&sdkp->zones_wp_offset_lock); |
359 | |
360 | sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone); |
361 | |
362 | return 0; |
363 | } |
364 | |
365 | /* |
366 | * An attempt to append a zone triggered an invalid write pointer error. |
367 | * Reread the write pointer of the zone(s) in which the append failed. |
368 | */ |
369 | static void sd_zbc_update_wp_offset_workfn(struct work_struct *work) |
370 | { |
371 | struct scsi_disk *sdkp; |
372 | unsigned long flags; |
373 | sector_t zno; |
374 | int ret; |
375 | |
376 | sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work); |
377 | |
378 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
379 | for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) { |
380 | if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) |
381 | continue; |
382 | |
383 | spin_unlock_irqrestore(lock: &sdkp->zones_wp_offset_lock, flags); |
384 | ret = sd_zbc_do_report_zones(sdkp, buf: sdkp->zone_wp_update_buf, |
385 | SD_BUF_SIZE, |
386 | lba: zno * sdkp->zone_info.zone_blocks, partial: true); |
387 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
388 | if (!ret) |
389 | sd_zbc_parse_report(sdkp, buf: sdkp->zone_wp_update_buf + 64, |
390 | idx: zno, cb: sd_zbc_update_wp_offset_cb, |
391 | data: sdkp); |
392 | } |
393 | spin_unlock_irqrestore(lock: &sdkp->zones_wp_offset_lock, flags); |
394 | |
395 | scsi_device_put(sdkp->device); |
396 | } |
397 | |
398 | /** |
399 | * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command. |
400 | * @cmd: the command to setup |
401 | * @lba: the LBA to patch |
402 | * @nr_blocks: the number of LBAs to be written |
403 | * |
404 | * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND. |
405 | * @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and |
406 | * patching of the lba for an emulated ZONE_APPEND command. |
407 | * |
408 | * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will |
409 | * schedule a REPORT ZONES command and return BLK_STS_IOERR. |
410 | */ |
411 | blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, |
412 | unsigned int nr_blocks) |
413 | { |
414 | struct request *rq = scsi_cmd_to_rq(scmd: cmd); |
415 | struct scsi_disk *sdkp = scsi_disk(disk: rq->q->disk); |
416 | unsigned int wp_offset, zno = blk_rq_zone_no(rq); |
417 | unsigned long flags; |
418 | blk_status_t ret; |
419 | |
420 | ret = sd_zbc_cmnd_checks(cmd); |
421 | if (ret != BLK_STS_OK) |
422 | return ret; |
423 | |
424 | if (!blk_rq_zone_is_seq(rq)) |
425 | return BLK_STS_IOERR; |
426 | |
427 | /* Unlock of the write lock will happen in sd_zbc_complete() */ |
428 | if (!blk_req_zone_write_trylock(rq)) |
429 | return BLK_STS_ZONE_RESOURCE; |
430 | |
431 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
432 | wp_offset = sdkp->zones_wp_offset[zno]; |
433 | switch (wp_offset) { |
434 | case SD_ZBC_INVALID_WP_OFST: |
435 | /* |
436 | * We are about to schedule work to update a zone write pointer |
437 | * offset, which will cause the zone append command to be |
438 | * requeued. So make sure that the scsi device does not go away |
439 | * while the work is being processed. |
440 | */ |
441 | if (scsi_device_get(sdkp->device)) { |
442 | ret = BLK_STS_IOERR; |
443 | break; |
444 | } |
445 | sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST; |
446 | schedule_work(work: &sdkp->zone_wp_offset_work); |
447 | fallthrough; |
448 | case SD_ZBC_UPDATING_WP_OFST: |
449 | ret = BLK_STS_DEV_RESOURCE; |
450 | break; |
451 | default: |
452 | wp_offset = sectors_to_logical(sdev: sdkp->device, sector: wp_offset); |
453 | if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) { |
454 | ret = BLK_STS_IOERR; |
455 | break; |
456 | } |
457 | |
458 | trace_scsi_prepare_zone_append(cmnd: cmd, lba: *lba, wp_offset); |
459 | *lba += wp_offset; |
460 | } |
461 | spin_unlock_irqrestore(lock: &sdkp->zones_wp_offset_lock, flags); |
462 | if (ret) |
463 | blk_req_zone_write_unlock(rq); |
464 | return ret; |
465 | } |
466 | |
467 | /** |
468 | * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations |
469 | * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH. |
470 | * @cmd: the command to setup |
471 | * @op: Operation to be performed |
472 | * @all: All zones control |
473 | * |
474 | * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL, |
475 | * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests. |
476 | */ |
477 | blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, |
478 | unsigned char op, bool all) |
479 | { |
480 | struct request *rq = scsi_cmd_to_rq(scmd: cmd); |
481 | sector_t sector = blk_rq_pos(rq); |
482 | struct scsi_disk *sdkp = scsi_disk(disk: rq->q->disk); |
483 | sector_t block = sectors_to_logical(sdev: sdkp->device, sector); |
484 | blk_status_t ret; |
485 | |
486 | ret = sd_zbc_cmnd_checks(cmd); |
487 | if (ret != BLK_STS_OK) |
488 | return ret; |
489 | |
490 | cmd->cmd_len = 16; |
491 | memset(cmd->cmnd, 0, cmd->cmd_len); |
492 | cmd->cmnd[0] = ZBC_OUT; |
493 | cmd->cmnd[1] = op; |
494 | if (all) |
495 | cmd->cmnd[14] = 0x1; |
496 | else |
497 | put_unaligned_be64(val: block, p: &cmd->cmnd[2]); |
498 | |
499 | rq->timeout = SD_TIMEOUT; |
500 | cmd->sc_data_direction = DMA_NONE; |
501 | cmd->transfersize = 0; |
502 | cmd->allowed = 0; |
503 | |
504 | return BLK_STS_OK; |
505 | } |
506 | |
507 | static bool sd_zbc_need_zone_wp_update(struct request *rq) |
508 | { |
509 | switch (req_op(req: rq)) { |
510 | case REQ_OP_ZONE_APPEND: |
511 | case REQ_OP_ZONE_FINISH: |
512 | case REQ_OP_ZONE_RESET: |
513 | case REQ_OP_ZONE_RESET_ALL: |
514 | return true; |
515 | case REQ_OP_WRITE: |
516 | case REQ_OP_WRITE_ZEROES: |
517 | return blk_rq_zone_is_seq(rq); |
518 | default: |
519 | return false; |
520 | } |
521 | } |
522 | |
523 | /** |
524 | * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion |
525 | * @cmd: Completed command |
526 | * @good_bytes: Command reply bytes |
527 | * |
528 | * Called from sd_zbc_complete() to handle the update of the cached zone write |
529 | * pointer value in case an update is needed. |
530 | */ |
531 | static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, |
532 | unsigned int good_bytes) |
533 | { |
534 | int result = cmd->result; |
535 | struct request *rq = scsi_cmd_to_rq(scmd: cmd); |
536 | struct scsi_disk *sdkp = scsi_disk(disk: rq->q->disk); |
537 | unsigned int zno = blk_rq_zone_no(rq); |
538 | enum req_op op = req_op(req: rq); |
539 | unsigned long flags; |
540 | |
541 | /* |
542 | * If we got an error for a command that needs updating the write |
543 | * pointer offset cache, we must mark the zone wp offset entry as |
544 | * invalid to force an update from disk the next time a zone append |
545 | * command is issued. |
546 | */ |
547 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
548 | |
549 | if (result && op != REQ_OP_ZONE_RESET_ALL) { |
550 | if (op == REQ_OP_ZONE_APPEND) { |
551 | /* Force complete completion (no retry) */ |
552 | good_bytes = 0; |
553 | scsi_set_resid(cmd, resid: blk_rq_bytes(rq)); |
554 | } |
555 | |
556 | /* |
557 | * Force an update of the zone write pointer offset on |
558 | * the next zone append access. |
559 | */ |
560 | if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) |
561 | sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST; |
562 | goto unlock_wp_offset; |
563 | } |
564 | |
565 | switch (op) { |
566 | case REQ_OP_ZONE_APPEND: |
567 | trace_scsi_zone_wp_update(cmnd: cmd, rq_sector: rq->__sector, |
568 | wp_offset: sdkp->zones_wp_offset[zno], good_bytes); |
569 | rq->__sector += sdkp->zones_wp_offset[zno]; |
570 | fallthrough; |
571 | case REQ_OP_WRITE_ZEROES: |
572 | case REQ_OP_WRITE: |
573 | if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp)) |
574 | sdkp->zones_wp_offset[zno] += |
575 | good_bytes >> SECTOR_SHIFT; |
576 | break; |
577 | case REQ_OP_ZONE_RESET: |
578 | sdkp->zones_wp_offset[zno] = 0; |
579 | break; |
580 | case REQ_OP_ZONE_FINISH: |
581 | sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp); |
582 | break; |
583 | case REQ_OP_ZONE_RESET_ALL: |
584 | memset(sdkp->zones_wp_offset, 0, |
585 | sdkp->zone_info.nr_zones * sizeof(unsigned int)); |
586 | break; |
587 | default: |
588 | break; |
589 | } |
590 | |
591 | unlock_wp_offset: |
592 | spin_unlock_irqrestore(lock: &sdkp->zones_wp_offset_lock, flags); |
593 | |
594 | return good_bytes; |
595 | } |
596 | |
597 | /** |
598 | * sd_zbc_complete - ZBC command post processing. |
599 | * @cmd: Completed command |
600 | * @good_bytes: Command reply bytes |
601 | * @sshdr: command sense header |
602 | * |
603 | * Called from sd_done() to handle zone commands errors and updates to the |
604 | * device queue zone write pointer offset cahce. |
605 | */ |
606 | unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, |
607 | struct scsi_sense_hdr *sshdr) |
608 | { |
609 | int result = cmd->result; |
610 | struct request *rq = scsi_cmd_to_rq(scmd: cmd); |
611 | |
612 | if (op_is_zone_mgmt(op: req_op(req: rq)) && |
613 | result && |
614 | sshdr->sense_key == ILLEGAL_REQUEST && |
615 | sshdr->asc == 0x24) { |
616 | /* |
617 | * INVALID FIELD IN CDB error: a zone management command was |
618 | * attempted on a conventional zone. Nothing to worry about, |
619 | * so be quiet about the error. |
620 | */ |
621 | rq->rq_flags |= RQF_QUIET; |
622 | } else if (sd_zbc_need_zone_wp_update(rq)) |
623 | good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes); |
624 | |
625 | if (req_op(req: rq) == REQ_OP_ZONE_APPEND) |
626 | blk_req_zone_write_unlock(rq); |
627 | |
628 | return good_bytes; |
629 | } |
630 | |
631 | /** |
632 | * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics |
633 | * @sdkp: Target disk |
634 | * @buf: Buffer where to store the VPD page data |
635 | * |
636 | * Read VPD page B6, get information and check that reads are unconstrained. |
637 | */ |
638 | static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, |
639 | unsigned char *buf) |
640 | { |
641 | u64 zone_starting_lba_gran; |
642 | |
643 | if (scsi_get_vpd_page(sdkp->device, page: 0xb6, buf, buf_len: 64)) { |
644 | sd_printk(KERN_NOTICE, sdkp, |
645 | "Read zoned characteristics VPD page failed\n" ); |
646 | return -ENODEV; |
647 | } |
648 | |
649 | if (sdkp->device->type != TYPE_ZBC) { |
650 | /* Host-aware */ |
651 | sdkp->urswrz = 1; |
652 | sdkp->zones_optimal_open = get_unaligned_be32(p: &buf[8]); |
653 | sdkp->zones_optimal_nonseq = get_unaligned_be32(p: &buf[12]); |
654 | sdkp->zones_max_open = 0; |
655 | return 0; |
656 | } |
657 | |
658 | /* Host-managed */ |
659 | sdkp->urswrz = buf[4] & 1; |
660 | sdkp->zones_optimal_open = 0; |
661 | sdkp->zones_optimal_nonseq = 0; |
662 | sdkp->zones_max_open = get_unaligned_be32(p: &buf[16]); |
663 | /* Check zone alignment method */ |
664 | switch (buf[23] & 0xf) { |
665 | case 0: |
666 | case ZBC_CONSTANT_ZONE_LENGTH: |
667 | /* Use zone length */ |
668 | break; |
669 | case ZBC_CONSTANT_ZONE_START_OFFSET: |
670 | zone_starting_lba_gran = get_unaligned_be64(p: &buf[24]); |
671 | if (zone_starting_lba_gran == 0 || |
672 | !is_power_of_2(n: zone_starting_lba_gran) || |
673 | logical_to_sectors(sdev: sdkp->device, blocks: zone_starting_lba_gran) > |
674 | UINT_MAX) { |
675 | sd_printk(KERN_ERR, sdkp, |
676 | "Invalid zone starting LBA granularity %llu\n" , |
677 | zone_starting_lba_gran); |
678 | return -ENODEV; |
679 | } |
680 | sdkp->zone_starting_lba_gran = zone_starting_lba_gran; |
681 | break; |
682 | default: |
683 | sd_printk(KERN_ERR, sdkp, "Invalid zone alignment method\n" ); |
684 | return -ENODEV; |
685 | } |
686 | |
687 | /* |
688 | * Check for unconstrained reads: host-managed devices with |
689 | * constrained reads (drives failing read after write pointer) |
690 | * are not supported. |
691 | */ |
692 | if (!sdkp->urswrz) { |
693 | if (sdkp->first_scan) |
694 | sd_printk(KERN_NOTICE, sdkp, |
695 | "constrained reads devices are not supported\n" ); |
696 | return -ENODEV; |
697 | } |
698 | |
699 | return 0; |
700 | } |
701 | |
702 | /** |
703 | * sd_zbc_check_capacity - Check the device capacity |
704 | * @sdkp: Target disk |
705 | * @buf: command buffer |
706 | * @zblocks: zone size in logical blocks |
707 | * |
708 | * Get the device zone size and check that the device capacity as reported |
709 | * by READ CAPACITY matches the max_lba value (plus one) of the report zones |
710 | * command reply for devices with RC_BASIS == 0. |
711 | * |
712 | * Returns 0 upon success or an error code upon failure. |
713 | */ |
714 | static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf, |
715 | u32 *zblocks) |
716 | { |
717 | u64 zone_blocks; |
718 | sector_t max_lba; |
719 | unsigned char *rec; |
720 | int ret; |
721 | |
722 | /* Do a report zone to get max_lba and the size of the first zone */ |
723 | ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, lba: 0, partial: false); |
724 | if (ret) |
725 | return ret; |
726 | |
727 | if (sdkp->rc_basis == 0) { |
728 | /* The max_lba field is the capacity of this device */ |
729 | max_lba = get_unaligned_be64(p: &buf[8]); |
730 | if (sdkp->capacity != max_lba + 1) { |
731 | if (sdkp->first_scan) |
732 | sd_printk(KERN_WARNING, sdkp, |
733 | "Changing capacity from %llu to max LBA+1 %llu\n" , |
734 | (unsigned long long)sdkp->capacity, |
735 | (unsigned long long)max_lba + 1); |
736 | sdkp->capacity = max_lba + 1; |
737 | } |
738 | } |
739 | |
740 | if (sdkp->zone_starting_lba_gran == 0) { |
741 | /* Get the size of the first reported zone */ |
742 | rec = buf + 64; |
743 | zone_blocks = get_unaligned_be64(p: &rec[8]); |
744 | if (logical_to_sectors(sdev: sdkp->device, blocks: zone_blocks) > UINT_MAX) { |
745 | if (sdkp->first_scan) |
746 | sd_printk(KERN_NOTICE, sdkp, |
747 | "Zone size too large\n" ); |
748 | return -EFBIG; |
749 | } |
750 | } else { |
751 | zone_blocks = sdkp->zone_starting_lba_gran; |
752 | } |
753 | |
754 | if (!is_power_of_2(n: zone_blocks)) { |
755 | sd_printk(KERN_ERR, sdkp, |
756 | "Zone size %llu is not a power of two.\n" , |
757 | zone_blocks); |
758 | return -EINVAL; |
759 | } |
760 | |
761 | *zblocks = zone_blocks; |
762 | |
763 | return 0; |
764 | } |
765 | |
766 | static void sd_zbc_print_zones(struct scsi_disk *sdkp) |
767 | { |
768 | if (!sd_is_zoned(sdkp) || !sdkp->capacity) |
769 | return; |
770 | |
771 | if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1)) |
772 | sd_printk(KERN_NOTICE, sdkp, |
773 | "%u zones of %u logical blocks + 1 runt zone\n" , |
774 | sdkp->zone_info.nr_zones - 1, |
775 | sdkp->zone_info.zone_blocks); |
776 | else |
777 | sd_printk(KERN_NOTICE, sdkp, |
778 | "%u zones of %u logical blocks\n" , |
779 | sdkp->zone_info.nr_zones, |
780 | sdkp->zone_info.zone_blocks); |
781 | } |
782 | |
783 | static int sd_zbc_init_disk(struct scsi_disk *sdkp) |
784 | { |
785 | sdkp->zones_wp_offset = NULL; |
786 | spin_lock_init(&sdkp->zones_wp_offset_lock); |
787 | sdkp->rev_wp_offset = NULL; |
788 | mutex_init(&sdkp->rev_mutex); |
789 | INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn); |
790 | sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL); |
791 | if (!sdkp->zone_wp_update_buf) |
792 | return -ENOMEM; |
793 | |
794 | return 0; |
795 | } |
796 | |
797 | void sd_zbc_free_zone_info(struct scsi_disk *sdkp) |
798 | { |
799 | if (!sdkp->zone_wp_update_buf) |
800 | return; |
801 | |
802 | /* Serialize against revalidate zones */ |
803 | mutex_lock(&sdkp->rev_mutex); |
804 | |
805 | kvfree(addr: sdkp->zones_wp_offset); |
806 | sdkp->zones_wp_offset = NULL; |
807 | kfree(objp: sdkp->zone_wp_update_buf); |
808 | sdkp->zone_wp_update_buf = NULL; |
809 | |
810 | sdkp->early_zone_info = (struct zoned_disk_info){ }; |
811 | sdkp->zone_info = (struct zoned_disk_info){ }; |
812 | |
813 | mutex_unlock(lock: &sdkp->rev_mutex); |
814 | } |
815 | |
816 | static void sd_zbc_revalidate_zones_cb(struct gendisk *disk) |
817 | { |
818 | struct scsi_disk *sdkp = scsi_disk(disk); |
819 | |
820 | swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset); |
821 | } |
822 | |
823 | /* |
824 | * Call blk_revalidate_disk_zones() if any of the zoned disk properties have |
825 | * changed that make it necessary to call that function. Called by |
826 | * sd_revalidate_disk() after the gendisk capacity has been set. |
827 | */ |
828 | int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) |
829 | { |
830 | struct gendisk *disk = sdkp->disk; |
831 | struct request_queue *q = disk->queue; |
832 | u32 zone_blocks = sdkp->early_zone_info.zone_blocks; |
833 | unsigned int nr_zones = sdkp->early_zone_info.nr_zones; |
834 | int ret = 0; |
835 | unsigned int flags; |
836 | |
837 | /* |
838 | * For all zoned disks, initialize zone append emulation data if not |
839 | * already done. |
840 | */ |
841 | if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) { |
842 | ret = sd_zbc_init_disk(sdkp); |
843 | if (ret) |
844 | return ret; |
845 | } |
846 | |
847 | /* |
848 | * There is nothing to do for regular disks, including host-aware disks |
849 | * that have partitions. |
850 | */ |
851 | if (!blk_queue_is_zoned(q)) |
852 | return 0; |
853 | |
854 | /* |
855 | * Make sure revalidate zones are serialized to ensure exclusive |
856 | * updates of the scsi disk data. |
857 | */ |
858 | mutex_lock(&sdkp->rev_mutex); |
859 | |
860 | if (sdkp->zone_info.zone_blocks == zone_blocks && |
861 | sdkp->zone_info.nr_zones == nr_zones && |
862 | disk->nr_zones == nr_zones) |
863 | goto unlock; |
864 | |
865 | flags = memalloc_noio_save(); |
866 | sdkp->zone_info.zone_blocks = zone_blocks; |
867 | sdkp->zone_info.nr_zones = nr_zones; |
868 | sdkp->rev_wp_offset = kvcalloc(n: nr_zones, size: sizeof(u32), GFP_KERNEL); |
869 | if (!sdkp->rev_wp_offset) { |
870 | ret = -ENOMEM; |
871 | memalloc_noio_restore(flags); |
872 | goto unlock; |
873 | } |
874 | |
875 | blk_queue_chunk_sectors(q, |
876 | logical_to_sectors(sdev: sdkp->device, blocks: zone_blocks)); |
877 | blk_queue_max_zone_append_sectors(q, |
878 | max_zone_append_sectors: q->limits.max_segments << PAGE_SECTORS_SHIFT); |
879 | |
880 | ret = blk_revalidate_disk_zones(disk, update_driver_data: sd_zbc_revalidate_zones_cb); |
881 | |
882 | memalloc_noio_restore(flags); |
883 | kvfree(addr: sdkp->rev_wp_offset); |
884 | sdkp->rev_wp_offset = NULL; |
885 | |
886 | if (ret) { |
887 | sdkp->zone_info = (struct zoned_disk_info){ }; |
888 | sdkp->capacity = 0; |
889 | goto unlock; |
890 | } |
891 | |
892 | sd_zbc_print_zones(sdkp); |
893 | |
894 | unlock: |
895 | mutex_unlock(lock: &sdkp->rev_mutex); |
896 | |
897 | return ret; |
898 | } |
899 | |
900 | /** |
901 | * sd_zbc_read_zones - Read zone information and update the request queue |
902 | * @sdkp: SCSI disk pointer. |
903 | * @buf: 512 byte buffer used for storing SCSI command output. |
904 | * |
905 | * Read zone information and update the request queue zone characteristics and |
906 | * also the zoned device information in *sdkp. Called by sd_revalidate_disk() |
907 | * before the gendisk capacity has been set. |
908 | */ |
909 | int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) |
910 | { |
911 | struct gendisk *disk = sdkp->disk; |
912 | struct request_queue *q = disk->queue; |
913 | unsigned int nr_zones; |
914 | u32 zone_blocks = 0; |
915 | int ret; |
916 | |
917 | if (!sd_is_zoned(sdkp)) { |
918 | /* |
919 | * Device managed or normal SCSI disk, no special handling |
920 | * required. Nevertheless, free the disk zone information in |
921 | * case the device type changed. |
922 | */ |
923 | sd_zbc_free_zone_info(sdkp); |
924 | return 0; |
925 | } |
926 | |
927 | /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */ |
928 | sdkp->device->use_16_for_rw = 1; |
929 | sdkp->device->use_10_for_rw = 0; |
930 | sdkp->device->use_16_for_sync = 1; |
931 | |
932 | /* Check zoned block device characteristics (unconstrained reads) */ |
933 | ret = sd_zbc_check_zoned_characteristics(sdkp, buf); |
934 | if (ret) |
935 | goto err; |
936 | |
937 | /* Check the device capacity reported by report zones */ |
938 | ret = sd_zbc_check_capacity(sdkp, buf, zblocks: &zone_blocks); |
939 | if (ret != 0) |
940 | goto err; |
941 | |
942 | /* The drive satisfies the kernel restrictions: set it up */ |
943 | blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); |
944 | blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); |
945 | if (sdkp->zones_max_open == U32_MAX) |
946 | disk_set_max_open_zones(disk, max_open_zones: 0); |
947 | else |
948 | disk_set_max_open_zones(disk, max_open_zones: sdkp->zones_max_open); |
949 | disk_set_max_active_zones(disk, max_active_zones: 0); |
950 | nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks); |
951 | |
952 | sdkp->early_zone_info.nr_zones = nr_zones; |
953 | sdkp->early_zone_info.zone_blocks = zone_blocks; |
954 | |
955 | return 0; |
956 | |
957 | err: |
958 | sdkp->capacity = 0; |
959 | |
960 | return ret; |
961 | } |
962 | |