1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (C) 2020 Western Digital Corporation or its affiliates. |
4 | */ |
5 | |
6 | #include <linux/blkdev.h> |
7 | #include <linux/vmalloc.h> |
8 | #include "nvme.h" |
9 | |
10 | static int nvme_set_max_append(struct nvme_ctrl *ctrl) |
11 | { |
12 | struct nvme_command c = { }; |
13 | struct nvme_id_ctrl_zns *id; |
14 | int status; |
15 | |
16 | id = kzalloc(size: sizeof(*id), GFP_KERNEL); |
17 | if (!id) |
18 | return -ENOMEM; |
19 | |
20 | c.identify.opcode = nvme_admin_identify; |
21 | c.identify.cns = NVME_ID_CNS_CS_CTRL; |
22 | c.identify.csi = NVME_CSI_ZNS; |
23 | |
24 | status = nvme_submit_sync_cmd(q: ctrl->admin_q, cmd: &c, buf: id, bufflen: sizeof(*id)); |
25 | if (status) { |
26 | kfree(objp: id); |
27 | return status; |
28 | } |
29 | |
30 | if (id->zasl) |
31 | ctrl->max_zone_append = 1 << (id->zasl + 3); |
32 | else |
33 | ctrl->max_zone_append = ctrl->max_hw_sectors; |
34 | kfree(objp: id); |
35 | return 0; |
36 | } |
37 | |
38 | int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf, |
39 | struct nvme_zone_info *zi) |
40 | { |
41 | struct nvme_effects_log *log = ns->head->effects; |
42 | struct nvme_command c = { }; |
43 | struct nvme_id_ns_zns *id; |
44 | int status; |
45 | |
46 | /* Driver requires zone append support */ |
47 | if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) & |
48 | NVME_CMD_EFFECTS_CSUPP)) { |
49 | if (test_and_clear_bit(NVME_NS_FORCE_RO, addr: &ns->flags)) |
50 | dev_warn(ns->ctrl->device, |
51 | "Zone Append supported for zoned namespace:%d. Remove read-only mode\n" , |
52 | ns->head->ns_id); |
53 | } else { |
54 | set_bit(NVME_NS_FORCE_RO, addr: &ns->flags); |
55 | dev_warn(ns->ctrl->device, |
56 | "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n" , |
57 | ns->head->ns_id); |
58 | } |
59 | |
60 | /* Lazily query controller append limit for the first zoned namespace */ |
61 | if (!ns->ctrl->max_zone_append) { |
62 | status = nvme_set_max_append(ctrl: ns->ctrl); |
63 | if (status) |
64 | return status; |
65 | } |
66 | |
67 | id = kzalloc(size: sizeof(*id), GFP_KERNEL); |
68 | if (!id) |
69 | return -ENOMEM; |
70 | |
71 | c.identify.opcode = nvme_admin_identify; |
72 | c.identify.nsid = cpu_to_le32(ns->head->ns_id); |
73 | c.identify.cns = NVME_ID_CNS_CS_NS; |
74 | c.identify.csi = NVME_CSI_ZNS; |
75 | |
76 | status = nvme_submit_sync_cmd(q: ns->ctrl->admin_q, cmd: &c, buf: id, bufflen: sizeof(*id)); |
77 | if (status) |
78 | goto free_data; |
79 | |
80 | /* |
81 | * We currently do not handle devices requiring any of the zoned |
82 | * operation characteristics. |
83 | */ |
84 | if (id->zoc) { |
85 | dev_warn(ns->ctrl->device, |
86 | "zone operations:%x not supported for namespace:%u\n" , |
87 | le16_to_cpu(id->zoc), ns->head->ns_id); |
88 | status = -ENODEV; |
89 | goto free_data; |
90 | } |
91 | |
92 | zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze); |
93 | if (!is_power_of_2(n: zi->zone_size)) { |
94 | dev_warn(ns->ctrl->device, |
95 | "invalid zone size: %llu for namespace: %u\n" , |
96 | zi->zone_size, ns->head->ns_id); |
97 | status = -ENODEV; |
98 | goto free_data; |
99 | } |
100 | zi->max_open_zones = le32_to_cpu(id->mor) + 1; |
101 | zi->max_active_zones = le32_to_cpu(id->mar) + 1; |
102 | |
103 | free_data: |
104 | kfree(objp: id); |
105 | return status; |
106 | } |
107 | |
108 | void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, |
109 | struct nvme_zone_info *zi) |
110 | { |
111 | lim->zoned = 1; |
112 | lim->max_open_zones = zi->max_open_zones; |
113 | lim->max_active_zones = zi->max_active_zones; |
114 | lim->max_zone_append_sectors = ns->ctrl->max_zone_append; |
115 | lim->chunk_sectors = ns->head->zsze = |
116 | nvme_lba_to_sect(head: ns->head, lba: zi->zone_size); |
117 | blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q: ns->queue); |
118 | } |
119 | |
120 | static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, |
121 | unsigned int nr_zones, size_t *buflen) |
122 | { |
123 | struct request_queue *q = ns->disk->queue; |
124 | size_t bufsize; |
125 | void *buf; |
126 | |
127 | const size_t min_bufsize = sizeof(struct nvme_zone_report) + |
128 | sizeof(struct nvme_zone_descriptor); |
129 | |
130 | nr_zones = min_t(unsigned int, nr_zones, |
131 | get_capacity(ns->disk) >> ilog2(ns->head->zsze)); |
132 | |
133 | bufsize = sizeof(struct nvme_zone_report) + |
134 | nr_zones * sizeof(struct nvme_zone_descriptor); |
135 | bufsize = min_t(size_t, bufsize, |
136 | queue_max_hw_sectors(q) << SECTOR_SHIFT); |
137 | bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); |
138 | |
139 | while (bufsize >= min_bufsize) { |
140 | buf = __vmalloc(size: bufsize, GFP_KERNEL | __GFP_NORETRY); |
141 | if (buf) { |
142 | *buflen = bufsize; |
143 | return buf; |
144 | } |
145 | bufsize >>= 1; |
146 | } |
147 | return NULL; |
148 | } |
149 | |
150 | static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl, |
151 | struct nvme_ns_head *head, |
152 | struct nvme_zone_descriptor *entry, |
153 | unsigned int idx, report_zones_cb cb, |
154 | void *data) |
155 | { |
156 | struct blk_zone zone = { }; |
157 | |
158 | if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) { |
159 | dev_err(ctrl->device, "invalid zone type %#x\n" , |
160 | entry->zt); |
161 | return -EINVAL; |
162 | } |
163 | |
164 | zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ; |
165 | zone.cond = entry->zs >> 4; |
166 | zone.len = head->zsze; |
167 | zone.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap)); |
168 | zone.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba)); |
169 | if (zone.cond == BLK_ZONE_COND_FULL) |
170 | zone.wp = zone.start + zone.len; |
171 | else |
172 | zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp)); |
173 | |
174 | return cb(&zone, idx, data); |
175 | } |
176 | |
177 | int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, |
178 | unsigned int nr_zones, report_zones_cb cb, void *data) |
179 | { |
180 | struct nvme_zone_report *report; |
181 | struct nvme_command c = { }; |
182 | int ret, zone_idx = 0; |
183 | unsigned int nz, i; |
184 | size_t buflen; |
185 | |
186 | if (ns->head->ids.csi != NVME_CSI_ZNS) |
187 | return -EINVAL; |
188 | |
189 | report = nvme_zns_alloc_report_buffer(ns, nr_zones, buflen: &buflen); |
190 | if (!report) |
191 | return -ENOMEM; |
192 | |
193 | c.zmr.opcode = nvme_cmd_zone_mgmt_recv; |
194 | c.zmr.nsid = cpu_to_le32(ns->head->ns_id); |
195 | c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen)); |
196 | c.zmr.zra = NVME_ZRA_ZONE_REPORT; |
197 | c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL; |
198 | c.zmr.pr = NVME_REPORT_ZONE_PARTIAL; |
199 | |
200 | sector &= ~(ns->head->zsze - 1); |
201 | while (zone_idx < nr_zones && sector < get_capacity(disk: ns->disk)) { |
202 | memset(report, 0, buflen); |
203 | |
204 | c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector)); |
205 | ret = nvme_submit_sync_cmd(q: ns->queue, cmd: &c, buf: report, bufflen: buflen); |
206 | if (ret) { |
207 | if (ret > 0) |
208 | ret = -EIO; |
209 | goto out_free; |
210 | } |
211 | |
212 | nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones); |
213 | if (!nz) |
214 | break; |
215 | |
216 | for (i = 0; i < nz && zone_idx < nr_zones; i++) { |
217 | ret = nvme_zone_parse_entry(ctrl: ns->ctrl, head: ns->head, |
218 | entry: &report->entries[i], |
219 | idx: zone_idx, cb, data); |
220 | if (ret) |
221 | goto out_free; |
222 | zone_idx++; |
223 | } |
224 | |
225 | sector += ns->head->zsze * nz; |
226 | } |
227 | |
228 | if (zone_idx > 0) |
229 | ret = zone_idx; |
230 | else |
231 | ret = -EINVAL; |
232 | out_free: |
233 | kvfree(addr: report); |
234 | return ret; |
235 | } |
236 | |
237 | blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, |
238 | struct nvme_command *c, enum nvme_zone_mgmt_action action) |
239 | { |
240 | memset(c, 0, sizeof(*c)); |
241 | |
242 | c->zms.opcode = nvme_cmd_zone_mgmt_send; |
243 | c->zms.nsid = cpu_to_le32(ns->head->ns_id); |
244 | c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); |
245 | c->zms.zsa = action; |
246 | |
247 | if (req_op(req) == REQ_OP_ZONE_RESET_ALL) |
248 | c->zms.select_all = 1; |
249 | |
250 | return BLK_STS_OK; |
251 | } |
252 | |