// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/units.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
#include "trace.h"

/**
 * DOC: cxl core pci
 *
 * Compute Express Link protocols are layered on top of PCIe. CXL core provides
 * a set of helpers for CXL interactions which occur via PCIe.
 */

static unsigned short media_ready_timeout = 60;
module_param(media_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");

struct cxl_walk_context {
	struct pci_bus *bus;
	struct cxl_port *port;
	int type;
	int error;
	int count;
};

static int match_add_dports(struct pci_dev *pdev, void *data)
{
	struct cxl_walk_context *ctx = data;
	struct cxl_port *port = ctx->port;
	int type = pci_pcie_type(pdev);
	struct cxl_register_map map;
	struct cxl_dport *dport;
	u32 lnkcap, port_num;
	int rc;

	if (pdev->bus != ctx->bus)
		return 0;
	if (!pci_is_pcie(pdev))
		return 0;
	if (type != ctx->type)
		return 0;
	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
				  &lnkcap))
		return 0;

	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		dev_dbg(&port->dev, "failed to find component registers\n");

	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
	dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
	if (IS_ERR(dport)) {
		ctx->error = PTR_ERR(dport);
		return PTR_ERR(dport);
	}
	ctx->count++;

	return 0;
}

/**
 * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
 * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
 *
 * Returns a positive number of dports enumerated or a negative error
 * code.
 */
int devm_cxl_port_enumerate_dports(struct cxl_port *port)
{
	struct pci_bus *bus = cxl_port_to_pci_bus(port);
	struct cxl_walk_context ctx;
	int type;

	if (!bus)
		return -ENXIO;

	if (pci_is_root_bus(bus))
		type = PCI_EXP_TYPE_ROOT_PORT;
	else
		type = PCI_EXP_TYPE_DOWNSTREAM;

	ctx = (struct cxl_walk_context) {
		.port = port,
		.bus = bus,
		.type = type,
	};
	pci_walk_bus(bus, match_add_dports, &ctx);

	if (ctx.count == 0)
		return -ENODEV;
	if (ctx.error)
		return ctx.error;
	return ctx.count;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
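/*
 * Illustrative usage (a sketch, not code from this file): a port driver
 * would typically enumerate dports from its probe path, e.g.:
 *
 *	rc = devm_cxl_port_enumerate_dports(port);
 *	if (rc < 0)
 *		return rc;
 *
 * ...and may then key further decoder setup off the returned dport count.
 */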

static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	bool valid = false;
	int rc, i;
	u32 temp;

	if (id > CXL_DVSEC_RANGE_MAX)
		return -EINVAL;

	/* Check MEM INFO VALID bit first, give up after 1s */
	i = 1;
	do {
		rc = pci_read_config_dword(pdev,
					   d + CXL_DVSEC_RANGE_SIZE_LOW(id),
					   &temp);
		if (rc)
			return rc;

		valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
		if (valid)
			break;
		msleep(1000);
	} while (i--);

	if (!valid) {
		dev_err(&pdev->dev,
			"Timeout awaiting memory range %d valid after 1s.\n",
			id);
		return -ETIMEDOUT;
	}

	return 0;
}

static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	bool active = false;
	int rc, i;
	u32 temp;

	if (id > CXL_DVSEC_RANGE_MAX)
		return -EINVAL;

	/* Check MEM ACTIVE bit, up to 60s timeout by default */
	for (i = media_ready_timeout; i; i--) {
		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
		if (rc)
			return rc;

		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
		if (active)
			break;
		msleep(1000);
	}

	if (!active) {
		dev_err(&pdev->dev,
			"timeout awaiting memory active after %d seconds\n",
			media_ready_timeout);
		return -ETIMEDOUT;
	}

	return 0;
}

/*
 * Wait up to @media_ready_timeout for the device to report memory
 * active.
 */
int cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	int rc, i, hdm_count;
	u64 md_status;
	u16 cap;

	rc = pci_read_config_word(pdev,
				  d + CXL_DVSEC_CAP_OFFSET, &cap);
	if (rc)
		return rc;

	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
	for (i = 0; i < hdm_count; i++) {
		rc = cxl_dvsec_mem_range_valid(cxlds, i);
		if (rc)
			return rc;
	}

	for (i = 0; i < hdm_count; i++) {
		rc = cxl_dvsec_mem_range_active(cxlds, i);
		if (rc)
			return rc;
	}

	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
	if (!CXLMDEV_READY(md_status))
		return -EIO;

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
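/*
 * Illustrative usage (an assumption, not code from this file): a memdev
 * driver would typically gate CXL.mem setup on media readiness, e.g.:
 *
 *	rc = cxl_await_media_ready(cxlds);
 *	if (rc)
 *		dev_warn(dev, "media not active (%d)\n", rc);
 */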

static int wait_for_valid(struct pci_dev *pdev, int d)
{
	u32 val;
	int rc;

	/*
	 * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
	 * and Size Low registers are valid. Must be set within 1 second of
	 * deassertion of reset to CXL device. Likely it is already set by the
	 * time this runs, but otherwise give a 1.5 second timeout in case of
	 * clock skew.
	 */
	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
	if (rc)
		return rc;

	if (val & CXL_DVSEC_MEM_INFO_VALID)
		return 0;

	msleep(1500);

	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
	if (rc)
		return rc;

	if (val & CXL_DVSEC_MEM_INFO_VALID)
		return 0;

	return -ETIMEDOUT;
}

static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	u16 ctrl;
	int rc;

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc < 0)
		return rc;

	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
		return 1;
	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
	ctrl |= val;

	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
	if (rc < 0)
		return rc;

	return 0;
}

static void clear_mem_enable(void *cxlds)
{
	cxl_set_mem_enable(cxlds, 0);
}

static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
{
	int rc;

	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
	if (rc < 0)
		return rc;
	if (rc > 0)
		return 0;
	return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
}

/* require dvsec ranges to be covered by a locked platform window */
static int dvsec_range_allowed(struct device *dev, void *arg)
{
	struct range *dev_range = arg;
	struct cxl_decoder *cxld;

	if (!is_root_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);

	if (!(cxld->flags & CXL_DECODER_F_RAM))
		return 0;

	return range_contains(&cxld->hpa_range, dev_range);
}

static void disable_hdm(void *_cxlhdm)
{
	u32 global_ctrl;
	struct cxl_hdm *cxlhdm = _cxlhdm;
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
}

static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	u32 global_ctrl;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
}

int cxl_dvsec_rr_decode(struct device *dev, int d,
			struct cxl_endpoint_dvsec_info *info)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	int hdm_count, rc, i, ranges = 0;
	u16 cap, ctrl;

	if (!d) {
		dev_dbg(dev, "No DVSEC Capability\n");
		return -ENXIO;
	}

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
	if (rc)
		return rc;

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc)
		return rc;

	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
		dev_dbg(dev, "Not MEM Capable\n");
		return -ENXIO;
	}

	/*
	 * It is not allowed by spec for MEM.capable to be set and have 0 legacy
	 * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
	 * driver is for a spec defined class code which must be CXL.mem
	 * capable, there is no point in continuing to enable CXL.mem.
	 */
	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
	if (!hdm_count || hdm_count > 2)
		return -EINVAL;

	rc = wait_for_valid(pdev, d);
	if (rc) {
		dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
		return rc;
	}

	/*
	 * The current DVSEC values are moot if the memory capability is
	 * disabled, and they will remain moot after the HDM Decoder
	 * capability is enabled.
	 */
	info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
	if (!info->mem_enabled)
		return 0;

	for (i = 0; i < hdm_count; i++) {
		u64 base, size;
		u32 temp;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
		if (rc)
			return rc;

		size = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
		if (rc)
			return rc;

		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
		if (!size) {
			info->dvsec_range[i] = (struct range) {
				.start = 0,
				.end = CXL_RESOURCE_NONE,
			};
			continue;
		}

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
		if (rc)
			return rc;

		base = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
		if (rc)
			return rc;

		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;

		info->dvsec_range[i] = (struct range) {
			.start = base,
			.end = base + size - 1
		};

		ranges++;
	}

	info->ranges = ranges;

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, CXL);
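/*
 * Worked example (illustrative register values, not from hardware): if
 * Range Size High reads 0x00000001 and Size Low reads 0x10000000 (the low
 * dword's lower bits carry flags and are masked off), the decoded size is
 * 0x1_10000000. With Base High/Low of 0x00000002/0x00000000 the resulting
 * range is [0x2_00000000, 0x3_0fffffff].
 */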

/**
 * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
 * @cxlds: Device state
 * @cxlhdm: Mapped HDM decoder Capability
 * @info: Cached DVSEC range registers info
 *
 * Try to enable the endpoint's HDM Decoder Capability
 */
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
			struct cxl_endpoint_dvsec_info *info)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	struct cxl_port *port = cxlhdm->port;
	struct device *dev = cxlds->dev;
	struct cxl_port *root;
	int i, rc, allowed;
	u32 global_ctrl = 0;

	if (hdm)
		global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	/*
	 * If the HDM Decoder Capability is already enabled then assume
	 * that some other agent like platform firmware set it up.
	 */
	if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
		return devm_cxl_enable_mem(&port->dev, cxlds);
	else if (!hdm)
		return -ENODEV;

	root = to_cxl_port(port->dev.parent);
	while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
		root = to_cxl_port(root->dev.parent);
	if (!is_cxl_root(root)) {
		dev_err(dev, "Failed to acquire root port for HDM enable\n");
		return -ENODEV;
	}

	for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
		struct device *cxld_dev;

		cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
					     dvsec_range_allowed);
		if (!cxld_dev) {
			dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
			continue;
		}
		dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
		put_device(cxld_dev);
		allowed++;
	}

	if (!allowed && info->mem_enabled) {
		dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
		return -ENXIO;
	}

	/*
	 * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
	 * [High,Low] when HDM operation is enabled the range register values
	 * are ignored by the device, but the spec also recommends matching the
	 * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
	 * are expected even though Linux does not require or maintain that
	 * match. If at least one DVSEC range is enabled and allowed, skip HDM
	 * Decoder Capability Enable.
	 */
	if (info->mem_enabled)
		return 0;

	rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
	if (rc)
		return rc;

	return devm_cxl_enable_mem(&port->dev, cxlds);
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
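/*
 * Informal summary of the decisions above (derived from the code, for
 * orientation only):
 *
 *	HDM already enabled, or no HDM cap with MEM enabled -> enable CXL.mem only
 *	no HDM cap and MEM disabled                         -> -ENODEV
 *	MEM enabled, at least one range platform-allowed    -> keep DVSEC ranges in use
 *	MEM enabled, no range allowed                       -> -ENXIO
 *	MEM disabled                                        -> enable HDM, then CXL.mem
 */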

#define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
#define   CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
#define   CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
#define   CXL_DOE_TABLE_ACCESS_LAST_ENTRY	0xffff
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2

#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
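/*
 * Example encoding, derived from the field masks above: a read request for
 * entry handle 0 encodes as 0x00000000 and for entry handle 3 as 0x00030000,
 * i.e. request code 0 (read) in bits 7:0, table type 0 (CDAT) in bits 15:8,
 * and the entry handle in bits 31:16, stored little-endian.
 */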

static int cxl_cdat_get_length(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       size_t *length)
{
	__le32 request = CDAT_DOE_REQ(0);
	__le32 response[2];
	int rc;

	rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
		     CXL_DOE_PROTOCOL_TABLE_ACCESS,
		     &request, sizeof(request),
		     &response, sizeof(response));
	if (rc < 0) {
		dev_err(dev, "DOE failed: %d", rc);
		return rc;
	}
	if (rc < sizeof(response))
		return -EIO;

	*length = le32_to_cpu(response[1]);
	dev_dbg(dev, "CDAT length %zu\n", *length);

	return 0;
}

static int cxl_cdat_read_table(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       struct cdat_doe_rsp *rsp, size_t *length)
{
	size_t received, remaining = *length;
	unsigned int entry_handle = 0;
	union cdat_data *data;
	__le32 saved_dw = 0;

	do {
		__le32 request = CDAT_DOE_REQ(entry_handle);
		int rc;

		rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
			     &request, sizeof(request),
			     rsp, sizeof(*rsp) + remaining);
		if (rc < 0) {
			dev_err(dev, "DOE failed: %d", rc);
			return rc;
		}

		if (rc < sizeof(*rsp))
			return -EIO;

		data = (union cdat_data *)rsp->data;
		received = rc - sizeof(*rsp);

		if (entry_handle == 0) {
			if (received != sizeof(data->header))
				return -EIO;
		} else {
			if (received < sizeof(data->entry) ||
			    received != le16_to_cpu(data->entry.length))
				return -EIO;
		}

		/* Get the CXL table access header entry handle */
		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
					 le32_to_cpu(rsp->doe_header));

		/*
		 * Table Access Response Header overwrote the last DW of
		 * previous entry, so restore that DW
		 */
		rsp->doe_header = saved_dw;
		remaining -= received;
		rsp = (void *)rsp + received;
		saved_dw = rsp->doe_header;
	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);

	/* Length in CDAT header may exceed concatenation of CDAT entries */
	*length -= remaining;

	return 0;
}

static unsigned char cdat_checksum(void *buf, size_t size)
{
	unsigned char sum, *data = buf;
	size_t i;

	for (sum = 0, i = 0; i < size; i++)
		sum += data[i];
	return sum;
}
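/*
 * Like ACPI tables, a CDAT table is valid when the byte-wise sum of the
 * whole table (checksum field included) is 0 modulo 256, so cdat_checksum()
 * returns 0 for a good table and non-zero otherwise.
 */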

/**
 * read_cdat_data - Read the CDAT data on this port
 * @port: Port to read data from
 *
 * This call will sleep waiting for responses from the DOE mailbox.
 */
void read_cdat_data(struct cxl_port *port)
{
	struct device *uport = port->uport_dev;
	struct device *dev = &port->dev;
	struct pci_doe_mb *doe_mb;
	struct pci_dev *pdev = NULL;
	struct cxl_memdev *cxlmd;
	struct cdat_doe_rsp *buf;
	size_t table_length, length;
	int rc;

	if (is_cxl_memdev(uport)) {
		struct device *host;

		cxlmd = to_cxl_memdev(uport);
		host = cxlmd->dev.parent;
		if (dev_is_pci(host))
			pdev = to_pci_dev(host);
	} else if (dev_is_pci(uport)) {
		pdev = to_pci_dev(uport);
	}

	if (!pdev)
		return;

	doe_mb = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL,
				      CXL_DOE_PROTOCOL_TABLE_ACCESS);
	if (!doe_mb) {
		dev_dbg(dev, "No CDAT mailbox\n");
		return;
	}

	port->cdat_available = true;

	if (cxl_cdat_get_length(dev, doe_mb, &length)) {
		dev_dbg(dev, "No CDAT length\n");
		return;
	}

	/*
	 * The beginning of the CDAT buffer needs space for an additional 4
	 * bytes for the DOE header. Table data starts afterwards.
	 */
	buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
	if (!buf)
		goto err;

	table_length = length;

	rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
	if (rc)
		goto err;

	if (table_length != length)
		dev_warn(dev, "Malformed CDAT table length (%zu:%zu), discarding trailing data\n",
			 table_length, length);

	if (cdat_checksum(buf->data, length))
		goto err;

	port->cdat.table = buf->data;
	port->cdat.length = length;

	return;
err:
	/* Don't leave table data allocated on error */
	devm_kfree(dev, buf);
	dev_err(dev, "Failed to read/validate CDAT.\n");
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);

static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
				 void __iomem *ras_base)
{
	void __iomem *addr;
	u32 status;

	if (!ras_base)
		return;

	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
	status = readl(addr);
	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
		trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
	}
}

static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
{
	return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
}

/* CXL spec rev3.0 8.2.4.16.1 */
static void header_log_copy(void __iomem *ras_base, u32 *log)
{
	void __iomem *addr;
	u32 *log_addr;
	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);

	addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
	log_addr = log;

	for (i = 0; i < log_u32_size; i++) {
		*log_addr = readl(addr);
		log_addr++;
		addr += sizeof(u32);
	}
}

/*
 * Log the state of the RAS status registers and prepare them to log the
 * next error status. Return 'true' if a reset is needed.
 */
static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
			     void __iomem *ras_base)
{
	u32 hl[CXL_HEADERLOG_SIZE_U32];
	void __iomem *addr;
	u32 status;
	u32 fe;

	if (!ras_base)
		return false;

	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
	status = readl(addr);
	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
		return false;

	/* If multiple errors, log header points to first error from ctrl reg */
	if (hweight32(status) > 1) {
		void __iomem *rcc_addr =
			ras_base + CXL_RAS_CAP_CONTROL_OFFSET;

		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
				   readl(rcc_addr)));
	} else {
		fe = status;
	}

	header_log_copy(ras_base, hl);
	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);

	return true;
}
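/*
 * Illustrative example: with two uncorrectable status bits set (say bits 3
 * and 14), the First Error Pointer field in the RAS Capability Control
 * register identifies which error the header log belongs to; a pointer
 * value of 14 yields fe == BIT(14) above.
 */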

static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
{
	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
}

#ifdef CONFIG_PCIEAER_CXL

static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
	struct cxl_rcrb_info *ri = &dport->rcrb;
	void __iomem *dport_aer = NULL;
	resource_size_t aer_phys;
	struct device *host;

	if (dport->rch && ri->aer_cap) {
		host = dport->reg_map.host;
		aer_phys = ri->aer_cap + ri->base;
		dport_aer = devm_cxl_iomap_block(host, aer_phys,
				sizeof(struct aer_capability_regs));
	}

	dport->regs.dport_aer = dport_aer;
}

static void cxl_dport_map_regs(struct cxl_dport *dport)
{
	struct cxl_register_map *map = &dport->reg_map;
	struct device *dev = dport->dport_dev;

	if (!map->component_map.ras.valid)
		dev_dbg(dev, "RAS registers not found\n");
	else if (cxl_map_component_regs(map, &dport->regs.component,
					BIT(CXL_CM_CAP_CAP_ID_RAS)))
		dev_dbg(dev, "Failed to map RAS capability.\n");

	if (dport->rch)
		cxl_dport_map_rch_aer(dport);
}

static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
	void __iomem *aer_base = dport->regs.dport_aer;
	struct pci_host_bridge *bridge;
	u32 aer_cmd_mask, aer_cmd;

	if (!aer_base)
		return;

	bridge = to_pci_host_bridge(dport->dport_dev);

	/*
	 * Disable RCH root port command interrupts.
	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
	 *
	 * This sequence may not be necessary. CXL spec states disabling
	 * the root cmd register's interrupts is required. But, PCI spec
	 * shows these are disabled by default on reset.
	 */
	if (bridge->native_aer) {
		aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
				PCI_ERR_ROOT_CMD_NONFATAL_EN |
				PCI_ERR_ROOT_CMD_FATAL_EN);
		aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
		aer_cmd &= ~aer_cmd_mask;
		writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
	}
}

void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
{
	struct device *dport_dev = dport->dport_dev;
	struct pci_host_bridge *host_bridge;

	host_bridge = to_pci_host_bridge(dport_dev);
	if (host_bridge->native_aer)
		dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base);

	dport->reg_map.host = host;
	cxl_dport_map_regs(dport);

	if (dport->rch)
		cxl_disable_rch_root_ints(dport);
}
EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL);

static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
				      struct cxl_dport *dport)
{
	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
}

static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
				  struct cxl_dport *dport)
{
	return __cxl_handle_ras(cxlds, dport->regs.ras);
}

/*
 * Copy the AER capability registers using 32 bit read accesses.
 * This is necessary because RCRB AER capability is MMIO mapped. Clear the
 * status after copying.
 *
 * @aer_base: base address of AER capability block in RCRB
 * @aer_regs: destination for copying AER capability
 */
static bool cxl_rch_get_aer_info(void __iomem *aer_base,
				 struct aer_capability_regs *aer_regs)
{
	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
	u32 *aer_regs_buf = (u32 *)aer_regs;
	int n;

	if (!aer_base)
		return false;

	/* Use readl() to guarantee 32-bit accesses */
	for (n = 0; n < read_cnt; n++)
		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));

	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);

	return true;
}

/* Get AER severity. Return false if there is no error. */
static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
				     int *severity)
{
	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
			*severity = AER_FATAL;
		else
			*severity = AER_NONFATAL;
		return true;
	}

	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
		*severity = AER_CORRECTABLE;
		return true;
	}

	return false;
}

static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	struct aer_capability_regs aer_regs;
	struct cxl_dport *dport;
	struct cxl_port *port;
	int severity;

	port = cxl_pci_find_port(pdev, &dport);
	if (!port)
		return;

	put_device(&port->dev);

	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
		return;

	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
		return;

	pci_print_aer(pdev, severity, &aer_regs);

	if (severity == AER_CORRECTABLE)
		cxl_handle_rdport_cor_ras(cxlds, dport);
	else
		cxl_handle_rdport_ras(cxlds, dport);
}

#else
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
#endif

void cxl_cor_error_detected(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct device *dev = &cxlds->cxlmd->dev;

	scoped_guard(device, dev) {
		if (!dev->driver) {
			dev_warn(&pdev->dev,
				 "%s: memdev disabled, abort error handling\n",
				 dev_name(dev));
			return;
		}

		if (cxlds->rcd)
			cxl_handle_rdport_errors(cxlds);

		cxl_handle_endpoint_cor_ras(cxlds);
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);

pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
				    pci_channel_state_t state)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;
	bool ue;

	scoped_guard(device, dev) {
		if (!dev->driver) {
			dev_warn(&pdev->dev,
				 "%s: memdev disabled, abort error handling\n",
				 dev_name(dev));
			return PCI_ERS_RESULT_DISCONNECT;
		}

		if (cxlds->rcd)
			cxl_handle_rdport_errors(cxlds);
		/*
		 * A frozen channel indicates an impending reset which is fatal to
		 * CXL.mem operation, and will likely crash the system. On the off
		 * chance the situation is recoverable dump the status of the RAS
		 * capability registers and bounce the active state of the memdev.
		 */
		ue = cxl_handle_endpoint_ras(cxlds);
	}

	switch (state) {
	case pci_channel_io_normal:
		if (ue) {
			device_release_driver(dev);
			return PCI_ERS_RESULT_NEED_RESET;
		}
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		dev_warn(&pdev->dev,
			 "%s: frozen state error detected, disable CXL.mem\n",
			 dev_name(dev));
		device_release_driver(dev);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		dev_warn(&pdev->dev,
			 "failure state error detected, request disconnect\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	return PCI_ERS_RESULT_NEED_RESET;
}
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL);

static int cxl_flit_size(struct pci_dev *pdev)
{
	if (cxl_pci_flit_256(pdev))
		return 256;

	return 68;
}

/**
 * cxl_pci_get_latency - calculate the link latency for the PCIe link
 * @pdev: PCI device
 *
 * return: calculated latency or 0 for no latency
 *
 * CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation
 * Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency
 * LinkPropagationLatency is negligible, so 0 will be used
 * RetimerLatency is assumed to be negligible and 0 will be used
 * FlitLatency = FlitSize / LinkBandwidth
 * FlitSize is defined by spec. CXL rev3.0 4.2.1.
 * A 68B flit is used up to 32GT/s; above 32GT/s, a 256B flit is used.
 * The FlitLatency is converted to picoseconds.
 */
long cxl_pci_get_latency(struct pci_dev *pdev)
{
	long bw;

	bw = pcie_link_speed_mbps(pdev);
	if (bw < 0)
		return 0;
	bw /= BITS_PER_BYTE;

	return cxl_flit_size(pdev) * MEGA / bw;
}
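/*
 * Worked example (illustrative numbers): a 16 GT/s link reports 16000 Mb/s,
 * i.e. 2000 MB/s after dividing by BITS_PER_BYTE, so a 68B flit yields
 * 68 * 1000000 / 2000 = 34000 ps (34 ns) of flit latency.
 */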