1 | // SPDX-License-Identifier: GPL-2.0-only |
---|---|
2 | /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ |
3 | #include <asm-generic/unaligned.h> |
4 | #include <linux/io-64-nonatomic-lo-hi.h> |
5 | #include <linux/moduleparam.h> |
6 | #include <linux/module.h> |
7 | #include <linux/delay.h> |
8 | #include <linux/sizes.h> |
9 | #include <linux/mutex.h> |
10 | #include <linux/list.h> |
11 | #include <linux/pci.h> |
12 | #include <linux/aer.h> |
13 | #include <linux/io.h> |
14 | #include "cxlmem.h" |
15 | #include "cxlpci.h" |
16 | #include "cxl.h" |
17 | #include "pmu.h" |
18 | |
19 | /** |
20 | * DOC: cxl pci |
21 | * |
22 | * This implements the PCI exclusive functionality for a CXL device as it is |
23 | * defined by the Compute Express Link specification. CXL devices may surface |
24 | * certain functionality even if it isn't CXL enabled. While this driver is |
25 | * focused around the PCI specific aspects of a CXL device, it binds to the |
26 | * specific CXL memory device class code, and therefore the implementation of |
27 | * cxl_pci is focused around CXL memory devices. |
28 | * |
29 | * The driver has several responsibilities, mainly: |
30 | * - Create the memX device and register it on the CXL bus. |
31 | * - Enumerate the device's register interfaces and map them. |
32 | * - Register an nvdimm bridge device with cxl_core. |
33 | * - Register a CXL mailbox with cxl_core. |
34 | */ |
35 | |
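/*
 * cxl_doorbell_busy() samples the primary mailbox Control register; the
 * doorbell bit is set by the driver to submit a command and cleared by the
 * device once it has consumed the command.
 */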
36 | #define cxl_doorbell_busy(cxlds) \ |
37 | (readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) & \ |
38 | CXLDEV_MBOX_CTRL_DOORBELL) |
39 | |
40 | /* CXL 2.0 - 8.2.8.4; despite the _MS suffix the value is in jiffies (2s) */ |
41 | #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ) |
42 | |
43 | /* |
44 | * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to |
45 | * dictate how long to wait for the mailbox to become ready. The new |
46 | * field allows the device to tell software how long to wait before the |
47 | * mailbox becomes ready. Per the spec this field theoretically allows |
48 | * for up to 255 seconds, which is unreasonably long; it's longer than |
49 | * the maximum SATA port link recovery wait. Default to 60 seconds |
50 | * until someone builds a CXL device that needs more time in practice. |
51 | */ |
52 | static unsigned short mbox_ready_timeout = 60; |
53 | module_param(mbox_ready_timeout, ushort, 0644); |
54 | MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); |
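/*
 * The default can be overridden at load time (e.g. "modprobe cxl_pci
 * mbox_ready_timeout=120") or, because the parameter is 0644, at runtime
 * via /sys/module/cxl_pci/parameters/mbox_ready_timeout.
 */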
55 | |
56 | static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) |
57 | { |
58 | const unsigned long start = jiffies; |
59 | unsigned long end = start; |
60 | |
61 | while (cxl_doorbell_busy(cxlds)) { |
62 | end = jiffies; |
63 | |
64 | if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) { |
65 | /* Check again in case preempted before timeout test */ |
66 | if (!cxl_doorbell_busy(cxlds)) |
67 | break; |
68 | return -ETIMEDOUT; |
69 | } |
70 | cpu_relax(); |
71 | } |
72 | |
73 | dev_dbg(cxlds->dev, "Doorbell wait took %dms", |
74 | jiffies_to_msecs(end) - jiffies_to_msecs(start)); |
75 | return 0; |
76 | } |
77 | |
78 | #define cxl_err(dev, status, msg) \ |
79 | dev_err_ratelimited(dev, msg ", device state %s%s\n", \ |
80 | status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ |
81 | status & CXLMDEV_FW_HALT ? " firmware-halt" : "") |
82 | |
83 | #define cxl_cmd_err(dev, cmd, status, msg) \ |
84 | dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \ |
85 | (cmd)->opcode, \ |
86 | status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ |
87 | status & CXLMDEV_FW_HALT ? " firmware-halt" : "") |
88 | |
89 | /* |
90 | * Threaded irq dev_ids must be globally unique. cxl_dev_id provides a unique |
91 | * wrapper object for each irq within the same cxlds. |
92 | */ |
93 | struct cxl_dev_id { |
94 | struct cxl_dev_state *cxlds; |
95 | }; |
96 | |
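/*
 * Helper to request a threaded, shared interrupt whose lifetime (and the
 * lifetime of its cxl_dev_id cookie) is tied to the cxl_dev_state's device
 * via devres.
 */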
97 | static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq, |
98 | irq_handler_t thread_fn) |
99 | { |
100 | struct device *dev = cxlds->dev; |
101 | struct cxl_dev_id *dev_id; |
102 | |
103 | dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); |
104 | if (!dev_id) |
105 | return -ENOMEM; |
106 | dev_id->cxlds = cxlds; |
107 | |
108 | return devm_request_threaded_irq(dev, irq, NULL, thread_fn, |
109 | IRQF_SHARED | IRQF_ONESHOT, NULL, |
110 | dev_id); |
111 | } |
112 | |
113 | static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds) |
114 | { |
115 | u64 reg; |
116 | |
117 | reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); |
118 | return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100; |
119 | } |
120 | |
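/*
 * Threaded handler for the background-command completion interrupt. A
 * completed sanitize is handed off to the polling worker so the sysfs
 * notification happens there; any other background command simply wakes
 * the waiter in __cxl_pci_mbox_send_cmd().
 */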
121 | static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) |
122 | { |
123 | u64 reg; |
124 | u16 opcode; |
125 | struct cxl_dev_id *dev_id = id; |
126 | struct cxl_dev_state *cxlds = dev_id->cxlds; |
127 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); |
128 | |
129 | if (!cxl_mbox_background_complete(cxlds)) |
130 | return IRQ_NONE; |
131 | |
132 | reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); |
133 | opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); |
134 | if (opcode == CXL_MBOX_OP_SANITIZE) { |
135 | mutex_lock(&mds->mbox_mutex); |
136 | if (mds->security.sanitize_node) |
137 | mod_delayed_work(system_wq, &mds->security.poll_dwork, 0); |
138 | mutex_unlock(&mds->mbox_mutex); |
139 | } else { |
140 | /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ |
141 | rcuwait_wake_up(&mds->mbox_wait); |
142 | } |
143 | |
144 | return IRQ_HANDLED; |
145 | } |
146 | |
147 | /* |
148 | * Sanitization operation polling mode. |
149 | */ |
150 | static void cxl_mbox_sanitize_work(struct work_struct *work) |
151 | { |
152 | struct cxl_memdev_state *mds = |
153 | container_of(work, typeof(*mds), security.poll_dwork.work); |
154 | struct cxl_dev_state *cxlds = &mds->cxlds; |
155 | |
156 | mutex_lock(&mds->mbox_mutex); |
157 | if (cxl_mbox_background_complete(cxlds)) { |
158 | mds->security.poll_tmo_secs = 0; |
159 | if (mds->security.sanitize_node) |
160 | sysfs_notify_dirent(mds->security.sanitize_node); |
161 | mds->security.sanitize_active = false; |
162 | |
163 | dev_dbg(cxlds->dev, "Sanitization operation ended\n"); |
164 | } else { |
165 | int timeout = mds->security.poll_tmo_secs + 10; |
166 | |
167 | mds->security.poll_tmo_secs = min(15 * 60, timeout); |
168 | schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ); |
169 | } |
170 | mutex_unlock(&mds->mbox_mutex); |
171 | } |
172 | |
173 | /** |
174 | * __cxl_pci_mbox_send_cmd() - Execute a mailbox command |
175 | * @mds: The memory device driver data |
176 | * @mbox_cmd: Command to send to the memory device. |
177 | * |
178 | * Context: Any context. Expects mbox_mutex to be held. |
179 | * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success. |
180 | * Caller should check the return code in @mbox_cmd to make sure it |
181 | * succeeded. |
182 | * |
183 | * This is a generic form of the CXL mailbox send command, and thus only uses |
184 | * the registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory |
185 | * devices, and perhaps other types of CXL devices may have further information |
186 | * available upon error conditions. Driver facilities wishing to send mailbox |
187 | * commands should use the wrapper command. |
188 | * |
189 | * The CXL spec allows for up to two mailboxes. The intention is for the primary |
190 | * mailbox to be OS controlled and the secondary mailbox to be used by system |
191 | * firmware. This allows the OS and firmware to communicate with the device and |
192 | * not need to coordinate with each other. The driver only uses the primary |
193 | * mailbox. |
194 | */ |
195 | static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, |
196 | struct cxl_mbox_cmd *mbox_cmd) |
197 | { |
198 | struct cxl_dev_state *cxlds = &mds->cxlds; |
199 | void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; |
200 | struct device *dev = cxlds->dev; |
201 | u64 cmd_reg, status_reg; |
202 | size_t out_len; |
203 | int rc; |
204 | |
205 | lockdep_assert_held(&mds->mbox_mutex); |
206 | |
207 | /* |
208 | * Here are the steps from 8.2.8.4 of the CXL 2.0 spec. |
209 | * 1. Caller reads MB Control Register to verify doorbell is clear |
210 | * 2. Caller writes Command Register |
211 | * 3. Caller writes Command Payload Registers if input payload is non-empty |
212 | * 4. Caller writes MB Control Register to set doorbell |
213 | * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured |
214 | * 6. Caller reads MB Status Register to fetch Return code |
215 | * 7. If command successful, Caller reads Command Register to get Payload Length |
216 | * 8. If output payload is non-empty, host reads Command Payload Registers |
217 | * |
218 | * Hardware is free to do whatever it wants before the doorbell is rung, |
219 | * and isn't allowed to change anything after it clears the doorbell. As |
220 | * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can |
221 | * also happen in any order (though some orders might not make sense). |
222 | */ |
223 | |
224 | /* #1 */ |
225 | if (cxl_doorbell_busy(cxlds)) { |
226 | u64 md_status = |
227 | readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); |
228 | |
229 | cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, |
230 | "mailbox queue busy"); |
231 | return -EBUSY; |
232 | } |
233 | |
234 | /* |
235 | * With sanitize polling, hardware might be done and the poller still |
236 | * not be in sync. Ensure no new command is issued until the poller has |
237 | * caught up. Keep the hardware semantics and only allow device health status. |
238 | */ |
239 | if (mds->security.poll_tmo_secs > 0) { |
240 | if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO) |
241 | return -EBUSY; |
242 | } |
243 | |
244 | cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, |
245 | mbox_cmd->opcode); |
246 | if (mbox_cmd->size_in) { |
247 | if (WARN_ON(!mbox_cmd->payload_in)) |
248 | return -EINVAL; |
249 | |
250 | cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, |
251 | mbox_cmd->size_in); |
252 | memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in); |
253 | } |
254 | |
255 | /* #2, #3 */ |
256 | writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); |
257 | |
258 | /* #4 */ |
259 | dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode); |
260 | writel(CXLDEV_MBOX_CTRL_DOORBELL, |
261 | cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); |
262 | |
263 | /* #5 */ |
264 | rc = cxl_pci_mbox_wait_for_doorbell(cxlds); |
265 | if (rc == -ETIMEDOUT) { |
266 | u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); |
267 | |
268 | cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); |
269 | return rc; |
270 | } |
271 | |
272 | /* #6 */ |
273 | status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET); |
274 | mbox_cmd->return_code = |
275 | FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); |
276 | |
277 | /* |
278 | * Handle the background command in a synchronous manner. |
279 | * |
280 | * All other mailbox commands will serialize/queue on the mbox_mutex, |
281 | * which we currently hold. Furthermore this also guarantees that |
282 | * cxl_mbox_background_complete() checks are safe amongst each other, |
283 | * in that no new bg operation can occur in between. |
284 | * |
285 | * Background operations are timesliced in accordance with the nature |
286 | * of the command. In the event of timeout, the mailbox state is |
287 | * indeterminate until the next successful command submission and the |
288 | * driver can get back in sync with the hardware state. |
289 | */ |
290 | if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) { |
291 | u64 bg_status_reg; |
292 | int i, timeout; |
293 | |
294 | /* |
295 | * Sanitization is a special case which monopolizes the device |
296 | * and cannot be timesliced. Handle asynchronously instead, |
297 | * and allow userspace to poll(2) for completion. |
298 | */ |
299 | if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) { |
300 | if (mds->security.sanitize_active) |
301 | return -EBUSY; |
302 | |
303 | /* give first timeout a second */ |
304 | timeout = 1; |
305 | mds->security.poll_tmo_secs = timeout; |
306 | mds->security.sanitize_active = true; |
307 | schedule_delayed_work(&mds->security.poll_dwork, |
308 | timeout * HZ); |
309 | dev_dbg(dev, "Sanitization operation started\n"); |
310 | goto success; |
311 | } |
312 | |
313 | dev_dbg(dev, "Mailbox background operation (0x%04x) started\n", |
314 | mbox_cmd->opcode); |
315 | |
316 | timeout = mbox_cmd->poll_interval_ms; |
317 | for (i = 0; i < mbox_cmd->poll_count; i++) { |
318 | if (rcuwait_wait_event_timeout(&mds->mbox_wait, |
319 | cxl_mbox_background_complete(cxlds), |
320 | TASK_UNINTERRUPTIBLE, |
321 | msecs_to_jiffies(timeout)) > 0) |
322 | break; |
323 | } |
324 | |
325 | if (!cxl_mbox_background_complete(cxlds)) { |
326 | dev_err(dev, "timeout waiting for background (%d ms)\n", |
327 | timeout * mbox_cmd->poll_count); |
328 | return -ETIMEDOUT; |
329 | } |
330 | |
331 | bg_status_reg = readq(cxlds->regs.mbox + |
332 | CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); |
333 | mbox_cmd->return_code = |
334 | FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK, |
335 | bg_status_reg); |
336 | dev_dbg(dev, |
337 | "Mailbox background operation (0x%04x) completed\n", |
338 | mbox_cmd->opcode); |
339 | } |
340 | |
341 | if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { |
342 | dev_dbg(dev, "Mailbox operation had an error: %s\n", |
343 | cxl_mbox_cmd_rc2str(mbox_cmd)); |
344 | return 0; /* completed but caller must check return_code */ |
345 | } |
346 | |
347 | success: |
348 | /* #7 */ |
349 | cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); |
350 | out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg); |
351 | |
352 | /* #8 */ |
353 | if (out_len && mbox_cmd->payload_out) { |
354 | /* |
355 | * Sanitize the copy. If hardware misbehaves, out_len per the |
356 | * spec can actually be greater than the max allowed size (21 |
357 | * bits available but spec defined 1M max). The caller also may |
358 | * have requested less data than the hardware supplied even |
359 | * within spec. |
360 | */ |
361 | size_t n; |
362 | |
363 | n = min3(mbox_cmd->size_out, mds->payload_size, out_len); |
364 | memcpy_fromio(mbox_cmd->payload_out, payload, n); |
365 | mbox_cmd->size_out = n; |
366 | } else { |
367 | mbox_cmd->size_out = 0; |
368 | } |
369 | |
370 | return 0; |
371 | } |
372 | |
373 | static int cxl_pci_mbox_send(struct cxl_memdev_state *mds, |
374 | struct cxl_mbox_cmd *cmd) |
375 | { |
376 | int rc; |
377 | |
378 | mutex_lock_io(&mds->mbox_mutex); |
379 | rc = __cxl_pci_mbox_send_cmd(mds, cmd); |
380 | mutex_unlock(&mds->mbox_mutex); |
381 | |
382 | return rc; |
383 | } |
384 | |
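/*
 * Probe-time mailbox bring-up: wait for the Mailbox Interface Ready bit,
 * drain a doorbell possibly left busy by a previous driver instance, size
 * the payload buffer from the capability register, and opportunistically
 * enable the background-command completion interrupt.
 */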
385 | static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) |
386 | { |
387 | struct cxl_dev_state *cxlds = &mds->cxlds; |
388 | const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); |
389 | struct device *dev = cxlds->dev; |
390 | unsigned long timeout; |
391 | int irq, msgnum; |
392 | u64 md_status; |
393 | u32 ctrl; |
394 | |
395 | timeout = jiffies + mbox_ready_timeout * HZ; |
396 | do { |
397 | md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); |
398 | if (md_status & CXLMDEV_MBOX_IF_READY) |
399 | break; |
400 | if (msleep_interruptible(100)) |
401 | break; |
402 | } while (!time_after(jiffies, timeout)); |
403 | |
404 | if (!(md_status & CXLMDEV_MBOX_IF_READY)) { |
405 | cxl_err(dev, md_status, "timeout awaiting mailbox ready"); |
406 | return -ETIMEDOUT; |
407 | } |
408 | |
409 | /* |
410 | * A command may be in flight from a previous driver instance |
411 | * (think kexec); do one doorbell wait so that |
412 | * __cxl_pci_mbox_send_cmd() can assume that it is the only |
413 | * source for future doorbell busy events. |
414 | */ |
415 | if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { |
416 | cxl_err(dev, md_status, "timeout awaiting mailbox idle"); |
417 | return -ETIMEDOUT; |
418 | } |
419 | |
420 | mds->mbox_send = cxl_pci_mbox_send; |
421 | mds->payload_size = |
422 | 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); |
423 | |
424 | /* |
425 | * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register |
426 | * |
427 | * If the size is too small, mandatory commands will not work and so |
428 | * there's no point in going forward. If the size is too large, there's |
429 | * no harm in soft limiting it. |
430 | */ |
431 | mds->payload_size = min_t(size_t, mds->payload_size, SZ_1M); |
432 | if (mds->payload_size < 256) { |
433 | dev_err(dev, "Mailbox is too small (%zub)", |
434 | mds->payload_size); |
435 | return -ENXIO; |
436 | } |
437 | |
438 | dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size); |
439 | |
440 | rcuwait_init(&mds->mbox_wait); |
441 | INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); |
442 | |
443 | /* background command interrupts are optional */ |
444 | if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail) |
445 | return 0; |
446 | |
447 | msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); |
448 | irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum); |
449 | if (irq < 0) |
450 | return 0; |
451 | |
452 | if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq)) |
453 | return 0; |
454 | |
455 | dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n"); |
456 | /* enable background command mbox irq support */ |
457 | ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); |
458 | ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; |
459 | writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); |
460 | |
461 | return 0; |
462 | } |
463 | |
464 | /* |
465 | * Assume that any RCIEP that emits the CXL memory expander class code |
466 | * is an RCD |
467 | */ |
468 | static bool is_cxl_restricted(struct pci_dev *pdev) |
469 | { |
470 | return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; |
471 | } |
472 | |
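/*
 * An RCD does not describe its component registers through the Register
 * Locator DVSEC; instead they are reached via the RCRB advertised by the
 * host bridge dport the device is attached to.
 */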
473 | static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, |
474 | struct cxl_register_map *map) |
475 | { |
476 | struct cxl_port *port; |
477 | struct cxl_dport *dport; |
478 | resource_size_t component_reg_phys; |
479 | |
480 | *map = (struct cxl_register_map) { |
481 | .host = &pdev->dev, |
482 | .resource = CXL_RESOURCE_NONE, |
483 | }; |
484 | |
485 | port = cxl_pci_find_port(pdev, &dport); |
486 | if (!port) |
487 | return -EPROBE_DEFER; |
488 | |
489 | component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); |
490 | |
491 | put_device(&port->dev); |
492 | |
493 | if (component_reg_phys == CXL_RESOURCE_NONE) |
494 | return -ENXIO; |
495 | |
496 | map->resource = component_reg_phys; |
497 | map->reg_type = CXL_REGLOC_RBI_COMPONENT; |
498 | map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; |
499 | |
500 | return 0; |
501 | } |
502 | |
503 | static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, |
504 | struct cxl_register_map *map) |
505 | { |
506 | int rc; |
507 | |
508 | rc = cxl_find_regblock(pdev, type, map); |
509 | |
510 | /* |
511 | * If the Register Locator DVSEC does not exist, check if it |
512 | * is an RCH and try to extract the Component Registers from |
513 | * an RCRB. |
514 | */ |
515 | if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) |
516 | rc = cxl_rcrb_get_comp_regs(pdev, map); |
517 | |
518 | if (rc) |
519 | return rc; |
520 | |
521 | return cxl_setup_regs(map); |
522 | } |
523 | |
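/*
 * CXL RAS errors come up masked; unmask the uncorrectable and/or
 * correctable classes to mirror the native AER reporting settings in the
 * PCIe Device Control register.
 */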
524 | static int cxl_pci_ras_unmask(struct pci_dev *pdev) |
525 | { |
526 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
527 | void __iomem *addr; |
528 | u32 orig_val, val, mask; |
529 | u16 cap; |
530 | int rc; |
531 | |
532 | if (!cxlds->regs.ras) { |
533 | dev_dbg(&pdev->dev, "No RAS registers.\n"); |
534 | return 0; |
535 | } |
536 | |
537 | /* BIOS has PCIe AER error control */ |
538 | if (!pcie_aer_is_native(pdev)) |
539 | return 0; |
540 | |
541 | rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); |
542 | if (rc) |
543 | return rc; |
544 | |
545 | if (cap & PCI_EXP_DEVCTL_URRE) { |
546 | addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; |
547 | orig_val = readl(addr); |
548 | |
549 | mask = CXL_RAS_UNCORRECTABLE_MASK_MASK | |
550 | CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; |
551 | val = orig_val & ~mask; |
552 | writel(val, addr); |
553 | dev_dbg(&pdev->dev, |
554 | "Uncorrectable RAS Errors Mask: %#x -> %#x\n", |
555 | orig_val, val); |
556 | } |
557 | |
558 | if (cap & PCI_EXP_DEVCTL_CERE) { |
559 | addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; |
560 | orig_val = readl(addr); |
561 | val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK; |
562 | writel(val, addr); |
563 | dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n", |
564 | orig_val, val); |
565 | } |
566 | |
567 | return 0; |
568 | } |
569 | |
570 | static void free_event_buf(void *buf) |
571 | { |
572 | kvfree(buf); |
573 | } |
574 | |
575 | /* |
576 | * There is a single buffer for reading event logs from the mailbox. All logs |
577 | * share this buffer protected by the mds->event_log_lock. |
578 | */ |
579 | static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) |
580 | { |
581 | struct cxl_get_event_payload *buf; |
582 | |
583 | buf = kvmalloc(mds->payload_size, GFP_KERNEL); |
584 | if (!buf) |
585 | return -ENOMEM; |
586 | mds->event.buf = buf; |
587 | |
588 | return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf); |
589 | } |
590 | |
591 | static bool cxl_alloc_irq_vectors(struct pci_dev *pdev) |
592 | { |
593 | int nvecs; |
594 | |
595 | /* |
596 | * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must |
597 | * not generate INTx messages if that function participates in |
598 | * CXL.cache or CXL.mem. |
599 | * |
600 | * Additionally pci_alloc_irq_vectors() handles calling |
601 | * pci_free_irq_vectors() automatically despite not having a |
602 | * pcim_* name. See pci_setup_msi_context(). |
603 | */ |
604 | nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS, |
605 | PCI_IRQ_MSIX | PCI_IRQ_MSI); |
606 | if (nvecs < 1) { |
607 | dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs); |
608 | return false; |
609 | } |
610 | return true; |
611 | } |
612 | |
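/*
 * Threaded event interrupt handler: keep draining event records until the
 * device reports no more pending logs in its event status register.
 */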
613 | static irqreturn_t cxl_event_thread(int irq, void *id) |
614 | { |
615 | struct cxl_dev_id *dev_id = id; |
616 | struct cxl_dev_state *cxlds = dev_id->cxlds; |
617 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); |
618 | u32 status; |
619 | |
620 | do { |
621 | /* |
622 | * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status; |
623 | * ignore the reserved upper 32 bits |
624 | */ |
625 | status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); |
626 | /* Ignore logs unknown to the driver */ |
627 | status &= CXLDEV_EVENT_STATUS_ALL; |
628 | if (!status) |
629 | break; |
630 | cxl_mem_get_event_records(mds, status); |
631 | cond_resched(); |
632 | } while (status); |
633 | |
634 | return IRQ_HANDLED; |
635 | } |
636 | |
637 | static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) |
638 | { |
639 | struct pci_dev *pdev = to_pci_dev(cxlds->dev); |
640 | int irq; |
641 | |
642 | if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX) |
643 | return -ENXIO; |
644 | |
645 | irq = pci_irq_vector(pdev, |
646 | FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting)); |
647 | if (irq < 0) |
648 | return irq; |
649 | |
650 | return cxl_request_irq(cxlds, irq, cxl_event_thread); |
651 | } |
652 | |
653 | static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, |
654 | struct cxl_event_interrupt_policy *policy) |
655 | { |
656 | struct cxl_mbox_cmd mbox_cmd = { |
657 | .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY, |
658 | .payload_out = policy, |
659 | .size_out = sizeof(*policy), |
660 | }; |
661 | int rc; |
662 | |
663 | rc = cxl_internal_send_cmd(mds, &mbox_cmd); |
664 | if (rc < 0) |
665 | dev_err(mds->cxlds.dev, |
666 | "Failed to get event interrupt policy : %d", rc); |
667 | |
668 | return rc; |
669 | } |
670 | |
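/*
 * Request MSI/MSI-X delivery for all four event logs, then read the policy
 * back because the device is free to reject or adjust the request.
 */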
671 | static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, |
672 | struct cxl_event_interrupt_policy *policy) |
673 | { |
674 | struct cxl_mbox_cmd mbox_cmd; |
675 | int rc; |
676 | |
677 | *policy = (struct cxl_event_interrupt_policy) { |
678 | .info_settings = CXL_INT_MSI_MSIX, |
679 | .warn_settings = CXL_INT_MSI_MSIX, |
680 | .failure_settings = CXL_INT_MSI_MSIX, |
681 | .fatal_settings = CXL_INT_MSI_MSIX, |
682 | }; |
683 | |
684 | mbox_cmd = (struct cxl_mbox_cmd) { |
685 | .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY, |
686 | .payload_in = policy, |
687 | .size_in = sizeof(*policy), |
688 | }; |
689 | |
690 | rc = cxl_internal_send_cmd(mds, &mbox_cmd); |
691 | if (rc < 0) { |
692 | dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d", |
693 | rc); |
694 | return rc; |
695 | } |
696 | |
697 | /* Retrieve final interrupt settings */ |
698 | return cxl_event_get_int_policy(mds, policy); |
699 | } |
700 | |
701 | static int cxl_event_irqsetup(struct cxl_memdev_state *mds) |
702 | { |
703 | struct cxl_dev_state *cxlds = &mds->cxlds; |
704 | struct cxl_event_interrupt_policy policy; |
705 | int rc; |
706 | |
707 | rc = cxl_event_config_msgnums(mds, &policy); |
708 | if (rc) |
709 | return rc; |
710 | |
711 | rc = cxl_event_req_irq(cxlds, policy.info_settings); |
712 | if (rc) { |
713 | dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n"); |
714 | return rc; |
715 | } |
716 | |
717 | rc = cxl_event_req_irq(cxlds, policy.warn_settings); |
718 | if (rc) { |
719 | dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n"); |
720 | return rc; |
721 | } |
722 | |
723 | rc = cxl_event_req_irq(cxlds, policy.failure_settings); |
724 | if (rc) { |
725 | dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n"); |
726 | return rc; |
727 | } |
728 | |
729 | rc = cxl_event_req_irq(cxlds, policy.fatal_settings); |
730 | if (rc) { |
731 | dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n"); |
732 | return rc; |
733 | } |
734 | |
735 | return 0; |
736 | } |
737 | |
738 | static bool cxl_event_int_is_fw(u8 setting) |
739 | { |
740 | u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting); |
741 | |
742 | return mode == CXL_INT_FW; |
743 | } |
744 | |
745 | static int cxl_event_config(struct pci_host_bridge *host_bridge, |
746 | struct cxl_memdev_state *mds, bool irq_avail) |
747 | { |
748 | struct cxl_event_interrupt_policy policy; |
749 | int rc; |
750 | |
751 | /* |
752 | * When BIOS maintains CXL error reporting control, it will process |
753 | * event records. Only one agent can do so. |
754 | */ |
755 | if (!host_bridge->native_cxl_error) |
756 | return 0; |
757 | |
758 | if (!irq_avail) { |
759 | dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); |
760 | return 0; |
761 | } |
762 | |
763 | rc = cxl_mem_alloc_event_buf(mds); |
764 | if (rc) |
765 | return rc; |
766 | |
767 | rc = cxl_event_get_int_policy(mds, &policy); |
768 | if (rc) |
769 | return rc; |
770 | |
771 | if (cxl_event_int_is_fw(policy.info_settings) || |
772 | cxl_event_int_is_fw(policy.warn_settings) || |
773 | cxl_event_int_is_fw(policy.failure_settings) || |
774 | cxl_event_int_is_fw(policy.fatal_settings)) { |
775 | dev_err(mds->cxlds.dev, |
776 | "FW still in control of Event Logs despite _OSC settings\n"); |
777 | return -EBUSY; |
778 | } |
779 | |
780 | rc = cxl_event_irqsetup(mds); |
781 | if (rc) |
782 | return rc; |
783 | |
784 | cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); |
785 | |
786 | return 0; |
787 | } |
788 | |
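/*
 * Probe order matters: register blocks are mapped first, then the mailbox
 * is brought up (command enumeration, timestamp, poison and identify all
 * depend on it), then the memdev is registered on the CXL bus, and finally
 * optional PMU, event and RAS facilities are wired up best-effort.
 */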
789 | static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
790 | { |
791 | struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); |
792 | struct cxl_memdev_state *mds; |
793 | struct cxl_dev_state *cxlds; |
794 | struct cxl_register_map map; |
795 | struct cxl_memdev *cxlmd; |
796 | int i, rc, pmu_count; |
797 | bool irq_avail; |
798 | |
799 | /* |
800 | * Double check the anonymous union trickery in struct cxl_regs |
801 | * FIXME switch to struct_group() |
802 | */ |
803 | BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) != |
804 | offsetof(struct cxl_regs, device_regs.memdev)); |
805 | |
806 | rc = pcim_enable_device(pdev); |
807 | if (rc) |
808 | return rc; |
809 | pci_set_master(pdev); |
810 | |
811 | mds = cxl_memdev_state_create(&pdev->dev); |
812 | if (IS_ERR(mds)) |
813 | return PTR_ERR(mds); |
814 | cxlds = &mds->cxlds; |
815 | pci_set_drvdata(pdev, cxlds); |
816 | |
817 | cxlds->rcd = is_cxl_restricted(pdev); |
818 | cxlds->serial = pci_get_dsn(pdev); |
819 | cxlds->cxl_dvsec = pci_find_dvsec_capability( |
820 | pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); |
821 | if (!cxlds->cxl_dvsec) |
822 | dev_warn(&pdev->dev, |
823 | "Device DVSEC not present, skip CXL.mem init\n"); |
824 | |
825 | rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); |
826 | if (rc) |
827 | return rc; |
828 | |
829 | rc = cxl_map_device_regs(&map, &cxlds->regs.device_regs); |
830 | if (rc) |
831 | return rc; |
832 | |
833 | /* |
834 | * If the component registers can't be found, the cxl_pci driver may |
835 | * still be useful for management functions so don't return an error. |
836 | */ |
837 | rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, |
838 | &cxlds->reg_map); |
839 | if (rc) |
840 | dev_warn(&pdev->dev, "No component registers (%d)\n", rc); |
841 | else if (!cxlds->reg_map.component_map.ras.valid) |
842 | dev_dbg(&pdev->dev, "RAS registers not found\n"); |
843 | |
844 | rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component, |
845 | BIT(CXL_CM_CAP_CAP_ID_RAS)); |
846 | if (rc) |
847 | dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); |
848 | |
849 | rc = cxl_await_media_ready(cxlds); |
850 | if (rc == 0) |
851 | cxlds->media_ready = true; |
852 | else |
853 | dev_warn(&pdev->dev, "Media not active (%d)\n", rc); |
854 | |
855 | irq_avail = cxl_alloc_irq_vectors(pdev); |
856 | |
857 | rc = cxl_pci_setup_mailbox(mds, irq_avail); |
858 | if (rc) |
859 | return rc; |
860 | |
861 | rc = cxl_enumerate_cmds(mds); |
862 | if (rc) |
863 | return rc; |
864 | |
865 | rc = cxl_set_timestamp(mds); |
866 | if (rc) |
867 | return rc; |
868 | |
869 | rc = cxl_poison_state_init(mds); |
870 | if (rc) |
871 | return rc; |
872 | |
873 | rc = cxl_dev_state_identify(mds); |
874 | if (rc) |
875 | return rc; |
876 | |
877 | rc = cxl_mem_create_range_info(mds); |
878 | if (rc) |
879 | return rc; |
880 | |
881 | cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); |
882 | if (IS_ERR(cxlmd)) |
883 | return PTR_ERR(cxlmd); |
884 | |
885 | rc = devm_cxl_setup_fw_upload(&pdev->dev, mds); |
886 | if (rc) |
887 | return rc; |
888 | |
889 | rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd); |
890 | if (rc) |
891 | return rc; |
892 | |
893 | pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); |
894 | for (i = 0; i < pmu_count; i++) { |
895 | struct cxl_pmu_regs pmu_regs; |
896 | |
897 | rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i); |
898 | if (rc) { |
899 | dev_dbg(&pdev->dev, "Could not find PMU regblock\n"); |
900 | break; |
901 | } |
902 | |
903 | rc = cxl_map_pmu_regs(&map, &pmu_regs); |
904 | if (rc) { |
905 | dev_dbg(&pdev->dev, "Could not map PMU regs\n"); |
906 | break; |
907 | } |
908 | |
909 | rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV); |
910 | if (rc) { |
911 | dev_dbg(&pdev->dev, "Could not add PMU instance\n"); |
912 | break; |
913 | } |
914 | } |
915 | |
916 | rc = cxl_event_config(host_bridge, mds, irq_avail); |
917 | if (rc) |
918 | return rc; |
919 | |
920 | rc = cxl_pci_ras_unmask(pdev); |
921 | if (rc) |
922 | dev_dbg(&pdev->dev, "No RAS reporting unmasked\n"); |
923 | |
924 | pci_save_state(pdev); |
925 | |
926 | return rc; |
927 | } |
928 | |
929 | static const struct pci_device_id cxl_mem_pci_tbl[] = { |
930 | /* PCI class code for CXL.mem Type-3 Devices */ |
931 | { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)}, |
932 | { /* terminate list */ }, |
933 | }; |
934 | MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); |
935 | |
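/*
 * AER recovery: a slot reset clears device state, so restore the PCI config
 * space saved at probe time and re-attach the memdev driver stack.
 */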
936 | static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) |
937 | { |
938 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
939 | struct cxl_memdev *cxlmd = cxlds->cxlmd; |
940 | struct device *dev = &cxlmd->dev; |
941 | |
942 | dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", |
943 | dev_name(dev)); |
944 | pci_restore_state(pdev); |
945 | if (device_attach(dev) <= 0) |
946 | return PCI_ERS_RESULT_DISCONNECT; |
947 | return PCI_ERS_RESULT_RECOVERED; |
948 | } |
949 | |
950 | static void cxl_error_resume(struct pci_dev *pdev) |
951 | { |
952 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
953 | struct cxl_memdev *cxlmd = cxlds->cxlmd; |
954 | struct device *dev = &cxlmd->dev; |
955 | |
956 | dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), |
957 | dev->driver ? "successful" : "failed"); |
958 | } |
959 | |
960 | static const struct pci_error_handlers cxl_error_handlers = { |
961 | .error_detected = cxl_error_detected, |
962 | .slot_reset = cxl_slot_reset, |
963 | .resume = cxl_error_resume, |
964 | .cor_error_detected = cxl_cor_error_detected, |
965 | }; |
966 | |
967 | static struct pci_driver cxl_pci_driver = { |
968 | .name = KBUILD_MODNAME, |
969 | .id_table = cxl_mem_pci_tbl, |
970 | .probe = cxl_pci_probe, |
971 | .err_handler = &cxl_error_handlers, |
972 | .driver = { |
973 | .probe_type = PROBE_PREFER_ASYNCHRONOUS, |
974 | }, |
975 | }; |
976 | |
977 | module_pci_driver(cxl_pci_driver); |
978 | MODULE_LICENSE("GPL v2"); |
979 | MODULE_IMPORT_NS(CXL); |
980 |