1 | // SPDX-License-Identifier: GPL-2.0-only |
---|---|
2 | /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ |
3 | #include <linux/unaligned.h> |
4 | #include <linux/io-64-nonatomic-lo-hi.h> |
5 | #include <linux/moduleparam.h> |
6 | #include <linux/module.h> |
7 | #include <linux/delay.h> |
8 | #include <linux/sizes.h> |
9 | #include <linux/mutex.h> |
10 | #include <linux/list.h> |
11 | #include <linux/pci.h> |
12 | #include <linux/aer.h> |
13 | #include <linux/io.h> |
14 | #include <cxl/mailbox.h> |
15 | #include "cxlmem.h" |
16 | #include "cxlpci.h" |
17 | #include "cxl.h" |
18 | #include "pmu.h" |
19 | |
/**
 * DOC: cxl pci
 *
 * This implements the PCI exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even if they are not CXL enabled. While this driver
 * is focused around the PCI specific aspects of a CXL device, it binds to the
 * specific CXL memory device class code, and therefore the implementation of
 * cxl_pci is focused around CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register it on the CXL bus.
 *  - Enumerate the device's register interfaces and map them.
 *  - Register an nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
 */
36 | |
/*
 * Non-zero while the doorbell bit is set in the mailbox control register of
 * @cxlds. Performs one MMIO read per evaluation.
 */
#define cxl_doorbell_busy(cxlds)                                        \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &          \
	 CXLDEV_MBOX_CTRL_DOORBELL)
40 | |
/*
 * CXL 2.0 - 8.2.8.4
 *
 * Doorbell timeout. Despite the _MS suffix the value is expressed in jiffies
 * (two seconds worth of HZ) and is added directly to a jiffies timestamp by
 * the doorbell wait loop.
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
43 | |
44 | /* |
45 | * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to |
46 | * dictate how long to wait for the mailbox to become ready. The new |
47 | * field allows the device to tell software the amount of time to wait |
48 | * before mailbox ready. This field per the spec theoretically allows |
49 | * for up to 255 seconds. 255 seconds is unreasonably long, its longer |
50 | * than the maximum SATA port link recovery wait. Default to 60 seconds |
51 | * until someone builds a CXL device that needs more time in practice. |
52 | */ |
53 | static unsigned short mbox_ready_timeout = 60; |
54 | module_param(mbox_ready_timeout, ushort, 0644); |
55 | MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); |
56 | |
57 | static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) |
58 | { |
59 | const unsigned long start = jiffies; |
60 | unsigned long end = start; |
61 | |
62 | while (cxl_doorbell_busy(cxlds)) { |
63 | end = jiffies; |
64 | |
65 | if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) { |
66 | /* Check again in case preempted before timeout test */ |
67 | if (!cxl_doorbell_busy(cxlds)) |
68 | break; |
69 | return -ETIMEDOUT; |
70 | } |
71 | cpu_relax(); |
72 | } |
73 | |
74 | dev_dbg(cxlds->dev, "Doorbell wait took %dms", |
75 | jiffies_to_msecs(end) - jiffies_to_msecs(start)); |
76 | return 0; |
77 | } |
78 | |
/*
 * Emit a rate-limited error for @dev, appending the "fatal" and
 * "firmware-halt" flags decoded from the memory device status value @status.
 *
 * @msg must be a string literal: it is concatenated with the format string.
 * @status is parenthesized so that compound expressions (e.g. "a | b") are
 * tested as a whole; note that it is evaluated twice.
 */
#define cxl_err(dev, status, msg)                                        \
	dev_err_ratelimited(dev, msg ", device state %s%s\n",            \
			    (status) & CXLMDEV_DEV_FATAL ? " fatal" : "", \
			    (status) & CXLMDEV_FW_HALT ? " firmware-halt" : "")
83 | |
/*
 * Emit a rate-limited error for @dev about mailbox command @cmd, including
 * its opcode and the "fatal" / "firmware-halt" flags decoded from the memory
 * device status value @status.
 *
 * @msg must be a string literal: it is concatenated with the format string.
 * @status is parenthesized so that compound expressions are tested as a
 * whole; note that it is evaluated twice.
 */
#define cxl_cmd_err(dev, cmd, status, msg)                                  \
	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \
			    (cmd)->opcode,                                  \
			    (status) & CXLMDEV_DEV_FATAL ? " fatal" : "",   \
			    (status) & CXLMDEV_FW_HALT ? " firmware-halt" : "")
89 | |
/*
 * dev_id cookies for threaded irqs must be globally unique. One cxl_dev_id is
 * allocated per requested irq so that several irqs belonging to the same
 * cxlds each get a distinct cookie.
 */
struct cxl_dev_id {
	/* device state the irq was requested for */
	struct cxl_dev_state *cxlds;
};
97 | |
98 | static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq, |
99 | irq_handler_t thread_fn) |
100 | { |
101 | struct device *dev = cxlds->dev; |
102 | struct cxl_dev_id *dev_id; |
103 | |
104 | dev_id = devm_kzalloc(dev, size: sizeof(*dev_id), GFP_KERNEL); |
105 | if (!dev_id) |
106 | return -ENOMEM; |
107 | dev_id->cxlds = cxlds; |
108 | |
109 | return devm_request_threaded_irq(dev, irq, NULL, thread_fn, |
110 | IRQF_SHARED | IRQF_ONESHOT, NULL, |
111 | dev_id); |
112 | } |
113 | |
114 | static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds) |
115 | { |
116 | u64 reg; |
117 | |
118 | reg = readq(addr: cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); |
119 | return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100; |
120 | } |
121 | |
122 | static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) |
123 | { |
124 | u64 reg; |
125 | u16 opcode; |
126 | struct cxl_dev_id *dev_id = id; |
127 | struct cxl_dev_state *cxlds = dev_id->cxlds; |
128 | struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; |
129 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); |
130 | |
131 | if (!cxl_mbox_background_complete(cxlds)) |
132 | return IRQ_NONE; |
133 | |
134 | reg = readq(addr: cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); |
135 | opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); |
136 | if (opcode == CXL_MBOX_OP_SANITIZE) { |
137 | mutex_lock(&cxl_mbox->mbox_mutex); |
138 | if (mds->security.sanitize_node) |
139 | mod_delayed_work(wq: system_wq, dwork: &mds->security.poll_dwork, delay: 0); |
140 | mutex_unlock(lock: &cxl_mbox->mbox_mutex); |
141 | } else { |
142 | /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ |
143 | rcuwait_wake_up(w: &cxl_mbox->mbox_wait); |
144 | } |
145 | |
146 | return IRQ_HANDLED; |
147 | } |
148 | |
149 | /* |
150 | * Sanitization operation polling mode. |
151 | */ |
152 | static void cxl_mbox_sanitize_work(struct work_struct *work) |
153 | { |
154 | struct cxl_memdev_state *mds = |
155 | container_of(work, typeof(*mds), security.poll_dwork.work); |
156 | struct cxl_dev_state *cxlds = &mds->cxlds; |
157 | struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; |
158 | |
159 | mutex_lock(&cxl_mbox->mbox_mutex); |
160 | if (cxl_mbox_background_complete(cxlds)) { |
161 | mds->security.poll_tmo_secs = 0; |
162 | if (mds->security.sanitize_node) |
163 | sysfs_notify_dirent(kn: mds->security.sanitize_node); |
164 | mds->security.sanitize_active = false; |
165 | |
166 | dev_dbg(cxlds->dev, "Sanitization operation ended\n"); |
167 | } else { |
168 | int timeout = mds->security.poll_tmo_secs + 10; |
169 | |
170 | mds->security.poll_tmo_secs = min(15 * 60, timeout); |
171 | schedule_delayed_work(dwork: &mds->security.poll_dwork, delay: timeout * HZ); |
172 | } |
173 | mutex_unlock(lock: &cxl_mbox->mbox_mutex); |
174 | } |
175 | |
176 | /** |
177 | * __cxl_pci_mbox_send_cmd() - Execute a mailbox command |
178 | * @cxl_mbox: CXL mailbox context |
179 | * @mbox_cmd: Command to send to the memory device. |
180 | * |
181 | * Context: Any context. Expects mbox_mutex to be held. |
182 | * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success. |
183 | * Caller should check the return code in @mbox_cmd to make sure it |
184 | * succeeded. |
185 | * |
186 | * This is a generic form of the CXL mailbox send command thus only using the |
187 | * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory |
188 | * devices, and perhaps other types of CXL devices may have further information |
189 | * available upon error conditions. Driver facilities wishing to send mailbox |
190 | * commands should use the wrapper command. |
191 | * |
192 | * The CXL spec allows for up to two mailboxes. The intention is for the primary |
193 | * mailbox to be OS controlled and the secondary mailbox to be used by system |
194 | * firmware. This allows the OS and firmware to communicate with the device and |
195 | * not need to coordinate with each other. The driver only uses the primary |
196 | * mailbox. |
197 | */ |
198 | static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox, |
199 | struct cxl_mbox_cmd *mbox_cmd) |
200 | { |
201 | struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox); |
202 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); |
203 | void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; |
204 | struct device *dev = cxlds->dev; |
205 | u64 cmd_reg, status_reg; |
206 | size_t out_len; |
207 | int rc; |
208 | |
209 | lockdep_assert_held(&cxl_mbox->mbox_mutex); |
210 | |
211 | /* |
212 | * Here are the steps from 8.2.8.4 of the CXL 2.0 spec. |
213 | * 1. Caller reads MB Control Register to verify doorbell is clear |
214 | * 2. Caller writes Command Register |
215 | * 3. Caller writes Command Payload Registers if input payload is non-empty |
216 | * 4. Caller writes MB Control Register to set doorbell |
217 | * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured |
218 | * 6. Caller reads MB Status Register to fetch Return code |
219 | * 7. If command successful, Caller reads Command Register to get Payload Length |
220 | * 8. If output payload is non-empty, host reads Command Payload Registers |
221 | * |
222 | * Hardware is free to do whatever it wants before the doorbell is rung, |
223 | * and isn't allowed to change anything after it clears the doorbell. As |
224 | * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can |
225 | * also happen in any order (though some orders might not make sense). |
226 | */ |
227 | |
228 | /* #1 */ |
229 | if (cxl_doorbell_busy(cxlds)) { |
230 | u64 md_status = |
231 | readq(addr: cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); |
232 | |
233 | cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, |
234 | "mailbox queue busy"); |
235 | return -EBUSY; |
236 | } |
237 | |
238 | /* |
239 | * With sanitize polling, hardware might be done and the poller still |
240 | * not be in sync. Ensure no new command comes in until so. Keep the |
241 | * hardware semantics and only allow device health status. |
242 | */ |
243 | if (mds->security.poll_tmo_secs > 0) { |
244 | if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO) |
245 | return -EBUSY; |
246 | } |
247 | |
248 | cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, |
249 | mbox_cmd->opcode); |
250 | if (mbox_cmd->size_in) { |
251 | if (WARN_ON(!mbox_cmd->payload_in)) |
252 | return -EINVAL; |
253 | |
254 | cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, |
255 | mbox_cmd->size_in); |
256 | memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in); |
257 | } |
258 | |
259 | /* #2, #3 */ |
260 | writeq(val: cmd_reg, addr: cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); |
261 | |
262 | /* #4 */ |
263 | dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode); |
264 | writel(CXLDEV_MBOX_CTRL_DOORBELL, |
265 | addr: cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); |
266 | |
267 | /* #5 */ |
268 | rc = cxl_pci_mbox_wait_for_doorbell(cxlds); |
269 | if (rc == -ETIMEDOUT) { |
270 | u64 md_status = readq(addr: cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); |
271 | |
272 | cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); |
273 | return rc; |
274 | } |
275 | |
276 | /* #6 */ |
277 | status_reg = readq(addr: cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET); |
278 | mbox_cmd->return_code = |
279 | FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); |
280 | |
281 | /* |
282 | * Handle the background command in a synchronous manner. |
283 | * |
284 | * All other mailbox commands will serialize/queue on the mbox_mutex, |
285 | * which we currently hold. Furthermore this also guarantees that |
286 | * cxl_mbox_background_complete() checks are safe amongst each other, |
287 | * in that no new bg operation can occur in between. |
288 | * |
289 | * Background operations are timesliced in accordance with the nature |
290 | * of the command. In the event of timeout, the mailbox state is |
291 | * indeterminate until the next successful command submission and the |
292 | * driver can get back in sync with the hardware state. |
293 | */ |
294 | if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) { |
295 | u64 bg_status_reg; |
296 | int i, timeout; |
297 | |
298 | /* |
299 | * Sanitization is a special case which monopolizes the device |
300 | * and cannot be timesliced. Handle asynchronously instead, |
301 | * and allow userspace to poll(2) for completion. |
302 | */ |
303 | if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) { |
304 | if (mds->security.sanitize_active) |
305 | return -EBUSY; |
306 | |
307 | /* give first timeout a second */ |
308 | timeout = 1; |
309 | mds->security.poll_tmo_secs = timeout; |
310 | mds->security.sanitize_active = true; |
311 | schedule_delayed_work(dwork: &mds->security.poll_dwork, |
312 | delay: timeout * HZ); |
313 | dev_dbg(dev, "Sanitization operation started\n"); |
314 | goto success; |
315 | } |
316 | |
317 | dev_dbg(dev, "Mailbox background operation (0x%04x) started\n", |
318 | mbox_cmd->opcode); |
319 | |
320 | timeout = mbox_cmd->poll_interval_ms; |
321 | for (i = 0; i < mbox_cmd->poll_count; i++) { |
322 | if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait, |
323 | cxl_mbox_background_complete(cxlds), |
324 | TASK_UNINTERRUPTIBLE, |
325 | msecs_to_jiffies(timeout)) > 0) |
326 | break; |
327 | } |
328 | |
329 | if (!cxl_mbox_background_complete(cxlds)) { |
330 | dev_err(dev, "timeout waiting for background (%d ms)\n", |
331 | timeout * mbox_cmd->poll_count); |
332 | return -ETIMEDOUT; |
333 | } |
334 | |
335 | bg_status_reg = readq(addr: cxlds->regs.mbox + |
336 | CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); |
337 | mbox_cmd->return_code = |
338 | FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK, |
339 | bg_status_reg); |
340 | dev_dbg(dev, |
341 | "Mailbox background operation (0x%04x) completed\n", |
342 | mbox_cmd->opcode); |
343 | } |
344 | |
345 | if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { |
346 | dev_dbg(dev, "Mailbox operation had an error: %s\n", |
347 | cxl_mbox_cmd_rc2str(mbox_cmd)); |
348 | return 0; /* completed but caller must check return_code */ |
349 | } |
350 | |
351 | success: |
352 | /* #7 */ |
353 | cmd_reg = readq(addr: cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); |
354 | out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg); |
355 | |
356 | /* #8 */ |
357 | if (out_len && mbox_cmd->payload_out) { |
358 | /* |
359 | * Sanitize the copy. If hardware misbehaves, out_len per the |
360 | * spec can actually be greater than the max allowed size (21 |
361 | * bits available but spec defined 1M max). The caller also may |
362 | * have requested less data than the hardware supplied even |
363 | * within spec. |
364 | */ |
365 | size_t n; |
366 | |
367 | n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len); |
368 | memcpy_fromio(mbox_cmd->payload_out, payload, n); |
369 | mbox_cmd->size_out = n; |
370 | } else { |
371 | mbox_cmd->size_out = 0; |
372 | } |
373 | |
374 | return 0; |
375 | } |
376 | |
377 | static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox, |
378 | struct cxl_mbox_cmd *cmd) |
379 | { |
380 | int rc; |
381 | |
382 | mutex_lock_io(&cxl_mbox->mbox_mutex); |
383 | rc = __cxl_pci_mbox_send_cmd(cxl_mbox, mbox_cmd: cmd); |
384 | mutex_unlock(lock: &cxl_mbox->mbox_mutex); |
385 | |
386 | return rc; |
387 | } |
388 | |
389 | static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) |
390 | { |
391 | struct cxl_dev_state *cxlds = &mds->cxlds; |
392 | struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; |
393 | const int cap = readl(addr: cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); |
394 | struct device *dev = cxlds->dev; |
395 | unsigned long timeout; |
396 | int irq, msgnum; |
397 | u64 md_status; |
398 | u32 ctrl; |
399 | |
400 | timeout = jiffies + mbox_ready_timeout * HZ; |
401 | do { |
402 | md_status = readq(addr: cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); |
403 | if (md_status & CXLMDEV_MBOX_IF_READY) |
404 | break; |
405 | if (msleep_interruptible(msecs: 100)) |
406 | break; |
407 | } while (!time_after(jiffies, timeout)); |
408 | |
409 | if (!(md_status & CXLMDEV_MBOX_IF_READY)) { |
410 | cxl_err(dev, md_status, "timeout awaiting mailbox ready"); |
411 | return -ETIMEDOUT; |
412 | } |
413 | |
414 | /* |
415 | * A command may be in flight from a previous driver instance, |
416 | * think kexec, do one doorbell wait so that |
417 | * __cxl_pci_mbox_send_cmd() can assume that it is the only |
418 | * source for future doorbell busy events. |
419 | */ |
420 | if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { |
421 | cxl_err(dev, md_status, "timeout awaiting mailbox idle"); |
422 | return -ETIMEDOUT; |
423 | } |
424 | |
425 | cxl_mbox->mbox_send = cxl_pci_mbox_send; |
426 | cxl_mbox->payload_size = |
427 | 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); |
428 | |
429 | /* |
430 | * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register |
431 | * |
432 | * If the size is too small, mandatory commands will not work and so |
433 | * there's no point in going forward. If the size is too large, there's |
434 | * no harm is soft limiting it. |
435 | */ |
436 | cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M); |
437 | if (cxl_mbox->payload_size < 256) { |
438 | dev_err(dev, "Mailbox is too small (%zub)", |
439 | cxl_mbox->payload_size); |
440 | return -ENXIO; |
441 | } |
442 | |
443 | dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size); |
444 | |
445 | INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); |
446 | |
447 | /* background command interrupts are optional */ |
448 | if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail) |
449 | return 0; |
450 | |
451 | msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); |
452 | irq = pci_irq_vector(to_pci_dev(cxlds->dev), nr: msgnum); |
453 | if (irq < 0) |
454 | return 0; |
455 | |
456 | if (cxl_request_irq(cxlds, irq, thread_fn: cxl_pci_mbox_irq)) |
457 | return 0; |
458 | |
459 | dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n"); |
460 | /* enable background command mbox irq support */ |
461 | ctrl = readl(addr: cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); |
462 | ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; |
463 | writel(val: ctrl, addr: cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); |
464 | |
465 | return 0; |
466 | } |
467 | |
468 | /* |
469 | * Assume that any RCIEP that emits the CXL memory expander class code |
470 | * is an RCD |
471 | */ |
472 | static bool is_cxl_restricted(struct pci_dev *pdev) |
473 | { |
474 | return pci_pcie_type(dev: pdev) == PCI_EXP_TYPE_RC_END; |
475 | } |
476 | |
477 | static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, |
478 | struct cxl_register_map *map, |
479 | struct cxl_dport *dport) |
480 | { |
481 | resource_size_t component_reg_phys; |
482 | |
483 | *map = (struct cxl_register_map) { |
484 | .host = &pdev->dev, |
485 | .resource = CXL_RESOURCE_NONE, |
486 | }; |
487 | |
488 | struct cxl_port *port __free(put_cxl_port) = |
489 | cxl_pci_find_port(pdev, dport: &dport); |
490 | if (!port) |
491 | return -EPROBE_DEFER; |
492 | |
493 | component_reg_phys = cxl_rcd_component_reg_phys(dev: &pdev->dev, dport); |
494 | if (component_reg_phys == CXL_RESOURCE_NONE) |
495 | return -ENXIO; |
496 | |
497 | map->resource = component_reg_phys; |
498 | map->reg_type = CXL_REGLOC_RBI_COMPONENT; |
499 | map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; |
500 | |
501 | return 0; |
502 | } |
503 | |
504 | static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, |
505 | struct cxl_register_map *map) |
506 | { |
507 | int rc; |
508 | |
509 | rc = cxl_find_regblock(pdev, type, map); |
510 | |
511 | /* |
512 | * If the Register Locator DVSEC does not exist, check if it |
513 | * is an RCH and try to extract the Component Registers from |
514 | * an RCRB. |
515 | */ |
516 | if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { |
517 | struct cxl_dport *dport; |
518 | struct cxl_port *port __free(put_cxl_port) = |
519 | cxl_pci_find_port(pdev, dport: &dport); |
520 | if (!port) |
521 | return -EPROBE_DEFER; |
522 | |
523 | rc = cxl_rcrb_get_comp_regs(pdev, map, dport); |
524 | if (rc) |
525 | return rc; |
526 | |
527 | rc = cxl_dport_map_rcd_linkcap(pdev, dport); |
528 | if (rc) |
529 | return rc; |
530 | |
531 | } else if (rc) { |
532 | return rc; |
533 | } |
534 | |
535 | return cxl_setup_regs(map); |
536 | } |
537 | |
538 | static int cxl_pci_ras_unmask(struct pci_dev *pdev) |
539 | { |
540 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
541 | void __iomem *addr; |
542 | u32 orig_val, val, mask; |
543 | u16 cap; |
544 | int rc; |
545 | |
546 | if (!cxlds->regs.ras) { |
547 | dev_dbg(&pdev->dev, "No RAS registers.\n"); |
548 | return 0; |
549 | } |
550 | |
551 | /* BIOS has PCIe AER error control */ |
552 | if (!pcie_aer_is_native(dev: pdev)) |
553 | return 0; |
554 | |
555 | rc = pcie_capability_read_word(dev: pdev, PCI_EXP_DEVCTL, val: &cap); |
556 | if (rc) |
557 | return rc; |
558 | |
559 | if (cap & PCI_EXP_DEVCTL_URRE) { |
560 | addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; |
561 | orig_val = readl(addr); |
562 | |
563 | mask = CXL_RAS_UNCORRECTABLE_MASK_MASK | |
564 | CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; |
565 | val = orig_val & ~mask; |
566 | writel(val, addr); |
567 | dev_dbg(&pdev->dev, |
568 | "Uncorrectable RAS Errors Mask: %#x -> %#x\n", |
569 | orig_val, val); |
570 | } |
571 | |
572 | if (cap & PCI_EXP_DEVCTL_CERE) { |
573 | addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; |
574 | orig_val = readl(addr); |
575 | val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK; |
576 | writel(val, addr); |
577 | dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n", |
578 | orig_val, val); |
579 | } |
580 | |
581 | return 0; |
582 | } |
583 | |
/* devm action callback: release the event buffer with kvfree(). */
static void free_event_buf(void *buf)
{
	kvfree(buf);
}
588 | |
589 | /* |
590 | * There is a single buffer for reading event logs from the mailbox. All logs |
591 | * share this buffer protected by the mds->event_log_lock. |
592 | */ |
593 | static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) |
594 | { |
595 | struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; |
596 | struct cxl_get_event_payload *buf; |
597 | |
598 | buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); |
599 | if (!buf) |
600 | return -ENOMEM; |
601 | mds->event.buf = buf; |
602 | |
603 | return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf); |
604 | } |
605 | |
606 | static bool cxl_alloc_irq_vectors(struct pci_dev *pdev) |
607 | { |
608 | int nvecs; |
609 | |
610 | /* |
611 | * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must |
612 | * not generate INTx messages if that function participates in |
613 | * CXL.cache or CXL.mem. |
614 | * |
615 | * Additionally pci_alloc_irq_vectors() handles calling |
616 | * pci_free_irq_vectors() automatically despite not being called |
617 | * pcim_*. See pci_setup_msi_context(). |
618 | */ |
619 | nvecs = pci_alloc_irq_vectors(dev: pdev, min_vecs: 1, CXL_PCI_DEFAULT_MAX_VECTORS, |
620 | PCI_IRQ_MSIX | PCI_IRQ_MSI); |
621 | if (nvecs < 1) { |
622 | dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs); |
623 | return false; |
624 | } |
625 | return true; |
626 | } |
627 | |
628 | static irqreturn_t cxl_event_thread(int irq, void *id) |
629 | { |
630 | struct cxl_dev_id *dev_id = id; |
631 | struct cxl_dev_state *cxlds = dev_id->cxlds; |
632 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); |
633 | u32 status; |
634 | |
635 | do { |
636 | /* |
637 | * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status; |
638 | * ignore the reserved upper 32 bits |
639 | */ |
640 | status = readl(addr: cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); |
641 | /* Ignore logs unknown to the driver */ |
642 | status &= CXLDEV_EVENT_STATUS_ALL; |
643 | if (!status) |
644 | break; |
645 | cxl_mem_get_event_records(mds, status); |
646 | cond_resched(); |
647 | } while (status); |
648 | |
649 | return IRQ_HANDLED; |
650 | } |
651 | |
652 | static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) |
653 | { |
654 | struct pci_dev *pdev = to_pci_dev(cxlds->dev); |
655 | int irq; |
656 | |
657 | if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX) |
658 | return -ENXIO; |
659 | |
660 | irq = pci_irq_vector(dev: pdev, |
661 | FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting)); |
662 | if (irq < 0) |
663 | return irq; |
664 | |
665 | return cxl_request_irq(cxlds, irq, thread_fn: cxl_event_thread); |
666 | } |
667 | |
668 | static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, |
669 | struct cxl_event_interrupt_policy *policy) |
670 | { |
671 | struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; |
672 | struct cxl_mbox_cmd mbox_cmd = { |
673 | .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY, |
674 | .payload_out = policy, |
675 | .size_out = sizeof(*policy), |
676 | }; |
677 | int rc; |
678 | |
679 | rc = cxl_internal_send_cmd(cxl_mbox, cmd: &mbox_cmd); |
680 | if (rc < 0) |
681 | dev_err(mds->cxlds.dev, |
682 | "Failed to get event interrupt policy : %d", rc); |
683 | |
684 | return rc; |
685 | } |
686 | |
687 | static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, |
688 | struct cxl_event_interrupt_policy *policy) |
689 | { |
690 | struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; |
691 | struct cxl_mbox_cmd mbox_cmd; |
692 | int rc; |
693 | |
694 | *policy = (struct cxl_event_interrupt_policy) { |
695 | .info_settings = CXL_INT_MSI_MSIX, |
696 | .warn_settings = CXL_INT_MSI_MSIX, |
697 | .failure_settings = CXL_INT_MSI_MSIX, |
698 | .fatal_settings = CXL_INT_MSI_MSIX, |
699 | }; |
700 | |
701 | mbox_cmd = (struct cxl_mbox_cmd) { |
702 | .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY, |
703 | .payload_in = policy, |
704 | .size_in = sizeof(*policy), |
705 | }; |
706 | |
707 | rc = cxl_internal_send_cmd(cxl_mbox, cmd: &mbox_cmd); |
708 | if (rc < 0) { |
709 | dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d", |
710 | rc); |
711 | return rc; |
712 | } |
713 | |
714 | /* Retrieve final interrupt settings */ |
715 | return cxl_event_get_int_policy(mds, policy); |
716 | } |
717 | |
718 | static int cxl_event_irqsetup(struct cxl_memdev_state *mds) |
719 | { |
720 | struct cxl_dev_state *cxlds = &mds->cxlds; |
721 | struct cxl_event_interrupt_policy policy; |
722 | int rc; |
723 | |
724 | rc = cxl_event_config_msgnums(mds, policy: &policy); |
725 | if (rc) |
726 | return rc; |
727 | |
728 | rc = cxl_event_req_irq(cxlds, setting: policy.info_settings); |
729 | if (rc) { |
730 | dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n"); |
731 | return rc; |
732 | } |
733 | |
734 | rc = cxl_event_req_irq(cxlds, setting: policy.warn_settings); |
735 | if (rc) { |
736 | dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n"); |
737 | return rc; |
738 | } |
739 | |
740 | rc = cxl_event_req_irq(cxlds, setting: policy.failure_settings); |
741 | if (rc) { |
742 | dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n"); |
743 | return rc; |
744 | } |
745 | |
746 | rc = cxl_event_req_irq(cxlds, setting: policy.fatal_settings); |
747 | if (rc) { |
748 | dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n"); |
749 | return rc; |
750 | } |
751 | |
752 | return 0; |
753 | } |
754 | |
755 | static bool cxl_event_int_is_fw(u8 setting) |
756 | { |
757 | u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting); |
758 | |
759 | return mode == CXL_INT_FW; |
760 | } |
761 | |
762 | static int cxl_event_config(struct pci_host_bridge *host_bridge, |
763 | struct cxl_memdev_state *mds, bool irq_avail) |
764 | { |
765 | struct cxl_event_interrupt_policy policy; |
766 | int rc; |
767 | |
768 | /* |
769 | * When BIOS maintains CXL error reporting control, it will process |
770 | * event records. Only one agent can do so. |
771 | */ |
772 | if (!host_bridge->native_cxl_error) |
773 | return 0; |
774 | |
775 | if (!irq_avail) { |
776 | dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); |
777 | return 0; |
778 | } |
779 | |
780 | rc = cxl_event_get_int_policy(mds, policy: &policy); |
781 | if (rc) |
782 | return rc; |
783 | |
784 | if (cxl_event_int_is_fw(setting: policy.info_settings) || |
785 | cxl_event_int_is_fw(setting: policy.warn_settings) || |
786 | cxl_event_int_is_fw(setting: policy.failure_settings) || |
787 | cxl_event_int_is_fw(setting: policy.fatal_settings)) { |
788 | dev_err(mds->cxlds.dev, |
789 | "FW still in control of Event Logs despite _OSC settings\n"); |
790 | return -EBUSY; |
791 | } |
792 | |
793 | rc = cxl_mem_alloc_event_buf(mds); |
794 | if (rc) |
795 | return rc; |
796 | |
797 | rc = cxl_event_irqsetup(mds); |
798 | if (rc) |
799 | return rc; |
800 | |
801 | cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); |
802 | |
803 | return 0; |
804 | } |
805 | |
806 | static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds) |
807 | { |
808 | int rc; |
809 | |
810 | /* |
811 | * Fail the init if there's no mailbox. For a type3 this is out of spec. |
812 | */ |
813 | if (!cxlds->reg_map.device_map.mbox.valid) |
814 | return -ENODEV; |
815 | |
816 | rc = cxl_mailbox_init(cxl_mbox: &cxlds->cxl_mbox, host: cxlds->dev); |
817 | if (rc) |
818 | return rc; |
819 | |
820 | return 0; |
821 | } |
822 | |
823 | static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width) |
824 | { |
825 | struct cxl_dev_state *cxlds = dev_get_drvdata(dev); |
826 | struct cxl_memdev *cxlmd = cxlds->cxlmd; |
827 | struct device *root_dev; |
828 | struct cxl_dport *dport; |
829 | struct cxl_port *root __free(put_cxl_port) = |
830 | cxl_mem_find_port(cxlmd, dport: &dport); |
831 | |
832 | if (!root) |
833 | return -ENXIO; |
834 | |
835 | root_dev = root->uport_dev; |
836 | if (!root_dev) |
837 | return -ENXIO; |
838 | |
839 | if (!dport->regs.rcd_pcie_cap) |
840 | return -ENXIO; |
841 | |
842 | guard(device)(T: root_dev); |
843 | if (!root_dev->driver) |
844 | return -ENXIO; |
845 | |
846 | switch (width) { |
847 | case 2: |
848 | return sysfs_emit(buf, fmt: "%#x\n", |
849 | readw(addr: dport->regs.rcd_pcie_cap + offset)); |
850 | case 4: |
851 | return sysfs_emit(buf, fmt: "%#x\n", |
852 | readl(addr: dport->regs.rcd_pcie_cap + offset)); |
853 | default: |
854 | return -EINVAL; |
855 | } |
856 | } |
857 | |
858 | static ssize_t rcd_link_cap_show(struct device *dev, |
859 | struct device_attribute *attr, char *buf) |
860 | { |
861 | return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, width: sizeof(u32)); |
862 | } |
863 | static DEVICE_ATTR_RO(rcd_link_cap); |
864 | |
865 | static ssize_t rcd_link_ctrl_show(struct device *dev, |
866 | struct device_attribute *attr, char *buf) |
867 | { |
868 | return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, width: sizeof(u16)); |
869 | } |
870 | static DEVICE_ATTR_RO(rcd_link_ctrl); |
871 | |
872 | static ssize_t rcd_link_status_show(struct device *dev, |
873 | struct device_attribute *attr, char *buf) |
874 | { |
875 | return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, width: sizeof(u16)); |
876 | } |
877 | static DEVICE_ATTR_RO(rcd_link_status); |
878 | |
879 | static struct attribute *cxl_rcd_attrs[] = { |
880 | &dev_attr_rcd_link_cap.attr, |
881 | &dev_attr_rcd_link_ctrl.attr, |
882 | &dev_attr_rcd_link_status.attr, |
883 | NULL |
884 | }; |
885 | |
886 | static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n) |
887 | { |
888 | struct device *dev = kobj_to_dev(kobj); |
889 | struct pci_dev *pdev = to_pci_dev(dev); |
890 | |
891 | if (is_cxl_restricted(pdev)) |
892 | return a->mode; |
893 | |
894 | return 0; |
895 | } |
896 | |
897 | static struct attribute_group cxl_rcd_group = { |
898 | .attrs = cxl_rcd_attrs, |
899 | .is_visible = cxl_rcd_visible, |
900 | }; |
901 | __ATTRIBUTE_GROUPS(cxl_rcd); |
902 | |
903 | static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
904 | { |
905 | struct pci_host_bridge *host_bridge = pci_find_host_bridge(bus: pdev->bus); |
906 | struct cxl_dpa_info range_info = { 0 }; |
907 | struct cxl_memdev_state *mds; |
908 | struct cxl_dev_state *cxlds; |
909 | struct cxl_register_map map; |
910 | struct cxl_memdev *cxlmd; |
911 | int rc, pmu_count; |
912 | unsigned int i; |
913 | bool irq_avail; |
914 | |
915 | /* |
916 | * Double check the anonymous union trickery in struct cxl_regs |
917 | * FIXME switch to struct_group() |
918 | */ |
919 | BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) != |
920 | offsetof(struct cxl_regs, device_regs.memdev)); |
921 | |
922 | rc = pcim_enable_device(pdev); |
923 | if (rc) |
924 | return rc; |
925 | pci_set_master(dev: pdev); |
926 | |
927 | mds = cxl_memdev_state_create(dev: &pdev->dev); |
928 | if (IS_ERR(ptr: mds)) |
929 | return PTR_ERR(ptr: mds); |
930 | cxlds = &mds->cxlds; |
931 | pci_set_drvdata(pdev, data: cxlds); |
932 | |
933 | cxlds->rcd = is_cxl_restricted(pdev); |
934 | cxlds->serial = pci_get_dsn(dev: pdev); |
935 | cxlds->cxl_dvsec = pci_find_dvsec_capability( |
936 | dev: pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); |
937 | if (!cxlds->cxl_dvsec) |
938 | dev_warn(&pdev->dev, |
939 | "Device DVSEC not present, skip CXL.mem init\n"); |
940 | |
941 | rc = cxl_pci_setup_regs(pdev, type: CXL_REGLOC_RBI_MEMDEV, map: &map); |
942 | if (rc) |
943 | return rc; |
944 | |
945 | rc = cxl_map_device_regs(map: &map, regs: &cxlds->regs.device_regs); |
946 | if (rc) |
947 | return rc; |
948 | |
949 | /* |
950 | * If the component registers can't be found, the cxl_pci driver may |
951 | * still be useful for management functions so don't return an error. |
952 | */ |
953 | rc = cxl_pci_setup_regs(pdev, type: CXL_REGLOC_RBI_COMPONENT, |
954 | map: &cxlds->reg_map); |
955 | if (rc) |
956 | dev_warn(&pdev->dev, "No component registers (%d)\n", rc); |
957 | else if (!cxlds->reg_map.component_map.ras.valid) |
958 | dev_dbg(&pdev->dev, "RAS registers not found\n"); |
959 | |
960 | rc = cxl_map_component_regs(map: &cxlds->reg_map, regs: &cxlds->regs.component, |
961 | BIT(CXL_CM_CAP_CAP_ID_RAS)); |
962 | if (rc) |
963 | dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); |
964 | |
965 | rc = cxl_pci_type3_init_mailbox(cxlds); |
966 | if (rc) |
967 | return rc; |
968 | |
969 | rc = cxl_await_media_ready(cxlds); |
970 | if (rc == 0) |
971 | cxlds->media_ready = true; |
972 | else |
973 | dev_warn(&pdev->dev, "Media not active (%d)\n", rc); |
974 | |
975 | irq_avail = cxl_alloc_irq_vectors(pdev); |
976 | |
977 | rc = cxl_pci_setup_mailbox(mds, irq_avail); |
978 | if (rc) |
979 | return rc; |
980 | |
981 | rc = cxl_enumerate_cmds(mds); |
982 | if (rc) |
983 | return rc; |
984 | |
985 | rc = cxl_set_timestamp(mds); |
986 | if (rc) |
987 | return rc; |
988 | |
989 | rc = cxl_poison_state_init(mds); |
990 | if (rc) |
991 | return rc; |
992 | |
993 | rc = cxl_dev_state_identify(mds); |
994 | if (rc) |
995 | return rc; |
996 | |
997 | rc = cxl_mem_dpa_fetch(mds, info: &range_info); |
998 | if (rc) |
999 | return rc; |
1000 | |
1001 | rc = cxl_dpa_setup(cxlds, info: &range_info); |
1002 | if (rc) |
1003 | return rc; |
1004 | |
1005 | rc = devm_cxl_setup_features(cxlds); |
1006 | if (rc) |
1007 | dev_dbg(&pdev->dev, "No CXL Features discovered\n"); |
1008 | |
1009 | cxlmd = devm_cxl_add_memdev(host: &pdev->dev, cxlds); |
1010 | if (IS_ERR(ptr: cxlmd)) |
1011 | return PTR_ERR(ptr: cxlmd); |
1012 | |
1013 | rc = devm_cxl_setup_fw_upload(host: &pdev->dev, mds); |
1014 | if (rc) |
1015 | return rc; |
1016 | |
1017 | rc = devm_cxl_sanitize_setup_notifier(host: &pdev->dev, cxlmd); |
1018 | if (rc) |
1019 | return rc; |
1020 | |
1021 | rc = devm_cxl_setup_fwctl(host: &pdev->dev, cxlmd); |
1022 | if (rc) |
1023 | dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); |
1024 | |
1025 | pmu_count = cxl_count_regblock(pdev, type: CXL_REGLOC_RBI_PMU); |
1026 | if (pmu_count < 0) |
1027 | return pmu_count; |
1028 | |
1029 | for (i = 0; i < pmu_count; i++) { |
1030 | struct cxl_pmu_regs pmu_regs; |
1031 | |
1032 | rc = cxl_find_regblock_instance(pdev, type: CXL_REGLOC_RBI_PMU, map: &map, index: i); |
1033 | if (rc) { |
1034 | dev_dbg(&pdev->dev, "Could not find PMU regblock\n"); |
1035 | break; |
1036 | } |
1037 | |
1038 | rc = cxl_map_pmu_regs(map: &map, regs: &pmu_regs); |
1039 | if (rc) { |
1040 | dev_dbg(&pdev->dev, "Could not map PMU regs\n"); |
1041 | break; |
1042 | } |
1043 | |
1044 | rc = devm_cxl_pmu_add(parent: cxlds->dev, regs: &pmu_regs, assoc_id: cxlmd->id, idx: i, type: CXL_PMU_MEMDEV); |
1045 | if (rc) { |
1046 | dev_dbg(&pdev->dev, "Could not add PMU instance\n"); |
1047 | break; |
1048 | } |
1049 | } |
1050 | |
1051 | rc = cxl_event_config(host_bridge, mds, irq_avail); |
1052 | if (rc) |
1053 | return rc; |
1054 | |
1055 | if (cxl_pci_ras_unmask(pdev)) |
1056 | dev_dbg(&pdev->dev, "No RAS reporting unmasked\n"); |
1057 | |
1058 | pci_save_state(dev: pdev); |
1059 | |
1060 | return rc; |
1061 | } |
1062 | |
1063 | static const struct pci_device_id cxl_mem_pci_tbl[] = { |
1064 | /* PCI class code for CXL.mem Type-3 Devices */ |
1065 | { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)}, |
1066 | { /* terminate list */ }, |
1067 | }; |
1068 | MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); |
1069 | |
1070 | static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) |
1071 | { |
1072 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
1073 | struct cxl_memdev *cxlmd = cxlds->cxlmd; |
1074 | struct device *dev = &cxlmd->dev; |
1075 | |
1076 | dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", |
1077 | dev_name(dev)); |
1078 | pci_restore_state(dev: pdev); |
1079 | if (device_attach(dev) <= 0) |
1080 | return PCI_ERS_RESULT_DISCONNECT; |
1081 | return PCI_ERS_RESULT_RECOVERED; |
1082 | } |
1083 | |
1084 | static void cxl_error_resume(struct pci_dev *pdev) |
1085 | { |
1086 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
1087 | struct cxl_memdev *cxlmd = cxlds->cxlmd; |
1088 | struct device *dev = &cxlmd->dev; |
1089 | |
1090 | dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), |
1091 | dev->driver ? "successful": "failed"); |
1092 | } |
1093 | |
1094 | static void cxl_reset_done(struct pci_dev *pdev) |
1095 | { |
1096 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
1097 | struct cxl_memdev *cxlmd = cxlds->cxlmd; |
1098 | struct device *dev = &pdev->dev; |
1099 | |
1100 | /* |
1101 | * FLR does not expect to touch the HDM decoders and related |
1102 | * registers. SBR, however, will wipe all device configurations. |
1103 | * Issue a warning if there was an active decoder before the reset |
1104 | * that no longer exists. |
1105 | */ |
1106 | guard(device)(T: &cxlmd->dev); |
1107 | if (cxlmd->endpoint && |
1108 | cxl_endpoint_decoder_reset_detected(port: cxlmd->endpoint)) { |
1109 | dev_crit(dev, "SBR happened without memory regions removal.\n"); |
1110 | dev_crit(dev, "System may be unstable if regions hosted system memory.\n"); |
1111 | add_taint(TAINT_USER, LOCKDEP_STILL_OK); |
1112 | } |
1113 | } |
1114 | |
1115 | static const struct pci_error_handlers cxl_error_handlers = { |
1116 | .error_detected = cxl_error_detected, |
1117 | .slot_reset = cxl_slot_reset, |
1118 | .resume = cxl_error_resume, |
1119 | .cor_error_detected = cxl_cor_error_detected, |
1120 | .reset_done = cxl_reset_done, |
1121 | }; |
1122 | |
1123 | static struct pci_driver cxl_pci_driver = { |
1124 | .name = KBUILD_MODNAME, |
1125 | .id_table = cxl_mem_pci_tbl, |
1126 | .probe = cxl_pci_probe, |
1127 | .err_handler = &cxl_error_handlers, |
1128 | .dev_groups = cxl_rcd_groups, |
1129 | .driver = { |
1130 | .probe_type = PROBE_PREFER_ASYNCHRONOUS, |
1131 | }, |
1132 | }; |
1133 | |
1134 | #define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) |
1135 | static void cxl_handle_cper_event(enum cxl_event_type ev_type, |
1136 | struct cxl_cper_event_rec *rec) |
1137 | { |
1138 | struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; |
1139 | struct pci_dev *pdev __free(pci_dev_put) = NULL; |
1140 | enum cxl_event_log_type log_type; |
1141 | struct cxl_dev_state *cxlds; |
1142 | unsigned int devfn; |
1143 | u32 hdr_flags; |
1144 | |
1145 | pr_debug("CPER event %d for device %u:%u:%u.%u\n", ev_type, |
1146 | device_id->segment_num, device_id->bus_num, |
1147 | device_id->device_num, device_id->func_num); |
1148 | |
1149 | devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); |
1150 | pdev = pci_get_domain_bus_and_slot(domain: device_id->segment_num, |
1151 | bus: device_id->bus_num, devfn); |
1152 | if (!pdev) |
1153 | return; |
1154 | |
1155 | guard(device)(T: &pdev->dev); |
1156 | if (pdev->driver != &cxl_pci_driver) |
1157 | return; |
1158 | |
1159 | cxlds = pci_get_drvdata(pdev); |
1160 | if (!cxlds) |
1161 | return; |
1162 | |
1163 | /* Fabricate a log type */ |
1164 | hdr_flags = get_unaligned_le24(p: rec->event.generic.hdr.flags); |
1165 | log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); |
1166 | |
1167 | cxl_event_trace_record(cxlmd: cxlds->cxlmd, type: log_type, event_type: ev_type, |
1168 | uuid: &uuid_null, evt: &rec->event); |
1169 | } |
1170 | |
1171 | static void cxl_cper_work_fn(struct work_struct *work) |
1172 | { |
1173 | struct cxl_cper_work_data wd; |
1174 | |
1175 | while (cxl_cper_kfifo_get(wd: &wd)) |
1176 | cxl_handle_cper_event(ev_type: wd.event_type, rec: &wd.rec); |
1177 | } |
1178 | static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn); |
1179 | |
1180 | static int __init cxl_pci_driver_init(void) |
1181 | { |
1182 | int rc; |
1183 | |
1184 | rc = pci_register_driver(&cxl_pci_driver); |
1185 | if (rc) |
1186 | return rc; |
1187 | |
1188 | rc = cxl_cper_register_work(work: &cxl_cper_work); |
1189 | if (rc) |
1190 | pci_unregister_driver(dev: &cxl_pci_driver); |
1191 | |
1192 | return rc; |
1193 | } |
1194 | |
1195 | static void __exit cxl_pci_driver_exit(void) |
1196 | { |
1197 | cxl_cper_unregister_work(work: &cxl_cper_work); |
1198 | cancel_work_sync(work: &cxl_cper_work); |
1199 | pci_unregister_driver(dev: &cxl_pci_driver); |
1200 | } |
1201 | |
1202 | module_init(cxl_pci_driver_init); |
1203 | module_exit(cxl_pci_driver_exit); |
1204 | MODULE_DESCRIPTION("CXL: PCI manageability"); |
1205 | MODULE_LICENSE("GPL v2"); |
1206 | MODULE_IMPORT_NS("CXL"); |
1207 |
Definitions
- mbox_ready_timeout
- cxl_pci_mbox_wait_for_doorbell
- cxl_dev_id
- cxl_request_irq
- cxl_mbox_background_complete
- cxl_pci_mbox_irq
- cxl_mbox_sanitize_work
- __cxl_pci_mbox_send_cmd
- cxl_pci_mbox_send
- cxl_pci_setup_mailbox
- is_cxl_restricted
- cxl_rcrb_get_comp_regs
- cxl_pci_setup_regs
- cxl_pci_ras_unmask
- free_event_buf
- cxl_mem_alloc_event_buf
- cxl_alloc_irq_vectors
- cxl_event_thread
- cxl_event_req_irq
- cxl_event_get_int_policy
- cxl_event_config_msgnums
- cxl_event_irqsetup
- cxl_event_int_is_fw
- cxl_event_config
- cxl_pci_type3_init_mailbox
- rcd_pcie_cap_emit
- rcd_link_cap_show
- rcd_link_ctrl_show
- rcd_link_status_show
- cxl_rcd_attrs
- cxl_rcd_visible
- cxl_rcd_group
- cxl_pci_probe
- cxl_mem_pci_tbl
- cxl_slot_reset
- cxl_error_resume
- cxl_reset_done
- cxl_error_handlers
- cxl_pci_driver
- cxl_handle_cper_event
- cxl_cper_work_fn
- cxl_cper_work
- cxl_pci_driver_init
Improve your Profiling and Debugging skills
Find out more