// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/unaligned.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/io.h>
#include <cxl/mailbox.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
#include "pmu.h"

/**
 * DOC: cxl pci
 *
 * This implements the PCI exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even if it isn't CXL enabled. While this driver is
 * focused around the PCI specific aspects of a CXL device, it binds to the
 * specific CXL memory device class code, and therefore the implementation of
 * cxl_pci is focused around CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register on the CXL bus.
 *  - Enumerate the device's register interface and map it.
 *  - Register an nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
 */

#define cxl_doorbell_busy(cxlds)                                \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/* CXL 2.0 - 8.2.8.4 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)

/*
 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 * dictate how long to wait for the mailbox to become ready. The new
 * field allows the device to tell software the amount of time to wait
 * before mailbox ready. This field per the spec theoretically allows
 * for up to 255 seconds. 255 seconds is unreasonably long; it's longer
 * than the maximum SATA port link recovery wait. Default to 60 seconds
 * until someone builds a CXL device that needs more time in practice.
 */
static unsigned short mbox_ready_timeout = 60;
module_param(mbox_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");

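/*
 * Poll the mailbox doorbell until the device clears it, or give up after
 * the 2 second CXL_MAILBOX_TIMEOUT_MS window elapses.
 */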
static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
{
	const unsigned long start = jiffies;
	unsigned long end = start;

	while (cxl_doorbell_busy(cxlds)) {
		end = jiffies;

		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
			/* Check again in case preempted before timeout test */
			if (!cxl_doorbell_busy(cxlds))
				break;
			return -ETIMEDOUT;
		}
		cpu_relax();
	}

	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
		jiffies_to_msecs(end) - jiffies_to_msecs(start));
	return 0;
}

#define cxl_err(dev, status, msg)                                        \
	dev_err_ratelimited(dev, msg ", device state %s%s\n",           \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",  \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

#define cxl_cmd_err(dev, cmd, status, msg)                                  \
	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \
			    (cmd)->opcode,                                  \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",     \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

/*
 * Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique
 * wrapper object for each irq within the same cxlds.
 */
struct cxl_dev_id {
	struct cxl_dev_state *cxlds;
};

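/*
 * Request a devm-managed, shared, threaded interrupt with a per-IRQ
 * cxl_dev_id wrapper as the dev_id cookie.
 */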
static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
			   irq_handler_t thread_fn)
{
	struct device *dev = cxlds->dev;
	struct cxl_dev_id *dev_id;

	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
	if (!dev_id)
		return -ENOMEM;
	dev_id->cxlds = cxlds;

	return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
					 IRQF_SHARED | IRQF_ONESHOT, NULL,
					 dev_id);
}

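/* A background command is done when the device reports 100% completion */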
static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
{
	u64 reg;

	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
	return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
}

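/*
 * Background command completion interrupt: sanitize completions are handed
 * off to the polling worker, all other background commands wake the waiter
 * in __cxl_pci_mbox_send_cmd().
 */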
static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
{
	u64 reg;
	u16 opcode;
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;
	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	if (!cxl_mbox_background_complete(cxlds))
		return IRQ_NONE;

	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
	if (opcode == CXL_MBOX_OP_SANITIZE) {
		mutex_lock(&cxl_mbox->mbox_mutex);
		if (mds->security.sanitize_node)
			mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0);
		mutex_unlock(&cxl_mbox->mbox_mutex);
	} else {
		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
		rcuwait_wake_up(&cxl_mbox->mbox_wait);
	}

	return IRQ_HANDLED;
}

/*
 * Sanitization operation polling mode.
 */
static void cxl_mbox_sanitize_work(struct work_struct *work)
{
	struct cxl_memdev_state *mds =
		container_of(work, typeof(*mds), security.poll_dwork.work);
	struct cxl_dev_state *cxlds = &mds->cxlds;
	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;

	mutex_lock(&cxl_mbox->mbox_mutex);
	if (cxl_mbox_background_complete(cxlds)) {
		mds->security.poll_tmo_secs = 0;
		if (mds->security.sanitize_node)
			sysfs_notify_dirent(mds->security.sanitize_node);
		mds->security.sanitize_active = false;

		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
	} else {
		int timeout = mds->security.poll_tmo_secs + 10;

		mds->security.poll_tmo_secs = min(15 * 60, timeout);
		schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
	}
	mutex_unlock(&cxl_mbox->mbox_mutex);
}

/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxl_mbox: CXL mailbox context
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox);
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlds->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxl_mbox->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlds)) {
		u64 md_status =
			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
			    "mailbox queue busy");
		return -EBUSY;
	}

	/*
	 * With sanitize polling, hardware might be done and the poller still
	 * not be in sync. Ensure no new command comes in until the poller has
	 * caught up. Keep the hardware semantics and only allow device health
	 * status.
	 */
	if (mds->security.poll_tmo_secs > 0) {
		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
			return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc == -ETIMEDOUT) {
		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	/*
	 * Handle the background command in a synchronous manner.
	 *
	 * All other mailbox commands will serialize/queue on the mbox_mutex,
	 * which we currently hold. Furthermore this also guarantees that
	 * cxl_mbox_background_complete() checks are safe amongst each other,
	 * in that no new bg operation can occur in between.
	 *
	 * Background operations are timesliced in accordance with the nature
	 * of the command. In the event of timeout, the mailbox state is
	 * indeterminate until the next successful command submission and the
	 * driver can get back in sync with the hardware state.
	 */
	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
		u64 bg_status_reg;
		int i, timeout;

		/*
		 * Sanitization is a special case which monopolizes the device
		 * and cannot be timesliced. Handle asynchronously instead,
		 * and allow userspace to poll(2) for completion.
		 */
		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
			if (mds->security.sanitize_active)
				return -EBUSY;

			/* give first timeout a second */
			timeout = 1;
			mds->security.poll_tmo_secs = timeout;
			mds->security.sanitize_active = true;
			schedule_delayed_work(&mds->security.poll_dwork,
					      timeout * HZ);
			dev_dbg(dev, "Sanitization operation started\n");
			goto success;
		}

		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
			mbox_cmd->opcode);

		timeout = mbox_cmd->poll_interval_ms;
		for (i = 0; i < mbox_cmd->poll_count; i++) {
			if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait,
						       cxl_mbox_background_complete(cxlds),
						       TASK_UNINTERRUPTIBLE,
						       msecs_to_jiffies(timeout)) > 0)
				break;
		}

		if (!cxl_mbox_background_complete(cxlds)) {
			dev_err(dev, "timeout waiting for background (%d ms)\n",
				timeout * mbox_cmd->poll_count);
			return -ETIMEDOUT;
		}

		bg_status_reg = readq(cxlds->regs.mbox +
				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
		mbox_cmd->return_code =
			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
				  bg_status_reg);
		dev_dbg(dev,
			"Mailbox background operation (0x%04x) completed\n",
			mbox_cmd->opcode);
	}

	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(mbox_cmd));
		return 0; /* completed but caller must check return_code */
	}

success:
	/* #7 */
	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n;

		n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len);
		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}

static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
			     struct cxl_mbox_cmd *cmd)
{
	int rc;

	mutex_lock(&cxl_mbox->mbox_mutex);
	rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd);
	mutex_unlock(&cxl_mbox->mbox_mutex);

	return rc;
}

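/*
 * Wait for the mailbox interface to report ready, sanity check the payload
 * size, and opt in to background command interrupts when a vector is
 * available.
 */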
static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
{
	struct cxl_dev_state *cxlds = &mds->cxlds;
	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
	struct device *dev = cxlds->dev;
	unsigned long timeout;
	int irq, msgnum;
	u64 md_status;
	u32 ctrl;

	timeout = jiffies + mbox_ready_timeout * HZ;
	do {
		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
		if (md_status & CXLMDEV_MBOX_IF_READY)
			break;
		if (msleep_interruptible(100))
			break;
	} while (!time_after(jiffies, timeout));

	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
		cxl_err(dev, md_status, "timeout awaiting mailbox ready");
		return -ETIMEDOUT;
	}

	/*
	 * A command may be in flight from a previous driver instance,
	 * think kexec, do one doorbell wait so that
	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
	 * source for future doorbell busy events.
	 */
	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
		cxl_err(dev, md_status, "timeout awaiting mailbox idle");
		return -ETIMEDOUT;
	}

	cxl_mbox->mbox_send = cxl_pci_mbox_send;
	cxl_mbox->payload_size =
		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);

	/*
	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
	 *
	 * If the size is too small, mandatory commands will not work and so
	 * there's no point in going forward. If the size is too large, there's
	 * no harm in soft limiting it.
	 */
	cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M);
	if (cxl_mbox->payload_size < 256) {
		dev_err(dev, "Mailbox is too small (%zub)",
			cxl_mbox->payload_size);
		return -ENXIO;
	}

	dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size);

	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);

	/* background command interrupts are optional */
	if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail)
		return 0;

	msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
	irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
	if (irq < 0)
		return 0;

	if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq))
		return 0;

	dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n");
	/* enable background command mbox irq support */
	ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
	ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
	writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	return 0;
}

/*
 * Assume that any RCIEP that emits the CXL memory expander class code
 * is an RCD
 */
static bool is_cxl_restricted(struct pci_dev *pdev)
{
	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
}

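/*
 * For a Restricted CXL Device the component registers are discovered via the
 * RCRB of the parent dport rather than a Register Locator DVSEC entry.
 */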
static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
				  struct cxl_register_map *map,
				  struct cxl_dport *dport)
{
	resource_size_t component_reg_phys;

	*map = (struct cxl_register_map) {
		.host = &pdev->dev,
		.resource = CXL_RESOURCE_NONE,
	};

	struct cxl_port *port __free(put_cxl_port) =
		cxl_pci_find_port(pdev, &dport);
	if (!port)
		return -EPROBE_DEFER;

	component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport);
	if (component_reg_phys == CXL_RESOURCE_NONE)
		return -ENXIO;

	map->resource = component_reg_phys;
	map->reg_type = CXL_REGLOC_RBI_COMPONENT;
	map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE;

	return 0;
}

static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
			      struct cxl_register_map *map)
{
	int rc;

	rc = cxl_find_regblock(pdev, type, map);

	/*
	 * If the Register Locator DVSEC does not exist, check if it
	 * is an RCH and try to extract the Component Registers from
	 * an RCRB.
	 */
	if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) {
		struct cxl_dport *dport;
		struct cxl_port *port __free(put_cxl_port) =
			cxl_pci_find_port(pdev, &dport);
		if (!port)
			return -EPROBE_DEFER;

		rc = cxl_rcrb_get_comp_regs(pdev, map, dport);
		if (rc)
			return rc;

		rc = cxl_dport_map_rcd_linkcap(pdev, dport);
		if (rc)
			return rc;

	} else if (rc) {
		return rc;
	}

	return cxl_setup_regs(map);
}

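/*
 * RAS error reporting in the component registers is masked by default. When
 * the OS has native AER control, unmask the error classes that AER reporting
 * has enabled in the PCIe Device Control register.
 */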
static int cxl_pci_ras_unmask(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	void __iomem *addr;
	u32 orig_val, val, mask;
	u16 cap;
	int rc;

	if (!cxlds->regs.ras) {
		dev_dbg(&pdev->dev, "No RAS registers.\n");
		return 0;
	}

	/* BIOS has PCIe AER error control */
	if (!pcie_aer_is_native(pdev))
		return 0;

	rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
	if (rc)
		return rc;

	if (cap & PCI_EXP_DEVCTL_URRE) {
		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
		orig_val = readl(addr);

		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK |
		       CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
		val = orig_val & ~mask;
		writel(val, addr);
		dev_dbg(&pdev->dev,
			"Uncorrectable RAS Errors Mask: %#x -> %#x\n",
			orig_val, val);
	}

	if (cap & PCI_EXP_DEVCTL_CERE) {
		addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
		orig_val = readl(addr);
		val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
		writel(val, addr);
		dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
			orig_val, val);
	}

	return 0;
}

static void free_event_buf(void *buf)
{
	kvfree(buf);
}

/*
 * There is a single buffer for reading event logs from the mailbox. All logs
 * share this buffer protected by the mds->event_log_lock.
 */
static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
	struct cxl_get_event_payload *buf;

	buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	mds->event.buf = buf;

	return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
}

static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
{
	int nvecs;

	/*
	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
	 * not generate INTx messages if that function participates in
	 * CXL.cache or CXL.mem.
	 *
	 * Additionally pci_alloc_irq_vectors() handles calling
	 * pci_free_irq_vectors() automatically despite not being a pcim_*()
	 * helper. See pci_setup_msi_context().
	 */
	nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (nvecs < 1) {
		dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
		return false;
	}
	return true;
}

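/*
 * Threaded handler for event log interrupts: keep draining the known event
 * logs until the device event status register reports nothing pending.
 */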
static irqreturn_t cxl_event_thread(int irq, void *id)
{
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	u32 status;

	do {
		/*
		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
		 * ignore the reserved upper 32 bits
		 */
		status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
		/* Ignore logs unknown to the driver */
		status &= CXLDEV_EVENT_STATUS_ALL;
		if (!status)
			break;
		cxl_mem_get_event_records(mds, status);
		cond_resched();
	} while (status);

	return IRQ_HANDLED;
}

static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int irq;

	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
		return -ENXIO;

	irq = pci_irq_vector(pdev,
			     FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
	if (irq < 0)
		return irq;

	return cxl_request_irq(cxlds, irq, cxl_event_thread);
}

static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
		.payload_out = policy,
		.size_out = sizeof(*policy),
	};
	int rc;

	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
	if (rc < 0)
		dev_err(mds->cxlds.dev,
			"Failed to get event interrupt policy : %d", rc);

	return rc;
}

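/*
 * Request MSI/MSI-X delivery for all four event logs and then read back the
 * interrupt policy the device actually accepted.
 */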
static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
	struct cxl_mbox_cmd mbox_cmd;
	int rc;

	*policy = (struct cxl_event_interrupt_policy) {
		.info_settings = CXL_INT_MSI_MSIX,
		.warn_settings = CXL_INT_MSI_MSIX,
		.failure_settings = CXL_INT_MSI_MSIX,
		.fatal_settings = CXL_INT_MSI_MSIX,
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
		.payload_in = policy,
		.size_in = sizeof(*policy),
	};

	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
	if (rc < 0) {
		dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
			rc);
		return rc;
	}

	/* Retrieve final interrupt settings */
	return cxl_event_get_int_policy(mds, policy);
}

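/*
 * Program the event interrupt policy and request an IRQ for each log's
 * reported message number.
 */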
static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
{
	struct cxl_dev_state *cxlds = &mds->cxlds;
	struct cxl_event_interrupt_policy policy;
	int rc;

	rc = cxl_event_config_msgnums(mds, &policy);
	if (rc)
		return rc;

	rc = cxl_event_req_irq(cxlds, policy.info_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.warn_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.failure_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
		return rc;
	}

	return 0;
}

static bool cxl_event_int_is_fw(u8 setting)
{
	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);

	return mode == CXL_INT_FW;
}

static int cxl_event_config(struct pci_host_bridge *host_bridge,
			    struct cxl_memdev_state *mds, bool irq_avail)
{
	struct cxl_event_interrupt_policy policy;
	int rc;

	/*
	 * When BIOS maintains CXL error reporting control, it will process
	 * event records. Only one agent can do so.
	 */
	if (!host_bridge->native_cxl_error)
		return 0;

	if (!irq_avail) {
		dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n");
		return 0;
	}

	rc = cxl_event_get_int_policy(mds, &policy);
	if (rc)
		return rc;

	if (cxl_event_int_is_fw(policy.info_settings) ||
	    cxl_event_int_is_fw(policy.warn_settings) ||
	    cxl_event_int_is_fw(policy.failure_settings) ||
	    cxl_event_int_is_fw(policy.fatal_settings)) {
		dev_err(mds->cxlds.dev,
			"FW still in control of Event Logs despite _OSC settings\n");
		return -EBUSY;
	}

	rc = cxl_mem_alloc_event_buf(mds);
	if (rc)
		return rc;

	rc = cxl_event_irqsetup(mds);
	if (rc)
		return rc;

	cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);

	return 0;
}

static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds)
{
	int rc;

	/*
	 * Fail the init if there's no mailbox. For a type3 this is out of spec.
	 */
	if (!cxlds->reg_map.device_map.mbox.valid)
		return -ENODEV;

	rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev);
	if (rc)
		return rc;

	return 0;
}

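/*
 * RCDs expose PCIe link registers through the RCRB rather than a standard
 * config space capability. Emit the requested register from the dport
 * mapping, but only while the root device remains bound to a driver.
 */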
static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width)
{
	struct cxl_dev_state *cxlds = dev_get_drvdata(dev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *root_dev;
	struct cxl_dport *dport;
	struct cxl_port *root __free(put_cxl_port) =
		cxl_mem_find_port(cxlmd, &dport);

	if (!root)
		return -ENXIO;

	root_dev = root->uport_dev;
	if (!root_dev)
		return -ENXIO;

	if (!dport->regs.rcd_pcie_cap)
		return -ENXIO;

	guard(device)(root_dev);
	if (!root_dev->driver)
		return -ENXIO;

	switch (width) {
	case 2:
		return sysfs_emit(buf, "%#x\n",
				  readw(dport->regs.rcd_pcie_cap + offset));
	case 4:
		return sysfs_emit(buf, "%#x\n",
				  readl(dport->regs.rcd_pcie_cap + offset));
	default:
		return -EINVAL;
	}
}

static ssize_t rcd_link_cap_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, sizeof(u32));
}
static DEVICE_ATTR_RO(rcd_link_cap);

static ssize_t rcd_link_ctrl_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, sizeof(u16));
}
static DEVICE_ATTR_RO(rcd_link_ctrl);

static ssize_t rcd_link_status_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, sizeof(u16));
}
static DEVICE_ATTR_RO(rcd_link_status);

static struct attribute *cxl_rcd_attrs[] = {
	&dev_attr_rcd_link_cap.attr,
	&dev_attr_rcd_link_ctrl.attr,
	&dev_attr_rcd_link_status.attr,
	NULL
};

static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (is_cxl_restricted(pdev))
		return a->mode;

	return 0;
}

static struct attribute_group cxl_rcd_group = {
	.attrs = cxl_rcd_attrs,
	.is_visible = cxl_rcd_visible,
};
__ATTRIBUTE_GROUPS(cxl_rcd);

static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
	struct cxl_dpa_info range_info = { 0 };
	struct cxl_memdev_state *mds;
	struct cxl_dev_state *cxlds;
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	int rc, pmu_count;
	unsigned int i;
	bool irq_avail;

	/*
	 * Double check the anonymous union trickery in struct cxl_regs
	 * FIXME switch to struct_group()
	 */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;
	pci_set_master(pdev);

	mds = cxl_memdev_state_create(&pdev->dev);
	if (IS_ERR(mds))
		return PTR_ERR(mds);
	cxlds = &mds->cxlds;
	pci_set_drvdata(pdev, cxlds);

	cxlds->rcd = is_cxl_restricted(pdev);
	cxlds->serial = pci_get_dsn(pdev);
	cxlds->cxl_dvsec = pci_find_dvsec_capability(
		pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
	if (!cxlds->cxl_dvsec)
		dev_warn(&pdev->dev,
			 "Device DVSEC not present, skip CXL.mem init\n");

	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	rc = cxl_map_device_regs(&map, &cxlds->regs.device_regs);
	if (rc)
		return rc;

	/*
	 * If the component registers can't be found, the cxl_pci driver may
	 * still be useful for management functions so don't return an error.
	 */
	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
				&cxlds->reg_map);
	if (rc)
		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
	else if (!cxlds->reg_map.component_map.ras.valid)
		dev_dbg(&pdev->dev, "RAS registers not found\n");

	rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component,
				    BIT(CXL_CM_CAP_CAP_ID_RAS));
	if (rc)
		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");

	rc = cxl_pci_type3_init_mailbox(cxlds);
	if (rc)
		return rc;

	rc = cxl_await_media_ready(cxlds);
	if (rc == 0)
		cxlds->media_ready = true;
	else
		dev_warn(&pdev->dev, "Media not active (%d)\n", rc);

	irq_avail = cxl_alloc_irq_vectors(pdev);

	rc = cxl_pci_setup_mailbox(mds, irq_avail);
	if (rc)
		return rc;

	rc = cxl_enumerate_cmds(mds);
	if (rc)
		return rc;

	rc = cxl_set_timestamp(mds);
	if (rc)
		return rc;

	rc = cxl_poison_state_init(mds);
	if (rc)
		return rc;

	rc = cxl_dev_state_identify(mds);
	if (rc)
		return rc;

	rc = cxl_mem_dpa_fetch(mds, &range_info);
	if (rc)
		return rc;

	rc = cxl_dpa_setup(cxlds, &range_info);
	if (rc)
		return rc;

	rc = devm_cxl_setup_features(cxlds);
	if (rc)
		dev_dbg(&pdev->dev, "No CXL Features discovered\n");

	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
	if (rc)
		return rc;

	rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
	if (rc)
		return rc;

	rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
	if (rc)
		dev_dbg(&pdev->dev, "No CXL FWCTL setup\n");

	pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
	if (pmu_count < 0)
		return pmu_count;

	for (i = 0; i < pmu_count; i++) {
		struct cxl_pmu_regs pmu_regs;

		rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i);
		if (rc) {
			dev_dbg(&pdev->dev, "Could not find PMU regblock\n");
			break;
		}

		rc = cxl_map_pmu_regs(&map, &pmu_regs);
		if (rc) {
			dev_dbg(&pdev->dev, "Could not map PMU regs\n");
			break;
		}

		rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV);
		if (rc) {
			dev_dbg(&pdev->dev, "Could not add PMU instance\n");
			break;
		}
	}

	rc = cxl_event_config(host_bridge, mds, irq_avail);
	if (rc)
		return rc;

	if (cxl_pci_ras_unmask(pdev))
		dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");

	pci_save_state(pdev);

	return rc;
}

static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
		 dev_name(dev));
	pci_restore_state(pdev);
	if (device_attach(dev) <= 0)
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}

static void cxl_error_resume(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
		 dev->driver ? "successful" : "failed");
}

static void cxl_reset_done(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &pdev->dev;

	/*
	 * FLR does not expect to touch the HDM decoders and related
	 * registers. SBR, however, will wipe all device configurations.
	 * Issue a warning if there was an active decoder before the reset
	 * that no longer exists.
	 */
	guard(device)(&cxlmd->dev);
	if (cxlmd->endpoint &&
	    cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) {
		dev_crit(dev, "SBR happened without memory regions removal.\n");
		dev_crit(dev, "System may be unstable if regions hosted system memory.\n");
		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	}
}

static const struct pci_error_handlers cxl_error_handlers = {
	.error_detected = cxl_error_detected,
	.slot_reset = cxl_slot_reset,
	.resume = cxl_error_resume,
	.cor_error_detected = cxl_cor_error_detected,
	.reset_done = cxl_reset_done,
};

static struct pci_driver cxl_pci_driver = {
	.name = KBUILD_MODNAME,
	.id_table = cxl_mem_pci_tbl,
	.probe = cxl_pci_probe,
	.err_handler = &cxl_error_handlers,
	.dev_groups = cxl_rcd_groups,
	.driver = {
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
	},
};

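/*
 * Route a firmware-notified (CPER) CXL event record to the cxl_pci instance
 * that matches the segment/bus/device/function recorded in the event header,
 * then trace it with a log type derived from the record severity.
 */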
#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
static void cxl_handle_cper_event(enum cxl_event_type ev_type,
				  struct cxl_cper_event_rec *rec)
{
	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
	struct pci_dev *pdev __free(pci_dev_put) = NULL;
	enum cxl_event_log_type log_type;
	struct cxl_dev_state *cxlds;
	unsigned int devfn;
	u32 hdr_flags;

	pr_debug("CPER event %d for device %u:%u:%u.%u\n", ev_type,
		 device_id->segment_num, device_id->bus_num,
		 device_id->device_num, device_id->func_num);

	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
					   device_id->bus_num, devfn);
	if (!pdev)
		return;

	guard(device)(&pdev->dev);
	if (pdev->driver != &cxl_pci_driver)
		return;

	cxlds = pci_get_drvdata(pdev);
	if (!cxlds)
		return;

	/* Fabricate a log type */
	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);

	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
			       &uuid_null, &rec->event);
}

static void cxl_cper_work_fn(struct work_struct *work)
{
	struct cxl_cper_work_data wd;

	while (cxl_cper_kfifo_get(&wd))
		cxl_handle_cper_event(wd.event_type, &wd.rec);
}
static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn);

static int __init cxl_pci_driver_init(void)
{
	int rc;

	rc = pci_register_driver(&cxl_pci_driver);
	if (rc)
		return rc;

	rc = cxl_cper_register_work(&cxl_cper_work);
	if (rc)
		pci_unregister_driver(&cxl_pci_driver);

	return rc;
}

static void __exit cxl_pci_driver_exit(void)
{
	cxl_cper_unregister_work(&cxl_cper_work);
	cancel_work_sync(&cxl_cper_work);
	pci_unregister_driver(&cxl_pci_driver);
}

module_init(cxl_pci_driver_init);
module_exit(cxl_pci_driver_exit);
MODULE_DESCRIPTION("CXL: PCI manageability");
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS("CXL");
| 1207 | |