// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 * Copyright (C) 2016 Christoph Hellwig.
 */
#include <linux/bitfield.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>

#include "../pci.h"
#include "msi.h"

bool pci_msi_enable = true;

/**
 * pci_msi_supported - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested?
 *
 * Look at global flags, the device itself, and its parent buses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 1, else return 0.
 **/
static int pci_msi_supported(struct pci_dev *dev, int nvec)
{
	struct pci_bus *bus;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable)
		return 0;

	if (!dev || dev->no_msi)
		return 0;

	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return 0;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 *
	 * The NO_MSI flag can either be set directly by:
	 * - arch-specific PCI host bus controller drivers (deprecated)
	 * - quirks for specific PCI bridges
	 *
	 * or indirectly by platform-specific PCI host bridge drivers by
	 * advertising the 'msi_domain' property, which results in
	 * the NO_MSI flag when no MSI domain is found for this bridge
	 * at probe time.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return 0;

	return 1;
}

static void pcim_msi_release(void *pcidev)
{
	struct pci_dev *dev = pcidev;

	dev->is_msi_managed = false;
	pci_free_irq_vectors(dev);
}

/*
 * Needs to be separate from pcim_release to prevent an ordering problem
 * vs. msi_device_data_release() in the MSI core code.
 */
static int pcim_setup_msi_release(struct pci_dev *dev)
{
	int ret;

	if (!pci_is_managed(dev) || dev->is_msi_managed)
		return 0;

	ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
	if (ret)
		return ret;

	dev->is_msi_managed = true;
	return 0;
}

/*
 * Ordering vs. devres: msi device data has to be installed first so that
 * pcim_msi_release() is invoked before it on device release.
 */
static int pci_setup_msi_context(struct pci_dev *dev)
{
	int ret = msi_setup_device_data(&dev->dev);

	if (ret)
		return ret;

	return pcim_setup_msi_release(dev);
}

/*
 * Helper functions for mask/unmask and MSI message handling
 */

void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
{
	raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
	unsigned long flags;

	if (!desc->pci.msi_attrib.can_mask)
		return;

	raw_spin_lock_irqsave(lock, flags);
	desc->pci.msi_mask &= ~clear;
	desc->pci.msi_mask |= set;
	pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos,
			       desc->pci.msi_mask);
	raw_spin_unlock_irqrestore(lock, flags);
}

/**
 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
 * @data: pointer to irqdata associated to that interrupt
 */
void pci_msi_mask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	__pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_mask_irq);

/**
 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
 * @data: pointer to irqdata associated to that interrupt
 */
void pci_msi_unmask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	__pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
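
/*
 * Hedged usage sketch (illustrative, not part of the kernel sources): MSI
 * controller drivers typically plug the two callbacks above into their
 * irq_chip, either explicitly as below or implicitly via
 * MSI_FLAG_USE_DEF_CHIP_OPS. The chip name and the ack callback choice are
 * assumptions for the example only.
 *
 *	static struct irq_chip example_pci_msi_chip = {
 *		.name		= "example-MSI",
 *		.irq_ack	= irq_chip_ack_parent,
 *		.irq_mask	= pci_msi_mask_irq,
 *		.irq_unmask	= pci_msi_unmask_irq,
 *	};
 */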

void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	BUG_ON(dev->current_state != PCI_D0);

	if (entry->pci.msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);

		if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual))
			return;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 data;

		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
		if (entry->pci.msi_attrib.is_64) {
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
		}
		msg->data = data;
	}
}

static inline void pci_write_msg_msi(struct pci_dev *dev, struct msi_desc *desc,
				     struct msi_msg *msg)
{
	int pos = dev->msi_cap;
	u16 msgctl;

	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	msgctl &= ~PCI_MSI_FLAGS_QSIZE;
	msgctl |= FIELD_PREP(PCI_MSI_FLAGS_QSIZE, desc->pci.msi_attrib.multiple);
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo);
	if (desc->pci.msi_attrib.is_64) {
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, msg->address_hi);
		pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data);
	} else {
		pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data);
	}
	/* Ensure that the writes are visible in the device */
	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
}

static inline void pci_write_msg_msix(struct msi_desc *desc, struct msi_msg *msg)
{
	void __iomem *base = pci_msix_desc_addr(desc);
	u32 ctrl = desc->pci.msix_ctrl;
	bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);

	if (desc->pci.msi_attrib.is_virtual)
		return;
	/*
	 * The specification mandates that the entry is masked
	 * when the message is modified:
	 *
	 * "If software changes the Address or Data value of an
	 * entry while the entry is unmasked, the result is
	 * undefined."
	 */
	if (unmasked)
		pci_msix_write_vector_ctrl(desc, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);

	writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
	writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
	writel(msg->data, base + PCI_MSIX_ENTRY_DATA);

	if (unmasked)
		pci_msix_write_vector_ctrl(desc, ctrl);

	/* Ensure that the writes are visible in the device */
	readl(base + PCI_MSIX_ENTRY_DATA);
}

void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
		/* Don't touch the hardware now */
	} else if (entry->pci.msi_attrib.is_msix) {
		pci_write_msg_msix(entry, msg);
	} else {
		pci_write_msg_msi(dev, entry, msg);
	}

	entry->msg = *msg;

	if (entry->write_msi_msg)
		entry->write_msi_msg(entry, entry->write_msi_msg_data);
}

void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__pci_write_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(pci_write_msi_msg);
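
/*
 * Hedged usage sketch (illustrative, not part of the kernel sources): code
 * outside the MSI core that needs to push a recomputed message for an
 * already allocated Linux IRQ can compose it from the parent domain and
 * write it back. The variable names are assumptions for the example only.
 *
 *	struct msi_msg msg;
 *
 *	irq_chip_compose_msi_msg(irq_get_irq_data(virq), &msg);
 *	pci_write_msi_msg(virq, &msg);
 */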


/* PCI/MSI specific functionality */

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

static void pci_msi_set_enable(struct pci_dev *dev, int enable)
{
	u16 control;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
			      struct irq_affinity_desc *masks)
{
	struct msi_desc desc;
	u16 control;

	/* MSI Entry Initialization */
	memset(&desc, 0, sizeof(desc));

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	/* Lies, damned lies, and MSIs */
	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
		control |= PCI_MSI_FLAGS_MASKBIT;
	if (pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY))
		control &= ~PCI_MSI_FLAGS_MASKBIT;

	desc.nvec_used = nvec;
	desc.pci.msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
	desc.pci.msi_attrib.can_mask = !!(control & PCI_MSI_FLAGS_MASKBIT);
	desc.pci.msi_attrib.default_irq = dev->irq;
	desc.pci.msi_attrib.multi_cap = FIELD_GET(PCI_MSI_FLAGS_QMASK, control);
	desc.pci.msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
	desc.affinity = masks;

	if (control & PCI_MSI_FLAGS_64BIT)
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;

	/* Save the initial mask status */
	if (desc.pci.msi_attrib.can_mask)
		pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask);

	return msi_insert_msi_desc(&dev->dev, &desc);
}

static int msi_verify_entries(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!dev->no_64bit_msi)
		return 0;

	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
		if (entry->msg.address_hi) {
			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
				entry->msg.address_hi, entry->msg.address_lo);
			break;
		}
	}
	return !entry ? 0 : -EIO;
}

static int __msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks)
{
	int ret = msi_setup_msi_desc(dev, nvec, masks);
	struct msi_desc *entry, desc;

	if (ret)
		return ret;

	/* All MSIs are unmasked by default; mask them all */
	entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
	pci_msi_mask(entry, msi_multi_mask(entry));
	/*
	 * Copy the MSI descriptor for the error path because
	 * pci_msi_setup_msi_irqs() will free it for the hierarchical
	 * interrupt domain case.
	 */
	memcpy(&desc, entry, sizeof(desc));

	/* Configure MSI capability structure */
	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret)
		goto err;

	ret = msi_verify_entries(dev);
	if (ret)
		goto err;

	/* Set MSI enabled bits */
	dev->msi_enabled = 1;
	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 1);

	pcibios_free_irq(dev);
	dev->irq = entry->irq;
	return 0;
err:
	pci_msi_unmask(&desc, msi_multi_mask(&desc));
	pci_free_msi_irqs(dev);
	return ret;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 * @affd: description of automatic IRQ affinity assignments (may be %NULL)
 *
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts. A return value of zero indicates the successful
 * setup of an entry with the new MSI IRQ. A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec,
			       struct irq_affinity *affd)
{
	/* Reject multi-MSI early on irq domain enabled architectures */
	if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY))
		return 1;

	/*
	 * Disable MSI during setup in the hardware, but mark it enabled
	 * so that setup code can evaluate it.
	 */
	pci_msi_set_enable(dev, 0);

	struct irq_affinity_desc *masks __free(kfree) =
		affd ? irq_create_affinity_masks(nvec, affd) : NULL;

	guard(msi_descs_lock)(&dev->dev);
	return __msi_capability_init(dev, nvec, masks);
}

int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
			   struct irq_affinity *affd)
{
	int nvec;
	int rc;

	if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	/* Check whether driver already requested MSI-X IRQs */
	if (dev->msix_enabled) {
		pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
		return -EINVAL;
	}

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msi_enabled))
		return -EINVAL;

	/* Test for the availability of MSI support */
	if (!pci_msi_domain_supports(dev, 0, ALLOW_LEGACY))
		return -ENOTSUPP;

	nvec = pci_msi_vec_count(dev);
	if (nvec < 0)
		return nvec;
	if (nvec < minvec)
		return -ENOSPC;

	rc = pci_setup_msi_context(dev);
	if (rc)
		return rc;

	if (!pci_setup_msi_device_domain(dev, nvec))
		return -ENODEV;

	if (nvec > maxvec)
		nvec = maxvec;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = msi_capability_init(dev, nvec, affd);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}
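
/*
 * Hedged usage sketch (illustrative, not part of the kernel sources): a
 * device driver normally reaches this function through
 * pci_alloc_irq_vectors() with PCI_IRQ_MSI set in its flags argument. The
 * handler and cookie names below are assumptions for the example only.
 *
 *	nvec = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_MSI);
 *	if (nvec < 0)
 *		return nvec;
 *	err = request_irq(pci_irq_vector(pdev, 0), foo_irq_handler, 0,
 *			  "foo", foo);
 */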

/**
 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 * @dev: device to report about
 *
 * This function returns the number of MSI vectors a device requested via
 * the Multiple Message Capable register. It returns a negative errno if the
 * device is not capable of sending MSI interrupts. Otherwise, the call
 * succeeds and returns a power of two, up to a maximum of 2^5 (32),
 * according to the MSI specification.
 **/
int pci_msi_vec_count(struct pci_dev *dev)
{
	int ret;
	u16 msgctl;

	if (!dev->msi_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	ret = 1 << FIELD_GET(PCI_MSI_FLAGS_QMASK, msgctl);

	return ret;
}
EXPORT_SYMBOL(pci_msi_vec_count);
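
/*
 * Hedged usage sketch (illustrative, not part of the kernel sources): a
 * multi-vector MSI driver can use the returned value to clamp its request
 * before allocating vectors. FOO_MAX_VECS is an assumed, driver-specific
 * limit for the example only.
 *
 *	int max = pci_msi_vec_count(pdev);
 *
 *	if (max < 0)
 *		return max;
 *	nvec = pci_alloc_irq_vectors(pdev, 1, min(max, FOO_MAX_VECS),
 *				     PCI_IRQ_MSI);
 */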

/*
 * Architecture override returns true when the PCI MSI message should be
 * written by the generic restore function.
 */
bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
{
	return true;
}

void __pci_restore_msi_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	u16 control;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);

	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 0);
	if (arch_restore_msi_irqs(dev))
		__pci_write_msi_msg(entry, &entry->msg);

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	pci_msi_update_mask(entry, 0, 0);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= PCI_MSI_FLAGS_ENABLE |
		   FIELD_PREP(PCI_MSI_FLAGS_QSIZE, entry->pci.msi_attrib.multiple);
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

void pci_msi_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	/* Return the device with MSI unmasked as its initial state */
	desc = msi_first_desc(&dev->dev, MSI_DESC_ALL);
	if (!WARN_ON_ONCE(!desc))
		pci_msi_unmask(desc, msi_multi_mask(desc));

	/* Restore dev->irq to its default pin-assertion IRQ */
	dev->irq = desc->pci.msi_attrib.default_irq;
	pcibios_alloc_irq(dev);
}
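
/*
 * Hedged usage sketch (illustrative, not part of the kernel sources): the
 * shutdown path above runs via pci_disable_msi(), which drivers typically
 * reach through pci_free_irq_vectors() in their remove path. The names are
 * assumptions for the example only.
 *
 *	free_irq(pci_irq_vector(pdev, 0), foo);
 *	pci_free_irq_vectors(pdev);
 */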

/* PCI/MSI-X specific functionality */

static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
{
	u16 ctrl;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
	ctrl &= ~clear;
	ctrl |= set;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
}

static void __iomem *msix_map_region(struct pci_dev *dev,
				     unsigned int nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	unsigned long flags;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	flags = pci_resource_flags(dev, bir);
	if (!flags || (flags & IORESOURCE_UNSET))
		return NULL;

	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

/**
 * msix_prepare_msi_desc - Prepare a half initialized MSI descriptor for operation
 * @dev: The PCI device for which the descriptor is prepared
 * @desc: The MSI descriptor for preparation
 *
 * This is separate from msix_setup_msi_descs() below to handle dynamic
 * allocations for MSI-X after initial enablement.
 *
 * Ideally the whole MSI-X setup would work that way, but there is no way to
 * support this for the legacy arch_setup_msi_irqs() mechanism and for the
 * fake irq domains like the x86 XEN one. Sigh...
 *
 * The descriptor is zeroed and only @desc::msi_index and @desc::affinity
 * are set. When called from msix_setup_msi_descs() then the is_virtual
 * attribute is initialized as well.
 *
 * Fill in the rest.
 */
void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc)
{
	desc->nvec_used = 1;
	desc->pci.msi_attrib.is_msix = 1;
	desc->pci.msi_attrib.is_64 = 1;
	desc->pci.msi_attrib.default_irq = dev->irq;
	desc->pci.mask_base = dev->msix_base;

	if (!pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY) &&
	    !desc->pci.msi_attrib.is_virtual) {
		void __iomem *addr = pci_msix_desc_addr(desc);

		desc->pci.msi_attrib.can_mask = 1;
		/* Workaround for SUN NIU insanity, which requires write before read */
		if (dev->dev_flags & PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST)
			writel(0, addr + PCI_MSIX_ENTRY_DATA);
		desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
	}
}

static int msix_setup_msi_descs(struct pci_dev *dev, struct msix_entry *entries,
				int nvec, struct irq_affinity_desc *masks)
{
	int ret = 0, i, vec_count = pci_msix_vec_count(dev);
	struct irq_affinity_desc *curmsk;
	struct msi_desc desc;

	memset(&desc, 0, sizeof(desc));

	for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) {
		desc.msi_index = entries ? entries[i].entry : i;
		desc.affinity = masks ? curmsk : NULL;
		desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count;

		msix_prepare_msi_desc(dev, &desc);

		ret = msi_insert_msi_desc(&dev->dev, &desc);
		if (ret)
			break;
	}
	return ret;
}

static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
{
	struct msi_desc *desc;

	if (entries) {
		msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) {
			entries->vector = desc->irq;
			entries++;
		}
	}
}

static void msix_mask_all(void __iomem *base, int tsize)
{
	u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
	int i;

	for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
		writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
}

DEFINE_FREE(free_msi_irqs, struct pci_dev *, if (_T) pci_free_msi_irqs(_T));

static int __msix_setup_interrupts(struct pci_dev *__dev, struct msix_entry *entries,
				   int nvec, struct irq_affinity_desc *masks)
{
	struct pci_dev *dev __free(free_msi_irqs) = __dev;

	int ret = msix_setup_msi_descs(dev, entries, nvec, masks);
	if (ret)
		return ret;

	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		return ret;

	/* Check if all MSI entries honor device restrictions */
	ret = msi_verify_entries(dev);
	if (ret)
		return ret;

	msix_update_entries(dev, entries);
	retain_and_null_ptr(dev);
	return 0;
}

static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries,
				 int nvec, struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks __free(kfree) =
		affd ? irq_create_affinity_masks(nvec, affd) : NULL;

	guard(msi_descs_lock)(&dev->dev);
	return __msix_setup_interrupts(dev, entries, nvec, masks);
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 * @affd: Optional pointer to enable automatic affinity assignment
 *
 * Setup the MSI-X capability structure of the device function with the
 * requested number of MSI-X interrupts. A return of zero indicates the
 * successful setup of the requested MSI-X entries with allocated IRQs,
 * non-zero indicates a failure.
 **/
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
				int nvec, struct irq_affinity *affd)
{
	int ret, tsize;
	u16 control;

	/*
	 * Some devices require MSI-X to be enabled before the MSI-X
	 * registers can be accessed. Mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
				    PCI_MSIX_FLAGS_ENABLE);

	/* Mark it enabled so setup functions can query it */
	dev->msix_enabled = 1;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	/* Request & Map MSI-X table region */
	tsize = msix_table_size(control);
	dev->msix_base = msix_map_region(dev, tsize);
	if (!dev->msix_base) {
		ret = -ENOMEM;
		goto out_disable;
	}

	ret = msix_setup_interrupts(dev, entries, nvec, affd);
	if (ret)
		goto out_disable;

	/* Disable INTX */
	pci_intx_for_msi(dev, 0);

	if (!pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY)) {
		/*
		 * Ensure that all table entries are masked to prevent
		 * stale entries from firing in a crash kernel.
		 *
		 * Done late to deal with a broken Marvell NVME device
		 * which takes the MSI-X mask bits into account even
		 * when MSI-X is disabled, which prevents MSI delivery.
		 */
		msix_mask_all(dev->msix_base, tsize);
	}
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);

	pcibios_free_irq(dev);
	return 0;

out_disable:
	dev->msix_enabled = 0;
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);

	return ret;
}

static bool pci_msix_validate_entries(struct pci_dev *dev, struct msix_entry *entries, int nvec)
{
	bool nogap;
	int i, j;

	if (!entries)
		return true;

	nogap = pci_msi_domain_supports(dev, MSI_FLAG_MSIX_CONTIGUOUS, DENY_LEGACY);

	for (i = 0; i < nvec; i++) {
		/* Check for duplicate entries */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return false;
		}
		/* Check for unsupported gaps */
		if (nogap && entries[i].entry != i)
			return false;
	}
	return true;
}

int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec,
			    int maxvec, struct irq_affinity *affd, int flags)
{
	int hwsize, rc, nvec = maxvec;

	if (maxvec < minvec)
		return -ERANGE;

	if (dev->msi_enabled) {
		pci_info(dev, "can't enable MSI-X (MSI already enabled)\n");
		return -EINVAL;
	}

	if (WARN_ON_ONCE(dev->msix_enabled))
		return -EINVAL;

	/* Check MSI-X early on irq domain enabled architectures */
	if (!pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX, ALLOW_LEGACY))
		return -ENOTSUPP;

	if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	hwsize = pci_msix_vec_count(dev);
	if (hwsize < 0)
		return hwsize;

	if (!pci_msix_validate_entries(dev, entries, nvec))
		return -EINVAL;

	if (hwsize < nvec) {
		/* Keep the IRQ virtual hackery working */
		if (flags & PCI_IRQ_VIRTUAL)
			hwsize = nvec;
		else
			nvec = hwsize;
	}

	if (nvec < minvec)
		return -ENOSPC;

	rc = pci_setup_msi_context(dev);
	if (rc)
		return rc;

	if (!pci_setup_msix_device_domain(dev, hwsize))
		return -ENODEV;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = msix_capability_init(dev, entries, nvec, affd);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}
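
/*
 * Hedged usage sketch (illustrative, not part of the kernel sources): the
 * entry-based path above is reached via pci_enable_msix_range(); most
 * drivers use pci_alloc_irq_vectors() with PCI_IRQ_MSIX instead. The array
 * contents are assumptions for the example only.
 *
 *	struct msix_entry entries[] = { { .entry = 0 }, { .entry = 2 } };
 *
 *	nvec = pci_enable_msix_range(pdev, entries, 1, ARRAY_SIZE(entries));
 *	if (nvec < 0)
 *		return nvec;
 *
 * On success, entries[i].vector holds the Linux IRQ number for each slot.
 */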

void __pci_restore_msix_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	bool write_msg;

	if (!dev->msix_enabled)
		return;

	/* route the table */
	pci_intx_for_msi(dev, 0);
	pci_msix_clear_and_set_ctrl(dev, 0,
				    PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);

	write_msg = arch_restore_msi_irqs(dev);

	scoped_guard (msi_descs_lock, &dev->dev) {
		msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
			if (write_msg)
				__pci_write_msi_msg(entry, &entry->msg);
			pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
		}
	}

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}

void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	if (pci_dev_is_disconnected(dev)) {
		dev->msix_enabled = 0;
		return;
	}

	/* Return the device with MSI-X masked as its initial state */
	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL)
		pci_msix_mask(desc);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
	pcibios_alloc_irq(dev);
}

/* Common interfaces */

void pci_free_msi_irqs(struct pci_dev *dev)
{
	pci_msi_teardown_msi_irqs(dev);

	if (dev->msix_base) {
		iounmap(dev->msix_base);
		dev->msix_base = NULL;
	}
}

#ifdef CONFIG_PCIE_TPH
/**
 * pci_msix_write_tph_tag - Update the TPH tag for a given MSI-X vector
 * @pdev: The PCIe device to update
 * @index: The MSI-X index to update
 * @tag: The tag to write
 *
 * Returns: 0 on success, error code on failure
 */
int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag)
{
	struct msi_desc *msi_desc;
	struct irq_desc *irq_desc;
	unsigned int virq;

	if (!pdev->msix_enabled)
		return -ENXIO;

	guard(msi_descs_lock)(&pdev->dev);
	virq = msi_get_virq(&pdev->dev, index);
	if (!virq)
		return -ENXIO;
	/*
	 * This is a horrible hack, but short of implementing a PCI
	 * specific interrupt chip callback and a huge pile of
	 * infrastructure, this is the minor nuisance. It provides the
	 * protection against concurrent operations on this entry and keeps
	 * the control word cache in sync.
	 */
	irq_desc = irq_to_desc(virq);
	if (!irq_desc)
		return -ENXIO;

	guard(raw_spinlock_irq)(&irq_desc->lock);
	msi_desc = irq_data_get_msi_desc(&irq_desc->irq_data);
	if (!msi_desc || msi_desc->pci.msi_attrib.is_virtual)
		return -ENXIO;

	msi_desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_ST;
	msi_desc->pci.msix_ctrl |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag);
	pci_msix_write_vector_ctrl(msi_desc, msi_desc->pci.msix_ctrl);
	/* Flush the write */
	readl(pci_msix_desc_addr(msi_desc));
	return 0;
}
#endif

/* Misc. infrastructure */

struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
	return to_pci_dev(desc->dev);
}
EXPORT_SYMBOL(msi_desc_to_pci_dev);
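
/*
 * Hedged usage sketch (illustrative, not part of the kernel sources): code
 * that only has an msi_desc at hand, e.g. an irq_chip's irq_write_msi_msg()
 * callback, can recover the owning PCI device. The function name is an
 * assumption for the example only.
 *
 *	static void example_write_msi_msg(struct irq_data *data, struct msi_msg *msg)
 *	{
 *		struct msi_desc *desc = irq_data_get_msi_desc(data);
 *		struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
 *
 *		dev_dbg(&pdev->dev, "MSI index %u\n", desc->msi_index);
 *	}
 */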

void pci_no_msi(void)
{
	pci_msi_enable = false;
}