1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #define pr_fmt(fmt) "DMAR-IR: " fmt |
4 | |
5 | #include <linux/interrupt.h> |
6 | #include <linux/dmar.h> |
7 | #include <linux/spinlock.h> |
8 | #include <linux/slab.h> |
9 | #include <linux/jiffies.h> |
10 | #include <linux/hpet.h> |
11 | #include <linux/pci.h> |
12 | #include <linux/irq.h> |
13 | #include <linux/acpi.h> |
14 | #include <linux/irqdomain.h> |
15 | #include <linux/crash_dump.h> |
16 | #include <asm/io_apic.h> |
17 | #include <asm/apic.h> |
18 | #include <asm/smp.h> |
19 | #include <asm/cpu.h> |
20 | #include <asm/irq_remapping.h> |
21 | #include <asm/pci-direct.h> |
22 | |
23 | #include "iommu.h" |
24 | #include "../irq_remapping.h" |
25 | #include "cap_audit.h" |
26 | |
27 | enum irq_mode { |
28 | IRQ_REMAPPING, |
29 | IRQ_POSTING, |
30 | }; |
31 | |
32 | struct ioapic_scope { |
33 | struct intel_iommu *iommu; |
34 | unsigned int id; |
35 | unsigned int bus; /* PCI bus number */ |
36 | unsigned int devfn; /* PCI devfn number */ |
37 | }; |
38 | |
39 | struct hpet_scope { |
40 | struct intel_iommu *iommu; |
41 | u8 id; |
42 | unsigned int bus; |
43 | unsigned int devfn; |
44 | }; |
45 | |
46 | struct irq_2_iommu { |
47 | struct intel_iommu *iommu; |
48 | u16 irte_index; |
49 | u16 sub_handle; |
50 | u8 irte_mask; |
51 | enum irq_mode mode; |
52 | }; |
53 | |
54 | struct intel_ir_data { |
55 | struct irq_2_iommu irq_2_iommu; |
56 | struct irte irte_entry; |
57 | union { |
58 | struct msi_msg msi_entry; |
59 | }; |
60 | }; |
61 | |
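/*
 * IR_X2APIC_MODE sets IRTA_REG bit 11 (EIME) when x2apic (EIM) mode is
 * selected; in legacy xAPIC mode the 8-bit destination APIC ID must be
 * placed in bits 15:8 of the IRTE dest_id field, hence IRTE_DEST.
 */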
62 | #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) |
63 | #define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8) |
64 | |
65 | static int __read_mostly eim_mode; |
66 | static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; |
67 | static struct hpet_scope ir_hpet[MAX_HPET_TBS]; |
68 | |
69 | /* |
70 | * Lock ordering: |
71 | * ->dmar_global_lock |
72 | * ->irq_2_ir_lock |
73 | * ->qi->q_lock |
74 | * ->iommu->register_lock |
75 | * Note: |
76 | * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called |
 * in a single-threaded environment with interrupts disabled, so no need to take
78 | * the dmar_global_lock. |
79 | */ |
80 | DEFINE_RAW_SPINLOCK(irq_2_ir_lock); |
81 | static const struct irq_domain_ops intel_ir_domain_ops; |
82 | |
83 | static void iommu_disable_irq_remapping(struct intel_iommu *iommu); |
84 | static int __init parse_ioapics_under_ir(void); |
85 | static const struct msi_parent_ops dmar_msi_parent_ops, virt_dmar_msi_parent_ops; |
86 | |
87 | static bool ir_pre_enabled(struct intel_iommu *iommu) |
88 | { |
89 | return (iommu->flags & VTD_FLAG_IRQ_REMAP_PRE_ENABLED); |
90 | } |
91 | |
92 | static void clear_ir_pre_enabled(struct intel_iommu *iommu) |
93 | { |
94 | iommu->flags &= ~VTD_FLAG_IRQ_REMAP_PRE_ENABLED; |
95 | } |
96 | |
97 | static void init_ir_status(struct intel_iommu *iommu) |
98 | { |
99 | u32 gsts; |
100 | |
	gsts = readl(iommu->reg + DMAR_GSTS_REG);
102 | if (gsts & DMA_GSTS_IRES) |
103 | iommu->flags |= VTD_FLAG_IRQ_REMAP_PRE_ENABLED; |
104 | } |
105 | |
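/*
 * Allocate a naturally aligned, power-of-two sized block of IRTEs and
 * record its base index in @irq_iommu. A request for e.g. three entries
 * is rounded up to four, giving an invalidation handle mask of 2.
 * Returns the index of the first entry, or -1 on failure.
 */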
106 | static int alloc_irte(struct intel_iommu *iommu, |
107 | struct irq_2_iommu *irq_iommu, u16 count) |
108 | { |
109 | struct ir_table *table = iommu->ir_table; |
110 | unsigned int mask = 0; |
111 | unsigned long flags; |
112 | int index; |
113 | |
114 | if (!count || !irq_iommu) |
115 | return -1; |
116 | |
117 | if (count > 1) { |
		count = __roundup_pow_of_two(count);
119 | mask = ilog2(count); |
120 | } |
121 | |
122 | if (mask > ecap_max_handle_mask(iommu->ecap)) { |
123 | pr_err("Requested mask %x exceeds the max invalidation handle" |
124 | " mask value %Lx\n" , mask, |
125 | ecap_max_handle_mask(iommu->ecap)); |
126 | return -1; |
127 | } |
128 | |
129 | raw_spin_lock_irqsave(&irq_2_ir_lock, flags); |
	index = bitmap_find_free_region(table->bitmap,
					INTR_REMAP_TABLE_ENTRIES, mask);
132 | if (index < 0) { |
133 | pr_warn("IR%d: can't allocate an IRTE\n" , iommu->seq_id); |
134 | } else { |
135 | irq_iommu->iommu = iommu; |
136 | irq_iommu->irte_index = index; |
137 | irq_iommu->sub_handle = 0; |
138 | irq_iommu->irte_mask = mask; |
139 | irq_iommu->mode = IRQ_REMAPPING; |
140 | } |
141 | raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); |
142 | |
143 | return index; |
144 | } |
145 | |
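/*
 * Queue a selective Interrupt Entry Cache invalidation covering the
 * 2^mask entries starting at @index, and wait for it to complete.
 */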
146 | static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) |
147 | { |
148 | struct qi_desc desc; |
149 | |
150 | desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) |
151 | | QI_IEC_SELECTIVE; |
152 | desc.qw1 = 0; |
153 | desc.qw2 = 0; |
154 | desc.qw3 = 0; |
155 | |
	return qi_submit_sync(iommu, &desc, 1, 0);
157 | } |
158 | |
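/*
 * Update the IRTE addressed by @irq_iommu with @irte_modified, then flush
 * the CPU cache line and the hardware interrupt entry cache. If either
 * the old or the new entry is in posted format, the full 128-bit entry is
 * written atomically with cmpxchg16b, because the posted-interrupt
 * descriptor address straddles the 64-bit boundary.
 */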
159 | static int modify_irte(struct irq_2_iommu *irq_iommu, |
160 | struct irte *irte_modified) |
161 | { |
162 | struct intel_iommu *iommu; |
163 | unsigned long flags; |
164 | struct irte *irte; |
165 | int rc, index; |
166 | |
167 | if (!irq_iommu) |
168 | return -1; |
169 | |
170 | raw_spin_lock_irqsave(&irq_2_ir_lock, flags); |
171 | |
172 | iommu = irq_iommu->iommu; |
173 | |
174 | index = irq_iommu->irte_index + irq_iommu->sub_handle; |
175 | irte = &iommu->ir_table->base[index]; |
176 | |
177 | if ((irte->pst == 1) || (irte_modified->pst == 1)) { |
178 | /* |
179 | * We use cmpxchg16 to atomically update the 128-bit IRTE, |
180 | * and it cannot be updated by the hardware or other processors |
181 | * behind us, so the return value of cmpxchg16 should be the |
182 | * same as the old value. |
183 | */ |
184 | u128 old = irte->irte; |
185 | WARN_ON(!try_cmpxchg128(&irte->irte, &old, irte_modified->irte)); |
186 | } else { |
187 | WRITE_ONCE(irte->low, irte_modified->low); |
188 | WRITE_ONCE(irte->high, irte_modified->high); |
189 | } |
	__iommu_flush_cache(iommu, irte, sizeof(*irte));

	rc = qi_flush_iec(iommu, index, 0);
193 | |
194 | /* Update iommu mode according to the IRTE mode */ |
195 | irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING; |
196 | raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); |
197 | |
198 | return rc; |
199 | } |
200 | |
201 | static struct intel_iommu *map_hpet_to_iommu(u8 hpet_id) |
202 | { |
203 | int i; |
204 | |
205 | for (i = 0; i < MAX_HPET_TBS; i++) { |
206 | if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu) |
207 | return ir_hpet[i].iommu; |
208 | } |
209 | return NULL; |
210 | } |
211 | |
212 | static struct intel_iommu *map_ioapic_to_iommu(int apic) |
213 | { |
214 | int i; |
215 | |
216 | for (i = 0; i < MAX_IO_APICS; i++) { |
217 | if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu) |
218 | return ir_ioapic[i].iommu; |
219 | } |
220 | return NULL; |
221 | } |
222 | |
223 | static struct irq_domain *map_dev_to_ir(struct pci_dev *dev) |
224 | { |
225 | struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev); |
226 | |
227 | return drhd ? drhd->iommu->ir_domain : NULL; |
228 | } |
229 | |
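/*
 * Zero out every IRTE in the block owned by @irq_iommu, release the block
 * in the allocation bitmap and invalidate the interrupt entry cache.
 * Entries with a non-zero sub_handle share another descriptor's block and
 * must not free it.
 */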
230 | static int clear_entries(struct irq_2_iommu *irq_iommu) |
231 | { |
232 | struct irte *start, *entry, *end; |
233 | struct intel_iommu *iommu; |
234 | int index; |
235 | |
236 | if (irq_iommu->sub_handle) |
237 | return 0; |
238 | |
239 | iommu = irq_iommu->iommu; |
240 | index = irq_iommu->irte_index; |
241 | |
242 | start = iommu->ir_table->base + index; |
243 | end = start + (1 << irq_iommu->irte_mask); |
244 | |
245 | for (entry = start; entry < end; entry++) { |
246 | WRITE_ONCE(entry->low, 0); |
247 | WRITE_ONCE(entry->high, 0); |
248 | } |
	bitmap_release_region(iommu->ir_table->bitmap, index,
			      irq_iommu->irte_mask);

	return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
253 | } |
254 | |
255 | /* |
256 | * source validation type |
257 | */ |
258 | #define SVT_NO_VERIFY 0x0 /* no verification is required */ |
259 | #define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fields */ |
260 | #define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */ |
261 | |
262 | /* |
263 | * source-id qualifier |
264 | */ |
265 | #define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */ |
266 | #define SQ_13_IGNORE_1 0x1 /* verify most significant 13 bits, ignore |
267 | * the third least significant bit |
268 | */ |
269 | #define SQ_13_IGNORE_2 0x2 /* verify most significant 13 bits, ignore |
270 | * the second and third least significant bits |
271 | */ |
272 | #define SQ_13_IGNORE_3 0x3 /* verify most significant 13 bits, ignore |
273 | * the least three significant bits |
274 | */ |
275 | |
276 | /* |
277 | * set SVT, SQ and SID fields of irte to verify |
278 | * source ids of interrupt requests |
279 | */ |
280 | static void set_irte_sid(struct irte *irte, unsigned int svt, |
281 | unsigned int sq, unsigned int sid) |
282 | { |
283 | if (disable_sourceid_checking) |
284 | svt = SVT_NO_VERIFY; |
285 | irte->svt = svt; |
286 | irte->sq = sq; |
287 | irte->sid = sid; |
288 | } |
289 | |
290 | /* |
291 | * Set an IRTE to match only the bus number. Interrupt requests that reference |
 * this IRTE must have a requester-id whose bus number lies within the
 * inclusive range given by the start_bus and end_bus arguments.
294 | */ |
295 | static void set_irte_verify_bus(struct irte *irte, unsigned int start_bus, |
296 | unsigned int end_bus) |
297 | { |
	set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
		     (start_bus << 8) | end_bus);
300 | } |
301 | |
302 | static int set_ioapic_sid(struct irte *irte, int apic) |
303 | { |
304 | int i; |
305 | u16 sid = 0; |
306 | |
307 | if (!irte) |
308 | return -1; |
309 | |
310 | for (i = 0; i < MAX_IO_APICS; i++) { |
311 | if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) { |
312 | sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; |
313 | break; |
314 | } |
315 | } |
316 | |
317 | if (sid == 0) { |
318 | pr_warn("Failed to set source-id of IOAPIC (%d)\n" , apic); |
319 | return -1; |
320 | } |
321 | |
322 | set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid); |
323 | |
324 | return 0; |
325 | } |
326 | |
327 | static int set_hpet_sid(struct irte *irte, u8 id) |
328 | { |
329 | int i; |
330 | u16 sid = 0; |
331 | |
332 | if (!irte) |
333 | return -1; |
334 | |
335 | for (i = 0; i < MAX_HPET_TBS; i++) { |
336 | if (ir_hpet[i].iommu && ir_hpet[i].id == id) { |
337 | sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn; |
338 | break; |
339 | } |
340 | } |
341 | |
342 | if (sid == 0) { |
343 | pr_warn("Failed to set source-id of HPET block (%d)\n" , id); |
344 | return -1; |
345 | } |
346 | |
347 | /* |
348 | * Should really use SQ_ALL_16. Some platforms are broken. |
349 | * While we figure out the right quirks for these broken platforms, use |
350 | * SQ_13_IGNORE_3 for now. |
351 | */ |
352 | set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid); |
353 | |
354 | return 0; |
355 | } |
356 | |
357 | struct set_msi_sid_data { |
358 | struct pci_dev *pdev; |
359 | u16 alias; |
360 | int count; |
361 | int busmatch_count; |
362 | }; |
363 | |
364 | static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) |
365 | { |
366 | struct set_msi_sid_data *data = opaque; |
367 | |
368 | if (data->count == 0 || PCI_BUS_NUM(alias) == PCI_BUS_NUM(data->alias)) |
369 | data->busmatch_count++; |
370 | |
371 | data->pdev = pdev; |
372 | data->alias = alias; |
373 | data->count++; |
374 | |
375 | return 0; |
376 | } |
377 | |
378 | static int set_msi_sid(struct irte *irte, struct pci_dev *dev) |
379 | { |
380 | struct set_msi_sid_data data; |
381 | |
382 | if (!irte || !dev) |
383 | return -1; |
384 | |
385 | data.count = 0; |
386 | data.busmatch_count = 0; |
	pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);
388 | |
389 | /* |
390 | * DMA alias provides us with a PCI device and alias. The only case |
	 * where it will return an alias on a different bus than the
392 | * device is the case of a PCIe-to-PCI bridge, where the alias is for |
393 | * the subordinate bus. In this case we can only verify the bus. |
394 | * |
395 | * If there are multiple aliases, all with the same bus number, |
396 | * then all we can do is verify the bus. This is typical in NTB |
	 * hardware, which uses proxy IDs where the device will generate traffic
398 | * from multiple devfn numbers on the same bus. |
399 | * |
400 | * If the alias device is on a different bus than our source device |
401 | * then we have a topology based alias, use it. |
402 | * |
403 | * Otherwise, the alias is for a device DMA quirk and we cannot |
404 | * assume that MSI uses the same requester ID. Therefore use the |
405 | * original device. |
406 | */ |
	if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
		set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias),
				    dev->bus->number);
	else if (data.count >= 2 && data.busmatch_count == data.count)
		set_irte_verify_bus(irte, dev->bus->number, dev->bus->number);
	else if (data.pdev->bus->number != dev->bus->number)
		set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
	else
		set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
			     pci_dev_id(dev));
417 | |
418 | return 0; |
419 | } |
420 | |
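/*
 * In a kdump kernel, devices may still raise interrupts remapped through
 * the old kernel's table. Copy that table into ours (the sizes must
 * match) and mark every present entry as allocated in the bitmap so it
 * is not handed out again.
 */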
421 | static int iommu_load_old_irte(struct intel_iommu *iommu) |
422 | { |
423 | struct irte *old_ir_table; |
424 | phys_addr_t irt_phys; |
425 | unsigned int i; |
426 | size_t size; |
427 | u64 irta; |
428 | |
429 | /* Check whether the old ir-table has the same size as ours */ |
430 | irta = dmar_readq(iommu->reg + DMAR_IRTA_REG); |
431 | if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK) |
432 | != INTR_REMAP_TABLE_REG_SIZE) |
433 | return -EINVAL; |
434 | |
435 | irt_phys = irta & VTD_PAGE_MASK; |
436 | size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte); |
437 | |
438 | /* Map the old IR table */ |
	old_ir_table = memremap(irt_phys, size, MEMREMAP_WB);
440 | if (!old_ir_table) |
441 | return -ENOMEM; |
442 | |
443 | /* Copy data over */ |
444 | memcpy(iommu->ir_table->base, old_ir_table, size); |
445 | |
	__iommu_flush_cache(iommu, iommu->ir_table->base, size);
447 | |
448 | /* |
449 | * Now check the table for used entries and mark those as |
450 | * allocated in the bitmap |
451 | */ |
452 | for (i = 0; i < INTR_REMAP_TABLE_ENTRIES; i++) { |
453 | if (iommu->ir_table->base[i].present) |
			bitmap_set(iommu->ir_table->bitmap, i, 1);
455 | } |
456 | |
	memunmap(old_ir_table);
458 | |
459 | return 0; |
460 | } |
461 | |
462 | |
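/*
 * Program the IRTA register with the table address, size and (x2apic)
 * mode, then set SIRTP and wait for the hardware to latch the pointer.
 * Unless the hardware invalidates the interrupt entry cache itself on a
 * table pointer update (ESIRTPS), do a global invalidation afterwards.
 */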
463 | static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) |
464 | { |
465 | unsigned long flags; |
466 | u64 addr; |
467 | u32 sts; |
468 | |
	addr = virt_to_phys((void *)iommu->ir_table->base);
470 | |
471 | raw_spin_lock_irqsave(&iommu->register_lock, flags); |
472 | |
473 | dmar_writeq(iommu->reg + DMAR_IRTA_REG, |
474 | (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); |
475 | |
476 | /* Set interrupt-remapping table pointer */ |
	writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG);
478 | |
479 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, |
480 | readl, (sts & DMA_GSTS_IRTPS), sts); |
481 | raw_spin_unlock_irqrestore(&iommu->register_lock, flags); |
482 | |
483 | /* |
484 | * Global invalidation of interrupt entry cache to make sure the |
485 | * hardware uses the new irq remapping table. |
486 | */ |
487 | if (!cap_esirtps(iommu->cap)) |
488 | qi_global_iec(iommu); |
489 | } |
490 | |
491 | static void iommu_enable_irq_remapping(struct intel_iommu *iommu) |
492 | { |
493 | unsigned long flags; |
494 | u32 sts; |
495 | |
496 | raw_spin_lock_irqsave(&iommu->register_lock, flags); |
497 | |
498 | /* Enable interrupt-remapping */ |
499 | iommu->gcmd |= DMA_GCMD_IRE; |
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
501 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, |
502 | readl, (sts & DMA_GSTS_IRES), sts); |
503 | |
504 | /* Block compatibility-format MSIs */ |
505 | if (sts & DMA_GSTS_CFIS) { |
506 | iommu->gcmd &= ~DMA_GCMD_CFI; |
		writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
508 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, |
509 | readl, !(sts & DMA_GSTS_CFIS), sts); |
510 | } |
511 | |
512 | /* |
513 | * With CFI clear in the Global Command register, we should be |
514 | * protected from dangerous (i.e. compatibility) interrupts |
515 | * regardless of x2apic status. Check just to be sure. |
516 | */ |
517 | if (sts & DMA_GSTS_CFIS) |
		WARN(1, KERN_WARNING
			"Compatibility-format IRQs enabled despite intr remapping;\n"
			"you are vulnerable to IRQ injection.\n");
521 | |
522 | raw_spin_unlock_irqrestore(&iommu->register_lock, flags); |
523 | } |
524 | |
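/*
 * Allocate the per-IOMMU remapping resources: the IRTE table pages, the
 * allocation bitmap and the hierarchical irqdomain that parents all
 * remapped MSI/IOAPIC/HPET interrupts. If firmware or a crashed kernel
 * left remapping enabled, the old table is either copied (kdump) or
 * remapping is disabled before the new table is installed.
 */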
525 | static int intel_setup_irq_remapping(struct intel_iommu *iommu) |
526 | { |
527 | struct ir_table *ir_table; |
528 | struct fwnode_handle *fn; |
529 | unsigned long *bitmap; |
530 | struct page *pages; |
531 | |
532 | if (iommu->ir_table) |
533 | return 0; |
534 | |
	ir_table = kzalloc(sizeof(struct ir_table), GFP_KERNEL);
536 | if (!ir_table) |
537 | return -ENOMEM; |
538 | |
	pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
				 INTR_REMAP_PAGE_ORDER);
	if (!pages) {
		pr_err("IR%d: failed to allocate pages of order %d\n",
		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
544 | goto out_free_table; |
545 | } |
546 | |
547 | bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_KERNEL); |
548 | if (bitmap == NULL) { |
549 | pr_err("IR%d: failed to allocate bitmap\n" , iommu->seq_id); |
550 | goto out_free_pages; |
551 | } |
552 | |
	fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id);
554 | if (!fn) |
555 | goto out_free_bitmap; |
556 | |
	iommu->ir_domain =
		irq_domain_create_hierarchy(arch_get_ir_parent_domain(),
					    0, INTR_REMAP_TABLE_ENTRIES,
					    fn, &intel_ir_domain_ops,
					    iommu);
	if (!iommu->ir_domain) {
		pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
564 | goto out_free_fwnode; |
565 | } |
566 | |
	irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
568 | iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | |
569 | IRQ_DOMAIN_FLAG_ISOLATED_MSI; |
570 | |
571 | if (cap_caching_mode(iommu->cap)) |
572 | iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops; |
573 | else |
574 | iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; |
575 | |
576 | ir_table->base = page_address(pages); |
577 | ir_table->bitmap = bitmap; |
578 | iommu->ir_table = ir_table; |
579 | |
580 | /* |
581 | * If the queued invalidation is already initialized, |
582 | * shouldn't disable it. |
583 | */ |
584 | if (!iommu->qi) { |
585 | /* |
586 | * Clear previous faults. |
587 | */ |
		dmar_fault(-1, iommu);
589 | dmar_disable_qi(iommu); |
590 | |
591 | if (dmar_enable_qi(iommu)) { |
592 | pr_err("Failed to enable queued invalidation\n" ); |
593 | goto out_free_ir_domain; |
594 | } |
595 | } |
596 | |
597 | init_ir_status(iommu); |
598 | |
599 | if (ir_pre_enabled(iommu)) { |
600 | if (!is_kdump_kernel()) { |
601 | pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n" , |
602 | iommu->name); |
603 | clear_ir_pre_enabled(iommu); |
604 | iommu_disable_irq_remapping(iommu); |
605 | } else if (iommu_load_old_irte(iommu)) |
606 | pr_err("Failed to copy IR table for %s from previous kernel\n" , |
607 | iommu->name); |
608 | else |
609 | pr_info("Copied IR table for %s from previous kernel\n" , |
610 | iommu->name); |
611 | } |
612 | |
	iommu_set_irq_remapping(iommu, eim_mode);
614 | |
615 | return 0; |
616 | |
out_free_ir_domain:
	irq_domain_remove(iommu->ir_domain);
	iommu->ir_domain = NULL;
out_free_fwnode:
	irq_domain_free_fwnode(fn);
out_free_bitmap:
	bitmap_free(bitmap);
out_free_pages:
	__free_pages(pages, INTR_REMAP_PAGE_ORDER);
out_free_table:
	kfree(ir_table);
628 | |
629 | iommu->ir_table = NULL; |
630 | |
631 | return -ENOMEM; |
632 | } |
633 | |
634 | static void intel_teardown_irq_remapping(struct intel_iommu *iommu) |
635 | { |
636 | struct fwnode_handle *fn; |
637 | |
638 | if (iommu && iommu->ir_table) { |
639 | if (iommu->ir_domain) { |
640 | fn = iommu->ir_domain->fwnode; |
641 | |
			irq_domain_remove(iommu->ir_domain);
			irq_domain_free_fwnode(fn);
			iommu->ir_domain = NULL;
		}
		free_pages((unsigned long)iommu->ir_table->base,
			   INTR_REMAP_PAGE_ORDER);
		bitmap_free(iommu->ir_table->bitmap);
		kfree(iommu->ir_table);
650 | iommu->ir_table = NULL; |
651 | } |
652 | } |
653 | |
654 | /* |
655 | * Disable Interrupt Remapping. |
656 | */ |
657 | static void iommu_disable_irq_remapping(struct intel_iommu *iommu) |
658 | { |
659 | unsigned long flags; |
660 | u32 sts; |
661 | |
662 | if (!ecap_ir_support(iommu->ecap)) |
663 | return; |
664 | |
665 | /* |
666 | * global invalidation of interrupt entry cache before disabling |
667 | * interrupt-remapping. |
668 | */ |
669 | if (!cap_esirtps(iommu->cap)) |
670 | qi_global_iec(iommu); |
671 | |
672 | raw_spin_lock_irqsave(&iommu->register_lock, flags); |
673 | |
	sts = readl(iommu->reg + DMAR_GSTS_REG);
675 | if (!(sts & DMA_GSTS_IRES)) |
676 | goto end; |
677 | |
678 | iommu->gcmd &= ~DMA_GCMD_IRE; |
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
680 | |
681 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, |
682 | readl, !(sts & DMA_GSTS_IRES), sts); |
683 | |
684 | end: |
685 | raw_spin_unlock_irqrestore(&iommu->register_lock, flags); |
686 | } |
687 | |
688 | static int __init dmar_x2apic_optout(void) |
689 | { |
690 | struct acpi_table_dmar *dmar; |
691 | dmar = (struct acpi_table_dmar *)dmar_tbl; |
692 | if (!dmar || no_x2apic_optout) |
693 | return 0; |
694 | return dmar->flags & DMAR_X2APIC_OPT_OUT; |
695 | } |
696 | |
697 | static void __init intel_cleanup_irq_remapping(void) |
698 | { |
699 | struct dmar_drhd_unit *drhd; |
700 | struct intel_iommu *iommu; |
701 | |
702 | for_each_iommu(iommu, drhd) { |
703 | if (ecap_ir_support(iommu->ecap)) { |
704 | iommu_disable_irq_remapping(iommu); |
705 | intel_teardown_irq_remapping(iommu); |
706 | } |
707 | } |
708 | |
709 | if (x2apic_supported()) |
710 | pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n" ); |
711 | } |
712 | |
713 | static int __init intel_prepare_irq_remapping(void) |
714 | { |
715 | struct dmar_drhd_unit *drhd; |
716 | struct intel_iommu *iommu; |
717 | int eim = 0; |
718 | |
719 | if (irq_remap_broken) { |
720 | pr_warn("This system BIOS has enabled interrupt remapping\n" |
721 | "on a chipset that contains an erratum making that\n" |
722 | "feature unstable. To maintain system stability\n" |
723 | "interrupt remapping is being disabled. Please\n" |
724 | "contact your BIOS vendor for an update\n" ); |
725 | add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); |
726 | return -ENODEV; |
727 | } |
728 | |
729 | if (dmar_table_init() < 0) |
730 | return -ENODEV; |
731 | |
	if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL))
733 | return -ENODEV; |
734 | |
735 | if (!dmar_ir_support()) |
736 | return -ENODEV; |
737 | |
738 | if (parse_ioapics_under_ir()) { |
739 | pr_info("Not enabling interrupt remapping\n" ); |
740 | goto error; |
741 | } |
742 | |
743 | /* First make sure all IOMMUs support IRQ remapping */ |
744 | for_each_iommu(iommu, drhd) |
745 | if (!ecap_ir_support(iommu->ecap)) |
746 | goto error; |
747 | |
748 | /* Detect remapping mode: lapic or x2apic */ |
749 | if (x2apic_supported()) { |
750 | eim = !dmar_x2apic_optout(); |
751 | if (!eim) { |
752 | pr_info("x2apic is disabled because BIOS sets x2apic opt out bit." ); |
753 | pr_info("Use 'intremap=no_x2apic_optout' to override the BIOS setting.\n" ); |
754 | } |
755 | } |
756 | |
757 | for_each_iommu(iommu, drhd) { |
758 | if (eim && !ecap_eim_support(iommu->ecap)) { |
759 | pr_info("%s does not support EIM\n" , iommu->name); |
760 | eim = 0; |
761 | } |
762 | } |
763 | |
764 | eim_mode = eim; |
765 | if (eim) |
766 | pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n" ); |
767 | |
768 | /* Do the initializations early */ |
769 | for_each_iommu(iommu, drhd) { |
770 | if (intel_setup_irq_remapping(iommu)) { |
771 | pr_err("Failed to setup irq remapping for %s\n" , |
772 | iommu->name); |
773 | goto error; |
774 | } |
775 | } |
776 | |
777 | return 0; |
778 | |
779 | error: |
780 | intel_cleanup_irq_remapping(); |
781 | return -ENODEV; |
782 | } |
783 | |
784 | /* |
785 | * Set Posted-Interrupts capability. |
786 | */ |
787 | static inline void set_irq_posting_cap(void) |
788 | { |
789 | struct dmar_drhd_unit *drhd; |
790 | struct intel_iommu *iommu; |
791 | |
792 | if (!disable_irq_post) { |
793 | /* |
794 | * If IRTE is in posted format, the 'pda' field goes across the |
795 | * 64-bit boundary, we need use cmpxchg16b to atomically update |
796 | * it. We only expose posted-interrupt when X86_FEATURE_CX16 |
797 | * is supported. Actually, hardware platforms supporting PI |
798 | * should have X86_FEATURE_CX16 support, this has been confirmed |
799 | * with Intel hardware guys. |
800 | */ |
801 | if (boot_cpu_has(X86_FEATURE_CX16)) |
802 | intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; |
803 | |
804 | for_each_iommu(iommu, drhd) |
805 | if (!cap_pi_support(iommu->cap)) { |
806 | intel_irq_remap_ops.capability &= |
807 | ~(1 << IRQ_POSTING_CAP); |
808 | break; |
809 | } |
810 | } |
811 | } |
812 | |
813 | static int __init intel_enable_irq_remapping(void) |
814 | { |
815 | struct dmar_drhd_unit *drhd; |
816 | struct intel_iommu *iommu; |
817 | bool setup = false; |
818 | |
819 | /* |
820 | * Setup Interrupt-remapping for all the DRHD's now. |
821 | */ |
822 | for_each_iommu(iommu, drhd) { |
823 | if (!ir_pre_enabled(iommu)) |
824 | iommu_enable_irq_remapping(iommu); |
825 | setup = true; |
826 | } |
827 | |
828 | if (!setup) |
829 | goto error; |
830 | |
831 | irq_remapping_enabled = 1; |
832 | |
833 | set_irq_posting_cap(); |
834 | |
835 | pr_info("Enabled IRQ remapping in %s mode\n" , eim_mode ? "x2apic" : "xapic" ); |
836 | |
837 | return eim_mode ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; |
838 | |
839 | error: |
840 | intel_cleanup_irq_remapping(); |
841 | return -1; |
842 | } |
843 | |
844 | static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope, |
845 | struct intel_iommu *iommu, |
846 | struct acpi_dmar_hardware_unit *drhd) |
847 | { |
848 | struct acpi_dmar_pci_path *path; |
849 | u8 bus; |
850 | int count, free = -1; |
851 | |
852 | bus = scope->bus; |
853 | path = (struct acpi_dmar_pci_path *)(scope + 1); |
854 | count = (scope->length - sizeof(struct acpi_dmar_device_scope)) |
855 | / sizeof(struct acpi_dmar_pci_path); |
856 | |
857 | while (--count > 0) { |
858 | /* |
859 | * Access PCI directly due to the PCI |
860 | * subsystem isn't initialized yet. |
861 | */ |
862 | bus = read_pci_config_byte(bus, slot: path->device, func: path->function, |
863 | PCI_SECONDARY_BUS); |
864 | path++; |
865 | } |
866 | |
867 | for (count = 0; count < MAX_HPET_TBS; count++) { |
868 | if (ir_hpet[count].iommu == iommu && |
869 | ir_hpet[count].id == scope->enumeration_id) |
870 | return 0; |
871 | else if (ir_hpet[count].iommu == NULL && free == -1) |
872 | free = count; |
873 | } |
874 | if (free == -1) { |
875 | pr_warn("Exceeded Max HPET blocks\n" ); |
876 | return -ENOSPC; |
877 | } |
878 | |
879 | ir_hpet[free].iommu = iommu; |
880 | ir_hpet[free].id = scope->enumeration_id; |
881 | ir_hpet[free].bus = bus; |
882 | ir_hpet[free].devfn = PCI_DEVFN(path->device, path->function); |
883 | pr_info("HPET id %d under DRHD base 0x%Lx\n" , |
884 | scope->enumeration_id, drhd->address); |
885 | |
886 | return 0; |
887 | } |
888 | |
889 | static int ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, |
890 | struct intel_iommu *iommu, |
891 | struct acpi_dmar_hardware_unit *drhd) |
892 | { |
893 | struct acpi_dmar_pci_path *path; |
894 | u8 bus; |
895 | int count, free = -1; |
896 | |
897 | bus = scope->bus; |
898 | path = (struct acpi_dmar_pci_path *)(scope + 1); |
899 | count = (scope->length - sizeof(struct acpi_dmar_device_scope)) |
900 | / sizeof(struct acpi_dmar_pci_path); |
901 | |
902 | while (--count > 0) { |
903 | /* |
904 | * Access PCI directly due to the PCI |
905 | * subsystem isn't initialized yet. |
906 | */ |
907 | bus = read_pci_config_byte(bus, slot: path->device, func: path->function, |
908 | PCI_SECONDARY_BUS); |
909 | path++; |
910 | } |
911 | |
912 | for (count = 0; count < MAX_IO_APICS; count++) { |
913 | if (ir_ioapic[count].iommu == iommu && |
914 | ir_ioapic[count].id == scope->enumeration_id) |
915 | return 0; |
916 | else if (ir_ioapic[count].iommu == NULL && free == -1) |
917 | free = count; |
918 | } |
919 | if (free == -1) { |
920 | pr_warn("Exceeded Max IO APICS\n" ); |
921 | return -ENOSPC; |
922 | } |
923 | |
924 | ir_ioapic[free].bus = bus; |
925 | ir_ioapic[free].devfn = PCI_DEVFN(path->device, path->function); |
926 | ir_ioapic[free].iommu = iommu; |
927 | ir_ioapic[free].id = scope->enumeration_id; |
928 | pr_info("IOAPIC id %d under DRHD base 0x%Lx IOMMU %d\n" , |
929 | scope->enumeration_id, drhd->address, iommu->seq_id); |
930 | |
931 | return 0; |
932 | } |
933 | |
static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
				      struct intel_iommu *iommu)
936 | { |
937 | int ret = 0; |
938 | struct acpi_dmar_hardware_unit *drhd; |
939 | struct acpi_dmar_device_scope *scope; |
940 | void *start, *end; |
941 | |
942 | drhd = (struct acpi_dmar_hardware_unit *)header; |
943 | start = (void *)(drhd + 1); |
944 | end = ((void *)drhd) + header->length; |
945 | |
946 | while (start < end && ret == 0) { |
947 | scope = start; |
948 | if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) |
949 | ret = ir_parse_one_ioapic_scope(scope, iommu, drhd); |
950 | else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) |
951 | ret = ir_parse_one_hpet_scope(scope, iommu, drhd); |
952 | start += scope->length; |
953 | } |
954 | |
955 | return ret; |
956 | } |
957 | |
958 | static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu) |
959 | { |
960 | int i; |
961 | |
962 | for (i = 0; i < MAX_HPET_TBS; i++) |
963 | if (ir_hpet[i].iommu == iommu) |
964 | ir_hpet[i].iommu = NULL; |
965 | |
966 | for (i = 0; i < MAX_IO_APICS; i++) |
967 | if (ir_ioapic[i].iommu == iommu) |
968 | ir_ioapic[i].iommu = NULL; |
969 | } |
970 | |
971 | /* |
972 | * Finds the assocaition between IOAPIC's and its Interrupt-remapping |
973 | * hardware unit. |
974 | */ |
975 | static int __init parse_ioapics_under_ir(void) |
976 | { |
977 | struct dmar_drhd_unit *drhd; |
978 | struct intel_iommu *iommu; |
979 | bool ir_supported = false; |
980 | int ioapic_idx; |
981 | |
982 | for_each_iommu(iommu, drhd) { |
983 | int ret; |
984 | |
985 | if (!ecap_ir_support(iommu->ecap)) |
986 | continue; |
987 | |
		ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu);
989 | if (ret) |
990 | return ret; |
991 | |
992 | ir_supported = true; |
993 | } |
994 | |
995 | if (!ir_supported) |
996 | return -ENODEV; |
997 | |
998 | for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { |
		int ioapic_id = mpc_ioapic_id(ioapic_idx);
		if (!map_ioapic_to_iommu(ioapic_id)) {
			pr_err(FW_BUG "ioapic %d has no mapping iommu, "
			       "interrupt remapping will be disabled\n",
			       ioapic_id);
1004 | return -1; |
1005 | } |
1006 | } |
1007 | |
1008 | return 0; |
1009 | } |
1010 | |
1011 | static int __init ir_dev_scope_init(void) |
1012 | { |
1013 | int ret; |
1014 | |
1015 | if (!irq_remapping_enabled) |
1016 | return 0; |
1017 | |
	down_write(&dmar_global_lock);
	ret = dmar_dev_scope_init();
	up_write(&dmar_global_lock);
1021 | |
1022 | return ret; |
1023 | } |
1024 | rootfs_initcall(ir_dev_scope_init); |
1025 | |
1026 | static void disable_irq_remapping(void) |
1027 | { |
1028 | struct dmar_drhd_unit *drhd; |
1029 | struct intel_iommu *iommu = NULL; |
1030 | |
1031 | /* |
1032 | * Disable Interrupt-remapping for all the DRHD's now. |
1033 | */ |
1034 | for_each_iommu(iommu, drhd) { |
1035 | if (!ecap_ir_support(iommu->ecap)) |
1036 | continue; |
1037 | |
1038 | iommu_disable_irq_remapping(iommu); |
1039 | } |
1040 | |
1041 | /* |
1042 | * Clear Posted-Interrupts capability. |
1043 | */ |
1044 | if (!disable_irq_post) |
1045 | intel_irq_remap_ops.capability &= ~(1 << IRQ_POSTING_CAP); |
1046 | } |
1047 | |
1048 | static int reenable_irq_remapping(int eim) |
1049 | { |
1050 | struct dmar_drhd_unit *drhd; |
1051 | bool setup = false; |
1052 | struct intel_iommu *iommu = NULL; |
1053 | |
1054 | for_each_iommu(iommu, drhd) |
1055 | if (iommu->qi) |
1056 | dmar_reenable_qi(iommu); |
1057 | |
1058 | /* |
1059 | * Setup Interrupt-remapping for all the DRHD's now. |
1060 | */ |
1061 | for_each_iommu(iommu, drhd) { |
1062 | if (!ecap_ir_support(iommu->ecap)) |
1063 | continue; |
1064 | |
1065 | /* Set up interrupt remapping for iommu.*/ |
		iommu_set_irq_remapping(iommu, eim);
1067 | iommu_enable_irq_remapping(iommu); |
1068 | setup = true; |
1069 | } |
1070 | |
1071 | if (!setup) |
1072 | goto error; |
1073 | |
1074 | set_irq_posting_cap(); |
1075 | |
1076 | return 0; |
1077 | |
1078 | error: |
1079 | /* |
1080 | * handle error condition gracefully here! |
1081 | */ |
1082 | return -1; |
1083 | } |
1084 | |
1085 | /* |
1086 | * Store the MSI remapping domain pointer in the device if enabled. |
1087 | * |
1088 | * This is called from dmar_pci_bus_add_dev() so it works even when DMA |
1089 | * remapping is disabled. Only update the pointer if the device is not |
1090 | * already handled by a non default PCI/MSI interrupt domain. This protects |
1091 | * e.g. VMD devices. |
1092 | */ |
1093 | void intel_irq_remap_add_device(struct dmar_pci_notify_info *info) |
1094 | { |
	if (!irq_remapping_enabled || !pci_dev_has_default_msi_parent_domain(info->dev))
1096 | return; |
1097 | |
	dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev));
1099 | } |
1100 | |
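/*
 * Initialize a remapped-format IRTE for fixed delivery of @vector to
 * @dest. The trigger mode is always edge here; level semantics for
 * IO-APIC pins are handled in the IO-APIC RTE itself.
 */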
1101 | static void prepare_irte(struct irte *irte, int vector, unsigned int dest) |
1102 | { |
1103 | memset(irte, 0, sizeof(*irte)); |
1104 | |
1105 | irte->present = 1; |
1106 | irte->dst_mode = apic->dest_mode_logical; |
1107 | /* |
1108 | * Trigger mode in the IRTE will always be edge, and for IO-APIC, the |
1109 | * actual level or edge trigger will be setup in the IO-APIC |
1110 | * RTE. This will help simplify level triggered irq migration. |
	 * For more details, see the comments (in io_apic.c) explaining IO-APIC
1112 | * irq migration in the presence of interrupt-remapping. |
1113 | */ |
1114 | irte->trigger_mode = 0; |
1115 | irte->dlvry_mode = APIC_DELIVERY_MODE_FIXED; |
1116 | irte->vector = vector; |
1117 | irte->dest_id = IRTE_DEST(dest); |
1118 | irte->redir_hint = 1; |
1119 | } |
1120 | |
1121 | struct irq_remap_ops intel_irq_remap_ops = { |
1122 | .prepare = intel_prepare_irq_remapping, |
1123 | .enable = intel_enable_irq_remapping, |
1124 | .disable = disable_irq_remapping, |
1125 | .reenable = reenable_irq_remapping, |
1126 | .enable_faulting = enable_drhd_fault_handling, |
1127 | }; |
1128 | |
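/*
 * Propagate the current vector and destination from the irq_cfg into the
 * cached IRTE and, unless the entry is currently in posted mode, write it
 * out to the hardware. @force is used at activation time to program the
 * entry regardless of its mode.
 */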
1129 | static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) |
1130 | { |
1131 | struct intel_ir_data *ir_data = irqd->chip_data; |
1132 | struct irte *irte = &ir_data->irte_entry; |
	struct irq_cfg *cfg = irqd_cfg(irqd);
1134 | |
1135 | /* |
1136 | * Atomically updates the IRTE with the new destination, vector |
1137 | * and flushes the interrupt entry cache. |
1138 | */ |
1139 | irte->vector = cfg->vector; |
1140 | irte->dest_id = IRTE_DEST(cfg->dest_apicid); |
1141 | |
1142 | /* Update the hardware only if the interrupt is in remapped mode. */ |
1143 | if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) |
		modify_irte(&ir_data->irq_2_iommu, irte);
1145 | } |
1146 | |
1147 | /* |
1148 | * Migrate the IO-APIC irq in the presence of intr-remapping. |
1149 | * |
1150 | * For both level and edge triggered, irq migration is a simple atomic |
1151 | * update(of vector and cpu destination) of IRTE and flush the hardware cache. |
1152 | * |
1153 | * For level triggered, we eliminate the io-apic RTE modification (with the |
1154 | * updated vector information), by using a virtual vector (io-apic pin number). |
1155 | * Real vector that is used for interrupting cpu will be coming from |
1156 | * the interrupt-remapping table entry. |
1157 | * |
1158 | * As the migration is a simple atomic update of IRTE, the same mechanism |
1159 | * is used to migrate MSI irq's in the presence of interrupt-remapping. |
1160 | */ |
1161 | static int |
1162 | intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, |
1163 | bool force) |
1164 | { |
1165 | struct irq_data *parent = data->parent_data; |
	struct irq_cfg *cfg = irqd_cfg(data);
1167 | int ret; |
1168 | |
1169 | ret = parent->chip->irq_set_affinity(parent, mask, force); |
1170 | if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) |
1171 | return ret; |
1172 | |
	intel_ir_reconfigure_irte(data, false);
1174 | /* |
1175 | * After this point, all the interrupts will start arriving |
1176 | * at the new destination. So, time to cleanup the previous |
1177 | * vector allocation. |
1178 | */ |
1179 | vector_schedule_cleanup(cfg); |
1180 | |
1181 | return IRQ_SET_MASK_OK_DONE; |
1182 | } |
1183 | |
1184 | static void intel_ir_compose_msi_msg(struct irq_data *irq_data, |
1185 | struct msi_msg *msg) |
1186 | { |
1187 | struct intel_ir_data *ir_data = irq_data->chip_data; |
1188 | |
1189 | *msg = ir_data->msi_entry; |
1190 | } |
1191 | |
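/*
 * Switch an interrupt between remapped and posted mode. A NULL @info
 * returns the entry to remapped mode by restoring the cached remapped
 * IRTE; otherwise the cached entry is cloned and rewritten in posted
 * format, pointing at the vCPU's posted-interrupt descriptor.
 */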
1192 | static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) |
1193 | { |
1194 | struct intel_ir_data *ir_data = data->chip_data; |
1195 | struct vcpu_data *vcpu_pi_info = info; |
1196 | |
1197 | /* stop posting interrupts, back to remapping mode */ |
1198 | if (!vcpu_pi_info) { |
		modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
1200 | } else { |
1201 | struct irte irte_pi; |
1202 | |
1203 | /* |
1204 | * We are not caching the posted interrupt entry. We |
1205 | * copy the data from the remapped entry and modify |
1206 | * the fields which are relevant for posted mode. The |
1207 | * cached remapped entry is used for switching back to |
1208 | * remapped mode. |
1209 | */ |
1210 | memset(&irte_pi, 0, sizeof(irte_pi)); |
		dmar_copy_shared_irte(&irte_pi, &ir_data->irte_entry);
1212 | |
1213 | /* Update the posted mode fields */ |
1214 | irte_pi.p_pst = 1; |
1215 | irte_pi.p_urgent = 0; |
1216 | irte_pi.p_vector = vcpu_pi_info->vector; |
1217 | irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >> |
1218 | (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT); |
1219 | irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & |
1220 | ~(-1UL << PDA_HIGH_BIT); |
1221 | |
		modify_irte(&ir_data->irq_2_iommu, &irte_pi);
1223 | } |
1224 | |
1225 | return 0; |
1226 | } |
1227 | |
1228 | static struct irq_chip intel_ir_chip = { |
1229 | .name = "INTEL-IR" , |
1230 | .irq_ack = apic_ack_irq, |
1231 | .irq_set_affinity = intel_ir_set_affinity, |
1232 | .irq_compose_msi_msg = intel_ir_compose_msi_msg, |
1233 | .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, |
1234 | }; |
1235 | |
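/*
 * Compose a remappable-format MSI message: the address encodes the IRTE
 * handle (bits 19:5 carry handle[14:0], bit 2 carries handle[15]) with
 * the subhandle-valid flag set, and the data register carries the
 * subhandle.
 */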
1236 | static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle) |
1237 | { |
1238 | memset(msg, 0, sizeof(*msg)); |
1239 | |
1240 | msg->arch_addr_lo.dmar_base_address = X86_MSI_BASE_ADDRESS_LOW; |
1241 | msg->arch_addr_lo.dmar_subhandle_valid = true; |
1242 | msg->arch_addr_lo.dmar_format = true; |
1243 | msg->arch_addr_lo.dmar_index_0_14 = index & 0x7FFF; |
1244 | msg->arch_addr_lo.dmar_index_15 = !!(index & 0x8000); |
1245 | |
1246 | msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; |
1247 | |
1248 | msg->arch_data.dmar_subhandle = subhandle; |
1249 | } |
1250 | |
1251 | static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, |
1252 | struct irq_cfg *irq_cfg, |
1253 | struct irq_alloc_info *info, |
1254 | int index, int sub_handle) |
1255 | { |
1256 | struct irte *irte = &data->irte_entry; |
1257 | |
	prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid);
1259 | |
1260 | switch (info->type) { |
1261 | case X86_IRQ_ALLOC_TYPE_IOAPIC: |
1262 | /* Set source-id of interrupt request */ |
		set_ioapic_sid(irte, info->devid);
		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
1265 | info->devid, irte->present, irte->fpd, |
1266 | irte->dst_mode, irte->redir_hint, |
1267 | irte->trigger_mode, irte->dlvry_mode, |
1268 | irte->avail, irte->vector, irte->dest_id, |
1269 | irte->sid, irte->sq, irte->svt); |
1270 | sub_handle = info->ioapic.pin; |
1271 | break; |
1272 | case X86_IRQ_ALLOC_TYPE_HPET: |
		set_hpet_sid(irte, info->devid);
1274 | break; |
1275 | case X86_IRQ_ALLOC_TYPE_PCI_MSI: |
1276 | case X86_IRQ_ALLOC_TYPE_PCI_MSIX: |
		set_msi_sid(irte,
			    pci_real_dma_dev(msi_desc_to_pci_dev(info->desc)));
1279 | break; |
1280 | default: |
1281 | BUG_ON(1); |
1282 | break; |
1283 | } |
	fill_msi_msg(&data->msi_entry, index, sub_handle);
1285 | } |
1286 | |
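/*
 * Free the chip_data of @nr_irqs interrupts starting at @virq and release
 * their IRTEs. clear_entries() skips descriptors with a non-zero
 * sub_handle, since only the first descriptor owns the IRTE block.
 */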
1287 | static void intel_free_irq_resources(struct irq_domain *domain, |
1288 | unsigned int virq, unsigned int nr_irqs) |
1289 | { |
1290 | struct irq_data *irq_data; |
1291 | struct intel_ir_data *data; |
1292 | struct irq_2_iommu *irq_iommu; |
1293 | unsigned long flags; |
	int i;

	for (i = 0; i < nr_irqs; i++) {
		irq_data = irq_domain_get_irq_data(domain, virq + i);
1297 | if (irq_data && irq_data->chip_data) { |
1298 | data = irq_data->chip_data; |
1299 | irq_iommu = &data->irq_2_iommu; |
1300 | raw_spin_lock_irqsave(&irq_2_ir_lock, flags); |
1301 | clear_entries(irq_iommu); |
1302 | raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); |
1303 | irq_domain_reset_irq_data(irq_data); |
			kfree(data);
1305 | } |
1306 | } |
1307 | } |
1308 | |
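/*
 * Domain alloc callback: allocate the parent (vector) resources, reserve
 * an IRTE block sized for @nr_irqs, and attach an intel_ir_data to each
 * descriptor. All descriptors share the block reserved for the first one,
 * distinguished by sub_handle; multi-interrupt allocations are only
 * accepted for PCI multi-MSI.
 */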
1309 | static int intel_irq_remapping_alloc(struct irq_domain *domain, |
1310 | unsigned int virq, unsigned int nr_irqs, |
1311 | void *arg) |
1312 | { |
1313 | struct intel_iommu *iommu = domain->host_data; |
1314 | struct irq_alloc_info *info = arg; |
1315 | struct intel_ir_data *data, *ird; |
1316 | struct irq_data *irq_data; |
1317 | struct irq_cfg *irq_cfg; |
1318 | int i, ret, index; |
1319 | |
1320 | if (!info || !iommu) |
1321 | return -EINVAL; |
1322 | if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI) |
1323 | return -EINVAL; |
1324 | |
	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
1326 | if (ret < 0) |
1327 | return ret; |
1328 | |
1329 | ret = -ENOMEM; |
	data = kzalloc(sizeof(*data), GFP_KERNEL);
1331 | if (!data) |
1332 | goto out_free_parent; |
1333 | |
	index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs);
1335 | if (index < 0) { |
1336 | pr_warn("Failed to allocate IRTE\n" ); |
1337 | kfree(objp: data); |
1338 | goto out_free_parent; |
1339 | } |
1340 | |
1341 | for (i = 0; i < nr_irqs; i++) { |
		irq_data = irq_domain_get_irq_data(domain, virq + i);
1343 | irq_cfg = irqd_cfg(irq_data); |
1344 | if (!irq_data || !irq_cfg) { |
1345 | if (!i) |
				kfree(data);
1347 | ret = -EINVAL; |
1348 | goto out_free_data; |
1349 | } |
1350 | |
1351 | if (i > 0) { |
			ird = kzalloc(sizeof(*ird), GFP_KERNEL);
1353 | if (!ird) |
1354 | goto out_free_data; |
1355 | /* Initialize the common data */ |
1356 | ird->irq_2_iommu = data->irq_2_iommu; |
1357 | ird->irq_2_iommu.sub_handle = i; |
1358 | } else { |
1359 | ird = data; |
1360 | } |
1361 | |
1362 | irq_data->hwirq = (index << 16) + i; |
1363 | irq_data->chip_data = ird; |
1364 | irq_data->chip = &intel_ir_chip; |
		intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
		irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
1367 | } |
1368 | return 0; |
1369 | |
1370 | out_free_data: |
	intel_free_irq_resources(domain, virq, i);
1372 | out_free_parent: |
1373 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
1374 | return ret; |
1375 | } |
1376 | |
1377 | static void intel_irq_remapping_free(struct irq_domain *domain, |
1378 | unsigned int virq, unsigned int nr_irqs) |
1379 | { |
1380 | intel_free_irq_resources(domain, virq, nr_irqs); |
1381 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
1382 | } |
1383 | |
1384 | static int intel_irq_remapping_activate(struct irq_domain *domain, |
1385 | struct irq_data *irq_data, bool reserve) |
1386 | { |
	intel_ir_reconfigure_irte(irq_data, true);
1388 | return 0; |
1389 | } |
1390 | |
1391 | static void intel_irq_remapping_deactivate(struct irq_domain *domain, |
1392 | struct irq_data *irq_data) |
1393 | { |
1394 | struct intel_ir_data *data = irq_data->chip_data; |
1395 | struct irte entry; |
1396 | |
1397 | memset(&entry, 0, sizeof(entry)); |
	modify_irte(&data->irq_2_iommu, &entry);
1399 | } |
1400 | |
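/*
 * Match IOAPIC/HPET fwspecs against the IOMMU covering the device so the
 * irqdomain core selects the correct remapping domain.
 */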
1401 | static int intel_irq_remapping_select(struct irq_domain *d, |
1402 | struct irq_fwspec *fwspec, |
1403 | enum irq_domain_bus_token bus_token) |
1404 | { |
1405 | struct intel_iommu *iommu = NULL; |
1406 | |
1407 | if (x86_fwspec_is_ioapic(fwspec)) |
		iommu = map_ioapic_to_iommu(fwspec->param[0]);
1409 | else if (x86_fwspec_is_hpet(fwspec)) |
		iommu = map_hpet_to_iommu(fwspec->param[0]);
1411 | |
1412 | return iommu && d == iommu->ir_domain; |
1413 | } |
1414 | |
1415 | static const struct irq_domain_ops intel_ir_domain_ops = { |
1416 | .select = intel_irq_remapping_select, |
1417 | .alloc = intel_irq_remapping_alloc, |
1418 | .free = intel_irq_remapping_free, |
1419 | .activate = intel_irq_remapping_activate, |
1420 | .deactivate = intel_irq_remapping_deactivate, |
1421 | }; |
1422 | |
1423 | static const struct msi_parent_ops dmar_msi_parent_ops = { |
1424 | .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | |
1425 | MSI_FLAG_MULTI_PCI_MSI | |
1426 | MSI_FLAG_PCI_IMS, |
1427 | .prefix = "IR-" , |
1428 | .init_dev_msi_info = msi_parent_init_dev_msi_info, |
1429 | }; |
1430 | |
1431 | static const struct msi_parent_ops virt_dmar_msi_parent_ops = { |
1432 | .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | |
1433 | MSI_FLAG_MULTI_PCI_MSI, |
1434 | .prefix = "vIR-" , |
1435 | .init_dev_msi_info = msi_parent_init_dev_msi_info, |
1436 | }; |
1437 | |
1438 | /* |
1439 | * Support of Interrupt Remapping Unit Hotplug |
1440 | */ |
1441 | static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) |
1442 | { |
1443 | int ret; |
1444 | int eim = x2apic_enabled(); |
1445 | |
	ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu);
1447 | if (ret) |
1448 | return ret; |
1449 | |
1450 | if (eim && !ecap_eim_support(iommu->ecap)) { |
1451 | pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n" , |
1452 | iommu->reg_phys, iommu->ecap); |
1453 | return -ENODEV; |
1454 | } |
1455 | |
	if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) {
		pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n",
1458 | iommu->reg_phys); |
1459 | return -ENODEV; |
1460 | } |
1461 | |
1462 | /* TODO: check all IOAPICs are covered by IOMMU */ |
1463 | |
1464 | /* Setup Interrupt-remapping now. */ |
1465 | ret = intel_setup_irq_remapping(iommu); |
1466 | if (ret) { |
1467 | pr_err("Failed to setup irq remapping for %s\n" , |
1468 | iommu->name); |
1469 | intel_teardown_irq_remapping(iommu); |
1470 | ir_remove_ioapic_hpet_scope(iommu); |
1471 | } else { |
1472 | iommu_enable_irq_remapping(iommu); |
1473 | } |
1474 | |
1475 | return ret; |
1476 | } |
1477 | |
1478 | int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert) |
1479 | { |
1480 | int ret = 0; |
1481 | struct intel_iommu *iommu = dmaru->iommu; |
1482 | |
1483 | if (!irq_remapping_enabled) |
1484 | return 0; |
1485 | if (iommu == NULL) |
1486 | return -EINVAL; |
1487 | if (!ecap_ir_support(iommu->ecap)) |
1488 | return 0; |
	if (irq_remapping_cap(IRQ_POSTING_CAP) &&
1490 | !cap_pi_support(iommu->cap)) |
1491 | return -EBUSY; |
1492 | |
1493 | if (insert) { |
1494 | if (!iommu->ir_table) |
1495 | ret = dmar_ir_add(dmaru, iommu); |
1496 | } else { |
1497 | if (iommu->ir_table) { |
			if (!bitmap_empty(iommu->ir_table->bitmap,
1499 | INTR_REMAP_TABLE_ENTRIES)) { |
1500 | ret = -EBUSY; |
1501 | } else { |
1502 | iommu_disable_irq_remapping(iommu); |
1503 | intel_teardown_irq_remapping(iommu); |
1504 | ir_remove_ioapic_hpet_scope(iommu); |
1505 | } |
1506 | } |
1507 | } |
1508 | |
1509 | return ret; |
1510 | } |
1511 | |