// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);

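/*
 * Per-PASID private data, keyed by PASID value. The SVM code stores its
 * struct intel_svm here so that it can be looked up again by PASID when
 * binding further devices or tearing the binding down.
 */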
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

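/*
 * Set up the page request queue (PRQ) for an IOMMU: allocate the queue
 * pages, a hardware IRQ and an IOPF queue, install the threaded IRQ
 * handler, and program the PRQ head/tail/address registers.
 */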
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}

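/*
 * Tear down the page request queue: disable it in hardware, release the
 * IRQ and the IOPF queue, and free the queue pages.
 */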
int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

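/*
 * Mark the IOMMU as SVM-capable only when its first-level paging
 * capabilities match what the CPU may hand us: 1GB pages if the CPU has
 * GBPAGES, and 5-level paging if the CPU runs with LA57 enabled.
 */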
void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_fl5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled) {
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
		quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
					  svm->pasid, sdev->qdep);
	}
}

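/*
 * Flush an arbitrary range by rounding it up to a naturally aligned,
 * power-of-two sized chunk size and flushing the covering chunks one by
 * one, so each flush can be expressed as a single invalidation.
 */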
static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

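/*
 * Flush everything cached for this PASID on all bound devices: a
 * PASID-selective IOTLB flush plus, where ATS is enabled, a full
 * address-space device-TLB flush.
 */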
static void intel_flush_svm_all(struct intel_svm *svm)
{
	struct device_domain_info *info;
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list) {
		info = dev_iommu_priv_get(sdev->dev);

		qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
		if (info->ats_enabled) {
			qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
						 svm->pasid, sdev->qdep,
						 0, 64 - VTD_PAGE_SHIFT);
			quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
						  svm->pasid, sdev->qdep);
		}
	}
	rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	if (start == 0 && end == -1UL) {
		intel_flush_svm_all(svm);
		return;
	}

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our arch_invalidate_secondary_tlbs()
	 * callback doesn't get called when the page tables are cleared. So we
	 * need to protect against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};

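/*
 * Look up the intel_svm for @pasid and, if @dev is already bound to it,
 * the matching intel_svm_dev. Returns 0 with *rsvm and *rsdev set
 * (possibly to NULL), or a negative errno for an invalid PASID.
 */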
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

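/*
 * Bind @pasid on @dev to the mm of the SVA domain: allocate (or reuse)
 * the intel_svm tracking structure, register an mmu_notifier on the mm,
 * and install a first-level PASID table entry pointing at the mm's page
 * tables.
 */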
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
				   struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct mm_struct *mm = domain->mm;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	unsigned long sflags;
	int ret = 0;

	svm = pasid_private_find(pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return -ENOMEM;

		svm->pasid = pasid;
		svm->mm = mm;
		INIT_LIST_HEAD_RCU(&svm->devs);

		svm->notifier.ops = &intel_mmuops;
		ret = mmu_notifier_register(&svm->notifier, mm);
		if (ret) {
			kfree(svm);
			return ret;
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ret;
		}
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	if (info->ats_enabled) {
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
					    FLPT_DEFAULT_DID, sflags);
	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);

	return 0;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(pasid);
		kfree(svm);
	}

	return ret;
}

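/*
 * Undo intel_svm_set_dev_pasid(): drop the device from the PASID's device
 * list and, when the last device goes away, unregister the mmu_notifier
 * and free the intel_svm.
 */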
void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	struct mm_struct *mm;

	if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
		return;
	mm = svm->mm;

	if (sdev) {
		list_del_rcu(&sdev->list);
		kfree_rcu(sdev, rcu);

		if (list_empty(&svm->devs)) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			pasid_private_remove(svm->pasid);
			kfree(svm);
		}
	}
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

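/*
 * An address is canonical when its upper bits (63 down to
 * __VIRTUAL_MASK_SHIFT) are sign-extended copies of the topmost
 * implemented bit. The shift-left/arithmetic-shift-right pair reproduces
 * the original value only if that sign extension already holds.
 */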
static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then follow the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain_id_iommu(domain, iommu);
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	iopf_queue_flush_dev(dev);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				 struct page_req_dsc *desc)
{
	struct iopf_fault event = { };

	/* Fill in event data for device specific processing */
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set the last-page-in-group bit if private data is present;
		 * a page response is required in that case, just as for LPIG.
		 * iommu_report_device_fault() doesn't understand this vendor
		 * specific requirement, so we set last_page as a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use
		 * them to monitor the prq handling latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set. This is an additional VT-d feature beyond
	 * PCI ATS spec.
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}

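/*
 * Threaded handler for the page request queue interrupt. Walk the queue
 * from head to tail, sanity-check each descriptor, report valid faults to
 * the IOPF layer, respond to malformed ones, and finally clear the
 * overflow condition once the queue has been fully drained.
 */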
static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_iommu *iommu = d;
	struct page_req_dsc *req;
	int head, tail, handled;
	struct device *dev;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		/*
		 * If the prq is to be handled outside the iommu driver via a
		 * receiver of the fault notifiers, we skip the page response
		 * here.
		 */
		mutex_lock(&iommu->iopf_lock);
		dev = device_rbtree_find(iommu, req->rid);
		if (!dev) {
			mutex_unlock(&iommu->iopf_lock);
			goto bad_req;
		}

		intel_svm_prq_report(iommu, dev, req);
		trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
				 req->priv_data[0], req->priv_data[1],
				 iommu->prq_seq_number++);
		mutex_unlock(&iommu->iopf_lock);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

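/*
 * Send a page group response for a previously reported page request. Per
 * the VT-d spec, a response is only required when the request had the
 * LPIG or private-data-present bit set.
 */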
void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
			     struct iommu_page_response *msg)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	u8 bus = info->bus, devfn = info->devfn;
	struct iommu_fault_page_request *prm;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u16 sid;

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
					    ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
	kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
	.set_dev_pasid = intel_svm_set_dev_pasid,
	.free = intel_svm_domain_free
};

struct iommu_domain *intel_svm_domain_alloc(void)
{
	struct dmar_domain *domain;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;
	domain->domain.ops = &intel_svm_domain_ops;

	return &domain->domain;
}