// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-priv.h"

/*
 * Return the fault parameter of a device if it exists. Otherwise, return NULL.
 * On a successful return, the caller takes a reference of this parameter and
 * should put it after use by calling iopf_put_dev_fault_param().
 */
static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;

	rcu_read_lock();
	fault_param = rcu_dereference(param->fault_param);
	if (fault_param && !refcount_inc_not_zero(&fault_param->users))
		fault_param = NULL;
	rcu_read_unlock();

	return fault_param;
}

/* Caller must hold a reference of the fault parameter. */
static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
	if (refcount_dec_and_test(&fault_param->users))
		kfree_rcu(fault_param, rcu);
}

static void __iopf_free_group(struct iopf_group *group)
{
	struct iopf_fault *iopf, *next;

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	/* Pair with iommu_report_device_fault(). */
	iopf_put_dev_fault_param(group->fault_param);
}

void iopf_free_group(struct iopf_group *group)
{
	__iopf_free_group(group);
	kfree(group);
}
EXPORT_SYMBOL_GPL(iopf_free_group);

static struct iommu_domain *get_domain_for_iopf(struct device *dev,
						struct iommu_fault *fault)
{
	struct iommu_domain *domain;

	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
		domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
		if (IS_ERR(domain))
			domain = NULL;
	} else {
		domain = iommu_get_domain_for_dev(dev);
	}

	if (!domain || !domain->iopf_handler) {
		dev_warn_ratelimited(dev,
				     "iopf (pasid %d) without domain attached or handler installed\n",
				     fault->prm.pasid);

		return NULL;
	}

	return domain;
}

/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
				struct iommu_fault *fault)
{
	struct iopf_fault *iopf;

	iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
	if (!iopf)
		return -ENOMEM;

	iopf->fault = *fault;

	mutex_lock(&fault_param->lock);
	list_add(&iopf->list, &fault_param->partial);
	mutex_unlock(&fault_param->lock);

	return 0;
}

static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
					   struct iopf_fault *evt,
					   struct iopf_group *abort_group)
{
	struct iopf_fault *iopf, *next;
	struct iopf_group *group;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * We always need to construct the group as we need it to abort
		 * the request at the driver if it can't be handled.
		 */
		group = abort_group;
	}

	group->fault_param = iopf_param;
	group->last_fault.fault = evt->fault;
	INIT_LIST_HEAD(&group->faults);
	INIT_LIST_HEAD(&group->pending_node);
	list_add(&group->last_fault.list, &group->faults);

	/* See if we have partial faults for this group */
	mutex_lock(&iopf_param->lock);
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}
	list_add(&group->pending_node, &iopf_param->faults);
	mutex_unlock(&iopf_param->lock);

	return group;
}

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. If the fault cannot be forwarded to a domain handler, this function
 * completes evt by calling ops->page_response() when a response is required.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Any valid page fault will eventually be routed to an iommu domain, and the
 * page fault handler installed there will be called. Users of this handling
 * framework must guarantee that the iommu domain is only freed after the
 * device has stopped generating page faults (or the iommu hardware has been
 * set to block them) and all pending page faults have been flushed.
 */
void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
	struct iommu_fault *fault = &evt->fault;
	struct iommu_fault_param *iopf_param;
	struct iopf_group abort_group = {};
	struct iopf_group *group;

	iopf_param = iopf_get_dev_fault_param(dev);
	if (WARN_ON(!iopf_param))
		return;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		report_partial_fault(iopf_param, fault);
		iopf_put_dev_fault_param(iopf_param);
		/* A request that is not the last does not need to be ack'd */
		return;
	}

	/*
	 * This is the last page fault of a group. Allocate an iopf group and
	 * pass it to the domain's page fault handler. The group holds a
	 * reference count of the fault parameter, which is released after the
	 * response or on the error path of this function. If the fault cannot
	 * be handled, the error path sends a failure response to the hardware
	 * and cleans up; otherwise partial faults would be stuck.
	 */
	group = iopf_group_alloc(iopf_param, evt, &abort_group);
	if (group == &abort_group)
		goto err_abort;

	group->domain = get_domain_for_iopf(dev, fault);
	if (!group->domain)
		goto err_abort;

	/*
	 * On success iopf_handler must call iopf_group_response() and
	 * iopf_free_group()
	 */
	if (group->domain->iopf_handler(group))
		goto err_abort;

	return;

err_abort:
	iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
	if (group == &abort_group)
		__iopf_free_group(group);
	else
		iopf_free_group(group);
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
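
/*
 * Example (illustrative sketch only, not in-tree code): a hypothetical IOMMU
 * driver's threaded IRQ handler translating one hardware page request into an
 * iopf_fault and reporting it. The hw_prq_entry type and its fields are made
 * up for illustration; only iommu_report_device_fault() and the iommu_fault
 * layout are real.
 *
 *	static void hyp_report_prq_entry(struct device *dev,
 *					 struct hw_prq_entry *hw)
 *	{
 *		struct iopf_fault evt = {
 *			.fault = {
 *				.type = IOMMU_FAULT_PAGE_REQ,
 *				.prm = {
 *					.flags = IOMMU_FAULT_PAGE_REQUEST_PASID_VALID |
 *						 (hw->last ? IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE : 0),
 *					.pasid = hw->pasid,
 *					.grpid = hw->grpid,
 *					.perm = hw->write ? IOMMU_FAULT_PERM_WRITE :
 *							    IOMMU_FAULT_PERM_READ,
 *					.addr = hw->addr,
 *				},
 *			},
 *		};
 *
 *		iommu_report_device_fault(dev, &evt);
 *	}
 */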

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	struct iommu_fault_param *iopf_param;

	/*
	 * It's a driver bug to be here after iopf_queue_remove_device().
	 * Therefore, it's safe to dereference the fault parameter without
	 * holding the lock.
	 */
	iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
	if (WARN_ON(!iopf_param))
		return -ENODEV;

	flush_workqueue(iopf_param->queue->wq);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
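
/*
 * Example (sketch, assuming a hypothetical driver): the ordering that the
 * comment above prescribes when tearing down a PASID. hyp_flush_hw_prq() is a
 * stand-in for the driver's low-level queue flush, not a real function.
 *
 *	hyp_flush_hw_prq(master);		// stop/drain the hardware PRI queue first
 *	iopf_queue_flush_dev(master->dev);	// then wait for queued fault work to finish
 *	// only now is it safe to clear the PASID entry and reuse the PASID
 */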

/**
 * iopf_group_response - Respond to a group of page faults
 * @group: the group of faults with the same group id
 * @status: the response code
 */
void iopf_group_response(struct iopf_group *group,
			 enum iommu_page_response_code status)
{
	struct iommu_fault_param *fault_param = group->fault_param;
	struct iopf_fault *iopf = &group->last_fault;
	struct device *dev = group->fault_param->dev;
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_page_response resp = {
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	/* Only send response if there is a fault report pending */
	mutex_lock(&fault_param->lock);
	if (!list_empty(&group->pending_node)) {
		ops->page_response(dev, &group->last_fault, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);
}
EXPORT_SYMBOL_GPL(iopf_group_response);
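
/*
 * Example (illustrative sketch): the expected shape of a domain's
 * iopf_handler. hyp_handle_one() is hypothetical; the contract it follows --
 * queue the work, then call iopf_group_response() followed by
 * iopf_free_group() from the work item -- is the real one described in
 * iommu_report_device_fault().
 *
 *	static void hyp_iopf_work(struct work_struct *work)
 *	{
 *		struct iopf_group *group = container_of(work, struct iopf_group, work);
 *		enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
 *		struct iopf_fault *iopf;
 *
 *		list_for_each_entry(iopf, &group->faults, list) {
 *			status = hyp_handle_one(group->domain, &iopf->fault);
 *			if (status != IOMMU_PAGE_RESP_SUCCESS)
 *				break;
 *		}
 *
 *		iopf_group_response(group, status);
 *		iopf_free_group(group);
 *	}
 *
 *	static int hyp_iopf_handler(struct iopf_group *group)
 *	{
 *		INIT_WORK(&group->work, hyp_iopf_work);
 *		if (!queue_work(group->fault_param->queue->wq, &group->work))
 *			return -EBUSY;
 *
 *		return 0;
 *	}
 */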

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last page fault of a group may have
 * been lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iommu_fault_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		mutex_lock(&iopf_param->lock);
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
		mutex_unlock(&iopf_param->lock);
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
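
/*
 * Example (sketch): recovery after a hardware PRI queue overflow, with
 * hyp_ack_prq_overflow() standing in for a driver-specific operation.
 *
 *	hyp_ack_prq_overflow(iommu);		// clear the hardware overflow condition
 *	iopf_queue_discard_partial(queue);	// drop faults whose "last" request was lost
 */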

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = 0;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (!ops->page_response)
		return -ENODEV;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (rcu_dereference_check(param->fault_param,
				  lockdep_is_held(&param->lock))) {
		ret = -EBUSY;
		goto done_unlock;
	}

	fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
	if (!fault_param) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	mutex_init(&fault_param->lock);
	INIT_LIST_HEAD(&fault_param->faults);
	INIT_LIST_HEAD(&fault_param->partial);
	fault_param->dev = dev;
	refcount_set(&fault_param->users, 1);
	list_add(&fault_param->queue_list, &queue->devices);
	fault_param->queue = queue;

	rcu_assign_pointer(param->fault_param, fault_param);

done_unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Removes a device from the iopf_queue. It is recommended to follow these
 * steps when removing a device (a sketch of the sequence follows this
 * function):
 *
 * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
 *   and flush any hardware page request queues. This should be done before
 *   calling into this helper.
 * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
 *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
 *   not retry. This helper function handles this.
 * - Disable PRI on the device: After calling this helper, the caller could
 *   then disable PRI on the device.
 *
 * Calling iopf_queue_remove_device() essentially disassociates the device.
 * The fault_param might still exist, but iommu_page_response() will do
 * nothing. The device fault parameter reference count has been properly
 * passed from iommu_report_device_fault() to the fault handling work, and
 * will eventually be released after iommu_page_response().
 */
void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	struct iopf_fault *partial_iopf;
	struct iopf_fault *next;
	struct iopf_group *group, *temp;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	fault_param = rcu_dereference_check(param->fault_param,
					    lockdep_is_held(&param->lock));

	if (WARN_ON(!fault_param || fault_param->queue != queue))
		goto unlock;

	mutex_lock(&fault_param->lock);
	list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
		kfree(partial_iopf);

	list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
		struct iopf_fault *iopf = &group->last_fault;
		struct iommu_page_response resp = {
			.pasid = iopf->fault.prm.pasid,
			.grpid = iopf->fault.prm.grpid,
			.code = IOMMU_PAGE_RESP_INVALID
		};

		ops->page_response(dev, iopf, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);

	list_del(&fault_param->queue_list);

	/* dec the ref owned by iopf_queue_add_device() */
	rcu_assign_pointer(param->fault_param, NULL);
	iopf_put_dev_fault_param(fault_param);
unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
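
/*
 * Example (sketch): the three steps described above, in order. The hyp_*()
 * helper is hypothetical; pci_disable_pri() is the real PCI helper for the
 * final step.
 *
 *	hyp_disable_iommu_pri(master);		// 1. stop PRI generation, flush hw queue
 *	iopf_queue_remove_device(queue, dev);	// 2. respond to outstanding PRQs
 *	pci_disable_pri(to_pci_dev(dev));	// 3. disable PRI on the device
 */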

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);
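
/*
 * Example (sketch): probe-time pairing of iopf_queue_alloc() with
 * iopf_queue_add_device(). The hyp_iommu structure is illustrative only.
 *
 *	hyp->iopf_queue = iopf_queue_alloc(dev_name(hyp->dev));
 *	if (!hyp->iopf_queue)
 *		return -ENOMEM;
 *
 *	// later, when a PRI-capable endpoint is probed:
 *	ret = iopf_queue_add_device(hyp->iopf_queue, dev);
 */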

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iommu_fault_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
494 | |