iommu.c source code [linux/drivers/iommu/iommu.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
4	* Author: Joerg Roedel <jroedel@suse.de>
5	*/
6
7	#define pr_fmt(fmt) "iommu: " fmt
8
9	#include <linux/amba/bus.h>
10	#include <linux/device.h>
11	#include <linux/kernel.h>
12	#include <linux/bits.h>
13	#include <linux/bug.h>
14	#include <linux/types.h>
15	#include <linux/init.h>
16	#include <linux/export.h>
17	#include <linux/slab.h>
18	#include <linux/errno.h>
19	#include <linux/host1x_context_bus.h>
20	#include <linux/iommu.h>
21	#include <linux/iommufd.h>
22	#include <linux/idr.h>
23	#include <linux/err.h>
24	#include <linux/pci.h>
25	#include <linux/pci-ats.h>
26	#include <linux/bitops.h>
27	#include <linux/platform_device.h>
28	#include <linux/property.h>
29	#include <linux/fsl/mc.h>
30	#include <linux/module.h>
31	#include <linux/cc_platform.h>
32	#include <linux/cdx/cdx_bus.h>
33	#include <trace/events/iommu.h>
34	#include <linux/sched/mm.h>
35	#include <linux/msi.h>
36	#include <uapi/linux/iommufd.h>
37
38	#include "dma-iommu.h"
39	#include "iommu-priv.h"
40
41	static struct kset *iommu_group_kset;
42	static DEFINE_IDA(iommu_group_ida);
43	static DEFINE_IDA(iommu_global_pasid_ida);
44
45	static unsigned int iommu_def_domain_type __read_mostly;
46	static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
47	static u32 iommu_cmd_line __read_mostly;
48
49	/ Tags used with xa_tag_pointer() in group->pasid_array /
50	enum { IOMMU_PASID_ARRAY_DOMAIN = `0`, IOMMU_PASID_ARRAY_HANDLE = `1` };
51
52	struct iommu_group {
53	struct kobject kobj;
54	struct kobject *devices_kobj;
55	struct list_head devices;
56	struct xarray pasid_array;
57	struct mutex mutex;
58	void *iommu_data;
59	void (iommu_data_release)(void* *iommu_data);
60	char *name;
61	int id;
62	struct iommu_domain *default_domain;
63	struct iommu_domain *blocking_domain;
64	struct iommu_domain *domain;
65	struct list_head entry;
66	unsigned int owner_cnt;
67	void *owner;
68	};
69
70	struct group_device {
71	struct list_head list;
72	struct device *dev;
73	char *name;
74	};
75
76	/ Iterate over each struct group_device in a struct iommu_group /
77	#define for_each_group_device(group, pos) \
78	list_for_each_entry(pos, &(group)->devices, list)
79
80	struct iommu_group_attribute {
81	struct attribute attr;
82	ssize_t (show)(struct* iommu_group group, char* *buf);
83	ssize_t (store)(struct* iommu_group *group,
84	const char *buf, size_t count);
85	};
86
87	static const char * const iommu_group_resv_type_string[] = {
88	[IOMMU_RESV_DIRECT] = "direct",
89	[IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable",
90	[IOMMU_RESV_RESERVED] = "reserved",
91	[IOMMU_RESV_MSI] = "msi",
92	[IOMMU_RESV_SW_MSI] = "msi",
93	};
94
95	#define IOMMU_CMD_LINE_DMA_API BIT(0)
96	#define IOMMU_CMD_LINE_STRICT BIT(1)
97
98	static int bus_iommu_probe(const struct bus_type *bus);
99	static int iommu_bus_notifier(struct notifier_block *nb,
100	unsigned long action, void *data);
101	static void iommu_release_device(struct device *dev);
102	static int __iommu_attach_device(struct iommu_domain *domain,
103	struct device *dev);
104	static int __iommu_attach_group(struct iommu_domain *domain,
105	struct iommu_group *group);
106	static struct iommu_domain __iommu_paging_domain_alloc_flags(struct* device *dev,
107	unsigned int type,
108	unsigned int flags);
109
110	enum {
111	IOMMU_SET_DOMAIN_MUST_SUCCEED = `1` << `0`,
112	};
113
114	static int __iommu_device_set_domain(struct iommu_group *group,
115	struct device *dev,
116	struct iommu_domain *new_domain,
117	unsigned int flags);
118	static int __iommu_group_set_domain_internal(struct iommu_group *group,
119	struct iommu_domain *new_domain,
120	unsigned int flags);
121	static int __iommu_group_set_domain(struct iommu_group *group,
122	struct iommu_domain *new_domain)
123	{
124	return __iommu_group_set_domain_internal(group, new_domain, flags: `0`);
125	}
126	static void __iommu_group_set_domain_nofail(struct iommu_group *group,
127	struct iommu_domain *new_domain)
128	{
129	WARN_ON(__iommu_group_set_domain_internal(
130	group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
131	}
132
133	static int iommu_setup_default_domain(struct iommu_group *group,
134	int target_type);
135	static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
136	struct device *dev);
137	static ssize_t iommu_group_store_type(struct iommu_group *group,
138	const char *buf, size_t count);
139	static struct group_device iommu_group_alloc_device(struct* iommu_group *group,
140	struct device *dev);
141	static void __iommu_group_free_device(struct iommu_group *group,
142	struct group_device *grp_dev);
143	static void iommu_domain_init(struct iommu_domain domain, unsigned* int type,
144	const struct iommu_ops *ops);
145
146	#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \
147	struct iommu_group_attribute iommu_group_attr_##_name = \
148	__ATTR(_name, _mode, _show, _store)
149
150	#define to_iommu_group_attr(_attr) \
151	container_of(_attr, struct iommu_group_attribute, attr)
152	#define to_iommu_group(_kobj) \
153	container_of(_kobj, struct iommu_group, kobj)
154
155	static LIST_HEAD(iommu_device_list);
156	static DEFINE_SPINLOCK(iommu_device_lock);
157
158	static const struct bus_type * const iommu_buses[] = {
159	&platform_bus_type,
160	#ifdef CONFIG_PCI
161	&pci_bus_type,
162	#endif
163	#ifdef CONFIG_ARM_AMBA
164	&amba_bustype,
165	#endif
166	#ifdef CONFIG_FSL_MC_BUS
167	&fsl_mc_bus_type,
168	#endif
169	#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
170	&host1x_context_device_bus_type,
171	#endif
172	#ifdef CONFIG_CDX_BUS
173	&cdx_bus_type,
174	#endif
175	};
176
177	/*
178	* Use a function instead of an array here because the domain-type is a
179	* bit-field, so an array would waste memory.
180	*/
181	static const char iommu_domain_type_str(unsigned* int t)
182	{
183	switch (t) {
184	case IOMMU_DOMAIN_BLOCKED:
185	return "Blocked";
186	case IOMMU_DOMAIN_IDENTITY:
187	return "Passthrough";
188	case IOMMU_DOMAIN_UNMANAGED:
189	return "Unmanaged";
190	case IOMMU_DOMAIN_DMA:
191	case IOMMU_DOMAIN_DMA_FQ:
192	return "Translated";
193	case IOMMU_DOMAIN_PLATFORM:
194	return "Platform";
195	default:
196	return "Unknown";
197	}
198	}
199
200	static int __init iommu_subsys_init(void)
201	{
202	struct notifier_block *nb;
203
204	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
205	if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
206	iommu_set_default_passthrough(cmd_line: false);
207	else
208	iommu_set_default_translated(cmd_line: false);
209
210	if (iommu_default_passthrough() && cc_platform_has(attr: CC_ATTR_MEM_ENCRYPT)) {
211	pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
212	iommu_set_default_translated(cmd_line: false);
213	}
214	}
215
216	if (!iommu_default_passthrough() && !iommu_dma_strict)
217	iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
218
219	pr_info("Default domain type: %s%s\n",
220	iommu_domain_type_str(iommu_def_domain_type),
221	(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
222	" (set via kernel command line)" : "");
223
224	if (!iommu_default_passthrough())
225	pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
226	iommu_dma_strict ? "strict" : "lazy",
227	(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
228	" (set via kernel command line)" : "");
229
230	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
231	if (!nb)
232	return -ENOMEM;
233
234	for (int i = `0`; i < ARRAY_SIZE(iommu_buses); i++) {
235	nb[i].notifier_call = iommu_bus_notifier;
236	bus_register_notifier(bus: iommu_buses[i], nb: &nb[i]);
237	}
238
239	return `0`;
240	}
241	subsys_initcall(iommu_subsys_init);
242
243	static int remove_iommu_group(struct device dev, void* *data)
244	{
245	if (dev->iommu && dev->iommu->iommu_dev == data)
246	iommu_release_device(dev);
247
248	return `0`;
249	}
250
251	/**
252	* iommu_device_register() - Register an IOMMU hardware instance
253	* @iommu: IOMMU handle for the instance
254	* @ops: IOMMU ops to associate with the instance
255	* @hwdev: (optional) actual instance device, used for fwnode lookup
256	*
257	* Return: 0 on success, or an error.
258	*/
259	int iommu_device_register(struct iommu_device *iommu,
260	const struct iommu_ops ops, struct* device *hwdev)
261	{
262	int err = `0`;
263
264	/ We need to be able to take module references appropriately /
265	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
266	return -EINVAL;
267
268	iommu->ops = ops;
269	if (hwdev)
270	iommu->fwnode = dev_fwnode(hwdev);
271
272	spin_lock(lock: &iommu_device_lock);
273	list_add_tail(new: &iommu->list, head: &iommu_device_list);
274	spin_unlock(lock: &iommu_device_lock);
275
276	for (int i = `0`; i < ARRAY_SIZE(iommu_buses) && !err; i++)
277	err = bus_iommu_probe(bus: iommu_buses[i]);
278	if (err)
279	iommu_device_unregister(iommu);
280	else
281	WRITE_ONCE(iommu->ready, true);
282	return err;
283	}
284	EXPORT_SYMBOL_GPL(iommu_device_register);
285
286	void iommu_device_unregister(struct iommu_device *iommu)
287	{
288	for (int i = `0`; i < ARRAY_SIZE(iommu_buses); i++)
289	bus_for_each_dev(bus: iommu_buses[i], NULL, data: iommu, fn: remove_iommu_group);
290
291	spin_lock(lock: &iommu_device_lock);
292	list_del(entry: &iommu->list);
293	spin_unlock(lock: &iommu_device_lock);
294
295	/ Pairs with the alloc in generic_single_device_group() /
296	iommu_group_put(group: iommu->singleton_group);
297	iommu->singleton_group = NULL;
298	}
299	EXPORT_SYMBOL_GPL(iommu_device_unregister);
300
301	#if IS_ENABLED(CONFIG_IOMMUFD_TEST)
302	void iommu_device_unregister_bus(struct iommu_device *iommu,
303	const struct bus_type *bus,
304	struct notifier_block *nb)
305	{
306	bus_unregister_notifier(bus, nb);
307	iommu_device_unregister(iommu);
308	}
309	EXPORT_SYMBOL_GPL(iommu_device_unregister_bus);
310
311	/*
312	* Register an iommu driver against a single bus. This is only used by iommufd
313	* selftest to create a mock iommu driver. The caller must provide
314	* some memory to hold a notifier_block.
315	*/
316	int iommu_device_register_bus(struct iommu_device *iommu,
317	const struct iommu_ops *ops,
318	const struct bus_type *bus,
319	struct notifier_block *nb)
320	{
321	int err;
322
323	iommu->ops = ops;
324	nb->notifier_call = iommu_bus_notifier;
325	err = bus_register_notifier(bus, nb);
326	if (err)
327	return err;
328
329	spin_lock(lock: &iommu_device_lock);
330	list_add_tail(new: &iommu->list, head: &iommu_device_list);
331	spin_unlock(lock: &iommu_device_lock);
332
333	err = bus_iommu_probe(bus);
334	if (err) {
335	iommu_device_unregister_bus(iommu, bus, nb);
336	return err;
337	}
338	return `0`;
339	}
340	EXPORT_SYMBOL_GPL(iommu_device_register_bus);
341	#endif
342
343	static struct dev_iommu dev_iommu_get(struct* device *dev)
344	{
345	struct dev_iommu *param = dev->iommu;
346
347	lockdep_assert_held(&iommu_probe_device_lock);
348
349	if (param)
350	return param;
351
352	param = kzalloc(sizeof(*param), GFP_KERNEL);
353	if (!param)
354	return NULL;
355
356	mutex_init(&param->lock);
357	dev->iommu = param;
358	return param;
359	}
360
361	void dev_iommu_free(struct device *dev)
362	{
363	struct dev_iommu *param = dev->iommu;
364
365	dev->iommu = NULL;
366	if (param->fwspec) {
367	fwnode_handle_put(fwnode: param->fwspec->iommu_fwnode);
368	kfree(objp: param->fwspec);
369	}
370	kfree(objp: param);
371	}
372
373	/*
374	* Internal equivalent of device_iommu_mapped() for when we care that a device
375	* actually has API ops, and don't want false positives from VFIO-only groups.
376	*/
377	static bool dev_has_iommu(struct device *dev)
378	{
379	return dev->iommu && dev->iommu->iommu_dev;
380	}
381
382	static u32 dev_iommu_get_max_pasids(struct device *dev)
383	{
384	u32 max_pasids = `0`, bits = `0`;
385	int ret;
386
387	if (dev_is_pci(dev)) {
388	ret = pci_max_pasids(to_pci_dev(dev));
389	if (ret > `0`)
390	max_pasids = ret;
391	} else {
392	ret = device_property_read_u32(dev, propname: "pasid-num-bits", val: &bits);
393	if (!ret)
394	max_pasids = `1UL` << bits;
395	}
396
397	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
398	}
399
400	void dev_iommu_priv_set(struct device dev, void* *priv)
401	{
402	/ FSL_PAMU does something weird /
403	if (!IS_ENABLED(CONFIG_FSL_PAMU))
404	lockdep_assert_held(&iommu_probe_device_lock);
405	dev->iommu->priv = priv;
406	}
407	EXPORT_SYMBOL_GPL(dev_iommu_priv_set);
408
409	/*
410	* Init the dev->iommu and dev->iommu_group in the struct device and get the
411	* driver probed
412	*/
413	static int iommu_init_device(struct device *dev)
414	{
415	const struct iommu_ops *ops;
416	struct iommu_device *iommu_dev;
417	struct iommu_group *group;
418	int ret;
419
420	if (!dev_iommu_get(dev))
421	return -ENOMEM;
422	/*
423	* For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing
424	* is buried in the bus dma_configure path. Properly unpicking that is
425	* still a big job, so for now just invoke the whole thing. The device
426	* already having a driver bound means dma_configure has already run and
427	* found no IOMMU to wait for, so there's no point calling it again.
428	*/
429	if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) {
430	mutex_unlock(lock: &iommu_probe_device_lock);
431	dev->bus->dma_configure(dev);
432	mutex_lock(&iommu_probe_device_lock);
433	/ If another instance finished the job for us, skip it /
434	if (!dev->iommu \|\| dev->iommu_group)
435	return -ENODEV;
436	}
437	/*
438	* At this point, relevant devices either now have a fwspec which will
439	* match ops registered with a non-NULL fwnode, or we can reasonably
440	* assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
441	* be present, and that any of their registered instances has suitable
442	* ops for probing, and thus cheekily co-opt the same mechanism.
443	*/
444	ops = iommu_fwspec_ops(fwspec: dev->iommu->fwspec);
445	if (!ops) {
446	ret = -ENODEV;
447	goto err_free;
448	}
449
450	if (!try_module_get(module: ops->owner)) {
451	ret = -EINVAL;
452	goto err_free;
453	}
454
455	iommu_dev = ops->probe_device(dev);
456	if (IS_ERR(ptr: iommu_dev)) {
457	ret = PTR_ERR(ptr: iommu_dev);
458	goto err_module_put;
459	}
460	dev->iommu->iommu_dev = iommu_dev;
461
462	ret = iommu_device_link(iommu: iommu_dev, link: dev);
463	if (ret)
464	goto err_release;
465
466	group = ops->device_group(dev);
467	if (WARN_ON_ONCE(group == NULL))
468	group = ERR_PTR(error: -EINVAL);
469	if (IS_ERR(ptr: group)) {
470	ret = PTR_ERR(ptr: group);
471	goto err_unlink;
472	}
473	dev->iommu_group = group;
474
475	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
476	if (ops->is_attach_deferred)
477	dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
478	return `0`;
479
480	err_unlink:
481	iommu_device_unlink(iommu: iommu_dev, link: dev);
482	err_release:
483	if (ops->release_device)
484	ops->release_device(dev);
485	err_module_put:
486	module_put(module: ops->owner);
487	err_free:
488	dev->iommu->iommu_dev = NULL;
489	dev_iommu_free(dev);
490	return ret;
491	}
492
493	static void iommu_deinit_device(struct device *dev)
494	{
495	struct iommu_group *group = dev->iommu_group;
496	const struct iommu_ops *ops = dev_iommu_ops(dev);
497
498	lockdep_assert_held(&group->mutex);
499
500	iommu_device_unlink(iommu: dev->iommu->iommu_dev, link: dev);
501
502	/*
503	* release_device() must stop using any attached domain on the device.
504	* If there are still other devices in the group, they are not affected
505	* by this callback.
506	*
507	* If the iommu driver provides release_domain, the core code ensures
508	* that domain is attached prior to calling release_device. Drivers can
509	* use this to enforce a translation on the idle iommu. Typically, the
510	* global static blocked_domain is a good choice.
511	*
512	* Otherwise, the iommu driver must set the device to either an identity
513	* or a blocking translation in release_device() and stop using any
514	* domain pointer, as it is going to be freed.
515	*
516	* Regardless, if a delayed attach never occurred, then the release
517	* should still avoid touching any hardware configuration either.
518	*/
519	if (!dev->iommu->attach_deferred && ops->release_domain)
520	ops->release_domain->ops->attach_dev(ops->release_domain, dev);
521
522	if (ops->release_device)
523	ops->release_device(dev);
524
525	/*
526	* If this is the last driver to use the group then we must free the
527	* domains before we do the module_put().
528	*/
529	if (list_empty(head: &group->devices)) {
530	if (group->default_domain) {
531	iommu_domain_free(domain: group->default_domain);
532	group->default_domain = NULL;
533	}
534	if (group->blocking_domain) {
535	iommu_domain_free(domain: group->blocking_domain);
536	group->blocking_domain = NULL;
537	}
538	group->domain = NULL;
539	}
540
541	/ Caller must put iommu_group /
542	dev->iommu_group = NULL;
543	module_put(module: ops->owner);
544	dev_iommu_free(dev);
545	#ifdef CONFIG_IOMMU_DMA
546	dev->dma_iommu = false;
547	#endif
548	}
549
550	static struct iommu_domain pasid_array_entry_to_domain(void* *entry)
551	{
552	if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
553	return xa_untag_pointer(entry);
554	return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
555	}
556
557	DEFINE_MUTEX(iommu_probe_device_lock);
558
559	static int __iommu_probe_device(struct device dev, struct* list_head *group_list)
560	{
561	struct iommu_group *group;
562	struct group_device *gdev;
563	int ret;
564
565	/*
566	* Serialise to avoid races between IOMMU drivers registering in
567	* parallel and/or the "replay" calls from ACPI/OF code via client
568	* driver probe. Once the latter have been cleaned up we should
569	* probably be able to use device_lock() here to minimise the scope,
570	* but for now enforcing a simple global ordering is fine.
571	*/
572	lockdep_assert_held(&iommu_probe_device_lock);
573
574	/ Device is probed already if in a group /
575	if (dev->iommu_group)
576	return `0`;
577
578	ret = iommu_init_device(dev);
579	if (ret)
580	return ret;
581	/*
582	* And if we do now see any replay calls, they would indicate someone
583	* misusing the dma_configure path outside bus code.
584	*/
585	if (dev->driver)
586	dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");
587
588	group = dev->iommu_group;
589	gdev = iommu_group_alloc_device(group, dev);
590	mutex_lock(&group->mutex);
591	if (IS_ERR(ptr: gdev)) {
592	ret = PTR_ERR(ptr: gdev);
593	goto err_put_group;
594	}
595
596	/*
597	* The gdev must be in the list before calling
598	* iommu_setup_default_domain()
599	*/
600	list_add_tail(new: &gdev->list, head: &group->devices);
601	WARN_ON(group->default_domain && !group->domain);
602	if (group->default_domain)
603	iommu_create_device_direct_mappings(domain: group->default_domain, dev);
604	if (group->domain) {
605	ret = __iommu_device_set_domain(group, dev, new_domain: group->domain, flags: `0`);
606	if (ret)
607	goto err_remove_gdev;
608	} else if (!group->default_domain && !group_list) {
609	ret = iommu_setup_default_domain(group, target_type: `0`);
610	if (ret)
611	goto err_remove_gdev;
612	} else if (!group->default_domain) {
613	/*
614	* With a group_list argument we defer the default_domain setup
615	* to the caller by providing a de-duplicated list of groups
616	* that need further setup.
617	*/
618	if (list_empty(head: &group->entry))
619	list_add_tail(new: &group->entry, head: group_list);
620	}
621
622	if (group->default_domain)
623	iommu_setup_dma_ops(dev);
624
625	mutex_unlock(lock: &group->mutex);
626
627	return `0`;
628
629	err_remove_gdev:
630	list_del(entry: &gdev->list);
631	__iommu_group_free_device(group, grp_dev: gdev);
632	err_put_group:
633	iommu_deinit_device(dev);
634	mutex_unlock(lock: &group->mutex);
635	iommu_group_put(group);
636
637	return ret;
638	}
639
640	int iommu_probe_device(struct device *dev)
641	{
642	const struct iommu_ops *ops;
643	int ret;
644
645	mutex_lock(&iommu_probe_device_lock);
646	ret = __iommu_probe_device(dev, NULL);
647	mutex_unlock(lock: &iommu_probe_device_lock);
648	if (ret)
649	return ret;
650
651	ops = dev_iommu_ops(dev);
652	if (ops->probe_finalize)
653	ops->probe_finalize(dev);
654
655	return `0`;
656	}
657
658	static void __iommu_group_free_device(struct iommu_group *group,
659	struct group_device *grp_dev)
660	{
661	struct device *dev = grp_dev->dev;
662
663	sysfs_remove_link(kobj: group->devices_kobj, name: grp_dev->name);
664	sysfs_remove_link(kobj: &dev->kobj, name: "iommu_group");
665
666	trace_remove_device_from_group(group_id: group->id, dev);
667
668	/*
669	* If the group has become empty then ownership must have been
670	* released, and the current domain must be set back to NULL or
671	* the default domain.
672	*/
673	if (list_empty(head: &group->devices))
674	WARN_ON(group->owner_cnt \|\|
675	group->domain != group->default_domain);
676
677	kfree(objp: grp_dev->name);
678	kfree(objp: grp_dev);
679	}
680
681	/ Remove the iommu_group from the struct device. /
682	static void __iommu_group_remove_device(struct device *dev)
683	{
684	struct iommu_group *group = dev->iommu_group;
685	struct group_device *device;
686
687	mutex_lock(&group->mutex);
688	for_each_group_device(group, device) {
689	if (device->dev != dev)
690	continue;
691
692	list_del(entry: &device->list);
693	__iommu_group_free_device(group, grp_dev: device);
694	if (dev_has_iommu(dev))
695	iommu_deinit_device(dev);
696	else
697	dev->iommu_group = NULL;
698	break;
699	}
700	mutex_unlock(lock: &group->mutex);
701
702	/*
703	* Pairs with the get in iommu_init_device() or
704	* iommu_group_add_device()
705	*/
706	iommu_group_put(group);
707	}
708
709	static void iommu_release_device(struct device *dev)
710	{
711	struct iommu_group *group = dev->iommu_group;
712
713	if (group)
714	__iommu_group_remove_device(dev);
715
716	/ Free any fwspec if no iommu_driver was ever attached /
717	if (dev->iommu)
718	dev_iommu_free(dev);
719	}
720
721	static int __init iommu_set_def_domain_type(char *str)
722	{
723	bool pt;
724	int ret;
725
726	ret = kstrtobool(s: str, res: &pt);
727	if (ret)
728	return ret;
729
730	if (pt)
731	iommu_set_default_passthrough(cmd_line: true);
732	else
733	iommu_set_default_translated(cmd_line: true);
734
735	return `0`;
736	}
737	early_param("iommu.passthrough", iommu_set_def_domain_type);
738
739	static int __init iommu_dma_setup(char *str)
740	{
741	int ret = kstrtobool(s: str, res: &iommu_dma_strict);
742
743	if (!ret)
744	iommu_cmd_line \|= IOMMU_CMD_LINE_STRICT;
745	return ret;
746	}
747	early_param("iommu.strict", iommu_dma_setup);
748
749	void iommu_set_dma_strict(void)
750	{
751	iommu_dma_strict = true;
752	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
753	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
754	}
755
756	static ssize_t iommu_group_attr_show(struct kobject *kobj,
757	struct attribute __attr, char* *buf)
758	{
759	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
760	struct iommu_group *group = to_iommu_group(kobj);
761	ssize_t ret = -EIO;
762
763	if (attr->show)
764	ret = attr->show(group, buf);
765	return ret;
766	}
767
768	static ssize_t iommu_group_attr_store(struct kobject *kobj,
769	struct attribute *__attr,
770	const char *buf, size_t count)
771	{
772	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
773	struct iommu_group *group = to_iommu_group(kobj);
774	ssize_t ret = -EIO;
775
776	if (attr->store)
777	ret = attr->store(group, buf, count);
778	return ret;
779	}
780
781	static const struct sysfs_ops iommu_group_sysfs_ops = {
782	.show = iommu_group_attr_show,
783	.store = iommu_group_attr_store,
784	};
785
786	static int iommu_group_create_file(struct iommu_group *group,
787	struct iommu_group_attribute *attr)
788	{
789	return sysfs_create_file(kobj: &group->kobj, attr: &attr->attr);
790	}
791
792	static void iommu_group_remove_file(struct iommu_group *group,
793	struct iommu_group_attribute *attr)
794	{
795	sysfs_remove_file(kobj: &group->kobj, attr: &attr->attr);
796	}
797
798	static ssize_t iommu_group_show_name(struct iommu_group group, char* *buf)
799	{
800	return sysfs_emit(buf, fmt: "%s\n", group->name);
801	}
802
803	/**
804	* iommu_insert_resv_region - Insert a new region in the
805	* list of reserved regions.
806	* @new: new region to insert
807	* @regions: list of regions
808	*
809	* Elements are sorted by start address and overlapping segments
810	* of the same type are merged.
811	*/
812	static int iommu_insert_resv_region(struct iommu_resv_region *new,
813	struct list_head *regions)
814	{
815	struct iommu_resv_region iter, tmp, nr, top;
816	LIST_HEAD(stack);
817
818	nr = iommu_alloc_resv_region(start: new->start, length: new->length,
819	prot: new->prot, type: new->type, GFP_KERNEL);
820	if (!nr)
821	return -ENOMEM;
822
823	/ First add the new element based on start address sorting /
824	list_for_each_entry(iter, regions, list) {
825	if (nr->start < iter->start \|\|
826	(nr->start == iter->start && nr->type <= iter->type))
827	break;
828	}
829	list_add_tail(new: &nr->list, head: &iter->list);
830
831	/ Merge overlapping segments of type nr->type in @regions, if any /
832	list_for_each_entry_safe(iter, tmp, regions, list) {
833	phys_addr_t top_end, iter_end = iter->start + iter->length - `1`;
834
835	/ no merge needed on elements of different types than @new /
836	if (iter->type != new->type) {
837	list_move_tail(list: &iter->list, head: &stack);
838	continue;
839	}
840
841	/ look for the last stack element of same type as @iter /
842	list_for_each_entry_reverse(top, &stack, list)
843	if (top->type == iter->type)
844	goto check_overlap;
845
846	list_move_tail(list: &iter->list, head: &stack);
847	continue;
848
849	check_overlap:
850	top_end = top->start + top->length - `1`;
851
852	if (iter->start > top_end + `1`) {
853	list_move_tail(list: &iter->list, head: &stack);
854	} else {
855	top->length = max(top_end, iter_end) - top->start + `1`;
856	list_del(entry: &iter->list);
857	kfree(objp: iter);
858	}
859	}
860	list_splice(list: &stack, head: regions);
861	return `0`;
862	}
863
864	static int
865	iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
866	struct list_head *group_resv_regions)
867	{
868	struct iommu_resv_region *entry;
869	int ret = `0`;
870
871	list_for_each_entry(entry, dev_resv_regions, list) {
872	ret = iommu_insert_resv_region(new: entry, regions: group_resv_regions);
873	if (ret)
874	break;
875	}
876	return ret;
877	}
878
879	int iommu_get_group_resv_regions(struct iommu_group *group,
880	struct list_head *head)
881	{
882	struct group_device *device;
883	int ret = `0`;
884
885	mutex_lock(&group->mutex);
886	for_each_group_device(group, device) {
887	struct list_head dev_resv_regions;
888
889	/*
890	* Non-API groups still expose reserved_regions in sysfs,
891	* so filter out calls that get here that way.
892	*/
893	if (!dev_has_iommu(dev: device->dev))
894	break;
895
896	INIT_LIST_HEAD(list: &dev_resv_regions);
897	iommu_get_resv_regions(dev: device->dev, list: &dev_resv_regions);
898	ret = iommu_insert_device_resv_regions(dev_resv_regions: &dev_resv_regions, group_resv_regions: head);
899	iommu_put_resv_regions(dev: device->dev, list: &dev_resv_regions);
900	if (ret)
901	break;
902	}
903	mutex_unlock(lock: &group->mutex);
904	return ret;
905	}
906	EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);
907
908	static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
909	char *buf)
910	{
911	struct iommu_resv_region region, next;
912	struct list_head group_resv_regions;
913	int offset = `0`;
914
915	INIT_LIST_HEAD(list: &group_resv_regions);
916	iommu_get_group_resv_regions(group, &group_resv_regions);
917
918	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
919	offset += sysfs_emit_at(buf, at: offset, fmt: "0x%016llx 0x%016llx %s\n",
920	(long long)region->start,
921	(long long)(region->start +
922	region->length - `1`),
923	iommu_group_resv_type_string[region->type]);
924	kfree(objp: region);
925	}
926
927	return offset;
928	}
929
930	static ssize_t iommu_group_show_type(struct iommu_group *group,
931	char *buf)
932	{
933	char *type = "unknown";
934
935	mutex_lock(&group->mutex);
936	if (group->default_domain) {
937	switch (group->default_domain->type) {
938	case IOMMU_DOMAIN_BLOCKED:
939	type = "blocked";
940	break;
941	case IOMMU_DOMAIN_IDENTITY:
942	type = "identity";
943	break;
944	case IOMMU_DOMAIN_UNMANAGED:
945	type = "unmanaged";
946	break;
947	case IOMMU_DOMAIN_DMA:
948	type = "DMA";
949	break;
950	case IOMMU_DOMAIN_DMA_FQ:
951	type = "DMA-FQ";
952	break;
953	}
954	}
955	mutex_unlock(lock: &group->mutex);
956
957	return sysfs_emit(buf, fmt: "%s\n", type);
958	}
959
960	static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
961
962	static IOMMU_GROUP_ATTR(reserved_regions, `0444`,
963	iommu_group_show_resv_regions, NULL);
964
965	static IOMMU_GROUP_ATTR(type, `0644`, iommu_group_show_type,
966	iommu_group_store_type);
967
968	static void iommu_group_release(struct kobject *kobj)
969	{
970	struct iommu_group *group = to_iommu_group(kobj);
971
972	pr_debug("Releasing group %d\n", group->id);
973
974	if (group->iommu_data_release)
975	group->iommu_data_release(group->iommu_data);
976
977	ida_free(&iommu_group_ida, id: group->id);
978
979	/ Domains are free'd by iommu_deinit_device() /
980	WARN_ON(group->default_domain);
981	WARN_ON(group->blocking_domain);
982
983	kfree(objp: group->name);
984	kfree(objp: group);
985	}
986
987	static const struct kobj_type iommu_group_ktype = {
988	.sysfs_ops = &iommu_group_sysfs_ops,
989	.release = iommu_group_release,
990	};
991
992	/**
993	* iommu_group_alloc - Allocate a new group
994	*
995	* This function is called by an iommu driver to allocate a new iommu
996	* group. The iommu group represents the minimum granularity of the iommu.
997	* Upon successful return, the caller holds a reference to the supplied
998	* group in order to hold the group until devices are added. Use
999	* iommu_group_put() to release this extra reference count, allowing the
1000	* group to be automatically reclaimed once it has no devices or external
1001	* references.
1002	*/
1003	struct iommu_group iommu_group_alloc(void*)
1004	{
1005	struct iommu_group *group;
1006	int ret;
1007
1008	group = kzalloc(sizeof(*group), GFP_KERNEL);
1009	if (!group)
1010	return ERR_PTR(error: -ENOMEM);
1011
1012	group->kobj.kset = iommu_group_kset;
1013	mutex_init(&group->mutex);
1014	INIT_LIST_HEAD(list: &group->devices);
1015	INIT_LIST_HEAD(list: &group->entry);
1016	xa_init(xa: &group->pasid_array);
1017
1018	ret = ida_alloc(ida: &iommu_group_ida, GFP_KERNEL);
1019	if (ret < `0`) {
1020	kfree(objp: group);
1021	return ERR_PTR(error: ret);
1022	}
1023	group->id = ret;
1024
1025	ret = kobject_init_and_add(kobj: &group->kobj, ktype: &iommu_group_ktype,
1026	NULL, fmt: "%d", group->id);
1027	if (ret) {
1028	kobject_put(kobj: &group->kobj);
1029	return ERR_PTR(error: ret);
1030	}
1031
1032	group->devices_kobj = kobject_create_and_add(name: "devices", parent: &group->kobj);
1033	if (!group->devices_kobj) {
1034	kobject_put(kobj: &group->kobj); / triggers .release & free /
1035	return ERR_PTR(error: -ENOMEM);
1036	}
1037
1038	/*
1039	* The devices_kobj holds a reference on the group kobject, so
1040	* as long as that exists so will the group. We can therefore
1041	* use the devices_kobj for reference counting.
1042	*/
1043	kobject_put(kobj: &group->kobj);
1044
1045	ret = iommu_group_create_file(group,
1046	attr: &iommu_group_attr_reserved_regions);
1047	if (ret) {
1048	kobject_put(kobj: group->devices_kobj);
1049	return ERR_PTR(error: ret);
1050	}
1051
1052	ret = iommu_group_create_file(group, attr: &iommu_group_attr_type);
1053	if (ret) {
1054	kobject_put(kobj: group->devices_kobj);
1055	return ERR_PTR(error: ret);
1056	}
1057
1058	pr_debug("Allocated group %d\n", group->id);
1059
1060	return group;
1061	}
1062	EXPORT_SYMBOL_GPL(iommu_group_alloc);
1063
1064	/**
1065	* iommu_group_get_iommudata - retrieve iommu_data registered for a group
1066	* @group: the group
1067	*
1068	* iommu drivers can store data in the group for use when doing iommu
1069	* operations. This function provides a way to retrieve it. Caller
1070	* should hold a group reference.
1071	*/
1072	void iommu_group_get_iommudata(struct* iommu_group *group)
1073	{
1074	return group->iommu_data;
1075	}
1076	EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
1077
1078	/**
1079	* iommu_group_set_iommudata - set iommu_data for a group
1080	* @group: the group
1081	* @iommu_data: new data
1082	* @release: release function for iommu_data
1083	*
1084	* iommu drivers can store data in the group for use when doing iommu
1085	* operations. This function provides a way to set the data after
1086	* the group has been allocated. Caller should hold a group reference.
1087	*/
1088	void iommu_group_set_iommudata(struct iommu_group group, void* *iommu_data,
1089	void (release)(void* *iommu_data))
1090	{
1091	group->iommu_data = iommu_data;
1092	group->iommu_data_release = release;
1093	}
1094	EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
1095
1096	/**
1097	* iommu_group_set_name - set name for a group
1098	* @group: the group
1099	* @name: name
1100	*
1101	* Allow iommu driver to set a name for a group. When set it will
1102	* appear in a name attribute file under the group in sysfs.
1103	*/
1104	int iommu_group_set_name(struct iommu_group group, const* char *name)
1105	{
1106	int ret;
1107
1108	if (group->name) {
1109	iommu_group_remove_file(group, attr: &iommu_group_attr_name);
1110	kfree(objp: group->name);
1111	group->name = NULL;
1112	if (!name)
1113	return `0`;
1114	}
1115
1116	group->name = kstrdup(s: name, GFP_KERNEL);
1117	if (!group->name)
1118	return -ENOMEM;
1119
1120	ret = iommu_group_create_file(group, attr: &iommu_group_attr_name);
1121	if (ret) {
1122	kfree(objp: group->name);
1123	group->name = NULL;
1124	return ret;
1125	}
1126
1127	return `0`;
1128	}
1129	EXPORT_SYMBOL_GPL(iommu_group_set_name);
1130
1131	static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
1132	struct device *dev)
1133	{
1134	struct iommu_resv_region *entry;
1135	struct list_head mappings;
1136	unsigned long pg_size;
1137	int ret = `0`;
1138
1139	pg_size = domain->pgsize_bitmap ? `1UL` << __ffs(domain->pgsize_bitmap) : `0`;
1140	INIT_LIST_HEAD(list: &mappings);
1141
1142	if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size))
1143	return -EINVAL;
1144
1145	iommu_get_resv_regions(dev, list: &mappings);
1146
1147	/ We need to consider overlapping regions for different devices /
1148	list_for_each_entry(entry, &mappings, list) {
1149	dma_addr_t start, end, addr;
1150	size_t map_size = `0`;
1151
1152	if (entry->type == IOMMU_RESV_DIRECT)
1153	dev->iommu->require_direct = `1`;
1154
1155	if ((entry->type != IOMMU_RESV_DIRECT &&
1156	entry->type != IOMMU_RESV_DIRECT_RELAXABLE) \|\|
1157	!iommu_is_dma_domain(domain))
1158	continue;
1159
1160	start = ALIGN(entry->start, pg_size);
1161	end = ALIGN(entry->start + entry->length, pg_size);
1162
1163	for (addr = start; addr <= end; addr += pg_size) {
1164	phys_addr_t phys_addr;
1165
1166	if (addr == end)
1167	goto map_end;
1168
1169	phys_addr = iommu_iova_to_phys(domain, iova: addr);
1170	if (!phys_addr) {
1171	map_size += pg_size;
1172	continue;
1173	}
1174
1175	map_end:
1176	if (map_size) {
1177	ret = iommu_map(domain, iova: addr - map_size,
1178	paddr: addr - map_size, size: map_size,
1179	prot: entry->prot, GFP_KERNEL);
1180	if (ret)
1181	goto out;
1182	map_size = `0`;
1183	}
1184	}
1185
1186	}
1187	out:
1188	iommu_put_resv_regions(dev, list: &mappings);
1189
1190	return ret;
1191	}
1192
1193	/ This is undone by __iommu_group_free_device() /
1194	static struct group_device iommu_group_alloc_device(struct* iommu_group *group,
1195	struct device *dev)
1196	{
1197	int ret, i = `0`;
1198	struct group_device *device;
1199
1200	device = kzalloc(sizeof(*device), GFP_KERNEL);
1201	if (!device)
1202	return ERR_PTR(error: -ENOMEM);
1203
1204	device->dev = dev;
1205
1206	ret = sysfs_create_link(kobj: &dev->kobj, target: &group->kobj, name: "iommu_group");
1207	if (ret)
1208	goto err_free_device;
1209
1210	device->name = kasprintf(GFP_KERNEL, fmt: "%s", kobject_name(kobj: &dev->kobj));
1211	rename:
1212	if (!device->name) {
1213	ret = -ENOMEM;
1214	goto err_remove_link;
1215	}
1216
1217	ret = sysfs_create_link_nowarn(kobj: group->devices_kobj,
1218	target: &dev->kobj, name: device->name);
1219	if (ret) {
1220	if (ret == -EEXIST && i >= `0`) {
1221	/*
1222	* Account for the slim chance of collision
1223	* and append an instance to the name.
1224	*/
1225	kfree(objp: device->name);
1226	device->name = kasprintf(GFP_KERNEL, fmt: "%s.%d",
1227	kobject_name(kobj: &dev->kobj), i++);
1228	goto rename;
1229	}
1230	goto err_free_name;
1231	}
1232
1233	trace_add_device_to_group(group_id: group->id, dev);
1234
1235	dev_info(dev, "Adding to iommu group %d\n", group->id);
1236
1237	return device;
1238
1239	err_free_name:
1240	kfree(objp: device->name);
1241	err_remove_link:
1242	sysfs_remove_link(kobj: &dev->kobj, name: "iommu_group");
1243	err_free_device:
1244	kfree(objp: device);
1245	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
1246	return ERR_PTR(error: ret);
1247	}
1248
1249	/**
1250	* iommu_group_add_device - add a device to an iommu group
1251	* @group: the group into which to add the device (reference should be held)
1252	* @dev: the device
1253	*
1254	* This function is called by an iommu driver to add a device into a
1255	* group. Adding a device increments the group reference count.
1256	*/
1257	int iommu_group_add_device(struct iommu_group group, struct* device *dev)
1258	{
1259	struct group_device *gdev;
1260
1261	gdev = iommu_group_alloc_device(group, dev);
1262	if (IS_ERR(ptr: gdev))
1263	return PTR_ERR(ptr: gdev);
1264
1265	iommu_group_ref_get(group);
1266	dev->iommu_group = group;
1267
1268	mutex_lock(&group->mutex);
1269	list_add_tail(new: &gdev->list, head: &group->devices);
1270	mutex_unlock(lock: &group->mutex);
1271	return `0`;
1272	}
1273	EXPORT_SYMBOL_GPL(iommu_group_add_device);
1274
1275	/**
1276	* iommu_group_remove_device - remove a device from it's current group
1277	* @dev: device to be removed
1278	*
1279	* This function is called by an iommu driver to remove the device from
1280	* it's current group. This decrements the iommu group reference count.
1281	*/
1282	void iommu_group_remove_device(struct device *dev)
1283	{
1284	struct iommu_group *group = dev->iommu_group;
1285
1286	if (!group)
1287	return;
1288
1289	dev_info(dev, "Removing from iommu group %d\n", group->id);
1290
1291	__iommu_group_remove_device(dev);
1292	}
1293	EXPORT_SYMBOL_GPL(iommu_group_remove_device);
1294
1295	#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
1296	/**
1297	* iommu_group_mutex_assert - Check device group mutex lock
1298	* @dev: the device that has group param set
1299	*
1300	* This function is called by an iommu driver to check whether it holds
1301	* group mutex lock for the given device or not.
1302	*
1303	* Note that this function must be called after device group param is set.
1304	*/
1305	void iommu_group_mutex_assert(struct device *dev)
1306	{
1307	struct iommu_group *group = dev->iommu_group;
1308
1309	lockdep_assert_held(&group->mutex);
1310	}
1311	EXPORT_SYMBOL_GPL(iommu_group_mutex_assert);
1312	#endif
1313
1314	static struct device iommu_group_first_dev(struct* iommu_group *group)
1315	{
1316	lockdep_assert_held(&group->mutex);
1317	return list_first_entry(&group->devices, struct group_device, list)->dev;
1318	}
1319
1320	/**
1321	* iommu_group_for_each_dev - iterate over each device in the group
1322	* @group: the group
1323	* @data: caller opaque data to be passed to callback function
1324	* @fn: caller supplied callback function
1325	*
1326	* This function is called by group users to iterate over group devices.
1327	* Callers should hold a reference count to the group during callback.
1328	* The group->mutex is held across callbacks, which will block calls to
1329	* iommu_group_add/remove_device.
1330	*/
1331	int iommu_group_for_each_dev(struct iommu_group group, void* *data,
1332	int (fn)(struct* device , void* *))
1333	{
1334	struct group_device *device;
1335	int ret = `0`;
1336
1337	mutex_lock(&group->mutex);
1338	for_each_group_device(group, device) {
1339	ret = fn(device->dev, data);
1340	if (ret)
1341	break;
1342	}
1343	mutex_unlock(lock: &group->mutex);
1344
1345	return ret;
1346	}
1347	EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
1348
1349	/**
1350	* iommu_group_get - Return the group for a device and increment reference
1351	* @dev: get the group that this device belongs to
1352	*
1353	* This function is called by iommu drivers and users to get the group
1354	* for the specified device. If found, the group is returned and the group
1355	* reference in incremented, else NULL.
1356	*/
1357	struct iommu_group iommu_group_get(struct* device *dev)
1358	{
1359	struct iommu_group *group = dev->iommu_group;
1360
1361	if (group)
1362	kobject_get(kobj: group->devices_kobj);
1363
1364	return group;
1365	}
1366	EXPORT_SYMBOL_GPL(iommu_group_get);
1367
1368	/**
1369	* iommu_group_ref_get - Increment reference on a group
1370	* @group: the group to use, must not be NULL
1371	*
1372	* This function is called by iommu drivers to take additional references on an
1373	* existing group. Returns the given group for convenience.
1374	*/
1375	struct iommu_group iommu_group_ref_get(struct* iommu_group *group)
1376	{
1377	kobject_get(kobj: group->devices_kobj);
1378	return group;
1379	}
1380	EXPORT_SYMBOL_GPL(iommu_group_ref_get);
1381
1382	/**
1383	* iommu_group_put - Decrement group reference
1384	* @group: the group to use
1385	*
1386	* This function is called by iommu drivers and users to release the
1387	* iommu group. Once the reference count is zero, the group is released.
1388	*/
1389	void iommu_group_put(struct iommu_group *group)
1390	{
1391	if (group)
1392	kobject_put(kobj: group->devices_kobj);
1393	}
1394	EXPORT_SYMBOL_GPL(iommu_group_put);
1395
1396	/**
1397	* iommu_group_id - Return ID for a group
1398	* @group: the group to ID
1399	*
1400	* Return the unique ID for the group matching the sysfs group number.
1401	*/
1402	int iommu_group_id(struct iommu_group *group)
1403	{
1404	return group->id;
1405	}
1406	EXPORT_SYMBOL_GPL(iommu_group_id);
1407
1408	static struct iommu_group get_pci_alias_group(struct* pci_dev *pdev,
1409	unsigned long *devfns);
1410
1411	/*
1412	* To consider a PCI device isolated, we require ACS to support Source
1413	* Validation, Request Redirection, Completer Redirection, and Upstream
1414	* Forwarding. This effectively means that devices cannot spoof their
1415	* requester ID, requests and completions cannot be redirected, and all
1416	* transactions are forwarded upstream, even as it passes through a
1417	* bridge where the target device is downstream.
1418	*/
1419	#define REQ_ACS_FLAGS (PCI_ACS_SV \| PCI_ACS_RR \| PCI_ACS_CR \| PCI_ACS_UF)
1420
1421	/*
1422	* For multifunction devices which are not isolated from each other, find
1423	* all the other non-isolated functions and look for existing groups. For
1424	* each function, we also need to look for aliases to or from other devices
1425	* that may already have a group.
1426	*/
1427	static struct iommu_group get_pci_function_alias_group(struct* pci_dev *pdev,
1428	unsigned long *devfns)
1429	{
1430	struct pci_dev *tmp = NULL;
1431	struct iommu_group *group;
1432
1433	if (!pdev->multifunction \|\| pci_acs_enabled(pdev, REQ_ACS_FLAGS))
1434	return NULL;
1435
1436	for_each_pci_dev(tmp) {
1437	if (tmp == pdev \|\| tmp->bus != pdev->bus \|\|
1438	PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) \|\|
1439	pci_acs_enabled(pdev: tmp, REQ_ACS_FLAGS))
1440	continue;
1441
1442	group = get_pci_alias_group(pdev: tmp, devfns);
1443	if (group) {
1444	pci_dev_put(dev: tmp);
1445	return group;
1446	}
1447	}
1448
1449	return NULL;
1450	}
1451
1452	/*
1453	* Look for aliases to or from the given device for existing groups. DMA
1454	* aliases are only supported on the same bus, therefore the search
1455	* space is quite small (especially since we're really only looking at pcie
1456	* device, and therefore only expect multiple slots on the root complex or
1457	* downstream switch ports). It's conceivable though that a pair of
1458	* multifunction devices could have aliases between them that would cause a
1459	* loop. To prevent this, we use a bitmap to track where we've been.
1460	*/
1461	static struct iommu_group get_pci_alias_group(struct* pci_dev *pdev,
1462	unsigned long *devfns)
1463	{
1464	struct pci_dev *tmp = NULL;
1465	struct iommu_group *group;
1466
1467	if (test_and_set_bit(nr: pdev->devfn & `0xff`, addr: devfns))
1468	return NULL;
1469
1470	group = iommu_group_get(&pdev->dev);
1471	if (group)
1472	return group;
1473
1474	for_each_pci_dev(tmp) {
1475	if (tmp == pdev \|\| tmp->bus != pdev->bus)
1476	continue;
1477
1478	/ We alias them or they alias us /
1479	if (pci_devs_are_dma_aliases(dev1: pdev, dev2: tmp)) {
1480	group = get_pci_alias_group(pdev: tmp, devfns);
1481	if (group) {
1482	pci_dev_put(dev: tmp);
1483	return group;
1484	}
1485
1486	group = get_pci_function_alias_group(pdev: tmp, devfns);
1487	if (group) {
1488	pci_dev_put(dev: tmp);
1489	return group;
1490	}
1491	}
1492	}
1493
1494	return NULL;
1495	}
1496
1497	struct group_for_pci_data {
1498	struct pci_dev *pdev;
1499	struct iommu_group *group;
1500	};
1501
1502	/*
1503	* DMA alias iterator callback, return the last seen device. Stop and return
1504	* the IOMMU group if we find one along the way.
1505	*/
1506	static int get_pci_alias_or_group(struct pci_dev pdev, u16 alias, void* *opaque)
1507	{
1508	struct group_for_pci_data *data = opaque;
1509
1510	data->pdev = pdev;
1511	data->group = iommu_group_get(&pdev->dev);
1512
1513	return data->group != NULL;
1514	}
1515
1516	/*
1517	* Generic device_group call-back function. It just allocates one
1518	* iommu-group per device.
1519	*/
1520	struct iommu_group generic_device_group(struct* device *dev)
1521	{
1522	return iommu_group_alloc();
1523	}
1524	EXPORT_SYMBOL_GPL(generic_device_group);
1525
1526	/*
1527	* Generic device_group call-back function. It just allocates one
1528	* iommu-group per iommu driver instance shared by every device
1529	* probed by that iommu driver.
1530	*/
1531	struct iommu_group generic_single_device_group(struct* device *dev)
1532	{
1533	struct iommu_device *iommu = dev->iommu->iommu_dev;
1534
1535	if (!iommu->singleton_group) {
1536	struct iommu_group *group;
1537
1538	group = iommu_group_alloc();
1539	if (IS_ERR(ptr: group))
1540	return group;
1541	iommu->singleton_group = group;
1542	}
1543	return iommu_group_ref_get(iommu->singleton_group);
1544	}
1545	EXPORT_SYMBOL_GPL(generic_single_device_group);
1546
1547	/*
1548	* Use standard PCI bus topology, isolation features, and DMA alias quirks
1549	* to find or create an IOMMU group for a device.
1550	*/
1551	struct iommu_group pci_device_group(struct* device *dev)
1552	{
1553	struct pci_dev *pdev = to_pci_dev(dev);
1554	struct group_for_pci_data data;
1555	struct pci_bus *bus;
1556	struct iommu_group *group = NULL;
1557	u64 devfns[`4`] = { `0` };
1558
1559	if (WARN_ON(!dev_is_pci(dev)))
1560	return ERR_PTR(error: -EINVAL);
1561
1562	/*
1563	* Find the upstream DMA alias for the device. A device must not
1564	* be aliased due to topology in order to have its own IOMMU group.
1565	* If we find an alias along the way that already belongs to a
1566	* group, use it.
1567	*/
1568	if (pci_for_each_dma_alias(pdev, fn: get_pci_alias_or_group, data: &data))
1569	return data.group;
1570
1571	pdev = data.pdev;
1572
1573	/*
1574	* Continue upstream from the point of minimum IOMMU granularity
1575	* due to aliases to the point where devices are protected from
1576	* peer-to-peer DMA by PCI ACS. Again, if we find an existing
1577	* group, use it.
1578	*/
1579	for (bus = pdev->bus; !pci_is_root_bus(pbus: bus); bus = bus->parent) {
1580	if (!bus->self)
1581	continue;
1582
1583	if (pci_acs_path_enabled(start: bus->self, NULL, REQ_ACS_FLAGS))
1584	break;
1585
1586	pdev = bus->self;
1587
1588	group = iommu_group_get(&pdev->dev);
1589	if (group)
1590	return group;
1591	}
1592
1593	/*
1594	* Look for existing groups on device aliases. If we alias another
1595	* device or another device aliases us, use the same group.
1596	*/
1597	group = get_pci_alias_group(pdev, devfns: (unsigned long *)devfns);
1598	if (group)
1599	return group;
1600
1601	/*
1602	* Look for existing groups on non-isolated functions on the same
1603	* slot and aliases of those funcions, if any. No need to clear
1604	* the search bitmap, the tested devfns are still valid.
1605	*/
1606	group = get_pci_function_alias_group(pdev, devfns: (unsigned long *)devfns);
1607	if (group)
1608	return group;
1609
1610	/ No shared group found, allocate new /
1611	return iommu_group_alloc();
1612	}
1613	EXPORT_SYMBOL_GPL(pci_device_group);
1614
1615	/ Get the IOMMU group for device on fsl-mc bus /
1616	struct iommu_group fsl_mc_device_group(struct* device *dev)
1617	{
1618	struct device *cont_dev = fsl_mc_cont_dev(dev);
1619	struct iommu_group *group;
1620
1621	group = iommu_group_get(cont_dev);
1622	if (!group)
1623	group = iommu_group_alloc();
1624	return group;
1625	}
1626	EXPORT_SYMBOL_GPL(fsl_mc_device_group);
1627
1628	static struct iommu_domain __iommu_alloc_identity_domain(struct* device *dev)
1629	{
1630	const struct iommu_ops *ops = dev_iommu_ops(dev);
1631	struct iommu_domain *domain;
1632
1633	if (ops->identity_domain)
1634	return ops->identity_domain;
1635
1636	if (ops->domain_alloc_identity) {
1637	domain = ops->domain_alloc_identity(dev);
1638	if (IS_ERR(ptr: domain))
1639	return domain;
1640	} else {
1641	return ERR_PTR(error: -EOPNOTSUPP);
1642	}
1643
1644	iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops);
1645	return domain;
1646	}
1647
1648	static struct iommu_domain *
1649	__iommu_group_alloc_default_domain(struct iommu_group group, int* req_type)
1650	{
1651	struct device *dev = iommu_group_first_dev(group);
1652	struct iommu_domain *dom;
1653
1654	if (group->default_domain && group->default_domain->type == req_type)
1655	return group->default_domain;
1656
1657	/*
1658	* When allocating the DMA API domain assume that the driver is going to
1659	* use PASID and make sure the RID's domain is PASID compatible.
1660	*/
1661	if (req_type & __IOMMU_DOMAIN_PAGING) {
1662	dom = __iommu_paging_domain_alloc_flags(dev, type: req_type,
1663	flags: dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : `0`);
1664
1665	/*
1666	* If driver does not support PASID feature then
1667	* try to allocate non-PASID domain
1668	*/
1669	if (PTR_ERR(ptr: dom) == -EOPNOTSUPP)
1670	dom = __iommu_paging_domain_alloc_flags(dev, type: req_type, flags: `0`);
1671
1672	return dom;
1673	}
1674
1675	if (req_type == IOMMU_DOMAIN_IDENTITY)
1676	return __iommu_alloc_identity_domain(dev);
1677
1678	return ERR_PTR(error: -EINVAL);
1679	}
1680
1681	/*
1682	* req_type of 0 means "auto" which means to select a domain based on
1683	* iommu_def_domain_type or what the driver actually supports.
1684	*/
1685	static struct iommu_domain *
1686	iommu_group_alloc_default_domain(struct iommu_group group, int* req_type)
1687	{
1688	const struct iommu_ops *ops = dev_iommu_ops(dev: iommu_group_first_dev(group));
1689	struct iommu_domain *dom;
1690
1691	lockdep_assert_held(&group->mutex);
1692
1693	/*
1694	* Allow legacy drivers to specify the domain that will be the default
1695	* domain. This should always be either an IDENTITY/BLOCKED/PLATFORM
1696	* domain. Do not use in new drivers.
1697	*/
1698	if (ops->default_domain) {
1699	if (req_type != ops->default_domain->type)
1700	return ERR_PTR(error: -EINVAL);
1701	return ops->default_domain;
1702	}
1703
1704	if (req_type)
1705	return __iommu_group_alloc_default_domain(group, req_type);
1706
1707	/ The driver gave no guidance on what type to use, try the default /
1708	dom = __iommu_group_alloc_default_domain(group, req_type: iommu_def_domain_type);
1709	if (!IS_ERR(ptr: dom))
1710	return dom;
1711
1712	/ Otherwise IDENTITY and DMA_FQ defaults will try DMA /
1713	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
1714	return ERR_PTR(error: -EINVAL);
1715	dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA);
1716	if (IS_ERR(ptr: dom))
1717	return dom;
1718
1719	pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
1720	iommu_def_domain_type, group->name);
1721	return dom;
1722	}
1723
1724	struct iommu_domain iommu_group_default_domain(struct* iommu_group *group)
1725	{
1726	return group->default_domain;
1727	}
1728
1729	static int probe_iommu_group(struct device dev, void* *data)
1730	{
1731	struct list_head *group_list = data;
1732	int ret;
1733
1734	mutex_lock(&iommu_probe_device_lock);
1735	ret = __iommu_probe_device(dev, group_list);
1736	mutex_unlock(lock: &iommu_probe_device_lock);
1737	if (ret == -ENODEV)
1738	ret = `0`;
1739
1740	return ret;
1741	}
1742
1743	static int iommu_bus_notifier(struct notifier_block *nb,
1744	unsigned long action, void *data)
1745	{
1746	struct device *dev = data;
1747
1748	if (action == BUS_NOTIFY_ADD_DEVICE) {
1749	int ret;
1750
1751	ret = iommu_probe_device(dev);
1752	return (ret) ? NOTIFY_DONE : NOTIFY_OK;
1753	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
1754	iommu_release_device(dev);
1755	return NOTIFY_OK;
1756	}
1757
1758	return `0`;
1759	}
1760
1761	/*
1762	* Combine the driver's chosen def_domain_type across all the devices in a
1763	* group. Drivers must give a consistent result.
1764	*/
1765	static int iommu_get_def_domain_type(struct iommu_group *group,
1766	struct device dev, int* cur_type)
1767	{
1768	const struct iommu_ops *ops = dev_iommu_ops(dev);
1769	int type;
1770
1771	if (ops->default_domain) {
1772	/*
1773	* Drivers that declare a global static default_domain will
1774	* always choose that.
1775	*/
1776	type = ops->default_domain->type;
1777	} else {
1778	if (ops->def_domain_type)
1779	type = ops->def_domain_type(dev);
1780	else
1781	return cur_type;
1782	}
1783	if (!type \|\| cur_type == type)
1784	return cur_type;
1785	if (!cur_type)
1786	return type;
1787
1788	dev_err_ratelimited(
1789	dev,
1790	"IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n",
1791	iommu_domain_type_str(cur_type), iommu_domain_type_str(type),
1792	group->id);
1793
1794	/*
1795	* Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY
1796	* takes precedence.
1797	*/
1798	if (type == IOMMU_DOMAIN_IDENTITY)
1799	return type;
1800	return cur_type;
1801	}
1802
1803	/*
1804	* A target_type of 0 will select the best domain type. 0 can be returned in
1805	* this case meaning the global default should be used.
1806	*/
1807	static int iommu_get_default_domain_type(struct iommu_group *group,
1808	int target_type)
1809	{
1810	struct device *untrusted = NULL;
1811	struct group_device *gdev;
1812	int driver_type = `0`;
1813
1814	lockdep_assert_held(&group->mutex);
1815
1816	/*
1817	* ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an
1818	* identity_domain and it will automatically become their default
1819	* domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain.
1820	* Override the selection to IDENTITY.
1821	*/
1822	if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
1823	static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) &&
1824	IS_ENABLED(CONFIG_IOMMU_DMA)));
1825	driver_type = IOMMU_DOMAIN_IDENTITY;
1826	}
1827
1828	for_each_group_device(group, gdev) {
1829	driver_type = iommu_get_def_domain_type(group, dev: gdev->dev,
1830	cur_type: driver_type);
1831
1832	if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) {
1833	/*
1834	* No ARM32 using systems will set untrusted, it cannot
1835	* work.
1836	*/
1837	if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)))
1838	return -`1`;
1839	untrusted = gdev->dev;
1840	}
1841	}
1842
1843	/*
1844	* If the common dma ops are not selected in kconfig then we cannot use
1845	* IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been
1846	* selected.
1847	*/
1848	if (!IS_ENABLED(CONFIG_IOMMU_DMA)) {
1849	if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA))
1850	return -`1`;
1851	if (!driver_type)
1852	driver_type = IOMMU_DOMAIN_IDENTITY;
1853	}
1854
1855	if (untrusted) {
1856	if (driver_type && driver_type != IOMMU_DOMAIN_DMA) {
1857	dev_err_ratelimited(
1858	untrusted,
1859	"Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n",
1860	group->id, iommu_domain_type_str(driver_type));
1861	return -`1`;
1862	}
1863	driver_type = IOMMU_DOMAIN_DMA;
1864	}
1865
1866	if (target_type) {
1867	if (driver_type && target_type != driver_type)
1868	return -`1`;
1869	return target_type;
1870	}
1871	return driver_type;
1872	}
1873
1874	static void iommu_group_do_probe_finalize(struct device *dev)
1875	{
1876	const struct iommu_ops *ops = dev_iommu_ops(dev);
1877
1878	if (ops->probe_finalize)
1879	ops->probe_finalize(dev);
1880	}
1881
1882	static int bus_iommu_probe(const struct bus_type *bus)
1883	{
1884	struct iommu_group group, next;
1885	LIST_HEAD(group_list);
1886	int ret;
1887
1888	ret = bus_for_each_dev(bus, NULL, data: &group_list, fn: probe_iommu_group);
1889	if (ret)
1890	return ret;
1891
1892	list_for_each_entry_safe(group, next, &group_list, entry) {
1893	struct group_device *gdev;
1894
1895	mutex_lock(&group->mutex);
1896
1897	/ Remove item from the list /
1898	list_del_init(entry: &group->entry);
1899
1900	/*
1901	* We go to the trouble of deferred default domain creation so
1902	* that the cross-group default domain type and the setup of the
1903	* IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios.
1904	*/
1905	ret = iommu_setup_default_domain(group, target_type: `0`);
1906	if (ret) {
1907	mutex_unlock(lock: &group->mutex);
1908	return ret;
1909	}
1910	for_each_group_device(group, gdev)
1911	iommu_setup_dma_ops(dev: gdev->dev);
1912	mutex_unlock(lock: &group->mutex);
1913
1914	/*
1915	* FIXME: Mis-locked because the ops->probe_finalize() call-back
1916	* of some IOMMU drivers calls arm_iommu_attach_device() which
1917	* in-turn might call back into IOMMU core code, where it tries
1918	* to take group->mutex, resulting in a deadlock.
1919	*/
1920	for_each_group_device(group, gdev)
1921	iommu_group_do_probe_finalize(dev: gdev->dev);
1922	}
1923
1924	return `0`;
1925	}
1926
1927	/**
1928	* device_iommu_capable() - check for a general IOMMU capability
1929	* @dev: device to which the capability would be relevant, if available
1930	* @cap: IOMMU capability
1931	*
1932	* Return: true if an IOMMU is present and supports the given capability
1933	* for the given device, otherwise false.
1934	*/
1935	bool device_iommu_capable(struct device dev, enum* iommu_cap cap)
1936	{
1937	const struct iommu_ops *ops;
1938
1939	if (!dev_has_iommu(dev))
1940	return false;
1941
1942	ops = dev_iommu_ops(dev);
1943	if (!ops->capable)
1944	return false;
1945
1946	return ops->capable(dev, cap);
1947	}
1948	EXPORT_SYMBOL_GPL(device_iommu_capable);
1949
1950	/**
1951	* iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
1952	* for a group
1953	* @group: Group to query
1954	*
1955	* IOMMU groups should not have differing values of
1956	* msi_device_has_isolated_msi() for devices in a group. However nothing
1957	* directly prevents this, so ensure mistakes don't result in isolation failures
1958	* by checking that all the devices are the same.
1959	*/
1960	bool iommu_group_has_isolated_msi(struct iommu_group *group)
1961	{
1962	struct group_device *group_dev;
1963	bool ret = true;
1964
1965	mutex_lock(&group->mutex);
1966	for_each_group_device(group, group_dev)
1967	ret &= msi_device_has_isolated_msi(dev: group_dev->dev);
1968	mutex_unlock(lock: &group->mutex);
1969	return ret;
1970	}
1971	EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
1972
1973	/**
1974	* iommu_set_fault_handler() - set a fault handler for an iommu domain
1975	* @domain: iommu domain
1976	* @handler: fault handler
1977	* @token: user data, will be passed back to the fault handler
1978	*
1979	* This function should be used by IOMMU users which want to be notified
1980	* whenever an IOMMU fault happens.
1981	*
1982	* The fault handler itself should return 0 on success, and an appropriate
1983	* error code otherwise.
1984	*/
1985	void iommu_set_fault_handler(struct iommu_domain *domain,
1986	iommu_fault_handler_t handler,
1987	void *token)
1988	{
1989	if (WARN_ON(!domain \|\| domain->cookie_type != IOMMU_COOKIE_NONE))
1990	return;
1991
1992	domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER;
1993	domain->handler = handler;
1994	domain->handler_token = token;
1995	}
1996	EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
1997
1998	static void iommu_domain_init(struct iommu_domain domain, unsigned* int type,
1999	const struct iommu_ops *ops)
2000	{
2001	domain->type = type;
2002	domain->owner = ops;
2003	if (!domain->ops)
2004	domain->ops = ops->default_domain_ops;
2005
2006	/*
2007	* If not already set, assume all sizes by default; the driver
2008	* may override this later
2009	*/
2010	if (!domain->pgsize_bitmap)
2011	domain->pgsize_bitmap = ops->pgsize_bitmap;
2012	}
2013
2014	static struct iommu_domain *
2015	__iommu_paging_domain_alloc_flags(struct device dev, unsigned* int type,
2016	unsigned int flags)
2017	{
2018	const struct iommu_ops *ops;
2019	struct iommu_domain *domain;
2020
2021	if (!dev_has_iommu(dev))
2022	return ERR_PTR(error: -ENODEV);
2023
2024	ops = dev_iommu_ops(dev);
2025
2026	if (ops->domain_alloc_paging && !flags)
2027	domain = ops->domain_alloc_paging(dev);
2028	else if (ops->domain_alloc_paging_flags)
2029	domain = ops->domain_alloc_paging_flags(dev, flags, NULL);
2030	#if IS_ENABLED(CONFIG_FSL_PAMU)
2031	else if (ops->domain_alloc && !flags)
2032	domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
2033	#endif
2034	else
2035	return ERR_PTR(error: -EOPNOTSUPP);
2036
2037	if (IS_ERR(ptr: domain))
2038	return domain;
2039	if (!domain)
2040	return ERR_PTR(error: -ENOMEM);
2041
2042	iommu_domain_init(domain, type, ops);
2043	return domain;
2044	}
2045
2046	/**
2047	* iommu_paging_domain_alloc_flags() - Allocate a paging domain
2048	* @dev: device for which the domain is allocated
2049	* @flags: Bitmap of iommufd_hwpt_alloc_flags
2050	*
2051	* Allocate a paging domain which will be managed by a kernel driver. Return
2052	* allocated domain if successful, or an ERR pointer for failure.
2053	*/
2054	struct iommu_domain iommu_paging_domain_alloc_flags(struct* device *dev,
2055	unsigned int flags)
2056	{
2057	return __iommu_paging_domain_alloc_flags(dev,
2058	IOMMU_DOMAIN_UNMANAGED, flags);
2059	}
2060	EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);
2061
2062	void iommu_domain_free(struct iommu_domain *domain)
2063	{
2064	switch (domain->cookie_type) {
2065	case IOMMU_COOKIE_DMA_IOVA:
2066	iommu_put_dma_cookie(domain);
2067	break;
2068	case IOMMU_COOKIE_DMA_MSI:
2069	iommu_put_msi_cookie(domain);
2070	break;
2071	case IOMMU_COOKIE_SVA:
2072	mmdrop(mm: domain->mm);
2073	break;
2074	default:
2075	break;
2076	}
2077	if (domain->ops->free)
2078	domain->ops->free(domain);
2079	}
2080	EXPORT_SYMBOL_GPL(iommu_domain_free);
2081
2082	/*
2083	* Put the group's domain back to the appropriate core-owned domain - either the
2084	* standard kernel-mode DMA configuration or an all-DMA-blocked domain.
2085	*/
2086	static void __iommu_group_set_core_domain(struct iommu_group *group)
2087	{
2088	struct iommu_domain *new_domain;
2089
2090	if (group->owner)
2091	new_domain = group->blocking_domain;
2092	else
2093	new_domain = group->default_domain;
2094
2095	__iommu_group_set_domain_nofail(group, new_domain);
2096	}
2097
2098	static int __iommu_attach_device(struct iommu_domain *domain,
2099	struct device *dev)
2100	{
2101	int ret;
2102
2103	if (unlikely(domain->ops->attach_dev == NULL))
2104	return -ENODEV;
2105
2106	ret = domain->ops->attach_dev(domain, dev);
2107	if (ret)
2108	return ret;
2109	dev->iommu->attach_deferred = `0`;
2110	trace_attach_device_to_domain(dev);
2111	return `0`;
2112	}
2113
2114	/**
2115	* iommu_attach_device - Attach an IOMMU domain to a device
2116	* @domain: IOMMU domain to attach
2117	* @dev: Device that will be attached
2118	*
2119	* Returns 0 on success and error code on failure
2120	*
2121	* Note that EINVAL can be treated as a soft failure, indicating
2122	* that certain configuration of the domain is incompatible with
2123	* the device. In this case attaching a different domain to the
2124	* device may succeed.
2125	*/
2126	int iommu_attach_device(struct iommu_domain domain, struct* device *dev)
2127	{
2128	/ Caller must be a probed driver on dev /
2129	struct iommu_group *group = dev->iommu_group;
2130	int ret;
2131
2132	if (!group)
2133	return -ENODEV;
2134
2135	/*
2136	* Lock the group to make sure the device-count doesn't
2137	* change while we are attaching
2138	*/
2139	mutex_lock(&group->mutex);
2140	ret = -EINVAL;
2141	if (list_count_nodes(head: &group->devices) != `1`)
2142	goto out_unlock;
2143
2144	ret = __iommu_attach_group(domain, group);
2145
2146	out_unlock:
2147	mutex_unlock(lock: &group->mutex);
2148	return ret;
2149	}
2150	EXPORT_SYMBOL_GPL(iommu_attach_device);
2151
2152	int iommu_deferred_attach(struct device dev, struct* iommu_domain *domain)
2153	{
2154	if (dev->iommu && dev->iommu->attach_deferred)
2155	return __iommu_attach_device(domain, dev);
2156
2157	return `0`;
2158	}
2159
2160	void iommu_detach_device(struct iommu_domain domain, struct* device *dev)
2161	{
2162	/ Caller must be a probed driver on dev /
2163	struct iommu_group *group = dev->iommu_group;
2164
2165	if (!group)
2166	return;
2167
2168	mutex_lock(&group->mutex);
2169	if (WARN_ON(domain != group->domain) \|\|
2170	WARN_ON(list_count_nodes(&group->devices) != `1`))
2171	goto out_unlock;
2172	__iommu_group_set_core_domain(group);
2173
2174	out_unlock:
2175	mutex_unlock(lock: &group->mutex);
2176	}
2177	EXPORT_SYMBOL_GPL(iommu_detach_device);
2178
2179	struct iommu_domain iommu_get_domain_for_dev(struct* device *dev)
2180	{
2181	/ Caller must be a probed driver on dev /
2182	struct iommu_group *group = dev->iommu_group;
2183
2184	if (!group)
2185	return NULL;
2186
2187	return group->domain;
2188	}
2189	EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
2190
2191	/*
2192	* For IOMMU_DOMAIN_DMA implementations which already provide their own
2193	* guarantees that the group and its default domain are valid and correct.
2194	*/
2195	struct iommu_domain iommu_get_dma_domain(struct* device *dev)
2196	{
2197	return dev->iommu_group->default_domain;
2198	}
2199
2200	static void iommu_make_pasid_array_entry(struct* iommu_domain *domain,
2201	struct iommu_attach_handle *handle)
2202	{
2203	if (handle) {
2204	handle->domain = domain;
2205	return xa_tag_pointer(p: handle, tag: IOMMU_PASID_ARRAY_HANDLE);
2206	}
2207
2208	return xa_tag_pointer(p: domain, tag: IOMMU_PASID_ARRAY_DOMAIN);
2209	}
2210
2211	static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
2212	struct iommu_domain *domain)
2213	{
2214	if (domain->owner == ops)
2215	return true;
2216
2217	/ For static domains, owner isn't set. /
2218	if (domain == ops->blocked_domain \|\| domain == ops->identity_domain)
2219	return true;
2220
2221	return false;
2222	}
2223
2224	static int __iommu_attach_group(struct iommu_domain *domain,
2225	struct iommu_group *group)
2226	{
2227	struct device *dev;
2228
2229	if (group->domain && group->domain != group->default_domain &&
2230	group->domain != group->blocking_domain)
2231	return -EBUSY;
2232
2233	dev = iommu_group_first_dev(group);
2234	if (!dev_has_iommu(dev) \|\|
2235	!domain_iommu_ops_compatible(ops: dev_iommu_ops(dev), domain))
2236	return -EINVAL;
2237
2238	return __iommu_group_set_domain(group, new_domain: domain);
2239	}
2240
2241	/**
2242	* iommu_attach_group - Attach an IOMMU domain to an IOMMU group
2243	* @domain: IOMMU domain to attach
2244	* @group: IOMMU group that will be attached
2245	*
2246	* Returns 0 on success and error code on failure
2247	*
2248	* Note that EINVAL can be treated as a soft failure, indicating
2249	* that certain configuration of the domain is incompatible with
2250	* the group. In this case attaching a different domain to the
2251	* group may succeed.
2252	*/
2253	int iommu_attach_group(struct iommu_domain domain, struct* iommu_group *group)
2254	{
2255	int ret;
2256
2257	mutex_lock(&group->mutex);
2258	ret = __iommu_attach_group(domain, group);
2259	mutex_unlock(lock: &group->mutex);
2260
2261	return ret;
2262	}
2263	EXPORT_SYMBOL_GPL(iommu_attach_group);
2264
2265	static int __iommu_device_set_domain(struct iommu_group *group,
2266	struct device *dev,
2267	struct iommu_domain *new_domain,
2268	unsigned int flags)
2269	{
2270	int ret;
2271
2272	/*
2273	* If the device requires IOMMU_RESV_DIRECT then we cannot allow
2274	* the blocking domain to be attached as it does not contain the
2275	* required 1:1 mapping. This test effectively excludes the device
2276	* being used with iommu_group_claim_dma_owner() which will block
2277	* vfio and iommufd as well.
2278	*/
2279	if (dev->iommu->require_direct &&
2280	(new_domain->type == IOMMU_DOMAIN_BLOCKED \|\|
2281	new_domain == group->blocking_domain)) {
2282	dev_warn(dev,
2283	"Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n");
2284	return -EINVAL;
2285	}
2286
2287	if (dev->iommu->attach_deferred) {
2288	if (new_domain == group->default_domain)
2289	return `0`;
2290	dev->iommu->attach_deferred = `0`;
2291	}
2292
2293	ret = __iommu_attach_device(domain: new_domain, dev);
2294	if (ret) {
2295	/*
2296	* If we have a blocking domain then try to attach that in hopes
2297	* of avoiding a UAF. Modern drivers should implement blocking
2298	* domains as global statics that cannot fail.
2299	*/
2300	if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
2301	group->blocking_domain &&
2302	group->blocking_domain != new_domain)
2303	__iommu_attach_device(domain: group->blocking_domain, dev);
2304	return ret;
2305	}
2306	return `0`;
2307	}
2308
2309	/*
2310	* If 0 is returned the group's domain is new_domain. If an error is returned
2311	* then the group's domain will be set back to the existing domain unless
2312	* IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's
2313	* domains is left inconsistent. This is a driver bug to fail attach with a
2314	* previously good domain. We try to avoid a kernel UAF because of this.
2315	*
2316	* IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
2317	* API works on domains and devices. Bridge that gap by iterating over the
2318	* devices in a group. Ideally we'd have a single device which represents the
2319	* requestor ID of the group, but we also allow IOMMU drivers to create policy
2320	* defined minimum sets, where the physical hardware may be able to distiguish
2321	* members, but we wish to group them at a higher level (ex. untrusted
2322	* multi-function PCI devices). Thus we attach each device.
2323	*/
2324	static int __iommu_group_set_domain_internal(struct iommu_group *group,
2325	struct iommu_domain *new_domain,
2326	unsigned int flags)
2327	{
2328	struct group_device *last_gdev;
2329	struct group_device *gdev;
2330	int result;
2331	int ret;
2332
2333	lockdep_assert_held(&group->mutex);
2334
2335	if (group->domain == new_domain)
2336	return `0`;
2337
2338	if (WARN_ON(!new_domain))
2339	return -EINVAL;
2340
2341	/*
2342	* Changing the domain is done by calling attach_dev() on the new
2343	* domain. This switch does not have to be atomic and DMA can be
2344	* discarded during the transition. DMA must only be able to access
2345	* either new_domain or group->domain, never something else.
2346	*/
2347	result = `0`;
2348	for_each_group_device(group, gdev) {
2349	ret = __iommu_device_set_domain(group, dev: gdev->dev, new_domain,
2350	flags);
2351	if (ret) {
2352	result = ret;
2353	/*
2354	* Keep trying the other devices in the group. If a
2355	* driver fails attach to an otherwise good domain, and
2356	* does not support blocking domains, it should at least
2357	* drop its reference on the current domain so we don't
2358	* UAF.
2359	*/
2360	if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
2361	continue;
2362	goto err_revert;
2363	}
2364	}
2365	group->domain = new_domain;
2366	return result;
2367
2368	err_revert:
2369	/*
2370	* This is called in error unwind paths. A well behaved driver should
2371	* always allow us to attach to a domain that was already attached.
2372	*/
2373	last_gdev = gdev;
2374	for_each_group_device(group, gdev) {
2375	/*
2376	* A NULL domain can happen only for first probe, in which case
2377	* we leave group->domain as NULL and let release clean
2378	* everything up.
2379	*/
2380	if (group->domain)
2381	WARN_ON(__iommu_device_set_domain(
2382	group, gdev->dev, group->domain,
2383	IOMMU_SET_DOMAIN_MUST_SUCCEED));
2384	if (gdev == last_gdev)
2385	break;
2386	}
2387	return ret;
2388	}
2389
2390	void iommu_detach_group(struct iommu_domain domain, struct* iommu_group *group)
2391	{
2392	mutex_lock(&group->mutex);
2393	__iommu_group_set_core_domain(group);
2394	mutex_unlock(lock: &group->mutex);
2395	}
2396	EXPORT_SYMBOL_GPL(iommu_detach_group);
2397
2398	phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2399	{
2400	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2401	return iova;
2402
2403	if (domain->type == IOMMU_DOMAIN_BLOCKED)
2404	return `0`;
2405
2406	return domain->ops->iova_to_phys(domain, iova);
2407	}
2408	EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
2409
2410	static size_t iommu_pgsize(struct iommu_domain domain, unsigned* long iova,
2411	phys_addr_t paddr, size_t size, size_t *count)
2412	{
2413	unsigned int pgsize_idx, pgsize_idx_next;
2414	unsigned long pgsizes;
2415	size_t offset, pgsize, pgsize_next;
2416	size_t offset_end;
2417	unsigned long addr_merge = paddr \| iova;
2418
2419	/ Page sizes supported by the hardware and small enough for @size /
2420	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), `0`);
2421
2422	/ Constrain the page sizes further based on the maximum alignment /
2423	if (likely(addr_merge))
2424	pgsizes &= GENMASK(__ffs(addr_merge), `0`);
2425
2426	/ Make sure we have at least one suitable page size /
2427	BUG_ON(!pgsizes);
2428
2429	/ Pick the biggest page size remaining /
2430	pgsize_idx = __fls(word: pgsizes);
2431	pgsize = BIT(pgsize_idx);
2432	if (!count)
2433	return pgsize;
2434
2435	/ Find the next biggest support page size, if it exists /
2436	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, `0`);
2437	if (!pgsizes)
2438	goto out_set_count;
2439
2440	pgsize_idx_next = __ffs(pgsizes);
2441	pgsize_next = BIT(pgsize_idx_next);
2442
2443	/*
2444	* There's no point trying a bigger page size unless the virtual
2445	* and physical addresses are similarly offset within the larger page.
2446	*/
2447	if ((iova ^ paddr) & (pgsize_next - `1`))
2448	goto out_set_count;
2449
2450	/ Calculate the offset to the next page size alignment boundary /
2451	offset = pgsize_next - (addr_merge & (pgsize_next - `1`));
2452
2453	/*
2454	* If size is big enough to accommodate the larger page, reduce
2455	* the number of smaller pages.
2456	*/
2457	if (!check_add_overflow(offset, pgsize_next, &offset_end) &&
2458	offset_end <= size)
2459	size = offset;
2460
2461	out_set_count:
2462	*count = size >> pgsize_idx;
2463	return pgsize;
2464	}
2465
2466	int iommu_map_nosync(struct iommu_domain domain, unsigned* long iova,
2467	phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2468	{
2469	const struct iommu_domain_ops *ops = domain->ops;
2470	unsigned long orig_iova = iova;
2471	unsigned int min_pagesz;
2472	size_t orig_size = size;
2473	phys_addr_t orig_paddr = paddr;
2474	int ret = `0`;
2475
2476	might_sleep_if(gfpflags_allow_blocking(gfp));
2477
2478	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2479	return -EINVAL;
2480
2481	if (WARN_ON(!ops->map_pages \|\| domain->pgsize_bitmap == `0UL`))
2482	return -ENODEV;
2483
2484	/ Discourage passing strange GFP flags /
2485	if (WARN_ON_ONCE(gfp & (__GFP_COMP \| __GFP_DMA \| __GFP_DMA32 \|
2486	__GFP_HIGHMEM)))
2487	return -EINVAL;
2488
2489	/ find out the minimum page size supported /
2490	min_pagesz = `1` << __ffs(domain->pgsize_bitmap);
2491
2492	/*
2493	* both the virtual address and the physical one, as well as
2494	* the size of the mapping, must be aligned (at least) to the
2495	* size of the smallest page supported by the hardware
2496	*/
2497	if (!IS_ALIGNED(iova \| paddr \| size, min_pagesz)) {
2498	pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
2499	iova, &paddr, size, min_pagesz);
2500	return -EINVAL;
2501	}
2502
2503	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
2504
2505	while (size) {
2506	size_t pgsize, count, mapped = `0`;
2507
2508	pgsize = iommu_pgsize(domain, iova, paddr, size, count: &count);
2509
2510	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
2511	iova, &paddr, pgsize, count);
2512	ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
2513	gfp, &mapped);
2514	/*
2515	* Some pages may have been mapped, even if an error occurred,
2516	* so we should account for those so they can be unmapped.
2517	*/
2518	size -= mapped;
2519
2520	if (ret)
2521	break;
2522
2523	iova += mapped;
2524	paddr += mapped;
2525	}
2526
2527	/ unroll mapping in case something went wrong /
2528	if (ret)
2529	iommu_unmap(domain, iova: orig_iova, size: orig_size - size);
2530	else
2531	trace_map(iova: orig_iova, paddr: orig_paddr, size: orig_size);
2532
2533	return ret;
2534	}
2535
2536	int iommu_sync_map(struct iommu_domain domain, unsigned* long iova, size_t size)
2537	{
2538	const struct iommu_domain_ops *ops = domain->ops;
2539
2540	if (!ops->iotlb_sync_map)
2541	return `0`;
2542	return ops->iotlb_sync_map(domain, iova, size);
2543	}
2544
2545	int iommu_map(struct iommu_domain domain, unsigned* long iova,
2546	phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2547	{
2548	int ret;
2549
2550	ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp);
2551	if (ret)
2552	return ret;
2553
2554	ret = iommu_sync_map(domain, iova, size);
2555	if (ret)
2556	iommu_unmap(domain, iova, size);
2557
2558	return ret;
2559	}
2560	EXPORT_SYMBOL_GPL(iommu_map);
2561
2562	static size_t __iommu_unmap(struct iommu_domain *domain,
2563	unsigned long iova, size_t size,
2564	struct iommu_iotlb_gather *iotlb_gather)
2565	{
2566	const struct iommu_domain_ops *ops = domain->ops;
2567	size_t unmapped_page, unmapped = `0`;
2568	unsigned long orig_iova = iova;
2569	unsigned int min_pagesz;
2570
2571	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2572	return `0`;
2573
2574	if (WARN_ON(!ops->unmap_pages \|\| domain->pgsize_bitmap == `0UL`))
2575	return `0`;
2576
2577	/ find out the minimum page size supported /
2578	min_pagesz = `1` << __ffs(domain->pgsize_bitmap);
2579
2580	/*
2581	* The virtual address, as well as the size of the mapping, must be
2582	* aligned (at least) to the size of the smallest page supported
2583	* by the hardware
2584	*/
2585	if (!IS_ALIGNED(iova \| size, min_pagesz)) {
2586	pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
2587	iova, size, min_pagesz);
2588	return `0`;
2589	}
2590
2591	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
2592
2593	/*
2594	* Keep iterating until we either unmap 'size' bytes (or more)
2595	* or we hit an area that isn't mapped.
2596	*/
2597	while (unmapped < size) {
2598	size_t pgsize, count;
2599
2600	pgsize = iommu_pgsize(domain, iova, paddr: iova, size: size - unmapped, count: &count);
2601	unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather);
2602	if (!unmapped_page)
2603	break;
2604
2605	pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
2606	iova, unmapped_page);
2607
2608	iova += unmapped_page;
2609	unmapped += unmapped_page;
2610	}
2611
2612	trace_unmap(iova: orig_iova, size, unmapped_size: unmapped);
2613	return unmapped;
2614	}
2615
2616	/**
2617	* iommu_unmap() - Remove mappings from a range of IOVA
2618	* @domain: Domain to manipulate
2619	* @iova: IO virtual address to start
2620	* @size: Length of the range starting from @iova
2621	*
2622	* iommu_unmap() will remove a translation created by iommu_map(). It cannot
2623	* subdivide a mapping created by iommu_map(), so it should be called with IOVA
2624	* ranges that match what was passed to iommu_map(). The range can aggregate
2625	* contiguous iommu_map() calls so long as no individual range is split.
2626	*
2627	* Returns: Number of bytes of IOVA unmapped. iova + res will be the point
2628	* unmapping stopped.
2629	*/
2630	size_t iommu_unmap(struct iommu_domain *domain,
2631	unsigned long iova, size_t size)
2632	{
2633	struct iommu_iotlb_gather iotlb_gather;
2634	size_t ret;
2635
2636	iommu_iotlb_gather_init(gather: &iotlb_gather);
2637	ret = __iommu_unmap(domain, iova, size, iotlb_gather: &iotlb_gather);
2638	iommu_iotlb_sync(domain, iotlb_gather: &iotlb_gather);
2639
2640	return ret;
2641	}
2642	EXPORT_SYMBOL_GPL(iommu_unmap);
2643
2644	/**
2645	* iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync
2646	* @domain: Domain to manipulate
2647	* @iova: IO virtual address to start
2648	* @size: Length of the range starting from @iova
2649	* @iotlb_gather: range information for a pending IOTLB flush
2650	*
2651	* iommu_unmap_fast() will remove a translation created by iommu_map().
2652	* It can't subdivide a mapping created by iommu_map(), so it should be
2653	* called with IOVA ranges that match what was passed to iommu_map(). The
2654	* range can aggregate contiguous iommu_map() calls so long as no individual
2655	* range is split.
2656	*
2657	* Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers
2658	* which manage the IOTLB flushing externally to perform a batched sync.
2659	*
2660	* Returns: Number of bytes of IOVA unmapped. iova + res will be the point
2661	* unmapping stopped.
2662	*/
2663	size_t iommu_unmap_fast(struct iommu_domain *domain,
2664	unsigned long iova, size_t size,
2665	struct iommu_iotlb_gather *iotlb_gather)
2666	{
2667	return __iommu_unmap(domain, iova, size, iotlb_gather);
2668	}
2669	EXPORT_SYMBOL_GPL(iommu_unmap_fast);
2670
2671	ssize_t iommu_map_sg(struct iommu_domain domain, unsigned* long iova,
2672	struct scatterlist sg, unsigned* int nents, int prot,
2673	gfp_t gfp)
2674	{
2675	size_t len = `0`, mapped = `0`;
2676	phys_addr_t start;
2677	unsigned int i = `0`;
2678	int ret;
2679
2680	while (i <= nents) {
2681	phys_addr_t s_phys = sg_phys(sg);
2682
2683	if (len && s_phys != start + len) {
2684	ret = iommu_map_nosync(domain, iova: iova + mapped, paddr: start,
2685	size: len, prot, gfp);
2686	if (ret)
2687	goto out_err;
2688
2689	mapped += len;
2690	len = `0`;
2691	}
2692
2693	if (sg_dma_is_bus_address(sg))
2694	goto next;
2695
2696	if (len) {
2697	len += sg->length;
2698	} else {
2699	len = sg->length;
2700	start = s_phys;
2701	}
2702
2703	next:
2704	if (++i < nents)
2705	sg = sg_next(sg);
2706	}
2707
2708	ret = iommu_sync_map(domain, iova, size: mapped);
2709	if (ret)
2710	goto out_err;
2711
2712	return mapped;
2713
2714	out_err:
2715	/ undo mappings already done /
2716	iommu_unmap(domain, iova, mapped);
2717
2718	return ret;
2719	}
2720	EXPORT_SYMBOL_GPL(iommu_map_sg);
2721
2722	/**
2723	* report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
2724	* @domain: the iommu domain where the fault has happened
2725	* @dev: the device where the fault has happened
2726	* @iova: the faulting address
2727	* @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
2728	*
2729	* This function should be called by the low-level IOMMU implementations
2730	* whenever IOMMU faults happen, to allow high-level users, that are
2731	* interested in such events, to know about them.
2732	*
2733	* This event may be useful for several possible use cases:
2734	* - mere logging of the event
2735	* - dynamic TLB/PTE loading
2736	* - if restarting of the faulting device is required
2737	*
2738	* Returns 0 on success and an appropriate error code otherwise (if dynamic
2739	* PTE/TLB loading will one day be supported, implementations will be able
2740	* to tell whether it succeeded or not according to this return value).
2741	*
2742	* Specifically, -ENOSYS is returned if a fault handler isn't installed
2743	* (though fault handlers can also return -ENOSYS, in case they want to
2744	* elicit the default behavior of the IOMMU drivers).
2745	*/
2746	int report_iommu_fault(struct iommu_domain domain, struct* device *dev,
2747	unsigned long iova, int flags)
2748	{
2749	int ret = -ENOSYS;
2750
2751	/*
2752	* if upper layers showed interest and installed a fault handler,
2753	* invoke it.
2754	*/
2755	if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER &&
2756	domain->handler)
2757	ret = domain->handler(domain, dev, iova, flags,
2758	domain->handler_token);
2759
2760	trace_io_page_fault(dev, iova, flags);
2761	return ret;
2762	}
2763	EXPORT_SYMBOL_GPL(report_iommu_fault);
2764
2765	static int __init iommu_init(void)
2766	{
2767	iommu_group_kset = kset_create_and_add(name: "iommu_groups",
2768	NULL, parent_kobj: kernel_kobj);
2769	BUG_ON(!iommu_group_kset);
2770
2771	iommu_debugfs_setup();
2772
2773	return `0`;
2774	}
2775	core_initcall(iommu_init);
2776
2777	int iommu_set_pgtable_quirks(struct iommu_domain *domain,
2778	unsigned long quirk)
2779	{
2780	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2781	return -EINVAL;
2782	if (!domain->ops->set_pgtable_quirks)
2783	return -EINVAL;
2784	return domain->ops->set_pgtable_quirks(domain, quirk);
2785	}
2786	EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
2787
2788	/**
2789	* iommu_get_resv_regions - get reserved regions
2790	* @dev: device for which to get reserved regions
2791	* @list: reserved region list for device
2792	*
2793	* This returns a list of reserved IOVA regions specific to this device.
2794	* A domain user should not map IOVA in these ranges.
2795	*/
2796	void iommu_get_resv_regions(struct device dev, struct* list_head *list)
2797	{
2798	const struct iommu_ops *ops = dev_iommu_ops(dev);
2799
2800	if (ops->get_resv_regions)
2801	ops->get_resv_regions(dev, list);
2802	}
2803	EXPORT_SYMBOL_GPL(iommu_get_resv_regions);
2804
2805	/**
2806	* iommu_put_resv_regions - release reserved regions
2807	* @dev: device for which to free reserved regions
2808	* @list: reserved region list for device
2809	*
2810	* This releases a reserved region list acquired by iommu_get_resv_regions().
2811	*/
2812	void iommu_put_resv_regions(struct device dev, struct* list_head *list)
2813	{
2814	struct iommu_resv_region entry, next;
2815
2816	list_for_each_entry_safe(entry, next, list, list) {
2817	if (entry->free)
2818	entry->free(dev, entry);
2819	else
2820	kfree(objp: entry);
2821	}
2822	}
2823	EXPORT_SYMBOL(iommu_put_resv_regions);
2824
2825	struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
2826	size_t length, int prot,
2827	enum iommu_resv_type type,
2828	gfp_t gfp)
2829	{
2830	struct iommu_resv_region *region;
2831
2832	region = kzalloc(sizeof(*region), gfp);
2833	if (!region)
2834	return NULL;
2835
2836	INIT_LIST_HEAD(list: &region->list);
2837	region->start = start;
2838	region->length = length;
2839	region->prot = prot;
2840	region->type = type;
2841	return region;
2842	}
2843	EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
2844
2845	void iommu_set_default_passthrough(bool cmd_line)
2846	{
2847	if (cmd_line)
2848	iommu_cmd_line \|= IOMMU_CMD_LINE_DMA_API;
2849	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
2850	}
2851
2852	void iommu_set_default_translated(bool cmd_line)
2853	{
2854	if (cmd_line)
2855	iommu_cmd_line \|= IOMMU_CMD_LINE_DMA_API;
2856	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
2857	}
2858
2859	bool iommu_default_passthrough(void)
2860	{
2861	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
2862	}
2863	EXPORT_SYMBOL_GPL(iommu_default_passthrough);
2864
2865	static const struct iommu_device iommu_from_fwnode(const* struct fwnode_handle *fwnode)
2866	{
2867	const struct iommu_device iommu, ret = NULL;
2868
2869	spin_lock(lock: &iommu_device_lock);
2870	list_for_each_entry(iommu, &iommu_device_list, list)
2871	if (iommu->fwnode == fwnode) {
2872	ret = iommu;
2873	break;
2874	}
2875	spin_unlock(lock: &iommu_device_lock);
2876	return ret;
2877	}
2878
2879	const struct iommu_ops iommu_ops_from_fwnode(const* struct fwnode_handle *fwnode)
2880	{
2881	const struct iommu_device *iommu = iommu_from_fwnode(fwnode);
2882
2883	return iommu ? iommu->ops : NULL;
2884	}
2885
2886	int iommu_fwspec_init(struct device dev, struct* fwnode_handle *iommu_fwnode)
2887	{
2888	const struct iommu_device *iommu = iommu_from_fwnode(fwnode: iommu_fwnode);
2889	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2890
2891	if (!iommu)
2892	return driver_deferred_probe_check_state(dev);
2893	if (!dev->iommu && !READ_ONCE(iommu->ready))
2894	return -EPROBE_DEFER;
2895
2896	if (fwspec)
2897	return iommu->ops == iommu_fwspec_ops(fwspec) ? `0` : -EINVAL;
2898
2899	if (!dev_iommu_get(dev))
2900	return -ENOMEM;
2901
2902	/ Preallocate for the overwhelmingly common case of 1 ID /
2903	fwspec = kzalloc(struct_size(fwspec, ids, `1`), GFP_KERNEL);
2904	if (!fwspec)
2905	return -ENOMEM;
2906
2907	fwnode_handle_get(fwnode: iommu_fwnode);
2908	fwspec->iommu_fwnode = iommu_fwnode;
2909	dev_iommu_fwspec_set(dev, fwspec);
2910	return `0`;
2911	}
2912	EXPORT_SYMBOL_GPL(iommu_fwspec_init);
2913
2914	void iommu_fwspec_free(struct device *dev)
2915	{
2916	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2917
2918	if (fwspec) {
2919	fwnode_handle_put(fwnode: fwspec->iommu_fwnode);
2920	kfree(objp: fwspec);
2921	dev_iommu_fwspec_set(dev, NULL);
2922	}
2923	}
2924
2925	int iommu_fwspec_add_ids(struct device dev, const* u32 ids, int* num_ids)
2926	{
2927	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2928	int i, new_num;
2929
2930	if (!fwspec)
2931	return -EINVAL;
2932
2933	new_num = fwspec->num_ids + num_ids;
2934	if (new_num > `1`) {
2935	fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
2936	GFP_KERNEL);
2937	if (!fwspec)
2938	return -ENOMEM;
2939
2940	dev_iommu_fwspec_set(dev, fwspec);
2941	}
2942
2943	for (i = `0`; i < num_ids; i++)
2944	fwspec->ids[fwspec->num_ids + i] = ids[i];
2945
2946	fwspec->num_ids = new_num;
2947	return `0`;
2948	}
2949	EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
2950
2951	/**
2952	* iommu_setup_default_domain - Set the default_domain for the group
2953	* @group: Group to change
2954	* @target_type: Domain type to set as the default_domain
2955	*
2956	* Allocate a default domain and set it as the current domain on the group. If
2957	* the group already has a default domain it will be changed to the target_type.
2958	* When target_type is 0 the default domain is selected based on driver and
2959	* system preferences.
2960	*/
2961	static int iommu_setup_default_domain(struct iommu_group *group,
2962	int target_type)
2963	{
2964	struct iommu_domain *old_dom = group->default_domain;
2965	struct group_device *gdev;
2966	struct iommu_domain *dom;
2967	bool direct_failed;
2968	int req_type;
2969	int ret;
2970
2971	lockdep_assert_held(&group->mutex);
2972
2973	req_type = iommu_get_default_domain_type(group, target_type);
2974	if (req_type < `0`)
2975	return -EINVAL;
2976
2977	dom = iommu_group_alloc_default_domain(group, req_type);
2978	if (IS_ERR(ptr: dom))
2979	return PTR_ERR(ptr: dom);
2980
2981	if (group->default_domain == dom)
2982	return `0`;
2983
2984	if (iommu_is_dma_domain(domain: dom)) {
2985	ret = iommu_get_dma_cookie(domain: dom);
2986	if (ret) {
2987	iommu_domain_free(dom);
2988	return ret;
2989	}
2990	}
2991
2992	/*
2993	* IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
2994	* mapped before their device is attached, in order to guarantee
2995	* continuity with any FW activity
2996	*/
2997	direct_failed = false;
2998	for_each_group_device(group, gdev) {
2999	if (iommu_create_device_direct_mappings(domain: dom, dev: gdev->dev)) {
3000	direct_failed = true;
3001	dev_warn_once(
3002	gdev->dev->iommu->iommu_dev->dev,
3003	"IOMMU driver was not able to establish FW requested direct mapping.");
3004	}
3005	}
3006
3007	/ We must set default_domain early for __iommu_device_set_domain /
3008	group->default_domain = dom;
3009	if (!group->domain) {
3010	/*
3011	* Drivers are not allowed to fail the first domain attach.
3012	* The only way to recover from this is to fail attaching the
3013	* iommu driver and call ops->release_device. Put the domain
3014	* in group->default_domain so it is freed after.
3015	*/
3016	ret = __iommu_group_set_domain_internal(
3017	group, new_domain: dom, flags: IOMMU_SET_DOMAIN_MUST_SUCCEED);
3018	if (WARN_ON(ret))
3019	goto out_free_old;
3020	} else {
3021	ret = __iommu_group_set_domain(group, new_domain: dom);
3022	if (ret)
3023	goto err_restore_def_domain;
3024	}
3025
3026	/*
3027	* Drivers are supposed to allow mappings to be installed in a domain
3028	* before device attachment, but some don't. Hack around this defect by
3029	* trying again after attaching. If this happens it means the device
3030	* will not continuously have the IOMMU_RESV_DIRECT map.
3031	*/
3032	if (direct_failed) {
3033	for_each_group_device(group, gdev) {
3034	ret = iommu_create_device_direct_mappings(domain: dom, dev: gdev->dev);
3035	if (ret)
3036	goto err_restore_domain;
3037	}
3038	}
3039
3040	out_free_old:
3041	if (old_dom)
3042	iommu_domain_free(old_dom);
3043	return ret;
3044
3045	err_restore_domain:
3046	if (old_dom)
3047	__iommu_group_set_domain_internal(
3048	group, new_domain: old_dom, flags: IOMMU_SET_DOMAIN_MUST_SUCCEED);
3049	err_restore_def_domain:
3050	if (old_dom) {
3051	iommu_domain_free(dom);
3052	group->default_domain = old_dom;
3053	}
3054	return ret;
3055	}
3056
3057	/*
3058	* Changing the default domain through sysfs requires the users to unbind the
3059	* drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
3060	* transition. Return failure if this isn't met.
3061	*
3062	* We need to consider the race between this and the device release path.
3063	* group->mutex is used here to guarantee that the device release path
3064	* will not be entered at the same time.
3065	*/
3066	static ssize_t iommu_group_store_type(struct iommu_group *group,
3067	const char *buf, size_t count)
3068	{
3069	struct group_device *gdev;
3070	int ret, req_type;
3071
3072	if (!capable(CAP_SYS_ADMIN) \|\| !capable(CAP_SYS_RAWIO))
3073	return -EACCES;
3074
3075	if (WARN_ON(!group) \|\| !group->default_domain)
3076	return -EINVAL;
3077
3078	if (sysfs_streq(s1: buf, s2: "identity"))
3079	req_type = IOMMU_DOMAIN_IDENTITY;
3080	else if (sysfs_streq(s1: buf, s2: "DMA"))
3081	req_type = IOMMU_DOMAIN_DMA;
3082	else if (sysfs_streq(s1: buf, s2: "DMA-FQ"))
3083	req_type = IOMMU_DOMAIN_DMA_FQ;
3084	else if (sysfs_streq(s1: buf, s2: "auto"))
3085	req_type = `0`;
3086	else
3087	return -EINVAL;
3088
3089	mutex_lock(&group->mutex);
3090	/ We can bring up a flush queue without tearing down the domain. /
3091	if (req_type == IOMMU_DOMAIN_DMA_FQ &&
3092	group->default_domain->type == IOMMU_DOMAIN_DMA) {
3093	ret = iommu_dma_init_fq(domain: group->default_domain);
3094	if (ret)
3095	goto out_unlock;
3096
3097	group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
3098	ret = count;
3099	goto out_unlock;
3100	}
3101
3102	/ Otherwise, ensure that device exists and no driver is bound. /
3103	if (list_empty(head: &group->devices) \|\| group->owner_cnt) {
3104	ret = -EPERM;
3105	goto out_unlock;
3106	}
3107
3108	ret = iommu_setup_default_domain(group, target_type: req_type);
3109	if (ret)
3110	goto out_unlock;
3111
3112	/ Make sure dma_ops is appropriatley set /
3113	for_each_group_device(group, gdev)
3114	iommu_setup_dma_ops(dev: gdev->dev);
3115
3116	out_unlock:
3117	mutex_unlock(lock: &group->mutex);
3118	return ret ?: count;
3119	}
3120
3121	/**
3122	* iommu_device_use_default_domain() - Device driver wants to handle device
3123	* DMA through the kernel DMA API.
3124	* @dev: The device.
3125	*
3126	* The device driver about to bind @dev wants to do DMA through the kernel
3127	* DMA API. Return 0 if it is allowed, otherwise an error.
3128	*/
3129	int iommu_device_use_default_domain(struct device *dev)
3130	{
3131	/ Caller is the driver core during the pre-probe path /
3132	struct iommu_group *group = dev->iommu_group;
3133	int ret = `0`;
3134
3135	if (!group)
3136	return `0`;
3137
3138	mutex_lock(&group->mutex);
3139	/ We may race against bus_iommu_probe() finalising groups here /
3140	if (!group->default_domain) {
3141	ret = -EPROBE_DEFER;
3142	goto unlock_out;
3143	}
3144	if (group->owner_cnt) {
3145	if (group->domain != group->default_domain \|\| group->owner \|\|
3146	!xa_empty(xa: &group->pasid_array)) {
3147	ret = -EBUSY;
3148	goto unlock_out;
3149	}
3150	}
3151
3152	group->owner_cnt++;
3153
3154	unlock_out:
3155	mutex_unlock(lock: &group->mutex);
3156	return ret;
3157	}
3158
3159	/**
3160	* iommu_device_unuse_default_domain() - Device driver stops handling device
3161	* DMA through the kernel DMA API.
3162	* @dev: The device.
3163	*
3164	* The device driver doesn't want to do DMA through kernel DMA API anymore.
3165	* It must be called after iommu_device_use_default_domain().
3166	*/
3167	void iommu_device_unuse_default_domain(struct device *dev)
3168	{
3169	/ Caller is the driver core during the post-probe path /
3170	struct iommu_group *group = dev->iommu_group;
3171
3172	if (!group)
3173	return;
3174
3175	mutex_lock(&group->mutex);
3176	if (!WARN_ON(!group->owner_cnt \|\| !xa_empty(&group->pasid_array)))
3177	group->owner_cnt--;
3178
3179	mutex_unlock(lock: &group->mutex);
3180	}
3181
3182	static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
3183	{
3184	struct device *dev = iommu_group_first_dev(group);
3185	const struct iommu_ops *ops = dev_iommu_ops(dev);
3186	struct iommu_domain *domain;
3187
3188	if (group->blocking_domain)
3189	return `0`;
3190
3191	if (ops->blocked_domain) {
3192	group->blocking_domain = ops->blocked_domain;
3193	return `0`;
3194	}
3195
3196	/*
3197	* For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an
3198	* empty PAGING domain instead.
3199	*/
3200	domain = iommu_paging_domain_alloc(dev);
3201	if (IS_ERR(ptr: domain))
3202	return PTR_ERR(ptr: domain);
3203	group->blocking_domain = domain;
3204	return `0`;
3205	}
3206
3207	static int __iommu_take_dma_ownership(struct iommu_group group, void* *owner)
3208	{
3209	int ret;
3210
3211	if ((group->domain && group->domain != group->default_domain) \|\|
3212	!xa_empty(xa: &group->pasid_array))
3213	return -EBUSY;
3214
3215	ret = __iommu_group_alloc_blocking_domain(group);
3216	if (ret)
3217	return ret;
3218	ret = __iommu_group_set_domain(group, new_domain: group->blocking_domain);
3219	if (ret)
3220	return ret;
3221
3222	group->owner = owner;
3223	group->owner_cnt++;
3224	return `0`;
3225	}
3226
3227	/**
3228	* iommu_group_claim_dma_owner() - Set DMA ownership of a group
3229	* @group: The group.
3230	* @owner: Caller specified pointer. Used for exclusive ownership.
3231	*
3232	* This is to support backward compatibility for vfio which manages the dma
3233	* ownership in iommu_group level. New invocations on this interface should be
3234	* prohibited. Only a single owner may exist for a group.
3235	*/
3236	int iommu_group_claim_dma_owner(struct iommu_group group, void* *owner)
3237	{
3238	int ret = `0`;
3239
3240	if (WARN_ON(!owner))
3241	return -EINVAL;
3242
3243	mutex_lock(&group->mutex);
3244	if (group->owner_cnt) {
3245	ret = -EPERM;
3246	goto unlock_out;
3247	}
3248
3249	ret = __iommu_take_dma_ownership(group, owner);
3250	unlock_out:
3251	mutex_unlock(lock: &group->mutex);
3252
3253	return ret;
3254	}
3255	EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
3256
3257	/**
3258	* iommu_device_claim_dma_owner() - Set DMA ownership of a device
3259	* @dev: The device.
3260	* @owner: Caller specified pointer. Used for exclusive ownership.
3261	*
3262	* Claim the DMA ownership of a device. Multiple devices in the same group may
3263	* concurrently claim ownership if they present the same owner value. Returns 0
3264	* on success and error code on failure
3265	*/
3266	int iommu_device_claim_dma_owner(struct device dev, void* *owner)
3267	{
3268	/ Caller must be a probed driver on dev /
3269	struct iommu_group *group = dev->iommu_group;
3270	int ret = `0`;
3271
3272	if (WARN_ON(!owner))
3273	return -EINVAL;
3274
3275	if (!group)
3276	return -ENODEV;
3277
3278	mutex_lock(&group->mutex);
3279	if (group->owner_cnt) {
3280	if (group->owner != owner) {
3281	ret = -EPERM;
3282	goto unlock_out;
3283	}
3284	group->owner_cnt++;
3285	goto unlock_out;
3286	}
3287
3288	ret = __iommu_take_dma_ownership(group, owner);
3289	unlock_out:
3290	mutex_unlock(lock: &group->mutex);
3291	return ret;
3292	}
3293	EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
3294
3295	static void __iommu_release_dma_ownership(struct iommu_group *group)
3296	{
3297	if (WARN_ON(!group->owner_cnt \|\| !group->owner \|\|
3298	!xa_empty(&group->pasid_array)))
3299	return;
3300
3301	group->owner_cnt = `0`;
3302	group->owner = NULL;
3303	__iommu_group_set_domain_nofail(group, new_domain: group->default_domain);
3304	}
3305
3306	/**
3307	* iommu_group_release_dma_owner() - Release DMA ownership of a group
3308	* @group: The group
3309	*
3310	* Release the DMA ownership claimed by iommu_group_claim_dma_owner().
3311	*/
3312	void iommu_group_release_dma_owner(struct iommu_group *group)
3313	{
3314	mutex_lock(&group->mutex);
3315	__iommu_release_dma_ownership(group);
3316	mutex_unlock(lock: &group->mutex);
3317	}
3318	EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
3319
3320	/**
3321	* iommu_device_release_dma_owner() - Release DMA ownership of a device
3322	* @dev: The device.
3323	*
3324	* Release the DMA ownership claimed by iommu_device_claim_dma_owner().
3325	*/
3326	void iommu_device_release_dma_owner(struct device *dev)
3327	{
3328	/ Caller must be a probed driver on dev /
3329	struct iommu_group *group = dev->iommu_group;
3330
3331	mutex_lock(&group->mutex);
3332	if (group->owner_cnt > `1`)
3333	group->owner_cnt--;
3334	else
3335	__iommu_release_dma_ownership(group);
3336	mutex_unlock(lock: &group->mutex);
3337	}
3338	EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
3339
3340	/**
3341	* iommu_group_dma_owner_claimed() - Query group dma ownership status
3342	* @group: The group.
3343	*
3344	* This provides status query on a given group. It is racy and only for
3345	* non-binding status reporting.
3346	*/
3347	bool iommu_group_dma_owner_claimed(struct iommu_group *group)
3348	{
3349	unsigned int user;
3350
3351	mutex_lock(&group->mutex);
3352	user = group->owner_cnt;
3353	mutex_unlock(lock: &group->mutex);
3354
3355	return user;
3356	}
3357	EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
3358
3359	static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
3360	struct iommu_domain *domain)
3361	{
3362	const struct iommu_ops *ops = dev_iommu_ops(dev);
3363	struct iommu_domain *blocked_domain = ops->blocked_domain;
3364
3365	WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain,
3366	dev, pasid, domain));
3367	}
3368
3369	static int __iommu_set_group_pasid(struct iommu_domain *domain,
3370	struct iommu_group *group, ioasid_t pasid,
3371	struct iommu_domain *old)
3372	{
3373	struct group_device device, last_gdev;
3374	int ret;
3375
3376	for_each_group_device(group, device) {
3377	if (device->dev->iommu->max_pasids > `0`) {
3378	ret = domain->ops->set_dev_pasid(domain, device->dev,
3379	pasid, old);
3380	if (ret)
3381	goto err_revert;
3382	}
3383	}
3384
3385	return `0`;
3386
3387	err_revert:
3388	last_gdev = device;
3389	for_each_group_device(group, device) {
3390	if (device == last_gdev)
3391	break;
3392	if (device->dev->iommu->max_pasids > `0`) {
3393	/*
3394	* If no old domain, undo the succeeded devices/pasid.
3395	* Otherwise, rollback the succeeded devices/pasid to
3396	* the old domain. And it is a driver bug to fail
3397	* attaching with a previously good domain.
3398	*/
3399	if (!old \|\|
3400	WARN_ON(old->ops->set_dev_pasid(old, device->dev,
3401	pasid, domain)))
3402	iommu_remove_dev_pasid(dev: device->dev, pasid, domain);
3403	}
3404	}
3405	return ret;
3406	}
3407
3408	static void __iommu_remove_group_pasid(struct iommu_group *group,
3409	ioasid_t pasid,
3410	struct iommu_domain *domain)
3411	{
3412	struct group_device *device;
3413
3414	for_each_group_device(group, device) {
3415	if (device->dev->iommu->max_pasids > `0`)
3416	iommu_remove_dev_pasid(dev: device->dev, pasid, domain);
3417	}
3418	}
3419
3420	/*
3421	* iommu_attach_device_pasid() - Attach a domain to pasid of device
3422	* @domain: the iommu domain.
3423	* @dev: the attached device.
3424	* @pasid: the pasid of the device.
3425	* @handle: the attach handle.
3426	*
3427	* Caller should always provide a new handle to avoid race with the paths
3428	* that have lockless reference to handle if it intends to pass a valid handle.
3429	*
3430	* Return: 0 on success, or an error.
3431	*/
3432	int iommu_attach_device_pasid(struct iommu_domain *domain,
3433	struct device *dev, ioasid_t pasid,
3434	struct iommu_attach_handle *handle)
3435	{
3436	/ Caller must be a probed driver on dev /
3437	struct iommu_group *group = dev->iommu_group;
3438	struct group_device *device;
3439	const struct iommu_ops *ops;
3440	void *entry;
3441	int ret;
3442
3443	if (!group)
3444	return -ENODEV;
3445
3446	ops = dev_iommu_ops(dev);
3447
3448	if (!domain->ops->set_dev_pasid \|\|
3449	!ops->blocked_domain \|\|
3450	!ops->blocked_domain->ops->set_dev_pasid)
3451	return -EOPNOTSUPP;
3452
3453	if (!domain_iommu_ops_compatible(ops, domain) \|\|
3454	pasid == IOMMU_NO_PASID)
3455	return -EINVAL;
3456
3457	mutex_lock(&group->mutex);
3458	for_each_group_device(group, device) {
3459	/*
3460	* Skip PASID validation for devices without PASID support
3461	* (max_pasids = 0). These devices cannot issue transactions
3462	* with PASID, so they don't affect group's PASID usage.
3463	*/
3464	if ((device->dev->iommu->max_pasids > `0`) &&
3465	(pasid >= device->dev->iommu->max_pasids)) {
3466	ret = -EINVAL;
3467	goto out_unlock;
3468	}
3469	}
3470
3471	entry = iommu_make_pasid_array_entry(domain, handle);
3472
3473	/*
3474	* Entry present is a failure case. Use xa_insert() instead of
3475	* xa_reserve().
3476	*/
3477	ret = xa_insert(xa: &group->pasid_array, index: pasid, XA_ZERO_ENTRY, GFP_KERNEL);
3478	if (ret)
3479	goto out_unlock;
3480
3481	ret = __iommu_set_group_pasid(domain, group, pasid, NULL);
3482	if (ret) {
3483	xa_release(xa: &group->pasid_array, index: pasid);
3484	goto out_unlock;
3485	}
3486
3487	/*
3488	* The xa_insert() above reserved the memory, and the group->mutex is
3489	* held, this cannot fail. The new domain cannot be visible until the
3490	* operation succeeds as we cannot tolerate PRIs becoming concurrently
3491	* queued and then failing attach.
3492	*/
3493	WARN_ON(xa_is_err(xa_store(&group->pasid_array,
3494	pasid, entry, GFP_KERNEL)));
3495
3496	out_unlock:
3497	mutex_unlock(lock: &group->mutex);
3498	return ret;
3499	}
3500	EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
3501
3502	/**
3503	* iommu_replace_device_pasid - Replace the domain that a specific pasid
3504	* of the device is attached to
3505	* @domain: the new iommu domain
3506	* @dev: the attached device.
3507	* @pasid: the pasid of the device.
3508	* @handle: the attach handle.
3509	*
3510	* This API allows the pasid to switch domains. The @pasid should have been
3511	* attached. Otherwise, this fails. The pasid will keep the old configuration
3512	* if replacement failed.
3513	*
3514	* Caller should always provide a new handle to avoid race with the paths
3515	* that have lockless reference to handle if it intends to pass a valid handle.
3516	*
3517	* Return 0 on success, or an error.
3518	*/
3519	int iommu_replace_device_pasid(struct iommu_domain *domain,
3520	struct device *dev, ioasid_t pasid,
3521	struct iommu_attach_handle *handle)
3522	{
3523	/ Caller must be a probed driver on dev /
3524	struct iommu_group *group = dev->iommu_group;
3525	struct iommu_attach_handle *entry;
3526	struct iommu_domain *curr_domain;
3527	void *curr;
3528	int ret;
3529
3530	if (!group)
3531	return -ENODEV;
3532
3533	if (!domain->ops->set_dev_pasid)
3534	return -EOPNOTSUPP;
3535
3536	if (!domain_iommu_ops_compatible(ops: dev_iommu_ops(dev), domain) \|\|
3537	pasid == IOMMU_NO_PASID \|\| !handle)
3538	return -EINVAL;
3539
3540	mutex_lock(&group->mutex);
3541	entry = iommu_make_pasid_array_entry(domain, handle);
3542	curr = xa_cmpxchg(xa: &group->pasid_array, index: pasid, NULL,
3543	XA_ZERO_ENTRY, GFP_KERNEL);
3544	if (xa_is_err(entry: curr)) {
3545	ret = xa_err(entry: curr);
3546	goto out_unlock;
3547	}
3548
3549	/*
3550	* No domain (with or without handle) attached, hence not
3551	* a replace case.
3552	*/
3553	if (!curr) {
3554	xa_release(xa: &group->pasid_array, index: pasid);
3555	ret = -EINVAL;
3556	goto out_unlock;
3557	}
3558
3559	/*
3560	* Reusing handle is problematic as there are paths that refers
3561	* the handle without lock. To avoid race, reject the callers that
3562	* attempt it.
3563	*/
3564	if (curr == entry) {
3565	WARN_ON(`1`);
3566	ret = -EINVAL;
3567	goto out_unlock;
3568	}
3569
3570	curr_domain = pasid_array_entry_to_domain(entry: curr);
3571	ret = `0`;
3572
3573	if (curr_domain != domain) {
3574	ret = __iommu_set_group_pasid(domain, group,
3575	pasid, old: curr_domain);
3576	if (ret)
3577	goto out_unlock;
3578	}
3579
3580	/*
3581	* The above xa_cmpxchg() reserved the memory, and the
3582	* group->mutex is held, this cannot fail.
3583	*/
3584	WARN_ON(xa_is_err(xa_store(&group->pasid_array,
3585	pasid, entry, GFP_KERNEL)));
3586
3587	out_unlock:
3588	mutex_unlock(lock: &group->mutex);
3589	return ret;
3590	}
3591	EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL");
3592
3593	/*
3594	* iommu_detach_device_pasid() - Detach the domain from pasid of device
3595	* @domain: the iommu domain.
3596	* @dev: the attached device.
3597	* @pasid: the pasid of the device.
3598	*
3599	* The @domain must have been attached to @pasid of the @dev with
3600	* iommu_attach_device_pasid().
3601	*/
3602	void iommu_detach_device_pasid(struct iommu_domain domain, struct* device *dev,
3603	ioasid_t pasid)
3604	{
3605	/ Caller must be a probed driver on dev /
3606	struct iommu_group *group = dev->iommu_group;
3607
3608	mutex_lock(&group->mutex);
3609	__iommu_remove_group_pasid(group, pasid, domain);
3610	xa_erase(&group->pasid_array, index: pasid);
3611	mutex_unlock(lock: &group->mutex);
3612	}
3613	EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
3614
3615	ioasid_t iommu_alloc_global_pasid(struct device *dev)
3616	{
3617	int ret;
3618
3619	/ max_pasids == 0 means that the device does not support PASID /
3620	if (!dev->iommu->max_pasids)
3621	return IOMMU_PASID_INVALID;
3622
3623	/*
3624	* max_pasids is set up by vendor driver based on number of PASID bits
3625	* supported but the IDA allocation is inclusive.
3626	*/
3627	ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID,
3628	max: dev->iommu->max_pasids - `1`, GFP_KERNEL);
3629	return ret < `0` ? IOMMU_PASID_INVALID : ret;
3630	}
3631	EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid);
3632
3633	void iommu_free_global_pasid(ioasid_t pasid)
3634	{
3635	if (WARN_ON(pasid == IOMMU_PASID_INVALID))
3636	return;
3637
3638	ida_free(&iommu_global_pasid_ida, id: pasid);
3639	}
3640	EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
3641
3642	/**
3643	* iommu_attach_handle_get - Return the attach handle
3644	* @group: the iommu group that domain was attached to
3645	* @pasid: the pasid within the group
3646	* @type: matched domain type, 0 for any match
3647	*
3648	* Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch.
3649	*
3650	* Return the attach handle to the caller. The life cycle of an iommu attach
3651	* handle is from the time when the domain is attached to the time when the
3652	* domain is detached. Callers are required to synchronize the call of
3653	* iommu_attach_handle_get() with domain attachment and detachment. The attach
3654	* handle can only be used during its life cycle.
3655	*/
3656	struct iommu_attach_handle *
3657	iommu_attach_handle_get(struct iommu_group group, ioasid_t pasid, unsigned* int type)
3658	{
3659	struct iommu_attach_handle *handle;
3660	void *entry;
3661
3662	xa_lock(&group->pasid_array);
3663	entry = xa_load(&group->pasid_array, index: pasid);
3664	if (!entry \|\| xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) {
3665	handle = ERR_PTR(error: -ENOENT);
3666	} else {
3667	handle = xa_untag_pointer(entry);
3668	if (type && handle->domain->type != type)
3669	handle = ERR_PTR(error: -EBUSY);
3670	}
3671	xa_unlock(&group->pasid_array);
3672
3673	return handle;
3674	}
3675	EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL");
3676
3677	/**
3678	* iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group
3679	* @domain: IOMMU domain to attach
3680	* @group: IOMMU group that will be attached
3681	* @handle: attach handle
3682	*
3683	* Returns 0 on success and error code on failure.
3684	*
3685	* This is a variant of iommu_attach_group(). It allows the caller to provide
3686	* an attach handle and use it when the domain is attached. This is currently
3687	* used by IOMMUFD to deliver the I/O page faults.
3688	*
3689	* Caller should always provide a new handle to avoid race with the paths
3690	* that have lockless reference to handle.
3691	*/
3692	int iommu_attach_group_handle(struct iommu_domain *domain,
3693	struct iommu_group *group,
3694	struct iommu_attach_handle *handle)
3695	{
3696	void *entry;
3697	int ret;
3698
3699	if (!handle)
3700	return -EINVAL;
3701
3702	mutex_lock(&group->mutex);
3703	entry = iommu_make_pasid_array_entry(domain, handle);
3704	ret = xa_insert(xa: &group->pasid_array,
3705	IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL);
3706	if (ret)
3707	goto out_unlock;
3708
3709	ret = __iommu_attach_group(domain, group);
3710	if (ret) {
3711	xa_release(xa: &group->pasid_array, IOMMU_NO_PASID);
3712	goto out_unlock;
3713	}
3714
3715	/*
3716	* The xa_insert() above reserved the memory, and the group->mutex is
3717	* held, this cannot fail. The new domain cannot be visible until the
3718	* operation succeeds as we cannot tolerate PRIs becoming concurrently
3719	* queued and then failing attach.
3720	*/
3721	WARN_ON(xa_is_err(xa_store(&group->pasid_array,
3722	IOMMU_NO_PASID, entry, GFP_KERNEL)));
3723
3724	out_unlock:
3725	mutex_unlock(lock: &group->mutex);
3726	return ret;
3727	}
3728	EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL");
3729
3730	/**
3731	* iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group
3732	* @domain: IOMMU domain to attach
3733	* @group: IOMMU group that will be attached
3734	*
3735	* Detach the specified IOMMU domain from the specified IOMMU group.
3736	* It must be used in conjunction with iommu_attach_group_handle().
3737	*/
3738	void iommu_detach_group_handle(struct iommu_domain *domain,
3739	struct iommu_group *group)
3740	{
3741	mutex_lock(&group->mutex);
3742	__iommu_group_set_core_domain(group);
3743	xa_erase(&group->pasid_array, IOMMU_NO_PASID);
3744	mutex_unlock(lock: &group->mutex);
3745	}
3746	EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL");
3747
3748	/**
3749	* iommu_replace_group_handle - replace the domain that a group is attached to
3750	* @group: IOMMU group that will be attached to the new domain
3751	* @new_domain: new IOMMU domain to replace with
3752	* @handle: attach handle
3753	*
3754	* This API allows the group to switch domains without being forced to go to
3755	* the blocking domain in-between. It allows the caller to provide an attach
3756	* handle for the new domain and use it when the domain is attached.
3757	*
3758	* If the currently attached domain is a core domain (e.g. a default_domain),
3759	* it will act just like the iommu_attach_group_handle().
3760	*
3761	* Caller should always provide a new handle to avoid race with the paths
3762	* that have lockless reference to handle.
3763	*/
3764	int iommu_replace_group_handle(struct iommu_group *group,
3765	struct iommu_domain *new_domain,
3766	struct iommu_attach_handle *handle)
3767	{
3768	void curr, entry;
3769	int ret;
3770
3771	if (!new_domain \|\| !handle)
3772	return -EINVAL;
3773
3774	mutex_lock(&group->mutex);
3775	entry = iommu_make_pasid_array_entry(domain: new_domain, handle);
3776	ret = xa_reserve(xa: &group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
3777	if (ret)
3778	goto err_unlock;
3779
3780	ret = __iommu_group_set_domain(group, new_domain);
3781	if (ret)
3782	goto err_release;
3783
3784	curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL);
3785	WARN_ON(xa_is_err(curr));
3786
3787	mutex_unlock(lock: &group->mutex);
3788
3789	return `0`;
3790	err_release:
3791	xa_release(xa: &group->pasid_array, IOMMU_NO_PASID);
3792	err_unlock:
3793	mutex_unlock(lock: &group->mutex);
3794	return ret;
3795	}
3796	EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
3797
3798	#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
3799	/**
3800	* iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
3801	* @desc: MSI descriptor, will store the MSI page
3802	* @msi_addr: MSI target address to be mapped
3803	*
3804	* The implementation of sw_msi() should take msi_addr and map it to
3805	* an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the
3806	* mapping information.
3807	*
3808	* Return: 0 on success or negative error code if the mapping failed.
3809	*/
3810	int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
3811	{
3812	struct device *dev = msi_desc_to_dev(desc);
3813	struct iommu_group *group = dev->iommu_group;
3814	int ret = `0`;
3815
3816	if (!group)
3817	return `0`;
3818
3819	mutex_lock(&group->mutex);
3820	/ An IDENTITY domain must pass through /
3821	if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) {
3822	switch (group->domain->cookie_type) {
3823	case IOMMU_COOKIE_DMA_MSI:
3824	case IOMMU_COOKIE_DMA_IOVA:
3825	ret = iommu_dma_sw_msi(group->domain, desc, msi_addr);
3826	break;
3827	case IOMMU_COOKIE_IOMMUFD:
3828	ret = iommufd_sw_msi(group->domain, desc, msi_addr);
3829	break;
3830	default:
3831	ret = -EOPNOTSUPP;
3832	break;
3833	}
3834	}
3835	mutex_unlock(&group->mutex);
3836	return ret;
3837	}
3838	#endif /* CONFIG_IRQ_MSI_IOMMU */
3839

source code of linux/drivers/iommu/iommu.c