// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2025 Ant Group
 * Author: Tiwei Bie <tiwei.btw@antgroup.com>
 */
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <linux/limits.h>
#include <linux/vfio.h>
#include <linux/pci_regs.h>
#include <as-layout.h>
#include <um_malloc.h>

#include "vfio_user.h"
22
23int uml_vfio_user_open_container(void)
24{
25 int r, fd;
26
27 fd = open("/dev/vfio/vfio", O_RDWR);
28 if (fd < 0)
29 return -errno;
30
31 r = ioctl(fd, VFIO_GET_API_VERSION);
32 if (r != VFIO_API_VERSION) {
33 r = r < 0 ? -errno : -EINVAL;
34 goto error;
35 }
36
37 r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
38 if (r <= 0) {
39 r = r < 0 ? -errno : -EINVAL;
40 goto error;
41 }
42
43 return fd;
44
45error:
46 close(fd);
47 return r;
48}
49
50int uml_vfio_user_setup_iommu(int container)
51{
52 /*
53 * This is a bit tricky. See the big comment in
54 * vhost_user_set_mem_table() in virtio_uml.c.
55 */
56 unsigned long reserved = uml_reserved - uml_physmem;
57 struct vfio_iommu_type1_dma_map dma_map = {
58 .argsz = sizeof(dma_map),
59 .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
60 .vaddr = uml_reserved,
61 .iova = reserved,
62 .size = physmem_size - reserved,
63 };
64
65 if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
66 return -errno;
67
68 if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
69 return -errno;
70
71 return 0;
72}
73
74int uml_vfio_user_get_group_id(const char *device)
75{
76 char *path, *buf, *end;
77 const char *name;
78 int r;
79
80 path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
81 if (!path)
82 return -ENOMEM;
83
84 sprintf(buf: path, fmt: "/sys/bus/pci/devices/%s/iommu_group", device);
85
86 buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
87 if (!buf) {
88 r = -ENOMEM;
89 goto free_path;
90 }
91
92 r = readlink(path, buf, PATH_MAX);
93 if (r < 0) {
94 r = -errno;
95 goto free_buf;
96 }
97 buf[r] = '\0';
98
99 name = basename(buf);
100
101 r = strtoul(name, &end, 10);
102 if (*end != '\0' || end == name) {
103 r = -EINVAL;
104 goto free_buf;
105 }
106
107free_buf:
108 kfree(objp: buf);
109free_path:
110 kfree(objp: path);
111 return r;
112}
113
114int uml_vfio_user_open_group(int group_id)
115{
116 char *path;
117 int fd;
118
119 path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
120 if (!path)
121 return -ENOMEM;
122
123 sprintf(buf: path, fmt: "/dev/vfio/%d", group_id);
124
125 fd = open(path, O_RDWR);
126 if (fd < 0) {
127 fd = -errno;
128 goto out;
129 }
130
131out:
132 kfree(objp: path);
133 return fd;
134}
135
136int uml_vfio_user_set_container(int container, int group)
137{
138 if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0)
139 return -errno;
140 return 0;
141}
142
143int uml_vfio_user_unset_container(int container, int group)
144{
145 if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0)
146 return -errno;
147 return 0;
148}
149
150static int vfio_set_irqs(int device, int start, int count, int *irqfd)
151{
152 struct vfio_irq_set *irq_set;
153 int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
154 int err = 0;
155
156 irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
157 if (!irq_set)
158 return -ENOMEM;
159
160 irq_set->argsz = argsz;
161 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
162 irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
163 irq_set->start = start;
164 irq_set->count = count;
165 memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);
166
167 if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
168 err = -errno;
169 goto out;
170 }
171
172out:
173 kfree(objp: irq_set);
174 return err;
175}
176
177int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
178 int group, const char *device)
179{
180 struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
181 struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
182 int err, i;
183
184 dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
185 if (dev->device < 0)
186 return -errno;
187
188 if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
189 err = -errno;
190 goto close_device;
191 }
192
193 dev->num_regions = device_info.num_regions;
194 if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
195 dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;
196
197 dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
198 UM_GFP_KERNEL);
199 if (!dev->region) {
200 err = -ENOMEM;
201 goto close_device;
202 }
203
204 for (i = 0; i < dev->num_regions; i++) {
205 struct vfio_region_info region = {
206 .argsz = sizeof(region),
207 .index = i,
208 };
209 if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, &region) < 0) {
210 err = -errno;
211 goto free_region;
212 }
213 dev->region[i].size = region.size;
214 dev->region[i].offset = region.offset;
215 }
216
217 /* Only MSI-X is supported currently. */
218 irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
219 if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
220 err = -errno;
221 goto free_region;
222 }
223
224 dev->irq_count = irq_info.count;
225
226 dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
227 if (!dev->irqfd) {
228 err = -ENOMEM;
229 goto free_region;
230 }
231
232 memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);
233
234 err = vfio_set_irqs(device: dev->device, start: 0, count: dev->irq_count, irqfd: dev->irqfd);
235 if (err)
236 goto free_irqfd;
237
238 return 0;
239
240free_irqfd:
241 kfree(objp: dev->irqfd);
242free_region:
243 kfree(objp: dev->region);
244close_device:
245 close(dev->device);
246 return err;
247}
248
249void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
250{
251 kfree(objp: dev->irqfd);
252 kfree(objp: dev->region);
253 close(dev->device);
254}
255
256int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
257{
258 int irqfd;
259
260 irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
261 if (irqfd < 0)
262 return -errno;
263
264 dev->irqfd[index] = irqfd;
265 return irqfd;
266}
267
268void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
269{
270 close(dev->irqfd[index]);
271 dev->irqfd[index] = -1;
272}
273
274int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
275{
276 return vfio_set_irqs(device: dev->device, start: 0, count: dev->irq_count, irqfd: dev->irqfd);
277}
278
279static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
280 uint64_t offset, void *buf, uint64_t size)
281{
282 if (index >= dev->num_regions || offset + size > dev->region[index].size)
283 return -EINVAL;
284
285 if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
286 return -errno;
287
288 return 0;
289}
290
291static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
292 uint64_t offset, const void *buf, uint64_t size)
293{
294 if (index >= dev->num_regions || offset + size > dev->region[index].size)
295 return -EINVAL;
296
297 if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
298 return -errno;
299
300 return 0;
301}
302
303int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
304 unsigned int offset, void *buf, int size)
305{
306 return vfio_region_read(dev, index: VFIO_PCI_CONFIG_REGION_INDEX,
307 offset, buf, size);
308}
309
310int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
311 unsigned int offset, const void *buf, int size)
312{
313 return vfio_region_write(dev, index: VFIO_PCI_CONFIG_REGION_INDEX,
314 offset, buf, size);
315}
316
/*
 * Read @size bytes at @offset of BAR @bar into @buf.
 *
 * @bar is used directly as the VFIO region index.
 *
 * Returns the result of vfio_region_read(): 0 on success or a negative
 * errno value.
 */
int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
			   unsigned int offset, void *buf, int size)
{
	unsigned int region = bar;

	return vfio_region_read(dev, region, offset, buf, size);
}
322
/*
 * Write @size bytes from @buf at @offset of BAR @bar.
 *
 * @bar is used directly as the VFIO region index.
 *
 * Returns the result of vfio_region_write(): 0 on success or a negative
 * errno value.
 */
int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
			    unsigned int offset, const void *buf, int size)
{
	unsigned int region = bar;

	return vfio_region_write(dev, region, offset, buf, size);
}
328

/* source code of linux/arch/um/drivers/vfio_user.c */