// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2025 Ant Group
 * Author: Tiwei Bie <tiwei.btw@antgroup.com>
 */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <linux/limits.h>
#include <linux/vfio.h>
#include <linux/pci_regs.h>
#include <as-layout.h>
#include <um_malloc.h>

#include "vfio_user.h"

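/*
 * Open a new VFIO container (/dev/vfio/vfio) and make sure the host
 * speaks the expected VFIO API version and supports the Type1 IOMMU.
 */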
int uml_vfio_user_open_container(void)
{
	int r, fd;

	fd = open("/dev/vfio/vfio", O_RDWR);
	if (fd < 0)
		return -errno;

	r = ioctl(fd, VFIO_GET_API_VERSION);
	if (r != VFIO_API_VERSION) {
		r = r < 0 ? -errno : -EINVAL;
		goto error;
	}

	r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
	if (r <= 0) {
		r = r < 0 ? -errno : -EINVAL;
		goto error;
	}

	return fd;

error:
	close(fd);
	return r;
}

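/*
 * Enable the Type1 IOMMU on the container and map UML physical memory
 * above the reserved region for DMA, with the IOVA equal to the offset
 * within physmem.
 */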
int uml_vfio_user_setup_iommu(int container)
{
	/*
	 * This is a bit tricky. See the big comment in
	 * vhost_user_set_mem_table() in virtio_uml.c.
	 */
	unsigned long reserved = uml_reserved - uml_physmem;
	struct vfio_iommu_type1_dma_map dma_map = {
		.argsz = sizeof(dma_map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = uml_reserved,
		.iova = reserved,
		.size = physmem_size - reserved,
	};

	if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
		return -errno;

	if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
		return -errno;

	return 0;
}

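/*
 * Resolve the IOMMU group of a PCI device by following its
 * /sys/bus/pci/devices/<device>/iommu_group symlink and parsing the
 * group number from the link target.
 */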
int uml_vfio_user_get_group_id(const char *device)
{
	char *path, *buf, *end;
	const char *name;
	int r;

	path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
	if (!path)
		return -ENOMEM;

	sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device);

	buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
	if (!buf) {
		r = -ENOMEM;
		goto free_path;
	}

	r = readlink(path, buf, PATH_MAX);
	if (r < 0) {
		r = -errno;
		goto free_buf;
	}
	buf[r] = '\0';

	name = basename(buf);

	r = strtoul(name, &end, 10);
	if (*end != '\0' || end == name) {
		r = -EINVAL;
		goto free_buf;
	}

free_buf:
	kfree(buf);
free_path:
	kfree(path);
	return r;
}

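/* Open the group character device, /dev/vfio/<group_id>. */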
int uml_vfio_user_open_group(int group_id)
{
	char *path;
	int fd;

	path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
	if (!path)
		return -ENOMEM;

	sprintf(path, "/dev/vfio/%d", group_id);

	fd = open(path, O_RDWR);
	if (fd < 0) {
		fd = -errno;
		goto out;
	}

out:
	kfree(path);
	return fd;
}

int uml_vfio_user_set_container(int container, int group)
{
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0)
		return -errno;
	return 0;
}

int uml_vfio_user_unset_container(int container, int group)
{
	if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0)
		return -errno;
	return 0;
}

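/*
 * Program the MSI-X trigger eventfds for a range of vectors via
 * VFIO_DEVICE_SET_IRQS. An fd of -1 de-assigns the corresponding
 * vector (or leaves it unassigned).
 */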
static int vfio_set_irqs(int device, int start, int count, int *irqfd)
{
	struct vfio_irq_set *irq_set;
	int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
	int err = 0;

	irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
	if (!irq_set)
		return -ENOMEM;

	irq_set->argsz = argsz;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = start;
	irq_set->count = count;
	memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);

	if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
		err = -errno;
		goto out;
	}

out:
	kfree(irq_set);
	return err;
}

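/*
 * Get a device fd from the group, cache the offset and size of each
 * region up to the PCI config space region, and set up the MSI-X
 * eventfd table with all vectors initially unassigned (-1).
 */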
int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
			       int group, const char *device)
{
	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
	struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
	int err, i;

	dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
	if (dev->device < 0)
		return -errno;

	if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
		err = -errno;
		goto close_device;
	}

	dev->num_regions = device_info.num_regions;
	if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
		dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;

	dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
				  UM_GFP_KERNEL);
	if (!dev->region) {
		err = -ENOMEM;
		goto close_device;
	}

	for (i = 0; i < dev->num_regions; i++) {
		struct vfio_region_info region = {
			.argsz = sizeof(region),
			.index = i,
		};
		if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, &region) < 0) {
			err = -errno;
			goto free_region;
		}
		dev->region[i].size = region.size;
		dev->region[i].offset = region.offset;
	}

	/* Only MSI-X is supported currently. */
	irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
	if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
		err = -errno;
		goto free_region;
	}

	dev->irq_count = irq_info.count;

	dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
	if (!dev->irqfd) {
		err = -ENOMEM;
		goto free_region;
	}

	memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);

	err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
	if (err)
		goto free_irqfd;

	return 0;

free_irqfd:
	kfree(dev->irqfd);
free_region:
	kfree(dev->region);
close_device:
	close(dev->device);
	return err;
}

void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
{
	kfree(dev->irqfd);
	kfree(dev->region);
	close(dev->device);
}

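/*
 * Allocate a non-blocking eventfd for the given MSI-X vector and record
 * it in the irqfd table; uml_vfio_user_update_irqs() propagates the
 * table to the device.
 */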
int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
{
	int irqfd;

	irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (irqfd < 0)
		return -errno;

	dev->irqfd[index] = irqfd;
	return irqfd;
}

void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
{
	close(dev->irqfd[index]);
	dev->irqfd[index] = -1;
}

int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
{
	return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
}

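/*
 * Bounds-checked access to a device region. Each region is exposed
 * through the device fd at the offset reported by
 * VFIO_DEVICE_GET_REGION_INFO, so plain pread()/pwrite() work.
 */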
static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
			    uint64_t offset, void *buf, uint64_t size)
{
	if (index >= dev->num_regions || offset + size > dev->region[index].size)
		return -EINVAL;

	if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
		return -errno;

	return 0;
}

static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
			     uint64_t offset, const void *buf, uint64_t size)
{
	if (index >= dev->num_regions || offset + size > dev->region[index].size)
		return -EINVAL;

	if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
		return -errno;

	return 0;
}

int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
				unsigned int offset, void *buf, int size)
{
	return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX,
				offset, buf, size);
}

int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
				 unsigned int offset, const void *buf, int size)
{
	return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX,
				 offset, buf, size);
}

int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
			   unsigned int offset, void *buf, int size)
{
	return vfio_region_read(dev, bar, offset, buf, size);
}

int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
			    unsigned int offset, const void *buf, int size)
{
	return vfio_region_write(dev, bar, offset, buf, size);
}