| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright (C) 2025 Ant Group |
| 4 | * Author: Tiwei Bie <tiwei.btw@antgroup.com> |
| 5 | */ |
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <linux/limits.h>
#include <linux/vfio.h>
#include <linux/pci_regs.h>
#include <as-layout.h>
#include <um_malloc.h>
| 20 | |
| 21 | #include "vfio_user.h" |
| 22 | |
/*
 * Open the VFIO container device node and check that it is usable.
 *
 * The container must report VFIO_API_VERSION and must advertise the
 * VFIO_TYPE1_IOMMU extension.
 *
 * Returns the container file descriptor on success. On failure the
 * descriptor is closed and a negative errno value is returned; -EINVAL is
 * used when a check fails although the ioctl itself did not report an error.
 */
int uml_vfio_user_open_container(void)
{
	int container, ret;

	container = open("/dev/vfio/vfio", O_RDWR);
	if (container < 0)
		return -errno;

	ret = ioctl(container, VFIO_GET_API_VERSION);
	if (ret < 0) {
		ret = -errno;
		goto close_container;
	}
	if (ret != VFIO_API_VERSION) {
		ret = -EINVAL;
		goto close_container;
	}

	ret = ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
	if (ret < 0) {
		ret = -errno;
		goto close_container;
	}
	if (ret == 0) {
		/* The ioctl succeeded but the extension is not available. */
		ret = -EINVAL;
		goto close_container;
	}

	return container;

close_container:
	close(container);
	return ret;
}
| 49 | |
| 50 | int uml_vfio_user_setup_iommu(int container) |
| 51 | { |
| 52 | /* |
| 53 | * This is a bit tricky. See the big comment in |
| 54 | * vhost_user_set_mem_table() in virtio_uml.c. |
| 55 | */ |
| 56 | unsigned long reserved = uml_reserved - uml_physmem; |
| 57 | struct vfio_iommu_type1_dma_map dma_map = { |
| 58 | .argsz = sizeof(dma_map), |
| 59 | .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, |
| 60 | .vaddr = uml_reserved, |
| 61 | .iova = reserved, |
| 62 | .size = physmem_size - reserved, |
| 63 | }; |
| 64 | |
| 65 | if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0) |
| 66 | return -errno; |
| 67 | |
| 68 | if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0) |
| 69 | return -errno; |
| 70 | |
| 71 | return 0; |
| 72 | } |
| 73 | |
| 74 | int uml_vfio_user_get_group_id(const char *device) |
| 75 | { |
| 76 | char *path, *buf, *end; |
| 77 | const char *name; |
| 78 | int r; |
| 79 | |
| 80 | path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); |
| 81 | if (!path) |
| 82 | return -ENOMEM; |
| 83 | |
| 84 | sprintf(buf: path, fmt: "/sys/bus/pci/devices/%s/iommu_group" , device); |
| 85 | |
| 86 | buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL); |
| 87 | if (!buf) { |
| 88 | r = -ENOMEM; |
| 89 | goto free_path; |
| 90 | } |
| 91 | |
| 92 | r = readlink(path, buf, PATH_MAX); |
| 93 | if (r < 0) { |
| 94 | r = -errno; |
| 95 | goto free_buf; |
| 96 | } |
| 97 | buf[r] = '\0'; |
| 98 | |
| 99 | name = basename(buf); |
| 100 | |
| 101 | r = strtoul(name, &end, 10); |
| 102 | if (*end != '\0' || end == name) { |
| 103 | r = -EINVAL; |
| 104 | goto free_buf; |
| 105 | } |
| 106 | |
| 107 | free_buf: |
| 108 | kfree(objp: buf); |
| 109 | free_path: |
| 110 | kfree(objp: path); |
| 111 | return r; |
| 112 | } |
| 113 | |
| 114 | int uml_vfio_user_open_group(int group_id) |
| 115 | { |
| 116 | char *path; |
| 117 | int fd; |
| 118 | |
| 119 | path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); |
| 120 | if (!path) |
| 121 | return -ENOMEM; |
| 122 | |
| 123 | sprintf(buf: path, fmt: "/dev/vfio/%d" , group_id); |
| 124 | |
| 125 | fd = open(path, O_RDWR); |
| 126 | if (fd < 0) { |
| 127 | fd = -errno; |
| 128 | goto out; |
| 129 | } |
| 130 | |
| 131 | out: |
| 132 | kfree(objp: path); |
| 133 | return fd; |
| 134 | } |
| 135 | |
/*
 * Attach @group to @container with VFIO_GROUP_SET_CONTAINER.
 *
 * Returns 0 on success or a negative errno value.
 */
int uml_vfio_user_set_container(int container, int group)
{
	int ret = ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);

	return ret < 0 ? -errno : 0;
}
| 142 | |
/*
 * Detach @group from its container with VFIO_GROUP_UNSET_CONTAINER.
 * The address of @container is handed to the ioctl as its argument.
 *
 * Returns 0 on success or a negative errno value.
 */
int uml_vfio_user_unset_container(int container, int group)
{
	int ret = ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container);

	return ret < 0 ? -errno : 0;
}
| 149 | |
| 150 | static int vfio_set_irqs(int device, int start, int count, int *irqfd) |
| 151 | { |
| 152 | struct vfio_irq_set *irq_set; |
| 153 | int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count; |
| 154 | int err = 0; |
| 155 | |
| 156 | irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL); |
| 157 | if (!irq_set) |
| 158 | return -ENOMEM; |
| 159 | |
| 160 | irq_set->argsz = argsz; |
| 161 | irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; |
| 162 | irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; |
| 163 | irq_set->start = start; |
| 164 | irq_set->count = count; |
| 165 | memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count); |
| 166 | |
| 167 | if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { |
| 168 | err = -errno; |
| 169 | goto out; |
| 170 | } |
| 171 | |
| 172 | out: |
| 173 | kfree(objp: irq_set); |
| 174 | return err; |
| 175 | } |
| 176 | |
| 177 | int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev, |
| 178 | int group, const char *device) |
| 179 | { |
| 180 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; |
| 181 | struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; |
| 182 | int err, i; |
| 183 | |
| 184 | dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device); |
| 185 | if (dev->device < 0) |
| 186 | return -errno; |
| 187 | |
| 188 | if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) { |
| 189 | err = -errno; |
| 190 | goto close_device; |
| 191 | } |
| 192 | |
| 193 | dev->num_regions = device_info.num_regions; |
| 194 | if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1) |
| 195 | dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1; |
| 196 | |
| 197 | dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions, |
| 198 | UM_GFP_KERNEL); |
| 199 | if (!dev->region) { |
| 200 | err = -ENOMEM; |
| 201 | goto close_device; |
| 202 | } |
| 203 | |
| 204 | for (i = 0; i < dev->num_regions; i++) { |
| 205 | struct vfio_region_info region = { |
| 206 | .argsz = sizeof(region), |
| 207 | .index = i, |
| 208 | }; |
| 209 | if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, ®ion) < 0) { |
| 210 | err = -errno; |
| 211 | goto free_region; |
| 212 | } |
| 213 | dev->region[i].size = region.size; |
| 214 | dev->region[i].offset = region.offset; |
| 215 | } |
| 216 | |
| 217 | /* Only MSI-X is supported currently. */ |
| 218 | irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX; |
| 219 | if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { |
| 220 | err = -errno; |
| 221 | goto free_region; |
| 222 | } |
| 223 | |
| 224 | dev->irq_count = irq_info.count; |
| 225 | |
| 226 | dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL); |
| 227 | if (!dev->irqfd) { |
| 228 | err = -ENOMEM; |
| 229 | goto free_region; |
| 230 | } |
| 231 | |
| 232 | memset(dev->irqfd, -1, sizeof(int) * dev->irq_count); |
| 233 | |
| 234 | err = vfio_set_irqs(device: dev->device, start: 0, count: dev->irq_count, irqfd: dev->irqfd); |
| 235 | if (err) |
| 236 | goto free_irqfd; |
| 237 | |
| 238 | return 0; |
| 239 | |
| 240 | free_irqfd: |
| 241 | kfree(objp: dev->irqfd); |
| 242 | free_region: |
| 243 | kfree(objp: dev->region); |
| 244 | close_device: |
| 245 | close(dev->device); |
| 246 | return err; |
| 247 | } |
| 248 | |
| 249 | void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev) |
| 250 | { |
| 251 | kfree(objp: dev->irqfd); |
| 252 | kfree(objp: dev->region); |
| 253 | close(dev->device); |
| 254 | } |
| 255 | |
| 256 | int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index) |
| 257 | { |
| 258 | int irqfd; |
| 259 | |
| 260 | irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); |
| 261 | if (irqfd < 0) |
| 262 | return -errno; |
| 263 | |
| 264 | dev->irqfd[index] = irqfd; |
| 265 | return irqfd; |
| 266 | } |
| 267 | |
| 268 | void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index) |
| 269 | { |
| 270 | close(dev->irqfd[index]); |
| 271 | dev->irqfd[index] = -1; |
| 272 | } |
| 273 | |
| 274 | int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev) |
| 275 | { |
| 276 | return vfio_set_irqs(device: dev->device, start: 0, count: dev->irq_count, irqfd: dev->irqfd); |
| 277 | } |
| 278 | |
| 279 | static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index, |
| 280 | uint64_t offset, void *buf, uint64_t size) |
| 281 | { |
| 282 | if (index >= dev->num_regions || offset + size > dev->region[index].size) |
| 283 | return -EINVAL; |
| 284 | |
| 285 | if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0) |
| 286 | return -errno; |
| 287 | |
| 288 | return 0; |
| 289 | } |
| 290 | |
| 291 | static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index, |
| 292 | uint64_t offset, const void *buf, uint64_t size) |
| 293 | { |
| 294 | if (index >= dev->num_regions || offset + size > dev->region[index].size) |
| 295 | return -EINVAL; |
| 296 | |
| 297 | if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0) |
| 298 | return -errno; |
| 299 | |
| 300 | return 0; |
| 301 | } |
| 302 | |
/*
 * Read @size bytes at @offset from the device's PCI configuration space
 * region (VFIO_PCI_CONFIG_REGION_INDEX) into @buf.
 *
 * Returns 0 on success or a negative errno value; a negative @size yields
 * -EINVAL.
 */
int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
				unsigned int offset, void *buf, int size)
{
	/* A negative int would turn into a huge 64-bit length below. */
	if (size < 0)
		return -EINVAL;

	return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX,
				offset, buf, size);
}
| 309 | |
/*
 * Write @size bytes from @buf at @offset into the device's PCI configuration
 * space region (VFIO_PCI_CONFIG_REGION_INDEX).
 *
 * Returns 0 on success or a negative errno value; a negative @size yields
 * -EINVAL.
 */
int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
				 unsigned int offset, const void *buf, int size)
{
	/* A negative int would turn into a huge 64-bit length below. */
	if (size < 0)
		return -EINVAL;

	return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX,
				 offset, buf, size);
}
| 316 | |
/*
 * Read @size bytes at @offset from the device region whose index is @bar
 * into @buf.
 *
 * Returns 0 on success or a negative errno value; a negative @size yields
 * -EINVAL. A negative @bar becomes a large unsigned index and is rejected by
 * the region bounds check.
 */
int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
			   unsigned int offset, void *buf, int size)
{
	/* A negative int would turn into a huge 64-bit length below. */
	if (size < 0)
		return -EINVAL;

	return vfio_region_read(dev, bar, offset, buf, size);
}
| 322 | |
/*
 * Write @size bytes from @buf at @offset into the device region whose index
 * is @bar.
 *
 * Returns 0 on success or a negative errno value; a negative @size yields
 * -EINVAL. A negative @bar becomes a large unsigned index and is rejected by
 * the region bounds check.
 */
int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
			    unsigned int offset, const void *buf, int size)
{
	/* A negative int would turn into a huge 64-bit length below. */
	if (size < 0)
		return -EINVAL;

	return vfio_region_write(dev, bar, offset, buf, size);
}
| 328 | |