| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * PCI Peer 2 Peer DMA support. |
| 4 | * |
| 5 | * Copyright (c) 2016-2018, Logan Gunthorpe |
| 6 | * Copyright (c) 2016-2017, Microsemi Corporation |
| 7 | * Copyright (c) 2017, Christoph Hellwig |
| 8 | * Copyright (c) 2018, Eideticom Inc. |
| 9 | */ |
| 10 | |
| 11 | #define pr_fmt(fmt) "pci-p2pdma: " fmt |
| 12 | #include <linux/ctype.h> |
| 13 | #include <linux/dma-map-ops.h> |
| 14 | #include <linux/pci-p2pdma.h> |
| 15 | #include <linux/module.h> |
| 16 | #include <linux/slab.h> |
| 17 | #include <linux/genalloc.h> |
| 18 | #include <linux/memremap.h> |
| 19 | #include <linux/percpu-refcount.h> |
| 20 | #include <linux/random.h> |
| 21 | #include <linux/seq_buf.h> |
| 22 | #include <linux/xarray.h> |
| 23 | |
| 24 | struct pci_p2pdma { |
| 25 | struct gen_pool *pool; |
| 26 | bool p2pmem_published; |
| 27 | struct xarray map_types; |
| 28 | struct p2pdma_provider mem[PCI_STD_NUM_BARS]; |
| 29 | }; |
| 30 | |
| 31 | struct pci_p2pdma_pagemap { |
| 32 | struct dev_pagemap pgmap; |
| 33 | struct p2pdma_provider *mem; |
| 34 | }; |
| 35 | |
| 36 | static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap) |
| 37 | { |
| 38 | return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap); |
| 39 | } |
| 40 | |
| 41 | static ssize_t size_show(struct device *dev, struct device_attribute *attr, |
| 42 | char *buf) |
| 43 | { |
| 44 | struct pci_dev *pdev = to_pci_dev(dev); |
| 45 | struct pci_p2pdma *p2pdma; |
| 46 | size_t size = 0; |
| 47 | |
| 48 | rcu_read_lock(); |
| 49 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 50 | if (p2pdma && p2pdma->pool) |
| 51 | size = gen_pool_size(p2pdma->pool); |
| 52 | rcu_read_unlock(); |
| 53 | |
| 54 | return sysfs_emit(buf, fmt: "%zd\n" , size); |
| 55 | } |
| 56 | static DEVICE_ATTR_RO(size); |
| 57 | |
| 58 | static ssize_t available_show(struct device *dev, struct device_attribute *attr, |
| 59 | char *buf) |
| 60 | { |
| 61 | struct pci_dev *pdev = to_pci_dev(dev); |
| 62 | struct pci_p2pdma *p2pdma; |
| 63 | size_t avail = 0; |
| 64 | |
| 65 | rcu_read_lock(); |
| 66 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 67 | if (p2pdma && p2pdma->pool) |
| 68 | avail = gen_pool_avail(p2pdma->pool); |
| 69 | rcu_read_unlock(); |
| 70 | |
| 71 | return sysfs_emit(buf, fmt: "%zd\n" , avail); |
| 72 | } |
| 73 | static DEVICE_ATTR_RO(available); |
| 74 | |
| 75 | static ssize_t published_show(struct device *dev, struct device_attribute *attr, |
| 76 | char *buf) |
| 77 | { |
| 78 | struct pci_dev *pdev = to_pci_dev(dev); |
| 79 | struct pci_p2pdma *p2pdma; |
| 80 | bool published = false; |
| 81 | |
| 82 | rcu_read_lock(); |
| 83 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 84 | if (p2pdma) |
| 85 | published = p2pdma->p2pmem_published; |
| 86 | rcu_read_unlock(); |
| 87 | |
| 88 | return sysfs_emit(buf, fmt: "%d\n" , published); |
| 89 | } |
| 90 | static DEVICE_ATTR_RO(published); |
| 91 | |
| 92 | static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj, |
| 93 | const struct bin_attribute *attr, struct vm_area_struct *vma) |
| 94 | { |
| 95 | struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); |
| 96 | size_t len = vma->vm_end - vma->vm_start; |
| 97 | struct pci_p2pdma *p2pdma; |
| 98 | struct percpu_ref *ref; |
| 99 | unsigned long vaddr; |
| 100 | void *kaddr; |
| 101 | int ret; |
| 102 | |
| 103 | /* prevent private mappings from being established */ |
| 104 | if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { |
| 105 | pci_info_ratelimited(pdev, |
| 106 | "%s: fail, attempted private mapping\n" , |
| 107 | current->comm); |
| 108 | return -EINVAL; |
| 109 | } |
| 110 | |
| 111 | if (vma->vm_pgoff) { |
| 112 | pci_info_ratelimited(pdev, |
| 113 | "%s: fail, attempted mapping with non-zero offset\n" , |
| 114 | current->comm); |
| 115 | return -EINVAL; |
| 116 | } |
| 117 | |
| 118 | rcu_read_lock(); |
| 119 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 120 | if (!p2pdma) { |
| 121 | ret = -ENODEV; |
| 122 | goto out; |
| 123 | } |
| 124 | |
| 125 | kaddr = (void *)gen_pool_alloc_owner(pool: p2pdma->pool, size: len, owner: (void **)&ref); |
| 126 | if (!kaddr) { |
| 127 | ret = -ENOMEM; |
| 128 | goto out; |
| 129 | } |
| 130 | |
| 131 | /* |
| 132 | * vm_insert_page() can sleep, so a reference is taken to mapping |
| 133 | * such that rcu_read_unlock() can be done before inserting the |
| 134 | * pages |
| 135 | */ |
| 136 | if (unlikely(!percpu_ref_tryget_live_rcu(ref))) { |
| 137 | ret = -ENODEV; |
| 138 | goto out_free_mem; |
| 139 | } |
| 140 | rcu_read_unlock(); |
| 141 | |
| 142 | for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { |
| 143 | struct page *page = virt_to_page(kaddr); |
| 144 | |
| 145 | /* |
| 146 | * Initialise the refcount for the freshly allocated page. As |
| 147 | * we have just allocated the page no one else should be |
| 148 | * using it. |
| 149 | */ |
| 150 | VM_WARN_ON_ONCE_PAGE(!page_ref_count(page), page); |
| 151 | set_page_count(page, v: 1); |
| 152 | ret = vm_insert_page(vma, addr: vaddr, page); |
| 153 | if (ret) { |
| 154 | gen_pool_free(pool: p2pdma->pool, addr: (uintptr_t)kaddr, size: len); |
| 155 | return ret; |
| 156 | } |
| 157 | percpu_ref_get(ref); |
| 158 | put_page(page); |
| 159 | kaddr += PAGE_SIZE; |
| 160 | len -= PAGE_SIZE; |
| 161 | } |
| 162 | |
| 163 | percpu_ref_put(ref); |
| 164 | |
| 165 | return 0; |
| 166 | out_free_mem: |
| 167 | gen_pool_free(pool: p2pdma->pool, addr: (uintptr_t)kaddr, size: len); |
| 168 | out: |
| 169 | rcu_read_unlock(); |
| 170 | return ret; |
| 171 | } |
| 172 | |
| 173 | static const struct bin_attribute p2pmem_alloc_attr = { |
| 174 | .attr = { .name = "allocate" , .mode = 0660 }, |
| 175 | .mmap = p2pmem_alloc_mmap, |
| 176 | /* |
| 177 | * Some places where we want to call mmap (ie. python) will check |
| 178 | * that the file size is greater than the mmap size before allowing |
| 179 | * the mmap to continue. To work around this, just set the size |
| 180 | * to be very large. |
| 181 | */ |
| 182 | .size = SZ_1T, |
| 183 | }; |
| 184 | |
| 185 | static struct attribute *p2pmem_attrs[] = { |
| 186 | &dev_attr_size.attr, |
| 187 | &dev_attr_available.attr, |
| 188 | &dev_attr_published.attr, |
| 189 | NULL, |
| 190 | }; |
| 191 | |
| 192 | static const struct bin_attribute *const p2pmem_bin_attrs[] = { |
| 193 | &p2pmem_alloc_attr, |
| 194 | NULL, |
| 195 | }; |
| 196 | |
| 197 | static const struct attribute_group p2pmem_group = { |
| 198 | .attrs = p2pmem_attrs, |
| 199 | .bin_attrs = p2pmem_bin_attrs, |
| 200 | .name = "p2pmem" , |
| 201 | }; |
| 202 | |
| 203 | static void p2pdma_folio_free(struct folio *folio) |
| 204 | { |
| 205 | struct page *page = &folio->page; |
| 206 | struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(pgmap: page_pgmap(page)); |
| 207 | /* safe to dereference while a reference is held to the percpu ref */ |
| 208 | struct pci_p2pdma *p2pdma = rcu_dereference_protected( |
| 209 | to_pci_dev(pgmap->mem->owner)->p2pdma, 1); |
| 210 | struct percpu_ref *ref; |
| 211 | |
| 212 | gen_pool_free_owner(pool: p2pdma->pool, addr: (uintptr_t)page_to_virt(page), |
| 213 | PAGE_SIZE, owner: (void **)&ref); |
| 214 | percpu_ref_put(ref); |
| 215 | } |
| 216 | |
| 217 | static const struct dev_pagemap_ops p2pdma_pgmap_ops = { |
| 218 | .folio_free = p2pdma_folio_free, |
| 219 | }; |
| 220 | |
| 221 | static void pci_p2pdma_release(void *data) |
| 222 | { |
| 223 | struct pci_dev *pdev = data; |
| 224 | struct pci_p2pdma *p2pdma; |
| 225 | |
| 226 | p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); |
| 227 | if (!p2pdma) |
| 228 | return; |
| 229 | |
| 230 | /* Flush and disable pci_alloc_p2p_mem() */ |
| 231 | pdev->p2pdma = NULL; |
| 232 | if (p2pdma->pool) |
| 233 | synchronize_rcu(); |
| 234 | xa_destroy(&p2pdma->map_types); |
| 235 | |
| 236 | if (!p2pdma->pool) |
| 237 | return; |
| 238 | |
| 239 | gen_pool_destroy(p2pdma->pool); |
| 240 | sysfs_remove_group(kobj: &pdev->dev.kobj, grp: &p2pmem_group); |
| 241 | } |
| 242 | |
| 243 | /** |
| 244 | * pcim_p2pdma_init - Initialise peer-to-peer DMA providers |
| 245 | * @pdev: The PCI device to enable P2PDMA for |
| 246 | * |
| 247 | * This function initializes the peer-to-peer DMA infrastructure |
| 248 | * for a PCI device. It allocates and sets up the necessary data |
| 249 | * structures to support P2PDMA operations, including mapping type |
| 250 | * tracking. |
| 251 | */ |
| 252 | int pcim_p2pdma_init(struct pci_dev *pdev) |
| 253 | { |
| 254 | struct pci_p2pdma *p2p; |
| 255 | int i, ret; |
| 256 | |
| 257 | p2p = rcu_dereference_protected(pdev->p2pdma, 1); |
| 258 | if (p2p) |
| 259 | return 0; |
| 260 | |
| 261 | p2p = devm_kzalloc(dev: &pdev->dev, size: sizeof(*p2p), GFP_KERNEL); |
| 262 | if (!p2p) |
| 263 | return -ENOMEM; |
| 264 | |
| 265 | xa_init(xa: &p2p->map_types); |
| 266 | /* |
| 267 | * Iterate over all standard PCI BARs and record only those that |
| 268 | * correspond to MMIO regions. Skip non-memory resources (e.g. I/O |
| 269 | * port BARs) since they cannot be used for peer-to-peer (P2P) |
| 270 | * transactions. |
| 271 | */ |
| 272 | for (i = 0; i < PCI_STD_NUM_BARS; i++) { |
| 273 | if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) |
| 274 | continue; |
| 275 | |
| 276 | p2p->mem[i].owner = &pdev->dev; |
| 277 | p2p->mem[i].bus_offset = |
| 278 | pci_bus_address(pdev, bar: i) - pci_resource_start(pdev, i); |
| 279 | } |
| 280 | |
| 281 | ret = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); |
| 282 | if (ret) |
| 283 | goto out_p2p; |
| 284 | |
| 285 | rcu_assign_pointer(pdev->p2pdma, p2p); |
| 286 | return 0; |
| 287 | |
| 288 | out_p2p: |
| 289 | devm_kfree(dev: &pdev->dev, p: p2p); |
| 290 | return ret; |
| 291 | } |
| 292 | EXPORT_SYMBOL_GPL(pcim_p2pdma_init); |
| 293 | |
| 294 | /** |
| 295 | * pcim_p2pdma_provider - Get peer-to-peer DMA provider |
| 296 | * @pdev: The PCI device to enable P2PDMA for |
| 297 | * @bar: BAR index to get provider |
| 298 | * |
| 299 | * This function gets peer-to-peer DMA provider for a PCI device. The lifetime |
| 300 | * of the provider (and of course the MMIO) is bound to the lifetime of the |
| 301 | * driver. A driver calling this function must ensure that all references to the |
| 302 | * provider, and any DMA mappings created for any MMIO, are all cleaned up |
| 303 | * before the driver remove() completes. |
| 304 | * |
| 305 | * Since P2P is almost always shared with a second driver this means some system |
| 306 | * to notify, invalidate and revoke the MMIO's DMA must be in place to use this |
| 307 | * function. For example a revoke can be built using DMABUF. |
| 308 | */ |
| 309 | struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, int bar) |
| 310 | { |
| 311 | struct pci_p2pdma *p2p; |
| 312 | |
| 313 | if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) |
| 314 | return NULL; |
| 315 | |
| 316 | p2p = rcu_dereference_protected(pdev->p2pdma, 1); |
| 317 | if (WARN_ON(!p2p)) |
| 318 | /* Someone forgot to call to pcim_p2pdma_init() before */ |
| 319 | return NULL; |
| 320 | |
| 321 | return &p2p->mem[bar]; |
| 322 | } |
| 323 | EXPORT_SYMBOL_GPL(pcim_p2pdma_provider); |
| 324 | |
| 325 | static int pci_p2pdma_setup_pool(struct pci_dev *pdev) |
| 326 | { |
| 327 | struct pci_p2pdma *p2pdma; |
| 328 | int ret; |
| 329 | |
| 330 | p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); |
| 331 | if (p2pdma->pool) |
| 332 | /* We already setup pools, do nothing, */ |
| 333 | return 0; |
| 334 | |
| 335 | p2pdma->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(dev: &pdev->dev)); |
| 336 | if (!p2pdma->pool) |
| 337 | return -ENOMEM; |
| 338 | |
| 339 | ret = sysfs_create_group(kobj: &pdev->dev.kobj, grp: &p2pmem_group); |
| 340 | if (ret) |
| 341 | goto out_pool_destroy; |
| 342 | |
| 343 | return 0; |
| 344 | |
| 345 | out_pool_destroy: |
| 346 | gen_pool_destroy(p2pdma->pool); |
| 347 | p2pdma->pool = NULL; |
| 348 | return ret; |
| 349 | } |
| 350 | |
| 351 | static void pci_p2pdma_unmap_mappings(void *data) |
| 352 | { |
| 353 | struct pci_p2pdma_pagemap *p2p_pgmap = data; |
| 354 | |
| 355 | /* |
| 356 | * Removing the alloc attribute from sysfs will call |
| 357 | * unmap_mapping_range() on the inode, teardown any existing userspace |
| 358 | * mappings and prevent new ones from being created. |
| 359 | */ |
| 360 | sysfs_remove_file_from_group(kobj: &p2p_pgmap->mem->owner->kobj, |
| 361 | attr: &p2pmem_alloc_attr.attr, |
| 362 | group: p2pmem_group.name); |
| 363 | } |
| 364 | |
| 365 | /** |
| 366 | * pci_p2pdma_add_resource - add memory for use as p2p memory |
| 367 | * @pdev: the device to add the memory to |
| 368 | * @bar: PCI BAR to add |
| 369 | * @size: size of the memory to add, may be zero to use the whole BAR |
| 370 | * @offset: offset into the PCI BAR |
| 371 | * |
| 372 | * The memory will be given ZONE_DEVICE struct pages so that it may |
| 373 | * be used with any DMA request. |
| 374 | */ |
| 375 | int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, |
| 376 | u64 offset) |
| 377 | { |
| 378 | struct pci_p2pdma_pagemap *p2p_pgmap; |
| 379 | struct p2pdma_provider *mem; |
| 380 | struct dev_pagemap *pgmap; |
| 381 | struct pci_p2pdma *p2pdma; |
| 382 | void *addr; |
| 383 | int error; |
| 384 | |
| 385 | if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) |
| 386 | return -EINVAL; |
| 387 | |
| 388 | if (offset >= pci_resource_len(pdev, bar)) |
| 389 | return -EINVAL; |
| 390 | |
| 391 | if (!size) |
| 392 | size = pci_resource_len(pdev, bar) - offset; |
| 393 | |
| 394 | if (size + offset > pci_resource_len(pdev, bar)) |
| 395 | return -EINVAL; |
| 396 | |
| 397 | error = pcim_p2pdma_init(pdev); |
| 398 | if (error) |
| 399 | return error; |
| 400 | |
| 401 | error = pci_p2pdma_setup_pool(pdev); |
| 402 | if (error) |
| 403 | return error; |
| 404 | |
| 405 | mem = pcim_p2pdma_provider(pdev, bar); |
| 406 | /* |
| 407 | * We checked validity of BAR prior to call |
| 408 | * to pcim_p2pdma_provider. It should never return NULL. |
| 409 | */ |
| 410 | if (WARN_ON(!mem)) |
| 411 | return -EINVAL; |
| 412 | |
| 413 | p2p_pgmap = devm_kzalloc(dev: &pdev->dev, size: sizeof(*p2p_pgmap), GFP_KERNEL); |
| 414 | if (!p2p_pgmap) |
| 415 | return -ENOMEM; |
| 416 | |
| 417 | pgmap = &p2p_pgmap->pgmap; |
| 418 | pgmap->range.start = pci_resource_start(pdev, bar) + offset; |
| 419 | pgmap->range.end = pgmap->range.start + size - 1; |
| 420 | pgmap->nr_range = 1; |
| 421 | pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; |
| 422 | pgmap->ops = &p2pdma_pgmap_ops; |
| 423 | p2p_pgmap->mem = mem; |
| 424 | |
| 425 | addr = devm_memremap_pages(dev: &pdev->dev, pgmap); |
| 426 | if (IS_ERR(ptr: addr)) { |
| 427 | error = PTR_ERR(ptr: addr); |
| 428 | goto pgmap_free; |
| 429 | } |
| 430 | |
| 431 | error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings, |
| 432 | p2p_pgmap); |
| 433 | if (error) |
| 434 | goto pages_free; |
| 435 | |
| 436 | p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); |
| 437 | error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr, |
| 438 | pci_bus_address(pdev, bar) + offset, |
| 439 | range_len(range: &pgmap->range), dev_to_node(dev: &pdev->dev), |
| 440 | &pgmap->ref); |
| 441 | if (error) |
| 442 | goto pages_free; |
| 443 | |
| 444 | pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n" , |
| 445 | pgmap->range.start, pgmap->range.end); |
| 446 | |
| 447 | return 0; |
| 448 | |
| 449 | pages_free: |
| 450 | devm_memunmap_pages(dev: &pdev->dev, pgmap); |
| 451 | pgmap_free: |
| 452 | devm_kfree(dev: &pdev->dev, p: p2p_pgmap); |
| 453 | return error; |
| 454 | } |
| 455 | EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource); |
| 456 | |
| 457 | /* |
| 458 | * Note this function returns the parent PCI device with a |
| 459 | * reference taken. It is the caller's responsibility to drop |
| 460 | * the reference. |
| 461 | */ |
| 462 | static struct pci_dev *find_parent_pci_dev(struct device *dev) |
| 463 | { |
| 464 | struct device *parent; |
| 465 | |
| 466 | dev = get_device(dev); |
| 467 | |
| 468 | while (dev) { |
| 469 | if (dev_is_pci(dev)) |
| 470 | return to_pci_dev(dev); |
| 471 | |
| 472 | parent = get_device(dev: dev->parent); |
| 473 | put_device(dev); |
| 474 | dev = parent; |
| 475 | } |
| 476 | |
| 477 | return NULL; |
| 478 | } |
| 479 | |
| 480 | /* |
| 481 | * Check if a PCI bridge has its ACS redirection bits set to redirect P2P |
| 482 | * TLPs upstream via ACS. Returns 1 if the packets will be redirected |
| 483 | * upstream, 0 otherwise. |
| 484 | */ |
| 485 | static int pci_bridge_has_acs_redir(struct pci_dev *pdev) |
| 486 | { |
| 487 | int pos; |
| 488 | u16 ctrl; |
| 489 | |
| 490 | pos = pdev->acs_cap; |
| 491 | if (!pos) |
| 492 | return 0; |
| 493 | |
| 494 | pci_read_config_word(dev: pdev, where: pos + PCI_ACS_CTRL, val: &ctrl); |
| 495 | |
| 496 | if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC)) |
| 497 | return 1; |
| 498 | |
| 499 | return 0; |
| 500 | } |
| 501 | |
/* Append "<pci name>;" for @pdev to @buf; does nothing when @buf is NULL. */
static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
{
	if (buf)
		seq_buf_printf(buf, "%s;", pci_name(pdev));
}
| 509 | |
/*
 * Report whether the CPU itself is known to support p2pdma. Always false
 * when CONFIG_X86 is not set.
 */
static bool cpu_supports_p2pdma(void)
{
	bool supported = false;

#ifdef CONFIG_X86
	const struct cpuinfo_x86 *cpu = &cpu_data(0);

	/* Any AMD CPU whose family ID is Zen or newer supports p2pdma */
	supported = cpu->x86_vendor == X86_VENDOR_AMD && cpu->x86 >= 0x17;
#endif

	return supported;
}
| 522 | |
| 523 | static const struct pci_p2pdma_whitelist_entry { |
| 524 | unsigned short vendor; |
| 525 | unsigned short device; |
| 526 | enum { |
| 527 | REQ_SAME_HOST_BRIDGE = 1 << 0, |
| 528 | } flags; |
| 529 | } pci_p2pdma_whitelist[] = { |
| 530 | /* Intel Xeon E5/Core i7 */ |
| 531 | {PCI_VENDOR_ID_INTEL, 0x3c00, REQ_SAME_HOST_BRIDGE}, |
| 532 | {PCI_VENDOR_ID_INTEL, 0x3c01, REQ_SAME_HOST_BRIDGE}, |
| 533 | /* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */ |
| 534 | {PCI_VENDOR_ID_INTEL, 0x2f00, REQ_SAME_HOST_BRIDGE}, |
| 535 | {PCI_VENDOR_ID_INTEL, 0x2f01, REQ_SAME_HOST_BRIDGE}, |
| 536 | /* Intel Skylake-E */ |
| 537 | {PCI_VENDOR_ID_INTEL, 0x2030, 0}, |
| 538 | {PCI_VENDOR_ID_INTEL, 0x2031, 0}, |
| 539 | {PCI_VENDOR_ID_INTEL, 0x2032, 0}, |
| 540 | {PCI_VENDOR_ID_INTEL, 0x2033, 0}, |
| 541 | {PCI_VENDOR_ID_INTEL, 0x2020, 0}, |
| 542 | {PCI_VENDOR_ID_INTEL, 0x09a2, 0}, |
| 543 | {} |
| 544 | }; |
| 545 | |
| 546 | /* |
| 547 | * If the first device on host's root bus is either devfn 00.0 or a PCIe |
| 548 | * Root Port, return it. Otherwise return NULL. |
| 549 | * |
| 550 | * We often use a devfn 00.0 "host bridge" in the pci_p2pdma_whitelist[] |
| 551 | * (though there is no PCI/PCIe requirement for such a device). On some |
| 552 | * platforms, e.g., Intel Skylake, there is no such host bridge device, and |
| 553 | * pci_p2pdma_whitelist[] may contain a Root Port at any devfn. |
| 554 | * |
| 555 | * This function is similar to pci_get_slot(host->bus, 0), but it does |
| 556 | * not take the pci_bus_sem lock since __host_bridge_whitelist() must not |
| 557 | * sleep. |
| 558 | * |
| 559 | * For this to be safe, the caller should hold a reference to a device on the |
| 560 | * bridge, which should ensure the host_bridge device will not be freed |
| 561 | * or removed from the head of the devices list. |
| 562 | */ |
| 563 | static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host) |
| 564 | { |
| 565 | struct pci_dev *root; |
| 566 | |
| 567 | root = list_first_entry_or_null(&host->bus->devices, |
| 568 | struct pci_dev, bus_list); |
| 569 | |
| 570 | if (!root) |
| 571 | return NULL; |
| 572 | |
| 573 | if (root->devfn == PCI_DEVFN(0, 0)) |
| 574 | return root; |
| 575 | |
| 576 | if (pci_pcie_type(dev: root) == PCI_EXP_TYPE_ROOT_PORT) |
| 577 | return root; |
| 578 | |
| 579 | return NULL; |
| 580 | } |
| 581 | |
| 582 | static bool __host_bridge_whitelist(struct pci_host_bridge *host, |
| 583 | bool same_host_bridge, bool warn) |
| 584 | { |
| 585 | struct pci_dev *root = pci_host_bridge_dev(host); |
| 586 | const struct pci_p2pdma_whitelist_entry *entry; |
| 587 | unsigned short vendor, device; |
| 588 | |
| 589 | if (!root) |
| 590 | return false; |
| 591 | |
| 592 | vendor = root->vendor; |
| 593 | device = root->device; |
| 594 | |
| 595 | for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) { |
| 596 | if (vendor != entry->vendor || device != entry->device) |
| 597 | continue; |
| 598 | if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge) |
| 599 | return false; |
| 600 | |
| 601 | return true; |
| 602 | } |
| 603 | |
| 604 | if (warn) |
| 605 | pci_warn(root, "Host bridge not in P2PDMA whitelist: %04x:%04x\n" , |
| 606 | vendor, device); |
| 607 | |
| 608 | return false; |
| 609 | } |
| 610 | |
| 611 | /* |
| 612 | * If we can't find a common upstream bridge take a look at the root |
| 613 | * complex and compare it to a whitelist of known good hardware. |
| 614 | */ |
| 615 | static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b, |
| 616 | bool warn) |
| 617 | { |
| 618 | struct pci_host_bridge *host_a = pci_find_host_bridge(bus: a->bus); |
| 619 | struct pci_host_bridge *host_b = pci_find_host_bridge(bus: b->bus); |
| 620 | |
| 621 | if (host_a == host_b) |
| 622 | return __host_bridge_whitelist(host: host_a, same_host_bridge: true, warn); |
| 623 | |
| 624 | if (__host_bridge_whitelist(host: host_a, same_host_bridge: false, warn) && |
| 625 | __host_bridge_whitelist(host: host_b, same_host_bridge: false, warn)) |
| 626 | return true; |
| 627 | |
| 628 | return false; |
| 629 | } |
| 630 | |
| 631 | static unsigned long map_types_idx(struct pci_dev *client) |
| 632 | { |
| 633 | return (pci_domain_nr(bus: client->bus) << 16) | pci_dev_id(dev: client); |
| 634 | } |
| 635 | |
| 636 | /* |
| 637 | * Calculate the P2PDMA mapping type and distance between two PCI devices. |
| 638 | * |
| 639 | * If the two devices are the same PCI function, return |
| 640 | * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 0. |
| 641 | * |
| 642 | * If they are two functions of the same device, return |
| 643 | * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 2 (one hop up to the bridge, |
| 644 | * then one hop back down to another function of the same device). |
| 645 | * |
| 646 | * In the case where two devices are connected to the same PCIe switch, |
| 647 | * return a distance of 4. This corresponds to the following PCI tree: |
| 648 | * |
| 649 | * -+ Root Port |
| 650 | * \+ Switch Upstream Port |
| 651 | * +-+ Switch Downstream Port 0 |
| 652 | * + \- Device A |
| 653 | * \-+ Switch Downstream Port 1 |
| 654 | * \- Device B |
| 655 | * |
| 656 | * The distance is 4 because we traverse from Device A to Downstream Port 0 |
| 657 | * to the common Switch Upstream Port, back down to Downstream Port 1 and |
| 658 | * then to Device B. The mapping type returned depends on the ACS |
| 659 | * redirection setting of the ports along the path. |
| 660 | * |
| 661 | * If ACS redirect is set on any port in the path, traffic between the |
| 662 | * devices will go through the host bridge, so return |
| 663 | * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; otherwise return |
| 664 | * PCI_P2PDMA_MAP_BUS_ADDR. |
| 665 | * |
| 666 | * Any two devices that have a data path that goes through the host bridge |
| 667 | * will consult a whitelist. If the host bridge is in the whitelist, return |
| 668 | * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of |
| 669 | * ports per above. If the device is not in the whitelist, return |
| 670 | * PCI_P2PDMA_MAP_NOT_SUPPORTED. |
| 671 | */ |
| 672 | static enum pci_p2pdma_map_type |
| 673 | calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, |
| 674 | int *dist, bool verbose) |
| 675 | { |
| 676 | enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; |
| 677 | struct pci_dev *a = provider, *b = client, *bb; |
| 678 | bool acs_redirects = false; |
| 679 | struct pci_p2pdma *p2pdma; |
| 680 | struct seq_buf acs_list; |
| 681 | int acs_cnt = 0; |
| 682 | int dist_a = 0; |
| 683 | int dist_b = 0; |
| 684 | char buf[128]; |
| 685 | |
| 686 | seq_buf_init(s: &acs_list, buf, size: sizeof(buf)); |
| 687 | |
| 688 | /* |
| 689 | * Note, we don't need to take references to devices returned by |
| 690 | * pci_upstream_bridge() seeing we hold a reference to a child |
| 691 | * device which will already hold a reference to the upstream bridge. |
| 692 | */ |
| 693 | while (a) { |
| 694 | dist_b = 0; |
| 695 | |
| 696 | if (pci_bridge_has_acs_redir(pdev: a)) { |
| 697 | seq_buf_print_bus_devfn(buf: &acs_list, pdev: a); |
| 698 | acs_cnt++; |
| 699 | } |
| 700 | |
| 701 | bb = b; |
| 702 | |
| 703 | while (bb) { |
| 704 | if (a == bb) |
| 705 | goto check_b_path_acs; |
| 706 | |
| 707 | bb = pci_upstream_bridge(dev: bb); |
| 708 | dist_b++; |
| 709 | } |
| 710 | |
| 711 | a = pci_upstream_bridge(dev: a); |
| 712 | dist_a++; |
| 713 | } |
| 714 | |
| 715 | *dist = dist_a + dist_b; |
| 716 | goto map_through_host_bridge; |
| 717 | |
| 718 | check_b_path_acs: |
| 719 | bb = b; |
| 720 | |
| 721 | while (bb) { |
| 722 | if (a == bb) |
| 723 | break; |
| 724 | |
| 725 | if (pci_bridge_has_acs_redir(pdev: bb)) { |
| 726 | seq_buf_print_bus_devfn(buf: &acs_list, pdev: bb); |
| 727 | acs_cnt++; |
| 728 | } |
| 729 | |
| 730 | bb = pci_upstream_bridge(dev: bb); |
| 731 | } |
| 732 | |
| 733 | *dist = dist_a + dist_b; |
| 734 | |
| 735 | if (!acs_cnt) { |
| 736 | map_type = PCI_P2PDMA_MAP_BUS_ADDR; |
| 737 | goto done; |
| 738 | } |
| 739 | |
| 740 | if (verbose) { |
| 741 | acs_list.buffer[acs_list.len-1] = 0; /* drop final semicolon */ |
| 742 | pci_warn(client, "ACS redirect is set between the client and provider (%s)\n" , |
| 743 | pci_name(provider)); |
| 744 | pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n" , |
| 745 | acs_list.buffer); |
| 746 | } |
| 747 | acs_redirects = true; |
| 748 | |
| 749 | map_through_host_bridge: |
| 750 | if (!cpu_supports_p2pdma() && |
| 751 | !host_bridge_whitelist(a: provider, b: client, warn: acs_redirects)) { |
| 752 | if (verbose) |
| 753 | pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n" , |
| 754 | pci_name(provider)); |
| 755 | map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; |
| 756 | } |
| 757 | done: |
| 758 | rcu_read_lock(); |
| 759 | p2pdma = rcu_dereference(provider->p2pdma); |
| 760 | if (p2pdma) |
| 761 | xa_store(&p2pdma->map_types, index: map_types_idx(client), |
| 762 | entry: xa_mk_value(v: map_type), GFP_ATOMIC); |
| 763 | rcu_read_unlock(); |
| 764 | return map_type; |
| 765 | } |
| 766 | |
| 767 | /** |
| 768 | * pci_p2pdma_distance_many - Determine the cumulative distance between |
| 769 | * a p2pdma provider and the clients in use. |
| 770 | * @provider: p2pdma provider to check against the client list |
| 771 | * @clients: array of devices to check (NULL-terminated) |
| 772 | * @num_clients: number of clients in the array |
| 773 | * @verbose: if true, print warnings for devices when we return -1 |
| 774 | * |
| 775 | * Returns -1 if any of the clients are not compatible, otherwise returns a |
| 776 | * positive number where a lower number is the preferable choice. (If there's |
| 777 | * one client that's the same as the provider it will return 0, which is best |
| 778 | * choice). |
| 779 | * |
| 780 | * "compatible" means the provider and the clients are either all behind |
| 781 | * the same PCI root port or the host bridges connected to each of the devices |
| 782 | * are listed in the 'pci_p2pdma_whitelist'. |
| 783 | */ |
| 784 | int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, |
| 785 | int num_clients, bool verbose) |
| 786 | { |
| 787 | enum pci_p2pdma_map_type map; |
| 788 | bool not_supported = false; |
| 789 | struct pci_dev *pci_client; |
| 790 | int total_dist = 0; |
| 791 | int i, distance; |
| 792 | |
| 793 | if (num_clients == 0) |
| 794 | return -1; |
| 795 | |
| 796 | for (i = 0; i < num_clients; i++) { |
| 797 | pci_client = find_parent_pci_dev(dev: clients[i]); |
| 798 | if (!pci_client) { |
| 799 | if (verbose) |
| 800 | dev_warn(clients[i], |
| 801 | "cannot be used for peer-to-peer DMA as it is not a PCI device\n" ); |
| 802 | return -1; |
| 803 | } |
| 804 | |
| 805 | map = calc_map_type_and_dist(provider, client: pci_client, dist: &distance, |
| 806 | verbose); |
| 807 | |
| 808 | pci_dev_put(dev: pci_client); |
| 809 | |
| 810 | if (map == PCI_P2PDMA_MAP_NOT_SUPPORTED) |
| 811 | not_supported = true; |
| 812 | |
| 813 | if (not_supported && !verbose) |
| 814 | break; |
| 815 | |
| 816 | total_dist += distance; |
| 817 | } |
| 818 | |
| 819 | if (not_supported) |
| 820 | return -1; |
| 821 | |
| 822 | return total_dist; |
| 823 | } |
| 824 | EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many); |
| 825 | |
| 826 | /** |
| 827 | * pci_has_p2pmem - check if a given PCI device has published any p2pmem |
| 828 | * @pdev: PCI device to check |
| 829 | */ |
| 830 | static bool pci_has_p2pmem(struct pci_dev *pdev) |
| 831 | { |
| 832 | struct pci_p2pdma *p2pdma; |
| 833 | bool res; |
| 834 | |
| 835 | rcu_read_lock(); |
| 836 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 837 | res = p2pdma && p2pdma->p2pmem_published; |
| 838 | rcu_read_unlock(); |
| 839 | |
| 840 | return res; |
| 841 | } |
| 842 | |
| 843 | /** |
| 844 | * pci_p2pmem_find_many - find a peer-to-peer DMA memory device compatible with |
| 845 | * the specified list of clients and shortest distance |
| 846 | * @clients: array of devices to check (NULL-terminated) |
| 847 | * @num_clients: number of client devices in the list |
| 848 | * |
| 849 | * If multiple devices are behind the same switch, the one "closest" to the |
| 850 | * client devices in use will be chosen first. (So if one of the providers is |
| 851 | * the same as one of the clients, that provider will be used ahead of any |
| 852 | * other providers that are unrelated). If multiple providers are an equal |
| 853 | * distance away, one will be chosen at random. |
| 854 | * |
| 855 | * Returns a pointer to the PCI device with a reference taken (use pci_dev_put |
| 856 | * to return the reference) or NULL if no compatible device is found. The |
| 857 | * found provider will also be assigned to the client list. |
| 858 | */ |
| 859 | struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients) |
| 860 | { |
| 861 | struct pci_dev *pdev = NULL; |
| 862 | int distance; |
| 863 | int closest_distance = INT_MAX; |
| 864 | struct pci_dev **closest_pdevs; |
| 865 | int dev_cnt = 0; |
| 866 | const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs); |
| 867 | int i; |
| 868 | |
| 869 | closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
| 870 | if (!closest_pdevs) |
| 871 | return NULL; |
| 872 | |
| 873 | for_each_pci_dev(pdev) { |
| 874 | if (!pci_has_p2pmem(pdev)) |
| 875 | continue; |
| 876 | |
| 877 | distance = pci_p2pdma_distance_many(pdev, clients, |
| 878 | num_clients, false); |
| 879 | if (distance < 0 || distance > closest_distance) |
| 880 | continue; |
| 881 | |
| 882 | if (distance == closest_distance && dev_cnt >= max_devs) |
| 883 | continue; |
| 884 | |
| 885 | if (distance < closest_distance) { |
| 886 | for (i = 0; i < dev_cnt; i++) |
| 887 | pci_dev_put(dev: closest_pdevs[i]); |
| 888 | |
| 889 | dev_cnt = 0; |
| 890 | closest_distance = distance; |
| 891 | } |
| 892 | |
| 893 | closest_pdevs[dev_cnt++] = pci_dev_get(dev: pdev); |
| 894 | } |
| 895 | |
| 896 | if (dev_cnt) |
| 897 | pdev = pci_dev_get(dev: closest_pdevs[get_random_u32_below(ceil: dev_cnt)]); |
| 898 | |
| 899 | for (i = 0; i < dev_cnt; i++) |
| 900 | pci_dev_put(dev: closest_pdevs[i]); |
| 901 | |
| 902 | kfree(objp: closest_pdevs); |
| 903 | return pdev; |
| 904 | } |
| 905 | EXPORT_SYMBOL_GPL(pci_p2pmem_find_many); |
| 906 | |
| 907 | /** |
| 908 | * pci_alloc_p2pmem - allocate peer-to-peer DMA memory |
| 909 | * @pdev: the device to allocate memory from |
| 910 | * @size: number of bytes to allocate |
| 911 | * |
| 912 | * Returns the allocated memory or NULL on error. |
| 913 | */ |
| 914 | void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) |
| 915 | { |
| 916 | void *ret = NULL; |
| 917 | struct percpu_ref *ref; |
| 918 | struct pci_p2pdma *p2pdma; |
| 919 | |
| 920 | /* |
| 921 | * Pairs with synchronize_rcu() in pci_p2pdma_release() to |
| 922 | * ensure pdev->p2pdma is non-NULL for the duration of the |
| 923 | * read-lock. |
| 924 | */ |
| 925 | rcu_read_lock(); |
| 926 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 927 | if (unlikely(!p2pdma)) |
| 928 | goto out; |
| 929 | |
| 930 | ret = (void *)gen_pool_alloc_owner(pool: p2pdma->pool, size, owner: (void **) &ref); |
| 931 | if (!ret) |
| 932 | goto out; |
| 933 | |
| 934 | if (unlikely(!percpu_ref_tryget_live_rcu(ref))) { |
| 935 | gen_pool_free(pool: p2pdma->pool, addr: (unsigned long) ret, size); |
| 936 | ret = NULL; |
| 937 | } |
| 938 | out: |
| 939 | rcu_read_unlock(); |
| 940 | return ret; |
| 941 | } |
| 942 | EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); |
| 943 | |
| 944 | /** |
| 945 | * pci_free_p2pmem - free peer-to-peer DMA memory |
| 946 | * @pdev: the device the memory was allocated from |
| 947 | * @addr: address of the memory that was allocated |
| 948 | * @size: number of bytes that were allocated |
| 949 | */ |
| 950 | void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) |
| 951 | { |
| 952 | struct percpu_ref *ref; |
| 953 | struct pci_p2pdma *p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); |
| 954 | |
| 955 | gen_pool_free_owner(pool: p2pdma->pool, addr: (uintptr_t)addr, size, |
| 956 | owner: (void **) &ref); |
| 957 | percpu_ref_put(ref); |
| 958 | } |
| 959 | EXPORT_SYMBOL_GPL(pci_free_p2pmem); |
| 960 | |
| 961 | /** |
| 962 | * pci_p2pmem_virt_to_bus - return the PCI bus address for a given virtual |
| 963 | * address obtained with pci_alloc_p2pmem() |
| 964 | * @pdev: the device the memory was allocated from |
| 965 | * @addr: address of the memory that was allocated |
| 966 | */ |
| 967 | pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) |
| 968 | { |
| 969 | struct pci_p2pdma *p2pdma; |
| 970 | |
| 971 | if (!addr) |
| 972 | return 0; |
| 973 | |
| 974 | p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); |
| 975 | if (!p2pdma) |
| 976 | return 0; |
| 977 | |
| 978 | /* |
| 979 | * Note: when we added the memory to the pool we used the PCI |
| 980 | * bus address as the physical address. So gen_pool_virt_to_phys() |
| 981 | * actually returns the bus address despite the misleading name. |
| 982 | */ |
| 983 | return gen_pool_virt_to_phys(pool: p2pdma->pool, (unsigned long)addr); |
| 984 | } |
| 985 | EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus); |
| 986 | |
| 987 | /** |
| 988 | * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist |
| 989 | * @pdev: the device to allocate memory from |
| 990 | * @nents: the number of SG entries in the list |
| 991 | * @length: number of bytes to allocate |
| 992 | * |
| 993 | * Return: %NULL on error or &struct scatterlist pointer and @nents on success |
| 994 | */ |
| 995 | struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev, |
| 996 | unsigned int *nents, u32 length) |
| 997 | { |
| 998 | struct scatterlist *sg; |
| 999 | void *addr; |
| 1000 | |
| 1001 | sg = kmalloc(sizeof(*sg), GFP_KERNEL); |
| 1002 | if (!sg) |
| 1003 | return NULL; |
| 1004 | |
| 1005 | sg_init_table(sg, 1); |
| 1006 | |
| 1007 | addr = pci_alloc_p2pmem(pdev, length); |
| 1008 | if (!addr) |
| 1009 | goto out_free_sg; |
| 1010 | |
| 1011 | sg_set_buf(sg, buf: addr, buflen: length); |
| 1012 | *nents = 1; |
| 1013 | return sg; |
| 1014 | |
| 1015 | out_free_sg: |
| 1016 | kfree(objp: sg); |
| 1017 | return NULL; |
| 1018 | } |
| 1019 | EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl); |
| 1020 | |
| 1021 | /** |
| 1022 | * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl() |
| 1023 | * @pdev: the device to allocate memory from |
| 1024 | * @sgl: the allocated scatterlist |
| 1025 | */ |
| 1026 | void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl) |
| 1027 | { |
| 1028 | struct scatterlist *sg; |
| 1029 | int count; |
| 1030 | |
| 1031 | for_each_sg(sgl, sg, INT_MAX, count) { |
| 1032 | if (!sg) |
| 1033 | break; |
| 1034 | |
| 1035 | pci_free_p2pmem(pdev, sg_virt(sg), sg->length); |
| 1036 | } |
| 1037 | kfree(objp: sgl); |
| 1038 | } |
| 1039 | EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl); |
| 1040 | |
| 1041 | /** |
| 1042 | * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by |
| 1043 | * other devices with pci_p2pmem_find() |
| 1044 | * @pdev: the device with peer-to-peer DMA memory to publish |
| 1045 | * @publish: set to true to publish the memory, false to unpublish it |
| 1046 | * |
| 1047 | * Published memory can be used by other PCI device drivers for |
| 1048 | * peer-2-peer DMA operations. Non-published memory is reserved for |
| 1049 | * exclusive use of the device driver that registers the peer-to-peer |
| 1050 | * memory. |
| 1051 | */ |
| 1052 | void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) |
| 1053 | { |
| 1054 | struct pci_p2pdma *p2pdma; |
| 1055 | |
| 1056 | rcu_read_lock(); |
| 1057 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 1058 | if (p2pdma) |
| 1059 | p2pdma->p2pmem_published = publish; |
| 1060 | rcu_read_unlock(); |
| 1061 | } |
| 1062 | EXPORT_SYMBOL_GPL(pci_p2pmem_publish); |
| 1063 | |
| 1064 | /** |
| 1065 | * pci_p2pdma_map_type - Determine the mapping type for P2PDMA transfers |
| 1066 | * @provider: P2PDMA provider structure |
| 1067 | * @dev: Target device for the transfer |
| 1068 | * |
| 1069 | * Determines how peer-to-peer DMA transfers should be mapped between |
| 1070 | * the provider and the target device. The mapping type indicates whether |
| 1071 | * the transfer can be done directly through PCI switches or must go |
| 1072 | * through the host bridge. |
| 1073 | */ |
| 1074 | enum pci_p2pdma_map_type pci_p2pdma_map_type(struct p2pdma_provider *provider, |
| 1075 | struct device *dev) |
| 1076 | { |
| 1077 | enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED; |
| 1078 | struct pci_dev *pdev = to_pci_dev(provider->owner); |
| 1079 | struct pci_dev *client; |
| 1080 | struct pci_p2pdma *p2pdma; |
| 1081 | int dist; |
| 1082 | |
| 1083 | if (!pdev->p2pdma) |
| 1084 | return PCI_P2PDMA_MAP_NOT_SUPPORTED; |
| 1085 | |
| 1086 | if (!dev_is_pci(dev)) |
| 1087 | return PCI_P2PDMA_MAP_NOT_SUPPORTED; |
| 1088 | |
| 1089 | client = to_pci_dev(dev); |
| 1090 | |
| 1091 | rcu_read_lock(); |
| 1092 | p2pdma = rcu_dereference(pdev->p2pdma); |
| 1093 | |
| 1094 | if (p2pdma) |
| 1095 | type = xa_to_value(entry: xa_load(&p2pdma->map_types, |
| 1096 | index: map_types_idx(client))); |
| 1097 | rcu_read_unlock(); |
| 1098 | |
| 1099 | if (type == PCI_P2PDMA_MAP_UNKNOWN) |
| 1100 | return calc_map_type_and_dist(provider: pdev, client, dist: &dist, verbose: true); |
| 1101 | |
| 1102 | return type; |
| 1103 | } |
| 1104 | |
| 1105 | void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, |
| 1106 | struct device *dev, struct page *page) |
| 1107 | { |
| 1108 | struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(pgmap: page_pgmap(page)); |
| 1109 | |
| 1110 | if (state->mem == p2p_pgmap->mem) |
| 1111 | return; |
| 1112 | |
| 1113 | state->mem = p2p_pgmap->mem; |
| 1114 | state->map = pci_p2pdma_map_type(provider: p2p_pgmap->mem, dev); |
| 1115 | } |
| 1116 | |
| 1117 | /** |
| 1118 | * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store |
| 1119 | * to enable p2pdma |
| 1120 | * @page: contents of the value to be stored |
| 1121 | * @p2p_dev: returns the PCI device that was selected to be used |
| 1122 | * (if one was specified in the stored value) |
| 1123 | * @use_p2pdma: returns whether to enable p2pdma or not |
| 1124 | * |
| 1125 | * Parses an attribute value to decide whether to enable p2pdma. |
| 1126 | * The value can select a PCI device (using its full BDF device |
| 1127 | * name) or a boolean (in any format kstrtobool() accepts). A false |
| 1128 | * value disables p2pdma, a true value expects the caller |
| 1129 | * to automatically find a compatible device and specifying a PCI device |
| 1130 | * expects the caller to use the specific provider. |
| 1131 | * |
| 1132 | * pci_p2pdma_enable_show() should be used as the show operation for |
| 1133 | * the attribute. |
| 1134 | * |
| 1135 | * Returns 0 on success |
| 1136 | */ |
| 1137 | int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, |
| 1138 | bool *use_p2pdma) |
| 1139 | { |
| 1140 | struct device *dev; |
| 1141 | |
| 1142 | dev = bus_find_device_by_name(bus: &pci_bus_type, NULL, name: page); |
| 1143 | if (dev) { |
| 1144 | *use_p2pdma = true; |
| 1145 | *p2p_dev = to_pci_dev(dev); |
| 1146 | |
| 1147 | if (!pci_has_p2pmem(pdev: *p2p_dev)) { |
| 1148 | pci_err(*p2p_dev, |
| 1149 | "PCI device has no peer-to-peer memory: %s\n" , |
| 1150 | page); |
| 1151 | pci_dev_put(dev: *p2p_dev); |
| 1152 | return -ENODEV; |
| 1153 | } |
| 1154 | |
| 1155 | return 0; |
| 1156 | } else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) { |
| 1157 | /* |
| 1158 | * If the user enters a PCI device that doesn't exist |
| 1159 | * like "0000:01:00.1", we don't want kstrtobool to think |
| 1160 | * it's a '0' when it's clearly not what the user wanted. |
| 1161 | * So we require 0's and 1's to be exactly one character. |
| 1162 | */ |
| 1163 | } else if (!kstrtobool(s: page, res: use_p2pdma)) { |
| 1164 | return 0; |
| 1165 | } |
| 1166 | |
| 1167 | pr_err("No such PCI device: %.*s\n" , (int)strcspn(page, "\n" ), page); |
| 1168 | return -ENODEV; |
| 1169 | } |
| 1170 | EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store); |
| 1171 | |
| 1172 | /** |
| 1173 | * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating |
| 1174 | * whether p2pdma is enabled |
| 1175 | * @page: contents of the stored value |
| 1176 | * @p2p_dev: the selected p2p device (NULL if no device is selected) |
| 1177 | * @use_p2pdma: whether p2pdma has been enabled |
| 1178 | * |
| 1179 | * Attributes that use pci_p2pdma_enable_store() should use this function |
| 1180 | * to show the value of the attribute. |
| 1181 | * |
| 1182 | * Returns 0 on success |
| 1183 | */ |
| 1184 | ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, |
| 1185 | bool use_p2pdma) |
| 1186 | { |
| 1187 | if (!use_p2pdma) |
| 1188 | return sprintf(buf: page, fmt: "0\n" ); |
| 1189 | |
| 1190 | if (!p2p_dev) |
| 1191 | return sprintf(buf: page, fmt: "1\n" ); |
| 1192 | |
| 1193 | return sprintf(buf: page, fmt: "%s\n" , pci_name(pdev: p2p_dev)); |
| 1194 | } |
| 1195 | EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show); |
| 1196 | |