/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <rdma/ib_user_verbs.h>

#include "mlx4_ib.h"

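/*
 * Translate IB access flags into mlx4 MPT permission bits; local read
 * access is always granted.
 */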
static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
	       (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW      : 0) |
	       MLX4_PERM_LOCAL_READ;
}

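/* Map an IB memory window type onto the corresponding mlx4 MW type. */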
static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
	switch (type) {
	case IB_MW_TYPE_1:	return MLX4_MW_TYPE_1;
	case IB_MW_TYPE_2:	return MLX4_MW_TYPE_2;
	default:		return -1;
	}
}

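/*
 * Allocate an MR covering all of memory (iova 0, size ~0ull), i.e. a
 * DMA MR for local use that needs no MTT translation entries.
 */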
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

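/*
 * Walk the umem in MTT-page-sized DMA blocks and write each block's
 * DMA address into the MTT, one entry at a time.
 */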
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	struct ib_block_iter biter;
	int err, i = 0;
	u64 addr;

	rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
		addr = rdma_block_iter_dma_address(&biter);
		err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
		if (err)
			return err;
	}
	return 0;
}

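/*
 * Pin the user memory for an MR, upgrading the mapping to writable
 * when the underlying VMA permits it (see the comments below).
 */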
static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
					u64 length, int access_flags)
{
	/*
	 * Force registering the memory as writable if the underlying pages
	 * are writable.  This is so rereg can change the access permissions
	 * from readable to writable without having to run through ib_umem_get
	 * again
	 */
	if (!ib_access_writable(access_flags)) {
		unsigned long untagged_start = untagged_addr(start);
		struct vm_area_struct *vma;

		mmap_read_lock(current->mm);
		/*
		 * FIXME: Ideally this would iterate over all the vmas that
		 * cover the memory, but for now it requires a single vma to
		 * entirely cover the MR to support RO mappings.
		 */
		vma = find_vma(current->mm, untagged_start);
		if (vma && vma->vm_end >= untagged_start + length &&
		    vma->vm_start <= untagged_start) {
			if (vma->vm_flags & VM_WRITE)
				access_flags |= IB_ACCESS_LOCAL_WRITE;
		} else {
			access_flags |= IB_ACCESS_LOCAL_WRITE;
		}

		mmap_read_unlock(current->mm);
	}

	return ib_umem_get(device, start, length, access_flags);
}

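/*
 * Register a user memory region: pin the pages, pick an optimal MTT
 * page size, allocate the MR and populate its translation table.
 */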
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_dmah *dmah,
				  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;

	if (dmah)
		return ERR_PTR(-EOPNOTSUPP);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
	if (shift < 0) {
		err = shift;
		goto err_umem;
	}

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.page_size = 1U << shift;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

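/*
 * Re-register an existing user MR. Depending on @flags this changes
 * the PD, the access rights and/or the translation; returns NULL on
 * success or an ERR_PTR on failure.
 */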
struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
				    u64 length, u64 virt_addr,
				    int mr_access_flags, struct ib_pd *pd,
				    struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(mr->device);
	struct mlx4_ib_mr *mmr = to_mmr(mr);
	struct mlx4_mpt_entry *mpt_entry;
	struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
	int err;

	/* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
	 * we assume that the calls can't run concurrently. Otherwise, a
	 * race exists.
	 */
	err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
	if (err)
		return ERR_PTR(err);

	if (flags & IB_MR_REREG_PD) {
		err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
					   to_mpd(pd)->pdn);

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		if (ib_access_writable(mr_access_flags) &&
		    !mmr->umem->writable) {
			err = -EPERM;
			goto release_mpt_entry;
		}

		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
					       convert_access(mr_access_flags));

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_TRANS) {
		int shift;
		int n;

		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
		ib_umem_release(mmr->umem);
		mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
					     mr_access_flags);
		if (IS_ERR(mmr->umem)) {
			err = PTR_ERR(mmr->umem);
			/* Prevent mlx4_ib_dereg_mr from freeing an invalid pointer */
			mmr->umem = NULL;
			goto release_mpt_entry;
		}
		n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
		shift = PAGE_SHIFT;

		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
					      virt_addr, length, n, shift,
					      *pmpt_entry);
		if (err) {
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
		mmr->mmr.iova = virt_addr;
		mmr->mmr.size = length;

		err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
		if (err) {
			mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
	}

	/* If we couldn't transfer the MR to the HCA, just remember to
	 * return a failure. But dereg_mr will free the resources.
	 */
	err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
	if (!err && flags & IB_MR_REREG_ACCESS)
		mmr->mmr.access = mr_access_flags;

release_mpt_entry:
	mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
	if (err)
		return ERR_PTR(err);
	return NULL;
}

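/*
 * Allocate and DMA-map the page list used for fast-register work
 * requests. A whole zeroed page is used so the list never crosses a
 * page boundary.
 */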
static int
mlx4_alloc_priv_pages(struct ib_device *device,
		      struct mlx4_ib_mr *mr,
		      int max_pages)
{
	int ret;

	/* Ensure that size is aligned to DMA cacheline
	 * requirements.
	 * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
	 * so page_map_size will never cross PAGE_SIZE.
	 */
	mr->page_map_size = roundup(max_pages * sizeof(u64),
				    MLX4_MR_PAGES_ALIGN);

	/* Prevent cross page boundary allocation. */
	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
	if (!mr->pages)
		return -ENOMEM;

	mr->page_map = dma_map_single(device->dev.parent, mr->pages,
				      mr->page_map_size, DMA_TO_DEVICE);

	if (dma_mapping_error(device->dev.parent, mr->page_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;

err:
	free_page((unsigned long)mr->pages);
	return ret;
}

static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
	if (mr->pages) {
		struct ib_device *device = mr->ibmr.device;

		dma_unmap_single(device->dev.parent, mr->page_map,
				 mr->page_map_size, DMA_TO_DEVICE);
		free_page((unsigned long)mr->pages);
		mr->pages = NULL;
	}
}

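/*
 * Tear down an MR: free the private page list, the HCA MR and any
 * pinned user memory.
 */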
int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int ret;

	mlx4_free_priv_pages(mr);

	ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (ret)
		return ret;
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

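/* Allocate and enable a memory window of the requested type. */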
int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
	struct mlx4_ib_mw *mw = to_mmw(ibmw);
	int err;

	err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
			    to_mlx4_type(ibmw->type), &mw->mmw);
	if (err)
		return err;

	err = mlx4_mw_enable(dev->dev, &mw->mmw);
	if (err)
		goto err_mw;

	ibmw->rkey = mw->mmw.key;
	return 0;

err_mw:
	mlx4_mw_free(dev->dev, &mw->mmw);
	return err;
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
	struct mlx4_ib_mw *mw = to_mmw(ibmw);

	mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
	return 0;
}

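/*
 * Allocate an MR for fast registration (IB_MR_TYPE_MEM_REG only),
 * limited to MLX4_MAX_FAST_REG_PAGES pages.
 */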
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG ||
	    max_num_sg > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_num_sg, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
	if (err)
		goto err_free_mr;

	mr->max_pages = max_num_sg;
	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_free_pl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free_pl:
	mr->ibmr.device = pd->device;
	mlx4_free_priv_pages(mr);
err_free_mr:
	(void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

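/* ib_sg_to_pages() callback: record one page address in the MR's page
 * list, tagged with the MTT present flag.
 */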
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	if (unlikely(mr->npages == mr->max_pages))
		return -ENOMEM;

	mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

	return 0;
}

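/*
 * Build the MR's page list from a scatterlist, bracketing the update
 * with DMA syncs so the device sees a consistent list.
 */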
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int rc;

	mr->npages = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
				   mr->page_map_size, DMA_TO_DEVICE);

	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
				      mr->page_map_size, DMA_TO_DEVICE);

	return rc;
}