1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
4 | * Copyright 2016-2022 HabanaLabs, Ltd. |
5 | * All Rights Reserved. |
6 | */ |
7 | |
8 | #include <uapi/drm/habanalabs_accel.h> |
9 | #include "habanalabs.h" |
10 | #include "../include/hw_ip/mmu/mmu_general.h" |
11 | |
12 | #include <linux/uaccess.h> |
13 | #include <linux/slab.h> |
14 | #include <linux/vmalloc.h> |
15 | #include <linux/pci-p2pdma.h> |
16 | |
17 | MODULE_IMPORT_NS(DMA_BUF); |
18 | |
19 | #define HL_MMU_DEBUG 0 |
20 | |
21 | /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */ |
22 | #define DRAM_POOL_PAGE_SIZE SZ_8M |
23 | |
24 | #define MEM_HANDLE_INVALID ULONG_MAX |
25 | |
26 | static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, |
27 | struct hl_mem_in *args, u64 *handle); |
28 | |
29 | static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size) |
30 | { |
31 | struct asic_fixed_properties *prop = &hdev->asic_prop; |
32 | u64 psize; |
33 | |
	/*
	 * For ASICs that support a user-selected allocation page size, honor the
	 * user's choice only if it is not 0 (as 0 means taking the default page size).
	 */
	if (prop->supports_user_set_page_size && args->alloc.page_size) {
		psize = args->alloc.page_size;

		if (!is_power_of_2(psize)) {
			dev_err(hdev->dev, "user page size (%#llx) is not power of 2\n", psize);
			return -EINVAL;
		}
45 | } else { |
46 | psize = prop->device_mem_alloc_default_page_size; |
47 | } |
48 | |
49 | *page_size = psize; |
50 | |
51 | return 0; |
52 | } |
53 | |
/*
 * The va ranges in context object contain a list with the available chunks of
 * device virtual memory.
 * There is one range for host allocations and one for DRAM allocations.
 *
 * On initialization each range contains one chunk of all of its available
 * virtual range which is a half of the total device virtual range.
 *
 * On each mapping of physical pages, a suitable virtual range chunk (with a
 * minimum size) is selected from the list. If the chunk size equals the
 * requested size, the chunk is returned. Otherwise, the chunk is split into
 * two chunks - one to return as a result and a remainder to stay in the list.
 *
 * On each unmapping of a virtual address, the relevant virtual chunk is
 * returned to the list. The chunk is added to the list and if its edges match
 * the edges of the adjacent chunks (meaning a contiguous chunk can be
 * created), the chunks are merged.
 *
 * On finish, the list is checked to have only one chunk of all the relevant
 * virtual range (which is a half of the device total virtual range).
 * If not (meaning not all mappings were unmapped), a warning is printed.
 */
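
/*
 * Illustrative example (arbitrary numbers): a range that initially holds one
 * chunk 0x1000..0x8fff. Mapping 0x2000 bytes splits it into the returned area
 * 0x1000..0x2fff and a remaining chunk 0x3000..0x8fff. Unmapping that area
 * later adds it back to the list, where it merges with its neighbor and
 * restores the single 0x1000..0x8fff chunk.
 */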
76 | |
77 | /* |
78 | * alloc_device_memory() - allocate device memory. |
79 | * @ctx: pointer to the context structure. |
80 | * @args: host parameters containing the requested size. |
81 | * @ret_handle: result handle. |
82 | * |
83 | * This function does the following: |
84 | * - Allocate the requested size rounded up to 'dram_page_size' pages. |
85 | * - Return unique handle for later map/unmap/free. |
86 | */ |
87 | static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, |
88 | u32 *ret_handle) |
89 | { |
90 | struct hl_device *hdev = ctx->hdev; |
91 | struct hl_vm *vm = &hdev->vm; |
92 | struct hl_vm_phys_pg_pack *phys_pg_pack; |
93 | u64 paddr = 0, total_size, num_pgs, i; |
94 | u32 num_curr_pgs, page_size; |
95 | bool contiguous; |
96 | int handle, rc; |
97 | |
98 | num_curr_pgs = 0; |
99 | |
	rc = set_alloc_page_size(hdev, args, &page_size);
101 | if (rc) |
102 | return rc; |
103 | |
104 | num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size); |
105 | total_size = num_pgs * page_size; |
106 | |
107 | if (!total_size) { |
108 | dev_err(hdev->dev, "Cannot allocate 0 bytes\n" ); |
109 | return -EINVAL; |
110 | } |
111 | |
112 | contiguous = args->flags & HL_MEM_CONTIGUOUS; |
113 | |
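	/*
	 * For a contiguous allocation, reserve the whole area from the DRAM pool in
	 * one shot. An aligned pool allocation is only attempted when the page size
	 * is a power of 2; otherwise fall back to a plain gen_pool_alloc().
	 */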
	if (contiguous) {
		if (is_power_of_2(page_size))
			paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
							total_size, NULL, page_size);
		else
			paddr = gen_pool_alloc(vm->dram_pg_pool, total_size);
		if (!paddr) {
			dev_err(hdev->dev,
				"Cannot allocate %llu contiguous pages with total size of %llu\n",
				num_pgs, total_size);
			return -ENOMEM;
		}
	}
127 | |
	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
129 | if (!phys_pg_pack) { |
130 | rc = -ENOMEM; |
131 | goto pages_pack_err; |
132 | } |
133 | |
134 | phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK; |
135 | phys_pg_pack->asid = ctx->asid; |
136 | phys_pg_pack->npages = num_pgs; |
137 | phys_pg_pack->page_size = page_size; |
138 | phys_pg_pack->total_size = total_size; |
139 | phys_pg_pack->flags = args->flags; |
140 | phys_pg_pack->contiguous = contiguous; |
141 | |
	phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
143 | if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { |
144 | rc = -ENOMEM; |
145 | goto pages_arr_err; |
146 | } |
147 | |
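	/*
	 * For a contiguous block the pages array just records the address of each
	 * page inside the single allocation; otherwise every page is allocated from
	 * the DRAM pool individually.
	 */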
148 | if (phys_pg_pack->contiguous) { |
149 | for (i = 0 ; i < num_pgs ; i++) |
150 | phys_pg_pack->pages[i] = paddr + i * page_size; |
151 | } else { |
152 | for (i = 0 ; i < num_pgs ; i++) { |
			if (is_power_of_2(page_size))
				phys_pg_pack->pages[i] =
					(uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool,
									page_size, NULL,
									page_size);
			else
				phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool,
									page_size);

			if (!phys_pg_pack->pages[i]) {
				dev_err(hdev->dev,
					"Cannot allocate device memory (out of memory)\n");
165 | rc = -ENOMEM; |
166 | goto page_err; |
167 | } |
168 | |
169 | num_curr_pgs++; |
170 | } |
171 | } |
172 | |
	spin_lock(&vm->idr_lock);
	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
				GFP_ATOMIC);
	spin_unlock(&vm->idr_lock);

	if (handle < 0) {
		dev_err(hdev->dev, "Failed to get handle for page\n");
		rc = -EFAULT;
		goto idr_err;
	}

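	/*
	 * Every allocated page takes a reference on the DRAM page pool, so the pool
	 * (and the handles IDR) is released only after all pages are returned.
	 */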
	for (i = 0 ; i < num_pgs ; i++)
		kref_get(&vm->dram_pg_pool_refcount);

	phys_pg_pack->handle = handle;

	atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
	atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
191 | |
192 | *ret_handle = handle; |
193 | |
194 | return 0; |
195 | |
196 | idr_err: |
197 | page_err: |
	if (!phys_pg_pack->contiguous)
		for (i = 0 ; i < num_curr_pgs ; i++)
			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
					page_size);

	kvfree(phys_pg_pack->pages);
pages_arr_err:
	kfree(phys_pg_pack);
pages_pack_err:
	if (contiguous)
		gen_pool_free(vm->dram_pg_pool, paddr, total_size);
209 | |
210 | return rc; |
211 | } |
212 | |
213 | /** |
214 | * dma_map_host_va() - DMA mapping of the given host virtual address. |
215 | * @hdev: habanalabs device structure. |
216 | * @addr: the host virtual address of the memory area. |
217 | * @size: the size of the memory area. |
218 | * @p_userptr: pointer to result userptr structure. |
219 | * |
220 | * This function does the following: |
221 | * - Allocate userptr structure. |
222 | * - Pin the given host memory using the userptr structure. |
223 | * - Perform DMA mapping to have the DMA addresses of the pages. |
224 | */ |
225 | static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, |
226 | struct hl_userptr **p_userptr) |
227 | { |
228 | struct hl_userptr *userptr; |
229 | int rc; |
230 | |
	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
232 | if (!userptr) { |
233 | rc = -ENOMEM; |
234 | goto userptr_err; |
235 | } |
236 | |
237 | rc = hl_pin_host_memory(hdev, addr, size, userptr); |
238 | if (rc) |
239 | goto pin_err; |
240 | |
241 | userptr->dma_mapped = true; |
242 | userptr->dir = DMA_BIDIRECTIONAL; |
243 | userptr->vm_type = VM_TYPE_USERPTR; |
244 | |
245 | *p_userptr = userptr; |
246 | |
247 | rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); |
248 | if (rc) { |
249 | dev_err(hdev->dev, "failed to map sgt with DMA region\n" ); |
250 | goto dma_map_err; |
251 | } |
252 | |
253 | return 0; |
254 | |
255 | dma_map_err: |
256 | hl_unpin_host_memory(hdev, userptr); |
257 | pin_err: |
	kfree(userptr);
259 | userptr_err: |
260 | |
261 | return rc; |
262 | } |
263 | |
264 | /** |
265 | * dma_unmap_host_va() - DMA unmapping of the given host virtual address. |
266 | * @hdev: habanalabs device structure. |
267 | * @userptr: userptr to free. |
268 | * |
269 | * This function does the following: |
270 | * - Unpins the physical pages. |
271 | * - Frees the userptr structure. |
272 | */ |
273 | static void dma_unmap_host_va(struct hl_device *hdev, |
274 | struct hl_userptr *userptr) |
275 | { |
276 | hl_unpin_host_memory(hdev, userptr); |
	kfree(userptr);
278 | } |
279 | |
280 | /** |
281 | * dram_pg_pool_do_release() - free DRAM pages pool |
282 | * @ref: pointer to reference object. |
283 | * |
284 | * This function does the following: |
285 | * - Frees the idr structure of physical pages handles. |
286 | * - Frees the generic pool of DRAM physical pages. |
287 | */ |
288 | static void dram_pg_pool_do_release(struct kref *ref) |
289 | { |
290 | struct hl_vm *vm = container_of(ref, struct hl_vm, |
291 | dram_pg_pool_refcount); |
292 | |
293 | /* |
294 | * free the idr here as only here we know for sure that there are no |
295 | * allocated physical pages and hence there are no handles in use |
296 | */ |
297 | idr_destroy(&vm->phys_pg_pack_handles); |
298 | gen_pool_destroy(vm->dram_pg_pool); |
299 | } |
300 | |
301 | /** |
302 | * free_phys_pg_pack() - free physical page pack. |
303 | * @hdev: habanalabs device structure. |
304 | * @phys_pg_pack: physical page pack to free. |
305 | * |
306 | * This function does the following: |
307 | * - For DRAM memory only |
308 | * - iterate over the pack, free each physical block structure by |
309 | * returning it to the general pool. |
310 | * - Free the hl_vm_phys_pg_pack structure. |
311 | */ |
312 | static void free_phys_pg_pack(struct hl_device *hdev, |
313 | struct hl_vm_phys_pg_pack *phys_pg_pack) |
314 | { |
315 | struct hl_vm *vm = &hdev->vm; |
316 | u64 i; |
317 | |
318 | if (phys_pg_pack->created_from_userptr) |
319 | goto end; |
320 | |
321 | if (phys_pg_pack->contiguous) { |
		gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
			phys_pg_pack->total_size);

		for (i = 0; i < phys_pg_pack->npages ; i++)
			kref_put(&vm->dram_pg_pool_refcount,
				dram_pg_pool_do_release);
	} else {
		for (i = 0 ; i < phys_pg_pack->npages ; i++) {
			gen_pool_free(vm->dram_pg_pool,
				phys_pg_pack->pages[i],
				phys_pg_pack->page_size);
			kref_put(&vm->dram_pg_pool_refcount,
				dram_pg_pool_do_release);
		}
	}

end:
	kvfree(phys_pg_pack->pages);
	kfree(phys_pg_pack);
341 | |
342 | return; |
343 | } |
344 | |
345 | /** |
346 | * free_device_memory() - free device memory. |
347 | * @ctx: pointer to the context structure. |
348 | * @args: host parameters containing the requested size. |
349 | * |
350 | * This function does the following: |
351 | * - Free the device memory related to the given handle. |
352 | */ |
353 | static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args) |
354 | { |
355 | struct hl_device *hdev = ctx->hdev; |
356 | struct hl_vm *vm = &hdev->vm; |
357 | struct hl_vm_phys_pg_pack *phys_pg_pack; |
358 | u32 handle = args->free.handle; |
359 | |
	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (!phys_pg_pack) {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev, "free device memory failed, no match for handle %u\n", handle);
		return -EINVAL;
	}

	if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev, "handle %u is mapped, cannot free\n", handle);
		return -EINVAL;
	}

	/* must remove from idr before the freeing of the physical pages as the refcount of the pool
	 * is also the trigger of the idr destroy
	 */
	idr_remove(&vm->phys_pg_pack_handles, handle);
	spin_unlock(&vm->idr_lock);

	atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
	atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
382 | |
383 | free_phys_pg_pack(hdev, phys_pg_pack); |
384 | |
385 | return 0; |
386 | } |
387 | |
388 | /** |
389 | * clear_va_list_locked() - free virtual addresses list. |
390 | * @hdev: habanalabs device structure. |
391 | * @va_list: list of virtual addresses to free. |
392 | * |
393 | * This function does the following: |
394 | * - Iterate over the list and free each virtual addresses block. |
395 | * |
396 | * This function should be called only when va_list lock is taken. |
397 | */ |
398 | static void clear_va_list_locked(struct hl_device *hdev, |
399 | struct list_head *va_list) |
400 | { |
401 | struct hl_vm_va_block *va_block, *tmp; |
402 | |
403 | list_for_each_entry_safe(va_block, tmp, va_list, node) { |
		list_del(&va_block->node);
		kfree(va_block);
406 | } |
407 | } |
408 | |
409 | /** |
410 | * print_va_list_locked() - print virtual addresses list. |
411 | * @hdev: habanalabs device structure. |
412 | * @va_list: list of virtual addresses to print. |
413 | * |
414 | * This function does the following: |
415 | * - Iterate over the list and print each virtual addresses block. |
416 | * |
417 | * This function should be called only when va_list lock is taken. |
418 | */ |
419 | static void print_va_list_locked(struct hl_device *hdev, |
420 | struct list_head *va_list) |
421 | { |
422 | #if HL_MMU_DEBUG |
423 | struct hl_vm_va_block *va_block; |
424 | |
425 | dev_dbg(hdev->dev, "print va list:\n" ); |
426 | |
427 | list_for_each_entry(va_block, va_list, node) |
428 | dev_dbg(hdev->dev, |
429 | "va block, start: 0x%llx, end: 0x%llx, size: %llu\n" , |
430 | va_block->start, va_block->end, va_block->size); |
431 | #endif |
432 | } |
433 | |
434 | /** |
435 | * merge_va_blocks_locked() - merge a virtual block if possible. |
436 | * @hdev: pointer to the habanalabs device structure. |
437 | * @va_list: pointer to the virtual addresses block list. |
438 | * @va_block: virtual block to merge with adjacent blocks. |
439 | * |
440 | * This function does the following: |
 * - Merge the given block with the adjacent blocks if their virtual ranges
 *   create a contiguous virtual range.
 *
 * This function should be called only when va_list lock is taken.
445 | */ |
446 | static void merge_va_blocks_locked(struct hl_device *hdev, |
447 | struct list_head *va_list, struct hl_vm_va_block *va_block) |
448 | { |
449 | struct hl_vm_va_block *prev, *next; |
450 | |
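	/*
	 * If the previous block ends right where this one starts, fold this block
	 * into it; the same check is then done against the next block, so a block
	 * with free neighbors on both sides collapses into a single chunk.
	 */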
451 | prev = list_prev_entry(va_block, node); |
452 | if (&prev->node != va_list && prev->end + 1 == va_block->start) { |
453 | prev->end = va_block->end; |
454 | prev->size = prev->end - prev->start + 1; |
		list_del(&va_block->node);
		kfree(va_block);
457 | va_block = prev; |
458 | } |
459 | |
460 | next = list_next_entry(va_block, node); |
461 | if (&next->node != va_list && va_block->end + 1 == next->start) { |
462 | next->start = va_block->start; |
463 | next->size = next->end - next->start + 1; |
		list_del(&va_block->node);
		kfree(va_block);
466 | } |
467 | } |
468 | |
469 | /** |
470 | * add_va_block_locked() - add a virtual block to the virtual addresses list. |
471 | * @hdev: pointer to the habanalabs device structure. |
472 | * @va_list: pointer to the virtual addresses block list. |
473 | * @start: start virtual address. |
474 | * @end: end virtual address. |
475 | * |
476 | * This function does the following: |
477 | * - Add the given block to the virtual blocks list and merge with other blocks |
478 | * if a contiguous virtual block can be created. |
479 | * |
 * This function should be called only when va_list lock is taken.
481 | */ |
482 | static int add_va_block_locked(struct hl_device *hdev, |
483 | struct list_head *va_list, u64 start, u64 end) |
484 | { |
485 | struct hl_vm_va_block *va_block, *res = NULL; |
486 | u64 size = end - start + 1; |
487 | |
488 | print_va_list_locked(hdev, va_list); |
489 | |
490 | list_for_each_entry(va_block, va_list, node) { |
491 | /* TODO: remove upon matureness */ |
		if (hl_mem_area_crosses_range(start, size, va_block->start,
					va_block->end)) {
494 | dev_err(hdev->dev, |
495 | "block crossing ranges at start 0x%llx, end 0x%llx\n" , |
496 | va_block->start, va_block->end); |
497 | return -EINVAL; |
498 | } |
499 | |
500 | if (va_block->end < start) |
501 | res = va_block; |
502 | } |
503 | |
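	/* res points to the last block that ends below 'start', so inserting right
	 * after it keeps the list ordered by address.
	 */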
	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
505 | if (!va_block) |
506 | return -ENOMEM; |
507 | |
508 | va_block->start = start; |
509 | va_block->end = end; |
510 | va_block->size = size; |
511 | |
512 | if (!res) |
		list_add(&va_block->node, va_list);
	else
		list_add(&va_block->node, &res->node);
516 | |
517 | merge_va_blocks_locked(hdev, va_list, va_block); |
518 | |
519 | print_va_list_locked(hdev, va_list); |
520 | |
521 | return 0; |
522 | } |
523 | |
524 | /** |
525 | * add_va_block() - wrapper for add_va_block_locked. |
526 | * @hdev: pointer to the habanalabs device structure. |
527 | * @va_range: pointer to the virtual addresses range object. |
528 | * @start: start virtual address. |
529 | * @end: end virtual address. |
530 | * |
531 | * This function does the following: |
532 | * - Takes the list lock and calls add_va_block_locked. |
533 | */ |
534 | static inline int add_va_block(struct hl_device *hdev, |
535 | struct hl_va_range *va_range, u64 start, u64 end) |
536 | { |
537 | int rc; |
538 | |
539 | mutex_lock(&va_range->lock); |
	rc = add_va_block_locked(hdev, &va_range->list, start, end);
	mutex_unlock(&va_range->lock);
542 | |
543 | return rc; |
544 | } |
545 | |
546 | /** |
 * is_hint_crossing_range() - check if the hint address crosses the specified reserved range.
548 | * @range_type: virtual space range type. |
549 | * @start_addr: start virtual address. |
550 | * @size: block size. |
551 | * @prop: asic properties structure to retrieve reserved ranges from. |
552 | */ |
553 | static inline bool is_hint_crossing_range(enum hl_va_range_type range_type, |
554 | u64 start_addr, u32 size, struct asic_fixed_properties *prop) { |
555 | bool range_cross; |
556 | |
	if (range_type == HL_VA_RANGE_TYPE_DRAM)
		range_cross =
			hl_mem_area_crosses_range(start_addr, size,
			prop->hints_dram_reserved_va_range.start_addr,
			prop->hints_dram_reserved_va_range.end_addr);
	else if (range_type == HL_VA_RANGE_TYPE_HOST)
		range_cross =
			hl_mem_area_crosses_range(start_addr, size,
			prop->hints_host_reserved_va_range.start_addr,
			prop->hints_host_reserved_va_range.end_addr);
	else
		range_cross =
			hl_mem_area_crosses_range(start_addr, size,
			prop->hints_host_hpage_reserved_va_range.start_addr,
			prop->hints_host_hpage_reserved_va_range.end_addr);
572 | |
573 | return range_cross; |
574 | } |
575 | |
576 | /** |
577 | * get_va_block() - get a virtual block for the given size and alignment. |
578 | * |
579 | * @hdev: pointer to the habanalabs device structure. |
580 | * @va_range: pointer to the virtual addresses range. |
581 | * @size: requested block size. |
582 | * @hint_addr: hint for requested address by the user. |
583 | * @va_block_align: required alignment of the virtual block start address. |
584 | * @range_type: va range type (host, dram) |
585 | * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT |
586 | * |
587 | * This function does the following: |
588 | * - Iterate on the virtual block list to find a suitable virtual block for the |
589 | * given size, hint address and alignment. |
590 | * - Reserve the requested block and update the list. |
591 | * - Return the start address of the virtual block. |
592 | */ |
593 | static u64 get_va_block(struct hl_device *hdev, |
594 | struct hl_va_range *va_range, |
595 | u64 size, u64 hint_addr, u32 va_block_align, |
596 | enum hl_va_range_type range_type, |
597 | u32 flags) |
598 | { |
599 | struct hl_vm_va_block *va_block, *new_va_block = NULL; |
600 | struct asic_fixed_properties *prop = &hdev->asic_prop; |
601 | u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end, |
602 | align_mask, reserved_valid_start = 0, reserved_valid_size = 0, |
603 | dram_hint_mask = prop->dram_hints_align_mask; |
604 | bool add_prev = false; |
	bool is_align_pow_2 = is_power_of_2(va_range->page_size);
	bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
607 | bool force_hint = flags & HL_MEM_FORCE_HINT; |
608 | int rc; |
609 | |
610 | if (is_align_pow_2) |
611 | align_mask = ~((u64)va_block_align - 1); |
612 | else |
613 | /* |
614 | * with non-power-of-2 range we work only with page granularity |
615 | * and the start address is page aligned, |
616 | * so no need for alignment checking. |
617 | */ |
618 | size = DIV_ROUND_UP_ULL(size, va_range->page_size) * |
619 | va_range->page_size; |
620 | |
621 | tmp_hint_addr = hint_addr & ~dram_hint_mask; |
622 | |
623 | /* Check if we need to ignore hint address */ |
624 | if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) || |
625 | (!is_align_pow_2 && is_hint_dram_addr && |
626 | do_div(tmp_hint_addr, va_range->page_size))) { |
627 | |
628 | if (force_hint) { |
629 | /* Hint must be respected, so here we just fail */ |
630 | dev_err(hdev->dev, |
631 | "Hint address 0x%llx is not page aligned - cannot be respected\n" , |
632 | hint_addr); |
633 | return 0; |
634 | } |
635 | |
636 | dev_dbg(hdev->dev, |
637 | "Hint address 0x%llx will be ignored because it is not aligned\n" , |
638 | hint_addr); |
639 | hint_addr = 0; |
640 | } |
641 | |
642 | mutex_lock(&va_range->lock); |
643 | |
	print_va_list_locked(hdev, &va_range->list);
645 | |
646 | list_for_each_entry(va_block, &va_range->list, node) { |
647 | /* Calc the first possible aligned addr */ |
648 | valid_start = va_block->start; |
649 | |
650 | if (is_align_pow_2 && (valid_start & (va_block_align - 1))) { |
651 | valid_start &= align_mask; |
652 | valid_start += va_block_align; |
653 | if (valid_start > va_block->end) |
654 | continue; |
655 | } |
656 | |
657 | valid_size = va_block->end - valid_start + 1; |
658 | if (valid_size < size) |
659 | continue; |
660 | |
661 | /* |
662 | * In case hint address is 0, and hints_range_reservation |
663 | * property enabled, then avoid allocating va blocks from the |
664 | * range reserved for hint addresses |
665 | */ |
666 | if (prop->hints_range_reservation && !hint_addr) |
			if (is_hint_crossing_range(range_type, valid_start,
					size, prop))
669 | continue; |
670 | |
671 | /* Pick the minimal length block which has the required size */ |
672 | if (!new_va_block || (valid_size < reserved_valid_size)) { |
673 | new_va_block = va_block; |
674 | reserved_valid_start = valid_start; |
675 | reserved_valid_size = valid_size; |
676 | } |
677 | |
678 | if (hint_addr && hint_addr >= valid_start && |
679 | (hint_addr + size) <= va_block->end) { |
680 | new_va_block = va_block; |
681 | reserved_valid_start = hint_addr; |
682 | reserved_valid_size = valid_size; |
683 | break; |
684 | } |
685 | } |
686 | |
687 | if (!new_va_block) { |
688 | dev_err(hdev->dev, "no available va block for size %llu\n" , |
689 | size); |
690 | goto out; |
691 | } |
692 | |
693 | if (force_hint && reserved_valid_start != hint_addr) { |
694 | /* Hint address must be respected. If we are here - this means |
695 | * we could not respect it. |
696 | */ |
697 | dev_err(hdev->dev, |
698 | "Hint address 0x%llx could not be respected\n" , |
699 | hint_addr); |
700 | reserved_valid_start = 0; |
701 | goto out; |
702 | } |
703 | |
704 | /* |
705 | * Check if there is some leftover range due to reserving the new |
706 | * va block, then return it to the main virtual addresses list. |
707 | */ |
708 | if (reserved_valid_start > new_va_block->start) { |
709 | prev_start = new_va_block->start; |
710 | prev_end = reserved_valid_start - 1; |
711 | |
712 | new_va_block->start = reserved_valid_start; |
713 | new_va_block->size = reserved_valid_size; |
714 | |
715 | add_prev = true; |
716 | } |
717 | |
718 | if (new_va_block->size > size) { |
719 | new_va_block->start += size; |
720 | new_va_block->size = new_va_block->end - new_va_block->start + 1; |
721 | } else { |
		list_del(&new_va_block->node);
		kfree(new_va_block);
724 | } |
725 | |
726 | if (add_prev) { |
		rc = add_va_block_locked(hdev, &va_range->list, prev_start, prev_end);
728 | if (rc) { |
729 | reserved_valid_start = 0; |
730 | goto out; |
731 | } |
732 | } |
733 | |
	print_va_list_locked(hdev, &va_range->list);
out:
	mutex_unlock(&va_range->lock);
737 | |
738 | return reserved_valid_start; |
739 | } |
740 | |
741 | /* |
742 | * hl_reserve_va_block() - reserve a virtual block of a given size. |
743 | * @hdev: pointer to the habanalabs device structure. |
744 | * @ctx: current context |
745 | * @type: virtual addresses range type. |
746 | * @size: requested block size. |
747 | * @alignment: required alignment in bytes of the virtual block start address, |
748 | * 0 means no alignment. |
749 | * |
750 | * This function does the following: |
751 | * - Iterate on the virtual block list to find a suitable virtual block for the |
752 | * given size and alignment. |
753 | * - Reserve the requested block and update the list. |
754 | * - Return the start address of the virtual block. |
755 | */ |
756 | u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, |
757 | enum hl_va_range_type type, u64 size, u32 alignment) |
758 | { |
	return get_va_block(hdev, ctx->va_range[type], size, 0,
			max(alignment, ctx->va_range[type]->page_size),
			type, 0);
762 | } |
763 | |
764 | /** |
765 | * hl_get_va_range_type() - get va_range type for the given address and size. |
766 | * @ctx: context to fetch va_range from. |
767 | * @address: the start address of the area we want to validate. |
768 | * @size: the size in bytes of the area we want to validate. |
769 | * @type: returned va_range type. |
770 | * |
 * Return: 0 if the area is inside a valid range, -EINVAL otherwise.
772 | */ |
773 | static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size, |
774 | enum hl_va_range_type *type) |
775 | { |
776 | int i; |
777 | |
778 | for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) { |
		if (hl_mem_area_inside_range(address, size,
					ctx->va_range[i]->start_addr,
					ctx->va_range[i]->end_addr)) {
782 | *type = i; |
783 | return 0; |
784 | } |
785 | } |
786 | |
787 | return -EINVAL; |
788 | } |
789 | |
790 | /** |
791 | * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block. |
792 | * @hdev: pointer to the habanalabs device structure |
793 | * @ctx: pointer to the context structure. |
794 | * @start_addr: start virtual address. |
795 | * @size: number of bytes to unreserve. |
796 | * |
797 | * This function does the following: |
798 | * - Takes the list lock and calls add_va_block_locked. |
799 | */ |
800 | int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, |
801 | u64 start_addr, u64 size) |
802 | { |
803 | enum hl_va_range_type type; |
804 | int rc; |
805 | |
	rc = hl_get_va_range_type(ctx, start_addr, size, &type);
807 | if (rc) { |
808 | dev_err(hdev->dev, |
809 | "cannot find va_range for va %#llx size %llu" , |
810 | start_addr, size); |
811 | return rc; |
812 | } |
813 | |
	rc = add_va_block(hdev, ctx->va_range[type], start_addr,
			start_addr + size - 1);
816 | if (rc) |
817 | dev_warn(hdev->dev, |
818 | "add va block failed for vaddr: 0x%llx\n" , start_addr); |
819 | |
820 | return rc; |
821 | } |
822 | |
823 | /** |
824 | * init_phys_pg_pack_from_userptr() - initialize physical page pack from host |
825 | * memory |
826 | * @ctx: pointer to the context structure. |
827 | * @userptr: userptr to initialize from. |
828 | * @pphys_pg_pack: result pointer. |
829 | * @force_regular_page: tell the function to ignore huge page optimization, |
830 | * even if possible. Needed for cases where the device VA |
831 | * is allocated before we know the composition of the |
832 | * physical pages |
833 | * |
834 | * This function does the following: |
835 | * - Create a physical page pack from the physical pages related to the given |
836 | * virtual block. |
837 | */ |
838 | static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, |
839 | struct hl_userptr *userptr, |
840 | struct hl_vm_phys_pg_pack **pphys_pg_pack, |
841 | bool force_regular_page) |
842 | { |
843 | u32 npages, page_size = PAGE_SIZE, |
844 | huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; |
845 | u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); |
846 | struct hl_vm_phys_pg_pack *phys_pg_pack; |
847 | bool first = true, is_huge_page_opt; |
848 | u64 page_mask, total_npages; |
849 | struct scatterlist *sg; |
850 | dma_addr_t dma_addr; |
851 | int rc, i, j; |
852 | |
	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
854 | if (!phys_pg_pack) |
855 | return -ENOMEM; |
856 | |
857 | phys_pg_pack->vm_type = userptr->vm_type; |
858 | phys_pg_pack->created_from_userptr = true; |
859 | phys_pg_pack->asid = ctx->asid; |
	atomic_set(&phys_pg_pack->mapping_cnt, 1);
861 | |
862 | is_huge_page_opt = (force_regular_page ? false : true); |
863 | |
	/* Only if all dma_addrs are aligned to 2MB and their
	 * sizes are at least 2MB, we can use huge page mapping.
866 | * We limit the 2MB optimization to this condition, |
867 | * since later on we acquire the related VA range as one |
868 | * consecutive block. |
869 | */ |
870 | total_npages = 0; |
871 | for_each_sgtable_dma_sg(userptr->sgt, sg, i) { |
		npages = hl_get_sg_info(sg, &dma_addr);
873 | |
874 | total_npages += npages; |
875 | |
876 | if ((npages % pgs_in_huge_page) || |
877 | (dma_addr & (huge_page_size - 1))) |
878 | is_huge_page_opt = false; |
879 | } |
880 | |
881 | if (is_huge_page_opt) { |
882 | page_size = huge_page_size; |
883 | do_div(total_npages, pgs_in_huge_page); |
884 | } |
885 | |
886 | page_mask = ~(((u64) page_size) - 1); |
887 | |
	phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
						GFP_KERNEL);
890 | if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { |
891 | rc = -ENOMEM; |
892 | goto page_pack_arr_mem_err; |
893 | } |
894 | |
895 | phys_pg_pack->npages = total_npages; |
896 | phys_pg_pack->page_size = page_size; |
897 | phys_pg_pack->total_size = total_npages * page_size; |
898 | |
899 | j = 0; |
900 | for_each_sgtable_dma_sg(userptr->sgt, sg, i) { |
		npages = hl_get_sg_info(sg, &dma_addr);
902 | |
903 | /* align down to physical page size and save the offset */ |
904 | if (first) { |
905 | first = false; |
906 | phys_pg_pack->offset = dma_addr & (page_size - 1); |
907 | dma_addr &= page_mask; |
908 | } |
909 | |
910 | while (npages) { |
911 | phys_pg_pack->pages[j++] = dma_addr; |
912 | dma_addr += page_size; |
913 | |
914 | if (is_huge_page_opt) |
915 | npages -= pgs_in_huge_page; |
916 | else |
917 | npages--; |
918 | } |
919 | } |
920 | |
921 | *pphys_pg_pack = phys_pg_pack; |
922 | |
923 | return 0; |
924 | |
925 | page_pack_arr_mem_err: |
	kfree(phys_pg_pack);
927 | |
928 | return rc; |
929 | } |
930 | |
931 | /** |
 * map_phys_pg_pack() - maps the physical page pack.
 * @ctx: pointer to the context structure.
 * @vaddr: start address of the virtual area to map from.
 * @phys_pg_pack: the pack of physical pages to map to.
 *
 * This function does the following:
 * - Maps each chunk of virtual memory to a matching physical chunk.
 * - Returns 0 on success, error code otherwise.
941 | */ |
942 | static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, |
943 | struct hl_vm_phys_pg_pack *phys_pg_pack) |
944 | { |
945 | struct hl_device *hdev = ctx->hdev; |
946 | u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; |
947 | u32 page_size = phys_pg_pack->page_size; |
948 | int rc = 0; |
949 | bool is_host_addr; |
950 | |
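	/*
	 * Map page by page; the PTE flush is requested only for the last page in
	 * the pack rather than after every update.
	 */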
951 | for (i = 0 ; i < phys_pg_pack->npages ; i++) { |
952 | paddr = phys_pg_pack->pages[i]; |
953 | |
		rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
				(i + 1) == phys_pg_pack->npages);
956 | if (rc) { |
957 | dev_err(hdev->dev, |
958 | "map failed (%d) for handle %u, npages: %llu, mapped: %llu\n" , |
959 | rc, phys_pg_pack->handle, phys_pg_pack->npages, |
960 | mapped_pg_cnt); |
961 | goto err; |
962 | } |
963 | |
964 | mapped_pg_cnt++; |
965 | next_vaddr += page_size; |
966 | } |
967 | |
968 | return 0; |
969 | |
970 | err: |
	is_host_addr = !hl_is_dram_va(hdev, vaddr);
972 | |
973 | next_vaddr = vaddr; |
974 | for (i = 0 ; i < mapped_pg_cnt ; i++) { |
		if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
					(i + 1) == mapped_pg_cnt))
977 | dev_warn_ratelimited(hdev->dev, |
978 | "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n" , |
979 | phys_pg_pack->handle, next_vaddr, |
980 | phys_pg_pack->pages[i], page_size); |
981 | |
982 | next_vaddr += page_size; |
983 | |
984 | /* |
985 | * unmapping on Palladium can be really long, so avoid a CPU |
986 | * soft lockup bug by sleeping a little between unmapping pages |
987 | * |
988 | * In addition, on host num of pages could be huge, |
989 | * because page size could be 4KB, so when unmapping host |
990 | * pages sleep every 32K pages to avoid soft lockup |
991 | */ |
992 | if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) |
			usleep_range(50, 200);
994 | } |
995 | |
996 | return rc; |
997 | } |
998 | |
999 | /** |
1000 | * unmap_phys_pg_pack() - unmaps the physical page pack. |
1001 | * @ctx: pointer to the context structure. |
1002 | * @vaddr: start address of the virtual area to unmap. |
1003 | * @phys_pg_pack: the pack of physical pages to unmap. |
1004 | */ |
1005 | static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, |
1006 | struct hl_vm_phys_pg_pack *phys_pg_pack) |
1007 | { |
1008 | struct hl_device *hdev = ctx->hdev; |
1009 | u64 next_vaddr, i; |
1010 | bool is_host_addr; |
1011 | u32 page_size; |
1012 | |
	is_host_addr = !hl_is_dram_va(hdev, vaddr);
1014 | page_size = phys_pg_pack->page_size; |
1015 | next_vaddr = vaddr; |
1016 | |
1017 | for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { |
		if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
				(i + 1) == phys_pg_pack->npages))
1020 | dev_warn_ratelimited(hdev->dev, |
1021 | "unmap failed for vaddr: 0x%llx\n" , next_vaddr); |
1022 | |
1023 | /* |
1024 | * unmapping on Palladium can be really long, so avoid a CPU |
1025 | * soft lockup bug by sleeping a little between unmapping pages |
1026 | * |
1027 | * In addition, on host num of pages could be huge, |
1028 | * because page size could be 4KB, so when unmapping host |
1029 | * pages sleep every 32K pages to avoid soft lockup |
1030 | */ |
1031 | if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) |
			usleep_range(50, 200);
1033 | } |
1034 | } |
1035 | |
1036 | /** |
1037 | * map_device_va() - map the given memory. |
1038 | * @ctx: pointer to the context structure. |
1039 | * @args: host parameters with handle/host virtual address. |
1040 | * @device_addr: pointer to result device virtual address. |
1041 | * |
1042 | * This function does the following: |
1043 | * - If given a physical device memory handle, map to a device virtual block |
1044 | * and return the start address of this block. |
1045 | * - If given a host virtual address and size, find the related physical pages, |
 *   map a device virtual block to these pages and return the start address of
1047 | * this block. |
1048 | */ |
1049 | static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr) |
1050 | { |
1051 | struct hl_vm_phys_pg_pack *phys_pg_pack; |
1052 | enum hl_va_range_type va_range_type = 0; |
1053 | struct hl_device *hdev = ctx->hdev; |
1054 | struct hl_userptr *userptr = NULL; |
1055 | u32 handle = 0, va_block_align; |
1056 | struct hl_vm_hash_node *hnode; |
1057 | struct hl_vm *vm = &hdev->vm; |
1058 | struct hl_va_range *va_range; |
1059 | bool is_userptr, do_prefetch; |
1060 | u64 ret_vaddr, hint_addr; |
1061 | enum vm_type *vm_type; |
1062 | int rc; |
1063 | |
1064 | /* set map flags */ |
1065 | is_userptr = args->flags & HL_MEM_USERPTR; |
1066 | do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH); |
1067 | |
1068 | /* Assume failure */ |
1069 | *device_addr = 0; |
1070 | |
1071 | if (is_userptr) { |
1072 | u64 addr = args->map_host.host_virt_addr, |
1073 | size = args->map_host.mem_size; |
1074 | u32 page_size = hdev->asic_prop.pmmu.page_size, |
1075 | huge_page_size = hdev->asic_prop.pmmu_huge.page_size; |
1076 | |
		rc = dma_map_host_va(hdev, addr, size, &userptr);
1078 | if (rc) |
1079 | return rc; |
1080 | |
		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
						&phys_pg_pack, false);
1083 | if (rc) { |
1084 | dev_err(hdev->dev, |
1085 | "unable to init page pack for vaddr 0x%llx\n" , |
1086 | addr); |
1087 | goto init_page_pack_err; |
1088 | } |
1089 | |
1090 | vm_type = (enum vm_type *) userptr; |
1091 | hint_addr = args->map_host.hint_addr; |
1092 | handle = phys_pg_pack->handle; |
1093 | |
1094 | /* get required alignment */ |
1095 | if (phys_pg_pack->page_size == page_size) { |
1096 | va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; |
1097 | va_range_type = HL_VA_RANGE_TYPE_HOST; |
1098 | /* |
1099 | * huge page alignment may be needed in case of regular |
1100 | * page mapping, depending on the host VA alignment |
1101 | */ |
1102 | if (addr & (huge_page_size - 1)) |
1103 | va_block_align = page_size; |
1104 | else |
1105 | va_block_align = huge_page_size; |
1106 | } else { |
1107 | /* |
1108 | * huge page alignment is needed in case of huge page |
1109 | * mapping |
1110 | */ |
1111 | va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; |
1112 | va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE; |
1113 | va_block_align = huge_page_size; |
1114 | } |
1115 | } else { |
1116 | handle = lower_32_bits(args->map_device.handle); |
1117 | |
		spin_lock(&vm->idr_lock);
		phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
		if (!phys_pg_pack) {
			spin_unlock(&vm->idr_lock);
1122 | dev_err(hdev->dev, |
1123 | "no match for handle %u\n" , handle); |
1124 | return -EINVAL; |
1125 | } |
1126 | |
1127 | /* increment now to avoid freeing device memory while mapping */ |
		atomic_inc(&phys_pg_pack->mapping_cnt);

		spin_unlock(&vm->idr_lock);
1131 | |
1132 | vm_type = (enum vm_type *) phys_pg_pack; |
1133 | |
1134 | hint_addr = args->map_device.hint_addr; |
1135 | |
1136 | /* DRAM VA alignment is the same as the MMU page size */ |
1137 | va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; |
1138 | va_range_type = HL_VA_RANGE_TYPE_DRAM; |
1139 | va_block_align = hdev->asic_prop.dmmu.page_size; |
1140 | } |
1141 | |
1142 | /* |
1143 | * relevant for mapping device physical memory only, as host memory is |
1144 | * implicitly shared |
1145 | */ |
1146 | if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) && |
1147 | phys_pg_pack->asid != ctx->asid) { |
1148 | dev_err(hdev->dev, |
1149 | "Failed to map memory, handle %u is not shared\n" , |
1150 | handle); |
1151 | rc = -EPERM; |
1152 | goto shared_err; |
1153 | } |
1154 | |
	hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
1156 | if (!hnode) { |
1157 | rc = -ENOMEM; |
1158 | goto hnode_err; |
1159 | } |
1160 | |
1161 | if (hint_addr && phys_pg_pack->offset) { |
1162 | if (args->flags & HL_MEM_FORCE_HINT) { |
1163 | /* Fail if hint must be respected but it can't be */ |
1164 | dev_err(hdev->dev, |
1165 | "Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n" , |
1166 | hint_addr, phys_pg_pack->offset); |
1167 | rc = -EINVAL; |
1168 | goto va_block_err; |
1169 | } |
1170 | dev_dbg(hdev->dev, |
1171 | "Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n" , |
1172 | hint_addr, phys_pg_pack->offset); |
1173 | } |
1174 | |
	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
					hint_addr, va_block_align,
					va_range_type, args->flags);
1178 | if (!ret_vaddr) { |
1179 | dev_err(hdev->dev, "no available va block for handle %u\n" , |
1180 | handle); |
1181 | rc = -ENOMEM; |
1182 | goto va_block_err; |
1183 | } |
1184 | |
1185 | mutex_lock(&hdev->mmu_lock); |
1186 | |
	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
	if (rc) {
		dev_err(hdev->dev, "mapping page pack failed (%d) for handle %u\n",
			rc, handle);
		mutex_unlock(&hdev->mmu_lock);
		goto map_err;
	}

	rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
				ctx->asid, ret_vaddr, phys_pg_pack->total_size);
	mutex_unlock(&hdev->mmu_lock);
1198 | if (rc) |
1199 | goto map_err; |
1200 | |
	/*
	 * Prefetch is done upon the user's request. It is performed in a WQ and thus
	 * can be done outside the MMU lock. The operation itself is already protected
	 * by the MMU lock.
	 */
	if (do_prefetch) {
		rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr,
						phys_pg_pack->total_size);
1208 | if (rc) |
1209 | goto map_err; |
1210 | } |
1211 | |
1212 | ret_vaddr += phys_pg_pack->offset; |
1213 | |
1214 | hnode->ptr = vm_type; |
1215 | hnode->vaddr = ret_vaddr; |
1216 | hnode->handle = is_userptr ? MEM_HANDLE_INVALID : handle; |
1217 | |
1218 | mutex_lock(&ctx->mem_hash_lock); |
1219 | hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); |
	mutex_unlock(&ctx->mem_hash_lock);
1221 | |
1222 | *device_addr = ret_vaddr; |
1223 | |
1224 | if (is_userptr) |
1225 | free_phys_pg_pack(hdev, phys_pg_pack); |
1226 | |
1227 | return rc; |
1228 | |
1229 | map_err: |
	if (add_va_block(hdev, va_range, ret_vaddr,
				ret_vaddr + phys_pg_pack->total_size - 1))
		dev_warn(hdev->dev,
			"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
			handle, ret_vaddr);

va_block_err:
	kfree(hnode);
hnode_err:
shared_err:
	atomic_dec(&phys_pg_pack->mapping_cnt);
1241 | if (is_userptr) |
1242 | free_phys_pg_pack(hdev, phys_pg_pack); |
1243 | init_page_pack_err: |
1244 | if (is_userptr) |
1245 | dma_unmap_host_va(hdev, userptr); |
1246 | |
1247 | return rc; |
1248 | } |
1249 | |
1250 | /* Should be called while the context's mem_hash_lock is taken */ |
1251 | static struct hl_vm_hash_node *get_vm_hash_node_locked(struct hl_ctx *ctx, u64 vaddr) |
1252 | { |
1253 | struct hl_vm_hash_node *hnode; |
1254 | |
1255 | hash_for_each_possible(ctx->mem_hash, hnode, node, vaddr) |
1256 | if (vaddr == hnode->vaddr) |
1257 | return hnode; |
1258 | |
1259 | return NULL; |
1260 | } |
1261 | |
1262 | /** |
1263 | * unmap_device_va() - unmap the given device virtual address. |
1264 | * @ctx: pointer to the context structure. |
1265 | * @args: host parameters with device virtual address to unmap. |
1266 | * @ctx_free: true if in context free flow, false otherwise. |
1267 | * |
1268 | * This function does the following: |
1269 | * - unmap the physical pages related to the given virtual address. |
1270 | * - return the device virtual block to the virtual block list. |
1271 | */ |
1272 | static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, |
1273 | bool ctx_free) |
1274 | { |
1275 | struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; |
1276 | u64 vaddr = args->unmap.device_virt_addr; |
1277 | struct asic_fixed_properties *prop; |
1278 | struct hl_device *hdev = ctx->hdev; |
1279 | struct hl_userptr *userptr = NULL; |
1280 | struct hl_vm_hash_node *hnode; |
1281 | struct hl_va_range *va_range; |
1282 | enum vm_type *vm_type; |
1283 | bool is_userptr; |
1284 | int rc = 0; |
1285 | |
1286 | prop = &hdev->asic_prop; |
1287 | |
1288 | /* protect from double entrance */ |
1289 | mutex_lock(&ctx->mem_hash_lock); |
1290 | hnode = get_vm_hash_node_locked(ctx, vaddr); |
1291 | if (!hnode) { |
		mutex_unlock(&ctx->mem_hash_lock);
		dev_err(hdev->dev, "unmap failed, no mem hnode for vaddr 0x%llx\n", vaddr);
1294 | return -EINVAL; |
1295 | } |
1296 | |
1297 | if (hnode->export_cnt) { |
		mutex_unlock(&ctx->mem_hash_lock);
		dev_err(hdev->dev, "failed to unmap %#llx, memory is exported\n", vaddr);
1300 | return -EINVAL; |
1301 | } |
1302 | |
	hash_del(&hnode->node);
	mutex_unlock(&ctx->mem_hash_lock);
1305 | |
1306 | vm_type = hnode->ptr; |
1307 | |
1308 | if (*vm_type == VM_TYPE_USERPTR) { |
1309 | is_userptr = true; |
1310 | userptr = hnode->ptr; |
1311 | |
		rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
							false);
1314 | if (rc) { |
1315 | dev_err(hdev->dev, |
1316 | "unable to init page pack for vaddr 0x%llx\n" , |
1317 | vaddr); |
1318 | goto vm_type_err; |
1319 | } |
1320 | |
1321 | if (phys_pg_pack->page_size == |
1322 | hdev->asic_prop.pmmu.page_size) |
1323 | va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; |
1324 | else |
1325 | va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; |
1326 | } else if (*vm_type == VM_TYPE_PHYS_PACK) { |
1327 | is_userptr = false; |
1328 | va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; |
1329 | phys_pg_pack = hnode->ptr; |
1330 | } else { |
1331 | dev_warn(hdev->dev, |
1332 | "unmap failed, unknown vm desc for vaddr 0x%llx\n" , |
1333 | vaddr); |
1334 | rc = -EFAULT; |
1335 | goto vm_type_err; |
1336 | } |
1337 | |
	if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
		dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
1340 | rc = -EINVAL; |
1341 | goto mapping_cnt_err; |
1342 | } |
1343 | |
	if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
1345 | vaddr = prop->dram_base_address + |
1346 | DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address, |
1347 | phys_pg_pack->page_size) * |
1348 | phys_pg_pack->page_size; |
1349 | else |
1350 | vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); |
1351 | |
1352 | mutex_lock(&hdev->mmu_lock); |
1353 | |
1354 | unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); |
1355 | |
1356 | /* |
1357 | * During context free this function is called in a loop to clean all |
1358 | * the context mappings. Hence the cache invalidation can be called once |
1359 | * at the loop end rather than for each iteration |
1360 | */ |
1361 | if (!ctx_free) |
		rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
							phys_pg_pack->total_size);

	mutex_unlock(&hdev->mmu_lock);
1366 | |
1367 | /* |
1368 | * If the context is closing we don't need to check for the MMU cache |
1369 | * invalidation return code and update the VA free list as in this flow |
1370 | * we invalidate the MMU cache outside of this unmap function and the VA |
1371 | * free list will be freed anyway. |
1372 | */ |
1373 | if (!ctx_free) { |
1374 | int tmp_rc; |
1375 | |
		tmp_rc = add_va_block(hdev, va_range, vaddr,
					vaddr + phys_pg_pack->total_size - 1);
1378 | if (tmp_rc) { |
1379 | dev_warn(hdev->dev, |
1380 | "add va block failed for vaddr: 0x%llx\n" , |
1381 | vaddr); |
1382 | if (!rc) |
1383 | rc = tmp_rc; |
1384 | } |
1385 | } |
1386 | |
	atomic_dec(&phys_pg_pack->mapping_cnt);
	kfree(hnode);
1389 | |
1390 | if (is_userptr) { |
1391 | free_phys_pg_pack(hdev, phys_pg_pack); |
1392 | dma_unmap_host_va(hdev, userptr); |
1393 | } |
1394 | |
1395 | return rc; |
1396 | |
1397 | mapping_cnt_err: |
1398 | if (is_userptr) |
1399 | free_phys_pg_pack(hdev, phys_pg_pack); |
1400 | vm_type_err: |
1401 | mutex_lock(&ctx->mem_hash_lock); |
1402 | hash_add(ctx->mem_hash, &hnode->node, vaddr); |
	mutex_unlock(&ctx->mem_hash_lock);
1404 | |
1405 | return rc; |
1406 | } |
1407 | |
1408 | static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size) |
1409 | { |
1410 | u32 block_id; |
1411 | int rc; |
1412 | |
1413 | *handle = 0; |
1414 | if (size) |
1415 | *size = 0; |
1416 | |
1417 | rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); |
1418 | if (rc) |
1419 | return rc; |
1420 | |
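	/*
	 * Encode the block id together with the HW block mmap type and shift by
	 * PAGE_SHIFT, so the handle can later be passed back as an mmap offset.
	 */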
1421 | *handle = block_id | HL_MMAP_TYPE_BLOCK; |
1422 | *handle <<= PAGE_SHIFT; |
1423 | |
1424 | return 0; |
1425 | } |
1426 | |
1427 | static void hw_block_vm_close(struct vm_area_struct *vma) |
1428 | { |
1429 | struct hl_vm_hw_block_list_node *lnode = |
1430 | (struct hl_vm_hw_block_list_node *) vma->vm_private_data; |
1431 | struct hl_ctx *ctx = lnode->ctx; |
1432 | long new_mmap_size; |
1433 | |
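	/*
	 * A partial munmap only shrinks the tracked mapping size. The list node and
	 * the context reference are released only when the entire block is unmapped.
	 */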
1434 | new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start); |
1435 | if (new_mmap_size > 0) { |
1436 | lnode->mapped_size = new_mmap_size; |
1437 | return; |
1438 | } |
1439 | |
1440 | mutex_lock(&ctx->hw_block_list_lock); |
	list_del(&lnode->node);
	mutex_unlock(&ctx->hw_block_list_lock);
	hl_ctx_put(ctx);
	kfree(lnode);
1445 | vma->vm_private_data = NULL; |
1446 | } |
1447 | |
1448 | static const struct vm_operations_struct hw_block_vm_ops = { |
1449 | .close = hw_block_vm_close |
1450 | }; |
1451 | |
1452 | /** |
1453 | * hl_hw_block_mmap() - mmap a hw block to user. |
1454 | * @hpriv: pointer to the private data of the fd |
1455 | * @vma: pointer to vm_area_struct of the process |
1456 | * |
 * The driver increments the context reference for every HW block mapped in order
 * to prevent the user from closing the FD without unmapping it first.
1459 | */ |
1460 | int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) |
1461 | { |
1462 | struct hl_vm_hw_block_list_node *lnode; |
1463 | struct hl_device *hdev = hpriv->hdev; |
1464 | struct hl_ctx *ctx = hpriv->ctx; |
1465 | u32 block_id, block_size; |
1466 | int rc; |
1467 | |
1468 | /* We use the page offset to hold the block id and thus we need to clear |
1469 | * it before doing the mmap itself |
1470 | */ |
1471 | block_id = vma->vm_pgoff; |
1472 | vma->vm_pgoff = 0; |
1473 | |
1474 | /* Driver only allows mapping of a complete HW block */ |
1475 | block_size = vma->vm_end - vma->vm_start; |
1476 | |
1477 | if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) { |
1478 | dev_err(hdev->dev, |
1479 | "user pointer is invalid - 0x%lx\n" , |
1480 | vma->vm_start); |
1481 | |
1482 | return -EINVAL; |
1483 | } |
1484 | |
	lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
1486 | if (!lnode) |
1487 | return -ENOMEM; |
1488 | |
1489 | rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); |
1490 | if (rc) { |
		kfree(lnode);
1492 | return rc; |
1493 | } |
1494 | |
1495 | hl_ctx_get(ctx); |
1496 | |
1497 | lnode->ctx = ctx; |
1498 | lnode->vaddr = vma->vm_start; |
1499 | lnode->block_size = block_size; |
1500 | lnode->mapped_size = lnode->block_size; |
1501 | lnode->id = block_id; |
1502 | |
1503 | vma->vm_private_data = lnode; |
1504 | vma->vm_ops = &hw_block_vm_ops; |
1505 | |
1506 | mutex_lock(&ctx->hw_block_list_lock); |
	list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
	mutex_unlock(&ctx->hw_block_list_lock);
1509 | |
1510 | vma->vm_pgoff = block_id; |
1511 | |
1512 | return 0; |
1513 | } |
1514 | |
1515 | static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size, |
1516 | struct device *dev, enum dma_data_direction dir) |
1517 | { |
1518 | dma_addr_t addr; |
1519 | int rc; |
1520 | |
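	/*
	 * Map the device BAR region backing this chunk for access by the importing
	 * device. The resulting SG entry carries only a DMA address and length, with
	 * no CPU page behind it.
	 */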
	addr = dma_map_resource(dev, bar_address, chunk_size, dir,
			DMA_ATTR_SKIP_CPU_SYNC);
	rc = dma_mapping_error(dev, addr);
	if (rc)
		return rc;

	sg_set_page(sg, NULL, chunk_size, 0);
1528 | sg_dma_address(sg) = addr; |
1529 | sg_dma_len(sg) = chunk_size; |
1530 | |
1531 | return 0; |
1532 | } |
1533 | |
1534 | static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages, |
1535 | u64 page_size, u64 exported_size, u64 offset, |
1536 | struct device *dev, enum dma_data_direction dir) |
1537 | { |
1538 | u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page, |
1539 | left_size_in_dma_seg, device_address, bar_address, start_page; |
1540 | struct asic_fixed_properties *prop = &hdev->asic_prop; |
1541 | struct scatterlist *sg; |
1542 | unsigned int nents, i; |
1543 | struct sg_table *sgt; |
1544 | bool next_sg_entry; |
1545 | int rc; |
1546 | |
1547 | /* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */ |
1548 | dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE); |
1549 | if (dma_max_seg_size < PAGE_SIZE) { |
1550 | dev_err_ratelimited(hdev->dev, |
1551 | "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n" , |
1552 | dma_max_seg_size); |
		return ERR_PTR(-EINVAL);
1554 | } |
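	/*
	 * The table is built in two passes: first count how many SG entries are
	 * needed (a new entry starts whenever the pages are not physically
	 * contiguous or the DMA segment size limit is reached), then fill each
	 * entry with its BAR address and chunk size.
	 */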
1555 | |
	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
	if (!sgt)
		return ERR_PTR(-ENOMEM);
1559 | |
1560 | /* Use the offset to move to the actual first page that is exported */ |
1561 | for (start_page = 0 ; start_page < npages ; ++start_page) { |
1562 | if (offset < page_size) |
1563 | break; |
1564 | |
1565 | /* The offset value was validated so there can't be an underflow */ |
1566 | offset -= page_size; |
1567 | } |
1568 | |
1569 | /* Calculate the required number of entries for the SG table */ |
1570 | curr_page = start_page; |
1571 | nents = 1; |
1572 | left_size_to_export = exported_size; |
1573 | left_size_in_page = page_size - offset; |
1574 | left_size_in_dma_seg = dma_max_seg_size; |
1575 | next_sg_entry = false; |
1576 | |
1577 | while (true) { |
1578 | size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg); |
1579 | left_size_to_export -= size; |
1580 | left_size_in_page -= size; |
1581 | left_size_in_dma_seg -= size; |
1582 | |
1583 | if (!left_size_to_export) |
1584 | break; |
1585 | |
1586 | if (!left_size_in_page) { |
1587 | /* left_size_to_export is not zero so there must be another page */ |
1588 | if (pages[curr_page] + page_size != pages[curr_page + 1]) |
1589 | next_sg_entry = true; |
1590 | |
1591 | ++curr_page; |
1592 | left_size_in_page = page_size; |
1593 | } |
1594 | |
1595 | if (!left_size_in_dma_seg) { |
1596 | next_sg_entry = true; |
1597 | left_size_in_dma_seg = dma_max_seg_size; |
1598 | } |
1599 | |
1600 | if (next_sg_entry) { |
1601 | ++nents; |
1602 | next_sg_entry = false; |
1603 | } |
1604 | } |
1605 | |
1606 | rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); |
1607 | if (rc) |
1608 | goto err_free_sgt; |
1609 | |
1610 | /* Prepare the SG table entries */ |
1611 | curr_page = start_page; |
1612 | device_address = pages[curr_page] + offset; |
1613 | left_size_to_export = exported_size; |
1614 | left_size_in_page = page_size - offset; |
1615 | left_size_in_dma_seg = dma_max_seg_size; |
1616 | next_sg_entry = false; |
1617 | |
1618 | for_each_sgtable_dma_sg(sgt, sg, i) { |
1619 | bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address); |
1620 | chunk_size = 0; |
1621 | |
1622 | for ( ; curr_page < npages ; ++curr_page) { |
1623 | size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg); |
1624 | chunk_size += size; |
1625 | left_size_to_export -= size; |
1626 | left_size_in_page -= size; |
1627 | left_size_in_dma_seg -= size; |
1628 | |
1629 | if (!left_size_to_export) |
1630 | break; |
1631 | |
1632 | if (!left_size_in_page) { |
1633 | /* left_size_to_export is not zero so there must be another page */ |
1634 | if (pages[curr_page] + page_size != pages[curr_page + 1]) { |
1635 | device_address = pages[curr_page + 1]; |
1636 | next_sg_entry = true; |
1637 | } |
1638 | |
1639 | left_size_in_page = page_size; |
1640 | } |
1641 | |
1642 | if (!left_size_in_dma_seg) { |
1643 | /* |
1644 | * Skip setting a new device address if already moving to a page |
1645 | * which is not contiguous with the current page. |
1646 | */ |
1647 | if (!next_sg_entry) { |
1648 | device_address += chunk_size; |
1649 | next_sg_entry = true; |
1650 | } |
1651 | |
1652 | left_size_in_dma_seg = dma_max_seg_size; |
1653 | } |
1654 | |
1655 | if (next_sg_entry) { |
1656 | next_sg_entry = false; |
1657 | break; |
1658 | } |
1659 | } |
1660 | |
1661 | rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); |
1662 | if (rc) |
1663 | goto err_unmap; |
1664 | } |
1665 | |
1666 | /* There should be nothing left to export exactly after looping over all SG elements */ |
1667 | if (left_size_to_export) { |
1668 | dev_err(hdev->dev, |
1669 | "left size to export %#llx after initializing %u SG elements\n", |
1670 | left_size_to_export, sgt->nents); |
1671 | rc = -ENOMEM; |
1672 | goto err_unmap; |
1673 | } |
1674 | |
1675 | /* |
1676 | * Because we are not going to include a CPU scatterlist, set orig_nents to 0 and use only |
1677 | * nents (the length of the DMA list). This gives other users going over the SG table a |
1678 | * chance to detect that there is no CPU list behind it. |
1679 | */ |
1680 | sgt->orig_nents = 0; |
1681 | |
1682 | dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n", |
1683 | nents, dev_name(dev)); |
1684 | for_each_sgtable_dma_sg(sgt, sg, i) |
1685 | dev_dbg(hdev->dev, |
1686 | "SG entry %d: address %#llx, length %#x\n", |
1687 | i, sg_dma_address(sg), sg_dma_len(sg)); |
1688 | |
1689 | return sgt; |
1690 | |
1691 | err_unmap: |
1692 | for_each_sgtable_dma_sg(sgt, sg, i) { |
1693 | if (!sg_dma_len(sg)) |
1694 | continue; |
1695 | |
1696 | dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir, |
1697 | DMA_ATTR_SKIP_CPU_SYNC); |
1698 | } |
1699 | |
1700 | sg_free_table(sgt); |
1701 | |
1702 | err_free_sgt: |
1703 | kfree(sgt); |
1704 | return ERR_PTR(rc); |
1705 | } |
1706 | |
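/*
 * hl_dmabuf_attach() - dma-buf attach callback.
 * Checks the PCI peer-to-peer distance between the device and the importer.
 * If P2P is not possible, only attachment->peer2peer is cleared (mapping will
 * then be refused in hl_map_dmabuf()) rather than failing the attach itself.
 */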
1707 | static int hl_dmabuf_attach(struct dma_buf *dmabuf, |
1708 | struct dma_buf_attachment *attachment) |
1709 | { |
1710 | struct hl_dmabuf_priv *hl_dmabuf; |
1711 | struct hl_device *hdev; |
1712 | int rc; |
1713 | |
1714 | hl_dmabuf = dmabuf->priv; |
1715 | hdev = hl_dmabuf->ctx->hdev; |
1716 | |
1717 | rc = pci_p2pdma_distance(hdev->pdev, attachment->dev, true); |
1718 | |
1719 | if (rc < 0) |
1720 | attachment->peer2peer = false; |
1721 | return 0; |
1722 | } |
1723 | |
1724 | static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, |
1725 | enum dma_data_direction dir) |
1726 | { |
1727 | u64 *pages, npages, page_size, exported_size, offset; |
1728 | struct dma_buf *dma_buf = attachment->dmabuf; |
1729 | struct hl_vm_phys_pg_pack *phys_pg_pack; |
1730 | struct hl_dmabuf_priv *hl_dmabuf; |
1731 | struct hl_device *hdev; |
1732 | struct sg_table *sgt; |
1733 | |
1734 | hl_dmabuf = dma_buf->priv; |
1735 | hdev = hl_dmabuf->ctx->hdev; |
1736 | |
1737 | if (!attachment->peer2peer) { |
1738 | dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n"); |
1739 | return ERR_PTR(-EPERM); |
1740 | } |
1741 | |
1742 | exported_size = hl_dmabuf->dmabuf->size; |
1743 | offset = hl_dmabuf->offset; |
1744 | phys_pg_pack = hl_dmabuf->phys_pg_pack; |
1745 | |
1746 | if (phys_pg_pack) { |
1747 | pages = phys_pg_pack->pages; |
1748 | npages = phys_pg_pack->npages; |
1749 | page_size = phys_pg_pack->page_size; |
1750 | } else { |
1751 | pages = &hl_dmabuf->device_phys_addr; |
1752 | npages = 1; |
1753 | page_size = hl_dmabuf->dmabuf->size; |
1754 | } |
1755 | |
1756 | sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset, |
1757 | attachment->dev, dir); |
1758 | if (IS_ERR(sgt)) |
1759 | dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt)); |
1760 | |
1761 | return sgt; |
1762 | } |
1763 | |
1764 | static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment, |
1765 | struct sg_table *sgt, |
1766 | enum dma_data_direction dir) |
1767 | { |
1768 | struct scatterlist *sg; |
1769 | int i; |
1770 | |
1771 | /* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives |
1772 | * only in the 'device' domain (after all, it maps a PCI bar address which points to the |
1773 | * device memory). |
1774 | * |
1775 | * Therefore, it was never in the 'CPU' domain and hence, there is no need to perform |
1776 | * a sync of the memory to the CPU's cache, as it never resided inside that cache. |
1777 | */ |
1778 | for_each_sgtable_dma_sg(sgt, sg, i) |
1779 | dma_unmap_resource(attachment->dev, sg_dma_address(sg), |
1780 | sg_dma_len(sg), dir, |
1781 | DMA_ATTR_SKIP_CPU_SYNC); |
1782 | |
1783 | /* Need to restore orig_nents because sg_free_table() uses that field */ |
1784 | sgt->orig_nents = sgt->nents; |
1785 | sg_free_table(sgt); |
1786 | kfree(sgt); |
1787 | } |
1788 | |
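/*
 * memhash_node_export_get() - find the VM hash node of a mapped device VA and
 * increase its export count, so the mapping cannot be unmapped and the hash node
 * cannot be deleted while a dma-buf exported from it is alive.
 * Must be balanced with memhash_node_export_put().
 */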
1789 | static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr) |
1790 | { |
1791 | struct hl_device *hdev = ctx->hdev; |
1792 | struct hl_vm_hash_node *hnode; |
1793 | |
1794 | /* get the memory handle */ |
1795 | mutex_lock(&ctx->mem_hash_lock); |
1796 | hnode = get_vm_hash_node_locked(ctx, addr); |
1797 | if (!hnode) { |
1798 | mutex_unlock(&ctx->mem_hash_lock); |
1799 | dev_dbg(hdev->dev, "map address %#llx not found\n", addr); |
1800 | return ERR_PTR(-EINVAL); |
1801 | } |
1802 | |
1803 | if (upper_32_bits(hnode->handle)) { |
1804 | mutex_unlock(&ctx->mem_hash_lock); |
1805 | dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n", |
1806 | hnode->handle, addr); |
1807 | return ERR_PTR(-EINVAL); |
1808 | } |
1809 | |
1810 | /* |
1811 | * node found, increase export count so this memory cannot be unmapped |
1812 | * and the hash node cannot be deleted. |
1813 | */ |
1814 | hnode->export_cnt++; |
1815 | mutex_unlock(&ctx->mem_hash_lock); |
1816 | |
1817 | return hnode; |
1818 | } |
1819 | |
1820 | static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode) |
1821 | { |
1822 | mutex_lock(&ctx->mem_hash_lock); |
1823 | hnode->export_cnt--; |
1824 | mutex_unlock(&ctx->mem_hash_lock); |
1825 | } |
1826 | |
1827 | static void hl_release_dmabuf(struct dma_buf *dmabuf) |
1828 | { |
1829 | struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; |
1830 | struct hl_ctx *ctx; |
1831 | |
1832 | if (!hl_dmabuf) |
1833 | return; |
1834 | |
1835 | ctx = hl_dmabuf->ctx; |
1836 | |
1837 | if (hl_dmabuf->memhash_hnode) |
1838 | memhash_node_export_put(ctx, hl_dmabuf->memhash_hnode); |
1839 | |
1840 | atomic_dec(&ctx->hdev->dmabuf_export_cnt); |
1841 | hl_ctx_put(ctx); |
1842 | |
1843 | /* Paired with get_file() in export_dmabuf() */ |
1844 | fput(ctx->hpriv->file_priv->filp); |
1845 | |
1846 | kfree(hl_dmabuf); |
1847 | } |
1848 | |
1849 | static const struct dma_buf_ops habanalabs_dmabuf_ops = { |
1850 | .attach = hl_dmabuf_attach, |
1851 | .map_dma_buf = hl_map_dmabuf, |
1852 | .unmap_dma_buf = hl_unmap_dmabuf, |
1853 | .release = hl_release_dmabuf, |
1854 | }; |
1855 | |
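/*
 * export_dmabuf() - wrap dma_buf_export()/dma_buf_fd() for the given private
 * data and size. On success it takes a context reference and a reference on the
 * compute device file, both of which are dropped in hl_release_dmabuf().
 */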
1856 | static int export_dmabuf(struct hl_ctx *ctx, |
1857 | struct hl_dmabuf_priv *hl_dmabuf, |
1858 | u64 total_size, int flags, int *dmabuf_fd) |
1859 | { |
1860 | DEFINE_DMA_BUF_EXPORT_INFO(exp_info); |
1861 | struct hl_device *hdev = ctx->hdev; |
1862 | int rc, fd; |
1863 | |
1864 | exp_info.ops = &habanalabs_dmabuf_ops; |
1865 | exp_info.size = total_size; |
1866 | exp_info.flags = flags; |
1867 | exp_info.priv = hl_dmabuf; |
1868 | |
1869 | hl_dmabuf->dmabuf = dma_buf_export(&exp_info); |
1870 | if (IS_ERR(hl_dmabuf->dmabuf)) { |
1871 | dev_err(hdev->dev, "failed to export dma-buf\n"); |
1872 | return PTR_ERR(hl_dmabuf->dmabuf); |
1873 | } |
1874 | |
1875 | fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); |
1876 | if (fd < 0) { |
1877 | dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); |
1878 | rc = fd; |
1879 | goto err_dma_buf_put; |
1880 | } |
1881 | |
1882 | hl_dmabuf->ctx = ctx; |
1883 | hl_ctx_get(hl_dmabuf->ctx); |
1884 | atomic_inc(&ctx->hdev->dmabuf_export_cnt); |
1885 | |
1886 | /* Get the compute device file to enforce release order, such that all exported dma-bufs are |
1887 | * released first and only then the compute device. |
1888 | * Paired with fput() in hl_release_dmabuf(). |
1889 | */ |
1890 | get_file(ctx->hpriv->file_priv->filp); |
1891 | |
1892 | *dmabuf_fd = fd; |
1893 | |
1894 | return 0; |
1895 | |
1896 | err_dma_buf_put: |
1897 | hl_dmabuf->dmabuf->priv = NULL; |
1898 | dma_buf_put(hl_dmabuf->dmabuf); |
1899 | return rc; |
1900 | } |
1901 | |
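/*
 * validate_export_params_common() - basic checks that are common to all export
 * flows: the exported address, size and offset must all be PAGE_SIZE aligned,
 * and the size must be non-zero.
 */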
1902 | static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset) |
1903 | { |
1904 | if (!PAGE_ALIGNED(addr)) { |
1905 | dev_dbg(hdev->dev, |
1906 | "exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n", |
1907 | addr, PAGE_SIZE); |
1908 | return -EINVAL; |
1909 | } |
1910 | |
1911 | if (!size || !PAGE_ALIGNED(size)) { |
1912 | dev_dbg(hdev->dev, |
1913 | "exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n", |
1914 | size, PAGE_SIZE); |
1915 | return -EINVAL; |
1916 | } |
1917 | |
1918 | if (!PAGE_ALIGNED(offset)) { |
1919 | dev_dbg(hdev->dev, |
1920 | "exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n", |
1921 | offset, PAGE_SIZE); |
1922 | return -EINVAL; |
1923 | } |
1924 | |
1925 | return 0; |
1926 | } |
1927 | |
1928 | static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr, u64 size) |
1929 | { |
1930 | struct asic_fixed_properties *prop = &hdev->asic_prop; |
1931 | u64 bar_address; |
1932 | int rc; |
1933 | |
1934 | rc = validate_export_params_common(hdev, device_addr, size, 0); |
1935 | if (rc) |
1936 | return rc; |
1937 | |
1938 | if (device_addr < prop->dram_user_base_address || |
1939 | (device_addr + size) > prop->dram_end_address || |
1940 | (device_addr + size) < device_addr) { |
1941 | dev_dbg(hdev->dev, |
1942 | "DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n", |
1943 | device_addr, size); |
1944 | return -EINVAL; |
1945 | } |
1946 | |
1947 | bar_address = hdev->dram_pci_bar_start + (device_addr - prop->dram_base_address); |
1948 | |
1949 | if ((bar_address + size) > (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) || |
1950 | (bar_address + size) < bar_address) { |
1951 | dev_dbg(hdev->dev, |
1952 | "DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n", |
1953 | device_addr, size); |
1954 | return -EINVAL; |
1955 | } |
1956 | |
1957 | return 0; |
1958 | } |
1959 | |
1960 | static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 size, u64 offset, |
1961 | struct hl_vm_phys_pg_pack *phys_pg_pack) |
1962 | { |
1963 | struct asic_fixed_properties *prop = &hdev->asic_prop; |
1964 | u64 bar_address; |
1965 | int i, rc; |
1966 | |
1967 | rc = validate_export_params_common(hdev, device_addr, size, offset); |
1968 | if (rc) |
1969 | return rc; |
1970 | |
1971 | if ((offset + size) > phys_pg_pack->total_size) { |
1972 | dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n", |
1973 | offset, size, phys_pg_pack->total_size); |
1974 | return -EINVAL; |
1975 | } |
1976 | |
1977 | for (i = 0 ; i < phys_pg_pack->npages ; i++) { |
1978 | bar_address = hdev->dram_pci_bar_start + |
1979 | (phys_pg_pack->pages[i] - prop->dram_base_address); |
1980 | |
1981 | if ((bar_address + phys_pg_pack->page_size) > |
1982 | (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) || |
1983 | (bar_address + phys_pg_pack->page_size) < bar_address) { |
1984 | dev_dbg(hdev->dev, |
1985 | "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n", |
1986 | phys_pg_pack->pages[i], phys_pg_pack->page_size); |
1987 | return -EINVAL; |
1988 | } |
1989 | } |
1990 | |
1991 | return 0; |
1992 | } |
1993 | |
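/*
 * get_phys_pg_pack_from_hash_node() - translate a memory handle taken from a VM
 * hash node into its physical pages pack, and verify that the handle indeed
 * describes DRAM memory (VM_TYPE_PHYS_PACK).
 */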
1994 | static struct hl_vm_phys_pg_pack *get_phys_pg_pack_from_hash_node(struct hl_device *hdev, |
1995 | struct hl_vm_hash_node *hnode) |
1996 | { |
1997 | struct hl_vm_phys_pg_pack *phys_pg_pack; |
1998 | struct hl_vm *vm = &hdev->vm; |
1999 | |
2000 | spin_lock(&vm->idr_lock); |
2001 | phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) hnode->handle); |
2002 | if (!phys_pg_pack) { |
2003 | spin_unlock(&vm->idr_lock); |
2004 | dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) hnode->handle); |
2005 | return ERR_PTR(-EINVAL); |
2006 | } |
2007 | |
2008 | spin_unlock(&vm->idr_lock); |
2009 | |
2010 | if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) { |
2011 | dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", hnode->handle); |
2012 | return ERR_PTR(-EINVAL); |
2013 | } |
2014 | |
2015 | return phys_pg_pack; |
2016 | } |
2017 | |
2018 | /** |
2019 | * export_dmabuf_from_addr() - export a dma-buf object for the given memory |
2020 | * address and size. |
2021 | * @ctx: pointer to the context structure. |
2022 | * @addr: device address. |
2023 | * @size: size of device memory to export. |
2024 | * @offset: the offset into the buffer from which to start exporting |
2025 | * @flags: DMA-BUF file/FD flags. |
2026 | * @dmabuf_fd: pointer to result FD that represents the dma-buf object. |
2027 | * |
2028 | * Create and export a dma-buf object for an existing memory allocation inside |
2029 | * the device memory, and return a FD which is associated with the dma-buf |
2030 | * object. |
2031 | * |
2032 | * Return: 0 on success, non-zero for failure. |
2033 | */ |
2034 | static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 offset, |
2035 | int flags, int *dmabuf_fd) |
2036 | { |
2037 | struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; |
2038 | struct hl_vm_hash_node *hnode = NULL; |
2039 | struct asic_fixed_properties *prop; |
2040 | struct hl_dmabuf_priv *hl_dmabuf; |
2041 | struct hl_device *hdev; |
2042 | int rc; |
2043 | |
2044 | hdev = ctx->hdev; |
2045 | prop = &hdev->asic_prop; |
2046 | |
2047 | /* offset must be 0 in devices without virtual memory support */ |
2048 | if (!prop->dram_supports_virtual_memory && offset) { |
2049 | dev_dbg(hdev->dev, "offset is not allowed in device without virtual memory\n"); |
2050 | return -EINVAL; |
2051 | } |
2052 | |
2053 | hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); |
2054 | if (!hl_dmabuf) |
2055 | return -ENOMEM; |
2056 | |
2057 | if (prop->dram_supports_virtual_memory) { |
2058 | hnode = memhash_node_export_get(ctx, addr); |
2059 | if (IS_ERR(hnode)) { |
2060 | rc = PTR_ERR(hnode); |
2061 | goto err_free_dmabuf_wrapper; |
2062 | } |
2063 | phys_pg_pack = get_phys_pg_pack_from_hash_node(hdev, hnode); |
2064 | if (IS_ERR(phys_pg_pack)) { |
2065 | rc = PTR_ERR(phys_pg_pack); |
2066 | goto dec_memhash_export_cnt; |
2067 | } |
2068 | rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack); |
2069 | if (rc) |
2070 | goto dec_memhash_export_cnt; |
2071 | |
2072 | hl_dmabuf->phys_pg_pack = phys_pg_pack; |
2073 | hl_dmabuf->memhash_hnode = hnode; |
2074 | hl_dmabuf->offset = offset; |
2075 | } else { |
2076 | rc = validate_export_params_no_mmu(hdev, addr, size); |
2077 | if (rc) |
2078 | goto err_free_dmabuf_wrapper; |
2079 | |
2080 | hl_dmabuf->device_phys_addr = addr; |
2081 | } |
2082 | |
2083 | rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd); |
2084 | if (rc) |
2085 | goto dec_memhash_export_cnt; |
2086 | |
2087 | return 0; |
2088 | |
2089 | dec_memhash_export_cnt: |
2090 | if (prop->dram_supports_virtual_memory) |
2091 | memhash_node_export_put(ctx, hnode); |
2092 | err_free_dmabuf_wrapper: |
2093 | kfree(hl_dmabuf); |
2094 | return rc; |
2095 | } |
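
/*
 * Illustrative sketch only (not part of the driver): a user-space caller reaches
 * export_dmabuf_from_addr() through the memory IOCTL with the
 * HL_MEM_OP_EXPORT_DMABUF_FD opcode, roughly as follows (field names taken from
 * the hl_mem_args union used by hl_mem_ioctl() below; device_va and size are
 * placeholders for an existing mapped DRAM allocation):
 *
 *	union hl_mem_args args = {};
 *
 *	args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
 *	args.in.export_dmabuf_fd.addr = device_va;	// mapped device address
 *	args.in.export_dmabuf_fd.mem_size = size;	// multiple of PAGE_SIZE
 *	args.in.export_dmabuf_fd.offset = 0;		// PAGE_SIZE aligned
 *	args.in.flags = O_RDWR | O_CLOEXEC;		// dma-buf FD flags
 *	// ...issue the driver's memory ioctl on the compute device fd...
 *	// on success, args.out.fd holds the exported dma-buf file descriptor.
 */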
2096 | |
2097 | static void ts_buff_release(struct hl_mmap_mem_buf *buf) |
2098 | { |
2099 | struct hl_ts_buff *ts_buff = buf->private; |
2100 | |
2101 | vfree(ts_buff->kernel_buff_address); |
2102 | vfree(ts_buff->user_buff_address); |
2103 | kfree(ts_buff); |
2104 | } |
2105 | |
2106 | static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args) |
2107 | { |
2108 | struct hl_ts_buff *ts_buff = buf->private; |
2109 | |
2110 | vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE); |
2111 | return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0); |
2112 | } |
2113 | |
2114 | static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args) |
2115 | { |
2116 | struct hl_ts_buff *ts_buff = NULL; |
2117 | u32 num_elements; |
2118 | size_t size; |
2119 | void *p; |
2120 | |
2121 | num_elements = *(u32 *)args; |
2122 | |
2123 | ts_buff = kzalloc(sizeof(*ts_buff), gfp); |
2124 | if (!ts_buff) |
2125 | return -ENOMEM; |
2126 | |
2127 | /* Allocate the user buffer */ |
2128 | size = num_elements * sizeof(u64); |
2129 | p = vmalloc_user(size); |
2130 | if (!p) |
2131 | goto free_mem; |
2132 | |
2133 | ts_buff->user_buff_address = p; |
2134 | buf->mappable_size = size; |
2135 | |
2136 | /* Allocate the internal kernel buffer */ |
2137 | size = num_elements * sizeof(struct hl_user_pending_interrupt); |
2138 | p = vzalloc(size); |
2139 | if (!p) |
2140 | goto free_user_buff; |
2141 | |
2142 | ts_buff->kernel_buff_address = p; |
2143 | ts_buff->kernel_buff_size = size; |
2144 | |
2145 | buf->private = ts_buff; |
2146 | |
2147 | return 0; |
2148 | |
2149 | free_user_buff: |
2150 | vfree(ts_buff->user_buff_address); |
2151 | free_mem: |
2152 | kfree(ts_buff); |
2153 | return -ENOMEM; |
2154 | } |
2155 | |
2156 | static struct hl_mmap_mem_buf_behavior hl_ts_behavior = { |
2157 | .topic = "TS", |
2158 | .mem_id = HL_MMAP_TYPE_TS_BUFF, |
2159 | .mmap = hl_ts_mmap, |
2160 | .alloc = hl_ts_alloc_buf, |
2161 | .release = ts_buff_release, |
2162 | }; |
2163 | |
2164 | /** |
2165 | * allocate_timestamps_buffers() - allocate timestamps buffers |
2166 | * @hpriv: pointer to the private data of the fd |
2167 | * @args: ioctl input |
2168 | * @handle: user timestamp buffer handle as an output |
2169 | * |
2170 | * This function allocates a timestamps buffer that will later be mapped to the user |
2171 | * so the user can read the timestamps. |
2172 | * In addition, it allocates an extra buffer for registration management. Since |
2173 | * registration must not fail due to an out-of-memory situation, a pool of user |
2174 | * interrupt nodes is prepared here and nodes are picked from it during registration |
2175 | * instead of being allocated dynamically. The buffer is also added to the mapping |
2176 | * hash, which is used to map the user timestamps buffer to the internal kernel buffer. |
2177 | */ |
2178 | static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle) |
2179 | { |
2180 | struct hl_mem_mgr *mmg = &hpriv->mem_mgr; |
2181 | struct hl_mmap_mem_buf *buf; |
2182 | |
2183 | if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) { |
2184 | dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n", |
2185 | args->num_of_elements, TS_MAX_ELEMENTS_NUM); |
2186 | return -EINVAL; |
2187 | } |
2188 | |
2189 | buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements); |
2190 | if (!buf) |
2191 | return -ENOMEM; |
2192 | |
2193 | *handle = buf->handle; |
2194 | |
2195 | return 0; |
2196 | } |
2197 | |
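/*
 * hl_mem_ioctl() - entry point of the memory IOCTL.
 * Dispatches the requested operation (alloc/free/map/unmap/map-block/dma-buf
 * export/timestamps-buffer alloc) to the relevant helper according to
 * args->in.op, after verifying that the device is operational.
 */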
2198 | int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) |
2199 | { |
2200 | struct hl_fpriv *hpriv = file_priv->driver_priv; |
2201 | enum hl_device_status status; |
2202 | union hl_mem_args *args = data; |
2203 | struct hl_device *hdev = hpriv->hdev; |
2204 | struct hl_ctx *ctx = hpriv->ctx; |
2205 | u64 block_handle, device_addr = 0; |
2206 | u32 handle = 0, block_size; |
2207 | int rc, dmabuf_fd = -EBADF; |
2208 | |
2209 | if (!hl_device_operational(hdev, &status)) { |
2210 | dev_dbg_ratelimited(hdev->dev, |
2211 | "Device is %s. Can't execute MEMORY IOCTL\n", |
2212 | hdev->status[status]); |
2213 | return -EBUSY; |
2214 | } |
2215 | |
2216 | switch (args->in.op) { |
2217 | case HL_MEM_OP_ALLOC: |
2218 | if (args->in.alloc.mem_size == 0) { |
2219 | dev_err(hdev->dev, |
2220 | "alloc size must be larger than 0\n"); |
2221 | rc = -EINVAL; |
2222 | goto out; |
2223 | } |
2224 | |
2225 | /* If DRAM does not support virtual memory the driver won't |
2226 | * handle the allocation/freeing of that memory. However, for |
2227 | * system administration/monitoring purposes, the driver will |
2228 | * keep track of the amount of DRAM memory that is allocated |
2229 | * and freed by the user. Because this code totally relies on |
2230 | * the user's input, the driver can't ensure the validity |
2231 | * of this accounting. |
2232 | */ |
2233 | if (!hdev->asic_prop.dram_supports_virtual_memory) { |
2234 | atomic64_add(args->in.alloc.mem_size, |
2235 | &ctx->dram_phys_mem); |
2236 | atomic64_add(args->in.alloc.mem_size, |
2237 | &hdev->dram_used_mem); |
2238 | |
2239 | dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); |
2240 | rc = 0; |
2241 | |
2242 | memset(args, 0, sizeof(*args)); |
2243 | args->out.handle = 0; |
2244 | goto out; |
2245 | } |
2246 | |
2247 | rc = alloc_device_memory(ctx, &args->in, &handle); |
2248 | |
2249 | memset(args, 0, sizeof(*args)); |
2250 | args->out.handle = (__u64) handle; |
2251 | break; |
2252 | |
2253 | case HL_MEM_OP_FREE: |
2254 | /* If DRAM does not support virtual memory the driver won't |
2255 | * handle the allocation/freeing of that memory. However, for |
2256 | * system administration/monitoring purposes, the driver will |
2257 | * keep track of the amount of DRAM memory that is allocated |
2258 | * and freed by the user. Because this code totally relies on |
2259 | * the user's input, the driver can't ensure the validity |
2260 | * of this accounting. |
2261 | */ |
2262 | if (!hdev->asic_prop.dram_supports_virtual_memory) { |
2263 | atomic64_sub(args->in.alloc.mem_size, |
2264 | &ctx->dram_phys_mem); |
2265 | atomic64_sub(args->in.alloc.mem_size, |
2266 | &hdev->dram_used_mem); |
2267 | |
2268 | dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); |
2269 | rc = 0; |
2270 | |
2271 | goto out; |
2272 | } |
2273 | |
2274 | rc = free_device_memory(ctx, &args->in); |
2275 | break; |
2276 | |
2277 | case HL_MEM_OP_MAP: |
2278 | rc = map_device_va(ctx, &args->in, &device_addr); |
2279 | |
2280 | memset(args, 0, sizeof(*args)); |
2281 | args->out.device_virt_addr = device_addr; |
2282 | break; |
2283 | |
2284 | case HL_MEM_OP_UNMAP: |
2285 | rc = unmap_device_va(ctx, &args->in, false); |
2286 | break; |
2287 | |
2288 | case HL_MEM_OP_MAP_BLOCK: |
2289 | rc = map_block(hdev, args->in.map_block.block_addr, |
2290 | &block_handle, &block_size); |
2291 | args->out.block_handle = block_handle; |
2292 | args->out.block_size = block_size; |
2293 | break; |
2294 | |
2295 | case HL_MEM_OP_EXPORT_DMABUF_FD: |
2296 | rc = export_dmabuf_from_addr(ctx, |
2297 | args->in.export_dmabuf_fd.addr, |
2298 | args->in.export_dmabuf_fd.mem_size, |
2299 | args->in.export_dmabuf_fd.offset, |
2300 | args->in.flags, |
2301 | &dmabuf_fd); |
2302 | memset(args, 0, sizeof(*args)); |
2303 | args->out.fd = dmabuf_fd; |
2304 | break; |
2305 | |
2306 | case HL_MEM_OP_TS_ALLOC: |
2307 | rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle); |
2308 | break; |
2309 | default: |
2310 | dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); |
2311 | rc = -EINVAL; |
2312 | break; |
2313 | } |
2314 | |
2315 | out: |
2316 | return rc; |
2317 | } |
2318 | |
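/*
 * get_user_memory() - pin the user pages that back a host memory area and build
 * an SG table from them. Called from hl_pin_host_memory() after the address
 * range was validated and page-aligned; on failure the already pinned pages are
 * unpinned and the pages array is freed.
 */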
2319 | static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, |
2320 | u32 npages, u64 start, u32 offset, |
2321 | struct hl_userptr *userptr) |
2322 | { |
2323 | int rc; |
2324 | |
2325 | if (!access_ok((void __user *) (uintptr_t) addr, size)) { |
2326 | dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); |
2327 | return -EFAULT; |
2328 | } |
2329 | |
2330 | userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); |
2331 | if (!userptr->pages) |
2332 | return -ENOMEM; |
2333 | |
2334 | rc = pin_user_pages_fast(start, npages, FOLL_WRITE | FOLL_LONGTERM, |
2335 | userptr->pages); |
2336 | |
2337 | if (rc != npages) { |
2338 | dev_err(hdev->dev, |
2339 | "Failed (%d) to pin host memory with user ptr 0x%llx, size 0x%llx, npages %d\n", |
2340 | rc, addr, size, npages); |
2341 | if (rc < 0) |
2342 | goto destroy_pages; |
2343 | npages = rc; |
2344 | rc = -EFAULT; |
2345 | goto put_pages; |
2346 | } |
2347 | userptr->npages = npages; |
2348 | |
2349 | rc = sg_alloc_table_from_pages(userptr->sgt, |
2350 | userptr->pages, |
2351 | npages, offset, size, GFP_KERNEL); |
2352 | if (rc < 0) { |
2353 | dev_err(hdev->dev, "failed to create SG table from pages\n"); |
2354 | goto put_pages; |
2355 | } |
2356 | |
2357 | return 0; |
2358 | |
2359 | put_pages: |
2360 | unpin_user_pages(userptr->pages, npages); |
2361 | destroy_pages: |
2362 | kvfree(userptr->pages); |
2363 | return rc; |
2364 | } |
2365 | |
2366 | /** |
2367 | * hl_pin_host_memory() - pins a chunk of host memory. |
2368 | * @hdev: pointer to the habanalabs device structure. |
2369 | * @addr: the host virtual address of the memory area. |
2370 | * @size: the size of the memory area. |
2371 | * @userptr: pointer to hl_userptr structure. |
2372 | * |
2373 | * This function does the following: |
2374 | * - Pins the physical pages. |
2375 | * - Create an SG list from those pages. |
2376 | */ |
2377 | int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, |
2378 | struct hl_userptr *userptr) |
2379 | { |
2380 | u64 start, end; |
2381 | u32 npages, offset; |
2382 | int rc; |
2383 | |
2384 | if (!size) { |
2385 | dev_err(hdev->dev, "size to pin is invalid - %llu\n", size); |
2386 | return -EINVAL; |
2387 | } |
2388 | |
2389 | /* |
2390 | * If the combination of the address and size requested for this memory |
2391 | * region causes an integer overflow, return error. |
2392 | */ |
2393 | if (((addr + size) < addr) || |
2394 | PAGE_ALIGN(addr + size) < (addr + size)) { |
2395 | dev_err(hdev->dev, |
2396 | "user pointer 0x%llx + %llu causes integer overflow\n", |
2397 | addr, size); |
2398 | return -EINVAL; |
2399 | } |
2400 | |
2401 | userptr->pid = current->pid; |
2402 | userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL); |
2403 | if (!userptr->sgt) |
2404 | return -ENOMEM; |
2405 | |
2406 | start = addr & PAGE_MASK; |
2407 | offset = addr & ~PAGE_MASK; |
2408 | end = PAGE_ALIGN(addr + size); |
2409 | npages = (end - start) >> PAGE_SHIFT; |
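/*
 * Example (illustrative values only): for addr = 0x1234 and size = 0x3000 with a
 * 4KB PAGE_SIZE, start = 0x1000, offset = 0x234, end = PAGE_ALIGN(0x4234) = 0x5000
 * and npages = 4.
 */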
2410 | |
2411 | userptr->size = size; |
2412 | userptr->addr = addr; |
2413 | userptr->dma_mapped = false; |
2414 | INIT_LIST_HEAD(&userptr->job_node); |
2415 | |
2416 | rc = get_user_memory(hdev, addr, size, npages, start, offset, |
2417 | userptr); |
2418 | if (rc) { |
2419 | dev_err(hdev->dev, |
2420 | "failed to get user memory for address 0x%llx\n", |
2421 | addr); |
2422 | goto free_sgt; |
2423 | } |
2424 | |
2425 | hl_debugfs_add_userptr(hdev, userptr); |
2426 | |
2427 | return 0; |
2428 | |
2429 | free_sgt: |
2430 | kfree(userptr->sgt); |
2431 | return rc; |
2432 | } |
2433 | |
2434 | /* |
2435 | * hl_unpin_host_memory - unpins a chunk of host memory. |
2436 | * @hdev: pointer to the habanalabs device structure |
2437 | * @userptr: pointer to hl_userptr structure |
2438 | * |
2439 | * This function does the following: |
2440 | * - Unpins the physical pages related to the host memory |
2441 | * - Free the SG list |
2442 | */ |
2443 | void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) |
2444 | { |
2445 | hl_debugfs_remove_userptr(hdev, userptr); |
2446 | |
2447 | if (userptr->dma_mapped) |
2448 | hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir); |
2449 | |
2450 | unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true); |
2451 | kvfree(userptr->pages); |
2452 | |
2453 | list_del(&userptr->job_node); |
2454 | |
2455 | sg_free_table(userptr->sgt); |
2456 | kfree(userptr->sgt); |
2457 | } |
2458 | |
2459 | /** |
2460 | * hl_userptr_delete_list() - clear userptr list. |
2461 | * @hdev: pointer to the habanalabs device structure. |
2462 | * @userptr_list: pointer to the list to clear. |
2463 | * |
2464 | * This function does the following: |
2465 | * - Iterates over the list and unpins the host memory and frees the userptr |
2466 | * structure. |
2467 | */ |
2468 | void hl_userptr_delete_list(struct hl_device *hdev, |
2469 | struct list_head *userptr_list) |
2470 | { |
2471 | struct hl_userptr *userptr, *tmp; |
2472 | |
2473 | list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) { |
2474 | hl_unpin_host_memory(hdev, userptr); |
2475 | kfree(userptr); |
2476 | } |
2477 | |
2478 | INIT_LIST_HEAD(userptr_list); |
2479 | } |
2480 | |
2481 | /** |
2482 | * hl_userptr_is_pinned() - returns whether the given userptr is pinned. |
2483 | * @hdev: pointer to the habanalabs device structure. |
2484 | * @addr: user address to check. |
2485 | * @size: user block size to check. |
2486 | * @userptr_list: pointer to the list to clear. |
2487 | * @userptr: pointer to userptr to check. |
2488 | * |
2489 | * This function does the following: |
2490 | * - Iterates over the list and checks if the given userptr is in it, which |
2491 | * means it is pinned. If so, returns true, otherwise returns false. |
2492 | */ |
2493 | bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, |
2494 | u32 size, struct list_head *userptr_list, |
2495 | struct hl_userptr **userptr) |
2496 | { |
2497 | list_for_each_entry((*userptr), userptr_list, job_node) { |
2498 | if ((addr == (*userptr)->addr) && (size == (*userptr)->size)) |
2499 | return true; |
2500 | } |
2501 | |
2502 | return false; |
2503 | } |
2504 | |
2505 | /** |
2506 | * va_range_init() - initialize virtual addresses range. |
2507 | * @hdev: pointer to the habanalabs device structure. |
2508 | * @va_ranges: pointer to va_ranges array. |
2509 | * @range_type: virtual address range type. |
2510 | * @start: range start address, inclusive. |
2511 | * @end: range end address, inclusive. |
2512 | * @page_size: page size for this va_range. |
2513 | * |
2514 | * This function does the following: |
2515 | * - Initializes the virtual addresses list of the given range with the given |
2516 | * addresses. |
2517 | */ |
2518 | static int va_range_init(struct hl_device *hdev, struct hl_va_range **va_ranges, |
2519 | enum hl_va_range_type range_type, u64 start, |
2520 | u64 end, u32 page_size) |
2521 | { |
2522 | struct hl_va_range *va_range = va_ranges[range_type]; |
2523 | int rc; |
2524 | |
2525 | INIT_LIST_HEAD(&va_range->list); |
2526 | |
2527 | /* |
2528 | * PAGE_SIZE alignment |
2529 | * it is the caller's responsibility to align the addresses if the |
2530 | * page size is not a power of 2 |
2531 | */ |
2532 | |
2533 | if (is_power_of_2(page_size)) { |
2534 | start = round_up(start, page_size); |
2535 | |
2536 | /* |
2537 | * The end of the range is inclusive, hence we need to align it |
2538 | * to the end of the last full page in the range. For example if |
2539 | * end = 0x3ff5 with page size 0x1000, we need to align it to |
2540 | * 0x2fff. The remaining 0xff5 bytes do not form a full page. |
2541 | */ |
2542 | end = round_down(end + 1, page_size) - 1; |
2543 | } |
2544 | |
2545 | if (start >= end) { |
2546 | dev_err(hdev->dev, "too small vm range for va list\n"); |
2547 | return -EFAULT; |
2548 | } |
2549 | |
2550 | rc = add_va_block(hdev, va_range, start, end); |
2551 | |
2552 | if (rc) { |
2553 | dev_err(hdev->dev, "Failed to init host va list\n"); |
2554 | return rc; |
2555 | } |
2556 | |
2557 | va_range->start_addr = start; |
2558 | va_range->end_addr = end; |
2559 | va_range->page_size = page_size; |
2560 | |
2561 | return 0; |
2562 | } |
2563 | |
2564 | /** |
2565 | * va_range_fini() - clear a virtual addresses range. |
2566 | * @hdev: pointer to the habanalabs structure. |
2567 | * @va_range: pointer to virtual addresses range. |
2568 | * |
2569 | * This function does the following: |
2570 | * - Frees the virtual addresses block list and its lock. |
2571 | */ |
2572 | static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range) |
2573 | { |
2574 | mutex_lock(&va_range->lock); |
2575 | clear_va_list_locked(hdev, &va_range->list); |
2576 | mutex_unlock(&va_range->lock); |
2577 | |
2578 | mutex_destroy(&va_range->lock); |
2579 | kfree(va_range); |
2580 | } |
2581 | |
2582 | /** |
2583 | * vm_ctx_init_with_ranges() - initialize virtual memory for context. |
2584 | * @ctx: pointer to the habanalabs context structure. |
2585 | * @host_range_start: host virtual addresses range start. |
2586 | * @host_range_end: host virtual addresses range end. |
2587 | * @host_page_size: host page size. |
2588 | * @host_huge_range_start: host virtual addresses range start for memory |
2589 | * allocated with huge pages. |
2590 | * @host_huge_range_end: host virtual addresses range end for memory allocated |
2591 | * with huge pages. |
2592 | * @host_huge_page_size: host huge page size. |
2593 | * @dram_range_start: dram virtual addresses range start. |
2594 | * @dram_range_end: dram virtual addresses range end. |
2595 | * @dram_page_size: dram page size. |
2596 | * |
2597 | * This function initializes the following: |
2598 | * - MMU for context. |
2599 | * - Virtual address to area descriptor hashtable. |
2600 | * - Virtual block list of available virtual memory. |
2601 | */ |
2602 | static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, |
2603 | u64 host_range_start, |
2604 | u64 host_range_end, |
2605 | u32 host_page_size, |
2606 | u64 host_huge_range_start, |
2607 | u64 host_huge_range_end, |
2608 | u32 host_huge_page_size, |
2609 | u64 dram_range_start, |
2610 | u64 dram_range_end, |
2611 | u32 dram_page_size) |
2612 | { |
2613 | struct hl_device *hdev = ctx->hdev; |
2614 | int i, rc; |
2615 | |
2616 | for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) { |
2617 | ctx->va_range[i] = |
2618 | kzalloc(sizeof(struct hl_va_range), GFP_KERNEL); |
2619 | if (!ctx->va_range[i]) { |
2620 | rc = -ENOMEM; |
2621 | goto free_va_range; |
2622 | } |
2623 | } |
2624 | |
2625 | rc = hl_mmu_ctx_init(ctx); |
2626 | if (rc) { |
2627 | dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); |
2628 | goto free_va_range; |
2629 | } |
2630 | |
2631 | mutex_init(&ctx->mem_hash_lock); |
2632 | hash_init(ctx->mem_hash); |
2633 | |
2634 | mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); |
2635 | |
2636 | rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_HOST, |
2637 | host_range_start, host_range_end, host_page_size); |
2638 | if (rc) { |
2639 | dev_err(hdev->dev, "failed to init host vm range\n"); |
2640 | goto mmu_ctx_fini; |
2641 | } |
2642 | |
2643 | if (hdev->pmmu_huge_range) { |
2644 | mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); |
2645 | |
2646 | rc = va_range_init(hdev, |
2647 | ctx->va_range, HL_VA_RANGE_TYPE_HOST_HUGE, |
2648 | host_huge_range_start, host_huge_range_end, |
2649 | host_huge_page_size); |
2650 | if (rc) { |
2651 | dev_err(hdev->dev, |
2652 | "failed to init host huge vm range\n"); |
2653 | goto clear_host_va_range; |
2654 | } |
2655 | } else { |
2656 | kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); |
2657 | ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] = |
2658 | ctx->va_range[HL_VA_RANGE_TYPE_HOST]; |
2659 | } |
2660 | |
2661 | mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); |
2662 | |
2663 | rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_DRAM, |
2664 | dram_range_start, dram_range_end, dram_page_size); |
2665 | if (rc) { |
2666 | dev_err(hdev->dev, "failed to init dram vm range\n"); |
2667 | goto clear_host_huge_va_range; |
2668 | } |
2669 | |
2670 | hl_debugfs_add_ctx_mem_hash(hdev, ctx); |
2671 | |
2672 | return 0; |
2673 | |
2674 | clear_host_huge_va_range: |
2675 | mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); |
2676 | |
2677 | if (hdev->pmmu_huge_range) { |
2678 | mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); |
2679 | clear_va_list_locked(hdev, |
2680 | &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list); |
2681 | mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); |
2682 | } |
2683 | clear_host_va_range: |
2684 | if (hdev->pmmu_huge_range) |
2685 | mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); |
2686 | mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); |
2687 | clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list); |
2688 | mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); |
2689 | mmu_ctx_fini: |
2690 | mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); |
2691 | mutex_destroy(&ctx->mem_hash_lock); |
2692 | hl_mmu_ctx_fini(ctx); |
2693 | free_va_range: |
2694 | for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) |
2695 | kfree(ctx->va_range[i]); |
2696 | |
2697 | return rc; |
2698 | } |
2699 | |
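/*
 * hl_vm_ctx_init() - initialize virtual memory for context.
 * Derives the host, host-huge and DRAM virtual address ranges and page sizes
 * from the ASIC properties and hands them to vm_ctx_init_with_ranges().
 * Does nothing if the MMU is disabled.
 */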
2700 | int hl_vm_ctx_init(struct hl_ctx *ctx) |
2701 | { |
2702 | struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; |
2703 | u64 host_range_start, host_range_end, host_huge_range_start, |
2704 | host_huge_range_end, dram_range_start, dram_range_end; |
2705 | u32 host_page_size, host_huge_page_size, dram_page_size; |
2706 | |
2707 | atomic64_set(&ctx->dram_phys_mem, 0); |
2708 | |
2709 | /* |
2710 | * In case of DRAM mapping, the returned address is the physical |
2711 | * address of the memory related to the given handle. |
2712 | */ |
2713 | if (ctx->hdev->mmu_disable) |
2714 | return 0; |
2715 | |
2716 | dram_range_start = prop->dmmu.start_addr; |
2717 | dram_range_end = prop->dmmu.end_addr - 1; |
2718 | dram_page_size = prop->dram_page_size ? |
2719 | prop->dram_page_size : prop->dmmu.page_size; |
2720 | host_range_start = prop->pmmu.start_addr; |
2721 | host_range_end = prop->pmmu.end_addr - 1; |
2722 | host_page_size = prop->pmmu.page_size; |
2723 | host_huge_range_start = prop->pmmu_huge.start_addr; |
2724 | host_huge_range_end = prop->pmmu_huge.end_addr - 1; |
2725 | host_huge_page_size = prop->pmmu_huge.page_size; |
2726 | |
2727 | return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, |
2728 | host_page_size, host_huge_range_start, |
2729 | host_huge_range_end, host_huge_page_size, |
2730 | dram_range_start, dram_range_end, dram_page_size); |
2731 | } |
2732 | |
2733 | /** |
2734 | * hl_vm_ctx_fini() - virtual memory teardown of context. |
2735 | * @ctx: pointer to the habanalabs context structure. |
2736 | * |
2737 | * This function performs teardown of the following: |
2738 | * - Virtual block list of available virtual memory. |
2739 | * - Virtual address to area descriptor hashtable. |
2740 | * - MMU for context. |
2741 | * |
2742 | * In addition this function does the following: |
2743 | * - Unmaps the existing hashtable nodes if the hashtable is not empty. The |
2744 | * hashtable should be empty as no valid mappings should exist at this |
2745 | * point. |
2746 | * - Frees any existing physical page list from the idr which relates to the |
2747 | * current context asid. |
2748 | * - This function checks the virtual block list for correctness. At this point |
2749 | * the list should contain one element which describes the whole virtual |
2750 | * memory range of the context. Otherwise, a warning is printed. |
2751 | */ |
2752 | void hl_vm_ctx_fini(struct hl_ctx *ctx) |
2753 | { |
2754 | struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node; |
2755 | struct hl_device *hdev = ctx->hdev; |
2756 | struct hl_vm_hash_node *hnode; |
2757 | struct hl_vm *vm = &hdev->vm; |
2758 | struct hlist_node *tmp_node; |
2759 | struct list_head free_list; |
2760 | struct hl_mem_in args; |
2761 | int i; |
2762 | |
2763 | if (hdev->mmu_disable) |
2764 | return; |
2765 | |
2766 | hl_debugfs_remove_ctx_mem_hash(hdev, ctx); |
2767 | |
2768 | /* |
2769 | * Clearly something went wrong on hard reset so no point in printing |
2770 | * another side effect error |
2771 | */ |
2772 | if (!hdev->reset_info.hard_reset_pending && !hash_empty(ctx->mem_hash)) |
2773 | dev_dbg(hdev->dev, |
2774 | "user released device without removing its memory mappings\n"); |
2775 | |
2776 | hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { |
2777 | dev_dbg(hdev->dev, |
2778 | "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", |
2779 | hnode->vaddr, ctx->asid); |
2780 | args.unmap.device_virt_addr = hnode->vaddr; |
2781 | unmap_device_va(ctx, &args, true); |
2782 | } |
2783 | |
2784 | mutex_lock(&hdev->mmu_lock); |
2785 | |
2786 | /* invalidate the cache once after the unmapping loop */ |
2787 | hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); |
2788 | hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK); |
2789 | |
2790 | mutex_unlock(&hdev->mmu_lock); |
2791 | |
2792 | INIT_LIST_HEAD(&free_list); |
2793 | |
2794 | spin_lock(&vm->idr_lock); |
2795 | idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) |
2796 | if (phys_pg_list->asid == ctx->asid) { |
2797 | dev_dbg(hdev->dev, |
2798 | "page list 0x%px of asid %d is still alive\n", |
2799 | phys_pg_list, ctx->asid); |
2800 | |
2801 | atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem); |
2802 | idr_remove(&vm->phys_pg_pack_handles, i); |
2803 | list_add(&phys_pg_list->node, &free_list); |
2804 | } |
2805 | spin_unlock(&vm->idr_lock); |
2806 | |
2807 | list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node) |
2808 | free_phys_pg_pack(hdev, phys_pg_list); |
2809 | |
2810 | va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]); |
2811 | va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]); |
2812 | |
2813 | if (hdev->pmmu_huge_range) |
2814 | va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); |
2815 | |
2816 | mutex_destroy(&ctx->mem_hash_lock); |
2817 | hl_mmu_ctx_fini(ctx); |
2818 | |
2819 | /* In this case we need to clear the global accounting of DRAM usage |
2820 | * because the user notifies us only on allocations. If the user process is |
2821 | * gone, all DRAM is available again. |
2822 | */ |
2823 | if (ctx->asid != HL_KERNEL_ASID_ID && |
2824 | !hdev->asic_prop.dram_supports_virtual_memory) |
2825 | atomic64_set(&hdev->dram_used_mem, 0); |
2826 | } |
2827 | |
2828 | /** |
2829 | * hl_vm_init() - initialize virtual memory module. |
2830 | * @hdev: pointer to the habanalabs device structure. |
2831 | * |
2832 | * This function initializes the following: |
2833 | * - MMU module. |
2834 | * - DRAM physical pages pool (DRAM page size, or DRAM_POOL_PAGE_SIZE when the DRAM page size is not a power of 2). |
2835 | * - Idr for device memory allocation handles. |
2836 | */ |
2837 | int hl_vm_init(struct hl_device *hdev) |
2838 | { |
2839 | struct asic_fixed_properties *prop = &hdev->asic_prop; |
2840 | struct hl_vm *vm = &hdev->vm; |
2841 | int rc; |
2842 | |
2843 | if (is_power_of_2(prop->dram_page_size)) |
2844 | vm->dram_pg_pool = |
2845 | gen_pool_create(__ffs(prop->dram_page_size), -1); |
2846 | else |
2847 | vm->dram_pg_pool = |
2848 | gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1); |
2849 | |
2850 | if (!vm->dram_pg_pool) { |
2851 | dev_err(hdev->dev, "Failed to create dram page pool\n"); |
2852 | return -ENOMEM; |
2853 | } |
2854 | |
2855 | kref_init(&vm->dram_pg_pool_refcount); |
2856 | |
2857 | rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address, |
2858 | prop->dram_end_address - prop->dram_user_base_address, |
2859 | -1); |
2860 | |
2861 | if (rc) { |
2862 | dev_err(hdev->dev, |
2863 | "Failed to add memory to dram page pool %d\n", rc); |
2864 | goto pool_add_err; |
2865 | } |
2866 | |
2867 | spin_lock_init(&vm->idr_lock); |
2868 | idr_init(&vm->phys_pg_pack_handles); |
2869 | |
2870 | atomic64_set(&hdev->dram_used_mem, 0); |
2871 | |
2872 | vm->init_done = true; |
2873 | |
2874 | return 0; |
2875 | |
2876 | pool_add_err: |
2877 | gen_pool_destroy(vm->dram_pg_pool); |
2878 | |
2879 | return rc; |
2880 | } |
2881 | |
2882 | /** |
2883 | * hl_vm_fini() - virtual memory module teardown. |
2884 | * @hdev: pointer to the habanalabs device structure. |
2885 | * |
2886 | * This function performs teardown of the following: |
2887 | * - Idr for device memory allocation handles. |
2888 | * - DRAM physical pages pool. |
2889 | * - MMU module. |
2890 | */ |
2891 | void hl_vm_fini(struct hl_device *hdev) |
2892 | { |
2893 | struct hl_vm *vm = &hdev->vm; |
2894 | |
2895 | if (!vm->init_done) |
2896 | return; |
2897 | |
2898 | /* |
2899 | * At this point all the contexts should be freed and hence no DRAM |
2900 | * memory should be in use. Hence the DRAM pool should be freed here. |
2901 | */ |
2902 | if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1) |
2903 | dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n", |
2904 | __func__); |
2905 | |
2906 | vm->init_done = false; |
2907 | } |
2908 | |
2909 | /** |
2910 | * hl_hw_block_mem_init() - HW block memory initialization. |
2911 | * @ctx: pointer to the habanalabs context structure. |
2912 | * |
2913 | * This function initializes the HW block virtual mapped addresses list and |
2914 | * its lock. |
2915 | */ |
2916 | void hl_hw_block_mem_init(struct hl_ctx *ctx) |
2917 | { |
2918 | mutex_init(&ctx->hw_block_list_lock); |
2919 | INIT_LIST_HEAD(&ctx->hw_block_mem_list); |
2920 | } |
2921 | |
2922 | /** |
2923 | * hl_hw_block_mem_fini() - HW block memory teardown. |
2924 | * @ctx: pointer to the habanalabs context structure. |
2925 | * |
2926 | * This function clears the HW block virtual mapped addresses list and destroys |
2927 | * its lock. |
2928 | */ |
2929 | void hl_hw_block_mem_fini(struct hl_ctx *ctx) |
2930 | { |
2931 | struct hl_vm_hw_block_list_node *lnode, *tmp; |
2932 | |
2933 | if (!list_empty(head: &ctx->hw_block_mem_list)) |
2934 | dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n"); |
2935 | |
2936 | list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) { |
2937 | list_del(&lnode->node); |
2938 | kfree(lnode); |
2939 | } |
2940 | |
2941 | mutex_destroy(&ctx->hw_block_list_lock); |
2942 | } |
2943 | |