// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_vm.h"

/*
 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt_dfs_safe
 */
struct amdgpu_vm_pt_cursor {
	uint64_t pfn;
	struct amdgpu_vm_bo_base *parent;
	struct amdgpu_vm_bo_base *entry;
	unsigned int level;
};

/**
 * amdgpu_vm_pt_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
					     unsigned int level)
{
	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		return 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
	case AMDGPU_VM_PTB:
		return 0;
	default:
		return ~0;
	}
}
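
/*
 * A worked example, assuming the common 9-bit block size
 * (adev->vm_manager.block_size == 9); the exact value is configured at
 * init time, so treat the numbers below as illustrative only:
 *
 *	AMDGPU_VM_PTB  -> shift 0   (one PTE per 4K page)
 *	AMDGPU_VM_PDB0 -> shift 9   (one entry per 512 pages, i.e. 2MB)
 *	AMDGPU_VM_PDB1 -> shift 18  (1GB per entry)
 *	AMDGPU_VM_PDB2 -> shift 27  (512GB per entry)
 *
 * Together with the 12-bit in-page offset this walks a 48-bit virtual
 * address space.
 */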

/**
 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
					     unsigned int level)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
			>> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;

	/* For the page tables on the leaves */
	return AMDGPU_VM_PTE_COUNT(adev);
}

/**
 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The mask to extract the entry number of a PD/PT from an address.
 */
static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
					  unsigned int level)
{
	if (level <= adev->vm_manager.root_level)
		return 0xffffffff;
	else if (level != AMDGPU_VM_PTB)
		return 0x1ff;
	else
		return AMDGPU_VM_PTE_COUNT(adev) - 1;
}
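
/*
 * For illustration, the mask pairs with the shift from
 * amdgpu_vm_pt_level_shift(): the entry index of a pfn at a given level
 * is computed as
 *
 *	idx = (pfn >> amdgpu_vm_pt_level_shift(adev, level)) &
 *	      amdgpu_vm_pt_entries_mask(adev, level);
 *
 * which is exactly what amdgpu_vm_pt_descendant() does below. The root
 * level uses an all-ones mask because the root directory absorbs all
 * remaining high address bits.
 */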

/**
 * amdgpu_vm_pt_size - returns the size of the page table in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
				      unsigned int level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
}
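
/*
 * As a quick sanity check of the sizing: an intermediate directory has
 * 512 entries of 8 bytes each, i.e. exactly one 4KB GPU page; only the
 * root PD (whose entry count depends on max_pfn) and the leaf PTBs
 * (AMDGPU_VM_PTE_COUNT() entries) can differ, and both are rounded up
 * to GPU page granularity here.
 */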

/**
 * amdgpu_vm_pt_parent - get the parent page directory
 *
 * @pt: child page table
 *
 * Helper to get the parent entry for the child page table. NULL if we are at
 * the root page directory.
 */
static struct amdgpu_vm_bo_base *
amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
{
	struct amdgpu_bo *parent = pt->bo->parent;

	if (!parent)
		return NULL;

	return parent->vm_bo;
}

/**
 * amdgpu_vm_pt_start - start PD/PT walk
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start address of the walk
 * @cursor: state to initialize
 *
 * Initialize an amdgpu_vm_pt_cursor to start a walk.
 */
static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t start,
			       struct amdgpu_vm_pt_cursor *cursor)
{
	cursor->pfn = start;
	cursor->parent = NULL;
	cursor->entry = &vm->root;
	cursor->level = adev->vm_manager.root_level;
}

/**
 * amdgpu_vm_pt_descendant - go to child node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the child node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int mask, shift, idx;

	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
	    !cursor->entry->bo)
		return false;

	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);

	++cursor->level;
	idx = (cursor->pfn >> shift) & mask;
	cursor->parent = cursor->entry;
	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
	return true;
}

/**
 * amdgpu_vm_pt_sibling - go to sibling node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the sibling node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
				 struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int shift, num_entries;
	struct amdgpu_bo_vm *parent;

	/* Root doesn't have a sibling */
	if (!cursor->parent)
		return false;

	/* Go to our parents and see if we got a sibling */
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
	parent = to_amdgpu_bo_vm(cursor->parent->bo);

	if (cursor->entry == &parent->entries[num_entries - 1])
		return false;

	cursor->pfn += 1ULL << shift;
	cursor->pfn &= ~((1ULL << shift) - 1);
	++cursor->entry;
	return true;
}

/**
 * amdgpu_vm_pt_ancestor - go to parent node
 *
 * @cursor: current state
 *
 * Walk to the parent node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->parent)
		return false;

	--cursor->level;
	cursor->entry = cursor->parent;
	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
	return true;
}

/**
 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk the PD/PT tree to the next node.
 */
static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
			      struct amdgpu_vm_pt_cursor *cursor)
{
	/* First try a newborn child */
	if (amdgpu_vm_pt_descendant(adev, cursor))
		return;

	/* If that didn't work, try to find a sibling */
	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
		/* No sibling, go to our parents and grandparents */
		if (!amdgpu_vm_pt_ancestor(cursor)) {
			cursor->pfn = ~0ll;
			return;
		}
	}
}
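
/*
 * Put differently, amdgpu_vm_pt_next() is a pre-order step: try a child
 * first, then a sibling, and only then climb back towards the root.
 * When even the root has no sibling left, cursor->pfn is set to ~0 so
 * that callers iterating with "while (cursor.pfn < end)" (see
 * amdgpu_vm_ptes_update()) terminate.
 */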

/**
 * amdgpu_vm_pt_first_dfs - start a depth-first search
 *
 * @adev: amdgpu_device structure
 * @vm: amdgpu_vm structure
 * @start: optional cursor to start with
 * @cursor: state to initialize
 *
 * Starts a depth-first traversal of the PD/PT tree.
 */
static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_vm_pt_cursor *start,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	if (start)
		*cursor = *start;
	else
		amdgpu_vm_pt_start(adev, vm, 0, cursor);

	while (amdgpu_vm_pt_descendant(adev, cursor))
		;
}

/**
 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 *
 * @start: starting point for the search
 * @entry: current entry
 *
 * Returns:
 * True when the search should continue, false otherwise.
 */
static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
				      struct amdgpu_vm_bo_base *entry)
{
	return entry && (!start || entry != start->entry);
}

/**
 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 *
 * @adev: amdgpu_device structure
 * @cursor: current state
 *
 * Move the cursor to the next node in a depth-first search.
 */
static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->entry)
		return;

	if (!cursor->parent)
		cursor->entry = NULL;
	else if (amdgpu_vm_pt_sibling(adev, cursor))
		while (amdgpu_vm_pt_descendant(adev, cursor))
			;
	else
		amdgpu_vm_pt_ancestor(cursor);
}

/*
 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 */
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)	\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),	\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
	     amdgpu_vm_pt_continue_dfs((start), (entry));		\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
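
/*
 * A minimal usage sketch (hypothetical caller): because
 * amdgpu_vm_pt_first_dfs() descends to the deepest child first and the
 * cursor is advanced before the loop body runs, children are always
 * visited before their parents and the current entry may be freed from
 * inside the loop:
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
 *		amdgpu_vm_pt_free(entry);
 *
 * This is essentially what the locked path of amdgpu_vm_pt_free_dfs()
 * below does.
 */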

/**
 * amdgpu_vm_pt_clear - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @vmbo: BO to clear
 * @immediate: use an immediate update
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		       struct amdgpu_bo_vm *vmbo, bool immediate)
{
	unsigned int level = adev->vm_manager.root_level;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = &vmbo->bo;
	unsigned int entries;
	struct amdgpu_bo *bo = &vmbo->bo;
	uint64_t value = 0, flags = 0;
	uint64_t addr;
	int r, idx;

	/* Figure out our place in the hierarchy */
	if (ancestor->parent) {
		++level;
		while (ancestor->parent->parent) {
			++level;
			ancestor = ancestor->parent;
		}
	}

	entries = amdgpu_bo_size(bo) / 8;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		return r;

	if (vmbo->shadow) {
		struct amdgpu_bo *shadow = vmbo->shadow;

		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
		if (r)
			return r;
	}

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return -ENODEV;

	r = vm->update_funcs->map_table(vmbo);
	if (r)
		goto exit;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
	if (r)
		goto exit;

	addr = 0;

	if (adev->asic_type >= CHIP_VEGA10) {
		if (level != AMDGPU_VM_PTB) {
			/* Handle leaf PDEs as PTEs */
			flags |= AMDGPU_PDE_PTE;
			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
		} else {
			/* Workaround for fault priority problem on GMC9 */
			flags = AMDGPU_PTE_EXECUTABLE;
		}
	}

	r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
				     value, flags);
	if (r)
		goto exit;

	r = vm->update_funcs->commit(&params, NULL);
exit:
	drm_dev_exit(idx);
	return r;
}

/**
 * amdgpu_vm_pt_create - create bo for PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use an immediate update
 * @vmbo: pointer to the buffer object pointer
 * @xcp_id: GPU partition id
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			int level, bool immediate, struct amdgpu_bo_vm **vmbo,
			int32_t xcp_id)
{
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	struct dma_resv *resv;
	unsigned int num_entries;
	int r;

	memset(&bp, 0, sizeof(bp));

	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;

	if (!adev->gmc.is_app_apu)
		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	else
		bp.domain = AMDGPU_GEM_DOMAIN_GTT;

	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;

	if (level < AMDGPU_VM_PTB)
		num_entries = amdgpu_vm_pt_num_entries(adev, level);
	else
		num_entries = 0;

	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);

	if (vm->use_cpu_for_update)
		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
	bp.xcp_id_plus1 = xcp_id + 1;

	if (vm->root.bo)
		bp.resv = vm->root.bo->tbo.base.resv;

	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
	if (r)
		return r;

	bo = &(*vmbo)->bo;
	if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
		(*vmbo)->shadow = NULL;
		return 0;
	}

	if (!bp.resv)
		WARN_ON(dma_resv_lock(bo->tbo.base.resv,
				      NULL));
	resv = bp.resv;
	memset(&bp, 0, sizeof(bp));
	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = bo->tbo.base.resv;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
	bp.xcp_id_plus1 = xcp_id + 1;

	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);

	if (!resv)
		dma_resv_unlock(bo->tbo.base.resv);

	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	amdgpu_bo_add_to_shadow_list(*vmbo);

	return 0;
}
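
/*
 * A short note on the shadow BO set up above: it is a GTT-domain copy of
 * the page table, allocated under the same reservation as the VRAM BO,
 * and amdgpu_bo_add_to_shadow_list() registers it so that (as far as the
 * shadow machinery is used elsewhere in the driver) the page table
 * contents can be restored after a GPU reset invalidates VRAM.
 */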

/**
 * amdgpu_vm_pt_alloc - Allocate a specific page table
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @cursor: Which page table to allocate
 * @immediate: use an immediate update
 *
 * Make sure a specific page table or directory is allocated.
 *
 * Returns:
 * 0 if the page table was allocated or already exists, negative errno
 * if an error occurred.
 */
static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_vm_pt_cursor *cursor,
			      bool immediate)
{
	struct amdgpu_vm_bo_base *entry = cursor->entry;
	struct amdgpu_bo *pt_bo;
	struct amdgpu_bo_vm *pt;
	int r;

	if (entry->bo)
		return 0;

	amdgpu_vm_eviction_unlock(vm);
	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
				vm->root.bo->xcp_id);
	amdgpu_vm_eviction_lock(vm);
	if (r)
		return r;

	/* Keep a reference to the parent directory so the PDs/PTs are not
	 * freed in the wrong order.
	 */
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
	if (r)
		goto error_free_pt;

	return 0;

error_free_pt:
	amdgpu_bo_unref(&pt->shadow);
	amdgpu_bo_unref(&pt_bo);
	return r;
}

/**
 * amdgpu_vm_pt_free - free one PD/PT
 *
 * @entry: PDE to free
 */
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_bo *shadow;

	if (!entry->bo)
		return;

	entry->bo->vm_bo = NULL;
	shadow = amdgpu_bo_shadowed(entry->bo);
	if (shadow) {
		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
		amdgpu_bo_unref(&shadow);
	}
	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);

	spin_lock(&entry->vm->status_lock);
	list_del(&entry->vm_status);
	spin_unlock(&entry->vm->status_lock);
	amdgpu_bo_unref(&entry->bo);
}

void amdgpu_vm_pt_free_work(struct work_struct *work)
{
	struct amdgpu_vm_bo_base *entry, *next;
	struct amdgpu_vm *vm;
	LIST_HEAD(pt_freed);

	vm = container_of(work, struct amdgpu_vm, pt_free_work);

	spin_lock(&vm->status_lock);
	list_splice_init(&vm->pt_freed, &pt_freed);
	spin_unlock(&vm->status_lock);

	/* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid. */
	amdgpu_bo_reserve(vm->root.bo, true);

	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
		amdgpu_vm_pt_free(entry);

	amdgpu_bo_unreserve(vm->root.bo);
}

/**
 * amdgpu_vm_pt_free_dfs - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 * @start: optional cursor where to start freeing PDs/PTs
 * @unlocked: vm resv unlock status
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt_cursor *start,
				  bool unlocked)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	if (unlocked) {
		spin_lock(&vm->status_lock);
		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
			list_move(&entry->vm_status, &vm->pt_freed);

		if (start)
			list_move(&start->entry->vm_status, &vm->pt_freed);
		spin_unlock(&vm->status_lock);
		schedule_work(&vm->pt_free_work);
		return;
	}

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
		amdgpu_vm_pt_free(entry);

	if (start)
		amdgpu_vm_pt_free(start->entry);
}

/**
 * amdgpu_vm_pt_free_root - free root PD
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Free the root page directory and everything below it.
 */
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
}

/**
 * amdgpu_vm_pde_update - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
			 struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
	struct amdgpu_bo *bo = parent->bo, *pbo;
	struct amdgpu_vm *vm = params->vm;
	uint64_t pde, pt, flags;
	unsigned int level;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
					1, 0, flags);
}
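
/*
 * To make the offset math explicit: @entry's index inside its parent is
 * recovered by pointer arithmetic against the parent's entries[] array,
 * and since every PDE is 8 bytes wide the byte offset handed to the
 * update callback as "pde" is simply index * 8, while
 * amdgpu_gmc_get_pde_for_bo() converts the child BO's address into the
 * hardware PDE value and flags for this level.
 */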

/**
 * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
 *
 * @adev: amdgpu_device pointer
 * @flags: pointer to PTE flags
 *
 * Update PTE no-retry flags when TF is enabled.
 */
static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
					       uint64_t *flags)
{
	/*
	 * Update no-retry flags with the corresponding TF
	 * no-retry combination.
	 */
	if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
		*flags &= ~AMDGPU_VM_NORETRY_FLAGS;
		*flags |= adev->gmc.noretry_flags;
	}
}

/*
 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
 *
 * Make sure to set the right flags for the PTEs at the desired level.
 */
static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
				       struct amdgpu_bo_vm *pt,
				       unsigned int level,
				       uint64_t pe, uint64_t addr,
				       unsigned int count, uint32_t incr,
				       uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;

	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE;
		amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);

	} else if (adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT)) {

		/* Workaround for fault priority problem on GMC9 */
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

	/*
	 * Update no-retry flags to use the no-retry flag combination
	 * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
	 * does not work when TF is enabled. So, replace them with
	 * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
	 * all cases.
	 */
	if (level == AMDGPU_VM_PTB)
		amdgpu_vm_pte_update_noretry_flags(adev, &flags);

	/* APUs mapping system memory may need different MTYPEs on different
	 * NUMA nodes. Only do this for contiguous ranges that can be assumed
	 * to be on the same NUMA node.
	 */
	if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
	    adev->gmc.gmc_funcs->override_vm_pte_flags &&
	    num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
		amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);

	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
					 flags);
}

/**
 * amdgpu_vm_pte_fragment - get fragment for PTEs
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @flags: hw mapping flags
 * @frag: resulting fragment size
 * @frag_end: end of this fragment
 *
 * Returns the first possible fragment for the start and end address.
 */
static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
				   uint64_t start, uint64_t end, uint64_t flags,
				   unsigned int *frag, uint64_t *frag_end)
{
	/*
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 *
	 * Starting with Vega10 the fragment size only controls the L1. The L2
	 * is now fed directly with small/huge/giant pages from the walker.
	 */
	unsigned int max_frag;

	if (params->adev->asic_type < CHIP_VEGA10)
		max_frag = params->adev->vm_manager.fragment_size;
	else
		max_frag = 31;

	/* system pages are not physically contiguous */
	if (params->pages_addr) {
		*frag = 0;
		*frag_end = end;
		return;
	}

	/* This intentionally wraps around if no bit is set */
	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1 << *frag);
	}
}
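
/*
 * A worked example, assuming no system pages and a post-Vega10 max_frag:
 * for start = 0x200 and end = 0x400 (PFNs, i.e. a 2MB aligned range of
 * 2MB size) both ffs(start) - 1 and fls64(end - start) - 1 evaluate to 9,
 * so *frag = 9 and *frag_end = start + (1 << 9) = end. The whole range is
 * therefore written as a single fragment with 1 << (12 + 9) = 2MB
 * granularity in the L1 TLB.
 */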

/**
 * amdgpu_vm_ptes_update - make sure that page tables are valid
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, negative errno for failure.
 */
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
			  uint64_t start, uint64_t end,
			  uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
			       &frag_end);

	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		unsigned int shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!params->unlocked) {
			/* make sure that the page tables covering the
			 * address range are actually allocated
			 */
			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
					       &cursor, params->immediate);
			if (r)
				return r;
		}

		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
		if (params->unlocked) {
			/* Unlocked updates are only allowed on the leaves */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (adev->asic_type < CHIP_VEGA10 &&
			   (flags & AMDGPU_PTE_VALID)) {
			/* No huge page support before GMC v9 */
			if (cursor.level != AMDGPU_VM_PTB) {
				if (!amdgpu_vm_pt_descendant(adev, &cursor))
					return -ENOENT;
				continue;
			}
		} else if (frag < shift) {
			/* We can't use this level when the fragment size is
			 * smaller than the address shift. Go to the next
			 * child entry and try again.
			 */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (frag >= parent_shift) {
			/* If the fragment size is even larger than the parent
			 * shift we should go up one level and check it again.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;
			continue;
		}

		pt = cursor.entry->bo;
		if (!pt) {
			/* We need all PDs and PTs for mapping something, */
			if (flags & AMDGPU_PTE_VALID)
				return -ENOENT;

			/* but unmapping something can happen at a higher
			 * level.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;

			pt = cursor.entry->bo;
			shift = parent_shift;
			frag_end = max(frag_end, ALIGN(frag_start + 1,
						       1ULL << shift));
		}

		/* Looks good so far, calculate parameters for the update */
		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
		pe_start = ((cursor.pfn >> shift) & mask) * 8;
		entry_end = ((uint64_t)mask + 1) << shift;
		entry_end += cursor.pfn & ~(entry_end - 1);
		entry_end = min(entry_end, end);

		do {
			struct amdgpu_vm *vm = params->vm;
			uint64_t upd_end = min(entry_end, frag_end);
			unsigned int nptes = (upd_end - frag_start) >> shift;
			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);

			/* This can happen when we set higher level PDs to
			 * silent to stop fault floods.
			 */
			nptes = max(nptes, 1u);

			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
						    min(nptes, 32u), dst, incr,
						    upd_flags,
						    vm->task_info ? vm->task_info->tgid : 0,
						    vm->immediate.fence_context);
			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
						   cursor.level, pe_start, dst,
						   nptes, incr, upd_flags);

			pe_start += nptes * 8;
			dst += nptes * incr;

			frag_start = upd_end;
			if (frag_start >= frag_end) {
				/* figure out the next fragment */
				amdgpu_vm_pte_fragment(params, frag_start, end,
						       flags, &frag, &frag_end);
				if (frag < shift)
					break;
			}
		} while (frag_start < entry_end);

		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
			/* Free all child entries.
			 * Update the tables with the flags and addresses and free up subsequent
			 * tables in the case of huge pages or freed up areas.
			 * This is the maximum you can free, because all other page tables are not
			 * completely covered by the range and so potentially still in use.
			 */
			while (cursor.pfn < frag_start) {
				/* Make sure previous mapping is freed */
				if (cursor.entry->bo) {
					params->table_freed = true;
					amdgpu_vm_pt_free_dfs(adev, params->vm,
							      &cursor,
							      params->unlocked);
				}
				amdgpu_vm_pt_next(adev, &cursor);
			}

		} else if (frag >= shift) {
			/* or just move on to the next on the same level. */
			amdgpu_vm_pt_next(adev, &cursor);
		}
	}

	return 0;
}
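
/*
 * For orientation, a rough sketch of one iteration of the walk above:
 * the cursor settles on the deepest level whose shift still fits the
 * current fragment (or climbs up for huge or unmapped ranges), the inner
 * do/while writes the PTEs or leaf PDEs in fragment sized chunks, and
 * afterwards any lower level tables that are now fully covered by the
 * update (huge pages or freed areas) are released before the walk moves
 * on to the next entry.
 */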

/**
 * amdgpu_vm_pt_map_tables - make the BOs of all PDs/PTs CPU accessible
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Map the root page directory and everything below it for CPU access.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
		struct amdgpu_bo_vm *bo;
		int r;

		if (entry->bo) {
			bo = to_amdgpu_bo_vm(entry->bo);
			r = vm->update_funcs->map_table(bo);
			if (r)
				return r;
		}
	}

	return 0;
}