// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_vm.h"

/*
 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt_dfs_safe
 */
struct amdgpu_vm_pt_cursor {
	uint64_t pfn;
	struct amdgpu_vm_bo_base *parent;
	struct amdgpu_vm_bo_base *entry;
	unsigned int level;
};

/**
 * amdgpu_vm_pt_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
					     unsigned int level)
{
	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		return 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
	case AMDGPU_VM_PTB:
		return 0;
	default:
		return ~0;
	}
}
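
/*
 * A worked example, assuming the common 9-bit block size
 * (adev->vm_manager.block_size == 9); the exact value is configured at
 * init time, so treat the numbers below as illustrative only:
 *
 *	AMDGPU_VM_PTB  -> shift 0   (one PTE per 4K page)
 *	AMDGPU_VM_PDB0 -> shift 9   (one entry per 512 pages, i.e. 2MB)
 *	AMDGPU_VM_PDB1 -> shift 18  (1GB per entry)
 *	AMDGPU_VM_PDB2 -> shift 27  (512GB per entry)
 *
 * Together with the 12-bit in-page offset this walks a 48-bit virtual
 * address space.
 */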

/**
 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
					     unsigned int level)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
			>> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;

	/* For the page tables on the leaves */
	return AMDGPU_VM_PTE_COUNT(adev);
}

/**
 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The mask to extract the entry number of a PD/PT from an address.
 */
static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
					  unsigned int level)
{
	if (level <= adev->vm_manager.root_level)
		return 0xffffffff;
	else if (level != AMDGPU_VM_PTB)
		return 0x1ff;
	else
		return AMDGPU_VM_PTE_COUNT(adev) - 1;
}
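
/*
 * For illustration, the mask pairs with the shift from
 * amdgpu_vm_pt_level_shift(): the entry index of a pfn at a given level
 * is computed as
 *
 *	idx = (pfn >> amdgpu_vm_pt_level_shift(adev, level)) &
 *	      amdgpu_vm_pt_entries_mask(adev, level);
 *
 * which is exactly what amdgpu_vm_pt_descendant() does below. The root
 * level uses an all-ones mask because the root directory absorbs all
 * remaining high address bits.
 */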

/**
 * amdgpu_vm_pt_size - returns the size of the page table in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
				      unsigned int level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
}
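
/*
 * As a quick sanity check of the sizing: an intermediate directory has
 * 512 entries of 8 bytes each, i.e. exactly one 4KB GPU page; only the
 * root PD (whose entry count depends on max_pfn) and the leaf PTBs
 * (AMDGPU_VM_PTE_COUNT() entries) can differ, and both are rounded up
 * to GPU page granularity here.
 */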

/**
 * amdgpu_vm_pt_parent - get the parent page directory
 *
 * @pt: child page table
 *
 * Helper to get the parent entry for the child page table. NULL if we are at
 * the root page directory.
 */
static struct amdgpu_vm_bo_base *
amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
{
	struct amdgpu_bo *parent = pt->bo->parent;

	if (!parent)
		return NULL;

	return parent->vm_bo;
}

/**
 * amdgpu_vm_pt_start - start PD/PT walk
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start address of the walk
 * @cursor: state to initialize
 *
 * Initialize an amdgpu_vm_pt_cursor to start a walk.
 */
static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t start,
			       struct amdgpu_vm_pt_cursor *cursor)
{
	cursor->pfn = start;
	cursor->parent = NULL;
	cursor->entry = &vm->root;
	cursor->level = adev->vm_manager.root_level;
}

/**
 * amdgpu_vm_pt_descendant - go to child node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the child node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int mask, shift, idx;

	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
	    !cursor->entry->bo)
		return false;

	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);

	++cursor->level;
	idx = (cursor->pfn >> shift) & mask;
	cursor->parent = cursor->entry;
	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
	return true;
}

/**
 * amdgpu_vm_pt_sibling - go to sibling node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the sibling node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
				 struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int shift, num_entries;
	struct amdgpu_bo_vm *parent;

	/* Root doesn't have a sibling */
	if (!cursor->parent)
		return false;

	/* Go to our parents and see if we got a sibling */
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
	parent = to_amdgpu_bo_vm(cursor->parent->bo);

	if (cursor->entry == &parent->entries[num_entries - 1])
		return false;

	cursor->pfn += 1ULL << shift;
	cursor->pfn &= ~((1ULL << shift) - 1);
	++cursor->entry;
	return true;
}

/**
 * amdgpu_vm_pt_ancestor - go to parent node
 *
 * @cursor: current state
 *
 * Walk to the parent node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->parent)
		return false;

	--cursor->level;
	cursor->entry = cursor->parent;
	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
	return true;
}

/**
 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk the PD/PT tree to the next node.
 */
static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
			      struct amdgpu_vm_pt_cursor *cursor)
{
	/* First try a newborn child */
	if (amdgpu_vm_pt_descendant(adev, cursor))
		return;

	/* If that didn't work, try to find a sibling */
	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
		/* No sibling, go to our parents and grandparents */
		if (!amdgpu_vm_pt_ancestor(cursor)) {
			cursor->pfn = ~0ll;
			return;
		}
	}
}
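
/*
 * Put differently, amdgpu_vm_pt_next() is a pre-order step: try a child
 * first, then a sibling, and only then climb back towards the root.
 * When even the root has no sibling left, cursor->pfn is set to ~0 so
 * that callers iterating with "while (cursor.pfn < end)" (see
 * amdgpu_vm_ptes_update()) terminate.
 */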

/**
 * amdgpu_vm_pt_first_dfs - start a depth-first search
 *
 * @adev: amdgpu_device structure
 * @vm: amdgpu_vm structure
 * @start: optional cursor to start with
 * @cursor: state to initialize
 *
 * Starts a depth-first traversal of the PD/PT tree.
 */
static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_vm_pt_cursor *start,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	if (start)
		*cursor = *start;
	else
		amdgpu_vm_pt_start(adev, vm, 0, cursor);

	while (amdgpu_vm_pt_descendant(adev, cursor))
		;
}

/**
 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 *
 * @start: starting point for the search
 * @entry: current entry
 *
 * Returns:
 * True when the search should continue, false otherwise.
 */
static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
				      struct amdgpu_vm_bo_base *entry)
{
	return entry && (!start || entry != start->entry);
}

/**
 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 *
 * @adev: amdgpu_device structure
 * @cursor: current state
 *
 * Move the cursor to the next node in a depth-first search.
 */
static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->entry)
		return;

	if (!cursor->parent)
		cursor->entry = NULL;
	else if (amdgpu_vm_pt_sibling(adev, cursor))
		while (amdgpu_vm_pt_descendant(adev, cursor))
			;
	else
		amdgpu_vm_pt_ancestor(cursor);
}

/*
 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 */
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)	\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),	\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
	     amdgpu_vm_pt_continue_dfs((start), (entry));		\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
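
/*
 * A minimal usage sketch (hypothetical caller): because
 * amdgpu_vm_pt_first_dfs() descends to the deepest child first and the
 * cursor is advanced before the loop body runs, children are always
 * visited before their parents and the current entry may be freed from
 * inside the loop:
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
 *		amdgpu_vm_pt_free(entry);
 *
 * This is essentially what the locked path of amdgpu_vm_pt_free_dfs()
 * below does.
 */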

/**
 * amdgpu_vm_pt_clear - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @vmbo: BO to clear
 * @immediate: use an immediate update
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		       struct amdgpu_bo_vm *vmbo, bool immediate)
{
	unsigned int level = adev->vm_manager.root_level;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = &vmbo->bo;
	unsigned int entries;
	struct amdgpu_bo *bo = &vmbo->bo;
	uint64_t value = 0, flags = 0;
	uint64_t addr;
	int r, idx;

	/* Figure out our place in the hierarchy */
	if (ancestor->parent) {
		++level;
		while (ancestor->parent->parent) {
			++level;
			ancestor = ancestor->parent;
		}
	}

	entries = amdgpu_bo_size(bo) / 8;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		return r;

	if (vmbo->shadow) {
		struct amdgpu_bo *shadow = vmbo->shadow;

		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
		if (r)
			return r;
	}

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return -ENODEV;

	r = vm->update_funcs->map_table(vmbo);
	if (r)
		goto exit;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
	if (r)
		goto exit;

	addr = 0;

	if (adev->asic_type >= CHIP_VEGA10) {
		if (level != AMDGPU_VM_PTB) {
			/* Handle leaf PDEs as PTEs */
			flags |= AMDGPU_PDE_PTE;
			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
		} else {
			/* Workaround for fault priority problem on GMC9 */
			flags = AMDGPU_PTE_EXECUTABLE;
		}
	}

	r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
				     value, flags);
	if (r)
		goto exit;

	r = vm->update_funcs->commit(&params, NULL);
exit:
	drm_dev_exit(idx);
	return r;
}

/**
 * amdgpu_vm_pt_create - create bo for PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use an immediate update
 * @vmbo: pointer to the buffer object pointer
 * @xcp_id: GPU partition id
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			int level, bool immediate, struct amdgpu_bo_vm **vmbo,
			int32_t xcp_id)
{
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	struct dma_resv *resv;
	unsigned int num_entries;
	int r;

	memset(&bp, 0, sizeof(bp));

	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;

	if (!adev->gmc.is_app_apu)
		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	else
		bp.domain = AMDGPU_GEM_DOMAIN_GTT;

	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;

	if (level < AMDGPU_VM_PTB)
		num_entries = amdgpu_vm_pt_num_entries(adev, level);
	else
		num_entries = 0;

	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);

	if (vm->use_cpu_for_update)
		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
	bp.xcp_id_plus1 = xcp_id + 1;

	if (vm->root.bo)
		bp.resv = vm->root.bo->tbo.base.resv;

	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
	if (r)
		return r;

	bo = &(*vmbo)->bo;
	if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
		(*vmbo)->shadow = NULL;
		return 0;
	}

	if (!bp.resv)
		WARN_ON(dma_resv_lock(bo->tbo.base.resv,
				      NULL));
	resv = bp.resv;
	memset(&bp, 0, sizeof(bp));
	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = bo->tbo.base.resv;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
	bp.xcp_id_plus1 = xcp_id + 1;

	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);

	if (!resv)
		dma_resv_unlock(bo->tbo.base.resv);

	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	amdgpu_bo_add_to_shadow_list(*vmbo);

	return 0;
}
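
/*
 * A short note on the shadow BO set up above: it is a GTT-domain copy of
 * the page table, allocated under the same reservation as the VRAM BO,
 * and amdgpu_bo_add_to_shadow_list() registers it so that (as far as the
 * shadow machinery is used elsewhere in the driver) the page table
 * contents can be restored after a GPU reset invalidates VRAM.
 */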

/**
 * amdgpu_vm_pt_alloc - Allocate a specific page table
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @cursor: Which page table to allocate
 * @immediate: use an immediate update
 *
 * Make sure a specific page table or directory is allocated.
 *
 * Returns:
 * 0 if the page table was allocated or already exists, negative errno
 * if an error occurred.
 */
static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_vm_pt_cursor *cursor,
			      bool immediate)
{
	struct amdgpu_vm_bo_base *entry = cursor->entry;
	struct amdgpu_bo *pt_bo;
	struct amdgpu_bo_vm *pt;
	int r;

	if (entry->bo)
		return 0;

	amdgpu_vm_eviction_unlock(vm);
	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
				vm->root.bo->xcp_id);
	amdgpu_vm_eviction_lock(vm);
	if (r)
		return r;

	/* Keep a reference to the parent directory so the PDs/PTs are not
	 * freed in the wrong order.
	 */
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
	if (r)
		goto error_free_pt;

	return 0;

error_free_pt:
	amdgpu_bo_unref(&pt->shadow);
	amdgpu_bo_unref(&pt_bo);
	return r;
}

/**
 * amdgpu_vm_pt_free - free one PD/PT
 *
 * @entry: PDE to free
 */
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_bo *shadow;

	if (!entry->bo)
		return;

	entry->bo->vm_bo = NULL;
	shadow = amdgpu_bo_shadowed(entry->bo);
	if (shadow) {
		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
		amdgpu_bo_unref(&shadow);
	}
	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);

	spin_lock(&entry->vm->status_lock);
	list_del(&entry->vm_status);
	spin_unlock(&entry->vm->status_lock);
	amdgpu_bo_unref(&entry->bo);
}

void amdgpu_vm_pt_free_work(struct work_struct *work)
{
	struct amdgpu_vm_bo_base *entry, *next;
	struct amdgpu_vm *vm;
	LIST_HEAD(pt_freed);

	vm = container_of(work, struct amdgpu_vm, pt_free_work);

	spin_lock(&vm->status_lock);
	list_splice_init(&vm->pt_freed, &pt_freed);
	spin_unlock(&vm->status_lock);

	/* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid. */
	amdgpu_bo_reserve(vm->root.bo, true);

	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
		amdgpu_vm_pt_free(entry);

	amdgpu_bo_unreserve(vm->root.bo);
}

/**
 * amdgpu_vm_pt_free_dfs - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 * @start: optional cursor where to start freeing PDs/PTs
 * @unlocked: vm resv unlock status
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt_cursor *start,
				  bool unlocked)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	if (unlocked) {
		spin_lock(&vm->status_lock);
		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
			list_move(&entry->vm_status, &vm->pt_freed);

		if (start)
			list_move(&start->entry->vm_status, &vm->pt_freed);
		spin_unlock(&vm->status_lock);
		schedule_work(&vm->pt_free_work);
		return;
	}

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
		amdgpu_vm_pt_free(entry);

	if (start)
		amdgpu_vm_pt_free(start->entry);
}

/**
 * amdgpu_vm_pt_free_root - free root PD
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Free the root page directory and everything below it.
 */
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
}

/**
 * amdgpu_vm_pde_update - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
			 struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
	struct amdgpu_bo *bo = parent->bo, *pbo;
	struct amdgpu_vm *vm = params->vm;
	uint64_t pde, pt, flags;
	unsigned int level;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
					1, 0, flags);
}
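
/*
 * To make the offset math explicit: @entry's index inside its parent is
 * recovered by pointer arithmetic against the parent's entries[] array,
 * and since every PDE is 8 bytes wide the byte offset handed to the
 * update callback as "pde" is simply index * 8, while
 * amdgpu_gmc_get_pde_for_bo() converts the child BO's address into the
 * hardware PDE value and flags for this level.
 */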

/**
 * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
 *
 * @adev: amdgpu_device pointer
 * @flags: pointer to PTE flags
 *
 * Update PTE no-retry flags when TF is enabled.
 */
static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
					       uint64_t *flags)
{
	/*
	 * Update no-retry flags with the corresponding TF
	 * no-retry combination.
	 */
	if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
		*flags &= ~AMDGPU_VM_NORETRY_FLAGS;
		*flags |= adev->gmc.noretry_flags;
	}
}

/*
 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
 *
 * Make sure to set the right flags for the PTEs at the desired level.
 */
static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
				       struct amdgpu_bo_vm *pt,
				       unsigned int level,
				       uint64_t pe, uint64_t addr,
				       unsigned int count, uint32_t incr,
				       uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;

	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE;
		amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);

	} else if (adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT)) {

		/* Workaround for fault priority problem on GMC9 */
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

	/*
	 * Update no-retry flags to use the no-retry flag combination
	 * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
	 * does not work when TF is enabled. So, replace them with
	 * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
	 * all cases.
	 */
	if (level == AMDGPU_VM_PTB)
		amdgpu_vm_pte_update_noretry_flags(adev, &flags);

	/* APUs mapping system memory may need different MTYPEs on different
	 * NUMA nodes. Only do this for contiguous ranges that can be assumed
	 * to be on the same NUMA node.
	 */
	if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
	    adev->gmc.gmc_funcs->override_vm_pte_flags &&
	    num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
		amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);

	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
					 flags);
}

/**
 * amdgpu_vm_pte_fragment - get fragment for PTEs
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @flags: hw mapping flags
 * @frag: resulting fragment size
 * @frag_end: end of this fragment
 *
 * Returns the first possible fragment for the start and end address.
 */
static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
				   uint64_t start, uint64_t end, uint64_t flags,
				   unsigned int *frag, uint64_t *frag_end)
{
	/*
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 *
	 * Starting with Vega10 the fragment size only controls the L1. The L2
	 * is now fed directly with small/huge/giant pages from the walker.
	 */
	unsigned int max_frag;

	if (params->adev->asic_type < CHIP_VEGA10)
		max_frag = params->adev->vm_manager.fragment_size;
	else
		max_frag = 31;

	/* system pages are not physically contiguous */
	if (params->pages_addr) {
		*frag = 0;
		*frag_end = end;
		return;
	}

	/* This intentionally wraps around if no bit is set */
	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1 << *frag);
	}
}
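
/*
 * A worked example, assuming no system pages and a post-Vega10 max_frag:
 * for start = 0x200 and end = 0x400 (PFNs, i.e. a 2MB aligned range of
 * 2MB size) both ffs(start) - 1 and fls64(end - start) - 1 evaluate to 9,
 * so *frag = 9 and *frag_end = start + (1 << 9) = end. The whole range is
 * therefore written as a single fragment with 1 << (12 + 9) = 2MB
 * granularity in the L1 TLB.
 */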

/**
 * amdgpu_vm_ptes_update - make sure that page tables are valid
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, negative errno for failure.
 */
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
			  uint64_t start, uint64_t end,
			  uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
			       &frag_end);

	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		unsigned int shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!params->unlocked) {
			/* make sure that the page tables covering the
			 * address range are actually allocated
			 */
			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
					       &cursor, params->immediate);
			if (r)
				return r;
		}

		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
		if (params->unlocked) {
			/* Unlocked updates are only allowed on the leaves */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (adev->asic_type < CHIP_VEGA10 &&
			   (flags & AMDGPU_PTE_VALID)) {
			/* No huge page support before GMC v9 */
			if (cursor.level != AMDGPU_VM_PTB) {
				if (!amdgpu_vm_pt_descendant(adev, &cursor))
					return -ENOENT;
				continue;
			}
		} else if (frag < shift) {
			/* We can't use this level when the fragment size is
			 * smaller than the address shift. Go to the next
			 * child entry and try again.
			 */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (frag >= parent_shift) {
			/* If the fragment size is even larger than the parent
			 * shift we should go up one level and check it again.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;
			continue;
		}

		pt = cursor.entry->bo;
		if (!pt) {
			/* We need all PDs and PTs for mapping something, */
			if (flags & AMDGPU_PTE_VALID)
				return -ENOENT;

			/* but unmapping something can happen at a higher
			 * level.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;

			pt = cursor.entry->bo;
			shift = parent_shift;
			frag_end = max(frag_end, ALIGN(frag_start + 1,
						       1ULL << shift));
		}

		/* Looks good so far, calculate parameters for the update */
		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
		pe_start = ((cursor.pfn >> shift) & mask) * 8;
		entry_end = ((uint64_t)mask + 1) << shift;
		entry_end += cursor.pfn & ~(entry_end - 1);
		entry_end = min(entry_end, end);

		do {
			struct amdgpu_vm *vm = params->vm;
			uint64_t upd_end = min(entry_end, frag_end);
			unsigned int nptes = (upd_end - frag_start) >> shift;
			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);

			/* This can happen when we set higher level PDs to
			 * silent to stop fault floods.
			 */
			nptes = max(nptes, 1u);

			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
						    min(nptes, 32u), dst, incr,
						    upd_flags,
						    vm->task_info ? vm->task_info->tgid : 0,
						    vm->immediate.fence_context);
			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
						   cursor.level, pe_start, dst,
						   nptes, incr, upd_flags);

			pe_start += nptes * 8;
			dst += nptes * incr;

			frag_start = upd_end;
			if (frag_start >= frag_end) {
				/* figure out the next fragment */
				amdgpu_vm_pte_fragment(params, frag_start, end,
						       flags, &frag, &frag_end);
				if (frag < shift)
					break;
			}
		} while (frag_start < entry_end);

		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
			/* Free all child entries.
			 * Update the tables with the flags and addresses and free up subsequent
			 * tables in the case of huge pages or freed up areas.
			 * This is the maximum you can free, because all other page tables are not
			 * completely covered by the range and so potentially still in use.
			 */
			while (cursor.pfn < frag_start) {
				/* Make sure previous mapping is freed */
				if (cursor.entry->bo) {
					params->table_freed = true;
					amdgpu_vm_pt_free_dfs(adev, params->vm,
							      &cursor,
							      params->unlocked);
				}
				amdgpu_vm_pt_next(adev, &cursor);
			}

		} else if (frag >= shift) {
			/* or just move on to the next on the same level. */
			amdgpu_vm_pt_next(adev, &cursor);
		}
	}

	return 0;
}
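
/*
 * For orientation, a rough sketch of one iteration of the walk above:
 * the cursor settles on the deepest level whose shift still fits the
 * current fragment (or climbs up for huge or unmapped ranges), the inner
 * do/while writes the PTEs or leaf PDEs in fragment sized chunks, and
 * afterwards any lower level tables that are now fully covered by the
 * update (huge pages or freed areas) are released before the walk moves
 * on to the next entry.
 */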

/**
 * amdgpu_vm_pt_map_tables - make the BOs of all PDs/PTs CPU accessible
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Map the root page directory and everything below it for CPU access.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
		struct amdgpu_bo_vm *bo;
		int r;

		if (entry->bo) {
			bo = to_amdgpu_bo_vm(entry->bo);
			r = vm->update_funcs->map_table(bo);
			if (r)
				return r;
		}
	}

	return 0;
}