/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */

#include <linux/dma-mapping.h>
#include <linux/iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/dma-buf.h>
#include <linux/sizes.h>
#include <linux/module.h>

#include <drm/drm_drv.h>
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
#include <drm/ttm/ttm_tt.h>

#include <drm/amdgpu_drm.h>

#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
#include "amdgpu_hmm.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"

MODULE_IMPORT_NS(DMA_BUF);

#define AMDGPU_TTM_VRAM_MAX_DW_READ	((size_t)128)

static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem);
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
				      struct ttm_tt *ttm);

static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
				   unsigned int type,
				   uint64_t size_in_page)
{
	return ttm_range_man_init(&adev->mman.bdev, type,
				  false, size_in_page);
}

/**
 * amdgpu_evict_flags - Compute placement flags
 *
 * @bo: The buffer object to evict
 * @placement: Possible destination(s) for evicted BO
 *
 * Fill in placement data when ttm_bo_evict() is called
 */
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
			       struct ttm_placement *placement)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	static const struct ttm_place placements = {
		.fpfn = 0,
		.lpfn = 0,
		.mem_type = TTM_PL_SYSTEM,
		.flags = 0
	};

	/* Don't handle scatter gather BOs */
	if (bo->type == ttm_bo_type_sg) {
		placement->num_placement = 0;
		return;
	}

	/* Object isn't an AMDGPU object so ignore */
	if (!amdgpu_bo_is_amdgpu_bo(bo)) {
		placement->placement = &placements;
		placement->num_placement = 1;
		return;
	}

	abo = ttm_to_amdgpu_bo(bo);
	if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
		placement->num_placement = 0;
		return;
	}

	switch (bo->resource->mem_type) {
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_OA:
	case AMDGPU_PL_DOORBELL:
		placement->num_placement = 0;
		return;

	case TTM_PL_VRAM:
		if (!adev->mman.buffer_funcs_enabled) {
			/* Move to system memory */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);

		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			   amdgpu_res_cpu_visible(adev, bo->resource)) {

			/* Try evicting to the CPU inaccessible part of VRAM
			 * first, but only set GTT as busy placement, so this
			 * BO will be evicted to GTT rather than causing other
			 * BOs to be evicted from VRAM
			 */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
							AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
			abo->placements[0].lpfn = 0;
			abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
		} else {
			/* Move to GTT memory */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
		}
		break;
	case TTM_PL_TT:
	case AMDGPU_PL_PREEMPT:
	default:
		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
		break;
	}
	*placement = abo->placement;
}

/**
 * amdgpu_ttm_map_buffer - Map memory into the GART windows
 * @bo: buffer object to map
 * @mem: memory object to map
 * @mm_cur: range to map
 * @window: which GART window to use
 * @ring: DMA ring to use for the copy
 * @tmz: if we should set up a TMZ-enabled mapping
 * @size: in number of bytes to map, out number of bytes mapped
 * @addr: resulting address inside the MC address space
 *
 * Set up one of the GART windows to access a specific piece of memory or
 * return the physical address for local memory.
 */
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
				 struct ttm_resource *mem,
				 struct amdgpu_res_cursor *mm_cur,
				 unsigned int window, struct amdgpu_ring *ring,
				 bool tmz, uint64_t *size, uint64_t *addr)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offset, num_pages, num_dw, num_bytes;
	uint64_t src_addr, dst_addr;
	struct amdgpu_job *job;
	void *cpu_addr;
	uint64_t flags;
	unsigned int i;
	int r;

	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);

	if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
		return -EINVAL;

	/* Map only what can't be accessed directly */
	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
			mm_cur->start;
		return 0;
	}

	/*
	 * If start begins at an offset inside the page, then adjust the size
	 * and addr accordingly
	 */
	offset = mm_cur->start & ~PAGE_MASK;

	num_pages = PFN_UP(*size + offset);
	num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);

	*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);

	*addr = adev->gmc.gart_start;
	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
		AMDGPU_GPU_PAGE_SIZE;
	*addr += offset;

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;

	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
	if (tmz)
		flags |= AMDGPU_PTE_TMZ;

	cpu_addr = &job->ibs[0].ptr[num_dw];

	if (mem->mem_type == TTM_PL_TT) {
		dma_addr_t *dma_addr;

		dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
		amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
	} else {
		dma_addr_t dma_address;

		dma_address = mm_cur->start;
		dma_address += adev->vm_manager.vram_base_offset;

		for (i = 0; i < num_pages; ++i) {
			amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
					flags, cpu_addr);
			dma_address += PAGE_SIZE;
		}
	}

	dma_fence_put(amdgpu_job_submit(job));
	return 0;
}
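
/*
 * A worked example of the window addressing above (a sketch, assuming
 * AMDGPU_GTT_MAX_TRANSFER_SIZE == 512 and AMDGPU_GPU_PAGE_SIZE == 4096;
 * see amdgpu.h for the authoritative values): window 1 then starts at
 * gart_start + 1 * 512 * 4096 = gart_start + 2 MiB. For
 * mm_cur->start == 0x11234 the intra-page offset is 0x234, so *addr
 * becomes gart_start + 2 MiB + 0x234 and at most 512 * 4 KiB - 0x234
 * bytes are mapped in one pass.
 */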

/**
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
 * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
 * @size: number of bytes to copy
 * @tmz: if a secure copy should be used
 * @resv: resv object to sync to
 * @f: Returns the last fence if multiple jobs are submitted.
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be the same BO for a
 * move and different for a BO to BO copy.
 *
 */
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
			       const struct amdgpu_copy_mem *src,
			       const struct amdgpu_copy_mem *dst,
			       uint64_t size, bool tmz,
			       struct dma_resv *resv,
			       struct dma_fence **f)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_res_cursor src_mm, dst_mm;
	struct dma_fence *fence = NULL;
	int r = 0;

	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);

	mutex_lock(&adev->mman.gtt_window_lock);
	while (src_mm.remaining) {
		uint64_t from, to, cur_size;
		struct dma_fence *next;

		/* Never copy more than 256MiB at once to avoid a timeout */
		cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);

		/* Map src to window 0 and dst to window 1. */
		r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
					  0, ring, tmz, &cur_size, &from);
		if (r)
			goto error;

		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
					  1, ring, tmz, &cur_size, &to);
		if (r)
			goto error;

		r = amdgpu_copy_buffer(ring, from, to, cur_size,
				       resv, &next, false, true, tmz);
		if (r)
			goto error;

		dma_fence_put(fence);
		fence = next;

		amdgpu_res_next(&src_mm, cur_size);
		amdgpu_res_next(&dst_mm, cur_size);
	}
error:
	mutex_unlock(&adev->mman.gtt_window_lock);
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}
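
/*
 * Note on the loop above: each iteration submits one copy job and drops
 * the reference to the previous fence, so only the final fence is handed
 * back through @f. All jobs are queued to the same ring/entity, so they
 * should complete in submission order and waiting on the last fence is
 * enough to know the whole copy has finished.
 */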

/*
 * amdgpu_move_blit - Copy an entire buffer to another buffer
 *
 * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
 * help move buffers to and from VRAM.
 */
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
			    bool evict,
			    struct ttm_resource *new_mem,
			    struct ttm_resource *old_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_copy_mem src, dst;
	struct dma_fence *fence = NULL;
	int r;

	src.bo = bo;
	dst.bo = bo;
	src.mem = old_mem;
	dst.mem = new_mem;
	src.offset = 0;
	dst.offset = 0;

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
				       new_mem->size,
				       amdgpu_bo_encrypted(abo),
				       bo->base.resv, &fence);
	if (r)
		goto error;

	/* clear the space being freed */
	if (old_mem->mem_type == TTM_PL_VRAM &&
	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
		struct dma_fence *wipe_fence = NULL;

		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
				       false);
		if (r) {
			goto error;
		} else if (wipe_fence) {
			dma_fence_put(fence);
			fence = wipe_fence;
		}
	}

	/* Always block for VM page tables before committing the new location */
	if (bo->type == ttm_bo_type_kernel)
		r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
	else
		r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
	dma_fence_put(fence);
	return r;

error:
	if (fence)
		dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}

/**
 * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
 * @adev: amdgpu device
 * @res: the resource to check
 *
 * Returns: true if the full resource is CPU visible, false otherwise.
 */
bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
			    struct ttm_resource *res)
{
	struct amdgpu_res_cursor cursor;

	if (!res)
		return false;

	if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
	    res->mem_type == AMDGPU_PL_PREEMPT)
		return true;

	if (res->mem_type != TTM_PL_VRAM)
		return false;

	amdgpu_res_first(res, 0, res->size, &cursor);
	while (cursor.remaining) {
		if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
			return false;
		amdgpu_res_next(&cursor, cursor.size);
	}

	return true;
}
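
/*
 * For example, with 256 MiB of CPU-visible VRAM, a VRAM resource with a
 * chunk spanning [252 MiB, 260 MiB) fails the loop above on that chunk,
 * so the whole resource is reported as not CPU visible.
 */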

/*
 * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
 *
 * Called by amdgpu_bo_move()
 */
static bool amdgpu_res_copyable(struct amdgpu_device *adev,
				struct ttm_resource *mem)
{
	if (!amdgpu_res_cpu_visible(adev, mem))
		return false;

	/* ttm_resource_ioremap only supports contiguous memory */
	if (mem->mem_type == TTM_PL_VRAM &&
	    !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
		return false;

	return true;
}

/*
 * amdgpu_bo_move - Move a buffer object to a new memory location
 *
 * Called by ttm_bo_handle_move_mem()
 */
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
			  struct ttm_operation_ctx *ctx,
			  struct ttm_resource *new_mem,
			  struct ttm_place *hop)
{
	struct amdgpu_device *adev;
	struct amdgpu_bo *abo;
	struct ttm_resource *old_mem = bo->resource;
	int r;

	if (new_mem->mem_type == TTM_PL_TT ||
	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
		if (r)
			return r;
	}

	abo = ttm_to_amdgpu_bo(bo);
	adev = amdgpu_ttm_adev(bo->bdev);

	if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
			 bo->ttm == NULL)) {
		ttm_bo_move_null(bo, new_mem);
		goto out;
	}
	if (old_mem->mem_type == TTM_PL_SYSTEM &&
	    (new_mem->mem_type == TTM_PL_TT ||
	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
		ttm_bo_move_null(bo, new_mem);
		goto out;
	}
	if ((old_mem->mem_type == TTM_PL_TT ||
	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
	    new_mem->mem_type == TTM_PL_SYSTEM) {
		r = ttm_bo_wait_ctx(bo, ctx);
		if (r)
			return r;

		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
		ttm_resource_free(bo, &bo->resource);
		ttm_bo_assign_mem(bo, new_mem);
		goto out;
	}

	if (old_mem->mem_type == AMDGPU_PL_GDS ||
	    old_mem->mem_type == AMDGPU_PL_GWS ||
	    old_mem->mem_type == AMDGPU_PL_OA ||
	    old_mem->mem_type == AMDGPU_PL_DOORBELL ||
	    new_mem->mem_type == AMDGPU_PL_GDS ||
	    new_mem->mem_type == AMDGPU_PL_GWS ||
	    new_mem->mem_type == AMDGPU_PL_OA ||
	    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
		/* Nothing to save here */
		ttm_bo_move_null(bo, new_mem);
		goto out;
	}

	if (bo->type == ttm_bo_type_device &&
	    new_mem->mem_type == TTM_PL_VRAM &&
	    old_mem->mem_type != TTM_PL_VRAM) {
		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
		 * accesses the BO after it's moved.
		 */
		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}

	if (adev->mman.buffer_funcs_enabled) {
		if (((old_mem->mem_type == TTM_PL_SYSTEM &&
		      new_mem->mem_type == TTM_PL_VRAM) ||
		     (old_mem->mem_type == TTM_PL_VRAM &&
		      new_mem->mem_type == TTM_PL_SYSTEM))) {
			hop->fpfn = 0;
			hop->lpfn = 0;
			hop->mem_type = TTM_PL_TT;
			hop->flags = TTM_PL_FLAG_TEMPORARY;
			return -EMULTIHOP;
		}

		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
	} else {
		r = -ENODEV;
	}

	if (r) {
		/* Check that all memory is CPU accessible */
		if (!amdgpu_res_copyable(adev, old_mem) ||
		    !amdgpu_res_copyable(adev, new_mem)) {
			pr_err("Move buffer fallback to memcpy unavailable\n");
			return r;
		}

		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
		if (r)
			return r;
	}

	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
out:
	/* update statistics */
	atomic64_add(bo->base.size, &adev->num_bytes_moved);
	amdgpu_bo_move_notify(bo, evict);
	return 0;
}
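
/*
 * A sketch of the -EMULTIHOP handshake above: for a direct SYSTEM <-> VRAM
 * move, amdgpu_bo_move() fills @hop with a temporary TTM_PL_TT placement
 * and bails out. TTM then bounces the buffer through GTT, e.g.
 * SYSTEM -> TT (populate and bind to GART) followed by a second call into
 * this function that blits TT -> VRAM, so the copy engine only ever sees
 * GART-addressable pages.
 */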

/*
 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
 *
 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 */
static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
				     struct ttm_resource *mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);

	switch (mem->mem_type) {
	case TTM_PL_SYSTEM:
		/* system memory */
		return 0;
	case TTM_PL_TT:
	case AMDGPU_PL_PREEMPT:
		break;
	case TTM_PL_VRAM:
		mem->bus.offset = mem->start << PAGE_SHIFT;

		if (adev->mman.aper_base_kaddr &&
		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
					mem->bus.offset;

		mem->bus.offset += adev->gmc.aper_base;
		mem->bus.is_iomem = true;
		break;
	case AMDGPU_PL_DOORBELL:
		mem->bus.offset = mem->start << PAGE_SHIFT;
		mem->bus.offset += adev->doorbell.base;
		mem->bus.is_iomem = true;
		mem->bus.caching = ttm_uncached;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					   unsigned long page_offset)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
			 &cursor);

	if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
		return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;

	return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}

/**
 * amdgpu_ttm_domain_start - Returns GPU start address
 * @adev: amdgpu device object
 * @type: type of the memory
 *
 * Returns:
 * GPU start address of a memory domain
 */

uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
{
	switch (type) {
	case TTM_PL_TT:
		return adev->gmc.gart_start;
	case TTM_PL_VRAM:
		return adev->gmc.vram_start;
	}

	return 0;
}

/*
 * TTM backend functions.
 */
struct amdgpu_ttm_tt {
	struct ttm_tt ttm;
	struct drm_gem_object *gobj;
	u64 offset;
	uint64_t userptr;
	struct task_struct *usertask;
	uint32_t userflags;
	bool bound;
	int32_t pool_id;
};

#define ttm_to_amdgpu_ttm_tt(ptr)	container_of(ptr, struct amdgpu_ttm_tt, ttm)

#ifdef CONFIG_DRM_AMDGPU_USERPTR
/*
 * amdgpu_ttm_tt_get_user_pages - get device-accessible pages that back user
 * memory and start HMM tracking of CPU page table updates
 *
 * The calling function must call amdgpu_ttm_tt_userptr_range_done() once and
 * only once afterwards to stop HMM tracking
 */
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
				 struct hmm_range **range)
{
	struct ttm_tt *ttm = bo->tbo.ttm;
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	unsigned long start = gtt->userptr;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	bool readonly;
	int r = 0;

	/* Make sure get_user_pages_done() can clean up gracefully */
	*range = NULL;

	mm = bo->notifier.mm;
	if (unlikely(!mm)) {
		DRM_DEBUG_DRIVER("BO is not registered?\n");
		return -EFAULT;
	}

	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
		return -ESRCH;

	mmap_read_lock(mm);
	vma = vma_lookup(mm, start);
	if (unlikely(!vma)) {
		r = -EFAULT;
		goto out_unlock;
	}
	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
		     vma->vm_file)) {
		r = -EPERM;
		goto out_unlock;
	}

	readonly = amdgpu_ttm_tt_is_readonly(ttm);
	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
				       readonly, NULL, pages, range);
out_unlock:
	mmap_read_unlock(mm);
	if (r)
		pr_debug("failed %d to get user pages 0x%lx\n", r, start);

	mmput(mm);

	return r;
}
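
/*
 * A minimal sketch of the required pairing (the caller shown is
 * illustrative, not a real call site):
 *
 *	r = amdgpu_ttm_tt_get_user_pages(bo, pages, &range);
 *	if (r)
 *		return r;
 *	// ... validate and use the pages ...
 *	if (!amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range))
 *		// CPU page tables changed underneath us, retry
 *
 * The done() call both checks validity and stops the HMM tracking started
 * here, which is why it must run exactly once per successful get.
 */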

/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
 */
void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
				      struct hmm_range *range)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt && gtt->userptr && range)
		amdgpu_hmm_range_get_pages_done(range);
}

/*
 * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table
 * changes and check if the pages backing this ttm range have been invalidated
 *
 * Returns: true if pages are still valid
 */
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
				       struct hmm_range *range)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (!gtt || !gtt->userptr || !range)
		return false;

	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
			 gtt->userptr, ttm->num_pages);

	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");

	return !amdgpu_hmm_range_get_pages_done(range);
}
#endif

/*
 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
 *
 * Called by amdgpu_cs_list_validate(). This creates the page list
 * that backs user memory and will ultimately be mapped into the device
 * address space.
 */
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
	unsigned long i;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i] = pages ? pages[i] : NULL;
}

/*
 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
 *
 * Called by amdgpu_ttm_backend_bind()
 **/
static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
				     struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
	int r;

	/* Allocate an SG array and squash pages into it */
	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      (u64)ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
	if (r)
		goto release_sg;

	/* Map SG to device */
	r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
	if (r)
		goto release_sg;

	/* convert SG to linear array of pages and dma addresses */
	drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
				       ttm->num_pages);

	return 0;

release_sg:
	kfree(ttm->sg);
	ttm->sg = NULL;
	return r;
}

/*
 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 */
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
					struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* double check that we don't free the table twice */
	if (!ttm->sg || !ttm->sg->sgl)
		return;

	/* unmap the pages mapped to the device */
	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
	sg_free_table(ttm->sg);
}

/*
 * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
 * MQDn+CtrlStackn where n is the number of XCCs per partition.
 * pages_per_xcc is the size of one MQD+CtrlStack. The first page is the MQD
 * and uses the default memory type (UC). The remaining pages_per_xcc - 1
 * pages are the control stack, whose memory type is changed to NC.
 */
static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
					  struct ttm_tt *ttm, uint64_t flags)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	uint64_t total_pages = ttm->num_pages;
	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
	uint64_t page_idx, pages_per_xcc;
	int i;
	uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
			AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);

	pages_per_xcc = total_pages;
	do_div(pages_per_xcc, num_xcc);

	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
		/* MQD page: use default flags */
		amdgpu_gart_bind(adev,
				 gtt->offset + (page_idx << PAGE_SHIFT),
				 1, &gtt->ttm.dma_address[page_idx], flags);
		/*
		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
		 * the second page of the BO onward.
		 */
		amdgpu_gart_bind(adev,
				 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
				 pages_per_xcc - 1,
				 &gtt->ttm.dma_address[page_idx + 1],
				 ctrl_flags);
	}
}
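
/*
 * A worked example of the layout above (illustrative numbers): with
 * num_xcc == 2 and total_pages == 8, pages_per_xcc == 4 and the loop
 * produces
 *
 *	page 0: MQD0 (default type, UC)   pages 1-3: CtrlStack0 (NC)
 *	page 4: MQD1 (default type, UC)   pages 5-7: CtrlStack1 (NC)
 */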

static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
				 struct ttm_buffer_object *tbo,
				 uint64_t flags)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
	struct ttm_tt *ttm = tbo->ttm;
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (amdgpu_bo_encrypted(abo))
		flags |= AMDGPU_PTE_TMZ;

	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
	} else {
		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
				 gtt->ttm.dma_address, flags);
	}
	gtt->bound = true;
}

/*
 * amdgpu_ttm_backend_bind - Bind GTT memory
 *
 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
 * This handles binding GTT memory to the device address space.
 */
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	uint64_t flags;
	int r;

	if (!bo_mem)
		return -EINVAL;

	if (gtt->bound)
		return 0;

	if (gtt->userptr) {
		r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
		if (r) {
			DRM_ERROR("failed to pin userptr\n");
			return r;
		}
	} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
		if (!ttm->sg) {
			struct dma_buf_attachment *attach;
			struct sg_table *sgt;

			attach = gtt->gobj->import_attach;
			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
			if (IS_ERR(sgt))
				return PTR_ERR(sgt);

			ttm->sg = sgt;
		}

		drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
					       ttm->num_pages);
	}

	if (!ttm->num_pages) {
		WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
		     ttm->num_pages, bo_mem, ttm);
	}

	if (bo_mem->mem_type != TTM_PL_TT ||
	    !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
		return 0;
	}

	/* compute PTE flags relevant to this BO memory */
	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);

	/* bind pages into GART page tables */
	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
	amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
			 gtt->ttm.dma_address, flags);
	gtt->bound = true;
	return 0;
}

/*
 * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
 * through AGP or GART aperture.
 *
 * If bo is accessible through AGP aperture, then use AGP aperture
 * to access bo; otherwise allocate logical space in GART aperture
 * and map bo to GART aperture.
 */
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
	struct ttm_placement placement;
	struct ttm_place placements;
	struct ttm_resource *tmp;
	uint64_t addr, flags;
	int r;

	if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
		return 0;

	addr = amdgpu_gmc_agp_addr(bo);
	if (addr != AMDGPU_BO_INVALID_OFFSET)
		return 0;

	/* allocate GART space */
	placement.num_placement = 1;
	placement.placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
	placements.mem_type = TTM_PL_TT;
	placements.flags = bo->resource->placement;

	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
	if (unlikely(r))
		return r;

	/* compute PTE flags for this buffer object */
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);

	/* Bind pages */
	gtt->offset = (u64)tmp->start << PAGE_SHIFT;
	amdgpu_ttm_gart_bind(adev, bo, flags);
	amdgpu_gart_invalidate_tlb(adev);
	ttm_resource_free(bo, &bo->resource);
	ttm_bo_assign_mem(bo, tmp);

	return 0;
}
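
/*
 * In short, amdgpu_ttm_alloc_gart() is a three-way decision: the resource
 * already has a valid offset (nothing to do), the BO is reachable through
 * the AGP aperture (directly addressable, nothing to bind), or GART space
 * is allocated and the pages are bound so the GPU can reach the BO.
 */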

/*
 * amdgpu_ttm_recover_gart - Rebind GTT pages
 *
 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
 * rebind GTT pages during a GPU reset.
 */
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	uint64_t flags;

	if (!tbo->ttm)
		return;

	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
	amdgpu_ttm_gart_bind(adev, tbo, flags);
}

/*
 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
 *
 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
 * ttm_tt_destroy().
 */
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
				      struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	/* if the pages have userptr pinning then clear that first */
	if (gtt->userptr) {
		amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
	} else if (ttm->sg && gtt->gobj->import_attach) {
		struct dma_buf_attachment *attach;

		attach = gtt->gobj->import_attach;
		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
		ttm->sg = NULL;
	}

	if (!gtt->bound)
		return;

	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
		return;

	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
	amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
	gtt->bound = false;
}

static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
				       struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt->usertask)
		put_task_struct(gtt->usertask);

	ttm_tt_fini(&gtt->ttm);
	kfree(gtt);
}

/**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
 *
 * @bo: The buffer object to create a GTT ttm_tt object around
 * @page_flags: Page flags to be added to the ttm_tt object
 *
 * Called by ttm_tt_create().
 */
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
					   uint32_t page_flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_ttm_tt *gtt;
	enum ttm_caching caching;

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (!gtt)
		return NULL;

	gtt->gobj = &bo->base;
	if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
		gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
	else
		gtt->pool_id = abo->xcp_id;

	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		caching = ttm_write_combined;
	else
		caching = ttm_cached;

	/* allocate space for the uninitialized page entries */
	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
		kfree(gtt);
		return NULL;
	}
	return &gtt->ttm;
}

/*
 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
 *
 * Map the pages of a ttm_tt object to an address space visible
 * to the underlying device.
 */
static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
				  struct ttm_tt *ttm,
				  struct ttm_operation_ctx *ctx)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	struct ttm_pool *pool;
	pgoff_t i;
	int ret;

	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
	if (gtt->userptr) {
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
		if (!ttm->sg)
			return -ENOMEM;
		return 0;
	}

	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
		return 0;

	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
		pool = &adev->mman.ttm_pools[gtt->pool_id];
	else
		pool = &adev->mman.bdev.pool;
	ret = ttm_pool_alloc(pool, ttm, ctx);
	if (ret)
		return ret;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = bdev->dev_mapping;

	return 0;
}

/*
 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
 *
 * Unmaps pages of a ttm_tt object from the device address space and
 * unpopulates the page array backing it.
 */
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
				     struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	struct amdgpu_device *adev;
	struct ttm_pool *pool;
	pgoff_t i;

	amdgpu_ttm_backend_unbind(bdev, ttm);

	if (gtt->userptr) {
		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
		kfree(ttm->sg);
		ttm->sg = NULL;
		return;
	}

	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
		return;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = NULL;

	adev = amdgpu_ttm_adev(bdev);

	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
		pool = &adev->mman.ttm_pools[gtt->pool_id];
	else
		pool = &adev->mman.bdev.pool;

	return ttm_pool_free(pool, ttm);
}

/**
 * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
 * task
 *
 * @tbo: The ttm_buffer_object that contains the userptr
 * @user_addr: The returned userptr address
 */
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
			      uint64_t *user_addr)
{
	struct amdgpu_ttm_tt *gtt;

	if (!tbo->ttm)
		return -EINVAL;

	gtt = (void *)tbo->ttm;
	*user_addr = gtt->userptr;
	return 0;
}

/**
 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
 * task
 *
 * @bo: The ttm_buffer_object to bind this userptr to
 * @addr: The address in the current tasks VM space to use
 * @flags: Requirements of userptr object.
 *
 * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
 * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
 * initialize GPU VM for a KFD process.
 */
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
			      uint64_t addr, uint32_t flags)
{
	struct amdgpu_ttm_tt *gtt;

	if (!bo->ttm) {
		/* TODO: We want a separate TTM object type for userptrs */
		bo->ttm = amdgpu_ttm_tt_create(bo, 0);
		if (bo->ttm == NULL)
			return -ENOMEM;
	}

	/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
	bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;

	gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
	gtt->userptr = addr;
	gtt->userflags = flags;

	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

	return 0;
}

/*
 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
 */
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt == NULL)
		return NULL;

	if (gtt->usertask == NULL)
		return NULL;

	return gtt->usertask->mm;
}

/*
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
 * address range for the current task.
 *
 */
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
				  unsigned long end, unsigned long *userptr)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	unsigned long size;

	if (gtt == NULL || !gtt->userptr)
		return false;

	/* Return false if no part of the ttm_tt object lies within
	 * the range
	 */
	size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

	if (userptr)
		*userptr = gtt->userptr;
	return true;
}

/*
 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
 */
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt == NULL || !gtt->userptr)
		return false;

	return true;
}

/*
 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
 */
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}

/**
 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
 *
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
 *
 * Figure out the flags to use for a VM PDE (Page Directory Entry).
 */
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
{
	uint64_t flags = 0;

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

	if (mem && (mem->mem_type == TTM_PL_TT ||
		    mem->mem_type == AMDGPU_PL_DOORBELL ||
		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
		flags |= AMDGPU_PTE_SYSTEM;

		if (ttm->caching == ttm_cached)
			flags |= AMDGPU_PTE_SNOOPED;
	}

	if (mem && mem->mem_type == TTM_PL_VRAM &&
	    mem->bus.caching == ttm_cached)
		flags |= AMDGPU_PTE_SNOOPED;

	return flags;
}
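
/*
 * For example, from the checks above: a cached GTT page gets
 * AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED, a
 * write-combined GTT page drops SNOOPED, and VRAM with an uncached bus
 * mapping is just AMDGPU_PTE_VALID.
 */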

/**
 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
 *
 * @adev: amdgpu_device pointer
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
 *
 * Figure out the flags to use for a VM PTE (Page Table Entry).
 */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
				 struct ttm_resource *mem)
{
	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);

	flags |= adev->gart.gart_pte_flags;
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}

/*
 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
 * object.
 *
 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
 * it can find space for a new object and by ttm_bo_force_list_clean() which is
 * used to clean out a memory space.
 */
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
	struct dma_resv_iter resv_cursor;
	struct dma_fence *f;

	if (!amdgpu_bo_is_amdgpu_bo(bo))
		return ttm_bo_eviction_valuable(bo, place);

	/* Swapout? */
	if (bo->resource->mem_type == TTM_PL_SYSTEM)
		return true;

	if (bo->type == ttm_bo_type_kernel &&
	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
		return false;

	/* If bo is a KFD BO, check if the bo belongs to the current process.
	 * If true, then return false as any KFD process needs all its BOs to
	 * be resident to run successfully
	 */
	dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
				DMA_RESV_USAGE_BOOKKEEP, f) {
		if (amdkfd_fence_check_mm(f, current->mm))
			return false;
	}

	/* Preemptible BOs don't own system resources managed by the
	 * driver (pages, VRAM, GART space). They point to resources
	 * owned by someone else (e.g. pageable memory in user mode
	 * or a DMABuf). They are used in a preemptible context so we
	 * can guarantee no deadlocks and good QoS in case of MMU
	 * notifiers or DMABuf move notifiers from the resource owner.
	 */
	if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
		return false;

	if (bo->resource->mem_type == TTM_PL_TT &&
	    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
		return false;

	return ttm_bo_eviction_valuable(bo, place);
}

static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
				      void *buf, size_t size, bool write)
{
	while (size) {
		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
		uint64_t bytes = 4 - (pos & 0x3);
		uint32_t shift = (pos & 0x3) * 8;
		uint32_t mask = 0xffffffff << shift;
		uint32_t value = 0;

		if (size < bytes) {
			mask &= 0xffffffff >> (bytes - size) * 8;
			bytes = size;
		}

		if (mask != 0xffffffff) {
			amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
			if (write) {
				value &= ~mask;
				value |= (*(uint32_t *)buf << shift) & mask;
				amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
			} else {
				value = (value & mask) >> shift;
				memcpy(buf, &value, bytes);
			}
		} else {
			amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
		}

		pos += bytes;
		buf += bytes;
		size -= bytes;
	}
}
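
/*
 * A worked read of 3 bytes at pos == 0x1002: iteration one uses
 * aligned_pos == 0x1000, bytes == 2, shift == 16, mask == 0xffff0000, so
 * it reads the whole dword and extracts the top two bytes; iteration two
 * (pos == 0x1004, size == 1) shrinks the mask to 0xff and extracts the
 * low byte. Writes take the same split but read-modify-write the
 * untouched byte lanes instead.
 */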
1449 | |
1450 | static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, |
1451 | unsigned long offset, void *buf, |
1452 | int len, int write) |
1453 | { |
1454 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo: bo); |
1455 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: abo->tbo.bdev); |
1456 | struct amdgpu_res_cursor src_mm; |
1457 | struct amdgpu_job *job; |
1458 | struct dma_fence *fence; |
1459 | uint64_t src_addr, dst_addr; |
1460 | unsigned int num_dw; |
1461 | int r, idx; |
1462 | |
1463 | if (len != PAGE_SIZE) |
1464 | return -EINVAL; |
1465 | |
1466 | if (!adev->mman.sdma_access_ptr) |
1467 | return -EACCES; |
1468 | |
1469 | if (!drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) |
1470 | return -ENODEV; |
1471 | |
1472 | if (write) |
1473 | memcpy(adev->mman.sdma_access_ptr, buf, len); |
1474 | |
1475 | num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); |
1476 | r = amdgpu_job_alloc_with_ib(adev, entity: &adev->mman.high_pr, |
1477 | AMDGPU_FENCE_OWNER_UNDEFINED, |
1478 | size: num_dw * 4, pool_type: AMDGPU_IB_POOL_DELAYED, |
1479 | job: &job); |
1480 | if (r) |
1481 | goto out; |
1482 | |
1483 | amdgpu_res_first(res: abo->tbo.resource, start: offset, size: len, cur: &src_mm); |
1484 | src_addr = amdgpu_ttm_domain_start(adev, type: bo->resource->mem_type) + |
1485 | src_mm.start; |
1486 | dst_addr = amdgpu_bo_gpu_offset(bo: adev->mman.sdma_access_bo); |
1487 | if (write) |
1488 | swap(src_addr, dst_addr); |
1489 | |
1490 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, |
1491 | PAGE_SIZE, false); |
1492 | |
1493 | amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); |
1494 | WARN_ON(job->ibs[0].length_dw > num_dw); |
1495 | |
1496 | fence = amdgpu_job_submit(job); |
1497 | |
1498 | if (!dma_fence_wait_timeout(fence, intr: false, timeout: adev->sdma_timeout)) |
1499 | r = -ETIMEDOUT; |
1500 | dma_fence_put(fence); |
1501 | |
1502 | if (!(r || write)) |
1503 | memcpy(buf, adev->mman.sdma_access_ptr, len); |
1504 | out: |
1505 | drm_dev_exit(idx); |
1506 | return r; |
1507 | } |
1508 | |
1509 | /** |
1510 | * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. |
1511 | * |
1512 | * @bo: The buffer object to read/write |
1513 | * @offset: Offset into buffer object |
1514 | * @buf: Secondary buffer to write/read from |
1515 | * @len: Length in bytes of access |
1516 | * @write: true if writing |
1517 | * |
1518 | * This is used to access VRAM that backs a buffer object via MMIO |
1519 | * access for debugging purposes. |
1520 | */ |
1521 | static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, |
1522 | unsigned long offset, void *buf, int len, |
1523 | int write) |
1524 | { |
1525 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo: bo); |
1526 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: abo->tbo.bdev); |
1527 | struct amdgpu_res_cursor cursor; |
1528 | int ret = 0; |
1529 | |
1530 | if (bo->resource->mem_type != TTM_PL_VRAM) |
1531 | return -EIO; |
1532 | |
1533 | if (amdgpu_device_has_timeouts_enabled(adev) && |
1534 | !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write)) |
1535 | return len; |
1536 | |
1537 | amdgpu_res_first(res: bo->resource, start: offset, size: len, cur: &cursor); |
1538 | while (cursor.remaining) { |
1539 | size_t count, size = cursor.size; |
1540 | loff_t pos = cursor.start; |
1541 | |
1542 | count = amdgpu_device_aper_access(adev, pos, buf, size, write); |
1543 | size -= count; |
1544 | if (size) { |
1545 | /* using MM to access rest vram and handle un-aligned address */ |
1546 | pos += count; |
1547 | buf += count; |
1548 | amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write); |
1549 | } |
1550 | |
1551 | ret += cursor.size; |
1552 | buf += cursor.size; |
1553 | amdgpu_res_next(cur: &cursor, size: cursor.size); |
1554 | } |
1555 | |
1556 | return ret; |
1557 | } |
1558 | |
1559 | static void |
1560 | amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) |
1561 | { |
1562 | amdgpu_bo_move_notify(bo, evict: false); |
1563 | } |
1564 | |
1565 | static struct ttm_device_funcs amdgpu_bo_driver = { |
1566 | .ttm_tt_create = &amdgpu_ttm_tt_create, |
1567 | .ttm_tt_populate = &amdgpu_ttm_tt_populate, |
1568 | .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate, |
1569 | .ttm_tt_destroy = &amdgpu_ttm_backend_destroy, |
1570 | .eviction_valuable = amdgpu_ttm_bo_eviction_valuable, |
1571 | .evict_flags = &amdgpu_evict_flags, |
1572 | .move = &amdgpu_bo_move, |
1573 | .delete_mem_notify = &amdgpu_bo_delete_mem_notify, |
1574 | .release_notify = &amdgpu_bo_release_notify, |
1575 | .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, |
1576 | .io_mem_pfn = amdgpu_ttm_io_mem_pfn, |
1577 | .access_memory = &amdgpu_ttm_access_memory, |
1578 | }; |
1579 | |
1580 | /* |
1581 | * Firmware Reservation functions |
1582 | */ |
1583 | /** |
1584 | * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram |
1585 | * |
1586 | * @adev: amdgpu_device pointer |
1587 | * |
1588 | * free fw reserved vram if it has been reserved. |
1589 | */ |
1590 | static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) |
1591 | { |
1592 | amdgpu_bo_free_kernel(bo: &adev->mman.fw_vram_usage_reserved_bo, |
1593 | NULL, cpu_addr: &adev->mman.fw_vram_usage_va); |
1594 | } |
1595 | |
1596 | /* |
1597 | * Driver Reservation functions |
1598 | */ |
1599 | /** |
1600 | * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram |
1601 | * |
1602 | * @adev: amdgpu_device pointer |
1603 | * |
1604 | * free drv reserved vram if it has been reserved. |
1605 | */ |
1606 | static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev) |
1607 | { |
1608 | amdgpu_bo_free_kernel(bo: &adev->mman.drv_vram_usage_reserved_bo, |
1609 | NULL, |
1610 | cpu_addr: &adev->mman.drv_vram_usage_va); |
1611 | } |
1612 | |
1613 | /** |
1614 | * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw |
1615 | * |
1616 | * @adev: amdgpu_device pointer |
1617 | * |
1618 | * create bo vram reservation from fw. |
1619 | */ |
1620 | static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) |
1621 | { |
1622 | uint64_t vram_size = adev->gmc.visible_vram_size; |
1623 | |
1624 | adev->mman.fw_vram_usage_va = NULL; |
1625 | adev->mman.fw_vram_usage_reserved_bo = NULL; |
1626 | |
1627 | if (adev->mman.fw_vram_usage_size == 0 || |
1628 | adev->mman.fw_vram_usage_size > vram_size) |
1629 | return 0; |
1630 | |
1631 | return amdgpu_bo_create_kernel_at(adev, |
1632 | offset: adev->mman.fw_vram_usage_start_offset, |
1633 | size: adev->mman.fw_vram_usage_size, |
1634 | bo_ptr: &adev->mman.fw_vram_usage_reserved_bo, |
1635 | cpu_addr: &adev->mman.fw_vram_usage_va); |
1636 | } |
1637 | |
1638 | /** |
1639 | * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver |
1640 | * |
1641 | * @adev: amdgpu_device pointer |
1642 | * |
1643 | * create bo vram reservation from drv. |
1644 | */ |
1645 | static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) |
1646 | { |
1647 | u64 vram_size = adev->gmc.visible_vram_size; |
1648 | |
1649 | adev->mman.drv_vram_usage_va = NULL; |
1650 | adev->mman.drv_vram_usage_reserved_bo = NULL; |
1651 | |
1652 | if (adev->mman.drv_vram_usage_size == 0 || |
1653 | adev->mman.drv_vram_usage_size > vram_size) |
1654 | return 0; |
1655 | |
	return amdgpu_bo_create_kernel_at(adev,
					  adev->mman.drv_vram_usage_start_offset,
					  adev->mman.drv_vram_usage_size,
					  &adev->mman.drv_vram_usage_reserved_bo,
					  &adev->mman.drv_vram_usage_va);
1661 | } |
1662 | |
1663 | /* |
 * Memory training reservation functions
1665 | */ |
1666 | |
1667 | /** |
1668 | * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram |
1669 | * |
1670 | * @adev: amdgpu_device pointer |
1671 | * |
 * Free the memory-training reserved VRAM if it has been reserved.
1673 | */ |
1674 | static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) |
1675 | { |
1676 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1677 | |
1678 | ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; |
	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1680 | ctx->c2p_bo = NULL; |
1681 | |
1682 | return 0; |
1683 | } |
1684 | |
1685 | static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, |
1686 | uint32_t reserve_size) |
1687 | { |
1688 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1689 | |
1690 | memset(ctx, 0, sizeof(*ctx)); |
1691 | |
1692 | ctx->c2p_train_data_offset = |
1693 | ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); |
1694 | ctx->p2c_train_data_offset = |
1695 | (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); |
1696 | ctx->train_data_size = |
1697 | GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; |
1698 | |
1699 | DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n" , |
1700 | ctx->train_data_size, |
1701 | ctx->p2c_train_data_offset, |
1702 | ctx->c2p_train_data_offset); |
1703 | } |
1704 | |
1705 | /* |
1706 | * reserve TMR memory at the top of VRAM which holds |
1707 | * IP Discovery data and is protected by PSP. |
1708 | */ |
1709 | static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) |
1710 | { |
1711 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1712 | bool mem_train_support = false; |
1713 | uint32_t reserve_size = 0; |
1714 | int ret; |
1715 | |
1716 | if (adev->bios && !amdgpu_sriov_vf(adev)) { |
1717 | if (amdgpu_atomfirmware_mem_training_supported(adev)) |
1718 | mem_train_support = true; |
1719 | else |
1720 | DRM_DEBUG("memory training does not support!\n" ); |
1721 | } |
1722 | |
1723 | /* |
1724 | * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all |
1725 | * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc) |
1726 | * |
1727 | * Otherwise, fallback to legacy approach to check and reserve tmr block for ip |
1728 | * discovery data and G6 memory training data respectively |
1729 | */ |
1730 | if (adev->bios) |
1731 | reserve_size = |
1732 | amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); |
1733 | |
1734 | if (!adev->bios && |
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
1736 | reserve_size = max(reserve_size, (uint32_t)280 << 20); |
1737 | else if (!reserve_size) |
1738 | reserve_size = DISCOVERY_TMR_OFFSET; |
1739 | |
1740 | if (mem_train_support) { |
1741 | /* reserve vram for mem train according to TMR location */ |
1742 | amdgpu_ttm_training_data_block_init(adev, reserve_size); |
		ret = amdgpu_bo_create_kernel_at(adev,
						 ctx->c2p_train_data_offset,
						 ctx->train_data_size,
						 &ctx->c2p_bo,
						 NULL);
		if (ret) {
			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1750 | amdgpu_ttm_training_reserve_vram_fini(adev); |
1751 | return ret; |
1752 | } |
1753 | ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; |
1754 | } |
1755 | |
1756 | if (!adev->gmc.is_app_apu) { |
		ret = amdgpu_bo_create_kernel_at(
			adev, adev->gmc.real_vram_size - reserve_size,
			reserve_size, &adev->mman.fw_reserved_memory, NULL);
		if (ret) {
			DRM_ERROR("alloc tmr failed(%d)!\n", ret);
			amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
					      NULL, NULL);
1764 | return ret; |
1765 | } |
1766 | } else { |
1767 | DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n" ); |
1768 | } |
1769 | |
1770 | return 0; |
1771 | } |
1772 | |
1773 | static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) |
1774 | { |
1775 | int i; |
1776 | |
1777 | if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions) |
1778 | return 0; |
1779 | |
	adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
				       sizeof(*adev->mman.ttm_pools),
				       GFP_KERNEL);
1783 | if (!adev->mman.ttm_pools) |
1784 | return -ENOMEM; |
1785 | |
1786 | for (i = 0; i < adev->gmc.num_mem_partitions; i++) { |
		ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
			      adev->gmc.mem_partitions[i].numa.node,
			      false, false);
1790 | } |
1791 | return 0; |
1792 | } |
1793 | |
1794 | static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) |
1795 | { |
1796 | int i; |
1797 | |
1798 | if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools) |
1799 | return; |
1800 | |
1801 | for (i = 0; i < adev->gmc.num_mem_partitions; i++) |
		ttm_pool_fini(&adev->mman.ttm_pools[i]);
1803 | |
	kfree(adev->mman.ttm_pools);
1805 | adev->mman.ttm_pools = NULL; |
1806 | } |
1807 | |
1808 | /* |
1809 | * amdgpu_ttm_init - Init the memory management (ttm) as well as various |
1810 | * gtt/vram related fields. |
1811 | * |
1812 | * This initializes all of the memory space pools that the TTM layer |
1813 | * will need such as the GTT space (system memory mapped to the device), |
1814 | * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which |
1815 | * can be mapped per VMID. |
1816 | */ |
1817 | int amdgpu_ttm_init(struct amdgpu_device *adev) |
1818 | { |
1819 | uint64_t gtt_size; |
1820 | int r; |
1821 | |
1822 | mutex_init(&adev->mman.gtt_window_lock); |
1823 | |
	/* No other users of the address space, so set it to 0 */
	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
			    adev_to_drm(adev)->anon_inode->i_mapping,
			    adev_to_drm(adev)->vma_offset_manager,
			    adev->need_swiotlb,
			    dma_addressing_limited(adev->dev));
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1832 | return r; |
1833 | } |
1834 | |
1835 | r = amdgpu_ttm_pools_init(adev); |
1836 | if (r) { |
1837 | DRM_ERROR("failed to init ttm pools(%d).\n" , r); |
1838 | return r; |
1839 | } |
1840 | adev->mman.initialized = true; |
1841 | |
1842 | /* Initialize VRAM pool with all of VRAM divided into pages */ |
1843 | r = amdgpu_vram_mgr_init(adev); |
1844 | if (r) { |
1845 | DRM_ERROR("Failed initializing VRAM heap.\n" ); |
1846 | return r; |
1847 | } |
1848 | |
1849 | /* Change the size here instead of the init above so only lpfn is affected */ |
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
#ifdef CONFIG_64BIT
#ifdef CONFIG_X86
	if (adev->gmc.xgmi.connected_to_cpu)
		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
	else if (adev->gmc.is_app_apu)
		DRM_DEBUG_DRIVER(
			"No need to ioremap when real vram size is 0\n");
	else
#endif
		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
1864 | #endif |
1865 | |
1866 | /* |
1867 | *The reserved vram for firmware must be pinned to the specified |
1868 | *place on the VRAM, so reserve it early. |
1869 | */ |
1870 | r = amdgpu_ttm_fw_reserve_vram_init(adev); |
1871 | if (r) |
1872 | return r; |
1873 | |
1874 | /* |
1875 | *The reserved vram for driver must be pinned to the specified |
1876 | *place on the VRAM, so reserve it early. |
1877 | */ |
1878 | r = amdgpu_ttm_drv_reserve_vram_init(adev); |
1879 | if (r) |
1880 | return r; |
1881 | |
1882 | /* |
1883 | * only NAVI10 and onwards ASIC support for IP discovery. |
1884 | * If IP discovery enabled, a block of memory should be |
1885 | * reserved for IP discovey. |
1886 | */ |
1887 | if (adev->mman.discovery_bin) { |
1888 | r = amdgpu_ttm_reserve_tmr(adev); |
1889 | if (r) |
1890 | return r; |
1891 | } |
1892 | |
1893 | /* allocate memory as required for VGA |
1894 | * This is used for VGA emulation and pre-OS scanout buffers to |
1895 | * avoid display artifacts while transitioning between pre-OS |
1896 | * and driver. |
1897 | */ |
1898 | if (!adev->gmc.is_app_apu) { |
		r = amdgpu_bo_create_kernel_at(adev, 0,
					       adev->mman.stolen_vga_size,
					       &adev->mman.stolen_vga_memory,
					       NULL);
1903 | if (r) |
1904 | return r; |
1905 | |
		r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
					       adev->mman.stolen_extended_size,
					       &adev->mman.stolen_extended_memory,
					       NULL);
1910 | |
1911 | if (r) |
1912 | return r; |
1913 | |
		r = amdgpu_bo_create_kernel_at(adev,
					       adev->mman.stolen_reserved_offset,
					       adev->mman.stolen_reserved_size,
					       &adev->mman.stolen_reserved_memory,
					       NULL);
1919 | if (r) |
1920 | return r; |
1921 | } else { |
1922 | DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n" ); |
1923 | } |
1924 | |
1925 | DRM_INFO("amdgpu: %uM of VRAM memory ready\n" , |
1926 | (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); |
1927 | |
1928 | /* Compute GTT size, either based on TTM limit |
1929 | * or whatever the user passed on module init. |
1930 | */ |
1931 | if (amdgpu_gtt_size == -1) |
1932 | gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; |
1933 | else |
1934 | gtt_size = (uint64_t)amdgpu_gtt_size << 20; |
1935 | |
1936 | /* Initialize GTT memory pool */ |
1937 | r = amdgpu_gtt_mgr_init(adev, gtt_size); |
1938 | if (r) { |
1939 | DRM_ERROR("Failed initializing GTT heap.\n" ); |
1940 | return r; |
1941 | } |
1942 | DRM_INFO("amdgpu: %uM of GTT memory ready.\n" , |
1943 | (unsigned int)(gtt_size / (1024 * 1024))); |
1944 | |
1945 | /* Initiailize doorbell pool on PCI BAR */ |
1946 | r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, size_in_page: adev->doorbell.size / PAGE_SIZE); |
1947 | if (r) { |
1948 | DRM_ERROR("Failed initializing doorbell heap.\n" ); |
1949 | return r; |
1950 | } |
1951 | |
1952 | /* Create a boorbell page for kernel usages */ |
1953 | r = amdgpu_doorbell_create_kernel_doorbells(adev); |
1954 | if (r) { |
1955 | DRM_ERROR("Failed to initialize kernel doorbells.\n" ); |
1956 | return r; |
1957 | } |
1958 | |
1959 | /* Initialize preemptible memory pool */ |
1960 | r = amdgpu_preempt_mgr_init(adev); |
1961 | if (r) { |
1962 | DRM_ERROR("Failed initializing PREEMPT heap.\n" ); |
1963 | return r; |
1964 | } |
1965 | |
1966 | /* Initialize various on-chip memory pools */ |
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing gws heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing oa heap.\n");
		return r;
	}

	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &adev->mman.sdma_access_bo, NULL,
				    &adev->mman.sdma_access_ptr))
		DRM_WARN("Debug VRAM access will use slowpath MM access\n");
1989 | |
1990 | return 0; |
1991 | } |
1992 | |
1993 | /* |
1994 | * amdgpu_ttm_fini - De-initialize the TTM memory pools |
1995 | */ |
1996 | void amdgpu_ttm_fini(struct amdgpu_device *adev) |
1997 | { |
1998 | int idx; |
1999 | |
2000 | if (!adev->mman.initialized) |
2001 | return; |
2002 | |
2003 | amdgpu_ttm_pools_fini(adev); |
2004 | |
2005 | amdgpu_ttm_training_reserve_vram_fini(adev); |
2006 | /* return the stolen vga memory back to VRAM */ |
2007 | if (!adev->gmc.is_app_apu) { |
		amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
		amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
		/* return the FW reserved memory back to VRAM */
		amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
				      NULL);
		if (adev->mman.stolen_reserved_size)
			amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
					      NULL, NULL);
	}
	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
			      &adev->mman.sdma_access_ptr);
2019 | amdgpu_ttm_fw_reserve_vram_fini(adev); |
2020 | amdgpu_ttm_drv_reserve_vram_fini(adev); |
2021 | |
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		if (adev->mman.aper_base_kaddr)
			iounmap(adev->mman.aper_base_kaddr);
2026 | adev->mman.aper_base_kaddr = NULL; |
2027 | |
2028 | drm_dev_exit(idx); |
2029 | } |
2030 | |
2031 | amdgpu_vram_mgr_fini(adev); |
2032 | amdgpu_gtt_mgr_fini(adev); |
2033 | amdgpu_preempt_mgr_fini(adev); |
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_device_fini(&adev->mman.bdev);
	adev->mman.initialized = false;
	DRM_INFO("amdgpu: ttm finalized\n");
2040 | } |
2041 | |
2042 | /** |
2043 | * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions |
2044 | * |
2045 | * @adev: amdgpu_device pointer |
2046 | * @enable: true when we can use buffer functions. |
2047 | * |
2048 | * Enable/disable use of buffer functions during suspend/resume. This should |
2049 | * only be called at bootup or when userspace isn't running. |
2050 | */ |
2051 | void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) |
2052 | { |
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
2054 | uint64_t size; |
2055 | int r; |
2056 | |
2057 | if (!adev->mman.initialized || amdgpu_in_reset(adev) || |
2058 | adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) |
2059 | return; |
2060 | |
2061 | if (enable) { |
2062 | struct amdgpu_ring *ring; |
2063 | struct drm_gpu_scheduler *sched; |
2064 | |
2065 | ring = adev->mman.buffer_funcs_ring; |
2066 | sched = &ring->sched; |
		r = drm_sched_entity_init(&adev->mman.high_pr,
					  DRM_SCHED_PRIORITY_KERNEL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
				  r);
2073 | return; |
2074 | } |
2075 | |
		r = drm_sched_entity_init(&adev->mman.low_pr,
					  DRM_SCHED_PRIORITY_NORMAL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
				  r);
2082 | goto error_free_entity; |
2083 | } |
2084 | } else { |
		drm_sched_entity_destroy(&adev->mman.high_pr);
		drm_sched_entity_destroy(&adev->mman.low_pr);
		dma_fence_put(man->move);
		man->move = NULL;
2089 | } |
2090 | |
2091 | /* this just adjusts TTM size idea, which sets lpfn to the correct value */ |
2092 | if (enable) |
2093 | size = adev->gmc.real_vram_size; |
2094 | else |
2095 | size = adev->gmc.visible_vram_size; |
2096 | man->size = size; |
2097 | adev->mman.buffer_funcs_enabled = enable; |
2098 | |
2099 | return; |
2100 | |
2101 | error_free_entity: |
	drm_sched_entity_destroy(&adev->mman.high_pr);
2103 | } |
2104 | |
2105 | static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, |
2106 | bool direct_submit, |
2107 | unsigned int num_dw, |
2108 | struct dma_resv *resv, |
2109 | bool vm_needs_flush, |
2110 | struct amdgpu_job **job, |
2111 | bool delayed) |
2112 | { |
2113 | enum amdgpu_ib_pool_type pool = direct_submit ? |
2114 | AMDGPU_IB_POOL_DIRECT : |
2115 | AMDGPU_IB_POOL_DELAYED; |
2116 | int r; |
2117 | struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr : |
2118 | &adev->mman.high_pr; |
	r = amdgpu_job_alloc_with_ib(adev, entity,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4, pool, job);
2122 | if (r) |
2123 | return r; |
2124 | |
2125 | if (vm_needs_flush) { |
		(*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
							adev->gmc.pdb0_bo :
							adev->gart.bo);
2129 | (*job)->vm_needs_flush = true; |
2130 | } |
2131 | if (!resv) |
2132 | return 0; |
2133 | |
	return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
						   DMA_RESV_USAGE_BOOKKEEP);
2136 | } |
2137 | |
2138 | int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, |
2139 | uint64_t dst_offset, uint32_t byte_count, |
2140 | struct dma_resv *resv, |
2141 | struct dma_fence **fence, bool direct_submit, |
2142 | bool vm_needs_flush, bool tmz) |
2143 | { |
2144 | struct amdgpu_device *adev = ring->adev; |
2145 | unsigned int num_loops, num_dw; |
2146 | struct amdgpu_job *job; |
2147 | uint32_t max_bytes; |
2148 | unsigned int i; |
2149 | int r; |
2150 | |
2151 | if (!direct_submit && !ring->sched.ready) { |
2152 | DRM_ERROR("Trying to move memory with ring turned off.\n" ); |
2153 | return -EINVAL; |
2154 | } |
2155 | |
2156 | max_bytes = adev->mman.buffer_funcs->copy_max_bytes; |
2157 | num_loops = DIV_ROUND_UP(byte_count, max_bytes); |
2158 | num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); |
	r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
				   resv, vm_needs_flush, &job, false);
2161 | if (r) |
2162 | return r; |
2163 | |
2164 | for (i = 0; i < num_loops; i++) { |
2165 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); |
2166 | |
2167 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, |
2168 | dst_offset, cur_size_in_bytes, tmz); |
2169 | |
2170 | src_offset += cur_size_in_bytes; |
2171 | dst_offset += cur_size_in_bytes; |
2172 | byte_count -= cur_size_in_bytes; |
2173 | } |
2174 | |
2175 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
2176 | WARN_ON(job->ibs[0].length_dw > num_dw); |
2177 | if (direct_submit) |
2178 | r = amdgpu_job_submit_direct(job, ring, fence); |
2179 | else |
2180 | *fence = amdgpu_job_submit(job); |
2181 | if (r) |
2182 | goto error_free; |
2183 | |
2184 | return r; |
2185 | |
2186 | error_free: |
2187 | amdgpu_job_free(job); |
2188 | DRM_ERROR("Error scheduling IBs (%d)\n" , r); |
2189 | return r; |
2190 | } |
2191 | |
2192 | static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, |
2193 | uint64_t dst_addr, uint32_t byte_count, |
2194 | struct dma_resv *resv, |
2195 | struct dma_fence **fence, |
2196 | bool vm_needs_flush, bool delayed) |
2197 | { |
2198 | struct amdgpu_device *adev = ring->adev; |
2199 | unsigned int num_loops, num_dw; |
2200 | struct amdgpu_job *job; |
2201 | uint32_t max_bytes; |
2202 | unsigned int i; |
2203 | int r; |
2204 | |
2205 | max_bytes = adev->mman.buffer_funcs->fill_max_bytes; |
2206 | num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); |
2207 | num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); |
	r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
				   &job, delayed);
2210 | if (r) |
2211 | return r; |
2212 | |
2213 | for (i = 0; i < num_loops; i++) { |
2214 | uint32_t cur_size = min(byte_count, max_bytes); |
2215 | |
2216 | amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr, |
2217 | cur_size); |
2218 | |
2219 | dst_addr += cur_size; |
2220 | byte_count -= cur_size; |
2221 | } |
2222 | |
2223 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
2224 | WARN_ON(job->ibs[0].length_dw > num_dw); |
2225 | *fence = amdgpu_job_submit(job); |
2226 | return 0; |
2227 | } |
2228 | |
2229 | int amdgpu_fill_buffer(struct amdgpu_bo *bo, |
2230 | uint32_t src_data, |
2231 | struct dma_resv *resv, |
2232 | struct dma_fence **f, |
2233 | bool delayed) |
2234 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2236 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; |
2237 | struct dma_fence *fence = NULL; |
2238 | struct amdgpu_res_cursor dst; |
2239 | int r; |
2240 | |
2241 | if (!adev->mman.buffer_funcs_enabled) { |
2242 | DRM_ERROR("Trying to clear memory with ring turned off.\n" ); |
2243 | return -EINVAL; |
2244 | } |
2245 | |
	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
2247 | |
2248 | mutex_lock(&adev->mman.gtt_window_lock); |
2249 | while (dst.remaining) { |
2250 | struct dma_fence *next; |
2251 | uint64_t cur_size, to; |
2252 | |
2253 | /* Never fill more than 256MiB at once to avoid timeouts */ |
2254 | cur_size = min(dst.size, 256ULL << 20); |
2255 | |
		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
					  1, ring, false, &cur_size, &to);
2258 | if (r) |
2259 | goto error; |
2260 | |
		r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
					&next, true, delayed);
2263 | if (r) |
2264 | goto error; |
2265 | |
2266 | dma_fence_put(fence); |
2267 | fence = next; |
2268 | |
		amdgpu_res_next(&dst, cur_size);
2270 | } |
2271 | error: |
	mutex_unlock(&adev->mman.gtt_window_lock);
2273 | if (f) |
2274 | *f = dma_fence_get(fence); |
2275 | dma_fence_put(fence); |
2276 | return r; |
2277 | } |
2278 | |
2279 | /** |
2280 | * amdgpu_ttm_evict_resources - evict memory buffers |
2281 | * @adev: amdgpu device object |
2282 | * @mem_type: evicted BO's memory type |
2283 | * |
2284 | * Evicts all @mem_type buffers on the lru list of the memory type. |
2285 | * |
2286 | * Returns: |
2287 | * 0 for success or a negative error code on failure. |
2288 | */ |
2289 | int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) |
2290 | { |
2291 | struct ttm_resource_manager *man; |
2292 | |
2293 | switch (mem_type) { |
2294 | case TTM_PL_VRAM: |
2295 | case TTM_PL_TT: |
2296 | case AMDGPU_PL_GWS: |
2297 | case AMDGPU_PL_GDS: |
2298 | case AMDGPU_PL_OA: |
		man = ttm_manager_type(&adev->mman.bdev, mem_type);
2300 | break; |
2301 | default: |
2302 | DRM_ERROR("Trying to evict invalid memory type\n" ); |
2303 | return -EINVAL; |
2304 | } |
2305 | |
	return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
2307 | } |
2308 | |
2309 | #if defined(CONFIG_DEBUG_FS) |
2310 | |
2311 | static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) |
2312 | { |
2313 | struct amdgpu_device *adev = m->private; |
2314 | |
	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
2316 | } |
2317 | |
2318 | DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool); |
2319 | |
2320 | /* |
2321 | * amdgpu_ttm_vram_read - Linear read access to VRAM |
2322 | * |
2323 | * Accesses VRAM via MMIO for debugging purposes. |
2324 | */ |
2325 | static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, |
2326 | size_t size, loff_t *pos) |
2327 | { |
2328 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2329 | ssize_t result = 0; |
2330 | |
2331 | if (size & 0x3 || *pos & 0x3) |
2332 | return -EINVAL; |
2333 | |
2334 | if (*pos >= adev->gmc.mc_vram_size) |
2335 | return -ENXIO; |
2336 | |
2337 | size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos)); |
2338 | while (size) { |
2339 | size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4); |
2340 | uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ]; |
2341 | |
		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
		if (copy_to_user(buf, value, bytes))
2344 | return -EFAULT; |
2345 | |
2346 | result += bytes; |
2347 | buf += bytes; |
2348 | *pos += bytes; |
2349 | size -= bytes; |
2350 | } |
2351 | |
2352 | return result; |
2353 | } |
2354 | |
2355 | /* |
2356 | * amdgpu_ttm_vram_write - Linear write access to VRAM |
2357 | * |
2358 | * Accesses VRAM via MMIO for debugging purposes. |
2359 | */ |
2360 | static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, |
2361 | size_t size, loff_t *pos) |
2362 | { |
2363 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2364 | ssize_t result = 0; |
2365 | int r; |
2366 | |
2367 | if (size & 0x3 || *pos & 0x3) |
2368 | return -EINVAL; |
2369 | |
2370 | if (*pos >= adev->gmc.mc_vram_size) |
2371 | return -ENXIO; |
2372 | |
2373 | while (size) { |
2374 | uint32_t value; |
2375 | |
2376 | if (*pos >= adev->gmc.mc_vram_size) |
2377 | return result; |
2378 | |
2379 | r = get_user(value, (uint32_t *)buf); |
2380 | if (r) |
2381 | return r; |
2382 | |
		amdgpu_device_mm_access(adev, *pos, &value, 4, true);
2384 | |
2385 | result += 4; |
2386 | buf += 4; |
2387 | *pos += 4; |
2388 | size -= 4; |
2389 | } |
2390 | |
2391 | return result; |
2392 | } |
2393 | |
2394 | static const struct file_operations amdgpu_ttm_vram_fops = { |
2395 | .owner = THIS_MODULE, |
2396 | .read = amdgpu_ttm_vram_read, |
2397 | .write = amdgpu_ttm_vram_write, |
2398 | .llseek = default_llseek, |
2399 | }; |
2400 | |
2401 | /* |
2402 | * amdgpu_iomem_read - Virtual read access to GPU mapped memory |
2403 | * |
2404 | * This function is used to read memory that has been mapped to the |
2405 | * GPU and the known addresses are not physical addresses but instead |
2406 | * bus addresses (e.g., what you'd put in an IB or ring buffer). |
2407 | */ |
2408 | static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, |
2409 | size_t size, loff_t *pos) |
2410 | { |
2411 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2412 | struct iommu_domain *dom; |
2413 | ssize_t result = 0; |
2414 | int r; |
2415 | |
2416 | /* retrieve the IOMMU domain if any for this device */ |
	dom = iommu_get_domain_for_dev(adev->dev);
2418 | |
2419 | while (size) { |
2420 | phys_addr_t addr = *pos & PAGE_MASK; |
2421 | loff_t off = *pos & ~PAGE_MASK; |
2422 | size_t bytes = PAGE_SIZE - off; |
2423 | unsigned long pfn; |
2424 | struct page *p; |
2425 | void *ptr; |
2426 | |
2427 | bytes = min(bytes, size); |
2428 | |
2429 | /* Translate the bus address to a physical address. If |
2430 | * the domain is NULL it means there is no IOMMU active |
2431 | * and the address translation is the identity |
2432 | */ |
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2434 | |
2435 | pfn = addr >> PAGE_SHIFT; |
2436 | if (!pfn_valid(pfn)) |
2437 | return -EPERM; |
2438 | |
2439 | p = pfn_to_page(pfn); |
2440 | if (p->mapping != adev->mman.bdev.dev_mapping) |
2441 | return -EPERM; |
2442 | |
		ptr = kmap_local_page(p);
		r = copy_to_user(buf, ptr + off, bytes);
2445 | kunmap_local(ptr); |
2446 | if (r) |
2447 | return -EFAULT; |
2448 | |
2449 | size -= bytes; |
2450 | *pos += bytes; |
2451 | result += bytes; |
2452 | } |
2453 | |
2454 | return result; |
2455 | } |
2456 | |
2457 | /* |
2458 | * amdgpu_iomem_write - Virtual write access to GPU mapped memory |
2459 | * |
2460 | * This function is used to write memory that has been mapped to the |
2461 | * GPU and the known addresses are not physical addresses but instead |
2462 | * bus addresses (e.g., what you'd put in an IB or ring buffer). |
2463 | */ |
2464 | static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, |
2465 | size_t size, loff_t *pos) |
2466 | { |
2467 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2468 | struct iommu_domain *dom; |
2469 | ssize_t result = 0; |
2470 | int r; |
2471 | |
	dom = iommu_get_domain_for_dev(adev->dev);
2473 | |
2474 | while (size) { |
2475 | phys_addr_t addr = *pos & PAGE_MASK; |
2476 | loff_t off = *pos & ~PAGE_MASK; |
2477 | size_t bytes = PAGE_SIZE - off; |
2478 | unsigned long pfn; |
2479 | struct page *p; |
2480 | void *ptr; |
2481 | |
2482 | bytes = min(bytes, size); |
2483 | |
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2485 | |
2486 | pfn = addr >> PAGE_SHIFT; |
2487 | if (!pfn_valid(pfn)) |
2488 | return -EPERM; |
2489 | |
2490 | p = pfn_to_page(pfn); |
2491 | if (p->mapping != adev->mman.bdev.dev_mapping) |
2492 | return -EPERM; |
2493 | |
		ptr = kmap_local_page(p);
		r = copy_from_user(ptr + off, buf, bytes);
2496 | kunmap_local(ptr); |
2497 | if (r) |
2498 | return -EFAULT; |
2499 | |
2500 | size -= bytes; |
2501 | *pos += bytes; |
2502 | result += bytes; |
2503 | } |
2504 | |
2505 | return result; |
2506 | } |
2507 | |
2508 | static const struct file_operations amdgpu_ttm_iomem_fops = { |
2509 | .owner = THIS_MODULE, |
2510 | .read = amdgpu_iomem_read, |
2511 | .write = amdgpu_iomem_write, |
2512 | .llseek = default_llseek |
2513 | }; |
2514 | |
2515 | #endif |
2516 | |
2517 | void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) |
2518 | { |
2519 | #if defined(CONFIG_DEBUG_FS) |
2520 | struct drm_minor *minor = adev_to_drm(adev)->primary; |
2521 | struct dentry *root = minor->debugfs_root; |
2522 | |
	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
			    &amdgpu_ttm_iomem_fops);
	debugfs_create_file("ttm_page_pool", 0444, root, adev,
			    &amdgpu_ttm_page_pool_fops);
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     TTM_PL_VRAM),
					    root, "amdgpu_vram_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     TTM_PL_TT),
					    root, "amdgpu_gtt_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_GDS),
					    root, "amdgpu_gds_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_GWS),
					    root, "amdgpu_gws_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_OA),
					    root, "amdgpu_oa_mm");
2544 | |
2545 | #endif |
2546 | } |
2547 | |