/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <linux/pci.h>
#include <linux/vmalloc.h>

#include <drm/amdgpu_drm.h>
#ifdef CONFIG_X86
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>

/*
 * GART
 * The GART (Graphics Aperture Remapping Table) is an aperture
 * in the GPU's address space. System pages can be mapped into
 * the aperture and look like contiguous pages from the GPU's
 * perspective. A page table maps the pages in the aperture
 * to the actual backing pages in system memory.
 *
 * Radeon GPUs support both an internal GART, as described above,
 * and AGP. AGP works similarly, but the GART table is configured
 * and maintained by the northbridge rather than the driver.
 * Radeon hw has a separate AGP aperture that is programmed to
 * point to the AGP aperture provided by the northbridge and the
 * requests are passed through to the northbridge aperture.
 * Both AGP and internal GART can be used at the same time, however
 * that is not currently supported by the driver.
 *
 * This file handles the common internal GART management.
 */
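
/*
 * Illustrative sketch of the translation the GART provides (the names
 * gart_start, pte[] and ADDR_MASK are only for exposition, not symbols
 * defined in this file):
 *
 *   gpu_addr = gart_start + t * AMDGPU_GPU_PAGE_SIZE
 *   sys_addr = (pte[t] & ADDR_MASK) + (gpu_addr & (AMDGPU_GPU_PAGE_SIZE - 1))
 *
 * i.e. GART entry 't' holds the DMA address of the backing system page.
 * The entries themselves are written with amdgpu_gmc_set_pte_pde() from
 * amdgpu_gart_map() and amdgpu_gart_unbind() below.
 */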

/*
 * Common GART table functions.
 */

/**
 * amdgpu_gart_dummy_page_init - init dummy page used by the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate the dummy page used by the driver (all asics).
 * This dummy page is used by the driver as a filler for gart entries
 * when pages are taken out of the GART.
 * Returns 0 on success, -ENOMEM on failure.
 */
static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
{
	struct page *dummy_page = ttm_glob.dummy_read_page;

	if (adev->dummy_page_addr)
		return 0;
	adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,
					     PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
		adev->dummy_page_addr = 0;
		return -ENOMEM;
	}
	return 0;
}

/**
 * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the dummy page used by the driver (all asics).
 */
void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
{
	if (!adev->dummy_page_addr)
		return;
	dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
		       DMA_BIDIRECTIONAL);
	adev->dummy_page_addr = 0;
}

/**
 * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate system memory for GART page table for ASICs that don't have
 * dedicated VRAM.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
{
	unsigned int order = get_order(adev->gart.table_size);
	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
	struct amdgpu_bo *bo = NULL;
	struct sg_table *sg = NULL;
	struct amdgpu_bo_param bp;
	dma_addr_t dma_addr;
	struct page *p;
	unsigned long x;
	int ret;

	if (adev->gart.bo != NULL)
		return 0;

	p = alloc_pages(gfp_flags, order);
	if (!p)
		return -ENOMEM;

	/* assign pages to this device */
	for (x = 0; x < (1UL << order); x++)
		p[x].mapping = adev->mman.bdev.dev_mapping;

	/* If the hardware does not support UTCL2 snooping of the CPU caches
	 * then set_memory_wc() could be used as a workaround to mark the pages
	 * as write combine memory.
	 */
	dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
				DMA_BIDIRECTIONAL);
	if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
		dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
		__free_pages(p, order);
		p = NULL;
		return -EFAULT;
	}

	dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
	/* Create SG table */
	sg = kmalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg) {
		ret = -ENOMEM;
		goto error;
	}
	ret = sg_alloc_table(sg, 1, GFP_KERNEL);
	if (ret)
		goto error;

	sg_dma_address(sg->sgl) = dma_addr;
	sg->sgl->length = adev->gart.table_size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
	sg->sgl->dma_length = adev->gart.table_size;
#endif
	/* Create SG BO */
	memset(&bp, 0, sizeof(bp));
	bp.size = adev->gart.table_size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
	bp.type = ttm_bo_type_sg;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
	bp.flags = 0;
	ret = amdgpu_bo_create(adev, &bp, &bo);
	if (ret)
		goto error;

	bo->tbo.sg = sg;
	bo->tbo.ttm->sg = sg;
	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
		goto error;
	}

	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	WARN(ret, "Pinning the GART table failed");
	if (ret)
		goto error_resv;

	adev->gart.bo = bo;
	adev->gart.ptr = page_to_virt(p);
	/* Make GART table accessible in VMID0 */
	ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
	if (ret)
		amdgpu_gart_table_ram_free(adev);
	amdgpu_bo_unreserve(bo);

	return 0;

error_resv:
	amdgpu_bo_unreserve(bo);
error:
	amdgpu_bo_unref(&bo);
	if (sg) {
		sg_free_table(sg);
		kfree(sg);
	}
	__free_pages(p, order);
	return ret;
}
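
/*
 * Illustrative note on the result of amdgpu_gart_table_ram_alloc() (a
 * summary of the code above, not additional behaviour): the same table
 * pages end up reachable two ways -
 *
 *   adev->gart.ptr - CPU virtual address the driver uses to write PTEs
 *   adev->gart.bo  - SG buffer object pinned in GTT and made visible to
 *                    VMID0 by amdgpu_ttm_alloc_gart(), with
 *                    sg_dma_address(bo->tbo.sg->sgl) holding the DMA
 *                    address mapped above
 */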

/**
 * amdgpu_gart_table_ram_free - free gart page table system ram
 *
 * @adev: amdgpu_device pointer
 *
 * Free the system memory used for the GART page table on ASICs that don't
 * have dedicated VRAM.
 */
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
{
	unsigned int order = get_order(adev->gart.table_size);
	struct sg_table *sg = adev->gart.bo->tbo.sg;
	struct page *p;
	unsigned long x;
	int ret;

	ret = amdgpu_bo_reserve(adev->gart.bo, false);
	if (!ret) {
		amdgpu_bo_unpin(adev->gart.bo);
		amdgpu_bo_unreserve(adev->gart.bo);
	}
	amdgpu_bo_unref(&adev->gart.bo);
	sg_free_table(sg);
	kfree(sg);
	p = virt_to_page(adev->gart.ptr);
	for (x = 0; x < (1UL << order); x++)
		p[x].mapping = NULL;
	__free_pages(p, order);

	adev->gart.ptr = NULL;
}

/**
 * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate video memory for GART page table
 * (pcie r4xx, r5xx+). These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
	if (adev->gart.bo != NULL)
		return 0;

	return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
				       NULL, (void *)&adev->gart.ptr);
}
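
/*
 * Usage sketch: a simplified view of how the per-ASIC GMC code
 * (the gmc_v*_gart_init() helpers) typically drives these functions
 * during sw_init; the real code also picks asic-specific gart_pte_flags:
 *
 *   r = amdgpu_gart_init(adev);
 *   if (r)
 *           return r;
 *   adev->gart.table_size = adev->gart.num_gpu_pages * 8;
 *   return amdgpu_gart_table_vram_alloc(adev);
 */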

/**
 * amdgpu_gart_table_vram_free - free gart page table vram
 *
 * @adev: amdgpu_device pointer
 *
 * Free the video memory used for the GART page table
 * (pcie r4xx, r5xx+). These asics require the gart table to
 * be in video memory.
 */
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
}

/*
 * Common gart functions.
 */
/**
 * amdgpu_gart_unbind - unbind pages from the gart page table
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to unbind
 *
 * Unbinds the requested pages from the gart page table and
 * replaces them with the dummy page (all asics).
 */
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
			int pages)
{
	unsigned t;
	unsigned p;
	int i, j;
	u64 page_base;
	/* Starting from VEGA10, system bit must be 0 to mean invalid. */
	uint64_t flags = 0;
	int idx;

	if (!adev->gart.ptr)
		return;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	t = offset / AMDGPU_GPU_PAGE_SIZE;
	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
	for (i = 0; i < pages; i++, p++) {
		page_base = adev->dummy_page_addr;
		if (!adev->gart.ptr)
			continue;

		for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
			amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
					       t, page_base, flags);
			page_base += AMDGPU_GPU_PAGE_SIZE;
		}
	}
	mb();
	amdgpu_device_flush_hdp(adev, NULL);
	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);

	drm_dev_exit(idx);
}
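
/*
 * Indexing sketch for the loops above (assuming the common case of 4 KiB
 * CPU pages and 4 KiB GPU pages, i.e. AMDGPU_GPU_PAGES_IN_CPU_PAGE == 1):
 * unbinding 2 pages at offset 0x3000 gives t = 3 and rewrites GART entries
 * 3 and 4 to point at adev->dummy_page_addr. With 64 KiB CPU pages each
 * iteration of the outer loop instead rewrites 16 consecutive entries.
 */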

/**
 * amdgpu_gart_map - map dma_addresses into GART entries
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to bind
 * @dma_addr: DMA addresses of pages
 * @flags: page table entry flags
 * @dst: CPU address of the gart table
 *
 * Map the dma_addresses into GART entries (all asics).
 */
void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
		     int pages, dma_addr_t *dma_addr, uint64_t flags,
		     void *dst)
{
	uint64_t page_base;
	unsigned i, j, t;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	t = offset / AMDGPU_GPU_PAGE_SIZE;

	for (i = 0; i < pages; i++) {
		page_base = dma_addr[i];
		for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
			amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
			page_base += AMDGPU_GPU_PAGE_SIZE;
		}
	}
	drm_dev_exit(idx);
}

/**
 * amdgpu_gart_bind - bind pages into the gart page table
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to bind
 * @dma_addr: DMA addresses of pages
 * @flags: page table entry flags
 *
 * Binds the requested pages to the gart page table
 * (all asics).
 */
void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
		      int pages, dma_addr_t *dma_addr,
		      uint64_t flags)
{
	if (!adev->gart.ptr)
		return;

	amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr);
}
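
/*
 * Typical call sequence (a simplified sketch of what the TTM backend in
 * amdgpu_ttm.c does, not a literal quote of it): bind the DMA addresses,
 * then make the update visible to the GPU:
 *
 *   amdgpu_gart_bind(adev, gtt_offset, num_pages, dma_addrs, pte_flags);
 *   amdgpu_gart_invalidate_tlb(adev);
 *
 * gtt_offset, num_pages, dma_addrs and pte_flags here stand in for the
 * caller's GTT placement, page count, DMA address array and PTE flags.
 */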

/**
 * amdgpu_gart_invalidate_tlb - invalidate gart TLB
 *
 * @adev: amdgpu device driver pointer
 *
 * Invalidate gart TLB which can be used as a way to flush gart changes
 */
void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
{
	int i;

	if (!adev->gart.ptr)
		return;

	mb();
	amdgpu_device_flush_hdp(adev, NULL);
	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
}

/**
 * amdgpu_gart_init - init the driver info for managing the gart
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate the dummy page and init the gart driver info (all asics).
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->dummy_page_addr)
		return 0;

	/* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
	if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("Page size is smaller than GPU page size!\n");
		return -EINVAL;
	}
	r = amdgpu_gart_dummy_page_init(adev);
	if (r)
		return r;
	/* Compute table size */
	adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
	adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
		 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);

	return 0;
}
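
/*
 * Worked example for the sizes computed above (assuming a 512 MiB GART
 * aperture, 4 KiB GPU pages and 4 KiB CPU pages): num_cpu_pages ==
 * num_gpu_pages == 131072, and with the usual 8-byte GART PTEs the GMC
 * code ends up with a 1 MiB gart.table_size.
 */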