1 | /* |
2 | * Copyright 2018 Advanced Micro Devices, Inc. |
3 | * All Rights Reserved. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sub license, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
20 | * |
21 | * The above copyright notice and this permission notice (including the |
22 | * next paragraph) shall be included in all copies or substantial portions |
23 | * of the Software. |
24 | * |
25 | */ |
26 | |
27 | #include <linux/io-64-nonatomic-lo-hi.h> |
28 | #ifdef CONFIG_X86 |
29 | #include <asm/hypervisor.h> |
30 | #endif |
31 | |
32 | #include "amdgpu.h" |
33 | #include "amdgpu_gmc.h" |
34 | #include "amdgpu_ras.h" |
35 | #include "amdgpu_reset.h" |
36 | #include "amdgpu_xgmi.h" |
37 | |
38 | #include <drm/drm_drv.h> |
39 | #include <drm/ttm/ttm_tt.h> |
40 | |
41 | /** |
42 | * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0 |
43 | * |
44 | * @adev: amdgpu_device pointer |
45 | * |
46 | * Allocate video memory for pdb0 and map it for CPU access |
47 | * Returns 0 for success, error for failure. |
48 | */ |
49 | int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev) |
50 | { |
51 | int r; |
52 | struct amdgpu_bo_param bp; |
53 | u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; |
54 | uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21; |
55 | uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift; |
56 | |
57 | memset(&bp, 0, sizeof(bp)); |
58 | bp.size = PAGE_ALIGN((npdes + 1) * 8); |
59 | bp.byte_align = PAGE_SIZE; |
60 | bp.domain = AMDGPU_GEM_DOMAIN_VRAM; |
61 | bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | |
62 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; |
63 | bp.type = ttm_bo_type_kernel; |
64 | bp.resv = NULL; |
65 | bp.bo_ptr_size = sizeof(struct amdgpu_bo); |
66 | |
67 | r = amdgpu_bo_create(adev, bp: &bp, bo_ptr: &adev->gmc.pdb0_bo); |
68 | if (r) |
69 | return r; |
70 | |
71 | r = amdgpu_bo_reserve(bo: adev->gmc.pdb0_bo, no_intr: false); |
72 | if (unlikely(r != 0)) |
73 | goto bo_reserve_failure; |
74 | |
75 | r = amdgpu_bo_pin(bo: adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM); |
76 | if (r) |
77 | goto bo_pin_failure; |
78 | r = amdgpu_bo_kmap(bo: adev->gmc.pdb0_bo, ptr: &adev->gmc.ptr_pdb0); |
79 | if (r) |
80 | goto bo_kmap_failure; |
81 | |
82 | amdgpu_bo_unreserve(bo: adev->gmc.pdb0_bo); |
83 | return 0; |
84 | |
85 | bo_kmap_failure: |
86 | amdgpu_bo_unpin(bo: adev->gmc.pdb0_bo); |
87 | bo_pin_failure: |
88 | amdgpu_bo_unreserve(bo: adev->gmc.pdb0_bo); |
89 | bo_reserve_failure: |
90 | amdgpu_bo_unref(bo: &adev->gmc.pdb0_bo); |
91 | return r; |
92 | } |
93 | |
94 | /** |
95 | * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO |
96 | * |
97 | * @bo: the BO to get the PDE for |
98 | * @level: the level in the PD hirarchy |
99 | * @addr: resulting addr |
100 | * @flags: resulting flags |
101 | * |
102 | * Get the address and flags to be used for a PDE (Page Directory Entry). |
103 | */ |
104 | void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, |
105 | uint64_t *addr, uint64_t *flags) |
106 | { |
107 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: bo->tbo.bdev); |
108 | |
109 | switch (bo->tbo.resource->mem_type) { |
110 | case TTM_PL_TT: |
111 | *addr = bo->tbo.ttm->dma_address[0]; |
112 | break; |
113 | case TTM_PL_VRAM: |
114 | *addr = amdgpu_bo_gpu_offset(bo); |
115 | break; |
116 | default: |
117 | *addr = 0; |
118 | break; |
119 | } |
120 | *flags = amdgpu_ttm_tt_pde_flags(ttm: bo->tbo.ttm, mem: bo->tbo.resource); |
121 | amdgpu_gmc_get_vm_pde(adev, level, addr, flags); |
122 | } |
123 | |
124 | /* |
125 | * amdgpu_gmc_pd_addr - return the address of the root directory |
126 | */ |
127 | uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo) |
128 | { |
129 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: bo->tbo.bdev); |
130 | uint64_t pd_addr; |
131 | |
132 | /* TODO: move that into ASIC specific code */ |
133 | if (adev->asic_type >= CHIP_VEGA10) { |
134 | uint64_t flags = AMDGPU_PTE_VALID; |
135 | |
136 | amdgpu_gmc_get_pde_for_bo(bo, level: -1, addr: &pd_addr, flags: &flags); |
137 | pd_addr |= flags; |
138 | } else { |
139 | pd_addr = amdgpu_bo_gpu_offset(bo); |
140 | } |
141 | return pd_addr; |
142 | } |
143 | |
144 | /** |
145 | * amdgpu_gmc_set_pte_pde - update the page tables using CPU |
146 | * |
147 | * @adev: amdgpu_device pointer |
148 | * @cpu_pt_addr: cpu address of the page table |
149 | * @gpu_page_idx: entry in the page table to update |
150 | * @addr: dst addr to write into pte/pde |
151 | * @flags: access flags |
152 | * |
153 | * Update the page tables using CPU. |
154 | */ |
155 | int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, |
156 | uint32_t gpu_page_idx, uint64_t addr, |
157 | uint64_t flags) |
158 | { |
159 | void __iomem *ptr = (void *)cpu_pt_addr; |
160 | uint64_t value; |
161 | |
162 | /* |
163 | * The following is for PTE only. GART does not have PDEs. |
164 | */ |
165 | value = addr & 0x0000FFFFFFFFF000ULL; |
166 | value |= flags; |
167 | writeq(val: value, addr: ptr + (gpu_page_idx * 8)); |
168 | |
169 | return 0; |
170 | } |
171 | |
172 | /** |
173 | * amdgpu_gmc_agp_addr - return the address in the AGP address space |
174 | * |
175 | * @bo: TTM BO which needs the address, must be in GTT domain |
176 | * |
177 | * Tries to figure out how to access the BO through the AGP aperture. Returns |
178 | * AMDGPU_BO_INVALID_OFFSET if that is not possible. |
179 | */ |
180 | uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) |
181 | { |
182 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: bo->bdev); |
183 | |
184 | if (!bo->ttm) |
185 | return AMDGPU_BO_INVALID_OFFSET; |
186 | |
187 | if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached) |
188 | return AMDGPU_BO_INVALID_OFFSET; |
189 | |
190 | if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size) |
191 | return AMDGPU_BO_INVALID_OFFSET; |
192 | |
193 | return adev->gmc.agp_start + bo->ttm->dma_address[0]; |
194 | } |
195 | |
196 | /** |
197 | * amdgpu_gmc_vram_location - try to find VRAM location |
198 | * |
199 | * @adev: amdgpu device structure holding all necessary information |
200 | * @mc: memory controller structure holding memory information |
201 | * @base: base address at which to put VRAM |
202 | * |
203 | * Function will try to place VRAM at base address provided |
204 | * as parameter. |
205 | */ |
206 | void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, |
207 | u64 base) |
208 | { |
209 | uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20; |
210 | uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; |
211 | |
212 | mc->vram_start = base; |
213 | mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; |
214 | if (limit < mc->real_vram_size) |
215 | mc->real_vram_size = limit; |
216 | |
217 | if (vis_limit && vis_limit < mc->visible_vram_size) |
218 | mc->visible_vram_size = vis_limit; |
219 | |
220 | if (mc->real_vram_size < mc->visible_vram_size) |
221 | mc->visible_vram_size = mc->real_vram_size; |
222 | |
223 | if (mc->xgmi.num_physical_nodes == 0) { |
224 | mc->fb_start = mc->vram_start; |
225 | mc->fb_end = mc->vram_end; |
226 | } |
227 | dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n" , |
228 | mc->mc_vram_size >> 20, mc->vram_start, |
229 | mc->vram_end, mc->real_vram_size >> 20); |
230 | } |
231 | |
232 | /** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture |
233 | * |
234 | * @adev: amdgpu device structure holding all necessary information |
235 | * @mc: memory controller structure holding memory information |
236 | * |
237 | * This function is only used if use GART for FB translation. In such |
238 | * case, we use sysvm aperture (vmid0 page tables) for both vram |
239 | * and gart (aka system memory) access. |
240 | * |
241 | * GPUVM (and our organization of vmid0 page tables) require sysvm |
242 | * aperture to be placed at a location aligned with 8 times of native |
243 | * page size. For example, if vm_context0_cntl.page_table_block_size |
244 | * is 12, then native page size is 8G (2M*2^12), sysvm should start |
245 | * with a 64G aligned address. For simplicity, we just put sysvm at |
246 | * address 0. So vram start at address 0 and gart is right after vram. |
247 | */ |
248 | void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) |
249 | { |
250 | u64 hive_vram_start = 0; |
251 | u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1; |
252 | mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id; |
253 | mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1; |
254 | mc->gart_start = hive_vram_end + 1; |
255 | mc->gart_end = mc->gart_start + mc->gart_size - 1; |
256 | mc->fb_start = hive_vram_start; |
257 | mc->fb_end = hive_vram_end; |
258 | dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n" , |
259 | mc->mc_vram_size >> 20, mc->vram_start, |
260 | mc->vram_end, mc->real_vram_size >> 20); |
261 | dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n" , |
262 | mc->gart_size >> 20, mc->gart_start, mc->gart_end); |
263 | } |
264 | |
265 | /** |
266 | * amdgpu_gmc_gart_location - try to find GART location |
267 | * |
268 | * @adev: amdgpu device structure holding all necessary information |
269 | * @mc: memory controller structure holding memory information |
270 | * @gart_placement: GART placement policy with respect to VRAM |
271 | * |
272 | * Function will place try to place GART before or after VRAM. |
273 | * If GART size is bigger than space left then we ajust GART size. |
274 | * Thus function will never fails. |
275 | */ |
276 | void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, |
277 | enum amdgpu_gart_placement gart_placement) |
278 | { |
279 | const uint64_t four_gb = 0x100000000ULL; |
280 | u64 size_af, size_bf; |
281 | /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/ |
282 | u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1); |
283 | |
284 | /* VCE doesn't like it when BOs cross a 4GB segment, so align |
285 | * the GART base on a 4GB boundary as well. |
286 | */ |
287 | size_bf = mc->fb_start; |
288 | size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb); |
289 | |
290 | if (mc->gart_size > max(size_bf, size_af)) { |
291 | dev_warn(adev->dev, "limiting GART\n" ); |
292 | mc->gart_size = max(size_bf, size_af); |
293 | } |
294 | |
295 | switch (gart_placement) { |
296 | case AMDGPU_GART_PLACEMENT_HIGH: |
297 | mc->gart_start = max_mc_address - mc->gart_size + 1; |
298 | break; |
299 | case AMDGPU_GART_PLACEMENT_LOW: |
300 | mc->gart_start = 0; |
301 | break; |
302 | case AMDGPU_GART_PLACEMENT_BEST_FIT: |
303 | default: |
304 | if ((size_bf >= mc->gart_size && size_bf < size_af) || |
305 | (size_af < mc->gart_size)) |
306 | mc->gart_start = 0; |
307 | else |
308 | mc->gart_start = max_mc_address - mc->gart_size + 1; |
309 | break; |
310 | } |
311 | |
312 | mc->gart_start &= ~(four_gb - 1); |
313 | mc->gart_end = mc->gart_start + mc->gart_size - 1; |
314 | dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n" , |
315 | mc->gart_size >> 20, mc->gart_start, mc->gart_end); |
316 | } |
317 | |
318 | /** |
319 | * amdgpu_gmc_agp_location - try to find AGP location |
320 | * @adev: amdgpu device structure holding all necessary information |
321 | * @mc: memory controller structure holding memory information |
322 | * |
323 | * Function will place try to find a place for the AGP BAR in the MC address |
324 | * space. |
325 | * |
326 | * AGP BAR will be assigned the largest available hole in the address space. |
327 | * Should be called after VRAM and GART locations are setup. |
328 | */ |
329 | void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) |
330 | { |
331 | const uint64_t sixteen_gb = 1ULL << 34; |
332 | const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1); |
333 | u64 size_af, size_bf; |
334 | |
335 | if (mc->fb_start > mc->gart_start) { |
336 | size_bf = (mc->fb_start & sixteen_gb_mask) - |
337 | ALIGN(mc->gart_end + 1, sixteen_gb); |
338 | size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb); |
339 | } else { |
340 | size_bf = mc->fb_start & sixteen_gb_mask; |
341 | size_af = (mc->gart_start & sixteen_gb_mask) - |
342 | ALIGN(mc->fb_end + 1, sixteen_gb); |
343 | } |
344 | |
345 | if (size_bf > size_af) { |
346 | mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask; |
347 | mc->agp_size = size_bf; |
348 | } else { |
349 | mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb); |
350 | mc->agp_size = size_af; |
351 | } |
352 | |
353 | mc->agp_end = mc->agp_start + mc->agp_size - 1; |
354 | dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n" , |
355 | mc->agp_size >> 20, mc->agp_start, mc->agp_end); |
356 | } |
357 | |
358 | /** |
359 | * amdgpu_gmc_set_agp_default - Set the default AGP aperture value. |
360 | * @adev: amdgpu device structure holding all necessary information |
361 | * @mc: memory controller structure holding memory information |
362 | * |
363 | * To disable the AGP aperture, you need to set the start to a larger |
364 | * value than the end. This function sets the default value which |
365 | * can then be overridden using amdgpu_gmc_agp_location() if you want |
366 | * to enable the AGP aperture on a specific chip. |
367 | * |
368 | */ |
369 | void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev, |
370 | struct amdgpu_gmc *mc) |
371 | { |
372 | mc->agp_start = 0xffffffffffff; |
373 | mc->agp_end = 0; |
374 | mc->agp_size = 0; |
375 | } |
376 | |
/**
 * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
 *
 * @addr: 48 bit physical address, page aligned (36 significant bits)
 * @pasid: 16 bit process address space identifier
 *
 * Returns @addr shifted left by four bits with @pasid or'ed into the result.
 */
static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
{
	uint64_t key = addr << 4;

	key |= pasid;
	return key;
}
387 | |
388 | /** |
389 | * amdgpu_gmc_filter_faults - filter VM faults |
390 | * |
391 | * @adev: amdgpu device structure |
392 | * @ih: interrupt ring that the fault received from |
393 | * @addr: address of the VM fault |
394 | * @pasid: PASID of the process causing the fault |
395 | * @timestamp: timestamp of the fault |
396 | * |
397 | * Returns: |
398 | * True if the fault was filtered and should not be processed further. |
399 | * False if the fault is a new one and needs to be handled. |
400 | */ |
401 | bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, |
402 | struct amdgpu_ih_ring *ih, uint64_t addr, |
403 | uint16_t pasid, uint64_t timestamp) |
404 | { |
405 | struct amdgpu_gmc *gmc = &adev->gmc; |
406 | uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid); |
407 | struct amdgpu_gmc_fault *fault; |
408 | uint32_t hash; |
409 | |
410 | /* Stale retry fault if timestamp goes backward */ |
411 | if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp)) |
412 | return true; |
413 | |
414 | /* If we don't have space left in the ring buffer return immediately */ |
415 | stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) - |
416 | AMDGPU_GMC_FAULT_TIMEOUT; |
417 | if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp) |
418 | return true; |
419 | |
420 | /* Try to find the fault in the hash */ |
421 | hash = hash_64(val: key, AMDGPU_GMC_FAULT_HASH_ORDER); |
422 | fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; |
423 | while (fault->timestamp >= stamp) { |
424 | uint64_t tmp; |
425 | |
426 | if (atomic64_read(v: &fault->key) == key) { |
427 | /* |
428 | * if we get a fault which is already present in |
429 | * the fault_ring and the timestamp of |
430 | * the fault is after the expired timestamp, |
431 | * then this is a new fault that needs to be added |
432 | * into the fault ring. |
433 | */ |
434 | if (fault->timestamp_expiry != 0 && |
435 | amdgpu_ih_ts_after(fault->timestamp_expiry, |
436 | timestamp)) |
437 | break; |
438 | else |
439 | return true; |
440 | } |
441 | |
442 | tmp = fault->timestamp; |
443 | fault = &gmc->fault_ring[fault->next]; |
444 | |
445 | /* Check if the entry was reused */ |
446 | if (fault->timestamp >= tmp) |
447 | break; |
448 | } |
449 | |
450 | /* Add the fault to the ring */ |
451 | fault = &gmc->fault_ring[gmc->last_fault]; |
452 | atomic64_set(v: &fault->key, i: key); |
453 | fault->timestamp = timestamp; |
454 | |
455 | /* And update the hash */ |
456 | fault->next = gmc->fault_hash[hash].idx; |
457 | gmc->fault_hash[hash].idx = gmc->last_fault++; |
458 | return false; |
459 | } |
460 | |
461 | /** |
462 | * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter |
463 | * |
464 | * @adev: amdgpu device structure |
465 | * @addr: address of the VM fault |
466 | * @pasid: PASID of the process causing the fault |
467 | * |
468 | * Remove the address from fault filter, then future vm fault on this address |
469 | * will pass to retry fault handler to recover. |
470 | */ |
471 | void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, |
472 | uint16_t pasid) |
473 | { |
474 | struct amdgpu_gmc *gmc = &adev->gmc; |
475 | uint64_t key = amdgpu_gmc_fault_key(addr, pasid); |
476 | struct amdgpu_ih_ring *ih; |
477 | struct amdgpu_gmc_fault *fault; |
478 | uint32_t last_wptr; |
479 | uint64_t last_ts; |
480 | uint32_t hash; |
481 | uint64_t tmp; |
482 | |
483 | if (adev->irq.retry_cam_enabled) |
484 | return; |
485 | |
486 | ih = &adev->irq.ih1; |
487 | /* Get the WPTR of the last entry in IH ring */ |
488 | last_wptr = amdgpu_ih_get_wptr(adev, ih); |
489 | /* Order wptr with ring data. */ |
490 | rmb(); |
491 | /* Get the timetamp of the last entry in IH ring */ |
492 | last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1); |
493 | |
494 | hash = hash_64(val: key, AMDGPU_GMC_FAULT_HASH_ORDER); |
495 | fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; |
496 | do { |
497 | if (atomic64_read(v: &fault->key) == key) { |
498 | /* |
499 | * Update the timestamp when this fault |
500 | * expired. |
501 | */ |
502 | fault->timestamp_expiry = last_ts; |
503 | break; |
504 | } |
505 | |
506 | tmp = fault->timestamp; |
507 | fault = &gmc->fault_ring[fault->next]; |
508 | } while (fault->timestamp < tmp); |
509 | } |
510 | |
/**
 * amdgpu_gmc_ras_sw_init - software init of the RAS blocks handled by GMC
 *
 * @adev: amdgpu_device pointer
 *
 * Calls the RAS sw_init helpers of the umc, mmhub, hdp, mca (mp0, mp1, mpio)
 * and xgmi blocks in that order and stops at the first one that fails.
 *
 * Returns 0 for success, the error of the first failing helper otherwise.
 */
int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
	int r;

	/* umc ras block */
	r = amdgpu_umc_ras_sw_init(adev);
	if (r)
		goto out;

	/* mmhub ras block */
	r = amdgpu_mmhub_ras_sw_init(adev);
	if (r)
		goto out;

	/* hdp ras block */
	r = amdgpu_hdp_ras_sw_init(adev);
	if (r)
		goto out;

	/* mca.x ras block */
	r = amdgpu_mca_mp0_ras_sw_init(adev);
	if (r)
		goto out;

	r = amdgpu_mca_mp1_ras_sw_init(adev);
	if (r)
		goto out;

	r = amdgpu_mca_mpio_ras_sw_init(adev);
	if (r)
		goto out;

	/* xgmi ras block */
	r = amdgpu_xgmi_ras_sw_init(adev);

out:
	return r;
}
550 | |
/**
 * amdgpu_gmc_ras_late_init - late RAS init hook for GMC
 *
 * @adev: amdgpu_device pointer (unused)
 *
 * Nothing is done here.
 *
 * Returns 0 unconditionally.
 */
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
	/* Intentionally a no-op. */
	return 0;
}
555 | |
/**
 * amdgpu_gmc_ras_fini - RAS teardown hook for GMC
 *
 * @adev: amdgpu_device pointer (unused)
 *
 * Nothing is done here.
 */
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
	/* Intentionally a no-op. */
}
560 | |
561 | /* |
562 | * The latest engine allocation on gfx9/10 is: |
563 | * Engine 2, 3: firmware |
564 | * Engine 0, 1, 4~16: amdgpu ring, |
565 | * subject to change when ring number changes |
566 | * Engine 17: Gart flushes |
567 | */ |
568 | #define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3 |
569 | |
570 | int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) |
571 | { |
572 | struct amdgpu_ring *ring; |
573 | unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0}; |
574 | unsigned i; |
575 | unsigned vmhub, inv_eng; |
576 | |
577 | /* init the vm inv eng for all vmhubs */ |
578 | for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { |
579 | vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP; |
580 | /* reserve engine 5 for firmware */ |
581 | if (adev->enable_mes) |
582 | vm_inv_engs[i] &= ~(1 << 5); |
583 | /* reserve mmhub engine 3 for firmware */ |
584 | if (adev->enable_umsch_mm) |
585 | vm_inv_engs[i] &= ~(1 << 3); |
586 | } |
587 | |
588 | for (i = 0; i < adev->num_rings; ++i) { |
589 | ring = adev->rings[i]; |
590 | vmhub = ring->vm_hub; |
591 | |
592 | if (ring == &adev->mes.ring || |
593 | ring == &adev->umsch_mm.ring) |
594 | continue; |
595 | |
596 | inv_eng = ffs(vm_inv_engs[vmhub]); |
597 | if (!inv_eng) { |
598 | dev_err(adev->dev, "no VM inv eng for ring %s\n" , |
599 | ring->name); |
600 | return -EINVAL; |
601 | } |
602 | |
603 | ring->vm_inv_eng = inv_eng - 1; |
604 | vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); |
605 | |
606 | dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n" , |
607 | ring->name, ring->vm_inv_eng, ring->vm_hub); |
608 | } |
609 | |
610 | return 0; |
611 | } |
612 | |
613 | void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, |
614 | uint32_t vmhub, uint32_t flush_type) |
615 | { |
616 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; |
617 | struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; |
618 | struct dma_fence *fence; |
619 | struct amdgpu_job *job; |
620 | int r; |
621 | |
622 | if (!hub->sdma_invalidation_workaround || vmid || |
623 | !adev->mman.buffer_funcs_enabled || |
624 | !adev->ib_pool_ready || amdgpu_in_reset(adev) || |
625 | !ring->sched.ready) { |
626 | |
627 | /* |
628 | * A GPU reset should flush all TLBs anyway, so no need to do |
629 | * this while one is ongoing. |
630 | */ |
631 | if (!down_read_trylock(sem: &adev->reset_domain->sem)) |
632 | return; |
633 | |
634 | if (adev->gmc.flush_tlb_needs_extra_type_2) |
635 | adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, |
636 | vmhub, 2); |
637 | |
638 | if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) |
639 | adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, |
640 | vmhub, 0); |
641 | |
642 | adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub, |
643 | flush_type); |
644 | up_read(sem: &adev->reset_domain->sem); |
645 | return; |
646 | } |
647 | |
648 | /* The SDMA on Navi 1x has a bug which can theoretically result in memory |
649 | * corruption if an invalidation happens at the same time as an VA |
650 | * translation. Avoid this by doing the invalidation from the SDMA |
651 | * itself at least for GART. |
652 | */ |
653 | mutex_lock(&adev->mman.gtt_window_lock); |
654 | r = amdgpu_job_alloc_with_ib(adev: ring->adev, entity: &adev->mman.high_pr, |
655 | AMDGPU_FENCE_OWNER_UNDEFINED, |
656 | size: 16 * 4, pool_type: AMDGPU_IB_POOL_IMMEDIATE, |
657 | job: &job); |
658 | if (r) |
659 | goto error_alloc; |
660 | |
661 | job->vm_pd_addr = amdgpu_gmc_pd_addr(bo: adev->gart.bo); |
662 | job->vm_needs_flush = true; |
663 | job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; |
664 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
665 | fence = amdgpu_job_submit(job); |
666 | mutex_unlock(lock: &adev->mman.gtt_window_lock); |
667 | |
668 | dma_fence_wait(fence, intr: false); |
669 | dma_fence_put(fence); |
670 | |
671 | return; |
672 | |
673 | error_alloc: |
674 | mutex_unlock(lock: &adev->mman.gtt_window_lock); |
675 | dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n" , r); |
676 | } |
677 | |
678 | int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, |
679 | uint32_t flush_type, bool all_hub, |
680 | uint32_t inst) |
681 | { |
682 | u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : |
683 | adev->usec_timeout; |
684 | struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; |
685 | struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; |
686 | unsigned int ndw; |
687 | signed long r; |
688 | uint32_t seq; |
689 | |
690 | if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || |
691 | !down_read_trylock(sem: &adev->reset_domain->sem)) { |
692 | |
693 | if (adev->gmc.flush_tlb_needs_extra_type_2) |
694 | adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, |
695 | 2, all_hub, |
696 | inst); |
697 | |
698 | if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) |
699 | adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, |
700 | 0, all_hub, |
701 | inst); |
702 | |
703 | adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, |
704 | flush_type, all_hub, |
705 | inst); |
706 | return 0; |
707 | } |
708 | |
709 | /* 2 dwords flush + 8 dwords fence */ |
710 | ndw = kiq->pmf->invalidate_tlbs_size + 8; |
711 | |
712 | if (adev->gmc.flush_tlb_needs_extra_type_2) |
713 | ndw += kiq->pmf->invalidate_tlbs_size; |
714 | |
715 | if (adev->gmc.flush_tlb_needs_extra_type_0) |
716 | ndw += kiq->pmf->invalidate_tlbs_size; |
717 | |
718 | spin_lock(lock: &adev->gfx.kiq[inst].ring_lock); |
719 | amdgpu_ring_alloc(ring, ndw); |
720 | if (adev->gmc.flush_tlb_needs_extra_type_2) |
721 | kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); |
722 | |
723 | if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) |
724 | kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); |
725 | |
726 | kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); |
727 | r = amdgpu_fence_emit_polling(ring, s: &seq, MAX_KIQ_REG_WAIT); |
728 | if (r) { |
729 | amdgpu_ring_undo(ring); |
730 | spin_unlock(lock: &adev->gfx.kiq[inst].ring_lock); |
731 | goto error_unlock_reset; |
732 | } |
733 | |
734 | amdgpu_ring_commit(ring); |
735 | spin_unlock(lock: &adev->gfx.kiq[inst].ring_lock); |
736 | r = amdgpu_fence_wait_polling(ring, wait_seq: seq, timeout: usec_timeout); |
737 | if (r < 1) { |
738 | dev_err(adev->dev, "wait for kiq fence error: %ld.\n" , r); |
739 | r = -ETIME; |
740 | goto error_unlock_reset; |
741 | } |
742 | r = 0; |
743 | |
744 | error_unlock_reset: |
745 | up_read(sem: &adev->reset_domain->sem); |
746 | return r; |
747 | } |
748 | |
749 | void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, |
750 | uint32_t reg0, uint32_t reg1, |
751 | uint32_t ref, uint32_t mask, |
752 | uint32_t xcc_inst) |
753 | { |
754 | struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; |
755 | struct amdgpu_ring *ring = &kiq->ring; |
756 | signed long r, cnt = 0; |
757 | unsigned long flags; |
758 | uint32_t seq; |
759 | |
760 | if (adev->mes.ring.sched.ready) { |
761 | amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, |
762 | ref, mask); |
763 | return; |
764 | } |
765 | |
766 | spin_lock_irqsave(&kiq->ring_lock, flags); |
767 | amdgpu_ring_alloc(ring, ndw: 32); |
768 | amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, |
769 | ref, mask); |
770 | r = amdgpu_fence_emit_polling(ring, s: &seq, MAX_KIQ_REG_WAIT); |
771 | if (r) |
772 | goto failed_undo; |
773 | |
774 | amdgpu_ring_commit(ring); |
775 | spin_unlock_irqrestore(lock: &kiq->ring_lock, flags); |
776 | |
777 | r = amdgpu_fence_wait_polling(ring, wait_seq: seq, MAX_KIQ_REG_WAIT); |
778 | |
779 | /* don't wait anymore for IRQ context */ |
780 | if (r < 1 && in_interrupt()) |
781 | goto failed_kiq; |
782 | |
783 | might_sleep(); |
784 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { |
785 | |
786 | msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); |
787 | r = amdgpu_fence_wait_polling(ring, wait_seq: seq, MAX_KIQ_REG_WAIT); |
788 | } |
789 | |
790 | if (cnt > MAX_KIQ_REG_TRY) |
791 | goto failed_kiq; |
792 | |
793 | return; |
794 | |
795 | failed_undo: |
796 | amdgpu_ring_undo(ring); |
797 | spin_unlock_irqrestore(lock: &kiq->ring_lock, flags); |
798 | failed_kiq: |
799 | dev_err(adev->dev, "failed to write reg %x wait reg %x\n" , reg0, reg1); |
800 | } |
801 | |
802 | /** |
803 | * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ |
804 | * @adev: amdgpu_device pointer |
805 | * |
806 | * Check and set if an the device @adev supports Trusted Memory |
807 | * Zones (TMZ). |
808 | */ |
809 | void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) |
810 | { |
811 | switch (amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0)) { |
812 | /* RAVEN */ |
813 | case IP_VERSION(9, 2, 2): |
814 | case IP_VERSION(9, 1, 0): |
815 | /* RENOIR looks like RAVEN */ |
816 | case IP_VERSION(9, 3, 0): |
817 | /* GC 10.3.7 */ |
818 | case IP_VERSION(10, 3, 7): |
819 | /* GC 11.0.1 */ |
820 | case IP_VERSION(11, 0, 1): |
821 | if (amdgpu_tmz == 0) { |
822 | adev->gmc.tmz_enabled = false; |
823 | dev_info(adev->dev, |
824 | "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n" ); |
825 | } else { |
826 | adev->gmc.tmz_enabled = true; |
827 | dev_info(adev->dev, |
828 | "Trusted Memory Zone (TMZ) feature enabled\n" ); |
829 | } |
830 | break; |
831 | case IP_VERSION(10, 1, 10): |
832 | case IP_VERSION(10, 1, 1): |
833 | case IP_VERSION(10, 1, 2): |
834 | case IP_VERSION(10, 1, 3): |
835 | case IP_VERSION(10, 3, 0): |
836 | case IP_VERSION(10, 3, 2): |
837 | case IP_VERSION(10, 3, 4): |
838 | case IP_VERSION(10, 3, 5): |
839 | case IP_VERSION(10, 3, 6): |
840 | /* VANGOGH */ |
841 | case IP_VERSION(10, 3, 1): |
842 | /* YELLOW_CARP*/ |
843 | case IP_VERSION(10, 3, 3): |
844 | case IP_VERSION(11, 0, 4): |
845 | case IP_VERSION(11, 5, 0): |
846 | case IP_VERSION(11, 5, 1): |
847 | /* Don't enable it by default yet. |
848 | */ |
849 | if (amdgpu_tmz < 1) { |
850 | adev->gmc.tmz_enabled = false; |
851 | dev_info(adev->dev, |
852 | "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n" ); |
853 | } else { |
854 | adev->gmc.tmz_enabled = true; |
855 | dev_info(adev->dev, |
856 | "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n" ); |
857 | } |
858 | break; |
859 | default: |
860 | adev->gmc.tmz_enabled = false; |
861 | dev_info(adev->dev, |
862 | "Trusted Memory Zone (TMZ) feature not supported\n" ); |
863 | break; |
864 | } |
865 | } |
866 | |
867 | /** |
868 | * amdgpu_gmc_noretry_set -- set per asic noretry defaults |
869 | * @adev: amdgpu_device pointer |
870 | * |
871 | * Set a per asic default for the no-retry parameter. |
872 | * |
873 | */ |
874 | void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) |
875 | { |
876 | struct amdgpu_gmc *gmc = &adev->gmc; |
877 | uint32_t gc_ver = amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0); |
878 | bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) || |
879 | gc_ver == IP_VERSION(9, 3, 0) || |
880 | gc_ver == IP_VERSION(9, 4, 0) || |
881 | gc_ver == IP_VERSION(9, 4, 1) || |
882 | gc_ver == IP_VERSION(9, 4, 2) || |
883 | gc_ver == IP_VERSION(9, 4, 3) || |
884 | gc_ver >= IP_VERSION(10, 3, 0)); |
885 | |
886 | if (!amdgpu_sriov_xnack_support(adev)) |
887 | gmc->noretry = 1; |
888 | else |
889 | gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; |
890 | } |
891 | |
892 | void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, |
893 | bool enable) |
894 | { |
895 | struct amdgpu_vmhub *hub; |
896 | u32 tmp, reg, i; |
897 | |
898 | hub = &adev->vmhub[hub_type]; |
899 | for (i = 0; i < 16; i++) { |
900 | reg = hub->vm_context0_cntl + hub->ctx_distance * i; |
901 | |
902 | tmp = (hub_type == AMDGPU_GFXHUB(0)) ? |
903 | RREG32_SOC15_IP(GC, reg) : |
904 | RREG32_SOC15_IP(MMHUB, reg); |
905 | |
906 | if (enable) |
907 | tmp |= hub->vm_cntx_cntl_vm_fault; |
908 | else |
909 | tmp &= ~hub->vm_cntx_cntl_vm_fault; |
910 | |
911 | (hub_type == AMDGPU_GFXHUB(0)) ? |
912 | WREG32_SOC15_IP(GC, reg, tmp) : |
913 | WREG32_SOC15_IP(MMHUB, reg, tmp); |
914 | } |
915 | } |
916 | |
917 | void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) |
918 | { |
919 | unsigned size; |
920 | |
921 | /* |
922 | * Some ASICs need to reserve a region of video memory to avoid access |
923 | * from driver |
924 | */ |
925 | adev->mman.stolen_reserved_offset = 0; |
926 | adev->mman.stolen_reserved_size = 0; |
927 | |
928 | /* |
929 | * TODO: |
930 | * Currently there is a bug where some memory client outside |
931 | * of the driver writes to first 8M of VRAM on S3 resume, |
932 | * this overrides GART which by default gets placed in first 8M and |
933 | * causes VM_FAULTS once GTT is accessed. |
934 | * Keep the stolen memory reservation until the while this is not solved. |
935 | */ |
936 | switch (adev->asic_type) { |
937 | case CHIP_VEGA10: |
938 | adev->mman.keep_stolen_vga_memory = true; |
939 | /* |
940 | * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area. |
941 | */ |
942 | #ifdef CONFIG_X86 |
943 | if (amdgpu_sriov_vf(adev) && hypervisor_is_type(type: X86_HYPER_MS_HYPERV)) { |
944 | adev->mman.stolen_reserved_offset = 0x500000; |
945 | adev->mman.stolen_reserved_size = 0x200000; |
946 | } |
947 | #endif |
948 | break; |
949 | case CHIP_RAVEN: |
950 | case CHIP_RENOIR: |
951 | adev->mman.keep_stolen_vga_memory = true; |
952 | break; |
953 | default: |
954 | adev->mman.keep_stolen_vga_memory = false; |
955 | break; |
956 | } |
957 | |
958 | if (amdgpu_sriov_vf(adev) || |
959 | !amdgpu_device_has_display_hardware(adev)) { |
960 | size = 0; |
961 | } else { |
962 | size = amdgpu_gmc_get_vbios_fb_size(adev); |
963 | |
964 | if (adev->mman.keep_stolen_vga_memory) |
965 | size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION); |
966 | } |
967 | |
968 | /* set to 0 if the pre-OS buffer uses up most of vram */ |
969 | if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) |
970 | size = 0; |
971 | |
972 | if (size > AMDGPU_VBIOS_VGA_ALLOCATION) { |
973 | adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION; |
974 | adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size; |
975 | } else { |
976 | adev->mman.stolen_vga_size = size; |
977 | adev->mman.stolen_extended_size = 0; |
978 | } |
979 | } |
980 | |
981 | /** |
982 | * amdgpu_gmc_init_pdb0 - initialize PDB0 |
983 | * |
984 | * @adev: amdgpu_device pointer |
985 | * |
986 | * This function is only used when GART page table is used |
987 | * for FB address translatioin. In such a case, we construct |
988 | * a 2-level system VM page table: PDB0->PTB, to cover both |
989 | * VRAM of the hive and system memory. |
990 | * |
991 | * PDB0 is static, initialized once on driver initialization. |
992 | * The first n entries of PDB0 are used as PTE by setting |
993 | * P bit to 1, pointing to VRAM. The n+1'th entry points |
994 | * to a big PTB covering system memory. |
995 | * |
996 | */ |
997 | void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) |
998 | { |
999 | int i; |
1000 | uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW? |
1001 | /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M |
1002 | */ |
1003 | u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; |
1004 | u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21; |
1005 | u64 vram_addr = adev->vm_manager.vram_base_offset - |
1006 | adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; |
1007 | u64 vram_end = vram_addr + vram_size; |
1008 | u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, bo: adev->gart.bo); |
1009 | int idx; |
1010 | |
1011 | if (!drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) |
1012 | return; |
1013 | |
1014 | flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE; |
1015 | flags |= AMDGPU_PTE_WRITEABLE; |
1016 | flags |= AMDGPU_PTE_SNOOPED; |
1017 | flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1)); |
1018 | flags |= AMDGPU_PDE_PTE; |
1019 | |
1020 | /* The first n PDE0 entries are used as PTE, |
1021 | * pointing to vram |
1022 | */ |
1023 | for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size) |
1024 | amdgpu_gmc_set_pte_pde(adev, cpu_pt_addr: adev->gmc.ptr_pdb0, gpu_page_idx: i, addr: vram_addr, flags); |
1025 | |
1026 | /* The n+1'th PDE0 entry points to a huge |
1027 | * PTB who has more than 512 entries each |
1028 | * pointing to a 4K system page |
1029 | */ |
1030 | flags = AMDGPU_PTE_VALID; |
1031 | flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED; |
1032 | /* Requires gart_ptb_gpu_pa to be 4K aligned */ |
1033 | amdgpu_gmc_set_pte_pde(adev, cpu_pt_addr: adev->gmc.ptr_pdb0, gpu_page_idx: i, addr: gart_ptb_gpu_pa, flags); |
1034 | drm_dev_exit(idx); |
1035 | } |
1036 | |
1037 | /** |
1038 | * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC |
1039 | * address |
1040 | * |
1041 | * @adev: amdgpu_device pointer |
1042 | * @mc_addr: MC address of buffer |
1043 | */ |
1044 | uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr) |
1045 | { |
1046 | return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; |
1047 | } |
1048 | |
/**
 * amdgpu_gmc_vram_pa - physical address of a vram buffer object from the
 * GPU's view
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 *
 * Returns amdgpu_bo_gpu_offset() of @bo converted with
 * amdgpu_gmc_vram_mc2pa().
 */
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
{
	uint64_t mc_addr = amdgpu_bo_gpu_offset(bo);

	return amdgpu_gmc_vram_mc2pa(adev, mc_addr);
}
1060 | |
1061 | /** |
1062 | * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address |
1063 | * from CPU's view |
1064 | * |
1065 | * @adev: amdgpu_device pointer |
1066 | * @bo: amdgpu buffer object |
1067 | */ |
1068 | uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) |
1069 | { |
1070 | return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base; |
1071 | } |
1072 | |
1073 | int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) |
1074 | { |
1075 | struct amdgpu_bo *vram_bo = NULL; |
1076 | uint64_t vram_gpu = 0; |
1077 | void *vram_ptr = NULL; |
1078 | |
1079 | int ret, size = 0x100000; |
1080 | uint8_t cptr[10]; |
1081 | |
1082 | ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, |
1083 | AMDGPU_GEM_DOMAIN_VRAM, |
1084 | bo_ptr: &vram_bo, |
1085 | gpu_addr: &vram_gpu, |
1086 | cpu_addr: &vram_ptr); |
1087 | if (ret) |
1088 | return ret; |
1089 | |
1090 | memset(vram_ptr, 0x86, size); |
1091 | memset(cptr, 0x86, 10); |
1092 | |
1093 | /** |
1094 | * Check the start, the mid, and the end of the memory if the content of |
1095 | * each byte is the pattern "0x86". If yes, we suppose the vram bo is |
1096 | * workable. |
1097 | * |
1098 | * Note: If check the each byte of whole 1M bo, it will cost too many |
1099 | * seconds, so here, we just pick up three parts for emulation. |
1100 | */ |
1101 | ret = memcmp(p: vram_ptr, q: cptr, size: 10); |
1102 | if (ret) { |
1103 | ret = -EIO; |
1104 | goto release_buffer; |
1105 | } |
1106 | |
1107 | ret = memcmp(p: vram_ptr + (size / 2), q: cptr, size: 10); |
1108 | if (ret) { |
1109 | ret = -EIO; |
1110 | goto release_buffer; |
1111 | } |
1112 | |
1113 | ret = memcmp(p: vram_ptr + size - 10, q: cptr, size: 10); |
1114 | if (ret) { |
1115 | ret = -EIO; |
1116 | goto release_buffer; |
1117 | } |
1118 | |
1119 | release_buffer: |
1120 | amdgpu_bo_free_kernel(bo: &vram_bo, gpu_addr: &vram_gpu, |
1121 | cpu_addr: &vram_ptr); |
1122 | |
1123 | return ret; |
1124 | } |
1125 | |
1126 | static ssize_t current_memory_partition_show( |
1127 | struct device *dev, struct device_attribute *addr, char *buf) |
1128 | { |
1129 | struct drm_device *ddev = dev_get_drvdata(dev); |
1130 | struct amdgpu_device *adev = drm_to_adev(ddev); |
1131 | enum amdgpu_memory_partition mode; |
1132 | |
1133 | mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); |
1134 | switch (mode) { |
1135 | case AMDGPU_NPS1_PARTITION_MODE: |
1136 | return sysfs_emit(buf, fmt: "NPS1\n" ); |
1137 | case AMDGPU_NPS2_PARTITION_MODE: |
1138 | return sysfs_emit(buf, fmt: "NPS2\n" ); |
1139 | case AMDGPU_NPS3_PARTITION_MODE: |
1140 | return sysfs_emit(buf, fmt: "NPS3\n" ); |
1141 | case AMDGPU_NPS4_PARTITION_MODE: |
1142 | return sysfs_emit(buf, fmt: "NPS4\n" ); |
1143 | case AMDGPU_NPS6_PARTITION_MODE: |
1144 | return sysfs_emit(buf, fmt: "NPS6\n" ); |
1145 | case AMDGPU_NPS8_PARTITION_MODE: |
1146 | return sysfs_emit(buf, fmt: "NPS8\n" ); |
1147 | default: |
1148 | return sysfs_emit(buf, fmt: "UNKNOWN\n" ); |
1149 | } |
1150 | |
1151 | return sysfs_emit(buf, fmt: "UNKNOWN\n" ); |
1152 | } |
1153 | |
1154 | static DEVICE_ATTR_RO(current_memory_partition); |
1155 | |
1156 | int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev) |
1157 | { |
1158 | if (!adev->gmc.gmc_funcs->query_mem_partition_mode) |
1159 | return 0; |
1160 | |
1161 | return device_create_file(device: adev->dev, |
1162 | entry: &dev_attr_current_memory_partition); |
1163 | } |
1164 | |
1165 | void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev) |
1166 | { |
1167 | device_remove_file(dev: adev->dev, attr: &dev_attr_current_memory_partition); |
1168 | } |
1169 | |