/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "amdgpu_atomfirmware.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "soc15d.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"

#define GFX11_NUM_GFX_RINGS 1
#define GFX11_MEC_HPD_SIZE 2048

#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388

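/* Register offsets used below that appear not to be provided by the
 * included gc_11_0_0 headers; defined locally together with their
 * BASE_IDX so the SOC15 register accessors can resolve them.
 */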
#define regCGTT_WD_CLK_CTRL 0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
#define regPC_CONFIG_CNTL_1 0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX 1

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

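/* Default SH_MEM_CONFIG: 64-bit address mode, unaligned memory access
 * allowed, and an initial instruction prefetch depth of 3.
 */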
#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

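/* KIQ PM4 helpers: each function below assembles a single PM4 packet on
 * the KIQ ring. The emitted dword counts must match the *_size fields in
 * gfx_v11_0_kiq_pm4_funcs (e.g. SET_RESOURCES is one header plus seven
 * payload dwords, matching set_resources_size = 8).
 */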
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* oac mask */
	amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}

static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

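/* Emit a WAIT_REG_MEM packet that stalls the selected engine until the
 * value at addr0/addr1 (a register or a dword-aligned memory location,
 * depending on mem_space) masked with @mask equals @ref, polling at the
 * interval given by @inv.
 */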
static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

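/* Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a packet
 * that rewrites it to 0xDEADBEEF, then poll until the CP has executed
 * the write or the usec timeout expires.
 */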
static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

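/* IB test: submit a small indirect buffer containing a WRITE_DATA packet
 * that stores 0xDEADBEEF to a writeback slot (or a MES context offset for
 * MES queues), then wait on the fence and verify the value landed.
 */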
static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't support indirect buffers for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		uint32_t padding, offset;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		padding = amdgpu_mes_ctx_get_offs(ring,
						  AMDGPU_MES_CTX_PADDING_OFFS);

		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
		cpu_ptr = &adev->wb.wb[index];

		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err1;
		}
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	if (!ring->is_mes_queue)
		amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;
	char fw_name[40];

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		if ((adev->gfx.me_fw_version >= 1505) &&
		    (adev->gfx.pfp_fw_version >= 1600) &&
		    (adev->gfx.mec_fw_version >= 512)) {
			if (amdgpu_sriov_vf(adev))
				adev->gfx.cp_gfx_shadow = true;
			else
				adev->gfx.cp_gfx_shadow = false;
		}
		break;
	default:
		adev->gfx.cp_gfx_shadow = false;
		break;
	}
}

static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char fw_name[40];
	char ucode_prefix[30];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
	if (err)
		goto out;
	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
				(union amdgpu_firmware_header *)
				adev->gfx.pfp_fw->data, 2, 0);
	if (adev->gfx.rs64_enable) {
		dev_info(adev->dev, "CP RS64 enable\n");
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
		    adev->pdev->revision == 0xCE)
			snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin");
		else
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);

	/* only one MEC for gfx 11.0.0. */
	adev->gfx.mec2_fw = NULL;

	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
		err = adev->gfx.imu.funcs->init_microcode(adev);
		if (err)
			DRM_ERROR("Failed to init imu firmware!\n");
		return err;
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}

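/* Dword count of the clear-state indirect buffer; the layout accounted
 * for here must stay in sync with gfx_v11_0_get_csb_buffer() below.
 */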
static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
				     volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}

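/* Record the scratch/GRBM/spare-interrupt register offsets used by the
 * RLCG indirect register access path and mark the interface as supported.
 */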
static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

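/* Wave state is read through the SQ indirect register pair: program
 * SQ_IND_INDEX with the wave/thread/register selection, then read the
 * value(s) back from SQ_IND_DATA, optionally auto-incrementing.
 */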
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE 73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE 484
#define MQD_FWWORKAREA_ALIGNMENT 256

static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info)
{
	if (adev->gfx.cp_gfx_shadow) {
		shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
		shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
		shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
		shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
		return 0;
	} else {
		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
		return -ENOTSUPP;
	}
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
};

static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 3):
		adev->gfx.ras = &gfx_v11_0_3_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	int r;
	struct amdgpu_ring *ring;
	unsigned int irq_type;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r)
		return r;
	return 0;
}

static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
					int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX11_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

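/* Cached copy of the RLC table of contents, indexed by firmware ID and
 * filled in by gfx_v11_0_parse_rlc_toc(); offsets and sizes are converted
 * from dwords to bytes.
 */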
static struct {
	SOC21_FIRMWARE_ID id;
	unsigned int offset;
	unsigned int size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
	       (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offsets in the RLC TOC were padded for alignment */
	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
				rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;

	return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v11_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

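/* Copy one firmware image into the autoload buffer at the offset recorded
 * in the TOC, clamping to the TOC size and zero-filling any tail, then set
 * the matching bit in the autoload mask (the RS64 PFP/ME IDs are excluded
 * from the mask).
 */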
static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC21_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size,
						       uint32_t *fw_autoload_mask)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	void *data;
	uint32_t size;
	uint64_t *toc_ptr;

	*(uint64_t *)fw_autoload_mask |= 0x1;

	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint64_t *)data + size / 8 - 1;
	*toc_ptr = *(uint64_t *)fw_autoload_mask;

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
						   data, size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
	uint16_t version_major, version_minor;

	if (adev->gfx.rs64_enable) {
		/* pfp ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* me ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* mec ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		/* instruction */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
							   fw_data, fw_size, fw_autoload_mask);
	} else {
		/* pfp ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
							   fw_data, fw_size, fw_autoload_mask);

		/* me ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
							   fw_data, fw_size, fw_autoload_mask);

		/* mec ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			cp_hdr->jt_size * 4;
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
							   fw_data, fw_size, fw_autoload_mask);
	}

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
						   fw_data, fw_size, fw_autoload_mask);

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2) {
		if (version_minor >= 2) {
			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
								   fw_data, fw_size, fw_autoload_mask);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
								   fw_data, fw_size, fw_autoload_mask);
		}
	}
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
							    uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v2_0 *sdma_hdr;

	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
		adev->sdma.instance[0].fw->data;
	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);

	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	unsigned fw_size;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
		} else {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
		}

		mes_hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				ucode_id, fw_data, fw_size, fw_autoload_mask);

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				data_id, fw_data, fw_size, fw_autoload_mask);
	}
}

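/* RLC backdoor autoload: stage all firmware images in the shared autoload
 * buffer, point the IMU bootloader registers at the RLC_G image, load, set
 * up and start the IMU, and finally disable GPA mode.
 */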
1296 | static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) |
1297 | { |
1298 | uint32_t rlc_g_offset, rlc_g_size; |
1299 | uint64_t gpu_addr; |
1300 | uint32_t autoload_fw_id[2]; |
1301 | |
1302 | memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); |
1303 | |
1304 | /* RLC autoload sequence 2: copy ucode */ |
1305 | gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, fw_autoload_mask: autoload_fw_id); |
1306 | gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, fw_autoload_mask: autoload_fw_id); |
1307 | gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, fw_autoload_mask: autoload_fw_id); |
1308 | gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, fw_autoload_mask: autoload_fw_id); |
1309 | |
1310 | rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; |
1311 | rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; |
1312 | gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; |
1313 | |
1314 | WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); |
1315 | WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); |
1316 | |
1317 | WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); |
1318 | |
1319 | /* RLC autoload sequence 3: load IMU fw */ |
1320 | if (adev->gfx.imu.funcs->load_microcode) |
1321 | adev->gfx.imu.funcs->load_microcode(adev); |
1322 | /* RLC autoload sequence 4 init IMU fw */ |
1323 | if (adev->gfx.imu.funcs->setup_imu) |
1324 | adev->gfx.imu.funcs->setup_imu(adev); |
1325 | if (adev->gfx.imu.funcs->start_imu) |
1326 | adev->gfx.imu.funcs->start_imu(adev); |
1327 | |
1328 | /* RLC autoload sequence 5 disable gpa mode */ |
1329 | gfx_v11_0_disable_gpa_mode(adev); |
1330 | |
1331 | return 0; |
1332 | } |
1333 | |
1334 | static int gfx_v11_0_sw_init(void *handle) |
1335 | { |
1336 | int i, j, k, r, ring_id = 0; |
1337 | int xcc_id = 0; |
1338 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
1339 | |
1340 | switch (amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0)) { |
1341 | case IP_VERSION(11, 0, 0): |
1342 | case IP_VERSION(11, 0, 2): |
1343 | case IP_VERSION(11, 0, 3): |
1344 | adev->gfx.me.num_me = 1; |
1345 | adev->gfx.me.num_pipe_per_me = 1; |
1346 | adev->gfx.me.num_queue_per_pipe = 1; |
1347 | adev->gfx.mec.num_mec = 2; |
1348 | adev->gfx.mec.num_pipe_per_mec = 4; |
1349 | adev->gfx.mec.num_queue_per_pipe = 4; |
1350 | break; |
1351 | case IP_VERSION(11, 0, 1): |
1352 | case IP_VERSION(11, 0, 4): |
1353 | case IP_VERSION(11, 5, 0): |
1354 | case IP_VERSION(11, 5, 1): |
1355 | adev->gfx.me.num_me = 1; |
1356 | adev->gfx.me.num_pipe_per_me = 1; |
1357 | adev->gfx.me.num_queue_per_pipe = 1; |
1358 | adev->gfx.mec.num_mec = 1; |
1359 | adev->gfx.mec.num_pipe_per_mec = 4; |
1360 | adev->gfx.mec.num_queue_per_pipe = 4; |
1361 | break; |
1362 | default: |
1363 | adev->gfx.me.num_me = 1; |
1364 | adev->gfx.me.num_pipe_per_me = 1; |
1365 | adev->gfx.me.num_queue_per_pipe = 1; |
1366 | adev->gfx.mec.num_mec = 1; |
1367 | adev->gfx.mec.num_pipe_per_mec = 4; |
1368 | adev->gfx.mec.num_queue_per_pipe = 8; |
1369 | break; |
1370 | } |
1371 | |
1372 | /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ |
1373 | if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0) == IP_VERSION(11, 0, 3) && |
1374 | amdgpu_sriov_is_pp_one_vf(adev)) |
1375 | adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; |
1376 | |
1377 | /* EOP Event */ |
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
1381 | if (r) |
1382 | return r; |
1383 | |
1384 | /* Privileged reg */ |
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
1388 | if (r) |
1389 | return r; |
1390 | |
1391 | /* Privileged inst */ |
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
1395 | if (r) |
1396 | return r; |
1397 | |
1398 | /* FED error */ |
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
			      GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
			      &adev->gfx.rlc_gc_fed_irq);
1402 | if (r) |
1403 | return r; |
1404 | |
1405 | adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; |
1406 | |
1407 | gfx_v11_0_me_init(adev); |
1408 | |
1409 | r = gfx_v11_0_rlc_init(adev); |
1410 | if (r) { |
		DRM_ERROR("Failed to init rlc BOs!\n");
1412 | return r; |
1413 | } |
1414 | |
1415 | r = gfx_v11_0_mec_init(adev); |
1416 | if (r) { |
		DRM_ERROR("Failed to init MEC BOs!\n");
1418 | return r; |
1419 | } |
1420 | |
1421 | /* set up the gfx ring */ |
1422 | for (i = 0; i < adev->gfx.me.num_me; i++) { |
1423 | for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { |
1424 | for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { |
				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1426 | continue; |
1427 | |
				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
							    i, k, j);
1430 | if (r) |
1431 | return r; |
1432 | ring_id++; |
1433 | } |
1434 | } |
1435 | } |
1436 | |
1437 | ring_id = 0; |
1438 | /* set up the compute queues - allocate horizontally across pipes */ |
1439 | for (i = 0; i < adev->gfx.mec.num_mec; ++i) { |
1440 | for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { |
1441 | for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { |
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
								     k, j))
1444 | continue; |
1445 | |
				r = gfx_v11_0_compute_ring_init(adev, ring_id,
								i, k, j);
1448 | if (r) |
1449 | return r; |
1450 | |
1451 | ring_id++; |
1452 | } |
1453 | } |
1454 | } |
1455 | |
1456 | if (!adev->enable_mes_kiq) { |
		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1458 | if (r) { |
			DRM_ERROR("Failed to init KIQ BOs!\n");
1460 | return r; |
1461 | } |
1462 | |
1463 | r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); |
1464 | if (r) |
1465 | return r; |
1466 | } |
1467 | |
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1469 | if (r) |
1470 | return r; |
1471 | |
1472 | /* allocate visible FB for rlc auto-loading fw */ |
1473 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { |
1474 | r = gfx_v11_0_rlc_autoload_buffer_init(adev); |
1475 | if (r) |
1476 | return r; |
1477 | } |
1478 | |
1479 | r = gfx_v11_0_gpu_early_init(adev); |
1480 | if (r) |
1481 | return r; |
1482 | |
1483 | if (amdgpu_gfx_ras_sw_init(adev)) { |
		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1485 | return -EINVAL; |
1486 | } |
1487 | |
1488 | return 0; |
1489 | } |
1490 | |
1491 | static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) |
1492 | { |
	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
			      &adev->gfx.pfp.pfp_fw_gpu_addr,
			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1496 | |
	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1500 | } |
1501 | |
1502 | static void gfx_v11_0_me_fini(struct amdgpu_device *adev) |
1503 | { |
	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
			      &adev->gfx.me.me_fw_gpu_addr,
			      (void **)&adev->gfx.me.me_fw_ptr);

	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
			      &adev->gfx.me.me_fw_data_gpu_addr,
			      (void **)&adev->gfx.me.me_fw_data_ptr);
1511 | } |
1512 | |
1513 | static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) |
1514 | { |
	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
			      &adev->gfx.rlc.rlc_autoload_gpu_addr,
			      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1518 | } |
1519 | |
1520 | static int gfx_v11_0_sw_fini(void *handle) |
1521 | { |
1522 | int i; |
1523 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
1524 | |
1525 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1529 | |
	amdgpu_gfx_mqd_sw_fini(adev, 0);
1531 | |
1532 | if (!adev->enable_mes_kiq) { |
		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
		amdgpu_gfx_kiq_fini(adev, 0);
1535 | } |
1536 | |
1537 | gfx_v11_0_pfp_fini(adev); |
1538 | gfx_v11_0_me_fini(adev); |
1539 | gfx_v11_0_rlc_fini(adev); |
1540 | gfx_v11_0_mec_fini(adev); |
1541 | |
1542 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) |
1543 | gfx_v11_0_rlc_autoload_buffer_fini(adev); |
1544 | |
1545 | gfx_v11_0_free_microcode(adev); |
1546 | |
1547 | return 0; |
1548 | } |
1549 | |
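/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target
 * a specific shader engine / shader array / instance, or broadcast to
 * all of them when 0xffffffff is passed for the corresponding index.
 */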
1550 | static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, |
1551 | u32 sh_num, u32 instance, int xcc_id) |
1552 | { |
1553 | u32 data; |
1554 | |
1555 | if (instance == 0xffffffff) |
1556 | data = REG_SET_FIELD(0, GRBM_GFX_INDEX, |
1557 | INSTANCE_BROADCAST_WRITES, 1); |
1558 | else |
1559 | data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, |
1560 | instance); |
1561 | |
1562 | if (se_num == 0xffffffff) |
1563 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, |
1564 | 1); |
1565 | else |
1566 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); |
1567 | |
1568 | if (sh_num == 0xffffffff) |
1569 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, |
1570 | 1); |
1571 | else |
1572 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); |
1573 | |
1574 | WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); |
1575 | } |
1576 | |
1577 | static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) |
1578 | { |
1579 | u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; |
1580 | |
1581 | gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); |
1582 | gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, |
1583 | CC_GC_SA_UNIT_DISABLE, |
1584 | SA_DISABLE); |
1585 | gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); |
1586 | gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, |
1587 | GC_USER_SA_UNIT_DISABLE, |
1588 | SA_DISABLE); |
	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
					    adev->gfx.config.max_shader_engines);
1591 | |
1592 | return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); |
1593 | } |
1594 | |
1595 | static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) |
1596 | { |
1597 | u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; |
1598 | u32 rb_mask; |
1599 | |
1600 | gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); |
1601 | gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, |
1602 | CC_RB_BACKEND_DISABLE, |
1603 | BACKEND_DISABLE); |
1604 | gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); |
1605 | gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, |
1606 | GC_USER_RB_BACKEND_DISABLE, |
1607 | BACKEND_DISABLE); |
	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
					    adev->gfx.config.max_shader_engines);
1610 | |
1611 | return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); |
1612 | } |
1613 | |
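/*
 * Derive the active render backend mask: expand each active shader
 * array into its RBs (the 0x3 literal below assumes two RBs per SA),
 * then intersect the result with the globally active RB bitmap read
 * from the harvest registers.
 */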
1614 | static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) |
1615 | { |
1616 | u32 rb_bitmap_width_per_sa; |
1617 | u32 max_sa; |
1618 | u32 active_sa_bitmap; |
1619 | u32 global_active_rb_bitmap; |
1620 | u32 active_rb_bitmap = 0; |
1621 | u32 i; |
1622 | |
1623 | /* query sa bitmap from SA_UNIT_DISABLE registers */ |
1624 | active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); |
1625 | /* query rb bitmap from RB_BACKEND_DISABLE registers */ |
1626 | global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); |
1627 | |
1628 | /* generate active rb bitmap according to active sa bitmap */ |
1629 | max_sa = adev->gfx.config.max_shader_engines * |
1630 | adev->gfx.config.max_sh_per_se; |
1631 | rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / |
1632 | adev->gfx.config.max_sh_per_se; |
1633 | for (i = 0; i < max_sa; i++) { |
1634 | if (active_sa_bitmap & (1 << i)) |
1635 | active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); |
1636 | } |
1637 | |
1638 | active_rb_bitmap &= global_active_rb_bitmap; |
1639 | adev->gfx.config.backend_enable_mask = active_rb_bitmap; |
1640 | adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); |
1641 | } |
1642 | |
1643 | #define DEFAULT_SH_MEM_BASES (0x6000) |
1644 | #define LDS_APP_BASE 0x1 |
1645 | #define SCRATCH_APP_BASE 0x2 |
1646 | |
1647 | static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) |
1648 | { |
1649 | int i; |
1650 | uint32_t sh_mem_bases; |
1651 | uint32_t data; |
1652 | |
1653 | /* |
1654 | * Configure apertures: |
1655 | * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) |
1656 | * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) |
1657 | * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) |
1658 | */ |
1659 | sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | |
1660 | SCRATCH_APP_BASE; |
1661 | |
1662 | mutex_lock(&adev->srbm_mutex); |
1663 | for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { |
		soc21_grbm_select(adev, 0, 0, 0, i);
1665 | /* CP and shaders */ |
1666 | WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); |
1667 | WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); |
1668 | |
1669 | /* Enable trap for each kfd vmid. */ |
1670 | data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); |
1671 | data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); |
1672 | WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); |
1673 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
1676 | |
	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	 * access. These should be enabled by FW for target VMIDs.
	 */
1679 | for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { |
1680 | WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); |
1681 | WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); |
1682 | WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); |
1683 | WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); |
1684 | } |
1685 | } |
1686 | |
1687 | static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) |
1688 | { |
1689 | int vmid; |
1690 | |
1691 | /* |
1692 | * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA |
1693 | * access. Compute VMIDs should be enabled by FW for target VMIDs, |
1694 | * the driver can enable them for graphics. VMID0 should maintain |
1695 | * access so that HWS firmware can save/restore entries. |
1696 | */ |
1697 | for (vmid = 1; vmid < 16; vmid++) { |
1698 | WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); |
1699 | WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); |
1700 | WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); |
1701 | WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); |
1702 | } |
1703 | } |
1704 | |
1705 | static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) |
1706 | { |
1707 | /* TODO: harvest feature to be added later. */ |
1708 | } |
1709 | |
1710 | static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) |
1711 | { |
1712 | /* TCCs are global (not instanced). */ |
1713 | uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | |
1714 | RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); |
1715 | |
1716 | adev->gfx.config.tcc_disabled_mask = |
1717 | REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | |
1718 | (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); |
1719 | } |
1720 | |
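/*
 * One-time GC constant setup at hw init: harvest-aware RB/CU/TCC
 * bookkeeping, SH_MEM aperture programming for every GFXHUB VMID,
 * and the compute/GDS per-VMID defaults.
 */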
1721 | static void gfx_v11_0_constants_init(struct amdgpu_device *adev) |
1722 | { |
1723 | u32 tmp; |
1724 | int i; |
1725 | |
1726 | if (!amdgpu_sriov_vf(adev)) |
1727 | WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); |
1728 | |
1729 | gfx_v11_0_setup_rb(adev); |
	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1731 | gfx_v11_0_get_tcc_info(adev); |
1732 | adev->gfx.config.pa_sc_tile_steering_override = 0; |
1733 | |
1734 | /* Set whether texture coordinate truncation is conformant. */ |
1735 | tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); |
1736 | adev->gfx.config.ta_cntl2_truncate_coord_mode = |
1737 | REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); |
1738 | |
1739 | /* XXX SH_MEM regs */ |
1740 | /* where to put LDS, scratch, GPUVM in FSA64 space */ |
1741 | mutex_lock(&adev->srbm_mutex); |
1742 | for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { |
		soc21_grbm_select(adev, 0, 0, 0, i);
1744 | /* CP and shaders */ |
1745 | WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); |
1746 | if (i != 0) { |
1747 | tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, |
1748 | (adev->gmc.private_aperture_start >> 48)); |
1749 | tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, |
1750 | (adev->gmc.shared_aperture_start >> 48)); |
1751 | WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); |
1752 | } |
1753 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);
1757 | |
1758 | gfx_v11_0_init_compute_vmid(adev); |
1759 | gfx_v11_0_init_gds_vmid(adev); |
1760 | } |
1761 | |
1762 | static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, |
1763 | bool enable) |
1764 | { |
1765 | u32 tmp; |
1766 | |
1767 | if (amdgpu_sriov_vf(adev)) |
1768 | return; |
1769 | |
1770 | tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); |
1771 | |
1772 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, |
1773 | enable ? 1 : 0); |
1774 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, |
1775 | enable ? 1 : 0); |
1776 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, |
1777 | enable ? 1 : 0); |
1778 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, |
1779 | enable ? 1 : 0); |
1780 | |
1781 | WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); |
1782 | } |
1783 | |
1784 | static int gfx_v11_0_init_csb(struct amdgpu_device *adev) |
1785 | { |
1786 | adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); |
1787 | |
1788 | WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, |
1789 | adev->gfx.rlc.clear_state_gpu_addr >> 32); |
1790 | WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, |
1791 | adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); |
1792 | WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); |
1793 | |
1794 | return 0; |
1795 | } |
1796 | |
1797 | static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) |
1798 | { |
1799 | u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); |
1800 | |
1801 | tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); |
1802 | WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); |
1803 | } |
1804 | |
1805 | static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) |
1806 | { |
1807 | WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); |
1808 | udelay(50); |
1809 | WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); |
1810 | udelay(50); |
1811 | } |
1812 | |
1813 | static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, |
1814 | bool enable) |
1815 | { |
1816 | uint32_t rlc_pg_cntl; |
1817 | |
1818 | rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); |
1819 | |
1820 | if (!enable) { |
1821 | /* RLC_PG_CNTL[23] = 0 (default) |
1822 | * RLC will wait for handshake acks with SMU |
1823 | * GFXOFF will be enabled |
1824 | * RLC_PG_CNTL[23] = 1 |
1825 | * RLC will not issue any message to SMU |
1826 | * hence no handshake between SMU & RLC |
1827 | * GFXOFF will be disabled |
1828 | */ |
1829 | rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; |
	} else {
		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	}
1832 | WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); |
1833 | } |
1834 | |
1835 | static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) |
1836 | { |
	/* TODO: keep the RLC/SMU handshake disabled until the SMU and
	 * GFXOFF features work as expected.
	 */
	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1841 | |
1842 | WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); |
1843 | udelay(50); |
1844 | } |
1845 | |
1846 | static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) |
1847 | { |
1848 | uint32_t tmp; |
1849 | |
1850 | /* enable Save Restore Machine */ |
1851 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); |
1852 | tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; |
1853 | tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; |
1854 | WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); |
1855 | } |
1856 | |
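/*
 * Legacy (non-PSP) RLCG load: stream the ucode words through the
 * RLC_GPM_UCODE_ADDR/DATA register pair, then write the firmware
 * version to the ADDR register, matching the pattern used by the
 * other direct loaders in this file.
 */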
1857 | static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) |
1858 | { |
1859 | const struct rlc_firmware_header_v2_0 *hdr; |
1860 | const __le32 *fw_data; |
1861 | unsigned i, fw_size; |
1862 | |
1863 | hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; |
1864 | fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + |
1865 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
1866 | fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; |
1867 | |
1868 | WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, |
1869 | RLCG_UCODE_LOADING_START_ADDRESS); |
1870 | |
1871 | for (i = 0; i < fw_size; i++) |
1872 | WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, |
1873 | le32_to_cpup(fw_data++)); |
1874 | |
1875 | WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); |
1876 | } |
1877 | |
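/*
 * Load the RLC LX6 IRAM and DRAM images carried by the v2.2 RLC
 * firmware header, then release the LX6 core from reset (BRESET = 0).
 * The msleep() every 100 words only throttles emulation mode.
 */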
1878 | static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) |
1879 | { |
1880 | const struct rlc_firmware_header_v2_2 *hdr; |
1881 | const __le32 *fw_data; |
1882 | unsigned i, fw_size; |
1883 | u32 tmp; |
1884 | |
1885 | hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; |
1886 | |
1887 | fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + |
1888 | le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); |
1889 | fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; |
1890 | |
1891 | WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); |
1892 | |
1893 | for (i = 0; i < fw_size; i++) { |
1894 | if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) |
			msleep(1);
1896 | WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, |
1897 | le32_to_cpup(fw_data++)); |
1898 | } |
1899 | |
1900 | WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); |
1901 | |
1902 | fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + |
1903 | le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); |
1904 | fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; |
1905 | |
1906 | WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); |
1907 | for (i = 0; i < fw_size; i++) { |
1908 | if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) |
			msleep(1);
1910 | WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, |
1911 | le32_to_cpup(fw_data++)); |
1912 | } |
1913 | |
1914 | WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); |
1915 | |
1916 | tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); |
1917 | tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); |
1918 | tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); |
1919 | WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); |
1920 | } |
1921 | |
1922 | static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) |
1923 | { |
1924 | const struct rlc_firmware_header_v2_3 *hdr; |
1925 | const __le32 *fw_data; |
1926 | unsigned i, fw_size; |
1927 | u32 tmp; |
1928 | |
1929 | hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; |
1930 | |
1931 | fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + |
1932 | le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); |
1933 | fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; |
1934 | |
1935 | WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); |
1936 | |
1937 | for (i = 0; i < fw_size; i++) { |
1938 | if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) |
			msleep(1);
1940 | WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, |
1941 | le32_to_cpup(fw_data++)); |
1942 | } |
1943 | |
1944 | WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); |
1945 | |
1946 | tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); |
1947 | tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); |
1948 | WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); |
1949 | |
1950 | fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + |
1951 | le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); |
1952 | fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; |
1953 | |
1954 | WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); |
1955 | |
1956 | for (i = 0; i < fw_size; i++) { |
1957 | if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) |
			msleep(1);
1959 | WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, |
1960 | le32_to_cpup(fw_data++)); |
1961 | } |
1962 | |
1963 | WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); |
1964 | |
1965 | tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); |
1966 | tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); |
1967 | WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); |
1968 | } |
1969 | |
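/*
 * Direct RLC load entry point: a v2.0 header always carries the RLCG
 * image; when dpm is enabled, header minor version >= 2 adds the LX6
 * IRAM/DRAM images and minor version 3 additionally carries the
 * RLCP/RLCV images.
 */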
1970 | static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) |
1971 | { |
1972 | const struct rlc_firmware_header_v2_0 *hdr; |
1973 | uint16_t version_major; |
1974 | uint16_t version_minor; |
1975 | |
1976 | if (!adev->gfx.rlc_fw) |
1977 | return -EINVAL; |
1978 | |
1979 | hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; |
	amdgpu_ucode_print_rlc_hdr(&hdr->header);
1981 | |
1982 | version_major = le16_to_cpu(hdr->header.header_version_major); |
1983 | version_minor = le16_to_cpu(hdr->header.header_version_minor); |
1984 | |
1985 | if (version_major == 2) { |
1986 | gfx_v11_0_load_rlcg_microcode(adev); |
1987 | if (amdgpu_dpm == 1) { |
1988 | if (version_minor >= 2) |
1989 | gfx_v11_0_load_rlc_iram_dram_microcode(adev); |
1990 | if (version_minor == 3) |
1991 | gfx_v11_0_load_rlcp_rlcv_microcode(adev); |
1992 | } |
1993 | |
1994 | return 0; |
1995 | } |
1996 | |
1997 | return -EINVAL; |
1998 | } |
1999 | |
2000 | static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) |
2001 | { |
2002 | int r; |
2003 | |
2004 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
2005 | gfx_v11_0_init_csb(adev); |
2006 | |
2007 | if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ |
2008 | gfx_v11_0_rlc_enable_srm(adev); |
2009 | } else { |
2010 | if (amdgpu_sriov_vf(adev)) { |
2011 | gfx_v11_0_init_csb(adev); |
2012 | return 0; |
2013 | } |
2014 | |
2015 | adev->gfx.rlc.funcs->stop(adev); |
2016 | |
2017 | /* disable CG */ |
2018 | WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); |
2019 | |
2020 | /* disable PG */ |
2021 | WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); |
2022 | |
2023 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { |
2024 | /* legacy rlc firmware loading */ |
2025 | r = gfx_v11_0_rlc_load_microcode(adev); |
2026 | if (r) |
2027 | return r; |
2028 | } |
2029 | |
2030 | gfx_v11_0_init_csb(adev); |
2031 | |
2032 | adev->gfx.rlc.funcs->start(adev); |
2033 | } |
2034 | return 0; |
2035 | } |
2036 | |
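/*
 * The config_*_cache helpers below share one pattern: trigger an L1
 * instruction cache invalidation, poll the OP_CNTL register until the
 * hardware reports completion (bounded by a 50 ms timeout), program
 * the cache control bits, and point the IC_BASE registers at the 4K
 * aligned ucode address.
 */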
2037 | static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) |
2038 | { |
2039 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2040 | uint32_t tmp; |
2041 | int i; |
2042 | |
2043 | /* Trigger an invalidation of the L1 instruction caches */ |
2044 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
2045 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); |
2046 | WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); |
2047 | |
2048 | /* Wait for invalidation complete */ |
2049 | for (i = 0; i < usec_timeout; i++) { |
2050 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
2051 | if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, |
2052 | INVALIDATE_CACHE_COMPLETE)) |
2053 | break; |
2054 | udelay(1); |
2055 | } |
2056 | |
2057 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2059 | return -EINVAL; |
2060 | } |
2061 | |
2062 | if (amdgpu_emu_mode == 1) |
2063 | adev->hdp.funcs->flush_hdp(adev, NULL); |
2064 | |
2065 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); |
2066 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); |
2067 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); |
2068 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); |
2069 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); |
2070 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); |
2071 | |
	/* Program me ucode address into instruction cache address register */
2073 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, |
2074 | lower_32_bits(addr) & 0xFFFFF000); |
2075 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, |
2076 | upper_32_bits(addr)); |
2077 | |
2078 | return 0; |
2079 | } |
2080 | |
2081 | static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) |
2082 | { |
2083 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2084 | uint32_t tmp; |
2085 | int i; |
2086 | |
2087 | /* Trigger an invalidation of the L1 instruction caches */ |
2088 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2089 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); |
2090 | WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); |
2091 | |
2092 | /* Wait for invalidation complete */ |
2093 | for (i = 0; i < usec_timeout; i++) { |
2094 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2095 | if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, |
2096 | INVALIDATE_CACHE_COMPLETE)) |
2097 | break; |
2098 | udelay(1); |
2099 | } |
2100 | |
2101 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2103 | return -EINVAL; |
2104 | } |
2105 | |
2106 | if (amdgpu_emu_mode == 1) |
2107 | adev->hdp.funcs->flush_hdp(adev, NULL); |
2108 | |
2109 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); |
2110 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); |
2111 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); |
2112 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); |
2113 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); |
2114 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); |
2115 | |
	/* Program pfp ucode address into instruction cache address register */
2117 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, |
2118 | lower_32_bits(addr) & 0xFFFFF000); |
2119 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, |
2120 | upper_32_bits(addr)); |
2121 | |
2122 | return 0; |
2123 | } |
2124 | |
2125 | static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) |
2126 | { |
2127 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2128 | uint32_t tmp; |
2129 | int i; |
2130 | |
2131 | /* Trigger an invalidation of the L1 instruction caches */ |
2132 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); |
2133 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); |
2134 | |
2135 | WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); |
2136 | |
2137 | /* Wait for invalidation complete */ |
2138 | for (i = 0; i < usec_timeout; i++) { |
2139 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); |
2140 | if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, |
2141 | INVALIDATE_CACHE_COMPLETE)) |
2142 | break; |
2143 | udelay(1); |
2144 | } |
2145 | |
2146 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2148 | return -EINVAL; |
2149 | } |
2150 | |
2151 | if (amdgpu_emu_mode == 1) |
2152 | adev->hdp.funcs->flush_hdp(adev, NULL); |
2153 | |
2154 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); |
2155 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); |
2156 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); |
2157 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); |
2158 | WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); |
2159 | |
	/* Program mec1 ucode address into instruction cache address register */
2161 | WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, |
2162 | lower_32_bits(addr) & 0xFFFFF000); |
2163 | WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, |
2164 | upper_32_bits(addr)); |
2165 | |
2166 | return 0; |
2167 | } |
2168 | |
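/*
 * RS64 variant of the PFP cache setup: in addition to the instruction
 * cache base (addr), each pipe gets a data cache base (addr2) and a
 * program counter start address taken from the v2.0 firmware header,
 * and is pulsed through reset so the new start address takes effect.
 */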
2169 | static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) |
2170 | { |
2171 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2172 | uint32_t tmp; |
2173 | unsigned i, pipe_id; |
2174 | const struct gfx_firmware_header_v2_0 *pfp_hdr; |
2175 | |
2176 | pfp_hdr = (const struct gfx_firmware_header_v2_0 *) |
2177 | adev->gfx.pfp_fw->data; |
2178 | |
2179 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, |
2180 | lower_32_bits(addr)); |
2181 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, |
2182 | upper_32_bits(addr)); |
2183 | |
2184 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); |
2185 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); |
2186 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); |
2187 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); |
2188 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); |
2189 | |
	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces invalidation of the PFP L1 I$. Wait for the
	 * invalidation to complete.
	 */
2195 | for (i = 0; i < usec_timeout; i++) { |
2196 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2197 | if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, |
2198 | INVALIDATE_CACHE_COMPLETE)) |
2199 | break; |
2200 | udelay(1); |
2201 | } |
2202 | |
2203 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2205 | return -EINVAL; |
2206 | } |
2207 | |
2208 | /* Prime the L1 instruction caches */ |
2209 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2210 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); |
2211 | WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); |
	/* Wait for the cache to be primed */
2213 | for (i = 0; i < usec_timeout; i++) { |
2214 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2215 | if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, |
2216 | ICACHE_PRIMED)) |
2217 | break; |
2218 | udelay(1); |
2219 | } |
2220 | |
2221 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to prime instruction cache\n");
2223 | return -EINVAL; |
2224 | } |
2225 | |
2226 | mutex_lock(&adev->srbm_mutex); |
2227 | for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { |
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2229 | WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, |
2230 | (pfp_hdr->ucode_start_addr_hi << 30) | |
2231 | (pfp_hdr->ucode_start_addr_lo >> 2)); |
2232 | WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, |
2233 | pfp_hdr->ucode_start_addr_hi >> 2); |
2234 | |
2235 | /* |
2236 | * Program CP_ME_CNTL to reset given PIPE to take |
2237 | * effect of CP_PFP_PRGRM_CNTR_START. |
2238 | */ |
2239 | tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); |
2240 | if (pipe_id == 0) |
2241 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2242 | PFP_PIPE0_RESET, 1); |
2243 | else |
2244 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2245 | PFP_PIPE1_RESET, 1); |
2246 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2247 | |
		/* Clear pfp pipe reset bit. */
2249 | if (pipe_id == 0) |
2250 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2251 | PFP_PIPE0_RESET, 0); |
2252 | else |
2253 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2254 | PFP_PIPE1_RESET, 0); |
2255 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2256 | |
2257 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, |
2258 | lower_32_bits(addr2)); |
2259 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, |
2260 | upper_32_bits(addr2)); |
2261 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
2264 | |
2265 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); |
2266 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); |
2267 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); |
2268 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); |
2269 | |
2270 | /* Invalidate the data caches */ |
2271 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
2272 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); |
2273 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); |
2274 | |
2275 | for (i = 0; i < usec_timeout; i++) { |
2276 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
2277 | if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, |
2278 | INVALIDATE_DCACHE_COMPLETE)) |
2279 | break; |
2280 | udelay(1); |
2281 | } |
2282 | |
2283 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2285 | return -EINVAL; |
2286 | } |
2287 | |
2288 | return 0; |
2289 | } |
2290 | |
2291 | static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) |
2292 | { |
2293 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2294 | uint32_t tmp; |
2295 | unsigned i, pipe_id; |
2296 | const struct gfx_firmware_header_v2_0 *me_hdr; |
2297 | |
2298 | me_hdr = (const struct gfx_firmware_header_v2_0 *) |
2299 | adev->gfx.me_fw->data; |
2300 | |
2301 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, |
2302 | lower_32_bits(addr)); |
2303 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, |
2304 | upper_32_bits(addr)); |
2305 | |
2306 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); |
2307 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); |
2308 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); |
2309 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); |
2310 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); |
2311 | |
	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
2317 | for (i = 0; i < usec_timeout; i++) { |
2318 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
2319 | if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, |
2320 | INVALIDATE_CACHE_COMPLETE)) |
2321 | break; |
2322 | udelay(1); |
2323 | } |
2324 | |
2325 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2327 | return -EINVAL; |
2328 | } |
2329 | |
2330 | /* Prime the instruction caches */ |
2331 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
2332 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); |
2333 | WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); |
2334 | |
	/* Wait for the instruction cache to be primed */
2336 | for (i = 0; i < usec_timeout; i++) { |
2337 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
2338 | if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, |
2339 | ICACHE_PRIMED)) |
2340 | break; |
2341 | udelay(1); |
2342 | } |
2343 | |
2344 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to prime instruction cache\n");
2346 | return -EINVAL; |
2347 | } |
2348 | |
2349 | mutex_lock(&adev->srbm_mutex); |
2350 | for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { |
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			     (me_hdr->ucode_start_addr_hi << 30) |
			     (me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			     me_hdr->ucode_start_addr_hi >> 2);
2357 | |
		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
2362 | tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); |
2363 | if (pipe_id == 0) |
2364 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2365 | ME_PIPE0_RESET, 1); |
2366 | else |
2367 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2368 | ME_PIPE1_RESET, 1); |
2369 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2370 | |
		/* Clear me pipe reset bit. */
2372 | if (pipe_id == 0) |
2373 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2374 | ME_PIPE0_RESET, 0); |
2375 | else |
2376 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2377 | ME_PIPE1_RESET, 0); |
2378 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2379 | |
2380 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, |
2381 | lower_32_bits(addr2)); |
2382 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, |
2383 | upper_32_bits(addr2)); |
2384 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
2387 | |
2388 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); |
2389 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); |
2390 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); |
2391 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); |
2392 | |
2393 | /* Invalidate the data caches */ |
2394 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
2395 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); |
2396 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); |
2397 | |
2398 | for (i = 0; i < usec_timeout; i++) { |
2399 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
2400 | if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, |
2401 | INVALIDATE_DCACHE_COMPLETE)) |
2402 | break; |
2403 | udelay(1); |
2404 | } |
2405 | |
2406 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2408 | return -EINVAL; |
2409 | } |
2410 | |
2411 | return 0; |
2412 | } |
2413 | |
2414 | static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) |
2415 | { |
2416 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2417 | uint32_t tmp; |
2418 | unsigned i; |
2419 | const struct gfx_firmware_header_v2_0 *mec_hdr; |
2420 | |
2421 | mec_hdr = (const struct gfx_firmware_header_v2_0 *) |
2422 | adev->gfx.mec_fw->data; |
2423 | |
2424 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); |
2425 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); |
2426 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); |
2427 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); |
2428 | WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); |
2429 | |
2430 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); |
2431 | tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); |
2432 | tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); |
2433 | WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); |
2434 | |
2435 | mutex_lock(&adev->srbm_mutex); |
2436 | for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { |
		soc21_grbm_select(adev, 1, i, 0, 0);
2438 | |
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
			     lower_32_bits(addr2));
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(addr2));
2442 | |
2443 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, |
2444 | mec_hdr->ucode_start_addr_lo >> 2 | |
2445 | mec_hdr->ucode_start_addr_hi << 30); |
2446 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, |
2447 | mec_hdr->ucode_start_addr_hi >> 2); |
2448 | |
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
			     lower_32_bits(addr));
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(addr));
2452 | } |
	mutex_unlock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 0, 0, 0, 0);
2455 | |
	/* Trigger an invalidation of the MEC data caches */
2457 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); |
2458 | tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); |
2459 | WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); |
2460 | |
2461 | /* Wait for invalidation complete */ |
2462 | for (i = 0; i < usec_timeout; i++) { |
2463 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); |
2464 | if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, |
2465 | INVALIDATE_DCACHE_COMPLETE)) |
2466 | break; |
2467 | udelay(1); |
2468 | } |
2469 | |
2470 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate MEC data cache\n");
2472 | return -EINVAL; |
2473 | } |
2474 | |
2475 | /* Trigger an invalidation of the L1 instruction caches */ |
2476 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); |
2477 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); |
2478 | WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); |
2479 | |
2480 | /* Wait for invalidation complete */ |
2481 | for (i = 0; i < usec_timeout; i++) { |
2482 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); |
2483 | if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, |
2484 | INVALIDATE_CACHE_COMPLETE)) |
2485 | break; |
2486 | udelay(1); |
2487 | } |
2488 | |
2489 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2491 | return -EINVAL; |
2492 | } |
2493 | |
2494 | return 0; |
2495 | } |
2496 | |
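/*
 * Program the per-pipe RS64 start addresses for PFP, ME and MEC when
 * the ucode images themselves have already been loaded by another
 * agent (e.g. the PSP front-door path), pulsing each pipe group
 * through reset so the new start address is latched.
 */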
2497 | static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) |
2498 | { |
2499 | const struct gfx_firmware_header_v2_0 *pfp_hdr; |
2500 | const struct gfx_firmware_header_v2_0 *me_hdr; |
2501 | const struct gfx_firmware_header_v2_0 *mec_hdr; |
2502 | uint32_t pipe_id, tmp; |
2503 | |
2504 | mec_hdr = (const struct gfx_firmware_header_v2_0 *) |
2505 | adev->gfx.mec_fw->data; |
2506 | me_hdr = (const struct gfx_firmware_header_v2_0 *) |
2507 | adev->gfx.me_fw->data; |
2508 | pfp_hdr = (const struct gfx_firmware_header_v2_0 *) |
2509 | adev->gfx.pfp_fw->data; |
2510 | |
2511 | /* config pfp program start addr */ |
2512 | for (pipe_id = 0; pipe_id < 2; pipe_id++) { |
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2514 | WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, |
2515 | (pfp_hdr->ucode_start_addr_hi << 30) | |
2516 | (pfp_hdr->ucode_start_addr_lo >> 2)); |
2517 | WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, |
2518 | pfp_hdr->ucode_start_addr_hi >> 2); |
2519 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
2521 | |
2522 | /* reset pfp pipe */ |
2523 | tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); |
2524 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); |
2525 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); |
2526 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2527 | |
2528 | /* clear pfp pipe reset */ |
2529 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); |
2530 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); |
2531 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2532 | |
2533 | /* config me program start addr */ |
2534 | for (pipe_id = 0; pipe_id < 2; pipe_id++) { |
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			     (me_hdr->ucode_start_addr_hi << 30) |
			     (me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			     me_hdr->ucode_start_addr_hi >> 2);
2541 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
2543 | |
2544 | /* reset me pipe */ |
2545 | tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); |
2546 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); |
2547 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); |
2548 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2549 | |
2550 | /* clear me pipe reset */ |
2551 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); |
2552 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); |
2553 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2554 | |
2555 | /* config mec program start addr */ |
2556 | for (pipe_id = 0; pipe_id < 4; pipe_id++) { |
		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2558 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, |
2559 | mec_hdr->ucode_start_addr_lo >> 2 | |
2560 | mec_hdr->ucode_start_addr_hi << 30); |
2561 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, |
2562 | mec_hdr->ucode_start_addr_hi >> 2); |
2563 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
2565 | |
2566 | /* reset mec pipe */ |
2567 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); |
2568 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); |
2569 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); |
2570 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); |
2571 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); |
2572 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); |
2573 | |
2574 | /* clear mec pipe reset */ |
2575 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); |
2576 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); |
2577 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); |
2578 | tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); |
2579 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); |
2580 | } |
2581 | |
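/*
 * Poll CP_STAT and the RLC bootload status register (which lives at a
 * different offset on the 11.0.1/11.0.4/11.5.x parts) until the RLC
 * reports BOOTLOAD_COMPLETE, then configure the CP instruction and
 * data caches from the offsets recorded in the autoload buffer.
 */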
2582 | static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) |
2583 | { |
2584 | uint32_t cp_status; |
2585 | uint32_t bootload_status; |
2586 | int i, r; |
2587 | uint64_t addr, addr2; |
2588 | |
2589 | for (i = 0; i < adev->usec_timeout; i++) { |
2590 | cp_status = RREG32_SOC15(GC, 0, regCP_STAT); |
2591 | |
		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
		    IP_VERSION(11, 0, 1) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) ==
		    IP_VERSION(11, 0, 4) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1))
2598 | bootload_status = RREG32_SOC15(GC, 0, |
2599 | regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); |
2600 | else |
2601 | bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); |
2602 | |
2603 | if ((cp_status == 0) && |
2604 | (REG_GET_FIELD(bootload_status, |
2605 | RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { |
2606 | break; |
2607 | } |
2608 | udelay(1); |
2609 | } |
2610 | |
2611 | if (i >= adev->usec_timeout) { |
		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2613 | return -ETIMEDOUT; |
2614 | } |
2615 | |
2616 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { |
2617 | if (adev->gfx.rs64_enable) { |
2618 | addr = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2619 | rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; |
2620 | addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2621 | rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; |
2622 | r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); |
2623 | if (r) |
2624 | return r; |
2625 | addr = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2626 | rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; |
2627 | addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2628 | rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; |
2629 | r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); |
2630 | if (r) |
2631 | return r; |
2632 | addr = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2633 | rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; |
2634 | addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2635 | rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; |
2636 | r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); |
2637 | if (r) |
2638 | return r; |
2639 | } else { |
2640 | addr = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2641 | rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; |
2642 | r = gfx_v11_0_config_me_cache(adev, addr); |
2643 | if (r) |
2644 | return r; |
2645 | addr = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2646 | rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; |
2647 | r = gfx_v11_0_config_pfp_cache(adev, addr); |
2648 | if (r) |
2649 | return r; |
2650 | addr = adev->gfx.rlc.rlc_autoload_gpu_addr + |
2651 | rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; |
2652 | r = gfx_v11_0_config_mec_cache(adev, addr); |
2653 | if (r) |
2654 | return r; |
2655 | } |
2656 | } |
2657 | |
2658 | return 0; |
2659 | } |
2660 | |
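/*
 * Halt or unhalt the gfx CP micro engines (ME and PFP) and wait for
 * CP_STAT to read back zero before returning.
 */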
2661 | static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) |
2662 | { |
2663 | int i; |
2664 | u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); |
2665 | |
2666 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); |
2667 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); |
2668 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2669 | |
2670 | for (i = 0; i < adev->usec_timeout; i++) { |
2671 | if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) |
2672 | break; |
2673 | udelay(1); |
2674 | } |
2675 | |
2676 | if (i >= adev->usec_timeout) |
		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2678 | |
2679 | return 0; |
2680 | } |
2681 | |
2682 | static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) |
2683 | { |
2684 | int r; |
2685 | const struct gfx_firmware_header_v1_0 *pfp_hdr; |
2686 | const __le32 *fw_data; |
2687 | unsigned i, fw_size; |
2688 | |
2689 | pfp_hdr = (const struct gfx_firmware_header_v1_0 *) |
2690 | adev->gfx.pfp_fw->data; |
2691 | |
	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2693 | |
2694 | fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + |
2695 | le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); |
2696 | fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); |
2697 | |
	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.pfp.pfp_fw_obj,
				      &adev->gfx.pfp.pfp_fw_gpu_addr,
				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2703 | if (r) { |
		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2705 | gfx_v11_0_pfp_fini(adev); |
2706 | return r; |
2707 | } |
2708 | |
2709 | memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); |
2710 | |
	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);

	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2715 | |
2716 | WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); |
2717 | |
2718 | for (i = 0; i < pfp_hdr->jt_size; i++) |
2719 | WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, |
2720 | le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); |
2721 | |
2722 | WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); |
2723 | |
2724 | return 0; |
2725 | } |
2726 | |
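/*
 * Direct RS64 PFP load: the v2.0 header splits the firmware into an
 * instruction image and a data image, each copied into its own 64KB
 * aligned BO (VRAM preferred, with a GTT fallback) before the caches
 * are primed and the per-pipe start addresses are programmed.
 */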
2727 | static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) |
2728 | { |
2729 | int r; |
2730 | const struct gfx_firmware_header_v2_0 *pfp_hdr; |
2731 | const __le32 *fw_ucode, *fw_data; |
2732 | unsigned i, pipe_id, fw_ucode_size, fw_data_size; |
2733 | uint32_t tmp; |
2734 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2735 | |
2736 | pfp_hdr = (const struct gfx_firmware_header_v2_0 *) |
2737 | adev->gfx.pfp_fw->data; |
2738 | |
	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2740 | |
2741 | /* instruction */ |
2742 | fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + |
2743 | le32_to_cpu(pfp_hdr->ucode_offset_bytes)); |
2744 | fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); |
2745 | /* data */ |
2746 | fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + |
2747 | le32_to_cpu(pfp_hdr->data_offset_bytes)); |
2748 | fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); |
2749 | |
2750 | /* 64kb align */ |
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.pfp.pfp_fw_obj,
				      &adev->gfx.pfp.pfp_fw_gpu_addr,
				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2758 | if (r) { |
		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2760 | gfx_v11_0_pfp_fini(adev); |
2761 | return r; |
2762 | } |
2763 | |
	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.pfp.pfp_fw_data_obj,
				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2771 | if (r) { |
		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2773 | gfx_v11_0_pfp_fini(adev); |
2774 | return r; |
2775 | } |
2776 | |
2777 | memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); |
2778 | memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); |
2779 | |
	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2784 | |
2785 | if (amdgpu_emu_mode == 1) |
2786 | adev->hdp.funcs->flush_hdp(adev, NULL); |
2787 | |
2788 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, |
2789 | lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); |
2790 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, |
2791 | upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); |
2792 | |
2793 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); |
2794 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); |
2795 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); |
2796 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); |
2797 | WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); |
2798 | |
	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces invalidation of the PFP L1 I$. Wait for the
	 * invalidation to complete.
	 */
2804 | for (i = 0; i < usec_timeout; i++) { |
2805 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2806 | if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, |
2807 | INVALIDATE_CACHE_COMPLETE)) |
2808 | break; |
2809 | udelay(1); |
2810 | } |
2811 | |
2812 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2814 | return -EINVAL; |
2815 | } |
2816 | |
2817 | /* Prime the L1 instruction caches */ |
2818 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2819 | tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); |
2820 | WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); |
	/* Wait for the cache to be primed */
2822 | for (i = 0; i < usec_timeout; i++) { |
2823 | tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); |
2824 | if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, |
2825 | ICACHE_PRIMED)) |
2826 | break; |
2827 | udelay(1); |
2828 | } |
2829 | |
2830 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to prime instruction cache\n");
2832 | return -EINVAL; |
2833 | } |
2834 | |
2835 | mutex_lock(&adev->srbm_mutex); |
2836 | for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { |
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
			     (pfp_hdr->ucode_start_addr_hi << 30) |
			     (pfp_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
			     pfp_hdr->ucode_start_addr_hi >> 2);
2843 | |
2844 | /* |
2845 | * Program CP_ME_CNTL to reset given PIPE to take |
2846 | * effect of CP_PFP_PRGRM_CNTR_START. |
2847 | */ |
2848 | tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); |
2849 | if (pipe_id == 0) |
2850 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2851 | PFP_PIPE0_RESET, 1); |
2852 | else |
2853 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2854 | PFP_PIPE1_RESET, 1); |
2855 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2856 | |
		/* Clear pfp pipe reset bit. */
2858 | if (pipe_id == 0) |
2859 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2860 | PFP_PIPE0_RESET, 0); |
2861 | else |
2862 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
2863 | PFP_PIPE1_RESET, 0); |
2864 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
2865 | |
2866 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, |
2867 | lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); |
2868 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, |
2869 | upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); |
2870 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
2873 | |
2874 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); |
2875 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); |
2876 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); |
2877 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); |
2878 | |
2879 | /* Invalidate the data caches */ |
2880 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
2881 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); |
2882 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); |
2883 | |
2884 | for (i = 0; i < usec_timeout; i++) { |
2885 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
2886 | if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, |
2887 | INVALIDATE_DCACHE_COMPLETE)) |
2888 | break; |
2889 | udelay(1); |
2890 | } |
2891 | |
2892 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2894 | return -EINVAL; |
2895 | } |
2896 | |
2897 | return 0; |
2898 | } |
2899 | |
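/*
 * Legacy (pre-RS64) ME microcode load: the v1_0 firmware image is copied
 * into a single GTT buffer object, the ME instruction cache is pointed at
 * it, and the jump table is written out through the
 * CP_HYP_ME_UCODE_ADDR/DATA register pair.
 */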
2900 | static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) |
2901 | { |
2902 | int r; |
2903 | const struct gfx_firmware_header_v1_0 *me_hdr; |
2904 | const __le32 *fw_data; |
2905 | unsigned i, fw_size; |
2906 | |
2907 | me_hdr = (const struct gfx_firmware_header_v1_0 *) |
2908 | adev->gfx.me_fw->data; |
2909 | |
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2911 | |
2912 | fw_data = (const __le32 *)(adev->gfx.me_fw->data + |
2913 | le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); |
2914 | fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); |
2915 | |
	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
2921 | if (r) { |
		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2923 | gfx_v11_0_me_fini(adev); |
2924 | return r; |
2925 | } |
2926 | |
2927 | memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); |
2928 | |
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);

	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2933 | |
2934 | WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); |
2935 | |
2936 | for (i = 0; i < me_hdr->jt_size; i++) |
2937 | WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, |
2938 | le32_to_cpup(fw_data + me_hdr->jt_offset + i)); |
2939 | |
2940 | WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); |
2941 | |
2942 | return 0; |
2943 | } |
2944 | |
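/*
 * RS64 ME microcode load: the v2_0 image carries separate instruction and
 * data sections, each placed in its own 64KB-aligned buffer object.  The
 * instruction cache is primed, then each gfx pipe is pointed at the ucode
 * start address and briefly reset so the new entry point takes effect.
 */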
2945 | static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) |
2946 | { |
2947 | int r; |
2948 | const struct gfx_firmware_header_v2_0 *me_hdr; |
2949 | const __le32 *fw_ucode, *fw_data; |
2950 | unsigned i, pipe_id, fw_ucode_size, fw_data_size; |
2951 | uint32_t tmp; |
2952 | uint32_t usec_timeout = 50000; /* wait for 50ms */ |
2953 | |
2954 | me_hdr = (const struct gfx_firmware_header_v2_0 *) |
2955 | adev->gfx.me_fw->data; |
2956 | |
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2958 | |
2959 | /* instruction */ |
2960 | fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + |
2961 | le32_to_cpu(me_hdr->ucode_offset_bytes)); |
2962 | fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); |
2963 | /* data */ |
2964 | fw_data = (const __le32 *)(adev->gfx.me_fw->data + |
2965 | le32_to_cpu(me_hdr->data_offset_bytes)); |
2966 | fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); |
2967 | |
	/* 64KB alignment */
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
2976 | if (r) { |
		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2978 | gfx_v11_0_me_fini(adev); |
2979 | return r; |
2980 | } |
2981 | |
	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_data_obj,
				      &adev->gfx.me.me_fw_data_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}
2994 | |
2995 | memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); |
2996 | memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); |
2997 | |
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3002 | |
3003 | if (amdgpu_emu_mode == 1) |
3004 | adev->hdp.funcs->flush_hdp(adev, NULL); |
3005 | |
3006 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, |
3007 | lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); |
3008 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, |
3009 | upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); |
3010 | |
3011 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); |
3012 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); |
3013 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); |
3014 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); |
3015 | WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); |
3016 | |
	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces an invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
3022 | for (i = 0; i < usec_timeout; i++) { |
3023 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
3024 | if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, |
3025 | INVALIDATE_CACHE_COMPLETE)) |
3026 | break; |
3027 | udelay(1); |
3028 | } |
3029 | |
3030 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3032 | return -EINVAL; |
3033 | } |
3034 | |
3035 | /* Prime the instruction caches */ |
3036 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
3037 | tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); |
3038 | WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); |
3039 | |
	/* Wait for the instruction cache to be primed */
3041 | for (i = 0; i < usec_timeout; i++) { |
3042 | tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); |
3043 | if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, |
3044 | ICACHE_PRIMED)) |
3045 | break; |
3046 | udelay(1); |
3047 | } |
3048 | |
3049 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to prime instruction cache\n");
3051 | return -EINVAL; |
3052 | } |
3053 | |
3054 | mutex_lock(&adev->srbm_mutex); |
3055 | for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { |
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			     (me_hdr->ucode_start_addr_hi << 30) |
			     (me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			     me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset the given pipe so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
3067 | tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); |
3068 | if (pipe_id == 0) |
3069 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
3070 | ME_PIPE0_RESET, 1); |
3071 | else |
3072 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
3073 | ME_PIPE1_RESET, 1); |
3074 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
3075 | |
		/* Clear the me pipe reset bit. */
3077 | if (pipe_id == 0) |
3078 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
3079 | ME_PIPE0_RESET, 0); |
3080 | else |
3081 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, |
3082 | ME_PIPE1_RESET, 0); |
3083 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); |
3084 | |
3085 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, |
3086 | lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); |
3087 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, |
3088 | upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); |
3089 | } |
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
3092 | |
3093 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); |
3094 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); |
3095 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); |
3096 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); |
3097 | |
3098 | /* Invalidate the data caches */ |
3099 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
3100 | tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); |
3101 | WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); |
3102 | |
3103 | for (i = 0; i < usec_timeout; i++) { |
3104 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); |
3105 | if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, |
3106 | INVALIDATE_DCACHE_COMPLETE)) |
3107 | break; |
3108 | udelay(1); |
3109 | } |
3110 | |
3111 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3113 | return -EINVAL; |
3114 | } |
3115 | |
3116 | return 0; |
3117 | } |
3118 | |
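/*
 * Top-level gfx microcode load for direct (non-PSP) loading: halt the CP,
 * then load PFP and ME through either the RS64 or the legacy path
 * depending on adev->gfx.rs64_enable.
 */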
3119 | static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) |
3120 | { |
3121 | int r; |
3122 | |
3123 | if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) |
3124 | return -EINVAL; |
3125 | |
	gfx_v11_0_cp_gfx_enable(adev, false);
3127 | |
3128 | if (adev->gfx.rs64_enable) |
3129 | r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); |
3130 | else |
3131 | r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); |
3132 | if (r) { |
		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3134 | return r; |
3135 | } |
3136 | |
3137 | if (adev->gfx.rs64_enable) |
3138 | r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); |
3139 | else |
3140 | r = gfx_v11_0_cp_gfx_load_me_microcode(adev); |
3141 | if (r) { |
		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3143 | return r; |
3144 | } |
3145 | |
3146 | return 0; |
3147 | } |
3148 | |
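/*
 * Emit the clear-state buffer on gfx ring 0: preamble and context control
 * packets, the SECT_CONTEXT register extents from gfx11_cs_data, the
 * harvested PA_SC_TILE_STEERING_OVERRIDE value, and a final CLEAR_STATE.
 * If a second gfx ring exists it only needs a CLEAR_STATE packet, which
 * copies state 0 to the next available state.
 */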
3149 | static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) |
3150 | { |
3151 | struct amdgpu_ring *ring; |
3152 | const struct cs_section_def *sect = NULL; |
3153 | const struct cs_extent_def *ext = NULL; |
3154 | int r, i; |
3155 | int ctx_reg_offset; |
3156 | |
3157 | /* init the CP */ |
3158 | WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, |
3159 | adev->gfx.config.max_hw_contexts - 1); |
3160 | WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); |
3161 | |
3162 | if (!amdgpu_async_gfx_ring) |
		gfx_v11_0_cp_gfx_enable(adev, true);
3164 | |
3165 | ring = &adev->gfx.gfx_ring[0]; |
	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3169 | return r; |
3170 | } |
3171 | |
3172 | amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); |
3173 | amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); |
3174 | |
3175 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); |
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);
3178 | |
3179 | for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { |
3180 | for (ext = sect->section; ext->extent != NULL; ++ext) { |
3181 | if (sect->id == SECT_CONTEXT) { |
3182 | amdgpu_ring_write(ring, |
3183 | PACKET3(PACKET3_SET_CONTEXT_REG, |
3184 | ext->reg_count)); |
				amdgpu_ring_write(ring, ext->reg_index -
						  PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
3189 | } |
3190 | } |
3191 | } |
3192 | |
3193 | ctx_reg_offset = |
3194 | SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; |
3195 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); |
	amdgpu_ring_write(ring, ctx_reg_offset);
	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3198 | |
3199 | amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); |
3200 | amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); |
3201 | |
3202 | amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); |
	amdgpu_ring_write(ring, 0);
3204 | |
3205 | amdgpu_ring_commit(ring); |
3206 | |
3207 | /* submit cs packet to copy state 0 to next available state */ |
3208 | if (adev->gfx.num_gfx_rings > 1) { |
3209 | /* maximum supported gfx ring is 2 */ |
3210 | ring = &adev->gfx.gfx_ring[1]; |
		r = amdgpu_ring_alloc(ring, 2);
		if (r) {
			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3214 | return r; |
3215 | } |
3216 | |
3217 | amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); |
		amdgpu_ring_write(ring, 0);
3219 | |
3220 | amdgpu_ring_commit(ring); |
3221 | } |
3222 | return 0; |
3223 | } |
3224 | |
3225 | static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, |
3226 | CP_PIPE_ID pipe) |
3227 | { |
3228 | u32 tmp; |
3229 | |
3230 | tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); |
3231 | tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); |
3232 | |
3233 | WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); |
3234 | } |
3235 | |
3236 | static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, |
3237 | struct amdgpu_ring *ring) |
3238 | { |
3239 | u32 tmp; |
3240 | |
3241 | tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); |
3242 | if (ring->use_doorbell) { |
3243 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, |
3244 | DOORBELL_OFFSET, ring->doorbell_index); |
3245 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, |
3246 | DOORBELL_EN, 1); |
3247 | } else { |
3248 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, |
3249 | DOORBELL_EN, 0); |
3250 | } |
3251 | WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); |
3252 | |
3253 | tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, |
3254 | DOORBELL_RANGE_LOWER, ring->doorbell_index); |
3255 | WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); |
3256 | |
3257 | WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, |
3258 | CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); |
3259 | } |
3260 | |
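/*
 * Bring up the gfx ring buffers by programming the CP_RB0/CP_RB1 register
 * files directly (ring size, rptr/wptr writeback addresses, base address
 * and doorbell), pipe by pipe under srbm_mutex, then kick the CP with the
 * clear-state submission.  This path is used when async gfx rings are
 * disabled; otherwise the rings are initialized through their MQDs.
 */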
3261 | static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) |
3262 | { |
3263 | struct amdgpu_ring *ring; |
3264 | u32 tmp; |
3265 | u32 rb_bufsz; |
3266 | u64 rb_addr, rptr_addr, wptr_gpu_addr; |
3267 | |
3268 | /* Set the write pointer delay */ |
3269 | WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); |
3270 | |
3271 | /* set the RB to use vmid 0 */ |
3272 | WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); |
3273 | |
3274 | /* Init gfx ring 0 for pipe 0 */ |
3275 | mutex_lock(&adev->srbm_mutex); |
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3277 | |
3278 | /* Set ring buffer size */ |
3279 | ring = &adev->gfx.gfx_ring[0]; |
3280 | rb_bufsz = order_base_2(ring->ring_size / 8); |
3281 | tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); |
3282 | tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); |
3283 | WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); |
3284 | |
3285 | /* Initialize the ring buffer's write pointers */ |
3286 | ring->wptr = 0; |
3287 | WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); |
3288 | WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); |
3289 | |
	/* set the wb address whether it's enabled or not */
3291 | rptr_addr = ring->rptr_gpu_addr; |
3292 | WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); |
3293 | WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & |
3294 | CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); |
3295 | |
3296 | wptr_gpu_addr = ring->wptr_gpu_addr; |
3297 | WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, |
3298 | lower_32_bits(wptr_gpu_addr)); |
3299 | WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, |
3300 | upper_32_bits(wptr_gpu_addr)); |
3301 | |
3302 | mdelay(1); |
3303 | WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); |
3304 | |
3305 | rb_addr = ring->gpu_addr >> 8; |
3306 | WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); |
3307 | WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); |
3308 | |
3309 | WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); |
3310 | |
3311 | gfx_v11_0_cp_gfx_set_doorbell(adev, ring); |
	mutex_unlock(&adev->srbm_mutex);
3313 | |
3314 | /* Init gfx ring 1 for pipe 1 */ |
3315 | if (adev->gfx.num_gfx_rings > 1) { |
3316 | mutex_lock(&adev->srbm_mutex); |
		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3318 | /* maximum supported gfx ring is 2 */ |
3319 | ring = &adev->gfx.gfx_ring[1]; |
3320 | rb_bufsz = order_base_2(ring->ring_size / 8); |
3321 | tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); |
3322 | tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); |
3323 | WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); |
3324 | /* Initialize the ring buffer's write pointers */ |
3325 | ring->wptr = 0; |
3326 | WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); |
3327 | WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); |
		/* Set the wb address whether it's enabled or not */
3329 | rptr_addr = ring->rptr_gpu_addr; |
3330 | WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); |
3331 | WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & |
3332 | CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); |
3333 | wptr_gpu_addr = ring->wptr_gpu_addr; |
3334 | WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, |
3335 | lower_32_bits(wptr_gpu_addr)); |
3336 | WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, |
3337 | upper_32_bits(wptr_gpu_addr)); |
3338 | |
3339 | mdelay(1); |
3340 | WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); |
3341 | |
3342 | rb_addr = ring->gpu_addr >> 8; |
3343 | WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); |
3344 | WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); |
3345 | WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); |
3346 | |
3347 | gfx_v11_0_cp_gfx_set_doorbell(adev, ring); |
		mutex_unlock(&adev->srbm_mutex);
3349 | } |
3350 | /* Switch to pipe 0 */ |
3351 | mutex_lock(&adev->srbm_mutex); |
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
	mutex_unlock(&adev->srbm_mutex);
3354 | |
3355 | /* start the ring */ |
3356 | gfx_v11_0_cp_gfx_start(adev); |
3357 | |
3358 | return 0; |
3359 | } |
3360 | |
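/*
 * Halt or release the MEC: for RS64 every pipe's reset and active bits
 * are toggled along with the invalidate/halt controls in
 * CP_MEC_RS64_CNTL, while the legacy path only flips the ME1/ME2 halt
 * bits (ME2 is left to the MES KIQ when that is enabled).  The udelay
 * gives the engine time to settle after the state change.
 */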
3361 | static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) |
3362 | { |
3363 | u32 data; |
3364 | |
3365 | if (adev->gfx.rs64_enable) { |
3366 | data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); |
3367 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, |
3368 | enable ? 0 : 1); |
3369 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, |
3370 | enable ? 0 : 1); |
3371 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, |
3372 | enable ? 0 : 1); |
3373 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, |
3374 | enable ? 0 : 1); |
3375 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, |
3376 | enable ? 0 : 1); |
3377 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, |
3378 | enable ? 1 : 0); |
3379 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, |
3380 | enable ? 1 : 0); |
3381 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, |
3382 | enable ? 1 : 0); |
3383 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, |
3384 | enable ? 1 : 0); |
3385 | data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, |
3386 | enable ? 0 : 1); |
3387 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); |
3388 | } else { |
3389 | data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); |
3390 | |
3391 | if (enable) { |
3392 | data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); |
3393 | if (!adev->enable_mes_kiq) |
3394 | data = REG_SET_FIELD(data, CP_MEC_CNTL, |
3395 | MEC_ME2_HALT, 0); |
3396 | } else { |
3397 | data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); |
3398 | data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); |
3399 | } |
3400 | WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); |
3401 | } |
3402 | |
3403 | udelay(50); |
3404 | } |
3405 | |
3406 | static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) |
3407 | { |
3408 | const struct gfx_firmware_header_v1_0 *mec_hdr; |
3409 | const __le32 *fw_data; |
3410 | unsigned i, fw_size; |
3411 | u32 *fw = NULL; |
3412 | int r; |
3413 | |
3414 | if (!adev->gfx.mec_fw) |
3415 | return -EINVAL; |
3416 | |
	gfx_v11_0_cp_compute_enable(adev, false);
3418 | |
3419 | mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; |
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3421 | |
3422 | fw_data = (const __le32 *) |
3423 | (adev->gfx.mec_fw->data + |
3424 | le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); |
3425 | fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); |
3426 | |
	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3434 | gfx_v11_0_mec_fini(adev); |
3435 | return r; |
3436 | } |
3437 | |
3438 | memcpy(fw, fw_data, fw_size); |
3439 | |
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3444 | |
3445 | /* MEC1 */ |
3446 | WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); |
3447 | |
3448 | for (i = 0; i < mec_hdr->jt_size; i++) |
3449 | WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, |
3450 | le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); |
3451 | |
3452 | WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); |
3453 | |
3454 | return 0; |
3455 | } |
3456 | |
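/*
 * RS64 MEC microcode load: instruction and data sections go into separate
 * 64KB-aligned buffer objects, every compute pipe is pointed at the data
 * base and the ucode entry point under srbm_mutex, and finally the MEC
 * data cache and CPC instruction cache are invalidated so the new image
 * is fetched.
 */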
3457 | static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) |
3458 | { |
3459 | const struct gfx_firmware_header_v2_0 *mec_hdr; |
3460 | const __le32 *fw_ucode, *fw_data; |
3461 | u32 tmp, fw_ucode_size, fw_data_size; |
3462 | u32 i, usec_timeout = 50000; /* Wait for 50 ms */ |
3463 | u32 *fw_ucode_ptr, *fw_data_ptr; |
3464 | int r; |
3465 | |
3466 | if (!adev->gfx.mec_fw) |
3467 | return -EINVAL; |
3468 | |
	gfx_v11_0_cp_compute_enable(adev, false);
3470 | |
3471 | mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; |
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3473 | |
3474 | fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + |
3475 | le32_to_cpu(mec_hdr->ucode_offset_bytes)); |
3476 | fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); |
3477 | |
3478 | fw_data = (const __le32 *) (adev->gfx.mec_fw->data + |
3479 | le32_to_cpu(mec_hdr->data_offset_bytes)); |
3480 | fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); |
3481 | |
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3491 | gfx_v11_0_mec_fini(adev); |
3492 | return r; |
3493 | } |
3494 | |
	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3504 | gfx_v11_0_mec_fini(adev); |
3505 | return r; |
3506 | } |
3507 | |
3508 | memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); |
3509 | memcpy(fw_data_ptr, fw_data, fw_data_size); |
3510 | |
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3515 | |
3516 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); |
3517 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); |
3518 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); |
3519 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); |
3520 | WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); |
3521 | |
3522 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); |
3523 | tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); |
3524 | tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); |
3525 | WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); |
3526 | |
3527 | mutex_lock(&adev->srbm_mutex); |
3528 | for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { |
		soc21_grbm_select(adev, 1, i, 0, 0);
3530 | |
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
			     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3534 | |
3535 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, |
3536 | mec_hdr->ucode_start_addr_lo >> 2 | |
3537 | mec_hdr->ucode_start_addr_hi << 30); |
3538 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, |
3539 | mec_hdr->ucode_start_addr_hi >> 2); |
3540 | |
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3544 | } |
	mutex_unlock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 0, 0, 0, 0);
3547 | |
	/* Trigger an invalidation of the MEC data cache */
3549 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); |
3550 | tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); |
3551 | WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); |
3552 | |
3553 | /* Wait for invalidation complete */ |
3554 | for (i = 0; i < usec_timeout; i++) { |
3555 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); |
3556 | if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, |
3557 | INVALIDATE_DCACHE_COMPLETE)) |
3558 | break; |
3559 | udelay(1); |
3560 | } |
3561 | |
3562 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3564 | return -EINVAL; |
3565 | } |
3566 | |
3567 | /* Trigger an invalidation of the L1 instruction caches */ |
3568 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); |
3569 | tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); |
3570 | WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); |
3571 | |
3572 | /* Wait for invalidation complete */ |
3573 | for (i = 0; i < usec_timeout; i++) { |
3574 | tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); |
3575 | if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, |
3576 | INVALIDATE_CACHE_COMPLETE)) |
3577 | break; |
3578 | udelay(1); |
3579 | } |
3580 | |
3581 | if (i >= usec_timeout) { |
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3583 | return -EINVAL; |
3584 | } |
3585 | |
3586 | return 0; |
3587 | } |
3588 | |
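/*
 * Register the KIQ with the RLC scheduler.  Judging from the shifts
 * below, the low byte of RLC_CP_SCHEDULERS encodes the queue as
 * (me << 5) | (pipe << 3) | queue, and bit 7 is written in a second
 * pass, presumably as a valid/activate bit (inferred from the code, not
 * from register documentation).
 */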
3589 | static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) |
3590 | { |
3591 | uint32_t tmp; |
3592 | struct amdgpu_device *adev = ring->adev; |
3593 | |
	/* tell the RLC which queue is the KIQ */
3595 | tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); |
3596 | tmp &= 0xffffff00; |
3597 | tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); |
3598 | WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); |
3599 | tmp |= 0x80; |
3600 | WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); |
3601 | } |
3602 | |
3603 | static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) |
3604 | { |
3605 | /* set graphics engine doorbell range */ |
3606 | WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, |
3607 | (adev->doorbell_index.gfx_ring0 * 2) << 2); |
3608 | WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, |
3609 | (adev->doorbell_index.gfx_userqueue_end * 2) << 2); |
3610 | |
3611 | /* set compute engine doorbell range */ |
3612 | WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, |
3613 | (adev->doorbell_index.kiq * 2) << 2); |
3614 | WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, |
3615 | (adev->doorbell_index.userqueue_end * 2) << 2); |
3616 | } |
3617 | |
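/*
 * Fill a v11 gfx MQD (memory queue descriptor) from the generic queue
 * properties.  The fields mirror the CP_RB0_* programming done in
 * gfx_v11_0_cp_gfx_resume(), but stored in memory so the CP/MES can load
 * and save the queue state on its own.
 */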
3618 | static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, |
3619 | struct amdgpu_mqd_prop *prop) |
3620 | { |
3621 | struct v11_gfx_mqd *mqd = m; |
3622 | uint64_t hqd_gpu_addr, wb_gpu_addr; |
3623 | uint32_t tmp; |
3624 | uint32_t rb_bufsz; |
3625 | |
3626 | /* set up gfx hqd wptr */ |
3627 | mqd->cp_gfx_hqd_wptr = 0; |
3628 | mqd->cp_gfx_hqd_wptr_hi = 0; |
3629 | |
3630 | /* set the pointer to the MQD */ |
3631 | mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; |
3632 | mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); |
3633 | |
3634 | /* set up mqd control */ |
3635 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); |
3636 | tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); |
3637 | tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); |
3638 | tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); |
3639 | mqd->cp_gfx_mqd_control = tmp; |
3640 | |
	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3642 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); |
3643 | tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); |
3644 | mqd->cp_gfx_hqd_vmid = 0; |
3645 | |
3646 | /* set up default queue priority level |
3647 | * 0x0 = low priority, 0x1 = high priority */ |
3648 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); |
3649 | tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); |
3650 | mqd->cp_gfx_hqd_queue_priority = tmp; |
3651 | |
3652 | /* set up time quantum */ |
3653 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); |
3654 | tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); |
3655 | mqd->cp_gfx_hqd_quantum = tmp; |
3656 | |
3657 | /* set up gfx hqd base. this is similar as CP_RB_BASE */ |
3658 | hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; |
3659 | mqd->cp_gfx_hqd_base = hqd_gpu_addr; |
3660 | mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); |
3661 | |
3662 | /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ |
3663 | wb_gpu_addr = prop->rptr_gpu_addr; |
3664 | mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; |
3665 | mqd->cp_gfx_hqd_rptr_addr_hi = |
3666 | upper_32_bits(wb_gpu_addr) & 0xffff; |
3667 | |
3668 | /* set up rb_wptr_poll addr */ |
3669 | wb_gpu_addr = prop->wptr_gpu_addr; |
3670 | mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; |
3671 | mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; |
3672 | |
3673 | /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ |
3674 | rb_bufsz = order_base_2(prop->queue_size / 4) - 1; |
3675 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); |
3676 | tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); |
3677 | tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); |
3678 | #ifdef __BIG_ENDIAN |
3679 | tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); |
3680 | #endif |
3681 | mqd->cp_gfx_hqd_cntl = tmp; |
3682 | |
3683 | /* set up cp_doorbell_control */ |
3684 | tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); |
3685 | if (prop->use_doorbell) { |
3686 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, |
3687 | DOORBELL_OFFSET, prop->doorbell_index); |
3688 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, |
3689 | DOORBELL_EN, 1); |
3690 | } else |
3691 | tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, |
3692 | DOORBELL_EN, 0); |
3693 | mqd->cp_rb_doorbell_control = tmp; |
3694 | |
3695 | /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ |
3696 | mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); |
3697 | |
	/* activate the queue */
3699 | mqd->cp_gfx_hqd_active = 1; |
3700 | |
3701 | return 0; |
3702 | } |
3703 | |
3704 | static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) |
3705 | { |
3706 | struct amdgpu_device *adev = ring->adev; |
3707 | struct v11_gfx_mqd *mqd = ring->mqd_ptr; |
3708 | int mqd_idx = ring - &adev->gfx.gfx_ring[0]; |
3709 | |
3710 | if (!amdgpu_in_reset(adev) && !adev->in_suspend) { |
3711 | memset((void *)mqd, 0, sizeof(*mqd)); |
3712 | mutex_lock(&adev->srbm_mutex); |
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
3717 | if (adev->gfx.me.mqd_backup[mqd_idx]) |
3718 | memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); |
3719 | } else { |
3720 | /* restore mqd with the backup copy */ |
3721 | if (adev->gfx.me.mqd_backup[mqd_idx]) |
3722 | memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); |
3723 | /* reset the ring */ |
3724 | ring->wptr = 0; |
3725 | *ring->wptr_cpu_addr = 0; |
3726 | amdgpu_ring_clear_ring(ring); |
3727 | } |
3728 | |
3729 | return 0; |
3730 | } |
3731 | |
3732 | static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) |
3733 | { |
3734 | int r, i; |
3735 | struct amdgpu_ring *ring; |
3736 | |
3737 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
3738 | ring = &adev->gfx.gfx_ring[i]; |
3739 | |
		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			return r;

		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v11_0_gfx_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
3751 | if (r) |
3752 | return r; |
3753 | } |
3754 | |
	r = amdgpu_gfx_enable_kgq(adev, 0);
3756 | if (r) |
3757 | return r; |
3758 | |
3759 | return gfx_v11_0_cp_gfx_start(adev); |
3760 | } |
3761 | |
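/*
 * Fill a v11 compute MQD.  This is the in-memory mirror of the CP_HQD_*
 * register file: EOP buffer, doorbell, queue base/size and priorities are
 * all described here so the queue can be mapped either by MMIO (KIQ) or
 * through the KIQ/MES mapping packets (KCQs).
 */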
3762 | static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, |
3763 | struct amdgpu_mqd_prop *prop) |
3764 | { |
3765 | struct v11_compute_mqd *mqd = m; |
3766 | uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; |
3767 | uint32_t tmp; |
3768 | |
3769 | mqd->header = 0xC0310800; |
3770 | mqd->compute_pipelinestat_enable = 0x00000001; |
3771 | mqd->compute_static_thread_mgmt_se0 = 0xffffffff; |
3772 | mqd->compute_static_thread_mgmt_se1 = 0xffffffff; |
3773 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; |
3774 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; |
3775 | mqd->compute_misc_reserved = 0x00000007; |
3776 | |
3777 | eop_base_addr = prop->eop_gpu_addr >> 8; |
3778 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; |
3779 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); |
3780 | |
3781 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ |
3782 | tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); |
3783 | tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, |
3784 | (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); |
3785 | |
3786 | mqd->cp_hqd_eop_control = tmp; |
3787 | |
3788 | /* enable doorbell? */ |
3789 | tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); |
3790 | |
3791 | if (prop->use_doorbell) { |
3792 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3793 | DOORBELL_OFFSET, prop->doorbell_index); |
3794 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3795 | DOORBELL_EN, 1); |
3796 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3797 | DOORBELL_SOURCE, 0); |
3798 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3799 | DOORBELL_HIT, 0); |
3800 | } else { |
3801 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3802 | DOORBELL_EN, 0); |
3803 | } |
3804 | |
3805 | mqd->cp_hqd_pq_doorbell_control = tmp; |
3806 | |
3807 | /* disable the queue if it's active */ |
3808 | mqd->cp_hqd_dequeue_request = 0; |
3809 | mqd->cp_hqd_pq_rptr = 0; |
3810 | mqd->cp_hqd_pq_wptr_lo = 0; |
3811 | mqd->cp_hqd_pq_wptr_hi = 0; |
3812 | |
3813 | /* set the pointer to the MQD */ |
3814 | mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; |
3815 | mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); |
3816 | |
3817 | /* set MQD vmid to 0 */ |
3818 | tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); |
3819 | tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); |
3820 | mqd->cp_mqd_control = tmp; |
3821 | |
3822 | /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ |
3823 | hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; |
3824 | mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; |
3825 | mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); |
3826 | |
3827 | /* set up the HQD, this is similar to CP_RB0_CNTL */ |
3828 | tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); |
3829 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, |
3830 | (order_base_2(prop->queue_size / 4) - 1)); |
3831 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, |
3832 | (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); |
3833 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); |
3834 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, |
3835 | prop->allow_tunneling); |
3836 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); |
3837 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); |
3838 | mqd->cp_hqd_pq_control = tmp; |
3839 | |
3840 | /* set the wb address whether it's enabled or not */ |
3841 | wb_gpu_addr = prop->rptr_gpu_addr; |
3842 | mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; |
3843 | mqd->cp_hqd_pq_rptr_report_addr_hi = |
3844 | upper_32_bits(wb_gpu_addr) & 0xffff; |
3845 | |
3846 | /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ |
3847 | wb_gpu_addr = prop->wptr_gpu_addr; |
3848 | mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; |
3849 | mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; |
3850 | |
3851 | tmp = 0; |
3852 | /* enable the doorbell if requested */ |
3853 | if (prop->use_doorbell) { |
3854 | tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); |
3855 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3856 | DOORBELL_OFFSET, prop->doorbell_index); |
3857 | |
3858 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3859 | DOORBELL_EN, 1); |
3860 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3861 | DOORBELL_SOURCE, 0); |
3862 | tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, |
3863 | DOORBELL_HIT, 0); |
3864 | } |
3865 | |
3866 | mqd->cp_hqd_pq_doorbell_control = tmp; |
3867 | |
3868 | /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ |
3869 | mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); |
3870 | |
3871 | /* set the vmid for the queue */ |
3872 | mqd->cp_hqd_vmid = 0; |
3873 | |
3874 | tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); |
3875 | tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); |
3876 | mqd->cp_hqd_persistent_state = tmp; |
3877 | |
3878 | /* set MIN_IB_AVAIL_SIZE */ |
3879 | tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); |
3880 | tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); |
3881 | mqd->cp_hqd_ib_control = tmp; |
3882 | |
3883 | /* set static priority for a compute queue/ring */ |
3884 | mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; |
3885 | mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; |
3886 | |
3887 | mqd->cp_hqd_active = prop->hqd_active; |
3888 | |
3889 | return 0; |
3890 | } |
3891 | |
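/*
 * Program the KIQ's HQD registers directly from its MQD via MMIO.  The
 * KIQ cannot map itself, so unlike the KCQs it is brought up by the
 * driver: drain/deactivate any active queue first, then write the MQD
 * fields out one register at a time.
 */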
3892 | static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) |
3893 | { |
3894 | struct amdgpu_device *adev = ring->adev; |
3895 | struct v11_compute_mqd *mqd = ring->mqd_ptr; |
3896 | int j; |
3897 | |
	/* deactivate the queue */
3899 | if (amdgpu_sriov_vf(adev)) |
3900 | WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); |
3901 | |
3902 | /* disable wptr polling */ |
3903 | WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); |
3904 | |
3905 | /* write the EOP addr */ |
3906 | WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, |
3907 | mqd->cp_hqd_eop_base_addr_lo); |
3908 | WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, |
3909 | mqd->cp_hqd_eop_base_addr_hi); |
3910 | |
3911 | /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ |
3912 | WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, |
3913 | mqd->cp_hqd_eop_control); |
3914 | |
3915 | /* enable doorbell? */ |
3916 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, |
3917 | mqd->cp_hqd_pq_doorbell_control); |
3918 | |
3919 | /* disable the queue if it's active */ |
3920 | if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { |
3921 | WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); |
3922 | for (j = 0; j < adev->usec_timeout; j++) { |
3923 | if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) |
3924 | break; |
3925 | udelay(1); |
3926 | } |
3927 | WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, |
3928 | mqd->cp_hqd_dequeue_request); |
3929 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, |
3930 | mqd->cp_hqd_pq_rptr); |
3931 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, |
3932 | mqd->cp_hqd_pq_wptr_lo); |
3933 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, |
3934 | mqd->cp_hqd_pq_wptr_hi); |
3935 | } |
3936 | |
3937 | /* set the pointer to the MQD */ |
3938 | WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, |
3939 | mqd->cp_mqd_base_addr_lo); |
3940 | WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, |
3941 | mqd->cp_mqd_base_addr_hi); |
3942 | |
3943 | /* set MQD vmid to 0 */ |
3944 | WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, |
3945 | mqd->cp_mqd_control); |
3946 | |
3947 | /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ |
3948 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, |
3949 | mqd->cp_hqd_pq_base_lo); |
3950 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, |
3951 | mqd->cp_hqd_pq_base_hi); |
3952 | |
3953 | /* set up the HQD, this is similar to CP_RB0_CNTL */ |
3954 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, |
3955 | mqd->cp_hqd_pq_control); |
3956 | |
3957 | /* set the wb address whether it's enabled or not */ |
3958 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, |
3959 | mqd->cp_hqd_pq_rptr_report_addr_lo); |
3960 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, |
3961 | mqd->cp_hqd_pq_rptr_report_addr_hi); |
3962 | |
3963 | /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ |
3964 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, |
3965 | mqd->cp_hqd_pq_wptr_poll_addr_lo); |
3966 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, |
3967 | mqd->cp_hqd_pq_wptr_poll_addr_hi); |
3968 | |
3969 | /* enable the doorbell if requested */ |
3970 | if (ring->use_doorbell) { |
3971 | WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, |
3972 | (adev->doorbell_index.kiq * 2) << 2); |
3973 | WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, |
3974 | (adev->doorbell_index.userqueue_end * 2) << 2); |
3975 | } |
3976 | |
3977 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, |
3978 | mqd->cp_hqd_pq_doorbell_control); |
3979 | |
3980 | /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ |
3981 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, |
3982 | mqd->cp_hqd_pq_wptr_lo); |
3983 | WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, |
3984 | mqd->cp_hqd_pq_wptr_hi); |
3985 | |
3986 | /* set the vmid for the queue */ |
3987 | WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); |
3988 | |
3989 | WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, |
3990 | mqd->cp_hqd_persistent_state); |
3991 | |
3992 | /* activate the queue */ |
3993 | WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, |
3994 | mqd->cp_hqd_active); |
3995 | |
3996 | if (ring->use_doorbell) |
3997 | WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); |
3998 | |
3999 | return 0; |
4000 | } |
4001 | |
4002 | static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) |
4003 | { |
4004 | struct amdgpu_device *adev = ring->adev; |
4005 | struct v11_compute_mqd *mqd = ring->mqd_ptr; |
4006 | |
4007 | gfx_v11_0_kiq_setting(ring); |
4008 | |
4009 | if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ |
4010 | /* reset MQD to a clean status */ |
4011 | if (adev->gfx.kiq[0].mqd_backup) |
4012 | memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); |
4013 | |
4014 | /* reset ring buffer */ |
4015 | ring->wptr = 0; |
4016 | amdgpu_ring_clear_ring(ring); |
4017 | |
4018 | mutex_lock(&adev->srbm_mutex); |
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
4023 | } else { |
4024 | memset((void *)mqd, 0, sizeof(*mqd)); |
4025 | if (amdgpu_sriov_vf(adev) && adev->in_suspend) |
4026 | amdgpu_ring_clear_ring(ring); |
4027 | mutex_lock(&adev->srbm_mutex); |
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
4033 | |
4034 | if (adev->gfx.kiq[0].mqd_backup) |
4035 | memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); |
4036 | } |
4037 | |
4038 | return 0; |
4039 | } |
4040 | |
4041 | static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) |
4042 | { |
4043 | struct amdgpu_device *adev = ring->adev; |
4044 | struct v11_compute_mqd *mqd = ring->mqd_ptr; |
4045 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; |
4046 | |
4047 | if (!amdgpu_in_reset(adev) && !adev->in_suspend) { |
4048 | memset((void *)mqd, 0, sizeof(*mqd)); |
4049 | mutex_lock(&adev->srbm_mutex); |
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
4054 | |
4055 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4056 | memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); |
4057 | } else { |
4058 | /* restore MQD to a clean status */ |
4059 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4060 | memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); |
4061 | /* reset ring buffer */ |
4062 | ring->wptr = 0; |
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4064 | amdgpu_ring_clear_ring(ring); |
4065 | } |
4066 | |
4067 | return 0; |
4068 | } |
4069 | |
4070 | static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) |
4071 | { |
4072 | struct amdgpu_ring *ring; |
4073 | int r; |
4074 | |
4075 | ring = &adev->gfx.kiq[0].ring; |
4076 | |
	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (unlikely(r != 0)) {
		amdgpu_bo_unreserve(ring->mqd_obj);
		return r;
	}

	gfx_v11_0_kiq_init_queue(ring);
	amdgpu_bo_kunmap(ring->mqd_obj);
	ring->mqd_ptr = NULL;
	amdgpu_bo_unreserve(ring->mqd_obj);
4091 | ring->sched.ready = true; |
4092 | return 0; |
4093 | } |
4094 | |
4095 | static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) |
4096 | { |
4097 | struct amdgpu_ring *ring = NULL; |
4098 | int r = 0, i; |
4099 | |
4100 | if (!amdgpu_async_gfx_ring) |
		gfx_v11_0_cp_compute_enable(adev, true);
4102 | |
4103 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
4104 | ring = &adev->gfx.compute_ring[i]; |
4105 | |
		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v11_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
4116 | if (r) |
4117 | goto done; |
4118 | } |
4119 | |
	r = amdgpu_gfx_enable_kcq(adev, 0);
4121 | done: |
4122 | return r; |
4123 | } |
4124 | |
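/*
 * CP bring-up order: (optionally) load gfx and compute microcode for
 * direct loading, program the doorbell apertures, resume the KIQ (or the
 * MES KIQ), map the compute queues, resume the gfx rings, and finally
 * ring-test everything.
 */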
4125 | static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) |
4126 | { |
4127 | int r, i; |
4128 | struct amdgpu_ring *ring; |
4129 | |
4130 | if (!(adev->flags & AMD_IS_APU)) |
		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4132 | |
4133 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { |
4134 | /* legacy firmware loading */ |
4135 | r = gfx_v11_0_cp_gfx_load_microcode(adev); |
4136 | if (r) |
4137 | return r; |
4138 | |
4139 | if (adev->gfx.rs64_enable) |
4140 | r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); |
4141 | else |
4142 | r = gfx_v11_0_cp_compute_load_microcode(adev); |
4143 | if (r) |
4144 | return r; |
4145 | } |
4146 | |
4147 | gfx_v11_0_cp_set_doorbell_range(adev); |
4148 | |
4149 | if (amdgpu_async_gfx_ring) { |
		gfx_v11_0_cp_compute_enable(adev, true);
		gfx_v11_0_cp_gfx_enable(adev, true);
4152 | } |
4153 | |
4154 | if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) |
4155 | r = amdgpu_mes_kiq_hw_init(adev); |
4156 | else |
4157 | r = gfx_v11_0_kiq_resume(adev); |
4158 | if (r) |
4159 | return r; |
4160 | |
4161 | r = gfx_v11_0_kcq_resume(adev); |
4162 | if (r) |
4163 | return r; |
4164 | |
4165 | if (!amdgpu_async_gfx_ring) { |
4166 | r = gfx_v11_0_cp_gfx_resume(adev); |
4167 | if (r) |
4168 | return r; |
4169 | } else { |
4170 | r = gfx_v11_0_cp_async_gfx_ring_resume(adev); |
4171 | if (r) |
4172 | return r; |
4173 | } |
4174 | |
4175 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
4176 | ring = &adev->gfx.gfx_ring[i]; |
4177 | r = amdgpu_ring_test_helper(ring); |
4178 | if (r) |
4179 | return r; |
4180 | } |
4181 | |
4182 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
4183 | ring = &adev->gfx.compute_ring[i]; |
4184 | r = amdgpu_ring_test_helper(ring); |
4185 | if (r) |
4186 | return r; |
4187 | } |
4188 | |
4189 | return 0; |
4190 | } |
4191 | |
4192 | static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) |
4193 | { |
4194 | gfx_v11_0_cp_gfx_enable(adev, enable); |
4195 | gfx_v11_0_cp_compute_enable(adev, enable); |
4196 | } |
4197 | |
4198 | static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) |
4199 | { |
4200 | int r; |
4201 | bool value; |
4202 | |
4203 | r = adev->gfxhub.funcs->gart_enable(adev); |
4204 | if (r) |
4205 | return r; |
4206 | |
4207 | adev->hdp.funcs->flush_hdp(adev, NULL); |
4208 | |
	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
4211 | |
4212 | adev->gfxhub.funcs->set_fault_enable_default(adev, value); |
	amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4214 | |
4215 | return 0; |
4216 | } |
4217 | |
4218 | static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) |
4219 | { |
4220 | u32 tmp; |
4221 | |
4222 | /* select RS64 */ |
4223 | if (adev->gfx.rs64_enable) { |
4224 | tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); |
4225 | tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); |
4226 | WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); |
4227 | |
4228 | tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); |
4229 | tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); |
4230 | WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); |
4231 | } |
4232 | |
4233 | if (amdgpu_emu_mode == 1) |
		msleep(100);
4235 | } |
4236 | |
static int get_gb_addr_config(struct amdgpu_device *adev)
4238 | { |
4239 | u32 gb_addr_config; |
4240 | |
4241 | gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); |
4242 | if (gb_addr_config == 0) |
4243 | return -EINVAL; |
4244 | |
4245 | adev->gfx.config.gb_addr_config_fields.num_pkrs = |
4246 | 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); |
4247 | |
4248 | adev->gfx.config.gb_addr_config = gb_addr_config; |
4249 | |
4250 | adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << |
4251 | REG_GET_FIELD(adev->gfx.config.gb_addr_config, |
4252 | GB_ADDR_CONFIG, NUM_PIPES); |
4253 | |
4254 | adev->gfx.config.max_tile_pipes = |
4255 | adev->gfx.config.gb_addr_config_fields.num_pipes; |
4256 | |
4257 | adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << |
4258 | REG_GET_FIELD(adev->gfx.config.gb_addr_config, |
4259 | GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); |
4260 | adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << |
4261 | REG_GET_FIELD(adev->gfx.config.gb_addr_config, |
4262 | GB_ADDR_CONFIG, NUM_RB_PER_SE); |
4263 | adev->gfx.config.gb_addr_config_fields.num_se = 1 << |
4264 | REG_GET_FIELD(adev->gfx.config.gb_addr_config, |
4265 | GB_ADDR_CONFIG, NUM_SHADER_ENGINES); |
4266 | adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + |
4267 | REG_GET_FIELD(adev->gfx.config.gb_addr_config, |
4268 | GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); |
4269 | |
4270 | return 0; |
4271 | } |
4272 | |
4273 | static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) |
4274 | { |
4275 | uint32_t data; |
4276 | |
4277 | data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); |
4278 | data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; |
4279 | WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); |
4280 | |
4281 | data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); |
4282 | data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; |
4283 | WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); |
4284 | } |
4285 | |
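/*
 * hw_init handles three firmware load flavors: RLC backdoor autoload
 * (program the RLC RAM and wait for autoload to finish), PSP loading,
 * and direct loading (IMU brought up by the driver, GPA mode disabled).
 * Afterwards the GART, golden registers, constants, RLC and CP are
 * brought up in that order.
 */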
4286 | static int gfx_v11_0_hw_init(void *handle) |
4287 | { |
4288 | int r; |
4289 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4290 | |
4291 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { |
4292 | if (adev->gfx.imu.funcs) { |
4293 | /* RLC autoload sequence 1: Program rlc ram */ |
4294 | if (adev->gfx.imu.funcs->program_rlc_ram) |
4295 | adev->gfx.imu.funcs->program_rlc_ram(adev); |
4296 | } |
4297 | /* rlc autoload firmware */ |
4298 | r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); |
4299 | if (r) |
4300 | return r; |
4301 | } else { |
4302 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { |
4303 | if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { |
4304 | if (adev->gfx.imu.funcs->load_microcode) |
4305 | adev->gfx.imu.funcs->load_microcode(adev); |
4306 | if (adev->gfx.imu.funcs->setup_imu) |
4307 | adev->gfx.imu.funcs->setup_imu(adev); |
4308 | if (adev->gfx.imu.funcs->start_imu) |
4309 | adev->gfx.imu.funcs->start_imu(adev); |
4310 | } |
4311 | |
4312 | /* disable gpa mode in backdoor loading */ |
4313 | gfx_v11_0_disable_gpa_mode(adev); |
4314 | } |
4315 | } |
4316 | |
4317 | if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || |
4318 | (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { |
4319 | r = gfx_v11_0_wait_for_rlc_autoload_complete(adev); |
4320 | if (r) { |
			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4322 | return r; |
4323 | } |
4324 | } |
4325 | |
4326 | adev->gfx.is_poweron = true; |
4327 | |
	if (get_gb_addr_config(adev))
		DRM_WARN("Invalid gb_addr_config!\n");
4330 | |
4331 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && |
4332 | adev->gfx.rs64_enable) |
4333 | gfx_v11_0_config_gfx_rs64(adev); |
4334 | |
4335 | r = gfx_v11_0_gfxhub_enable(adev); |
4336 | if (r) |
4337 | return r; |
4338 | |
4339 | if (!amdgpu_emu_mode) |
4340 | gfx_v11_0_init_golden_registers(adev); |
4341 | |
4342 | if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || |
4343 | (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { |
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU
		 * firmware being loaded first, so for direct loading the
		 * SMC ucode has to be loaded here before the RLC.
		 */
4349 | if (!(adev->flags & AMD_IS_APU)) { |
4350 | r = amdgpu_pm_load_smu_firmware(adev, NULL); |
4351 | if (r) |
4352 | return r; |
4353 | } |
4354 | } |
4355 | |
4356 | gfx_v11_0_constants_init(adev); |
4357 | |
4358 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) |
4359 | gfx_v11_0_select_cp_fw_arch(adev); |
4360 | |
4361 | if (adev->nbio.funcs->gc_doorbell_init) |
4362 | adev->nbio.funcs->gc_doorbell_init(adev); |
4363 | |
4364 | r = gfx_v11_0_rlc_resume(adev); |
4365 | if (r) |
4366 | return r; |
4367 | |
4368 | /* |
4369 | * init golden registers and rlc resume may override some registers, |
4370 | * reconfig them here |
4371 | */ |
4372 | gfx_v11_0_tcp_harvest(adev); |
4373 | |
4374 | r = gfx_v11_0_cp_resume(adev); |
4375 | if (r) |
4376 | return r; |
4377 | |
4378 | /* get IMU version from HW if it's not set */ |
4379 | if (!adev->gfx.imu_fw_version) |
4380 | adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); |
4381 | |
4382 | return r; |
4383 | } |
4384 | |
4385 | static int gfx_v11_0_hw_fini(void *handle) |
4386 | { |
4387 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4388 | |
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4391 | |
4392 | if (!adev->no_hw_access) { |
4393 | if (amdgpu_async_gfx_ring) { |
			if (amdgpu_gfx_disable_kgq(adev, 0))
				DRM_ERROR("KGQ disable failed\n");
4396 | } |
4397 | |
		if (amdgpu_gfx_disable_kcq(adev, 0))
			DRM_ERROR("KCQ disable failed\n");
4400 | |
4401 | amdgpu_mes_kiq_hw_fini(adev); |
4402 | } |
4403 | |
4404 | if (amdgpu_sriov_vf(adev)) |
4405 | /* Remove the steps disabling CPG and clearing KIQ position, |
4406 | * so that CP could perform IDLE-SAVE during switch. Those |
4407 | * steps are necessary to avoid a DMAR error in gfx9 but it is |
4408 | * not reproduced on gfx11. |
4409 | */ |
4410 | return 0; |
4411 | |
	gfx_v11_0_cp_enable(adev, false);
	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4414 | |
4415 | adev->gfxhub.funcs->gart_disable(adev); |
4416 | |
4417 | adev->gfx.is_poweron = false; |
4418 | |
4419 | return 0; |
4420 | } |
4421 | |
4422 | static int gfx_v11_0_suspend(void *handle) |
4423 | { |
4424 | return gfx_v11_0_hw_fini(handle); |
4425 | } |
4426 | |
4427 | static int gfx_v11_0_resume(void *handle) |
4428 | { |
4429 | return gfx_v11_0_hw_init(handle); |
4430 | } |
4431 | |
4432 | static bool gfx_v11_0_is_idle(void *handle) |
4433 | { |
4434 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4435 | |
4436 | if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), |
4437 | GRBM_STATUS, GUI_ACTIVE)) |
4438 | return false; |
4439 | else |
4440 | return true; |
4441 | } |
4442 | |
4443 | static int gfx_v11_0_wait_for_idle(void *handle) |
4444 | { |
4445 | unsigned i; |
4446 | u32 tmp; |
4447 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4448 | |
4449 | for (i = 0; i < adev->usec_timeout; i++) { |
		/* read GRBM_STATUS */
4451 | tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & |
4452 | GRBM_STATUS__GUI_ACTIVE_MASK; |
4453 | |
4454 | if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) |
4455 | return 0; |
4456 | udelay(1); |
4457 | } |
4458 | return -ETIMEDOUT; |
4459 | } |
4460 | |
4461 | static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, |
4462 | int req) |
4463 | { |
4464 | u32 i, tmp, val; |
4465 | |
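	/*
	 * The index mutex is taken by writing REQUEST=1 with our CLIENTID and
	 * polling until the register reads back exactly what was written; it
	 * is released by writing REQUEST=0 and polling until the readback no
	 * longer reports the lock as held by this client.
	 */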
4466 | for (i = 0; i < adev->usec_timeout; i++) { |
4467 | /* Request with MeId=2, PipeId=0 */ |
4468 | tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); |
4469 | tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); |
4470 | WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); |
4471 | |
4472 | val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); |
4473 | if (req) { |
4474 | if (val == tmp) |
4475 | break; |
4476 | } else { |
4477 | tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, |
4478 | REQUEST, 1); |
4479 | |
4480 | /* unlocked or locked by firmware */ |
4481 | if (val != tmp) |
4482 | break; |
4483 | } |
4484 | udelay(1); |
4485 | } |
4486 | |
4487 | if (i >= adev->usec_timeout) |
4488 | return -EINVAL; |
4489 | |
4490 | return 0; |
4491 | } |
4492 | |
4493 | static int gfx_v11_0_soft_reset(void *handle) |
4494 | { |
4495 | u32 grbm_soft_reset = 0; |
4496 | u32 tmp; |
4497 | int r, i, j, k; |
4498 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4499 | |
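	/* mask off CP busy/idle interrupts while the queues are being reset */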
4500 | tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); |
4501 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); |
4502 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); |
4503 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); |
4504 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); |
4505 | WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); |
4506 | |
	gfx_v11_0_set_safe_mode(adev, 0);
4508 | |
4509 | for (i = 0; i < adev->gfx.mec.num_mec; ++i) { |
4510 | for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { |
4511 | for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { |
4512 | tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); |
4513 | tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); |
4514 | tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); |
4515 | tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); |
4516 | WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); |
4517 | |
4518 | WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); |
4519 | WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); |
4520 | } |
4521 | } |
4522 | } |
4523 | for (i = 0; i < adev->gfx.me.num_me; ++i) { |
4524 | for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { |
4525 | for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { |
4526 | tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); |
4527 | tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); |
4528 | tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); |
4529 | tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); |
4530 | WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); |
4531 | |
4532 | WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); |
4533 | } |
4534 | } |
4535 | } |
4536 | |
4537 | /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ |
	r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
	if (r) {
		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
4541 | return r; |
4542 | } |
4543 | |
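	/* request a reset of all VMIDs except VMID 0 (bit 0 is left clear) */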
4544 | WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); |
4545 | |
	/*
	 * Read the CP_VMID_RESET register three times to give
	 * GFX_HQD_ACTIVE sufficient time to reach 0.
	 */
4548 | RREG32_SOC15(GC, 0, regCP_VMID_RESET); |
4549 | RREG32_SOC15(GC, 0, regCP_VMID_RESET); |
4550 | RREG32_SOC15(GC, 0, regCP_VMID_RESET); |
4551 | |
4552 | /* release the gfx mutex */ |
	r = gfx_v11_0_request_gfx_index_mutex(adev, 0);
	if (r) {
		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
4556 | return r; |
4557 | } |
4558 | |
4559 | for (i = 0; i < adev->usec_timeout; i++) { |
4560 | if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && |
4561 | !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) |
4562 | break; |
4563 | udelay(1); |
4564 | } |
4565 | if (i >= adev->usec_timeout) { |
4566 | printk("Failed to wait all pipes clean\n" ); |
4567 | return -EINVAL; |
4568 | } |
4569 | |
4570 | /********** trigger soft reset ***********/ |
4571 | grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); |
4572 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4573 | SOFT_RESET_CP, 1); |
4574 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4575 | SOFT_RESET_GFX, 1); |
4576 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4577 | SOFT_RESET_CPF, 1); |
4578 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4579 | SOFT_RESET_CPC, 1); |
4580 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4581 | SOFT_RESET_CPG, 1); |
4582 | WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); |
4583 | /********** exit soft reset ***********/ |
4584 | grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); |
4585 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4586 | SOFT_RESET_CP, 0); |
4587 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4588 | SOFT_RESET_GFX, 0); |
4589 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4590 | SOFT_RESET_CPF, 0); |
4591 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4592 | SOFT_RESET_CPC, 0); |
4593 | grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, |
4594 | SOFT_RESET_CPG, 0); |
4595 | WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); |
4596 | |
4597 | tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); |
4598 | tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); |
4599 | WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); |
4600 | |
4601 | WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); |
4602 | WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); |
4603 | |
4604 | for (i = 0; i < adev->usec_timeout; i++) { |
4605 | if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) |
4606 | break; |
4607 | udelay(1); |
4608 | } |
4609 | if (i >= adev->usec_timeout) { |
4610 | printk("Failed to wait CP_VMID_RESET to 0\n" ); |
4611 | return -EINVAL; |
4612 | } |
4613 | |
4614 | tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); |
4615 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); |
4616 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); |
4617 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); |
4618 | tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); |
4619 | WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); |
4620 | |
	gfx_v11_0_unset_safe_mode(adev, 0);
4622 | |
4623 | return gfx_v11_0_cp_resume(adev); |
4624 | } |
4625 | |
4626 | static bool gfx_v11_0_check_soft_reset(void *handle) |
4627 | { |
4628 | int i, r; |
4629 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4630 | struct amdgpu_ring *ring; |
	long tmo = msecs_to_jiffies(1000);
4632 | |
4633 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
4634 | ring = &adev->gfx.gfx_ring[i]; |
4635 | r = amdgpu_ring_test_ib(ring, tmo); |
4636 | if (r) |
4637 | return true; |
4638 | } |
4639 | |
4640 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
4641 | ring = &adev->gfx.compute_ring[i]; |
4642 | r = amdgpu_ring_test_ib(ring, tmo); |
4643 | if (r) |
4644 | return true; |
4645 | } |
4646 | |
4647 | return false; |
4648 | } |
4649 | |
4650 | static int gfx_v11_0_post_soft_reset(void *handle) |
4651 | { |
4652 | /** |
4653 | * GFX soft reset will impact MES, need resume MES when do GFX soft reset |
4654 | */ |
4655 | return amdgpu_mes_resume(adev: (struct amdgpu_device *)handle); |
4656 | } |
4657 | |
4658 | static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) |
4659 | { |
4660 | uint64_t clock; |
4661 | uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; |
4662 | |
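	/*
	 * Read the counter as hi/lo/hi: if the high half changed between the
	 * two reads, the low half wrapped in between and is re-read so that
	 * both halves come from the same counter period.
	 */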
4663 | if (amdgpu_sriov_vf(adev)) { |
		amdgpu_gfx_off_ctrl(adev, false);
4665 | mutex_lock(&adev->gfx.gpu_clock_mutex); |
4666 | clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); |
4667 | clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); |
4668 | clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); |
4669 | if (clock_counter_hi_pre != clock_counter_hi_after) |
4670 | clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); |
		mutex_unlock(&adev->gfx.gpu_clock_mutex);
		amdgpu_gfx_off_ctrl(adev, true);
4673 | } else { |
4674 | preempt_disable(); |
4675 | clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); |
4676 | clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); |
4677 | clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); |
4678 | if (clock_counter_hi_pre != clock_counter_hi_after) |
4679 | clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); |
4680 | preempt_enable(); |
4681 | } |
4682 | clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); |
4683 | |
4684 | return clock; |
4685 | } |
4686 | |
4687 | static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, |
4688 | uint32_t vmid, |
4689 | uint32_t gds_base, uint32_t gds_size, |
4690 | uint32_t gws_base, uint32_t gws_size, |
4691 | uint32_t oa_base, uint32_t oa_size) |
4692 | { |
4693 | struct amdgpu_device *adev = ring->adev; |
4694 | |
4695 | /* GDS Base */ |
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
				    gds_base);
4699 | |
4700 | /* GDS Size */ |
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
				    gds_size);
4704 | |
4705 | /* GWS */ |
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4709 | |
4710 | /* OA */ |
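	/* the value below is a contiguous mask of oa_size bits starting at
	 * bit oa_base: (1 << (oa_size + oa_base)) - (1 << oa_base)
	 */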
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
				    (1 << (oa_size + oa_base)) - (1 << oa_base));
4714 | } |
4715 | |
4716 | static int gfx_v11_0_early_init(void *handle) |
4717 | { |
4718 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4719 | |
4720 | adev->gfx.funcs = &gfx_v11_0_gfx_funcs; |
4721 | |
4722 | adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; |
4723 | adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), |
4724 | AMDGPU_MAX_COMPUTE_RINGS); |
4725 | |
4726 | gfx_v11_0_set_kiq_pm4_funcs(adev); |
4727 | gfx_v11_0_set_ring_funcs(adev); |
4728 | gfx_v11_0_set_irq_funcs(adev); |
4729 | gfx_v11_0_set_gds_init(adev); |
4730 | gfx_v11_0_set_rlc_funcs(adev); |
4731 | gfx_v11_0_set_mqd_funcs(adev); |
4732 | gfx_v11_0_set_imu_funcs(adev); |
4733 | |
4734 | gfx_v11_0_init_rlcg_reg_access_ctrl(adev); |
4735 | |
4736 | return gfx_v11_0_init_microcode(adev); |
4737 | } |
4738 | |
4739 | static int gfx_v11_0_late_init(void *handle) |
4740 | { |
4741 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
4742 | int r; |
4743 | |
	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4745 | if (r) |
4746 | return r; |
4747 | |
	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4749 | if (r) |
4750 | return r; |
4751 | |
4752 | return 0; |
4753 | } |
4754 | |
4755 | static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) |
4756 | { |
4757 | uint32_t rlc_cntl; |
4758 | |
4759 | /* if RLC is not enabled, do nothing */ |
4760 | rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); |
4761 | return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; |
4762 | } |
4763 | |
4764 | static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) |
4765 | { |
4766 | uint32_t data; |
4767 | unsigned i; |
4768 | |
4769 | data = RLC_SAFE_MODE__CMD_MASK; |
4770 | data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); |
4771 | |
4772 | WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); |
4773 | |
4774 | /* wait for RLC_SAFE_MODE */ |
4775 | for (i = 0; i < adev->usec_timeout; i++) { |
4776 | if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), |
4777 | RLC_SAFE_MODE, CMD)) |
4778 | break; |
4779 | udelay(1); |
4780 | } |
4781 | } |
4782 | |
4783 | static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) |
4784 | { |
4785 | WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); |
4786 | } |
4787 | |
4788 | static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, |
4789 | bool enable) |
4790 | { |
4791 | uint32_t def, data; |
4792 | |
4793 | if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) |
4794 | return; |
4795 | |
4796 | def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); |
4797 | |
4798 | if (enable) |
4799 | data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; |
4800 | else |
4801 | data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; |
4802 | |
4803 | if (def != data) |
4804 | WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); |
4805 | } |
4806 | |
4807 | static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, |
4808 | bool enable) |
4809 | { |
4810 | uint32_t def, data; |
4811 | |
4812 | if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) |
4813 | return; |
4814 | |
4815 | def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); |
4816 | |
4817 | if (enable) |
4818 | data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; |
4819 | else |
4820 | data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; |
4821 | |
4822 | if (def != data) |
4823 | WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); |
4824 | } |
4825 | |
4826 | static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, |
4827 | bool enable) |
4828 | { |
4829 | uint32_t def, data; |
4830 | |
4831 | if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) |
4832 | return; |
4833 | |
4834 | def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); |
4835 | |
4836 | if (enable) |
4837 | data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; |
4838 | else |
4839 | data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; |
4840 | |
4841 | if (def != data) |
4842 | WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); |
4843 | } |
4844 | |
4845 | static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, |
4846 | bool enable) |
4847 | { |
4848 | uint32_t data, def; |
4849 | |
4850 | if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) |
4851 | return; |
4852 | |
4853 | /* It is disabled by HW by default */ |
4854 | if (enable) { |
4855 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { |
4856 | /* 1 - RLC_CGTT_MGCG_OVERRIDE */ |
4857 | def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); |
4858 | |
4859 | data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | |
4860 | RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | |
4861 | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); |
4862 | |
4863 | if (def != data) |
4864 | WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); |
4865 | } |
4866 | } else { |
4867 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { |
4868 | def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); |
4869 | |
4870 | data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | |
4871 | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | |
4872 | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); |
4873 | |
4874 | if (def != data) |
4875 | WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); |
4876 | } |
4877 | } |
4878 | } |
4879 | |
4880 | static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, |
4881 | bool enable) |
4882 | { |
4883 | uint32_t def, data; |
4884 | |
4885 | if (!(adev->cg_flags & |
4886 | (AMD_CG_SUPPORT_GFX_CGCG | |
4887 | AMD_CG_SUPPORT_GFX_CGLS | |
4888 | AMD_CG_SUPPORT_GFX_3D_CGCG | |
4889 | AMD_CG_SUPPORT_GFX_3D_CGLS))) |
4890 | return; |
4891 | |
4892 | if (enable) { |
4893 | def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); |
4894 | |
4895 | /* unset CGCG override */ |
4896 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) |
4897 | data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; |
4898 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) |
4899 | data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; |
4900 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || |
4901 | adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) |
4902 | data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; |
4903 | |
4904 | /* update CGCG override bits */ |
4905 | if (def != data) |
4906 | WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); |
4907 | |
4908 | /* enable cgcg FSM(0x0000363F) */ |
4909 | def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); |
4910 | |
4911 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { |
4912 | data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; |
4913 | data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | |
4914 | RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; |
4915 | } |
4916 | |
4917 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { |
4918 | data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; |
4919 | data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | |
4920 | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; |
4921 | } |
4922 | |
4923 | if (def != data) |
4924 | WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); |
4925 | |
4926 | /* Program RLC_CGCG_CGLS_CTRL_3D */ |
4927 | def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); |
4928 | |
4929 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { |
4930 | data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; |
4931 | data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | |
4932 | RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; |
4933 | } |
4934 | |
4935 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { |
4936 | data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; |
4937 | data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | |
4938 | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; |
4939 | } |
4940 | |
4941 | if (def != data) |
4942 | WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); |
4943 | |
4944 | /* set IDLE_POLL_COUNT(0x00900100) */ |
4945 | def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); |
4946 | |
4947 | data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); |
4948 | data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | |
4949 | (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); |
4950 | |
4951 | if (def != data) |
4952 | WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); |
4953 | |
4954 | data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); |
4955 | data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); |
4956 | data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); |
4957 | data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); |
4958 | data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); |
4959 | WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); |
4960 | |
4961 | data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); |
4962 | data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); |
4963 | WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); |
4964 | |
		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
4966 | if (adev->sdma.num_instances > 1) { |
4967 | data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); |
4968 | data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); |
4969 | WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); |
4970 | } |
4971 | } else { |
4972 | /* Program RLC_CGCG_CGLS_CTRL */ |
4973 | def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); |
4974 | |
4975 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) |
4976 | data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; |
4977 | |
4978 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) |
4979 | data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; |
4980 | |
4981 | if (def != data) |
4982 | WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); |
4983 | |
4984 | /* Program RLC_CGCG_CGLS_CTRL_3D */ |
4985 | def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); |
4986 | |
4987 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) |
4988 | data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; |
4989 | if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) |
4990 | data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; |
4991 | |
4992 | if (def != data) |
4993 | WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); |
4994 | |
4995 | data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); |
4996 | data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; |
4997 | WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); |
4998 | |
		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5000 | if (adev->sdma.num_instances > 1) { |
5001 | data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); |
5002 | data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; |
5003 | WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); |
5004 | } |
5005 | } |
5006 | } |
5007 | |
5008 | static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, |
5009 | bool enable) |
5010 | { |
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5012 | |
5013 | gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); |
5014 | |
5015 | gfx_v11_0_update_medium_grain_clock_gating(adev, enable); |
5016 | |
5017 | gfx_v11_0_update_repeater_fgcg(adev, enable); |
5018 | |
5019 | gfx_v11_0_update_sram_fgcg(adev, enable); |
5020 | |
5021 | gfx_v11_0_update_perf_clk(adev, enable); |
5022 | |
5023 | if (adev->cg_flags & |
5024 | (AMD_CG_SUPPORT_GFX_MGCG | |
5025 | AMD_CG_SUPPORT_GFX_CGLS | |
5026 | AMD_CG_SUPPORT_GFX_CGCG | |
5027 | AMD_CG_SUPPORT_GFX_3D_CGCG | |
5028 | AMD_CG_SUPPORT_GFX_3D_CGLS)) |
5029 | gfx_v11_0_enable_gui_idle_interrupt(adev, enable); |
5030 | |
	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5032 | |
5033 | return 0; |
5034 | } |
5035 | |
5036 | static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) |
5037 | { |
5038 | u32 data; |
5039 | |
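	/* keep GFXOFF disabled while the RLC SPM register is accessed directly */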
	amdgpu_gfx_off_ctrl(adev, false);
5041 | |
5042 | data = RREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL); |
5043 | |
5044 | data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; |
5045 | data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; |
5046 | |
5047 | WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); |
5048 | |
	amdgpu_gfx_off_ctrl(adev, true);
5050 | |
5051 | if (ring |
5052 | && amdgpu_sriov_is_pp_one_vf(adev) |
5053 | && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) |
5054 | || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { |
5055 | uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); |
5056 | amdgpu_ring_emit_wreg(ring, reg, data); |
5057 | } |
5058 | } |
5059 | |
5060 | static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { |
5061 | .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, |
5062 | .set_safe_mode = gfx_v11_0_set_safe_mode, |
5063 | .unset_safe_mode = gfx_v11_0_unset_safe_mode, |
5064 | .init = gfx_v11_0_rlc_init, |
5065 | .get_csb_size = gfx_v11_0_get_csb_size, |
5066 | .get_csb_buffer = gfx_v11_0_get_csb_buffer, |
5067 | .resume = gfx_v11_0_rlc_resume, |
5068 | .stop = gfx_v11_0_rlc_stop, |
5069 | .reset = gfx_v11_0_rlc_reset, |
5070 | .start = gfx_v11_0_rlc_start, |
5071 | .update_spm_vmid = gfx_v11_0_update_spm_vmid, |
5072 | }; |
5073 | |
5074 | static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) |
5075 | { |
5076 | u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); |
5077 | |
5078 | if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) |
5079 | data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; |
5080 | else |
5081 | data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; |
5082 | |
5083 | WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); |
5084 | |
	/* program RLC_PG_DELAY_3 for CGPG hysteresis */
	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5088 | case IP_VERSION(11, 0, 1): |
5089 | case IP_VERSION(11, 0, 4): |
5090 | case IP_VERSION(11, 5, 0): |
5091 | case IP_VERSION(11, 5, 1): |
5092 | WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); |
5093 | break; |
5094 | default: |
5095 | break; |
5096 | } |
5097 | } |
5098 | } |
5099 | |
5100 | static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) |
5101 | { |
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5103 | |
5104 | gfx_v11_cntl_power_gating(adev, enable); |
5105 | |
	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5107 | } |
5108 | |
5109 | static int gfx_v11_0_set_powergating_state(void *handle, |
5110 | enum amd_powergating_state state) |
5111 | { |
5112 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
5113 | bool enable = (state == AMD_PG_STATE_GATE); |
5114 | |
5115 | if (amdgpu_sriov_vf(adev)) |
5116 | return 0; |
5117 | |
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5119 | case IP_VERSION(11, 0, 0): |
5120 | case IP_VERSION(11, 0, 2): |
5121 | case IP_VERSION(11, 0, 3): |
5122 | amdgpu_gfx_off_ctrl(adev, enable); |
5123 | break; |
5124 | case IP_VERSION(11, 0, 1): |
5125 | case IP_VERSION(11, 0, 4): |
5126 | case IP_VERSION(11, 5, 0): |
5127 | case IP_VERSION(11, 5, 1): |
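		/* GFXOFF must stay disabled while the PG configuration changes */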
5128 | if (!enable) |
			amdgpu_gfx_off_ctrl(adev, false);
5130 | |
5131 | gfx_v11_cntl_pg(adev, enable); |
5132 | |
5133 | if (enable) |
			amdgpu_gfx_off_ctrl(adev, true);
5135 | |
5136 | break; |
5137 | default: |
5138 | break; |
5139 | } |
5140 | |
5141 | return 0; |
5142 | } |
5143 | |
5144 | static int gfx_v11_0_set_clockgating_state(void *handle, |
5145 | enum amd_clockgating_state state) |
5146 | { |
5147 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
5148 | |
5149 | if (amdgpu_sriov_vf(adev)) |
5150 | return 0; |
5151 | |
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5153 | case IP_VERSION(11, 0, 0): |
5154 | case IP_VERSION(11, 0, 1): |
5155 | case IP_VERSION(11, 0, 2): |
5156 | case IP_VERSION(11, 0, 3): |
5157 | case IP_VERSION(11, 0, 4): |
5158 | case IP_VERSION(11, 5, 0): |
5159 | case IP_VERSION(11, 5, 1): |
		gfx_v11_0_update_gfx_clock_gating(adev,
						  state == AMD_CG_STATE_GATE);
5162 | break; |
5163 | default: |
5164 | break; |
5165 | } |
5166 | |
5167 | return 0; |
5168 | } |
5169 | |
5170 | static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags) |
5171 | { |
5172 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
5173 | int data; |
5174 | |
5175 | /* AMD_CG_SUPPORT_GFX_MGCG */ |
5176 | data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); |
5177 | if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) |
5178 | *flags |= AMD_CG_SUPPORT_GFX_MGCG; |
5179 | |
5180 | /* AMD_CG_SUPPORT_REPEATER_FGCG */ |
5181 | if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) |
5182 | *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; |
5183 | |
5184 | /* AMD_CG_SUPPORT_GFX_FGCG */ |
5185 | if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) |
5186 | *flags |= AMD_CG_SUPPORT_GFX_FGCG; |
5187 | |
5188 | /* AMD_CG_SUPPORT_GFX_PERF_CLK */ |
5189 | if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) |
5190 | *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; |
5191 | |
5192 | /* AMD_CG_SUPPORT_GFX_CGCG */ |
5193 | data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); |
5194 | if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) |
5195 | *flags |= AMD_CG_SUPPORT_GFX_CGCG; |
5196 | |
5197 | /* AMD_CG_SUPPORT_GFX_CGLS */ |
5198 | if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) |
5199 | *flags |= AMD_CG_SUPPORT_GFX_CGLS; |
5200 | |
5201 | /* AMD_CG_SUPPORT_GFX_3D_CGCG */ |
5202 | data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); |
5203 | if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) |
5204 | *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; |
5205 | |
5206 | /* AMD_CG_SUPPORT_GFX_3D_CGLS */ |
5207 | if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) |
5208 | *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; |
5209 | } |
5210 | |
5211 | static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) |
5212 | { |
	/* gfx11 hardware is 32bit rptr */
5214 | return *(uint32_t *)ring->rptr_cpu_addr; |
5215 | } |
5216 | |
5217 | static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) |
5218 | { |
5219 | struct amdgpu_device *adev = ring->adev; |
5220 | u64 wptr; |
5221 | |
5222 | /* XXX check if swapping is necessary on BE */ |
5223 | if (ring->use_doorbell) { |
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5225 | } else { |
5226 | wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); |
5227 | wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; |
5228 | } |
5229 | |
5230 | return wptr; |
5231 | } |
5232 | |
5233 | static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) |
5234 | { |
5235 | struct amdgpu_device *adev = ring->adev; |
5236 | |
5237 | if (ring->use_doorbell) { |
5238 | /* XXX check if swapping is necessary on BE */ |
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
5241 | WDOORBELL64(ring->doorbell_index, ring->wptr); |
5242 | } else { |
5243 | WREG32_SOC15(GC, 0, regCP_RB0_WPTR, |
5244 | lower_32_bits(ring->wptr)); |
5245 | WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, |
5246 | upper_32_bits(ring->wptr)); |
5247 | } |
5248 | } |
5249 | |
5250 | static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) |
5251 | { |
5252 | /* gfx11 hardware is 32bit rptr */ |
5253 | return *(uint32_t *)ring->rptr_cpu_addr; |
5254 | } |
5255 | |
5256 | static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) |
5257 | { |
5258 | u64 wptr; |
5259 | |
5260 | /* XXX check if swapping is necessary on BE */ |
5261 | if (ring->use_doorbell) |
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5263 | else |
5264 | BUG(); |
5265 | return wptr; |
5266 | } |
5267 | |
5268 | static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) |
5269 | { |
5270 | struct amdgpu_device *adev = ring->adev; |
5271 | |
5272 | /* XXX check if swapping is necessary on BE */ |
5273 | if (ring->use_doorbell) { |
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
5276 | WDOORBELL64(ring->doorbell_index, ring->wptr); |
5277 | } else { |
5278 | BUG(); /* only DOORBELL method supported on gfx11 now */ |
5279 | } |
5280 | } |
5281 | |
5282 | static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) |
5283 | { |
5284 | struct amdgpu_device *adev = ring->adev; |
5285 | u32 ref_and_mask, reg_mem_engine; |
5286 | const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; |
5287 | |
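	/* each ME/pipe owns its own bit in the HDP flush request/done registers */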
5288 | if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { |
5289 | switch (ring->me) { |
5290 | case 1: |
5291 | ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; |
5292 | break; |
5293 | case 2: |
5294 | ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; |
5295 | break; |
5296 | default: |
5297 | return; |
5298 | } |
5299 | reg_mem_engine = 0; |
5300 | } else { |
5301 | ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; |
5302 | reg_mem_engine = 1; /* pfp */ |
5303 | } |
5304 | |
	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
			       ref_and_mask, ref_and_mask, 0x20);
5309 | } |
5310 | |
5311 | static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, |
5312 | struct amdgpu_job *job, |
5313 | struct amdgpu_ib *ib, |
5314 | uint32_t flags) |
5315 | { |
5316 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
	u32 header, control = 0;
5318 | |
5319 | BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); |
5320 | |
5321 | header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); |
5322 | |
5323 | control |= ib->length_dw | (vmid << 24); |
5324 | |
5325 | if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { |
5326 | control |= INDIRECT_BUFFER_PRE_ENB(1); |
5327 | |
5328 | if (flags & AMDGPU_IB_PREEMPTED) |
5329 | control |= INDIRECT_BUFFER_PRE_RESUME(1); |
5330 | |
5331 | if (vmid) |
			gfx_v11_0_ring_emit_de_meta(ring,
				(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5334 | } |
5335 | |
5336 | if (ring->is_mes_queue) |
5337 | /* inherit vmid from mqd */ |
5338 | control |= 0x400000; |
5339 | |
	amdgpu_ring_write(ring, header);
5341 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ |
5342 | amdgpu_ring_write(ring, |
5343 | #ifdef __BIG_ENDIAN |
5344 | (2 << 0) | |
5345 | #endif |
5346 | lower_32_bits(ib->gpu_addr)); |
5347 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
	amdgpu_ring_write(ring, control);
5349 | } |
5350 | |
5351 | static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, |
5352 | struct amdgpu_job *job, |
5353 | struct amdgpu_ib *ib, |
5354 | uint32_t flags) |
5355 | { |
5356 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
5357 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); |
5358 | |
5359 | if (ring->is_mes_queue) |
5360 | /* inherit vmid from mqd */ |
5361 | control |= 0x40000000; |
5362 | |
5363 | /* Currently, there is a high possibility to get wave ID mismatch |
5364 | * between ME and GDS, leading to a hw deadlock, because ME generates |
5365 | * different wave IDs than the GDS expects. This situation happens |
5366 | * randomly when at least 5 compute pipes use GDS ordered append. |
5367 | * The wave IDs generated by ME are also wrong after suspend/resume. |
5368 | * Those are probably bugs somewhere else in the kernel driver. |
5369 | * |
5370 | * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and |
5371 | * GDS to 0 for this ring (me/pipe). |
5372 | */ |
5373 | if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { |
5374 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); |
5375 | amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); |
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5377 | } |
5378 | |
5379 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); |
5380 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ |
5381 | amdgpu_ring_write(ring, |
5382 | #ifdef __BIG_ENDIAN |
5383 | (2 << 0) | |
5384 | #endif |
5385 | lower_32_bits(ib->gpu_addr)); |
5386 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
	amdgpu_ring_write(ring, control);
5388 | } |
5389 | |
5390 | static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, |
5391 | u64 seq, unsigned flags) |
5392 | { |
5393 | bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; |
5394 | bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; |
5395 | |
5396 | /* RELEASE_MEM - flush caches, send int */ |
5397 | amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); |
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5399 | PACKET3_RELEASE_MEM_GCR_GL2_WB | |
5400 | PACKET3_RELEASE_MEM_GCR_GL2_INV | |
5401 | PACKET3_RELEASE_MEM_GCR_GL2_US | |
5402 | PACKET3_RELEASE_MEM_GCR_GL1_INV | |
5403 | PACKET3_RELEASE_MEM_GCR_GLV_INV | |
5404 | PACKET3_RELEASE_MEM_GCR_GLM_INV | |
5405 | PACKET3_RELEASE_MEM_GCR_GLM_WB | |
5406 | PACKET3_RELEASE_MEM_CACHE_POLICY(3) | |
5407 | PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | |
5408 | PACKET3_RELEASE_MEM_EVENT_INDEX(5))); |
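	/* DATA_SEL: 1 = send the 32-bit seq low dword, 2 = send the full
	 * 64-bit seq; INT_SEL: 2 = raise an interrupt once the write has
	 * been confirmed
	 */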
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5410 | PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); |
5411 | |
5412 | /* |
5413 | * the address should be Qword aligned if 64bit write, Dword |
5414 | * aligned if only send 32bit data low (discard data high) |
5415 | */ |
5416 | if (write64bit) |
5417 | BUG_ON(addr & 0x7); |
5418 | else |
5419 | BUG_ON(addr & 0x3); |
5420 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
5421 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
5422 | amdgpu_ring_write(ring, lower_32_bits(seq)); |
5423 | amdgpu_ring_write(ring, upper_32_bits(seq)); |
	amdgpu_ring_write(ring, ring->is_mes_queue ?
5425 | (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); |
5426 | } |
5427 | |
5428 | static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) |
5429 | { |
5430 | int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); |
5431 | uint32_t seq = ring->fence_drv.sync_seq; |
5432 | uint64_t addr = ring->fence_drv.gpu_addr; |
5433 | |
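	/* busy-wait on the fence memory location until it holds the last synced seq */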
	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
			       upper_32_bits(addr), seq, 0xffffffff, 4);
5436 | } |
5437 | |
5438 | static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, |
5439 | uint16_t pasid, uint32_t flush_type, |
5440 | bool all_hub, uint8_t dst_sel) |
5441 | { |
5442 | amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); |
5443 | amdgpu_ring_write(ring, |
5444 | PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | |
5445 | PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | |
5446 | PACKET3_INVALIDATE_TLBS_PASID(pasid) | |
5447 | PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); |
5448 | } |
5449 | |
5450 | static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, |
5451 | unsigned vmid, uint64_t pd_addr) |
5452 | { |
5453 | if (ring->is_mes_queue) |
		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5455 | else |
5456 | amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); |
5457 | |
5458 | /* compute doesn't have PFP */ |
5459 | if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { |
5460 | /* sync PFP to ME, otherwise we might get invalid PFP reads */ |
5461 | amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); |
		amdgpu_ring_write(ring, 0x0);
5463 | } |
5464 | |
5465 | /* Make sure that we can't skip the SET_Q_MODE packets when the VM |
5466 | * changed in any way. |
5467 | */ |
5468 | ring->set_q_mode_offs = 0; |
5469 | ring->set_q_mode_ptr = NULL; |
5470 | } |
5471 | |
5472 | static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, |
5473 | u64 seq, unsigned int flags) |
5474 | { |
5475 | struct amdgpu_device *adev = ring->adev; |
5476 | |
5477 | /* we only allocate 32bit for each seq wb address */ |
5478 | BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
5479 | |
5480 | /* write fence seq to the "addr" */ |
5481 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5483 | WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); |
5484 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
5485 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
5486 | amdgpu_ring_write(ring, lower_32_bits(seq)); |
5487 | |
5488 | if (flags & AMDGPU_FENCE_FLAG_INT) { |
5489 | /* set register to trigger INT */ |
5490 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5492 | WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); |
5493 | amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); |
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5496 | } |
5497 | } |
5498 | |
5499 | static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, |
5500 | uint32_t flags) |
5501 | { |
5502 | uint32_t dw2 = 0; |
5503 | |
5504 | dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ |
5505 | if (flags & AMDGPU_HAVE_CTX_SWITCH) { |
5506 | /* set load_global_config & load_global_uconfig */ |
5507 | dw2 |= 0x8001; |
5508 | /* set load_cs_sh_regs */ |
5509 | dw2 |= 0x01000000; |
5510 | /* set load_per_context_state & load_gfx_sh_regs for GFX */ |
5511 | dw2 |= 0x10002; |
5512 | } |
5513 | |
5514 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); |
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
5517 | } |
5518 | |
5519 | static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, |
5520 | uint64_t addr) |
5521 | { |
5522 | unsigned ret; |
5523 | |
5524 | amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); |
5525 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
5526 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
5527 | /* discard following DWs if *cond_exec_gpu_addr==0 */ |
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);
5532 | |
5533 | return ret; |
5534 | } |
5535 | |
5536 | static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, |
5537 | u64 shadow_va, u64 csa_va, |
5538 | u64 gds_va, bool init_shadow, |
5539 | int vmid) |
5540 | { |
5541 | struct amdgpu_device *adev = ring->adev; |
5542 | unsigned int offs, end; |
5543 | |
5544 | if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) |
5545 | return; |
5546 | |
5547 | /* |
5548 | * The logic here isn't easy to understand because we need to keep state |
	 * across multiple executions of the function as well as between the
5550 | * CPU and GPU. The general idea is that the newly written GPU command |
5551 | * has a condition on the previous one and only executed if really |
5552 | * necessary. |
5553 | */ |
5554 | |
5555 | /* |
5556 | * The dw in the NOP controls if the next SET_Q_MODE packet should be |
	 * executed or not. Reserve 64bits just to be on the safe side.
5558 | */ |
5559 | amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); |
5560 | offs = ring->wptr & ring->buf_mask; |
5561 | |
5562 | /* |
5563 | * We start with skipping the prefix SET_Q_MODE and always executing |
5564 | * the postfix SET_Q_MODE packet. This is changed below with a |
5565 | * WRITE_DATA command when the postfix executed. |
5566 | */ |
	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
	amdgpu_ring_write(ring, 0);
5569 | |
5570 | if (ring->set_q_mode_offs) { |
5571 | uint64_t addr; |
5572 | |
		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5574 | addr += ring->set_q_mode_offs << 2; |
5575 | end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); |
5576 | } |
5577 | |
5578 | /* |
5579 | * When the postfix SET_Q_MODE packet executes we need to make sure that the |
5580 | * next prefix SET_Q_MODE packet executes as well. |
5581 | */ |
5582 | if (!shadow_va) { |
5583 | uint64_t addr; |
5584 | |
		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5586 | addr += offs << 2; |
5587 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
5588 | amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); |
5589 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
5590 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
		amdgpu_ring_write(ring, 0x1);
5592 | } |
5593 | |
5594 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); |
5595 | amdgpu_ring_write(ring, lower_32_bits(shadow_va)); |
5596 | amdgpu_ring_write(ring, upper_32_bits(shadow_va)); |
5597 | amdgpu_ring_write(ring, lower_32_bits(gds_va)); |
5598 | amdgpu_ring_write(ring, upper_32_bits(gds_va)); |
5599 | amdgpu_ring_write(ring, lower_32_bits(csa_va)); |
5600 | amdgpu_ring_write(ring, upper_32_bits(csa_va)); |
	amdgpu_ring_write(ring, shadow_va ?
			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
	amdgpu_ring_write(ring, init_shadow ?
			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5605 | |
5606 | if (ring->set_q_mode_offs) |
		amdgpu_ring_patch_cond_exec(ring, end);
5608 | |
5609 | if (shadow_va) { |
5610 | uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; |
5611 | |
5612 | /* |
5613 | * If the tokens match try to skip the last postfix SET_Q_MODE |
5614 | * packet to avoid saving/restoring the state all the time. |
5615 | */ |
5616 | if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) |
5617 | *ring->set_q_mode_ptr = 0; |
5618 | |
5619 | ring->set_q_mode_token = token; |
5620 | } else { |
5621 | ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; |
5622 | } |
5623 | |
5624 | ring->set_q_mode_offs = offs; |
5625 | } |
5626 | |
5627 | static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) |
5628 | { |
5629 | int i, r = 0; |
5630 | struct amdgpu_device *adev = ring->adev; |
5631 | struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; |
5632 | struct amdgpu_ring *kiq_ring = &kiq->ring; |
5633 | unsigned long flags; |
5634 | |
5635 | if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) |
5636 | return -EINVAL; |
5637 | |
5638 | spin_lock_irqsave(&kiq->ring_lock, flags); |
5639 | |
	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5642 | return -ENOMEM; |
5643 | } |
5644 | |
5645 | /* assert preemption condition */ |
	amdgpu_ring_set_preempt_cond_exec(ring, false);
5647 | |
5648 | /* assert IB preemption, emit the trailing fence */ |
5649 | kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, |
5650 | ring->trail_fence_gpu_addr, |
5651 | ++ring->trail_seq); |
	amdgpu_ring_commit(kiq_ring);

	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5655 | |
5656 | /* poll the trailing fence */ |
5657 | for (i = 0; i < adev->usec_timeout; i++) { |
5658 | if (ring->trail_seq == |
5659 | le32_to_cpu(*(ring->trail_fence_cpu_addr))) |
5660 | break; |
5661 | udelay(1); |
5662 | } |
5663 | |
5664 | if (i >= adev->usec_timeout) { |
5665 | r = -EINVAL; |
5666 | DRM_ERROR("ring %d failed to preempt ib\n" , ring->idx); |
5667 | } |
5668 | |
5669 | /* deassert preemption condition */ |
	amdgpu_ring_set_preempt_cond_exec(ring, true);
5671 | return r; |
5672 | } |
5673 | |
5674 | static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) |
5675 | { |
5676 | struct amdgpu_device *adev = ring->adev; |
5677 | struct v10_de_ib_state de_payload = {0}; |
5678 | uint64_t offset, gds_addr, de_payload_gpu_addr; |
5679 | void *de_payload_cpu_addr; |
5680 | int cnt; |
5681 | |
5682 | if (ring->is_mes_queue) { |
5683 | offset = offsetof(struct amdgpu_mes_ctx_meta_data, |
5684 | gfx[0].gfx_meta_data) + |
5685 | offsetof(struct v10_gfx_meta_data, de_payload); |
5686 | de_payload_gpu_addr = |
5687 | amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); |
5688 | de_payload_cpu_addr = |
5689 | amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); |
5690 | |
5691 | offset = offsetof(struct amdgpu_mes_ctx_meta_data, |
5692 | gfx[0].gds_backup) + |
5693 | offsetof(struct v10_gfx_meta_data, de_payload); |
5694 | gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); |
5695 | } else { |
5696 | offset = offsetof(struct v10_gfx_meta_data, de_payload); |
		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5698 | de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; |
5699 | |
5700 | gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + |
5701 | AMDGPU_CSA_SIZE - adev->gds.gds_size, |
5702 | PAGE_SIZE); |
5703 | } |
5704 | |
5705 | de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); |
5706 | de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); |
5707 | |
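	/* PM4 count: 3 control/address dwords plus the payload dwords, minus
	 * one per the count-field convention
	 */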
5708 | cnt = (sizeof(de_payload) >> 2) + 4 - 2; |
5709 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); |
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5711 | WRITE_DATA_DST_SEL(8) | |
5712 | WR_CONFIRM) | |
5713 | WRITE_DATA_CACHE_POLICY(0)); |
5714 | amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); |
5715 | amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); |
5716 | |
5717 | if (resume) |
		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
					   sizeof(de_payload) >> 2);
	else
		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
					   sizeof(de_payload) >> 2);
5723 | } |
5724 | |
5725 | static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, |
5726 | bool secure) |
5727 | { |
5728 | uint32_t v = secure ? FRAME_TMZ : 0; |
5729 | |
5730 | amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); |
	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5732 | } |
5733 | |
5734 | static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, |
5735 | uint32_t reg_val_offs) |
5736 | { |
5737 | struct amdgpu_device *adev = ring->adev; |
5738 | |
5739 | amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); |
	amdgpu_ring_write(ring, 0 |	/* src: register */
5741 | (5 << 8) | /* dst: memory */ |
5742 | (1 << 20)); /* write confirm */ |
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
5745 | amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + |
5746 | reg_val_offs * 4)); |
5747 | amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + |
5748 | reg_val_offs * 4)); |
5749 | } |
5750 | |
5751 | static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, |
5752 | uint32_t val) |
5753 | { |
5754 | uint32_t cmd = 0; |
5755 | |
5756 | switch (ring->funcs->type) { |
5757 | case AMDGPU_RING_TYPE_GFX: |
5758 | cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; |
5759 | break; |
5760 | case AMDGPU_RING_TYPE_KIQ: |
5761 | cmd = (1 << 16); /* no inc addr */ |
5762 | break; |
5763 | default: |
5764 | cmd = WR_CONFIRM; |
5765 | break; |
5766 | } |
5767 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
5772 | } |
5773 | |
5774 | static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, |
5775 | uint32_t val, uint32_t mask) |
5776 | { |
	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5778 | } |
5779 | |
5780 | static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, |
5781 | uint32_t reg0, uint32_t reg1, |
5782 | uint32_t ref, uint32_t mask) |
5783 | { |
5784 | int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); |
5785 | |
	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
			       ref, mask, 0x20);
5788 | } |
5789 | |
5790 | static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, |
5791 | unsigned vmid) |
5792 | { |
5793 | struct amdgpu_device *adev = ring->adev; |
5794 | uint32_t value = 0; |
5795 | |
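	/* ask the SQ to kill the waves belonging to the offending VMID */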
5796 | value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); |
5797 | value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); |
5798 | value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); |
5799 | value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); |
5800 | WREG32_SOC15(GC, 0, regSQ_CMD, value); |
5801 | } |
5802 | |
5803 | static void |
5804 | gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, |
5805 | uint32_t me, uint32_t pipe, |
5806 | enum amdgpu_interrupt_state state) |
5807 | { |
5808 | uint32_t cp_int_cntl, cp_int_cntl_reg; |
5809 | |
5810 | if (!me) { |
5811 | switch (pipe) { |
5812 | case 0: |
5813 | cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); |
5814 | break; |
5815 | case 1: |
5816 | cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); |
5817 | break; |
5818 | default: |
5819 | DRM_DEBUG("invalid pipe %d\n" , pipe); |
5820 | return; |
5821 | } |
5822 | } else { |
5823 | DRM_DEBUG("invalid me %d\n" , me); |
5824 | return; |
5825 | } |
5826 | |
5827 | switch (state) { |
5828 | case AMDGPU_IRQ_STATE_DISABLE: |
5829 | cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); |
5830 | cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, |
5831 | TIME_STAMP_INT_ENABLE, 0); |
5832 | cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, |
5833 | GENERIC0_INT_ENABLE, 0); |
5834 | WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); |
5835 | break; |
5836 | case AMDGPU_IRQ_STATE_ENABLE: |
5837 | cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); |
5838 | cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, |
5839 | TIME_STAMP_INT_ENABLE, 1); |
5840 | cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, |
5841 | GENERIC0_INT_ENABLE, 1); |
5842 | WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); |
5843 | break; |
5844 | default: |
5845 | break; |
5846 | } |
5847 | } |
5848 | |
5849 | static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, |
5850 | int me, int pipe, |
5851 | enum amdgpu_interrupt_state state) |
5852 | { |
5853 | u32 mec_int_cntl, mec_int_cntl_reg; |
5854 | |
5855 | /* |
5856 | * amdgpu controls only the first MEC. That's why this function only |
5857 | * handles the setting of interrupts for this specific MEC. All other |
5858 | * pipes' interrupts are set by amdkfd. |
5859 | */ |
5860 | |
5861 | if (me == 1) { |
5862 | switch (pipe) { |
5863 | case 0: |
5864 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); |
5865 | break; |
5866 | case 1: |
5867 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); |
5868 | break; |
5869 | case 2: |
5870 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); |
5871 | break; |
5872 | case 3: |
5873 | mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); |
5874 | break; |
5875 | default: |
5876 | DRM_DEBUG("invalid pipe %d\n" , pipe); |
5877 | return; |
5878 | } |
5879 | } else { |
5880 | DRM_DEBUG("invalid me %d\n" , me); |
5881 | return; |
5882 | } |
5883 | |
5884 | switch (state) { |
5885 | case AMDGPU_IRQ_STATE_DISABLE: |
5886 | mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); |
5887 | mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, |
5888 | TIME_STAMP_INT_ENABLE, 0); |
5889 | mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, |
5890 | GENERIC0_INT_ENABLE, 0); |
5891 | WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); |
5892 | break; |
5893 | case AMDGPU_IRQ_STATE_ENABLE: |
5894 | mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); |
5895 | mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, |
5896 | TIME_STAMP_INT_ENABLE, 1); |
5897 | mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, |
5898 | GENERIC0_INT_ENABLE, 1); |
5899 | WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); |
5900 | break; |
5901 | default: |
5902 | break; |
5903 | } |
5904 | } |
5905 | |
5906 | static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, |
5907 | struct amdgpu_irq_src *src, |
5908 | unsigned type, |
5909 | enum amdgpu_interrupt_state state) |
5910 | { |
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
		break;
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	default:
		break;
	}
5933 | return 0; |
5934 | } |
5935 | |
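/*
 * EOP interrupt handler.  MES-managed queues carry their queue id in
 * src_data[0] and are looked up through the MES queue IDR; legacy rings
 * are identified by decoding entry->ring_id as (inferred from the masks
 * used below):
 *   bits [1:0] = pipe, bits [3:2] = me, bits [6:4] = queue,
 * e.g. ring_id 0x16 -> me 1, pipe 2, queue 1.
 */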
5936 | static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, |
5937 | struct amdgpu_irq_src *source, |
5938 | struct amdgpu_iv_entry *entry) |
5939 | { |
5940 | int i; |
5941 | u8 me_id, pipe_id, queue_id; |
5942 | struct amdgpu_ring *ring; |
5943 | uint32_t mes_queue_id = entry->src_data[0]; |
5944 | |
5945 | DRM_DEBUG("IH: CP EOP\n" ); |
5946 | |
5947 | if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { |
5948 | struct amdgpu_mes_queue *queue; |
5949 | |
5950 | mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; |
5951 | |
		spin_lock(&adev->mes.queue_id_lock);
		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
		if (queue) {
			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
			amdgpu_fence_process(queue->ring);
		}
		spin_unlock(&adev->mes.queue_id_lock);
5959 | } else { |
5960 | me_id = (entry->ring_id & 0x0c) >> 2; |
5961 | pipe_id = (entry->ring_id & 0x03) >> 0; |
5962 | queue_id = (entry->ring_id & 0x70) >> 4; |
5963 | |
5964 | switch (me_id) { |
5965 | case 0: |
			if (pipe_id == 0)
				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
			else
				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5970 | break; |
5971 | case 1: |
5972 | case 2: |
5973 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
5974 | ring = &adev->gfx.compute_ring[i]; |
5975 | /* Per-queue interrupt is supported for MEC starting from VI. |
5976 | * The interrupt can only be enabled/disabled per pipe instead |
5977 | * of per queue. |
5978 | */ |
5979 | if ((ring->me == me_id) && |
5980 | (ring->pipe == pipe_id) && |
5981 | (ring->queue == queue_id)) |
5982 | amdgpu_fence_process(ring); |
5983 | } |
5984 | break; |
5985 | } |
5986 | } |
5987 | |
5988 | return 0; |
5989 | } |
5990 | |
5991 | static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, |
5992 | struct amdgpu_irq_src *source, |
5993 | unsigned type, |
5994 | enum amdgpu_interrupt_state state) |
5995 | { |
5996 | switch (state) { |
5997 | case AMDGPU_IRQ_STATE_DISABLE: |
5998 | case AMDGPU_IRQ_STATE_ENABLE: |
5999 | WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, |
6000 | PRIV_REG_INT_ENABLE, |
6001 | state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); |
6002 | break; |
6003 | default: |
6004 | break; |
6005 | } |
6006 | |
6007 | return 0; |
6008 | } |
6009 | |
6010 | static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, |
6011 | struct amdgpu_irq_src *source, |
6012 | unsigned type, |
6013 | enum amdgpu_interrupt_state state) |
6014 | { |
6015 | switch (state) { |
6016 | case AMDGPU_IRQ_STATE_DISABLE: |
6017 | case AMDGPU_IRQ_STATE_ENABLE: |
6018 | WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, |
6019 | PRIV_INSTR_INT_ENABLE, |
6020 | state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); |
6021 | break; |
6022 | default: |
6023 | break; |
6024 | } |
6025 | |
6026 | return 0; |
6027 | } |
6028 | |
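/*
 * On a privileged register/instruction fault, locate the ring the fault
 * originated from and call drm_sched_fault() on its scheduler, which
 * kicks the scheduler's timeout handling so the offending job gets
 * reset.
 */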
6029 | static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, |
6030 | struct amdgpu_iv_entry *entry) |
6031 | { |
6032 | u8 me_id, pipe_id, queue_id; |
6033 | struct amdgpu_ring *ring; |
6034 | int i; |
6035 | |
6036 | me_id = (entry->ring_id & 0x0c) >> 2; |
6037 | pipe_id = (entry->ring_id & 0x03) >> 0; |
6038 | queue_id = (entry->ring_id & 0x70) >> 4; |
6039 | |
6040 | switch (me_id) { |
6041 | case 0: |
6042 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
6043 | ring = &adev->gfx.gfx_ring[i]; |
			/* we only enable 1 gfx queue per pipe for now */
			if (ring->me == me_id && ring->pipe == pipe_id)
				drm_sched_fault(&ring->sched);
6047 | } |
6048 | break; |
6049 | case 1: |
6050 | case 2: |
6051 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
6052 | ring = &adev->gfx.compute_ring[i]; |
6053 | if (ring->me == me_id && ring->pipe == pipe_id && |
6054 | ring->queue == queue_id) |
				drm_sched_fault(&ring->sched);
6056 | } |
6057 | break; |
6058 | default: |
6059 | BUG(); |
6060 | break; |
6061 | } |
6062 | } |
6063 | |
6064 | static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, |
6065 | struct amdgpu_irq_src *source, |
6066 | struct amdgpu_iv_entry *entry) |
6067 | { |
6068 | DRM_ERROR("Illegal register access in command stream\n" ); |
6069 | gfx_v11_0_handle_priv_fault(adev, entry); |
6070 | return 0; |
6071 | } |
6072 | |
6073 | static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, |
6074 | struct amdgpu_irq_src *source, |
6075 | struct amdgpu_iv_entry *entry) |
6076 | { |
6077 | DRM_ERROR("Illegal instruction in command stream\n" ); |
6078 | gfx_v11_0_handle_priv_fault(adev, entry); |
6079 | return 0; |
6080 | } |
6081 | |
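/*
 * RLC GC FED (fatal error detection) interrupts are simply forwarded
 * to the gfx RAS handler when one is registered.
 */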
6082 | static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, |
6083 | struct amdgpu_irq_src *source, |
6084 | struct amdgpu_iv_entry *entry) |
6085 | { |
6086 | if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) |
6087 | return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); |
6088 | |
6089 | return 0; |
6090 | } |
6091 | |
6092 | #if 0 |
6093 | static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, |
6094 | struct amdgpu_irq_src *src, |
6095 | unsigned int type, |
6096 | enum amdgpu_interrupt_state state) |
6097 | { |
6098 | uint32_t tmp, target; |
6099 | struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); |
6100 | |
6101 | target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); |
6102 | target += ring->pipe; |
6103 | |
6104 | switch (type) { |
6105 | case AMDGPU_CP_KIQ_IRQ_DRIVER0: |
6106 | if (state == AMDGPU_IRQ_STATE_DISABLE) { |
6107 | tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); |
6108 | tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, |
6109 | GENERIC2_INT_ENABLE, 0); |
6110 | WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); |
6111 | |
6112 | tmp = RREG32_SOC15_IP(GC, target); |
6113 | tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, |
6114 | GENERIC2_INT_ENABLE, 0); |
6115 | WREG32_SOC15_IP(GC, target, tmp); |
6116 | } else { |
6117 | tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); |
6118 | tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, |
6119 | GENERIC2_INT_ENABLE, 1); |
6120 | WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); |
6121 | |
6122 | tmp = RREG32_SOC15_IP(GC, target); |
6123 | tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, |
6124 | GENERIC2_INT_ENABLE, 1); |
6125 | WREG32_SOC15_IP(GC, target, tmp); |
6126 | } |
6127 | break; |
6128 | default: |
6129 | BUG(); /* kiq only support GENERIC2_INT now */ |
6130 | break; |
6131 | } |
6132 | return 0; |
6133 | } |
6134 | #endif |
6135 | |
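/*
 * Emit a full GPU cache synchronization.  The GCR_CNTL bits request
 * writeback + invalidate of GL2 and its metadata cache (GLM), and
 * invalidation of GL1 and the per-CU L0 caches -- GLV (vector), GLK
 * (scalar) and GLI (instruction), per the RDNA cache naming.
 */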
6136 | static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) |
6137 | { |
6138 | const unsigned int gcr_cntl = |
6139 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | |
6140 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | |
6141 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | |
6142 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | |
6143 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | |
6144 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | |
6145 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | |
6146 | PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); |
6147 | |
6148 | /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ |
6149 | amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); |
	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6157 | } |
6158 | |
6159 | static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { |
6160 | .name = "gfx_v11_0" , |
6161 | .early_init = gfx_v11_0_early_init, |
6162 | .late_init = gfx_v11_0_late_init, |
6163 | .sw_init = gfx_v11_0_sw_init, |
6164 | .sw_fini = gfx_v11_0_sw_fini, |
6165 | .hw_init = gfx_v11_0_hw_init, |
6166 | .hw_fini = gfx_v11_0_hw_fini, |
6167 | .suspend = gfx_v11_0_suspend, |
6168 | .resume = gfx_v11_0_resume, |
6169 | .is_idle = gfx_v11_0_is_idle, |
6170 | .wait_for_idle = gfx_v11_0_wait_for_idle, |
6171 | .soft_reset = gfx_v11_0_soft_reset, |
6172 | .check_soft_reset = gfx_v11_0_check_soft_reset, |
6173 | .post_soft_reset = gfx_v11_0_post_soft_reset, |
6174 | .set_clockgating_state = gfx_v11_0_set_clockgating_state, |
6175 | .set_powergating_state = gfx_v11_0_set_powergating_state, |
6176 | .get_clockgating_state = gfx_v11_0_get_clockgating_state, |
6177 | }; |
6178 | |
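/*
 * Ring function tables.  Note that emit_frame_size is the worst-case
 * number of dwords a single submission may emit around its IBs; the
 * itemized counts must be kept in sync with the emit callbacks they
 * reference.
 */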
6179 | static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { |
6180 | .type = AMDGPU_RING_TYPE_GFX, |
6181 | .align_mask = 0xff, |
6182 | .nop = PACKET3(PACKET3_NOP, 0x3FFF), |
6183 | .support_64bit_ptrs = true, |
6184 | .secure_submission_supported = true, |
6185 | .get_rptr = gfx_v11_0_ring_get_rptr_gfx, |
6186 | .get_wptr = gfx_v11_0_ring_get_wptr_gfx, |
6187 | .set_wptr = gfx_v11_0_ring_set_wptr_gfx, |
	.emit_frame_size = /* 247 dwords maximum in total with 16 IBs */
6189 | 5 + /* update_spm_vmid */ |
6190 | 5 + /* COND_EXEC */ |
6191 | 22 + /* SET_Q_PREEMPTION_MODE */ |
6192 | 7 + /* PIPELINE_SYNC */ |
6193 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + |
6194 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + |
6195 | 2 + /* VM_FLUSH */ |
6196 | 8 + /* FENCE for VM_FLUSH */ |
6197 | 20 + /* GDS switch */ |
6198 | 5 + /* COND_EXEC */ |
6199 | 7 + /* HDP_flush */ |
6200 | 4 + /* VGT_flush */ |
6201 | 31 + /* DE_META */ |
6202 | 3 + /* CNTX_CTRL */ |
6203 | 5 + /* HDP_INVL */ |
6204 | 22 + /* SET_Q_PREEMPTION_MODE */ |
6205 | 8 + 8 + /* FENCE x2 */ |
6206 | 8, /* gfx_v11_0_emit_mem_sync */ |
6207 | .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ |
6208 | .emit_ib = gfx_v11_0_ring_emit_ib_gfx, |
6209 | .emit_fence = gfx_v11_0_ring_emit_fence, |
6210 | .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, |
6211 | .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, |
6212 | .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, |
6213 | .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, |
6214 | .test_ring = gfx_v11_0_ring_test_ring, |
6215 | .test_ib = gfx_v11_0_ring_test_ib, |
6216 | .insert_nop = amdgpu_ring_insert_nop, |
6217 | .pad_ib = amdgpu_ring_generic_pad_ib, |
6218 | .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, |
6219 | .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, |
6220 | .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, |
6221 | .preempt_ib = gfx_v11_0_ring_preempt_ib, |
6222 | .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, |
6223 | .emit_wreg = gfx_v11_0_ring_emit_wreg, |
6224 | .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, |
6225 | .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, |
6226 | .soft_recovery = gfx_v11_0_ring_soft_recovery, |
6227 | .emit_mem_sync = gfx_v11_0_emit_mem_sync, |
6228 | }; |
6229 | |
6230 | static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { |
6231 | .type = AMDGPU_RING_TYPE_COMPUTE, |
6232 | .align_mask = 0xff, |
6233 | .nop = PACKET3(PACKET3_NOP, 0x3FFF), |
6234 | .support_64bit_ptrs = true, |
6235 | .get_rptr = gfx_v11_0_ring_get_rptr_compute, |
6236 | .get_wptr = gfx_v11_0_ring_get_wptr_compute, |
6237 | .set_wptr = gfx_v11_0_ring_set_wptr_compute, |
6238 | .emit_frame_size = |
6239 | 5 + /* update_spm_vmid */ |
6240 | 20 + /* gfx_v11_0_ring_emit_gds_switch */ |
6241 | 7 + /* gfx_v11_0_ring_emit_hdp_flush */ |
6242 | 5 + /* hdp invalidate */ |
6243 | 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ |
6244 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + |
6245 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + |
6246 | 2 + /* gfx_v11_0_ring_emit_vm_flush */ |
6247 | 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ |
6248 | 8, /* gfx_v11_0_emit_mem_sync */ |
6249 | .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ |
6250 | .emit_ib = gfx_v11_0_ring_emit_ib_compute, |
6251 | .emit_fence = gfx_v11_0_ring_emit_fence, |
6252 | .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, |
6253 | .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, |
6254 | .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, |
6255 | .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, |
6256 | .test_ring = gfx_v11_0_ring_test_ring, |
6257 | .test_ib = gfx_v11_0_ring_test_ib, |
6258 | .insert_nop = amdgpu_ring_insert_nop, |
6259 | .pad_ib = amdgpu_ring_generic_pad_ib, |
6260 | .emit_wreg = gfx_v11_0_ring_emit_wreg, |
6261 | .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, |
6262 | .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, |
6263 | .emit_mem_sync = gfx_v11_0_emit_mem_sync, |
6264 | }; |
6265 | |
6266 | static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { |
6267 | .type = AMDGPU_RING_TYPE_KIQ, |
6268 | .align_mask = 0xff, |
6269 | .nop = PACKET3(PACKET3_NOP, 0x3FFF), |
6270 | .support_64bit_ptrs = true, |
6271 | .get_rptr = gfx_v11_0_ring_get_rptr_compute, |
6272 | .get_wptr = gfx_v11_0_ring_get_wptr_compute, |
6273 | .set_wptr = gfx_v11_0_ring_set_wptr_compute, |
6274 | .emit_frame_size = |
6275 | 20 + /* gfx_v11_0_ring_emit_gds_switch */ |
6276 | 7 + /* gfx_v11_0_ring_emit_hdp_flush */ |
6277 | 5 + /*hdp invalidate */ |
6278 | 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ |
6279 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + |
6280 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + |
6281 | 2 + /* gfx_v11_0_ring_emit_vm_flush */ |
6282 | 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ |
6283 | .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ |
6284 | .emit_ib = gfx_v11_0_ring_emit_ib_compute, |
6285 | .emit_fence = gfx_v11_0_ring_emit_fence_kiq, |
6286 | .test_ring = gfx_v11_0_ring_test_ring, |
6287 | .test_ib = gfx_v11_0_ring_test_ib, |
6288 | .insert_nop = amdgpu_ring_insert_nop, |
6289 | .pad_ib = amdgpu_ring_generic_pad_ib, |
6290 | .emit_rreg = gfx_v11_0_ring_emit_rreg, |
6291 | .emit_wreg = gfx_v11_0_ring_emit_wreg, |
6292 | .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, |
6293 | .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, |
6294 | }; |
6295 | |
6296 | static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) |
6297 | { |
6298 | int i; |
6299 | |
6300 | adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq; |
6301 | |
6302 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
6303 | adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; |
6304 | |
6305 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
6306 | adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; |
6307 | } |
6308 | |
6309 | static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { |
6310 | .set = gfx_v11_0_set_eop_interrupt_state, |
6311 | .process = gfx_v11_0_eop_irq, |
6312 | }; |
6313 | |
6314 | static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { |
6315 | .set = gfx_v11_0_set_priv_reg_fault_state, |
6316 | .process = gfx_v11_0_priv_reg_irq, |
6317 | }; |
6318 | |
6319 | static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { |
6320 | .set = gfx_v11_0_set_priv_inst_fault_state, |
6321 | .process = gfx_v11_0_priv_inst_irq, |
6322 | }; |
6323 | |
6324 | static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { |
6325 | .process = gfx_v11_0_rlc_gc_fed_irq, |
6326 | }; |
6327 | |
6328 | static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) |
6329 | { |
6330 | adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; |
6331 | adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; |
6332 | |
6333 | adev->gfx.priv_reg_irq.num_types = 1; |
6334 | adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; |
6335 | |
6336 | adev->gfx.priv_inst_irq.num_types = 1; |
6337 | adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; |
6338 | |
6339 | adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ |
	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
}
6343 | |
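/*
 * IMU firmware mode selection: APUs use MISSION_MODE while dGPUs use
 * DEBUG_MODE.  The firmware-side semantics of the two modes are not
 * spelled out here.
 */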
6344 | static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) |
6345 | { |
6346 | if (adev->flags & AMD_IS_APU) |
6347 | adev->gfx.imu.mode = MISSION_MODE; |
6348 | else |
6349 | adev->gfx.imu.mode = DEBUG_MODE; |
6350 | |
6351 | adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; |
6352 | } |
6353 | |
6354 | static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) |
6355 | { |
6356 | adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; |
6357 | } |
6358 | |
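/*
 * GDS defaults.  The compute max wave id assumes up to 32 waves in
 * flight per CU (total_cu * 32 - 1); the gds/gws/oa sizes are the
 * gfx11 defaults.
 */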
6359 | static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) |
6360 | { |
6361 | unsigned total_cu = adev->gfx.config.max_cu_per_sh * |
6362 | adev->gfx.config.max_sh_per_se * |
6363 | adev->gfx.config.max_shader_engines; |
6364 | |
6365 | adev->gds.gds_size = 0x1000; |
6366 | adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; |
6367 | adev->gds.gws_size = 64; |
6368 | adev->gds.oa_size = 16; |
6369 | } |
6370 | |
6371 | static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) |
6372 | { |
6373 | /* set gfx eng mqd */ |
6374 | adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = |
6375 | sizeof(struct v11_gfx_mqd); |
6376 | adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = |
6377 | gfx_v11_0_gfx_mqd_init; |
6378 | /* set compute eng mqd */ |
6379 | adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = |
6380 | sizeof(struct v11_compute_mqd); |
6381 | adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = |
6382 | gfx_v11_0_compute_mqd_init; |
6383 | } |
6384 | |
6385 | static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, |
6386 | u32 bitmap) |
6387 | { |
6388 | u32 data; |
6389 | |
6390 | if (!bitmap) |
6391 | return; |
6392 | |
6393 | data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; |
6394 | data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; |
6395 | |
6396 | WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); |
6397 | } |
6398 | |
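/*
 * The active WGP mask is everything not harvested by fuses
 * (CC_GC_SHADER_ARRAY_CONFIG) nor disabled by the driver/user
 * (GC_USER_SHADER_ARRAY_CONFIG).  max_cu_per_sh is halved because each
 * gfx11 WGP packs two CUs.
 */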
6399 | static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) |
6400 | { |
6401 | u32 data, wgp_bitmask; |
6402 | data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); |
6403 | data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); |
6404 | |
6405 | data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; |
6406 | data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; |
6407 | |
6408 | wgp_bitmask = |
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6410 | |
6411 | return (~data) & wgp_bitmask; |
6412 | } |
6413 | |
6414 | static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) |
6415 | { |
6416 | u32 wgp_idx, wgp_active_bitmap; |
6417 | u32 cu_bitmap_per_wgp, cu_active_bitmap; |
6418 | |
6419 | wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev); |
6420 | cu_active_bitmap = 0; |
6421 | |
6422 | for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { |
		/* each enabled WGP enables both of its CUs */
6424 | cu_bitmap_per_wgp = 3 << (2 * wgp_idx); |
6425 | if (wgp_active_bitmap & (1 << wgp_idx)) |
6426 | cu_active_bitmap |= cu_bitmap_per_wgp; |
6427 | } |
6428 | |
6429 | return cu_active_bitmap; |
6430 | } |
6431 | |
6432 | static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, |
6433 | struct amdgpu_cu_info *cu_info) |
6434 | { |
6435 | int i, j, k, counter, active_cu_number = 0; |
6436 | u32 mask, bitmap; |
6437 | unsigned disable_masks[8 * 2]; |
6438 | |
6439 | if (!adev || !cu_info) |
6440 | return -EINVAL; |
6441 | |
	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6443 | |
6444 | mutex_lock(&adev->grbm_idx_mutex); |
6445 | for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { |
6446 | for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { |
6447 | bitmap = i * adev->gfx.config.max_sh_per_se + j; |
6448 | if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) |
6449 | continue; |
6450 | mask = 1; |
6451 | counter = 0; |
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
6456 | bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev); |
6457 | |
6458 | /** |
6459 | * GFX11 could support more than 4 SEs, while the bitmap |
6460 | * in cu_info struct is 4x4 and ioctl interface struct |
6461 | * drm_amdgpu_info_device should keep stable. |
6462 | * So we use last two columns of bitmap to store cu mask for |
6463 | * SEs 4 to 7, the layout of the bitmap is as below: |
6464 | * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} |
6465 | * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} |
6466 | * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} |
6467 | * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} |
6468 | * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} |
6469 | * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} |
6470 | * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} |
6471 | * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} |
6472 | */ |
6473 | cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; |
6474 | |
6475 | for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { |
6476 | if (bitmap & mask) |
6477 | counter++; |
6478 | |
6479 | mask <<= 1; |
6480 | } |
6481 | active_cu_number += counter; |
6482 | } |
6483 | } |
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);
6486 | |
6487 | cu_info->number = active_cu_number; |
6488 | cu_info->simd_per_cu = NUM_SIMD_PER_CU; |
6489 | |
6490 | return 0; |
6491 | } |
6492 | |
6493 | const struct amdgpu_ip_block_version gfx_v11_0_ip_block = |
6494 | { |
6495 | .type = AMD_IP_BLOCK_TYPE_GFX, |
6496 | .major = 11, |
6497 | .minor = 0, |
6498 | .rev = 0, |
6499 | .funcs = &gfx_v11_0_ip_funcs, |
6500 | }; |
6501 | |