1 | /* |
2 | * Copyright 2019 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | */ |
23 | |
24 | #include <linux/firmware.h> |
25 | #include <drm/drm_drv.h> |
26 | |
27 | #include "amdgpu.h" |
28 | #include "amdgpu_vcn.h" |
29 | #include "amdgpu_pm.h" |
30 | #include "soc15.h" |
31 | #include "soc15d.h" |
32 | #include "vcn_v2_0.h" |
33 | #include "mmsch_v1_0.h" |
34 | #include "vcn_v2_5.h" |
35 | |
36 | #include "vcn/vcn_2_5_offset.h" |
37 | #include "vcn/vcn_2_5_sh_mask.h" |
38 | #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" |
39 | |
40 | #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 |
41 | #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 |
42 | #define VCN1_AON_SOC_ADDRESS_3_0 0x48000 |
43 | |
44 | #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 |
45 | #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f |
46 | #define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 |
47 | #define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11 |
48 | #define mmUVD_NO_OP_INTERNAL_OFFSET 0x29 |
49 | #define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66 |
50 | #define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d |
51 | |
52 | #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 |
53 | #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 |
54 | #define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 |
55 | #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c |
56 | |
57 | #define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 |
58 | |
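/*
 * Registers snapshotted into the adev->vcn.ip_dump buffer allocated in
 * vcn_v2_5_sw_init(); one copy of this list is captured per un-harvested
 * instance so the VCN register state can be inspected after a failure.
 */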
59 | static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { |
	SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
62 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), |
63 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), |
64 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), |
65 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), |
66 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), |
67 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), |
68 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), |
69 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), |
70 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), |
71 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), |
72 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), |
73 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), |
74 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), |
75 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), |
76 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), |
77 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), |
78 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), |
79 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), |
80 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), |
81 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), |
82 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), |
83 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), |
84 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), |
85 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), |
86 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), |
87 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), |
88 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), |
89 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), |
90 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), |
91 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), |
92 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), |
93 | SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) |
94 | }; |
95 | |
96 | static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); |
97 | static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); |
98 | static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); |
99 | static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst, |
100 | enum amd_powergating_state state); |
101 | static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, |
102 | struct dpg_pause_state *new_state); |
103 | static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); |
104 | static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); |
105 | |
106 | static int amdgpu_ih_clientid_vcns[] = { |
107 | SOC15_IH_CLIENTID_VCN, |
108 | SOC15_IH_CLIENTID_VCN1 |
109 | }; |
110 | |
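/*
 * Delayed idle worker shared by all instances: count the fences still
 * outstanding on every decode/encode ring, re-evaluate the DPG pause state
 * while work remains, and once everything has drained gate the block and
 * drop the VIDEO workload profile again.
 */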
111 | static void vcn_v2_5_idle_work_handler(struct work_struct *work) |
112 | { |
113 | struct amdgpu_vcn_inst *vcn_inst = |
114 | container_of(work, struct amdgpu_vcn_inst, idle_work.work); |
115 | struct amdgpu_device *adev = vcn_inst->adev; |
116 | unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; |
117 | unsigned int i, j; |
118 | int r = 0; |
119 | |
120 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
121 | struct amdgpu_vcn_inst *v = &adev->vcn.inst[i]; |
122 | |
123 | if (adev->vcn.harvest_config & (1 << i)) |
124 | continue; |
125 | |
126 | for (j = 0; j < v->num_enc_rings; ++j) |
			fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]);
128 | |
129 | /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ |
130 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && |
131 | !v->using_unified_queue) { |
132 | struct dpg_pause_state new_state; |
133 | |
134 | if (fence[i] || |
135 | unlikely(atomic_read(&v->dpg_enc_submission_cnt))) |
136 | new_state.fw_based = VCN_DPG_STATE__PAUSE; |
137 | else |
138 | new_state.fw_based = VCN_DPG_STATE__UNPAUSE; |
139 | |
140 | v->pause_dpg_mode(v, &new_state); |
141 | } |
142 | |
		fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);
144 | fences += fence[i]; |
145 | |
146 | } |
147 | |
	if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						       AMD_PG_STATE_GATE);
		mutex_lock(&adev->vcn.workload_profile_mutex);
		if (adev->vcn.workload_profile_active) {
			r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
							    false);
			if (r)
				dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
			adev->vcn.workload_profile_active = false;
		}
		mutex_unlock(&adev->vcn.workload_profile_mutex);
	} else {
		schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
162 | } |
163 | } |
164 | |
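/*
 * Called before commands are written to a ring: take a submission reference,
 * cancel the pending idle work, switch the SMU to the VIDEO workload profile
 * if it is not active yet, ungate the block and, for non-unified queues,
 * request the matching DPG pause state.
 */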
165 | static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) |
166 | { |
167 | struct amdgpu_device *adev = ring->adev; |
168 | struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me]; |
169 | int r = 0; |
170 | |
	atomic_inc(&adev->vcn.inst[0].total_submission_cnt);

	cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
174 | |
	/* We can safely return early here because we've cancelled the
	 * delayed work so there is no one else to set it to false
	 * and we don't care if someone else sets it to true.
	 */
179 | if (adev->vcn.workload_profile_active) |
180 | goto pg_lock; |
181 | |
182 | mutex_lock(&adev->vcn.workload_profile_mutex); |
183 | if (!adev->vcn.workload_profile_active) { |
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
188 | adev->vcn.workload_profile_active = true; |
189 | } |
	mutex_unlock(&adev->vcn.workload_profile_mutex);
191 | |
192 | pg_lock: |
193 | mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); |
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       AMD_PG_STATE_UNGATE);
196 | |
197 | /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ |
198 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && |
199 | !v->using_unified_queue) { |
200 | struct dpg_pause_state new_state; |
201 | |
202 | if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { |
			atomic_inc(&v->dpg_enc_submission_cnt);
204 | new_state.fw_based = VCN_DPG_STATE__PAUSE; |
205 | } else { |
206 | unsigned int fences = 0; |
207 | unsigned int i; |
208 | |
209 | for (i = 0; i < v->num_enc_rings; ++i) |
				fences += amdgpu_fence_count_emitted(&v->ring_enc[i]);
211 | |
			if (fences || atomic_read(&v->dpg_enc_submission_cnt))
213 | new_state.fw_based = VCN_DPG_STATE__PAUSE; |
214 | else |
215 | new_state.fw_based = VCN_DPG_STATE__UNPAUSE; |
216 | } |
217 | v->pause_dpg_mode(v, &new_state); |
218 | } |
	mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock);
220 | } |
221 | |
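/*
 * Called once the commands for a submission have been written to the ring:
 * drop the references taken in vcn_v2_5_ring_begin_use() and re-arm the
 * delayed idle work.
 */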
222 | static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring) |
223 | { |
224 | struct amdgpu_device *adev = ring->adev; |
225 | |
226 | /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ |
227 | if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && |
228 | ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && |
229 | !adev->vcn.inst[ring->me].using_unified_queue) |
		atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&adev->vcn.inst[0].total_submission_cnt);

	schedule_delayed_work(&adev->vcn.inst[0].idle_work,
235 | VCN_IDLE_TIMEOUT); |
236 | } |
237 | |
238 | /** |
239 | * vcn_v2_5_early_init - set function pointers and load microcode |
240 | * |
241 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
242 | * |
243 | * Set ring and irq function pointers |
244 | * Load microcode from filesystem |
245 | */ |
246 | static int vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block) |
247 | { |
248 | struct amdgpu_device *adev = ip_block->adev; |
249 | int i, r; |
250 | |
251 | if (amdgpu_sriov_vf(adev)) { |
252 | adev->vcn.num_vcn_inst = 2; |
253 | adev->vcn.harvest_config = 0; |
254 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) |
255 | adev->vcn.inst[i].num_enc_rings = 1; |
256 | } else { |
257 | u32 harvest; |
258 | int i; |
259 | |
260 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
261 | harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); |
262 | if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) |
263 | adev->vcn.harvest_config |= 1 << i; |
264 | adev->vcn.inst[i].num_enc_rings = 2; |
265 | } |
266 | if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | |
267 | AMDGPU_VCN_HARVEST_VCN1)) |
268 | /* both instances are harvested, disable the block */ |
269 | return -ENOENT; |
270 | } |
271 | |
272 | vcn_v2_5_set_dec_ring_funcs(adev); |
273 | vcn_v2_5_set_enc_ring_funcs(adev); |
274 | vcn_v2_5_set_irq_funcs(adev); |
275 | vcn_v2_5_set_ras_funcs(adev); |
276 | |
277 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
278 | adev->vcn.inst[i].set_pg_state = vcn_v2_5_set_pg_state; |
279 | |
280 | r = amdgpu_vcn_early_init(adev, i); |
281 | if (r) |
282 | return r; |
283 | } |
284 | |
285 | return 0; |
286 | } |
287 | |
288 | /** |
289 | * vcn_v2_5_sw_init - sw init for VCN block |
290 | * |
291 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
292 | * |
293 | * Load firmware and sw initialization |
294 | */ |
295 | static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) |
296 | { |
297 | struct amdgpu_ring *ring; |
298 | int i, j, r; |
299 | uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); |
300 | uint32_t *ptr; |
301 | struct amdgpu_device *adev = ip_block->adev; |
302 | |
303 | for (j = 0; j < adev->vcn.num_vcn_inst; j++) { |
304 | volatile struct amdgpu_fw_shared *fw_shared; |
305 | |
306 | if (adev->vcn.harvest_config & (1 << j)) |
307 | continue; |
308 | /* VCN DEC TRAP */ |
		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
				      VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
311 | if (r) |
312 | return r; |
313 | |
314 | /* VCN ENC TRAP */ |
315 | for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { |
			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
					      i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
318 | if (r) |
319 | return r; |
320 | } |
321 | |
322 | /* VCN POISON TRAP */ |
		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
				      VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq);
325 | if (r) |
326 | return r; |
327 | |
		r = amdgpu_vcn_sw_init(adev, j);
329 | if (r) |
330 | return r; |
331 | |
332 | /* Override the work func */ |
333 | adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler; |
334 | |
		amdgpu_vcn_setup_ucode(adev, j);
336 | |
		r = amdgpu_vcn_resume(adev, j);
338 | if (r) |
339 | return r; |
340 | |
341 | adev->vcn.inst[j].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; |
342 | adev->vcn.inst[j].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; |
343 | adev->vcn.inst[j].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; |
344 | adev->vcn.inst[j].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; |
345 | adev->vcn.inst[j].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; |
346 | adev->vcn.inst[j].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; |
347 | |
348 | adev->vcn.inst[j].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; |
349 | adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9); |
350 | adev->vcn.inst[j].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; |
351 | adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0); |
352 | adev->vcn.inst[j].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; |
353 | adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1); |
354 | adev->vcn.inst[j].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; |
355 | adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD); |
356 | adev->vcn.inst[j].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; |
357 | adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP); |
358 | |
359 | ring = &adev->vcn.inst[j].ring_dec; |
360 | ring->use_doorbell = true; |
361 | |
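		/*
		 * Doorbell layout: bare metal reserves eight doorbells per VCN
		 * instance (decode ring first, encode rings from slot 2), while
		 * SR-IOV packs each instance into a pair of doorbells starting
		 * at 2 * j.
		 */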
362 | ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + |
363 | (amdgpu_sriov_vf(adev) ? 2*j : 8*j); |
364 | |
		if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0))
366 | ring->vm_hub = AMDGPU_MMHUB1(0); |
367 | else |
368 | ring->vm_hub = AMDGPU_MMHUB0(0); |
369 | |
		sprintf(ring->name, "vcn_dec_%d", j);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
				     0, AMDGPU_RING_PRIO_DEFAULT, NULL);
373 | if (r) |
374 | return r; |
375 | |
376 | for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { |
			enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
378 | |
379 | ring = &adev->vcn.inst[j].ring_enc[i]; |
380 | ring->use_doorbell = true; |
381 | |
382 | ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + |
383 | (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j)); |
384 | |
			if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
			    IP_VERSION(2, 5, 0))
387 | ring->vm_hub = AMDGPU_MMHUB1(0); |
388 | else |
389 | ring->vm_hub = AMDGPU_MMHUB0(0); |
390 | |
			sprintf(ring->name, "vcn_enc_%d.%d", j, i);
			r = amdgpu_ring_init(adev, ring, 512,
					     &adev->vcn.inst[j].irq, 0,
					     hw_prio, NULL);
395 | if (r) |
396 | return r; |
397 | } |
398 | |
399 | fw_shared = adev->vcn.inst[j].fw_shared.cpu_addr; |
400 | fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); |
401 | |
402 | if (amdgpu_vcnfw_log) |
			amdgpu_vcn_fwlog_init(&adev->vcn.inst[j]);
404 | |
405 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) |
406 | adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode; |
407 | } |
408 | |
409 | if (amdgpu_sriov_vf(adev)) { |
410 | r = amdgpu_virt_alloc_mm_table(adev); |
411 | if (r) |
412 | return r; |
413 | } |
414 | |
415 | r = amdgpu_vcn_ras_sw_init(adev); |
416 | if (r) |
417 | return r; |
418 | |
419 | /* Allocate memory for VCN IP Dump buffer */ |
420 | ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); |
421 | if (!ptr) { |
422 | DRM_ERROR("Failed to allocate memory for VCN IP Dump\n" ); |
423 | adev->vcn.ip_dump = NULL; |
424 | } else { |
425 | adev->vcn.ip_dump = ptr; |
426 | } |
427 | |
428 | return 0; |
429 | } |
430 | |
431 | /** |
432 | * vcn_v2_5_sw_fini - sw fini for VCN block |
433 | * |
434 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
435 | * |
436 | * VCN suspend and free up sw allocation |
437 | */ |
438 | static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) |
439 | { |
440 | int i, r, idx; |
441 | struct amdgpu_device *adev = ip_block->adev; |
442 | volatile struct amdgpu_fw_shared *fw_shared; |
443 | |
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
445 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
446 | if (adev->vcn.harvest_config & (1 << i)) |
447 | continue; |
448 | fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; |
449 | fw_shared->present_flag_0 = 0; |
450 | } |
451 | drm_dev_exit(idx); |
452 | } |
453 | |
454 | |
455 | if (amdgpu_sriov_vf(adev)) |
456 | amdgpu_virt_free_mm_table(adev); |
457 | |
458 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
459 | r = amdgpu_vcn_suspend(adev, i); |
460 | if (r) |
461 | return r; |
462 | r = amdgpu_vcn_sw_fini(adev, i); |
463 | if (r) |
464 | return r; |
465 | } |
466 | |
	kfree(adev->vcn.ip_dump);
468 | |
469 | return 0; |
470 | } |
471 | |
472 | /** |
473 | * vcn_v2_5_hw_init - start and test VCN block |
474 | * |
475 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
476 | * |
477 | * Initialize the hardware, boot up the VCPU and do some testing |
478 | */ |
479 | static int vcn_v2_5_hw_init(struct amdgpu_ip_block *ip_block) |
480 | { |
481 | struct amdgpu_device *adev = ip_block->adev; |
482 | struct amdgpu_ring *ring; |
483 | int i, j, r = 0; |
484 | |
485 | if (amdgpu_sriov_vf(adev)) |
486 | r = vcn_v2_5_sriov_start(adev); |
487 | |
488 | for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { |
489 | if (adev->vcn.harvest_config & (1 << j)) |
490 | continue; |
491 | |
492 | if (amdgpu_sriov_vf(adev)) { |
493 | adev->vcn.inst[j].ring_enc[0].sched.ready = true; |
494 | adev->vcn.inst[j].ring_enc[1].sched.ready = false; |
495 | adev->vcn.inst[j].ring_enc[2].sched.ready = false; |
496 | adev->vcn.inst[j].ring_dec.sched.ready = true; |
497 | } else { |
498 | |
499 | ring = &adev->vcn.inst[j].ring_dec; |
500 | |
501 | adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, |
502 | ring->doorbell_index, j); |
503 | |
504 | r = amdgpu_ring_test_helper(ring); |
505 | if (r) |
506 | return r; |
507 | |
508 | for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { |
509 | ring = &adev->vcn.inst[j].ring_enc[i]; |
510 | r = amdgpu_ring_test_helper(ring); |
511 | if (r) |
512 | return r; |
513 | } |
514 | } |
515 | } |
516 | |
517 | return r; |
518 | } |
519 | |
520 | /** |
521 | * vcn_v2_5_hw_fini - stop the hardware block |
522 | * |
523 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
524 | * |
525 | * Stop the VCN block, mark ring as not ready any more |
526 | */ |
527 | static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) |
528 | { |
529 | struct amdgpu_device *adev = ip_block->adev; |
530 | int i; |
531 | |
532 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
533 | struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; |
534 | |
535 | if (adev->vcn.harvest_config & (1 << i)) |
536 | continue; |
537 | |
		cancel_delayed_work_sync(&vinst->idle_work);
539 | |
540 | if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || |
541 | (vinst->cur_state != AMD_PG_STATE_GATE && |
542 | RREG32_SOC15(VCN, i, mmUVD_STATUS))) |
543 | vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); |
544 | |
		if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
			amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0);
547 | } |
548 | |
549 | return 0; |
550 | } |
551 | |
552 | /** |
553 | * vcn_v2_5_suspend - suspend VCN block |
554 | * |
555 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
556 | * |
557 | * HW fini and suspend VCN block |
558 | */ |
559 | static int vcn_v2_5_suspend(struct amdgpu_ip_block *ip_block) |
560 | { |
561 | struct amdgpu_device *adev = ip_block->adev; |
562 | int r, i; |
563 | |
564 | r = vcn_v2_5_hw_fini(ip_block); |
565 | if (r) |
566 | return r; |
567 | |
568 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
		r = amdgpu_vcn_suspend(ip_block->adev, i);
570 | if (r) |
571 | return r; |
572 | } |
573 | |
574 | return 0; |
575 | } |
576 | |
577 | /** |
578 | * vcn_v2_5_resume - resume VCN block |
579 | * |
580 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
581 | * |
582 | * Resume firmware and hw init VCN block |
583 | */ |
584 | static int vcn_v2_5_resume(struct amdgpu_ip_block *ip_block) |
585 | { |
586 | struct amdgpu_device *adev = ip_block->adev; |
587 | int r, i; |
588 | |
589 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
		r = amdgpu_vcn_resume(ip_block->adev, i);
591 | if (r) |
592 | return r; |
593 | } |
594 | |
595 | r = vcn_v2_5_hw_init(ip_block); |
596 | |
597 | return r; |
598 | } |
599 | |
600 | /** |
601 | * vcn_v2_5_mc_resume - memory controller programming |
602 | * |
603 | * @vinst: VCN instance |
604 | * |
 * Let the VCN memory controller know its offsets
606 | */ |
607 | static void vcn_v2_5_mc_resume(struct amdgpu_vcn_inst *vinst) |
608 | { |
609 | struct amdgpu_device *adev = vinst->adev; |
610 | int i = vinst->inst; |
611 | uint32_t size; |
612 | uint32_t offset; |
613 | |
614 | if (adev->vcn.harvest_config & (1 << i)) |
615 | return; |
616 | |
617 | size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); |
618 | /* cache window 0: fw */ |
619 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
620 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
621 | (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); |
622 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
623 | (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); |
624 | WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0); |
625 | offset = 0; |
626 | } else { |
627 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
628 | lower_32_bits(adev->vcn.inst[i].gpu_addr)); |
629 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
630 | upper_32_bits(adev->vcn.inst[i].gpu_addr)); |
631 | offset = size; |
632 | WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, |
633 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); |
634 | } |
635 | WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size); |
636 | |
637 | /* cache window 1: stack */ |
638 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, |
639 | lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); |
640 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, |
641 | upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); |
642 | WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0); |
643 | WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); |
644 | |
645 | /* cache window 2: context */ |
646 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, |
647 | lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); |
648 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, |
649 | upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); |
650 | WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0); |
651 | WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); |
652 | |
653 | /* non-cache window */ |
654 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, |
655 | lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); |
656 | WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, |
657 | upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); |
658 | WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); |
659 | WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0, |
660 | AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); |
661 | } |
662 | |
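/*
 * DPG-mode counterpart of vcn_v2_5_mc_resume(): the same cache-window and
 * non-cache-window programming, but issued through WREG32_SOC15_DPG_MODE so
 * the writes either hit the registers directly or are appended to the
 * indirect DPG SRAM image (see vcn_v2_5_start_dpg_mode()).
 */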
663 | static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, |
664 | bool indirect) |
665 | { |
666 | struct amdgpu_device *adev = vinst->adev; |
667 | int inst_idx = vinst->inst; |
668 | uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4); |
669 | uint32_t offset; |
670 | |
671 | /* cache window 0: fw */ |
672 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
673 | if (!indirect) { |
674 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
675 | VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), |
676 | (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); |
677 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
678 | VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), |
679 | (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); |
680 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
681 | VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); |
682 | } else { |
683 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
684 | VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); |
685 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
686 | VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); |
687 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
688 | VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); |
689 | } |
690 | offset = 0; |
691 | } else { |
692 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
693 | VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), |
694 | lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); |
695 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
696 | VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), |
697 | upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); |
698 | offset = size; |
699 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
700 | VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), |
701 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); |
702 | } |
703 | |
704 | if (!indirect) |
705 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
706 | VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); |
707 | else |
708 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
709 | VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); |
710 | |
711 | /* cache window 1: stack */ |
712 | if (!indirect) { |
713 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
714 | VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), |
715 | lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); |
716 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
717 | VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), |
718 | upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); |
719 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
720 | VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); |
721 | } else { |
722 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
723 | VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); |
724 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
725 | VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); |
726 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
727 | VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); |
728 | } |
729 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
730 | VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); |
731 | |
732 | /* cache window 2: context */ |
733 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
734 | VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), |
735 | lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); |
736 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
737 | VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), |
738 | upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); |
739 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
740 | VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); |
741 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
742 | VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); |
743 | |
744 | /* non-cache window */ |
745 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
746 | VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), |
747 | lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); |
748 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
749 | VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), |
750 | upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); |
751 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
752 | VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); |
753 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
754 | VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0), |
755 | AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); |
756 | |
757 | /* VCN global tiling registers */ |
758 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
759 | VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); |
760 | } |
761 | |
762 | /** |
763 | * vcn_v2_5_disable_clock_gating - disable VCN clock gating |
764 | * |
765 | * @vinst: VCN instance |
766 | * |
767 | * Disable clock gating for VCN block |
768 | */ |
769 | static void vcn_v2_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst) |
770 | { |
771 | struct amdgpu_device *adev = vinst->adev; |
772 | int i = vinst->inst; |
773 | uint32_t data; |
774 | |
775 | if (adev->vcn.harvest_config & (1 << i)) |
776 | return; |
777 | /* UVD disable CGC */ |
778 | data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); |
779 | if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) |
780 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
781 | else |
782 | data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; |
783 | data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; |
784 | data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; |
785 | WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); |
786 | |
787 | data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); |
788 | data &= ~(UVD_CGC_GATE__SYS_MASK |
789 | | UVD_CGC_GATE__UDEC_MASK |
790 | | UVD_CGC_GATE__MPEG2_MASK |
791 | | UVD_CGC_GATE__REGS_MASK |
792 | | UVD_CGC_GATE__RBC_MASK |
793 | | UVD_CGC_GATE__LMI_MC_MASK |
794 | | UVD_CGC_GATE__LMI_UMC_MASK |
795 | | UVD_CGC_GATE__IDCT_MASK |
796 | | UVD_CGC_GATE__MPRD_MASK |
797 | | UVD_CGC_GATE__MPC_MASK |
798 | | UVD_CGC_GATE__LBSI_MASK |
799 | | UVD_CGC_GATE__LRBBM_MASK |
800 | | UVD_CGC_GATE__UDEC_RE_MASK |
801 | | UVD_CGC_GATE__UDEC_CM_MASK |
802 | | UVD_CGC_GATE__UDEC_IT_MASK |
803 | | UVD_CGC_GATE__UDEC_DB_MASK |
804 | | UVD_CGC_GATE__UDEC_MP_MASK |
805 | | UVD_CGC_GATE__WCB_MASK |
806 | | UVD_CGC_GATE__VCPU_MASK |
807 | | UVD_CGC_GATE__MMSCH_MASK); |
808 | |
809 | WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); |
810 | |
811 | SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF); |
812 | |
813 | data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); |
814 | data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
815 | | UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
816 | | UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
817 | | UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
818 | | UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
819 | | UVD_CGC_CTRL__SYS_MODE_MASK |
820 | | UVD_CGC_CTRL__UDEC_MODE_MASK |
821 | | UVD_CGC_CTRL__MPEG2_MODE_MASK |
822 | | UVD_CGC_CTRL__REGS_MODE_MASK |
823 | | UVD_CGC_CTRL__RBC_MODE_MASK |
824 | | UVD_CGC_CTRL__LMI_MC_MODE_MASK |
825 | | UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
826 | | UVD_CGC_CTRL__IDCT_MODE_MASK |
827 | | UVD_CGC_CTRL__MPRD_MODE_MASK |
828 | | UVD_CGC_CTRL__MPC_MODE_MASK |
829 | | UVD_CGC_CTRL__LBSI_MODE_MASK |
830 | | UVD_CGC_CTRL__LRBBM_MODE_MASK |
831 | | UVD_CGC_CTRL__WCB_MODE_MASK |
832 | | UVD_CGC_CTRL__VCPU_MODE_MASK |
833 | | UVD_CGC_CTRL__MMSCH_MODE_MASK); |
834 | WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); |
835 | |
836 | /* turn on */ |
837 | data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); |
838 | data |= (UVD_SUVD_CGC_GATE__SRE_MASK |
839 | | UVD_SUVD_CGC_GATE__SIT_MASK |
840 | | UVD_SUVD_CGC_GATE__SMP_MASK |
841 | | UVD_SUVD_CGC_GATE__SCM_MASK |
842 | | UVD_SUVD_CGC_GATE__SDB_MASK |
843 | | UVD_SUVD_CGC_GATE__SRE_H264_MASK |
844 | | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK |
845 | | UVD_SUVD_CGC_GATE__SIT_H264_MASK |
846 | | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK |
847 | | UVD_SUVD_CGC_GATE__SCM_H264_MASK |
848 | | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK |
849 | | UVD_SUVD_CGC_GATE__SDB_H264_MASK |
850 | | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK |
851 | | UVD_SUVD_CGC_GATE__SCLR_MASK |
852 | | UVD_SUVD_CGC_GATE__UVD_SC_MASK |
853 | | UVD_SUVD_CGC_GATE__ENT_MASK |
854 | | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK |
855 | | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK |
856 | | UVD_SUVD_CGC_GATE__SITE_MASK |
857 | | UVD_SUVD_CGC_GATE__SRE_VP9_MASK |
858 | | UVD_SUVD_CGC_GATE__SCM_VP9_MASK |
859 | | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK |
860 | | UVD_SUVD_CGC_GATE__SDB_VP9_MASK |
861 | | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); |
862 | WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); |
863 | |
864 | data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); |
865 | data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
866 | | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
867 | | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
868 | | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
869 | | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK |
870 | | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK |
871 | | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK |
872 | | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK |
873 | | UVD_SUVD_CGC_CTRL__IME_MODE_MASK |
874 | | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); |
875 | WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); |
876 | } |
877 | |
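/*
 * DPG-mode counterpart of the clock gating setup above: program UVD_CGC_CTRL,
 * UVD_CGC_GATE and the SUVD gating registers through the DPG write path,
 * either directly or into the indirect SRAM image depending on @indirect.
 */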
878 | static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, |
879 | uint8_t sram_sel, uint8_t indirect) |
880 | { |
881 | struct amdgpu_device *adev = vinst->adev; |
882 | int inst_idx = vinst->inst; |
883 | uint32_t reg_data = 0; |
884 | |
885 | /* enable sw clock gating control */ |
886 | if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) |
887 | reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
888 | else |
889 | reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
890 | reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; |
891 | reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; |
892 | reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | |
893 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK | |
894 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK | |
895 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK | |
896 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK | |
897 | UVD_CGC_CTRL__SYS_MODE_MASK | |
898 | UVD_CGC_CTRL__UDEC_MODE_MASK | |
899 | UVD_CGC_CTRL__MPEG2_MODE_MASK | |
900 | UVD_CGC_CTRL__REGS_MODE_MASK | |
901 | UVD_CGC_CTRL__RBC_MODE_MASK | |
902 | UVD_CGC_CTRL__LMI_MC_MODE_MASK | |
903 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK | |
904 | UVD_CGC_CTRL__IDCT_MODE_MASK | |
905 | UVD_CGC_CTRL__MPRD_MODE_MASK | |
906 | UVD_CGC_CTRL__MPC_MODE_MASK | |
907 | UVD_CGC_CTRL__LBSI_MODE_MASK | |
908 | UVD_CGC_CTRL__LRBBM_MODE_MASK | |
909 | UVD_CGC_CTRL__WCB_MODE_MASK | |
910 | UVD_CGC_CTRL__VCPU_MODE_MASK | |
911 | UVD_CGC_CTRL__MMSCH_MODE_MASK); |
912 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
913 | VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); |
914 | |
915 | /* turn off clock gating */ |
916 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
917 | VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); |
918 | |
919 | /* turn on SUVD clock gating */ |
920 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
921 | VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); |
922 | |
923 | /* turn on sw mode in UVD_SUVD_CGC_CTRL */ |
924 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
925 | VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); |
926 | } |
927 | |
928 | /** |
929 | * vcn_v2_5_enable_clock_gating - enable VCN clock gating |
930 | * |
931 | * @vinst: VCN instance |
932 | * |
933 | * Enable clock gating for VCN block |
934 | */ |
935 | static void vcn_v2_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst) |
936 | { |
937 | struct amdgpu_device *adev = vinst->adev; |
938 | int i = vinst->inst; |
939 | uint32_t data = 0; |
940 | |
941 | if (adev->vcn.harvest_config & (1 << i)) |
942 | return; |
943 | /* enable UVD CGC */ |
944 | data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); |
945 | if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) |
946 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
947 | else |
948 | data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
949 | data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; |
950 | data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; |
951 | WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); |
952 | |
953 | data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); |
954 | data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
955 | | UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
956 | | UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
957 | | UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
958 | | UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
959 | | UVD_CGC_CTRL__SYS_MODE_MASK |
960 | | UVD_CGC_CTRL__UDEC_MODE_MASK |
961 | | UVD_CGC_CTRL__MPEG2_MODE_MASK |
962 | | UVD_CGC_CTRL__REGS_MODE_MASK |
963 | | UVD_CGC_CTRL__RBC_MODE_MASK |
964 | | UVD_CGC_CTRL__LMI_MC_MODE_MASK |
965 | | UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
966 | | UVD_CGC_CTRL__IDCT_MODE_MASK |
967 | | UVD_CGC_CTRL__MPRD_MODE_MASK |
968 | | UVD_CGC_CTRL__MPC_MODE_MASK |
969 | | UVD_CGC_CTRL__LBSI_MODE_MASK |
970 | | UVD_CGC_CTRL__LRBBM_MODE_MASK |
971 | | UVD_CGC_CTRL__WCB_MODE_MASK |
972 | | UVD_CGC_CTRL__VCPU_MODE_MASK); |
973 | WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); |
974 | |
975 | data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); |
976 | data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
977 | | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
978 | | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
979 | | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
980 | | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK |
981 | | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK |
982 | | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK |
983 | | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK |
984 | | UVD_SUVD_CGC_CTRL__IME_MODE_MASK |
985 | | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); |
986 | WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); |
987 | } |
988 | |
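/*
 * VCN 2.6 only: enable RAS reporting for the VCPU/VCODEC block by setting the
 * VCN_RAS_CNTL controls and the matching VCPU and system interrupt enables.
 */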
989 | static void vcn_v2_6_enable_ras(struct amdgpu_vcn_inst *vinst, |
990 | bool indirect) |
991 | { |
992 | struct amdgpu_device *adev = vinst->adev; |
993 | int inst_idx = vinst->inst; |
994 | uint32_t tmp; |
995 | |
	if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(2, 6, 0))
997 | return; |
998 | |
999 | tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | |
1000 | VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | |
1001 | VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | |
1002 | VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; |
1003 | WREG32_SOC15_DPG_MODE(inst_idx, |
1004 | SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL), |
1005 | tmp, 0, indirect); |
1006 | |
1007 | tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; |
1008 | WREG32_SOC15_DPG_MODE(inst_idx, |
1009 | SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN), |
1010 | tmp, 0, indirect); |
1011 | |
1012 | tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; |
1013 | WREG32_SOC15_DPG_MODE(inst_idx, |
1014 | SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN), |
1015 | tmp, 0, indirect); |
1016 | } |
1017 | |
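/*
 * Bring one instance up in dynamic power gating (DPG) mode: with the
 * anti-hang mechanism disabled and power gating enabled, the boot sequence
 * (clock gating, VCPU clock, LMI/MPC setup, MC windows, interrupts) is
 * written through the DPG path and, when @indirect is set, handed to the PSP
 * as a single SRAM image before the decode ring is initialized.
 */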
1018 | static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) |
1019 | { |
1020 | struct amdgpu_device *adev = vinst->adev; |
1021 | int inst_idx = vinst->inst; |
1022 | volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; |
1023 | struct amdgpu_ring *ring; |
1024 | uint32_t rb_bufsz, tmp; |
1025 | |
1026 | /* disable register anti-hang mechanism */ |
1027 | WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1, |
1028 | ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1029 | /* enable dynamic power gating mode */ |
1030 | tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS); |
1031 | tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; |
1032 | tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; |
1033 | WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp); |
1034 | |
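	/*
	 * In indirect mode the DPG writes below are not issued immediately;
	 * they are accumulated in the image at dpg_sram_cpu_addr and flushed
	 * through amdgpu_vcn_psp_update_sram() at the end of this sequence.
	 */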
1035 | if (indirect) |
1036 | adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; |
1037 | |
1038 | /* enable clock gating */ |
	vcn_v2_5_clock_gating_dpg_mode(vinst, 0, indirect);
1040 | |
1041 | /* enable VCPU clock */ |
1042 | tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); |
1043 | tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; |
1044 | tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; |
1045 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1046 | VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); |
1047 | |
	/* disable master interrupt */
1049 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1050 | VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect); |
1051 | |
1052 | /* setup mmUVD_LMI_CTRL */ |
1053 | tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | |
1054 | UVD_LMI_CTRL__REQ_MODE_MASK | |
1055 | UVD_LMI_CTRL__CRC_RESET_MASK | |
1056 | UVD_LMI_CTRL__MASK_MC_URGENT_MASK | |
1057 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | |
1058 | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | |
1059 | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | |
1060 | 0x00100000L); |
1061 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1062 | VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); |
1063 | |
1064 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1065 | VCN, 0, mmUVD_MPC_CNTL), |
1066 | 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); |
1067 | |
1068 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1069 | VCN, 0, mmUVD_MPC_SET_MUXA0), |
1070 | ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | |
1071 | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | |
1072 | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | |
1073 | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); |
1074 | |
1075 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1076 | VCN, 0, mmUVD_MPC_SET_MUXB0), |
1077 | ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | |
1078 | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | |
1079 | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | |
1080 | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); |
1081 | |
1082 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1083 | VCN, 0, mmUVD_MPC_SET_MUX), |
1084 | ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | |
1085 | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | |
1086 | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); |
1087 | |
1088 | vcn_v2_5_mc_resume_dpg_mode(vinst, indirect); |
1089 | |
1090 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1091 | VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); |
1092 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1093 | VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); |
1094 | |
1095 | /* enable LMI MC and UMC channels */ |
1096 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1097 | VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); |
1098 | |
1099 | vcn_v2_6_enable_ras(vinst, indirect); |
1100 | |
1101 | /* unblock VCPU register access */ |
1102 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1103 | VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); |
1104 | |
1105 | tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); |
1106 | tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; |
1107 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1108 | VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); |
1109 | |
1110 | /* enable master interrupt */ |
1111 | WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( |
1112 | VCN, 0, mmUVD_MASTINT_EN), |
1113 | UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); |
1114 | |
1115 | if (indirect) |
		amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
1117 | |
1118 | ring = &adev->vcn.inst[inst_idx].ring_dec; |
1119 | /* force RBC into idle state */ |
1120 | rb_bufsz = order_base_2(ring->ring_size); |
1121 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); |
1122 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); |
1123 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); |
1124 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); |
1125 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); |
1126 | WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp); |
1127 | |
1128 | /* Stall DPG before WPTR/RPTR reset */ |
1129 | WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), |
1130 | UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, |
1131 | ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); |
1132 | fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; |
1133 | |
1134 | /* set the write pointer delay */ |
1135 | WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); |
1136 | |
1137 | /* set the wb address */ |
1138 | WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, |
1139 | (upper_32_bits(ring->gpu_addr) >> 2)); |
1140 | |
1141 | /* program the RB_BASE for ring buffer */ |
1142 | WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, |
1143 | lower_32_bits(ring->gpu_addr)); |
1144 | WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, |
1145 | upper_32_bits(ring->gpu_addr)); |
1146 | |
1147 | /* Initialize the ring buffer's read and write pointers */ |
1148 | WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0); |
1149 | |
1150 | WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0); |
1151 | |
1152 | ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR); |
1153 | WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, |
1154 | lower_32_bits(ring->wptr)); |
1155 | |
1156 | fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET; |
1157 | /* Unstall DPG */ |
1158 | WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), |
1159 | 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); |
1160 | |
1161 | /* Keeping one read-back to ensure all register writes are done, |
1162 | * otherwise it may introduce race conditions. |
1163 | */ |
1164 | RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS); |
1165 | |
1166 | return 0; |
1167 | } |
1168 | |
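/*
 * Full (non-DPG) start of one instance: disable clock gating, program the
 * LMI/MPC blocks and memory windows, release the VCPU from reset and poll
 * UVD_STATUS for the boot acknowledgement (retrying with a block reset up to
 * ten times), then set up the decode ring and both encode rings.
 */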
1169 | static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst) |
1170 | { |
1171 | struct amdgpu_device *adev = vinst->adev; |
1172 | int i = vinst->inst; |
1173 | volatile struct amdgpu_fw_shared *fw_shared = |
1174 | adev->vcn.inst[i].fw_shared.cpu_addr; |
1175 | struct amdgpu_ring *ring; |
1176 | uint32_t rb_bufsz, tmp; |
1177 | int j, k, r; |
1178 | |
1179 | if (adev->vcn.harvest_config & (1 << i)) |
1180 | return 0; |
1181 | |
1182 | if (adev->pm.dpm_enabled) |
		amdgpu_dpm_enable_vcn(adev, true, i);
1184 | |
1185 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) |
		return vcn_v2_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
1187 | |
1188 | /* disable register anti-hang mechanism */ |
1189 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0, |
1190 | ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1191 | |
1192 | /* set uvd status busy */ |
1193 | tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; |
1194 | WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp); |
1195 | |
1196 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) |
1197 | return 0; |
1198 | |
1199 | /* SW clock gating */ |
1200 | vcn_v2_5_disable_clock_gating(vinst); |
1201 | |
1202 | /* enable VCPU clock */ |
1203 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), |
1204 | UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); |
1205 | |
1206 | /* disable master interrupt */ |
1207 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0, |
1208 | ~UVD_MASTINT_EN__VCPU_EN_MASK); |
1209 | |
1210 | /* setup mmUVD_LMI_CTRL */ |
1211 | tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL); |
1212 | tmp &= ~0xff; |
	WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8 |
1214 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | |
1215 | UVD_LMI_CTRL__MASK_MC_URGENT_MASK | |
1216 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | |
1217 | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); |
1218 | |
1219 | /* setup mmUVD_MPC_CNTL */ |
1220 | tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL); |
1221 | tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; |
1222 | tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; |
1223 | WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); |
1224 | |
1225 | /* setup UVD_MPC_SET_MUXA0 */ |
1226 | WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0, |
1227 | ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | |
1228 | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | |
1229 | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | |
1230 | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); |
1231 | |
1232 | /* setup UVD_MPC_SET_MUXB0 */ |
1233 | WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0, |
1234 | ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | |
1235 | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | |
1236 | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | |
1237 | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); |
1238 | |
1239 | /* setup mmUVD_MPC_SET_MUX */ |
1240 | WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX, |
1241 | ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | |
1242 | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | |
1243 | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); |
1244 | |
1245 | vcn_v2_5_mc_resume(vinst); |
1246 | |
1247 | /* VCN global tiling registers */ |
1248 | WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, |
1249 | adev->gfx.config.gb_addr_config); |
1250 | WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, |
1251 | adev->gfx.config.gb_addr_config); |
1252 | |
1253 | /* enable LMI MC and UMC channels */ |
1254 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0, |
1255 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); |
1256 | |
1257 | /* unblock VCPU register access */ |
1258 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0, |
1259 | ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); |
1260 | |
1261 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, |
1262 | ~UVD_VCPU_CNTL__BLK_RST_MASK); |
1263 | |
1264 | for (k = 0; k < 10; ++k) { |
1265 | uint32_t status; |
1266 | |
1267 | for (j = 0; j < 100; ++j) { |
1268 | status = RREG32_SOC15(VCN, i, mmUVD_STATUS); |
1269 | if (status & 2) |
1270 | break; |
1271 | if (amdgpu_emu_mode == 1) |
				msleep(500);
1273 | else |
1274 | mdelay(10); |
1275 | } |
1276 | r = 0; |
1277 | if (status & 2) |
1278 | break; |
1279 | |
1280 | DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n" ); |
1281 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), |
1282 | UVD_VCPU_CNTL__BLK_RST_MASK, |
1283 | ~UVD_VCPU_CNTL__BLK_RST_MASK); |
1284 | mdelay(10); |
1285 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, |
1286 | ~UVD_VCPU_CNTL__BLK_RST_MASK); |
1287 | |
1288 | mdelay(10); |
1289 | r = -1; |
1290 | } |
1291 | |
1292 | if (r) { |
1293 | DRM_ERROR("VCN decode not responding, giving up!!!\n" ); |
1294 | return r; |
1295 | } |
1296 | |
1297 | /* enable master interrupt */ |
1298 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), |
1299 | UVD_MASTINT_EN__VCPU_EN_MASK, |
1300 | ~UVD_MASTINT_EN__VCPU_EN_MASK); |
1301 | |
1302 | /* clear the busy bit of VCN_STATUS */ |
1303 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0, |
1304 | ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); |
1305 | |
1306 | WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0); |
1307 | |
1308 | ring = &adev->vcn.inst[i].ring_dec; |
1309 | /* force RBC into idle state */ |
1310 | rb_bufsz = order_base_2(ring->ring_size); |
1311 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); |
1312 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); |
1313 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); |
1314 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); |
1315 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); |
1316 | WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); |
1317 | |
1318 | fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; |
1319 | /* program the RB_BASE for ring buffer */ |
1320 | WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, |
1321 | lower_32_bits(ring->gpu_addr)); |
1322 | WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, |
1323 | upper_32_bits(ring->gpu_addr)); |
1324 | |
1325 | /* Initialize the ring buffer's read and write pointers */ |
1326 | WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0); |
1327 | |
1328 | ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); |
1329 | WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, |
1330 | lower_32_bits(ring->wptr)); |
1331 | fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET; |
1332 | |
1333 | fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET; |
1334 | ring = &adev->vcn.inst[i].ring_enc[0]; |
1335 | WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); |
1336 | WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); |
1337 | WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); |
1338 | WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); |
1339 | WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); |
1340 | fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET; |
1341 | |
1342 | fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET; |
1343 | ring = &adev->vcn.inst[i].ring_enc[1]; |
1344 | WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); |
1345 | WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); |
1346 | WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); |
1347 | WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); |
1348 | WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); |
1349 | fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; |
1350 | |
1351 | /* Keeping one read-back to ensure all register writes are done, |
1352 | * otherwise it may introduce race conditions. |
1353 | */ |
1354 | RREG32_SOC15(VCN, i, mmUVD_STATUS); |
1355 | |
1356 | return 0; |
1357 | } |
1358 | |
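/*
 * Hand the MMSCH (the multimedia scheduler used under SR-IOV) a descriptor
 * table: write its GPU address, VMID and size, then kick the mailbox and
 * poll for the 0x10000002 completion code.
 */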
1359 | static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev, |
1360 | struct amdgpu_mm_table *table) |
1361 | { |
1362 | uint32_t data = 0, loop = 0, size = 0; |
1363 | uint64_t addr = table->gpu_addr; |
	struct mmsch_v1_1_init_header *header = NULL;
1365 | |
1366 | header = (struct mmsch_v1_1_init_header *)table->cpu_addr; |
1367 | size = header->total_size; |
1368 | |
1369 | /* |
1370 | * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of |
1371 | * memory descriptor location |
1372 | */ |
1373 | WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr)); |
1374 | WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr)); |
1375 | |
1376 | /* 2, update vmid of descriptor */ |
1377 | data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID); |
1378 | data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; |
1379 | /* use domain0 for MM scheduler */ |
1380 | data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); |
1381 | WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data); |
1382 | |
1383 | /* 3, notify mmsch about the size of this descriptor */ |
1384 | WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size); |
1385 | |
1386 | /* 4, set resp to zero */ |
1387 | WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0); |
1388 | |
1389 | /* |
1390 | * 5, kick off the initialization and wait until |
1391 | * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero |
1392 | */ |
1393 | WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001); |
1394 | |
1395 | data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP); |
1396 | loop = 10; |
1397 | while ((data & 0x10000002) != 0x10000002) { |
		udelay(100);
1399 | data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP); |
1400 | loop--; |
1401 | if (!loop) |
1402 | break; |
1403 | } |
1404 | |
1405 | if (!loop) { |
1406 | dev_err(adev->dev, |
1407 | "failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n" , |
1408 | data); |
1409 | return -EBUSY; |
1410 | } |
1411 | |
1412 | return 0; |
1413 | } |
1414 | |
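/*
 * Build the MMSCH init table for SR-IOV: for every instance, record the
 * register programming a bare-metal start would perform (memory-controller
 * windows and the rest of the per-instance setup) as MMSCH direct-write and
 * read-modify-write commands; the finished table is handed to the scheduler
 * by vcn_v2_5_mmsch_start().
 */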
1415 | static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) |
1416 | { |
1417 | struct amdgpu_ring *ring; |
1418 | uint32_t offset, size, tmp, i, rb_bufsz; |
1419 | uint32_t table_size = 0; |
1420 | struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } }; |
1421 | struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } }; |
1422 | struct mmsch_v1_0_cmd_end end = { { 0 } }; |
1423 | uint32_t *init_table = adev->virt.mm_table.cpu_addr; |
struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
1425 | |
1426 | direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; |
1427 | direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; |
1428 | end.cmd_header.command_type = MMSCH_COMMAND__END; |
1429 | |
1430 | header->version = MMSCH_VERSION; |
1431 | header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2; |
1432 | init_table += header->total_size; |
1433 | |
1434 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
1435 | header->eng[i].table_offset = header->total_size; |
1436 | header->eng[i].init_status = 0; |
1437 | header->eng[i].table_size = 0; |
1438 | |
1439 | table_size = 0; |
1440 | |
1441 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT( |
1442 | SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), |
1443 | ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); |
1444 | |
1445 | size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); |
/* mc resume */
1447 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
1448 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1449 | SOC15_REG_OFFSET(VCN, i, |
1450 | mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), |
1451 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); |
1452 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1453 | SOC15_REG_OFFSET(VCN, i, |
1454 | mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), |
1455 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); |
1456 | offset = 0; |
1457 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1458 | SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0); |
1459 | } else { |
1460 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1461 | SOC15_REG_OFFSET(VCN, i, |
1462 | mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), |
1463 | lower_32_bits(adev->vcn.inst[i].gpu_addr)); |
1464 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1465 | SOC15_REG_OFFSET(VCN, i, |
1466 | mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), |
1467 | upper_32_bits(adev->vcn.inst[i].gpu_addr)); |
1468 | offset = size; |
1469 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1470 | SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), |
1471 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); |
1472 | } |
1473 | |
1474 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1475 | SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0), |
1476 | size); |
1477 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1478 | SOC15_REG_OFFSET(VCN, i, |
1479 | mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), |
1480 | lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); |
1481 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1482 | SOC15_REG_OFFSET(VCN, i, |
1483 | mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), |
1484 | upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); |
1485 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1486 | SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1), |
1487 | 0); |
1488 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1489 | SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1), |
1490 | AMDGPU_VCN_STACK_SIZE); |
1491 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1492 | SOC15_REG_OFFSET(VCN, i, |
1493 | mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), |
1494 | lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + |
1495 | AMDGPU_VCN_STACK_SIZE)); |
1496 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1497 | SOC15_REG_OFFSET(VCN, i, |
1498 | mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), |
1499 | upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + |
1500 | AMDGPU_VCN_STACK_SIZE)); |
1501 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1502 | SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2), |
1503 | 0); |
1504 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1505 | SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2), |
1506 | AMDGPU_VCN_CONTEXT_SIZE); |
1507 | |
1508 | ring = &adev->vcn.inst[i].ring_enc[0]; |
1509 | ring->wptr = 0; |
1510 | |
1511 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1512 | SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO), |
1513 | lower_32_bits(ring->gpu_addr)); |
1514 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1515 | SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI), |
1516 | upper_32_bits(ring->gpu_addr)); |
1517 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1518 | SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE), |
1519 | ring->ring_size / 4); |
1520 | |
1521 | ring = &adev->vcn.inst[i].ring_dec; |
1522 | ring->wptr = 0; |
1523 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1524 | SOC15_REG_OFFSET(VCN, i, |
1525 | mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), |
1526 | lower_32_bits(ring->gpu_addr)); |
1527 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1528 | SOC15_REG_OFFSET(VCN, i, |
1529 | mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), |
1530 | upper_32_bits(ring->gpu_addr)); |
1531 | |
1532 | /* force RBC into idle state */ |
1533 | rb_bufsz = order_base_2(ring->ring_size); |
1534 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); |
1535 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); |
1536 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); |
1537 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); |
1538 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); |
1539 | MMSCH_V1_0_INSERT_DIRECT_WT( |
1540 | SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp); |
1541 | |
1542 | /* add end packet */ |
1543 | memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); |
1544 | table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; |
1545 | init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4; |
1546 | |
1547 | /* refine header */ |
1548 | header->eng[i].table_size = table_size; |
1549 | header->total_size += table_size; |
1550 | } |
1551 | |
return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
1553 | } |
1554 | |
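/**
 * vcn_v2_5_stop_dpg_mode - stop VCN block with dpg mode
 *
 * @vinst: VCN instance
 *
 * Waits for the power status to settle and for the ring read pointers to
 * catch up with their write pointers, then clears the dynamic power
 * gating mode bit in UVD_POWER_STATUS.
 */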
1555 | static int vcn_v2_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) |
1556 | { |
1557 | struct amdgpu_device *adev = vinst->adev; |
1558 | int inst_idx = vinst->inst; |
1559 | uint32_t tmp; |
1560 | |
1561 | /* Wait for power status to be 1 */ |
1562 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, |
1563 | UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1564 | |
1565 | /* wait for read ptr to be equal to write ptr */ |
1566 | tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR); |
1567 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF); |
1568 | |
1569 | tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2); |
1570 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF); |
1571 | |
1572 | tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; |
1573 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF); |
1574 | |
1575 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, |
1576 | UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1577 | |
1578 | /* disable dynamic power gating mode */ |
1579 | WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, |
1580 | ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); |
1581 | |
1582 | /* Keeping one read-back to ensure all register writes are done, |
1583 | * otherwise it may introduce race conditions. |
1584 | */ |
1585 | RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS); |
1586 | |
1587 | return 0; |
1588 | } |
1589 | |
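/**
 * vcn_v2_5_stop - stop VCN block
 *
 * @vinst: VCN instance
 *
 * In DPG mode this defers to vcn_v2_5_stop_dpg_mode(); otherwise it waits
 * for the engine and LMI to drain, blocks VCPU register access, resets and
 * clock-gates the VCPU, re-enables clock gating and the register anti-hang
 * mechanism, and finally asks DPM to power down the instance.
 */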
1590 | static int vcn_v2_5_stop(struct amdgpu_vcn_inst *vinst) |
1591 | { |
1592 | struct amdgpu_device *adev = vinst->adev; |
1593 | int i = vinst->inst; |
1594 | uint32_t tmp; |
1595 | int r; |
1596 | |
1597 | if (adev->vcn.harvest_config & (1 << i)) |
1598 | return 0; |
1599 | |
1600 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { |
1601 | r = vcn_v2_5_stop_dpg_mode(vinst); |
1602 | goto done; |
1603 | } |
1604 | |
1605 | /* wait for vcn idle */ |
1606 | r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); |
1607 | if (r) |
1608 | goto done; |
1609 | |
1610 | tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | |
1611 | UVD_LMI_STATUS__READ_CLEAN_MASK | |
1612 | UVD_LMI_STATUS__WRITE_CLEAN_MASK | |
1613 | UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; |
1614 | r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); |
1615 | if (r) |
1616 | goto done; |
1617 | |
1618 | /* block LMI UMC channel */ |
1619 | tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); |
1620 | tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; |
1621 | WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); |
1622 | |
1623 | tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| |
1624 | UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; |
1625 | r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); |
1626 | if (r) |
1627 | goto done; |
1628 | |
1629 | /* block VCPU register access */ |
1630 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), |
1631 | UVD_RB_ARB_CTRL__VCPU_DIS_MASK, |
1632 | ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); |
1633 | |
1634 | /* reset VCPU */ |
1635 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), |
1636 | UVD_VCPU_CNTL__BLK_RST_MASK, |
1637 | ~UVD_VCPU_CNTL__BLK_RST_MASK); |
1638 | |
1639 | /* disable VCPU clock */ |
1640 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, |
1641 | ~(UVD_VCPU_CNTL__CLK_EN_MASK)); |
1642 | |
1643 | /* clear status */ |
1644 | WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); |
1645 | |
1646 | vcn_v2_5_enable_clock_gating(vinst); |
1647 | |
1648 | /* enable register anti-hang mechanism */ |
1649 | WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), |
1650 | UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, |
1651 | ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1652 | |
1653 | /* Keeping one read-back to ensure all register writes are done, |
1654 | * otherwise it may introduce race conditions. |
1655 | */ |
1656 | RREG32_SOC15(VCN, i, mmUVD_STATUS); |
1657 | done: |
1658 | if (adev->pm.dpm_enabled) |
amdgpu_dpm_enable_vcn(adev, false, i);
1660 | |
1661 | return r; |
1662 | } |
1663 | |
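/**
 * vcn_v2_5_pause_dpg_mode - pause/unpause DPG mode
 *
 * @vinst: VCN instance
 * @new_state: requested pause state
 *
 * On entering pause, raises the NJ pause request, waits for the firmware
 * acknowledgement and reprograms both encode rings while DPG power-up is
 * stalled; on leaving pause, simply clears the pause request.  Nothing is
 * touched when the requested state matches the cached one.
 */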
1664 | static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, |
1665 | struct dpg_pause_state *new_state) |
1666 | { |
1667 | struct amdgpu_device *adev = vinst->adev; |
1668 | int inst_idx = vinst->inst; |
1669 | struct amdgpu_ring *ring; |
1670 | uint32_t reg_data = 0; |
1671 | int ret_code = 0; |
1672 | |
1673 | /* pause/unpause if state is changed */ |
1674 | if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { |
DRM_DEBUG("dpg pause state changed %d -> %d",
1676 | adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); |
1677 | reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) & |
1678 | (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); |
1679 | |
1680 | if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { |
1681 | ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, |
1682 | UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1683 | |
1684 | if (!ret_code) { |
1685 | volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; |
1686 | |
1687 | /* pause DPG */ |
1688 | reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; |
1689 | WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data); |
1690 | |
1691 | /* wait for ACK */ |
1692 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE, |
1693 | UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, |
1694 | UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); |
1695 | |
1696 | /* Stall DPG before WPTR/RPTR reset */ |
1697 | WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), |
1698 | UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, |
1699 | ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); |
1700 | |
1701 | /* Restore */ |
1702 | fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET; |
1703 | ring = &adev->vcn.inst[inst_idx].ring_enc[0]; |
1704 | ring->wptr = 0; |
1705 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); |
1706 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); |
1707 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); |
1708 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); |
1709 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); |
1710 | fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET; |
1711 | |
1712 | fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET; |
1713 | ring = &adev->vcn.inst[inst_idx].ring_enc[1]; |
1714 | ring->wptr = 0; |
1715 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); |
1716 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); |
1717 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); |
1718 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); |
1719 | WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); |
1720 | fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; |
1721 | |
1722 | /* Unstall DPG */ |
1723 | WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), |
1724 | 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); |
1725 | |
1726 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, |
1727 | UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1728 | } |
1729 | } else { |
1730 | reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; |
1731 | WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data); |
1732 | SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, |
1733 | UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); |
1734 | } |
1735 | adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; |
1736 | } |
1737 | |
1738 | return 0; |
1739 | } |
1740 | |
1741 | /** |
1742 | * vcn_v2_5_dec_ring_get_rptr - get read pointer |
1743 | * |
1744 | * @ring: amdgpu_ring pointer |
1745 | * |
1746 | * Returns the current hardware read pointer |
1747 | */ |
1748 | static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) |
1749 | { |
1750 | struct amdgpu_device *adev = ring->adev; |
1751 | |
1752 | return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR); |
1753 | } |
1754 | |
1755 | /** |
1756 | * vcn_v2_5_dec_ring_get_wptr - get write pointer |
1757 | * |
1758 | * @ring: amdgpu_ring pointer |
1759 | * |
1760 | * Returns the current hardware write pointer |
1761 | */ |
1762 | static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) |
1763 | { |
1764 | struct amdgpu_device *adev = ring->adev; |
1765 | |
1766 | if (ring->use_doorbell) |
1767 | return *ring->wptr_cpu_addr; |
1768 | else |
1769 | return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR); |
1770 | } |
1771 | |
1772 | /** |
1773 | * vcn_v2_5_dec_ring_set_wptr - set write pointer |
1774 | * |
1775 | * @ring: amdgpu_ring pointer |
1776 | * |
1777 | * Commits the write pointer to the hardware |
1778 | */ |
1779 | static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) |
1780 | { |
1781 | struct amdgpu_device *adev = ring->adev; |
1782 | |
1783 | if (ring->use_doorbell) { |
1784 | *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); |
1785 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); |
1786 | } else { |
1787 | WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); |
1788 | } |
1789 | } |
1790 | |
1791 | static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { |
1792 | .type = AMDGPU_RING_TYPE_VCN_DEC, |
1793 | .align_mask = 0xf, |
1794 | .secure_submission_supported = true, |
1795 | .get_rptr = vcn_v2_5_dec_ring_get_rptr, |
1796 | .get_wptr = vcn_v2_5_dec_ring_get_wptr, |
1797 | .set_wptr = vcn_v2_5_dec_ring_set_wptr, |
1798 | .emit_frame_size = |
1799 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + |
1800 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + |
1801 | 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ |
1802 | 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ |
1803 | 6, |
1804 | .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ |
1805 | .emit_ib = vcn_v2_0_dec_ring_emit_ib, |
1806 | .emit_fence = vcn_v2_0_dec_ring_emit_fence, |
1807 | .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, |
1808 | .test_ring = vcn_v2_0_dec_ring_test_ring, |
1809 | .test_ib = amdgpu_vcn_dec_ring_test_ib, |
1810 | .insert_nop = vcn_v2_0_dec_ring_insert_nop, |
1811 | .insert_start = vcn_v2_0_dec_ring_insert_start, |
1812 | .insert_end = vcn_v2_0_dec_ring_insert_end, |
1813 | .pad_ib = amdgpu_ring_generic_pad_ib, |
1814 | .begin_use = vcn_v2_5_ring_begin_use, |
1815 | .end_use = vcn_v2_5_ring_end_use, |
1816 | .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, |
1817 | .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, |
1818 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, |
1819 | }; |
1820 | |
1821 | /** |
1822 | * vcn_v2_5_enc_ring_get_rptr - get enc read pointer |
1823 | * |
1824 | * @ring: amdgpu_ring pointer |
1825 | * |
1826 | * Returns the current hardware enc read pointer |
1827 | */ |
1828 | static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) |
1829 | { |
1830 | struct amdgpu_device *adev = ring->adev; |
1831 | |
1832 | if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) |
1833 | return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR); |
1834 | else |
1835 | return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2); |
1836 | } |
1837 | |
1838 | /** |
1839 | * vcn_v2_5_enc_ring_get_wptr - get enc write pointer |
1840 | * |
1841 | * @ring: amdgpu_ring pointer |
1842 | * |
1843 | * Returns the current hardware enc write pointer |
1844 | */ |
1845 | static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) |
1846 | { |
1847 | struct amdgpu_device *adev = ring->adev; |
1848 | |
1849 | if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { |
1850 | if (ring->use_doorbell) |
1851 | return *ring->wptr_cpu_addr; |
1852 | else |
1853 | return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR); |
1854 | } else { |
1855 | if (ring->use_doorbell) |
1856 | return *ring->wptr_cpu_addr; |
1857 | else |
1858 | return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2); |
1859 | } |
1860 | } |
1861 | |
1862 | /** |
1863 | * vcn_v2_5_enc_ring_set_wptr - set enc write pointer |
1864 | * |
1865 | * @ring: amdgpu_ring pointer |
1866 | * |
1867 | * Commits the enc write pointer to the hardware |
1868 | */ |
1869 | static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) |
1870 | { |
1871 | struct amdgpu_device *adev = ring->adev; |
1872 | |
1873 | if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { |
1874 | if (ring->use_doorbell) { |
1875 | *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); |
1876 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); |
1877 | } else { |
1878 | WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); |
1879 | } |
1880 | } else { |
1881 | if (ring->use_doorbell) { |
1882 | *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); |
1883 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); |
1884 | } else { |
1885 | WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); |
1886 | } |
1887 | } |
1888 | } |
1889 | |
1890 | static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { |
1891 | .type = AMDGPU_RING_TYPE_VCN_ENC, |
1892 | .align_mask = 0x3f, |
1893 | .nop = VCN_ENC_CMD_NO_OP, |
1894 | .get_rptr = vcn_v2_5_enc_ring_get_rptr, |
1895 | .get_wptr = vcn_v2_5_enc_ring_get_wptr, |
1896 | .set_wptr = vcn_v2_5_enc_ring_set_wptr, |
1897 | .emit_frame_size = |
1898 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + |
1899 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + |
1900 | 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ |
1901 | 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ |
1902 | 1, /* vcn_v2_0_enc_ring_insert_end */ |
1903 | .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ |
1904 | .emit_ib = vcn_v2_0_enc_ring_emit_ib, |
1905 | .emit_fence = vcn_v2_0_enc_ring_emit_fence, |
1906 | .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, |
1907 | .test_ring = amdgpu_vcn_enc_ring_test_ring, |
1908 | .test_ib = amdgpu_vcn_enc_ring_test_ib, |
1909 | .insert_nop = amdgpu_ring_insert_nop, |
1910 | .insert_end = vcn_v2_0_enc_ring_insert_end, |
1911 | .pad_ib = amdgpu_ring_generic_pad_ib, |
1912 | .begin_use = vcn_v2_5_ring_begin_use, |
1913 | .end_use = vcn_v2_5_ring_end_use, |
1914 | .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, |
1915 | .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, |
1916 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, |
1917 | }; |
1918 | |
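/* Install the decode ring callbacks on every non-harvested instance. */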
1919 | static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) |
1920 | { |
1921 | int i; |
1922 | |
1923 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
1924 | if (adev->vcn.harvest_config & (1 << i)) |
1925 | continue; |
1926 | adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; |
1927 | adev->vcn.inst[i].ring_dec.me = i; |
1928 | } |
1929 | } |
1930 | |
1931 | static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) |
1932 | { |
1933 | int i, j; |
1934 | |
1935 | for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { |
1936 | if (adev->vcn.harvest_config & (1 << j)) |
1937 | continue; |
1938 | for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { |
1939 | adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; |
1940 | adev->vcn.inst[j].ring_enc[i].me = j; |
1941 | } |
1942 | } |
1943 | } |
1944 | |
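/*
 * The block counts as idle only when every non-harvested instance
 * reports UVD_STATUS__IDLE.
 */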
1945 | static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block) |
1946 | { |
1947 | struct amdgpu_device *adev = ip_block->adev; |
1948 | int i, ret = 1; |
1949 | |
1950 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
1951 | if (adev->vcn.harvest_config & (1 << i)) |
1952 | continue; |
1953 | |
1954 | ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); |
1955 | } |
1956 | |
1957 | return ret; |
1958 | } |
1959 | |
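/*
 * Poll each non-harvested instance until UVD_STATUS reads idle or the
 * register wait times out.
 */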
1960 | static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) |
1961 | { |
1962 | struct amdgpu_device *adev = ip_block->adev; |
1963 | int i, ret = 0; |
1964 | |
1965 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
1966 | if (adev->vcn.harvest_config & (1 << i)) |
1967 | continue; |
1968 | ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, |
1969 | UVD_STATUS__IDLE); |
1970 | if (ret) |
1971 | return ret; |
1972 | } |
1973 | |
1974 | return ret; |
1975 | } |
1976 | |
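/*
 * Enable or disable clock gating on all VCN instances.  Gating is refused
 * with -EBUSY while the block is still busy, and the call is a no-op
 * under SR-IOV.
 */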
1977 | static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, |
1978 | enum amd_clockgating_state state) |
1979 | { |
1980 | struct amdgpu_device *adev = ip_block->adev; |
1981 | bool enable = (state == AMD_CG_STATE_GATE); |
1982 | int i; |
1983 | |
1984 | if (amdgpu_sriov_vf(adev)) |
1985 | return 0; |
1986 | |
1987 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
1988 | struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; |
1989 | |
1990 | if (enable) { |
1991 | if (!vcn_v2_5_is_idle(ip_block)) |
1992 | return -EBUSY; |
1993 | vcn_v2_5_enable_clock_gating(vinst); |
1994 | } else { |
1995 | vcn_v2_5_disable_clock_gating(vinst); |
1996 | } |
1997 | } |
1998 | |
1999 | return 0; |
2000 | } |
2001 | |
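/*
 * Move one VCN instance between the gated (stopped) and ungated (running)
 * power states.  Redundant transitions and SR-IOV are treated as no-ops.
 */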
2002 | static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst, |
2003 | enum amd_powergating_state state) |
2004 | { |
2005 | struct amdgpu_device *adev = vinst->adev; |
2006 | int ret; |
2007 | |
2008 | if (amdgpu_sriov_vf(adev)) |
2009 | return 0; |
2010 | |
2011 | if (state == vinst->cur_state) |
2012 | return 0; |
2013 | |
2014 | if (state == AMD_PG_STATE_GATE) |
2015 | ret = vcn_v2_5_stop(vinst); |
2016 | else |
2017 | ret = vcn_v2_5_start(vinst); |
2018 | |
2019 | if (!ret) |
2020 | vinst->cur_state = state; |
2021 | |
2022 | return ret; |
2023 | } |
2024 | |
2025 | static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev, |
2026 | struct amdgpu_irq_src *source, |
2027 | unsigned type, |
2028 | enum amdgpu_interrupt_state state) |
2029 | { |
2030 | return 0; |
2031 | } |
2032 | |
2033 | static int vcn_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev, |
2034 | struct amdgpu_irq_src *source, |
2035 | unsigned int type, |
2036 | enum amdgpu_interrupt_state state) |
2037 | { |
2038 | return 0; |
2039 | } |
2040 | |
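/*
 * Route a VCN interrupt to the owning instance and signal fence completion
 * on the matching decode or encode ring.
 */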
2041 | static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, |
2042 | struct amdgpu_irq_src *source, |
2043 | struct amdgpu_iv_entry *entry) |
2044 | { |
2045 | uint32_t ip_instance; |
2046 | |
2047 | switch (entry->client_id) { |
2048 | case SOC15_IH_CLIENTID_VCN: |
2049 | ip_instance = 0; |
2050 | break; |
2051 | case SOC15_IH_CLIENTID_VCN1: |
2052 | ip_instance = 1; |
2053 | break; |
2054 | default: |
DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
2056 | return 0; |
2057 | } |
2058 | |
DRM_DEBUG("IH: VCN TRAP\n");
2060 | |
2061 | switch (entry->src_id) { |
2062 | case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: |
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
2064 | break; |
2065 | case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: |
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
2067 | break; |
2068 | case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: |
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
2070 | break; |
2071 | default: |
DRM_ERROR("Unhandled interrupt: %d %d\n",
2073 | entry->src_id, entry->src_data[0]); |
2074 | break; |
2075 | } |
2076 | |
2077 | return 0; |
2078 | } |
2079 | |
2080 | static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { |
2081 | .set = vcn_v2_5_set_interrupt_state, |
2082 | .process = vcn_v2_5_process_interrupt, |
2083 | }; |
2084 | |
2085 | static const struct amdgpu_irq_src_funcs vcn_v2_6_ras_irq_funcs = { |
2086 | .set = vcn_v2_6_set_ras_interrupt_state, |
2087 | .process = amdgpu_vcn_process_poison_irq, |
2088 | }; |
2089 | |
2090 | static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) |
2091 | { |
2092 | int i; |
2093 | |
2094 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
2095 | if (adev->vcn.harvest_config & (1 << i)) |
2096 | continue; |
2097 | adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; |
2098 | adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; |
2099 | |
2100 | adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; |
2101 | adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs; |
2102 | } |
2103 | } |
2104 | |
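/*
 * Write the register snapshot captured by vcn_v2_5_dump_ip_state() to the
 * given drm_printer, skipping harvested instances and printing register
 * values only for powered-up ones.
 */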
2105 | static void vcn_v2_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) |
2106 | { |
2107 | struct amdgpu_device *adev = ip_block->adev; |
2108 | int i, j; |
2109 | uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); |
2110 | uint32_t inst_off, is_powered; |
2111 | |
2112 | if (!adev->vcn.ip_dump) |
2113 | return; |
2114 | |
drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
2116 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
2117 | if (adev->vcn.harvest_config & (1 << i)) { |
drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
2119 | continue; |
2120 | } |
2121 | |
2122 | inst_off = i * reg_count; |
2123 | is_powered = (adev->vcn.ip_dump[inst_off] & |
2124 | UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; |
2125 | |
2126 | if (is_powered) { |
drm_printf(p, "\nActive Instance:VCN%d\n", i);
for (j = 0; j < reg_count; j++)
drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name,
adev->vcn.ip_dump[inst_off + j]);
} else {
drm_printf(p, "\nInactive Instance:VCN%d\n", i);
2133 | } |
2134 | } |
2135 | } |
2136 | |
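/*
 * Capture the registers in vcn_reg_list_2_5 for every powered-on,
 * non-harvested instance into adev->vcn.ip_dump for later printing.
 */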
2137 | static void vcn_v2_5_dump_ip_state(struct amdgpu_ip_block *ip_block) |
2138 | { |
2139 | struct amdgpu_device *adev = ip_block->adev; |
2140 | int i, j; |
2141 | bool is_powered; |
2142 | uint32_t inst_off; |
2143 | uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); |
2144 | |
2145 | if (!adev->vcn.ip_dump) |
2146 | return; |
2147 | |
2148 | for (i = 0; i < adev->vcn.num_vcn_inst; i++) { |
2149 | if (adev->vcn.harvest_config & (1 << i)) |
2150 | continue; |
2151 | |
2152 | inst_off = i * reg_count; |
2153 | /* mmUVD_POWER_STATUS is always readable and is first element of the array */ |
2154 | adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); |
2155 | is_powered = (adev->vcn.ip_dump[inst_off] & |
2156 | UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; |
2157 | |
2158 | if (is_powered) |
2159 | for (j = 1; j < reg_count; j++) |
2160 | adev->vcn.ip_dump[inst_off + j] = |
2161 | RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); |
2162 | } |
2163 | } |
2164 | |
2165 | static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { |
.name = "vcn_v2_5",
2167 | .early_init = vcn_v2_5_early_init, |
2168 | .sw_init = vcn_v2_5_sw_init, |
2169 | .sw_fini = vcn_v2_5_sw_fini, |
2170 | .hw_init = vcn_v2_5_hw_init, |
2171 | .hw_fini = vcn_v2_5_hw_fini, |
2172 | .suspend = vcn_v2_5_suspend, |
2173 | .resume = vcn_v2_5_resume, |
2174 | .is_idle = vcn_v2_5_is_idle, |
2175 | .wait_for_idle = vcn_v2_5_wait_for_idle, |
2176 | .set_clockgating_state = vcn_v2_5_set_clockgating_state, |
2177 | .set_powergating_state = vcn_set_powergating_state, |
2178 | .dump_ip_state = vcn_v2_5_dump_ip_state, |
2179 | .print_ip_state = vcn_v2_5_print_ip_state, |
2180 | }; |
2181 | |
2182 | static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { |
.name = "vcn_v2_6",
2184 | .early_init = vcn_v2_5_early_init, |
2185 | .sw_init = vcn_v2_5_sw_init, |
2186 | .sw_fini = vcn_v2_5_sw_fini, |
2187 | .hw_init = vcn_v2_5_hw_init, |
2188 | .hw_fini = vcn_v2_5_hw_fini, |
2189 | .suspend = vcn_v2_5_suspend, |
2190 | .resume = vcn_v2_5_resume, |
2191 | .is_idle = vcn_v2_5_is_idle, |
2192 | .wait_for_idle = vcn_v2_5_wait_for_idle, |
2193 | .set_clockgating_state = vcn_v2_5_set_clockgating_state, |
2194 | .set_powergating_state = vcn_set_powergating_state, |
2195 | .dump_ip_state = vcn_v2_5_dump_ip_state, |
2196 | .print_ip_state = vcn_v2_5_print_ip_state, |
2197 | }; |
2198 | |
2199 | const struct amdgpu_ip_block_version vcn_v2_5_ip_block = |
2200 | { |
2201 | .type = AMD_IP_BLOCK_TYPE_VCN, |
2202 | .major = 2, |
2203 | .minor = 5, |
2204 | .rev = 0, |
2205 | .funcs = &vcn_v2_5_ip_funcs, |
2206 | }; |
2207 | |
2208 | const struct amdgpu_ip_block_version vcn_v2_6_ip_block = |
2209 | { |
2210 | .type = AMD_IP_BLOCK_TYPE_VCN, |
2211 | .major = 2, |
2212 | .minor = 6, |
2213 | .rev = 0, |
2214 | .funcs = &vcn_v2_6_ip_funcs, |
2215 | }; |
2216 | |
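/*
 * Check one sub-block of one VCN instance for RAS poison; returns non-zero
 * when the corresponding POISONED_PF status bit is set.
 */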
2217 | static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev, |
2218 | uint32_t instance, uint32_t sub_block) |
2219 | { |
2220 | uint32_t poison_stat = 0, reg_value = 0; |
2221 | |
2222 | switch (sub_block) { |
2223 | case AMDGPU_VCN_V2_6_VCPU_VCODEC: |
2224 | reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS); |
2225 | poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); |
2226 | break; |
2227 | default: |
2228 | break; |
2229 | } |
2230 | |
2231 | if (poison_stat) |
dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
2233 | instance, sub_block); |
2234 | |
2235 | return poison_stat; |
2236 | } |
2237 | |
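/* Aggregate poison status across all VCN instances and sub-blocks. */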
2238 | static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev) |
2239 | { |
2240 | uint32_t inst, sub; |
2241 | uint32_t poison_stat = 0; |
2242 | |
2243 | for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) |
2244 | for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++) |
2245 | poison_stat += |
vcn_v2_6_query_poison_by_instance(adev, inst, sub);
2247 | |
2248 | return !!poison_stat; |
2249 | } |
2250 | |
2251 | const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = { |
2252 | .query_poison_status = vcn_v2_6_query_poison_status, |
2253 | }; |
2254 | |
2255 | static struct amdgpu_vcn_ras vcn_v2_6_ras = { |
2256 | .ras_block = { |
2257 | .hw_ops = &vcn_v2_6_ras_hw_ops, |
2258 | .ras_late_init = amdgpu_vcn_ras_late_init, |
2259 | }, |
2260 | }; |
2261 | |
2262 | static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev) |
2263 | { |
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
2265 | case IP_VERSION(2, 6, 0): |
2266 | adev->vcn.ras = &vcn_v2_6_ras; |
2267 | break; |
2268 | default: |
2269 | break; |
2270 | } |
2271 | } |
2272 | |