1 | /* |
2 | * Copyright 2022 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | */ |
23 | |
24 | #include "amdgpu.h" |
25 | #include "amdgpu_jpeg.h" |
26 | #include "soc15.h" |
27 | #include "soc15d.h" |
28 | #include "jpeg_v4_0_3.h" |
29 | #include "mmsch_v4_0_3.h" |
30 | |
31 | #include "vcn/vcn_4_0_3_offset.h" |
32 | #include "vcn/vcn_4_0_3_sh_mask.h" |
33 | #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" |
34 | |
/*
 * Values compared against the UVDJ_PWR_STATUS field of UVD_PGFSM_STATUS
 * when waiting for the JPEG power state to change.
 */
enum jpeg_engin_status {
	UVD_PGFSM_STATUS__UVDJ_PWR_ON  = 0x0,
	UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 0x2,
};
39 | |
/* Forward declarations of static functions called by the code below. */
static int jpeg_v4_0_3_set_powergating_state(void *handle,
					      enum amd_powergating_state state);
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
46 | |
47 | static int amdgpu_ih_srcid_jpeg[] = { |
48 | VCN_4_0__SRCID__JPEG_DECODE, |
49 | VCN_4_0__SRCID__JPEG1_DECODE, |
50 | VCN_4_0__SRCID__JPEG2_DECODE, |
51 | VCN_4_0__SRCID__JPEG3_DECODE, |
52 | VCN_4_0__SRCID__JPEG4_DECODE, |
53 | VCN_4_0__SRCID__JPEG5_DECODE, |
54 | VCN_4_0__SRCID__JPEG6_DECODE, |
55 | VCN_4_0__SRCID__JPEG7_DECODE |
56 | }; |
57 | |
58 | /** |
59 | * jpeg_v4_0_3_early_init - set function pointers |
60 | * |
61 | * @handle: amdgpu_device pointer |
62 | * |
63 | * Set ring and irq function pointers |
64 | */ |
65 | static int jpeg_v4_0_3_early_init(void *handle) |
66 | { |
67 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
68 | |
69 | adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; |
70 | |
71 | jpeg_v4_0_3_set_dec_ring_funcs(adev); |
72 | jpeg_v4_0_3_set_irq_funcs(adev); |
73 | jpeg_v4_0_3_set_ras_funcs(adev); |
74 | |
75 | return 0; |
76 | } |
77 | |
78 | /** |
79 | * jpeg_v4_0_3_sw_init - sw init for JPEG block |
80 | * |
81 | * @handle: amdgpu_device pointer |
82 | * |
83 | * Load firmware and sw initialization |
84 | */ |
85 | static int jpeg_v4_0_3_sw_init(void *handle) |
86 | { |
87 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
88 | struct amdgpu_ring *ring; |
89 | int i, j, r, jpeg_inst; |
90 | |
91 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
92 | /* JPEG TRAP */ |
93 | r = amdgpu_irq_add_id(adev, client_id: SOC15_IH_CLIENTID_VCN, |
94 | src_id: amdgpu_ih_srcid_jpeg[j], source: &adev->jpeg.inst->irq); |
95 | if (r) |
96 | return r; |
97 | } |
98 | |
99 | r = amdgpu_jpeg_sw_init(adev); |
100 | if (r) |
101 | return r; |
102 | |
103 | r = amdgpu_jpeg_resume(adev); |
104 | if (r) |
105 | return r; |
106 | |
107 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
108 | jpeg_inst = GET_INST(JPEG, i); |
109 | |
110 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
111 | ring = &adev->jpeg.inst[i].ring_dec[j]; |
112 | ring->use_doorbell = true; |
113 | ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); |
114 | if (!amdgpu_sriov_vf(adev)) { |
115 | ring->doorbell_index = |
116 | (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + |
117 | 1 + j + 9 * jpeg_inst; |
118 | } else { |
119 | if (j < 4) |
120 | ring->doorbell_index = |
121 | (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + |
122 | 4 + j + 32 * jpeg_inst; |
123 | else |
124 | ring->doorbell_index = |
125 | (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + |
126 | 8 + j + 32 * jpeg_inst; |
127 | } |
128 | sprintf(buf: ring->name, fmt: "jpeg_dec_%d.%d" , adev->jpeg.inst[i].aid_id, j); |
129 | r = amdgpu_ring_init(adev, ring, max_dw: 512, irq_src: &adev->jpeg.inst->irq, irq_type: 0, |
130 | hw_prio: AMDGPU_RING_PRIO_DEFAULT, NULL); |
131 | if (r) |
132 | return r; |
133 | |
134 | adev->jpeg.internal.jpeg_pitch[j] = |
135 | regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; |
136 | adev->jpeg.inst[i].external.jpeg_pitch[j] = |
137 | SOC15_REG_OFFSET1( |
138 | JPEG, jpeg_inst, |
139 | regUVD_JRBC0_UVD_JRBC_SCRATCH0, |
140 | (j ? (0x40 * j - 0xc80) : 0)); |
141 | } |
142 | } |
143 | |
144 | if (amdgpu_ras_is_supported(adev, block: AMDGPU_RAS_BLOCK__JPEG)) { |
145 | r = amdgpu_jpeg_ras_sw_init(adev); |
146 | if (r) { |
147 | dev_err(adev->dev, "Failed to initialize jpeg ras block!\n" ); |
148 | return r; |
149 | } |
150 | } |
151 | |
152 | return 0; |
153 | } |
154 | |
/**
 * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Calls the common JPEG suspend helper and, if that succeeds, the common
 * JPEG sw_fini helper.
 *
 * Return: the suspend error if suspend fails, otherwise the result of
 * amdgpu_jpeg_sw_fini().
 */
static int jpeg_v4_0_3_sw_fini(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = amdgpu_jpeg_suspend(adev);

	if (r)
		return r;

	return amdgpu_jpeg_sw_fini(adev);
}
175 | |
176 | static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) |
177 | { |
178 | struct amdgpu_ring *ring; |
179 | uint64_t ctx_addr; |
180 | uint32_t param, resp, expected; |
181 | uint32_t tmp, timeout; |
182 | |
183 | struct amdgpu_mm_table *table = &adev->virt.mm_table; |
184 | uint32_t *table_loc; |
185 | uint32_t table_size; |
186 | uint32_t size, size_dw, item_offset; |
187 | uint32_t init_status; |
188 | int i, j, jpeg_inst; |
189 | |
190 | struct mmsch_v4_0_cmd_direct_write |
191 | direct_wt = { {0} }; |
192 | struct mmsch_v4_0_cmd_end end = { {0} }; |
193 | struct mmsch_v4_0_3_init_header ; |
194 | |
195 | direct_wt.cmd_header.command_type = |
196 | MMSCH_COMMAND__DIRECT_REG_WRITE; |
197 | end.cmd_header.command_type = |
198 | MMSCH_COMMAND__END; |
199 | |
200 | for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { |
201 | jpeg_inst = GET_INST(JPEG, i); |
202 | |
203 | memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header)); |
204 | header.version = MMSCH_VERSION; |
205 | header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; |
206 | |
207 | table_loc = (uint32_t *)table->cpu_addr; |
208 | table_loc += header.total_size; |
209 | |
210 | item_offset = header.total_size; |
211 | |
212 | for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { |
213 | ring = &adev->jpeg.inst[i].ring_dec[j]; |
214 | table_size = 0; |
215 | |
216 | tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW); |
217 | MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); |
218 | tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH); |
219 | MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); |
220 | tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE); |
221 | MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); |
222 | |
223 | if (j <= 3) { |
224 | header.mjpegdec0[j].table_offset = item_offset; |
225 | header.mjpegdec0[j].init_status = 0; |
226 | header.mjpegdec0[j].table_size = table_size; |
227 | } else { |
228 | header.mjpegdec1[j - 4].table_offset = item_offset; |
229 | header.mjpegdec1[j - 4].init_status = 0; |
230 | header.mjpegdec1[j - 4].table_size = table_size; |
231 | } |
232 | header.total_size += table_size; |
233 | item_offset += table_size; |
234 | } |
235 | |
236 | MMSCH_V4_0_INSERT_END(); |
237 | |
238 | /* send init table to MMSCH */ |
239 | size = sizeof(struct mmsch_v4_0_3_init_header); |
240 | table_loc = (uint32_t *)table->cpu_addr; |
241 | memcpy((void *)table_loc, &header, size); |
242 | |
243 | ctx_addr = table->gpu_addr; |
244 | WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); |
245 | WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); |
246 | |
247 | tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID); |
248 | tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; |
249 | tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); |
250 | WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp); |
251 | |
252 | size = header.total_size; |
253 | WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size); |
254 | |
255 | WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0); |
256 | |
257 | param = 0x00000001; |
258 | WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param); |
259 | tmp = 0; |
260 | timeout = 1000; |
261 | resp = 0; |
262 | expected = MMSCH_VF_MAILBOX_RESP__OK; |
263 | init_status = |
264 | ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status; |
265 | while (resp != expected) { |
266 | resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP); |
267 | |
268 | if (resp != 0) |
269 | break; |
270 | udelay(10); |
271 | tmp = tmp + 10; |
272 | if (tmp >= timeout) { |
273 | DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec" \ |
274 | " waiting for regMMSCH_VF_MAILBOX_RESP " \ |
275 | "(expected=0x%08x, readback=0x%08x)\n" , |
276 | tmp, expected, resp); |
277 | return -EBUSY; |
278 | } |
279 | } |
280 | if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && |
281 | init_status != MMSCH_VF_ENGINE_STATUS__PASS) |
282 | DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n" , |
283 | resp, init_status); |
284 | |
285 | } |
286 | return 0; |
287 | } |
288 | |
289 | /** |
290 | * jpeg_v4_0_3_hw_init - start and test JPEG block |
291 | * |
292 | * @handle: amdgpu_device pointer |
293 | * |
294 | */ |
295 | static int jpeg_v4_0_3_hw_init(void *handle) |
296 | { |
297 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
298 | struct amdgpu_ring *ring; |
299 | int i, j, r, jpeg_inst; |
300 | |
301 | if (amdgpu_sriov_vf(adev)) { |
302 | r = jpeg_v4_0_3_start_sriov(adev); |
303 | if (r) |
304 | return r; |
305 | |
306 | for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { |
307 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
308 | ring = &adev->jpeg.inst[i].ring_dec[j]; |
309 | ring->wptr = 0; |
310 | ring->wptr_old = 0; |
311 | jpeg_v4_0_3_dec_ring_set_wptr(ring); |
312 | ring->sched.ready = true; |
313 | } |
314 | } |
315 | } else { |
316 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
317 | jpeg_inst = GET_INST(JPEG, i); |
318 | |
319 | ring = adev->jpeg.inst[i].ring_dec; |
320 | |
321 | if (ring->use_doorbell) |
322 | adev->nbio.funcs->vcn_doorbell_range( |
323 | adev, ring->use_doorbell, |
324 | (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + |
325 | 9 * jpeg_inst, |
326 | adev->jpeg.inst[i].aid_id); |
327 | |
328 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
329 | ring = &adev->jpeg.inst[i].ring_dec[j]; |
330 | if (ring->use_doorbell) |
331 | WREG32_SOC15_OFFSET( |
332 | VCN, GET_INST(VCN, i), |
333 | regVCN_JPEG_DB_CTRL, |
334 | (ring->pipe ? (ring->pipe - 0x15) : 0), |
335 | ring->doorbell_index |
336 | << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | |
337 | VCN_JPEG_DB_CTRL__EN_MASK); |
338 | r = amdgpu_ring_test_helper(ring); |
339 | if (r) |
340 | return r; |
341 | } |
342 | } |
343 | } |
344 | DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n" ); |
345 | |
346 | return 0; |
347 | } |
348 | |
349 | /** |
350 | * jpeg_v4_0_3_hw_fini - stop the hardware block |
351 | * |
352 | * @handle: amdgpu_device pointer |
353 | * |
354 | * Stop the JPEG block, mark ring as not ready any more |
355 | */ |
356 | static int jpeg_v4_0_3_hw_fini(void *handle) |
357 | { |
358 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
359 | int ret = 0; |
360 | |
361 | cancel_delayed_work_sync(dwork: &adev->jpeg.idle_work); |
362 | |
363 | if (!amdgpu_sriov_vf(adev)) { |
364 | if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) |
365 | ret = jpeg_v4_0_3_set_powergating_state(handle: adev, state: AMD_PG_STATE_GATE); |
366 | } |
367 | |
368 | return ret; |
369 | } |
370 | |
/**
 * jpeg_v4_0_3_suspend - suspend JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Runs hw_fini and, if that succeeds, the common JPEG suspend helper.
 *
 * Return: the hw_fini error if it fails, otherwise the result of
 * amdgpu_jpeg_suspend().
 */
static int jpeg_v4_0_3_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = jpeg_v4_0_3_hw_fini(adev);

	if (r)
		return r;

	return amdgpu_jpeg_suspend(adev);
}
391 | |
/**
 * jpeg_v4_0_3_resume - resume JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Runs the common JPEG resume helper and, if that succeeds, hw_init.
 *
 * Return: the resume error if it fails, otherwise the result of
 * jpeg_v4_0_3_hw_init().
 */
static int jpeg_v4_0_3_resume(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = amdgpu_jpeg_resume(adev);

	if (r)
		return r;

	return jpeg_v4_0_3_hw_init(adev);
}
412 | |
413 | static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx) |
414 | { |
415 | int i, jpeg_inst; |
416 | uint32_t data; |
417 | |
418 | jpeg_inst = GET_INST(JPEG, inst_idx); |
419 | data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL); |
420 | if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { |
421 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
422 | data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1)); |
423 | } else { |
424 | data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
425 | } |
426 | |
427 | data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; |
428 | data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; |
429 | WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data); |
430 | |
431 | data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE); |
432 | data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); |
433 | for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) |
434 | data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i); |
435 | WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); |
436 | } |
437 | |
438 | static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx) |
439 | { |
440 | int i, jpeg_inst; |
441 | uint32_t data; |
442 | |
443 | jpeg_inst = GET_INST(JPEG, inst_idx); |
444 | data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL); |
445 | if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { |
446 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
447 | data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1); |
448 | } else { |
449 | data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
450 | } |
451 | |
452 | data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; |
453 | data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; |
454 | WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data); |
455 | |
456 | data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE); |
457 | data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); |
458 | for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) |
459 | data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i); |
460 | WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); |
461 | } |
462 | |
463 | /** |
464 | * jpeg_v4_0_3_start - start JPEG block |
465 | * |
466 | * @adev: amdgpu_device pointer |
467 | * |
468 | * Setup and start the JPEG block |
469 | */ |
470 | static int jpeg_v4_0_3_start(struct amdgpu_device *adev) |
471 | { |
472 | struct amdgpu_ring *ring; |
473 | int i, j, jpeg_inst; |
474 | |
475 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
476 | jpeg_inst = GET_INST(JPEG, i); |
477 | |
478 | WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, |
479 | 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); |
480 | SOC15_WAIT_ON_RREG( |
481 | JPEG, jpeg_inst, regUVD_PGFSM_STATUS, |
482 | UVD_PGFSM_STATUS__UVDJ_PWR_ON |
483 | << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, |
484 | UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); |
485 | |
486 | /* disable anti hang mechanism */ |
487 | WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, |
488 | regUVD_JPEG_POWER_STATUS), |
489 | 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); |
490 | |
491 | /* JPEG disable CGC */ |
492 | jpeg_v4_0_3_disable_clock_gating(adev, inst_idx: i); |
493 | |
494 | /* MJPEG global tiling registers */ |
495 | WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG, |
496 | adev->gfx.config.gb_addr_config); |
497 | WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG, |
498 | adev->gfx.config.gb_addr_config); |
499 | |
500 | /* enable JMI channel */ |
501 | WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, |
502 | ~UVD_JMI_CNTL__SOFT_RESET_MASK); |
503 | |
504 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
505 | unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); |
506 | |
507 | ring = &adev->jpeg.inst[i].ring_dec[j]; |
508 | |
509 | /* enable System Interrupt for JRBC */ |
510 | WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, |
511 | regJPEG_SYS_INT_EN), |
512 | JPEG_SYS_INT_EN__DJRBC0_MASK << j, |
513 | ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j)); |
514 | |
515 | WREG32_SOC15_OFFSET(JPEG, jpeg_inst, |
516 | regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, |
517 | reg_offset, 0); |
518 | WREG32_SOC15_OFFSET(JPEG, jpeg_inst, |
519 | regUVD_JRBC0_UVD_JRBC_RB_CNTL, |
520 | reg_offset, |
521 | (0x00000001L | 0x00000002L)); |
522 | WREG32_SOC15_OFFSET( |
523 | JPEG, jpeg_inst, |
524 | regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, |
525 | reg_offset, lower_32_bits(ring->gpu_addr)); |
526 | WREG32_SOC15_OFFSET( |
527 | JPEG, jpeg_inst, |
528 | regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, |
529 | reg_offset, upper_32_bits(ring->gpu_addr)); |
530 | WREG32_SOC15_OFFSET(JPEG, jpeg_inst, |
531 | regUVD_JRBC0_UVD_JRBC_RB_RPTR, |
532 | reg_offset, 0); |
533 | WREG32_SOC15_OFFSET(JPEG, jpeg_inst, |
534 | regUVD_JRBC0_UVD_JRBC_RB_WPTR, |
535 | reg_offset, 0); |
536 | WREG32_SOC15_OFFSET(JPEG, jpeg_inst, |
537 | regUVD_JRBC0_UVD_JRBC_RB_CNTL, |
538 | reg_offset, 0x00000002L); |
539 | WREG32_SOC15_OFFSET(JPEG, jpeg_inst, |
540 | regUVD_JRBC0_UVD_JRBC_RB_SIZE, |
541 | reg_offset, ring->ring_size / 4); |
542 | ring->wptr = RREG32_SOC15_OFFSET( |
543 | JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR, |
544 | reg_offset); |
545 | } |
546 | } |
547 | |
548 | return 0; |
549 | } |
550 | |
551 | /** |
552 | * jpeg_v4_0_3_stop - stop JPEG block |
553 | * |
554 | * @adev: amdgpu_device pointer |
555 | * |
556 | * stop the JPEG block |
557 | */ |
558 | static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) |
559 | { |
560 | int i, jpeg_inst; |
561 | |
562 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
563 | jpeg_inst = GET_INST(JPEG, i); |
564 | /* reset JMI */ |
565 | WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), |
566 | UVD_JMI_CNTL__SOFT_RESET_MASK, |
567 | ~UVD_JMI_CNTL__SOFT_RESET_MASK); |
568 | |
569 | jpeg_v4_0_3_enable_clock_gating(adev, inst_idx: i); |
570 | |
571 | /* enable anti hang mechanism */ |
572 | WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, |
573 | regUVD_JPEG_POWER_STATUS), |
574 | UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, |
575 | ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); |
576 | |
577 | WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, |
578 | 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); |
579 | SOC15_WAIT_ON_RREG( |
580 | JPEG, jpeg_inst, regUVD_PGFSM_STATUS, |
581 | UVD_PGFSM_STATUS__UVDJ_PWR_OFF |
582 | << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, |
583 | UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); |
584 | } |
585 | |
586 | return 0; |
587 | } |
588 | |
589 | /** |
590 | * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer |
591 | * |
592 | * @ring: amdgpu_ring pointer |
593 | * |
594 | * Returns the current hardware read pointer |
595 | */ |
596 | static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring) |
597 | { |
598 | struct amdgpu_device *adev = ring->adev; |
599 | |
600 | return RREG32_SOC15_OFFSET( |
601 | JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR, |
602 | ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); |
603 | } |
604 | |
605 | /** |
606 | * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer |
607 | * |
608 | * @ring: amdgpu_ring pointer |
609 | * |
610 | * Returns the current hardware write pointer |
611 | */ |
612 | static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) |
613 | { |
614 | struct amdgpu_device *adev = ring->adev; |
615 | |
616 | if (ring->use_doorbell) |
617 | return adev->wb.wb[ring->wptr_offs]; |
618 | else |
619 | return RREG32_SOC15_OFFSET( |
620 | JPEG, GET_INST(JPEG, ring->me), |
621 | regUVD_JRBC0_UVD_JRBC_RB_WPTR, |
622 | ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); |
623 | } |
624 | |
625 | /** |
626 | * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer |
627 | * |
628 | * @ring: amdgpu_ring pointer |
629 | * |
630 | * Commits the write pointer to the hardware |
631 | */ |
632 | static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) |
633 | { |
634 | struct amdgpu_device *adev = ring->adev; |
635 | |
636 | if (ring->use_doorbell) { |
637 | adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); |
638 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); |
639 | } else { |
640 | WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), |
641 | regUVD_JRBC0_UVD_JRBC_RB_WPTR, |
642 | (ring->pipe ? (0x40 * ring->pipe - 0xc80) : |
643 | 0), |
644 | lower_32_bits(ring->wptr)); |
645 | } |
646 | } |
647 | |
648 | /** |
649 | * jpeg_v4_0_3_dec_ring_insert_start - insert a start command |
650 | * |
651 | * @ring: amdgpu_ring pointer |
652 | * |
653 | * Write a start command to the ring. |
654 | */ |
655 | void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) |
656 | { |
657 | if (!amdgpu_sriov_vf(ring->adev)) { |
658 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, |
659 | 0, 0, PACKETJ_TYPE0)); |
660 | amdgpu_ring_write(ring, v: 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ |
661 | } |
662 | |
663 | amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, |
664 | 0, 0, PACKETJ_TYPE0)); |
665 | amdgpu_ring_write(ring, v: 0x80004000); |
666 | } |
667 | |
668 | /** |
669 | * jpeg_v4_0_3_dec_ring_insert_end - insert a end command |
670 | * |
671 | * @ring: amdgpu_ring pointer |
672 | * |
673 | * Write a end command to the ring. |
674 | */ |
675 | void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) |
676 | { |
677 | if (!amdgpu_sriov_vf(ring->adev)) { |
678 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, |
679 | 0, 0, PACKETJ_TYPE0)); |
680 | amdgpu_ring_write(ring, v: 0x62a04); |
681 | } |
682 | |
683 | amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, |
684 | 0, 0, PACKETJ_TYPE0)); |
685 | amdgpu_ring_write(ring, v: 0x00004000); |
686 | } |
687 | |
688 | /** |
689 | * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command |
690 | * |
691 | * @ring: amdgpu_ring pointer |
692 | * @addr: address |
693 | * @seq: sequence number |
694 | * @flags: fence related flags |
695 | * |
696 | * Write a fence and a trap command to the ring. |
697 | */ |
698 | void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, |
699 | unsigned int flags) |
700 | { |
701 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
702 | |
703 | amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, |
704 | 0, 0, PACKETJ_TYPE0)); |
705 | amdgpu_ring_write(ring, v: seq); |
706 | |
707 | amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, |
708 | 0, 0, PACKETJ_TYPE0)); |
709 | amdgpu_ring_write(ring, v: seq); |
710 | |
711 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, |
712 | 0, 0, PACKETJ_TYPE0)); |
713 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
714 | |
715 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, |
716 | 0, 0, PACKETJ_TYPE0)); |
717 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
718 | |
719 | amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, |
720 | 0, 0, PACKETJ_TYPE0)); |
721 | amdgpu_ring_write(ring, v: 0x8); |
722 | |
723 | amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, |
724 | 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); |
725 | amdgpu_ring_write(ring, v: 0); |
726 | |
727 | if (ring->adev->jpeg.inst[ring->me].aid_id) { |
728 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, |
729 | 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); |
730 | amdgpu_ring_write(ring, v: 0x4); |
731 | } else { |
732 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); |
733 | amdgpu_ring_write(ring, v: 0); |
734 | } |
735 | |
736 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, |
737 | 0, 0, PACKETJ_TYPE0)); |
738 | amdgpu_ring_write(ring, v: 0x3fbc); |
739 | |
740 | if (ring->adev->jpeg.inst[ring->me].aid_id) { |
741 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, |
742 | 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); |
743 | amdgpu_ring_write(ring, v: 0x0); |
744 | } else { |
745 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); |
746 | amdgpu_ring_write(ring, v: 0); |
747 | } |
748 | |
749 | amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, |
750 | 0, 0, PACKETJ_TYPE0)); |
751 | amdgpu_ring_write(ring, v: 0x1); |
752 | |
753 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); |
754 | amdgpu_ring_write(ring, v: 0); |
755 | } |
756 | |
757 | /** |
758 | * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer |
759 | * |
760 | * @ring: amdgpu_ring pointer |
761 | * @job: job to retrieve vmid from |
762 | * @ib: indirect buffer to execute |
763 | * @flags: unused |
764 | * |
765 | * Write ring commands to execute the indirect buffer. |
766 | */ |
767 | void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, |
768 | struct amdgpu_job *job, |
769 | struct amdgpu_ib *ib, |
770 | uint32_t flags) |
771 | { |
772 | unsigned int vmid = AMDGPU_JOB_GET_VMID(job); |
773 | |
774 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, |
775 | 0, 0, PACKETJ_TYPE0)); |
776 | amdgpu_ring_write(ring, v: (vmid | (vmid << 4))); |
777 | |
778 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, |
779 | 0, 0, PACKETJ_TYPE0)); |
780 | amdgpu_ring_write(ring, v: (vmid | (vmid << 4))); |
781 | |
782 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, |
783 | 0, 0, PACKETJ_TYPE0)); |
784 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
785 | |
786 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, |
787 | 0, 0, PACKETJ_TYPE0)); |
788 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
789 | |
790 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, |
791 | 0, 0, PACKETJ_TYPE0)); |
792 | amdgpu_ring_write(ring, v: ib->length_dw); |
793 | |
794 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, |
795 | 0, 0, PACKETJ_TYPE0)); |
796 | amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); |
797 | |
798 | amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, |
799 | 0, 0, PACKETJ_TYPE0)); |
800 | amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); |
801 | |
802 | amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); |
803 | amdgpu_ring_write(ring, v: 0); |
804 | |
805 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, |
806 | 0, 0, PACKETJ_TYPE0)); |
807 | amdgpu_ring_write(ring, v: 0x01400200); |
808 | |
809 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, |
810 | 0, 0, PACKETJ_TYPE0)); |
811 | amdgpu_ring_write(ring, v: 0x2); |
812 | |
813 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET, |
814 | 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); |
815 | amdgpu_ring_write(ring, v: 0x2); |
816 | } |
817 | |
818 | void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, |
819 | uint32_t val, uint32_t mask) |
820 | { |
821 | uint32_t reg_offset = (reg << 2); |
822 | |
823 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, |
824 | 0, 0, PACKETJ_TYPE0)); |
825 | amdgpu_ring_write(ring, v: 0x01400200); |
826 | |
827 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, |
828 | 0, 0, PACKETJ_TYPE0)); |
829 | amdgpu_ring_write(ring, v: val); |
830 | |
831 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, |
832 | 0, 0, PACKETJ_TYPE0)); |
833 | if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { |
834 | amdgpu_ring_write(ring, v: 0); |
835 | amdgpu_ring_write(ring, |
836 | PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); |
837 | } else { |
838 | amdgpu_ring_write(ring, v: reg_offset); |
839 | amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, |
840 | 0, 0, PACKETJ_TYPE3)); |
841 | } |
842 | amdgpu_ring_write(ring, v: mask); |
843 | } |
844 | |
845 | void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, |
846 | unsigned int vmid, uint64_t pd_addr) |
847 | { |
848 | struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; |
849 | uint32_t data0, data1, mask; |
850 | |
851 | pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); |
852 | |
853 | /* wait for register write */ |
854 | data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; |
855 | data1 = lower_32_bits(pd_addr); |
856 | mask = 0xffffffff; |
857 | jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, reg: data0, val: data1, mask); |
858 | } |
859 | |
860 | void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) |
861 | { |
862 | uint32_t reg_offset = (reg << 2); |
863 | |
864 | amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, |
865 | 0, 0, PACKETJ_TYPE0)); |
866 | if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { |
867 | amdgpu_ring_write(ring, v: 0); |
868 | amdgpu_ring_write(ring, |
869 | PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); |
870 | } else { |
871 | amdgpu_ring_write(ring, v: reg_offset); |
872 | amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, |
873 | 0, 0, PACKETJ_TYPE0)); |
874 | } |
875 | amdgpu_ring_write(ring, v: val); |
876 | } |
877 | |
878 | void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) |
879 | { |
880 | int i; |
881 | |
882 | WARN_ON(ring->wptr % 2 || count % 2); |
883 | |
884 | for (i = 0; i < count / 2; i++) { |
885 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); |
886 | amdgpu_ring_write(ring, v: 0); |
887 | } |
888 | } |
889 | |
890 | static bool jpeg_v4_0_3_is_idle(void *handle) |
891 | { |
892 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
893 | bool ret = false; |
894 | int i, j; |
895 | |
896 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
897 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
898 | unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); |
899 | |
900 | ret &= ((RREG32_SOC15_OFFSET( |
901 | JPEG, GET_INST(JPEG, i), |
902 | regUVD_JRBC0_UVD_JRBC_STATUS, |
903 | reg_offset) & |
904 | UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == |
905 | UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); |
906 | } |
907 | } |
908 | |
909 | return ret; |
910 | } |
911 | |
912 | static int jpeg_v4_0_3_wait_for_idle(void *handle) |
913 | { |
914 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
915 | int ret = 0; |
916 | int i, j; |
917 | |
918 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
919 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
920 | unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); |
921 | |
922 | ret &= SOC15_WAIT_ON_RREG_OFFSET( |
923 | JPEG, GET_INST(JPEG, i), |
924 | regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset, |
925 | UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, |
926 | UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); |
927 | } |
928 | } |
929 | return ret; |
930 | } |
931 | |
932 | static int jpeg_v4_0_3_set_clockgating_state(void *handle, |
933 | enum amd_clockgating_state state) |
934 | { |
935 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
936 | bool enable = state == AMD_CG_STATE_GATE; |
937 | int i; |
938 | |
939 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
940 | if (enable) { |
941 | if (!jpeg_v4_0_3_is_idle(handle)) |
942 | return -EBUSY; |
943 | jpeg_v4_0_3_enable_clock_gating(adev, inst_idx: i); |
944 | } else { |
945 | jpeg_v4_0_3_disable_clock_gating(adev, inst_idx: i); |
946 | } |
947 | } |
948 | return 0; |
949 | } |
950 | |
951 | static int jpeg_v4_0_3_set_powergating_state(void *handle, |
952 | enum amd_powergating_state state) |
953 | { |
954 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
955 | int ret; |
956 | |
957 | if (amdgpu_sriov_vf(adev)) { |
958 | adev->jpeg.cur_state = AMD_PG_STATE_UNGATE; |
959 | return 0; |
960 | } |
961 | |
962 | if (state == adev->jpeg.cur_state) |
963 | return 0; |
964 | |
965 | if (state == AMD_PG_STATE_GATE) |
966 | ret = jpeg_v4_0_3_stop(adev); |
967 | else |
968 | ret = jpeg_v4_0_3_start(adev); |
969 | |
970 | if (!ret) |
971 | adev->jpeg.cur_state = state; |
972 | |
973 | return ret; |
974 | } |
975 | |
976 | static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev, |
977 | struct amdgpu_irq_src *source, |
978 | unsigned int type, |
979 | enum amdgpu_interrupt_state state) |
980 | { |
981 | return 0; |
982 | } |
983 | |
984 | static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, |
985 | struct amdgpu_irq_src *source, |
986 | struct amdgpu_iv_entry *entry) |
987 | { |
988 | uint32_t i, inst; |
989 | |
990 | i = node_id_to_phys_map[entry->node_id]; |
991 | DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n" ); |
992 | |
993 | for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) |
994 | if (adev->jpeg.inst[inst].aid_id == i) |
995 | break; |
996 | |
997 | if (inst >= adev->jpeg.num_jpeg_inst) { |
998 | dev_WARN_ONCE(adev->dev, 1, |
999 | "Interrupt received for unknown JPEG instance %d" , |
1000 | entry->node_id); |
1001 | return 0; |
1002 | } |
1003 | |
1004 | switch (entry->src_id) { |
1005 | case VCN_4_0__SRCID__JPEG_DECODE: |
1006 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[0]); |
1007 | break; |
1008 | case VCN_4_0__SRCID__JPEG1_DECODE: |
1009 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[1]); |
1010 | break; |
1011 | case VCN_4_0__SRCID__JPEG2_DECODE: |
1012 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[2]); |
1013 | break; |
1014 | case VCN_4_0__SRCID__JPEG3_DECODE: |
1015 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[3]); |
1016 | break; |
1017 | case VCN_4_0__SRCID__JPEG4_DECODE: |
1018 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[4]); |
1019 | break; |
1020 | case VCN_4_0__SRCID__JPEG5_DECODE: |
1021 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[5]); |
1022 | break; |
1023 | case VCN_4_0__SRCID__JPEG6_DECODE: |
1024 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[6]); |
1025 | break; |
1026 | case VCN_4_0__SRCID__JPEG7_DECODE: |
1027 | amdgpu_fence_process(ring: &adev->jpeg.inst[inst].ring_dec[7]); |
1028 | break; |
1029 | default: |
1030 | DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n" , |
1031 | entry->src_id, entry->src_data[0]); |
1032 | break; |
1033 | } |
1034 | |
1035 | return 0; |
1036 | } |
1037 | |
1038 | static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { |
1039 | .name = "jpeg_v4_0_3" , |
1040 | .early_init = jpeg_v4_0_3_early_init, |
1041 | .late_init = NULL, |
1042 | .sw_init = jpeg_v4_0_3_sw_init, |
1043 | .sw_fini = jpeg_v4_0_3_sw_fini, |
1044 | .hw_init = jpeg_v4_0_3_hw_init, |
1045 | .hw_fini = jpeg_v4_0_3_hw_fini, |
1046 | .suspend = jpeg_v4_0_3_suspend, |
1047 | .resume = jpeg_v4_0_3_resume, |
1048 | .is_idle = jpeg_v4_0_3_is_idle, |
1049 | .wait_for_idle = jpeg_v4_0_3_wait_for_idle, |
1050 | .check_soft_reset = NULL, |
1051 | .pre_soft_reset = NULL, |
1052 | .soft_reset = NULL, |
1053 | .post_soft_reset = NULL, |
1054 | .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, |
1055 | .set_powergating_state = jpeg_v4_0_3_set_powergating_state, |
1056 | }; |
1057 | |
1058 | static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { |
1059 | .type = AMDGPU_RING_TYPE_VCN_JPEG, |
1060 | .align_mask = 0xf, |
1061 | .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, |
1062 | .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, |
1063 | .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, |
1064 | .emit_frame_size = |
1065 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + |
1066 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + |
1067 | 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */ |
1068 | 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ |
1069 | 8 + 16, |
1070 | .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */ |
1071 | .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, |
1072 | .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, |
1073 | .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, |
1074 | .test_ring = amdgpu_jpeg_dec_ring_test_ring, |
1075 | .test_ib = amdgpu_jpeg_dec_ring_test_ib, |
1076 | .insert_nop = jpeg_v4_0_3_dec_ring_nop, |
1077 | .insert_start = jpeg_v4_0_3_dec_ring_insert_start, |
1078 | .insert_end = jpeg_v4_0_3_dec_ring_insert_end, |
1079 | .pad_ib = amdgpu_ring_generic_pad_ib, |
1080 | .begin_use = amdgpu_jpeg_ring_begin_use, |
1081 | .end_use = amdgpu_jpeg_ring_end_use, |
1082 | .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, |
1083 | .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, |
1084 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, |
1085 | }; |
1086 | |
1087 | static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) |
1088 | { |
1089 | int i, j, jpeg_inst; |
1090 | |
1091 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
1092 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { |
1093 | adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; |
1094 | adev->jpeg.inst[i].ring_dec[j].me = i; |
1095 | adev->jpeg.inst[i].ring_dec[j].pipe = j; |
1096 | } |
1097 | jpeg_inst = GET_INST(JPEG, i); |
1098 | adev->jpeg.inst[i].aid_id = |
1099 | jpeg_inst / adev->jpeg.num_inst_per_aid; |
1100 | } |
1101 | DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n" ); |
1102 | } |
1103 | |
1104 | static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { |
1105 | .set = jpeg_v4_0_3_set_interrupt_state, |
1106 | .process = jpeg_v4_0_3_process_interrupt, |
1107 | }; |
1108 | |
1109 | static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) |
1110 | { |
1111 | int i; |
1112 | |
1113 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
1114 | adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; |
1115 | } |
1116 | adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; |
1117 | } |
1118 | |
1119 | const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = { |
1120 | .type = AMD_IP_BLOCK_TYPE_JPEG, |
1121 | .major = 4, |
1122 | .minor = 0, |
1123 | .rev = 3, |
1124 | .funcs = &jpeg_v4_0_3_ip_funcs, |
1125 | }; |
1126 | |
1127 | static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = { |
1128 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S), |
1129 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S" }, |
1130 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D), |
1131 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D" }, |
1132 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S), |
1133 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S" }, |
1134 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D), |
1135 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D" }, |
1136 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S), |
1137 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S" }, |
1138 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D), |
1139 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D" }, |
1140 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S), |
1141 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S" }, |
1142 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D), |
1143 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D" }, |
1144 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S), |
1145 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S" }, |
1146 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D), |
1147 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D" }, |
1148 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S), |
1149 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S" }, |
1150 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D), |
1151 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D" }, |
1152 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S), |
1153 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S" }, |
1154 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D), |
1155 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D" }, |
1156 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S), |
1157 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S" }, |
1158 | {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D), |
1159 | 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D" }, |
1160 | }; |
1161 | |
1162 | static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev, |
1163 | uint32_t jpeg_inst, |
1164 | void *ras_err_status) |
1165 | { |
1166 | struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; |
1167 | |
1168 | /* jpeg v4_0_3 only support uncorrectable errors */ |
1169 | amdgpu_ras_inst_query_ras_error_count(adev, |
1170 | reg_list: jpeg_v4_0_3_ue_reg_list, |
1171 | ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list), |
1172 | NULL, mem_list_size: 0, GET_INST(VCN, jpeg_inst), |
1173 | err_type: AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, |
1174 | err_count: &err_data->ue_count); |
1175 | } |
1176 | |
1177 | static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev, |
1178 | void *ras_err_status) |
1179 | { |
1180 | uint32_t i; |
1181 | |
1182 | if (!amdgpu_ras_is_supported(adev, block: AMDGPU_RAS_BLOCK__JPEG)) { |
1183 | dev_warn(adev->dev, "JPEG RAS is not supported\n" ); |
1184 | return; |
1185 | } |
1186 | |
1187 | for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) |
1188 | jpeg_v4_0_3_inst_query_ras_error_count(adev, jpeg_inst: i, ras_err_status); |
1189 | } |
1190 | |
1191 | static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev, |
1192 | uint32_t jpeg_inst) |
1193 | { |
1194 | amdgpu_ras_inst_reset_ras_error_count(adev, |
1195 | reg_list: jpeg_v4_0_3_ue_reg_list, |
1196 | ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list), |
1197 | GET_INST(VCN, jpeg_inst)); |
1198 | } |
1199 | |
1200 | static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev) |
1201 | { |
1202 | uint32_t i; |
1203 | |
1204 | if (!amdgpu_ras_is_supported(adev, block: AMDGPU_RAS_BLOCK__JPEG)) { |
1205 | dev_warn(adev->dev, "JPEG RAS is not supported\n" ); |
1206 | return; |
1207 | } |
1208 | |
1209 | for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) |
1210 | jpeg_v4_0_3_inst_reset_ras_error_count(adev, jpeg_inst: i); |
1211 | } |
1212 | |
1213 | static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = { |
1214 | .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count, |
1215 | .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count, |
1216 | }; |
1217 | |
1218 | static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = { |
1219 | .ras_block = { |
1220 | .hw_ops = &jpeg_v4_0_3_ras_hw_ops, |
1221 | }, |
1222 | }; |
1223 | |
1224 | static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) |
1225 | { |
1226 | adev->jpeg.ras = &jpeg_v4_0_3_ras; |
1227 | } |
1228 | |