1 | /* |
2 | * Copyright 2019 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | */ |
23 | |
24 | #include "amdgpu.h" |
25 | #include "amdgpu_jpeg.h" |
26 | #include "soc15.h" |
27 | #include "soc15d.h" |
28 | #include "vcn_v1_0.h" |
29 | #include "jpeg_v1_0.h" |
30 | |
31 | #include "vcn/vcn_1_0_offset.h" |
32 | #include "vcn/vcn_1_0_sh_mask.h" |
33 | |
/*
 * Forward declarations. jpeg_v1_0_ring_begin_use() is referenced by the ring
 * function table, and the two jpeg_v1_0_set_*_funcs() helpers are called from
 * jpeg_v1_0_early_init(), all ahead of their definitions later in this file.
 */
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
37 | |
38 | static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) |
39 | { |
40 | struct amdgpu_device *adev = ring->adev; |
41 | ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); |
42 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
43 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
44 | ring->ring[(*ptr)++] = 0; |
45 | ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); |
46 | } else { |
47 | ring->ring[(*ptr)++] = reg_offset; |
48 | ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); |
49 | } |
50 | ring->ring[(*ptr)++] = val; |
51 | } |
52 | |
53 | static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) |
54 | { |
55 | struct amdgpu_device *adev = ring->adev; |
56 | |
57 | uint32_t reg, reg_offset, val, mask, i; |
58 | |
59 | // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW |
60 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); |
61 | reg_offset = (reg << 2); |
62 | val = lower_32_bits(ring->gpu_addr); |
63 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
64 | |
65 | // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH |
66 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); |
67 | reg_offset = (reg << 2); |
68 | val = upper_32_bits(ring->gpu_addr); |
69 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
70 | |
71 | // 3rd to 5th: issue MEM_READ commands |
72 | for (i = 0; i <= 2; i++) { |
73 | ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); |
74 | ring->ring[ptr++] = 0; |
75 | } |
76 | |
77 | // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability |
78 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); |
79 | reg_offset = (reg << 2); |
80 | val = 0x13; |
81 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
82 | |
83 | // 7th: program mmUVD_JRBC_RB_REF_DATA |
84 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA); |
85 | reg_offset = (reg << 2); |
86 | val = 0x1; |
87 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
88 | |
89 | // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL |
90 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); |
91 | reg_offset = (reg << 2); |
92 | val = 0x1; |
93 | mask = 0x1; |
94 | |
95 | ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); |
96 | ring->ring[ptr++] = 0x01400200; |
97 | ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); |
98 | ring->ring[ptr++] = val; |
99 | ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); |
100 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
101 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
102 | ring->ring[ptr++] = 0; |
103 | ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); |
104 | } else { |
105 | ring->ring[ptr++] = reg_offset; |
106 | ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); |
107 | } |
108 | ring->ring[ptr++] = mask; |
109 | |
110 | //9th to 21st: insert no-op |
111 | for (i = 0; i <= 12; i++) { |
112 | ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); |
113 | ring->ring[ptr++] = 0; |
114 | } |
115 | |
116 | //22nd: reset mmUVD_JRBC_RB_RPTR |
117 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR); |
118 | reg_offset = (reg << 2); |
119 | val = 0; |
120 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
121 | |
122 | //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch |
123 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); |
124 | reg_offset = (reg << 2); |
125 | val = 0x12; |
126 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
127 | } |
128 | |
129 | /** |
130 | * jpeg_v1_0_decode_ring_get_rptr - get read pointer |
131 | * |
132 | * @ring: amdgpu_ring pointer |
133 | * |
134 | * Returns the current hardware read pointer |
135 | */ |
136 | static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring) |
137 | { |
138 | struct amdgpu_device *adev = ring->adev; |
139 | |
140 | return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR); |
141 | } |
142 | |
143 | /** |
144 | * jpeg_v1_0_decode_ring_get_wptr - get write pointer |
145 | * |
146 | * @ring: amdgpu_ring pointer |
147 | * |
148 | * Returns the current hardware write pointer |
149 | */ |
150 | static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring) |
151 | { |
152 | struct amdgpu_device *adev = ring->adev; |
153 | |
154 | return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); |
155 | } |
156 | |
157 | /** |
158 | * jpeg_v1_0_decode_ring_set_wptr - set write pointer |
159 | * |
160 | * @ring: amdgpu_ring pointer |
161 | * |
162 | * Commits the write pointer to the hardware |
163 | */ |
164 | static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring) |
165 | { |
166 | struct amdgpu_device *adev = ring->adev; |
167 | |
168 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); |
169 | } |
170 | |
171 | /** |
172 | * jpeg_v1_0_decode_ring_insert_start - insert a start command |
173 | * |
174 | * @ring: amdgpu_ring pointer |
175 | * |
176 | * Write a start command to the ring. |
177 | */ |
178 | static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring) |
179 | { |
180 | struct amdgpu_device *adev = ring->adev; |
181 | |
182 | amdgpu_ring_write(ring, |
183 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
184 | amdgpu_ring_write(ring, v: 0x68e04); |
185 | |
186 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
187 | amdgpu_ring_write(ring, v: 0x80010000); |
188 | } |
189 | |
190 | /** |
191 | * jpeg_v1_0_decode_ring_insert_end - insert a end command |
192 | * |
193 | * @ring: amdgpu_ring pointer |
194 | * |
195 | * Write a end command to the ring. |
196 | */ |
197 | static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring) |
198 | { |
199 | struct amdgpu_device *adev = ring->adev; |
200 | |
201 | amdgpu_ring_write(ring, |
202 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
203 | amdgpu_ring_write(ring, v: 0x68e04); |
204 | |
205 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
206 | amdgpu_ring_write(ring, v: 0x00010000); |
207 | } |
208 | |
209 | /** |
210 | * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command |
211 | * |
212 | * @ring: amdgpu_ring pointer |
213 | * @addr: address |
214 | * @seq: sequence number |
215 | * @flags: fence related flags |
216 | * |
217 | * Write a fence and a trap command to the ring. |
218 | */ |
219 | static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, |
220 | unsigned flags) |
221 | { |
222 | struct amdgpu_device *adev = ring->adev; |
223 | |
224 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
225 | |
226 | amdgpu_ring_write(ring, |
227 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); |
228 | amdgpu_ring_write(ring, v: seq); |
229 | |
230 | amdgpu_ring_write(ring, |
231 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); |
232 | amdgpu_ring_write(ring, v: seq); |
233 | |
234 | amdgpu_ring_write(ring, |
235 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
236 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
237 | |
238 | amdgpu_ring_write(ring, |
239 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
240 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
241 | |
242 | amdgpu_ring_write(ring, |
243 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); |
244 | amdgpu_ring_write(ring, v: 0x8); |
245 | |
246 | amdgpu_ring_write(ring, |
247 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); |
248 | amdgpu_ring_write(ring, v: 0); |
249 | |
250 | amdgpu_ring_write(ring, |
251 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); |
252 | amdgpu_ring_write(ring, v: 0x01400200); |
253 | |
254 | amdgpu_ring_write(ring, |
255 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); |
256 | amdgpu_ring_write(ring, v: seq); |
257 | |
258 | amdgpu_ring_write(ring, |
259 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
260 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
261 | |
262 | amdgpu_ring_write(ring, |
263 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
264 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
265 | |
266 | amdgpu_ring_write(ring, |
267 | PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); |
268 | amdgpu_ring_write(ring, v: 0xffffffff); |
269 | |
270 | amdgpu_ring_write(ring, |
271 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
272 | amdgpu_ring_write(ring, v: 0x3fbc); |
273 | |
274 | amdgpu_ring_write(ring, |
275 | PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
276 | amdgpu_ring_write(ring, v: 0x1); |
277 | |
278 | /* emit trap */ |
279 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); |
280 | amdgpu_ring_write(ring, v: 0); |
281 | } |
282 | |
283 | /** |
284 | * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer |
285 | * |
286 | * @ring: amdgpu_ring pointer |
287 | * @job: job to retrieve vmid from |
288 | * @ib: indirect buffer to execute |
289 | * @flags: unused |
290 | * |
291 | * Write ring commands to execute the indirect buffer. |
292 | */ |
293 | static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, |
294 | struct amdgpu_job *job, |
295 | struct amdgpu_ib *ib, |
296 | uint32_t flags) |
297 | { |
298 | struct amdgpu_device *adev = ring->adev; |
299 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
300 | |
301 | amdgpu_ring_write(ring, |
302 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); |
303 | amdgpu_ring_write(ring, v: (vmid | (vmid << 4))); |
304 | |
305 | amdgpu_ring_write(ring, |
306 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); |
307 | amdgpu_ring_write(ring, v: (vmid | (vmid << 4))); |
308 | |
309 | amdgpu_ring_write(ring, |
310 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
311 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
312 | |
313 | amdgpu_ring_write(ring, |
314 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
315 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
316 | |
317 | amdgpu_ring_write(ring, |
318 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); |
319 | amdgpu_ring_write(ring, v: ib->length_dw); |
320 | |
321 | amdgpu_ring_write(ring, |
322 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
323 | amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); |
324 | |
325 | amdgpu_ring_write(ring, |
326 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
327 | amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); |
328 | |
329 | amdgpu_ring_write(ring, |
330 | PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); |
331 | amdgpu_ring_write(ring, v: 0); |
332 | |
333 | amdgpu_ring_write(ring, |
334 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); |
335 | amdgpu_ring_write(ring, v: 0x01400200); |
336 | |
337 | amdgpu_ring_write(ring, |
338 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); |
339 | amdgpu_ring_write(ring, v: 0x2); |
340 | |
341 | amdgpu_ring_write(ring, |
342 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); |
343 | amdgpu_ring_write(ring, v: 0x2); |
344 | } |
345 | |
346 | static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring, |
347 | uint32_t reg, uint32_t val, |
348 | uint32_t mask) |
349 | { |
350 | struct amdgpu_device *adev = ring->adev; |
351 | uint32_t reg_offset = (reg << 2); |
352 | |
353 | amdgpu_ring_write(ring, |
354 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); |
355 | amdgpu_ring_write(ring, v: 0x01400200); |
356 | |
357 | amdgpu_ring_write(ring, |
358 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); |
359 | amdgpu_ring_write(ring, v: val); |
360 | |
361 | amdgpu_ring_write(ring, |
362 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
363 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
364 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
365 | amdgpu_ring_write(ring, v: 0); |
366 | amdgpu_ring_write(ring, |
367 | PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); |
368 | } else { |
369 | amdgpu_ring_write(ring, v: reg_offset); |
370 | amdgpu_ring_write(ring, |
371 | PACKETJ(0, 0, 0, PACKETJ_TYPE3)); |
372 | } |
373 | amdgpu_ring_write(ring, v: mask); |
374 | } |
375 | |
376 | static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, |
377 | unsigned vmid, uint64_t pd_addr) |
378 | { |
379 | struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; |
380 | uint32_t data0, data1, mask; |
381 | |
382 | pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); |
383 | |
384 | /* wait for register write */ |
385 | data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; |
386 | data1 = lower_32_bits(pd_addr); |
387 | mask = 0xffffffff; |
388 | jpeg_v1_0_decode_ring_emit_reg_wait(ring, reg: data0, val: data1, mask); |
389 | } |
390 | |
391 | static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring, |
392 | uint32_t reg, uint32_t val) |
393 | { |
394 | struct amdgpu_device *adev = ring->adev; |
395 | uint32_t reg_offset = (reg << 2); |
396 | |
397 | amdgpu_ring_write(ring, |
398 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
399 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
400 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
401 | amdgpu_ring_write(ring, v: 0); |
402 | amdgpu_ring_write(ring, |
403 | PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); |
404 | } else { |
405 | amdgpu_ring_write(ring, v: reg_offset); |
406 | amdgpu_ring_write(ring, |
407 | PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
408 | } |
409 | amdgpu_ring_write(ring, v: val); |
410 | } |
411 | |
412 | static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count) |
413 | { |
414 | int i; |
415 | |
416 | WARN_ON(ring->wptr % 2 || count % 2); |
417 | |
418 | for (i = 0; i < count / 2; i++) { |
419 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); |
420 | amdgpu_ring_write(ring, v: 0); |
421 | } |
422 | } |
423 | |
424 | static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev, |
425 | struct amdgpu_irq_src *source, |
426 | unsigned type, |
427 | enum amdgpu_interrupt_state state) |
428 | { |
429 | return 0; |
430 | } |
431 | |
432 | static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev, |
433 | struct amdgpu_irq_src *source, |
434 | struct amdgpu_iv_entry *entry) |
435 | { |
436 | DRM_DEBUG("IH: JPEG decode TRAP\n" ); |
437 | |
438 | switch (entry->src_id) { |
439 | case 126: |
440 | amdgpu_fence_process(ring: adev->jpeg.inst->ring_dec); |
441 | break; |
442 | default: |
443 | DRM_ERROR("Unhandled interrupt: %d %d\n" , |
444 | entry->src_id, entry->src_data[0]); |
445 | break; |
446 | } |
447 | |
448 | return 0; |
449 | } |
450 | |
451 | /** |
452 | * jpeg_v1_0_early_init - set function pointers |
453 | * |
454 | * @handle: amdgpu_device pointer |
455 | * |
456 | * Set ring and irq function pointers |
457 | */ |
458 | int jpeg_v1_0_early_init(void *handle) |
459 | { |
460 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
461 | |
462 | adev->jpeg.num_jpeg_inst = 1; |
463 | adev->jpeg.num_jpeg_rings = 1; |
464 | |
465 | jpeg_v1_0_set_dec_ring_funcs(adev); |
466 | jpeg_v1_0_set_irq_funcs(adev); |
467 | |
468 | return 0; |
469 | } |
470 | |
471 | /** |
472 | * jpeg_v1_0_sw_init - sw init for JPEG block |
473 | * |
474 | * @handle: amdgpu_device pointer |
475 | * |
476 | */ |
477 | int jpeg_v1_0_sw_init(void *handle) |
478 | { |
479 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
480 | struct amdgpu_ring *ring; |
481 | int r; |
482 | |
483 | /* JPEG TRAP */ |
484 | r = amdgpu_irq_add_id(adev, client_id: SOC15_IH_CLIENTID_VCN, src_id: 126, source: &adev->jpeg.inst->irq); |
485 | if (r) |
486 | return r; |
487 | |
488 | ring = adev->jpeg.inst->ring_dec; |
489 | ring->vm_hub = AMDGPU_MMHUB0(0); |
490 | sprintf(buf: ring->name, fmt: "jpeg_dec" ); |
491 | r = amdgpu_ring_init(adev, ring, max_dw: 512, irq_src: &adev->jpeg.inst->irq, |
492 | irq_type: 0, hw_prio: AMDGPU_RING_PRIO_DEFAULT, NULL); |
493 | if (r) |
494 | return r; |
495 | |
496 | adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] = |
497 | SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); |
498 | |
499 | return 0; |
500 | } |
501 | |
502 | /** |
503 | * jpeg_v1_0_sw_fini - sw fini for JPEG block |
504 | * |
505 | * @handle: amdgpu_device pointer |
506 | * |
507 | * JPEG free up sw allocation |
508 | */ |
509 | void jpeg_v1_0_sw_fini(void *handle) |
510 | { |
511 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
512 | |
513 | amdgpu_ring_fini(ring: adev->jpeg.inst->ring_dec); |
514 | } |
515 | |
516 | /** |
517 | * jpeg_v1_0_start - start JPEG block |
518 | * |
519 | * @adev: amdgpu_device pointer |
520 | * @mode: SPG or DPG mode |
521 | * |
522 | * Setup and start the JPEG block |
523 | */ |
524 | void jpeg_v1_0_start(struct amdgpu_device *adev, int mode) |
525 | { |
526 | struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; |
527 | |
528 | if (mode == 0) { |
529 | WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); |
530 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | |
531 | UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); |
532 | WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); |
533 | WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); |
534 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0); |
535 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0); |
536 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); |
537 | } |
538 | |
539 | /* initialize wptr */ |
540 | ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); |
541 | |
542 | /* copy patch commands to the jpeg ring */ |
543 | jpeg_v1_0_decode_ring_set_patch_ring(ring, |
544 | ptr: (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); |
545 | } |
546 | |
547 | static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { |
548 | .type = AMDGPU_RING_TYPE_VCN_JPEG, |
549 | .align_mask = 0xf, |
550 | .nop = PACKET0(0x81ff, 0), |
551 | .support_64bit_ptrs = false, |
552 | .no_user_fence = true, |
553 | .extra_dw = 64, |
554 | .get_rptr = jpeg_v1_0_decode_ring_get_rptr, |
555 | .get_wptr = jpeg_v1_0_decode_ring_get_wptr, |
556 | .set_wptr = jpeg_v1_0_decode_ring_set_wptr, |
557 | .emit_frame_size = |
558 | 6 + 6 + /* hdp invalidate / flush */ |
559 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + |
560 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + |
561 | 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */ |
562 | 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */ |
563 | 6, |
564 | .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */ |
565 | .emit_ib = jpeg_v1_0_decode_ring_emit_ib, |
566 | .emit_fence = jpeg_v1_0_decode_ring_emit_fence, |
567 | .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush, |
568 | .test_ring = amdgpu_jpeg_dec_ring_test_ring, |
569 | .test_ib = amdgpu_jpeg_dec_ring_test_ib, |
570 | .insert_nop = jpeg_v1_0_decode_ring_nop, |
571 | .insert_start = jpeg_v1_0_decode_ring_insert_start, |
572 | .insert_end = jpeg_v1_0_decode_ring_insert_end, |
573 | .pad_ib = amdgpu_ring_generic_pad_ib, |
574 | .begin_use = jpeg_v1_0_ring_begin_use, |
575 | .end_use = vcn_v1_0_ring_end_use, |
576 | .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg, |
577 | .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait, |
578 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, |
579 | }; |
580 | |
581 | static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) |
582 | { |
583 | adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs; |
584 | DRM_INFO("JPEG decode is enabled in VM mode\n" ); |
585 | } |
586 | |
587 | static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = { |
588 | .set = jpeg_v1_0_set_interrupt_state, |
589 | .process = jpeg_v1_0_process_interrupt, |
590 | }; |
591 | |
592 | static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev) |
593 | { |
594 | adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs; |
595 | } |
596 | |
597 | static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) |
598 | { |
599 | struct amdgpu_device *adev = ring->adev; |
600 | bool set_clocks = !cancel_delayed_work_sync(dwork: &adev->vcn.idle_work); |
601 | int cnt = 0; |
602 | |
603 | mutex_lock(&adev->vcn.vcn1_jpeg1_workaround); |
604 | |
605 | if (amdgpu_fence_wait_empty(ring: &adev->vcn.inst->ring_dec)) |
606 | DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n" ); |
607 | |
608 | for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) { |
609 | if (amdgpu_fence_wait_empty(ring: &adev->vcn.inst->ring_enc[cnt])) |
610 | DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n" , cnt); |
611 | } |
612 | |
613 | vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); |
614 | } |
615 | |