1 | /* |
2 | * Copyright 2019 Advanced Micro Devices, Inc. |
3 | * All Rights Reserved. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sub license, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
20 | * |
21 | * The above copyright notice and this permission notice (including the |
22 | * next paragraph) shall be included in all copies or substantial portions |
23 | * of the Software. |
24 | * |
25 | */ |
26 | |
27 | #include "amdgpu.h" |
28 | #include "amdgpu_jpeg.h" |
29 | #include "amdgpu_pm.h" |
30 | #include "soc15d.h" |
31 | #include "soc15_common.h" |
32 | |
/* Delay (1000 ms, converted to jiffies) used whenever the JPEG idle work is scheduled. */
#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)

/* Defined further down; declared here so amdgpu_jpeg_sw_init() can register it. */
static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
36 | |
37 | int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) |
38 | { |
39 | int i, r; |
40 | |
41 | INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler); |
42 | mutex_init(&adev->jpeg.jpeg_pg_lock); |
43 | atomic_set(v: &adev->jpeg.total_submission_cnt, i: 0); |
44 | |
45 | if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && |
46 | (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)) |
47 | adev->jpeg.indirect_sram = true; |
48 | |
49 | for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { |
50 | if (adev->jpeg.harvest_config & (1 << i)) |
51 | continue; |
52 | |
53 | if (adev->jpeg.indirect_sram) { |
54 | r = amdgpu_bo_create_kernel(adev, size: 64 * 2 * 4, PAGE_SIZE, |
55 | AMDGPU_GEM_DOMAIN_VRAM | |
56 | AMDGPU_GEM_DOMAIN_GTT, |
57 | bo_ptr: &adev->jpeg.inst[i].dpg_sram_bo, |
58 | gpu_addr: &adev->jpeg.inst[i].dpg_sram_gpu_addr, |
59 | cpu_addr: &adev->jpeg.inst[i].dpg_sram_cpu_addr); |
60 | if (r) { |
61 | dev_err(adev->dev, |
62 | "JPEG %d (%d) failed to allocate DPG bo\n" , i, r); |
63 | return r; |
64 | } |
65 | } |
66 | } |
67 | |
68 | return 0; |
69 | } |
70 | |
71 | int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) |
72 | { |
73 | int i, j; |
74 | |
75 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
76 | if (adev->jpeg.harvest_config & (1 << i)) |
77 | continue; |
78 | |
79 | amdgpu_bo_free_kernel( |
80 | bo: &adev->jpeg.inst[i].dpg_sram_bo, |
81 | gpu_addr: &adev->jpeg.inst[i].dpg_sram_gpu_addr, |
82 | cpu_addr: (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr); |
83 | |
84 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) |
85 | amdgpu_ring_fini(ring: &adev->jpeg.inst[i].ring_dec[j]); |
86 | } |
87 | |
88 | mutex_destroy(lock: &adev->jpeg.jpeg_pg_lock); |
89 | |
90 | return 0; |
91 | } |
92 | |
93 | int amdgpu_jpeg_suspend(struct amdgpu_device *adev) |
94 | { |
95 | cancel_delayed_work_sync(dwork: &adev->jpeg.idle_work); |
96 | |
97 | return 0; |
98 | } |
99 | |
/**
 * amdgpu_jpeg_resume - common JPEG resume step
 * @adev: amdgpu device (unused)
 *
 * Nothing needs to be restored here; the function exists as the counterpart
 * of amdgpu_jpeg_suspend().
 *
 * Return: always 0.
 */
int amdgpu_jpeg_resume(struct amdgpu_device *adev)
{
	/* Intentionally empty. */
	return 0;
}
104 | |
105 | static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) |
106 | { |
107 | struct amdgpu_device *adev = |
108 | container_of(work, struct amdgpu_device, jpeg.idle_work.work); |
109 | unsigned int fences = 0; |
110 | unsigned int i, j; |
111 | |
112 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
113 | if (adev->jpeg.harvest_config & (1 << i)) |
114 | continue; |
115 | |
116 | for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) |
117 | fences += amdgpu_fence_count_emitted(ring: &adev->jpeg.inst[i].ring_dec[j]); |
118 | } |
119 | |
120 | if (!fences && !atomic_read(v: &adev->jpeg.total_submission_cnt)) |
121 | amdgpu_device_ip_set_powergating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_JPEG, |
122 | state: AMD_PG_STATE_GATE); |
123 | else |
124 | schedule_delayed_work(dwork: &adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); |
125 | } |
126 | |
127 | void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring) |
128 | { |
129 | struct amdgpu_device *adev = ring->adev; |
130 | |
131 | atomic_inc(v: &adev->jpeg.total_submission_cnt); |
132 | cancel_delayed_work_sync(dwork: &adev->jpeg.idle_work); |
133 | |
134 | mutex_lock(&adev->jpeg.jpeg_pg_lock); |
135 | amdgpu_device_ip_set_powergating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_JPEG, |
136 | state: AMD_PG_STATE_UNGATE); |
137 | mutex_unlock(lock: &adev->jpeg.jpeg_pg_lock); |
138 | } |
139 | |
140 | void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring) |
141 | { |
142 | atomic_dec(v: &ring->adev->jpeg.total_submission_cnt); |
143 | schedule_delayed_work(dwork: &ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); |
144 | } |
145 | |
146 | int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring) |
147 | { |
148 | struct amdgpu_device *adev = ring->adev; |
149 | uint32_t tmp = 0; |
150 | unsigned i; |
151 | int r; |
152 | |
153 | /* JPEG in SRIOV does not support direct register read/write */ |
154 | if (amdgpu_sriov_vf(adev)) |
155 | return 0; |
156 | |
157 | r = amdgpu_ring_alloc(ring, ndw: 3); |
158 | if (r) |
159 | return r; |
160 | |
161 | WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD); |
162 | /* Add a read register to make sure the write register is executed. */ |
163 | RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); |
164 | |
165 | amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0)); |
166 | amdgpu_ring_write(ring, v: 0xABADCAFE); |
167 | amdgpu_ring_commit(ring); |
168 | |
169 | for (i = 0; i < adev->usec_timeout; i++) { |
170 | tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); |
171 | if (tmp == 0xABADCAFE) |
172 | break; |
173 | udelay(1); |
174 | } |
175 | |
176 | if (i >= adev->usec_timeout) |
177 | r = -ETIMEDOUT; |
178 | |
179 | return r; |
180 | } |
181 | |
182 | static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, |
183 | struct dma_fence **fence) |
184 | { |
185 | struct amdgpu_device *adev = ring->adev; |
186 | struct amdgpu_job *job; |
187 | struct amdgpu_ib *ib; |
188 | struct dma_fence *f = NULL; |
189 | const unsigned ib_size_dw = 16; |
190 | int i, r; |
191 | |
192 | r = amdgpu_job_alloc_with_ib(adev: ring->adev, NULL, NULL, size: ib_size_dw * 4, |
193 | pool_type: AMDGPU_IB_POOL_DIRECT, job: &job); |
194 | if (r) |
195 | return r; |
196 | |
197 | ib = &job->ibs[0]; |
198 | |
199 | ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0); |
200 | ib->ptr[1] = 0xDEADBEEF; |
201 | for (i = 2; i < 16; i += 2) { |
202 | ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); |
203 | ib->ptr[i+1] = 0; |
204 | } |
205 | ib->length_dw = 16; |
206 | |
207 | r = amdgpu_job_submit_direct(job, ring, fence: &f); |
208 | if (r) |
209 | goto err; |
210 | |
211 | if (fence) |
212 | *fence = dma_fence_get(fence: f); |
213 | dma_fence_put(fence: f); |
214 | |
215 | return 0; |
216 | |
217 | err: |
218 | amdgpu_job_free(job); |
219 | return r; |
220 | } |
221 | |
222 | int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) |
223 | { |
224 | struct amdgpu_device *adev = ring->adev; |
225 | uint32_t tmp = 0; |
226 | unsigned i; |
227 | struct dma_fence *fence = NULL; |
228 | long r = 0; |
229 | |
230 | r = amdgpu_jpeg_dec_set_reg(ring, handle: 1, fence: &fence); |
231 | if (r) |
232 | goto error; |
233 | |
234 | r = dma_fence_wait_timeout(fence, intr: false, timeout); |
235 | if (r == 0) { |
236 | r = -ETIMEDOUT; |
237 | goto error; |
238 | } else if (r < 0) { |
239 | goto error; |
240 | } else { |
241 | r = 0; |
242 | } |
243 | |
244 | if (!amdgpu_sriov_vf(adev)) { |
245 | for (i = 0; i < adev->usec_timeout; i++) { |
246 | tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); |
247 | if (tmp == 0xDEADBEEF) |
248 | break; |
249 | udelay(1); |
250 | if (amdgpu_emu_mode == 1) |
251 | udelay(10); |
252 | } |
253 | |
254 | if (i >= adev->usec_timeout) |
255 | r = -ETIMEDOUT; |
256 | } |
257 | |
258 | dma_fence_put(fence); |
259 | error: |
260 | return r; |
261 | } |
262 | |
263 | int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, |
264 | struct amdgpu_irq_src *source, |
265 | struct amdgpu_iv_entry *entry) |
266 | { |
267 | struct ras_common_if *ras_if = adev->jpeg.ras_if; |
268 | struct ras_dispatch_if ih_data = { |
269 | .entry = entry, |
270 | }; |
271 | |
272 | if (!ras_if) |
273 | return 0; |
274 | |
275 | ih_data.head = *ras_if; |
276 | amdgpu_ras_interrupt_dispatch(adev, info: &ih_data); |
277 | |
278 | return 0; |
279 | } |
280 | |
281 | int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) |
282 | { |
283 | int r, i; |
284 | |
285 | r = amdgpu_ras_block_late_init(adev, ras_block); |
286 | if (r) |
287 | return r; |
288 | |
289 | if (amdgpu_ras_is_supported(adev, block: ras_block->block)) { |
290 | for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { |
291 | if (adev->jpeg.harvest_config & (1 << i) || |
292 | !adev->jpeg.inst[i].ras_poison_irq.funcs) |
293 | continue; |
294 | |
295 | r = amdgpu_irq_get(adev, src: &adev->jpeg.inst[i].ras_poison_irq, type: 0); |
296 | if (r) |
297 | goto late_fini; |
298 | } |
299 | } |
300 | return 0; |
301 | |
302 | late_fini: |
303 | amdgpu_ras_block_late_fini(adev, ras_block); |
304 | return r; |
305 | } |
306 | |
307 | int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev) |
308 | { |
309 | int err; |
310 | struct amdgpu_jpeg_ras *ras; |
311 | |
312 | if (!adev->jpeg.ras) |
313 | return 0; |
314 | |
315 | ras = adev->jpeg.ras; |
316 | err = amdgpu_ras_register_ras_block(adev, ras_block_obj: &ras->ras_block); |
317 | if (err) { |
318 | dev_err(adev->dev, "Failed to register jpeg ras block!\n" ); |
319 | return err; |
320 | } |
321 | |
322 | strcpy(p: ras->ras_block.ras_comm.name, q: "jpeg" ); |
323 | ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; |
324 | ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; |
325 | adev->jpeg.ras_if = &ras->ras_block.ras_comm; |
326 | |
327 | if (!ras->ras_block.ras_late_init) |
328 | ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init; |
329 | |
330 | return 0; |
331 | } |
332 | |
333 | int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, |
334 | enum AMDGPU_UCODE_ID ucode_id) |
335 | { |
336 | struct amdgpu_firmware_info ucode = { |
337 | .ucode_id = AMDGPU_UCODE_ID_JPEG_RAM, |
338 | .mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr, |
339 | .ucode_size = ((uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr - |
340 | (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr), |
341 | }; |
342 | |
343 | return psp_execute_ip_fw_load(psp: &adev->psp, ucode: &ucode); |
344 | } |
345 | |