/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "ni.h"
#include "nid.h"

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */
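
/* On Cayman the two engines are exposed as separate rings:
 * R600_RING_TYPE_DMA_INDEX drives engine 0 (DMA0_REGISTER_OFFSET) and
 * CAYMAN_RING_TYPE_DMA1_INDEX drives engine 1 (DMA1_REGISTER_OFFSET).
 */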

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 rptr, reg;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
		else
			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

		rptr = RREG32(reg);
	}

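	/* the hw rptr is a byte offset; mask to the ring size and convert to a dword index */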
	return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

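	/* ring->wptr is a dword index; the register expects a byte offset */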
	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

	if (rdev->wb.enabled) {
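		/* Predict where rptr will land once the IB packet below has
		 * been fetched: NOPs pad the ring to an offset of 5 (mod 8)
		 * and the IB packet is 3 dwords long, so it ends on an
		 * 8 dword boundary.
		 */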
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl;

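	/* while the DMA engines are down, buffer moves fall back to CPU
	 * copies, so restrict TTM to the CPU-visible part of VRAM
	 */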
	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* dma0 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

	/* dma1 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

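		/* zero the semaphore timeout controls */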
		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

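		/* disable the context empty interrupt */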
		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
			      struct radeon_ib *ib,
			      uint64_t pe, uint64_t src,
			      unsigned count)
{
	unsigned ndw;

	while (count) {
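		/* each PTE is 64 bits, i.e. two dwords; one copy packet can
		 * move at most 0xFFFFE dwords
		 */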
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += ndw * 4;
		src += ndw * 4;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
			       struct radeon_ib *ib,
			       uint64_t pe,
			       uint64_t addr, unsigned count,
			       uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
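		/* emit one 64-bit PTE (two dwords) per iteration */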
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}

/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
			     struct radeon_ib *ib,
			     uint64_t pe,
			     uint64_t addr, unsigned count,
			     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;

		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so its size is a multiple of 8 dwords,
 * as required by the DMA engine.
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}
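
/**
 * cayman_dma_vm_flush - flush the VM TLB using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: vmid number to use
 * @pd_addr: address of the page directory
 *
 * Update the page table base address for the given VM context, flush
 * the HDP cache and invalidate the VM context's TLB (cayman/TN).
 */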
void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
			 unsigned vm_id, uint64_t pd_addr)
{
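	/* write the page directory base address for this vmid's VM context */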
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0); /* value */
}