1 | /* |
2 | * Copyright 2019 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | */ |
22 | |
23 | #include "amdgpu_vm.h" |
24 | #include "amdgpu_job.h" |
25 | #include "amdgpu_object.h" |
26 | #include "amdgpu_trace.h" |
27 | |
28 | #define AMDGPU_VM_SDMA_MIN_NUM_DW 256u |
29 | #define AMDGPU_VM_SDMA_MAX_NUM_DW (16u * 1024u) |
30 | |
31 | /** |
32 | * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped |
33 | * |
34 | * @table: newly allocated or validated PD/PT |
35 | */ |
36 | static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) |
37 | { |
38 | int r; |
39 | |
40 | r = amdgpu_ttm_alloc_gart(bo: &table->bo.tbo); |
41 | if (r) |
42 | return r; |
43 | |
44 | if (table->shadow) |
45 | r = amdgpu_ttm_alloc_gart(bo: &table->shadow->tbo); |
46 | |
47 | return r; |
48 | } |
49 | |
50 | /* Allocate a new job for @count PTE updates */ |
51 | static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, |
52 | unsigned int count) |
53 | { |
54 | enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE |
55 | : AMDGPU_IB_POOL_DELAYED; |
56 | struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate |
57 | : &p->vm->delayed; |
58 | unsigned int ndw; |
59 | int r; |
60 | |
61 | /* estimate how many dw we need */ |
62 | ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; |
63 | if (p->pages_addr) |
64 | ndw += count * 2; |
65 | ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); |
66 | |
67 | r = amdgpu_job_alloc_with_ib(adev: p->adev, entity, AMDGPU_FENCE_OWNER_VM, |
68 | size: ndw * 4, pool_type: pool, job: &p->job); |
69 | if (r) |
70 | return r; |
71 | |
72 | p->num_dw_left = ndw; |
73 | return 0; |
74 | } |
75 | |
76 | /** |
77 | * amdgpu_vm_sdma_prepare - prepare SDMA command submission |
78 | * |
79 | * @p: see amdgpu_vm_update_params definition |
80 | * @resv: reservation object with embedded fence |
81 | * @sync_mode: synchronization mode |
82 | * |
83 | * Returns: |
84 | * Negativ errno, 0 for success. |
85 | */ |
86 | static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, |
87 | struct dma_resv *resv, |
88 | enum amdgpu_sync_mode sync_mode) |
89 | { |
90 | struct amdgpu_sync sync; |
91 | int r; |
92 | |
93 | r = amdgpu_vm_sdma_alloc_job(p, count: 0); |
94 | if (r) |
95 | return r; |
96 | |
97 | if (!resv) |
98 | return 0; |
99 | |
100 | amdgpu_sync_create(sync: &sync); |
101 | r = amdgpu_sync_resv(adev: p->adev, sync: &sync, resv, mode: sync_mode, owner: p->vm); |
102 | if (!r) |
103 | r = amdgpu_sync_push_to_job(sync: &sync, job: p->job); |
104 | amdgpu_sync_free(sync: &sync); |
105 | return r; |
106 | } |
107 | |
108 | /** |
109 | * amdgpu_vm_sdma_commit - commit SDMA command submission |
110 | * |
111 | * @p: see amdgpu_vm_update_params definition |
112 | * @fence: resulting fence |
113 | * |
114 | * Returns: |
115 | * Negativ errno, 0 for success. |
116 | */ |
117 | static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, |
118 | struct dma_fence **fence) |
119 | { |
120 | struct amdgpu_ib *ib = p->job->ibs; |
121 | struct amdgpu_ring *ring; |
122 | struct dma_fence *f; |
123 | |
124 | ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring, |
125 | sched); |
126 | |
127 | WARN_ON(ib->length_dw == 0); |
128 | amdgpu_ring_pad_ib(ring, ib); |
129 | WARN_ON(ib->length_dw > p->num_dw_left); |
130 | f = amdgpu_job_submit(job: p->job); |
131 | |
132 | if (p->unlocked) { |
133 | struct dma_fence *tmp = dma_fence_get(fence: f); |
134 | |
135 | swap(p->vm->last_unlocked, tmp); |
136 | dma_fence_put(fence: tmp); |
137 | } else { |
138 | dma_resv_add_fence(obj: p->vm->root.bo->tbo.base.resv, fence: f, |
139 | usage: DMA_RESV_USAGE_BOOKKEEP); |
140 | } |
141 | |
142 | if (fence && !p->immediate) { |
143 | /* |
144 | * Most hw generations now have a separate queue for page table |
145 | * updates, but when the queue is shared with userspace we need |
146 | * the extra CPU round trip to correctly flush the TLB. |
147 | */ |
148 | set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, addr: &f->flags); |
149 | swap(*fence, f); |
150 | } |
151 | dma_fence_put(fence: f); |
152 | return 0; |
153 | } |
154 | |
155 | /** |
156 | * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping |
157 | * |
158 | * @p: see amdgpu_vm_update_params definition |
159 | * @bo: PD/PT to update |
160 | * @pe: addr of the page entry |
161 | * @count: number of page entries to copy |
162 | * |
163 | * Traces the parameters and calls the DMA function to copy the PTEs. |
164 | */ |
165 | static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, |
166 | struct amdgpu_bo *bo, uint64_t pe, |
167 | unsigned count) |
168 | { |
169 | struct amdgpu_ib *ib = p->job->ibs; |
170 | uint64_t src = ib->gpu_addr; |
171 | |
172 | src += p->num_dw_left * 4; |
173 | |
174 | pe += amdgpu_bo_gpu_offset_no_check(bo); |
175 | trace_amdgpu_vm_copy_ptes(pe, src, count, immediate: p->immediate); |
176 | |
177 | amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); |
178 | } |
179 | |
180 | /** |
181 | * amdgpu_vm_sdma_set_ptes - helper to call the right asic function |
182 | * |
183 | * @p: see amdgpu_vm_update_params definition |
184 | * @bo: PD/PT to update |
185 | * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB |
186 | * @addr: dst addr to write into pe |
187 | * @count: number of page entries to update |
188 | * @incr: increase next addr by incr bytes |
189 | * @flags: hw access flags |
190 | * |
191 | * Traces the parameters and calls the right asic functions |
192 | * to setup the page table using the DMA. |
193 | */ |
194 | static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, |
195 | struct amdgpu_bo *bo, uint64_t pe, |
196 | uint64_t addr, unsigned count, |
197 | uint32_t incr, uint64_t flags) |
198 | { |
199 | struct amdgpu_ib *ib = p->job->ibs; |
200 | |
201 | pe += amdgpu_bo_gpu_offset_no_check(bo); |
202 | trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, immediate: p->immediate); |
203 | if (count < 3) { |
204 | amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, |
205 | count, incr); |
206 | } else { |
207 | amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr, |
208 | count, incr, flags); |
209 | } |
210 | } |
211 | |
212 | /** |
213 | * amdgpu_vm_sdma_update - execute VM update |
214 | * |
215 | * @p: see amdgpu_vm_update_params definition |
216 | * @vmbo: PD/PT to update |
217 | * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB |
218 | * @addr: dst addr to write into pe |
219 | * @count: number of page entries to update |
220 | * @incr: increase next addr by incr bytes |
221 | * @flags: hw access flags |
222 | * |
223 | * Reserve space in the IB, setup mapping buffer on demand and write commands to |
224 | * the IB. |
225 | */ |
226 | static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, |
227 | struct amdgpu_bo_vm *vmbo, uint64_t pe, |
228 | uint64_t addr, unsigned count, uint32_t incr, |
229 | uint64_t flags) |
230 | { |
231 | struct amdgpu_bo *bo = &vmbo->bo; |
232 | struct dma_resv_iter cursor; |
233 | unsigned int i, ndw, nptes; |
234 | struct dma_fence *fence; |
235 | uint64_t *pte; |
236 | int r; |
237 | |
238 | /* Wait for PD/PT moves to be completed */ |
239 | dma_resv_iter_begin(cursor: &cursor, obj: bo->tbo.base.resv, usage: DMA_RESV_USAGE_KERNEL); |
240 | dma_resv_for_each_fence_unlocked(&cursor, fence) { |
241 | dma_fence_get(fence); |
242 | r = drm_sched_job_add_dependency(job: &p->job->base, fence); |
243 | if (r) { |
244 | dma_fence_put(fence); |
245 | dma_resv_iter_end(cursor: &cursor); |
246 | return r; |
247 | } |
248 | } |
249 | dma_resv_iter_end(cursor: &cursor); |
250 | |
251 | do { |
252 | ndw = p->num_dw_left; |
253 | ndw -= p->job->ibs->length_dw; |
254 | |
255 | if (ndw < 32) { |
256 | r = amdgpu_vm_sdma_commit(p, NULL); |
257 | if (r) |
258 | return r; |
259 | |
260 | r = amdgpu_vm_sdma_alloc_job(p, count); |
261 | if (r) |
262 | return r; |
263 | } |
264 | |
265 | if (!p->pages_addr) { |
266 | /* set page commands needed */ |
267 | if (vmbo->shadow) |
268 | amdgpu_vm_sdma_set_ptes(p, bo: vmbo->shadow, pe, addr, |
269 | count, incr, flags); |
270 | amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, |
271 | incr, flags); |
272 | return 0; |
273 | } |
274 | |
275 | /* copy commands needed */ |
276 | ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * |
277 | (vmbo->shadow ? 2 : 1); |
278 | |
279 | /* for padding */ |
280 | ndw -= 7; |
281 | |
282 | nptes = min(count, ndw / 2); |
283 | |
284 | /* Put the PTEs at the end of the IB. */ |
285 | p->num_dw_left -= nptes * 2; |
286 | pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]); |
287 | for (i = 0; i < nptes; ++i, addr += incr) { |
288 | pte[i] = amdgpu_vm_map_gart(pages_addr: p->pages_addr, addr); |
289 | pte[i] |= flags; |
290 | } |
291 | |
292 | if (vmbo->shadow) |
293 | amdgpu_vm_sdma_copy_ptes(p, bo: vmbo->shadow, pe, count: nptes); |
294 | amdgpu_vm_sdma_copy_ptes(p, bo, pe, count: nptes); |
295 | |
296 | pe += nptes * 8; |
297 | count -= nptes; |
298 | } while (count); |
299 | |
300 | return 0; |
301 | } |
302 | |
/* VM update backend using the SDMA engine */
const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
	.map_table = amdgpu_vm_sdma_map_table,
	.prepare = amdgpu_vm_sdma_prepare,
	.update = amdgpu_vm_sdma_update,
	.commit = amdgpu_vm_sdma_commit
};
309 | |