1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright 2014 Advanced Micro Devices, Inc. |
4 | * All Rights Reserved. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
7 | * copy of this software and associated documentation files (the |
8 | * "Software"), to deal in the Software without restriction, including |
9 | * without limitation the rights to use, copy, modify, merge, publish, |
10 | * distribute, sub license, and/or sell copies of the Software, and to |
11 | * permit persons to whom the Software is furnished to do so, subject to |
12 | * the following conditions: |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
18 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
19 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
20 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * The above copyright notice and this permission notice (including the |
23 | * next paragraph) shall be included in all copies or substantial portions |
24 | * of the Software. |
25 | * |
26 | */ |
27 | /* |
28 | * Authors: |
29 | * Christian König <christian.koenig@amd.com> |
30 | */ |
31 | |
32 | #include <linux/dma-fence-chain.h> |
33 | |
34 | #include "amdgpu.h" |
35 | #include "amdgpu_trace.h" |
36 | #include "amdgpu_amdkfd.h" |
37 | |
/*
 * One fence tracked by a sync object. Entries are allocated from
 * amdgpu_sync_slab and hashed into the sync object's fence table using the
 * fence context as key.
 */
struct amdgpu_sync_entry {
	/* link into the fence hash table of the owning sync object */
	struct hlist_node node;
	/* tracked fence; the entry holds its own reference to it */
	struct dma_fence *fence;
};
42 | |
/*
 * Slab cache backing struct amdgpu_sync_entry allocations; created by
 * amdgpu_sync_init() and destroyed by amdgpu_sync_fini().
 */
static struct kmem_cache *amdgpu_sync_slab;
44 | |
45 | /** |
46 | * amdgpu_sync_create - zero init sync object |
47 | * |
48 | * @sync: sync object to initialize |
49 | * |
50 | * Just clear the sync object for now. |
51 | */ |
52 | void amdgpu_sync_create(struct amdgpu_sync *sync) |
53 | { |
54 | hash_init(sync->fences); |
55 | } |
56 | |
57 | /** |
58 | * amdgpu_sync_same_dev - test if fence belong to us |
59 | * |
60 | * @adev: amdgpu device to use for the test |
61 | * @f: fence to test |
62 | * |
63 | * Test if the fence was issued by us. |
64 | */ |
65 | static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, |
66 | struct dma_fence *f) |
67 | { |
68 | struct drm_sched_fence *s_fence = to_drm_sched_fence(f); |
69 | |
70 | if (s_fence) { |
71 | struct amdgpu_ring *ring; |
72 | |
73 | ring = container_of(s_fence->sched, struct amdgpu_ring, sched); |
74 | return ring->adev == adev; |
75 | } |
76 | |
77 | return false; |
78 | } |
79 | |
80 | /** |
81 | * amdgpu_sync_get_owner - extract the owner of a fence |
82 | * |
83 | * @f: fence get the owner from |
84 | * |
85 | * Extract who originally created the fence. |
86 | */ |
87 | static void *amdgpu_sync_get_owner(struct dma_fence *f) |
88 | { |
89 | struct drm_sched_fence *s_fence; |
90 | struct amdgpu_amdkfd_fence *kfd_fence; |
91 | |
92 | if (!f) |
93 | return AMDGPU_FENCE_OWNER_UNDEFINED; |
94 | |
95 | s_fence = to_drm_sched_fence(f); |
96 | if (s_fence) |
97 | return s_fence->owner; |
98 | |
99 | kfd_fence = to_amdgpu_amdkfd_fence(f); |
100 | if (kfd_fence) |
101 | return AMDGPU_FENCE_OWNER_KFD; |
102 | |
103 | return AMDGPU_FENCE_OWNER_UNDEFINED; |
104 | } |
105 | |
/**
 * amdgpu_sync_keep_later - remember whichever fence is later
 *
 * @keep: slot holding the currently remembered fence, may hold NULL
 * @fence: candidate fence
 *
 * Leaves @keep untouched when it already holds a fence that is later than
 * @fence. Otherwise drops the reference stored in @keep and stores a new
 * reference to @fence there.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	struct dma_fence *old = *keep;

	if (!old || !dma_fence_is_later(old, fence)) {
		dma_fence_put(old);
		*keep = dma_fence_get(fence);
	}
}
123 | |
124 | /** |
125 | * amdgpu_sync_add_later - add the fence to the hash |
126 | * |
127 | * @sync: sync object to add the fence to |
128 | * @f: fence to add |
129 | * |
130 | * Tries to add the fence to an existing hash entry. Returns true when an entry |
131 | * was found, false otherwise. |
132 | */ |
133 | static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) |
134 | { |
135 | struct amdgpu_sync_entry *e; |
136 | |
137 | hash_for_each_possible(sync->fences, e, node, f->context) { |
138 | if (unlikely(e->fence->context != f->context)) |
139 | continue; |
140 | |
141 | amdgpu_sync_keep_later(keep: &e->fence, fence: f); |
142 | return true; |
143 | } |
144 | return false; |
145 | } |
146 | |
147 | /** |
148 | * amdgpu_sync_fence - remember to sync to this fence |
149 | * |
150 | * @sync: sync object to add fence to |
151 | * @f: fence to sync to |
152 | * |
153 | * Add the fence to the sync object. |
154 | */ |
155 | int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) |
156 | { |
157 | struct amdgpu_sync_entry *e; |
158 | |
159 | if (!f) |
160 | return 0; |
161 | |
162 | if (amdgpu_sync_add_later(sync, f)) |
163 | return 0; |
164 | |
165 | e = kmem_cache_alloc(cachep: amdgpu_sync_slab, GFP_KERNEL); |
166 | if (!e) |
167 | return -ENOMEM; |
168 | |
169 | hash_add(sync->fences, &e->node, f->context); |
170 | e->fence = dma_fence_get(fence: f); |
171 | return 0; |
172 | } |
173 | |
174 | /* Determine based on the owner and mode if we should sync to a fence or not */ |
175 | static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, |
176 | enum amdgpu_sync_mode mode, |
177 | void *owner, struct dma_fence *f) |
178 | { |
179 | void *fence_owner = amdgpu_sync_get_owner(f); |
180 | |
181 | /* Always sync to moves, no matter what */ |
182 | if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) |
183 | return true; |
184 | |
185 | /* We only want to trigger KFD eviction fences on |
186 | * evict or move jobs. Skip KFD fences otherwise. |
187 | */ |
188 | if (fence_owner == AMDGPU_FENCE_OWNER_KFD && |
189 | owner != AMDGPU_FENCE_OWNER_UNDEFINED) |
190 | return false; |
191 | |
192 | /* Never sync to VM updates either. */ |
193 | if (fence_owner == AMDGPU_FENCE_OWNER_VM && |
194 | owner != AMDGPU_FENCE_OWNER_UNDEFINED && |
195 | owner != AMDGPU_FENCE_OWNER_KFD) |
196 | return false; |
197 | |
198 | /* Ignore fences depending on the sync mode */ |
199 | switch (mode) { |
200 | case AMDGPU_SYNC_ALWAYS: |
201 | return true; |
202 | |
203 | case AMDGPU_SYNC_NE_OWNER: |
204 | if (amdgpu_sync_same_dev(adev, f) && |
205 | fence_owner == owner) |
206 | return false; |
207 | break; |
208 | |
209 | case AMDGPU_SYNC_EQ_OWNER: |
210 | if (amdgpu_sync_same_dev(adev, f) && |
211 | fence_owner != owner) |
212 | return false; |
213 | break; |
214 | |
215 | case AMDGPU_SYNC_EXPLICIT: |
216 | return false; |
217 | } |
218 | |
219 | WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, |
220 | "Adding eviction fence to sync obj" ); |
221 | return true; |
222 | } |
223 | |
224 | /** |
225 | * amdgpu_sync_resv - sync to a reservation object |
226 | * |
227 | * @adev: amdgpu device |
228 | * @sync: sync object to add fences from reservation object to |
229 | * @resv: reservation object with embedded fence |
230 | * @mode: how owner affects which fences we sync to |
231 | * @owner: owner of the planned job submission |
232 | * |
233 | * Sync to the fence |
234 | */ |
235 | int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, |
236 | struct dma_resv *resv, enum amdgpu_sync_mode mode, |
237 | void *owner) |
238 | { |
239 | struct dma_resv_iter cursor; |
240 | struct dma_fence *f; |
241 | int r; |
242 | |
243 | if (resv == NULL) |
244 | return -EINVAL; |
245 | |
246 | /* TODO: Use DMA_RESV_USAGE_READ here */ |
247 | dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { |
248 | dma_fence_chain_for_each(f, f) { |
249 | struct dma_fence *tmp = dma_fence_chain_contained(fence: f); |
250 | |
251 | if (amdgpu_sync_test_fence(adev, mode, owner, f: tmp)) { |
252 | r = amdgpu_sync_fence(sync, f); |
253 | dma_fence_put(fence: f); |
254 | if (r) |
255 | return r; |
256 | break; |
257 | } |
258 | } |
259 | } |
260 | return 0; |
261 | } |
262 | |
263 | /* Free the entry back to the slab */ |
264 | static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) |
265 | { |
266 | hash_del(node: &e->node); |
267 | dma_fence_put(fence: e->fence); |
268 | kmem_cache_free(s: amdgpu_sync_slab, objp: e); |
269 | } |
270 | |
271 | /** |
272 | * amdgpu_sync_peek_fence - get the next fence not signaled yet |
273 | * |
274 | * @sync: the sync object |
275 | * @ring: optional ring to use for test |
276 | * |
277 | * Returns the next fence not signaled yet without removing it from the sync |
278 | * object. |
279 | */ |
280 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, |
281 | struct amdgpu_ring *ring) |
282 | { |
283 | struct amdgpu_sync_entry *e; |
284 | struct hlist_node *tmp; |
285 | int i; |
286 | |
287 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
288 | struct dma_fence *f = e->fence; |
289 | struct drm_sched_fence *s_fence = to_drm_sched_fence(f); |
290 | |
291 | if (dma_fence_is_signaled(fence: f)) { |
292 | amdgpu_sync_entry_free(e); |
293 | continue; |
294 | } |
295 | if (ring && s_fence) { |
296 | /* For fences from the same ring it is sufficient |
297 | * when they are scheduled. |
298 | */ |
299 | if (s_fence->sched == &ring->sched) { |
300 | if (dma_fence_is_signaled(fence: &s_fence->scheduled)) |
301 | continue; |
302 | |
303 | return &s_fence->scheduled; |
304 | } |
305 | } |
306 | |
307 | return f; |
308 | } |
309 | |
310 | return NULL; |
311 | } |
312 | |
313 | /** |
314 | * amdgpu_sync_get_fence - get the next fence from the sync object |
315 | * |
316 | * @sync: sync object to use |
317 | * |
318 | * Get and removes the next fence from the sync object not signaled yet. |
319 | */ |
320 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) |
321 | { |
322 | struct amdgpu_sync_entry *e; |
323 | struct hlist_node *tmp; |
324 | struct dma_fence *f; |
325 | int i; |
326 | |
327 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
328 | |
329 | f = e->fence; |
330 | |
331 | hash_del(node: &e->node); |
332 | kmem_cache_free(s: amdgpu_sync_slab, objp: e); |
333 | |
334 | if (!dma_fence_is_signaled(fence: f)) |
335 | return f; |
336 | |
337 | dma_fence_put(fence: f); |
338 | } |
339 | return NULL; |
340 | } |
341 | |
342 | /** |
343 | * amdgpu_sync_clone - clone a sync object |
344 | * |
345 | * @source: sync object to clone |
346 | * @clone: pointer to destination sync object |
347 | * |
348 | * Adds references to all unsignaled fences in @source to @clone. Also |
349 | * removes signaled fences from @source while at it. |
350 | */ |
351 | int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) |
352 | { |
353 | struct amdgpu_sync_entry *e; |
354 | struct hlist_node *tmp; |
355 | struct dma_fence *f; |
356 | int i, r; |
357 | |
358 | hash_for_each_safe(source->fences, i, tmp, e, node) { |
359 | f = e->fence; |
360 | if (!dma_fence_is_signaled(fence: f)) { |
361 | r = amdgpu_sync_fence(sync: clone, f); |
362 | if (r) |
363 | return r; |
364 | } else { |
365 | amdgpu_sync_entry_free(e); |
366 | } |
367 | } |
368 | |
369 | return 0; |
370 | } |
371 | |
372 | /** |
373 | * amdgpu_sync_push_to_job - push fences into job |
374 | * @sync: sync object to get the fences from |
375 | * @job: job to push the fences into |
376 | * |
377 | * Add all unsignaled fences from sync to job. |
378 | */ |
379 | int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) |
380 | { |
381 | struct amdgpu_sync_entry *e; |
382 | struct hlist_node *tmp; |
383 | struct dma_fence *f; |
384 | int i, r; |
385 | |
386 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
387 | f = e->fence; |
388 | if (dma_fence_is_signaled(fence: f)) { |
389 | amdgpu_sync_entry_free(e); |
390 | continue; |
391 | } |
392 | |
393 | dma_fence_get(fence: f); |
394 | r = drm_sched_job_add_dependency(job: &job->base, fence: f); |
395 | if (r) { |
396 | dma_fence_put(fence: f); |
397 | return r; |
398 | } |
399 | } |
400 | return 0; |
401 | } |
402 | |
403 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) |
404 | { |
405 | struct amdgpu_sync_entry *e; |
406 | struct hlist_node *tmp; |
407 | int i, r; |
408 | |
409 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
410 | r = dma_fence_wait(fence: e->fence, intr); |
411 | if (r) |
412 | return r; |
413 | |
414 | amdgpu_sync_entry_free(e); |
415 | } |
416 | |
417 | return 0; |
418 | } |
419 | |
420 | /** |
421 | * amdgpu_sync_free - free the sync object |
422 | * |
423 | * @sync: sync object to use |
424 | * |
425 | * Free the sync object. |
426 | */ |
427 | void amdgpu_sync_free(struct amdgpu_sync *sync) |
428 | { |
429 | struct amdgpu_sync_entry *e; |
430 | struct hlist_node *tmp; |
431 | unsigned int i; |
432 | |
433 | hash_for_each_safe(sync->fences, i, tmp, e, node) |
434 | amdgpu_sync_entry_free(e); |
435 | } |
436 | |
437 | /** |
438 | * amdgpu_sync_init - init sync object subsystem |
439 | * |
440 | * Allocate the slab allocator. |
441 | */ |
442 | int amdgpu_sync_init(void) |
443 | { |
444 | amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN); |
445 | if (!amdgpu_sync_slab) |
446 | return -ENOMEM; |
447 | |
448 | return 0; |
449 | } |
450 | |
451 | /** |
452 | * amdgpu_sync_fini - fini sync object subsystem |
453 | * |
454 | * Free the slab allocator. |
455 | */ |
456 | void amdgpu_sync_fini(void) |
457 | { |
458 | kmem_cache_destroy(s: amdgpu_sync_slab); |
459 | } |
460 | |