1 | /* |
2 | * Copyright © 2017 Intel Corporation |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/slab.h> |
26 | |
27 | #include "i915_syncmap.h" |
28 | |
29 | #include "i915_gem.h" /* GEM_BUG_ON() */ |
30 | #include "i915_selftest.h" |
31 | |
32 | #define SHIFT ilog2(KSYNCMAP) |
33 | #define MASK (KSYNCMAP - 1) |
34 | |
35 | /* |
36 | * struct i915_syncmap is a layer of a radixtree that maps a u64 fence |
37 | * context id to the last u32 fence seqno waited upon from that context. |
38 | * Unlike lib/radixtree it uses a parent pointer that allows traversal back to |
39 | * the root. This allows us to access the whole tree via a single pointer |
40 | * to the most recently used layer. We expect fence contexts to be dense |
41 | * and most reuse to be on the same i915_gem_context but on neighbouring |
42 | * engines (i.e. on adjacent contexts) and reuse the same leaf, a very |
43 | * effective lookup cache. If the new lookup is not on the same leaf, we |
44 | * expect it to be on the neighbouring branch. |
45 | * |
46 | * A leaf holds an array of u32 seqno, and has height 0. The bitmap field |
47 | * allows us to store whether a particular seqno is valid (i.e. allows us |
48 | * to distinguish unset from 0). |
49 | * |
50 | * A branch holds an array of layer pointers, and has height > 0, and always |
51 | * has at least 2 layers (either branches or leaves) below it. |
52 | * |
53 | * For example, |
54 | * for x in |
55 | * 0 1 2 0x10 0x11 0x200 0x201 |
56 | * 0x500000 0x500001 0x503000 0x503001 |
57 | * 0xE<<60: |
58 | * i915_syncmap_set(&sync, x, lower_32_bits(x)); |
59 | * will build a tree like: |
60 | * 0xXXXXXXXXXXXXXXXX |
61 | * 0-> 0x0000000000XXXXXX |
62 | * | 0-> 0x0000000000000XXX |
63 | * | | 0-> 0x00000000000000XX |
64 | * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 |
65 | * | | | 1-> 0x000000000000001X 0:10, 1:11 |
66 | * | | 2-> 0x000000000000020X 0:200, 1:201 |
67 | * | 5-> 0x000000000050XXXX |
68 | * | 0-> 0x000000000050000X 0:500000, 1:500001 |
69 | * | 3-> 0x000000000050300X 0:503000, 1:503001 |
70 | * e-> 0xe00000000000000X e:e |
71 | */ |
72 | |
struct i915_syncmap {
	u64 prefix;		/* upper bits of id covered by this layer (see __sync_*_prefix()) */
	unsigned int height;	/* bit shift to this layer; 0 for a leaf */
	unsigned int bitmap;	/* one bit per occupied slot in the trailing array */
	struct i915_syncmap *parent;	/* link towards the root; NULL at the top */
	/*
	 * Following this header is an array of either seqno or child pointers:
	 * union {
	 *	u32 seqno[KSYNCMAP];
	 *	struct i915_syncmap *child[KSYNCMAP];
	 * };
	 */
};
86 | |
/**
 * i915_syncmap_init -- initialise the #i915_syncmap
 * @root: pointer to the #i915_syncmap
 *
 * The map starts out empty: the first lookup will miss and the first set
 * will allocate the initial leaf.
 */
void i915_syncmap_init(struct i915_syncmap **root)
{
	/*
	 * Compile-time sanity checks: the radix must be a power-of-two (ids
	 * are split with shift/mask), and each layer's occupancy must fit
	 * within the bitmap field.
	 */
	BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
	BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
	BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap));
	*root = NULL;
}
98 | |
/* The u32 seqno[KSYNCMAP] array stored immediately after a leaf's header. */
static inline u32 *__sync_seqno(struct i915_syncmap *p)
{
	GEM_BUG_ON(p->height); /* only valid for leaves (height == 0) */
	return (u32 *)(p + 1);
}
104 | |
/* The child[KSYNCMAP] pointer array stored immediately after a branch's header. */
static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
{
	GEM_BUG_ON(!p->height); /* only valid for branches (height > 0) */
	return (struct i915_syncmap **)(p + 1);
}
110 | |
/* Slot within branch @p selected by @id: the SHIFT bits just above p->height. */
static inline unsigned int
__sync_branch_idx(const struct i915_syncmap *p, u64 id)
{
	return (id >> p->height) & MASK;
}
116 | |
/* Slot within leaf @p selected by @id: the lowest SHIFT bits. */
static inline unsigned int
__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
{
	GEM_BUG_ON(p->height); /* leaves only */
	return id & MASK;
}
123 | |
/* The id bits above branch @p's span; equal to p->prefix iff @p covers @id. */
static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
{
	return id >> p->height >> SHIFT;
}
128 | |
/* The id bits above a leaf's span; equal to p->prefix iff leaf @p covers @id. */
static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
{
	GEM_BUG_ON(p->height); /* leaves only */
	return id >> SHIFT;
}
134 | |
135 | static inline bool seqno_later(u32 a, u32 b) |
136 | { |
137 | return (s32)(a - b) >= 0; |
138 | } |
139 | |
140 | /** |
141 | * i915_syncmap_is_later -- compare against the last know sync point |
142 | * @root: pointer to the #i915_syncmap |
143 | * @id: the context id (other timeline) we are synchronising to |
144 | * @seqno: the sequence number along the other timeline |
145 | * |
146 | * If we have already synchronised this @root timeline with another (@id) then |
147 | * we can omit any repeated or earlier synchronisation requests. If the two |
148 | * timelines are already coupled, we can also omit the dependency between the |
149 | * two as that is already known via the timeline. |
150 | * |
151 | * Returns true if the two timelines are already synchronised wrt to @seqno, |
152 | * false if not and the synchronisation must be emitted. |
153 | */ |
154 | bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) |
155 | { |
156 | struct i915_syncmap *p; |
157 | unsigned int idx; |
158 | |
159 | p = *root; |
160 | if (!p) |
161 | return false; |
162 | |
163 | if (likely(__sync_leaf_prefix(p, id) == p->prefix)) |
164 | goto found; |
165 | |
166 | /* First climb the tree back to a parent branch */ |
167 | do { |
168 | p = p->parent; |
169 | if (!p) |
170 | return false; |
171 | |
172 | if (__sync_branch_prefix(p, id) == p->prefix) |
173 | break; |
174 | } while (1); |
175 | |
176 | /* And then descend again until we find our leaf */ |
177 | do { |
178 | if (!p->height) |
179 | break; |
180 | |
181 | p = __sync_child(p)[__sync_branch_idx(p, id)]; |
182 | if (!p) |
183 | return false; |
184 | |
185 | if (__sync_branch_prefix(p, id) != p->prefix) |
186 | return false; |
187 | } while (1); |
188 | |
189 | *root = p; |
190 | found: |
191 | idx = __sync_leaf_idx(p, id); |
192 | if (!(p->bitmap & BIT(idx))) |
193 | return false; |
194 | |
195 | return seqno_later(a: __sync_seqno(p)[idx], b: seqno); |
196 | } |
197 | |
198 | static struct i915_syncmap * |
199 | __sync_alloc_leaf(struct i915_syncmap *parent, u64 id) |
200 | { |
201 | struct i915_syncmap *p; |
202 | |
203 | p = kmalloc(size: sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL); |
204 | if (unlikely(!p)) |
205 | return NULL; |
206 | |
207 | p->parent = parent; |
208 | p->height = 0; |
209 | p->bitmap = 0; |
210 | p->prefix = __sync_leaf_prefix(p, id); |
211 | return p; |
212 | } |
213 | |
/* Store @seqno for @id in leaf @p, marking the slot as valid in the bitmap. */
static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
{
	unsigned int idx = __sync_leaf_idx(p, id);

	p->bitmap |= BIT(idx);
	__sync_seqno(p)[idx] = seqno;
}
221 | |
/* Hang @child in slot @idx of branch @p, marking the slot as occupied. */
static inline void __sync_set_child(struct i915_syncmap *p,
				    unsigned int idx,
				    struct i915_syncmap *child)
{
	p->bitmap |= BIT(idx);
	__sync_child(p)[idx] = child;
}
229 | |
230 | static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) |
231 | { |
232 | struct i915_syncmap *p = *root; |
233 | unsigned int idx; |
234 | |
235 | if (!p) { |
236 | p = __sync_alloc_leaf(NULL, id); |
237 | if (unlikely(!p)) |
238 | return -ENOMEM; |
239 | |
240 | goto found; |
241 | } |
242 | |
243 | /* Caller handled the likely cached case */ |
244 | GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); |
245 | |
246 | /* Climb back up the tree until we find a common prefix */ |
247 | do { |
248 | if (!p->parent) |
249 | break; |
250 | |
251 | p = p->parent; |
252 | |
253 | if (__sync_branch_prefix(p, id) == p->prefix) |
254 | break; |
255 | } while (1); |
256 | |
257 | /* |
258 | * No shortcut, we have to descend the tree to find the right layer |
259 | * containing this fence. |
260 | * |
261 | * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences |
262 | * or lower layers. Leaf nodes (height = 0) contain the fences, all |
263 | * other nodes (height > 0) are internal layers that point to a lower |
264 | * node. Each internal layer has at least 2 descendents. |
265 | * |
266 | * Starting at the top, we check whether the current prefix matches. If |
267 | * it doesn't, we have gone past our target and need to insert a join |
268 | * into the tree, and a new leaf node for the target as a descendent |
269 | * of the join, as well as the original layer. |
270 | * |
271 | * The matching prefix means we are still following the right branch |
272 | * of the tree. If it has height 0, we have found our leaf and just |
273 | * need to replace the fence slot with ourselves. If the height is |
274 | * not zero, our slot contains the next layer in the tree (unless |
275 | * it is empty, in which case we can add ourselves as a new leaf). |
276 | * As descend the tree the prefix grows (and height decreases). |
277 | */ |
278 | do { |
279 | struct i915_syncmap *next; |
280 | |
281 | if (__sync_branch_prefix(p, id) != p->prefix) { |
282 | unsigned int above; |
283 | |
284 | /* Insert a join above the current layer */ |
285 | next = kzalloc(size: sizeof(*next) + KSYNCMAP * sizeof(next), |
286 | GFP_KERNEL); |
287 | if (unlikely(!next)) |
288 | return -ENOMEM; |
289 | |
290 | /* Compute the height at which these two diverge */ |
291 | above = fls64(x: __sync_branch_prefix(p, id) ^ p->prefix); |
292 | above = round_up(above, SHIFT); |
293 | next->height = above + p->height; |
294 | next->prefix = __sync_branch_prefix(p: next, id); |
295 | |
296 | /* Insert the join into the parent */ |
297 | if (p->parent) { |
298 | idx = __sync_branch_idx(p: p->parent, id); |
299 | __sync_child(p: p->parent)[idx] = next; |
300 | GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); |
301 | } |
302 | next->parent = p->parent; |
303 | |
304 | /* Compute the idx of the other branch, not our id! */ |
305 | idx = p->prefix >> (above - SHIFT) & MASK; |
306 | __sync_set_child(p: next, idx, child: p); |
307 | p->parent = next; |
308 | |
309 | /* Ascend to the join */ |
310 | p = next; |
311 | } else { |
312 | if (!p->height) |
313 | break; |
314 | } |
315 | |
316 | /* Descend into the next layer */ |
317 | GEM_BUG_ON(!p->height); |
318 | idx = __sync_branch_idx(p, id); |
319 | next = __sync_child(p)[idx]; |
320 | if (!next) { |
321 | next = __sync_alloc_leaf(parent: p, id); |
322 | if (unlikely(!next)) |
323 | return -ENOMEM; |
324 | |
325 | __sync_set_child(p, idx, child: next); |
326 | p = next; |
327 | break; |
328 | } |
329 | |
330 | p = next; |
331 | } while (1); |
332 | |
333 | found: |
334 | GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); |
335 | __sync_set_seqno(p, id, seqno); |
336 | *root = p; |
337 | return 0; |
338 | } |
339 | |
340 | /** |
341 | * i915_syncmap_set -- mark the most recent syncpoint between contexts |
342 | * @root: pointer to the #i915_syncmap |
343 | * @id: the context id (other timeline) we have synchronised to |
344 | * @seqno: the sequence number along the other timeline |
345 | * |
346 | * When we synchronise this @root timeline with another (@id), we also know |
347 | * that we have synchronized with all previous seqno along that timeline. If |
348 | * we then have a request to synchronise with the same seqno or older, we can |
349 | * omit it, see i915_syncmap_is_later() |
350 | * |
351 | * Returns 0 on success, or a negative error code. |
352 | */ |
353 | int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) |
354 | { |
355 | struct i915_syncmap *p = *root; |
356 | |
357 | /* |
358 | * We expect to be called in sequence following is_later(id), which |
359 | * should have preloaded the root for us. |
360 | */ |
361 | if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { |
362 | __sync_set_seqno(p, id, seqno); |
363 | return 0; |
364 | } |
365 | |
366 | return __sync_set(root, id, seqno); |
367 | } |
368 | |
369 | static void __sync_free(struct i915_syncmap *p) |
370 | { |
371 | if (p->height) { |
372 | unsigned int i; |
373 | |
374 | while ((i = ffs(p->bitmap))) { |
375 | p->bitmap &= ~0u << i; |
376 | __sync_free(p: __sync_child(p)[i - 1]); |
377 | } |
378 | } |
379 | |
380 | kfree(objp: p); |
381 | } |
382 | |
383 | /** |
384 | * i915_syncmap_free -- free all memory associated with the syncmap |
385 | * @root: pointer to the #i915_syncmap |
386 | * |
387 | * Either when the timeline is to be freed and we no longer need the sync |
388 | * point tracking, or when the fences are all known to be signaled and the |
389 | * sync point tracking is redundant, we can free the #i915_syncmap to recover |
390 | * its allocations. |
391 | * |
392 | * Will reinitialise the @root pointer so that the #i915_syncmap is ready for |
393 | * reuse. |
394 | */ |
395 | void i915_syncmap_free(struct i915_syncmap **root) |
396 | { |
397 | struct i915_syncmap *p; |
398 | |
399 | p = *root; |
400 | if (!p) |
401 | return; |
402 | |
403 | while (p->parent) |
404 | p = p->parent; |
405 | |
406 | __sync_free(p); |
407 | *root = NULL; |
408 | } |
409 | |
410 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
411 | #include "selftests/i915_syncmap.c" |
412 | #endif |
413 | |