1 | /* Copyright (C) 1991-2024 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | /* If you consider tuning this algorithm, you should consult first: |
19 | Engineering a sort function; Jon Bentley and M. Douglas McIlroy; |
20 | Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ |
21 | |
22 | #include <errno.h> |
23 | #include <limits.h> |
24 | #include <memswap.h> |
25 | #include <stdlib.h> |
26 | #include <string.h> |
27 | #include <stdbool.h> |
28 | |
29 | /* Swap SIZE bytes between addresses A and B. These helpers are provided |
30 | along the generic one as an optimization. */ |
31 | |
32 | enum swap_type_t |
33 | { |
34 | SWAP_WORDS_64, |
35 | SWAP_WORDS_32, |
36 | SWAP_VOID_ARG, |
37 | SWAP_BYTES |
38 | }; |
39 | |
40 | typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; |
41 | typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; |
42 | |
43 | static inline void |
44 | swap_words_64 (void * restrict a, void * restrict b, size_t n) |
45 | { |
46 | do |
47 | { |
48 | n -= 8; |
49 | u64_alias_t t = *(u64_alias_t *)(a + n); |
50 | *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n); |
51 | *(u64_alias_t *)(b + n) = t; |
52 | } while (n); |
53 | } |
54 | |
55 | static inline void |
56 | swap_words_32 (void * restrict a, void * restrict b, size_t n) |
57 | { |
58 | do |
59 | { |
60 | n -= 4; |
61 | u32_alias_t t = *(u32_alias_t *)(a + n); |
62 | *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n); |
63 | *(u32_alias_t *)(b + n) = t; |
64 | } while (n); |
65 | } |
66 | |
67 | /* Replace the indirect call with a serie of if statements. It should help |
68 | the branch predictor. */ |
69 | static void |
70 | do_swap (void * restrict a, void * restrict b, size_t size, |
71 | enum swap_type_t swap_type) |
72 | { |
73 | if (swap_type == SWAP_WORDS_64) |
74 | swap_words_64 (a, b, n: size); |
75 | else if (swap_type == SWAP_WORDS_32) |
76 | swap_words_32 (a, b, n: size); |
77 | else |
78 | __memswap (p1: a, p2: b, n: size); |
79 | } |
80 | |
81 | /* Establish the heap condition at index K, that is, the key at K will |
82 | not be less than either of its children, at 2 * K + 1 and 2 * K + 2 |
83 | (if they exist). N is the last valid index. */ |
84 | static inline void |
85 | siftdown (void *base, size_t size, size_t k, size_t n, |
86 | enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg) |
87 | { |
88 | /* There can only be a heap condition violation if there are |
89 | children. */ |
90 | while (2 * k + 1 <= n) |
91 | { |
92 | /* Left child. */ |
93 | size_t j = 2 * k + 1; |
94 | /* If the right child is larger, use it. */ |
95 | if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0) |
96 | j++; |
97 | |
98 | /* If k is already >= to its children, we are done. */ |
99 | if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0) |
100 | break; |
101 | |
102 | /* Heal the violation. */ |
103 | do_swap (a: base + (size * j), b: base + (k * size), size, swap_type); |
104 | |
105 | /* Swapping with j may have introduced a violation at j. Fix |
106 | it in the next loop iteration. */ |
107 | k = j; |
108 | } |
109 | } |
110 | |
111 | /* Establish the heap condition for the indices 0 to N (inclusive). */ |
112 | static inline void |
113 | heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type, |
114 | __compar_d_fn_t cmp, void *arg) |
115 | { |
116 | /* If n is odd, k = n / 2 has a left child at n, so this is the |
117 | largest index that can have a heap condition violation regarding |
118 | its children. */ |
119 | size_t k = n / 2; |
120 | while (1) |
121 | { |
122 | siftdown (base, size, k, n, swap_type, cmp, arg); |
123 | if (k-- == 0) |
124 | break; |
125 | } |
126 | } |
127 | |
128 | static enum swap_type_t |
129 | get_swap_type (void *const pbase, size_t size) |
130 | { |
131 | if ((size & (sizeof (uint32_t) - 1)) == 0 |
132 | && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0) |
133 | { |
134 | if (size == sizeof (uint32_t)) |
135 | return SWAP_WORDS_32; |
136 | else if (size == sizeof (uint64_t) |
137 | && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0) |
138 | return SWAP_WORDS_64; |
139 | } |
140 | return SWAP_BYTES; |
141 | } |
142 | |
143 | |
144 | /* A non-recursive heapsort with worst-case performance of O(nlog n) and |
145 | worst-case space complexity of O(1). It sorts the array starting at |
146 | BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback |
147 | function used to swap elements, and CMP is the function used to compare |
148 | elements. */ |
149 | static void |
150 | heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg) |
151 | { |
152 | if (n == 0) |
153 | return; |
154 | |
155 | enum swap_type_t swap_type = get_swap_type (pbase: base, size); |
156 | |
157 | /* Build the binary heap, largest value at the base[0]. */ |
158 | heapify (base, size, n, swap_type, cmp, arg); |
159 | |
160 | while (true) |
161 | { |
162 | /* Indices 0 .. n contain the binary heap. Extract the largest |
163 | element put it into the final position in the array. */ |
164 | do_swap (a: base, b: base + (n * size), size, swap_type); |
165 | |
166 | /* The heap is now one element shorter. */ |
167 | n--; |
168 | if (n == 0) |
169 | break; |
170 | |
171 | /* By swapping in elements 0 and the previous value of n (now at |
172 | n + 1), we likely introduced a heap condition violation. Fix |
173 | it for the reduced heap. */ |
174 | siftdown (base, size, k: 0, n, swap_type, cmp, arg); |
175 | } |
176 | } |
177 | |
178 | /* The maximum size in bytes required by mergesort that will be provided |
179 | through a buffer allocated in the stack. */ |
180 | #define QSORT_STACK_SIZE 1024 |
181 | |
182 | /* Elements larger than this value will be sorted through indirect sorting |
183 | to minimize the need to memory swap calls. */ |
184 | #define INDIRECT_SORT_SIZE_THRES 32 |
185 | |
186 | struct msort_param |
187 | { |
188 | size_t s; |
189 | enum swap_type_t var; |
190 | __compar_d_fn_t cmp; |
191 | void *arg; |
192 | char *t; |
193 | }; |
194 | |
195 | static void |
196 | msort_with_tmp (const struct msort_param *p, void *b, size_t n) |
197 | { |
198 | char *b1, *b2; |
199 | size_t n1, n2; |
200 | |
201 | if (n <= 1) |
202 | return; |
203 | |
204 | n1 = n / 2; |
205 | n2 = n - n1; |
206 | b1 = b; |
207 | b2 = (char *) b + (n1 * p->s); |
208 | |
209 | msort_with_tmp (p, b: b1, n: n1); |
210 | msort_with_tmp (p, b: b2, n: n2); |
211 | |
212 | char *tmp = p->t; |
213 | const size_t s = p->s; |
214 | __compar_d_fn_t cmp = p->cmp; |
215 | void *arg = p->arg; |
216 | switch (p->var) |
217 | { |
218 | case SWAP_WORDS_32: |
219 | while (n1 > 0 && n2 > 0) |
220 | { |
221 | if (cmp (b1, b2, arg) <= 0) |
222 | { |
223 | *(u32_alias_t *) tmp = *(u32_alias_t *) b1; |
224 | b1 += sizeof (u32_alias_t); |
225 | --n1; |
226 | } |
227 | else |
228 | { |
229 | *(u32_alias_t *) tmp = *(u32_alias_t *) b2; |
230 | b2 += sizeof (u32_alias_t); |
231 | --n2; |
232 | } |
233 | tmp += sizeof (u32_alias_t); |
234 | } |
235 | break; |
236 | case SWAP_WORDS_64: |
237 | while (n1 > 0 && n2 > 0) |
238 | { |
239 | if (cmp (b1, b2, arg) <= 0) |
240 | { |
241 | *(u64_alias_t *) tmp = *(u64_alias_t *) b1; |
242 | b1 += sizeof (u64_alias_t); |
243 | --n1; |
244 | } |
245 | else |
246 | { |
247 | *(u64_alias_t *) tmp = *(u64_alias_t *) b2; |
248 | b2 += sizeof (u64_alias_t); |
249 | --n2; |
250 | } |
251 | tmp += sizeof (u64_alias_t); |
252 | } |
253 | break; |
254 | case SWAP_VOID_ARG: |
255 | while (n1 > 0 && n2 > 0) |
256 | { |
257 | if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) |
258 | { |
259 | *(void **) tmp = *(void **) b1; |
260 | b1 += sizeof (void *); |
261 | --n1; |
262 | } |
263 | else |
264 | { |
265 | *(void **) tmp = *(void **) b2; |
266 | b2 += sizeof (void *); |
267 | --n2; |
268 | } |
269 | tmp += sizeof (void *); |
270 | } |
271 | break; |
272 | default: |
273 | while (n1 > 0 && n2 > 0) |
274 | { |
275 | if (cmp (b1, b2, arg) <= 0) |
276 | { |
277 | tmp = (char *) __mempcpy (tmp, b1, s); |
278 | b1 += s; |
279 | --n1; |
280 | } |
281 | else |
282 | { |
283 | tmp = (char *) __mempcpy (tmp, b2, s); |
284 | b2 += s; |
285 | --n2; |
286 | } |
287 | } |
288 | break; |
289 | } |
290 | |
291 | if (n1 > 0) |
292 | memcpy (tmp, b1, n1 * s); |
293 | memcpy (b, p->t, (n - n2) * s); |
294 | } |
295 | |
296 | static void |
297 | __attribute_used__ |
298 | indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n, |
299 | size_t s) |
300 | { |
301 | /* Indirect sorting. */ |
302 | char *ip = (char *) b; |
303 | void **tp = (void **) (p->t + n * sizeof (void *)); |
304 | void **t = tp; |
305 | void *tmp_storage = (void *) (tp + n); |
306 | |
307 | while ((void *) t < tmp_storage) |
308 | { |
309 | *t++ = ip; |
310 | ip += s; |
311 | } |
312 | msort_with_tmp (p, b: p->t + n * sizeof (void *), n); |
313 | |
314 | /* tp[0] .. tp[n - 1] is now sorted, copy around entries of |
315 | the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ |
316 | char *kp; |
317 | size_t i; |
318 | for (i = 0, ip = (char *) b; i < n; i++, ip += s) |
319 | if ((kp = tp[i]) != ip) |
320 | { |
321 | size_t j = i; |
322 | char *jp = ip; |
323 | memcpy (tmp_storage, ip, s); |
324 | |
325 | do |
326 | { |
327 | size_t k = (kp - (char *) b) / s; |
328 | tp[j] = jp; |
329 | memcpy (jp, kp, s); |
330 | j = k; |
331 | jp = kp; |
332 | kp = tp[k]; |
333 | } |
334 | while (kp != ip); |
335 | |
336 | tp[j] = jp; |
337 | memcpy (jp, tmp_storage, s); |
338 | } |
339 | } |
340 | |
341 | void |
342 | __qsort_r (void *const pbase, size_t total_elems, size_t size, |
343 | __compar_d_fn_t cmp, void *arg) |
344 | { |
345 | if (total_elems <= 1) |
346 | return; |
347 | |
348 | /* Align to the maximum size used by the swap optimization. */ |
349 | _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE]; |
350 | size_t total_size = total_elems * size; |
351 | char *buf; |
352 | |
353 | if (size > INDIRECT_SORT_SIZE_THRES) |
354 | total_size = 2 * total_elems * sizeof (void *) + size; |
355 | |
356 | if (total_size <= sizeof tmp) |
357 | buf = tmp; |
358 | else |
359 | { |
360 | int save = errno; |
361 | buf = malloc (size: total_size); |
362 | __set_errno (save); |
363 | if (buf == NULL) |
364 | { |
365 | /* Fallback to heapsort in case of memory failure. */ |
366 | heapsort_r (base: pbase, n: total_elems - 1, size, cmp, arg); |
367 | return; |
368 | } |
369 | } |
370 | |
371 | if (size > INDIRECT_SORT_SIZE_THRES) |
372 | { |
373 | const struct msort_param msort_param = |
374 | { |
375 | .s = sizeof (void *), |
376 | .cmp = cmp, |
377 | .arg = arg, |
378 | .var = SWAP_VOID_ARG, |
379 | .t = buf, |
380 | }; |
381 | indirect_msort_with_tmp (p: &msort_param, b: pbase, n: total_elems, s: size); |
382 | } |
383 | else |
384 | { |
385 | const struct msort_param msort_param = |
386 | { |
387 | .s = size, |
388 | .cmp = cmp, |
389 | .arg = arg, |
390 | .var = get_swap_type (pbase, size), |
391 | .t = buf, |
392 | }; |
393 | msort_with_tmp (p: &msort_param, b: pbase, n: total_elems); |
394 | } |
395 | |
396 | if (buf != tmp) |
397 | free (ptr: buf); |
398 | } |
399 | libc_hidden_def (__qsort_r) |
400 | weak_alias (__qsort_r, qsort_r) |
401 | |
402 | void |
403 | qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) |
404 | { |
405 | return __qsort_r (pbase: b, total_elems: n, size: s, cmp: (__compar_d_fn_t) cmp, NULL); |
406 | } |
407 | libc_hidden_def (qsort) |
408 | |