| 1 | /* Copyright (C) 1991-2024 Free Software Foundation, Inc. |
| 2 | This file is part of the GNU C Library. |
| 3 | |
| 4 | The GNU C Library is free software; you can redistribute it and/or |
| 5 | modify it under the terms of the GNU Lesser General Public |
| 6 | License as published by the Free Software Foundation; either |
| 7 | version 2.1 of the License, or (at your option) any later version. |
| 8 | |
| 9 | The GNU C Library is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | Lesser General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU Lesser General Public |
| 15 | License along with the GNU C Library; if not, see |
| 16 | <https://www.gnu.org/licenses/>. */ |
| 17 | |
| 18 | /* If you consider tuning this algorithm, you should consult first: |
| 19 | Engineering a sort function; Jon Bentley and M. Douglas McIlroy; |
| 20 | Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ |
| 21 | |
| 22 | #include <errno.h> |
| 23 | #include <limits.h> |
| 24 | #include <memswap.h> |
| 25 | #include <stdlib.h> |
| 26 | #include <string.h> |
| 27 | #include <stdbool.h> |
| 28 | |
| 29 | /* Swap SIZE bytes between addresses A and B. These helpers are provided |
| 30 | along the generic one as an optimization. */ |
| 31 | |
| 32 | enum swap_type_t |
| 33 | { |
| 34 | SWAP_WORDS_64, |
| 35 | SWAP_WORDS_32, |
| 36 | SWAP_VOID_ARG, |
| 37 | SWAP_BYTES |
| 38 | }; |
| 39 | |
| 40 | typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; |
| 41 | typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; |
| 42 | |
| 43 | static inline void |
| 44 | swap_words_64 (void * restrict a, void * restrict b, size_t n) |
| 45 | { |
| 46 | do |
| 47 | { |
| 48 | n -= 8; |
| 49 | u64_alias_t t = *(u64_alias_t *)(a + n); |
| 50 | *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n); |
| 51 | *(u64_alias_t *)(b + n) = t; |
| 52 | } while (n); |
| 53 | } |
| 54 | |
| 55 | static inline void |
| 56 | swap_words_32 (void * restrict a, void * restrict b, size_t n) |
| 57 | { |
| 58 | do |
| 59 | { |
| 60 | n -= 4; |
| 61 | u32_alias_t t = *(u32_alias_t *)(a + n); |
| 62 | *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n); |
| 63 | *(u32_alias_t *)(b + n) = t; |
| 64 | } while (n); |
| 65 | } |
| 66 | |
| 67 | /* Replace the indirect call with a serie of if statements. It should help |
| 68 | the branch predictor. */ |
| 69 | static void |
| 70 | do_swap (void * restrict a, void * restrict b, size_t size, |
| 71 | enum swap_type_t swap_type) |
| 72 | { |
| 73 | if (swap_type == SWAP_WORDS_64) |
| 74 | swap_words_64 (a, b, n: size); |
| 75 | else if (swap_type == SWAP_WORDS_32) |
| 76 | swap_words_32 (a, b, n: size); |
| 77 | else |
| 78 | __memswap (p1: a, p2: b, n: size); |
| 79 | } |
| 80 | |
| 81 | /* Establish the heap condition at index K, that is, the key at K will |
| 82 | not be less than either of its children, at 2 * K + 1 and 2 * K + 2 |
| 83 | (if they exist). N is the last valid index. */ |
| 84 | static inline void |
| 85 | siftdown (void *base, size_t size, size_t k, size_t n, |
| 86 | enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg) |
| 87 | { |
| 88 | /* There can only be a heap condition violation if there are |
| 89 | children. */ |
| 90 | while (2 * k + 1 <= n) |
| 91 | { |
| 92 | /* Left child. */ |
| 93 | size_t j = 2 * k + 1; |
| 94 | /* If the right child is larger, use it. */ |
| 95 | if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0) |
| 96 | j++; |
| 97 | |
| 98 | /* If k is already >= to its children, we are done. */ |
| 99 | if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0) |
| 100 | break; |
| 101 | |
| 102 | /* Heal the violation. */ |
| 103 | do_swap (a: base + (size * j), b: base + (k * size), size, swap_type); |
| 104 | |
| 105 | /* Swapping with j may have introduced a violation at j. Fix |
| 106 | it in the next loop iteration. */ |
| 107 | k = j; |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | /* Establish the heap condition for the indices 0 to N (inclusive). */ |
| 112 | static inline void |
| 113 | heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type, |
| 114 | __compar_d_fn_t cmp, void *arg) |
| 115 | { |
| 116 | /* If n is odd, k = n / 2 has a left child at n, so this is the |
| 117 | largest index that can have a heap condition violation regarding |
| 118 | its children. */ |
| 119 | size_t k = n / 2; |
| 120 | while (1) |
| 121 | { |
| 122 | siftdown (base, size, k, n, swap_type, cmp, arg); |
| 123 | if (k-- == 0) |
| 124 | break; |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | static enum swap_type_t |
| 129 | get_swap_type (void *const pbase, size_t size) |
| 130 | { |
| 131 | if ((size & (sizeof (uint32_t) - 1)) == 0 |
| 132 | && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0) |
| 133 | { |
| 134 | if (size == sizeof (uint32_t)) |
| 135 | return SWAP_WORDS_32; |
| 136 | else if (size == sizeof (uint64_t) |
| 137 | && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0) |
| 138 | return SWAP_WORDS_64; |
| 139 | } |
| 140 | return SWAP_BYTES; |
| 141 | } |
| 142 | |
| 143 | |
| 144 | /* A non-recursive heapsort with worst-case performance of O(nlog n) and |
| 145 | worst-case space complexity of O(1). It sorts the array starting at |
| 146 | BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback |
| 147 | function used to swap elements, and CMP is the function used to compare |
| 148 | elements. */ |
| 149 | static void |
| 150 | heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg) |
| 151 | { |
| 152 | if (n == 0) |
| 153 | return; |
| 154 | |
| 155 | enum swap_type_t swap_type = get_swap_type (pbase: base, size); |
| 156 | |
| 157 | /* Build the binary heap, largest value at the base[0]. */ |
| 158 | heapify (base, size, n, swap_type, cmp, arg); |
| 159 | |
| 160 | while (true) |
| 161 | { |
| 162 | /* Indices 0 .. n contain the binary heap. Extract the largest |
| 163 | element put it into the final position in the array. */ |
| 164 | do_swap (a: base, b: base + (n * size), size, swap_type); |
| 165 | |
| 166 | /* The heap is now one element shorter. */ |
| 167 | n--; |
| 168 | if (n == 0) |
| 169 | break; |
| 170 | |
| 171 | /* By swapping in elements 0 and the previous value of n (now at |
| 172 | n + 1), we likely introduced a heap condition violation. Fix |
| 173 | it for the reduced heap. */ |
| 174 | siftdown (base, size, k: 0, n, swap_type, cmp, arg); |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | /* The maximum size in bytes required by mergesort that will be provided |
| 179 | through a buffer allocated in the stack. */ |
| 180 | #define QSORT_STACK_SIZE 1024 |
| 181 | |
| 182 | /* Elements larger than this value will be sorted through indirect sorting |
| 183 | to minimize the need to memory swap calls. */ |
| 184 | #define INDIRECT_SORT_SIZE_THRES 32 |
| 185 | |
| 186 | struct msort_param |
| 187 | { |
| 188 | size_t s; |
| 189 | enum swap_type_t var; |
| 190 | __compar_d_fn_t cmp; |
| 191 | void *arg; |
| 192 | char *t; |
| 193 | }; |
| 194 | |
| 195 | static void |
| 196 | msort_with_tmp (const struct msort_param *p, void *b, size_t n) |
| 197 | { |
| 198 | char *b1, *b2; |
| 199 | size_t n1, n2; |
| 200 | |
| 201 | if (n <= 1) |
| 202 | return; |
| 203 | |
| 204 | n1 = n / 2; |
| 205 | n2 = n - n1; |
| 206 | b1 = b; |
| 207 | b2 = (char *) b + (n1 * p->s); |
| 208 | |
| 209 | msort_with_tmp (p, b: b1, n: n1); |
| 210 | msort_with_tmp (p, b: b2, n: n2); |
| 211 | |
| 212 | char *tmp = p->t; |
| 213 | const size_t s = p->s; |
| 214 | __compar_d_fn_t cmp = p->cmp; |
| 215 | void *arg = p->arg; |
| 216 | switch (p->var) |
| 217 | { |
| 218 | case SWAP_WORDS_32: |
| 219 | while (n1 > 0 && n2 > 0) |
| 220 | { |
| 221 | if (cmp (b1, b2, arg) <= 0) |
| 222 | { |
| 223 | *(u32_alias_t *) tmp = *(u32_alias_t *) b1; |
| 224 | b1 += sizeof (u32_alias_t); |
| 225 | --n1; |
| 226 | } |
| 227 | else |
| 228 | { |
| 229 | *(u32_alias_t *) tmp = *(u32_alias_t *) b2; |
| 230 | b2 += sizeof (u32_alias_t); |
| 231 | --n2; |
| 232 | } |
| 233 | tmp += sizeof (u32_alias_t); |
| 234 | } |
| 235 | break; |
| 236 | case SWAP_WORDS_64: |
| 237 | while (n1 > 0 && n2 > 0) |
| 238 | { |
| 239 | if (cmp (b1, b2, arg) <= 0) |
| 240 | { |
| 241 | *(u64_alias_t *) tmp = *(u64_alias_t *) b1; |
| 242 | b1 += sizeof (u64_alias_t); |
| 243 | --n1; |
| 244 | } |
| 245 | else |
| 246 | { |
| 247 | *(u64_alias_t *) tmp = *(u64_alias_t *) b2; |
| 248 | b2 += sizeof (u64_alias_t); |
| 249 | --n2; |
| 250 | } |
| 251 | tmp += sizeof (u64_alias_t); |
| 252 | } |
| 253 | break; |
| 254 | case SWAP_VOID_ARG: |
| 255 | while (n1 > 0 && n2 > 0) |
| 256 | { |
| 257 | if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) |
| 258 | { |
| 259 | *(void **) tmp = *(void **) b1; |
| 260 | b1 += sizeof (void *); |
| 261 | --n1; |
| 262 | } |
| 263 | else |
| 264 | { |
| 265 | *(void **) tmp = *(void **) b2; |
| 266 | b2 += sizeof (void *); |
| 267 | --n2; |
| 268 | } |
| 269 | tmp += sizeof (void *); |
| 270 | } |
| 271 | break; |
| 272 | default: |
| 273 | while (n1 > 0 && n2 > 0) |
| 274 | { |
| 275 | if (cmp (b1, b2, arg) <= 0) |
| 276 | { |
| 277 | tmp = (char *) __mempcpy (tmp, b1, s); |
| 278 | b1 += s; |
| 279 | --n1; |
| 280 | } |
| 281 | else |
| 282 | { |
| 283 | tmp = (char *) __mempcpy (tmp, b2, s); |
| 284 | b2 += s; |
| 285 | --n2; |
| 286 | } |
| 287 | } |
| 288 | break; |
| 289 | } |
| 290 | |
| 291 | if (n1 > 0) |
| 292 | memcpy (tmp, b1, n1 * s); |
| 293 | memcpy (b, p->t, (n - n2) * s); |
| 294 | } |
| 295 | |
| 296 | static void |
| 297 | __attribute_used__ |
| 298 | indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n, |
| 299 | size_t s) |
| 300 | { |
| 301 | /* Indirect sorting. */ |
| 302 | char *ip = (char *) b; |
| 303 | void **tp = (void **) (p->t + n * sizeof (void *)); |
| 304 | void **t = tp; |
| 305 | void *tmp_storage = (void *) (tp + n); |
| 306 | |
| 307 | while ((void *) t < tmp_storage) |
| 308 | { |
| 309 | *t++ = ip; |
| 310 | ip += s; |
| 311 | } |
| 312 | msort_with_tmp (p, b: p->t + n * sizeof (void *), n); |
| 313 | |
| 314 | /* tp[0] .. tp[n - 1] is now sorted, copy around entries of |
| 315 | the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ |
| 316 | char *kp; |
| 317 | size_t i; |
| 318 | for (i = 0, ip = (char *) b; i < n; i++, ip += s) |
| 319 | if ((kp = tp[i]) != ip) |
| 320 | { |
| 321 | size_t j = i; |
| 322 | char *jp = ip; |
| 323 | memcpy (tmp_storage, ip, s); |
| 324 | |
| 325 | do |
| 326 | { |
| 327 | size_t k = (kp - (char *) b) / s; |
| 328 | tp[j] = jp; |
| 329 | memcpy (jp, kp, s); |
| 330 | j = k; |
| 331 | jp = kp; |
| 332 | kp = tp[k]; |
| 333 | } |
| 334 | while (kp != ip); |
| 335 | |
| 336 | tp[j] = jp; |
| 337 | memcpy (jp, tmp_storage, s); |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | void |
| 342 | __qsort_r (void *const pbase, size_t total_elems, size_t size, |
| 343 | __compar_d_fn_t cmp, void *arg) |
| 344 | { |
| 345 | if (total_elems <= 1) |
| 346 | return; |
| 347 | |
| 348 | /* Align to the maximum size used by the swap optimization. */ |
| 349 | _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE]; |
| 350 | size_t total_size = total_elems * size; |
| 351 | char *buf; |
| 352 | |
| 353 | if (size > INDIRECT_SORT_SIZE_THRES) |
| 354 | total_size = 2 * total_elems * sizeof (void *) + size; |
| 355 | |
| 356 | if (total_size <= sizeof tmp) |
| 357 | buf = tmp; |
| 358 | else |
| 359 | { |
| 360 | int save = errno; |
| 361 | buf = malloc (size: total_size); |
| 362 | __set_errno (save); |
| 363 | if (buf == NULL) |
| 364 | { |
| 365 | /* Fallback to heapsort in case of memory failure. */ |
| 366 | heapsort_r (base: pbase, n: total_elems - 1, size, cmp, arg); |
| 367 | return; |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | if (size > INDIRECT_SORT_SIZE_THRES) |
| 372 | { |
| 373 | const struct msort_param msort_param = |
| 374 | { |
| 375 | .s = sizeof (void *), |
| 376 | .cmp = cmp, |
| 377 | .arg = arg, |
| 378 | .var = SWAP_VOID_ARG, |
| 379 | .t = buf, |
| 380 | }; |
| 381 | indirect_msort_with_tmp (p: &msort_param, b: pbase, n: total_elems, s: size); |
| 382 | } |
| 383 | else |
| 384 | { |
| 385 | const struct msort_param msort_param = |
| 386 | { |
| 387 | .s = size, |
| 388 | .cmp = cmp, |
| 389 | .arg = arg, |
| 390 | .var = get_swap_type (pbase, size), |
| 391 | .t = buf, |
| 392 | }; |
| 393 | msort_with_tmp (p: &msort_param, b: pbase, n: total_elems); |
| 394 | } |
| 395 | |
| 396 | if (buf != tmp) |
| 397 | free (ptr: buf); |
| 398 | } |
| 399 | libc_hidden_def (__qsort_r) |
| 400 | weak_alias (__qsort_r, qsort_r) |
| 401 | |
| 402 | void |
| 403 | qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) |
| 404 | { |
| 405 | return __qsort_r (pbase: b, total_elems: n, size: s, cmp: (__compar_d_fn_t) cmp, NULL); |
| 406 | } |
| 407 | libc_hidden_def (qsort) |
| 408 | |