| 1 | /* Platform-independent deterministic sort function. |
| 2 | Copyright (C) 2018-2026 Free Software Foundation, Inc. |
| 3 | Contributed by Alexander Monakov. |
| 4 | |
| 5 | This file is part of GCC. |
| 6 | |
| 7 | GCC is free software; you can redistribute it and/or modify it |
| 8 | under the terms of the GNU General Public License as published by the |
| 9 | Free Software Foundation; either version 3, or (at your option) any |
| 10 | later version. |
| 11 | |
| 12 | GCC is distributed in the hope that it will be useful, but WITHOUT |
| 13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 15 | for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with GCC; see the file COPYING3. If not see |
| 19 | <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | /* This implements a sort function suitable for GCC use cases: |
| 22 | - signature-compatible to C qsort, but relaxed contract: |
| 23 | - may apply the comparator to elements in a temporary buffer |
| 24 | - may abort on allocation failure |
| 25 | - deterministic (but not necessarily stable) |
| 26 | - fast, especially for common cases (0-5 elements of size 8 or 4) |
| 27 | |
| 28 | The implementation uses sorting networks for up to 5 elements and |
| 29 | a merge sort on top of that. Neither stage has branches depending on |
| 30 | comparator result, trading extra arithmetic for branch mispredictions. */ |
| 31 | |
| 32 | #ifdef GENERATOR_FILE |
| 33 | #include "bconfig.h" |
| 34 | #else |
| 35 | #include "config.h" |
| 36 | #endif |
| 37 | |
| 38 | #include "system.h" |
| 39 | |
| 40 | #ifdef __GNUC__ |
| 41 | #define noinline __attribute__ ((__noinline__)) |
| 42 | #else |
| 43 | #define noinline |
| 44 | #endif |
| 45 | |
| 46 | /* C-style qsort comparator function type. */ |
| 47 | typedef int cmp_fn (const void *, const void *); |
| 48 | |
| 49 | /* Structure holding read-mostly (read-only in netsort) context. */ |
| 50 | struct sort_ctx |
| 51 | { |
| 52 | cmp_fn *cmp; // pointer to comparator |
| 53 | char *out; // output buffer |
| 54 | size_t n; // number of elements |
| 55 | size_t size; // element size |
| 56 | size_t nlim; // limit for using sorting networks |
| 57 | }; |
| 58 | |
| 59 | /* Like sort_ctx, but for use with qsort_r-style comparators. Several |
| 60 | functions in this file are templates that work with either context type. */ |
| 61 | struct sort_r_ctx |
| 62 | { |
| 63 | void *data; |
| 64 | sort_r_cmp_fn *cmp_; |
| 65 | char *out; |
| 66 | size_t n; |
| 67 | size_t size; |
| 68 | size_t nlim; |
| 69 | int cmp (const void *a, const void *b) |
| 70 | { |
| 71 | return cmp_ (a, b, data); |
| 72 | } |
| 73 | }; |
| 74 | |
| 75 | /* Helper for netsort. Permute, possibly in-place, 2 or 3 elements, |
| 76 | placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */ |
| 77 | template<typename sort_ctx> |
| 78 | static void |
| 79 | reorder23 (sort_ctx *c, char *e0, char *e1, char *e2) |
| 80 | { |
| 81 | #define REORDER_23(TYPE, STRIDE, OFFSET) \ |
| 82 | do { \ |
| 83 | TYPE t0, t1; \ |
| 84 | memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ |
| 85 | memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ |
| 86 | char *out = c->out + OFFSET; \ |
| 87 | if (LIKELY (c->n == 3)) \ |
| 88 | memmove (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE));\ |
| 89 | memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ |
| 90 | memcpy (out, &t1, sizeof (TYPE)); \ |
| 91 | } while (0) |
| 92 | |
| 93 | if (LIKELY (c->size == sizeof (size_t))) |
| 94 | REORDER_23 (size_t, sizeof (size_t), 0); |
| 95 | else if (LIKELY (c->size == sizeof (int))) |
| 96 | REORDER_23 (int, sizeof (int), 0); |
| 97 | else |
| 98 | { |
| 99 | size_t offset = 0, step = sizeof (size_t); |
| 100 | for (; offset + step <= c->size; offset += step) |
| 101 | REORDER_23 (size_t, c->size, offset); |
| 102 | for (; offset < c->size; offset++) |
| 103 | REORDER_23 (char, c->size, offset); |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | /* Like reorder23, but permute 4 or 5 elements. */ |
| 108 | template<typename sort_ctx> |
| 109 | static void |
| 110 | reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4) |
| 111 | { |
| 112 | #define REORDER_45(TYPE, STRIDE, OFFSET) \ |
| 113 | do { \ |
| 114 | TYPE t0, t1, t2, t3; \ |
| 115 | memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ |
| 116 | memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ |
| 117 | memcpy (&t2, e2 + OFFSET, sizeof (TYPE)); \ |
| 118 | memcpy (&t3, e3 + OFFSET, sizeof (TYPE)); \ |
| 119 | char *out = c->out + OFFSET; \ |
| 120 | if (LIKELY (c->n == 5)) \ |
| 121 | memmove (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE));\ |
| 122 | memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ |
| 123 | memcpy (out, &t1, sizeof (TYPE)); out += STRIDE; \ |
| 124 | memcpy (out, &t2, sizeof (TYPE)); out += STRIDE; \ |
| 125 | memcpy (out, &t3, sizeof (TYPE)); \ |
| 126 | } while (0) |
| 127 | |
| 128 | if (LIKELY (c->size == sizeof (size_t))) |
| 129 | REORDER_45 (size_t, sizeof (size_t), 0); |
| 130 | else if (LIKELY (c->size == sizeof (int))) |
| 131 | REORDER_45 (int, sizeof (int), 0); |
| 132 | else |
| 133 | { |
| 134 | size_t offset = 0, step = sizeof (size_t); |
| 135 | for (; offset + step <= c->size; offset += step) |
| 136 | REORDER_45 (size_t, c->size, offset); |
| 137 | for (; offset < c->size; offset++) |
| 138 | REORDER_45 (char, c->size, offset); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | /* Helper for netsort. Invoke comparator CMP on E0 and E1. |
| 143 | Return E0^E1 if E0 compares less than E1, zero otherwise. |
| 144 | This is noinline to avoid code growth and confine invocation |
| 145 | to a single call site, assisting indirect branch prediction. */ |
| 146 | template<typename sort_ctx> |
| 147 | noinline static intptr_t |
| 148 | cmp1 (char *e0, char *e1, sort_ctx *c) |
| 149 | { |
| 150 | intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; |
| 151 | return x & (c->cmp (e0, e1) >> 31); |
| 152 | } |
| 153 | |
| 154 | /* Apply a sorting network to 2 to 5 elements from IN, placing them into C->OUT. |
| 155 | IN may be equal to C->OUT, in which case elements are sorted in place. */ |
| 156 | template<typename sort_ctx> |
| 157 | static void |
| 158 | netsort (char *in, sort_ctx *c) |
| 159 | { |
| 160 | #define CMP(e0, e1) \ |
| 161 | do { \ |
| 162 | intptr_t x = cmp1 (e1, e0, c); \ |
| 163 | e0 = (char *)((intptr_t)e0 ^ x); \ |
| 164 | e1 = (char *)((intptr_t)e1 ^ x); \ |
| 165 | } while (0) |
| 166 | |
| 167 | char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size; |
| 168 | CMP (e0, e1); |
| 169 | if (LIKELY (c->n == 3)) |
| 170 | { |
| 171 | CMP (e1, e2); |
| 172 | CMP (e0, e1); |
| 173 | } |
| 174 | if (c->n <= 3) |
| 175 | return reorder23 (c, e0, e1, e2); |
| 176 | char *e3 = e2 + c->size, *e4 = e3 + c->size; |
| 177 | if (LIKELY (c->n == 5)) |
| 178 | { |
| 179 | CMP (e3, e4); |
| 180 | CMP (e2, e4); |
| 181 | } |
| 182 | CMP (e2, e3); |
| 183 | if (LIKELY (c->n == 5)) |
| 184 | { |
| 185 | CMP (e0, e3); |
| 186 | CMP (e1, e4); |
| 187 | } |
| 188 | CMP (e0, e2); |
| 189 | CMP (e1, e3); |
| 190 | CMP (e1, e2); |
| 191 | reorder45 (c, e0, e1, e2, e3, e4); |
| 192 | } |
| 193 | |
| 194 | /* Execute merge sort on N elements from IN, placing them into OUT, |
| 195 | using TMP as temporary storage if IN is equal to OUT. |
| 196 | This is a stable sort if netsort is used only for 2 or 3 elements. */ |
| 197 | template<typename sort_ctx> |
| 198 | static void |
| 199 | mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp) |
| 200 | { |
| 201 | if (LIKELY (n <= c->nlim)) |
| 202 | { |
| 203 | c->out = out; |
| 204 | c->n = n; |
| 205 | return netsort (in, c); |
| 206 | } |
| 207 | size_t nl = n / 2, nr = n - nl, sz = nl * c->size; |
| 208 | char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in; |
| 209 | /* Sort the right half, outputting to right half of OUT. */ |
| 210 | mergesort (mid, c, nr, r, tmp); |
| 211 | /* Sort the left half, leaving left half of OUT free. */ |
| 212 | mergesort (in, c, nl, l, mid); |
| 213 | /* Merge sorted halves given by L, R to [OUT, END). */ |
| 214 | #define MERGE_ELTSIZE(SIZE) \ |
| 215 | do { \ |
| 216 | intptr_t mr = c->cmp (r, l) >> 31; \ |
| 217 | intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ |
| 218 | lr = (intptr_t)l ^ (lr & mr); \ |
| 219 | out = (char *)memcpy (out, (char *)lr, SIZE); \ |
| 220 | out += SIZE; \ |
| 221 | r += mr & SIZE; \ |
| 222 | if (r == out) return; \ |
| 223 | l += ~mr & SIZE; \ |
| 224 | } while (r != end) |
| 225 | |
| 226 | if (LIKELY (c->cmp (r, l + (r - out) - c->size) < 0)) |
| 227 | { |
| 228 | char *end = out + n * c->size; |
| 229 | if (sizeof (size_t) == 8 && LIKELY (c->size == 8)) |
| 230 | MERGE_ELTSIZE (8); |
| 231 | else if (LIKELY (c->size == 4)) |
| 232 | MERGE_ELTSIZE (4); |
| 233 | else |
| 234 | MERGE_ELTSIZE (c->size); |
| 235 | } |
| 236 | memcpy (dest: out, src: l, n: r - out); |
| 237 | } |
| 238 | |
| 239 | #if CHECKING_P |
| 240 | /* Don't complain about cast from void* to function pointer. */ |
| 241 | #pragma GCC diagnostic push |
| 242 | #pragma GCC diagnostic ignored "-Wconditionally-supported" |
| 243 | |
| 244 | /* Adapter for using two-argument comparators in functions expecting the |
| 245 | three-argument sort_r_cmp_fn type. */ |
| 246 | static int |
| 247 | cmp2to3 (const void *a, const void *b, void *c) |
| 248 | { |
| 249 | return ((cmp_fn *)c) (a, b); |
| 250 | } |
| 251 | #endif |
| 252 | |
| 253 | /* Replacement for C qsort. */ |
| 254 | void |
| 255 | gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) |
| 256 | { |
| 257 | if (n < 2) |
| 258 | return; |
| 259 | size_t nlim = 5; |
| 260 | bool stable = (ssize_t) size < 0; |
| 261 | if (stable) |
| 262 | nlim = 3, size = ~size; |
| 263 | char *base = (char *)vbase; |
| 264 | sort_ctx c = {.cmp: cmp, .out: base, .n: n, .size: size, .nlim: nlim}; |
| 265 | long long scratch[32]; |
| 266 | size_t bufsz = (n / 2) * size; |
| 267 | void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); |
| 268 | mergesort (in: base, c: &c, n, out: base, tmp: (char *)buf); |
| 269 | if (buf != scratch) |
| 270 | free (ptr: buf); |
| 271 | #if CHECKING_P |
| 272 | qsort_chk (vbase, n, size, cmp2to3, (void*)cmp); |
| 273 | #pragma GCC diagnostic pop |
| 274 | #endif |
| 275 | } |
| 276 | |
| 277 | /* Substitute for Glibc qsort_r. */ |
| 278 | void |
| 279 | gcc_sort_r (void *vbase, size_t n, size_t size, sort_r_cmp_fn *cmp, void *data) |
| 280 | { |
| 281 | if (n < 2) |
| 282 | return; |
| 283 | size_t nlim = 5; |
| 284 | bool stable = (ssize_t) size < 0; |
| 285 | if (stable) |
| 286 | nlim = 3, size = ~size; |
| 287 | char *base = (char *)vbase; |
| 288 | sort_r_ctx c = {.data: data, .cmp_: cmp, .out: base, .n: n, .size: size, .nlim: nlim}; |
| 289 | long long scratch[32]; |
| 290 | size_t bufsz = (n / 2) * size; |
| 291 | void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); |
| 292 | mergesort (in: base, c: &c, n, out: base, tmp: (char *)buf); |
| 293 | if (buf != scratch) |
| 294 | free (ptr: buf); |
| 295 | #if CHECKING_P |
| 296 | qsort_chk (vbase, n, size, cmp, data); |
| 297 | #endif |
| 298 | } |
| 299 | |
| 300 | /* Stable sort, signature-compatible to C qsort. */ |
| 301 | void |
| 302 | gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp) |
| 303 | { |
| 304 | gcc_qsort (vbase, n, size: ~size, cmp); |
| 305 | } |
| 306 | |
| 307 | /* Stable sort, signature-compatible to Glibc qsort_r. */ |
| 308 | void |
| 309 | gcc_stablesort_r (void *vbase, size_t n, size_t size, sort_r_cmp_fn *cmp, |
| 310 | void *data) |
| 311 | { |
| 312 | gcc_sort_r (vbase, n, size: ~size, cmp, data); |
| 313 | } |
| 314 | |