1 | /* Platform-independent deterministic sort function. |
2 | Copyright (C) 2018-2023 Free Software Foundation, Inc. |
3 | Contributed by Alexander Monakov. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it |
8 | under the terms of the GNU General Public License as published by the |
9 | Free Software Foundation; either version 3, or (at your option) any |
10 | later version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | /* This implements a sort function suitable for GCC use cases: |
22 | - signature-compatible to C qsort, but relaxed contract: |
23 | - may apply the comparator to elements in a temporary buffer |
24 | - may abort on allocation failure |
25 | - deterministic (but not necessarily stable) |
26 | - fast, especially for common cases (0-5 elements of size 8 or 4) |
27 | |
28 | The implementation uses a network sort for up to 5 elements and |
29 | a merge sort on top of that. Neither stage has branches depending on |
30 | comparator result, trading extra arithmetic for branch mispredictions. */ |
31 | |
32 | #ifdef GENERATOR_FILE |
33 | #include "bconfig.h" |
34 | #else |
35 | #include "config.h" |
36 | #endif |
37 | |
38 | #include "system.h" |
39 | |
40 | #ifdef __GNUC__ |
41 | #define noinline __attribute__ ((__noinline__)) |
42 | #else |
43 | #define noinline |
44 | #endif |
45 | |
46 | /* C-style qsort comparator function type. */ |
47 | typedef int cmp_fn (const void *, const void *); |
48 | |
49 | /* Structure holding read-mostly (read-only in netsort) context. */ |
50 | struct sort_ctx |
51 | { |
52 | cmp_fn *cmp; // pointer to comparator |
53 | char *out; // output buffer |
54 | size_t n; // number of elements |
55 | size_t size; // element size |
56 | size_t nlim; // limit for network sort |
57 | }; |
58 | |
59 | /* Like sort_ctx, but for use with qsort_r-style comparators. Several |
60 | functions in this file are templates that work with either context type. */ |
61 | struct sort_r_ctx |
62 | { |
63 | void *data; |
64 | sort_r_cmp_fn *cmp_; |
65 | char *out; |
66 | size_t n; |
67 | size_t size; |
68 | size_t nlim; |
69 | int cmp (const void *a, const void *b) |
70 | { |
71 | return cmp_ (a, b, data); |
72 | } |
73 | }; |
74 | |
75 | /* Helper for netsort. Permute, possibly in-place, 2 or 3 elements, |
76 | placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */ |
77 | template<typename sort_ctx> |
78 | static void |
79 | reorder23 (sort_ctx *c, char *e0, char *e1, char *e2) |
80 | { |
81 | #define REORDER_23(TYPE, STRIDE, OFFSET) \ |
82 | do { \ |
83 | TYPE t0, t1; \ |
84 | memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ |
85 | memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ |
86 | char *out = c->out + OFFSET; \ |
87 | if (LIKELY (c->n == 3)) \ |
88 | memmove (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE));\ |
89 | memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ |
90 | memcpy (out, &t1, sizeof (TYPE)); \ |
91 | } while (0) |
92 | |
93 | if (LIKELY (c->size == sizeof (size_t))) |
94 | REORDER_23 (size_t, sizeof (size_t), 0); |
95 | else if (LIKELY (c->size == sizeof (int))) |
96 | REORDER_23 (int, sizeof (int), 0); |
97 | else |
98 | { |
99 | size_t offset = 0, step = sizeof (size_t); |
100 | for (; offset + step <= c->size; offset += step) |
101 | REORDER_23 (size_t, c->size, offset); |
102 | for (; offset < c->size; offset++) |
103 | REORDER_23 (char, c->size, offset); |
104 | } |
105 | } |
106 | |
107 | /* Like reorder23, but permute 4 or 5 elements. */ |
108 | template<typename sort_ctx> |
109 | static void |
110 | reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4) |
111 | { |
112 | #define REORDER_45(TYPE, STRIDE, OFFSET) \ |
113 | do { \ |
114 | TYPE t0, t1, t2, t3; \ |
115 | memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ |
116 | memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ |
117 | memcpy (&t2, e2 + OFFSET, sizeof (TYPE)); \ |
118 | memcpy (&t3, e3 + OFFSET, sizeof (TYPE)); \ |
119 | char *out = c->out + OFFSET; \ |
120 | if (LIKELY (c->n == 5)) \ |
121 | memmove (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE));\ |
122 | memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ |
123 | memcpy (out, &t1, sizeof (TYPE)); out += STRIDE; \ |
124 | memcpy (out, &t2, sizeof (TYPE)); out += STRIDE; \ |
125 | memcpy (out, &t3, sizeof (TYPE)); \ |
126 | } while (0) |
127 | |
128 | if (LIKELY (c->size == sizeof (size_t))) |
129 | REORDER_45 (size_t, sizeof (size_t), 0); |
130 | else if (LIKELY (c->size == sizeof (int))) |
131 | REORDER_45 (int, sizeof (int), 0); |
132 | else |
133 | { |
134 | size_t offset = 0, step = sizeof (size_t); |
135 | for (; offset + step <= c->size; offset += step) |
136 | REORDER_45 (size_t, c->size, offset); |
137 | for (; offset < c->size; offset++) |
138 | REORDER_45 (char, c->size, offset); |
139 | } |
140 | } |
141 | |
142 | /* Helper for netsort. Invoke comparator CMP on E0 and E1. |
143 | Return E0^E1 if E0 compares less than E1, zero otherwise. |
144 | This is noinline to avoid code growth and confine invocation |
145 | to a single call site, assisting indirect branch prediction. */ |
146 | template<typename sort_ctx> |
147 | noinline static intptr_t |
148 | cmp1 (char *e0, char *e1, sort_ctx *c) |
149 | { |
150 | intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; |
151 | return x & (c->cmp (e0, e1) >> 31); |
152 | } |
153 | |
154 | /* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT. |
155 | IN may be equal to C->OUT, in which case elements are sorted in place. */ |
156 | template<typename sort_ctx> |
157 | static void |
158 | netsort (char *in, sort_ctx *c) |
159 | { |
160 | #define CMP(e0, e1) \ |
161 | do { \ |
162 | intptr_t x = cmp1 (e1, e0, c); \ |
163 | e0 = (char *)((intptr_t)e0 ^ x); \ |
164 | e1 = (char *)((intptr_t)e1 ^ x); \ |
165 | } while (0) |
166 | |
167 | char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size; |
168 | CMP (e0, e1); |
169 | if (LIKELY (c->n == 3)) |
170 | { |
171 | CMP (e1, e2); |
172 | CMP (e0, e1); |
173 | } |
174 | if (c->n <= 3) |
175 | return reorder23 (c, e0, e1, e2); |
176 | char *e3 = e2 + c->size, *e4 = e3 + c->size; |
177 | if (LIKELY (c->n == 5)) |
178 | { |
179 | CMP (e3, e4); |
180 | CMP (e2, e4); |
181 | } |
182 | CMP (e2, e3); |
183 | if (LIKELY (c->n == 5)) |
184 | { |
185 | CMP (e0, e3); |
186 | CMP (e1, e4); |
187 | } |
188 | CMP (e0, e2); |
189 | CMP (e1, e3); |
190 | CMP (e1, e2); |
191 | reorder45 (c, e0, e1, e2, e3, e4); |
192 | } |
193 | |
194 | /* Execute merge sort on N elements from IN, placing them into OUT, |
195 | using TMP as temporary storage if IN is equal to OUT. |
196 | This is a stable sort if netsort is used only for 2 or 3 elements. */ |
197 | template<typename sort_ctx> |
198 | static void |
199 | mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp) |
200 | { |
201 | if (LIKELY (n <= c->nlim)) |
202 | { |
203 | c->out = out; |
204 | c->n = n; |
205 | return netsort (in, c); |
206 | } |
207 | size_t nl = n / 2, nr = n - nl, sz = nl * c->size; |
208 | char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in; |
209 | /* Sort the right half, outputting to right half of OUT. */ |
210 | mergesort (mid, c, nr, r, tmp); |
211 | /* Sort the left half, leaving left half of OUT free. */ |
212 | mergesort (in, c, nl, l, mid); |
213 | /* Merge sorted halves given by L, R to [OUT, END). */ |
214 | #define MERGE_ELTSIZE(SIZE) \ |
215 | do { \ |
216 | intptr_t mr = c->cmp (r, l) >> 31; \ |
217 | intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ |
218 | lr = (intptr_t)l ^ (lr & mr); \ |
219 | out = (char *)memcpy (out, (char *)lr, SIZE); \ |
220 | out += SIZE; \ |
221 | r += mr & SIZE; \ |
222 | if (r == out) return; \ |
223 | l += ~mr & SIZE; \ |
224 | } while (r != end) |
225 | |
226 | if (LIKELY (c->cmp (r, l + (r - out) - c->size) < 0)) |
227 | { |
228 | char *end = out + n * c->size; |
229 | if (sizeof (size_t) == 8 && LIKELY (c->size == 8)) |
230 | MERGE_ELTSIZE (8); |
231 | else if (LIKELY (c->size == 4)) |
232 | MERGE_ELTSIZE (4); |
233 | else |
234 | MERGE_ELTSIZE (c->size); |
235 | } |
236 | memcpy (dest: out, src: l, n: r - out); |
237 | } |
238 | |
239 | #if CHECKING_P |
240 | /* Don't complain about cast from void* to function pointer. */ |
241 | #pragma GCC diagnostic push |
242 | #pragma GCC diagnostic ignored "-Wconditionally-supported" |
243 | |
244 | /* Adapter for using two-argument comparators in functions expecting the |
245 | three-argument sort_r_cmp_fn type. */ |
246 | static int |
247 | cmp2to3 (const void *a, const void *b, void *c) |
248 | { |
249 | return ((cmp_fn *)c) (a, b); |
250 | } |
251 | #endif |
252 | |
253 | /* Replacement for C qsort. */ |
254 | void |
255 | gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) |
256 | { |
257 | if (n < 2) |
258 | return; |
259 | size_t nlim = 5; |
260 | bool stable = (ssize_t) size < 0; |
261 | if (stable) |
262 | nlim = 3, size = ~size; |
263 | char *base = (char *)vbase; |
264 | sort_ctx c = {.cmp: cmp, .out: base, .n: n, .size: size, .nlim: nlim}; |
265 | long long scratch[32]; |
266 | size_t bufsz = (n / 2) * size; |
267 | void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); |
268 | mergesort (in: base, c: &c, n, out: base, tmp: (char *)buf); |
269 | if (buf != scratch) |
270 | free (ptr: buf); |
271 | #if CHECKING_P |
272 | qsort_chk (vbase, n, size, cmp2to3, (void*)cmp); |
273 | #pragma GCC diagnostic pop |
274 | #endif |
275 | } |
276 | |
277 | /* Substitute for Glibc qsort_r. */ |
278 | void |
279 | gcc_sort_r (void *vbase, size_t n, size_t size, sort_r_cmp_fn *cmp, void *data) |
280 | { |
281 | if (n < 2) |
282 | return; |
283 | size_t nlim = 5; |
284 | bool stable = (ssize_t) size < 0; |
285 | if (stable) |
286 | nlim = 3, size = ~size; |
287 | char *base = (char *)vbase; |
288 | sort_r_ctx c = {.data: data, .cmp_: cmp, .out: base, .n: n, .size: size, .nlim: nlim}; |
289 | long long scratch[32]; |
290 | size_t bufsz = (n / 2) * size; |
291 | void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); |
292 | mergesort (in: base, c: &c, n, out: base, tmp: (char *)buf); |
293 | if (buf != scratch) |
294 | free (ptr: buf); |
295 | #if CHECKING_P |
296 | qsort_chk (vbase, n, size, cmp, data); |
297 | #endif |
298 | } |
299 | |
300 | /* Stable sort, signature-compatible to C qsort. */ |
301 | void |
302 | gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp) |
303 | { |
304 | gcc_qsort (vbase, n, size: ~size, cmp); |
305 | } |
306 | |
307 | /* Stable sort, signature-compatible to Glibc qsort_r. */ |
308 | void |
309 | gcc_stablesort_r (void *vbase, size_t n, size_t size, sort_r_cmp_fn *cmp, |
310 | void *data) |
311 | { |
312 | gcc_sort_r (vbase, n, size: ~size, cmp, data); |
313 | } |
314 | |