1 | /* Copyright (C) 1991-2024 Free Software Foundation, Inc. |
---|---|

2 | This file is part of the GNU C Library. |

3 | |

4 | The GNU C Library is free software; you can redistribute it and/or |

5 | modify it under the terms of the GNU Lesser General Public |

6 | License as published by the Free Software Foundation; either |

7 | version 2.1 of the License, or (at your option) any later version. |

8 | |

9 | The GNU C Library is distributed in the hope that it will be useful, |

10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

12 | Lesser General Public License for more details. |

13 | |

14 | You should have received a copy of the GNU Lesser General Public |

15 | License along with the GNU C Library; if not, see |

16 | <https://www.gnu.org/licenses/>. */ |

17 | |

18 | /* If you consider tuning this algorithm, you should consult first: |

19 | Engineering a sort function; Jon Bentley and M. Douglas McIlroy; |

20 | Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ |

21 | |

22 | #include <errno.h> |

23 | #include <limits.h> |

24 | #include <memswap.h> |

25 | #include <stdlib.h> |

26 | #include <string.h> |

27 | #include <stdbool.h> |

28 | |

29 | /* Swap SIZE bytes between addresses A and B. These helpers are provided |

30 | along the generic one as an optimization. */ |

31 | |

32 | enum swap_type_t |

33 | { |

34 | SWAP_WORDS_64, |

35 | SWAP_WORDS_32, |

36 | SWAP_VOID_ARG, |

37 | SWAP_BYTES |

38 | }; |

39 | |

40 | typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; |

41 | typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; |

42 | |

43 | static inline void |

44 | swap_words_64 (void * restrict a, void * restrict b, size_t n) |

45 | { |

46 | do |

47 | { |

48 | n -= 8; |

49 | u64_alias_t t = *(u64_alias_t *)(a + n); |

50 | *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n); |

51 | *(u64_alias_t *)(b + n) = t; |

52 | } while (n); |

53 | } |

54 | |

55 | static inline void |

56 | swap_words_32 (void * restrict a, void * restrict b, size_t n) |

57 | { |

58 | do |

59 | { |

60 | n -= 4; |

61 | u32_alias_t t = *(u32_alias_t *)(a + n); |

62 | *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n); |

63 | *(u32_alias_t *)(b + n) = t; |

64 | } while (n); |

65 | } |

66 | |

67 | /* Replace the indirect call with a serie of if statements. It should help |

68 | the branch predictor. */ |

69 | static void |

70 | do_swap (void * restrict a, void * restrict b, size_t size, |

71 | enum swap_type_t swap_type) |

72 | { |

73 | if (swap_type == SWAP_WORDS_64) |

74 | swap_words_64 (a, b, n: size); |

75 | else if (swap_type == SWAP_WORDS_32) |

76 | swap_words_32 (a, b, n: size); |

77 | else |

78 | __memswap (p1: a, p2: b, n: size); |

79 | } |

80 | |

81 | /* Establish the heap condition at index K, that is, the key at K will |

82 | not be less than either of its children, at 2 * K + 1 and 2 * K + 2 |

83 | (if they exist). N is the last valid index. */ |

84 | static inline void |

85 | siftdown (void *base, size_t size, size_t k, size_t n, |

86 | enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg) |

87 | { |

88 | /* There can only be a heap condition violation if there are |

89 | children. */ |

90 | while (2 * k + 1 <= n) |

91 | { |

92 | /* Left child. */ |

93 | size_t j = 2 * k + 1; |

94 | /* If the right child is larger, use it. */ |

95 | if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0) |

96 | j++; |

97 | |

98 | /* If k is already >= to its children, we are done. */ |

99 | if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0) |

100 | break; |

101 | |

102 | /* Heal the violation. */ |

103 | do_swap (a: base + (size * j), b: base + (k * size), size, swap_type); |

104 | |

105 | /* Swapping with j may have introduced a violation at j. Fix |

106 | it in the next loop iteration. */ |

107 | k = j; |

108 | } |

109 | } |

110 | |

111 | /* Establish the heap condition for the indices 0 to N (inclusive). */ |

112 | static inline void |

113 | heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type, |

114 | __compar_d_fn_t cmp, void *arg) |

115 | { |

116 | /* If n is odd, k = n / 2 has a left child at n, so this is the |

117 | largest index that can have a heap condition violation regarding |

118 | its children. */ |

119 | size_t k = n / 2; |

120 | while (1) |

121 | { |

122 | siftdown (base, size, k, n, swap_type, cmp, arg); |

123 | if (k-- == 0) |

124 | break; |

125 | } |

126 | } |

127 | |

128 | static enum swap_type_t |

129 | get_swap_type (void *const pbase, size_t size) |

130 | { |

131 | if ((size & (sizeof (uint32_t) - 1)) == 0 |

132 | && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0) |

133 | { |

134 | if (size == sizeof (uint32_t)) |

135 | return SWAP_WORDS_32; |

136 | else if (size == sizeof (uint64_t) |

137 | && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0) |

138 | return SWAP_WORDS_64; |

139 | } |

140 | return SWAP_BYTES; |

141 | } |

142 | |

143 | |

144 | /* A non-recursive heapsort with worst-case performance of O(nlog n) and |

145 | worst-case space complexity of O(1). It sorts the array starting at |

146 | BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback |

147 | function used to swap elements, and CMP is the function used to compare |

148 | elements. */ |

149 | static void |

150 | heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg) |

151 | { |

152 | if (n == 0) |

153 | return; |

154 | |

155 | enum swap_type_t swap_type = get_swap_type (pbase: base, size); |

156 | |

157 | /* Build the binary heap, largest value at the base[0]. */ |

158 | heapify (base, size, n, swap_type, cmp, arg); |

159 | |

160 | while (true) |

161 | { |

162 | /* Indices 0 .. n contain the binary heap. Extract the largest |

163 | element put it into the final position in the array. */ |

164 | do_swap (a: base, b: base + (n * size), size, swap_type); |

165 | |

166 | /* The heap is now one element shorter. */ |

167 | n--; |

168 | if (n == 0) |

169 | break; |

170 | |

171 | /* By swapping in elements 0 and the previous value of n (now at |

172 | n + 1), we likely introduced a heap condition violation. Fix |

173 | it for the reduced heap. */ |

174 | siftdown (base, size, k: 0, n, swap_type, cmp, arg); |

175 | } |

176 | } |

177 | |

178 | /* The maximum size in bytes required by mergesort that will be provided |

179 | through a buffer allocated in the stack. */ |

180 | #define QSORT_STACK_SIZE 1024 |

181 | |

182 | /* Elements larger than this value will be sorted through indirect sorting |

183 | to minimize the need to memory swap calls. */ |

184 | #define INDIRECT_SORT_SIZE_THRES 32 |

185 | |

186 | struct msort_param |

187 | { |

188 | size_t s; |

189 | enum swap_type_t var; |

190 | __compar_d_fn_t cmp; |

191 | void *arg; |

192 | char *t; |

193 | }; |

194 | |

195 | static void |

196 | msort_with_tmp (const struct msort_param *p, void *b, size_t n) |

197 | { |

198 | char *b1, *b2; |

199 | size_t n1, n2; |

200 | |

201 | if (n <= 1) |

202 | return; |

203 | |

204 | n1 = n / 2; |

205 | n2 = n - n1; |

206 | b1 = b; |

207 | b2 = (char *) b + (n1 * p->s); |

208 | |

209 | msort_with_tmp (p, b: b1, n: n1); |

210 | msort_with_tmp (p, b: b2, n: n2); |

211 | |

212 | char *tmp = p->t; |

213 | const size_t s = p->s; |

214 | __compar_d_fn_t cmp = p->cmp; |

215 | void *arg = p->arg; |

216 | switch (p->var) |

217 | { |

218 | case SWAP_WORDS_32: |

219 | while (n1 > 0 && n2 > 0) |

220 | { |

221 | if (cmp (b1, b2, arg) <= 0) |

222 | { |

223 | *(u32_alias_t *) tmp = *(u32_alias_t *) b1; |

224 | b1 += sizeof (u32_alias_t); |

225 | --n1; |

226 | } |

227 | else |

228 | { |

229 | *(u32_alias_t *) tmp = *(u32_alias_t *) b2; |

230 | b2 += sizeof (u32_alias_t); |

231 | --n2; |

232 | } |

233 | tmp += sizeof (u32_alias_t); |

234 | } |

235 | break; |

236 | case SWAP_WORDS_64: |

237 | while (n1 > 0 && n2 > 0) |

238 | { |

239 | if (cmp (b1, b2, arg) <= 0) |

240 | { |

241 | *(u64_alias_t *) tmp = *(u64_alias_t *) b1; |

242 | b1 += sizeof (u64_alias_t); |

243 | --n1; |

244 | } |

245 | else |

246 | { |

247 | *(u64_alias_t *) tmp = *(u64_alias_t *) b2; |

248 | b2 += sizeof (u64_alias_t); |

249 | --n2; |

250 | } |

251 | tmp += sizeof (u64_alias_t); |

252 | } |

253 | break; |

254 | case SWAP_VOID_ARG: |

255 | while (n1 > 0 && n2 > 0) |

256 | { |

257 | if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) |

258 | { |

259 | *(void **) tmp = *(void **) b1; |

260 | b1 += sizeof (void *); |

261 | --n1; |

262 | } |

263 | else |

264 | { |

265 | *(void **) tmp = *(void **) b2; |

266 | b2 += sizeof (void *); |

267 | --n2; |

268 | } |

269 | tmp += sizeof (void *); |

270 | } |

271 | break; |

272 | default: |

273 | while (n1 > 0 && n2 > 0) |

274 | { |

275 | if (cmp (b1, b2, arg) <= 0) |

276 | { |

277 | tmp = (char *) __mempcpy (tmp, b1, s); |

278 | b1 += s; |

279 | --n1; |

280 | } |

281 | else |

282 | { |

283 | tmp = (char *) __mempcpy (tmp, b2, s); |

284 | b2 += s; |

285 | --n2; |

286 | } |

287 | } |

288 | break; |

289 | } |

290 | |

291 | if (n1 > 0) |

292 | memcpy (tmp, b1, n1 * s); |

293 | memcpy (b, p->t, (n - n2) * s); |

294 | } |

295 | |

296 | static void |

297 | __attribute_used__ |

298 | indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n, |

299 | size_t s) |

300 | { |

301 | /* Indirect sorting. */ |

302 | char *ip = (char *) b; |

303 | void **tp = (void **) (p->t + n * sizeof (void *)); |

304 | void **t = tp; |

305 | void *tmp_storage = (void *) (tp + n); |

306 | |

307 | while ((void *) t < tmp_storage) |

308 | { |

309 | *t++ = ip; |

310 | ip += s; |

311 | } |

312 | msort_with_tmp (p, b: p->t + n * sizeof (void *), n); |

313 | |

314 | /* tp[0] .. tp[n - 1] is now sorted, copy around entries of |

315 | the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ |

316 | char *kp; |

317 | size_t i; |

318 | for (i = 0, ip = (char *) b; i < n; i++, ip += s) |

319 | if ((kp = tp[i]) != ip) |

320 | { |

321 | size_t j = i; |

322 | char *jp = ip; |

323 | memcpy (tmp_storage, ip, s); |

324 | |

325 | do |

326 | { |

327 | size_t k = (kp - (char *) b) / s; |

328 | tp[j] = jp; |

329 | memcpy (jp, kp, s); |

330 | j = k; |

331 | jp = kp; |

332 | kp = tp[k]; |

333 | } |

334 | while (kp != ip); |

335 | |

336 | tp[j] = jp; |

337 | memcpy (jp, tmp_storage, s); |

338 | } |

339 | } |

340 | |

341 | void |

342 | __qsort_r (void *const pbase, size_t total_elems, size_t size, |

343 | __compar_d_fn_t cmp, void *arg) |

344 | { |

345 | if (total_elems <= 1) |

346 | return; |

347 | |

348 | /* Align to the maximum size used by the swap optimization. */ |

349 | _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE]; |

350 | size_t total_size = total_elems * size; |

351 | char *buf; |

352 | |

353 | if (size > INDIRECT_SORT_SIZE_THRES) |

354 | total_size = 2 * total_elems * sizeof (void *) + size; |

355 | |

356 | if (total_size <= sizeof tmp) |

357 | buf = tmp; |

358 | else |

359 | { |

360 | int save = errno; |

361 | buf = malloc (size: total_size); |

362 | __set_errno (save); |

363 | if (buf == NULL) |

364 | { |

365 | /* Fallback to heapsort in case of memory failure. */ |

366 | heapsort_r (base: pbase, n: total_elems - 1, size, cmp, arg); |

367 | return; |

368 | } |

369 | } |

370 | |

371 | if (size > INDIRECT_SORT_SIZE_THRES) |

372 | { |

373 | const struct msort_param msort_param = |

374 | { |

375 | .s = sizeof (void *), |

376 | .cmp = cmp, |

377 | .arg = arg, |

378 | .var = SWAP_VOID_ARG, |

379 | .t = buf, |

380 | }; |

381 | indirect_msort_with_tmp (p: &msort_param, b: pbase, n: total_elems, s: size); |

382 | } |

383 | else |

384 | { |

385 | const struct msort_param msort_param = |

386 | { |

387 | .s = size, |

388 | .cmp = cmp, |

389 | .arg = arg, |

390 | .var = get_swap_type (pbase, size), |

391 | .t = buf, |

392 | }; |

393 | msort_with_tmp (p: &msort_param, b: pbase, n: total_elems); |

394 | } |

395 | |

396 | if (buf != tmp) |

397 | free (ptr: buf); |

398 | } |

399 | libc_hidden_def (__qsort_r) |

400 | weak_alias (__qsort_r, qsort_r) |

401 | |

402 | void |

403 | qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) |

404 | { |

405 | return __qsort_r (pbase: b, total_elems: n, size: s, cmp: (__compar_d_fn_t) cmp, NULL); |

406 | } |

407 | libc_hidden_def (qsort) |

408 |