1 | /* Copyright (C) 1991-2024 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | Contributed by Torbjorn Granlund (tege@sics.se). |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifdef HAVE_CONFIG_H |
20 | # include "config.h" |
21 | #endif |
22 | |
23 | #if defined HAVE_STRING_H || defined _LIBC |
24 | # include <string.h> |
25 | #endif |
26 | |
27 | #undef memcmp |
28 | |
29 | #ifndef MEMCMP |
30 | # define MEMCMP memcmp |
31 | #endif |
32 | |
33 | #ifdef _LIBC |
34 | |
35 | # include <memcopy.h> |
36 | # include <endian.h> |
37 | |
38 | # if __BYTE_ORDER == __BIG_ENDIAN |
39 | # define WORDS_BIGENDIAN |
40 | # endif |
41 | |
42 | #else /* Not in the GNU C library. */ |
43 | |
44 | # include <sys/types.h> |
45 | |
46 | /* Type to use for aligned memory operations. |
47 | This should normally be the biggest type supported by a single load |
48 | and store. Must be an unsigned type. */ |
49 | # define OPSIZ (sizeof (op_t)) |
50 | |
51 | /* Type to use for unaligned operations. */ |
52 | typedef unsigned char byte; |
53 | |
54 | # ifndef WORDS_BIGENDIAN |
55 | # define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) |
56 | # else |
57 | # define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) |
58 | # endif |
59 | |
60 | #endif /* In the GNU C library. */ |
61 | |
62 | #ifdef WORDS_BIGENDIAN |
63 | # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1) |
64 | #else |
65 | # define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b)) |
66 | #endif |
67 | |
68 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */ |
69 | |
70 | /* The strategy of this memcmp is: |
71 | |
72 | 1. Compare bytes until one of the block pointers is aligned. |
73 | |
74 | 2. Compare using memcmp_common_alignment or |
75 | memcmp_not_common_alignment, regarding the alignment of the other |
76 | block after the initial byte operations. The maximum number of |
77 | full words (of type op_t) are compared in this way. |
78 | |
79 | 3. Compare the few remaining bytes. */ |
80 | |
81 | #ifndef WORDS_BIGENDIAN |
82 | /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine. |
83 | A and B are known to be different. |
84 | This is needed only on little-endian machines. */ |
85 | |
86 | static int memcmp_bytes (op_t, op_t) __THROW; |
87 | |
88 | static int |
89 | memcmp_bytes (op_t a, op_t b) |
90 | { |
91 | long int srcp1 = (long int) &a; |
92 | long int srcp2 = (long int) &b; |
93 | op_t a0, b0; |
94 | |
95 | do |
96 | { |
97 | a0 = ((byte *) srcp1)[0]; |
98 | b0 = ((byte *) srcp2)[0]; |
99 | srcp1 += 1; |
100 | srcp2 += 1; |
101 | } |
102 | while (a0 == b0); |
103 | return a0 - b0; |
104 | } |
105 | #endif |
106 | |
107 | static int memcmp_common_alignment (long, long, size_t) __THROW; |
108 | |
109 | /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' |
110 | objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for |
111 | memory operations on `op_t's. */ |
112 | static int |
113 | memcmp_common_alignment (long int srcp1, long int srcp2, size_t len) |
114 | { |
115 | op_t a0, a1; |
116 | op_t b0, b1; |
117 | |
118 | switch (len % 4) |
119 | { |
120 | default: /* Avoid warning about uninitialized local variables. */ |
121 | case 2: |
122 | a0 = ((op_t *) srcp1)[0]; |
123 | b0 = ((op_t *) srcp2)[0]; |
124 | srcp1 -= 2 * OPSIZ; |
125 | srcp2 -= 2 * OPSIZ; |
126 | len += 2; |
127 | goto do1; |
128 | case 3: |
129 | a1 = ((op_t *) srcp1)[0]; |
130 | b1 = ((op_t *) srcp2)[0]; |
131 | srcp1 -= OPSIZ; |
132 | srcp2 -= OPSIZ; |
133 | len += 1; |
134 | goto do2; |
135 | case 0: |
136 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
137 | return 0; |
138 | a0 = ((op_t *) srcp1)[0]; |
139 | b0 = ((op_t *) srcp2)[0]; |
140 | goto do3; |
141 | case 1: |
142 | a1 = ((op_t *) srcp1)[0]; |
143 | b1 = ((op_t *) srcp2)[0]; |
144 | srcp1 += OPSIZ; |
145 | srcp2 += OPSIZ; |
146 | len -= 1; |
147 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
148 | goto do0; |
149 | /* Fall through. */ |
150 | } |
151 | |
152 | do |
153 | { |
154 | a0 = ((op_t *) srcp1)[0]; |
155 | b0 = ((op_t *) srcp2)[0]; |
156 | if (a1 != b1) |
157 | return CMP_LT_OR_GT (a1, b1); |
158 | |
159 | do3: |
160 | a1 = ((op_t *) srcp1)[1]; |
161 | b1 = ((op_t *) srcp2)[1]; |
162 | if (a0 != b0) |
163 | return CMP_LT_OR_GT (a0, b0); |
164 | |
165 | do2: |
166 | a0 = ((op_t *) srcp1)[2]; |
167 | b0 = ((op_t *) srcp2)[2]; |
168 | if (a1 != b1) |
169 | return CMP_LT_OR_GT (a1, b1); |
170 | |
171 | do1: |
172 | a1 = ((op_t *) srcp1)[3]; |
173 | b1 = ((op_t *) srcp2)[3]; |
174 | if (a0 != b0) |
175 | return CMP_LT_OR_GT (a0, b0); |
176 | |
177 | srcp1 += 4 * OPSIZ; |
178 | srcp2 += 4 * OPSIZ; |
179 | len -= 4; |
180 | } |
181 | while (len != 0); |
182 | |
183 | /* This is the right position for do0. Please don't move |
184 | it into the loop. */ |
185 | do0: |
186 | if (a1 != b1) |
187 | return CMP_LT_OR_GT (a1, b1); |
188 | return 0; |
189 | } |
190 | |
191 | static int memcmp_not_common_alignment (long, long, size_t) __THROW; |
192 | |
193 | /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN |
194 | `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory |
195 | operations on `op_t', but SRCP1 *should be unaligned*. */ |
196 | static int |
197 | memcmp_not_common_alignment (long int srcp1, long int srcp2, size_t len) |
198 | { |
199 | op_t a0, a1, a2, a3; |
200 | op_t b0, b1, b2, b3; |
201 | op_t x; |
202 | int shl, shr; |
203 | |
204 | /* Calculate how to shift a word read at the memory operation |
205 | aligned srcp1 to make it aligned for comparison. */ |
206 | |
207 | shl = 8 * (srcp1 % OPSIZ); |
208 | shr = 8 * OPSIZ - shl; |
209 | |
210 | /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t' |
211 | it points in the middle of. */ |
212 | srcp1 &= -OPSIZ; |
213 | |
214 | switch (len % 4) |
215 | { |
216 | default: /* Avoid warning about uninitialized local variables. */ |
217 | case 2: |
218 | a1 = ((op_t *) srcp1)[0]; |
219 | a2 = ((op_t *) srcp1)[1]; |
220 | b2 = ((op_t *) srcp2)[0]; |
221 | srcp1 -= 1 * OPSIZ; |
222 | srcp2 -= 2 * OPSIZ; |
223 | len += 2; |
224 | goto do1; |
225 | case 3: |
226 | a0 = ((op_t *) srcp1)[0]; |
227 | a1 = ((op_t *) srcp1)[1]; |
228 | b1 = ((op_t *) srcp2)[0]; |
229 | srcp2 -= 1 * OPSIZ; |
230 | len += 1; |
231 | goto do2; |
232 | case 0: |
233 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
234 | return 0; |
235 | a3 = ((op_t *) srcp1)[0]; |
236 | a0 = ((op_t *) srcp1)[1]; |
237 | b0 = ((op_t *) srcp2)[0]; |
238 | srcp1 += 1 * OPSIZ; |
239 | goto do3; |
240 | case 1: |
241 | a2 = ((op_t *) srcp1)[0]; |
242 | a3 = ((op_t *) srcp1)[1]; |
243 | b3 = ((op_t *) srcp2)[0]; |
244 | srcp1 += 2 * OPSIZ; |
245 | srcp2 += 1 * OPSIZ; |
246 | len -= 1; |
247 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
248 | goto do0; |
249 | /* Fall through. */ |
250 | } |
251 | |
252 | do |
253 | { |
254 | a0 = ((op_t *) srcp1)[0]; |
255 | b0 = ((op_t *) srcp2)[0]; |
256 | x = MERGE(a2, shl, a3, shr); |
257 | if (x != b3) |
258 | return CMP_LT_OR_GT (x, b3); |
259 | |
260 | do3: |
261 | a1 = ((op_t *) srcp1)[1]; |
262 | b1 = ((op_t *) srcp2)[1]; |
263 | x = MERGE(a3, shl, a0, shr); |
264 | if (x != b0) |
265 | return CMP_LT_OR_GT (x, b0); |
266 | |
267 | do2: |
268 | a2 = ((op_t *) srcp1)[2]; |
269 | b2 = ((op_t *) srcp2)[2]; |
270 | x = MERGE(a0, shl, a1, shr); |
271 | if (x != b1) |
272 | return CMP_LT_OR_GT (x, b1); |
273 | |
274 | do1: |
275 | a3 = ((op_t *) srcp1)[3]; |
276 | b3 = ((op_t *) srcp2)[3]; |
277 | x = MERGE(a1, shl, a2, shr); |
278 | if (x != b2) |
279 | return CMP_LT_OR_GT (x, b2); |
280 | |
281 | srcp1 += 4 * OPSIZ; |
282 | srcp2 += 4 * OPSIZ; |
283 | len -= 4; |
284 | } |
285 | while (len != 0); |
286 | |
287 | /* This is the right position for do0. Please don't move |
288 | it into the loop. */ |
289 | do0: |
290 | x = MERGE(a2, shl, a3, shr); |
291 | if (x != b3) |
292 | return CMP_LT_OR_GT (x, b3); |
293 | return 0; |
294 | } |
295 | |
296 | int |
297 | MEMCMP (const void *s1, const void *s2, size_t len) |
298 | { |
299 | op_t a0; |
300 | op_t b0; |
301 | long int srcp1 = (long int) s1; |
302 | long int srcp2 = (long int) s2; |
303 | op_t res; |
304 | |
305 | if (len >= OP_T_THRES) |
306 | { |
307 | /* There are at least some bytes to compare. No need to test |
308 | for LEN == 0 in this alignment loop. */ |
309 | while (srcp2 % OPSIZ != 0) |
310 | { |
311 | a0 = ((byte *) srcp1)[0]; |
312 | b0 = ((byte *) srcp2)[0]; |
313 | srcp1 += 1; |
314 | srcp2 += 1; |
315 | res = a0 - b0; |
316 | if (res != 0) |
317 | return res; |
318 | len -= 1; |
319 | } |
320 | |
321 | /* SRCP2 is now aligned for memory operations on `op_t'. |
322 | SRCP1 alignment determines if we can do a simple, |
323 | aligned compare or need to shuffle bits. */ |
324 | |
325 | if (srcp1 % OPSIZ == 0) |
326 | res = memcmp_common_alignment (srcp1, srcp2, len: len / OPSIZ); |
327 | else |
328 | res = memcmp_not_common_alignment (srcp1, srcp2, len: len / OPSIZ); |
329 | if (res != 0) |
330 | return res; |
331 | |
332 | /* Number of bytes remaining in the interval [0..OPSIZ-1]. */ |
333 | srcp1 += len & -OPSIZ; |
334 | srcp2 += len & -OPSIZ; |
335 | len %= OPSIZ; |
336 | } |
337 | |
338 | /* There are just a few bytes to compare. Use byte memory operations. */ |
339 | while (len != 0) |
340 | { |
341 | a0 = ((byte *) srcp1)[0]; |
342 | b0 = ((byte *) srcp2)[0]; |
343 | srcp1 += 1; |
344 | srcp2 += 1; |
345 | res = a0 - b0; |
346 | if (res != 0) |
347 | return res; |
348 | len -= 1; |
349 | } |
350 | |
351 | return 0; |
352 | } |
353 | libc_hidden_builtin_def(memcmp) |
354 | #ifdef weak_alias |
355 | # undef bcmp |
356 | weak_alias (memcmp, bcmp) |
357 | #endif |
358 | |
359 | #undef __memcmpeq |
360 | strong_alias (memcmp, __memcmpeq) |
361 | libc_hidden_def(__memcmpeq) |
362 | |