1 | /* memcmp - compare memory |
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | */ |
7 | |
8 | /* Assumptions: |
9 | * |
10 | * ARMv8-a, AArch64, unaligned accesses. |
11 | */ |
12 | |
13 | #include "../asmdefs.h" |
14 | |
/* Parameters and result.  Register aliases for the values handed to the
   routine in x0-x2 and the value it hands back in w0.  */
#define src1 x0		/* Pointer into the first buffer; advanced as data is read.  */
#define src2 x1		/* Pointer into the second buffer; advanced as data is read.  */
#define limit x2	/* Byte count on entry; afterwards a biased remaining-bytes counter.  */
#define result w0	/* Return value.  Same register as src1 (32-bit view of x0).  */

/* Internal variables.  data1* hold data loaded through src1, data2* hold
   data loaded through src2.  */
#define data1 x3	/* Word loaded from src1 (low word of an ldp pair).  */
#define data1w w3	/* 32-bit view of data1, used for 4-byte and 1-byte loads.  */
#define data1h x4	/* High word of an ldp pair loaded from src1.  */
#define data2 x5	/* Word loaded from src2 (low word of an ldp pair).  */
#define data2w w5	/* 32-bit view of data2, used for 4-byte and 1-byte loads.  */
#define data2h x6	/* High word of an ldp pair loaded from src2.  */
#define tmp1 x7		/* Scratch: misalignment of src1 (src1 & 15).  */
#define tmp2 x8		/* Scratch; not referenced by the code visible in this file.  */
30 | |
/* __memcmp_aarch64 - compare two memory buffers.

   In:    src1, src2 = buffers to compare, limit = number of bytes.
   Out:   result = 0 when every byte matches.  Otherwise the sign follows
	  the first differing byte pair (src1 byte minus src2 byte): the
	  4-byte and 8-byte paths produce exactly 1 or -1, the byte loop
	  produces the byte difference itself.
   Uses:  src1, src2 and limit are modified; data1, data1h, data2, data2h,
	  tmp1 and the condition flags are overwritten.  */
ENTRY (__memcmp_aarch64)
	/* limit = n - 8.  Fewer than 8 bytes take the short path.  */
	subs	limit, limit, #8
	b.lo	L(short8)

	/* Bytes 0..7.  */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	cmp	data1, data2
	b.ne	L(order_words)

	/* limit = n - 16.  Positive means more than 16 bytes in total.  */
	subs	limit, limit, #8
	b.gt	L(over16)

	/* 8..16 bytes: limit is in [-8..0], so these loads fetch the final
	   8 bytes, overlapping bytes that have already been compared.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(order_words)

L(over16):
	/* Bytes 8..15.  */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	cmp	data1, data2
	b.ne	L(order_words)

	/* limit = n - 32.  With 32 bytes or fewer, only the final 16 bytes
	   are left to check.  */
	subs	limit, limit, #16
	b.ls	L(tail16)

	/* Aligning src1 makes the loads overlap data on either side, so it
	   is only done for inputs larger than 128 bytes (limit > 96).  */
	cmp	limit, #96
	b.ls	L(pair_loop)

	/* Step both pointers back by the misalignment of src1 and give the
	   same number of bytes back to limit.  */
	and	tmp1, src1, #15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Main loop: 16 bytes per iteration.  src1 is 16-byte aligned when
	   the alignment step above ran.  limit is biased by -16 and is
	   greater than zero on entry.  The ccmp chain yields EQ only when
	   more than 16 bytes remained before this iteration (HI after the
	   subs) and both word pairs match; anything else leaves the loop.  */
	.p2align 4
L(pair_loop):
	ldp	data1, data1h, [src1], #16
	ldp	data2, data2h, [src2], #16
	subs	limit, limit, #16
	ccmp	data1, data2, #0, hi
	ccmp	data1h, data2h, #0, eq
	b.eq	L(pair_loop)

	/* Left the loop: check the pair that was just loaded, low word
	   first, then the high word (moved into data1/data2 because the
	   result code ranks those two registers).  */
	cmp	data1, data2
	b.ne	L(order_words)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(order_words)

	/* Final 1-16 bytes.  limit is in [-15..0] here, so adding it moves
	   both pointers to 16 bytes before the end; the unaligned loads may
	   overlap data that has already been compared.  */
L(tail16):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(order_words)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Rank data1 against data2 and return 0, -1 or 1.  On little-endian
	   the bytes are reversed first so that the lowest-addressed byte is
	   the most significant one in the unsigned compare.  */
L(order_words):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Entered with flags set: EQ gives 0, otherwise 1, negated when LO.  */
L(set_result):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Fewer than 8 bytes.  limit is in [-8..-1] (n - 8).  */
L(short8):
	/* limit = n - 4; the add carries out exactly when n >= 4.  */
	adds	limit, limit, #4
	b.lo	L(short4)
	/* Bytes 0..3.  The 32-bit loads zero the upper halves of data1 and
	   data2, so the 64-bit result code can rank them.  */
	ldr	data1w, [src1], #4
	ldr	data2w, [src2], #4
	cmp	data1w, data2w
	b.ne	L(order_words)
	/* Undo the bias so the add below again yields the bytes left.  */
	sub	limit, limit, #4
L(short4):
	/* limit = bytes still to compare (0..3).  Zero sets Z, which the
	   result code turns into a return value of 0.  */
	adds	limit, limit, #4
	b.eq	L(set_result)
	/* Byte-at-a-time loop.  While bytes remain after this one (NE from
	   the subs) the ccmp compares the pair; on the last byte it forces
	   NZCV = 0b0000, which reads as NE and ends the loop.  */
L(bytewise):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, data2w, #0, ne
	b.eq	L(bytewise)
	/* Difference of the last byte pair read; 0 when they matched.  */
	sub	result, data1w, data2w
	ret

END (__memcmp_aarch64)
135 | |