/* memcmp - compare memory
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */
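
/* For reference, a minimal C sketch of the semantics this routine
   implements.  Documentation only, not part of the build; the names
   p1/p2 are illustrative.  Only the sign of the result is specified:

     int memcmp (const void *s1, const void *s2, size_t n)
     {
       const unsigned char *p1 = s1, *p2 = s2;
       for (; n != 0; n--, p1++, p2++)
         if (*p1 != *p2)
           return *p1 - *p2;
       return 0;
     }
 */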

#include "../asmdefs.h"

/* Parameters and result. */
#define src1	x0
#define src2	x1
#define limit	x2
#define result	w0

/* Internal variables. */
#define data1	x3
#define data1w	w3
#define data1h	x4
#define data2	x5
#define data2w	w5
#define data2h	x6
#define tmp1	x7
#define tmp2	x8

ENTRY (__memcmp_aarch64)
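	/* Handle inputs shorter than 8 bytes via the byte/word path below.  */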
	subs	limit, limit, 8
	b.lo	L(less8)

	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	subs	limit, limit, 8
	b.gt	L(more16)

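	/* 8-16 bytes total: limit is now in [-8..0], so these loads pick up
	   the last 8 bytes of each buffer.  They may overlap bytes already
	   compared, which is harmless since those bytes were equal.  */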
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
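	/* More than 16 bytes: compare a second 8-byte chunk.  */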
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	bne	L(return)

	/* Jump directly to comparing the last 16 bytes for strings of
	   32 bytes or fewer.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.
	   Limit has already been decremented by 32 (8 + 8 + 16) at this
	   point, so comparing against 96 selects total lengths over 128.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1
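	/* Rounding src1 down re-compares up to 15 bytes that were already
	   found equal; limit was increased by the same amount above so the
	   end point is unchanged.  */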

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	ccmp	data1, data2, 0, hi	/* If limit > 0, compare low words;
					   else force NZCV = 0 (NE) to exit.  */
	ccmp	data1h, data2h, 0, eq	/* If still equal, compare high words.  */
	b.eq	L(loop16)

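	/* The loop exited either because <= 16 bytes are left or because a
	   difference was found.  Locate the differing word, or fall through
	   to compare the final 1-16 bytes.  */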
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	bne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  */
L(return):
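	/* On little-endian, byte-reverse both words so that an unsigned
	   comparison reflects memory (byte) order; big-endian already does.  */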
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	cset	result, ne		/* result = 1 if different, 0 if equal.  */
	cneg	result, result, lo	/* Negate to -1 if data1 < data2.  */
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	adds	limit, limit, 4
	b.lo	L(less4)
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
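	/* On both paths limit is now in [-4..-1]; adding 4 below yields the
	   remaining byte count.  */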
L(less4):
	adds	limit, limit, 4
	beq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* If bytes remain, compare them;
					   else NZCV = 0b0000 (NE) exits.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret

END (__memcmp_aarch64)