/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#define L(label) .L ## label

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian the early bytes of a word are at the MSB end; on
   little-endian they are at the LSB end.  LS_FW means shifting
   towards the early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */
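
/* Worked single-byte example of the test above:
     X = 0x00: (0x00 - 1) & ~0x00 & 0x80 = 0xff & 0xff & 0x80 = 0x80
     X = 0x01: (0x01 - 1) & ~0x01 & 0x80 = 0x00 & 0xfe & 0x80 = 0x00
     X = 0x80: (0x80 - 1) & ~0x80 & 0x80 = 0x7f & 0x7f & 0x80 = 0x00
   Only a zero byte leaves the top bit set.  */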
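
/* int strcmp(const char *src1, const char *src2)
   Returns an integer that is negative, zero, or positive when src1 is
   respectively smaller than, equal to, or greater than src2, comparing
   bytes as unsigned characters.  */
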
SYM_FUNC_START(__pi_strcmp)
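	/* off2 = src2 - src1, so a single index off src1 addresses both
	   strings in the word loops.  If the pointers differ modulo 8,
	   take L(misaligned8); if they are mutually aligned but src1 is
	   not at an 8-byte boundary, fix up the first word first.  */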
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

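	/* Aligned loop: check src1's word for a NUL byte and compare the
	   two words.  The ccmp performs the data1/data2 comparison only
	   when no NUL was found (eq); otherwise it forces NZCV to 0 so
	   the b.eq falls through.  */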
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
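	/* The syndrome is non-zero in every byte that differs or is NUL.
	   Put it in memory byte order (byte-reverse on little-endian) so
	   clz finds the first such byte.  */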
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
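	/* Build a mask of 0xff bytes covering everything before the start
	   point and OR it into both words: those bytes then compare equal
	   and cannot pass the NUL test.  */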
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
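	/* Set the 0x01 bit in each byte of data3 that precedes the real
	   start of src2 so those bytes cannot register as NUL.  */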
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1

	.p2align 4

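	/* data3 (loaded aligned, via off1) is the src2 word covering the
	   upper end of data2's unaligned span and carries the NUL check:
	   the loop stops as soon as it sees src2's terminator, so no load
	   touches memory wholly beyond the end of src2.  */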
L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
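	/* data3 still holds src2's last aligned word (in little-endian
	   byte order at this point) and has_nul its NUL mask; shift both
	   down so their bytes line up with src1's word in data1.  */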
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
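	/* The ldrb loads zero-extended the bytes, so a plain subtraction
	   yields the negative/zero/positive result strcmp requires.  */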
	sub	result, data1, data2
	ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)
