1/* memchr - find a character in a memory zone using base integer registers
2
3 Copyright (C) 2018-2024 Free Software Foundation, Inc.
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <https://www.gnu.org/licenses/>. */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64
26 * Use base integer registers.
27 */
28
29/* Arguments and results. */
30#define srcin x0
31#define chrin x1
32#define cntin x2
33
34#define result x0
35
36#define repchr x1
37
38#define tmp1 x2
39#define tmp2 x3
40#define tmp3 x4
41#define tmp4 x5
42
43#define src x6
44#define srcend x7
45#define srcend16 x8
46
47#define anymore x9
48
49#define zeroones x10
50
51#define data1 x11
52#define data2 x12
53
54#define has_chr1 x13
55#define has_chr2 x14
56
57#define REP8_01 0x0101010101010101
58#define REP8_7f 0x7f7f7f7f7f7f7f7f
59
60
61ENTRY (__memchr_nosimd)
62
63 PTR_ARG (0)
64 SIZE_ARG (2)
65
66 /* Do not dereference srcin if no bytes to compare. */
67 cbz cntin, L(none_chr)
68
69 /* Start address is 16-byte aligned or not? */
70 tst srcin, 15
71 bic src, srcin, 15
72
73 mov zeroones, REP8_01
74 and repchr, chrin, 255
75 /* Generate a qword integer as |c|c|c|c|c|c|c|c|. */
76 mul repchr, repchr, zeroones
77
78 add srcend, srcin, cntin
79 /*
80 * srcend16 is address of the block following the last block.
81 *
82 * [A block is 16-byte aligned and sized.]
83 */
84 add srcend16, srcend, 15
85 bic srcend16, srcend16, 15
86
87 b.eq L(loop)
88
89 /* Load the first block containing start address. */
90 ldp data1, data2, [src], 16
91
92 lsl tmp1, srcin, 3
93 mov tmp2, ~0
94#ifdef __AARCH64EB__
95 lsr tmp3, tmp2, tmp1
96#else
97 lsl tmp3, tmp2, tmp1
98#endif
99 /* Start address is in the first or the second qword? */
100 tst srcin, 8
101
102 /*
103 * Transform any byte in the block to zero using XOR operation,
104 * if that byte equals the char to search. In this way, searching
105 * the char becomes detecting zero in the resulting two qwords.
106 */
107 eor data1, data1, repchr
108 eor data2, data2, repchr
109
110 /*
111 * Set those unused bytes(before start address) to 0xff, so
112 * that they will not hit any zero detection.
113 */
114 orn tmp1, data1, tmp3
115 orn tmp2, data2, tmp3
116
117 csinv data1, tmp1, xzr, eq
118 csel data2, data2, tmp2, eq
119
120 /*
121 * When the first and last block are the same, there are two cases:
122 * o. Memory range to search is just in one block.
123 * ( start address - end address) < 0
124 *
125 * o. Memory range is so large that end address wrap-around.
126 * ( start address - end address) > 0
127 */
128 cmp srcin, srcend
129 ccmp src, srcend16, 0, mi
130 csetm anymore, ne
131 b L(find_chr)
132
133 .p2align 4
134L(loop):
135 ldp data1, data2, [src], 16
136
137 subs anymore, src, srcend16
138
139 /*
140 * Transform any byte in the block to zero using XOR operation,
141 * if that byte equals the char to search.
142 */
143 eor data1, data1, repchr
144 eor data2, data2, repchr
145
146L(find_chr):
147 /*
148 * Use the following integer test to find out if any byte in a
149 * qword is zero. If do not contain zero-valued byte, test result
150 * is zero.
151 *
152 * (qword - 0x0101010101010101) & ~(qword) & 0x8080808080808080
153 * =
154 * (qword - 0x0101010101010101) & ~(qword | 0x7f7f7f7f7f7f7f7f)
155 *
156 */
157 sub tmp1, data1, zeroones
158 sub tmp2, data2, zeroones
159
160 orr tmp3, data1, REP8_7f
161 orr tmp4, data2, REP8_7f
162
163 bic has_chr1, tmp1, tmp3
164 bic has_chr2, tmp2, tmp4
165
166 orr tmp1, has_chr1, has_chr2
167 ccmp tmp1, 0, 0, ne
168
169 b.eq L(loop)
170
171 cbz has_chr1, 1f
172 sub result, src, 16
173#ifdef __AARCH64EB__
174 rev data1, data1
175#else
176 rev has_chr1, has_chr1
177#endif
178 b L(done)
179
1801: cbz has_chr2, L(none_chr)
181 sub result, src, 8
182#ifdef __AARCH64EB__
183 rev data1, data2
184#else
185 rev has_chr1, has_chr2
186#endif
187
188L(done):
189#ifdef __AARCH64EB__
190 /*
191 * For big-endian, can not directly use has_chr1/has_chr2 because
192 * two qwords has been reversed after loading from memory.
193 * Thus, have to perform char detection on two qwords again, which
194 * should be byte-swapped this time.
195 */
196 sub tmp1, data1, zeroones
197 orr tmp3, data1, REP8_7f
198 bic has_chr1, tmp1, tmp3
199 rev has_chr1, has_chr1
200#endif
201
202 /*
203 * If the specified char is found in a qword, the corresponding
204 * byte of in has_chr has value of 1, while this is only true for
205 * the first occurrence, not other occurrences.
206 */
207 cmp anymore, 0
208 clz tmp1, has_chr1
209 add result, result, tmp1, lsr 3
210 ccmp result, srcend, 8, eq /* NZCV = 8000 */
211 csel result, result, xzr, mi
212 ret
213
214L(none_chr):
215 mov result, 0
216 ret
217
218END (__memchr_nosimd)
219

source code of glibc/sysdeps/aarch64/multiarch/memchr_nosimd.S