/* strchr - find a character in a string

   Copyright (C) 2014-2024 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
20
#include <sysdep.h>
22
/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */
28
/* Argument and return registers.  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* General-purpose scratch registers.  tmp1 is the 64-bit view of the
   register that carries chrin, so chrin is lost as soon as tmp1 is
   written.  */
#define src		x2
#define tmp1		x1
#define tmp2		x3

/* Vector registers.  qdata is the 128-bit scalar view of vdata and dend
   is the 64-bit scalar view of vend.  */
#define vrepchr		v0
#define vdata		v1
#define qdata		q1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask	v4
#define vend		v5
#define dend		d5
45
/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  Within each 4-bit group, bits 0-1 are set if the relevant byte
   matched the requested character, and bits 2-3 are set if the byte is NUL
   (in the main loop: NUL or matched).  Counting the trailing zeroes of the
   syndrome therefore gives four times the index of the first matching byte
   when the count is a multiple of 4.  If the count is not a multiple of 4,
   a NUL byte was reached first and there was no match.  */
53
/* strchr entry point.

   In:   srcin (x0) = address of the string to search.
	 chrin (w1) = character to find; only its low byte is used.
   Out:  result (x0) = address of the first byte equal to the character,
	 or 0 when a NUL byte is reached first.  Searching for NUL
	 returns the address of the terminating NUL.
   Uses: x1-x3, v0-v5 and the condition flags.

   Data is always read as 16-byte chunks from 16-byte-aligned addresses,
   so the first load may include bytes that precede the string.  */
ENTRY (strchr)
	/* PTR_ARG comes from sysdep.h.  NOTE(review): presumably it
	   sanitises pointer argument 0 for ILP32 builds - confirm.  */
	PTR_ARG (0)
	/* src = srcin rounded down to a 16-byte boundary.  */
	bic	src, srcin, 15
	/* Replicate the low byte of chrin into all 16 lanes.  */
	dup	vrepchr.16b, chrin
	/* A byte-element ld1 places byte i in lane i on either endianness,
	   so this first-chunk path needs no __AARCH64EB__ variant.  */
	ld1	{vdata.16b}, [src]
	movi	vrepmask.16b, 0x33
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* In every byte, take bits 0-1 and 4-5 from vhas_chr and keep
	   bits 2-3 and 6-7 from vhas_nul.  */
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	/* tmp2 = 4 * srcin: four syndrome bits per byte.  */
	lsl	tmp2, srcin, 2
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	/* tmp1 overwrites x1; chrin is no longer needed after the dup.  */
	fmov	tmp1, dend
	/* Discard the nibbles of the bytes that precede srcin.  A register
	   shift uses the count modulo 64, i.e. 4 * (srcin & 15).  */
	lsr	tmp1, tmp1, tmp2
	cbz	tmp1, L(loop)

	/* rbit + clz = count of trailing zeroes of the syndrome.  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	/* Tmp1 is a multiple of 4 if the target character was found
	   first.  Otherwise bit 1 is set and we've found the end of
	   string.  */
	tst	tmp1, 2
	add	result, srcin, tmp1, lsr 2
	csel	result, result, xzr, eq
	ret

	/* Main loop, unrolled twice.  The first load uses a plain offset;
	   the second pre-increments src by 32.  */
	.p2align 4
L(loop):
	ldr	qdata, [src, 16]
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Unsigned vhas_chr >= vdata holds when the byte matched (0xff)
	   or when the data byte is 0, so one compare detects both.  */
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	/* Pairwise maximum folds the 16 lanes into the low 64 bits; the
	   result is non-zero iff any lane is non-zero.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	tmp1, dend
	cbnz	tmp1, L(end)
	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)
	/* Make src point 16 bytes below the chunk just examined, as it
	   does when L(end) is reached from the first half of the loop.  */
	sub	src, src, 16
L(end):

	/* Build the 4-bits-per-byte syndrome for the chunk that stopped
	   the loop.  */
#ifdef __AARCH64EB__
	/* Big-endian: the syndrome is scanned from the most significant
	   end with clz alone, so bif (insert where the mask bit is 0)
	   puts the match bits in the upper half of each nibble.  */
	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	tmp1, dend
#else
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	tmp1, dend
	rbit	tmp1, tmp1
#endif
	/* src now addresses the chunk that contains the hit.  */
	add	src, src, 16
	clz	tmp1, tmp1
	/* Tmp1 is a multiple of 4 if the target character was found.  */
	tst	tmp1, 2
	add	result, src, tmp1, lsr 2
	csel	result, result, xzr, eq
	ret

END (strchr)
/* Symbol bookkeeping for strchr.  NOTE(review): both macros are defined
   outside this file; judging by their names they emit the hidden
   internal definition of strchr and make index a weak alias of it -
   confirm against their definitions.  */
libc_hidden_builtin_def (strchr)
weak_alias (strchr, index)
116

/* Source: glibc/sysdeps/aarch64/strchr.S  */