1/* strrchr: find the last instance of a character in a string.
2
3 Copyright (C) 2014-2024 Free Software Foundation, Inc.
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <https://www.gnu.org/licenses/>. */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64, Advanced SIMD.
26 * MTE compatible.
27 */
28
/* Register roles used by strrchr below.  Some names deliberately share
   one physical register because they are never live at the same time:
     srcin / result    -> x0
     tmp / synd        -> x3
     shift / src_match -> x4
   Only x0-x6 and v0-v5 are named here.  */

/* srcin: string pointer argument.  chrin: character argument.
   result: return value (overwrites srcin once it is no longer needed).  */
29#define srcin x0
30#define chrin w1
31#define result x0
32
/* src:       address of the 16-byte aligned chunk being scanned.
   tmp:       scratch (CLZ result, masked match bits).
   synd:      64-bit syndrome of the current chunk.
   shift:     shift amount used to drop syndrome bits of bytes that
              precede srcin in the first chunk.
   src_match: in L(loop2), address just past the latest chunk that
              contained a match.
   nul_match: NUL flags (0xcc.. bits) of a syndrome.
   chr_match: match flags, or the saved syndrome of the latest
              matching chunk.  */
33#define src x2
34#define tmp x3
35#define synd x3
36#define shift x4
37#define src_match x4
38#define nul_match x5
39#define chr_match x6
40
/* vrepchr:  chrin replicated into all 16 byte lanes.
   vdata:    the current 16-byte chunk (also written as q1 in L(loop1)).
   vhas_nul: per-byte NUL compare result, later the merged flags.
   vhas_chr: per-byte character compare result.
   vrepmask: bit-select mask used to merge vhas_chr into vhas_nul.
   vend:     reduced flags; dend is its low 64 bits, moved to synd.  */
41#define vrepchr v0
42#define vdata v1
43#define vhas_nul v2
44#define vhas_chr v3
45#define vrepmask v4
46#define vend v5
47#define dend d5
48
49/* Core algorithm.
50
51 For each 16-byte chunk we calculate a 64-bit syndrome value, with
52 four bits per byte (LSB is always in bits 0 and 1, for both big
53 and little-endian systems). For each tuple, bits 0-1 are set if
54 the relevant byte matched the requested character; bits 2-3 are set
55 if the relevant byte matched the NUL end of string. */
56
/* char *strrchr (const char *s, int c)

   In:   srcin (x0) = s; chrin (w1) = c.  Only the low byte of c is
         used, because it is replicated into byte lanes.
   Out:  result (x0) = address of the last byte equal to c that is
         located no later than the terminating NUL, or 0 if there is
         no such byte.

   The string is read in 16-byte aligned chunks, in three phases:
     1. the first chunk, which may start before s;
     2. L(loop1), used while no match has been seen;
     3. L(loop2), which remembers the latest matching chunk until the
        chunk holding the NUL is reached.
   The function uses no stack and only the registers named by the
   defines above.  */
57ENTRY (strrchr)
 /* PTR_ARG is defined outside this file; presumably it normalises
    pointer argument 0 for the ABI in use -- confirm in sysdep.h.  */
58 PTR_ARG (0)
 /* src = srcin rounded down to a 16-byte boundary, so the first load
    is aligned and may include bytes that precede the string.  */
59 bic src, srcin, 15
 /* Broadcast the search byte to all 16 lanes.  */
60 dup vrepchr.16b, chrin
 /* Mask selecting bits 0-1 and 4-5 of every byte.  */
61 movi vrepmask.16b, 0x33
62 ld1 {vdata.16b}, [src]
 /* 0xff in each lane that is NUL / that equals the search byte.  */
63 cmeq vhas_nul.16b, vdata.16b, 0
64 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
 /* Merge: bits selected by vrepmask come from vhas_chr, the remaining
    bits stay from vhas_nul.  */
65 bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
 /* Narrow the 16 flag bytes to 64 bits: four syndrome bits per byte.  */
66 shrn vend.8b, vhas_nul.8h, 4
 /* Register-controlled shifts use the amount modulo 64, so this acts
    as 4 * (srcin % 16): the number of syndrome bits that belong to
    bytes before srcin.  */
67 lsl shift, srcin, 2
68 fmov synd, dend
 /* Shift right then left to zero those leading syndrome bits.  */
69 lsr synd, synd, shift
70 lsl synd, synd, shift
 /* Isolate the NUL flags.  If any is set the string ends in this
    chunk; otherwise a non-zero synd means the chunk has a match and
    must be tracked by L(loop2).  */
71 ands nul_match, synd, 0xcccccccccccccccc
72 bne L(tail)
73 cbnz synd, L(loop2_start)
74
 /* Phase 2: no match seen so far.  Two chunks are tested per
    iteration with a cheap "match or NUL" check; the precise syndrome
    is only built at L(loop1_end).  */
75 .p2align 4
76L(loop1):
 /* q1 is the 128-bit view of vdata (v1).  */
77 ldr q1, [src, 16]
78 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
 /* Unsigned vhas_chr >= vdata is true when the byte matched
    (vhas_chr == 0xff) or when the byte is NUL (vdata == 0), so one
    compare flags both conditions.  */
79 cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
 /* Pairwise max folds the 16 flag bytes into the low 64 bits.  */
80 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
81 fmov synd, dend
82 cbnz synd, L(loop1_end)
 /* Pre-indexed load: src advances by 32, then the chunk at the new
    src is loaded.  */
83 ldr q1, [src, 32]!
84 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
85 cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
86 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
87 fmov synd, dend
88 cbz synd, L(loop1)
 /* The hit is in the chunk at src itself; cancel the add below.  */
89 sub src, src, 16
90L(loop1_end):
 /* After this add, src is the address of the chunk held in vdata.  */
91 add src, src, 16
 /* vhas_chr still holds the match flags computed in the loop.  Build
    the four-bit-per-byte syndrome described under "Core algorithm".  */
92 cmeq vhas_nul.16b, vdata.16b, 0
93#ifdef __AARCH64EB__
 /* Big-endian: BIF takes vhas_chr where the vrepmask bits are clear
    (the opposite selection to BIT), and RBIT then reverses the whole
    64-bit syndrome.  Presumably this compensates for the byte order of
    the 128-bit LDR used in L(loop1) -- confirm.  */
94 bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b
95 shrn vend.8b, vhas_nul.8h, 4
96 fmov synd, dend
97 rbit synd, synd
98#else
99 bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
100 shrn vend.8b, vhas_nul.8h, 4
101 fmov synd, dend
102#endif
 /* No NUL flag means the chunk was flagged because of a match:
    continue in L(loop2).  Otherwise fall through to L(tail).  */
103 ands nul_match, synd, 0xcccccccccccccccc
104 beq L(loop2_start)
105L(tail):
 /* Reached with no earlier match recorded.  nul_match holds the
    non-zero NUL flags and synd the full syndrome of the chunk at src.
    nul_match - 1 has every bit below the lowest NUL flag set; ANDing
    it with the match flags (0x33.. bits) keeps only matches located
    no later than the first NUL.  */
106 sub nul_match, nul_match, 1
107 and chr_match, synd, 0x3333333333333333
108 ands chr_match, chr_match, nul_match
 /* With four syndrome bits per byte, clz / 4 is the distance of the
    last match back from byte 15 of the chunk.  ADD, CLZ and SUB do
    not alter the flags set by the ANDS above.  */
109 add result, src, 15
110 clz tmp, chr_match
111 sub result, result, tmp, lsr 2
 /* No match before the NUL (ANDS result was zero): return 0.  */
112 csel result, result, xzr, ne
113 ret
114
 /* Each instruction is 4 bytes, so the two NOPs after the 16-byte
    alignment put L(loop2_start) 8 bytes in, and L(loop2), two
    instructions later, lands on a 16-byte boundary.  */
115 .p2align 4
116 nop
117 nop
118L(loop2_start):
 /* Phase 3.  Both branches to this label are taken with synd != 0 and
    no NUL flag set, so the first pass through L(loop2) always
    initialises src_match and chr_match.  */
119 add src, src, 16
 /* Clear bits 4-7 in the low byte of every 16-bit lane of the mask:
    those bytes now select only bits 0-1 from vhas_chr, the high bytes
    still select bits 0-1 and 4-5.  */
120 bic vrepmask.8h, 0xf0
121
122L(loop2):
 /* Invariant: src is 16 bytes past the chunk that produced synd, and
    that chunk held no NUL.  If it held a match (synd != 0), remember
    its end address and its syndrome.  */
123 cmp synd, 0
124 csel src_match, src, src_match, ne
125 csel chr_match, synd, chr_match, ne
 /* Post-indexed load: read the chunk at src, then advance src by 16.  */
126 ld1 {vdata.16b}, [src], 16
127 cmeq vhas_nul.16b, vdata.16b, 0
128 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
129 bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
 /* Pairwise max gives one byte per byte pair.  For a NUL-free chunk
    its highest set bit lies in the nibble of the last matching byte,
    which is all the final CLZ relies on.  */
130 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
131 fmov synd, dend
 /* Keep looping while no NUL flag (0xcc.. bits) is set.  */
132 tst synd, 0xcccccccccccccccc
133 beq L(loop2)
134
 /* The chunk holding the NUL needs a precise syndrome.  Clear bits
    8-11 of every 16-bit lane (the low nibble of its high byte), then
    add adjacent bytes so that each pair collapses into one byte.  An
    overflowing byte add can only disturb bits above that pair's NUL
    flag, and those are discarded by the masking below.  */
135 bic vhas_nul.8h, 0x0f, lsl 8
136 addp vend.16b, vhas_nul.16b, vhas_nul.16b
137 fmov synd, dend
 /* Same masking as in L(tail): keep matches no later than the first
    NUL.  tmp aliases synd (both x3); synd is not read after the AND.  */
138 and nul_match, synd, 0xcccccccccccccccc
139 sub nul_match, nul_match, 1
140 and tmp, synd, 0x3333333333333333
141 ands tmp, tmp, nul_match
 /* A match in the final chunk supersedes the remembered one; src is
    already 16 bytes past that chunk.  */
142 csel chr_match, tmp, chr_match, ne
143 csel src_match, src, src_match, ne
 /* src_match - 1 is the last byte of the chosen chunk; step back
    clz / 4 bytes to reach the last match.  A match is always recorded
    by this point, so there is no 0 return on this path.  */
144 sub src_match, src_match, 1
145 clz tmp, chr_match
146 sub result, src_match, tmp, lsr 2
147 ret
148
149END(strrchr)
 /* weak_alias and libc_hidden_builtin_def are macros defined outside
    this file; presumably they export rindex as a weak alias of strrchr
    and set up the hidden internal symbol -- confirm in the glibc
    headers.  */
150weak_alias (strrchr, rindex)
151libc_hidden_builtin_def (strrchr)
152

source code of glibc/sysdeps/aarch64/strrchr.S