/*
 * strrchr - find last position of a character in a string.
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5
#define src_match	x6
#define src_offset	x7
#define const_m1	x8
#define tmp4		x9
#define nul_match	x10
#define chr_match	x11

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string, a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */
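
/* A rough C sketch of the syndrome idea (illustrative only; the names
   `hunk` and `c` are hypothetical, and the real code below builds the
   value with NEON compares and pairwise adds rather than a byte loop):

       uint64_t syndrome = 0;
       for (int i = 0; i < 32; i++)
         {
           if (hunk[i] == c)
             syndrome |= 1ULL << (2 * i);   // bit 0 of pair: char match
           if (hunk[i] == 0)
             syndrome |= 2ULL << (2 * i);   // bit 1 of pair: NUL

         }

   The lowest set bit identifies the first interesting byte and why it
   is interesting; this routine scans whole hunks and keeps the last
   syndrome that contains a character match.  */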

ENTRY (__strrchr_aarch64)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
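	/* The bytes of 0x40100401 are 0x01, 0x04, 0x10, 0x40, repeating
	   every four lanes: cmeq turns a matching byte into 0xff, the AND
	   keeps one unique bit per lane, and the addp reductions below
	   pack 32 such bytes into a 64-bit syndrome in which source byte i
	   owns bit 2*i (bit 2*i+1 for the doubled NUL mask 0x80200802).  */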
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	mov	src_offset, #0
	ands	tmp1, srcin, #31
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s	/* equiv: lsl #1 */
	b.eq	L(aligned)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b	// 128->64
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vhas_nul1.d[0]
	lsl	tmp1, tmp1, #1
	mov	const_m1, #~0
	mov	chr_match, vhas_chr1.d[0]
	lsr	tmp3, const_m1, tmp1

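	/* Worked example (hypothetical offset of 3): tmp1 holds -3,
	   doubled to -6; lsr takes the shift amount modulo 64, i.e. 58,
	   so tmp3 is 0x3f -- one pair of syndrome bits for each of the
	   three padding bytes ahead of srcin, which the bics clear.  */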
	bic	nul_match, nul_match, tmp3	// Mask padding bits.
	bic	chr_match, chr_match, tmp3	// Mask padding bits.
	cbnz	nul_match, L(tail)

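	/* Loop invariant: src points just past the most recently loaded
	   32-byte hunk, whose syndromes are in nul_match and chr_match;
	   src_match/src_offset remember the address (past the hunk) and
	   syndrome of the last hunk that contained a character match
	   (src_offset == 0 means no match seen yet).  */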
L(loop):
	cmp	chr_match, #0
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne
L(aligned):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	addp	vend1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vend1.16b			// 128->64
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vend1.d[0]
	mov	chr_match, vhas_chr1.d[0]
	cbz	nul_match, L(loop)

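	/* A NUL was seen.  In the loop, vend1 was reduced from the raw
	   cmeq results (any nonzero value suffices to detect a NUL), so
	   redo the reduction with the position mask applied to recover
	   a properly weighted NUL syndrome for the tail code.  */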
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
	mov	nul_match, vhas_nul1.d[0]

L(tail):
	/* Work out exactly where the string ends.  */
	sub	tmp4, nul_match, #1
	eor	tmp4, tmp4, nul_match
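	/* tmp4 = nul_match ^ (nul_match - 1) sets every bit up to and
	   including the lowest set bit of nul_match, e.g. 0b0100 gives
	   0b0111; the ands below thus keeps only the character matches
	   that occur at or before the first NUL.  */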
	ands	chr_match, chr_match, tmp4
	/* And pick the values corresponding to the last match.  */
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne

	/* Count down from the top of the syndrome to find the last match.  */
	clz	tmp3, src_offset
	/* Src_match points beyond the word containing the match, so we can
	   simply subtract half the bit-offset into the syndrome.  Because
	   we are counting down, we need to go back one more character.
	   (E.g. a last match at byte 5 sets bit 10, clz gives 53, and
	   src_match - ((53 + 2) >> 1) = src_match - 27 lands on byte 5
	   of the 32-byte hunk.)  */
	add	tmp3, tmp3, #2
	sub	result, src_match, tmp3, lsr #1
	/* But if the syndrome shows no match was found, then return NULL.  */
	cmp	src_offset, #0
	csel	result, result, xzr, ne

	ret

END (__strrchr_aarch64)

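/* For reference, a plain C version with the same observable behaviour
   (a hypothetical sketch for illustration, not part of this build):

       #include <stddef.h>

       static char *strrchr_ref (const char *s, int c)
       {
         char *last = NULL;
         char ch = (char) c;
         for (;; s++)
           {
             if (*s == ch)
               last = (char *) s;   // includes the terminator when ch == 0
             if (*s == '\0')
               return last;
           }
       }
*/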
