/*
 * strchr - find a character in a string
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Arguments and results.
   srcin  - in:  address of the first byte of the string to search.
   chrin  - in:  character to look for; its low byte is broadcast to
                 every vector lane before comparing.
   result - out: address of the first matching byte, or 0 when the NUL
                 terminator is reached first.  Shares x0 with srcin.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries.
   src   - read cursor; always a multiple of 16.
   tmp1  - misalignment of srcin, then the padding mask, then the
           syndrome and finally the index of its lowest set bit.
   wtmp2 - used to build the 0x10011001 mask constant.
   tmp3  - all-ones value for the padding mask, then the raw syndrome
           of the first (unaligned) chunk.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

/* Vector registers.
   vrepchr    - chrin replicated into all 16 byte lanes.
   qdata      - q-register view of the current 16-byte chunk (for ldr).
   vdata      - vector view of the same register (q1 and v1 alias).
   vhas_nul   - per-byte result of comparing the chunk with 0.
   vhas_chr   - per-byte result of comparing the chunk with vrepchr.
   vrepmask_0 - 0x20022002 in every 32-bit lane (marks NUL matches).
   vrepmask_c - 0x10011001 in every 32-bit lane (marks chrin matches).
   vend       - the two results combined, then packed into 64 bits.  */
#define vrepchr		v0
#define qdata		q1
#define vdata		v1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask_0	v4
#define vrepmask_c	v5
#define vend		v6

/* L(name) expands to .Lname, i.e. a label with the .L prefix.  */
#define L(l) .L ## l

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems). For each four-bit tuple, bit 0 is set if
   the relevant byte matched the requested character; bit 1 is set
   if the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL) and bits 2 and 3
   are not used.
   Since the bits in the syndrome reflect exactly the order in which
   things occur in the original string, a count_trailing_zeros()
   operation will identify exactly which byte is causing the termination,
   and why. */

/* Locals and temporaries: see the register aliases defined above. */

/* __strchr_aarch64_mte - find the first occurrence of a byte in a string.

   In:     srcin (x0) = address of the string.
           chrin (w1) = character to find; only its low byte is compared.
   Out:    result (x0) = address of the first byte equal to chrin, or 0
           if the NUL terminator is met first.  When chrin is itself NUL
           the address of the terminator is returned.
   Clobbers: x2-x5, v0-v6, condition flags.  No stack is used.

   Memory is only ever read as whole, 16-byte-aligned chunks, and a chunk
   is loaded only after the previous one has been checked for a match or
   terminator.

   NOTE(review): the mapping from string byte to syndrome nibble relies
   on the lane order produced by the `ldr qdata` loads; confirm that it
   holds for big-endian builds.  */
ENTRY(__strchr_aarch64_mte)
	/* Magic constant 0x10011001 to allow us to identify which lane
	   matches the requested byte. Magic constant 0x20022002 used
	   similarly for NUL termination. */
	mov	wtmp2, #0x1001
	movk	wtmp2, #0x1001, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #15		/* Work with aligned 16-byte chunks. */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #15	/* tmp1 = misalignment, Z if aligned. */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)			/* Flags still from the ands above. */

	/* Input string is not 16-byte aligned. Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding. */
	ldr	qdata, [src], #16
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #2		/* Four syndrome bits per byte. */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	mov	tmp3, #~0
	addp	vend.16b, vend.16b, vend.16b	/* 128->64 */
	lsl	tmp1, tmp3, tmp1	/* Zeros below the nibble of srcin. */

	mov	tmp3, vend.d[0]
	ands	tmp1, tmp3, tmp1	/* Mask padding bits. */
	b.ne	L(tail)			/* Here src = chunk + 16. */

	/* Main loop: two chunks per iteration.  The first load advances
	   src by 32, so the second chunk is at src - 16. */
L(loop):
	ldr	qdata, [src], #32
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition: the unmasked
	   compare results are non-zero iff some byte is NUL or chrin. */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b	/* 128->64 */
	mov	tmp1, vend.d[0]
	cbnz	tmp1, L(end)		/* Here src = chunk + 32. */

	ldr	qdata, [src, #-16]
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition. */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b	/* 128->64 */
	mov	tmp1, vend.d[0]
	cbz	tmp1, L(loop)

	/* Adjust src for next two subtractions, so that src = chunk + 32
	   for the second chunk as well. */
	add	src, src, #16
L(end):
	/* Termination condition found. Now need to establish exactly why
	   we terminated.  vhas_nul/vhas_chr still hold the compare results
	   of the chunk that hit; apply the masks skipped by the fast check
	   to build the real syndrome. */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	sub	src, src, #16		/* Now src = chunk + 16, as L(tail) expects. */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b	/* 128->64 */

	mov	tmp1, vend.d[0]
L(tail):
	/* On entry: tmp1 = non-zero syndrome, src = chunk + 16. */
	/* Count the trailing zeros, by bit reversing... */
	rbit	tmp1, tmp1
	/* Re-bias source. */
	sub	src, src, #16
	clz	tmp1, tmp1	/* And counting the leading zeros. */
	/* Tmp1 is even if the target character was found first. Otherwise
	   we've found the end of string and we weren't looking for NUL. */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #2	/* Bit index / 4 = byte offset. */
	csel	result, result, xzr, eq		/* 0 when NUL came first. */
	ret

END(__strchr_aarch64_mte)

/* Source: libc/AOR_v20.02/string/aarch64/strchr-mte.S */