strchrnul.S source code [glibc/sysdeps/aarch64/strchrnul.S]

/* strchrnul - find a character or nul in a string

   Copyright (C) 2014-2024 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
20
21	#include <sysdep.h>
22
/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */
28
/* Register aliases used by __strchrnul.

   Two pairs of names share a physical register:
     - result and srcin are both x0, so the return value overwrites the
       incoming string pointer;
     - tmp1 (x1) overlaps chrin (w1), so chrin has to be consumed before
       tmp1 is first written.  */

/* Incoming arguments and return value.  */
#define srcin		x0	/* Pointer to the string to search.  */
#define chrin		w1	/* Character to look for.  */
#define result		x0	/* Returned pointer.  */

/* General-purpose scratch registers.  */
#define src		x2	/* 16-byte aligned chunk pointer.  */
#define tmp1		x1	/* 64-bit match mask / bit index.  */
#define tmp2		x3	/* Shift count for the first chunk.  */

/* Advanced SIMD registers.  qdata/vdata and dend/vend are different views
   of the same registers (q1/v1 and d4/v4).  */
#define vrepchr		v0	/* chrin replicated to every byte lane.  */
#define vdata		v1	/* Current 16-byte chunk.  */
#define qdata		q1
#define vhas_nul	v2	/* Not referenced by the code visible here.  */
#define vhas_chr	v3	/* Per-byte "matches chrin or is NUL" flags.  */
#define vend		v4	/* Compare result narrowed to 64 bits.  */
#define dend		d4
44
/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with shift right and narrow
   by 4 instruction. Since the bits in the nibble mask reflect the order in
   which things occur in the original string, counting leading zeros (after
   reversing the mask with rbit where the code below does so) identifies
   exactly which byte matched.  */
52
/* char *__strchrnul (const char *s, int c)

   In:   srcin (x0) = s, chrin (w1) = c; only the low byte of c takes part
	 in the comparison because it is replicated into byte lanes.
   Out:  result (x0) = address of the first byte that equals c, or of the
	 terminating NUL byte if that comes first.
   Uses: x1, x2, x3, v0, v1, v3, v4 as scratch.  The instructions below do
	 not reference sp.

   Every load is a 16-byte load from a 16-byte aligned address, so the
   first chunk can contain bytes that precede s; their mask bits are
   shifted out before the mask is tested.  */
ENTRY (__strchrnul)
	/* PTR_ARG comes from sysdep.h, which is not visible here; presumably
	   it normalises pointer argument 0 for the target ABI.  */
	PTR_ARG (0)
	bic	src, srcin, 15		/* src = s rounded down to 16 bytes.  */
	dup	vrepchr.16b, chrin	/* Copy c into all 16 byte lanes.  */
	ld1	{vdata.16b}, [src]
	/* cmeq: lane = 0xff where the data byte equals c, otherwise 0.
	   cmhs: lane = 0xff where that result is >= the data byte (unsigned).
	   This holds for the c matches (0xff >= anything) and for NUL data
	   bytes (0 >= 0), so vhas_chr flags "equals c or is NUL".  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	/* tmp2 = s * 4.  The register-controlled lsr below only uses the low
	   six bits of tmp2, i.e. 4 * (s mod 16): four mask bits for each byte
	   of the chunk that lies before s.  */
	lsl	tmp2, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	tmp1, dend
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)

	/* Hit in the first chunk.  After the shift, bit 0 belongs to the byte
	   at s, so rbit + clz yields 4 * (index of the first hit from s).  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	add	result, srcin, tmp1, lsr 2
	ret

	.p2align 4
	/* Main loop, two chunks per iteration.  On entry src is the address
	   of the last chunk already examined.  umaxp folds the 128-bit compare
	   result into 64 bits, which is enough to test for "any lane set".  */
L(loop):
	ldr	qdata, [src, 16]
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbnz	tmp1, L(end)		/* Hit in the chunk at src + 16.  */
	ldr	qdata, [src, 32]!	/* Pre-index: src += 32, then load.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)
	/* Hit in the chunk at src: cancel the add performed at L(end).  */
	sub	src, src, 16
L(end):
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	add	src, src, 16		/* src = chunk containing the hit.  */
	fmov	tmp1, dend
	/* ldr qdata is one 128-bit load.  On big-endian the first byte of the
	   chunk therefore already sits in the most significant bits and clz
	   can be applied directly; on little-endian the mask is bit-reversed
	   first.  */
#ifndef __AARCH64EB__
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1		/* 4 * (index of the first hit).  */
	add	result, src, tmp1, lsr 2
	ret

END(__strchrnul)
/* libc_hidden_def and weak_alias are macros defined outside this file;
   judging by its name and arguments, weak_alias exports strchrnul as a weak
   alias of __strchrnul.  */
libc_hidden_def (__strchrnul)
weak_alias (__strchrnul, strchrnul)
100

source code of glibc/sysdeps/aarch64/strchrnul.S