strnlen.S source code [glibc/sysdeps/aarch64/strnlen.S]

1	/* strnlen - calculate the length of a string with limit.
2
3	Copyright (C) 2013-2024 Free Software Foundation, Inc.
4
5	This file is part of the GNU C Library.
6
7	The GNU C Library is free software; you can redistribute it and/or
8	modify it under the terms of the GNU Lesser General Public
9	License as published by the Free Software Foundation; either
10	version 2.1 of the License, or (at your option) any later version.
11
12	The GNU C Library is distributed in the hope that it will be useful,
13	but WITHOUT ANY WARRANTY; without even the implied warranty of
14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	Lesser General Public License for more details.
16
17	You should have received a copy of the GNU Lesser General Public
18	License along with the GNU C Library. If not, see
19	<https://www.gnu.org/licenses/>.  */
20
21	#include <sysdep.h>
22
23	/* Assumptions:
24	*
25	* ARMv8-a, AArch64, Advanced SIMD.
26	* MTE compatible.
27	*/
28
/* Register aliases used by __strnlen.  */

/* Incoming arguments and the return value.  result shares x0 with
   srcin, so srcin is dead once result has been written.  */
29	#define srcin x0
30	#define cntin x1
31	#define result x0
32
/* Integer scratch registers.
   src    - 16-byte-aligned address of the chunk being examined.
   synd   - 64-bit syndrome: the vector compare result after it has
	    been reduced and moved to a general-purpose register.
   shift  - bit offset used to drop syndrome bits for bytes that
	    precede srcin in the first chunk.
   tmp    - temporary for the count computation in L(start_loop).
   cntrem - remaining count that controls the main loop.
   shift and tmp both map to x4; shift is only used while handling the
   first chunk and tmp only in L(start_loop).  */
33	#define src x2
34	#define synd x3
35	#define shift x4
36	#define tmp x4
37	#define cntrem x5
38
/* Vector registers.  qdata and vdata are two views of register 0 (the
   loaded chunk), vhas_chr holds the per-byte compare result, and
   vend/dend are two views of register 2 (the reduced result and its
   low 64 bits).  */
39	#define qdata q0
40	#define vdata v0
41	#define vhas_chr v1
42	#define vend v2
43	#define dend d2
44
45	/*
46	Core algorithm:
47	Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
48	four bits per byte using the shrn instruction. A count trailing zeros then
49	identifies the first zero byte.  */
50
/* __strnlen: length of the string at srcin, limited to cntin bytes.

   In:   x0 (srcin) = start of the string, x1 (cntin) = maximum length.
   Out:  x0 (result) = offset of the first zero byte from srcin, or
	 cntin when that offset would not be smaller than cntin.
   Writes x2-x5, v0-v2 and the condition flags.  The visible
   instructions make no calls and never reference sp.

   Outline:
     1. Examine the aligned 16-byte chunk that contains srcin, ignoring
	the bytes in front of srcin.
     2. If the limit reaches past that chunk, scan further aligned
	chunks two per iteration (L(loop32) / L(loop32_2)), updating the
	remaining count once per iteration.
     3. Turn the compare result of the final chunk into a byte offset
	and take the smaller of that offset and cntin.  */
51	ENTRY (__strnlen)
	/* PTR_ARG and SIZE_ARG are macros defined outside this file;
	   presumably they normalise the pointer and size argument
	   registers for ABIs with 32-bit pointers - confirm against
	   sysdep.h.  */
52	PTR_ARG (`0`)
53	SIZE_ARG (`1`)
	/* src = srcin rounded down to a 16-byte boundary.  */
54	bic src, srcin, `15`
	/* A zero limit returns cntin (0) before anything is loaded.  */
55	cbz cntin, L(nomatch)
	/* Load the whole aligned chunk and set every byte lane of
	   vhas_chr to all-ones where the data byte is zero.  */
56	ld1 {vdata`.16b`}, [src]
57	cmeq vhas_chr`.16b`, vdata`.16b`, `0`
	/* shift = srcin * 4.  The register form of lsr below uses the
	   shift amount modulo 64, so the effective shift is
	   (srcin & 15) * 4: four syndrome bits per byte preceding
	   srcin.  */
58	lsl shift, srcin, `2`
	/* Shift each 16-bit lane right by 4 and narrow it to 8 bits,
	   leaving 4 bits per source byte in the low 64 bits of vend.  */
59	shrn vend`.8b`, vhas_chr`.8h`, `4` / 128->64 /
60	fmov synd, dend
	/* Discard the syndrome bits of the bytes in front of srcin.  */
61	lsr synd, synd, shift
	/* No zero byte from srcin to the end of this chunk: continue
	   with the following chunks.  */
62	cbz synd, L(start_loop)
63	L(finish):
	/* rbit + clz counts trailing zero bits; dividing by 4 gives the
	   byte offset of the first zero byte from srcin.  The rbit here
	   is not conditional on endianness, unlike the one in L(end).  */
64	rbit synd, synd
65	clz synd, synd
66	lsr result, synd, `2`
	/* result = cntin if cntin <= result (unsigned), i.e. the
	   smaller of the two.  */
67	cmp cntin, result
68	csel result, cntin, result, ls
69	ret
70
	/* No zero byte within the limit: return cntin.  */
71	L(nomatch):
72	mov result, cntin
73	ret
74
75	L(start_loop):
	/* tmp = (src - srcin) + 17 = 17 - (srcin & 15): the number of
	   bytes already checked in the first chunk, plus one.  */
76	sub tmp, src, srcin
77	add tmp, tmp, `17`
	/* cntrem = cntin - tmp.  A borrow means cntin does not reach
	   past the first chunk, which held no zero byte, so the answer
	   is cntin.  */
78	subs cntrem, cntin, tmp
79	b.lo L(nomatch)
80
	/* Bit 4 of cntrem picks the loop entry point.  When it is clear,
	   enter at L(loop32_2), whose load from [src + 16] reads the
	   chunk right after the first one.  When it is set, move src
	   back by 16 so that the pre-indexed load at L(loop32)
	   (src += 32) lands on that same chunk.  The original comment
	   below gives the purpose: the loop must not read a 16-byte
	   chunk past the limit.  */
81	/ Make sure that it won't overread by a 16-byte chunk /
82	tbz cntrem, `4`, L(loop32_2)
83	sub src, src, `16`
	/* Align the loop head to a 2^5 = 32-byte boundary.  */
84	.p2align `5`
85	L(loop32):
	/* First chunk of the pair: src += 32, then load 16 bytes from
	   the updated src.  */
86	ldr qdata, [src, `32`]!
87	cmeq vhas_chr`.16b`, vdata`.16b`, `0`
	/* Pairwise maximum of vhas_chr with itself folds all 16 lanes
	   into the low 64 bits; non-zero means a zero byte was seen.  */
88	umaxp vend`.16b`, vhas_chr`.16b`, vhas_chr`.16b` / 128->64 /
89	fmov synd, dend
	/* Match in the chunk at src: src already points at it.  */
90	cbnz synd, L(end)
91	L(loop32_2):
	/* Second chunk of the pair: load from src + 16 without updating
	   src.  */
92	ldr qdata, [src, `16`]
	/* Account for both chunks of this iteration.  cmeq does not
	   alter the flags, so b.lo still tests the borrow from subs.  */
93	subs cntrem, cntrem, `32`
94	cmeq vhas_chr`.16b`, vdata`.16b`, `0`
	/* Count exhausted: stop after this chunk whether or not it
	   contains a zero byte.  */
95	b.lo L(end_2)
96	umaxp vend`.16b`, vhas_chr`.16b`, vhas_chr`.16b` / 128->64 /
97	fmov synd, dend
98	cbz synd, L(loop32)
99	L(end_2):
	/* The chunk just examined lives at src + 16; make src point at
	   it so that L(end) can use src for either exit.  */
100	add src, src, `16`
101	L(end):
	/* Rebuild the 4-bits-per-byte syndrome for the chunk at src
	   from vhas_chr, which still holds that chunk's compare.  */
102	shrn vend`.8b`, vhas_chr`.8h`, `4` / 128->64 /
	/* result = offset of that chunk from srcin.  */
103	sub result, src, srcin
104	fmov synd, dend
	/* When __AARCH64EB__ is not defined, rbit + clz counts trailing
	   zeros.  When it is defined the rbit is omitted and clz counts
	   from the most significant end instead - presumably because
	   ldr of a q register orders the bytes differently on
	   big-endian; confirm against the architecture manual.  */
105	#ifndef __AARCH64EB__
106	rbit synd, synd
107	#endif
108	clz synd, synd
	/* Add the byte index inside the chunk (bit count / 4).  If the
	   chunk held no zero byte, synd is 0, clz yields 64 and 16 is
	   added.  */
109	add result, result, synd, lsr `2`
	/* result = cntin if cntin <= result (unsigned), i.e. the
	   smaller of the two.  */
110	cmp cntin, result
111	csel result, cntin, result, ls
112	ret
113
114	END (__strnlen)
	/* The macros below are defined outside this file; by their names
	   and arguments they presumably export __strnlen as a hidden
	   libc symbol and provide strnlen as a weak alias for it -
	   confirm against the glibc symbol-handling headers.  */
115	libc_hidden_def (__strnlen)
116	weak_alias (__strnlen, strnlen)
117	libc_hidden_def (strnlen)
118

source code of glibc/sysdeps/aarch64/strnlen.S