1/* Copyright (C) 2012-2024 Free Software Foundation, Inc.
2
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20
21/* Assumptions:
22 *
23 * ARMv8-a, AArch64, Advanced SIMD.
24 * MTE compatible.
25 */
26
/* Default the entry-point symbol name to __strlen, but only when STRLEN has
   not already been defined before this point.  */
27#ifndef STRLEN
28# define STRLEN __strlen
29#endif
30
/* Register aliases used by the routine below.  srcin and result are both
   x0, so the input pointer is gone once result has been written.  */
31#define srcin x0 /* input: address of the string */
32#define result x0 /* output: length; same register as srcin */
33
34#define src x1 /* 16-byte aligned pointer to the current chunk */
35#define synd x2 /* syndrome: nonzero bits mark NUL bytes in a chunk */
36#define tmp x3 /* scratch for the final leading-zero count */
37#define shift x4 /* bit offset of srcin within its first chunk */
38
39#define data q0 /* 128-bit view of the chunk just loaded */
40#define vdata v0 /* vector view of the same register as data */
41#define vhas_nul v1 /* per-byte compare result: 0xff where the byte is NUL */
42#define vend v2 /* narrowed/folded form of vhas_nul */
43#define dend d2 /* low 64 bits of vend, moved to synd with fmov */
44
45/* Core algorithm:
46 Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
47 four bits per byte using the shrn instruction. A count-trailing-zeros
48 operation on that mask then identifies the first zero byte. */
49
/* STRLEN: on entry x0 (srcin) holds the address of a NUL-terminated string;
   on return x0 (result) holds the number of bytes before the first NUL.
   Scratch registers written: x1-x4 and v0-v2 (see the #define aliases
   earlier in this file).  All loads are of whole 16-byte aligned chunks.  */
50ENTRY (STRLEN)
51 PTR_ARG (0) /* NOTE(review): macro from sysdep.h applied to argument 0; its definition is not visible here */
52 bic src, srcin, 15 /* src = srcin rounded down to a 16-byte boundary */
53 ld1 {vdata.16b}, [src] /* load the aligned chunk that contains srcin */
54 cmeq vhas_nul.16b, vdata.16b, 0 /* each byte lane becomes 0xff where the data byte is NUL, else 0 */
55 lsl shift, srcin, 2 /* 4 syndrome bits per byte; the register shift below uses only the low 6 bits, i.e. (srcin & 15) * 4 */
56 shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
57 fmov synd, dend /* move the 64-bit mask (one nibble per byte) to a general register */
58 lsr synd, synd, shift /* discard the nibbles of bytes that precede srcin */
59 cbz synd, L(loop) /* no NUL at or after srcin in the first chunk */
60
 /* NUL found in the first chunk: rbit + clz counts trailing zero bits, and
    dividing by 4 (bits per byte) gives the length directly because the
    mask was already shifted to start at srcin.  */
61 rbit synd, synd
62 clz result, synd
63 lsr result, result, 2
64 ret
65
66 .p2align 5 /* align the loop entry to 32 bytes */
 /* Main loop, unrolled twice.  Here only a cheap any-NUL test is done
    per chunk (umaxp folds all 16 compare lanes into the low 64 bits); the
    exact position is worked out once at loop_end.  Invariant on reaching
    loop_end: the chunk held in vhas_nul was loaded from src + 16.  */
67L(loop):
68 ldr data, [src, 16] /* chunk at src + 16; src itself is not updated */
69 cmeq vhas_nul.16b, vdata.16b, 0
70 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
71 fmov synd, dend /* nonzero iff this chunk contains a NUL */
72 cbnz synd, L(loop_end)
73 ldr data, [src, 32]! /* pre-index: src += 32, then load the chunk at the new src */
74 cmeq vhas_nul.16b, vdata.16b, 0
75 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
76 fmov synd, dend
77 cbz synd, L(loop)
78 sub src, src, 16 /* restore the loop_end invariant: the NUL chunk is at src + 16 */
79L(loop_end):
80 shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
81 sub result, src, srcin /* last read of srcin; x0 holds the result from here on */
82 fmov synd, dend
 /* NOTE(review): the bit reversal is skipped on big-endian, presumably
    because ldr of a q register orders the bytes opposite to the ld1 used
    for the first chunk there, so clz alone already locates the
    lowest-addressed NUL -- confirm.  */
83#ifndef __AARCH64EB__
84 rbit synd, synd
85#endif
86 add result, result, 16 /* the chunk with the NUL starts at src + 16 */
87 clz tmp, synd
88 add result, result, tmp, lsr 2 /* add the byte index of the first NUL within that chunk (4 bits per byte) */
89 ret
90
91END (STRLEN)
 /* NOTE(review): the two macros below are defined outside this file;
    presumably they publish STRLEN under the public name strlen and set up
    the library-internal alias -- confirm against their definitions.  */
92weak_alias (STRLEN, strlen)
93libc_hidden_builtin_def (strlen)
94

source code of glibc/sysdeps/aarch64/strlen.S