1 | /* Copyright (C) 2012-2024 Free Software Foundation, Inc. |
2 | |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library. If not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | |
21 | /* Assumptions: |
22 | * |
23 | * ARMv8-a, AArch64, Advanced SIMD. |
24 | * MTE compatible. |
25 | */ |
26 | |
27 | #ifndef STRLEN /* Entry-point name; a definition made before this point takes precedence. */ |
28 | # define STRLEN __strlen /* Default symbol name used by ENTRY/END below. */ |
29 | #endif |
30 | |
31 | #define srcin x0 /* Incoming string pointer (argument 0). */ |
32 | #define result x0 /* Returned length; same register as srcin, so srcin is dead once result is written. */ |
33 | |
34 | #define src x1 /* 16-byte-aligned chunk pointer derived from srcin. */ |
35 | #define synd x2 /* Syndrome: NUL-match mask moved from the SIMD side to a general register. */ |
36 | #define tmp x3 /* Scratch for the leading-zero count in the loop exit path. */ |
37 | #define shift x4 /* 4 * srcin; used as the shift amount that drops mask bits for bytes before srcin. */ |
38 | |
39 | #define data q0 /* 128-bit view of SIMD register 0, used by the ldr loads in the loop. */ |
40 | #define vdata v0 /* Vector view of the same register 0, used by ld1 and cmeq. */ |
41 | #define vhas_nul v1 /* Per-byte compare result: 0xff where the chunk byte is NUL, else 0x00. */ |
42 | #define vend v2 /* Compare result reduced to 64 bits (by shrn or umaxp). */ |
43 | #define dend d2 /* Low 64 bits of vend, the view used to fmov the mask into synd. */ |
44 | |
45 | /* Core algorithm: |
46 | Process the string in 16-byte aligned chunks. Compute a 64-bit mask with |
47 | four bits per byte using the shrn instruction. Counting the trailing zeros |
48 | of that mask and dividing by four then gives the index of the first zero byte. */ |
49 | |
50 | ENTRY (STRLEN) /* Returns in result (x0) the number of bytes before the first NUL byte of the string at srcin (x0). */ |
51 | PTR_ARG (0) /* Macro defined outside this file, applied to argument 0; presumably pointer-argument sanitizing -- confirm in sysdep.h. */ |
52 | bic src, srcin, 15 /* src = srcin rounded down to a 16-byte boundary. */ |
53 | ld1 {vdata.16b}, [src] /* Load the aligned 16-byte chunk that contains the first string byte. */ |
54 | cmeq vhas_nul.16b, vdata.16b, 0 /* Each byte lane becomes 0xff where the chunk byte is NUL, else 0x00. */ |
55 | lsl shift, srcin, 2 /* shift = 4 * srcin; the register lsr below uses it modulo 64, i.e. 4 * (srcin & 15). */ |
56 | shrn vend.8b, vhas_nul.8h, 4 /* 128->64: keep one nibble of each compare byte, giving 4 mask bits per chunk byte. */ |
57 | fmov synd, dend /* Move the 64-bit nibble mask into a general register. */ |
58 | lsr synd, synd, shift /* Drop the nibbles of chunk bytes that precede srcin; bit 0 now belongs to the byte at srcin. */ |
59 | cbz synd, L(loop) /* No NUL at or after srcin in the first chunk: go scan the following chunks. */ |
60 | /* NUL found in the first chunk: turn the mask into a byte count. */ |
61 | rbit synd, synd /* Reverse the bits so that clz counts the trailing zeros of the mask. Unconditional here: the ld1 byte-element load puts memory byte i in lane i regardless of endianness. */ |
62 | clz result, synd /* 4 * (offset of the first NUL from srcin). */ |
63 | lsr result, result, 2 /* Divide by 4, since each byte contributes four mask bits. */ |
64 | ret |
65 | /* Main loop: tests two aligned 16-byte chunks per iteration. */ |
66 | .p2align 5 /* Align the loop entry to a 32-byte boundary. */ |
67 | L(loop): |
68 | ldr data, [src, 16] /* Load the chunk at src + 16; src itself is not updated. */ |
69 | cmeq vhas_nul.16b, vdata.16b, 0 /* 0xff in every lane whose byte is NUL. */ |
70 | umaxp vend.16b, vhas_nul.16b, vhas_nul.16b /* Pairwise max folds the 16 compare bytes into the low 64 bits: nonzero iff the chunk has a NUL. */ |
71 | fmov synd, dend |
72 | cbnz synd, L(loop_end) /* NUL is in the chunk at src + 16. */ |
73 | ldr data, [src, 32]! /* Pre-indexed: src += 32, then load the chunk at the new src. */ |
74 | cmeq vhas_nul.16b, vdata.16b, 0 |
75 | umaxp vend.16b, vhas_nul.16b, vhas_nul.16b |
76 | fmov synd, dend |
77 | cbz synd, L(loop) /* Neither chunk had a NUL: next iteration. */ |
78 | sub src, src, 16 /* NUL is in the chunk at src; rebias so it sits at src + 16, matching the other exit path. */ |
79 | L(loop_end): /* On entry vhas_nul describes the chunk at src + 16. */ |
80 | shrn vend.8b, vhas_nul.8h, 4 /* 128->64: 4 mask bits per chunk byte, as in the first-chunk path. */ |
81 | sub result, src, srcin /* src - srcin; reads srcin in the same instruction that overwrites x0. The +16 bias is added below. */ |
82 | fmov synd, dend |
83 | #ifndef __AARCH64EB__ |
84 | rbit synd, synd /* Little-endian only: the first byte's nibble is in the low bits, so reverse for clz. Skipped on big-endian, where the q-register ldr leaves the first byte's nibble in the most significant bits. */ |
85 | #endif |
86 | add result, result, 16 /* Account for the matching chunk being at src + 16. */ |
87 | clz tmp, synd /* 4 * (index of the first NUL within the chunk). */ |
88 | add result, result, tmp, lsr 2 /* Add the NUL's index within the chunk (tmp / 4). */ |
89 | ret |
90 | |
91 | END (STRLEN) |
92 | weak_alias (STRLEN, strlen) /* Macro defined outside this file; presumably exports strlen as a weak alias of STRLEN -- confirm. */ |
93 | libc_hidden_builtin_def (strlen) /* Macro defined outside this file; presumably sets up the library-internal hidden definition of strlen -- confirm. */ |
94 | |