/* strcpy/stpcpy - copy a string returning pointer to start/end.
   Copyright (C) 2013-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>. */

/* To build as stpcpy, define BUILD_STPCPY before compiling this file.

   To test the page crossing code path more thoroughly, compile with
   -DSTRCPY_TEST_PAGE_CROSS - this will force all unaligned copies through
   the slower entry path. This option is not intended for production use.

   NOTE(review): no code visible in this file tests STRCPY_TEST_PAGE_CROSS,
   so the paragraph above may be stale - confirm before relying on it. */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Incoming arguments and the return value.  `result' is the same
   register as `dstin' (x0), so code that never writes `result' returns
   the destination pointer unchanged.  */
#define dstin		x0
#define srcin		x1
#define result		x0

/* General-purpose scratch registers.  Two pairs deliberately share a
   register and therefore must never be live at the same time:
     len  / synd  -> x4
     tmp  / shift -> x5
   data1/dataw1 and data2/dataw2 are the 64-bit and 32-bit views of x6
   and x7 respectively.  */
#define src		x2
#define dst		x3
#define len		x4
#define synd		x4
#define tmp		x5
#define shift		x5
#define data1		x6
#define dataw1		w6
#define data2		x7
#define dataw2		w7

/* SIMD registers.  dataq/vdata are the q and v views of register 0,
   dend/vend are the d and v views of register 2, and vhas_nul/dataq2
   are the v and q views of register 1.  */
#define dataq		q0
#define vdata		v0
#define vhas_nul	v1
#define vend		v2
#define dend		d2
#define dataq2		q1

/* STRCPY is the symbol that gets defined.  IFSTPCPY (insn) emits `insn'
   only in the stpcpy build and expands to nothing otherwise.  Because the
   preprocessor splits the instruction text at its commas, the stpcpy
   variant re-joins the first argument and the rest as X,__VA_ARGS__.  */
#ifdef BUILD_STPCPY
# define STRCPY __stpcpy
# define IFSTPCPY(X,...) X,__VA_ARGS__
#else
# define STRCPY strcpy
# define IFSTPCPY(X,...)
#endif

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four
   bits per byte. We take 4 bits of every comparison byte using the
   shift-right-and-narrow-by-4 instruction (SHRN). Since the bits in the
   nibble mask reflect the order in which things occur in the original
   string, counting leading zeros of the mask (bit-reversed first where
   necessary) identifies exactly which byte matched. */

/* STRCPY - copy the NUL-terminated string at srcin, terminator included,
   to dstin.

   In:    x0 = dstin, x1 = srcin.
   Out:   x0 = dstin in the strcpy build (x0 is never written), or the
	  address of the NUL stored in the destination when built with
	  BUILD_STPCPY.
   Uses:  x2-x7, v0-v2 and the condition flags.  The stack is not touched.

   Structure:
     1. NUL inside the aligned 16-byte chunk containing srcin
	-> L(tail), length 0..15.
     2. NUL inside the following aligned chunk -> length 1..31; lengths
	16..31 are copied inline, shorter ones go to L(less16).
     3. Otherwise L(start_loop) copies the first 16 bytes and then runs a
	loop handling two aligned source chunks (32 bytes) per iteration.
   Every short copy is done as two possibly overlapping pieces: the first
   N bytes and the last N bytes (the latter ending on the NUL).  */
ENTRY (STRCPY)
	/* PTR_ARG is provided by <sysdep.h>, not by this file; presumably it
	   normalises the pointer arguments in x0/x1 for the target ABI --
	   confirm against its definition.  */
	PTR_ARG (0)
	PTR_ARG (1)

	/* First chunk: the aligned 16 bytes that contain srcin.  */
	bic	src, srcin, 15
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0	/* 0xff in each NUL byte.  */
	lsl	shift, srcin, 2			/* 4 mask bits per byte.  */
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 nibble mask.  */
	fmov	synd, dend
	/* A register-controlled shift uses only the low 6 bits of the count,
	   so this discards the nibbles of the (srcin & 15) bytes that lie
	   before the start of the string.  */
	lsr	synd, synd, shift
	cbnz	synd, L(tail)

	/* Second chunk: advance src to the next aligned 16 bytes.  */
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	shrn	vend.8b, vhas_nul.8h, 4
	fmov	synd, dend
	cbz	synd, L(start_loop)

	/* NUL is in the second chunk.  len = (src - srcin) + index of the
	   NUL inside the chunk, i.e. the string length (1..31).  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	sub	tmp, src, srcin
	clz	len, synd
	add	len, tmp, len, lsr 2		/* clz counts 4 bits/byte.  */
	tbz	len, 4, L(less16)		/* len <= 31: bit 4 <=> >= 16.  */

	/* 16 <= len <= 31: copy bytes [0, 15] and [len - 15, len].  Both
	   loads are done before either store.  */
	sub	tmp, len, 15
	ldr	dataq, [srcin]
	ldr	dataq2, [srcin, tmp]
	str	dataq, [dstin]
	str	dataq2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

L(tail):
	/* NUL is in the first chunk; synd was already shifted so that bit 0
	   corresponds to srcin.  The ld1 .16b load keeps bytes in memory
	   order in the vector lanes on either endianness (unlike the ldr q
	   loads used elsewhere), so this rbit is unconditional.  */
	rbit	synd, synd
	clz	len, synd
	lsr	len, len, 2			/* len = strlen, 0..15.  */
L(less16):
	/* Here 0 <= len <= 15.  */
	tbz	len, 3, L(less8)
	/* 8 <= len <= 15: copy bytes [0, 7] and [len - 7, len].  */
	sub	tmp, len, 7
	ldr	data1, [srcin]
	ldr	data2, [srcin, tmp]
	str	data1, [dstin]
	str	data2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(less8):
	/* Here 0 <= len <= 7.  The subs borrows (b.lo taken) when len < 3.  */
	subs	tmp, len, 3
	b.lo	L(less4)
	/* 3 <= len <= 7: copy bytes [0, 3] and [len - 3, len].  */
	ldr	dataw1, [srcin]
	ldr	dataw2, [srcin, tmp]
	str	dataw1, [dstin]
	str	dataw2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

L(less4):
	/* Here 0 <= len <= 2.  For len 1 or 2 copy the first two bytes, then
	   in every case store the terminator at dstin[len].  */
	cbz	len, L(zerobyte)
	ldrh	dataw1, [srcin]
	strh	dataw1, [dstin]
L(zerobyte):
	strb	wzr, [dstin, len]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(start_loop):
	/* Neither of the first two chunks holds a NUL, so at least 17 bytes
	   starting at srcin are non-NUL and the 16-byte load from srcin stays
	   inside the string.
	   tmp = srcin - dstin is kept for the whole loop.
	   dst = src - tmp is the destination address matching src.  */
	sub	tmp, srcin, dstin
	ldr	dataq2, [srcin]
	sub	dst, src, tmp
	str	dataq2, [dstin]
L(loop):
	/* Invariant: dataq holds the NUL-free chunk at src, and dst is the
	   destination address of that chunk.  */
	str	dataq, [dst], 32		/* Post-increment dst by 32.  */
	ldr	dataq, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* Pairwise max folds the 128-bit compare result into 64 bits; only
	   zero versus non-zero matters for the branch.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbnz	synd, L(loopend)		/* NUL chunk maps to dst - 16.  */
	str	dataq, [dst, -16]
	ldr	dataq, [src, 32]!		/* src += 32.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)
	/* NUL chunk maps to dst here; bias dst by 16 so that both exits
	   reach L(loopend) with the NUL chunk mapping to dst - 16.  */
	add	dst, dst, 16
L(loopend):
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* (dst - 16) - 15: after adding the NUL index below, dst is the
	   destination address 15 bytes before the terminator.  */
	sub	dst, dst, 31
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	len, synd
	lsr	len, len, 2			/* Index of NUL in the chunk.  */
	add	dst, dst, len
	/* Copy the final 16 bytes, ending on the NUL; dst + tmp is the
	   matching source address.  This may overlap bytes already copied.  */
	ldr	dataq, [dst, tmp]
	str	dataq, [dst]
	IFSTPCPY (add result, dst, 15)
	ret

END (STRCPY)

/* Symbol bookkeeping for the function defined above.  In the stpcpy build
   the code is assembled as __stpcpy and stpcpy is published as a weak
   alias of it.  weak_alias, libc_hidden_def and libc_hidden_builtin_def
   are glibc-internal macros that are not defined in this file; their
   exact effect should be confirmed against the libc headers.  */
#ifdef BUILD_STPCPY
weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
libc_hidden_builtin_def (stpcpy)
#else
libc_hidden_builtin_def (strcpy)
#endif


/* Source: glibc/sysdeps/aarch64/strcpy.S */