/* Copyright (C) 2006-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above).
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...) code
#else
#define PLD(code...)
#endif
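/* Illustrative example: with preload enabled, a line such as
     PLD( pld [r1, #0] )
   expands to a pld that prefetches the source into the data cache;
   on pre-v5TE architectures the macro expands to nothing, so the
   same body assembles on every supported core.  */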

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 */
//#define CALGN(code...) code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 */
#ifndef __ARMEB__
#define PULL lsr
#define PUSH lsl
#else
#define PULL lsl
#define PUSH lsr
#endif
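/* The misaligned-copy code below builds each output word from two
   adjacent source words: the previously loaded word shifted by PULL
   and the next word shifted by PUSH.  Swapping lsr/lsl under
   __ARMEB__ keeps that code endian independent.  */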

        .text
        .syntax unified

/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
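/* Per the ARM calling convention: on entry r0 = dest, r1 = src,
   r2 = n.  r0 must also be returned unchanged, which is why it is
   pushed on entry and popped on every exit path.  */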

ENTRY(memcpy)

        push    {r0, r4, lr}
        cfi_adjust_cfa_offset (12)
        cfi_rel_offset (r4, 4)
        cfi_rel_offset (lr, 8)

        cfi_remember_state

        subs    r2, r2, #4
        blo     8f
        ands    ip, r0, #3
    PLD(    pld     [r1, #0]        )
        bne     9f
        ands    ip, r1, #3
        bne     10f
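/* At least 4 bytes remain here.  If dest is not word aligned we
   branch to 9f to copy the leading bytes; if dest is aligned but src
   is not, 10f handles the shifted copy.  Otherwise both pointers are
   word aligned and we fall through to the fast path.  */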

1:      subs    r2, r2, #(28)
        push    {r5 - r8}
        cfi_adjust_cfa_offset (16)
        cfi_rel_offset (r5, 0)
        cfi_rel_offset (r6, 4)
        cfi_rel_offset (r7, 8)
        cfi_rel_offset (r8, 12)
        blo     5f

    CALGN(  ands    ip, r1, #31     )
    CALGN(  rsb     r3, ip, #32     )
    CALGN(  sbcsne  r4, r3, r2      )  @ C is always set here
    CALGN(  bcs     2f              )
    CALGN(  adr     r4, 6f          )
    CALGN(  subs    r2, r2, r3      )  @ C gets set
#ifndef ARM_ALWAYS_BX
    CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
    CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
    CALGN(  bx      r4              )
#endif

    PLD(    pld     [r1, #0]        )
2:  PLD(    cmp     r2, #96         )
    PLD(    pld     [r1, #28]       )
    PLD(    blo     4f              )
    PLD(    pld     [r1, #60]       )
    PLD(    pld     [r1, #92]       )

3:  PLD(    pld     [r1, #124]      )
4:      ldmia   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
        subs    r2, r2, #32
        stmia   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
        bhs     3b
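/* Main loop: each iteration moves 32 bytes with a single ldmia/stmia
   pair through eight registers, prefetching the source well ahead of
   the loads (up to offset #124) when PLD is available.  */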

5:      ands    ip, r2, #28
        rsb     ip, ip, #32
#ifndef ARM_ALWAYS_BX
        /* C is always clear here.  */
        addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        b       7f
#else
        beq     7f
        push    {r10}
        cfi_adjust_cfa_offset (4)
        cfi_rel_offset (r10, 0)
0:      add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        /* If alignment is not perfect, then there will be some
           padding (nop) instructions between this BX and label 6.
           The computation above assumed that two instructions
           later is exactly the right spot.  */
        add     r10, #(6f - (0b + PC_OFS))
        bx      r10
#endif
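/* A computed jump in the style of a Duff's device: ip now holds
   32 minus the number of leftover bytes (a multiple of 4, at most
   28), so the branch above lands far enough into the ladder below
   that exactly one ldr executes per remaining word.  Each slot is
   padded to 1 << ARM_BX_ALIGN_LOG2 bytes so the offset arithmetic
   holds for both the add-to-pc and the BX entry paths; a matching
   ladder of str slots follows at label 66.  */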
        .p2align ARM_BX_ALIGN_LOG2
6:      nop
        .p2align ARM_BX_ALIGN_LOG2
        ldr     r3, [r1], #4
        .p2align ARM_BX_ALIGN_LOG2
        ldr     r4, [r1], #4
        .p2align ARM_BX_ALIGN_LOG2
        ldr     r5, [r1], #4
        .p2align ARM_BX_ALIGN_LOG2
        ldr     r6, [r1], #4
        .p2align ARM_BX_ALIGN_LOG2
        ldr     r7, [r1], #4
        .p2align ARM_BX_ALIGN_LOG2
        ldr     r8, [r1], #4
        .p2align ARM_BX_ALIGN_LOG2
        ldr     lr, [r1], #4

#ifndef ARM_ALWAYS_BX
        add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        nop
#else
0:      add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
        /* If alignment is not perfect, then there will be some
           padding (nop) instructions between this BX and label 66.
           The computation above assumed that two instructions
           later is exactly the right spot.  */
        add     r10, #(66f - (0b + PC_OFS))
        bx      r10
#endif
        .p2align ARM_BX_ALIGN_LOG2
66:     nop
        .p2align ARM_BX_ALIGN_LOG2
        str     r3, [r0], #4
        .p2align ARM_BX_ALIGN_LOG2
        str     r4, [r0], #4
        .p2align ARM_BX_ALIGN_LOG2
        str     r5, [r0], #4
        .p2align ARM_BX_ALIGN_LOG2
        str     r6, [r0], #4
        .p2align ARM_BX_ALIGN_LOG2
        str     r7, [r0], #4
        .p2align ARM_BX_ALIGN_LOG2
        str     r8, [r0], #4
        .p2align ARM_BX_ALIGN_LOG2
        str     lr, [r0], #4

#ifdef ARM_ALWAYS_BX
        pop     {r10}
        cfi_adjust_cfa_offset (-4)
        cfi_restore (r10)
#endif

    CALGN(  bcs     2b              )

7:      pop     {r5 - r8}
        cfi_adjust_cfa_offset (-16)
        cfi_restore (r5)
        cfi_restore (r6)
        cfi_restore (r7)
        cfi_restore (r8)

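/* Tail: 0 to 3 bytes remain.  The lsl #31 below moves bit 1 of the
   count into the carry flag and leaves bit 0 as the only possible
   set bit of the result, so the cs pairs copy two bytes and the ne
   pair copies one, with no further compare needed.  */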

8:      movs    r2, r2, lsl #31
        ldrbne  r3, [r1], #1
        ldrbcs  r4, [r1], #1
        ldrbcs  ip, [r1]
        strbne  r3, [r0], #1
        strbcs  r4, [r0], #1
        strbcs  ip, [r0]

#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
        pop     {r0, r4, lr}
        cfi_adjust_cfa_offset (-12)
        cfi_restore (r4)
        cfi_restore (lr)
        bx      lr
#else
        pop     {r0, r4, pc}
#endif

        cfi_restore_state

9:      rsb     ip, ip, #4
        cmp     ip, #2
        ldrbgt  r3, [r1], #1
        ldrbge  r4, [r1], #1
        ldrb    lr, [r1], #1
        strbgt  r3, [r0], #1
        strbge  r4, [r0], #1
        subs    r2, r2, ip
        strb    lr, [r0], #1
        blo     8b
        ands    ip, r1, #3
        beq     1b
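/* 10: dest is word aligned but src is offset by ip = 1, 2 or 3
   bytes.  Round src down to a word boundary, preload the first word
   into lr, and dispatch to the forward_copy_shift instance matching
   the offset: fall through for 1, 17f for 2, 18f for 3.  */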

10:     bic     r1, r1, #3
        cmp     ip, #2
        ldr     lr, [r1], #4
        beq     17f
        bgt     18f

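/* forward_copy_shift copies forward when src sits pull/8 bytes past
   a word boundary.  Each destination word is assembled from two
   adjacent source words: the previously loaded word shifted by
   PULL #pull, merged with the next word shifted by PUSH #push.
   lr carries the partial word between iterations, and pull + push
   is always 32.  */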
        .macro forward_copy_shift pull push

        subs    r2, r2, #28
        blo     14f

    CALGN(  ands    ip, r1, #31     )
    CALGN(  rsb     ip, ip, #32     )
    CALGN(  sbcsne  r4, ip, r2      )  @ C is always set here
    CALGN(  subcc   r2, r2, ip      )
    CALGN(  bcc     15f             )

11:     push    {r5 - r8, r10}
        cfi_adjust_cfa_offset (20)
        cfi_rel_offset (r5, 0)
        cfi_rel_offset (r6, 4)
        cfi_rel_offset (r7, 8)
        cfi_rel_offset (r8, 12)
        cfi_rel_offset (r10, 16)

    PLD(    pld     [r1, #0]        )
    PLD(    cmp     r2, #96         )
    PLD(    pld     [r1, #28]       )
    PLD(    blo     13f             )
    PLD(    pld     [r1, #60]       )
    PLD(    pld     [r1, #92]       )

12: PLD(    pld     [r1, #124]      )
13:     ldmia   r1!, {r4, r5, r6, r7}
        mov     r3, lr, PULL #\pull
        subs    r2, r2, #32
        ldmia   r1!, {r8, r10, ip, lr}
        orr     r3, r3, r4, PUSH #\push
        mov     r4, r4, PULL #\pull
        orr     r4, r4, r5, PUSH #\push
        mov     r5, r5, PULL #\pull
        orr     r5, r5, r6, PUSH #\push
        mov     r6, r6, PULL #\pull
        orr     r6, r6, r7, PUSH #\push
        mov     r7, r7, PULL #\pull
        orr     r7, r7, r8, PUSH #\push
        mov     r8, r8, PULL #\pull
        orr     r8, r8, r10, PUSH #\push
        mov     r10, r10, PULL #\pull
        orr     r10, r10, ip, PUSH #\push
        mov     ip, ip, PULL #\pull
        orr     ip, ip, lr, PUSH #\push
        stmia   r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
        bhs     12b

        pop     {r5 - r8, r10}
        cfi_adjust_cfa_offset (-20)
        cfi_restore (r5)
        cfi_restore (r6)
        cfi_restore (r7)
        cfi_restore (r8)
        cfi_restore (r10)

14:     ands    ip, r2, #28
        beq     16f

15:     mov     r3, lr, PULL #\pull
        ldr     lr, [r1], #4
        subs    ip, ip, #4
        orr     r3, r3, lr, PUSH #\push
        str     r3, [r0], #4
        bgt     15b
    CALGN(  cmp     r2, #0          )
    CALGN(  bge     11b             )

16:     sub     r1, r1, #(\push / 8)
        b       8b

        .endm

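/* Instantiate the three shifted-copy variants selected at label 10:
   source offset 1 uses pull=8/push=24, offset 2 (label 17) uses
   pull=16/push=16, offset 3 (label 18) uses pull=24/push=8.  */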
        forward_copy_shift pull=8 push=24

17:     forward_copy_shift pull=16 push=16

18:     forward_copy_shift pull=24 push=8

END(memcpy)
libc_hidden_builtin_def (memcpy)

source code of glibc/sysdeps/arm/memcpy.S