/* Copyright (C) 2006-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above).
 * On older cores PLD() expands to nothing, so the pld hints vanish.
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...)	code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 */
//#define CALGN(code...)	code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 * PULL extracts bytes toward the low end of a word in memory order;
 * PUSH is the complementary shift.  The directions swap on big-endian.
 */
#ifndef __ARMEB__
#define PULL	lsr
#define PUSH	lsl
#else
#define PULL	lsl
#define PUSH	lsr
#endif

		.text
		.syntax unified

/*
 * Prototype: void *memmove(void *dest, const void *src, size_t n);
 *
 * Note:
 *
 * If the memory regions don't overlap, we simply branch to memcpy which is
 * normally a bit faster.  Otherwise the copy is done going downwards.
 */

/* In:  r0 = dest, r1 = src, r2 = n.  Out: r0 = dest (saved/restored via
   the stack).  Copies descending so overlapping regions with dest > src
   are handled; non-overlapping cases tail-call memcpy.  */
ENTRY(memmove)

		@ ip = dest - src (unsigned).  If n <= dest - src the regions
		@ do not overlap dangerously and a forward memcpy is safe.
		@ cmphi only runs when ip > 0, i.e. dest is above src.
	subs	ip, r0, r1
	cmphi	r2, ip
#if !IS_IN (libc)
	bls	memcpy
#else
	bls	HIDDEN_JUMPTARGET(memcpy)
#endif

	push	{r0, r4, lr}
	cfi_adjust_cfa_offset (12)
	cfi_rel_offset (r4, 4)
	cfi_rel_offset (lr, 8)

	cfi_remember_state

		@ Copy backwards: advance both pointers one past the end.
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blo	8f			@ < 4 bytes: byte tail copy only
	ands	ip, r0, #3
 PLD(	pld	[r1, #-4]		)
	bne	9f			@ align dest to a word boundary first
	ands	ip, r1, #3
	bne	10f			@ src misaligned: shifted-word copy

		@ Both pointers word aligned.  Main 32-byte-per-iteration loop.
1:	subs	r2, r2, #(28)
	push	{r5 - r8}
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (r5, 0)
	cfi_rel_offset (r6, 4)
	cfi_rel_offset (r7, 8)
	cfi_rel_offset (r8, 12)
	blo	5f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, ip		)  @ C is set here
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

 PLD(	pld	[r1, #-4]		)
2:	PLD(	cmp	r2, #96			)
 PLD(	pld	[r1, #-32]		)
 PLD(	blo	4f			)
 PLD(	pld	[r1, #-64]		)
 PLD(	pld	[r1, #-96]		)

3:	PLD(	pld	[r1, #-128]		)
4:	ldmdb	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
	subs	r2, r2, #32
	stmdb	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
	bhs	3b

		@ Fewer than 32 bytes of words remain: computed jump into a
		@ ladder of single-word copies (ip = 32 - remaining words*4).
5:	ands	ip, r2, #28
	rsb	ip, ip, #32
#ifndef ARM_ALWAYS_BX
	/* C is always clear here.  */
	addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	b	7f
#else
	beq	7f
	push	{r10}
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (r10, 0)
0:	add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	/* If alignment is not perfect, then there will be some
	   padding (nop) instructions between this BX and label 6.
	   The computation above assumed that two instructions
	   later is exactly the right spot.  */
	add	r10, #(6f - (0b + PC_OFS))
	bx	r10
#endif
	.p2align ARM_BX_ALIGN_LOG2
6:	nop
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r3, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r4, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r5, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r6, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r7, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r8, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	lr, [r1, #-4]!

#ifndef ARM_ALWAYS_BX
	add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	nop
#else
0:	add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	/* If alignment is not perfect, then there will be some
	   padding (nop) instructions between this BX and label 66.
	   The computation above assumed that two instructions
	   later is exactly the right spot.  */
	add	r10, #(66f - (0b + PC_OFS))
	bx	r10
#endif
	.p2align ARM_BX_ALIGN_LOG2
66:	nop
	.p2align ARM_BX_ALIGN_LOG2
	str	r3, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r4, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r5, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r6, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r7, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r8, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	lr, [r0, #-4]!

#ifdef ARM_ALWAYS_BX
	pop	{r10}
	cfi_adjust_cfa_offset (-4)
	cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)

7:	pop	{r5 - r8}
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)
	cfi_restore (r8)

		@ Copy the last 0-3 bytes.  lsl #31 puts bit1 of the
		@ remaining count in N and bit0 in C, selecting which of
		@ the conditional byte moves below execute.
8:	movs	r2, r2, lsl #31
	ldrbne	r3, [r1, #-1]!
	ldrbcs	r4, [r1, #-1]!
	ldrbcs	ip, [r1, #-1]
	strbne	r3, [r0, #-1]!
	strbcs	r4, [r0, #-1]!
	strbcs	ip, [r0, #-1]

#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
	pop	{r0, r4, lr}
	cfi_adjust_cfa_offset (-12)
	cfi_restore (r4)
	cfi_restore (lr)
	bx	lr
#else
	pop	{r0, r4, pc}
#endif

	cfi_restore_state

		@ dest misaligned by ip (1-3): copy ip bytes to word-align it.
9:	cmp	ip, #2
	ldrbgt	r3, [r1, #-1]!
	ldrbge	r4, [r1, #-1]!
	ldrb	lr, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	strbge	r4, [r0, #-1]!
	subs	r2, r2, ip
	strb	lr, [r0, #-1]!
	blo	8b
	ands	ip, r1, #3
	beq	1b

		@ dest aligned but src is not: word-align src and dispatch to
		@ the shifted-copy variant for the 1-, 2- or 3-byte offset.
10:	bic	r1, r1, #3
	cmp	ip, #2
	ldr	r3, [r1, #0]
	beq	17f
	blt	18f


/* Backward copy with src offset by (\push / 8) bytes within a word:
   each destination word is assembled from two successive source words
   using the endian-independent PUSH/PULL shifts.  */
	.macro	backward_copy_shift push pull

	subs	r2, r2, #28
	blo	14f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:	push	{r5 - r8, r10}
	cfi_adjust_cfa_offset (20)
	cfi_rel_offset (r5, 0)
	cfi_rel_offset (r6, 4)
	cfi_rel_offset (r7, 8)
	cfi_rel_offset (r8, 12)
	cfi_rel_offset (r10, 16)

 PLD(	pld	[r1, #-4]		)
 PLD(	cmp	r2, #96			)
 PLD(	pld	[r1, #-32]		)
 PLD(	blo	13f			)
 PLD(	pld	[r1, #-64]		)
 PLD(	pld	[r1, #-96]		)

12:	PLD(	pld	[r1, #-128]	)
13:	ldmdb	r1!, {r7, r8, r10, ip}
	mov	lr, r3, PUSH #\push
	subs	r2, r2, #32
	ldmdb	r1!, {r3, r4, r5, r6}
	orr	lr, lr, ip, PULL #\pull
	mov	ip, ip, PUSH #\push
	orr	ip, ip, r10, PULL #\pull
	mov	r10, r10, PUSH #\push
	orr	r10, r10, r8, PULL #\pull
	mov	r8, r8, PUSH #\push
	orr	r8, r8, r7, PULL #\pull
	mov	r7, r7, PUSH #\push
	orr	r7, r7, r6, PULL #\pull
	mov	r6, r6, PUSH #\push
	orr	r6, r6, r5, PULL #\pull
	mov	r5, r5, PUSH #\push
	orr	r5, r5, r4, PULL #\pull
	mov	r4, r4, PUSH #\push
	orr	r4, r4, r3, PULL #\pull
	stmdb	r0!, {r4 - r8, r10, ip, lr}
	bhs	12b

	pop	{r5 - r8, r10}
	cfi_adjust_cfa_offset (-20)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)
	cfi_restore (r8)
	cfi_restore (r10)

14:	ands	ip, r2, #28
	beq	16f

		@ One shifted word at a time for the 4-28 byte tail.
15:	mov	lr, r3, PUSH #\push
	ldr	r3, [r1, #-4]!
	subs	ip, ip, #4
	orr	lr, lr, r3, PULL #\pull
	str	lr, [r0, #-4]!
	bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

		@ Undo the word-alignment bias on src before the byte tail.
16:	add	r1, r1, #(\pull / 8)
	b	8b

	.endm


		backward_copy_shift	push=8	pull=24

17:		backward_copy_shift	push=16	pull=16

18:		backward_copy_shift	push=24	pull=8


END(memmove)
libc_hidden_builtin_def (memmove)