/* SPDX-License-Identifier: GPL-2.0 */
/* 32-bit x86 memmove() -- arch/x86/lib/memmove_32.S */

#include <linux/export.h>
#include <linux/linkage.h>

/*
 * Minimum length, in bytes, for which the `rep movsl` paths are considered.
 * Shorter copies use the 16-byte register loops instead, because the movs
 * instruction has a high startup latency.  The value is inherited unchanged
 * from the original code (presumably tuned empirically).
 */
#define MEMMOVE_MOVS_MIN_BYTES	680

SYM_FUNC_START(memmove)
/*
 * void *memmove(void *dest_in, const void *src_in, size_t n)
 *
 * Copy n bytes from src_in to dest_in (the two regions may overlap) and
 * return dest_in in %eax.
 *
 * -mregparm=3 passes these in registers:
 * dest_in: %eax
 * src_in: %edx
 * n: %ecx
 * See also: arch/x86/entry/calling.h for description of the calling convention.
 *
 * n can remain in %ecx, but for `rep movsl`, we need dest in %edi and src
 * in %esi.
 *
 * Strategy:
 *  - n < 16: jump straight to the tail code (.Lmove_16B and below).  Each
 *    tail case loads everything it copies into registers before storing
 *    anything, so the copy direction does not matter there.
 *  - src >= dest (unsigned): copy forwards; src < dest: copy backwards.
 *  - In either direction, `rep movsl` is used when n is at least
 *    MEMMOVE_MOVS_MIN_BYTES and src and dest agree in their low 8 address
 *    bits; otherwise a 16-bytes-per-iteration register loop runs and the
 *    remaining 0..15 bytes are finished by the tail code.
 */
.set dest_in, %eax
.set dest, %edi
.set src_in, %edx
.set src, %esi
.set n, %ecx
/*
 * Scratch registers.  tmp0 aliases src_in (%edx) and tmp2 aliases dest_in
 * (%eax), so they may only be used once src_in/dest_in have been copied to
 * src/dest and dest_in has been saved on the stack.  tmp3b is the low byte
 * of n and is only used once n is no longer needed.
 */
.set tmp0, %edx
.set tmp0w, %dx
.set tmp1, %ebx
.set tmp1w, %bx
.set tmp2, %eax
.set tmp3b, %cl

/*
 * Set up a standard frame pointer, then save the callee-saved registers
 * this function uses (%ebx, %edi, %esi):
 */
	pushl	%ebp
	movl	%esp, %ebp	// set standard frame pointer

	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushl	%eax		// save 'dest_in' parameter [eax] as the return value

	movl	src_in, src
	movl	dest_in, dest

	/* Only 16 bytes or more are handled by the loops / movs paths. */
	cmpl	$0x10, n
	jb	.Lmove_16B

	/*
	 * Decide forward/backward copy mode: src below dest (unsigned compare)
	 * copies from the end towards the start, anything else copies from the
	 * start towards the end.
	 */
	cmpl	dest, src
	jb	.Lbackwards_header

	/*
	 * The movs instruction has a high startup latency, so small sizes are
	 * handled with general-purpose registers.
	 */
	cmpl	$MEMMOVE_MOVS_MIN_BYTES, n
	jb	.Ltoo_small_forwards
	/*
	 * The movs instruction is only good for the aligned case: take that
	 * path only when src and dest agree in their low 8 address bits.
	 */
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$0xff, tmp0
	jz	.Lforward_movs
.Ltoo_small_forwards:
	/*
	 * Bias n by -16 so that the borrow produced by the subtraction at the
	 * top of the loop marks the iteration after which fewer than 16 bytes
	 * remain.
	 */
	subl	$0x10, n

	/*
	 * We gobble 16 bytes forward in each loop.  The jae at the bottom
	 * consumes the carry flag produced by the subl at the top; the movl and
	 * leal instructions in between do not modify the flags.
	 */
.Lmove_16B_forwards_loop:
	subl	$0x10, n
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	2*4(src), tmp0
	movl	3*4(src), tmp1
	movl	tmp0, 2*4(dest)
	movl	tmp1, 3*4(dest)
	leal	0x10(src), src
	leal	0x10(dest), dest
	jae	.Lmove_16B_forwards_loop
	addl	$0x10, n	// undo the bias: n = bytes still to copy (0..15)
	jmp	.Lmove_16B

	/*
	 * Handle data forward by movs.  `rep movsl` copies n / 4 dwords, so the
	 * last dword of the source is loaded before the copy starts and stored
	 * to the last dword of the destination afterwards; that store covers
	 * the up to 3 trailing bytes the string copy leaves out.
	 */
.p2align 4
.Lforward_movs:
	movl	-4(src, n), tmp0
	leal	-4(dest, n), tmp1
	shrl	$2, n
	rep	movsl
	movl	tmp0, (tmp1)
	jmp	.Ldone

	/*
	 * Handle data backward by movs.  Mirror image of the forward case: the
	 * first dword of the source and the start address of the destination
	 * are saved, src/dest are pointed at the last dword of each region, and
	 * the direction flag is set so that `rep movsl` walks downwards.  The
	 * saved first dword is stored last and covers the up to 3 leading bytes
	 * the string copy leaves out.
	 */
.p2align 4
.Lbackwards_movs:
	movl	(src), tmp0
	movl	dest, tmp1
	leal	-4(src, n), src
	leal	-4(dest, n), dest
	shrl	$2, n
	std
	rep	movsl
	movl	tmp0,(tmp1)
	cld			// clear the direction flag again
	jmp	.Ldone

	/*
	 * Start to prepare for backward copy: same size and alignment checks
	 * as on the forward side.
	 */
.p2align 4
.Lbackwards_header:
	cmpl	$MEMMOVE_MOVS_MIN_BYTES, n
	jb	.Ltoo_small_backwards
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$0xff, tmp0
	jz	.Lbackwards_movs

	/*
	 * Calculate copy position to tail: point src and dest just past the end
	 * of their regions, then bias n by -16 as in the forward loop.
	 */
.Ltoo_small_backwards:
	addl	n, src
	addl	n, dest
	subl	$0x10, n

	/*
	 * We gobble 16 bytes backward in each loop.  As in the forward loop,
	 * the jae at the bottom consumes the carry flag from the subl at the
	 * top; movl and leal leave the flags alone.
	 */
.Lmove_16B_backwards_loop:
	subl	$0x10, n

	movl	-1*4(src), tmp0
	movl	-2*4(src), tmp1
	movl	tmp0, -1*4(dest)
	movl	tmp1, -2*4(dest)
	movl	-3*4(src), tmp0
	movl	-4*4(src), tmp1
	movl	tmp0, -3*4(dest)
	movl	tmp1, -4*4(dest)
	leal	-0x10(src), src
	leal	-0x10(dest), dest
	jae	.Lmove_16B_backwards_loop
	/*
	 * Calculate copy position to head: undo the bias so that n is the
	 * number of bytes still to copy (0..15), then move src and dest back by
	 * that amount so they point at the start of the uncopied head.  Control
	 * then falls through into the tail code below.
	 */
	addl	$0x10, n
	subl	n, src
	subl	n, dest

	/*
	 * Move data from 8 bytes to 15 bytes: load the first 8 and the last 8
	 * bytes (the two halves may overlap), then store them.  All loads are
	 * done before any store.  The src register itself serves as the fourth
	 * temporary, so it no longer holds the source pointer afterwards.
	 */
.p2align 4
.Lmove_16B:
	cmpl	$8, n
	jb	.Lmove_8B
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	-2*4(src, n), tmp2
	movl	-1*4(src, n), src

	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	tmp2, -2*4(dest, n)
	movl	src, -1*4(dest, n)
	jmp	.Ldone

	/*
	 * Move data from 4 bytes to 7 bytes: first dword and last dword, loaded
	 * before either is stored.
	 */
.p2align 4
.Lmove_8B:
	cmpl	$4, n
	jb	.Lmove_4B
	movl	0*4(src), tmp0
	movl	-1*4(src, n), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, -1*4(dest, n)
	jmp	.Ldone

	/*
	 * Move data from 2 bytes to 3 bytes: first word and last word, loaded
	 * before either is stored.
	 */
.p2align 4
.Lmove_4B:
	cmpl	$2, n
	jb	.Lmove_1B
	movw	0*2(src), tmp0w
	movw	-1*2(src, n), tmp1w
	movw	tmp0w, 0*2(dest)
	movw	tmp1w, -1*2(dest, n)
	jmp	.Ldone

	/*
	 * Move data for 1 byte; n == 0 copies nothing.  tmp3b is the low byte
	 * of n, which is not needed any more at this point.
	 */
.p2align 4
.Lmove_1B:
	cmpl	$1, n
	jb	.Ldone
	movb	(src), tmp3b
	movb	tmp3b, (dest)
.p2align 4
.Ldone:
	popl	dest_in	// restore 'dest_in' [eax] as the return value
	/* Restore the callee-saved registers and the caller frame pointer: */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	RET
SYM_FUNC_END(memmove)
EXPORT_SYMBOL(memmove)