/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/loongarch/lib/memcpy.S
 *
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>

SYM_FUNC_START(memcpy)
	/*
	 * Some CPUs support hardware unaligned access; on those the
	 * alternative below is patched at boot to use __memcpy_fast.
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)

EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0
	beqz	a2, 2f

1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)

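/*
 * void *__memcpy_small(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n, 0 <= n <= 8
 */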
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
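	/*
	 * Computed dispatch: every case below sits in its own 32-byte
	 * slot (.align 5). pcaddi yields the address of case 0 (PC + 32),
	 * and n * 32 is added to reach the case copying exactly n bytes.
	 */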
	pcaddi	t0, 8
	slli.d	a2, a2, 5
	add.d	t0, t0, a2
	jr	t0

	.align	5
0:	jr	ra		/* 0 bytes: nothing to copy */

	.align	5
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	.align	5
2:	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	.align	5
3:	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align	5
4:	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	.align	5
5:	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align	5
6:	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	.align	5
7:	ld.w	t0, a1, 0
	ld.w	t1, a1, 3	/* overlapping words cover all 7 bytes */
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align	5
8:	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_fast)
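	/* copies of at most 8 bytes take the per-size jump table */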
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2	/* a3 = src + n (source end) */
	add.d	a2, a0, a2	/* a2 = dst + n (destination end) */
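	/*
	 * Save the first and last 8 source bytes now; they are stored
	 * at .Llt8 and cover any unaligned head and tail of the copy.
	 */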
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/* align up destination address */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8
	add.d	a1, a1, t0
	add.d	a5, a0, t0
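	/*
	 * a5 is now 8-byte aligned; a1 may still be misaligned, which is
	 * fine since this path only runs on CPUs with hardware UAL.
	 */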

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

.Llt8:
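	/*
	 * Store the 8 head bytes (a6) and 8 tail bytes (a7) saved at
	 * entry; these stores may overlap data already copied above.
	 */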
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)

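/* __memcpy_small uses a computed jump that objtool cannot follow */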
STACK_FRAME_NON_STANDARD __memcpy_small
