/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"

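/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * memmove() only picks a copy direction: when dst < src a forward copy
 * (__memcpy) is safe, when dst > src the copy must run backwards
 * (__rmemcpy), and when dst == src there is nothing to copy.
 */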
SYM_FUNC_START(memmove)
	blt	a0, a1, __memcpy	/* dst < src, memcpy */
	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_ALIAS(__memmove, memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

_ASM_NOKPROBE(memmove)
_ASM_NOKPROBE(__memmove)

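/*
 * __rmemcpy() is a backward memcpy: it copies n bytes from src to dst
 * starting at the end of both buffers, which is the safe direction for
 * overlapping buffers with dst > src.
 */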
SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)
_ASM_NOKPROBE(__rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
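/*
 * Roughly equivalent C, for illustration only:
 *
 *	void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *	{
 *		unsigned char *d = (unsigned char *)dst + n;
 *		const unsigned char *s = (const unsigned char *)src + n;
 *
 *		while (n--)
 *			*--d = *--s;
 *
 *		return dst;
 *	}
 */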
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0
	beqz	a2, 2f

	add.d	a0, a0, a2
	add.d	a1, a1, a2

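	/* copy one byte per iteration, from the last byte down to the first */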
1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
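/*
 * Fast path for CPUs with hardware unaligned access (CPU_FEATURE_UAL):
 * save the first and last 8 source bytes, copy the bulk backwards in
 * 64/32/16/8-byte chunks to an 8-byte-aligned destination, then store
 * the saved bytes to cover the unaligned head and tail.
 */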
SYM_FUNC_START(__rmemcpy_fast)
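	/*
	 * Copies of 0..8 bytes are handed to __memcpy_small (see memcpy.S),
	 * which loads the data before storing it, so the copy direction
	 * does not matter for such short overlaps.
	 */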
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2	/* a3 = src + n (source end) */
	add.d	a2, a0, a2	/* a2 = dst + n (destination end) */
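	/* save the first and last 8 source bytes; they are stored at .Llt8 */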
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/* align the destination end address down to an 8-byte boundary */
	andi	t1, a2, 7
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1

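	/*
	 * a5 is now the 8-byte-aligned destination end and a3 the matching
	 * source end; the tail bytes dropped here are covered by the a7
	 * store at .Llt8.
	 */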
	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

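	/*
	 * Store the first and last 8 bytes saved in a6/a7; this also covers
	 * the unaligned head and tail bytes skipped by the aligned copies above.
	 */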
.Llt8:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)