/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"

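/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * memmove() must cope with overlapping buffers, so it picks the copy
 * direction from the relative position of dst and src. A hedged C-level
 * sketch of the dispatch (illustrative only; __memcpy and __rmemcpy are
 * the assembly routines used below):
 *
 *	void *memmove(void *dst, const void *src, size_t n)
 *	{
 *		if (dst < src)
 *			return __memcpy(dst, src, n);	// forward copy is safe
 *		if (src < dst)
 *			return __rmemcpy(dst, src, n);	// copy backwards
 *		return dst;				// dst == src, nothing to do
 *	}
 */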
SYM_FUNC_START(memmove)
	blt	a0, a1, __memcpy	/* dst < src, memcpy */
	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_ALIAS(__memmove, memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

_ASM_NOKPROBE(memmove)
_ASM_NOKPROBE(__memmove)

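/*
 * void *__rmemcpy(void *dst, const void *src, size_t n)
 *
 * ALTERNATIVE patches one of the two branches into place at boot,
 * depending on whether the CPU supports hardware unaligned access
 * (CPU_FEATURE_UAL). A hedged C-level sketch of the net effect
 * (illustrative only; the real selection is done by boot-time code
 * patching, not a per-call test, and cpu_has_ual stands in for the
 * feature check):
 *
 *	void *__rmemcpy(void *dst, const void *src, size_t n)
 *	{
 *		if (cpu_has_ual)
 *			return __rmemcpy_fast(dst, src, n);
 *		return __rmemcpy_generic(dst, src, n);
 *	}
 */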
SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)
_ASM_NOKPROBE(__rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
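/*
 * A hedged C-level sketch of the byte-at-a-time backward copy below
 * (illustrative only; the assembly that follows is the implementation):
 *
 *	void *rmemcpy_generic_sketch(void *dst, const void *src, size_t n)
 *	{
 *		unsigned char *d = (unsigned char *)dst + n;
 *		const unsigned char *s = (const unsigned char *)src + n;
 *
 *		while (n--)
 *			*--d = *--s;
 *		return dst;
 *	}
 */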
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0
	beqz	a2, 2f

	add.d	a0, a0, a2
	add.d	a1, a1, a2

1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
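/*
 * Strategy for n >= 9: load the first 8 source bytes (a6) and the last
 * 8 (a7) up front, copy the 8-byte-aligned middle backwards in
 * 64/32/16/8-byte chunks, then store a6 and a7 last so the possibly
 * unaligned head and tail are covered by overlapping 8-byte stores.
 * A hedged, simplified C sketch of the idea (illustrative only; assumes
 * hardware unaligned access, and the hypothetical load64()/store64()
 * helpers stand in for plain ld.d/st.d):
 *
 *	void *rmemcpy_fast_sketch(void *dst, const void *src, size_t n)
 *	{
 *		unsigned long head = load64(src);		// a6
 *		unsigned long tail = load64(src + n - 8);	// a7
 *		size_t t = (unsigned long)(dst + n) & 7;	// unaligned tail
 *		void *dend = dst + n - t, *send = src + n - t;
 *
 *		while (send - src > 8) {	// aligned middle, backwards
 *			dend -= 8; send -= 8;
 *			store64(dend, load64(send));
 *		}
 *		store64(dst, head);			// .Llt8 below
 *		store64(dst + n - 8, tail);
 *		return dst;
 *	}
 */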
SYM_FUNC_START(__rmemcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2
	add.d	a2, a0, a2
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/* align the copy end down to 8 bytes; the tail is covered by a7 */
	andi	t1, a2, 7
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1

	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

.Llt8:
	/* store the head/tail words loaded at entry; these two 8-byte
	 * stores overlap the aligned middle and cover any unaligned
	 * head and tail */
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)

