// SPDX-License-Identifier: GPL-2.0
/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */
/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */
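
/*
 * Concretely, the loops below use a biased count so that the loop test is
 * a plain sign check (which should map to a single bge/blt) rather than a
 * compare against 8:
 *
 *	n -= 8;				- bias: n >= 0 now means "8+ bytes left"
 *	while (n >= 0) { ... n -= 8; }
 *	n += 8;				- undo the bias for the byte tail
 */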

#include <linux/types.h>
#include <linux/export.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}
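
/*
 * For reference, the "one go" version hinted at above would use the Alpha
 * byte-manipulation instructions: ldq_u ignores the low three address bits
 * and never traps, and the mskql/insql pair merges partial bytes into a
 * quadword without a byte loop. A rough, untested sketch for the head
 * bytes (register names are illustrative only):
 *
 *	ldq_u	t0,0(d)		- fetch the quadword containing d
 *	mskql	t0,d,t0		- zap the byte lanes about to be filled
 *	insql	t1,d,t1		- shift the new bytes into those lanes
 *	bis	t0,t1,t0	- merge old and new bytes
 *	stq_u	t0,0(d)		- store the whole quadword back
 */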

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
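
/*
 * How the extract pair below works (little-endian byte lanes; the offset
 * s & 7 is nonzero on this path): extql shifts low_word right by the
 * offset, extqh shifts high_word left by 8 minus the offset. For example,
 * with s & 7 == 3, extql moves bytes 3..7 of low_word into lanes 0..4,
 * and extqh moves bytes 0..2 of high_word into lanes 5..7; or-ing the
 * two yields the eight source bytes starting at s, ready for a single
 * aligned store.
 */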
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					  long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1" :"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1" :"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}
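
/*
 * Note how the loop above is software-pipelined: each iteration's
 * high_word becomes the next iteration's low_word, so every source
 * quadword is loaded exactly once even though each output quadword
 * needs bytes from two of them.
 */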

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					  long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}
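
/*
 * The byte-at-a-time fallback above is correct for any descending copy;
 * presumably the extql/extqh pipeline from __memcpy_unaligned_up could be
 * mirrored here, walking downward, at the cost of doing the same masking
 * bookkeeping in reverse.
 */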

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but it would seem that using a floating
 * point register for the move seems to slow things down (very small difference,
 * though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1" :"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1" :"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}

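/*
 * Dispatch: xor-ing the two pointers exposes their relative alignment.
 * If the low three bits of the xor are zero, dest and src are congruent
 * mod 8 (e.g. a dest ending in ...3 and a src ending in ...b), so
 * aligning the destination aligns the source as well and the fast
 * ldq/stq path applies; otherwise every output quadword straddles two
 * source quadwords and the extract-based path is needed.
 */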
void * memcpy(void * dest, const void *src, size_t n)
{
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);