Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===-- Implementations for platform with mandatory aligned memory access -===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // For some platforms, unaligned loads and stores are either illegal or very |
9 | // slow. The implementations in this file make sure all loads and stores are |
10 | // always aligned. |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H |
14 | #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H |
15 | |
16 | #include "src/__support/macros/attributes.h" // LIBC_INLINE |
17 | #include "src/string/memory_utils/generic/byte_per_byte.h" |
18 | #include "src/string/memory_utils/op_generic.h" // generic::splat |
19 | #include "src/string/memory_utils/utils.h" // Ptr, CPtr |
20 | |
21 | #include <stddef.h> // size_t |
22 | |
23 | namespace LIBC_NAMESPACE_DECL { |
24 | |
25 | [[maybe_unused]] LIBC_INLINE uint32_t load32_aligned(CPtr ptr, size_t offset, |
26 | size_t alignment) { |
27 | if (alignment == 0) |
28 | return load32_aligned<uint32_t>(ptr, offset); |
29 | else if (alignment == 2) |
30 | return load32_aligned<uint16_t, uint16_t>(ptr, offset); |
31 | else // 1, 3 |
32 | return load32_aligned<uint8_t, uint16_t, uint8_t>(ptr, offset); |
33 | } |
34 | |
35 | [[maybe_unused]] LIBC_INLINE uint64_t load64_aligned(CPtr ptr, size_t offset, |
36 | size_t alignment) { |
37 | if (alignment == 0) |
38 | return load64_aligned<uint64_t>(ptr, offset); |
39 | else if (alignment == 4) |
40 | return load64_aligned<uint32_t, uint32_t>(ptr, offset); |
41 | else if (alignment == 6) |
42 | return load64_aligned<uint16_t, uint32_t, uint16_t>(ptr, offset); |
43 | else if (alignment == 2) |
44 | return load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(ptr, offset); |
45 | else // 1, 3, 5, 7 |
46 | return load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>( |
47 | ptr, offset); |
48 | } |
49 | |
50 | /////////////////////////////////////////////////////////////////////////////// |
51 | // memcpy |
52 | /////////////////////////////////////////////////////////////////////////////// |
53 | |
54 | [[maybe_unused]] LIBC_INLINE void |
55 | inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src, |
56 | size_t count) { |
57 | constexpr size_t kAlign = sizeof(uint32_t); |
58 | if (count <= 2 * kAlign) |
59 | return inline_memcpy_byte_per_byte(dst, src, count); |
60 | size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); |
61 | inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align); |
62 | size_t offset = bytes_to_dst_align; |
63 | size_t src_alignment = distance_to_align_down<kAlign>(src + offset); |
64 | for (; offset < count - kAlign; offset += kAlign) { |
65 | uint32_t value = load32_aligned(src, offset, src_alignment); |
66 | store32_aligned<uint32_t>(value, dst, offset); |
67 | } |
68 | // remainder |
69 | inline_memcpy_byte_per_byte(dst, src, count, offset); |
70 | } |
71 | |
72 | [[maybe_unused]] LIBC_INLINE void |
73 | inline_memcpy_aligned_access_64bit(Ptr __restrict dst, CPtr __restrict src, |
74 | size_t count) { |
75 | constexpr size_t kAlign = sizeof(uint64_t); |
76 | if (count <= 2 * kAlign) |
77 | return inline_memcpy_byte_per_byte(dst, src, count); |
78 | size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); |
79 | inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align); |
80 | size_t offset = bytes_to_dst_align; |
81 | size_t src_alignment = distance_to_align_down<kAlign>(src + offset); |
82 | for (; offset < count - kAlign; offset += kAlign) { |
83 | uint64_t value = load64_aligned(src, offset, src_alignment); |
84 | store64_aligned<uint64_t>(value, dst, offset); |
85 | } |
86 | // remainder |
87 | inline_memcpy_byte_per_byte(dst, src, count, offset); |
88 | } |
89 | |
90 | /////////////////////////////////////////////////////////////////////////////// |
91 | // memset |
92 | /////////////////////////////////////////////////////////////////////////////// |
93 | |
94 | [[maybe_unused]] LIBC_INLINE static void |
95 | inline_memset_aligned_access_32bit(Ptr dst, uint8_t value, size_t count) { |
96 | constexpr size_t kAlign = sizeof(uint32_t); |
97 | if (count <= 2 * kAlign) |
98 | return inline_memset_byte_per_byte(dst, value, count); |
99 | size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); |
100 | inline_memset_byte_per_byte(dst, value, bytes_to_dst_align); |
101 | size_t offset = bytes_to_dst_align; |
102 | for (; offset < count - kAlign; offset += kAlign) |
103 | store32_aligned<uint32_t>(generic::splat<uint32_t>(value), dst, offset); |
104 | inline_memset_byte_per_byte(dst, value, count, offset); |
105 | } |
106 | |
107 | [[maybe_unused]] LIBC_INLINE static void |
108 | inline_memset_aligned_access_64bit(Ptr dst, uint8_t value, size_t count) { |
109 | constexpr size_t kAlign = sizeof(uint64_t); |
110 | if (count <= 2 * kAlign) |
111 | return inline_memset_byte_per_byte(dst, value, count); |
112 | size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst); |
113 | inline_memset_byte_per_byte(dst, value, bytes_to_dst_align); |
114 | size_t offset = bytes_to_dst_align; |
115 | for (; offset < count - kAlign; offset += kAlign) |
116 | store64_aligned<uint64_t>(generic::splat<uint64_t>(value), dst, offset); |
117 | inline_memset_byte_per_byte(dst, value, count, offset); |
118 | } |
119 | |
120 | /////////////////////////////////////////////////////////////////////////////// |
121 | // bcmp |
122 | /////////////////////////////////////////////////////////////////////////////// |
123 | |
124 | [[maybe_unused]] LIBC_INLINE BcmpReturnType |
125 | inline_bcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) { |
126 | constexpr size_t kAlign = sizeof(uint32_t); |
127 | if (count <= 2 * kAlign) |
128 | return inline_bcmp_byte_per_byte(p1, p2, count); |
129 | size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); |
130 | if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) |
131 | return value; |
132 | size_t offset = bytes_to_p1_align; |
133 | size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); |
134 | for (; offset < count - kAlign; offset += kAlign) { |
135 | uint32_t a = load32_aligned<uint32_t>(p1, offset); |
136 | uint32_t b = load32_aligned(p2, offset, p2_alignment); |
137 | if (a != b) |
138 | return BcmpReturnType::nonzero(); |
139 | } |
140 | return inline_bcmp_byte_per_byte(p1, p2, count, offset); |
141 | } |
142 | |
143 | [[maybe_unused]] LIBC_INLINE BcmpReturnType |
144 | inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) { |
145 | constexpr size_t kAlign = sizeof(uint64_t); |
146 | if (count <= 2 * kAlign) |
147 | return inline_bcmp_byte_per_byte(p1, p2, count); |
148 | size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); |
149 | if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) |
150 | return value; |
151 | size_t offset = bytes_to_p1_align; |
152 | size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); |
153 | for (; offset < count - kAlign; offset += kAlign) { |
154 | uint64_t a = load64_aligned<uint64_t>(p1, offset); |
155 | uint64_t b = load64_aligned(p2, offset, p2_alignment); |
156 | if (a != b) |
157 | return BcmpReturnType::nonzero(); |
158 | } |
159 | return inline_bcmp_byte_per_byte(p1, p2, count, offset); |
160 | } |
161 | |
162 | /////////////////////////////////////////////////////////////////////////////// |
163 | // memcmp |
164 | /////////////////////////////////////////////////////////////////////////////// |
165 | |
166 | [[maybe_unused]] LIBC_INLINE MemcmpReturnType |
167 | inline_memcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) { |
168 | constexpr size_t kAlign = sizeof(uint32_t); |
169 | if (count <= 2 * kAlign) |
170 | return inline_memcmp_byte_per_byte(p1, p2, count); |
171 | size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); |
172 | if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) |
173 | return value; |
174 | size_t offset = bytes_to_p1_align; |
175 | size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); |
176 | for (; offset < count - kAlign; offset += kAlign) { |
177 | uint32_t a = load32_aligned<uint32_t>(p1, offset); |
178 | uint32_t b = load32_aligned(p2, offset, p2_alignment); |
179 | if (a != b) |
180 | return cmp_uint32_t(Endian::to_big_endian(a), Endian::to_big_endian(b)); |
181 | } |
182 | return inline_memcmp_byte_per_byte(p1, p2, count, offset); |
183 | } |
184 | |
185 | [[maybe_unused]] LIBC_INLINE MemcmpReturnType |
186 | inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) { |
187 | constexpr size_t kAlign = sizeof(uint64_t); |
188 | if (count <= 2 * kAlign) |
189 | return inline_memcmp_byte_per_byte(p1, p2, count); |
190 | size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1); |
191 | if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align)) |
192 | return value; |
193 | size_t offset = bytes_to_p1_align; |
194 | size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); |
195 | for (; offset < count - kAlign; offset += kAlign) { |
196 | uint64_t a = load64_aligned<uint64_t>(p1, offset); |
197 | uint64_t b = load64_aligned(p2, offset, p2_alignment); |
198 | if (a != b) |
199 | return cmp_neq_uint64_t(Endian::to_big_endian(a), |
200 | Endian::to_big_endian(b)); |
201 | } |
202 | return inline_memcmp_byte_per_byte(p1, p2, count, offset); |
203 | } |
204 | |
205 | } // namespace LIBC_NAMESPACE_DECL |
206 | |
207 | #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H |
208 |
Warning: This file is not a C or C++ file. It does not have highlighting.