1 | //===-- Memmove implementation for x86_64 -----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H |
9 | #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H |
10 | |
11 | #include "src/__support/macros/config.h" // LIBC_INLINE |
12 | #include "src/string/memory_utils/op_builtin.h" |
13 | #include "src/string/memory_utils/op_generic.h" |
14 | #include "src/string/memory_utils/op_x86.h" |
15 | #include "src/string/memory_utils/utils.h" |
16 | |
17 | #include <stddef.h> // size_t |
18 | |
19 | namespace LIBC_NAMESPACE { |
20 | |
21 | LIBC_INLINE bool inline_memmove_small_size_x86(Ptr dst, CPtr src, |
22 | size_t count) { |
23 | #if defined(__AVX512F__) |
24 | constexpr size_t vector_size = 64; |
25 | using uint128_t = generic_v128; |
26 | using uint256_t = generic_v256; |
27 | using uint512_t = generic_v512; |
28 | #elif defined(__AVX__) |
29 | constexpr size_t vector_size = 32; |
30 | using uint128_t = generic_v128; |
31 | using uint256_t = generic_v256; |
32 | using uint512_t = cpp::array<generic_v256, 2>; |
33 | #elif defined(__SSE2__) |
34 | constexpr size_t vector_size = 16; |
35 | using uint128_t = generic_v128; |
36 | using uint256_t = cpp::array<generic_v128, 2>; |
37 | using uint512_t = cpp::array<generic_v128, 4>; |
38 | #else |
39 | constexpr size_t vector_size = 8; |
40 | using uint128_t = cpp::array<uint64_t, 2>; |
41 | using uint256_t = cpp::array<uint64_t, 4>; |
42 | using uint512_t = cpp::array<uint64_t, 8>; |
43 | #endif |
44 | (void)vector_size; |
45 | if (count == 0) |
46 | return true; |
47 | if (count == 1) { |
48 | generic::Memmove<uint8_t>::block(dst, src); |
49 | return true; |
50 | } |
51 | if (count == 2) { |
52 | generic::Memmove<uint16_t>::block(dst, src); |
53 | return true; |
54 | } |
55 | if (count == 3) { |
56 | generic::Memmove<cpp::array<uint8_t, 3>>::block(dst, src); |
57 | return true; |
58 | } |
59 | if (count == 4) { |
60 | generic::Memmove<uint32_t>::block(dst, src); |
61 | return true; |
62 | } |
63 | if (count < 8) { |
64 | generic::Memmove<uint32_t>::head_tail(dst, src, count); |
65 | return true; |
66 | } |
67 | // If count is equal to a power of 2, we can handle it as head-tail |
68 | // of both smaller size and larger size (head-tail are either |
69 | // non-overlapping for smaller size, or completely collapsed |
70 | // for larger size). It seems to be more profitable to do the copy |
71 | // with the larger size, if it's natively supported (e.g. doing |
72 | // 2 collapsed 32-byte moves for count=64 if AVX2 is supported). |
73 | // But it's not profitable to use larger size if it's not natively |
74 | // supported: we will both use more instructions and handle fewer |
75 | // sizes in earlier branches. |
76 | if (vector_size >= 16 ? count < 16 : count <= 16) { |
77 | generic::Memmove<uint64_t>::head_tail(dst, src, count); |
78 | return true; |
79 | } |
80 | if (vector_size >= 32 ? count < 32 : count <= 32) { |
81 | generic::Memmove<uint128_t>::head_tail(dst, src, count); |
82 | return true; |
83 | } |
84 | if (vector_size >= 64 ? count < 64 : count <= 64) { |
85 | generic::Memmove<uint256_t>::head_tail(dst, src, count); |
86 | return true; |
87 | } |
88 | if (count <= 128) { |
89 | generic::Memmove<uint512_t>::head_tail(dst, src, count); |
90 | return true; |
91 | } |
92 | return false; |
93 | } |
94 | |
95 | LIBC_INLINE void inline_memmove_follow_up_x86(Ptr dst, CPtr src, size_t count) { |
96 | #if defined(__AVX512F__) |
97 | using uint256_t = generic_v256; |
98 | using uint512_t = generic_v512; |
99 | #elif defined(__AVX__) |
100 | using uint256_t = generic_v256; |
101 | using uint512_t = cpp::array<generic_v256, 2>; |
102 | #elif defined(__SSE2__) |
103 | using uint256_t = cpp::array<generic_v128, 2>; |
104 | using uint512_t = cpp::array<generic_v128, 4>; |
105 | #else |
106 | using uint256_t = cpp::array<uint64_t, 4>; |
107 | using uint512_t = cpp::array<uint64_t, 8>; |
108 | #endif |
109 | if (dst < src) { |
110 | generic::Memmove<uint256_t>::align_forward<Arg::Src>(dst, src, count); |
111 | return generic::Memmove<uint512_t>::loop_and_tail_forward(dst, src, count); |
112 | } else { |
113 | generic::Memmove<uint256_t>::align_backward<Arg::Src>(dst, src, count); |
114 | return generic::Memmove<uint512_t>::loop_and_tail_backward(dst, src, count); |
115 | } |
116 | } |
117 | |
118 | } // namespace LIBC_NAMESPACE |
119 | |
120 | #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H |
121 | |