1 | //===-- Memcmp implementation for x86_64 ------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMCMP_H |
10 | #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMCMP_H |
11 | |
12 | #include "src/__support/macros/config.h" // LIBC_INLINE |
13 | #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY |
14 | #include "src/string/memory_utils/op_generic.h" |
15 | #include "src/string/memory_utils/op_x86.h" |
16 | #include "src/string/memory_utils/utils.h" // MemcmpReturnType |
17 | |
18 | namespace LIBC_NAMESPACE { |
19 | |
20 | [[maybe_unused]] LIBC_INLINE MemcmpReturnType |
21 | inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) { |
22 | return generic::Memcmp<uint64_t>::loop_and_tail_align_above(threshold: 384, p1, p2, |
23 | count); |
24 | } |
25 | |
#if defined(__SSE4_1__)
// Comparison helper working on __m128i blocks; only compiled when
// __SSE4_1__ is defined.
//
// inline_memcmp_x86 calls it solely for counts it has not already handled,
// i.e. count > 16. The whole comparison is delegated to
// loop_and_tail_align_above with a threshold argument of 384.
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
  using Cmp128 = generic::Memcmp<__m128i>;
  constexpr size_t kThreshold = 384;
  return Cmp128::loop_and_tail_align_above(kThreshold, p1, p2, count);
}
#endif // __SSE4_1__
33 | |
#if defined(__AVX2__)
// Comparison helper for builds where __AVX2__ is defined.
//
// inline_memcmp_x86 calls it only for count > 16. Size classes:
//   count <= 32 : __m128i head_tail
//   count <= 64 : __m256i head_tail
//   otherwise   : __m256i loop_and_tail_align_above, threshold argument 384
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
  using Cmp128 = generic::Memcmp<__m128i>;
  using Cmp256 = generic::Memcmp<__m256i>;
  constexpr size_t kThreshold = 384;

  if (count <= 32) {
    return Cmp128::head_tail(p1, p2, count);
  }
  if (count <= 64) {
    return Cmp256::head_tail(p1, p2, count);
  }
  return Cmp256::loop_and_tail_align_above(kThreshold, p1, p2, count);
}
#endif // __AVX2__
45 | |
#if defined(__AVX512BW__)
// Comparison helper for builds where __AVX512BW__ is defined.
//
// inline_memcmp_x86 calls it only for count > 16. Size classes:
//   count <= 32  : __m128i head_tail
//   count <= 64  : __m256i head_tail
//   count <= 128 : __m512i head_tail
//   otherwise    : __m512i loop_and_tail_align_above, threshold argument 384
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
  using Cmp128 = generic::Memcmp<__m128i>;
  using Cmp256 = generic::Memcmp<__m256i>;
  using Cmp512 = generic::Memcmp<__m512i>;
  constexpr size_t kThreshold = 384;

  if (count <= 32) {
    return Cmp128::head_tail(p1, p2, count);
  }
  if (count <= 64) {
    return Cmp256::head_tail(p1, p2, count);
  }
  if (count <= 128) {
    return Cmp512::head_tail(p1, p2, count);
  }
  return Cmp512::loop_and_tail_align_above(kThreshold, p1, p2, count);
}
#endif // __AVX512BW__
59 | |
60 | LIBC_INLINE MemcmpReturnType inline_memcmp_x86(CPtr p1, CPtr p2, size_t count) { |
61 | if (count == 0) |
62 | return MemcmpReturnType::zero(); |
63 | if (count == 1) |
64 | return generic::Memcmp<uint8_t>::block(p1, p2); |
65 | if (count == 2) |
66 | return generic::Memcmp<uint16_t>::block(p1, p2); |
67 | if (count == 3) |
68 | return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2); |
69 | if (count == 4) |
70 | return generic::Memcmp<uint32_t>::block(p1, p2); |
71 | if (count == 5) |
72 | return generic::MemcmpSequence<uint32_t, uint8_t>::block(p1, p2); |
73 | if (count == 6) |
74 | return generic::MemcmpSequence<uint32_t, uint16_t>::block(p1, p2); |
75 | if (count == 7) |
76 | return generic::Memcmp<uint32_t>::head_tail(p1, p2, count: 7); |
77 | if (count == 8) |
78 | return generic::Memcmp<uint64_t>::block(p1, p2); |
79 | if (count <= 16) |
80 | return generic::Memcmp<uint64_t>::head_tail(p1, p2, count); |
81 | #if defined(__AVX512BW__) |
82 | return inline_memcmp_x86_avx512bw_gt16(p1, p2, count); |
83 | #elif defined(__AVX2__) |
84 | return inline_memcmp_x86_avx2_gt16(p1, p2, count); |
85 | #elif defined(__SSE4_1__) |
86 | return inline_memcmp_x86_sse41_gt16(p1, p2, count); |
87 | #else |
88 | return inline_memcmp_generic_gt16(p1, p2, count); |
89 | #endif |
90 | } |
91 | |
92 | } // namespace LIBC_NAMESPACE |
93 | |
94 | #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMCMP_H |
95 | |