1 | //===-- Bcmp implementation for x86_64 --------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H |
9 | #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H |
10 | |
11 | #include "src/__support/macros/attributes.h" // LIBC_INLINE |
12 | #include "src/string/memory_utils/op_generic.h" |
13 | #include "src/string/memory_utils/op_x86.h" |
14 | #include "src/string/memory_utils/utils.h" // Ptr, CPtr |
15 | |
16 | #include <stddef.h> // size_t |
17 | |
18 | namespace LIBC_NAMESPACE { |
19 | |
20 | [[maybe_unused]] LIBC_INLINE BcmpReturnType |
21 | inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) { |
22 | return generic::Bcmp<uint64_t>::loop_and_tail_align_above(threshold: 256, p1, p2, count); |
23 | } |
24 | |
25 | #if defined(__SSE4_1__) |
26 | [[maybe_unused]] LIBC_INLINE BcmpReturnType |
27 | inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) { |
28 | if (count <= 32) |
29 | return generic::Bcmp<__m128i>::head_tail(p1, p2, count); |
30 | return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count); |
31 | } |
32 | #endif // __SSE4_1__ |
33 | |
34 | #if defined(__AVX__) |
35 | [[maybe_unused]] LIBC_INLINE BcmpReturnType |
36 | inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) { |
37 | if (count <= 32) |
38 | return generic::Bcmp<__m128i>::head_tail(p1, p2, count); |
39 | if (count <= 64) |
40 | return generic::Bcmp<__m256i>::head_tail(p1, p2, count); |
41 | return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count); |
42 | } |
43 | #endif // __AVX__ |
44 | |
45 | #if defined(__AVX512BW__) |
46 | [[maybe_unused]] LIBC_INLINE BcmpReturnType |
47 | inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) { |
48 | if (count <= 32) |
49 | return generic::Bcmp<__m128i>::head_tail(p1, p2, count); |
50 | if (count <= 64) |
51 | return generic::Bcmp<__m256i>::head_tail(p1, p2, count); |
52 | if (count <= 128) |
53 | return generic::Bcmp<__m512i>::head_tail(p1, p2, count); |
54 | return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count); |
55 | } |
56 | #endif // __AVX512BW__ |
57 | |
58 | [[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2, |
59 | size_t count) { |
60 | if (count == 0) |
61 | return BcmpReturnType::zero(); |
62 | if (count == 1) |
63 | return generic::Bcmp<uint8_t>::block(p1, p2); |
64 | if (count == 2) |
65 | return generic::Bcmp<uint16_t>::block(p1, p2); |
66 | if (count == 3) |
67 | return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2); |
68 | if (count == 4) |
69 | return generic::Bcmp<uint32_t>::block(p1, p2); |
70 | if (count == 5) |
71 | return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2); |
72 | if (count == 6) |
73 | return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2); |
74 | if (count == 7) |
75 | return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2); |
76 | if (count == 8) |
77 | return generic::Bcmp<uint64_t>::block(p1, p2); |
78 | if (count <= 16) |
79 | return generic::Bcmp<uint64_t>::head_tail(p1, p2, count); |
80 | #if defined(__AVX512BW__) |
81 | return inline_bcmp_x86_avx512bw_gt16(p1, p2, count); |
82 | #elif defined(__AVX__) |
83 | return inline_bcmp_x86_avx_gt16(p1, p2, count); |
84 | #elif defined(__SSE4_1__) |
85 | return inline_bcmp_x86_sse41_gt16(p1, p2, count); |
86 | #else |
87 | return inline_bcmp_generic_gt16(p1, p2, count); |
88 | #endif |
89 | } |
90 | |
91 | } // namespace LIBC_NAMESPACE |
92 | |
93 | #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H |
94 | |