1//===-- Bcmp implementation for x86_64 --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H
9#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H
10
11#include "src/__support/macros/attributes.h" // LIBC_INLINE
12#include "src/string/memory_utils/op_generic.h"
13#include "src/string/memory_utils/op_x86.h"
14#include "src/string/memory_utils/utils.h" // Ptr, CPtr
15
16#include <stddef.h> // size_t
17
18namespace LIBC_NAMESPACE {
19
20[[maybe_unused]] LIBC_INLINE BcmpReturnType
21inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
22 return generic::Bcmp<uint64_t>::loop_and_tail_align_above(threshold: 256, p1, p2, count);
23}
24
25#if defined(__SSE4_1__)
26[[maybe_unused]] LIBC_INLINE BcmpReturnType
27inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
28 if (count <= 32)
29 return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
30 return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
31}
32#endif // __SSE4_1__
33
34#if defined(__AVX__)
35[[maybe_unused]] LIBC_INLINE BcmpReturnType
36inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
37 if (count <= 32)
38 return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
39 if (count <= 64)
40 return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
41 return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
42}
43#endif // __AVX__
44
45#if defined(__AVX512BW__)
46[[maybe_unused]] LIBC_INLINE BcmpReturnType
47inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
48 if (count <= 32)
49 return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
50 if (count <= 64)
51 return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
52 if (count <= 128)
53 return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
54 return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
55}
56#endif // __AVX512BW__
57
58[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
59 size_t count) {
60 if (count == 0)
61 return BcmpReturnType::zero();
62 if (count == 1)
63 return generic::Bcmp<uint8_t>::block(p1, p2);
64 if (count == 2)
65 return generic::Bcmp<uint16_t>::block(p1, p2);
66 if (count == 3)
67 return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
68 if (count == 4)
69 return generic::Bcmp<uint32_t>::block(p1, p2);
70 if (count == 5)
71 return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
72 if (count == 6)
73 return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
74 if (count == 7)
75 return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
76 if (count == 8)
77 return generic::Bcmp<uint64_t>::block(p1, p2);
78 if (count <= 16)
79 return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
80#if defined(__AVX512BW__)
81 return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
82#elif defined(__AVX__)
83 return inline_bcmp_x86_avx_gt16(p1, p2, count);
84#elif defined(__SSE4_1__)
85 return inline_bcmp_x86_sse41_gt16(p1, p2, count);
86#else
87 return inline_bcmp_generic_gt16(p1, p2, count);
88#endif
89}
90
91} // namespace LIBC_NAMESPACE
92
93#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H
94

// Source: libc/src/string/memory_utils/x86_64/inline_bcmp.h