1//===-- Memset implementation for x86_64 ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
9#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
10
11#include "src/__support/macros/attributes.h" // LIBC_INLINE
12#include "src/string/memory_utils/op_generic.h"
13#include "src/string/memory_utils/op_x86.h"
14#include "src/string/memory_utils/utils.h" // Ptr, CPtr
15
16#include <stddef.h> // size_t
17
18namespace LIBC_NAMESPACE {
19namespace x86 {
20// Size of one cache line for software prefetching
21LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64;
22LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE =
23 K_ONE_CACHELINE_SIZE * 2;
24LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE =
25 K_ONE_CACHELINE_SIZE * 5;
26
27LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET =
28 LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING);
29
30} // namespace x86
31
32#if defined(__AVX512F__)
33using uint128_t = generic_v128;
34using uint256_t = generic_v256;
35using uint512_t = generic_v512;
36#elif defined(__AVX__)
37using uint128_t = generic_v128;
38using uint256_t = generic_v256;
39using uint512_t = cpp::array<generic_v256, 2>;
40#elif defined(__SSE2__)
41using uint128_t = generic_v128;
42using uint256_t = cpp::array<generic_v128, 2>;
43using uint512_t = cpp::array<generic_v128, 4>;
44#else
45using uint128_t = cpp::array<uint64_t, 2>;
46using uint256_t = cpp::array<uint64_t, 4>;
47using uint512_t = cpp::array<uint64_t, 8>;
48#endif
49
50[[maybe_unused]] LIBC_INLINE static void
51inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) {
52 constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE;
53 constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE;
54 constexpr size_t SIZE = sizeof(uint256_t);
55 // Prefetch one cache line
56 prefetch_for_write(dst: dst + x86::K_ONE_CACHELINE_SIZE);
57 if (count <= 128)
58 return generic::Memset<uint512_t>::head_tail(dst, value, count);
59 // Prefetch the second cache line
60 prefetch_for_write(dst: dst + x86::K_TWO_CACHELINES_SIZE);
61 // Aligned loop
62 generic::Memset<uint256_t>::block(dst, value);
63 align_to_next_boundary<32>(p1&: dst, count);
64 if (count <= 192) {
65 return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
66 } else {
67 generic::MemsetSequence<uint512_t, uint256_t>::block(dst, value);
68 size_t offset = 96;
69 while (offset + PREFETCH_DEGREE + SIZE <= count) {
70 prefetch_for_write(dst: dst + offset + PREFETCH_DISTANCE);
71 prefetch_for_write(dst: dst + offset + PREFETCH_DISTANCE +
72 x86::K_ONE_CACHELINE_SIZE);
73 for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE)
74 generic::Memset<uint256_t>::block(dst: dst + offset, value);
75 }
76 generic::Memset<uint256_t>::loop_and_tail_offset(dst, value, count, offset);
77 }
78}
79
80[[maybe_unused]] LIBC_INLINE static void
81inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
82 if (count == 0)
83 return;
84 if (count == 1)
85 return generic::Memset<uint8_t>::block(dst, value);
86 if (count == 2)
87 return generic::Memset<uint16_t>::block(dst, value);
88 if (count == 3)
89 return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value);
90 if (count <= 8)
91 return generic::Memset<uint32_t>::head_tail(dst, value, count);
92 if (count <= 16)
93 return generic::Memset<uint64_t>::head_tail(dst, value, count);
94 if (count <= 32)
95 return generic::Memset<uint128_t>::head_tail(dst, value, count);
96 if (count <= 64)
97 return generic::Memset<uint256_t>::head_tail(dst, value, count);
98 if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET)
99 return inline_memset_x86_gt64_sw_prefetching(dst, value, count);
100 if (count <= 128)
101 return generic::Memset<uint512_t>::head_tail(dst, value, count);
102 // Aligned loop
103 generic::Memset<uint256_t>::block(dst, value);
104 align_to_next_boundary<32>(p1&: dst, count);
105 return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
106}
107} // namespace LIBC_NAMESPACE
108
109#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
110

// Source: libc/src/string/memory_utils/x86_64/inline_memset.h