| 1 | //===-- sanitizer_allocator_size_class_map.h --------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Part of the Sanitizer Allocator. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | #ifndef SANITIZER_ALLOCATOR_H |
| 13 | #error This file must be included inside sanitizer_allocator.h |
| 14 | #endif |
| 15 | |
| 16 | // SizeClassMap maps allocation sizes into size classes and back. |
| 17 | // Class 0 always corresponds to size 0. |
| 18 | // The other sizes are controlled by the template parameters: |
| 19 | // kMinSizeLog: defines the class 1 as 2^kMinSizeLog. |
| 20 | // kMaxSizeLog: defines the last class as 2^kMaxSizeLog. |
| 21 | // kMidSizeLog: the classes starting from 1 increase with step |
| 22 | // 2^kMinSizeLog until 2^kMidSizeLog. |
| 23 | // kNumBits: the number of non-zero bits in sizes after 2^kMidSizeLog. |
| 24 | // E.g. with kNumBits==3 all size classes after 2^kMidSizeLog |
| 25 | // look like 0b1xx0..0, where x is either 0 or 1. |
| 26 | // |
| 27 | // Example: kNumBits=3, kMinSizeLog=4, kMidSizeLog=8, kMaxSizeLog=17: |
| 28 | // |
| 29 | // Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16). |
| 30 | // Next 4 classes: 256 + i * 64 (i = 1 to 4). |
| 31 | // Next 4 classes: 512 + i * 128 (i = 1 to 4). |
| 32 | // ... |
| 33 | // Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4). |
| 34 | // Last class corresponds to kMaxSize = 1 << kMaxSizeLog. |
| 35 | // |
| 36 | // This structure of the size class map gives us: |
| 37 | // - Efficient table-free class-to-size and size-to-class functions. |
| 38 | // - Difference between two consequent size classes is between 14% and 25% |
| 39 | // |
| 40 | // This class also gives a hint to a thread-caching allocator about the amount |
| 41 | // of chunks that need to be cached per-thread: |
| 42 | // - kMaxNumCachedHint is a hint for maximal number of chunks per size class. |
| 43 | // The actual number is computed in TransferBatch. |
| 44 | // - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class. |
| 45 | // |
| 46 | // Part of output of SizeClassMap::Print(): |
| 47 | // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0 |
| 48 | // c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1 |
| 49 | // c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2 |
| 50 | // c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3 |
| 51 | // c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4 |
| 52 | // c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5 |
| 53 | // c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6 |
| 54 | // c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7 |
| 55 | // |
| 56 | // c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8 |
| 57 | // c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9 |
| 58 | // c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10 |
| 59 | // c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11 |
| 60 | // c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12 |
| 61 | // c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13 |
| 62 | // c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14 |
| 63 | // c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15 |
| 64 | // |
| 65 | // c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16 |
| 66 | // c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17 |
| 67 | // c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18 |
| 68 | // c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19 |
| 69 | // |
| 70 | // c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20 |
| 71 | // c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21 |
| 72 | // c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22 |
| 73 | // c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23 |
| 74 | // |
| 75 | // c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24 |
| 76 | // c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25 |
| 77 | // c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26 |
| 78 | // c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27 |
| 79 | // |
| 80 | // ... |
| 81 | // |
| 82 | // c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48 |
| 83 | // c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49 |
| 84 | // c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50 |
| 85 | // c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51 |
| 86 | // |
| 87 | // c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52 |
| 88 | // |
| 89 | // |
| 90 | // Another example (kNumBits=2): |
| 91 | // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0 |
| 92 | // c01 => s: 32 diff: +32 00% l 5 cached: 64 2048; id 1 |
| 93 | // c02 => s: 64 diff: +32 100% l 6 cached: 64 4096; id 2 |
| 94 | // c03 => s: 96 diff: +32 50% l 6 cached: 64 6144; id 3 |
| 95 | // c04 => s: 128 diff: +32 33% l 7 cached: 64 8192; id 4 |
| 96 | // c05 => s: 160 diff: +32 25% l 7 cached: 64 10240; id 5 |
| 97 | // c06 => s: 192 diff: +32 20% l 7 cached: 64 12288; id 6 |
| 98 | // c07 => s: 224 diff: +32 16% l 7 cached: 64 14336; id 7 |
| 99 | // c08 => s: 256 diff: +32 14% l 8 cached: 64 16384; id 8 |
| 100 | // c09 => s: 384 diff: +128 50% l 8 cached: 42 16128; id 9 |
| 101 | // c10 => s: 512 diff: +128 33% l 9 cached: 32 16384; id 10 |
| 102 | // c11 => s: 768 diff: +256 50% l 9 cached: 21 16128; id 11 |
| 103 | // c12 => s: 1024 diff: +256 33% l 10 cached: 16 16384; id 12 |
| 104 | // c13 => s: 1536 diff: +512 50% l 10 cached: 10 15360; id 13 |
| 105 | // c14 => s: 2048 diff: +512 33% l 11 cached: 8 16384; id 14 |
| 106 | // c15 => s: 3072 diff: +1024 50% l 11 cached: 5 15360; id 15 |
| 107 | // c16 => s: 4096 diff: +1024 33% l 12 cached: 4 16384; id 16 |
| 108 | // c17 => s: 6144 diff: +2048 50% l 12 cached: 2 12288; id 17 |
| 109 | // c18 => s: 8192 diff: +2048 33% l 13 cached: 2 16384; id 18 |
| 110 | // c19 => s: 12288 diff: +4096 50% l 13 cached: 1 12288; id 19 |
| 111 | // c20 => s: 16384 diff: +4096 33% l 14 cached: 1 16384; id 20 |
| 112 | // c21 => s: 24576 diff: +8192 50% l 14 cached: 1 24576; id 21 |
| 113 | // c22 => s: 32768 diff: +8192 33% l 15 cached: 1 32768; id 22 |
| 114 | // c23 => s: 49152 diff: +16384 50% l 15 cached: 1 49152; id 23 |
| 115 | // c24 => s: 65536 diff: +16384 33% l 16 cached: 1 65536; id 24 |
| 116 | // c25 => s: 98304 diff: +32768 50% l 16 cached: 1 98304; id 25 |
| 117 | // c26 => s: 131072 diff: +32768 33% l 17 cached: 1 131072; id 26 |
| 118 | |
| 119 | template <uptr kNumBits, uptr kMinSizeLog, uptr kMidSizeLog, uptr kMaxSizeLog, |
| 120 | uptr kMaxNumCachedHintT, uptr kMaxBytesCachedLog> |
| 121 | class SizeClassMap { |
| 122 | static const uptr kMinSize = 1 << kMinSizeLog; |
| 123 | static const uptr kMidSize = 1 << kMidSizeLog; |
| 124 | static const uptr kMidClass = kMidSize / kMinSize; |
| 125 | static const uptr S = kNumBits - 1; |
| 126 | static const uptr M = (1 << S) - 1; |
| 127 | |
| 128 | public: |
| 129 | // kMaxNumCachedHintT is a power of two. It serves as a hint |
| 130 | // for the size of TransferBatch, the actual size could be a bit smaller. |
| 131 | static const uptr kMaxNumCachedHint = kMaxNumCachedHintT; |
| 132 | COMPILER_CHECK((kMaxNumCachedHint & (kMaxNumCachedHint - 1)) == 0); |
| 133 | |
| 134 | static const uptr kMaxSize = 1UL << kMaxSizeLog; |
| 135 | static const uptr kNumClasses = |
| 136 | kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1 + 1; |
| 137 | static const uptr kLargestClassID = kNumClasses - 2; |
| 138 | static const uptr kBatchClassID = kNumClasses - 1; |
| 139 | COMPILER_CHECK(kNumClasses >= 16 && kNumClasses <= 256); |
| 140 | static const uptr kNumClassesRounded = |
| 141 | kNumClasses <= 32 ? 32 : |
| 142 | kNumClasses <= 64 ? 64 : |
| 143 | kNumClasses <= 128 ? 128 : 256; |
| 144 | |
| 145 | static uptr Size(uptr class_id) { |
| 146 | // Estimate the result for kBatchClassID because this class does not know |
| 147 | // the exact size of TransferBatch. It's OK since we are using the actual |
| 148 | // sizeof(TransferBatch) where it matters. |
| 149 | if (UNLIKELY(class_id == kBatchClassID)) |
| 150 | return kMaxNumCachedHint * sizeof(uptr); |
| 151 | if (class_id <= kMidClass) |
| 152 | return kMinSize * class_id; |
| 153 | class_id -= kMidClass; |
| 154 | uptr t = kMidSize << (class_id >> S); |
| 155 | return t + (t >> S) * (class_id & M); |
| 156 | } |
| 157 | |
| 158 | static uptr ClassID(uptr size) { |
| 159 | if (UNLIKELY(size > kMaxSize)) |
| 160 | return 0; |
| 161 | if (size <= kMidSize) |
| 162 | return (size + kMinSize - 1) >> kMinSizeLog; |
| 163 | const uptr l = MostSignificantSetBitIndex(x: size); |
| 164 | const uptr hbits = (size >> (l - S)) & M; |
| 165 | const uptr lbits = size & ((1U << (l - S)) - 1); |
| 166 | const uptr l1 = l - kMidSizeLog; |
| 167 | return kMidClass + (l1 << S) + hbits + (lbits > 0); |
| 168 | } |
| 169 | |
| 170 | static uptr MaxCachedHint(uptr size) { |
| 171 | DCHECK_LE(size, kMaxSize); |
| 172 | if (UNLIKELY(size == 0)) |
| 173 | return 0; |
| 174 | uptr n; |
| 175 | // Force a 32-bit division if the template parameters allow for it. |
| 176 | if (kMaxBytesCachedLog > 31 || kMaxSizeLog > 31) |
| 177 | n = (1UL << kMaxBytesCachedLog) / size; |
| 178 | else |
| 179 | n = (1U << kMaxBytesCachedLog) / static_cast<u32>(size); |
| 180 | return Max<uptr>(a: 1U, b: Min(a: kMaxNumCachedHint, b: n)); |
| 181 | } |
| 182 | |
| 183 | static void Print() { |
| 184 | uptr prev_s = 0; |
| 185 | uptr total_cached = 0; |
| 186 | for (uptr i = 0; i < kNumClasses; i++) { |
| 187 | uptr s = Size(class_id: i); |
| 188 | if (s >= kMidSize / 2 && (s & (s - 1)) == 0) |
| 189 | Printf(format: "\n" ); |
| 190 | uptr d = s - prev_s; |
| 191 | uptr p = prev_s ? (d * 100 / prev_s) : 0; |
| 192 | uptr l = s ? MostSignificantSetBitIndex(x: s) : 0; |
| 193 | uptr cached = MaxCachedHint(size: s) * s; |
| 194 | if (i == kBatchClassID) |
| 195 | d = p = l = 0; |
| 196 | Printf( |
| 197 | format: "c%02zu => s: %zu diff: +%zu %02zu%% l %zu cached: %zu %zu; id %zu\n" , |
| 198 | i, Size(class_id: i), d, p, l, MaxCachedHint(size: s), cached, ClassID(size: s)); |
| 199 | total_cached += cached; |
| 200 | prev_s = s; |
| 201 | } |
| 202 | Printf(format: "Total cached: %zu\n" , total_cached); |
| 203 | } |
| 204 | |
| 205 | static void Validate() { |
| 206 | for (uptr c = 1; c < kNumClasses; c++) { |
| 207 | // Printf("Validate: c%zd\n", c); |
| 208 | uptr s = Size(class_id: c); |
| 209 | CHECK_NE(s, 0U); |
| 210 | if (c == kBatchClassID) |
| 211 | continue; |
| 212 | CHECK_EQ(ClassID(s), c); |
| 213 | if (c < kLargestClassID) |
| 214 | CHECK_EQ(ClassID(s + 1), c + 1); |
| 215 | CHECK_EQ(ClassID(s - 1), c); |
| 216 | CHECK_GT(Size(c), Size(c - 1)); |
| 217 | } |
| 218 | CHECK_EQ(ClassID(kMaxSize + 1), 0); |
| 219 | |
| 220 | for (uptr s = 1; s <= kMaxSize; s++) { |
| 221 | uptr c = ClassID(size: s); |
| 222 | // Printf("s%zd => c%zd\n", s, c); |
| 223 | CHECK_LT(c, kNumClasses); |
| 224 | CHECK_GE(Size(c), s); |
| 225 | if (c > 0) |
| 226 | CHECK_LT(Size(c - 1), s); |
| 227 | } |
| 228 | } |
| 229 | }; |
| 230 | |
| 231 | typedef SizeClassMap<3, 4, 8, 17, 128, 16> DefaultSizeClassMap; |
| 232 | typedef SizeClassMap<3, 4, 8, 17, 64, 14> CompactSizeClassMap; |
| 233 | typedef SizeClassMap<2, 5, 9, 16, 64, 14> VeryCompactSizeClassMap; |
| 234 | |
| 235 | // The following SizeClassMap only holds a way small number of cached entries, |
| 236 | // allowing for denser per-class arrays, smaller memory footprint and usually |
| 237 | // better performances in threaded environments. |
| 238 | typedef SizeClassMap<3, 4, 8, 17, 8, 10> DenseSizeClassMap; |
| 239 | // Similar to VeryCompact map above, this one has a small number of different |
| 240 | // size classes, and also reduced thread-local caches. |
| 241 | typedef SizeClassMap<2, 5, 9, 16, 8, 10> VeryDenseSizeClassMap; |
| 242 | |