//===-- tsd_shared.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_TSD_SHARED_H_
#define SCUDO_TSD_SHARED_H_

#include "tsd.h"

#include "string_utils.h"

#if SCUDO_HAS_PLATFORM_TLS_SLOT
// This is a platform-provided header that needs to be on the include path when
// Scudo is compiled. It must declare a function with the prototype:
//   uintptr_t *getPlatformAllocatorTlsSlot()
// that returns the address of a thread-local word of storage reserved for
// Scudo, which must be zero-initialized in newly created threads.
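//
// A minimal sketch of what such a header could provide (the variable name is
// hypothetical; only the function prototype above is mandated, and a real
// platform would typically hand out a slot reserved in its own TLS area):
//
//   inline thread_local uintptr_t ScudoAllocatorTlsSlot; // zero-initialized
//   inline uintptr_t *getPlatformAllocatorTlsSlot() {
//     return &ScudoAllocatorTlsSlot;
//   }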
#include "scudo_platform_tls_slot.h"
#endif

namespace scudo {

template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount>
struct TSDRegistrySharedT {
  using ThisT = TSDRegistrySharedT<Allocator, TSDsArraySize, DefaultTSDCount>;

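  // RAII wrapper around a TSD: the constructor obtains a TSD from the registry
  // and locks it, and the destructor releases the lock. A sketch of typical
  // caller code (names are illustrative only):
  //
  //   typename TSDRegistryT::ScopedTSD TSD(*Registry);
  //   TSD->getCache().allocate(...);
  //   // The TSD lock is released when TSD goes out of scope.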
  struct ScopedTSD {
    ALWAYS_INLINE ScopedTSD(ThisT &TSDRegistry) {
      CurrentTSD = TSDRegistry.getTSDAndLock();
      DCHECK_NE(CurrentTSD, nullptr);
    }

    ~ScopedTSD() { CurrentTSD->unlock(); }

    TSD<Allocator> &operator*() { return *CurrentTSD; }

    TSD<Allocator> *operator->() {
      CurrentTSD->assertLocked(/*BypassCheck=*/false);
      return CurrentTSD;
    }

  private:
    TSD<Allocator> *CurrentTSD;
  };

  void init(Allocator *Instance) REQUIRES(Mutex) {
    DCHECK(!Initialized);
    Instance->init();
    for (u32 I = 0; I < TSDsArraySize; I++)
      TSDs[I].init(Instance);
    const u32 NumberOfCPUs = getNumberOfCPUs();
    setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
                                        : Min(NumberOfCPUs, DefaultTSDCount));
    Initialized = true;
  }

  void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) {
    ScopedLock L(Mutex);
    if (LIKELY(Initialized))
      return;
    init(Instance); // Sets Initialized.
  }

  void unmapTestOnly(Allocator *Instance) EXCLUDES(Mutex) {
    for (u32 I = 0; I < TSDsArraySize; I++) {
      TSDs[I].commitBack(Instance);
      TSDs[I] = {};
    }
    setCurrentTSD(nullptr);
    ScopedLock L(Mutex);
    Initialized = false;
  }

  void drainCaches(Allocator *Instance) {
    ScopedLock L(MutexTSDs);
    for (uptr I = 0; I < NumberOfTSDs; ++I) {
      TSDs[I].lock();
      Instance->drainCache(&TSDs[I]);
      TSDs[I].unlock();
    }
  }

  ALWAYS_INLINE void initThreadMaybe(Allocator *Instance,
                                     UNUSED bool MinimalInit) {
    if (LIKELY(getCurrentTSD()))
      return;
    initThread(Instance);
  }

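  // disable() acquires the registry mutex and then every TSD lock, blocking
  // any thread that would otherwise grab a TSD; enable() releases them in the
  // reverse order. Callers are expected to pair the two, typically around
  // operations that need the allocator to be quiescent (e.g. fork).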
  void disable() NO_THREAD_SAFETY_ANALYSIS {
    Mutex.lock();
    for (u32 I = 0; I < TSDsArraySize; I++)
      TSDs[I].lock();
  }

  void enable() NO_THREAD_SAFETY_ANALYSIS {
    for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--)
      TSDs[I].unlock();
    Mutex.unlock();
  }

  bool setOption(Option O, sptr Value) {
    if (O == Option::MaxTSDsCount)
      return setNumberOfTSDs(static_cast<u32>(Value));
    if (O == Option::ThreadDisableMemInit)
      setDisableMemInit(Value);
    // Not supported by the TSD Registry, but not an error either.
    return true;
  }

  bool getDisableMemInit() const { return *getTlsPtr() & 1; }

  void getStats(ScopedString *Str) EXCLUDES(MutexTSDs) {
    ScopedLock L(MutexTSDs);

    Str->append("Stats: SharedTSDs: %u available; total %u\n", NumberOfTSDs,
                TSDsArraySize);
    for (uptr I = 0; I < NumberOfTSDs; ++I) {
      TSDs[I].lock();
      // Theoretically, we want to mark TSD::lock()/TSD::unlock() with proper
      // thread annotations. However, given the TSD is only locked on shared
      // path, do the assertion in a separate path to avoid confusing the
      // analyzer.
      TSDs[I].assertLocked(/*BypassCheck=*/true);
      Str->append("  Shared TSD[%zu]:\n", I);
      TSDs[I].getCache().getStats(Str);
      TSDs[I].unlock();
    }
  }

private:
  ALWAYS_INLINE TSD<Allocator> *getTSDAndLock() NO_THREAD_SAFETY_ANALYSIS {
    TSD<Allocator> *TSD = getCurrentTSD();
    DCHECK(TSD);
    // Try to lock the currently associated context.
    if (TSD->tryLock())
      return TSD;
    // If that fails, go down the slow path.
    if (TSDsArraySize == 1U) {
      // Only 1 TSD, no need to go any further.
      // The compiler will optimize this one way or the other.
      TSD->lock();
      return TSD;
    }
    return getTSDAndLockSlow(TSD);
  }

  ALWAYS_INLINE uptr *getTlsPtr() const {
#if SCUDO_HAS_PLATFORM_TLS_SLOT
    return reinterpret_cast<uptr *>(getPlatformAllocatorTlsSlot());
#else
    static thread_local uptr ThreadTSD;
    return &ThreadTSD;
#endif
  }

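  // The thread-local word returned by getTlsPtr() packs two values: bit 0
  // holds the thread's memory-init-disable flag, and the remaining bits hold
  // the pointer to the current TSD. The assertion below guarantees that TSDs
  // are at least 2-byte aligned, so bit 0 of the pointer is always free to
  // carry the flag.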
  static_assert(alignof(TSD<Allocator>) >= 2, "");

  ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) {
    *getTlsPtr() &= 1;
    *getTlsPtr() |= reinterpret_cast<uptr>(CurrentTSD);
  }

  ALWAYS_INLINE TSD<Allocator> *getCurrentTSD() {
    return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL);
  }

  bool setNumberOfTSDs(u32 N) EXCLUDES(MutexTSDs) {
    ScopedLock L(MutexTSDs);
    if (N < NumberOfTSDs)
      return false;
    if (N > TSDsArraySize)
      N = TSDsArraySize;
    NumberOfTSDs = N;
    NumberOfCoPrimes = 0;
    // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
    // array of TSDs in a random order. For details, see:
    // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
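    // For example, with NumberOfTSDs == 6 the coprimes in [1, 6] are {1, 5}:
    // starting from any index and repeatedly adding 5 modulo 6 visits all six
    // TSDs exactly once before the walk repeats.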
    for (u32 I = 0; I < N; I++) {
      u32 A = I + 1;
      u32 B = N;
      // Find the GCD between I + 1 and N. If 1, they are coprimes.
      while (B != 0) {
        const u32 T = A;
        A = B;
        B = T % B;
      }
      if (A == 1)
        CoPrimes[NumberOfCoPrimes++] = I + 1;
    }
    return true;
  }

  void setDisableMemInit(bool B) {
    *getTlsPtr() &= ~1ULL;
    *getTlsPtr() |= B;
  }

  NOINLINE void initThread(Allocator *Instance) NO_THREAD_SAFETY_ANALYSIS {
    initOnceMaybe(Instance);
    // Initial context assignment is done in a plain round-robin fashion.
    const u32 Index =
        atomic_fetch_add(&CurrentIndex, 1U, memory_order_relaxed);
    setCurrentTSD(&TSDs[Index % NumberOfTSDs]);
    Instance->callPostInitCallback();
  }

  // TSDs is an array of locks, which thread-safety analysis does not support
  // marking as a capability.
  NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD)
      EXCLUDES(MutexTSDs) {
    // Use the Precedence of the current TSD as our random seed. Since we are
    // in the slow path, it means that tryLock failed, and as a result it's
    // very likely that said Precedence is non-zero.
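    // (Precedence is a timestamp recorded on a contended TSD::tryLock(), see
    // tsd.h; a lower non-zero value therefore points at a TSD that has been
    // contended for longer.)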
    const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
    u32 N, Inc;
    {
      ScopedLock L(MutexTSDs);
      N = NumberOfTSDs;
      DCHECK_NE(NumberOfCoPrimes, 0U);
      Inc = CoPrimes[R % NumberOfCoPrimes];
    }
    if (N > 1U) {
      u32 Index = R % N;
      uptr LowestPrecedence = UINTPTR_MAX;
      TSD<Allocator> *CandidateTSD = nullptr;
      // Go randomly through at most 4 contexts and find a candidate.
      for (u32 I = 0; I < Min(4U, N); I++) {
        if (TSDs[Index].tryLock()) {
          setCurrentTSD(&TSDs[Index]);
          return &TSDs[Index];
        }
        const uptr Precedence = TSDs[Index].getPrecedence();
        // A 0 precedence here means another thread just locked this TSD.
        if (Precedence && Precedence < LowestPrecedence) {
          CandidateTSD = &TSDs[Index];
          LowestPrecedence = Precedence;
        }
        Index += Inc;
        if (Index >= N)
          Index -= N;
      }
      if (CandidateTSD) {
        CandidateTSD->lock();
        setCurrentTSD(CandidateTSD);
        return CandidateTSD;
      }
    }
    // Last resort, stick with the current one.
    CurrentTSD->lock();
    return CurrentTSD;
  }

  atomic_u32 CurrentIndex = {};
  u32 NumberOfTSDs GUARDED_BY(MutexTSDs) = 0;
  u32 NumberOfCoPrimes GUARDED_BY(MutexTSDs) = 0;
  u32 CoPrimes[TSDsArraySize] GUARDED_BY(MutexTSDs) = {};
  bool Initialized GUARDED_BY(Mutex) = false;
  HybridMutex Mutex;
  HybridMutex MutexTSDs;
  TSD<Allocator> TSDs[TSDsArraySize];
};

} // namespace scudo

#endif // SCUDO_TSD_SHARED_H_