//===-- stack_depot.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_STACK_DEPOT_H_
#define SCUDO_STACK_DEPOT_H_

#include "atomic_helpers.h"
#include "common.h"
#include "mutex.h"

namespace scudo {

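// Incremental hash builder in the style of MurmurHash2 (M and R below are the
// standard MurmurHash2 constants). StackDepot uses it to fingerprint stack
// traces one 32-bit word at a time, e.g. (mirroring StackDepot::insert()
// below):
//   MurMur2HashBuilder B;
//   for (uptr *I = Begin; I != End; ++I)
//     B.add(u32(*I) >> 2);
//   u32 Hash = B.get();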
class MurMur2HashBuilder {
  static const u32 M = 0x5bd1e995;
  static const u32 Seed = 0x9747b28c;
  static const u32 R = 24;
  u32 H;

public:
  explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; }
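  // Mix one 32-bit word into the running hash state (one MurmurHash2 round).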
  void add(u32 K) {
    K *= M;
    K ^= K >> R;
    K *= M;
    H *= M;
    H ^= K;
  }
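  // Finalize and return the hash of all words added so far.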
  u32 get() {
    u32 X = H;
    X ^= X >> 13;
    X *= M;
    X ^= X >> 15;
    return X;
  }
};

class alignas(atomic_u64) StackDepot {
  HybridMutex RingEndMu;
  u32 RingEnd = 0;

  // This data structure stores a stack trace for each allocation and
  // deallocation when stack trace recording is enabled. A stack trace may be
  // looked up using a hash of the trace. The lower bits of the hash are an
  // index into the Tab array, which stores an index into the Ring array where
  // the stack traces are stored. As the name implies, Ring is a ring buffer,
  // so a stack trace may wrap around to the start of the array.
  //
  // Each stack trace in Ring is prefixed by a stack trace marker consisting of
  // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames
  // and stack trace markers in the case where instruction pointers are 4-byte
  // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the
  // size of the stack trace in bits 33-63.
  //
  // The insert() function is potentially racy in its accesses to the Tab and
  // Ring arrays, but find() is resilient to races in the sense that, barring
  // hash collisions, it will either return the correct stack trace or no stack
  // trace at all, even if two instances of insert() raced with one another.
  // This is achieved by re-checking the hash of the stack trace before
  // returning the trace.
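  //
  // For example, a trace of N frames with hash H occupies N + 1 consecutive
  // slots of Ring (modulo RingSize): the marker word
  // (u64(N) << 33) | (u64(H) << 1) | 1, followed by the N frame addresses.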

  u32 RingSize = 0;
  u32 RingMask = 0;
  u32 TabMask = 0;
  // This is immediately followed by RingSize atomic_u64 and
  // (TabMask + 1) atomic_u32.
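  // In other words, the buffer holding the depot is laid out as:
  //   [StackDepot][Ring: RingSize x atomic_u64][Tab: (TabMask + 1) x atomic_u32]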

  atomic_u64 *getRing() {
    return reinterpret_cast<atomic_u64 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot));
  }

  atomic_u32 *getTab() {
    return reinterpret_cast<atomic_u32 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot) +
                                          sizeof(atomic_u64) * RingSize);
  }

  const atomic_u64 *getRing() const {
    return reinterpret_cast<const atomic_u64 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot));
  }

  const atomic_u32 *getTab() const {
    return reinterpret_cast<const atomic_u32 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot) +
        sizeof(atomic_u64) * RingSize);
  }

public:
  void init(u32 RingSz, u32 TabSz) {
    DCHECK(isPowerOfTwo(RingSz));
    DCHECK(isPowerOfTwo(TabSz));
    RingSize = RingSz;
    RingMask = RingSz - 1;
    TabMask = TabSz - 1;
  }
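  // Illustrative setup (not part of this header): the owner typically places
  // the depot at the start of a buffer of
  //   sizeof(StackDepot) + sizeof(atomic_u64) * RingSz + sizeof(atomic_u32) * TabSz
  // bytes and then calls init(RingSz, TabSz) with power-of-two sizes.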

  // Ensure that RingSize, RingMask and TabMask are set up in a way that
  // all accesses are within range of BufSize.
  bool isValid(uptr BufSize) const {
    if (!isPowerOfTwo(RingSize))
      return false;
    uptr RingBytes = sizeof(atomic_u64) * RingSize;
    if (RingMask + 1 != RingSize)
      return false;

    if (TabMask == 0)
      return false;
    uptr TabSize = TabMask + 1;
    if (!isPowerOfTwo(TabSize))
      return false;
    uptr TabBytes = sizeof(atomic_u32) * TabSize;

    // Subtract and detect underflow.
    if (BufSize < sizeof(StackDepot))
      return false;
    BufSize -= sizeof(StackDepot);
    if (BufSize < TabBytes)
      return false;
    BufSize -= TabBytes;
    if (BufSize < RingBytes)
      return false;
    return BufSize == RingBytes;
  }

  // Insert hash of the stack trace [Begin, End) into the stack depot, and
  // return the hash.
  u32 insert(uptr *Begin, uptr *End) {
    auto *Tab = getTab();
    auto *Ring = getRing();

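    // Hash the stack trace, dropping the low two bits of each frame address;
    // as noted above, they carry no information when instruction pointers are
    // 4-byte aligned.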
    MurMur2HashBuilder B;
    for (uptr *I = Begin; I != End; ++I)
      B.add(u32(*I) >> 2);
    u32 Hash = B.get();

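    // Fast path: if the slot Tab[Pos] points at already starts with this
    // trace's marker word, the trace is assumed to be present already.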
    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1;
    if (Entry == Id)
      return Hash;

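    // Slow path: under the lock, point Tab[Pos] at RingEnd and write the
    // marker word followed by the frames there. find() tolerates the racy
    // stores by re-checking the hash of whatever it reads back.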
    ScopedLock Lock(RingEndMu);
    RingPos = RingEnd;
    atomic_store_relaxed(&Tab[Pos], RingPos);
    atomic_store_relaxed(&Ring[RingPos], Id);
    for (uptr *I = Begin; I != End; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      atomic_store_relaxed(&Ring[RingPos], *I);
    }
    RingEnd = (RingPos + 1) & RingMask;
    return Hash;
  }

  // Look up a stack trace by hash. Returns true if successful. The trace may
  // be accessed via at() with indexes between *RingPosPtr and
  // *RingPosPtr + *SizePtr.
  bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const {
    auto *Tab = getTab();
    auto *Ring = getRing();

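    // Locate the candidate marker word via Tab and check that it carries the
    // tag bit and the expected hash.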
    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    if (RingPos >= RingSize)
      return false;
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 HashWithTagBit = (u64(Hash) << 1) | 1;
    if ((Entry & 0x1ffffffff) != HashWithTagBit)
      return false;
    u32 Size = u32(Entry >> 33);
    if (Size >= RingSize)
      return false;
    *RingPosPtr = (RingPos + 1) & RingMask;
    *SizePtr = Size;
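    // Re-hash the frames we are about to return and require a match; this
    // rejects entries whose frames were overwritten by a racing insert()
    // after the marker word was checked.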
    MurMur2HashBuilder B;
    for (uptr I = 0; I != Size; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2);
    }
    return B.get() == Hash;
  }

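  // Return the word stored at RingPos (modulo the ring size); RingPos values
  // are typically those handed out by find().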
  u64 at(uptr RingPos) const {
    auto *Ring = getRing();
    return atomic_load_relaxed(&Ring[RingPos & RingMask]);
  }

  // This is done for the purpose of fork safety in multithreaded programs and
  // does not fully disable StackDepot. In particular, find() still works and
  // only insert() is blocked.
  void disable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.lock(); }

  void enable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.unlock(); }
};

// We need StackDepot to be aligned to 8 bytes so the ring we store after it
// is correctly aligned.
static_assert(sizeof(StackDepot) % alignof(atomic_u64) == 0);

} // namespace scudo

#endif // SCUDO_STACK_DEPOT_H_