// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file defines an Arena allocator for better allocation performance.

#ifndef GOOGLE_PROTOBUF_ARENA_IMPL_H__
#define GOOGLE_PROTOBUF_ARENA_IMPL_H__

#include <atomic>
#include <limits>

#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/logging.h>

#ifdef ADDRESS_SANITIZER
#include <sanitizer/asan_interface.h>
#endif  // ADDRESS_SANITIZER

#include <google/protobuf/port_def.inc>


namespace google {
namespace protobuf {
namespace internal {

inline size_t AlignUpTo8(size_t n) {
  // Align n to next multiple of 8 (from Hacker's Delight, Chapter 3.)
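  // Worked example (illustrative): AlignUpTo8(13) = (13 + 7) & ~7 = 16, while
  // AlignUpTo8(16) stays 16; adding 7 and masking off the low three bits
  // rounds any size up to the next 8-byte boundary.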
  return (n + 7) & static_cast<size_t>(-8);
}

using LifecycleId = int64_t;

// This class provides the core Arena memory allocation library. Different
// implementations only need to implement the public interface below.
// Arena is not a template type, as that would only be useful if all protos
// in turn were templates, which will not (and cannot) happen. However, by
// separating the memory allocation part from the cruft of the API users
// expect, we can use #ifdef to select the best implementation based on
// hardware / OS.
class PROTOBUF_EXPORT ArenaImpl {
 public:
  struct Options {
    size_t start_block_size;
    size_t max_block_size;
    char* initial_block;
    size_t initial_block_size;
    void* (*block_alloc)(size_t);
    void (*block_dealloc)(void*, size_t);

    template <typename O>
    explicit Options(const O& options)
        : start_block_size(options.start_block_size),
          max_block_size(options.max_block_size),
          initial_block(options.initial_block),
          initial_block_size(options.initial_block_size),
          block_alloc(options.block_alloc),
          block_dealloc(options.block_dealloc) {}
  };
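
  // A hypothetical options provider, shown only to illustrate the duck-typed
  // constructors above and below; the type name and field values here are
  // illustrative assumptions, not part of this API:
  //
  //   static void* MyBlockAlloc(size_t n) { return ::operator new(n); }
  //   static void MyBlockDealloc(void* p, size_t) { ::operator delete(p); }
  //
  //   struct MyArenaOptions {
  //     size_t start_block_size = 256;
  //     size_t max_block_size = 8192;
  //     char* initial_block = nullptr;
  //     size_t initial_block_size = 0;
  //     void* (*block_alloc)(size_t) = &MyBlockAlloc;
  //     void (*block_dealloc)(void*, size_t) = &MyBlockDealloc;
  //   };
  //
  // Any type with these six members can be passed to Options(const O&) and
  // ArenaImpl(const O&).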

  template <typename O>
  explicit ArenaImpl(const O& options) : options_(options) {
    if (options_.initial_block != NULL && options_.initial_block_size > 0) {
      GOOGLE_CHECK_GE(options_.initial_block_size, sizeof(Block))
          << ": Initial block size too small for header.";
      initial_block_ = reinterpret_cast<Block*>(options_.initial_block);
    } else {
      initial_block_ = NULL;
    }

    Init();
  }

  // Destructor deletes all owned heap allocated objects, and destructs objects
  // that have non-trivial destructors, except for proto2 message objects whose
  // destructors can be skipped. Also, frees all blocks except the initial block
  // if it was passed in.
  ~ArenaImpl();

  uint64 Reset();

  uint64 SpaceAllocated() const;
  uint64 SpaceUsed() const;

  void* AllocateAligned(size_t n) {
    SerialArena* arena;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
      return arena->AllocateAligned(n);
    } else {
      return AllocateAlignedFallback(n);
    }
  }

  // This function allocates n bytes if the common happy case is true and
  // returns true. Otherwise it does nothing and returns false. These strange
  // semantics are necessary to allow callers to write functions that only
  // have fallback function calls in tail position; this substantially improves
  // the generated code for the happy path (see the usage sketch below).
  PROTOBUF_ALWAYS_INLINE bool MaybeAllocateAligned(size_t n, void** out) {
    SerialArena* a;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFromThreadCache(&a))) {
      return a->MaybeAllocateAligned(n, out);
    }
    return false;
  }
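
  // A minimal usage sketch of the pattern described above (illustrative only;
  // AllocateForCaller is a hypothetical helper, not part of this API). The
  // fast path is fully inlined and the fallback call stays in tail position:
  //
  //   void* AllocateForCaller(ArenaImpl* impl, size_t n) {
  //     size_t aligned = AlignUpTo8(n);
  //     void* ptr;
  //     if (impl->MaybeAllocateAligned(aligned, &ptr)) {
  //       return ptr;                           // common happy case
  //     }
  //     return impl->AllocateAligned(aligned);  // fallback in tail position
  //   }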

  void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*));

  // Add object pointer and cleanup function pointer to the list.
  void AddCleanup(void* elem, void (*cleanup)(void*));

 private:
  friend class ArenaBenchmark;

  void* AllocateAlignedFallback(size_t n);
  void* AllocateAlignedAndAddCleanupFallback(size_t n, void (*cleanup)(void*));
  void AddCleanupFallback(void* elem, void (*cleanup)(void*));

  // Node contains the ptr of the object to be cleaned up and the associated
  // cleanup function ptr.
  struct CleanupNode {
    void* elem;              // Pointer to the object to be cleaned up.
    void (*cleanup)(void*);  // Function pointer to the destructor or deleter.
  };

  // Cleanup uses a chunked linked list, to reduce pointer chasing.
  struct CleanupChunk {
    static size_t SizeOf(size_t i) {
      return sizeof(CleanupChunk) + (sizeof(CleanupNode) * (i - 1));
    }
    size_t size;           // Total elements in the list.
    CleanupChunk* next;    // Next node in the list.
    CleanupNode nodes[1];  // True length is |size|.
  };
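
  // Worked size example (illustrative only; exact numbers depend on the
  // target ABI): on a typical LP64 platform sizeof(CleanupNode) == 16 and
  // sizeof(CleanupChunk) == 32, so SizeOf(8) == 32 + 16 * 7 == 144 bytes,
  // i.e. one chunk header plus storage for 8 nodes, the first of which is
  // the embedded nodes[1] element.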

  class Block;

  // A thread-unsafe Arena that can only be used within its owning thread.
  class PROTOBUF_EXPORT SerialArena {
   public:
    // The allocate/free methods here are a little strange, since SerialArena
    // is allocated inside a Block which it also manages. This is to avoid
    // doing an extra allocation for the SerialArena itself.

    // Creates a new SerialArena inside Block* and returns it.
    static SerialArena* New(Block* b, void* owner, ArenaImpl* arena);

    // Destroys this SerialArena, freeing all blocks with the given dealloc
    // function, except any block equal to |initial_block|.
    static uint64 Free(SerialArena* serial, Block* initial_block,
                       void (*block_dealloc)(void*, size_t));

    void CleanupList();
    uint64 SpaceUsed() const;

    bool HasSpace(size_t n) { return n <= static_cast<size_t>(limit_ - ptr_); }

    void* AllocateAligned(size_t n) {
      GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
      GOOGLE_DCHECK_GE(limit_, ptr_);
      if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) {
        return AllocateAlignedFallback(n);
      }
      void* ret = ptr_;
      ptr_ += n;
#ifdef ADDRESS_SANITIZER
      ASAN_UNPOISON_MEMORY_REGION(ret, n);
#endif  // ADDRESS_SANITIZER
      return ret;
    }

    // Allocate space if the current region provides enough space.
    bool MaybeAllocateAligned(size_t n, void** out) {
      GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
      GOOGLE_DCHECK_GE(limit_, ptr_);
      if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) return false;
      void* ret = ptr_;
      ptr_ += n;
#ifdef ADDRESS_SANITIZER
      ASAN_UNPOISON_MEMORY_REGION(ret, n);
#endif  // ADDRESS_SANITIZER
      *out = ret;
      return true;
    }

    void AddCleanup(void* elem, void (*cleanup)(void*)) {
      if (PROTOBUF_PREDICT_FALSE(cleanup_ptr_ == cleanup_limit_)) {
        AddCleanupFallback(elem, cleanup);
        return;
      }
      cleanup_ptr_->elem = elem;
      cleanup_ptr_->cleanup = cleanup;
      cleanup_ptr_++;
    }

    void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)) {
      void* ret = AllocateAligned(n);
      AddCleanup(ret, cleanup);
      return ret;
    }

    void* owner() const { return owner_; }
    SerialArena* next() const { return next_; }
    void set_next(SerialArena* next) { next_ = next; }

   private:
    void* AllocateAlignedFallback(size_t n);
    void AddCleanupFallback(void* elem, void (*cleanup)(void*));
    void CleanupListFallback();

    ArenaImpl* arena_;       // Containing arena.
    void* owner_;            // &ThreadCache of this thread.
    Block* head_;            // Head of linked list of blocks.
    CleanupChunk* cleanup_;  // Head of cleanup list.
    SerialArena* next_;      // Next SerialArena in this linked list.

    // Next pointer to allocate from. Always 8-byte aligned. Points inside
    // head_ (and head_->pos will always be non-canonical). We keep these
    // here to reduce indirection.
    char* ptr_;
    char* limit_;

    // Next CleanupList members to append to. These point inside cleanup_.
    CleanupNode* cleanup_ptr_;
    CleanupNode* cleanup_limit_;
  };

  // Blocks are variable length malloc-ed objects. The following structure
  // describes the common header for all blocks.
  class PROTOBUF_EXPORT Block {
   public:
    Block(size_t size, Block* next);

    char* Pointer(size_t n) {
      GOOGLE_DCHECK(n <= size_);
      return reinterpret_cast<char*>(this) + n;
    }

    Block* next() const { return next_; }
    size_t pos() const { return pos_; }
    size_t size() const { return size_; }
    void set_pos(size_t pos) { pos_ = pos; }

   private:
    Block* next_;  // Next block for this thread.
    size_t pos_;
    size_t size_;
    // data follows
  };
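
  // Rough layout of a single heap block (an illustrative sketch only):
  //
  //   +--------------------------+  <- Pointer(0), i.e. the Block object
  //   | Block header             |     (next_, pos_, size_)
  //   +--------------------------+  <- Pointer(kBlockHeaderSize)
  //   | arena-allocated data ... |
  //   +--------------------------+  <- Pointer(size_), one past the end
  //
  // pos_ records how far the block has been filled, except that the head
  // block of a SerialArena is tracked through ptr_/limit_ instead (hence the
  // "non-canonical" note above).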

  struct ThreadCache {
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
    // If we are using the ThreadLocalStorage class to store the ThreadCache,
    // then the ThreadCache's default constructor has to be responsible for
    // initializing it.
    ThreadCache() : last_lifecycle_id_seen(-1), last_serial_arena(NULL) {}
#endif

    // The ThreadCache is considered valid as long as this matches the
    // lifecycle_id of the arena being used.
    LifecycleId last_lifecycle_id_seen;
    SerialArena* last_serial_arena;
  };
  static std::atomic<LifecycleId> lifecycle_id_generator_;
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
  // The Android NDK does not support the GOOGLE_THREAD_LOCAL keyword, so we
  // use a custom thread-local storage class we implemented.
  // iOS also does not support the GOOGLE_THREAD_LOCAL keyword.
  static ThreadCache& thread_cache();
#elif defined(PROTOBUF_USE_DLLS)
  // Thread local variables cannot be exposed through DLL interface but we can
  // wrap them in static functions.
  static ThreadCache& thread_cache();
#else
  static GOOGLE_THREAD_LOCAL ThreadCache thread_cache_;
  static ThreadCache& thread_cache() { return thread_cache_; }
#endif

  void Init();

  // Free all blocks and return the total space used, which is the sum of the
  // sizes of all the allocated blocks.
  uint64 FreeBlocks();
  // Delete or Destruct all objects owned by the arena.
  void CleanupList();

  inline void CacheSerialArena(SerialArena* serial) {
    thread_cache().last_serial_arena = serial;
    thread_cache().last_lifecycle_id_seen = lifecycle_id_;
    // TODO(haberman): evaluate whether we would gain efficiency by getting rid
    // of hint_. It's the only write we do to ArenaImpl in the allocation path,
    // which will dirty the cache line.

    hint_.store(serial, std::memory_order_release);
  }

  std::atomic<SerialArena*>
      threads_;                     // Pointer to a linked list of SerialArena.
  std::atomic<SerialArena*> hint_;  // Fast thread-local block access
  std::atomic<size_t> space_allocated_;  // Total size of all allocated blocks.

  Block* initial_block_;  // If non-NULL, points to the block that came from
                          // user data.

  Block* NewBlock(Block* last_block, size_t min_bytes);

  SerialArena* GetSerialArena();
  PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFast(SerialArena** arena) {
    if (GetSerialArenaFromThreadCache(arena)) return true;

    // Check whether we own the last accessed SerialArena on this arena. This
    // fast path optimizes the case where a single thread uses multiple arenas.
    ThreadCache* tc = &thread_cache();
    SerialArena* serial = hint_.load(std::memory_order_acquire);
    if (PROTOBUF_PREDICT_TRUE(serial != NULL && serial->owner() == tc)) {
      *arena = serial;
      return true;
    }
    return false;
  }

  PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFromThreadCache(
      SerialArena** arena) {
    // If this thread already owns a block in this arena then try to use that.
    // This fast path optimizes the case where multiple threads allocate from
    // the same arena.
    ThreadCache* tc = &thread_cache();
    if (PROTOBUF_PREDICT_TRUE(tc->last_lifecycle_id_seen == lifecycle_id_)) {
      *arena = tc->last_serial_arena;
      return true;
    }
    return false;
  }
  SerialArena* GetSerialArenaFallback(void* me);
  LifecycleId lifecycle_id_;  // Unique for each arena. Changes on Reset().

  Options options_;

  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArenaImpl);
  // All protos have pointers back to the arena hence Arena must have
  // pointer stability.
  ArenaImpl(ArenaImpl&&) = delete;
  ArenaImpl& operator=(ArenaImpl&&) = delete;

 public:
  // kBlockHeaderSize is sizeof(Block), aligned up to the nearest multiple of 8
  // to protect the invariant that pos is always at a multiple of 8.
  static const size_t kBlockHeaderSize =
      (sizeof(Block) + 7) & static_cast<size_t>(-8);
  static const size_t kSerialArenaSize =
      (sizeof(SerialArena) + 7) & static_cast<size_t>(-8);
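  // For example (illustrative only; sizes depend on the target ABI): on a
  // typical LP64 platform sizeof(Block) is 24 bytes, already a multiple of 8,
  // so kBlockHeaderSize == 24 and the first object placed in a block starts
  // 24 bytes past the block's base address.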
  static_assert(kBlockHeaderSize % 8 == 0,
                "kBlockHeaderSize must be a multiple of 8.");
  static_assert(kSerialArenaSize % 8 == 0,
                "kSerialArenaSize must be a multiple of 8.");
};

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_ARENA_IMPL_H__