/*
 * Copyright 2015 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef wasm_mixed_arena_h
#define wasm_mixed_arena_h

#include <atomic>
#include <cassert>
#include <memory>
#include <mutex>
#include <thread>
#include <type_traits>
#include <vector>

#include "support/alloc.h"

//
// Arena allocation for mixed-type data.
//
// Arena-style bump allocation is important for two reasons: First, so that
// allocation is quick, and second, so that allocated items are close together,
// which is cache-friendly. Arena allocation is also useful for a minor third
// reason, which is to make freeing all the items in an arena very quick.
//
// Each WebAssembly Module has an arena allocator, which should be used
// for all of its AST nodes and so forth. When the Module is destroyed, the
// entire arena is cleaned up.
//
// When allocating an object in an arena, the object's proper constructor
// is called. Note that destructors are not called, because to make the
// arena simple and fast we do not track internal allocations inside it
// (and we can also avoid the need for virtual destructors).
//
// In general, optimization passes avoid allocation as much as possible.
// Many passes only remove or modify nodes anyhow, and others can often
// reuse nodes that are being optimized out. This keeps things
// cache-friendly, and also makes the operations trivially thread-safe.
// In the rare case that a pass does need to allocate, and it is a
// parallel pass (so multiple threads might access the allocator),
// the MixedArena instance will notice if it is on a different thread
// than that arena's original thread, and will perform the allocation
// in a side arena for that other thread. This is transparent to the
// outside; as a result, it is always safe to allocate using a MixedArena,
// no matter which thread you are on. Allocations will of course be
// fastest on the original thread for the arena.
//
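// As a usage sketch (hedged: "Node" here is a hypothetical example type, not
// something defined in this header), a type meant to live in the arena takes
// a MixedArena& in its constructor, since alloc<T>() below passes the arena
// to the object it constructs:
//
//   struct Node {
//     MixedArena& allocator;
//     Node(MixedArena& allocator) : allocator(allocator) {}
//   };
//
//   MixedArena arena;
//   Node* node = arena.alloc<Node>(); // bump-allocated, constructor runs
//   arena.clear();                    // frees all chunks; no destructors run
//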

struct MixedArena {
  // fast bump allocation

  static const size_t CHUNK_SIZE = 32768;
  static const size_t MAX_ALIGN = 16; // allow 128-bit SIMD

  // Each pointer in chunks points to an allocation that is a multiple of
  // CHUNK_SIZE in size - typically one chunk, but possibly more.
  std::vector<void*> chunks;

  size_t index = 0; // in last chunk

  std::thread::id threadId;

  // Multithreaded allocation - each arena is valid on a specific thread.
  // If we are on the wrong thread, we atomically walk the linked list
  // formed by next, adding a new allocator for our thread if necessary.
  std::atomic<MixedArena*> next;

  MixedArena() {
    threadId = std::this_thread::get_id();
    next.store(nullptr);
  }

  // Allocate an amount of space with a guaranteed alignment
  void* allocSpace(size_t size, size_t align) {
    // the bump allocator data should not be modified by multiple threads at
    // once.
    auto myId = std::this_thread::get_id();
    if (myId != threadId) {
      MixedArena* curr = this;
      MixedArena* allocated = nullptr;
      while (myId != curr->threadId) {
        auto seen = curr->next.load();
        if (seen) {
          curr = seen;
          continue;
        }
        // There is a nullptr for next, so we may be able to place a new
        // allocator for us there. But do so carefully, as others may try to
        // do the same. We may waste a few allocations here, but it doesn't
        // matter, as this can only happen while the chain is built up, i.e.,
        // O(# of cores) times per allocator, and our allocators are
        // long-lived.
        if (!allocated) {
          allocated = new MixedArena(); // has our thread id
        }
        if (curr->next.compare_exchange_strong(seen, allocated)) {
          // we replaced it, so we are the next in the chain
          // we can forget about allocated, it is owned by the chain now
          allocated = nullptr;
          break;
        }
        // otherwise, the cmpxchg updated seen, and we continue to loop
        curr = seen;
      }
      if (allocated) {
        delete allocated;
      }
      return curr->allocSpace(size, align);
    }
    // First, move the current index in the last chunk to an aligned position.
    index = (index + align - 1) & (-align);
    if (index + size > CHUNK_SIZE || chunks.size() == 0) {
      // Allocate a new chunk.
      auto numChunks = (size + CHUNK_SIZE - 1) / CHUNK_SIZE;
      assert(size <= numChunks * CHUNK_SIZE);
      auto* allocation =
        wasm::aligned_malloc(MAX_ALIGN, numChunks * CHUNK_SIZE);
      if (!allocation) {
        abort();
      }
      chunks.push_back(allocation);
      index = 0;
    }
    uint8_t* ret = static_cast<uint8_t*>(chunks.back());
    ret += index;
    index += size; // TODO: if we allocated more than 1 chunk, reuse the
                   // remainder, right now we allocate another next time
    return static_cast<void*>(ret);
  }

  template<class T> T* alloc() {
    static_assert(alignof(T) <= MAX_ALIGN,
                  "maximum alignment not large enough");
    auto* ret = static_cast<T*>(allocSpace(sizeof(T), alignof(T)));
    new (ret) T(*this); // allocated objects receive the allocator, so they can
                        // allocate more later if necessary
    return ret;
  }

  void clear() {
    for (auto* chunk : chunks) {
      wasm::aligned_free(chunk);
    }
    chunks.clear();
  }

  ~MixedArena() {
    clear();
    if (next.load()) {
      delete next.load();
    }
  }
};

//
// A vector that allocates in an arena.
//
// TODO: specialize on the initial size of the array

template<typename SubType, typename T> class ArenaVectorBase {
protected:
  T* data = nullptr;
  size_t usedElements = 0, allocatedElements = 0;

  void reallocate(size_t size) {
    T* old = data;
    static_cast<SubType*>(this)->allocate(size);
    for (size_t i = 0; i < usedElements; i++) {
      data[i] = old[i];
    }
  }

public:
  struct Iterator;

  T& operator[](size_t index) const {
    assert(index < usedElements);
    return data[index];
  }

  size_t size() const { return usedElements; }

  bool empty() const { return size() == 0; }

  void resize(size_t size) {
    if (size > allocatedElements) {
      reallocate(size);
    }
    // construct new elements
    for (size_t i = usedElements; i < size; i++) {
      new (data + i) T();
    }
    usedElements = size;
  }

  T& back() const {
    assert(usedElements > 0);
    return data[usedElements - 1];
  }

  T& pop_back() {
    assert(usedElements > 0);
    usedElements--;
    return data[usedElements];
  }

  void push_back(T item) {
    if (usedElements == allocatedElements) {
      reallocate((allocatedElements + 1) * 2); // TODO: optimize
    }
    data[usedElements] = item;
    usedElements++;
  }

  T& front() const {
    assert(usedElements > 0);
    return data[0];
  }

  void erase(Iterator start_it, Iterator end_it) {
    assert(start_it.parent == end_it.parent && start_it.parent == this);
    assert(start_it.index <= end_it.index && end_it.index <= usedElements);
    size_t size = end_it.index - start_it.index;
    for (size_t cur = start_it.index; cur + size < usedElements; ++cur) {
      data[cur] = data[cur + size];
    }
    usedElements -= size;
  }

  void erase(Iterator it) { erase(it, it + 1); }

  void clear() { usedElements = 0; }

  void reserve(size_t size) {
    if (size > allocatedElements) {
      reallocate(size);
    }
  }

  template<typename ListType> void set(const ListType& list) {
    size_t size = list.size();
    if (allocatedElements < size) {
      static_cast<SubType*>(this)->allocate(size);
    }
    size_t i = 0;
    for (auto elem : list) {
      data[i++] = elem;
    }
    usedElements = size;
  }

  void operator=(SubType& other) { set(other); }

  // Note: despite the name, this does not exchange contents; it takes other's
  // contents and leaves other empty (the previous contents of this vector are
  // simply dropped, as arena storage is never freed individually).
  void swap(SubType& other) {
    data = other.data;
    usedElements = other.usedElements;
    allocatedElements = other.allocatedElements;

    other.data = nullptr;
    other.usedElements = other.allocatedElements = 0;
  }

  // iteration

  struct Iterator {
    using iterator_category = std::random_access_iterator_tag;
    using value_type = T;
    using difference_type = std::ptrdiff_t;
    using pointer = T*;
    using reference = T&;

    const SubType* parent;
    size_t index;

    Iterator() : parent(nullptr), index(0) {}
    Iterator(const SubType* parent, size_t index)
      : parent(parent), index(index) {}

    bool operator==(const Iterator& other) const {
      return index == other.index && parent == other.parent;
    }

    bool operator!=(const Iterator& other) const { return !(*this == other); }

    bool operator<(const Iterator& other) const {
      assert(parent == other.parent);
      return index < other.index;
    }

    bool operator>(const Iterator& other) const { return other < *this; }

    bool operator<=(const Iterator& other) const { return !(other < *this); }

    bool operator>=(const Iterator& other) const { return !(*this < other); }

    Iterator& operator++() {
      index++;
      return *this;
    }

    Iterator& operator--() {
      index--;
      return *this;
    }

    Iterator operator++(int) {
      Iterator it = *this;
      ++*this;
      return it;
    }

    Iterator operator--(int) {
      Iterator it = *this;
      --*this;
      return it;
    }

    Iterator& operator+=(std::ptrdiff_t off) {
      index += off;
      return *this;
    }

    Iterator& operator-=(std::ptrdiff_t off) { return *this += -off; }

    Iterator operator+(std::ptrdiff_t off) const {
      return Iterator(*this) += off;
    }

    Iterator operator-(std::ptrdiff_t off) const { return *this + -off; }

    std::ptrdiff_t operator-(const Iterator& other) const {
      assert(parent == other.parent);
      return index - other.index;
    }

    friend Iterator operator+(std::ptrdiff_t off, const Iterator& it) {
      return it + off;
    }

    T& operator*() const { return (*parent)[index]; }

    T& operator[](std::ptrdiff_t off) const { return (*parent)[index + off]; }

    T* operator->() const { return &(*parent)[index]; }
  };

  Iterator begin() const {
    return Iterator(static_cast<const SubType*>(this), 0);
  }
  Iterator end() const {
    return Iterator(static_cast<const SubType*>(this), usedElements);
  }

  void allocate(size_t size) {
    abort(); // must be implemented in children
  }

  // C-API

  void insertAt(size_t index, T item) {
    assert(index <= size()); // appending is ok
    resize(size() + 1);
    for (auto i = size() - 1; i > index; --i) {
      data[i] = data[i - 1];
    }
    data[index] = item;
  }

  T removeAt(size_t index) {
    assert(index < size());
    auto item = data[index];
    for (auto i = index; i < size() - 1; ++i) {
      data[i] = data[i + 1];
    }
    resize(size() - 1);
    return item;
  }
};

// A vector that has an allocator for arena allocation.
//
// TODO: consider not saving the allocator, but requiring it to be
// passed in when needed; that would make this (and thus Blocks etc.)
// smaller.

template<typename T>
class ArenaVector : public ArenaVectorBase<ArenaVector<T>, T> {
private:
  MixedArena& allocator;

public:
  ArenaVector(MixedArena& allocator) : allocator(allocator) {}

  ArenaVector(ArenaVector<T>&& other) : allocator(other.allocator) {
    *this = other;
  }

  void allocate(size_t size) {
    this->allocatedElements = size;
    this->data = static_cast<T*>(
      allocator.allocSpace(sizeof(T) * this->allocatedElements, alignof(T)));
  }
};
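
// A brief usage sketch (hedged: this snippet is illustrative and not part of
// the header): an ArenaVector is constructed with the arena it should
// allocate from, and its element storage then lives in that arena:
//
//   MixedArena arena;
//   ArenaVector<int> vec(arena);
//   vec.push_back(1);
//   vec.push_back(2);
//   for (auto x : vec) {
//     // iterates via ArenaVectorBase<...>::Iterator; use x here
//   }
//
// Note that, like everything in the arena, the storage is only reclaimed
// when the arena itself is cleared or destroyed.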

#endif // wasm_mixed_arena_h