ConcurrentHashtable.h source code [llvm/include/llvm/ADT/ConcurrentHashtable.h]

1	//===- ConcurrentHashtable.h ------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
10	#define LLVM_ADT_CONCURRENTHASHTABLE_H
11
12	#include "llvm/ADT/DenseMap.h"
13	#include "llvm/ADT/Hashing.h"
14	#include "llvm/ADT/STLExtras.h"
15	#include "llvm/ADT/SmallVector.h"
16	#include "llvm/Support/Allocator.h"
17	#include "llvm/Support/Debug.h"
18	#include "llvm/Support/Parallel.h"
19	#include "llvm/Support/WithColor.h"
20	#include "llvm/Support/xxhash.h"
21	#include <atomic>
22	#include <cstddef>
23	#include <iomanip>
24	#include <mutex>
25	#include <sstream>
26	#include <type_traits>
27
28	namespace llvm {
29
30	/// ConcurrentHashTable - is a resizeable concurrent hashtable.
31	/// The number of resizings limited up to x2^31. This hashtable is
32	/// useful to have efficient access to aggregate data(like strings,
33	/// type descriptors...) and to keep only single copy of such
34	/// an aggregate. The hashtable allows only concurrent insertions:
35	///
36	/// KeyDataTy = insert ( const KeyTy& );*
37	///
38	/// Data structure:
39	///
40	/// Inserted value KeyTy is mapped to 64-bit hash value ->
41	///
42	/// [------- 64-bit Hash value --------]
43	/// [ StartEntryIndex ][ Bucket Index ]
44	/// \| \|
45	/// points to the points to
46	/// first probe the bucket.
47	/// position inside
48	/// bucket entries
49	///
50	/// After initialization, all buckets have an initial size. During insertions,
51	/// buckets might be extended to contain more entries. Each bucket can be
52	/// independently resized and rehashed(no need to lock the whole table).
53	/// Different buckets may have different sizes. If the single bucket is full
54	/// then the bucket is resized.
55	///
56	/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
57	/// (pointers to KeyDataTy) and another array of entries hashes:
58	///
59	/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
60	/// BucketsArray[BucketIdx].Entries[EntryIdx]:
61	///
62	/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
63	/// [Bucket 0].Entries -> [KeyDataTy][KeyDataTy]
64	///
65	/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
66	/// [Bucket 1].Entries -> [KeyDataTy][KeyDataTy][KeyDataTy][KeyDataTy]
67	/// .........................
68	/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
69	/// [Bucket N].Entries -> [KeyDataTy][KeyDataTy][KeyDataTy]*
70	///
71	/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
72	/// KeyDataTy items.
73
74	template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
75	class ConcurrentHashTableInfoByPtr {
76	public:
77	/// \returns Hash value for the specified \p Key.
78	static inline uint64_t getHashValue(const KeyTy &Key) {
79	return xxh3_64bits(Key);
80	}
81
82	/// \returns true if both \p LHS and \p RHS are equal.
83	static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
84	return LHS == RHS;
85	}
86
87	/// \returns key for the specified \p KeyData.
88	static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
89	return KeyData.getKey();
90	}
91
92	/// \returns newly created object of KeyDataTy type.
93	static inline KeyDataTy create(const* KeyTy &Key, AllocatorTy &Allocator) {
94	return KeyDataTy::create(Key, Allocator);
95	}
96	};
97
98	template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
99	typename Info =
100	ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
101	class ConcurrentHashTableByPtr {
102	public:
103	ConcurrentHashTableByPtr(
104	AllocatorTy &Allocator, uint64_t EstimatedSize = `100000`,
105	size_t ThreadsNum = parallel::strategy.compute_thread_count(),
106	size_t InitialNumberOfBuckets = `128`)
107	: MultiThreadAllocator(Allocator) {
108	assert((ThreadsNum > `0`) && "ThreadsNum must be greater than 0");
109	assert((InitialNumberOfBuckets > `0`) &&
110	"InitialNumberOfBuckets must be greater than 0");
111
112	// Calculate number of buckets.
113	uint64_t EstimatedNumberOfBuckets = ThreadsNum;
114	if (ThreadsNum > `1`) {
115	EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
116	EstimatedNumberOfBuckets *= std::max(
117	a: `1`,
118	b: countr_zero(Val: PowerOf2Ceil(A: EstimatedSize / InitialNumberOfBuckets)) >>
119	`2`);
120	}
121	EstimatedNumberOfBuckets = PowerOf2Ceil(A: EstimatedNumberOfBuckets);
122	NumberOfBuckets =
123	std::min(a: EstimatedNumberOfBuckets, b: (uint64_t)(`1Ull` << `31`));
124
125	// Allocate buckets.
126	BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
127
128	InitialBucketSize = EstimatedSize / NumberOfBuckets;
129	InitialBucketSize = std::max(a: (uint32_t)`1`, b: InitialBucketSize);
130	InitialBucketSize = PowerOf2Ceil(A: InitialBucketSize);
131
132	// Initialize each bucket.
133	for (uint32_t Idx = `0`; Idx < NumberOfBuckets; Idx++) {
134	HashesPtr Hashes = new ExtHashBitsTy[InitialBucketSize];
135	memset(s: Hashes, c: `0`, n: sizeof(ExtHashBitsTy) * InitialBucketSize);
136
137	DataPtr Entries = new EntryDataTy[InitialBucketSize];
138	memset(Entries, `0`, sizeof(EntryDataTy) * InitialBucketSize);
139
140	BucketsArray[Idx].Size = InitialBucketSize;
141	BucketsArray[Idx].Hashes = Hashes;
142	BucketsArray[Idx].Entries = Entries;
143	}
144
145	// Calculate masks.
146	HashMask = NumberOfBuckets - `1`;
147
148	size_t LeadingZerosNumber = countl_zero(Val: HashMask);
149	HashBitsNum = `64` - LeadingZerosNumber;
150
151	// We keep only high 32-bits of hash value. So bucket size cannot
152	// exceed 2^31. Bucket size is always power of two.
153	MaxBucketSize = `1Ull` << (std::min(a: (size_t)`31`, b: LeadingZerosNumber));
154
155	// Calculate mask for extended hash bits.
156	ExtHashMask = (uint64_t)NumberOfBuckets * MaxBucketSize - `1`;
157	}
158
159	virtual ~ConcurrentHashTableByPtr() {
160	// Deallocate buckets.
161	for (uint32_t Idx = `0`; Idx < NumberOfBuckets; Idx++) {
162	delete[] BucketsArray[Idx].Hashes;
163	delete[] BucketsArray[Idx].Entries;
164	}
165	}
166
167	/// Insert new value \p NewValue or return already existing entry.
168	///
169	/// \returns entry and "true" if an entry is just inserted or
170	/// "false" if an entry already exists.
171	std::pair<KeyDataTy , bool> insert(const* KeyTy &NewValue) {
172	// Calculate bucket index.
173	uint64_t Hash = Info::getHashValue(NewValue);
174	Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
175	uint32_t ExtHashBits = getExtHashBits(Hash);
176
177	#if LLVM_ENABLE_THREADS
178	// Lock bucket.
179	CurBucket.Guard.lock();
180	#endif
181
182	HashesPtr BucketHashes = CurBucket.Hashes;
183	DataPtr BucketEntries = CurBucket.Entries;
184	uint32_t CurEntryIdx = getStartIdx(ExtHashBits, BucketSize: CurBucket.Size);
185
186	while (true) {
187	uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
188
189	if (CurEntryHashBits == `0` && BucketEntries[CurEntryIdx] == nullptr) {
190	// Found empty slot. Insert data.
191	KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
192	BucketEntries[CurEntryIdx] = NewData;
193	BucketHashes[CurEntryIdx] = ExtHashBits;
194
195	CurBucket.NumberOfEntries++;
196	RehashBucket(CurBucket);
197
198	#if LLVM_ENABLE_THREADS
199	CurBucket.Guard.unlock();
200	#endif
201
202	return {NewData, true};
203	}
204
205	if (CurEntryHashBits == ExtHashBits) {
206	// Hash matched. Check value for equality.
207	KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
208	if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
209	// Already existed entry matched with inserted data is found.
210	#if LLVM_ENABLE_THREADS
211	CurBucket.Guard.unlock();
212	#endif
213
214	return {EntryData, false};
215	}
216	}
217
218	CurEntryIdx++;
219	CurEntryIdx &= (CurBucket.Size - `1`);
220	}
221
222	llvm_unreachable("Insertion error.");
223	return {};
224	}
225
226	/// Print information about current state of hash table structures.
227	void printStatistic(raw_ostream &OS) {
228	OS << "\n--- HashTable statistic:\n";
229	OS << "\nNumber of buckets = " << NumberOfBuckets;
230	OS << "\nInitial bucket size = " << InitialBucketSize;
231
232	uint64_t NumberOfNonEmptyBuckets = `0`;
233	uint64_t NumberOfEntriesPlusEmpty = `0`;
234	uint64_t OverallNumberOfEntries = `0`;
235	uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
236
237	DenseMap<uint32_t, uint32_t> BucketSizesMap;
238
239	// For each bucket...
240	for (uint32_t Idx = `0`; Idx < NumberOfBuckets; Idx++) {
241	Bucket &CurBucket = BucketsArray[Idx];
242
243	BucketSizesMap[CurBucket.Size]++;
244
245	if (CurBucket.NumberOfEntries != `0`)
246	NumberOfNonEmptyBuckets++;
247	NumberOfEntriesPlusEmpty += CurBucket.Size;
248	OverallNumberOfEntries += CurBucket.NumberOfEntries;
249	OverallSize +=
250	(sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
251	}
252
253	OS << "\nOverall number of entries = " << OverallNumberOfEntries;
254	OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
255	for (auto &BucketSize : BucketSizesMap)
256	OS << "\n Number of buckets with size " << BucketSize.first << ": "
257	<< BucketSize.second;
258
259	std::stringstream stream;
260	stream << std::fixed << std::setprecision(`2`)
261	<< ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
262	std::string str = stream.str();
263
264	OS << "\nLoad factor = " << str;
265	OS << "\nOverall allocated size = " << OverallSize;
266	}
267
268	protected:
269	using ExtHashBitsTy = uint32_t;
270	using EntryDataTy = KeyDataTy *;
271
272	using HashesPtr = ExtHashBitsTy *;
273	using DataPtr = EntryDataTy *;
274
275	// Bucket structure. Keeps bucket data.
276	struct Bucket {
277	Bucket() = default;
278
279	// Size of bucket.
280	uint32_t Size = `0`;
281
282	// Number of non-null entries.
283	uint32_t NumberOfEntries = `0`;
284
285	// Hashes for [Size] entries.
286	HashesPtr Hashes = nullptr;
287
288	// [Size] entries.
289	DataPtr Entries = nullptr;
290
291	#if LLVM_ENABLE_THREADS
292	// Mutex for this bucket.
293	std::mutex Guard;
294	#endif
295	};
296
297	// Reallocate and rehash bucket if this is full enough.
298	void RehashBucket(Bucket &CurBucket) {
299	assert((CurBucket.Size > `0`) && "Uninitialised bucket");
300	if (CurBucket.NumberOfEntries < CurBucket.Size * `0.9`)
301	return;
302
303	if (CurBucket.Size >= MaxBucketSize)
304	report_fatal_error(reason: "ConcurrentHashTable is full");
305
306	uint32_t NewBucketSize = CurBucket.Size << `1`;
307	assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
308	assert((CurBucket.Size < NewBucketSize) &&
309	"New bucket size less than size of current bucket");
310
311	// Store old entries & hashes arrays.
312	HashesPtr SrcHashes = CurBucket.Hashes;
313	DataPtr SrcEntries = CurBucket.Entries;
314
315	// Allocate new entries&hashes arrays.
316	HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
317	memset(s: DestHashes, c: `0`, n: sizeof(ExtHashBitsTy) * NewBucketSize);
318
319	DataPtr DestEntries = new EntryDataTy[NewBucketSize];
320	memset(DestEntries, `0`, sizeof(EntryDataTy) * NewBucketSize);
321
322	// For each entry in source arrays...
323	for (uint32_t CurSrcEntryIdx = `0`; CurSrcEntryIdx < CurBucket.Size;
324	CurSrcEntryIdx++) {
325	uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
326
327	// Check for null entry.
328	if (CurSrcEntryHashBits == `0` && SrcEntries[CurSrcEntryIdx] == nullptr)
329	continue;
330
331	uint32_t StartDestIdx = getStartIdx(ExtHashBits: CurSrcEntryHashBits, BucketSize: NewBucketSize);
332
333	// Insert non-null entry into the new arrays.
334	while (true) {
335	uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
336
337	if (CurDestEntryHashBits == `0` && DestEntries[StartDestIdx] == nullptr) {
338	// Found empty slot. Insert data.
339	DestHashes[StartDestIdx] = CurSrcEntryHashBits;
340	DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
341	break;
342	}
343
344	StartDestIdx++;
345	StartDestIdx = StartDestIdx & (NewBucketSize - `1`);
346	}
347	}
348
349	// Update bucket fields.
350	CurBucket.Hashes = DestHashes;
351	CurBucket.Entries = DestEntries;
352	CurBucket.Size = NewBucketSize;
353
354	// Delete old bucket entries.
355	if (SrcHashes != nullptr)
356	delete[] SrcHashes;
357	if (SrcEntries != nullptr)
358	delete[] SrcEntries;
359	}
360
361	uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
362
363	uint32_t getExtHashBits(uint64_t Hash) {
364	return (Hash & ExtHashMask) >> HashBitsNum;
365	}
366
367	uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
368	assert((BucketSize > `0`) && "Empty bucket");
369
370	return ExtHashBits & (BucketSize - `1`);
371	}
372
373	// Number of bits in hash mask.
374	uint64_t HashBitsNum = `0`;
375
376	// Hash mask.
377	uint64_t HashMask = `0`;
378
379	// Hash mask for the extended hash bits.
380	uint64_t ExtHashMask = `0`;
381
382	// The maximal bucket size.
383	uint32_t MaxBucketSize = `0`;
384
385	// Initial size of bucket.
386	uint32_t InitialBucketSize = `0`;
387
388	// The number of buckets.
389	uint32_t NumberOfBuckets = `0`;
390
391	// Array of buckets.
392	std::unique_ptr<Bucket[]> BucketsArray;
393
394	// Used for allocating KeyDataTy values.
395	AllocatorTy &MultiThreadAllocator;
396	};
397
398	} // end namespace llvm
399
400	#endif // LLVM_ADT_CONCURRENTHASHTABLE_H
401

source code of llvm/include/llvm/ADT/ConcurrentHashtable.h