DebugTypes.cpp source code [lld/COFF/DebugTypes.cpp]

1	//===- DebugTypes.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "DebugTypes.h"
10	#include "COFFLinkerContext.h"
11	#include "Chunks.h"
12	#include "InputFiles.h"
13	#include "PDB.h"
14	#include "TypeMerger.h"
15	#include "lld/Common/ErrorHandler.h"
16	#include "lld/Common/Memory.h"
17	#include "llvm/ADT/StringExtras.h"
18	#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19	#include "llvm/DebugInfo/CodeView/TypeRecord.h"
20	#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21	#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22	#include "llvm/DebugInfo/PDB/GenericError.h"
23	#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24	#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25	#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26	#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27	#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28	#include "llvm/Support/FormatVariadic.h"
29	#include "llvm/Support/Parallel.h"
30	#include "llvm/Support/Path.h"
31	#include "llvm/Support/TimeProfiler.h"
32
33	using namespace llvm;
34	using namespace llvm::codeview;
35	using namespace lld;
36	using namespace lld::coff;
37
38	namespace {
39	class TypeServerIpiSource;
40
41	// The TypeServerSource class represents a PDB type server, a file referenced by
42	// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
43	// files, therefore there must be only once instance per OBJ lot. The file path
44	// is discovered from the dependent OBJ's debug type stream. The
45	// TypeServerSource object is then queued and loaded by the COFF Driver. The
46	// debug type stream for such PDB files will be merged first in the final PDB,
47	// before any dependent OBJ.
48	class TypeServerSource : public TpiSource {
49	public:
50	explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
51	: TpiSource (ctx, PDB, nullptr), pdbInputFile(f) {
52	if (f->loadErrorStr)
53	return;
54	pdb::PDBFile &file = f->session ->getPDBFile();
55	auto expectedInfo = file.getPDBInfoStream();
56	if (!expectedInfo)
57	return;
58	Guid = expectedInfo ->getGuid();
59	auto it = ctx.typeServerSourceMappings.emplace(args&: Guid, args: this);
60	if (!it.second) {
61	// If we hit here we have collision on Guid's in two PDB files.
62	// This can happen if the PDB Guid is invalid or if we are really
63	// unlucky. This should fall back on stright file-system lookup.
64	it.first ->second = nullptr;
65	}
66	}
67
68	Error mergeDebugT(TypeMerger *m) override;
69
70	void loadGHashes() override;
71	void remapTpiWithGHashes(GHashState *g) override;
72
73	bool isDependency() const override { return true; }
74
75	PDBInputFile pdbInputFile = nullptr*;
76
77	// TpiSource for IPI stream.
78	TypeServerIpiSource ipiSrc = nullptr*;
79
80	// The PDB signature GUID.
81	codeview::GUID Guid;
82	};
83
84	// Companion to TypeServerSource. Stores the index map for the IPI stream in the
85	// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
86	// invariant of one type index space per source.
87	class TypeServerIpiSource : public TpiSource {
88	public:
89	explicit TypeServerIpiSource(COFFLinkerContext &ctx)
90	: TpiSource (ctx, PDBIpi, nullptr) {}
91
92	friend class TypeServerSource;
93
94	// All of the TpiSource methods are no-ops. The parent TypeServerSource
95	// handles both TPI and IPI.
96	Error mergeDebugT(TypeMerger m) override { return* Error::success(); }
97	void loadGHashes() override {}
98	void remapTpiWithGHashes(GHashState *g) override {}
99	bool isDependency() const override { return true; }
100	};
101
102	// This class represents the debug type stream of an OBJ file that depends on a
103	// PDB type server (see TypeServerSource).
104	class UseTypeServerSource : public TpiSource {
105	Expected<TypeServerSource *> getTypeServerSource();
106
107	public:
108	UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
109	: TpiSource (ctx, UsingPDB, f), typeServerDependency (ts) {}
110
111	Error mergeDebugT(TypeMerger *m) override;
112
113	// No need to load ghashes from /Zi objects.
114	void loadGHashes() override {}
115	void remapTpiWithGHashes(GHashState *g) override;
116
117	// Information about the PDB type server dependency, that needs to be loaded
118	// in before merging this OBJ.
119	TypeServer2Record typeServerDependency;
120	};
121
122	// This class represents the debug type stream of a Microsoft precompiled
123	// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
124	// PDB, before any other OBJs that depend on this. Note that only MSVC generate
125	// such files, clang does not.
126	class PrecompSource : public TpiSource {
127	public:
128	PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource (ctx, PCH, f) {
129	// If the S_OBJNAME record contains the PCH signature, we'll register this
130	// source file right away.
131	registerMapping();
132	}
133
134	Error mergeDebugT(TypeMerger *m) override;
135
136	void loadGHashes() override;
137
138	bool isDependency() const override { return true; }
139
140	private:
141	void registerMapping();
142
143	// Whether this precomp OBJ was recorded in the precompSourceMappings map.
144	// Only happens if the file->pchSignature is valid.
145	bool registered = false;
146	};
147
148	// This class represents the debug type stream of an OBJ file that depends on a
149	// Microsoft precompiled headers OBJ (see PrecompSource).
150	class UsePrecompSource : public TpiSource {
151	public:
152	UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
153	: TpiSource (ctx, UsingPCH, f), precompDependency (precomp) {}
154
155	Error mergeDebugT(TypeMerger *m) override;
156
157	void loadGHashes() override;
158	void remapTpiWithGHashes(GHashState *g) override;
159
160	private:
161	Error mergeInPrecompHeaderObj();
162
163	PrecompSource *findObjByName(StringRef fileNameOnly);
164	PrecompSource findPrecompSource(ObjFile file, PrecompRecord &pr);
165	Expected<PrecompSource > findPrecompMap(ObjFile file, PrecompRecord &pr);
166
167	public:
168	// Information about the Precomp OBJ dependency, that needs to be loaded in
169	// before merging this OBJ.
170	PrecompRecord precompDependency;
171	};
172	} // namespace
173
174	TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
175	: ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
176	ctx.addTpiSource(tpi: this);
177	}
178
179	// Vtable key method.
180	TpiSource::~TpiSource() {
181	// Silence any assertions about unchecked errors.
182	consumeError(Err: std::move(typeMergingError));
183	}
184
185	TpiSource lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile file) {
186	return make<TpiSource>(args&: ctx, args: TpiSource::Regular, args&: file);
187	}
188
189	TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
190	PDBInputFile *pdbInputFile) {
191	// Type server sources come in pairs: the TPI stream, and the IPI stream.
192	auto *tpiSource = make<TypeServerSource>(args&: ctx, args&: pdbInputFile);
193	if (pdbInputFile->session ->getPDBFile().hasPDBIpiStream())
194	tpiSource->ipiSrc = make<TypeServerIpiSource>(args&: ctx);
195	return tpiSource;
196	}
197
198	TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
199	ObjFile *file,
200	TypeServer2Record ts) {
201	return make<UseTypeServerSource>(args&: ctx, args&: file, args&: ts);
202	}
203
204	TpiSource lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile file) {
205	return make<PrecompSource>(args&: ctx, args&: file);
206	}
207
208	TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
209	ObjFile *file,
210	PrecompRecord precomp) {
211	return make<UsePrecompSource>(args&: ctx, args&: file, args&: precomp);
212	}
213
214	bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
215	if (ti.isSimple())
216	return true;
217
218	// This can be an item index or a type index. Choose the appropriate map.
219	ArrayRef<TypeIndex> tpiOrIpiMap =
220	(refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
221	if (ti.toArrayIndex() >= tpiOrIpiMap.size())
222	return false;
223	ti = tpiOrIpiMap [ti.toArrayIndex()];
224	return true;
225	}
226
227	void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
228	ArrayRef<TiReference> typeRefs) {
229	MutableArrayRef<uint8_t> contents = rec.drop_front(N: sizeof(RecordPrefix));
230	for (const TiReference &ref : typeRefs) {
231	unsigned byteSize = ref.Count * sizeof(TypeIndex);
232	if (contents.size() < ref.Offset + byteSize)
233	Fatal(ctx) << "symbol record too short";
234
235	MutableArrayRef<TypeIndex> indices(
236	reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
237	for (TypeIndex &ti : indices) {
238	if (!remapTypeIndex(ti, refKind: ref.Kind)) {
239	if (ctx.config.verbose) {
240	uint16_t kind =
241	reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
242	StringRef fname = file ? file->getName() : "<unknown PDB>";
243	Log(ctx) << "failed to remap type index in record of kind 0x"
244	<< utohexstr(X: kind) << " in " << fname << " with bad "
245	<< (ref.Kind == TiRefKind::IndexRef ? "item" : "type")
246	<< " index 0x" << utohexstr(X: ti.getIndex());
247	}
248	ti = TypeIndex(SimpleTypeKind::NotTranslated);
249	continue;
250	}
251	}
252	}
253	}
254
255	void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
256	// TODO: Handle errors similar to symbols.
257	SmallVector<TiReference, `32`> typeRefs;
258	discoverTypeIndices(Type: CVType (rec), Refs&: typeRefs);
259	remapRecord(rec, typeRefs);
260	}
261
262	bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
263	// Discover type index references in the record. Skip it if we don't
264	// know where they are.
265	SmallVector<TiReference, `32`> typeRefs;
266	if (!discoverTypeIndicesInSymbol(RecordData: rec, Refs&: typeRefs))
267	return false;
268	remapRecord(rec, typeRefs);
269	return true;
270	}
271
272	// A COFF .debug$H section is currently a clang extension. This function checks
273	// if a .debug$H section is in a format that we expect / understand, so that we
274	// can ignore any sections which are coincidentally also named .debug$H but do
275	// not contain a format we recognize.
276	static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
277	if (debugH.size() < sizeof(object::debug_h_header))
278	return false;
279	auto *header =
280	reinterpret_cast<const object::debug_h_header *>(debugH.data());
281	debugH = debugH.drop_front(N: sizeof(object::debug_h_header));
282	return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
283	header->Version == `0` &&
284	header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
285	(debugH.size() % `8` == `0`);
286	}
287
288	static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
289	SectionChunk *sec =
290	SectionChunk::findByName(sections: file->getDebugChunks(), name: ".debug$H");
291	if (!sec)
292	return std::nullopt;
293	ArrayRef<uint8_t> contents = sec->getContents();
294	if (!canUseDebugH(debugH: contents))
295	return std::nullopt;
296	return contents;
297	}
298
299	static ArrayRef<GloballyHashedType>
300	getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
301	assert(canUseDebugH(debugH));
302	debugH = debugH.drop_front(N: sizeof(object::debug_h_header));
303	uint32_t count = debugH.size() / sizeof(GloballyHashedType);
304	return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
305	}
306
307	// Merge .debug$T for a generic object file.
308	Error TpiSource::mergeDebugT(TypeMerger *m) {
309	assert(!ctx.config.debugGHashes &&
310	"use remapTpiWithGHashes when ghash is enabled");
311
312	CVTypeArray types;
313	BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
314	cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
315
316	// When dealing with PCH.OBJ, some indices were already merged.
317	unsigned nbHeadIndices = indexMapStorage.size();
318
319	std::optional<PCHMergerInfo> pchInfo;
320	if (auto err = mergeTypeAndIdRecords(DestIds&: m->idTable, DestTypes&: m->typeTable,
321	SourceToDest&: indexMapStorage, IdsAndTypes: types, PCHInfo&: pchInfo))
322	Fatal(ctx) << "codeview::mergeTypeAndIdRecords failed: "
323	<< toString(E: std::move(err));
324	if (pchInfo) {
325	file->pchSignature = pchInfo ->PCHSignature;
326	endPrecompIdx = pchInfo ->EndPrecompIndex;
327	}
328
329	// In an object, there is only one mapping for both types and items.
330	tpiMap = indexMapStorage;
331	ipiMap = indexMapStorage;
332
333	if (ctx.config.showSummary) {
334	nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
335	nbTypeRecordsBytes = reader.getLength();
336	// Count how many times we saw each type record in our input. This
337	// calculation requires a second pass over the type records to classify each
338	// record as a type or index. This is slow, but this code executes when
339	// collecting statistics.
340	m->tpiCounts.resize(N: m->getTypeTable().size());
341	m->ipiCounts.resize(N: m->getIDTable().size());
342	uint32_t srcIdx = nbHeadIndices;
343	for (const CVType &ty : types) {
344	TypeIndex dstIdx = tpiMap [srcIdx++];
345	// Type merging may fail, so a complex source type may become the simple
346	// NotTranslated type, which cannot be used as an array index.
347	if (dstIdx.isSimple())
348	continue;
349	SmallVectorImpl<uint32_t> &counts =
350	isIdRecord(K: ty.kind()) ? m->ipiCounts : m->tpiCounts;
351	++counts [dstIdx.toArrayIndex()];
352	}
353	}
354
355	return Error::success();
356	}
357
358	// Merge types from a type server PDB.
359	Error TypeServerSource::mergeDebugT(TypeMerger *m) {
360	assert(!ctx.config.debugGHashes &&
361	"use remapTpiWithGHashes when ghash is enabled");
362
363	pdb::PDBFile &pdbFile = pdbInputFile->session ->getPDBFile();
364	Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
365	if (auto e = expectedTpi.takeError())
366	Fatal(ctx) << "Type server does not have TPI stream: "
367	<< toString(E: std::move(e));
368	pdb::TpiStream maybeIpi = nullptr*;
369	if (pdbFile.hasPDBIpiStream()) {
370	Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
371	if (auto e = expectedIpi.takeError())
372	Fatal(ctx) << "Error getting type server IPI stream: "
373	<< toString(E: std::move(e));
374	maybeIpi = &*expectedIpi;
375	}
376
377	// Merge TPI first, because the IPI stream will reference type indices.
378	if (auto err = mergeTypeRecords(Dest&: m->typeTable, SourceToDest&: indexMapStorage,
379	Types: expectedTpi ->typeArray()))
380	Fatal(ctx) << "codeview::mergeTypeRecords failed: "
381	<< toString(E: std::move(err));
382	tpiMap = indexMapStorage;
383
384	// Merge IPI.
385	if (maybeIpi) {
386	if (auto err = mergeIdRecords(Dest&: m->idTable, Types: tpiMap, SourceToDest&: ipiSrc->indexMapStorage,
387	Ids: maybeIpi->typeArray()))
388	Fatal(ctx) << "codeview::mergeIdRecords failed: "
389	<< toString(E: std::move(err));
390	ipiMap = ipiSrc->indexMapStorage;
391	}
392
393	if (ctx.config.showSummary) {
394	nbTypeRecords = tpiMap.size() + ipiMap.size();
395	nbTypeRecordsBytes =
396	expectedTpi ->typeArray().getUnderlyingStream().getLength() +
397	(maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
398	: `0`);
399
400	// Count how many times we saw each type record in our input. If a
401	// destination type index is present in the source to destination type index
402	// map, that means we saw it once in the input. Add it to our histogram.
403	m->tpiCounts.resize(N: m->getTypeTable().size());
404	m->ipiCounts.resize(N: m->getIDTable().size());
405	for (TypeIndex ti : tpiMap)
406	if (!ti.isSimple())
407	++m->tpiCounts [ti.toArrayIndex()];
408	for (TypeIndex ti : ipiMap)
409	if (!ti.isSimple())
410	++m->ipiCounts [ti.toArrayIndex()];
411	}
412
413	return Error::success();
414	}
415
416	Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
417	const codeview::GUID &tsId = typeServerDependency.getGuid();
418	StringRef tsPath = typeServerDependency.getName();
419
420	TypeServerSource tsSrc = nullptr*;
421	auto it = ctx.typeServerSourceMappings.find(x: tsId);
422	if (it != ctx.typeServerSourceMappings.end()) {
423	tsSrc = (TypeServerSource *)it ->second;
424	}
425	if (tsSrc == nullptr) {
426	// The file failed to load, lookup by name
427	PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, path: tsPath, fromFile: file);
428	if (!pdb)
429	return createFileError(F: tsPath, E: errorCodeToError(EC: std::error_code (
430	ENOENT, std::generic_category())));
431	// If an error occurred during loading, throw it now
432	if (pdb->loadErrorStr)
433	return createFileError(
434	F: tsPath, E: make_error<StringError>(Args&: *pdb->loadErrorStr,
435	Args: llvm::inconvertibleErrorCode()));
436
437	tsSrc = (TypeServerSource *)pdb->debugTypesObj;
438
439	// Just because a file with a matching name was found and it was an actual
440	// PDB file doesn't mean it matches. For it to match the InfoStream's GUID
441	// must match the GUID specified in the TypeServer2 record.
442	if (tsSrc->Guid != tsId) {
443	return createFileError(F: tsPath,
444	E: make_error<pdb::PDBError>(
445	Args: pdb::pdb_error_code::signature_out_of_date));
446	}
447	}
448	return tsSrc;
449	}
450
451	Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
452	Expected<TypeServerSource *> tsSrc = getTypeServerSource();
453	if (!tsSrc)
454	return tsSrc.takeError();
455
456	pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session ->getPDBFile();
457	auto expectedInfo = pdbSession.getPDBInfoStream();
458	if (!expectedInfo)
459	return expectedInfo.takeError();
460
461	// Reuse the type index map of the type server.
462	tpiMap = (*tsSrc)->tpiMap;
463	ipiMap = (*tsSrc)->ipiMap;
464	return Error::success();
465	}
466
467	static bool equalsPath(StringRef path1, StringRef path2) {
468	#if defined(_WIN32)
469	return path1.equals_insensitive(path2);
470	#else
471	return path1 == path2;
472	#endif
473	}
474
475	// Find by name an OBJ provided on the command line
476	PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
477	for (auto kv : ctx.precompSourceMappings) {
478	StringRef currentFileName = sys::path::filename(path: kv.second->file->getName(),
479	style: sys::path::Style::windows);
480
481	// Compare based solely on the file name (link.exe behavior)
482	if (equalsPath(path1: currentFileName, path2: fileNameOnly))
483	return (PrecompSource *)kv.second;
484	}
485	return nullptr;
486	}
487
488	PrecompSource UsePrecompSource::findPrecompSource(ObjFile file,
489	PrecompRecord &pr) {
490	// Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
491	// records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
492	// the paths embedded in the OBJs are in the Windows format.
493	SmallString<`128`> prFileName =
494	sys::path::filename(path: pr.getPrecompFilePath(), style: sys::path::Style::windows);
495
496	auto it = ctx.precompSourceMappings.find(x: pr.getSignature());
497	if (it != ctx.precompSourceMappings.end()) {
498	return (PrecompSource *)it ->second;
499	}
500	// Lookup by name
501	return findObjByName(fileNameOnly: prFileName);
502	}
503
504	Expected<PrecompSource > UsePrecompSource::findPrecompMap(ObjFile file,
505	PrecompRecord &pr) {
506	PrecompSource *precomp = findPrecompSource(file, pr);
507
508	if (!precomp)
509	return createFileError(
510	F: pr.getPrecompFilePath(),
511	E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch));
512
513	// Don't rely on the PCH signature to validate the concordance between the PCH
514	// and the OBJ that uses it. However we do validate here that the
515	// LF_ENDPRECOMP record index lines up with the number of type records
516	// LF_PRECOMP is expecting.
517	if (precomp->endPrecompIdx != pr.getTypesCount())
518	return createFileError(
519	F: toString(file),
520	E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch));
521
522	return precomp;
523	}
524
525	/// Merges a precompiled headers TPI map into the current TPI map. The
526	/// precompiled headers object will also be loaded and remapped in the
527	/// process.
528	Error UsePrecompSource::mergeInPrecompHeaderObj() {
529	auto e = findPrecompMap(file, pr&: precompDependency);
530	if (!e)
531	return e.takeError();
532
533	PrecompSource precompSrc = e;
534	if (precompSrc->tpiMap.empty())
535	return Error::success();
536
537	assert(precompDependency.getStartTypeIndex() ==
538	TypeIndex::FirstNonSimpleIndex);
539	assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
540	// Use the previously remapped index map from the precompiled headers.
541	indexMapStorage.insert(I: indexMapStorage.begin(), From: precompSrc->tpiMap.begin(),
542	To: precompSrc->tpiMap.begin() +
543	precompDependency.getTypesCount());
544
545	return Error::success();
546	}
547
548	Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
549	// This object was compiled with /Yu, so process the corresponding
550	// precompiled headers object (/Yc) first. Some type indices in the current
551	// object are referencing data in the precompiled headers object, so we need
552	// both to be loaded.
553	if (Error e = mergeInPrecompHeaderObj())
554	return e;
555
556	return TpiSource::mergeDebugT(m);
557	}
558
559	Error PrecompSource::mergeDebugT(TypeMerger *m) {
560	// In some cases, the S_OBJNAME record doesn't contain the PCH signature.
561	// The signature comes later with the LF_ENDPRECOMP record, so we first need
562	// to merge in all the .PCH.OBJ file type records, before registering below.
563	if (Error e = TpiSource::mergeDebugT(m))
564	return e;
565
566	registerMapping();
567
568	return Error::success();
569	}
570
571	void PrecompSource::registerMapping() {
572	if (registered)
573	return;
574	if (file->pchSignature && *file->pchSignature) {
575	auto it = ctx.precompSourceMappings.emplace(args&: file->pchSignature, args: this*);
576	if (!it.second)
577	Fatal(ctx)
578	<< "a PCH object with the same signature has already been provided ("
579	<< toString(file: it.first ->second->file) << " and " << toString(file)
580	<< ")";
581	registered = true;
582	}
583	}
584
585	//===----------------------------------------------------------------------===//
586	// Parellel GHash type merging implementation.
587	//===----------------------------------------------------------------------===//
588
589	void TpiSource::loadGHashes() {
590	if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
591	ghashes = getHashesFromDebugH(debugH: *debugH);
592	ownedGHashes = false;
593	} else {
594	CVTypeArray types;
595	BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
596	cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
597	assignGHashesFromVector(hashVec: GloballyHashedType::hashTypes(Records&: types));
598	}
599
600	fillIsItemIndexFromDebugT();
601	}
602
603	// Copies ghashes from a vector into an array. These are long lived, so it's
604	// worth the time to copy these into an appropriately sized vector to reduce
605	// memory usage.
606	void TpiSource::assignGHashesFromVector(
607	std::vector<GloballyHashedType> &&hashVec) {
608	if (hashVec.empty())
609	return;
610	GloballyHashedType hashes = new* GloballyHashedType[hashVec.size()];
611	memcpy(dest: hashes, src: hashVec.data(), n: hashVec.size() * sizeof(GloballyHashedType));
612	ghashes = ArrayRef(hashes, hashVec.size());
613	ownedGHashes = true;
614	}
615
616	// Faster way to iterate type records. forEachTypeChecked is faster than
617	// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
618	static void forEachTypeChecked(ArrayRef<uint8_t> types,
619	function_ref<void(const CVType &)> fn) {
620	checkError(
621	e: forEachCodeViewRecord<CVType>(StreamBuffer: types, F: [fn](const CVType &ty) -> Error {
622	fn (ty);
623	return Error::success();
624	}));
625	}
626
627	// Walk over file->debugTypes and fill in the isItemIndex bit vector.
628	// TODO: Store this information in .debug$H so that we don't have to recompute
629	// it. This is the main bottleneck slowing down parallel ghashing with one
630	// thread over single-threaded ghashing.
631	void TpiSource::fillIsItemIndexFromDebugT() {
632	uint32_t index = `0`;
633	isItemIndex.resize(N: ghashes.size());
634	forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
635	if (isIdRecord(K: ty.kind()))
636	isItemIndex.set(index);
637	++index;
638	});
639	}
640
641	void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
642	// Decide if the merged type goes into TPI or IPI.
643	bool isItem = isIdRecord(K: ty.kind());
644	MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
645
646	// Copy the type into our mutable buffer.
647	assert(ty.length() <= codeview::MaxRecordLength);
648	size_t offset = merged.recs.size();
649	size_t newSize = alignTo(Value: ty.length(), Align: `4`);
650	merged.recs.resize(new_size: offset + newSize);
651	auto newRec = MutableArrayRef(&merged.recs [offset], newSize);
652	memcpy(dest: newRec.data(), src: ty.data().data(), n: newSize);
653
654	// Fix up the record prefix and padding bytes if it required resizing.
655	if (newSize != ty.length()) {
656	reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - `2`;
657	for (size_t i = ty.length(); i < newSize; ++i)
658	newRec [i] = LF_PAD0 + (newSize - i);
659	}
660
661	// Remap the type indices in the new record.
662	remapTypesInTypeRecord(rec: newRec);
663	uint32_t pdbHash = check(e: pdb::hashTypeRecord(Type: CVType (newRec)));
664	merged.recSizes.push_back(x: static_cast<uint16_t>(newSize));
665	merged.recHashes.push_back(x: pdbHash);
666
667	// Retain a mapping from PDB function id to PDB function type. This mapping is
668	// used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
669	// symbols.
670	if (ty.kind() == LF_FUNC_ID \|\| ty.kind() == LF_MFUNC_ID) {
671	bool success = ty.length() >= `12`;
672	TypeIndex funcId = curIndex;
673	if (success)
674	success &= remapTypeIndex(ti&: funcId, refKind: TiRefKind::IndexRef);
675	TypeIndex funcType =
676	*reinterpret_cast<const TypeIndex *>(&newRec.data()[`8`]);
677	if (success) {
678	funcIdToType.push_back(x: {funcId, funcType});
679	} else {
680	StringRef fname = file ? file->getName() : "<unknown PDB>";
681	Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x"
682	<< utohexstr(X: curIndex.getIndex()) << " in " << fname;
683	}
684	}
685	}
686
687	void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
688	TypeIndex beginIndex) {
689	// Re-sort the list of unique types by index.
690	if (kind == PDB)
691	assert(llvm::is_sorted(uniqueTypes));
692	else
693	llvm::sort(C&: uniqueTypes);
694
695	// Accumulate all the unique types into one buffer in mergedTypes.
696	uint32_t ghashIndex = `0`;
697	auto nextUniqueIndex = uniqueTypes.begin();
698	assert(mergedTpi.recs.empty());
699	assert(mergedIpi.recs.empty());
700
701	// Pre-compute the number of elements in advance to avoid std::vector resizes.
702	unsigned nbTpiRecs = `0`;
703	unsigned nbIpiRecs = `0`;
704	forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) {
705	if (nextUniqueIndex != uniqueTypes.end() &&
706	*nextUniqueIndex == ghashIndex) {
707	assert(ty.length() <= codeview::MaxRecordLength);
708	size_t newSize = alignTo(Value: ty.length(), Align: `4`);
709	(isIdRecord(K: ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
710	++nextUniqueIndex;
711	}
712	++ghashIndex;
713	});
714	mergedTpi.recs.reserve(n: nbTpiRecs);
715	mergedIpi.recs.reserve(n: nbIpiRecs);
716
717	// Do the actual type merge.
718	ghashIndex = `0`;
719	nextUniqueIndex = uniqueTypes.begin();
720	forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) {
721	if (nextUniqueIndex != uniqueTypes.end() &&
722	*nextUniqueIndex == ghashIndex) {
723	mergeTypeRecord(curIndex: beginIndex + ghashIndex, ty);
724	++nextUniqueIndex;
725	}
726	++ghashIndex;
727	});
728	assert(nextUniqueIndex == uniqueTypes.end() &&
729	"failed to merge all desired records");
730	assert(uniqueTypes.size() ==
731	mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
732	"missing desired record");
733	}
734
735	void TpiSource::remapTpiWithGHashes(GHashState *g) {
736	assert(ctx.config.debugGHashes && "ghashes must be enabled");
737	fillMapFromGHashes(m: g);
738	tpiMap = indexMapStorage;
739	ipiMap = indexMapStorage;
740	mergeUniqueTypeRecords(typeRecords: file->debugTypes);
741	// TODO: Free all unneeded ghash resources now that we have a full index map.
742
743	if (ctx.config.showSummary) {
744	nbTypeRecords = ghashes.size();
745	nbTypeRecordsBytes = file->debugTypes.size();
746	}
747	}
748
749	// PDBs do not actually store global hashes, so when merging a type server
750	// PDB we have to synthesize global hashes. To do this, we first synthesize
751	// global hashes for the TPI stream, since it is independent, then we
752	// synthesize hashes for the IPI stream, using the hashes for the TPI stream
753	// as inputs.
754	void TypeServerSource::loadGHashes() {
755	// Don't hash twice.
756	if (!ghashes.empty())
757	return;
758	pdb::PDBFile &pdbFile = pdbInputFile->session ->getPDBFile();
759
760	// Hash TPI stream.
761	Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
762	if (auto e = expectedTpi.takeError())
763	Fatal(ctx) << "Type server does not have TPI stream: "
764	<< toString(E: std::move(e));
765	assignGHashesFromVector(
766	hashVec: GloballyHashedType::hashTypes(Records: expectedTpi ->typeArray()));
767	isItemIndex.resize(N: ghashes.size());
768
769	// Hash IPI stream, which depends on TPI ghashes.
770	if (!pdbFile.hasPDBIpiStream())
771	return;
772	Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
773	if (auto e = expectedIpi.takeError())
774	Fatal(ctx) << "error retrieving IPI stream: " << toString(E: std::move(e));
775	ipiSrc->assignGHashesFromVector(
776	hashVec: GloballyHashedType::hashIds(Records: expectedIpi ->typeArray(), TypeHashes: ghashes));
777
778	// The IPI stream isItemIndex bitvector should be all ones.
779	ipiSrc->isItemIndex.resize(N: ipiSrc->ghashes.size());
780	ipiSrc->isItemIndex.set(I: `0`, E: ipiSrc->ghashes.size());
781	}
782
783	// Flatten discontiguous PDB type arrays to bytes so that we can use
784	// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
785	// type servers is faster than iterating all object files compiled with /Z7 with
786	// CVTypeArray, which has high overheads due to the virtual interface of
787	// BinaryStream::readBytes.
788	static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
789	BinaryStreamRef stream = types.getUnderlyingStream();
790	ArrayRef<uint8_t> debugTypes;
791	checkError(e: stream.readBytes(Offset: `0`, Size: stream.getLength(), Buffer&: debugTypes));
792	return debugTypes;
793	}
794
795	// Merge types from a type server PDB.
796	void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
797	assert(ctx.config.debugGHashes && "ghashes must be enabled");
798
799	// IPI merging depends on TPI, so do TPI first, then do IPI. No need to
800	// propagate errors, those should've been handled during ghash loading.
801	pdb::PDBFile &pdbFile = pdbInputFile->session ->getPDBFile();
802	pdb::TpiStream &tpi = check(e: pdbFile.getPDBTpiStream());
803	fillMapFromGHashes(m: g);
804	tpiMap = indexMapStorage;
805	mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: tpi.typeArray()));
806	if (pdbFile.hasPDBIpiStream()) {
807	pdb::TpiStream &ipi = check(e: pdbFile.getPDBIpiStream());
808	ipiSrc->indexMapStorage.resize(N: ipiSrc->ghashes.size());
809	ipiSrc->fillMapFromGHashes(m: g);
810	ipiMap = ipiSrc->indexMapStorage;
811	ipiSrc->tpiMap = tpiMap;
812	ipiSrc->ipiMap = ipiMap;
813	ipiSrc->mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: ipi.typeArray()));
814
815	if (ctx.config.showSummary) {
816	nbTypeRecords = ipiSrc->ghashes.size();
817	nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
818	}
819	}
820
821	if (ctx.config.showSummary) {
822	nbTypeRecords += ghashes.size();
823	nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
824	}
825	}
826
827	void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
828	// No remapping to do with /Zi objects. Simply use the index map from the type
829	// server. Errors should have been reported earlier. Symbols from this object
830	// will be ignored.
831	Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
832	if (!maybeTsSrc) {
833	typeMergingError =
834	joinErrors(E1: std::move(typeMergingError), E2: maybeTsSrc.takeError());
835	return;
836	}
837	TypeServerSource tsSrc = maybeTsSrc;
838	tpiMap = tsSrc->tpiMap;
839	ipiMap = tsSrc->ipiMap;
840	}
841
842	void PrecompSource::loadGHashes() {
843	if (getDebugH(file)) {
844	Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented";
845	}
846
847	uint32_t ghashIdx = `0`;
848	std::vector<GloballyHashedType> hashVec;
849	forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
850	// Remember the index of the LF_ENDPRECOMP record so it can be excluded from
851	// the PDB. There must be an entry in the list of ghashes so that the type
852	// indexes of the following records in the /Yc PCH object line up.
853	if (ty.kind() == LF_ENDPRECOMP) {
854	EndPrecompRecord endPrecomp;
855	cantFail(Err: TypeDeserializer::deserializeAs<EndPrecompRecord>(
856	CVT&: const_cast<CVType &>(ty), Record&: endPrecomp));
857	file->pchSignature = endPrecomp.getSignature();
858	registerMapping();
859	endPrecompIdx = ghashIdx;
860	}
861
862	hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec));
863	isItemIndex.push_back(Val: isIdRecord(K: ty.kind()));
864	++ghashIdx;
865	});
866	assignGHashesFromVector(hashVec: std::move(hashVec));
867	}
868
869	void UsePrecompSource::loadGHashes() {
870	auto e = findPrecompMap(file, pr&: precompDependency);
871	if (!e) {
872	Warn(ctx) << e.takeError();
873	return;
874	}
875
876	PrecompSource pchSrc = e;
877
878	// To compute ghashes of a /Yu object file, we need to build on the ghashes of
879	// the /Yc PCH object. After we are done hashing, discard the ghashes from the
880	// PCH source so we don't unnecessarily try to deduplicate them.
881	std::vector<GloballyHashedType> hashVec =
882	pchSrc->ghashes.take_front(N: precompDependency.getTypesCount());
883	forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
884	hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec));
885	isItemIndex.push_back(Val: isIdRecord(K: ty.kind()));
886	});
887	hashVec.erase(first: hashVec.begin(),
888	last: hashVec.begin() + precompDependency.getTypesCount());
889	assignGHashesFromVector(hashVec: std::move(hashVec));
890	}
891
892	void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
893	fillMapFromGHashes(m: g);
894	// This object was compiled with /Yu, so process the corresponding
895	// precompiled headers object (/Yc) first. Some type indices in the current
896	// object are referencing data in the precompiled headers object, so we need
897	// both to be loaded.
898	if (Error e = mergeInPrecompHeaderObj()) {
899	typeMergingError = joinErrors(E1: std::move(typeMergingError), E2: std::move(e));
900	return;
901	}
902
903	tpiMap = indexMapStorage;
904	ipiMap = indexMapStorage;
905	mergeUniqueTypeRecords(typeRecords: file->debugTypes,
906	beginIndex: TypeIndex (precompDependency.getStartTypeIndex() +
907	precompDependency.getTypesCount()));
908	if (ctx.config.showSummary) {
909	nbTypeRecords = ghashes.size();
910	nbTypeRecordsBytes = file->debugTypes.size();
911	}
912	}
913
914	namespace {
915	/// A concurrent hash table for global type hashing. It is based on this paper:
916	/// Concurrent Hash Tables: Fast and General(?)!
917	/// https://dl.acm.org/doi/10.1145/3309206
918	///
919	/// This hash table is meant to be used in two phases:
920	/// 1. concurrent insertions
921	/// 2. concurrent reads
922	/// It does not support lookup, deletion, or rehashing. It uses linear probing.
923	///
924	/// The paper describes storing a key-value pair in two machine words.
925	/// Generally, the values stored in this map are type indices, and we can use
926	/// those values to recover the ghash key from a side table. This allows us to
927	/// shrink the table entries further at the cost of some loads, and sidesteps
928	/// the need for a 128 bit atomic compare-and-swap operation.
929	///
930	/// During insertion, a priority function is used to decide which insertion
931	/// should be preferred. This ensures that the output is deterministic. For
932	/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
933	///
934	class GHashCell;
935	struct GHashTable {
936	GHashCell table = nullptr*;
937	uint32_t tableSize = `0`;
938
939	GHashTable() = default;
940	~GHashTable();
941
942	/// Initialize the table with the given size. Because the table cannot be
943	/// resized, the initial size of the table must be large enough to contain all
944	/// inputs, or insertion may not be able to find an empty cell.
945	void init(uint32_t newTableSize);
946
947	/// Insert the cell with the given ghash into the table. Return the insertion
948	/// position in the table. It is safe for the caller to store the insertion
949	/// position because the table cannot be resized.
950	uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
951	GHashCell newCell);
952	};
953
954	/// A ghash table cell for deduplicating types from TpiSources.
955	class GHashCell {
956	// Force "data" to be 64-bit aligned; otherwise, some versions of clang
957	// will generate calls to libatomic when using some versions of libstdc++
958	// on 32-bit targets. (Also, in theory, there could be a target where
959	// new[] doesn't always return an 8-byte-aligned allocation.)
960	alignas(sizeof(uint64_t)) uint64_t data = `0`;
961
962	public:
963	GHashCell() = default;
964
965	// Construct data most to least significant so that sorting works well:
966	// - isItem
967	// - tpiSrcIdx
968	// - ghashIdx
969	// Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
970	// non-zero representation.
971	GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
972	: data((uint64_t(isItem) << `63U`) \| (uint64_t(tpiSrcIdx + `1`) << `32ULL`) \|
973	ghashIdx) {
974	assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
975	assert(ghashIdx == getGHashIdx() && "round trip failure");
976	}
977
978	explicit GHashCell(uint64_t data) : data(data) {}
979
980	// The empty cell is all zeros.
981	bool isEmpty() const { return data == `0ULL`; }
982
983	/// Extract the tpiSrcIdx.
984	uint32_t getTpiSrcIdx() const {
985	return ((uint32_t)(data >> `32U`) & `0x7FFFFFFF`) - `1`;
986	}
987
988	/// Extract the index into the ghash array of the TpiSource.
989	uint32_t getGHashIdx() const { return (uint32_t)data; }
990
991	bool isItem() const { return data & (`1ULL` << `63U`); }
992
993	/// Get the ghash key for this cell.
994	GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
995	return ctx.tpiSourceList [getTpiSrcIdx()]->ghashes [getGHashIdx()];
996	}
997
998	/// The priority function for the cell. The data is stored such that lower
999	/// tpiSrcIdx and ghashIdx values are preferred, which means that type record
1000	/// from earlier sources are more likely to prevail.
1001	friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
1002	return l.data < r.data;
1003	}
1004	};
1005	} // namespace
1006
1007	namespace lld::coff {
1008	/// This type is just a wrapper around GHashTable with external linkage so it
1009	/// can be used from a header.
1010	struct GHashState {
1011	GHashTable table;
1012	};
1013	} // namespace lld::coff
1014
1015	GHashTable::~GHashTable() { delete[] table; }
1016
1017	void GHashTable::init(uint32_t newTableSize) {
1018	table = new GHashCell[newTableSize];
1019	memset(s: table, c: `0`, n: newTableSize * sizeof(GHashCell));
1020	tableSize = newTableSize;
1021	}
1022
1023	uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1024	GHashCell newCell) {
1025	assert(!newCell.isEmpty() && "cannot insert empty cell value");
1026
1027	// FIXME: The low bytes of SHA1 have low entropy for short records, which
1028	// type records are. Swap the byte order for better entropy. A better ghash
1029	// won't need this.
1030	uint32_t startIdx =
1031	llvm::byteswap<uint64_t>(V: *reinterpret_cast<uint64_t *>(&ghash)) %
1032	tableSize;
1033
1034	// Do a linear probe starting at startIdx.
1035	uint32_t idx = startIdx;
1036	while (true) {
1037	// Run a compare and swap loop. There are four cases:
1038	// - cell is empty: CAS into place and return
1039	// - cell has matching key, earlier priority: do nothing, return
1040	// - cell has matching key, later priority: CAS into place and return
1041	// - cell has non-matching key: hash collision, probe next cell
1042	auto cellPtr = reinterpret_cast<std::atomic<GHashCell> >(&table[idx]);
1043	GHashCell oldCell(cellPtr->load());
1044	while (oldCell.isEmpty() \|\| oldCell.getGHash(ctx) == ghash) {
1045	// Check if there is an existing ghash entry with a higher priority
1046	// (earlier ordering). If so, this is a duplicate, we are done.
1047	if (!oldCell.isEmpty() && oldCell < newCell)
1048	return idx;
1049	// Either the cell is empty, or our value is higher priority. Try to
1050	// compare and swap. If it succeeds, we are done.
1051	if (cellPtr->compare_exchange_weak(e&: oldCell, i: newCell))
1052	return idx;
1053	// If the CAS failed, check this cell again.
1054	}
1055
1056	// Advance the probe. Wrap around to the beginning if we run off the end.
1057	++idx;
1058	idx = idx == tableSize ? `0` : idx;
1059	if (idx == startIdx) {
1060	// If this becomes an issue, we could mark failure and rehash from the
1061	// beginning with a bigger table. There is no difference between rehashing
1062	// internally and starting over.
1063	report_fatal_error(reason: "ghash table is full");
1064	}
1065	}
1066	llvm_unreachable("left infloop");
1067	}
1068
1069	TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1070	: typeTable (alloc), idTable (alloc), ctx(c) {}
1071
1072	TypeMerger::~TypeMerger() = default;
1073
1074	void TypeMerger::mergeTypesWithGHash() {
1075	// Load ghashes. Do type servers and PCH objects first.
1076	{
1077	llvm::TimeTraceScope timeScope("Load GHASHes");
1078	ScopedTimer t1(ctx.loadGHashTimer);
1079	parallelForEach(R&: dependencySources,
1080	Fn: [&](TpiSource *source) { source->loadGHashes(); });
1081	parallelForEach(R&: objectSources,
1082	Fn: [&](TpiSource *source) { source->loadGHashes(); });
1083	}
1084
1085	llvm::TimeTraceScope timeScope("Merge types (GHASH)");
1086	ScopedTimer t2(ctx.mergeGHashTimer);
1087	GHashState ghashState;
1088
1089	// Estimate the size of hash table needed to deduplicate ghashes. This must
1090	// be larger than the number of unique types, or hash table insertion may not
1091	// be able to find a vacant slot. Summing the input types guarantees this, but
1092	// it is a gross overestimate. The table size could be reduced to save memory,
1093	// but it would require implementing rehashing, and this table is generally
1094	// small compared to total memory usage, at eight bytes per input type record,
1095	// and most input type records are larger than eight bytes.
1096	size_t tableSize = `0`;
1097	for (TpiSource *source : ctx.tpiSourceList)
1098	tableSize += source->ghashes.size();
1099
1100	// Cap the table size so that we can use 32-bit cell indices. Type indices are
1101	// also 32-bit, so this is an inherent PDB file format limit anyway.
1102	tableSize =
1103	std::min(a: size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, b: tableSize);
1104	ghashState.table.init(newTableSize: static_cast<uint32_t>(tableSize));
1105
1106	// Insert ghashes in parallel. During concurrent insertion, we cannot observe
1107	// the contents of the hash table cell, but we can remember the insertion
1108	// position. Because the table does not rehash, the position will not change
1109	// under insertion. After insertion is done, the value of the cell can be read
1110	// to retrieve the final PDB type index.
1111	parallelFor(Begin: `0`, End: ctx.tpiSourceList.size(), Fn: [&](size_t tpiSrcIdx) {
1112	TpiSource *source = ctx.tpiSourceList [tpiSrcIdx];
1113	source->indexMapStorage.resize(N: source->ghashes.size());
1114	for (uint32_t i = `0`, e = source->ghashes.size(); i < e; i++) {
1115	if (source->shouldOmitFromPdb(ghashIdx: i)) {
1116	source->indexMapStorage [i] = TypeIndex(SimpleTypeKind::NotTranslated);
1117	continue;
1118	}
1119	GloballyHashedType ghash = source->ghashes [i];
1120	bool isItem = source->isItemIndex.test(Idx: i);
1121	uint32_t cellIdx =
1122	ghashState.table.insert(ctx, ghash, newCell: GHashCell (isItem, tpiSrcIdx, i));
1123
1124	// Store the ghash cell index as a type index in indexMapStorage. Later
1125	// we will replace it with the PDB type index.
1126	source->indexMapStorage [i] = TypeIndex::fromArrayIndex(Index: cellIdx);
1127	}
1128	});
1129
1130	// Collect all non-empty cells and sort them. This will implicitly assign
1131	// destination type indices, and partition the entries into type records and
1132	// item records. It arranges types in this order:
1133	// - type records
1134	// - source 0, type 0...
1135	// - source 1, type 1...
1136	// - item records
1137	// - source 0, type 1...
1138	// - source 1, type 0...
1139	std::vector<GHashCell> entries;
1140	for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1141	if (!cell.isEmpty())
1142	entries.push_back(x: cell);
1143	}
1144	parallelSort(R&: entries, Comp: std::less<GHashCell>());
1145	Log(ctx) << formatv(
1146	Fmt: "ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1147	Vals: tableSize ? double(entries.size()) / tableSize : `0`, Vals: entries.size(),
1148	Vals&: tableSize);
1149
1150	// Find out how many type and item indices there are.
1151	auto mid = llvm::lower_bound(Range&: entries, Value: GHashCell (true, `0`, `0`));
1152	assert((mid == entries.end() \|\| mid ->isItem()) &&
1153	(mid == entries.begin() \|\| !std::prev(mid)->isItem()) &&
1154	"midpoint is not midpoint");
1155	uint32_t numTypes = std::distance(first: entries.begin(), last: mid);
1156	uint32_t numItems = std::distance(first: mid, last: entries.end());
1157	Log(ctx) << "Tpi record count: " << numTypes;
1158	Log(ctx) << "Ipi record count: " << numItems;
1159
1160	// Make a list of the "unique" type records to merge for each tpi source. Type
1161	// merging will skip indices not on this list. Store the destination PDB type
1162	// index for these unique types in the tpiMap for each source. The entries for
1163	// non-unique types will be filled in prior to type merging.
1164	for (uint32_t i = `0`, e = entries.size(); i < e; ++i) {
1165	auto &cell = entries [i];
1166	uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1167	TpiSource *source = ctx.tpiSourceList [tpiSrcIdx];
1168	source->uniqueTypes.push_back(x: cell.getGHashIdx());
1169
1170	// Update the ghash table to store the destination PDB type index in the
1171	// table.
1172	uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1173	uint32_t ghashCellIndex =
1174	source->indexMapStorage [cell.getGHashIdx()].toArrayIndex();
1175	ghashState.table.table[ghashCellIndex] =
1176	GHashCell (cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1177	}
1178
1179	// In parallel, remap all types.
1180	for (TpiSource *source : dependencySources)
1181	source->remapTpiWithGHashes(g: &ghashState);
1182	parallelForEach(R&: objectSources, Fn: [&](TpiSource *source) {
1183	source->remapTpiWithGHashes(g: &ghashState);
1184	});
1185
1186	// Build a global map of from function ID to function type.
1187	for (TpiSource *source : ctx.tpiSourceList) {
1188	funcIdToType.insert_range(R&: source->funcIdToType);
1189	source->funcIdToType.clear();
1190	}
1191
1192	clearGHashes();
1193	}
1194
1195	void TypeMerger::sortDependencies() {
1196	// Order dependencies first, but preserve the existing order.
1197	std::vector<TpiSource *> deps;
1198	std::vector<TpiSource *> objs;
1199	for (TpiSource *s : ctx.tpiSourceList)
1200	(s->isDependency() ? deps : objs).push_back(x: s);
1201	uint32_t numDeps = deps.size();
1202	uint32_t numObjs = objs.size();
1203	ctx.tpiSourceList = std::move(deps);
1204	ctx.tpiSourceList.insert(position: ctx.tpiSourceList.end(), first: objs.begin(), last: objs.end());
1205	for (uint32_t i = `0`, e = ctx.tpiSourceList.size(); i < e; ++i)
1206	ctx.tpiSourceList [i]->tpiSrcIdx = i;
1207	dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1208	objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1209	}
1210
1211	/// Given the index into the ghash table for a particular type, return the type
1212	/// index for that type in the output PDB.
1213	static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1214	uint32_t ghashCellIdx) {
1215	GHashCell cell = g->table.table[ghashCellIdx];
1216	return TypeIndex::fromArrayIndex(Index: cell.getGHashIdx());
1217	}
1218
1219	/// Free heap allocated ghashes.
1220	void TypeMerger::clearGHashes() {
1221	for (TpiSource *src : ctx.tpiSourceList) {
1222	if (src->ownedGHashes)
1223	delete[] src->ghashes.data();
1224	src->ghashes = {};
1225	src->isItemIndex.clear();
1226	src->uniqueTypes.clear();
1227	}
1228	}
1229
1230	// Fill in a TPI or IPI index map using ghashes. For each source type, use its
1231	// ghash to lookup its final type index in the PDB, and store that in the map.
1232	void TpiSource::fillMapFromGHashes(GHashState *g) {
1233	for (size_t i = `0`, e = ghashes.size(); i < e; ++i) {
1234	TypeIndex fakeCellIndex = indexMapStorage [i];
1235	if (fakeCellIndex.isSimple())
1236	indexMapStorage [i] = fakeCellIndex;
1237	else
1238	indexMapStorage [i] =
1239	loadPdbTypeIndexFromCell(g, ghashCellIdx: fakeCellIndex.toArrayIndex());
1240	}
1241	}
1242

source code of lld/COFF/DebugTypes.cpp