1//===- DebugTypes.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "DebugTypes.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "InputFiles.h"
13#include "PDB.h"
14#include "TypeMerger.h"
15#include "lld/Common/ErrorHandler.h"
16#include "lld/Common/Memory.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19#include "llvm/DebugInfo/CodeView/TypeRecord.h"
20#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22#include "llvm/DebugInfo/PDB/GenericError.h"
23#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28#include "llvm/Support/FormatVariadic.h"
29#include "llvm/Support/Parallel.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/TimeProfiler.h"
32
33using namespace llvm;
34using namespace llvm::codeview;
35using namespace lld;
36using namespace lld::coff;
37
38namespace {
39class TypeServerIpiSource;
40
41// The TypeServerSource class represents a PDB type server, a file referenced by
42// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
43// files, therefore there must be only once instance per OBJ lot. The file path
44// is discovered from the dependent OBJ's debug type stream. The
45// TypeServerSource object is then queued and loaded by the COFF Driver. The
46// debug type stream for such PDB files will be merged first in the final PDB,
47// before any dependent OBJ.
48class TypeServerSource : public TpiSource {
49public:
50 explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
51 : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
52 if (f->loadErrorStr)
53 return;
54 pdb::PDBFile &file = f->session->getPDBFile();
55 auto expectedInfo = file.getPDBInfoStream();
56 if (!expectedInfo)
57 return;
58 Guid = expectedInfo->getGuid();
59 auto it = ctx.typeServerSourceMappings.emplace(args&: Guid, args: this);
60 if (!it.second) {
61 // If we hit here we have collision on Guid's in two PDB files.
62 // This can happen if the PDB Guid is invalid or if we are really
63 // unlucky. This should fall back on stright file-system lookup.
64 it.first->second = nullptr;
65 }
66 }
67
68 Error mergeDebugT(TypeMerger *m) override;
69
70 void loadGHashes() override;
71 void remapTpiWithGHashes(GHashState *g) override;
72
73 bool isDependency() const override { return true; }
74
75 PDBInputFile *pdbInputFile = nullptr;
76
77 // TpiSource for IPI stream.
78 TypeServerIpiSource *ipiSrc = nullptr;
79
80 // The PDB signature GUID.
81 codeview::GUID Guid;
82};
83
84// Companion to TypeServerSource. Stores the index map for the IPI stream in the
85// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
86// invariant of one type index space per source.
87class TypeServerIpiSource : public TpiSource {
88public:
89 explicit TypeServerIpiSource(COFFLinkerContext &ctx)
90 : TpiSource(ctx, PDBIpi, nullptr) {}
91
92 friend class TypeServerSource;
93
94 // All of the TpiSource methods are no-ops. The parent TypeServerSource
95 // handles both TPI and IPI.
96 Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
97 void loadGHashes() override {}
98 void remapTpiWithGHashes(GHashState *g) override {}
99 bool isDependency() const override { return true; }
100};
101
102// This class represents the debug type stream of an OBJ file that depends on a
103// PDB type server (see TypeServerSource).
104class UseTypeServerSource : public TpiSource {
105 Expected<TypeServerSource *> getTypeServerSource();
106
107public:
108 UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
109 : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
110
111 Error mergeDebugT(TypeMerger *m) override;
112
113 // No need to load ghashes from /Zi objects.
114 void loadGHashes() override {}
115 void remapTpiWithGHashes(GHashState *g) override;
116
117 // Information about the PDB type server dependency, that needs to be loaded
118 // in before merging this OBJ.
119 TypeServer2Record typeServerDependency;
120};
121
122// This class represents the debug type stream of a Microsoft precompiled
123// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
124// PDB, before any other OBJs that depend on this. Note that only MSVC generate
125// such files, clang does not.
126class PrecompSource : public TpiSource {
127public:
128 PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
129 // If the S_OBJNAME record contains the PCH signature, we'll register this
130 // source file right away.
131 registerMapping();
132 }
133
134 Error mergeDebugT(TypeMerger *m) override;
135
136 void loadGHashes() override;
137
138 bool isDependency() const override { return true; }
139
140private:
141 void registerMapping();
142
143 // Whether this precomp OBJ was recorded in the precompSourceMappings map.
144 // Only happens if the file->pchSignature is valid.
145 bool registered = false;
146};
147
148// This class represents the debug type stream of an OBJ file that depends on a
149// Microsoft precompiled headers OBJ (see PrecompSource).
150class UsePrecompSource : public TpiSource {
151public:
152 UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
153 : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
154
155 Error mergeDebugT(TypeMerger *m) override;
156
157 void loadGHashes() override;
158 void remapTpiWithGHashes(GHashState *g) override;
159
160private:
161 Error mergeInPrecompHeaderObj();
162
163 PrecompSource *findObjByName(StringRef fileNameOnly);
164 PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
165 Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
166
167public:
168 // Information about the Precomp OBJ dependency, that needs to be loaded in
169 // before merging this OBJ.
170 PrecompRecord precompDependency;
171};
172} // namespace
173
174TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
175 : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
176 ctx.addTpiSource(tpi: this);
177}
178
179// Vtable key method.
180TpiSource::~TpiSource() {
181 // Silence any assertions about unchecked errors.
182 consumeError(Err: std::move(typeMergingError));
183}
184
185TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
186 return make<TpiSource>(args&: ctx, args: TpiSource::Regular, args&: file);
187}
188
189TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
190 PDBInputFile *pdbInputFile) {
191 // Type server sources come in pairs: the TPI stream, and the IPI stream.
192 auto *tpiSource = make<TypeServerSource>(args&: ctx, args&: pdbInputFile);
193 if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
194 tpiSource->ipiSrc = make<TypeServerIpiSource>(args&: ctx);
195 return tpiSource;
196}
197
198TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
199 ObjFile *file,
200 TypeServer2Record ts) {
201 return make<UseTypeServerSource>(args&: ctx, args&: file, args&: ts);
202}
203
204TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
205 return make<PrecompSource>(args&: ctx, args&: file);
206}
207
208TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
209 ObjFile *file,
210 PrecompRecord precomp) {
211 return make<UsePrecompSource>(args&: ctx, args&: file, args&: precomp);
212}
213
214bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
215 if (ti.isSimple())
216 return true;
217
218 // This can be an item index or a type index. Choose the appropriate map.
219 ArrayRef<TypeIndex> tpiOrIpiMap =
220 (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
221 if (ti.toArrayIndex() >= tpiOrIpiMap.size())
222 return false;
223 ti = tpiOrIpiMap[ti.toArrayIndex()];
224 return true;
225}
226
227void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
228 ArrayRef<TiReference> typeRefs) {
229 MutableArrayRef<uint8_t> contents = rec.drop_front(N: sizeof(RecordPrefix));
230 for (const TiReference &ref : typeRefs) {
231 unsigned byteSize = ref.Count * sizeof(TypeIndex);
232 if (contents.size() < ref.Offset + byteSize)
233 Fatal(ctx) << "symbol record too short";
234
235 MutableArrayRef<TypeIndex> indices(
236 reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
237 for (TypeIndex &ti : indices) {
238 if (!remapTypeIndex(ti, refKind: ref.Kind)) {
239 if (ctx.config.verbose) {
240 uint16_t kind =
241 reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
242 StringRef fname = file ? file->getName() : "<unknown PDB>";
243 Log(ctx) << "failed to remap type index in record of kind 0x"
244 << utohexstr(X: kind) << " in " << fname << " with bad "
245 << (ref.Kind == TiRefKind::IndexRef ? "item" : "type")
246 << " index 0x" << utohexstr(X: ti.getIndex());
247 }
248 ti = TypeIndex(SimpleTypeKind::NotTranslated);
249 continue;
250 }
251 }
252 }
253}
254
255void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
256 // TODO: Handle errors similar to symbols.
257 SmallVector<TiReference, 32> typeRefs;
258 discoverTypeIndices(Type: CVType(rec), Refs&: typeRefs);
259 remapRecord(rec, typeRefs);
260}
261
262bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
263 // Discover type index references in the record. Skip it if we don't
264 // know where they are.
265 SmallVector<TiReference, 32> typeRefs;
266 if (!discoverTypeIndicesInSymbol(RecordData: rec, Refs&: typeRefs))
267 return false;
268 remapRecord(rec, typeRefs);
269 return true;
270}
271
272// A COFF .debug$H section is currently a clang extension. This function checks
273// if a .debug$H section is in a format that we expect / understand, so that we
274// can ignore any sections which are coincidentally also named .debug$H but do
275// not contain a format we recognize.
276static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
277 if (debugH.size() < sizeof(object::debug_h_header))
278 return false;
279 auto *header =
280 reinterpret_cast<const object::debug_h_header *>(debugH.data());
281 debugH = debugH.drop_front(N: sizeof(object::debug_h_header));
282 return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
283 header->Version == 0 &&
284 header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
285 (debugH.size() % 8 == 0);
286}
287
288static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
289 SectionChunk *sec =
290 SectionChunk::findByName(sections: file->getDebugChunks(), name: ".debug$H");
291 if (!sec)
292 return std::nullopt;
293 ArrayRef<uint8_t> contents = sec->getContents();
294 if (!canUseDebugH(debugH: contents))
295 return std::nullopt;
296 return contents;
297}
298
299static ArrayRef<GloballyHashedType>
300getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
301 assert(canUseDebugH(debugH));
302 debugH = debugH.drop_front(N: sizeof(object::debug_h_header));
303 uint32_t count = debugH.size() / sizeof(GloballyHashedType);
304 return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
305}
306
307// Merge .debug$T for a generic object file.
308Error TpiSource::mergeDebugT(TypeMerger *m) {
309 assert(!ctx.config.debugGHashes &&
310 "use remapTpiWithGHashes when ghash is enabled");
311
312 CVTypeArray types;
313 BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
314 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
315
316 // When dealing with PCH.OBJ, some indices were already merged.
317 unsigned nbHeadIndices = indexMapStorage.size();
318
319 std::optional<PCHMergerInfo> pchInfo;
320 if (auto err = mergeTypeAndIdRecords(DestIds&: m->idTable, DestTypes&: m->typeTable,
321 SourceToDest&: indexMapStorage, IdsAndTypes: types, PCHInfo&: pchInfo))
322 Fatal(ctx) << "codeview::mergeTypeAndIdRecords failed: "
323 << toString(E: std::move(err));
324 if (pchInfo) {
325 file->pchSignature = pchInfo->PCHSignature;
326 endPrecompIdx = pchInfo->EndPrecompIndex;
327 }
328
329 // In an object, there is only one mapping for both types and items.
330 tpiMap = indexMapStorage;
331 ipiMap = indexMapStorage;
332
333 if (ctx.config.showSummary) {
334 nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
335 nbTypeRecordsBytes = reader.getLength();
336 // Count how many times we saw each type record in our input. This
337 // calculation requires a second pass over the type records to classify each
338 // record as a type or index. This is slow, but this code executes when
339 // collecting statistics.
340 m->tpiCounts.resize(N: m->getTypeTable().size());
341 m->ipiCounts.resize(N: m->getIDTable().size());
342 uint32_t srcIdx = nbHeadIndices;
343 for (const CVType &ty : types) {
344 TypeIndex dstIdx = tpiMap[srcIdx++];
345 // Type merging may fail, so a complex source type may become the simple
346 // NotTranslated type, which cannot be used as an array index.
347 if (dstIdx.isSimple())
348 continue;
349 SmallVectorImpl<uint32_t> &counts =
350 isIdRecord(K: ty.kind()) ? m->ipiCounts : m->tpiCounts;
351 ++counts[dstIdx.toArrayIndex()];
352 }
353 }
354
355 return Error::success();
356}
357
358// Merge types from a type server PDB.
359Error TypeServerSource::mergeDebugT(TypeMerger *m) {
360 assert(!ctx.config.debugGHashes &&
361 "use remapTpiWithGHashes when ghash is enabled");
362
363 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
364 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
365 if (auto e = expectedTpi.takeError())
366 Fatal(ctx) << "Type server does not have TPI stream: "
367 << toString(E: std::move(e));
368 pdb::TpiStream *maybeIpi = nullptr;
369 if (pdbFile.hasPDBIpiStream()) {
370 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
371 if (auto e = expectedIpi.takeError())
372 Fatal(ctx) << "Error getting type server IPI stream: "
373 << toString(E: std::move(e));
374 maybeIpi = &*expectedIpi;
375 }
376
377 // Merge TPI first, because the IPI stream will reference type indices.
378 if (auto err = mergeTypeRecords(Dest&: m->typeTable, SourceToDest&: indexMapStorage,
379 Types: expectedTpi->typeArray()))
380 Fatal(ctx) << "codeview::mergeTypeRecords failed: "
381 << toString(E: std::move(err));
382 tpiMap = indexMapStorage;
383
384 // Merge IPI.
385 if (maybeIpi) {
386 if (auto err = mergeIdRecords(Dest&: m->idTable, Types: tpiMap, SourceToDest&: ipiSrc->indexMapStorage,
387 Ids: maybeIpi->typeArray()))
388 Fatal(ctx) << "codeview::mergeIdRecords failed: "
389 << toString(E: std::move(err));
390 ipiMap = ipiSrc->indexMapStorage;
391 }
392
393 if (ctx.config.showSummary) {
394 nbTypeRecords = tpiMap.size() + ipiMap.size();
395 nbTypeRecordsBytes =
396 expectedTpi->typeArray().getUnderlyingStream().getLength() +
397 (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
398 : 0);
399
400 // Count how many times we saw each type record in our input. If a
401 // destination type index is present in the source to destination type index
402 // map, that means we saw it once in the input. Add it to our histogram.
403 m->tpiCounts.resize(N: m->getTypeTable().size());
404 m->ipiCounts.resize(N: m->getIDTable().size());
405 for (TypeIndex ti : tpiMap)
406 if (!ti.isSimple())
407 ++m->tpiCounts[ti.toArrayIndex()];
408 for (TypeIndex ti : ipiMap)
409 if (!ti.isSimple())
410 ++m->ipiCounts[ti.toArrayIndex()];
411 }
412
413 return Error::success();
414}
415
416Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
417 const codeview::GUID &tsId = typeServerDependency.getGuid();
418 StringRef tsPath = typeServerDependency.getName();
419
420 TypeServerSource *tsSrc = nullptr;
421 auto it = ctx.typeServerSourceMappings.find(x: tsId);
422 if (it != ctx.typeServerSourceMappings.end()) {
423 tsSrc = (TypeServerSource *)it->second;
424 }
425 if (tsSrc == nullptr) {
426 // The file failed to load, lookup by name
427 PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, path: tsPath, fromFile: file);
428 if (!pdb)
429 return createFileError(F: tsPath, E: errorCodeToError(EC: std::error_code(
430 ENOENT, std::generic_category())));
431 // If an error occurred during loading, throw it now
432 if (pdb->loadErrorStr)
433 return createFileError(
434 F: tsPath, E: make_error<StringError>(Args&: *pdb->loadErrorStr,
435 Args: llvm::inconvertibleErrorCode()));
436
437 tsSrc = (TypeServerSource *)pdb->debugTypesObj;
438
439 // Just because a file with a matching name was found and it was an actual
440 // PDB file doesn't mean it matches. For it to match the InfoStream's GUID
441 // must match the GUID specified in the TypeServer2 record.
442 if (tsSrc->Guid != tsId) {
443 return createFileError(F: tsPath,
444 E: make_error<pdb::PDBError>(
445 Args: pdb::pdb_error_code::signature_out_of_date));
446 }
447 }
448 return tsSrc;
449}
450
451Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
452 Expected<TypeServerSource *> tsSrc = getTypeServerSource();
453 if (!tsSrc)
454 return tsSrc.takeError();
455
456 pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
457 auto expectedInfo = pdbSession.getPDBInfoStream();
458 if (!expectedInfo)
459 return expectedInfo.takeError();
460
461 // Reuse the type index map of the type server.
462 tpiMap = (*tsSrc)->tpiMap;
463 ipiMap = (*tsSrc)->ipiMap;
464 return Error::success();
465}
466
467static bool equalsPath(StringRef path1, StringRef path2) {
468#if defined(_WIN32)
469 return path1.equals_insensitive(path2);
470#else
471 return path1 == path2;
472#endif
473}
474
475// Find by name an OBJ provided on the command line
476PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
477 for (auto kv : ctx.precompSourceMappings) {
478 StringRef currentFileName = sys::path::filename(path: kv.second->file->getName(),
479 style: sys::path::Style::windows);
480
481 // Compare based solely on the file name (link.exe behavior)
482 if (equalsPath(path1: currentFileName, path2: fileNameOnly))
483 return (PrecompSource *)kv.second;
484 }
485 return nullptr;
486}
487
488PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
489 PrecompRecord &pr) {
490 // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
491 // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
492 // the paths embedded in the OBJs are in the Windows format.
493 SmallString<128> prFileName =
494 sys::path::filename(path: pr.getPrecompFilePath(), style: sys::path::Style::windows);
495
496 auto it = ctx.precompSourceMappings.find(x: pr.getSignature());
497 if (it != ctx.precompSourceMappings.end()) {
498 return (PrecompSource *)it->second;
499 }
500 // Lookup by name
501 return findObjByName(fileNameOnly: prFileName);
502}
503
504Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
505 PrecompRecord &pr) {
506 PrecompSource *precomp = findPrecompSource(file, pr);
507
508 if (!precomp)
509 return createFileError(
510 F: pr.getPrecompFilePath(),
511 E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch));
512
513 // Don't rely on the PCH signature to validate the concordance between the PCH
514 // and the OBJ that uses it. However we do validate here that the
515 // LF_ENDPRECOMP record index lines up with the number of type records
516 // LF_PRECOMP is expecting.
517 if (precomp->endPrecompIdx != pr.getTypesCount())
518 return createFileError(
519 F: toString(file),
520 E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch));
521
522 return precomp;
523}
524
525/// Merges a precompiled headers TPI map into the current TPI map. The
526/// precompiled headers object will also be loaded and remapped in the
527/// process.
528Error UsePrecompSource::mergeInPrecompHeaderObj() {
529 auto e = findPrecompMap(file, pr&: precompDependency);
530 if (!e)
531 return e.takeError();
532
533 PrecompSource *precompSrc = *e;
534 if (precompSrc->tpiMap.empty())
535 return Error::success();
536
537 assert(precompDependency.getStartTypeIndex() ==
538 TypeIndex::FirstNonSimpleIndex);
539 assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
540 // Use the previously remapped index map from the precompiled headers.
541 indexMapStorage.insert(I: indexMapStorage.begin(), From: precompSrc->tpiMap.begin(),
542 To: precompSrc->tpiMap.begin() +
543 precompDependency.getTypesCount());
544
545 return Error::success();
546}
547
548Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
549 // This object was compiled with /Yu, so process the corresponding
550 // precompiled headers object (/Yc) first. Some type indices in the current
551 // object are referencing data in the precompiled headers object, so we need
552 // both to be loaded.
553 if (Error e = mergeInPrecompHeaderObj())
554 return e;
555
556 return TpiSource::mergeDebugT(m);
557}
558
559Error PrecompSource::mergeDebugT(TypeMerger *m) {
560 // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
561 // The signature comes later with the LF_ENDPRECOMP record, so we first need
562 // to merge in all the .PCH.OBJ file type records, before registering below.
563 if (Error e = TpiSource::mergeDebugT(m))
564 return e;
565
566 registerMapping();
567
568 return Error::success();
569}
570
571void PrecompSource::registerMapping() {
572 if (registered)
573 return;
574 if (file->pchSignature && *file->pchSignature) {
575 auto it = ctx.precompSourceMappings.emplace(args&: *file->pchSignature, args: this);
576 if (!it.second)
577 Fatal(ctx)
578 << "a PCH object with the same signature has already been provided ("
579 << toString(file: it.first->second->file) << " and " << toString(file)
580 << ")";
581 registered = true;
582 }
583}
584
585//===----------------------------------------------------------------------===//
586// Parellel GHash type merging implementation.
587//===----------------------------------------------------------------------===//
588
589void TpiSource::loadGHashes() {
590 if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
591 ghashes = getHashesFromDebugH(debugH: *debugH);
592 ownedGHashes = false;
593 } else {
594 CVTypeArray types;
595 BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
596 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
597 assignGHashesFromVector(hashVec: GloballyHashedType::hashTypes(Records&: types));
598 }
599
600 fillIsItemIndexFromDebugT();
601}
602
603// Copies ghashes from a vector into an array. These are long lived, so it's
604// worth the time to copy these into an appropriately sized vector to reduce
605// memory usage.
606void TpiSource::assignGHashesFromVector(
607 std::vector<GloballyHashedType> &&hashVec) {
608 if (hashVec.empty())
609 return;
610 GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
611 memcpy(dest: hashes, src: hashVec.data(), n: hashVec.size() * sizeof(GloballyHashedType));
612 ghashes = ArrayRef(hashes, hashVec.size());
613 ownedGHashes = true;
614}
615
616// Faster way to iterate type records. forEachTypeChecked is faster than
617// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
618static void forEachTypeChecked(ArrayRef<uint8_t> types,
619 function_ref<void(const CVType &)> fn) {
620 checkError(
621 e: forEachCodeViewRecord<CVType>(StreamBuffer: types, F: [fn](const CVType &ty) -> Error {
622 fn(ty);
623 return Error::success();
624 }));
625}
626
627// Walk over file->debugTypes and fill in the isItemIndex bit vector.
628// TODO: Store this information in .debug$H so that we don't have to recompute
629// it. This is the main bottleneck slowing down parallel ghashing with one
630// thread over single-threaded ghashing.
631void TpiSource::fillIsItemIndexFromDebugT() {
632 uint32_t index = 0;
633 isItemIndex.resize(N: ghashes.size());
634 forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
635 if (isIdRecord(K: ty.kind()))
636 isItemIndex.set(index);
637 ++index;
638 });
639}
640
641void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
642 // Decide if the merged type goes into TPI or IPI.
643 bool isItem = isIdRecord(K: ty.kind());
644 MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
645
646 // Copy the type into our mutable buffer.
647 assert(ty.length() <= codeview::MaxRecordLength);
648 size_t offset = merged.recs.size();
649 size_t newSize = alignTo(Value: ty.length(), Align: 4);
650 merged.recs.resize(new_size: offset + newSize);
651 auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
652 memcpy(dest: newRec.data(), src: ty.data().data(), n: newSize);
653
654 // Fix up the record prefix and padding bytes if it required resizing.
655 if (newSize != ty.length()) {
656 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
657 for (size_t i = ty.length(); i < newSize; ++i)
658 newRec[i] = LF_PAD0 + (newSize - i);
659 }
660
661 // Remap the type indices in the new record.
662 remapTypesInTypeRecord(rec: newRec);
663 uint32_t pdbHash = check(e: pdb::hashTypeRecord(Type: CVType(newRec)));
664 merged.recSizes.push_back(x: static_cast<uint16_t>(newSize));
665 merged.recHashes.push_back(x: pdbHash);
666
667 // Retain a mapping from PDB function id to PDB function type. This mapping is
668 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
669 // symbols.
670 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
671 bool success = ty.length() >= 12;
672 TypeIndex funcId = curIndex;
673 if (success)
674 success &= remapTypeIndex(ti&: funcId, refKind: TiRefKind::IndexRef);
675 TypeIndex funcType =
676 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
677 if (success) {
678 funcIdToType.push_back(x: {funcId, funcType});
679 } else {
680 StringRef fname = file ? file->getName() : "<unknown PDB>";
681 Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x"
682 << utohexstr(X: curIndex.getIndex()) << " in " << fname;
683 }
684 }
685}
686
687void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
688 TypeIndex beginIndex) {
689 // Re-sort the list of unique types by index.
690 if (kind == PDB)
691 assert(llvm::is_sorted(uniqueTypes));
692 else
693 llvm::sort(C&: uniqueTypes);
694
695 // Accumulate all the unique types into one buffer in mergedTypes.
696 uint32_t ghashIndex = 0;
697 auto nextUniqueIndex = uniqueTypes.begin();
698 assert(mergedTpi.recs.empty());
699 assert(mergedIpi.recs.empty());
700
701 // Pre-compute the number of elements in advance to avoid std::vector resizes.
702 unsigned nbTpiRecs = 0;
703 unsigned nbIpiRecs = 0;
704 forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) {
705 if (nextUniqueIndex != uniqueTypes.end() &&
706 *nextUniqueIndex == ghashIndex) {
707 assert(ty.length() <= codeview::MaxRecordLength);
708 size_t newSize = alignTo(Value: ty.length(), Align: 4);
709 (isIdRecord(K: ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
710 ++nextUniqueIndex;
711 }
712 ++ghashIndex;
713 });
714 mergedTpi.recs.reserve(n: nbTpiRecs);
715 mergedIpi.recs.reserve(n: nbIpiRecs);
716
717 // Do the actual type merge.
718 ghashIndex = 0;
719 nextUniqueIndex = uniqueTypes.begin();
720 forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) {
721 if (nextUniqueIndex != uniqueTypes.end() &&
722 *nextUniqueIndex == ghashIndex) {
723 mergeTypeRecord(curIndex: beginIndex + ghashIndex, ty);
724 ++nextUniqueIndex;
725 }
726 ++ghashIndex;
727 });
728 assert(nextUniqueIndex == uniqueTypes.end() &&
729 "failed to merge all desired records");
730 assert(uniqueTypes.size() ==
731 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
732 "missing desired record");
733}
734
735void TpiSource::remapTpiWithGHashes(GHashState *g) {
736 assert(ctx.config.debugGHashes && "ghashes must be enabled");
737 fillMapFromGHashes(m: g);
738 tpiMap = indexMapStorage;
739 ipiMap = indexMapStorage;
740 mergeUniqueTypeRecords(typeRecords: file->debugTypes);
741 // TODO: Free all unneeded ghash resources now that we have a full index map.
742
743 if (ctx.config.showSummary) {
744 nbTypeRecords = ghashes.size();
745 nbTypeRecordsBytes = file->debugTypes.size();
746 }
747}
748
749// PDBs do not actually store global hashes, so when merging a type server
750// PDB we have to synthesize global hashes. To do this, we first synthesize
751// global hashes for the TPI stream, since it is independent, then we
752// synthesize hashes for the IPI stream, using the hashes for the TPI stream
753// as inputs.
754void TypeServerSource::loadGHashes() {
755 // Don't hash twice.
756 if (!ghashes.empty())
757 return;
758 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
759
760 // Hash TPI stream.
761 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
762 if (auto e = expectedTpi.takeError())
763 Fatal(ctx) << "Type server does not have TPI stream: "
764 << toString(E: std::move(e));
765 assignGHashesFromVector(
766 hashVec: GloballyHashedType::hashTypes(Records: expectedTpi->typeArray()));
767 isItemIndex.resize(N: ghashes.size());
768
769 // Hash IPI stream, which depends on TPI ghashes.
770 if (!pdbFile.hasPDBIpiStream())
771 return;
772 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
773 if (auto e = expectedIpi.takeError())
774 Fatal(ctx) << "error retrieving IPI stream: " << toString(E: std::move(e));
775 ipiSrc->assignGHashesFromVector(
776 hashVec: GloballyHashedType::hashIds(Records: expectedIpi->typeArray(), TypeHashes: ghashes));
777
778 // The IPI stream isItemIndex bitvector should be all ones.
779 ipiSrc->isItemIndex.resize(N: ipiSrc->ghashes.size());
780 ipiSrc->isItemIndex.set(I: 0, E: ipiSrc->ghashes.size());
781}
782
783// Flatten discontiguous PDB type arrays to bytes so that we can use
784// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
785// type servers is faster than iterating all object files compiled with /Z7 with
786// CVTypeArray, which has high overheads due to the virtual interface of
787// BinaryStream::readBytes.
788static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
789 BinaryStreamRef stream = types.getUnderlyingStream();
790 ArrayRef<uint8_t> debugTypes;
791 checkError(e: stream.readBytes(Offset: 0, Size: stream.getLength(), Buffer&: debugTypes));
792 return debugTypes;
793}
794
795// Merge types from a type server PDB.
796void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
797 assert(ctx.config.debugGHashes && "ghashes must be enabled");
798
799 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
800 // propagate errors, those should've been handled during ghash loading.
801 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
802 pdb::TpiStream &tpi = check(e: pdbFile.getPDBTpiStream());
803 fillMapFromGHashes(m: g);
804 tpiMap = indexMapStorage;
805 mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: tpi.typeArray()));
806 if (pdbFile.hasPDBIpiStream()) {
807 pdb::TpiStream &ipi = check(e: pdbFile.getPDBIpiStream());
808 ipiSrc->indexMapStorage.resize(N: ipiSrc->ghashes.size());
809 ipiSrc->fillMapFromGHashes(m: g);
810 ipiMap = ipiSrc->indexMapStorage;
811 ipiSrc->tpiMap = tpiMap;
812 ipiSrc->ipiMap = ipiMap;
813 ipiSrc->mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: ipi.typeArray()));
814
815 if (ctx.config.showSummary) {
816 nbTypeRecords = ipiSrc->ghashes.size();
817 nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
818 }
819 }
820
821 if (ctx.config.showSummary) {
822 nbTypeRecords += ghashes.size();
823 nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
824 }
825}
826
827void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
828 // No remapping to do with /Zi objects. Simply use the index map from the type
829 // server. Errors should have been reported earlier. Symbols from this object
830 // will be ignored.
831 Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
832 if (!maybeTsSrc) {
833 typeMergingError =
834 joinErrors(E1: std::move(typeMergingError), E2: maybeTsSrc.takeError());
835 return;
836 }
837 TypeServerSource *tsSrc = *maybeTsSrc;
838 tpiMap = tsSrc->tpiMap;
839 ipiMap = tsSrc->ipiMap;
840}
841
842void PrecompSource::loadGHashes() {
843 if (getDebugH(file)) {
844 Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented";
845 }
846
847 uint32_t ghashIdx = 0;
848 std::vector<GloballyHashedType> hashVec;
849 forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
850 // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
851 // the PDB. There must be an entry in the list of ghashes so that the type
852 // indexes of the following records in the /Yc PCH object line up.
853 if (ty.kind() == LF_ENDPRECOMP) {
854 EndPrecompRecord endPrecomp;
855 cantFail(Err: TypeDeserializer::deserializeAs<EndPrecompRecord>(
856 CVT&: const_cast<CVType &>(ty), Record&: endPrecomp));
857 file->pchSignature = endPrecomp.getSignature();
858 registerMapping();
859 endPrecompIdx = ghashIdx;
860 }
861
862 hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec));
863 isItemIndex.push_back(Val: isIdRecord(K: ty.kind()));
864 ++ghashIdx;
865 });
866 assignGHashesFromVector(hashVec: std::move(hashVec));
867}
868
869void UsePrecompSource::loadGHashes() {
870 auto e = findPrecompMap(file, pr&: precompDependency);
871 if (!e) {
872 Warn(ctx) << e.takeError();
873 return;
874 }
875
876 PrecompSource *pchSrc = *e;
877
878 // To compute ghashes of a /Yu object file, we need to build on the ghashes of
879 // the /Yc PCH object. After we are done hashing, discard the ghashes from the
880 // PCH source so we don't unnecessarily try to deduplicate them.
881 std::vector<GloballyHashedType> hashVec =
882 pchSrc->ghashes.take_front(N: precompDependency.getTypesCount());
883 forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
884 hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec));
885 isItemIndex.push_back(Val: isIdRecord(K: ty.kind()));
886 });
887 hashVec.erase(first: hashVec.begin(),
888 last: hashVec.begin() + precompDependency.getTypesCount());
889 assignGHashesFromVector(hashVec: std::move(hashVec));
890}
891
892void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
893 fillMapFromGHashes(m: g);
894 // This object was compiled with /Yu, so process the corresponding
895 // precompiled headers object (/Yc) first. Some type indices in the current
896 // object are referencing data in the precompiled headers object, so we need
897 // both to be loaded.
898 if (Error e = mergeInPrecompHeaderObj()) {
899 typeMergingError = joinErrors(E1: std::move(typeMergingError), E2: std::move(e));
900 return;
901 }
902
903 tpiMap = indexMapStorage;
904 ipiMap = indexMapStorage;
905 mergeUniqueTypeRecords(typeRecords: file->debugTypes,
906 beginIndex: TypeIndex(precompDependency.getStartTypeIndex() +
907 precompDependency.getTypesCount()));
908 if (ctx.config.showSummary) {
909 nbTypeRecords = ghashes.size();
910 nbTypeRecordsBytes = file->debugTypes.size();
911 }
912}
913
914namespace {
915/// A concurrent hash table for global type hashing. It is based on this paper:
916/// Concurrent Hash Tables: Fast and General(?)!
917/// https://dl.acm.org/doi/10.1145/3309206
918///
919/// This hash table is meant to be used in two phases:
920/// 1. concurrent insertions
921/// 2. concurrent reads
922/// It does not support lookup, deletion, or rehashing. It uses linear probing.
923///
924/// The paper describes storing a key-value pair in two machine words.
925/// Generally, the values stored in this map are type indices, and we can use
926/// those values to recover the ghash key from a side table. This allows us to
927/// shrink the table entries further at the cost of some loads, and sidesteps
928/// the need for a 128 bit atomic compare-and-swap operation.
929///
930/// During insertion, a priority function is used to decide which insertion
931/// should be preferred. This ensures that the output is deterministic. For
932/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
933///
934class GHashCell;
935struct GHashTable {
936 GHashCell *table = nullptr;
937 uint32_t tableSize = 0;
938
939 GHashTable() = default;
940 ~GHashTable();
941
942 /// Initialize the table with the given size. Because the table cannot be
943 /// resized, the initial size of the table must be large enough to contain all
944 /// inputs, or insertion may not be able to find an empty cell.
945 void init(uint32_t newTableSize);
946
947 /// Insert the cell with the given ghash into the table. Return the insertion
948 /// position in the table. It is safe for the caller to store the insertion
949 /// position because the table cannot be resized.
950 uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
951 GHashCell newCell);
952};
953
954/// A ghash table cell for deduplicating types from TpiSources.
955class GHashCell {
956 // Force "data" to be 64-bit aligned; otherwise, some versions of clang
957 // will generate calls to libatomic when using some versions of libstdc++
958 // on 32-bit targets. (Also, in theory, there could be a target where
959 // new[] doesn't always return an 8-byte-aligned allocation.)
960 alignas(sizeof(uint64_t)) uint64_t data = 0;
961
962public:
963 GHashCell() = default;
964
965 // Construct data most to least significant so that sorting works well:
966 // - isItem
967 // - tpiSrcIdx
968 // - ghashIdx
969 // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
970 // non-zero representation.
971 GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
972 : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
973 ghashIdx) {
974 assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
975 assert(ghashIdx == getGHashIdx() && "round trip failure");
976 }
977
978 explicit GHashCell(uint64_t data) : data(data) {}
979
980 // The empty cell is all zeros.
981 bool isEmpty() const { return data == 0ULL; }
982
983 /// Extract the tpiSrcIdx.
984 uint32_t getTpiSrcIdx() const {
985 return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
986 }
987
988 /// Extract the index into the ghash array of the TpiSource.
989 uint32_t getGHashIdx() const { return (uint32_t)data; }
990
991 bool isItem() const { return data & (1ULL << 63U); }
992
993 /// Get the ghash key for this cell.
994 GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
995 return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
996 }
997
998 /// The priority function for the cell. The data is stored such that lower
999 /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
1000 /// from earlier sources are more likely to prevail.
1001 friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
1002 return l.data < r.data;
1003 }
1004};
1005} // namespace
1006
1007namespace lld::coff {
1008/// This type is just a wrapper around GHashTable with external linkage so it
1009/// can be used from a header.
1010struct GHashState {
1011 GHashTable table;
1012};
1013} // namespace lld::coff
1014
1015GHashTable::~GHashTable() { delete[] table; }
1016
1017void GHashTable::init(uint32_t newTableSize) {
1018 table = new GHashCell[newTableSize];
1019 memset(s: table, c: 0, n: newTableSize * sizeof(GHashCell));
1020 tableSize = newTableSize;
1021}
1022
1023uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1024 GHashCell newCell) {
1025 assert(!newCell.isEmpty() && "cannot insert empty cell value");
1026
1027 // FIXME: The low bytes of SHA1 have low entropy for short records, which
1028 // type records are. Swap the byte order for better entropy. A better ghash
1029 // won't need this.
1030 uint32_t startIdx =
1031 llvm::byteswap<uint64_t>(V: *reinterpret_cast<uint64_t *>(&ghash)) %
1032 tableSize;
1033
1034 // Do a linear probe starting at startIdx.
1035 uint32_t idx = startIdx;
1036 while (true) {
1037 // Run a compare and swap loop. There are four cases:
1038 // - cell is empty: CAS into place and return
1039 // - cell has matching key, earlier priority: do nothing, return
1040 // - cell has matching key, later priority: CAS into place and return
1041 // - cell has non-matching key: hash collision, probe next cell
1042 auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1043 GHashCell oldCell(cellPtr->load());
1044 while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
1045 // Check if there is an existing ghash entry with a higher priority
1046 // (earlier ordering). If so, this is a duplicate, we are done.
1047 if (!oldCell.isEmpty() && oldCell < newCell)
1048 return idx;
1049 // Either the cell is empty, or our value is higher priority. Try to
1050 // compare and swap. If it succeeds, we are done.
1051 if (cellPtr->compare_exchange_weak(e&: oldCell, i: newCell))
1052 return idx;
1053 // If the CAS failed, check this cell again.
1054 }
1055
1056 // Advance the probe. Wrap around to the beginning if we run off the end.
1057 ++idx;
1058 idx = idx == tableSize ? 0 : idx;
1059 if (idx == startIdx) {
1060 // If this becomes an issue, we could mark failure and rehash from the
1061 // beginning with a bigger table. There is no difference between rehashing
1062 // internally and starting over.
1063 report_fatal_error(reason: "ghash table is full");
1064 }
1065 }
1066 llvm_unreachable("left infloop");
1067}
1068
1069TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1070 : typeTable(alloc), idTable(alloc), ctx(c) {}
1071
1072TypeMerger::~TypeMerger() = default;
1073
1074void TypeMerger::mergeTypesWithGHash() {
1075 // Load ghashes. Do type servers and PCH objects first.
1076 {
1077 llvm::TimeTraceScope timeScope("Load GHASHes");
1078 ScopedTimer t1(ctx.loadGHashTimer);
1079 parallelForEach(R&: dependencySources,
1080 Fn: [&](TpiSource *source) { source->loadGHashes(); });
1081 parallelForEach(R&: objectSources,
1082 Fn: [&](TpiSource *source) { source->loadGHashes(); });
1083 }
1084
1085 llvm::TimeTraceScope timeScope("Merge types (GHASH)");
1086 ScopedTimer t2(ctx.mergeGHashTimer);
1087 GHashState ghashState;
1088
1089 // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1090 // be larger than the number of unique types, or hash table insertion may not
1091 // be able to find a vacant slot. Summing the input types guarantees this, but
1092 // it is a gross overestimate. The table size could be reduced to save memory,
1093 // but it would require implementing rehashing, and this table is generally
1094 // small compared to total memory usage, at eight bytes per input type record,
1095 // and most input type records are larger than eight bytes.
1096 size_t tableSize = 0;
1097 for (TpiSource *source : ctx.tpiSourceList)
1098 tableSize += source->ghashes.size();
1099
1100 // Cap the table size so that we can use 32-bit cell indices. Type indices are
1101 // also 32-bit, so this is an inherent PDB file format limit anyway.
1102 tableSize =
1103 std::min(a: size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, b: tableSize);
1104 ghashState.table.init(newTableSize: static_cast<uint32_t>(tableSize));
1105
1106 // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1107 // the contents of the hash table cell, but we can remember the insertion
1108 // position. Because the table does not rehash, the position will not change
1109 // under insertion. After insertion is done, the value of the cell can be read
1110 // to retrieve the final PDB type index.
1111 parallelFor(Begin: 0, End: ctx.tpiSourceList.size(), Fn: [&](size_t tpiSrcIdx) {
1112 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1113 source->indexMapStorage.resize(N: source->ghashes.size());
1114 for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1115 if (source->shouldOmitFromPdb(ghashIdx: i)) {
1116 source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1117 continue;
1118 }
1119 GloballyHashedType ghash = source->ghashes[i];
1120 bool isItem = source->isItemIndex.test(Idx: i);
1121 uint32_t cellIdx =
1122 ghashState.table.insert(ctx, ghash, newCell: GHashCell(isItem, tpiSrcIdx, i));
1123
1124 // Store the ghash cell index as a type index in indexMapStorage. Later
1125 // we will replace it with the PDB type index.
1126 source->indexMapStorage[i] = TypeIndex::fromArrayIndex(Index: cellIdx);
1127 }
1128 });
1129
1130 // Collect all non-empty cells and sort them. This will implicitly assign
1131 // destination type indices, and partition the entries into type records and
1132 // item records. It arranges types in this order:
1133 // - type records
1134 // - source 0, type 0...
1135 // - source 1, type 1...
1136 // - item records
1137 // - source 0, type 1...
1138 // - source 1, type 0...
1139 std::vector<GHashCell> entries;
1140 for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1141 if (!cell.isEmpty())
1142 entries.push_back(x: cell);
1143 }
1144 parallelSort(R&: entries, Comp: std::less<GHashCell>());
1145 Log(ctx) << formatv(
1146 Fmt: "ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1147 Vals: tableSize ? double(entries.size()) / tableSize : 0, Vals: entries.size(),
1148 Vals&: tableSize);
1149
1150 // Find out how many type and item indices there are.
1151 auto mid = llvm::lower_bound(Range&: entries, Value: GHashCell(true, 0, 0));
1152 assert((mid == entries.end() || mid->isItem()) &&
1153 (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1154 "midpoint is not midpoint");
1155 uint32_t numTypes = std::distance(first: entries.begin(), last: mid);
1156 uint32_t numItems = std::distance(first: mid, last: entries.end());
1157 Log(ctx) << "Tpi record count: " << numTypes;
1158 Log(ctx) << "Ipi record count: " << numItems;
1159
1160 // Make a list of the "unique" type records to merge for each tpi source. Type
1161 // merging will skip indices not on this list. Store the destination PDB type
1162 // index for these unique types in the tpiMap for each source. The entries for
1163 // non-unique types will be filled in prior to type merging.
1164 for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1165 auto &cell = entries[i];
1166 uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1167 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1168 source->uniqueTypes.push_back(x: cell.getGHashIdx());
1169
1170 // Update the ghash table to store the destination PDB type index in the
1171 // table.
1172 uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1173 uint32_t ghashCellIndex =
1174 source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1175 ghashState.table.table[ghashCellIndex] =
1176 GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1177 }
1178
1179 // In parallel, remap all types.
1180 for (TpiSource *source : dependencySources)
1181 source->remapTpiWithGHashes(g: &ghashState);
1182 parallelForEach(R&: objectSources, Fn: [&](TpiSource *source) {
1183 source->remapTpiWithGHashes(g: &ghashState);
1184 });
1185
1186 // Build a global map of from function ID to function type.
1187 for (TpiSource *source : ctx.tpiSourceList) {
1188 funcIdToType.insert_range(R&: source->funcIdToType);
1189 source->funcIdToType.clear();
1190 }
1191
1192 clearGHashes();
1193}
1194
1195void TypeMerger::sortDependencies() {
1196 // Order dependencies first, but preserve the existing order.
1197 std::vector<TpiSource *> deps;
1198 std::vector<TpiSource *> objs;
1199 for (TpiSource *s : ctx.tpiSourceList)
1200 (s->isDependency() ? deps : objs).push_back(x: s);
1201 uint32_t numDeps = deps.size();
1202 uint32_t numObjs = objs.size();
1203 ctx.tpiSourceList = std::move(deps);
1204 ctx.tpiSourceList.insert(position: ctx.tpiSourceList.end(), first: objs.begin(), last: objs.end());
1205 for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1206 ctx.tpiSourceList[i]->tpiSrcIdx = i;
1207 dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1208 objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1209}
1210
1211/// Given the index into the ghash table for a particular type, return the type
1212/// index for that type in the output PDB.
1213static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1214 uint32_t ghashCellIdx) {
1215 GHashCell cell = g->table.table[ghashCellIdx];
1216 return TypeIndex::fromArrayIndex(Index: cell.getGHashIdx());
1217}
1218
1219/// Free heap allocated ghashes.
1220void TypeMerger::clearGHashes() {
1221 for (TpiSource *src : ctx.tpiSourceList) {
1222 if (src->ownedGHashes)
1223 delete[] src->ghashes.data();
1224 src->ghashes = {};
1225 src->isItemIndex.clear();
1226 src->uniqueTypes.clear();
1227 }
1228}
1229
1230// Fill in a TPI or IPI index map using ghashes. For each source type, use its
1231// ghash to lookup its final type index in the PDB, and store that in the map.
1232void TpiSource::fillMapFromGHashes(GHashState *g) {
1233 for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1234 TypeIndex fakeCellIndex = indexMapStorage[i];
1235 if (fakeCellIndex.isSimple())
1236 indexMapStorage[i] = fakeCellIndex;
1237 else
1238 indexMapStorage[i] =
1239 loadPdbTypeIndexFromCell(g, ghashCellIdx: fakeCellIndex.toArrayIndex());
1240 }
1241}
1242

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of lld/COFF/DebugTypes.cpp