1 | //===- DebugTypes.cpp -----------------------------------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "DebugTypes.h" |
10 | #include "COFFLinkerContext.h" |
11 | #include "Chunks.h" |
12 | #include "InputFiles.h" |
13 | #include "PDB.h" |
14 | #include "TypeMerger.h" |
15 | #include "lld/Common/ErrorHandler.h" |
16 | #include "lld/Common/Memory.h" |
17 | #include "llvm/ADT/StringExtras.h" |
18 | #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" |
19 | #include "llvm/DebugInfo/CodeView/TypeRecord.h" |
20 | #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" |
21 | #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" |
22 | #include "llvm/DebugInfo/PDB/GenericError.h" |
23 | #include "llvm/DebugInfo/PDB/Native/InfoStream.h" |
24 | #include "llvm/DebugInfo/PDB/Native/NativeSession.h" |
25 | #include "llvm/DebugInfo/PDB/Native/PDBFile.h" |
26 | #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" |
27 | #include "llvm/DebugInfo/PDB/Native/TpiStream.h" |
28 | #include "llvm/Support/FormatVariadic.h" |
29 | #include "llvm/Support/Parallel.h" |
30 | #include "llvm/Support/Path.h" |
31 | #include "llvm/Support/TimeProfiler.h" |
32 | |
33 | using namespace llvm; |
34 | using namespace llvm::codeview; |
35 | using namespace lld; |
36 | using namespace lld::coff; |
37 | |
38 | namespace { |
39 | class TypeServerIpiSource; |
40 | |
41 | // The TypeServerSource class represents a PDB type server, a file referenced by |
42 | // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ |
43 | // files, therefore there must be only one instance per OBJ lot. The file path |
44 | // is discovered from the dependent OBJ's debug type stream. The |
45 | // TypeServerSource object is then queued and loaded by the COFF Driver. The |
46 | // debug type stream for such PDB files will be merged first in the final PDB, |
47 | // before any dependent OBJ. |
48 | class TypeServerSource : public TpiSource { |
49 | public: |
50 | explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f) |
51 | : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) { |
52 | if (f->loadErrorStr) |
53 | return; |
54 | pdb::PDBFile &file = f->session->getPDBFile(); |
55 | auto expectedInfo = file.getPDBInfoStream(); |
56 | if (!expectedInfo) |
57 | return; |
58 | Guid = expectedInfo->getGuid(); |
59 | auto it = ctx.typeServerSourceMappings.emplace(args&: Guid, args: this); |
60 | if (!it.second) { |
61 | // If we hit here we have collision on Guid's in two PDB files. |
62 | // This can happen if the PDB Guid is invalid or if we are really |
63 | // unlucky. This should fall back on stright file-system lookup. |
64 | it.first->second = nullptr; |
65 | } |
66 | } |
67 | |
68 | Error mergeDebugT(TypeMerger *m) override; |
69 | |
70 | void loadGHashes() override; |
71 | void remapTpiWithGHashes(GHashState *g) override; |
72 | |
73 | bool isDependency() const override { return true; } |
74 | |
75 | PDBInputFile *pdbInputFile = nullptr; |
76 | |
77 | // TpiSource for IPI stream. |
78 | TypeServerIpiSource *ipiSrc = nullptr; |
79 | |
80 | // The PDB signature GUID. |
81 | codeview::GUID Guid; |
82 | }; |
83 | |
84 | // Companion to TypeServerSource. Stores the index map for the IPI stream in the |
85 | // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the |
86 | // invariant of one type index space per source. |
87 | class TypeServerIpiSource : public TpiSource { |
88 | public: |
89 | explicit TypeServerIpiSource(COFFLinkerContext &ctx) |
90 | : TpiSource(ctx, PDBIpi, nullptr) {} |
91 | |
92 | friend class TypeServerSource; |
93 | |
94 | // All of the TpiSource methods are no-ops. The parent TypeServerSource |
95 | // handles both TPI and IPI. |
96 | Error mergeDebugT(TypeMerger *m) override { return Error::success(); } |
97 | void loadGHashes() override {} |
98 | void remapTpiWithGHashes(GHashState *g) override {} |
99 | bool isDependency() const override { return true; } |
100 | }; |
101 | |
102 | // This class represents the debug type stream of an OBJ file that depends on a |
103 | // PDB type server (see TypeServerSource). |
104 | class UseTypeServerSource : public TpiSource { |
105 | Expected<TypeServerSource *> getTypeServerSource(); |
106 | |
107 | public: |
108 | UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts) |
109 | : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {} |
110 | |
111 | Error mergeDebugT(TypeMerger *m) override; |
112 | |
113 | // No need to load ghashes from /Zi objects. |
114 | void loadGHashes() override {} |
115 | void remapTpiWithGHashes(GHashState *g) override; |
116 | |
117 | // Information about the PDB type server dependency, that needs to be loaded |
118 | // in before merging this OBJ. |
119 | TypeServer2Record typeServerDependency; |
120 | }; |
121 | |
122 | // This class represents the debug type stream of a Microsoft precompiled |
123 | // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output |
124 | // PDB, before any other OBJs that depend on this. Note that only MSVC generates |
125 | // such files, clang does not. |
126 | class PrecompSource : public TpiSource { |
127 | public: |
128 | PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) { |
129 | // If the S_OBJNAME record contains the PCH signature, we'll register this |
130 | // source file right away. |
131 | registerMapping(); |
132 | } |
133 | |
134 | Error mergeDebugT(TypeMerger *m) override; |
135 | |
136 | void loadGHashes() override; |
137 | |
138 | bool isDependency() const override { return true; } |
139 | |
140 | private: |
141 | void registerMapping(); |
142 | |
143 | // Whether this precomp OBJ was recorded in the precompSourceMappings map. |
144 | // Only happens if the file->pchSignature is valid. |
145 | bool registered = false; |
146 | }; |
147 | |
148 | // This class represents the debug type stream of an OBJ file that depends on a |
149 | // Microsoft precompiled headers OBJ (see PrecompSource). |
150 | class UsePrecompSource : public TpiSource { |
151 | public: |
152 | UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp) |
153 | : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {} |
154 | |
155 | Error mergeDebugT(TypeMerger *m) override; |
156 | |
157 | void loadGHashes() override; |
158 | void remapTpiWithGHashes(GHashState *g) override; |
159 | |
160 | private: |
161 | Error mergeInPrecompHeaderObj(); |
162 | |
163 | PrecompSource *findObjByName(StringRef fileNameOnly); |
164 | PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr); |
165 | Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr); |
166 | |
167 | public: |
168 | // Information about the Precomp OBJ dependency, that needs to be loaded in |
169 | // before merging this OBJ. |
170 | PrecompRecord precompDependency; |
171 | }; |
172 | } // namespace |
173 | |
174 | TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f) |
175 | : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) { |
176 | ctx.addTpiSource(tpi: this); |
177 | } |
178 | |
179 | // Vtable key method. |
180 | TpiSource::~TpiSource() { |
181 | // Silence any assertions about unchecked errors. |
182 | consumeError(Err: std::move(typeMergingError)); |
183 | } |
184 | |
185 | TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) { |
186 | return make<TpiSource>(args&: ctx, args: TpiSource::Regular, args&: file); |
187 | } |
188 | |
189 | TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx, |
190 | PDBInputFile *pdbInputFile) { |
191 | // Type server sources come in pairs: the TPI stream, and the IPI stream. |
192 | auto *tpiSource = make<TypeServerSource>(args&: ctx, args&: pdbInputFile); |
193 | if (pdbInputFile->session->getPDBFile().hasPDBIpiStream()) |
194 | tpiSource->ipiSrc = make<TypeServerIpiSource>(args&: ctx); |
195 | return tpiSource; |
196 | } |
197 | |
198 | TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx, |
199 | ObjFile *file, |
200 | TypeServer2Record ts) { |
201 | return make<UseTypeServerSource>(args&: ctx, args&: file, args&: ts); |
202 | } |
203 | |
204 | TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) { |
205 | return make<PrecompSource>(args&: ctx, args&: file); |
206 | } |
207 | |
208 | TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx, |
209 | ObjFile *file, |
210 | PrecompRecord precomp) { |
211 | return make<UsePrecompSource>(args&: ctx, args&: file, args&: precomp); |
212 | } |
213 | |
214 | bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const { |
215 | if (ti.isSimple()) |
216 | return true; |
217 | |
218 | // This can be an item index or a type index. Choose the appropriate map. |
219 | ArrayRef<TypeIndex> tpiOrIpiMap = |
220 | (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap; |
221 | if (ti.toArrayIndex() >= tpiOrIpiMap.size()) |
222 | return false; |
223 | ti = tpiOrIpiMap[ti.toArrayIndex()]; |
224 | return true; |
225 | } |
226 | |
227 | void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec, |
228 | ArrayRef<TiReference> typeRefs) { |
229 | MutableArrayRef<uint8_t> contents = rec.drop_front(N: sizeof(RecordPrefix)); |
230 | for (const TiReference &ref : typeRefs) { |
231 | unsigned byteSize = ref.Count * sizeof(TypeIndex); |
232 | if (contents.size() < ref.Offset + byteSize) |
233 | Fatal(ctx) << "symbol record too short"; |
234 | |
235 | MutableArrayRef<TypeIndex> indices( |
236 | reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count); |
237 | for (TypeIndex &ti : indices) { |
238 | if (!remapTypeIndex(ti, refKind: ref.Kind)) { |
239 | if (ctx.config.verbose) { |
240 | uint16_t kind = |
241 | reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind; |
242 | StringRef fname = file ? file->getName() : "<unknown PDB>"; |
243 | Log(ctx) << "failed to remap type index in record of kind 0x" |
244 | << utohexstr(X: kind) << " in "<< fname << " with bad " |
245 | << (ref.Kind == TiRefKind::IndexRef ? "item": "type") |
246 | << " index 0x"<< utohexstr(X: ti.getIndex()); |
247 | } |
248 | ti = TypeIndex(SimpleTypeKind::NotTranslated); |
249 | continue; |
250 | } |
251 | } |
252 | } |
253 | } |
254 | |
255 | void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) { |
256 | // TODO: Handle errors similar to symbols. |
257 | SmallVector<TiReference, 32> typeRefs; |
258 | discoverTypeIndices(Type: CVType(rec), Refs&: typeRefs); |
259 | remapRecord(rec, typeRefs); |
260 | } |
261 | |
262 | bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) { |
263 | // Discover type index references in the record. Skip it if we don't |
264 | // know where they are. |
265 | SmallVector<TiReference, 32> typeRefs; |
266 | if (!discoverTypeIndicesInSymbol(RecordData: rec, Refs&: typeRefs)) |
267 | return false; |
268 | remapRecord(rec, typeRefs); |
269 | return true; |
270 | } |
271 | |
272 | // A COFF .debug$H section is currently a clang extension. This function checks |
273 | // if a .debug$H section is in a format that we expect / understand, so that we |
274 | // can ignore any sections which are coincidentally also named .debug$H but do |
275 | // not contain a format we recognize. |
276 | static bool canUseDebugH(ArrayRef<uint8_t> debugH) { |
277 | if (debugH.size() < sizeof(object::debug_h_header)) |
278 | return false; |
279 | auto *header = |
280 | reinterpret_cast<const object::debug_h_header *>(debugH.data()); |
281 | debugH = debugH.drop_front(N: sizeof(object::debug_h_header)); |
282 | return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC && |
283 | header->Version == 0 && |
284 | header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) && |
285 | (debugH.size() % 8 == 0); |
286 | } |
287 | |
288 | static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) { |
289 | SectionChunk *sec = |
290 | SectionChunk::findByName(sections: file->getDebugChunks(), name: ".debug$H"); |
291 | if (!sec) |
292 | return std::nullopt; |
293 | ArrayRef<uint8_t> contents = sec->getContents(); |
294 | if (!canUseDebugH(debugH: contents)) |
295 | return std::nullopt; |
296 | return contents; |
297 | } |
298 | |
299 | static ArrayRef<GloballyHashedType> |
300 | getHashesFromDebugH(ArrayRef<uint8_t> debugH) { |
301 | assert(canUseDebugH(debugH)); |
302 | debugH = debugH.drop_front(N: sizeof(object::debug_h_header)); |
303 | uint32_t count = debugH.size() / sizeof(GloballyHashedType); |
304 | return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count}; |
305 | } |
306 | |
307 | // Merge .debug$T for a generic object file. |
308 | Error TpiSource::mergeDebugT(TypeMerger *m) { |
309 | assert(!ctx.config.debugGHashes && |
310 | "use remapTpiWithGHashes when ghash is enabled"); |
311 | |
312 | CVTypeArray types; |
313 | BinaryStreamReader reader(file->debugTypes, llvm::endianness::little); |
314 | cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength())); |
315 | |
316 | // When dealing with PCH.OBJ, some indices were already merged. |
317 | unsigned nbHeadIndices = indexMapStorage.size(); |
318 | |
319 | std::optional<PCHMergerInfo> pchInfo; |
320 | if (auto err = mergeTypeAndIdRecords(DestIds&: m->idTable, DestTypes&: m->typeTable, |
321 | SourceToDest&: indexMapStorage, IdsAndTypes: types, PCHInfo&: pchInfo)) |
322 | Fatal(ctx) << "codeview::mergeTypeAndIdRecords failed: " |
323 | << toString(E: std::move(err)); |
324 | if (pchInfo) { |
325 | file->pchSignature = pchInfo->PCHSignature; |
326 | endPrecompIdx = pchInfo->EndPrecompIndex; |
327 | } |
328 | |
329 | // In an object, there is only one mapping for both types and items. |
330 | tpiMap = indexMapStorage; |
331 | ipiMap = indexMapStorage; |
332 | |
333 | if (ctx.config.showSummary) { |
334 | nbTypeRecords = indexMapStorage.size() - nbHeadIndices; |
335 | nbTypeRecordsBytes = reader.getLength(); |
336 | // Count how many times we saw each type record in our input. This |
337 | // calculation requires a second pass over the type records to classify each |
338 | // record as a type or index. This is slow, but this code executes when |
339 | // collecting statistics. |
340 | m->tpiCounts.resize(N: m->getTypeTable().size()); |
341 | m->ipiCounts.resize(N: m->getIDTable().size()); |
342 | uint32_t srcIdx = nbHeadIndices; |
343 | for (const CVType &ty : types) { |
344 | TypeIndex dstIdx = tpiMap[srcIdx++]; |
345 | // Type merging may fail, so a complex source type may become the simple |
346 | // NotTranslated type, which cannot be used as an array index. |
347 | if (dstIdx.isSimple()) |
348 | continue; |
349 | SmallVectorImpl<uint32_t> &counts = |
350 | isIdRecord(K: ty.kind()) ? m->ipiCounts : m->tpiCounts; |
351 | ++counts[dstIdx.toArrayIndex()]; |
352 | } |
353 | } |
354 | |
355 | return Error::success(); |
356 | } |
357 | |
358 | // Merge types from a type server PDB. |
359 | Error TypeServerSource::mergeDebugT(TypeMerger *m) { |
360 | assert(!ctx.config.debugGHashes && |
361 | "use remapTpiWithGHashes when ghash is enabled"); |
362 | |
363 | pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); |
364 | Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream(); |
365 | if (auto e = expectedTpi.takeError()) |
366 | Fatal(ctx) << "Type server does not have TPI stream: " |
367 | << toString(E: std::move(e)); |
368 | pdb::TpiStream *maybeIpi = nullptr; |
369 | if (pdbFile.hasPDBIpiStream()) { |
370 | Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream(); |
371 | if (auto e = expectedIpi.takeError()) |
372 | Fatal(ctx) << "Error getting type server IPI stream: " |
373 | << toString(E: std::move(e)); |
374 | maybeIpi = &*expectedIpi; |
375 | } |
376 | |
377 | // Merge TPI first, because the IPI stream will reference type indices. |
378 | if (auto err = mergeTypeRecords(Dest&: m->typeTable, SourceToDest&: indexMapStorage, |
379 | Types: expectedTpi->typeArray())) |
380 | Fatal(ctx) << "codeview::mergeTypeRecords failed: " |
381 | << toString(E: std::move(err)); |
382 | tpiMap = indexMapStorage; |
383 | |
384 | // Merge IPI. |
385 | if (maybeIpi) { |
386 | if (auto err = mergeIdRecords(Dest&: m->idTable, Types: tpiMap, SourceToDest&: ipiSrc->indexMapStorage, |
387 | Ids: maybeIpi->typeArray())) |
388 | Fatal(ctx) << "codeview::mergeIdRecords failed: " |
389 | << toString(E: std::move(err)); |
390 | ipiMap = ipiSrc->indexMapStorage; |
391 | } |
392 | |
393 | if (ctx.config.showSummary) { |
394 | nbTypeRecords = tpiMap.size() + ipiMap.size(); |
395 | nbTypeRecordsBytes = |
396 | expectedTpi->typeArray().getUnderlyingStream().getLength() + |
397 | (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength() |
398 | : 0); |
399 | |
400 | // Count how many times we saw each type record in our input. If a |
401 | // destination type index is present in the source to destination type index |
402 | // map, that means we saw it once in the input. Add it to our histogram. |
403 | m->tpiCounts.resize(N: m->getTypeTable().size()); |
404 | m->ipiCounts.resize(N: m->getIDTable().size()); |
405 | for (TypeIndex ti : tpiMap) |
406 | if (!ti.isSimple()) |
407 | ++m->tpiCounts[ti.toArrayIndex()]; |
408 | for (TypeIndex ti : ipiMap) |
409 | if (!ti.isSimple()) |
410 | ++m->ipiCounts[ti.toArrayIndex()]; |
411 | } |
412 | |
413 | return Error::success(); |
414 | } |
415 | |
416 | Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() { |
417 | const codeview::GUID &tsId = typeServerDependency.getGuid(); |
418 | StringRef tsPath = typeServerDependency.getName(); |
419 | |
420 | TypeServerSource *tsSrc = nullptr; |
421 | auto it = ctx.typeServerSourceMappings.find(x: tsId); |
422 | if (it != ctx.typeServerSourceMappings.end()) { |
423 | tsSrc = (TypeServerSource *)it->second; |
424 | } |
425 | if (tsSrc == nullptr) { |
426 | // The file failed to load, lookup by name |
427 | PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, path: tsPath, fromFile: file); |
428 | if (!pdb) |
429 | return createFileError(F: tsPath, E: errorCodeToError(EC: std::error_code( |
430 | ENOENT, std::generic_category()))); |
431 | // If an error occurred during loading, throw it now |
432 | if (pdb->loadErrorStr) |
433 | return createFileError( |
434 | F: tsPath, E: make_error<StringError>(Args&: *pdb->loadErrorStr, |
435 | Args: llvm::inconvertibleErrorCode())); |
436 | |
437 | tsSrc = (TypeServerSource *)pdb->debugTypesObj; |
438 | |
439 | // Just because a file with a matching name was found and it was an actual |
440 | // PDB file doesn't mean it matches. For it to match the InfoStream's GUID |
441 | // must match the GUID specified in the TypeServer2 record. |
442 | if (tsSrc->Guid != tsId) { |
443 | return createFileError(F: tsPath, |
444 | E: make_error<pdb::PDBError>( |
445 | Args: pdb::pdb_error_code::signature_out_of_date)); |
446 | } |
447 | } |
448 | return tsSrc; |
449 | } |
450 | |
451 | Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { |
452 | Expected<TypeServerSource *> tsSrc = getTypeServerSource(); |
453 | if (!tsSrc) |
454 | return tsSrc.takeError(); |
455 | |
456 | pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile(); |
457 | auto expectedInfo = pdbSession.getPDBInfoStream(); |
458 | if (!expectedInfo) |
459 | return expectedInfo.takeError(); |
460 | |
461 | // Reuse the type index map of the type server. |
462 | tpiMap = (*tsSrc)->tpiMap; |
463 | ipiMap = (*tsSrc)->ipiMap; |
464 | return Error::success(); |
465 | } |
466 | |
467 | static bool equalsPath(StringRef path1, StringRef path2) { |
468 | #if defined(_WIN32) |
469 | return path1.equals_insensitive(path2); |
470 | #else |
471 | return path1 == path2; |
472 | #endif |
473 | } |
474 | |
475 | // Find by name an OBJ provided on the command line |
476 | PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) { |
477 | for (auto kv : ctx.precompSourceMappings) { |
478 | StringRef currentFileName = sys::path::filename(path: kv.second->file->getName(), |
479 | style: sys::path::Style::windows); |
480 | |
481 | // Compare based solely on the file name (link.exe behavior) |
482 | if (equalsPath(path1: currentFileName, path2: fileNameOnly)) |
483 | return (PrecompSource *)kv.second; |
484 | } |
485 | return nullptr; |
486 | } |
487 | |
488 | PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file, |
489 | PrecompRecord &pr) { |
490 | // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP |
491 | // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, |
492 | // the paths embedded in the OBJs are in the Windows format. |
493 | SmallString<128> prFileName = |
494 | sys::path::filename(path: pr.getPrecompFilePath(), style: sys::path::Style::windows); |
495 | |
496 | auto it = ctx.precompSourceMappings.find(x: pr.getSignature()); |
497 | if (it != ctx.precompSourceMappings.end()) { |
498 | return (PrecompSource *)it->second; |
499 | } |
500 | // Lookup by name |
501 | return findObjByName(fileNameOnly: prFileName); |
502 | } |
503 | |
504 | Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file, |
505 | PrecompRecord &pr) { |
506 | PrecompSource *precomp = findPrecompSource(file, pr); |
507 | |
508 | if (!precomp) |
509 | return createFileError( |
510 | F: pr.getPrecompFilePath(), |
511 | E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch)); |
512 | |
513 | // Don't rely on the PCH signature to validate the concordance between the PCH |
514 | // and the OBJ that uses it. However we do validate here that the |
515 | // LF_ENDPRECOMP record index lines up with the number of type records |
516 | // LF_PRECOMP is expecting. |
517 | if (precomp->endPrecompIdx != pr.getTypesCount()) |
518 | return createFileError( |
519 | F: toString(file), |
520 | E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch)); |
521 | |
522 | return precomp; |
523 | } |
524 | |
525 | /// Merges a precompiled headers TPI map into the current TPI map. The |
526 | /// precompiled headers object will also be loaded and remapped in the |
527 | /// process. |
528 | Error UsePrecompSource::mergeInPrecompHeaderObj() { |
529 | auto e = findPrecompMap(file, pr&: precompDependency); |
530 | if (!e) |
531 | return e.takeError(); |
532 | |
533 | PrecompSource *precompSrc = *e; |
534 | if (precompSrc->tpiMap.empty()) |
535 | return Error::success(); |
536 | |
537 | assert(precompDependency.getStartTypeIndex() == |
538 | TypeIndex::FirstNonSimpleIndex); |
539 | assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size()); |
540 | // Use the previously remapped index map from the precompiled headers. |
541 | indexMapStorage.insert(I: indexMapStorage.begin(), From: precompSrc->tpiMap.begin(), |
542 | To: precompSrc->tpiMap.begin() + |
543 | precompDependency.getTypesCount()); |
544 | |
545 | return Error::success(); |
546 | } |
547 | |
548 | Error UsePrecompSource::mergeDebugT(TypeMerger *m) { |
549 | // This object was compiled with /Yu, so process the corresponding |
550 | // precompiled headers object (/Yc) first. Some type indices in the current |
551 | // object are referencing data in the precompiled headers object, so we need |
552 | // both to be loaded. |
553 | if (Error e = mergeInPrecompHeaderObj()) |
554 | return e; |
555 | |
556 | return TpiSource::mergeDebugT(m); |
557 | } |
558 | |
559 | Error PrecompSource::mergeDebugT(TypeMerger *m) { |
560 | // In some cases, the S_OBJNAME record doesn't contain the PCH signature. |
561 | // The signature comes later with the LF_ENDPRECOMP record, so we first need |
562 | // to merge in all the .PCH.OBJ file type records, before registering below. |
563 | if (Error e = TpiSource::mergeDebugT(m)) |
564 | return e; |
565 | |
566 | registerMapping(); |
567 | |
568 | return Error::success(); |
569 | } |
570 | |
571 | void PrecompSource::registerMapping() { |
572 | if (registered) |
573 | return; |
574 | if (file->pchSignature && *file->pchSignature) { |
575 | auto it = ctx.precompSourceMappings.emplace(args&: *file->pchSignature, args: this); |
576 | if (!it.second) |
577 | Fatal(ctx) |
578 | << "a PCH object with the same signature has already been provided (" |
579 | << toString(file: it.first->second->file) << " and "<< toString(file) |
580 | << ")"; |
581 | registered = true; |
582 | } |
583 | } |
584 | |
585 | //===----------------------------------------------------------------------===// |
586 | // Parallel GHash type merging implementation. |
587 | //===----------------------------------------------------------------------===// |
588 | |
589 | void TpiSource::loadGHashes() { |
590 | if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) { |
591 | ghashes = getHashesFromDebugH(debugH: *debugH); |
592 | ownedGHashes = false; |
593 | } else { |
594 | CVTypeArray types; |
595 | BinaryStreamReader reader(file->debugTypes, llvm::endianness::little); |
596 | cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength())); |
597 | assignGHashesFromVector(hashVec: GloballyHashedType::hashTypes(Records&: types)); |
598 | } |
599 | |
600 | fillIsItemIndexFromDebugT(); |
601 | } |
602 | |
603 | // Copies ghashes from a vector into an array. These are long lived, so it's |
604 | // worth the time to copy these into an appropriately sized vector to reduce |
605 | // memory usage. |
606 | void TpiSource::assignGHashesFromVector( |
607 | std::vector<GloballyHashedType> &&hashVec) { |
608 | if (hashVec.empty()) |
609 | return; |
610 | GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()]; |
611 | memcpy(dest: hashes, src: hashVec.data(), n: hashVec.size() * sizeof(GloballyHashedType)); |
612 | ghashes = ArrayRef(hashes, hashVec.size()); |
613 | ownedGHashes = true; |
614 | } |
615 | |
616 | // Faster way to iterate type records. forEachTypeChecked is faster than |
617 | // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops. |
618 | static void forEachTypeChecked(ArrayRef<uint8_t> types, |
619 | function_ref<void(const CVType &)> fn) { |
620 | checkError( |
621 | e: forEachCodeViewRecord<CVType>(StreamBuffer: types, F: [fn](const CVType &ty) -> Error { |
622 | fn(ty); |
623 | return Error::success(); |
624 | })); |
625 | } |
626 | |
627 | // Walk over file->debugTypes and fill in the isItemIndex bit vector. |
628 | // TODO: Store this information in .debug$H so that we don't have to recompute |
629 | // it. This is the main bottleneck slowing down parallel ghashing with one |
630 | // thread over single-threaded ghashing. |
631 | void TpiSource::fillIsItemIndexFromDebugT() { |
632 | uint32_t index = 0; |
633 | isItemIndex.resize(N: ghashes.size()); |
634 | forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) { |
635 | if (isIdRecord(K: ty.kind())) |
636 | isItemIndex.set(index); |
637 | ++index; |
638 | }); |
639 | } |
640 | |
641 | void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) { |
642 | // Decide if the merged type goes into TPI or IPI. |
643 | bool isItem = isIdRecord(K: ty.kind()); |
644 | MergedInfo &merged = isItem ? mergedIpi : mergedTpi; |
645 | |
646 | // Copy the type into our mutable buffer. |
647 | assert(ty.length() <= codeview::MaxRecordLength); |
648 | size_t offset = merged.recs.size(); |
649 | size_t newSize = alignTo(Value: ty.length(), Align: 4); |
650 | merged.recs.resize(new_size: offset + newSize); |
651 | auto newRec = MutableArrayRef(&merged.recs[offset], newSize); |
652 | memcpy(dest: newRec.data(), src: ty.data().data(), n: newSize); |
653 | |
654 | // Fix up the record prefix and padding bytes if it required resizing. |
655 | if (newSize != ty.length()) { |
656 | reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2; |
657 | for (size_t i = ty.length(); i < newSize; ++i) |
658 | newRec[i] = LF_PAD0 + (newSize - i); |
659 | } |
660 | |
661 | // Remap the type indices in the new record. |
662 | remapTypesInTypeRecord(rec: newRec); |
663 | uint32_t pdbHash = check(e: pdb::hashTypeRecord(Type: CVType(newRec))); |
664 | merged.recSizes.push_back(x: static_cast<uint16_t>(newSize)); |
665 | merged.recHashes.push_back(x: pdbHash); |
666 | |
667 | // Retain a mapping from PDB function id to PDB function type. This mapping is |
668 | // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32 |
669 | // symbols. |
670 | if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { |
671 | bool success = ty.length() >= 12; |
672 | TypeIndex funcId = curIndex; |
673 | if (success) |
674 | success &= remapTypeIndex(ti&: funcId, refKind: TiRefKind::IndexRef); |
675 | TypeIndex funcType = |
676 | *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]); |
677 | if (success) { |
678 | funcIdToType.push_back(x: {funcId, funcType}); |
679 | } else { |
680 | StringRef fname = file ? file->getName() : "<unknown PDB>"; |
681 | Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x" |
682 | << utohexstr(X: curIndex.getIndex()) << " in "<< fname; |
683 | } |
684 | } |
685 | } |
686 | |
687 | void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords, |
688 | TypeIndex beginIndex) { |
689 | // Re-sort the list of unique types by index. |
690 | if (kind == PDB) |
691 | assert(llvm::is_sorted(uniqueTypes)); |
692 | else |
693 | llvm::sort(C&: uniqueTypes); |
694 | |
695 | // Accumulate all the unique types into one buffer in mergedTypes. |
696 | uint32_t ghashIndex = 0; |
697 | auto nextUniqueIndex = uniqueTypes.begin(); |
698 | assert(mergedTpi.recs.empty()); |
699 | assert(mergedIpi.recs.empty()); |
700 | |
701 | // Pre-compute the number of elements in advance to avoid std::vector resizes. |
702 | unsigned nbTpiRecs = 0; |
703 | unsigned nbIpiRecs = 0; |
704 | forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) { |
705 | if (nextUniqueIndex != uniqueTypes.end() && |
706 | *nextUniqueIndex == ghashIndex) { |
707 | assert(ty.length() <= codeview::MaxRecordLength); |
708 | size_t newSize = alignTo(Value: ty.length(), Align: 4); |
709 | (isIdRecord(K: ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize; |
710 | ++nextUniqueIndex; |
711 | } |
712 | ++ghashIndex; |
713 | }); |
714 | mergedTpi.recs.reserve(n: nbTpiRecs); |
715 | mergedIpi.recs.reserve(n: nbIpiRecs); |
716 | |
717 | // Do the actual type merge. |
718 | ghashIndex = 0; |
719 | nextUniqueIndex = uniqueTypes.begin(); |
720 | forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) { |
721 | if (nextUniqueIndex != uniqueTypes.end() && |
722 | *nextUniqueIndex == ghashIndex) { |
723 | mergeTypeRecord(curIndex: beginIndex + ghashIndex, ty); |
724 | ++nextUniqueIndex; |
725 | } |
726 | ++ghashIndex; |
727 | }); |
728 | assert(nextUniqueIndex == uniqueTypes.end() && |
729 | "failed to merge all desired records"); |
730 | assert(uniqueTypes.size() == |
731 | mergedTpi.recSizes.size() + mergedIpi.recSizes.size() && |
732 | "missing desired record"); |
733 | } |
734 | |
735 | void TpiSource::remapTpiWithGHashes(GHashState *g) { |
736 | assert(ctx.config.debugGHashes && "ghashes must be enabled"); |
737 | fillMapFromGHashes(m: g); |
738 | tpiMap = indexMapStorage; |
739 | ipiMap = indexMapStorage; |
740 | mergeUniqueTypeRecords(typeRecords: file->debugTypes); |
741 | // TODO: Free all unneeded ghash resources now that we have a full index map. |
742 | |
743 | if (ctx.config.showSummary) { |
744 | nbTypeRecords = ghashes.size(); |
745 | nbTypeRecordsBytes = file->debugTypes.size(); |
746 | } |
747 | } |
748 | |
749 | // PDBs do not actually store global hashes, so when merging a type server |
750 | // PDB we have to synthesize global hashes. To do this, we first synthesize |
751 | // global hashes for the TPI stream, since it is independent, then we |
752 | // synthesize hashes for the IPI stream, using the hashes for the TPI stream |
753 | // as inputs. |
754 | void TypeServerSource::loadGHashes() { |
755 | // Don't hash twice. |
756 | if (!ghashes.empty()) |
757 | return; |
758 | pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); |
759 | |
760 | // Hash TPI stream. |
761 | Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream(); |
762 | if (auto e = expectedTpi.takeError()) |
763 | Fatal(ctx) << "Type server does not have TPI stream: " |
764 | << toString(E: std::move(e)); |
765 | assignGHashesFromVector( |
766 | hashVec: GloballyHashedType::hashTypes(Records: expectedTpi->typeArray())); |
767 | isItemIndex.resize(N: ghashes.size()); |
768 | |
769 | // Hash IPI stream, which depends on TPI ghashes. |
770 | if (!pdbFile.hasPDBIpiStream()) |
771 | return; |
772 | Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream(); |
773 | if (auto e = expectedIpi.takeError()) |
774 | Fatal(ctx) << "error retrieving IPI stream: "<< toString(E: std::move(e)); |
775 | ipiSrc->assignGHashesFromVector( |
776 | hashVec: GloballyHashedType::hashIds(Records: expectedIpi->typeArray(), TypeHashes: ghashes)); |
777 | |
778 | // The IPI stream isItemIndex bitvector should be all ones. |
779 | ipiSrc->isItemIndex.resize(N: ipiSrc->ghashes.size()); |
780 | ipiSrc->isItemIndex.set(I: 0, E: ipiSrc->ghashes.size()); |
781 | } |
782 | |
783 | // Flatten discontiguous PDB type arrays to bytes so that we can use |
784 | // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from |
785 | // type servers is faster than iterating all object files compiled with /Z7 with |
786 | // CVTypeArray, which has high overheads due to the virtual interface of |
787 | // BinaryStream::readBytes. |
788 | static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) { |
789 | BinaryStreamRef stream = types.getUnderlyingStream(); |
790 | ArrayRef<uint8_t> debugTypes; |
791 | checkError(e: stream.readBytes(Offset: 0, Size: stream.getLength(), Buffer&: debugTypes)); |
792 | return debugTypes; |
793 | } |
794 | |
795 | // Merge types from a type server PDB. |
796 | void TypeServerSource::remapTpiWithGHashes(GHashState *g) { |
797 | assert(ctx.config.debugGHashes && "ghashes must be enabled"); |
798 | |
799 | // IPI merging depends on TPI, so do TPI first, then do IPI. No need to |
800 | // propagate errors, those should've been handled during ghash loading. |
801 | pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); |
802 | pdb::TpiStream &tpi = check(e: pdbFile.getPDBTpiStream()); |
803 | fillMapFromGHashes(m: g); |
804 | tpiMap = indexMapStorage; |
805 | mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: tpi.typeArray())); |
806 | if (pdbFile.hasPDBIpiStream()) { |
807 | pdb::TpiStream &ipi = check(e: pdbFile.getPDBIpiStream()); |
808 | ipiSrc->indexMapStorage.resize(N: ipiSrc->ghashes.size()); |
809 | ipiSrc->fillMapFromGHashes(m: g); |
810 | ipiMap = ipiSrc->indexMapStorage; |
811 | ipiSrc->tpiMap = tpiMap; |
812 | ipiSrc->ipiMap = ipiMap; |
813 | ipiSrc->mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: ipi.typeArray())); |
814 | |
815 | if (ctx.config.showSummary) { |
816 | nbTypeRecords = ipiSrc->ghashes.size(); |
817 | nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength(); |
818 | } |
819 | } |
820 | |
821 | if (ctx.config.showSummary) { |
822 | nbTypeRecords += ghashes.size(); |
823 | nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength(); |
824 | } |
825 | } |
826 | |
827 | void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { |
828 | // No remapping to do with /Zi objects. Simply use the index map from the type |
829 | // server. Errors should have been reported earlier. Symbols from this object |
830 | // will be ignored. |
831 | Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource(); |
832 | if (!maybeTsSrc) { |
833 | typeMergingError = |
834 | joinErrors(E1: std::move(typeMergingError), E2: maybeTsSrc.takeError()); |
835 | return; |
836 | } |
837 | TypeServerSource *tsSrc = *maybeTsSrc; |
838 | tpiMap = tsSrc->tpiMap; |
839 | ipiMap = tsSrc->ipiMap; |
840 | } |
841 | |
842 | void PrecompSource::loadGHashes() { |
843 | if (getDebugH(file)) { |
844 | Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented"; |
845 | } |
846 | |
847 | uint32_t ghashIdx = 0; |
848 | std::vector<GloballyHashedType> hashVec; |
849 | forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) { |
850 | // Remember the index of the LF_ENDPRECOMP record so it can be excluded from |
851 | // the PDB. There must be an entry in the list of ghashes so that the type |
852 | // indexes of the following records in the /Yc PCH object line up. |
853 | if (ty.kind() == LF_ENDPRECOMP) { |
854 | EndPrecompRecord endPrecomp; |
855 | cantFail(Err: TypeDeserializer::deserializeAs<EndPrecompRecord>( |
856 | CVT&: const_cast<CVType &>(ty), Record&: endPrecomp)); |
857 | file->pchSignature = endPrecomp.getSignature(); |
858 | registerMapping(); |
859 | endPrecompIdx = ghashIdx; |
860 | } |
861 | |
862 | hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec)); |
863 | isItemIndex.push_back(Val: isIdRecord(K: ty.kind())); |
864 | ++ghashIdx; |
865 | }); |
866 | assignGHashesFromVector(hashVec: std::move(hashVec)); |
867 | } |
868 | |
869 | void UsePrecompSource::loadGHashes() { |
870 | auto e = findPrecompMap(file, pr&: precompDependency); |
871 | if (!e) { |
872 | Warn(ctx) << e.takeError(); |
873 | return; |
874 | } |
875 | |
876 | PrecompSource *pchSrc = *e; |
877 | |
878 | // To compute ghashes of a /Yu object file, we need to build on the ghashes of |
879 | // the /Yc PCH object. After we are done hashing, discard the ghashes from the |
880 | // PCH source so we don't unnecessarily try to deduplicate them. |
881 | std::vector<GloballyHashedType> hashVec = |
882 | pchSrc->ghashes.take_front(N: precompDependency.getTypesCount()); |
883 | forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) { |
884 | hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec)); |
885 | isItemIndex.push_back(Val: isIdRecord(K: ty.kind())); |
886 | }); |
887 | hashVec.erase(first: hashVec.begin(), |
888 | last: hashVec.begin() + precompDependency.getTypesCount()); |
889 | assignGHashesFromVector(hashVec: std::move(hashVec)); |
890 | } |
891 | |
892 | void UsePrecompSource::remapTpiWithGHashes(GHashState *g) { |
893 | fillMapFromGHashes(m: g); |
894 | // This object was compiled with /Yu, so process the corresponding |
895 | // precompiled headers object (/Yc) first. Some type indices in the current |
896 | // object are referencing data in the precompiled headers object, so we need |
897 | // both to be loaded. |
898 | if (Error e = mergeInPrecompHeaderObj()) { |
899 | typeMergingError = joinErrors(E1: std::move(typeMergingError), E2: std::move(e)); |
900 | return; |
901 | } |
902 | |
903 | tpiMap = indexMapStorage; |
904 | ipiMap = indexMapStorage; |
905 | mergeUniqueTypeRecords(typeRecords: file->debugTypes, |
906 | beginIndex: TypeIndex(precompDependency.getStartTypeIndex() + |
907 | precompDependency.getTypesCount())); |
908 | if (ctx.config.showSummary) { |
909 | nbTypeRecords = ghashes.size(); |
910 | nbTypeRecordsBytes = file->debugTypes.size(); |
911 | } |
912 | } |
913 | |
914 | namespace { |
915 | /// A concurrent hash table for global type hashing. It is based on this paper: |
916 | /// Concurrent Hash Tables: Fast and General(?)! |
917 | /// https://dl.acm.org/doi/10.1145/3309206 |
918 | /// |
919 | /// This hash table is meant to be used in two phases: |
920 | /// 1. concurrent insertions |
921 | /// 2. concurrent reads |
922 | /// It does not support lookup, deletion, or rehashing. It uses linear probing. |
923 | /// |
924 | /// The paper describes storing a key-value pair in two machine words. |
925 | /// Generally, the values stored in this map are type indices, and we can use |
926 | /// those values to recover the ghash key from a side table. This allows us to |
927 | /// shrink the table entries further at the cost of some loads, and sidesteps |
928 | /// the need for a 128 bit atomic compare-and-swap operation. |
929 | /// |
930 | /// During insertion, a priority function is used to decide which insertion |
931 | /// should be preferred. This ensures that the output is deterministic. For |
932 | /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred. |
933 | /// |
934 | class GHashCell; |
935 | struct GHashTable { |
936 | GHashCell *table = nullptr; |
937 | uint32_t tableSize = 0; |
938 | |
939 | GHashTable() = default; |
940 | ~GHashTable(); |
941 | |
942 | /// Initialize the table with the given size. Because the table cannot be |
943 | /// resized, the initial size of the table must be large enough to contain all |
944 | /// inputs, or insertion may not be able to find an empty cell. |
945 | void init(uint32_t newTableSize); |
946 | |
947 | /// Insert the cell with the given ghash into the table. Return the insertion |
948 | /// position in the table. It is safe for the caller to store the insertion |
949 | /// position because the table cannot be resized. |
950 | uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash, |
951 | GHashCell newCell); |
952 | }; |
953 | |
954 | /// A ghash table cell for deduplicating types from TpiSources. |
955 | class GHashCell { |
956 | // Force "data" to be 64-bit aligned; otherwise, some versions of clang |
957 | // will generate calls to libatomic when using some versions of libstdc++ |
958 | // on 32-bit targets. (Also, in theory, there could be a target where |
959 | // new[] doesn't always return an 8-byte-aligned allocation.) |
960 | alignas(sizeof(uint64_t)) uint64_t data = 0; |
961 | |
962 | public: |
963 | GHashCell() = default; |
964 | |
965 | // Construct data most to least significant so that sorting works well: |
966 | // - isItem |
967 | // - tpiSrcIdx |
968 | // - ghashIdx |
969 | // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a |
970 | // non-zero representation. |
971 | GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx) |
972 | : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) | |
973 | ghashIdx) { |
974 | assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure"); |
975 | assert(ghashIdx == getGHashIdx() && "round trip failure"); |
976 | } |
977 | |
978 | explicit GHashCell(uint64_t data) : data(data) {} |
979 | |
980 | // The empty cell is all zeros. |
981 | bool isEmpty() const { return data == 0ULL; } |
982 | |
983 | /// Extract the tpiSrcIdx. |
984 | uint32_t getTpiSrcIdx() const { |
985 | return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1; |
986 | } |
987 | |
988 | /// Extract the index into the ghash array of the TpiSource. |
989 | uint32_t getGHashIdx() const { return (uint32_t)data; } |
990 | |
991 | bool isItem() const { return data & (1ULL << 63U); } |
992 | |
993 | /// Get the ghash key for this cell. |
994 | GloballyHashedType getGHash(const COFFLinkerContext &ctx) const { |
995 | return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()]; |
996 | } |
997 | |
998 | /// The priority function for the cell. The data is stored such that lower |
999 | /// tpiSrcIdx and ghashIdx values are preferred, which means that type record |
1000 | /// from earlier sources are more likely to prevail. |
1001 | friend inline bool operator<(const GHashCell &l, const GHashCell &r) { |
1002 | return l.data < r.data; |
1003 | } |
1004 | }; |
1005 | } // namespace |
1006 | |
1007 | namespace lld::coff { |
1008 | /// This type is just a wrapper around GHashTable with external linkage so it |
1009 | /// can be used from a header. |
1010 | struct GHashState { |
1011 | GHashTable table; |
1012 | }; |
1013 | } // namespace lld::coff |
1014 | |
1015 | GHashTable::~GHashTable() { delete[] table; } |
1016 | |
1017 | void GHashTable::init(uint32_t newTableSize) { |
1018 | table = new GHashCell[newTableSize]; |
1019 | memset(s: table, c: 0, n: newTableSize * sizeof(GHashCell)); |
1020 | tableSize = newTableSize; |
1021 | } |
1022 | |
1023 | uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash, |
1024 | GHashCell newCell) { |
1025 | assert(!newCell.isEmpty() && "cannot insert empty cell value"); |
1026 | |
1027 | // FIXME: The low bytes of SHA1 have low entropy for short records, which |
1028 | // type records are. Swap the byte order for better entropy. A better ghash |
1029 | // won't need this. |
1030 | uint32_t startIdx = |
1031 | llvm::byteswap<uint64_t>(V: *reinterpret_cast<uint64_t *>(&ghash)) % |
1032 | tableSize; |
1033 | |
1034 | // Do a linear probe starting at startIdx. |
1035 | uint32_t idx = startIdx; |
1036 | while (true) { |
1037 | // Run a compare and swap loop. There are four cases: |
1038 | // - cell is empty: CAS into place and return |
1039 | // - cell has matching key, earlier priority: do nothing, return |
1040 | // - cell has matching key, later priority: CAS into place and return |
1041 | // - cell has non-matching key: hash collision, probe next cell |
1042 | auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]); |
1043 | GHashCell oldCell(cellPtr->load()); |
1044 | while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) { |
1045 | // Check if there is an existing ghash entry with a higher priority |
1046 | // (earlier ordering). If so, this is a duplicate, we are done. |
1047 | if (!oldCell.isEmpty() && oldCell < newCell) |
1048 | return idx; |
1049 | // Either the cell is empty, or our value is higher priority. Try to |
1050 | // compare and swap. If it succeeds, we are done. |
1051 | if (cellPtr->compare_exchange_weak(e&: oldCell, i: newCell)) |
1052 | return idx; |
1053 | // If the CAS failed, check this cell again. |
1054 | } |
1055 | |
1056 | // Advance the probe. Wrap around to the beginning if we run off the end. |
1057 | ++idx; |
1058 | idx = idx == tableSize ? 0 : idx; |
1059 | if (idx == startIdx) { |
1060 | // If this becomes an issue, we could mark failure and rehash from the |
1061 | // beginning with a bigger table. There is no difference between rehashing |
1062 | // internally and starting over. |
1063 | report_fatal_error(reason: "ghash table is full"); |
1064 | } |
1065 | } |
1066 | llvm_unreachable("left infloop"); |
1067 | } |
1068 | |
1069 | TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc) |
1070 | : typeTable(alloc), idTable(alloc), ctx(c) {} |
1071 | |
1072 | TypeMerger::~TypeMerger() = default; |
1073 | |
1074 | void TypeMerger::mergeTypesWithGHash() { |
1075 | // Load ghashes. Do type servers and PCH objects first. |
1076 | { |
1077 | llvm::TimeTraceScope timeScope("Load GHASHes"); |
1078 | ScopedTimer t1(ctx.loadGHashTimer); |
1079 | parallelForEach(R&: dependencySources, |
1080 | Fn: [&](TpiSource *source) { source->loadGHashes(); }); |
1081 | parallelForEach(R&: objectSources, |
1082 | Fn: [&](TpiSource *source) { source->loadGHashes(); }); |
1083 | } |
1084 | |
1085 | llvm::TimeTraceScope timeScope("Merge types (GHASH)"); |
1086 | ScopedTimer t2(ctx.mergeGHashTimer); |
1087 | GHashState ghashState; |
1088 | |
1089 | // Estimate the size of hash table needed to deduplicate ghashes. This *must* |
1090 | // be larger than the number of unique types, or hash table insertion may not |
1091 | // be able to find a vacant slot. Summing the input types guarantees this, but |
1092 | // it is a gross overestimate. The table size could be reduced to save memory, |
1093 | // but it would require implementing rehashing, and this table is generally |
1094 | // small compared to total memory usage, at eight bytes per input type record, |
1095 | // and most input type records are larger than eight bytes. |
1096 | size_t tableSize = 0; |
1097 | for (TpiSource *source : ctx.tpiSourceList) |
1098 | tableSize += source->ghashes.size(); |
1099 | |
1100 | // Cap the table size so that we can use 32-bit cell indices. Type indices are |
1101 | // also 32-bit, so this is an inherent PDB file format limit anyway. |
1102 | tableSize = |
1103 | std::min(a: size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, b: tableSize); |
1104 | ghashState.table.init(newTableSize: static_cast<uint32_t>(tableSize)); |
1105 | |
1106 | // Insert ghashes in parallel. During concurrent insertion, we cannot observe |
1107 | // the contents of the hash table cell, but we can remember the insertion |
1108 | // position. Because the table does not rehash, the position will not change |
1109 | // under insertion. After insertion is done, the value of the cell can be read |
1110 | // to retrieve the final PDB type index. |
1111 | parallelFor(Begin: 0, End: ctx.tpiSourceList.size(), Fn: [&](size_t tpiSrcIdx) { |
1112 | TpiSource *source = ctx.tpiSourceList[tpiSrcIdx]; |
1113 | source->indexMapStorage.resize(N: source->ghashes.size()); |
1114 | for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) { |
1115 | if (source->shouldOmitFromPdb(ghashIdx: i)) { |
1116 | source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated); |
1117 | continue; |
1118 | } |
1119 | GloballyHashedType ghash = source->ghashes[i]; |
1120 | bool isItem = source->isItemIndex.test(Idx: i); |
1121 | uint32_t cellIdx = |
1122 | ghashState.table.insert(ctx, ghash, newCell: GHashCell(isItem, tpiSrcIdx, i)); |
1123 | |
1124 | // Store the ghash cell index as a type index in indexMapStorage. Later |
1125 | // we will replace it with the PDB type index. |
1126 | source->indexMapStorage[i] = TypeIndex::fromArrayIndex(Index: cellIdx); |
1127 | } |
1128 | }); |
1129 | |
1130 | // Collect all non-empty cells and sort them. This will implicitly assign |
1131 | // destination type indices, and partition the entries into type records and |
1132 | // item records. It arranges types in this order: |
1133 | // - type records |
1134 | // - source 0, type 0... |
1135 | // - source 1, type 1... |
1136 | // - item records |
1137 | // - source 0, type 1... |
1138 | // - source 1, type 0... |
1139 | std::vector<GHashCell> entries; |
1140 | for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) { |
1141 | if (!cell.isEmpty()) |
1142 | entries.push_back(x: cell); |
1143 | } |
1144 | parallelSort(R&: entries, Comp: std::less<GHashCell>()); |
1145 | Log(ctx) << formatv( |
1146 | Fmt: "ghash table load factor: {0:p} (size {1} / capacity {2})\n", |
1147 | Vals: tableSize ? double(entries.size()) / tableSize : 0, Vals: entries.size(), |
1148 | Vals&: tableSize); |
1149 | |
1150 | // Find out how many type and item indices there are. |
1151 | auto mid = llvm::lower_bound(Range&: entries, Value: GHashCell(true, 0, 0)); |
1152 | assert((mid == entries.end() || mid->isItem()) && |
1153 | (mid == entries.begin() || !std::prev(mid)->isItem()) && |
1154 | "midpoint is not midpoint"); |
1155 | uint32_t numTypes = std::distance(first: entries.begin(), last: mid); |
1156 | uint32_t numItems = std::distance(first: mid, last: entries.end()); |
1157 | Log(ctx) << "Tpi record count: "<< numTypes; |
1158 | Log(ctx) << "Ipi record count: "<< numItems; |
1159 | |
1160 | // Make a list of the "unique" type records to merge for each tpi source. Type |
1161 | // merging will skip indices not on this list. Store the destination PDB type |
1162 | // index for these unique types in the tpiMap for each source. The entries for |
1163 | // non-unique types will be filled in prior to type merging. |
1164 | for (uint32_t i = 0, e = entries.size(); i < e; ++i) { |
1165 | auto &cell = entries[i]; |
1166 | uint32_t tpiSrcIdx = cell.getTpiSrcIdx(); |
1167 | TpiSource *source = ctx.tpiSourceList[tpiSrcIdx]; |
1168 | source->uniqueTypes.push_back(x: cell.getGHashIdx()); |
1169 | |
1170 | // Update the ghash table to store the destination PDB type index in the |
1171 | // table. |
1172 | uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes; |
1173 | uint32_t ghashCellIndex = |
1174 | source->indexMapStorage[cell.getGHashIdx()].toArrayIndex(); |
1175 | ghashState.table.table[ghashCellIndex] = |
1176 | GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex); |
1177 | } |
1178 | |
1179 | // In parallel, remap all types. |
1180 | for (TpiSource *source : dependencySources) |
1181 | source->remapTpiWithGHashes(g: &ghashState); |
1182 | parallelForEach(R&: objectSources, Fn: [&](TpiSource *source) { |
1183 | source->remapTpiWithGHashes(g: &ghashState); |
1184 | }); |
1185 | |
1186 | // Build a global map of from function ID to function type. |
1187 | for (TpiSource *source : ctx.tpiSourceList) { |
1188 | funcIdToType.insert_range(R&: source->funcIdToType); |
1189 | source->funcIdToType.clear(); |
1190 | } |
1191 | |
1192 | clearGHashes(); |
1193 | } |
1194 | |
1195 | void TypeMerger::sortDependencies() { |
1196 | // Order dependencies first, but preserve the existing order. |
1197 | std::vector<TpiSource *> deps; |
1198 | std::vector<TpiSource *> objs; |
1199 | for (TpiSource *s : ctx.tpiSourceList) |
1200 | (s->isDependency() ? deps : objs).push_back(x: s); |
1201 | uint32_t numDeps = deps.size(); |
1202 | uint32_t numObjs = objs.size(); |
1203 | ctx.tpiSourceList = std::move(deps); |
1204 | ctx.tpiSourceList.insert(position: ctx.tpiSourceList.end(), first: objs.begin(), last: objs.end()); |
1205 | for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i) |
1206 | ctx.tpiSourceList[i]->tpiSrcIdx = i; |
1207 | dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps); |
1208 | objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs); |
1209 | } |
1210 | |
1211 | /// Given the index into the ghash table for a particular type, return the type |
1212 | /// index for that type in the output PDB. |
1213 | static TypeIndex loadPdbTypeIndexFromCell(GHashState *g, |
1214 | uint32_t ghashCellIdx) { |
1215 | GHashCell cell = g->table.table[ghashCellIdx]; |
1216 | return TypeIndex::fromArrayIndex(Index: cell.getGHashIdx()); |
1217 | } |
1218 | |
1219 | /// Free heap allocated ghashes. |
1220 | void TypeMerger::clearGHashes() { |
1221 | for (TpiSource *src : ctx.tpiSourceList) { |
1222 | if (src->ownedGHashes) |
1223 | delete[] src->ghashes.data(); |
1224 | src->ghashes = {}; |
1225 | src->isItemIndex.clear(); |
1226 | src->uniqueTypes.clear(); |
1227 | } |
1228 | } |
1229 | |
1230 | // Fill in a TPI or IPI index map using ghashes. For each source type, use its |
1231 | // ghash to lookup its final type index in the PDB, and store that in the map. |
1232 | void TpiSource::fillMapFromGHashes(GHashState *g) { |
1233 | for (size_t i = 0, e = ghashes.size(); i < e; ++i) { |
1234 | TypeIndex fakeCellIndex = indexMapStorage[i]; |
1235 | if (fakeCellIndex.isSimple()) |
1236 | indexMapStorage[i] = fakeCellIndex; |
1237 | else |
1238 | indexMapStorage[i] = |
1239 | loadPdbTypeIndexFromCell(g, ghashCellIdx: fakeCellIndex.toArrayIndex()); |
1240 | } |
1241 | } |
1242 |
Definitions
- TypeServerSource
- TypeServerSource
- isDependency
- TypeServerIpiSource
- TypeServerIpiSource
- mergeDebugT
- loadGHashes
- remapTpiWithGHashes
- isDependency
- UseTypeServerSource
- UseTypeServerSource
- loadGHashes
- PrecompSource
- PrecompSource
- isDependency
- UsePrecompSource
- UsePrecompSource
- TpiSource
- ~TpiSource
- makeTpiSource
- makeTypeServerSource
- makeUseTypeServerSource
- makePrecompSource
- makeUsePrecompSource
- remapTypeIndex
- remapRecord
- remapTypesInTypeRecord
- remapTypesInSymbolRecord
- canUseDebugH
- getDebugH
- getHashesFromDebugH
- mergeDebugT
- mergeDebugT
- getTypeServerSource
- mergeDebugT
- equalsPath
- findObjByName
- findPrecompSource
- findPrecompMap
- mergeInPrecompHeaderObj
- mergeDebugT
- mergeDebugT
- registerMapping
- loadGHashes
- assignGHashesFromVector
- forEachTypeChecked
- fillIsItemIndexFromDebugT
- mergeTypeRecord
- mergeUniqueTypeRecords
- remapTpiWithGHashes
- loadGHashes
- typeArrayToBytes
- remapTpiWithGHashes
- remapTpiWithGHashes
- loadGHashes
- loadGHashes
- remapTpiWithGHashes
- GHashTable
- GHashTable
- GHashCell
- GHashCell
- GHashCell
- GHashCell
- isEmpty
- getTpiSrcIdx
- getGHashIdx
- isItem
- getGHash
- operator<
- GHashState
- ~GHashTable
- init
- insert
- TypeMerger
- ~TypeMerger
- mergeTypesWithGHash
- sortDependencies
- loadPdbTypeIndexFromCell
- clearGHashes
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more