//===- DebugTypes.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DebugTypes.h"
#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Driver.h"
#include "InputFiles.h"
#include "PDB.h"
#include "TypeMerger.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"

using namespace llvm;
using namespace llvm::codeview;
using namespace lld;
using namespace lld::coff;

namespace {
class TypeServerIpiSource;

// The TypeServerSource class represents a PDB type server, a file referenced
// by OBJ files compiled with MSVC /Zi. A single PDB can be shared by several
// OBJ files, therefore there must be only one instance per PDB. The file path
// is discovered from the dependent OBJ's debug type stream. The
// TypeServerSource object is then queued and loaded by the COFF Driver. The
// debug type stream for such PDB files will be merged first in the final PDB,
// before any dependent OBJ.
class TypeServerSource : public TpiSource {
public:
  explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
      : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
    if (f->loadErrorStr)
      return;
    pdb::PDBFile &file = f->session->getPDBFile();
    auto expectedInfo = file.getPDBInfoStream();
    if (!expectedInfo)
      return;
    Guid = expectedInfo->getGuid();
    auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
    if (!it.second) {
      // If we hit this, we have a GUID collision between two PDB files. This
      // can happen if the PDB GUID is invalid or if we are really unlucky. In
      // that case, fall back to a straight file-system lookup.
      it.first->second = nullptr;
    }
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

  bool isDependency() const override { return true; }

  PDBInputFile *pdbInputFile = nullptr;

  // TpiSource for IPI stream.
  TypeServerIpiSource *ipiSrc = nullptr;

  // The PDB signature GUID.
  codeview::GUID Guid;
};

// Companion to TypeServerSource. Stores the index map for the IPI stream in
// the PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
public:
  explicit TypeServerIpiSource(COFFLinkerContext &ctx)
      : TpiSource(ctx, PDBIpi, nullptr) {}

  friend class TypeServerSource;

  // All of the TpiSource methods are no-ops. The parent TypeServerSource
  // handles both TPI and IPI.
  Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override {}
  bool isDependency() const override { return true; }
};

// This class represents the debug type stream of an OBJ file that depends on
// a PDB type server (see TypeServerSource).
class UseTypeServerSource : public TpiSource {
  Expected<TypeServerSource *> getTypeServerSource();

public:
  UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
      : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}

  Error mergeDebugT(TypeMerger *m) override;

  // No need to load ghashes from /Zi objects.
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override;

  // Information about the PDB type server dependency, which needs to be
  // loaded before merging this OBJ.
  TypeServer2Record typeServerDependency;
};

// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on it. Note that only MSVC generates
// such files; clang does not.
class PrecompSource : public TpiSource {
public:
  PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
    // If the S_OBJNAME record contains the PCH signature, we'll register this
    // source file right away.
    registerMapping();
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;

  bool isDependency() const override { return true; }

private:
  void registerMapping();

  // Whether this precomp OBJ was recorded in the precompSourceMappings map.
  // This only happens if file->pchSignature is valid.
  bool registered = false;
};

// This class represents the debug type stream of an OBJ file that depends on
// a Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
public:
  UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
      : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

private:
  Error mergeInPrecompHeaderObj();

  PrecompSource *findObjByName(StringRef fileNameOnly);
  PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
  Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);

public:
  // Information about the Precomp OBJ dependency, which needs to be loaded
  // before merging this OBJ.
  PrecompRecord precompDependency;
};
} // namespace

TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
    : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
  ctx.addTpiSource(this);
}

// Vtable key method.
TpiSource::~TpiSource() {
  // Silence any assertions about unchecked errors.
  consumeError(std::move(typeMergingError));
}

TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<TpiSource>(ctx, TpiSource::Regular, file);
}

TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
                                           PDBInputFile *pdbInputFile) {
  // Type server sources come in pairs: the TPI stream, and the IPI stream.
  auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
  if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
    tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
  return tpiSource;
}

TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
                                              ObjFile *file,
                                              TypeServer2Record ts) {
  return make<UseTypeServerSource>(ctx, file, ts);
}

TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<PrecompSource>(ctx, file);
}

TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
                                           ObjFile *file,
                                           PrecompRecord precomp) {
  return make<UsePrecompSource>(ctx, file, precomp);
}

bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
  if (ti.isSimple())
    return true;

  // This can be an item index or a type index. Choose the appropriate map.
  ArrayRef<TypeIndex> tpiOrIpiMap =
      (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
  if (ti.toArrayIndex() >= tpiOrIpiMap.size())
    return false;
  ti = tpiOrIpiMap[ti.toArrayIndex()];
  return true;
}

void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
                            ArrayRef<TiReference> typeRefs) {
  MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
  for (const TiReference &ref : typeRefs) {
    unsigned byteSize = ref.Count * sizeof(TypeIndex);
    if (contents.size() < ref.Offset + byteSize)
      fatal("symbol record too short");

    MutableArrayRef<TypeIndex> indices(
        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
    for (TypeIndex &ti : indices) {
      if (!remapTypeIndex(ti, ref.Kind)) {
        if (ctx.config.verbose) {
          uint16_t kind =
              reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
          StringRef fname = file ? file->getName() : "<unknown PDB>";
          log("failed to remap type index in record of kind 0x" +
              utohexstr(kind) + " in " + fname + " with bad " +
              (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
              " index 0x" + utohexstr(ti.getIndex()));
        }
        ti = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
    }
  }
}

void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
  // TODO: Handle errors similar to symbols.
  SmallVector<TiReference, 32> typeRefs;
  discoverTypeIndices(CVType(rec), typeRefs);
  remapRecord(rec, typeRefs);
}

bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
  // Discover type index references in the record. Skip it if we don't
  // know where they are.
  SmallVector<TiReference, 32> typeRefs;
  if (!discoverTypeIndicesInSymbol(rec, typeRefs))
    return false;
  remapRecord(rec, typeRefs);
  return true;
}

// A COFF .debug$H section is currently a clang extension. This function checks
// if a .debug$H section is in a format that we expect / understand, so that we
// can ignore any sections which are coincidentally also named .debug$H but do
// not contain a format we recognize.
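// The expected layout is a debug_h_header (magic, version, hash algorithm)
// followed by a contiguous array of 8-byte global type hashes, one per record
// in the object's .debug$T section; hence the size checks below.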
static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
  if (debugH.size() < sizeof(object::debug_h_header))
    return false;
  auto *header =
      reinterpret_cast<const object::debug_h_header *>(debugH.data());
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
         header->Version == 0 &&
         header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
         (debugH.size() % 8 == 0);
}

static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
  SectionChunk *sec =
      SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
  if (!sec)
    return std::nullopt;
  ArrayRef<uint8_t> contents = sec->getContents();
  if (!canUseDebugH(contents))
    return std::nullopt;
  return contents;
}

static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
  assert(canUseDebugH(debugH));
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  uint32_t count = debugH.size() / sizeof(GloballyHashedType);
  return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
}

// Merge .debug$T for a generic object file.
Error TpiSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  CVTypeArray types;
  BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
  cantFail(reader.readArray(types, reader.getLength()));

  // When dealing with PCH.OBJ, some indices were already merged.
  unsigned nbHeadIndices = indexMapStorage.size();

  std::optional<PCHMergerInfo> pchInfo;
  if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
                                       indexMapStorage, types, pchInfo))
    fatal("codeview::mergeTypeAndIdRecords failed: " +
          toString(std::move(err)));
  if (pchInfo) {
    file->pchSignature = pchInfo->PCHSignature;
    endPrecompIdx = pchInfo->EndPrecompIndex;
  }

  // In an object, there is only one mapping for both types and items.
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;

  if (ctx.config.showSummary) {
    nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
    nbTypeRecordsBytes = reader.getLength();
    // Count how many times we saw each type record in our input. This
    // calculation requires a second pass over the type records to classify
    // each record as a type or index. This is slow, but this code only
    // executes when collecting statistics.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    uint32_t srcIdx = nbHeadIndices;
    for (const CVType &ty : types) {
      TypeIndex dstIdx = tpiMap[srcIdx++];
      // Type merging may fail, so a complex source type may become the simple
      // NotTranslated type, which cannot be used as an array index.
      if (dstIdx.isSimple())
        continue;
      SmallVectorImpl<uint32_t> &counts =
          isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
      ++counts[dstIdx.toArrayIndex()];
    }
  }

  return Error::success();
}

// Merge types from a type server PDB.
Error TypeServerSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
  pdb::TpiStream *maybeIpi = nullptr;
  if (pdbFile.hasPDBIpiStream()) {
    Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
    if (auto e = expectedIpi.takeError())
      fatal("Error getting type server IPI stream: " + toString(std::move(e)));
    maybeIpi = &*expectedIpi;
  }

  // Merge TPI first, because the IPI stream will reference type indices.
  if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
                                  expectedTpi->typeArray()))
    fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
  tpiMap = indexMapStorage;

  // Merge IPI.
  if (maybeIpi) {
    if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
                                  maybeIpi->typeArray()))
      fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
    ipiMap = ipiSrc->indexMapStorage;
  }

  if (ctx.config.showSummary) {
    nbTypeRecords = tpiMap.size() + ipiMap.size();
    nbTypeRecordsBytes =
        expectedTpi->typeArray().getUnderlyingStream().getLength() +
        (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
                  : 0);

    // Count how many times we saw each type record in our input. If a
    // destination type index is present in the source-to-destination type
    // index map, that means we saw it once in the input. Add it to our
    // histogram.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    for (TypeIndex ti : tpiMap)
      if (!ti.isSimple())
        ++m->tpiCounts[ti.toArrayIndex()];
    for (TypeIndex ti : ipiMap)
      if (!ti.isSimple())
        ++m->ipiCounts[ti.toArrayIndex()];
  }

  return Error::success();
}

Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
  const codeview::GUID &tsId = typeServerDependency.getGuid();
  StringRef tsPath = typeServerDependency.getName();

  TypeServerSource *tsSrc = nullptr;
  auto it = ctx.typeServerSourceMappings.find(tsId);
  if (it != ctx.typeServerSourceMappings.end())
    tsSrc = (TypeServerSource *)it->second;
  if (tsSrc == nullptr) {
    // The file failed to load; look it up by name.
    PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
    if (!pdb)
      return createFileError(tsPath, errorCodeToError(std::error_code(
                                         ENOENT, std::generic_category())));
    // If an error occurred during loading, return it now.
    if (pdb->loadErrorStr)
      return createFileError(
          tsPath, make_error<StringError>(*pdb->loadErrorStr,
                                          llvm::inconvertibleErrorCode()));

    tsSrc = (TypeServerSource *)pdb->debugTypesObj;

    // Just because a file with a matching name was found and it was an actual
    // PDB file doesn't mean it matches. For it to match, the InfoStream's GUID
    // must match the GUID specified in the TypeServer2 record.
    if (tsSrc->Guid != tsId) {
      return createFileError(tsPath,
                             make_error<pdb::PDBError>(
                                 pdb::pdb_error_code::signature_out_of_date));
    }
  }
  return tsSrc;
}

Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
  Expected<TypeServerSource *> tsSrc = getTypeServerSource();
  if (!tsSrc)
    return tsSrc.takeError();

  pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
  auto expectedInfo = pdbSession.getPDBInfoStream();
  if (!expectedInfo)
    return expectedInfo.takeError();

  // Reuse the type index map of the type server.
  tpiMap = (*tsSrc)->tpiMap;
  ipiMap = (*tsSrc)->ipiMap;
  return Error::success();
}

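// Compare paths as the host file system would: case-insensitively on Windows
// hosts, case-sensitively elsewhere.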
static bool equalsPath(StringRef path1, StringRef path2) {
#if defined(_WIN32)
  return path1.equals_insensitive(path2);
#else
  return path1 == path2;
#endif
}

// Find by name an OBJ provided on the command line.
PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
  SmallString<128> currentPath;
  for (auto kv : ctx.precompSourceMappings) {
    StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
                                                    sys::path::Style::windows);

    // Compare based solely on the file name (link.exe behavior).
    if (equalsPath(currentFileName, fileNameOnly))
      return (PrecompSource *)kv.second;
  }
  return nullptr;
}

PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
                                                   PrecompRecord &pr) {
  // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
  // records, we assume the OBJ comes from a Windows build of cl.exe. Thus,
  // the paths embedded in the OBJs are in the Windows format.
  SmallString<128> prFileName =
      sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);

  auto it = ctx.precompSourceMappings.find(pr.getSignature());
  if (it != ctx.precompSourceMappings.end())
    return (PrecompSource *)it->second;
  // Look up by name.
  return findObjByName(prFileName);
}

Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
                                                           PrecompRecord &pr) {
  PrecompSource *precomp = findPrecompSource(file, pr);

  if (!precomp)
    return createFileError(
        pr.getPrecompFilePath(),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  // We don't rely on the PCH signature to validate the concordance between
  // the PCH and the OBJ that uses it. However, we do validate here that the
  // LF_ENDPRECOMP record index lines up with the number of type records
  // LF_PRECOMP is expecting.
  if (precomp->endPrecompIdx != pr.getTypesCount())
    return createFileError(
        toString(file),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  return precomp;
}

/// Merges a precompiled headers TPI map into the current TPI map. The
/// precompiled headers object will also be loaded and remapped in the
/// process.
Error UsePrecompSource::mergeInPrecompHeaderObj() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e)
    return e.takeError();

  PrecompSource *precompSrc = *e;
  if (precompSrc->tpiMap.empty())
    return Error::success();

  assert(precompDependency.getStartTypeIndex() ==
         TypeIndex::FirstNonSimpleIndex);
  assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
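  // The PCH types occupy the head of the dependent OBJ's type index space,
  // starting at TypeIndex 0x1000, so prepending the PCH's already-remapped
  // map keeps this object's source indices aligned with their destinations.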
  // Use the previously remapped index map from the precompiled headers.
  indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
                         precompSrc->tpiMap.begin() +
                             precompDependency.getTypesCount());

  return Error::success();
}

Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
  // This object was compiled with /Yu, so process the corresponding
  // precompiled headers object (/Yc) first. Some type indices in the current
  // object are referencing data in the precompiled headers object, so we need
  // both to be loaded.
  if (Error e = mergeInPrecompHeaderObj())
    return e;

  return TpiSource::mergeDebugT(m);
}

Error PrecompSource::mergeDebugT(TypeMerger *m) {
  // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
  // The signature comes later with the LF_ENDPRECOMP record, so we first need
  // to merge in all the .PCH.OBJ file type records before registering below.
  if (Error e = TpiSource::mergeDebugT(m))
    return e;

  registerMapping();

  return Error::success();
}

void PrecompSource::registerMapping() {
  if (registered)
    return;
  if (file->pchSignature && *file->pchSignature) {
    auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
    if (!it.second)
      fatal(
          "a PCH object with the same signature has already been provided (" +
          toString(it.first->second->file) + " and " + toString(file) + ")");
    registered = true;
  }
}

//===----------------------------------------------------------------------===//
// Parallel GHash type merging implementation.
//===----------------------------------------------------------------------===//

void TpiSource::loadGHashes() {
  if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
    ghashes = getHashesFromDebugH(*debugH);
    ownedGHashes = false;
  } else {
    CVTypeArray types;
    BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
    cantFail(reader.readArray(types, reader.getLength()));
    assignGHashesFromVector(GloballyHashedType::hashTypes(types));
  }

  fillIsItemIndexFromDebugT();
}

// Copies ghashes from a vector into an array. These are long-lived, so it's
// worth the time to copy them into an appropriately sized allocation to
// reduce memory usage.
void TpiSource::assignGHashesFromVector(
    std::vector<GloballyHashedType> &&hashVec) {
  if (hashVec.empty())
    return;
  GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
  memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
  ghashes = ArrayRef(hashes, hashVec.size());
  ownedGHashes = true;
}

// Faster way to iterate type records. forEachTypeChecked is faster than
// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
static void forEachTypeChecked(ArrayRef<uint8_t> types,
                               function_ref<void(const CVType &)> fn) {
  checkError(
      forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
        fn(ty);
        return Error::success();
      }));
}

// Walk over file->debugTypes and fill in the isItemIndex bit vector.
// TODO: Store this information in .debug$H so that we don't have to recompute
// it. This is the main bottleneck that makes parallel ghashing on one thread
// slower than single-threaded ghashing.
void TpiSource::fillIsItemIndexFromDebugT() {
  uint32_t index = 0;
  isItemIndex.resize(ghashes.size());
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    if (isIdRecord(ty.kind()))
      isItemIndex.set(index);
    ++index;
  });
}

void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
  // Decide if the merged type goes into TPI or IPI.
  bool isItem = isIdRecord(ty.kind());
  MergedInfo &merged = isItem ? mergedIpi : mergedTpi;

  // Copy the type into our mutable buffer.
  assert(ty.length() <= codeview::MaxRecordLength);
  size_t offset = merged.recs.size();
  size_t newSize = alignTo(ty.length(), 4);
  merged.recs.resize(offset + newSize);
  auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
  memcpy(newRec.data(), ty.data().data(), newSize);

  // Fix up the record prefix and padding bytes if the record required
  // resizing.
  if (newSize != ty.length()) {
    reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
    for (size_t i = ty.length(); i < newSize; ++i)
      newRec[i] = LF_PAD0 + (newSize - i);
  }
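  // For example, a 10-byte record aligned up to 12 bytes gets the trailing
  // pad bytes LF_PAD2 and LF_PAD1; each pad byte encodes the distance to the
  // start of the next record.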

  // Remap the type indices in the new record.
  remapTypesInTypeRecord(newRec);
  uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
  merged.recSizes.push_back(static_cast<uint16_t>(newSize));
  merged.recHashes.push_back(pdbHash);

  // Retain a mapping from PDB function id to PDB function type. This mapping
  // is used during symbol processing to rewrite S_GPROC32_ID symbols to
  // S_GPROC32 symbols.
  if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
    bool success = ty.length() >= 12;
    TypeIndex funcId = curIndex;
    if (success)
      success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
    TypeIndex funcType =
        *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
    if (success) {
      funcIdToType.push_back({funcId, funcType});
    } else {
      StringRef fname = file ? file->getName() : "<unknown PDB>";
      warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
           " in " + fname);
    }
  }
}

void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
                                       TypeIndex beginIndex) {
  // Re-sort the list of unique types by index.
  if (kind == PDB)
    assert(llvm::is_sorted(uniqueTypes));
  else
    llvm::sort(uniqueTypes);

  // Accumulate all the unique types into one buffer in mergedTypes.
  uint32_t ghashIndex = 0;
  auto nextUniqueIndex = uniqueTypes.begin();
  assert(mergedTpi.recs.empty());
  assert(mergedIpi.recs.empty());

  // Compute the number of elements in advance to avoid std::vector resizes.
  unsigned nbTpiRecs = 0;
  unsigned nbIpiRecs = 0;
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      assert(ty.length() <= codeview::MaxRecordLength);
      size_t newSize = alignTo(ty.length(), 4);
      (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  mergedTpi.recs.reserve(nbTpiRecs);
  mergedIpi.recs.reserve(nbIpiRecs);

  // Do the actual type merge.
  ghashIndex = 0;
  nextUniqueIndex = uniqueTypes.begin();
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      mergeTypeRecord(beginIndex + ghashIndex, ty);
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  assert(nextUniqueIndex == uniqueTypes.end() &&
         "failed to merge all desired records");
  assert(uniqueTypes.size() ==
             mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
         "missing desired record");
}

void TpiSource::remapTpiWithGHashes(GHashState *g) {
  assert(ctx.config.debugGHashes && "ghashes must be enabled");
  fillMapFromGHashes(g);
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;
  mergeUniqueTypeRecords(file->debugTypes);
  // TODO: Free all unneeded ghash resources now that we have a full index map.

  if (ctx.config.showSummary) {
    nbTypeRecords = ghashes.size();
    nbTypeRecordsBytes = file->debugTypes.size();
  }
}

// PDBs do not actually store global hashes, so when merging a type server
// PDB we have to synthesize global hashes. To do this, we first synthesize
// global hashes for the TPI stream, since it is independent, then we
// synthesize hashes for the IPI stream, using the hashes for the TPI stream
// as inputs.
void TypeServerSource::loadGHashes() {
  // Don't hash twice.
  if (!ghashes.empty())
    return;
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();

  // Hash TPI stream.
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
  assignGHashesFromVector(
      GloballyHashedType::hashTypes(expectedTpi->typeArray()));
  isItemIndex.resize(ghashes.size());

  // Hash IPI stream, which depends on TPI ghashes.
  if (!pdbFile.hasPDBIpiStream())
    return;
  Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
  if (auto e = expectedIpi.takeError())
    fatal("error retrieving IPI stream: " + toString(std::move(e)));
  ipiSrc->assignGHashesFromVector(
      GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));

  // The IPI stream isItemIndex bitvector should be all ones.
  ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
  ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
}

// Flatten discontiguous PDB type arrays to bytes so that we can use
// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
// type servers is faster than iterating all object files compiled with /Z7
// with CVTypeArray, which has high overheads due to the virtual interface of
// BinaryStream::readBytes.
static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
  BinaryStreamRef stream = types.getUnderlyingStream();
  ArrayRef<uint8_t> debugTypes;
  checkError(stream.readBytes(0, stream.getLength(), debugTypes));
  return debugTypes;
}

// Merge types from a type server PDB.
void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
  assert(ctx.config.debugGHashes && "ghashes must be enabled");

  // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
  // propagate errors, those should've been handled during ghash loading.
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
  fillMapFromGHashes(g);
  tpiMap = indexMapStorage;
  mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
  if (pdbFile.hasPDBIpiStream()) {
    pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
    ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
    ipiSrc->fillMapFromGHashes(g);
    ipiMap = ipiSrc->indexMapStorage;
    ipiSrc->tpiMap = tpiMap;
    ipiSrc->ipiMap = ipiMap;
    ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));

    if (ctx.config.showSummary) {
      nbTypeRecords = ipiSrc->ghashes.size();
      nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
    }
  }

  if (ctx.config.showSummary) {
    nbTypeRecords += ghashes.size();
    nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
  }
}

void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
  // No remapping to do with /Zi objects. Simply use the index map from the
  // type server. Errors should have been reported earlier. Symbols from this
  // object will be ignored.
  Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
  if (!maybeTsSrc) {
    typeMergingError =
        joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
    return;
  }
  TypeServerSource *tsSrc = *maybeTsSrc;
  tpiMap = tsSrc->tpiMap;
  ipiMap = tsSrc->ipiMap;
}

void PrecompSource::loadGHashes() {
  if (getDebugH(file)) {
    warn("ignoring .debug$H section; pch with ghash is not implemented");
  }

  uint32_t ghashIdx = 0;
  std::vector<GloballyHashedType> hashVec;
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    // Remember the index of the LF_ENDPRECOMP record so it can be excluded
    // from the PDB. There must be an entry in the list of ghashes so that the
    // type indexes of the following records in the /Yc PCH object line up.
    if (ty.kind() == LF_ENDPRECOMP) {
      EndPrecompRecord endPrecomp;
      cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
          const_cast<CVType &>(ty), endPrecomp));
      file->pchSignature = endPrecomp.getSignature();
      registerMapping();
      endPrecompIdx = ghashIdx;
    }

    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
    ++ghashIdx;
  });
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::loadGHashes() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e) {
    warn(toString(e.takeError()));
    return;
  }

  PrecompSource *pchSrc = *e;

  // To compute ghashes of a /Yu object file, we need to build on the ghashes
  // of the /Yc PCH object. After we are done hashing, discard the ghashes
  // from the PCH source so we don't unnecessarily try to deduplicate them.
  std::vector<GloballyHashedType> hashVec =
      pchSrc->ghashes.take_front(precompDependency.getTypesCount());
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
  });
  hashVec.erase(hashVec.begin(),
                hashVec.begin() + precompDependency.getTypesCount());
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
  fillMapFromGHashes(g);
  // This object was compiled with /Yu, so process the corresponding
  // precompiled headers object (/Yc) first. Some type indices in the current
  // object are referencing data in the precompiled headers object, so we need
  // both to be loaded.
  if (Error e = mergeInPrecompHeaderObj()) {
    typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
    return;
  }

  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;
  mergeUniqueTypeRecords(file->debugTypes,
                         TypeIndex(precompDependency.getStartTypeIndex() +
                                   precompDependency.getTypesCount()));
  if (ctx.config.showSummary) {
    nbTypeRecords = ghashes.size();
    nbTypeRecordsBytes = file->debugTypes.size();
  }
}

namespace {
/// A concurrent hash table for global type hashing. It is based on this
/// paper:
/// Concurrent Hash Tables: Fast and General(?)!
/// https://dl.acm.org/doi/10.1145/3309206
///
/// This hash table is meant to be used in two phases:
/// 1. concurrent insertions
/// 2. concurrent reads
/// It does not support lookup, deletion, or rehashing. It uses linear probing.
///
/// The paper describes storing a key-value pair in two machine words.
/// Generally, the values stored in this map are type indices, and we can use
/// those values to recover the ghash key from a side table. This allows us to
/// shrink the table entries further at the cost of some loads, and sidesteps
/// the need for a 128 bit atomic compare-and-swap operation.
///
/// During insertion, a priority function is used to decide which insertion
/// should be preferred. This ensures that the output is deterministic. For
/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
///
class GHashCell;
struct GHashTable {
  GHashCell *table = nullptr;
  uint32_t tableSize = 0;

  GHashTable() = default;
  ~GHashTable();

  /// Initialize the table with the given size. Because the table cannot be
  /// resized, the initial size of the table must be large enough to contain
  /// all inputs, or insertion may not be able to find an empty cell.
  void init(uint32_t newTableSize);

  /// Insert the cell with the given ghash into the table. Return the
  /// insertion position in the table. It is safe for the caller to store the
  /// insertion position because the table cannot be resized.
  uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                  GHashCell newCell);
};

/// A ghash table cell for deduplicating types from TpiSources.
class GHashCell {
  // Force "data" to be 64-bit aligned; otherwise, some versions of clang
  // will generate calls to libatomic when using some versions of libstdc++
  // on 32-bit targets. (Also, in theory, there could be a target where
  // new[] doesn't always return an 8-byte-aligned allocation.)
  alignas(sizeof(uint64_t)) uint64_t data = 0;

public:
  GHashCell() = default;

  // Construct data most to least significant so that sorting works well:
  // - isItem
  // - tpiSrcIdx
  // - ghashIdx
  // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
  // non-zero representation.
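  //
  //    bit 63  |  bits 62..32  |  bits 31..0
  //   ---------+---------------+-------------
  //    isItem  | tpiSrcIdx + 1 |   ghashIdx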
  GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
      : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
             ghashIdx) {
    assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
    assert(ghashIdx == getGHashIdx() && "round trip failure");
  }

  explicit GHashCell(uint64_t data) : data(data) {}

  // The empty cell is all zeros.
  bool isEmpty() const { return data == 0ULL; }

  /// Extract the tpiSrcIdx.
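  /// Mask off the isItem bit and undo the +1 bias applied by the constructor.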
  uint32_t getTpiSrcIdx() const {
    return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
  }

  /// Extract the index into the ghash array of the TpiSource.
  uint32_t getGHashIdx() const { return (uint32_t)data; }

  bool isItem() const { return data & (1ULL << 63U); }

  /// Get the ghash key for this cell.
  GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
    return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
  }

  /// The priority function for the cell. The data is stored such that lower
  /// tpiSrcIdx and ghashIdx values are preferred, which means that type
  /// records from earlier sources are more likely to prevail.
  friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
    return l.data < r.data;
  }
};
} // namespace

namespace lld::coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header.
struct GHashState {
  GHashTable table;
};
} // namespace lld::coff

GHashTable::~GHashTable() { delete[] table; }

void GHashTable::init(uint32_t newTableSize) {
  table = new GHashCell[newTableSize];
  memset(table, 0, newTableSize * sizeof(GHashCell));
  tableSize = newTableSize;
}

uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                            GHashCell newCell) {
  assert(!newCell.isEmpty() && "cannot insert empty cell value");

  // FIXME: The low bytes of SHA1 have low entropy for short records, which
  // type records are. Swap the byte order for better entropy. A better ghash
  // won't need this.
  uint32_t startIdx =
      llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
      tableSize;

  // Do a linear probe starting at startIdx.
  uint32_t idx = startIdx;
  while (true) {
    // Run a compare and swap loop. There are four cases:
    // - cell is empty: CAS into place and return
    // - cell has matching key, earlier priority: do nothing, return
    // - cell has matching key, later priority: CAS into place and return
    // - cell has non-matching key: hash collision, probe next cell
    auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
    GHashCell oldCell(cellPtr->load());
    while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
      // Check if there is an existing ghash entry with a higher priority
      // (earlier ordering). If so, this is a duplicate, we are done.
      if (!oldCell.isEmpty() && oldCell < newCell)
        return idx;
      // Either the cell is empty, or our value is higher priority. Try to
      // compare and swap. If it succeeds, we are done.
      if (cellPtr->compare_exchange_weak(oldCell, newCell))
        return idx;
      // If the CAS failed, check this cell again.
    }

    // Advance the probe. Wrap around to the beginning if we run off the end.
    ++idx;
    idx = idx == tableSize ? 0 : idx;
    if (idx == startIdx) {
      // If this becomes an issue, we could mark failure and rehash from the
      // beginning with a bigger table. There is no difference between
      // rehashing internally and starting over.
      report_fatal_error("ghash table is full");
    }
  }
  llvm_unreachable("left infloop");
}

TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
    : typeTable(alloc), idTable(alloc), ctx(c) {}

TypeMerger::~TypeMerger() = default;

void TypeMerger::mergeTypesWithGHash() {
  // Load ghashes. Do type servers and PCH objects first.
  {
    llvm::TimeTraceScope timeScope("Load GHASHes");
    ScopedTimer t1(ctx.loadGHashTimer);
    parallelForEach(dependencySources,
                    [&](TpiSource *source) { source->loadGHashes(); });
    parallelForEach(objectSources,
                    [&](TpiSource *source) { source->loadGHashes(); });
  }

  llvm::TimeTraceScope timeScope("Merge types (GHASH)");
  ScopedTimer t2(ctx.mergeGHashTimer);
  GHashState ghashState;

  // Estimate the size of hash table needed to deduplicate ghashes. This
  // *must* be larger than the number of unique types, or hash table insertion
  // may not be able to find a vacant slot. Summing the input types guarantees
  // this, but it is a gross overestimate. The table size could be reduced to
  // save memory, but it would require implementing rehashing, and this table
  // is generally small compared to total memory usage, at eight bytes per
  // input type record, and most input type records are larger than eight
  // bytes.
  size_t tableSize = 0;
  for (TpiSource *source : ctx.tpiSourceList)
    tableSize += source->ghashes.size();

  // Cap the table size so that we can use 32-bit cell indices. Type indices
  // are also 32-bit, so this is an inherent PDB file format limit anyway.
  tableSize =
      std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
  ghashState.table.init(static_cast<uint32_t>(tableSize));

  // Insert ghashes in parallel. During concurrent insertion, we cannot
  // observe the contents of the hash table cell, but we can remember the
  // insertion position. Because the table does not rehash, the position will
  // not change under insertion. After insertion is done, the value of the
  // cell can be read to retrieve the final PDB type index.
  parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->indexMapStorage.resize(source->ghashes.size());
    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
      if (source->shouldOmitFromPdb(i)) {
        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
      GloballyHashedType ghash = source->ghashes[i];
      bool isItem = source->isItemIndex.test(i);
      uint32_t cellIdx =
          ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));

      // Store the ghash cell index as a type index in indexMapStorage. Later
      // we will replace it with the PDB type index.
      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
    }
  });

  // Collect all non-empty cells and sort them. This will implicitly assign
  // destination type indices, and partition the entries into type records and
  // item records. It arranges types in this order:
  // - type records
  //   - source 0, type 0...
  //   - source 1, type 0...
  // - item records
  //   - source 0, item 0...
  //   - source 1, item 0...
  std::vector<GHashCell> entries;
  for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
    if (!cell.isEmpty())
      entries.push_back(cell);
  }
  parallelSort(entries, std::less<GHashCell>());
  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
              tableSize ? double(entries.size()) / tableSize : 0,
              entries.size(), tableSize));

  // Find out how many type and item indices there are.
  auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
  assert((mid == entries.end() || mid->isItem()) &&
         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
         "midpoint is not midpoint");
  uint32_t numTypes = std::distance(entries.begin(), mid);
  uint32_t numItems = std::distance(mid, entries.end());
  log("Tpi record count: " + Twine(numTypes));
  log("Ipi record count: " + Twine(numItems));

  // Make a list of the "unique" type records to merge for each tpi source.
  // Type merging will skip indices not on this list. Store the destination
  // PDB type index for these unique types in the tpiMap for each source. The
  // entries for non-unique types will be filled in prior to type merging.
  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
    auto &cell = entries[i];
    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->uniqueTypes.push_back(cell.getGHashIdx());

    // Update the ghash table to store the destination PDB type index in the
    // table.
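    // Type and item entries get separate destination index spaces: the item
    // half of the sorted array restarts its numbering at zero.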
    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
    uint32_t ghashCellIndex =
        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
    ghashState.table.table[ghashCellIndex] =
        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
  }

  // In parallel, remap all types.
  for (TpiSource *source : dependencySources)
    source->remapTpiWithGHashes(&ghashState);
  parallelForEach(objectSources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });

  // Build a global map from function ID to function type.
  for (TpiSource *source : ctx.tpiSourceList) {
    for (auto idToType : source->funcIdToType)
      funcIdToType.insert(idToType);
    source->funcIdToType.clear();
  }

  clearGHashes();
}

void TypeMerger::sortDependencies() {
  // Order dependencies first, but preserve the existing order.
  std::vector<TpiSource *> deps;
  std::vector<TpiSource *> objs;
  for (TpiSource *s : ctx.tpiSourceList)
    (s->isDependency() ? deps : objs).push_back(s);
  uint32_t numDeps = deps.size();
  uint32_t numObjs = objs.size();
  ctx.tpiSourceList = std::move(deps);
  ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
  for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
    ctx.tpiSourceList[i]->tpiSrcIdx = i;
  dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
  objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
}

/// Given the index into the ghash table for a particular type, return the
/// type index for that type in the output PDB.
static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
                                          uint32_t ghashCellIdx) {
  GHashCell cell = g->table.table[ghashCellIdx];
  return TypeIndex::fromArrayIndex(cell.getGHashIdx());
}

/// Free heap allocated ghashes.
void TypeMerger::clearGHashes() {
  for (TpiSource *src : ctx.tpiSourceList) {
    if (src->ownedGHashes)
      delete[] src->ghashes.data();
    src->ghashes = {};
    src->isItemIndex.clear();
    src->uniqueTypes.clear();
  }
}

// Fill in a TPI or IPI index map using ghashes. For each source type, use its
// ghash to look up its final type index in the PDB, and store that in the
// map.
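// Records that were omitted from the PDB were marked NotTranslated (a simple
// index) during insertion; those entries are passed through unchanged.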
void TpiSource::fillMapFromGHashes(GHashState *g) {
  for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
    TypeIndex fakeCellIndex = indexMapStorage[i];
    if (fakeCellIndex.isSimple())
      indexMapStorage[i] = fakeCellIndex;
    else
      indexMapStorage[i] =
          loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
  }
}