1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "DebugTypes.h"
14#include "Driver.h"
15#include "SymbolTable.h"
16#include "Symbols.h"
17#include "lld/Common/DWARF.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/BinaryFormat/COFF.h"
21#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
23#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
24#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
25#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27#include "llvm/IR/Mangler.h"
28#include "llvm/LTO/LTO.h"
29#include "llvm/Object/Binary.h"
30#include "llvm/Object/COFF.h"
31#include "llvm/Object/COFFImportFile.h"
32#include "llvm/Support/Casting.h"
33#include "llvm/Support/Endian.h"
34#include "llvm/Support/Error.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/Path.h"
37#include "llvm/TargetParser/Triple.h"
38#include <cstring>
39#include <optional>
40#include <utility>
41
42using namespace llvm;
43using namespace llvm::COFF;
44using namespace llvm::codeview;
45using namespace llvm::object;
46using namespace llvm::support::endian;
47using namespace lld;
48using namespace lld::coff;
49
50using llvm::Triple;
51using llvm::support::ulittle32_t;
52
53// Returns the last element of a path, which is supposed to be a filename.
54static StringRef getBasename(StringRef path) {
55 return sys::path::filename(path, style: sys::path::Style::windows);
56}
57
58// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
59std::string lld::toString(const coff::InputFile *file) {
60 if (!file)
61 return "<internal>";
62 if (file->parentName.empty())
63 return std::string(file->getName());
64
65 return (getBasename(path: file->parentName) + "(" + getBasename(path: file->getName()) +
66 ")")
67 .str();
68}
69
70const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
71 const InputFile *f) {
72 return s << toString(file: f);
73}
74
75/// Checks that Source is compatible with being a weak alias to Target.
76/// If Source is Undefined and has no weak alias set, makes it a weak
77/// alias to Target.
78static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
79 Symbol *source, Symbol *target,
80 bool isAntiDep) {
81 if (auto *u = dyn_cast<Undefined>(Val: source)) {
82 if (u->weakAlias && u->weakAlias != target) {
83 // Ignore duplicated anti-dependency symbols.
84 if (isAntiDep)
85 return;
86 if (!u->isAntiDep) {
87 // Weak aliases as produced by GCC are named in the form
88 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
89 // of another symbol emitted near the weak symbol.
90 // Just use the definition from the first object file that defined
91 // this weak symbol.
92 if (symtab.ctx.config.allowDuplicateWeak)
93 return;
94 symtab.reportDuplicate(existing: source, newFile: f);
95 }
96 }
97 u->setWeakAlias(sym: target, antiDep: isAntiDep);
98 }
99}
100
101static bool ignoredSymbolName(StringRef name) {
102 return name == "@feat.00" || name == "@comp.id";
103}
104
105static coff_symbol_generic *cloneSymbol(COFFSymbolRef sym) {
106 if (sym.isBigObj()) {
107 auto *copy = make<coff_symbol32>(
108 args: *reinterpret_cast<const coff_symbol32 *>(sym.getRawPtr()));
109 return reinterpret_cast<coff_symbol_generic *>(copy);
110 } else {
111 auto *copy = make<coff_symbol16>(
112 args: *reinterpret_cast<const coff_symbol16 *>(sym.getRawPtr()));
113 return reinterpret_cast<coff_symbol_generic *>(copy);
114 }
115}
116
117ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
118 : InputFile(ctx.symtab, ArchiveKind, m) {}
119
120void ArchiveFile::parse() {
121 COFFLinkerContext &ctx = symtab.ctx;
122 SymbolTable *archiveSymtab = &symtab;
123
124 // Parse a MemoryBufferRef as an archive file.
125 file = CHECK(Archive::create(mb), this);
126
127 // Try to read symbols from ECSYMBOLS section on ARM64EC.
128 if (ctx.symtab.isEC()) {
129 iterator_range<Archive::symbol_iterator> symbols =
130 CHECK(file->ec_symbols(), this);
131 if (!symbols.empty()) {
132 for (const Archive::Symbol &sym : symbols)
133 ctx.symtab.addLazyArchive(f: this, sym);
134
135 // Read both EC and native symbols on ARM64X.
136 archiveSymtab = &*ctx.hybridSymtab;
137 } else {
138 // If the ECSYMBOLS section is missing in the archive, the archive could
139 // be either a native-only ARM64 or x86_64 archive. Check the machine type
140 // of the object containing a symbol to determine which symbol table to
141 // use.
142 Archive::symbol_iterator sym = file->symbol_begin();
143 if (sym != file->symbol_end()) {
144 MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
145 Archive::Child child =
146 CHECK(sym->getMember(),
147 file->getFileName() +
148 ": could not get the buffer for a child of the archive");
149 MemoryBufferRef mb = CHECK(
150 child.getMemoryBufferRef(),
151 file->getFileName() +
152 ": could not get the buffer for a child buffer of the archive");
153 switch (identify_magic(magic: mb.getBuffer())) {
154 case file_magic::coff_object: {
155 std::unique_ptr<COFFObjectFile> obj =
156 CHECK(COFFObjectFile::create(mb),
157 check(child.getName()) + ":" + ": not a valid COFF file");
158 machine = MachineTypes(obj->getMachine());
159 break;
160 }
161 case file_magic::coff_import_library:
162 machine = MachineTypes(COFFImportFile(mb).getMachine());
163 break;
164 case file_magic::bitcode: {
165 std::unique_ptr<lto::InputFile> obj =
166 check(e: lto::InputFile::create(Object: mb));
167 machine = BitcodeFile::getMachineType(obj: obj.get());
168 break;
169 }
170 default:
171 break;
172 }
173 archiveSymtab = &ctx.getSymtab(machine);
174 }
175 }
176 }
177
178 // Read the symbol table to construct Lazy objects.
179 for (const Archive::Symbol &sym : file->symbols())
180 archiveSymtab->addLazyArchive(f: this, sym);
181}
182
183// Returns a buffer pointing to a member file containing a given symbol.
184void ArchiveFile::addMember(const Archive::Symbol &sym) {
185 const Archive::Child &c =
186 CHECK(sym.getMember(), "could not get the member for symbol " +
187 toCOFFString(symtab.ctx, sym));
188
189 // Return an empty buffer if we have already returned the same buffer.
190 // FIXME: Remove this once we resolve all defineds before all undefineds in
191 // ObjFile::initializeSymbols().
192 if (!seen.insert(V: c.getChildOffset()).second)
193 return;
194
195 symtab.ctx.driver.enqueueArchiveMember(c, sym, parentName: getName());
196}
197
198std::vector<MemoryBufferRef>
199lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
200 std::vector<MemoryBufferRef> v;
201 Error err = Error::success();
202
203 // Thin archives refer to .o files, so --reproduces needs the .o files too.
204 bool addToTar = file->isThin() && ctx.driver.tar;
205
206 for (const Archive::Child &c : file->children(Err&: err)) {
207 MemoryBufferRef mbref =
208 CHECK(c.getMemoryBufferRef(),
209 file->getFileName() +
210 ": could not get the buffer for a child of the archive");
211 if (addToTar) {
212 ctx.driver.tar->append(Path: relativeToRoot(path: check(e: c.getFullName())),
213 Data: mbref.getBuffer());
214 }
215 v.push_back(x: mbref);
216 }
217 if (err)
218 Fatal(ctx) << file->getFileName()
219 << ": Archive::children failed: " << toString(E: std::move(err));
220 return v;
221}
222
223ObjFile::ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy)
224 : InputFile(symtab, ObjectKind, coffObj->getMemoryBufferRef(), lazy),
225 coffObj(coffObj) {}
226
227ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) {
228 // Parse a memory buffer as a COFF file.
229 Expected<std::unique_ptr<Binary>> bin = createBinary(Source: m);
230 if (!bin)
231 Fatal(ctx) << "Could not parse " << m.getBufferIdentifier();
232
233 auto *obj = dyn_cast<COFFObjectFile>(Val: bin->get());
234 if (!obj)
235 Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file";
236
237 bin->release();
238 return make<ObjFile>(args&: ctx.getSymtab(machine: MachineTypes(obj->getMachine())), args&: obj,
239 args&: lazy);
240}
241
242void ObjFile::parseLazy() {
243 // Native object file.
244 uint32_t numSymbols = coffObj->getNumberOfSymbols();
245 for (uint32_t i = 0; i < numSymbols; ++i) {
246 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
247 if (coffSym.isUndefined() || !coffSym.isExternal() ||
248 coffSym.isWeakExternal())
249 continue;
250 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
251 if (coffSym.isAbsolute() && ignoredSymbolName(name))
252 continue;
253 symtab.addLazyObject(f: this, n: name);
254 if (!lazy)
255 return;
256 i += coffSym.getNumberOfAuxSymbols();
257 }
258}
259
// One fixed-size record of a .hybmp chunk, as consumed by
// ObjFile::initializeECThunks(), which reinterprets the chunk's raw bytes as
// an array of these. Field order and widths are therefore part of the input
// format and must not change.
260struct ECMapEntry {
// Symbol index of the thunk's source symbol (passed to getSymbol()).
261 ulittle32_t src;
// Symbol index of the thunk's destination symbol (passed to getSymbol()).
262 ulittle32_t dst;
// Thunk kind; compared against the Arm64ECThunkType enumerators.
263 ulittle32_t type;
264};
265
266void ObjFile::initializeECThunks() {
267 for (SectionChunk *chunk : hybmpChunks) {
268 if (chunk->getContents().size() % sizeof(ECMapEntry)) {
269 Err(ctx&: symtab.ctx) << "Invalid .hybmp chunk size "
270 << chunk->getContents().size();
271 continue;
272 }
273
274 const uint8_t *end =
275 chunk->getContents().data() + chunk->getContents().size();
276 for (const uint8_t *iter = chunk->getContents().data(); iter != end;
277 iter += sizeof(ECMapEntry)) {
278 auto entry = reinterpret_cast<const ECMapEntry *>(iter);
279 switch (entry->type) {
280 case Arm64ECThunkType::Entry:
281 symtab.addEntryThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
282 break;
283 case Arm64ECThunkType::Exit:
284 symtab.addExitThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
285 break;
286 case Arm64ECThunkType::GuestExit:
287 break;
288 default:
289 Warn(ctx&: symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
290 }
291 }
292 }
293}
294
295void ObjFile::parse() {
296 // Read section and symbol tables.
297 initializeChunks();
298 initializeSymbols();
299 initializeFlags();
300 initializeDependencies();
301 initializeECThunks();
302}
303
304const coff_section *ObjFile::getSection(uint32_t i) {
305 auto sec = coffObj->getSection(index: i);
306 if (!sec)
307 Fatal(ctx&: symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
308 return *sec;
309}
310
311// We set SectionChunk pointers in the SparseChunks vector to this value
312// temporarily to mark comdat sections as having an unknown resolution. As we
313// walk the object file's symbol table, once we visit either a leader symbol or
314// an associative section definition together with the parent comdat's leader,
315// we set the pointer to either nullptr (to mark the section as discarded) or a
316// valid SectionChunk for that section.
//
// The sentinel is only ever compared against; it must never be dereferenced.
// NOTE(review): this relies on address 1 never being a real SectionChunk
// pointer, so it cannot be confused with nullptr or a live chunk.
317static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
318
319void ObjFile::initializeChunks() {
320 uint32_t numSections = coffObj->getNumberOfSections();
321 sparseChunks.resize(new_size: numSections + 1);
322 for (uint32_t i = 1; i < numSections + 1; ++i) {
323 const coff_section *sec = getSection(i);
324 if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
325 sparseChunks[i] = pendingComdat;
326 else
327 sparseChunks[i] = readSection(sectionNumber: i, def: nullptr, leaderName: "");
328 }
329}
330
// Builds the SectionChunk for section `sectionNumber`, or returns nullptr
// when the section is consumed or dropped here instead of becoming a chunk:
//  - .drectve: its contents are stored in `directives`;
//  - .llvm_addrsig / .llvm.call-graph-profile: the section header is
//    remembered in addrsigSec / callgraphSec;
//  - .debug_* sections when config.includeDwarfChunks is off;
//  - sections flagged IMAGE_SCN_LNK_REMOVE.
//
// `def`, when non-null, supplies the checksum recorded on the new chunk.
// `leaderName` is only consulted by the string tail-merging test.
//
// A created chunk is filed by the FIRST matching test in the else-if chain
// at the end, so the order of those tests matters. A chunk that matches none
// of the special cases and is flagged IMAGE_SCN_LNK_INFO is still returned,
// but is not stored in any of the vectors.
331SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
332 const coff_aux_section_definition *def,
333 StringRef leaderName) {
334 const coff_section *sec = getSection(i: sectionNumber);
335
336 StringRef name;
337 if (Expected<StringRef> e = coffObj->getSectionName(Sec: sec))
338 name = *e;
339 else
340 Fatal(ctx&: symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
341 << e.takeError();
342
343 if (name == ".drectve") {
344 ArrayRef<uint8_t> data;
// NOTE(review): cantFail() assumes the contents of .drectve can always be
// read; confirm this holds for malformed inputs.
345 cantFail(Err: coffObj->getSectionContents(Sec: sec, Res&: data));
346 directives = StringRef((const char *)data.data(), data.size());
347 return nullptr;
348 }
349
350 if (name == ".llvm_addrsig") {
351 addrsigSec = sec;
352 return nullptr;
353 }
354
355 if (name == ".llvm.call-graph-profile") {
356 callgraphSec = sec;
357 return nullptr;
358 }
359
360 // Object files may have DWARF debug info or MS CodeView debug info
361 // (or both).
362 //
363 // DWARF sections don't need any special handling from the perspective
364 // of the linker; they are just a data section containing relocations.
365 // We can just link them to complete debug info.
366 //
367 // CodeView needs linker support. We need to interpret debug info,
368 // and then write it to a separate .pdb file.
369
370 // Ignore DWARF debug info unless requested to be included.
371 if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(Prefix: ".debug_"))
372 return nullptr;
373
374 if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
375 return nullptr;
// ARM64EC objects get the EC-specific chunk class; everything else gets a
// plain SectionChunk.
376 SectionChunk *c;
377 if (isArm64EC(Machine: getMachineType()))
378 c = make<SectionChunkEC>(args: this, args&: sec);
379 else
380 c = make<SectionChunk>(args: this, args&: sec);
381 if (def)
382 c->checksum = def->CheckSum;
383
384 // CodeView sections are stored to a different vector because they are not
385 // linked in the regular manner.
386 if (c->isCodeView())
387 debugChunks.push_back(x: c);
388 else if (name == ".gfids$y")
389 guardFidChunks.push_back(x: c);
390 else if (name == ".giats$y")
391 guardIATChunks.push_back(x: c);
392 else if (name == ".gljmp$y")
393 guardLJmpChunks.push_back(x: c);
394 else if (name == ".gehcont$y")
395 guardEHContChunks.push_back(x: c);
396 else if (name == ".sxdata")
397 sxDataChunks.push_back(x: c);
398 else if (isArm64EC(Machine: getMachineType()) && name == ".hybmp$x")
399 hybmpChunks.push_back(x: c);
400 else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
401 name == ".rdata" && leaderName.starts_with(Prefix: "??_C@"))
402 // COFF sections that look like string literal sections (i.e. no
403 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
404 // for string literals) are subject to string tail merging.
405 MergeChunk::addSection(ctx&: symtab.ctx, c);
406 else if (name == ".rsrc" || name.starts_with(Prefix: ".rsrc$"))
407 resourceChunks.push_back(x: c);
408 else if (!(sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO))
409 chunks.push_back(x: c);
410
411 return c;
412}
413
414void ObjFile::includeResourceChunks() {
415 chunks.insert(position: chunks.end(), first: resourceChunks.begin(), last: resourceChunks.end());
416}
417
418void ObjFile::readAssociativeDefinition(
419 COFFSymbolRef sym, const coff_aux_section_definition *def) {
420 readAssociativeDefinition(coffSym: sym, def, parentSection: def->getNumber(IsBigObj: sym.isBigObj()));
421}
422
423void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
424 const coff_aux_section_definition *def,
425 uint32_t parentIndex) {
426 SectionChunk *parent = sparseChunks[parentIndex];
427 int32_t sectionNumber = sym.getSectionNumber();
428
429 auto diag = [&]() {
430 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
431
432 StringRef parentName;
433 const coff_section *parentSec = getSection(i: parentIndex);
434 if (Expected<StringRef> e = coffObj->getSectionName(Sec: parentSec))
435 parentName = *e;
436 Err(ctx&: symtab.ctx) << toString(file: this) << ": associative comdat " << name
437 << " (sec " << sectionNumber
438 << ") has invalid reference to section " << parentName
439 << " (sec " << parentIndex << ")";
440 };
441
442 if (parent == pendingComdat) {
443 // This can happen if an associative comdat refers to another associative
444 // comdat that appears after it (invalid per COFF spec) or to a section
445 // without any symbols.
446 diag();
447 return;
448 }
449
450 // Check whether the parent is prevailing. If it is, so are we, and we read
451 // the section; otherwise mark it as discarded.
452 if (parent) {
453 SectionChunk *c = readSection(sectionNumber, def, leaderName: "");
454 sparseChunks[sectionNumber] = c;
455 if (c) {
456 c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
457 parent->addAssociative(child: c);
458 }
459 } else {
460 sparseChunks[sectionNumber] = nullptr;
461 }
462}
463
464void ObjFile::recordPrevailingSymbolForMingw(
465 COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
466 // For comdat symbols in executable sections, where this is the copy
467 // of the section chunk we actually include instead of discarding it,
468 // add the symbol to a map to allow using it for implicitly
469 // associating .[px]data$<func> sections to it.
470 // Use the suffix from the .text$<func> instead of the leader symbol
471 // name, for cases where the names differ (i386 mangling/decorations,
472 // cases where the leader is a weak symbol named .weak.func.default*).
473 int32_t sectionNumber = sym.getSectionNumber();
474 SectionChunk *sc = sparseChunks[sectionNumber];
475 if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
476 StringRef name = sc->getSectionName().split(Separator: '$').second;
477 prevailingSectionMap[name] = sectionNumber;
478 }
479}
480
481void ObjFile::maybeAssociateSEHForMingw(
482 COFFSymbolRef sym, const coff_aux_section_definition *def,
483 const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
484 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
485 if (name.consume_front(Prefix: ".pdata$") || name.consume_front(Prefix: ".xdata$") ||
486 name.consume_front(Prefix: ".eh_frame$")) {
487 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
488 // associative to the symbol <func>.
489 auto parentSym = prevailingSectionMap.find(Val: name);
490 if (parentSym != prevailingSectionMap.end())
491 readAssociativeDefinition(sym, def, parentIndex: parentSym->second);
492 }
493}
494
495Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
496 SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
497 if (sym.isExternal()) {
498 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
499 if (sc)
500 return symtab.addRegular(f: this, n: name, s: sym.getGeneric(), c: sc,
501 sectionOffset: sym.getValue());
502 // For MinGW symbols named .weak.* that point to a discarded section,
503 // don't create an Undefined symbol. If nothing ever refers to the symbol,
504 // everything should be fine. If something actually refers to the symbol
505 // (e.g. the undefined weak alias), linking will fail due to undefined
506 // references at the end.
507 if (symtab.ctx.config.mingw && name.starts_with(Prefix: ".weak."))
508 return nullptr;
509 return symtab.addUndefined(name, f: this, overrideLazy: false);
510 }
511 if (sc) {
512 const coff_symbol_generic *symGen = sym.getGeneric();
513 if (sym.isSection()) {
514 auto *customSymGen = cloneSymbol(sym);
515 customSymGen->Value = 0;
516 symGen = customSymGen;
517 }
518 return make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
519 /*IsExternal*/ args: false, args&: symGen, args&: sc);
520 }
521 return nullptr;
522}
523
// Populates `symbols` (one slot per symbol-table index) in three passes:
//  1. Walk the symbol table, creating undefined, weak-external and defined
//     symbols. Indices for which createDefined() yields std::nullopt are
//     queued in pendingIndexes. Aux records are stepped over, so their slots
//     keep the value given by resize().
//  2. Resolve the queued indices now that comdat/associative information is
//     available; symbols whose section is still pending are logged and left
//     unset.
//  3. Wire up the weak aliases collected in pass 1 via checkAndSetWeakAlias().
// sparseChunks is released at the end, so it must not be used afterwards.
524void ObjFile::initializeSymbols() {
525 uint32_t numSymbols = coffObj->getNumberOfSymbols();
526 symbols.resize(new_size: numSymbols);
527
528 SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
529 weakAliases;
530 std::vector<uint32_t> pendingIndexes;
531 pendingIndexes.reserve(n: numSymbols);
532
533 DenseMap<StringRef, uint32_t> prevailingSectionMap;
534 std::vector<const coff_aux_section_definition *> comdatDefs(
535 coffObj->getNumberOfSections() + 1);
536 COFFLinkerContext &ctx = symtab.ctx;
537
538 for (uint32_t i = 0; i < numSymbols; ++i) {
539 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
540 bool prevailingComdat;
541 if (coffSym.isUndefined()) {
542 symbols[i] = createUndefined(sym: coffSym, overrideLazy: false);
543 } else if (coffSym.isWeakExternal()) {
544 auto aux = coffSym.getAux<coff_aux_weak_external>();
545 bool overrideLazy = true;
546
547 // On ARM64EC, external function calls emit a pair of weak-dependency
548 // aliases: func to #func and #func to the func guest exit thunk
549 // (instead of a single undefined func symbol, which would be emitted on
550 // other targets). Allow such aliases to be overridden by lazy archive
551 // symbols, just as we would for undefined symbols.
552 if (isArm64EC(Machine: getMachineType()) &&
553 aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
554 COFFSymbolRef targetSym = check(e: coffObj->getSymbol(index: aux->TagIndex));
555 if (!targetSym.isAnyUndefined()) {
556 // If the target is defined, it may be either a guest exit thunk or
557 // the actual implementation. If it's the latter, consider the alias
558 // to be part of the implementation and override potential lazy
559 // archive symbols.
560 StringRef targetName = check(e: coffObj->getSymbolName(Symbol: targetSym));
561 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
562 std::optional<std::string> mangledName =
563 getArm64ECMangledFunctionName(Name: name);
564 overrideLazy = mangledName == targetName;
565 } else {
566 overrideLazy = false;
567 }
568 }
569 symbols[i] = createUndefined(sym: coffSym, overrideLazy);
570 weakAliases.emplace_back(Args&: symbols[i], Args&: aux);
571 } else if (std::optional<Symbol *> optSym =
572 createDefined(sym: coffSym, comdatDefs, prevailingComdat)) {
573 symbols[i] = *optSym;
574 if (ctx.config.mingw && prevailingComdat)
575 recordPrevailingSymbolForMingw(sym: coffSym, prevailingSectionMap);
576 } else {
577 // createDefined() returns std::nullopt if a symbol belongs to a section
578 // that was pending at the point when the symbol was read. This can happen
579 // in two cases:
580 // 1) section definition symbol for a comdat leader;
581 // 2) symbol belongs to a comdat section associated with another section.
582 // In both of these cases, we can expect the section to be resolved by
583 // the time we finish visiting the remaining symbols in the symbol
584 // table. So we postpone the handling of this symbol until that time.
585 pendingIndexes.push_back(x: i);
586 }
// Step over this symbol's aux records; they are not symbols themselves.
587 i += coffSym.getNumberOfAuxSymbols();
588 }
589
// Second pass: symbols whose section was still pending when first visited.
590 for (uint32_t i : pendingIndexes) {
591 COFFSymbolRef sym = check(e: coffObj->getSymbol(index: i));
592 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
593 if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
594 readAssociativeDefinition(sym, def);
595 else if (ctx.config.mingw)
596 maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
597 }
598 if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
599 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
600 Log(ctx) << "comdat section " << name
601 << " without leader and unassociated, discarding";
602 continue;
603 }
604 symbols[i] = createRegular(sym);
605 }
606
// Third pass: alias targets are looked up only now, when every slot of
// `symbols` that will be filled has been filled.
// NOTE(review): aux->TagIndex is read from the object file and is used here
// as an index into `symbols` without a visible range check.
607 for (auto &kv : weakAliases) {
608 Symbol *sym = kv.first;
609 const coff_aux_weak_external *aux = kv.second;
610 checkAndSetWeakAlias(symtab, f: this, source: sym, target: symbols[aux->TagIndex],
611 isAntiDep: aux->Characteristics ==
612 IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
613 }
614
615 // Free the memory used by sparseChunks now that symbol loading is finished.
616 decltype(sparseChunks)().swap(x&: sparseChunks);
617}
618
619Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
620 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
621 Symbol *s = symtab.addUndefined(name, f: this, overrideLazy);
622
623 // Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
624 // target.
625 if (symtab.isEC() && getMachineType() == AMD64) {
626 auto u = dyn_cast<Undefined>(Val: s);
627 if (u && !u->weakAlias) {
628 if (std::optional<std::string> mangledName =
629 getArm64ECMangledFunctionName(Name: name)) {
630 Symbol *m = symtab.addUndefined(name: saver().save(S: *mangledName), f: this,
631 /*overrideLazy=*/false);
632 u->setWeakAlias(sym: m, /*antiDep=*/true);
633 }
634 }
635 }
636 return s;
637}
638
639static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
640 int32_t section) {
641 uint32_t numSymbols = obj->getNumberOfSymbols();
642 for (uint32_t i = 0; i < numSymbols; ++i) {
643 COFFSymbolRef sym = check(e: obj->getSymbol(index: i));
644 if (sym.getSectionNumber() != section)
645 continue;
646 if (const coff_aux_section_definition *def = sym.getSectionDefinition())
647 return def;
648 }
649 return nullptr;
650}
651
652void ObjFile::handleComdatSelection(
653 COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
654 DefinedRegular *leader,
655 const llvm::object::coff_aux_section_definition *def) {
656 if (prevailing)
657 return;
658 // There's already an existing comdat for this symbol: `Leader`.
659 // Use the comdats's selection field to determine if the new
660 // symbol in `Sym` should be discarded, produce a duplicate symbol
661 // error, etc.
662
663 SectionChunk *leaderChunk = leader->getChunk();
664 COMDATType leaderSelection = leaderChunk->selection;
665 COFFLinkerContext &ctx = symtab.ctx;
666
667 assert(leader->data && "Comdat leader without SectionChunk?");
668 if (isa<BitcodeFile>(Val: leader->file)) {
669 // If the leader is only a LTO symbol, we don't know e.g. its final size
670 // yet, so we can't do the full strict comdat selection checking yet.
671 selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
672 }
673
674 if ((selection == IMAGE_COMDAT_SELECT_ANY &&
675 leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
676 (selection == IMAGE_COMDAT_SELECT_LARGEST &&
677 leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
678 // cl.exe picks "any" for vftables when building with /GR- and
679 // "largest" when building with /GR. To be able to link object files
680 // compiled with each flag, "any" and "largest" are merged as "largest".
681 leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
682 }
683
684 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
685 // Clang on the other hand picks "any". To be able to link two object files
686 // with a __declspec(selectany) declaration, one compiled with gcc and the
687 // other with clang, we merge them as proper "same size as"
688 if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
689 leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
690 (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
691 leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
692 leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
693 }
694
695 // Other than that, comdat selections must match. This is a bit more
696 // strict than link.exe which allows merging "any" and "largest" if "any"
697 // is the first symbol the linker sees, and it allows merging "largest"
698 // with everything (!) if "largest" is the first symbol the linker sees.
699 // Making this symmetric independent of which selection is seen first
700 // seems better though.
701 // (This behavior matches ModuleLinker::getComdatResult().)
702 if (selection != leaderSelection) {
703 Log(ctx) << "conflicting comdat type for " << symtab.printSymbol(sym: leader)
704 << ": " << (int)leaderSelection << " in " << leader->getFile()
705 << " and " << (int)selection << " in " << this;
706 symtab.reportDuplicate(existing: leader, newFile: this);
707 return;
708 }
709
710 switch (selection) {
711 case IMAGE_COMDAT_SELECT_NODUPLICATES:
712 symtab.reportDuplicate(existing: leader, newFile: this);
713 break;
714
715 case IMAGE_COMDAT_SELECT_ANY:
716 // Nothing to do.
717 break;
718
719 case IMAGE_COMDAT_SELECT_SAME_SIZE:
720 if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
721 if (!ctx.config.mingw) {
722 symtab.reportDuplicate(existing: leader, newFile: this);
723 } else {
724 const coff_aux_section_definition *leaderDef = nullptr;
725 if (leaderChunk->file)
726 leaderDef = findSectionDef(obj: leaderChunk->file->getCOFFObj(),
727 section: leaderChunk->getSectionNumber());
728 if (!leaderDef || leaderDef->Length != def->Length)
729 symtab.reportDuplicate(existing: leader, newFile: this);
730 }
731 }
732 break;
733
734 case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
735 SectionChunk newChunk(this, getSection(sym));
736 // link.exe only compares section contents here and doesn't complain
737 // if the two comdat sections have e.g. different alignment.
738 // Match that.
739 if (leaderChunk->getContents() != newChunk.getContents())
740 symtab.reportDuplicate(existing: leader, newFile: this, newSc: &newChunk, newSectionOffset: sym.getValue());
741 break;
742 }
743
744 case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
745 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
746 // (This means lld-link doesn't produce duplicate symbol errors for
747 // associative comdats while link.exe does, but associate comdats
748 // are never extern in practice.)
749 llvm_unreachable("createDefined not called for associative comdats");
750
751 case IMAGE_COMDAT_SELECT_LARGEST:
752 if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
753 // Replace the existing comdat symbol with the new one.
754 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
755 // FIXME: This is incorrect: With /opt:noref, the previous sections
756 // make it into the final executable as well. Correct handling would
757 // be to undo reading of the whole old section that's being replaced,
758 // or doing one pass that determines what the final largest comdat
759 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
760 // only the largest one.
761 replaceSymbol<DefinedRegular>(s: leader, arg: this, arg&: name, /*IsCOMDAT*/ arg: true,
762 /*IsExternal*/ arg: true, arg: sym.getGeneric(),
763 arg: nullptr);
764 prevailing = true;
765 }
766 break;
767
768 case IMAGE_COMDAT_SELECT_NEWEST:
769 llvm_unreachable("should have been rejected earlier");
770 }
771}
772
773std::optional<Symbol *> ObjFile::createDefined(
774 COFFSymbolRef sym,
775 std::vector<const coff_aux_section_definition *> &comdatDefs,
776 bool &prevailing) {
777 prevailing = false;
778 auto getName = [&]() { return check(e: coffObj->getSymbolName(Symbol: sym)); };
779
780 if (sym.isCommon()) {
781 auto *c = make<CommonChunk>(args&: sym);
782 chunks.push_back(x: c);
783 return symtab.addCommon(f: this, n: getName(), size: sym.getValue(), s: sym.getGeneric(),
784 c);
785 }
786
787 COFFLinkerContext &ctx = symtab.ctx;
788 if (sym.isAbsolute()) {
789 StringRef name = getName();
790
791 if (name == "@feat.00")
792 feat00Flags = sym.getValue();
793 // Skip special symbols.
794 if (ignoredSymbolName(name))
795 return nullptr;
796
797 if (sym.isExternal())
798 return symtab.addAbsolute(n: name, s: sym);
799 return make<DefinedAbsolute>(args&: ctx, args&: name, args&: sym);
800 }
801
802 int32_t sectionNumber = sym.getSectionNumber();
803 if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
804 return nullptr;
805
806 if (sym.isEmptySectionDeclaration()) {
807 // As there is no coff_section in the object file for these, make a
808 // new virtual one, with everything zeroed out (i.e. an empty section),
809 // with only the name and characteristics set.
810 StringRef name = getName();
811 auto *hdr = make<coff_section>();
812 memset(s: hdr, c: 0, n: sizeof(*hdr));
813 strncpy(dest: hdr->Name, src: name.data(),
814 n: std::min(a: name.size(), b: (size_t)COFF::NameSize));
815 // The Value field in a section symbol may contain the characteristics,
816 // or it may be zero, where we make something up (that matches what is
817 // used in .idata sections in the regular object files in import libraries).
818 if (sym.getValue())
819 hdr->Characteristics = sym.getValue() | IMAGE_SCN_ALIGN_4BYTES;
820 else
821 hdr->Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
822 IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE |
823 IMAGE_SCN_ALIGN_4BYTES;
824 auto *sc = make<SectionChunk>(args: this, args&: hdr);
825 chunks.push_back(x: sc);
826
827 auto *symGen = cloneSymbol(sym);
828 // Ignore the Value offset of these symbols, as it may be a bitmask.
829 symGen->Value = 0;
830 return make<DefinedRegular>(args: this, /*name=*/args: "", /*isCOMDAT=*/args: false,
831 /*isExternal=*/args: false, args&: symGen, args&: sc);
832 }
833
834 if (llvm::COFF::isReservedSectionNumber(SectionNumber: sectionNumber))
835 Fatal(ctx) << toString(file: this) << ": " << getName()
836 << " should not refer to special section "
837 << Twine(sectionNumber);
838
839 if ((uint32_t)sectionNumber >= sparseChunks.size())
840 Fatal(ctx) << toString(file: this) << ": " << getName()
841 << " should not refer to non-existent section "
842 << Twine(sectionNumber);
843
844 // Comdat handling.
845 // A comdat symbol consists of two symbol table entries.
846 // The first symbol entry has the name of the section (e.g. .text), fixed
847 // values for the other fields, and one auxiliary record.
848 // The second symbol entry has the name of the comdat symbol, called the
849 // "comdat leader".
850 // When this function is called for the first symbol entry of a comdat,
851 // it sets comdatDefs and returns std::nullopt, and when it's called for the
852 // second symbol entry it reads comdatDefs and then sets it back to nullptr.
853
854 // Handle comdat leader.
855 if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
856 comdatDefs[sectionNumber] = nullptr;
857 DefinedRegular *leader;
858
859 if (sym.isExternal()) {
860 std::tie(args&: leader, args&: prevailing) =
861 symtab.addComdat(f: this, n: getName(), s: sym.getGeneric());
862 } else {
863 leader = make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
864 /*IsExternal*/ args: false, args: sym.getGeneric());
865 prevailing = true;
866 }
867
868 if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
869 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
870 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
871 def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
872 Fatal(ctx) << "unknown comdat type "
873 << std::to_string(val: (int)def->Selection) << " for " << getName()
874 << " in " << toString(file: this);
875 }
876 COMDATType selection = (COMDATType)def->Selection;
877
878 if (leader->isCOMDAT)
879 handleComdatSelection(sym, selection, prevailing, leader, def);
880
881 if (prevailing) {
882 SectionChunk *c = readSection(sectionNumber, def, leaderName: getName());
883 sparseChunks[sectionNumber] = c;
884 if (!c)
885 return nullptr;
886 c->sym = cast<DefinedRegular>(Val: leader);
887 c->selection = selection;
888 cast<DefinedRegular>(Val: leader)->data = &c->repl;
889 } else {
890 sparseChunks[sectionNumber] = nullptr;
891 }
892 return leader;
893 }
894
895 // Prepare to handle the comdat leader symbol by setting the section's
896 // ComdatDefs pointer if we encounter a non-associative comdat.
897 if (sparseChunks[sectionNumber] == pendingComdat) {
898 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
899 if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
900 comdatDefs[sectionNumber] = def;
901 }
902 return std::nullopt;
903 }
904
905 return createRegular(sym);
906}
907
908MachineTypes ObjFile::getMachineType() const {
909 return static_cast<MachineTypes>(coffObj->getMachine());
910}
911
912ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
913 if (SectionChunk *sec = SectionChunk::findByName(sections: debugChunks, name: secName))
914 return sec->consumeDebugMagic();
915 return {};
916}
917
918// OBJ files systematically store critical information in a .debug$S stream,
919// even if the TU was compiled with no debug info. At least two records are
920// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
921// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
922// currently used to initialize the hotPatchable member.
923void ObjFile::initializeFlags() {
924 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$S");
925 if (data.empty())
926 return;
927
928 DebugSubsectionArray subsections;
929
930 BinaryStreamReader reader(data, llvm::endianness::little);
931 ExitOnError exitOnErr;
932 exitOnErr(reader.readArray(Array&: subsections, Size: data.size()));
933
934 for (const DebugSubsectionRecord &ss : subsections) {
935 if (ss.kind() != DebugSubsectionKind::Symbols)
936 continue;
937
938 unsigned offset = 0;
939
940 // Only parse the first two records. We are only looking for S_OBJNAME
941 // and S_COMPILE3, and they usually appear at the beginning of the
942 // stream.
943 for (unsigned i = 0; i < 2; ++i) {
944 Expected<CVSymbol> sym = readSymbolFromStream(Stream: ss.getRecordData(), Offset: offset);
945 if (!sym) {
946 consumeError(Err: sym.takeError());
947 return;
948 }
949 if (sym->kind() == SymbolKind::S_COMPILE3) {
950 auto cs =
951 cantFail(ValOrErr: SymbolDeserializer::deserializeAs<Compile3Sym>(Symbol: sym.get()));
952 hotPatchable =
953 (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
954 }
955 if (sym->kind() == SymbolKind::S_OBJNAME) {
956 auto objName = cantFail(ValOrErr: SymbolDeserializer::deserializeAs<ObjNameSym>(
957 Symbol: sym.get()));
958 if (objName.Signature)
959 pchSignature = objName.Signature;
960 }
961 offset += sym->length();
962 }
963 }
964}
965
966// Depending on the compilation flags, OBJs can refer to external files,
967// necessary to merge this OBJ into the final PDB. We currently support two
968// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
969// And PDB type servers, when compiling with /Zi. This function extracts these
970// dependencies and makes them available as a TpiSource interface (see
971// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
972// output even with /Yc and /Yu and with /Zi.
973void ObjFile::initializeDependencies() {
974 COFFLinkerContext &ctx = symtab.ctx;
975 if (!ctx.config.debug)
976 return;
977
978 bool isPCH = false;
979
980 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$P");
981 if (!data.empty())
982 isPCH = true;
983 else
984 data = getDebugSection(secName: ".debug$T");
985
986 // symbols but no types, make a plain, empty TpiSource anyway, because it
987 // simplifies adding the symbols later.
988 if (data.empty()) {
989 if (!debugChunks.empty())
990 debugTypesObj = makeTpiSource(ctx, f: this);
991 return;
992 }
993
994 // Get the first type record. It will indicate if this object uses a type
995 // server (/Zi) or a PCH file (/Yu).
996 CVTypeArray types;
997 BinaryStreamReader reader(data, llvm::endianness::little);
998 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
999 CVTypeArray::Iterator firstType = types.begin();
1000 if (firstType == types.end())
1001 return;
1002
1003 // Remember the .debug$T or .debug$P section.
1004 debugTypes = data;
1005
1006 // This object file is a PCH file that others will depend on.
1007 if (isPCH) {
1008 debugTypesObj = makePrecompSource(ctx, file: this);
1009 return;
1010 }
1011
1012 // This object file was compiled with /Zi. Enqueue the PDB dependency.
1013 if (firstType->kind() == LF_TYPESERVER2) {
1014 TypeServer2Record ts = cantFail(
1015 ValOrErr: TypeDeserializer::deserializeAs<TypeServer2Record>(Data: firstType->data()));
1016 debugTypesObj = makeUseTypeServerSource(ctx, file: this, ts);
1017 enqueuePdbFile(path: ts.getName(), fromFile: this);
1018 return;
1019 }
1020
1021 // This object was compiled with /Yu. It uses types from another object file
1022 // with a matching signature.
1023 if (firstType->kind() == LF_PRECOMP) {
1024 PrecompRecord precomp = cantFail(
1025 ValOrErr: TypeDeserializer::deserializeAs<PrecompRecord>(Data: firstType->data()));
1026 // We're better off trusting the LF_PRECOMP signature. In some cases the
1027 // S_OBJNAME record doesn't contain a valid PCH signature.
1028 if (precomp.Signature)
1029 pchSignature = precomp.Signature;
1030 debugTypesObj = makeUsePrecompSource(ctx, file: this, ts: precomp);
1031 // Drop the LF_PRECOMP record from the input stream.
1032 debugTypes = debugTypes.drop_front(N: firstType->RecordData.size());
1033 return;
1034 }
1035
1036 // This is a plain old object file.
1037 debugTypesObj = makeTpiSource(ctx, f: this);
1038}
1039
1040// The casing of the PDB path stamped in the OBJ can differ from the actual path
1041// on disk. With this, we ensure to always use lowercase as a key for the
1042// pdbInputFileInstances map, at least on Windows.
1043static std::string normalizePdbPath(StringRef path) {
1044#if defined(_WIN32)
1045 return path.lower();
1046#else // LINUX
1047 return std::string(path);
1048#endif
1049}
1050
1051// If existing, return the actual PDB path on disk.
1052static std::optional<std::string>
1053findPdbPath(StringRef pdbPath, ObjFile *dependentFile, StringRef outputPath) {
1054 // Ensure the file exists before anything else. In some cases, if the path
1055 // points to a removable device, Driver::enqueuePath() would fail with an
1056 // error (EAGAIN, "resource unavailable try again") which we want to skip
1057 // silently.
1058 if (llvm::sys::fs::exists(Path: pdbPath))
1059 return normalizePdbPath(path: pdbPath);
1060
1061 StringRef objPath = !dependentFile->parentName.empty()
1062 ? dependentFile->parentName
1063 : dependentFile->getName();
1064
1065 // Currently, type server PDBs are only created by MSVC cl, which only runs
1066 // on Windows, so we can assume type server paths are Windows style.
1067 StringRef pdbName = sys::path::filename(path: pdbPath, style: sys::path::Style::windows);
1068
1069 // Check if the PDB is in the same folder as the OBJ.
1070 SmallString<128> path;
1071 sys::path::append(path, a: sys::path::parent_path(path: objPath), b: pdbName);
1072 if (llvm::sys::fs::exists(Path: path))
1073 return normalizePdbPath(path);
1074
1075 // Check if the PDB is in the output folder.
1076 path.clear();
1077 sys::path::append(path, a: sys::path::parent_path(path: outputPath), b: pdbName);
1078 if (llvm::sys::fs::exists(Path: path))
1079 return normalizePdbPath(path);
1080
1081 return std::nullopt;
1082}
1083
1084PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1085 : InputFile(ctx.symtab, PDBKind, m) {}
1086
1087PDBInputFile::~PDBInputFile() = default;
1088
1089PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
1090 StringRef path,
1091 ObjFile *fromFile) {
1092 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: ctx.config.outputFile);
1093 if (!p)
1094 return nullptr;
1095 auto it = ctx.pdbInputFileInstances.find(x: *p);
1096 if (it != ctx.pdbInputFileInstances.end())
1097 return it->second;
1098 return nullptr;
1099}
1100
1101void PDBInputFile::parse() {
1102 symtab.ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
1103
1104 std::unique_ptr<pdb::IPDBSession> thisSession;
1105 Error E = pdb::NativeSession::createFromPdb(
1106 MB: MemoryBuffer::getMemBuffer(Ref: mb, RequiresNullTerminator: false), Session&: thisSession);
1107 if (E) {
1108 loadErrorStr.emplace(args: toString(E: std::move(E)));
1109 return; // fail silently at this point - the error will be handled later,
1110 // when merging the debug type stream
1111 }
1112
1113 session.reset(p: static_cast<pdb::NativeSession *>(thisSession.release()));
1114
1115 pdb::PDBFile &pdbFile = session->getPDBFile();
1116 auto expectedInfo = pdbFile.getPDBInfoStream();
1117 // All PDB Files should have an Info stream.
1118 if (!expectedInfo) {
1119 loadErrorStr.emplace(args: toString(E: expectedInfo.takeError()));
1120 return;
1121 }
1122 debugTypesObj = makeTypeServerSource(ctx&: symtab.ctx, pdbInputFile: this);
1123}
1124
1125// Used only for DWARF debug info, which is not common (except in MinGW
1126// environments). This returns an optional pair of file name and line
1127// number for where the variable was defined.
1128std::optional<std::pair<StringRef, uint32_t>>
1129ObjFile::getVariableLocation(StringRef var) {
1130 if (!dwarf) {
1131 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1132 if (!dwarf)
1133 return std::nullopt;
1134 }
1135 if (symtab.machine == I386)
1136 var.consume_front(Prefix: "_");
1137 std::optional<std::pair<std::string, unsigned>> ret =
1138 dwarf->getVariableLoc(name: var);
1139 if (!ret)
1140 return std::nullopt;
1141 return std::make_pair(x: saver().save(S: ret->first), y&: ret->second);
1142}
1143
1144// Used only for DWARF debug info, which is not common (except in MinGW
1145// environments).
1146std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
1147 uint32_t sectionIndex) {
1148 if (!dwarf) {
1149 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1150 if (!dwarf)
1151 return std::nullopt;
1152 }
1153
1154 return dwarf->getDILineInfo(offset, sectionIndex);
1155}
1156
1157void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
1158 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: symtab.ctx.config.outputFile);
1159 if (!p)
1160 return;
1161 auto it = symtab.ctx.pdbInputFileInstances.emplace(args&: *p, args: nullptr);
1162 if (!it.second)
1163 return; // already scheduled for load
1164 symtab.ctx.driver.enqueuePDB(Path: *p);
1165}
1166
1167ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1168 : InputFile(ctx.getSymtab(machine: getMachineType(m)), ImportKind, m),
1169 live(!ctx.config.doGC) {}
1170
1171MachineTypes ImportFile::getMachineType(MemoryBufferRef m) {
1172 uint16_t machine =
1173 reinterpret_cast<const coff_import_header *>(m.getBufferStart())->Machine;
1174 return MachineTypes(machine);
1175}
1176
1177bool ImportFile::isSameImport(const ImportFile *other) const {
1178 if (!externalName.empty())
1179 return other->externalName == externalName;
1180 return hdr->OrdinalHint == other->hdr->OrdinalHint;
1181}
1182
1183ImportThunkChunk *ImportFile::makeImportThunk() {
1184 switch (hdr->Machine) {
1185 case AMD64:
1186 return make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym);
1187 case I386:
1188 return make<ImportThunkChunkX86>(args&: symtab.ctx, args&: impSym);
1189 case ARM64:
1190 return make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impSym, args: ARM64);
1191 case ARMNT:
1192 return make<ImportThunkChunkARM>(args&: symtab.ctx, args&: impSym);
1193 }
1194 llvm_unreachable("unknown machine type");
1195}
1196
1197void ImportFile::parse() {
1198 const auto *hdr =
1199 reinterpret_cast<const coff_import_header *>(mb.getBufferStart());
1200
1201 // Check if the total size is valid.
1202 if (mb.getBufferSize() < sizeof(*hdr) ||
1203 mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
1204 Fatal(ctx&: symtab.ctx) << "broken import library";
1205
1206 // Read names and create an __imp_ symbol.
1207 StringRef buf = mb.getBuffer().substr(Start: sizeof(*hdr));
1208 auto split = buf.split(Separator: '\0');
1209 buf = split.second;
1210 StringRef name;
1211 if (isArm64EC(Machine: hdr->Machine)) {
1212 if (std::optional<std::string> demangledName =
1213 getArm64ECDemangledFunctionName(Name: split.first))
1214 name = saver().save(S: *demangledName);
1215 }
1216 if (name.empty())
1217 name = saver().save(S: split.first);
1218 StringRef impName = saver().save(S: "__imp_" + name);
1219 dllName = buf.split(Separator: '\0').first;
1220 StringRef extName;
1221 switch (hdr->getNameType()) {
1222 case IMPORT_ORDINAL:
1223 extName = "";
1224 break;
1225 case IMPORT_NAME:
1226 extName = name;
1227 break;
1228 case IMPORT_NAME_NOPREFIX:
1229 extName = ltrim1(s: name, chars: "?@_");
1230 break;
1231 case IMPORT_NAME_UNDECORATE:
1232 extName = ltrim1(s: name, chars: "?@_");
1233 extName = extName.substr(Start: 0, N: extName.find(C: '@'));
1234 break;
1235 case IMPORT_NAME_EXPORTAS:
1236 extName = buf.substr(Start: dllName.size() + 1).split(Separator: '\0').first;
1237 break;
1238 }
1239
1240 this->hdr = hdr;
1241 externalName = extName;
1242
1243 bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
1244
1245 if (!symtab.isEC()) {
1246 impSym = symtab.addImportData(n: impName, f: this, location);
1247 } else {
1248 // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
1249 // which holds addresses that are guaranteed to be callable directly from
1250 // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
1251 // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
1252 // data imports, the naming is reversed.
1253 StringRef auxImpName = saver().save(S: "__imp_aux_" + name);
1254 if (isCode) {
1255 impSym = symtab.addImportData(n: auxImpName, f: this, location);
1256 impECSym = symtab.addImportData(n: impName, f: this, location&: auxLocation);
1257 } else {
1258 impSym = symtab.addImportData(n: impName, f: this, location);
1259 impECSym = symtab.addImportData(n: auxImpName, f: this, location&: auxLocation);
1260 }
1261 if (!impECSym)
1262 return;
1263
1264 StringRef auxImpCopyName = saver().save(S: "__auximpcopy_" + name);
1265 auxImpCopySym = symtab.addImportData(n: auxImpCopyName, f: this, location&: auxCopyLocation);
1266 if (!auxImpCopySym)
1267 return;
1268 }
1269 // If this was a duplicate, we logged an error but may continue;
1270 // in this case, impSym is nullptr.
1271 if (!impSym)
1272 return;
1273
1274 if (hdr->getType() == llvm::COFF::IMPORT_CONST)
1275 static_cast<void>(symtab.addImportData(n: name, f: this, location));
1276
1277 // If type is function, we need to create a thunk which jump to an
1278 // address pointed by the __imp_ symbol. (This allows you to call
1279 // DLL functions just like regular non-DLL functions.)
1280 if (isCode) {
1281 if (!symtab.isEC()) {
1282 thunkSym = symtab.addImportThunk(name, s: impSym, chunk: makeImportThunk());
1283 } else {
1284 thunkSym = symtab.addImportThunk(
1285 name, s: impSym, chunk: make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym));
1286
1287 if (std::optional<std::string> mangledName =
1288 getArm64ECMangledFunctionName(Name: name)) {
1289 StringRef auxThunkName = saver().save(S: *mangledName);
1290 auxThunkSym = symtab.addImportThunk(
1291 name: auxThunkName, s: impECSym,
1292 chunk: make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impECSym, args: ARM64EC));
1293 }
1294
1295 StringRef impChkName = saver().save(S: "__impchk_" + name);
1296 impchkThunk = make<ImportThunkChunkARM64EC>(args: this);
1297 impchkThunk->sym = symtab.addImportThunk(name: impChkName, s: impSym, chunk: impchkThunk);
1298 symtab.ctx.driver.pullArm64ECIcallHelper();
1299 }
1300 }
1301}
1302
1303BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
1304 std::unique_ptr<lto::InputFile> &o, bool lazy)
1305 : InputFile(symtab, BitcodeKind, mb, lazy) {
1306 obj.swap(u&: o);
1307}
1308
1309BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
1310 StringRef archiveName,
1311 uint64_t offsetInArchive, bool lazy) {
1312 std::string path = mb.getBufferIdentifier().str();
1313 if (ctx.config.thinLTOIndexOnly)
1314 path = replaceThinLTOSuffix(path: mb.getBufferIdentifier(),
1315 suffix: ctx.config.thinLTOObjectSuffixReplace.first,
1316 repl: ctx.config.thinLTOObjectSuffixReplace.second);
1317
1318 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1319 // name. If two archives define two members with the same name, this
1320 // causes a collision which result in only one of the objects being taken
1321 // into consideration at LTO time (which very likely causes undefined
1322 // symbols later in the link stage). So we append file offset to make
1323 // filename unique.
1324 MemoryBufferRef mbref(mb.getBuffer(),
1325 saver().save(S: archiveName.empty()
1326 ? path
1327 : archiveName +
1328 sys::path::filename(path) +
1329 utostr(X: offsetInArchive)));
1330
1331 std::unique_ptr<lto::InputFile> obj = check(e: lto::InputFile::create(Object: mbref));
1332 return make<BitcodeFile>(args&: ctx.getSymtab(machine: getMachineType(obj: obj.get())), args&: mb, args&: obj,
1333 args&: lazy);
1334}
1335
1336BitcodeFile::~BitcodeFile() = default;
1337
1338void BitcodeFile::parse() {
1339 llvm::StringSaver &saver = lld::saver();
1340
1341 std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1342 for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1343 // FIXME: Check nodeduplicate
1344 comdat[i] =
1345 symtab.addComdat(f: this, n: saver.save(S: obj->getComdatTable()[i].first));
1346 for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1347 StringRef symName = saver.save(S: objSym.getName());
1348 int comdatIndex = objSym.getComdatIndex();
1349 Symbol *sym;
1350 SectionChunk *fakeSC = nullptr;
1351 if (objSym.isExecutable())
1352 fakeSC = &symtab.ctx.ltoTextSectionChunk.chunk;
1353 else
1354 fakeSC = &symtab.ctx.ltoDataSectionChunk.chunk;
1355 if (objSym.isUndefined()) {
1356 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1357 if (objSym.isWeak())
1358 sym->deferUndefined = true;
1359 // If one LTO object file references (i.e. has an undefined reference to)
1360 // a symbol with an __imp_ prefix, the LTO compilation itself sees it
1361 // as unprefixed but with a dllimport attribute instead, and doesn't
1362 // understand the relation to a concrete IR symbol with the __imp_ prefix.
1363 //
1364 // For such cases, mark the symbol as used in a regular object (i.e. the
1365 // symbol must be retained) so that the linker can associate the
1366 // references in the end. If the symbol is defined in an import library
1367 // or in a regular object file, this has no effect, but if it is defined
1368 // in another LTO object file, this makes sure it is kept, to fulfill
1369 // the reference when linking the output of the LTO compilation.
1370 if (symName.starts_with(Prefix: "__imp_"))
1371 sym->isUsedInRegularObj = true;
1372 } else if (objSym.isCommon()) {
1373 sym = symtab.addCommon(f: this, n: symName, size: objSym.getCommonSize());
1374 } else if (objSym.isWeak() && objSym.isIndirect()) {
1375 // Weak external.
1376 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: true);
1377 std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1378 Symbol *alias = symtab.addUndefined(name: saver.save(S: fallback));
1379 checkAndSetWeakAlias(symtab, f: this, source: sym, target: alias, isAntiDep: false);
1380 } else if (comdatIndex != -1) {
1381 if (symName == obj->getComdatTable()[comdatIndex].first) {
1382 sym = comdat[comdatIndex].first;
1383 if (cast<DefinedRegular>(Val: sym)->data == nullptr)
1384 cast<DefinedRegular>(Val: sym)->data = &fakeSC->repl;
1385 } else if (comdat[comdatIndex].second) {
1386 sym = symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC);
1387 } else {
1388 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1389 }
1390 } else {
1391 sym =
1392 symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC, sectionOffset: 0, isWeak: objSym.isWeak());
1393 }
1394 symbols.push_back(x: sym);
1395 if (objSym.isUsed())
1396 symtab.ctx.config.gcroot.push_back(x: sym);
1397 }
1398 directives = saver.save(S: obj->getCOFFLinkerOpts());
1399}
1400
1401void BitcodeFile::parseLazy() {
1402 for (const lto::InputFile::Symbol &sym : obj->symbols())
1403 if (!sym.isUndefined()) {
1404 symtab.addLazyObject(f: this, n: sym.getName());
1405 if (!lazy)
1406 return;
1407 }
1408}
1409
1410MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
1411 Triple t(obj->getTargetTriple());
1412 switch (t.getArch()) {
1413 case Triple::x86_64:
1414 return AMD64;
1415 case Triple::x86:
1416 return I386;
1417 case Triple::arm:
1418 case Triple::thumb:
1419 return ARMNT;
1420 case Triple::aarch64:
1421 return t.isWindowsArm64EC() ? ARM64EC : ARM64;
1422 default:
1423 return IMAGE_FILE_MACHINE_UNKNOWN;
1424 }
1425}
1426
1427std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix,
1428 StringRef repl) {
1429 if (path.consume_back(Suffix: suffix))
1430 return (path + repl).str();
1431 return std::string(path);
1432}
1433
1434static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1435 for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1436 const coff_section *sec = CHECK(coffObj->getSection(i), file);
1437 if (rva >= sec->VirtualAddress &&
1438 rva <= sec->VirtualAddress + sec->VirtualSize) {
1439 return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1440 }
1441 }
1442 return false;
1443}
1444
1445void DLLFile::parse() {
1446 // Parse a memory buffer as a PE-COFF executable.
1447 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1448
1449 if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) {
1450 bin.release();
1451 coffObj.reset(p: obj);
1452 } else {
1453 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a COFF file";
1454 return;
1455 }
1456
1457 if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1458 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a PE-COFF executable";
1459 return;
1460 }
1461
1462 for (const auto &exp : coffObj->export_directories()) {
1463 StringRef dllName, symbolName;
1464 uint32_t exportRVA;
1465 checkError(e: exp.getDllName(Result&: dllName));
1466 checkError(e: exp.getSymbolName(Result&: symbolName));
1467 checkError(e: exp.getExportRVA(Result&: exportRVA));
1468
1469 if (symbolName.empty())
1470 continue;
1471
1472 bool code = isRVACode(coffObj: coffObj.get(), rva: exportRVA, file: this);
1473
1474 Symbol *s = make<Symbol>();
1475 s->dllName = dllName;
1476 s->symbolName = symbolName;
1477 s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1478 s->nameType = ImportNameType::IMPORT_NAME;
1479
1480 if (coffObj->getMachine() == I386) {
1481 s->symbolName = symbolName = saver().save(S: "_" + symbolName);
1482 s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1483 }
1484
1485 StringRef impName = saver().save(S: "__imp_" + symbolName);
1486 symtab.addLazyDLLSymbol(f: this, sym: s, n: impName);
1487 if (code)
1488 symtab.addLazyDLLSymbol(f: this, sym: s, n: symbolName);
1489 if (symtab.isEC()) {
1490 StringRef impAuxName = saver().save(S: "__imp_aux_" + symbolName);
1491 symtab.addLazyDLLSymbol(f: this, sym: s, n: impAuxName);
1492
1493 if (code) {
1494 std::optional<std::string> mangledName =
1495 getArm64ECMangledFunctionName(Name: symbolName);
1496 if (mangledName)
1497 symtab.addLazyDLLSymbol(f: this, sym: s, n: *mangledName);
1498 }
1499 }
1500 }
1501}
1502
1503MachineTypes DLLFile::getMachineType() const {
1504 if (coffObj)
1505 return static_cast<MachineTypes>(coffObj->getMachine());
1506 return IMAGE_FILE_MACHINE_UNKNOWN;
1507}
1508
1509void DLLFile::makeImport(DLLFile::Symbol *s) {
1510 if (!seen.insert(key: s->symbolName).second)
1511 return;
1512
1513 size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1514 size_t size = sizeof(coff_import_header) + impSize;
1515 char *buf = bAlloc().Allocate<char>(Num: size);
1516 memset(s: buf, c: 0, n: size);
1517 char *p = buf;
1518 auto *imp = reinterpret_cast<coff_import_header *>(p);
1519 p += sizeof(*imp);
1520 imp->Sig2 = 0xFFFF;
1521 imp->Machine = coffObj->getMachine();
1522 imp->SizeOfData = impSize;
1523 imp->OrdinalHint = 0; // Only linking by name
1524 imp->TypeInfo = (s->nameType << 2) | s->importType;
1525
1526 // Write symbol name and DLL name.
1527 memcpy(dest: p, src: s->symbolName.data(), n: s->symbolName.size());
1528 p += s->symbolName.size() + 1;
1529 memcpy(dest: p, src: s->dllName.data(), n: s->dllName.size());
1530 MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1531 ImportFile *impFile = make<ImportFile>(args&: symtab.ctx, args&: mbref);
1532 symtab.ctx.driver.addFile(file: impFile);
1533}
1534

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of lld/COFF/InputFiles.cpp