1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "DebugTypes.h"
14#include "Driver.h"
15#include "SymbolTable.h"
16#include "Symbols.h"
17#include "lld/Common/DWARF.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/BinaryFormat/COFF.h"
21#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
23#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
24#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
25#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27#include "llvm/IR/Mangler.h"
28#include "llvm/LTO/LTO.h"
29#include "llvm/Object/Binary.h"
30#include "llvm/Object/COFF.h"
31#include "llvm/Object/COFFImportFile.h"
32#include "llvm/Support/Casting.h"
33#include "llvm/Support/Endian.h"
34#include "llvm/Support/Error.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/Path.h"
37#include "llvm/TargetParser/Triple.h"
38#include <cstring>
39#include <optional>
40#include <utility>
41
42using namespace llvm;
43using namespace llvm::COFF;
44using namespace llvm::codeview;
45using namespace llvm::object;
46using namespace llvm::support::endian;
47using namespace lld;
48using namespace lld::coff;
49
50using llvm::Triple;
51using llvm::support::ulittle32_t;
52
53// Returns the last element of a path, which is supposed to be a filename.
54static StringRef getBasename(StringRef path) {
55 return sys::path::filename(path, style: sys::path::Style::windows);
56}
57
58// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
59std::string lld::toString(const coff::InputFile *file) {
60 if (!file)
61 return "<internal>";
62 if (file->parentName.empty())
63 return std::string(file->getName());
64
65 return (getBasename(path: file->parentName) + "(" + getBasename(path: file->getName()) +
66 ")")
67 .str();
68}
69
70const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
71 const InputFile *f) {
72 return s << toString(file: f);
73}
74
75/// Checks that Source is compatible with being a weak alias to Target.
76/// If Source is Undefined and has no weak alias set, makes it a weak
77/// alias to Target.
78static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
79 Symbol *source, Symbol *target,
80 bool isAntiDep) {
81 if (auto *u = dyn_cast<Undefined>(Val: source)) {
82 if (u->weakAlias && u->weakAlias != target) {
83 // Ignore duplicated anti-dependency symbols.
84 if (isAntiDep)
85 return;
86 if (!u->isAntiDep) {
87 // Weak aliases as produced by GCC are named in the form
88 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
89 // of another symbol emitted near the weak symbol.
90 // Just use the definition from the first object file that defined
91 // this weak symbol.
92 if (symtab.ctx.config.allowDuplicateWeak)
93 return;
94 symtab.reportDuplicate(existing: source, newFile: f);
95 }
96 }
97 u->setWeakAlias(sym: target, antiDep: isAntiDep);
98 }
99}
100
101static bool ignoredSymbolName(StringRef name) {
102 return name == "@feat.00" || name == "@comp.id";
103}
104
105static coff_symbol_generic *cloneSymbol(COFFSymbolRef sym) {
106 if (sym.isBigObj()) {
107 auto *copy = make<coff_symbol32>(
108 args: *reinterpret_cast<const coff_symbol32 *>(sym.getRawPtr()));
109 return reinterpret_cast<coff_symbol_generic *>(copy);
110 } else {
111 auto *copy = make<coff_symbol16>(
112 args: *reinterpret_cast<const coff_symbol16 *>(sym.getRawPtr()));
113 return reinterpret_cast<coff_symbol_generic *>(copy);
114 }
115}
116
117// Skip importing DllMain thunks from import libraries.
118static bool fixupDllMain(COFFLinkerContext &ctx, llvm::object::Archive *file,
119 const Archive::Symbol &sym, bool &skipDllMain) {
120 const Archive::Child &c =
121 CHECK(sym.getMember(), file->getFileName() +
122 ": could not get the member for symbol " +
123 toCOFFString(ctx, sym));
124 MemoryBufferRef mb =
125 CHECK(c.getMemoryBufferRef(),
126 file->getFileName() +
127 ": could not get the buffer for a child buffer of the archive");
128 if (identify_magic(magic: mb.getBuffer()) == file_magic::coff_import_library) {
129 if (ctx.config.warnImportedDllMain) {
130 // We won't place DllMain symbols in the symbol table if they are
131 // coming from a import library. This message can be ignored with the flag
132 // '/ignore:importeddllmain'
133 Warn(ctx)
134 << file->getFileName()
135 << ": skipping imported DllMain symbol [importeddllmain]\nNOTE: this "
136 "might be a mistake when the DLL/library was produced.";
137 }
138 skipDllMain = true;
139 return true;
140 }
141 return false;
142}
143
144ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
145 : InputFile(ctx.symtab, ArchiveKind, m) {}
146
147void ArchiveFile::parse() {
148 COFFLinkerContext &ctx = symtab.ctx;
149 SymbolTable *archiveSymtab = &symtab;
150
151 // Parse a MemoryBufferRef as an archive file.
152 file = CHECK(Archive::create(mb), this);
153
154 // Try to read symbols from ECSYMBOLS section on ARM64EC.
155 if (ctx.symtab.isEC()) {
156 iterator_range<Archive::symbol_iterator> symbols =
157 CHECK(file->ec_symbols(), this);
158 if (!symbols.empty()) {
159 for (const Archive::Symbol &sym : symbols)
160 ctx.symtab.addLazyArchive(f: this, sym);
161
162 // Read both EC and native symbols on ARM64X.
163 archiveSymtab = &*ctx.hybridSymtab;
164 } else {
165 // If the ECSYMBOLS section is missing in the archive, the archive could
166 // be either a native-only ARM64 or x86_64 archive. Check the machine type
167 // of the object containing a symbol to determine which symbol table to
168 // use.
169 Archive::symbol_iterator sym = file->symbol_begin();
170 if (sym != file->symbol_end()) {
171 MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
172 Archive::Child child =
173 CHECK(sym->getMember(),
174 file->getFileName() +
175 ": could not get the buffer for a child of the archive");
176 MemoryBufferRef mb = CHECK(
177 child.getMemoryBufferRef(),
178 file->getFileName() +
179 ": could not get the buffer for a child buffer of the archive");
180 switch (identify_magic(magic: mb.getBuffer())) {
181 case file_magic::coff_object: {
182 std::unique_ptr<COFFObjectFile> obj =
183 CHECK(COFFObjectFile::create(mb),
184 check(child.getName()) + ":" + ": not a valid COFF file");
185 machine = MachineTypes(obj->getMachine());
186 break;
187 }
188 case file_magic::coff_import_library:
189 machine = MachineTypes(COFFImportFile(mb).getMachine());
190 break;
191 case file_magic::bitcode: {
192 std::unique_ptr<lto::InputFile> obj =
193 check(e: lto::InputFile::create(Object: mb));
194 machine = BitcodeFile::getMachineType(obj: obj.get());
195 break;
196 }
197 default:
198 break;
199 }
200 archiveSymtab = &ctx.getSymtab(machine);
201 }
202 }
203 }
204
205 bool skipDllMain = false;
206 StringRef mangledDllMain, impMangledDllMain;
207
208 // The calls below will fail if we haven't set the machine type yet. Instead
209 // of failing, it is preferable to skip this "imported DllMain" check if we
210 // don't know the machine type at this point.
211 if (!file->isEmpty() && ctx.config.machine != IMAGE_FILE_MACHINE_UNKNOWN) {
212 mangledDllMain = archiveSymtab->mangle(sym: "DllMain");
213 impMangledDllMain = uniqueSaver().save(S: "__imp_" + mangledDllMain);
214 }
215
216 // Read the symbol table to construct Lazy objects.
217 for (const Archive::Symbol &sym : file->symbols()) {
218 // If an import library provides the DllMain symbol, skip importing it, as
219 // we should be using our own DllMain, not another DLL's DllMain.
220 if (!mangledDllMain.empty() && (sym.getName() == mangledDllMain ||
221 sym.getName() == impMangledDllMain)) {
222 if (skipDllMain || fixupDllMain(ctx, file: file.get(), sym, skipDllMain))
223 continue;
224 }
225 archiveSymtab->addLazyArchive(f: this, sym);
226 }
227}
228
229// Returns a buffer pointing to a member file containing a given symbol.
230void ArchiveFile::addMember(const Archive::Symbol &sym) {
231 const Archive::Child &c =
232 CHECK(sym.getMember(), "could not get the member for symbol " +
233 toCOFFString(symtab.ctx, sym));
234
235 // Return an empty buffer if we have already returned the same buffer.
236 // FIXME: Remove this once we resolve all defineds before all undefineds in
237 // ObjFile::initializeSymbols().
238 if (!seen.insert(V: c.getChildOffset()).second)
239 return;
240
241 symtab.ctx.driver.enqueueArchiveMember(c, sym, parentName: getName());
242}
243
244std::vector<MemoryBufferRef>
245lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
246 std::vector<MemoryBufferRef> v;
247 Error err = Error::success();
248
249 // Thin archives refer to .o files, so --reproduces needs the .o files too.
250 bool addToTar = file->isThin() && ctx.driver.tar;
251
252 for (const Archive::Child &c : file->children(Err&: err)) {
253 MemoryBufferRef mbref =
254 CHECK(c.getMemoryBufferRef(),
255 file->getFileName() +
256 ": could not get the buffer for a child of the archive");
257 if (addToTar) {
258 ctx.driver.tar->append(Path: relativeToRoot(path: check(e: c.getFullName())),
259 Data: mbref.getBuffer());
260 }
261 v.push_back(x: mbref);
262 }
263 if (err)
264 Fatal(ctx) << file->getFileName()
265 << ": Archive::children failed: " << toString(E: std::move(err));
266 return v;
267}
268
269ObjFile::ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy)
270 : InputFile(symtab, ObjectKind, coffObj->getMemoryBufferRef(), lazy),
271 coffObj(coffObj) {}
272
273ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) {
274 // Parse a memory buffer as a COFF file.
275 Expected<std::unique_ptr<Binary>> bin = createBinary(Source: m);
276 if (!bin)
277 Fatal(ctx) << "Could not parse " << m.getBufferIdentifier();
278
279 auto *obj = dyn_cast<COFFObjectFile>(Val: bin->get());
280 if (!obj)
281 Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file";
282
283 bin->release();
284 return make<ObjFile>(args&: ctx.getSymtab(machine: MachineTypes(obj->getMachine())), args&: obj,
285 args&: lazy);
286}
287
288void ObjFile::parseLazy() {
289 // Native object file.
290 uint32_t numSymbols = coffObj->getNumberOfSymbols();
291 for (uint32_t i = 0; i < numSymbols; ++i) {
292 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
293 if (coffSym.isUndefined() || !coffSym.isExternal() ||
294 coffSym.isWeakExternal())
295 continue;
296 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
297 if (coffSym.isAbsolute() && ignoredSymbolName(name))
298 continue;
299 symtab.addLazyObject(f: this, n: name);
300 if (!lazy)
301 return;
302 i += coffSym.getNumberOfAuxSymbols();
303 }
304}
305
306struct ECMapEntry {
307 ulittle32_t src;
308 ulittle32_t dst;
309 ulittle32_t type;
310};
311
312void ObjFile::initializeECThunks() {
313 for (SectionChunk *chunk : hybmpChunks) {
314 if (chunk->getContents().size() % sizeof(ECMapEntry)) {
315 Err(ctx&: symtab.ctx) << "Invalid .hybmp chunk size "
316 << chunk->getContents().size();
317 continue;
318 }
319
320 const uint8_t *end =
321 chunk->getContents().data() + chunk->getContents().size();
322 for (const uint8_t *iter = chunk->getContents().data(); iter != end;
323 iter += sizeof(ECMapEntry)) {
324 auto entry = reinterpret_cast<const ECMapEntry *>(iter);
325 switch (entry->type) {
326 case Arm64ECThunkType::Entry:
327 symtab.addEntryThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
328 break;
329 case Arm64ECThunkType::Exit:
330 symtab.addExitThunk(from: getSymbol(symbolIndex: entry->src), to: getSymbol(symbolIndex: entry->dst));
331 break;
332 case Arm64ECThunkType::GuestExit:
333 break;
334 default:
335 Warn(ctx&: symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
336 }
337 }
338 }
339}
340
341void ObjFile::parse() {
342 // Read section and symbol tables.
343 initializeChunks();
344 initializeSymbols();
345 initializeFlags();
346 initializeDependencies();
347 initializeECThunks();
348}
349
350const coff_section *ObjFile::getSection(uint32_t i) {
351 auto sec = coffObj->getSection(index: i);
352 if (!sec)
353 Fatal(ctx&: symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
354 return *sec;
355}
356
357// We set SectionChunk pointers in the SparseChunks vector to this value
358// temporarily to mark comdat sections as having an unknown resolution. As we
359// walk the object file's symbol table, once we visit either a leader symbol or
360// an associative section definition together with the parent comdat's leader,
361// we set the pointer to either nullptr (to mark the section as discarded) or a
362// valid SectionChunk for that section.
363static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
364
365void ObjFile::initializeChunks() {
366 uint32_t numSections = coffObj->getNumberOfSections();
367 sparseChunks.resize(new_size: numSections + 1);
368 for (uint32_t i = 1; i < numSections + 1; ++i) {
369 const coff_section *sec = getSection(i);
370 if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
371 sparseChunks[i] = pendingComdat;
372 else
373 sparseChunks[i] = readSection(sectionNumber: i, def: nullptr, leaderName: "");
374 }
375}
376
377SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
378 const coff_aux_section_definition *def,
379 StringRef leaderName) {
380 const coff_section *sec = getSection(i: sectionNumber);
381
382 StringRef name;
383 if (Expected<StringRef> e = coffObj->getSectionName(Sec: sec))
384 name = *e;
385 else
386 Fatal(ctx&: symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
387 << e.takeError();
388
389 if (name == ".drectve") {
390 ArrayRef<uint8_t> data;
391 cantFail(Err: coffObj->getSectionContents(Sec: sec, Res&: data));
392 directives = StringRef((const char *)data.data(), data.size());
393 return nullptr;
394 }
395
396 if (name == ".llvm_addrsig") {
397 addrsigSec = sec;
398 return nullptr;
399 }
400
401 if (name == ".llvm.call-graph-profile") {
402 callgraphSec = sec;
403 return nullptr;
404 }
405
406 // Object files may have DWARF debug info or MS CodeView debug info
407 // (or both).
408 //
409 // DWARF sections don't need any special handling from the perspective
410 // of the linker; they are just a data section containing relocations.
411 // We can just link them to complete debug info.
412 //
413 // CodeView needs linker support. We need to interpret debug info,
414 // and then write it to a separate .pdb file.
415
416 // Ignore DWARF debug info unless requested to be included.
417 if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(Prefix: ".debug_"))
418 return nullptr;
419
420 if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
421 return nullptr;
422 SectionChunk *c;
423 if (isArm64EC(Machine: getMachineType()))
424 c = make<SectionChunkEC>(args: this, args&: sec);
425 else
426 c = make<SectionChunk>(args: this, args&: sec);
427 if (def)
428 c->checksum = def->CheckSum;
429
430 // CodeView sections are stored to a different vector because they are not
431 // linked in the regular manner.
432 if (c->isCodeView())
433 debugChunks.push_back(x: c);
434 else if (name == ".gfids$y")
435 guardFidChunks.push_back(x: c);
436 else if (name == ".giats$y")
437 guardIATChunks.push_back(x: c);
438 else if (name == ".gljmp$y")
439 guardLJmpChunks.push_back(x: c);
440 else if (name == ".gehcont$y")
441 guardEHContChunks.push_back(x: c);
442 else if (name == ".sxdata")
443 sxDataChunks.push_back(x: c);
444 else if (isArm64EC(Machine: getMachineType()) && name == ".hybmp$x")
445 hybmpChunks.push_back(x: c);
446 else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
447 name == ".rdata" && leaderName.starts_with(Prefix: "??_C@"))
448 // COFF sections that look like string literal sections (i.e. no
449 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
450 // for string literals) are subject to string tail merging.
451 MergeChunk::addSection(ctx&: symtab.ctx, c);
452 else if (name == ".rsrc" || name.starts_with(Prefix: ".rsrc$"))
453 resourceChunks.push_back(x: c);
454 else if (!(sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO))
455 chunks.push_back(x: c);
456
457 return c;
458}
459
460void ObjFile::includeResourceChunks() {
461 chunks.insert(position: chunks.end(), first: resourceChunks.begin(), last: resourceChunks.end());
462}
463
464void ObjFile::readAssociativeDefinition(
465 COFFSymbolRef sym, const coff_aux_section_definition *def) {
466 readAssociativeDefinition(coffSym: sym, def, parentSection: def->getNumber(IsBigObj: sym.isBigObj()));
467}
468
469void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
470 const coff_aux_section_definition *def,
471 uint32_t parentIndex) {
472 SectionChunk *parent = sparseChunks[parentIndex];
473 int32_t sectionNumber = sym.getSectionNumber();
474
475 auto diag = [&]() {
476 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
477
478 StringRef parentName;
479 const coff_section *parentSec = getSection(i: parentIndex);
480 if (Expected<StringRef> e = coffObj->getSectionName(Sec: parentSec))
481 parentName = *e;
482 Err(ctx&: symtab.ctx) << toString(file: this) << ": associative comdat " << name
483 << " (sec " << sectionNumber
484 << ") has invalid reference to section " << parentName
485 << " (sec " << parentIndex << ")";
486 };
487
488 if (parent == pendingComdat) {
489 // This can happen if an associative comdat refers to another associative
490 // comdat that appears after it (invalid per COFF spec) or to a section
491 // without any symbols.
492 diag();
493 return;
494 }
495
496 // Check whether the parent is prevailing. If it is, so are we, and we read
497 // the section; otherwise mark it as discarded.
498 if (parent) {
499 SectionChunk *c = readSection(sectionNumber, def, leaderName: "");
500 sparseChunks[sectionNumber] = c;
501 if (c) {
502 c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
503 parent->addAssociative(child: c);
504 }
505 } else {
506 sparseChunks[sectionNumber] = nullptr;
507 }
508}
509
510void ObjFile::recordPrevailingSymbolForMingw(
511 COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
512 // For comdat symbols in executable sections, where this is the copy
513 // of the section chunk we actually include instead of discarding it,
514 // add the symbol to a map to allow using it for implicitly
515 // associating .[px]data$<func> sections to it.
516 // Use the suffix from the .text$<func> instead of the leader symbol
517 // name, for cases where the names differ (i386 mangling/decorations,
518 // cases where the leader is a weak symbol named .weak.func.default*).
519 int32_t sectionNumber = sym.getSectionNumber();
520 SectionChunk *sc = sparseChunks[sectionNumber];
521 if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
522 StringRef name = sc->getSectionName().split(Separator: '$').second;
523 prevailingSectionMap[name] = sectionNumber;
524 }
525}
526
527void ObjFile::maybeAssociateSEHForMingw(
528 COFFSymbolRef sym, const coff_aux_section_definition *def,
529 const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
530 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
531 if (name.consume_front(Prefix: ".pdata$") || name.consume_front(Prefix: ".xdata$") ||
532 name.consume_front(Prefix: ".eh_frame$")) {
533 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
534 // associative to the symbol <func>.
535 auto parentSym = prevailingSectionMap.find(Val: name);
536 if (parentSym != prevailingSectionMap.end())
537 readAssociativeDefinition(sym, def, parentIndex: parentSym->second);
538 }
539}
540
541Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
542 SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
543 if (sym.isExternal()) {
544 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
545 if (sc)
546 return symtab.addRegular(f: this, n: name, s: sym.getGeneric(), c: sc,
547 sectionOffset: sym.getValue());
548 // For MinGW symbols named .weak.* that point to a discarded section,
549 // don't create an Undefined symbol. If nothing ever refers to the symbol,
550 // everything should be fine. If something actually refers to the symbol
551 // (e.g. the undefined weak alias), linking will fail due to undefined
552 // references at the end.
553 if (symtab.ctx.config.mingw && name.starts_with(Prefix: ".weak."))
554 return nullptr;
555 return symtab.addUndefined(name, f: this, overrideLazy: false);
556 }
557 if (sc) {
558 const coff_symbol_generic *symGen = sym.getGeneric();
559 if (sym.isSection()) {
560 auto *customSymGen = cloneSymbol(sym);
561 customSymGen->Value = 0;
562 symGen = customSymGen;
563 }
564 return make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
565 /*IsExternal*/ args: false, args&: symGen, args&: sc);
566 }
567 return nullptr;
568}
569
570void ObjFile::initializeSymbols() {
571 uint32_t numSymbols = coffObj->getNumberOfSymbols();
572 symbols.resize(new_size: numSymbols);
573
574 SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
575 weakAliases;
576 std::vector<uint32_t> pendingIndexes;
577 pendingIndexes.reserve(n: numSymbols);
578
579 DenseMap<StringRef, uint32_t> prevailingSectionMap;
580 std::vector<const coff_aux_section_definition *> comdatDefs(
581 coffObj->getNumberOfSections() + 1);
582 COFFLinkerContext &ctx = symtab.ctx;
583
584 for (uint32_t i = 0; i < numSymbols; ++i) {
585 COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i));
586 bool prevailingComdat;
587 if (coffSym.isUndefined()) {
588 symbols[i] = createUndefined(sym: coffSym, overrideLazy: false);
589 } else if (coffSym.isWeakExternal()) {
590 auto aux = coffSym.getAux<coff_aux_weak_external>();
591 bool overrideLazy = true;
592
593 // On ARM64EC, external function calls emit a pair of weak-dependency
594 // aliases: func to #func and #func to the func guess exit thunk
595 // (instead of a single undefined func symbol, which would be emitted on
596 // other targets). Allow such aliases to be overridden by lazy archive
597 // symbols, just as we would for undefined symbols.
598 if (isArm64EC(Machine: getMachineType()) &&
599 aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
600 COFFSymbolRef targetSym = check(e: coffObj->getSymbol(index: aux->TagIndex));
601 if (!targetSym.isAnyUndefined()) {
602 // If the target is defined, it may be either a guess exit thunk or
603 // the actual implementation. If it's the latter, consider the alias
604 // to be part of the implementation and override potential lazy
605 // archive symbols.
606 StringRef targetName = check(e: coffObj->getSymbolName(Symbol: targetSym));
607 StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym));
608 std::optional<std::string> mangledName =
609 getArm64ECMangledFunctionName(Name: name);
610 overrideLazy = mangledName == targetName;
611 } else {
612 overrideLazy = false;
613 }
614 }
615 symbols[i] = createUndefined(sym: coffSym, overrideLazy);
616 weakAliases.emplace_back(Args&: symbols[i], Args&: aux);
617 } else if (std::optional<Symbol *> optSym =
618 createDefined(sym: coffSym, comdatDefs, prevailingComdat)) {
619 symbols[i] = *optSym;
620 if (ctx.config.mingw && prevailingComdat)
621 recordPrevailingSymbolForMingw(sym: coffSym, prevailingSectionMap);
622 } else {
623 // createDefined() returns std::nullopt if a symbol belongs to a section
624 // that was pending at the point when the symbol was read. This can happen
625 // in two cases:
626 // 1) section definition symbol for a comdat leader;
627 // 2) symbol belongs to a comdat section associated with another section.
628 // In both of these cases, we can expect the section to be resolved by
629 // the time we finish visiting the remaining symbols in the symbol
630 // table. So we postpone the handling of this symbol until that time.
631 pendingIndexes.push_back(x: i);
632 }
633 i += coffSym.getNumberOfAuxSymbols();
634 }
635
636 for (uint32_t i : pendingIndexes) {
637 COFFSymbolRef sym = check(e: coffObj->getSymbol(index: i));
638 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
639 if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
640 readAssociativeDefinition(sym, def);
641 else if (ctx.config.mingw)
642 maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
643 }
644 if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
645 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
646 Log(ctx) << "comdat section " << name
647 << " without leader and unassociated, discarding";
648 continue;
649 }
650 symbols[i] = createRegular(sym);
651 }
652
653 for (auto &kv : weakAliases) {
654 Symbol *sym = kv.first;
655 const coff_aux_weak_external *aux = kv.second;
656 checkAndSetWeakAlias(symtab, f: this, source: sym, target: symbols[aux->TagIndex],
657 isAntiDep: aux->Characteristics ==
658 IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
659 }
660
661 // Free the memory used by sparseChunks now that symbol loading is finished.
662 decltype(sparseChunks)().swap(x&: sparseChunks);
663}
664
665Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
666 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
667 Symbol *s = symtab.addUndefined(name, f: this, overrideLazy);
668
669 // Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
670 // target.
671 if (symtab.isEC() && getMachineType() == AMD64) {
672 auto u = dyn_cast<Undefined>(Val: s);
673 if (u && !u->weakAlias) {
674 if (std::optional<std::string> mangledName =
675 getArm64ECMangledFunctionName(Name: name)) {
676 Symbol *m = symtab.addUndefined(name: saver().save(S: *mangledName), f: this,
677 /*overrideLazy=*/false);
678 u->setWeakAlias(sym: m, /*antiDep=*/true);
679 }
680 }
681 }
682 return s;
683}
684
685static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
686 int32_t section) {
687 uint32_t numSymbols = obj->getNumberOfSymbols();
688 for (uint32_t i = 0; i < numSymbols; ++i) {
689 COFFSymbolRef sym = check(e: obj->getSymbol(index: i));
690 if (sym.getSectionNumber() != section)
691 continue;
692 if (const coff_aux_section_definition *def = sym.getSectionDefinition())
693 return def;
694 }
695 return nullptr;
696}
697
698void ObjFile::handleComdatSelection(
699 COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
700 DefinedRegular *leader,
701 const llvm::object::coff_aux_section_definition *def) {
702 if (prevailing)
703 return;
704 // There's already an existing comdat for this symbol: `Leader`.
705 // Use the comdats's selection field to determine if the new
706 // symbol in `Sym` should be discarded, produce a duplicate symbol
707 // error, etc.
708
709 SectionChunk *leaderChunk = leader->getChunk();
710 COMDATType leaderSelection = leaderChunk->selection;
711 COFFLinkerContext &ctx = symtab.ctx;
712
713 assert(leader->data && "Comdat leader without SectionChunk?");
714 if (isa<BitcodeFile>(Val: leader->file)) {
715 // If the leader is only a LTO symbol, we don't know e.g. its final size
716 // yet, so we can't do the full strict comdat selection checking yet.
717 selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
718 }
719
720 if ((selection == IMAGE_COMDAT_SELECT_ANY &&
721 leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
722 (selection == IMAGE_COMDAT_SELECT_LARGEST &&
723 leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
724 // cl.exe picks "any" for vftables when building with /GR- and
725 // "largest" when building with /GR. To be able to link object files
726 // compiled with each flag, "any" and "largest" are merged as "largest".
727 leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
728 }
729
730 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
731 // Clang on the other hand picks "any". To be able to link two object files
732 // with a __declspec(selectany) declaration, one compiled with gcc and the
733 // other with clang, we merge them as proper "same size as"
734 if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
735 leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
736 (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
737 leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
738 leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
739 }
740
741 // Other than that, comdat selections must match. This is a bit more
742 // strict than link.exe which allows merging "any" and "largest" if "any"
743 // is the first symbol the linker sees, and it allows merging "largest"
744 // with everything (!) if "largest" is the first symbol the linker sees.
745 // Making this symmetric independent of which selection is seen first
746 // seems better though.
747 // (This behavior matches ModuleLinker::getComdatResult().)
748 if (selection != leaderSelection) {
749 Log(ctx) << "conflicting comdat type for " << symtab.printSymbol(sym: leader)
750 << ": " << (int)leaderSelection << " in " << leader->getFile()
751 << " and " << (int)selection << " in " << this;
752 symtab.reportDuplicate(existing: leader, newFile: this);
753 return;
754 }
755
756 switch (selection) {
757 case IMAGE_COMDAT_SELECT_NODUPLICATES:
758 symtab.reportDuplicate(existing: leader, newFile: this);
759 break;
760
761 case IMAGE_COMDAT_SELECT_ANY:
762 // Nothing to do.
763 break;
764
765 case IMAGE_COMDAT_SELECT_SAME_SIZE:
766 if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
767 if (!ctx.config.mingw) {
768 symtab.reportDuplicate(existing: leader, newFile: this);
769 } else {
770 const coff_aux_section_definition *leaderDef = nullptr;
771 if (leaderChunk->file)
772 leaderDef = findSectionDef(obj: leaderChunk->file->getCOFFObj(),
773 section: leaderChunk->getSectionNumber());
774 if (!leaderDef || leaderDef->Length != def->Length)
775 symtab.reportDuplicate(existing: leader, newFile: this);
776 }
777 }
778 break;
779
780 case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
781 SectionChunk newChunk(this, getSection(sym));
782 // link.exe only compares section contents here and doesn't complain
783 // if the two comdat sections have e.g. different alignment.
784 // Match that.
785 if (leaderChunk->getContents() != newChunk.getContents())
786 symtab.reportDuplicate(existing: leader, newFile: this, newSc: &newChunk, newSectionOffset: sym.getValue());
787 break;
788 }
789
790 case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
791 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
792 // (This means lld-link doesn't produce duplicate symbol errors for
793 // associative comdats while link.exe does, but associate comdats
794 // are never extern in practice.)
795 llvm_unreachable("createDefined not called for associative comdats");
796
797 case IMAGE_COMDAT_SELECT_LARGEST:
798 if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
799 // Replace the existing comdat symbol with the new one.
800 StringRef name = check(e: coffObj->getSymbolName(Symbol: sym));
801 // FIXME: This is incorrect: With /opt:noref, the previous sections
802 // make it into the final executable as well. Correct handling would
803 // be to undo reading of the whole old section that's being replaced,
804 // or doing one pass that determines what the final largest comdat
805 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
806 // only the largest one.
807 replaceSymbol<DefinedRegular>(s: leader, arg: this, arg&: name, /*IsCOMDAT*/ arg: true,
808 /*IsExternal*/ arg: true, arg: sym.getGeneric(),
809 arg: nullptr);
810 prevailing = true;
811 }
812 break;
813
814 case IMAGE_COMDAT_SELECT_NEWEST:
815 llvm_unreachable("should have been rejected earlier");
816 }
817}
818
819std::optional<Symbol *> ObjFile::createDefined(
820 COFFSymbolRef sym,
821 std::vector<const coff_aux_section_definition *> &comdatDefs,
822 bool &prevailing) {
823 prevailing = false;
824 auto getName = [&]() { return check(e: coffObj->getSymbolName(Symbol: sym)); };
825
826 if (sym.isCommon()) {
827 auto *c = make<CommonChunk>(args&: sym);
828 chunks.push_back(x: c);
829 return symtab.addCommon(f: this, n: getName(), size: sym.getValue(), s: sym.getGeneric(),
830 c);
831 }
832
833 COFFLinkerContext &ctx = symtab.ctx;
834 if (sym.isAbsolute()) {
835 StringRef name = getName();
836
837 if (name == "@feat.00")
838 feat00Flags = sym.getValue();
839 // Skip special symbols.
840 if (ignoredSymbolName(name))
841 return nullptr;
842
843 if (sym.isExternal())
844 return symtab.addAbsolute(n: name, s: sym);
845 return make<DefinedAbsolute>(args&: ctx, args&: name, args&: sym);
846 }
847
848 int32_t sectionNumber = sym.getSectionNumber();
849 if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
850 return nullptr;
851
852 if (sym.isEmptySectionDeclaration()) {
853 // As there is no coff_section in the object file for these, make a
854 // new virtual one, with everything zeroed out (i.e. an empty section),
855 // with only the name and characteristics set.
856 StringRef name = getName();
857 auto *hdr = make<coff_section>();
858 memset(s: hdr, c: 0, n: sizeof(*hdr));
859 strncpy(dest: hdr->Name, src: name.data(),
860 n: std::min(a: name.size(), b: (size_t)COFF::NameSize));
861 // The Value field in a section symbol may contain the characteristics,
862 // or it may be zero, where we make something up (that matches what is
863 // used in .idata sections in the regular object files in import libraries).
864 if (sym.getValue())
865 hdr->Characteristics = sym.getValue() | IMAGE_SCN_ALIGN_4BYTES;
866 else
867 hdr->Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
868 IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE |
869 IMAGE_SCN_ALIGN_4BYTES;
870 auto *sc = make<SectionChunk>(args: this, args&: hdr);
871 chunks.push_back(x: sc);
872
873 auto *symGen = cloneSymbol(sym);
874 // Ignore the Value offset of these symbols, as it may be a bitmask.
875 symGen->Value = 0;
876 return make<DefinedRegular>(args: this, /*name=*/args: "", /*isCOMDAT=*/args: false,
877 /*isExternal=*/args: false, args&: symGen, args&: sc);
878 }
879
880 if (llvm::COFF::isReservedSectionNumber(SectionNumber: sectionNumber))
881 Fatal(ctx) << toString(file: this) << ": " << getName()
882 << " should not refer to special section "
883 << Twine(sectionNumber);
884
885 if ((uint32_t)sectionNumber >= sparseChunks.size())
886 Fatal(ctx) << toString(file: this) << ": " << getName()
887 << " should not refer to non-existent section "
888 << Twine(sectionNumber);
889
890 // Comdat handling.
891 // A comdat symbol consists of two symbol table entries.
892 // The first symbol entry has the name of the section (e.g. .text), fixed
893 // values for the other fields, and one auxiliary record.
894 // The second symbol entry has the name of the comdat symbol, called the
895 // "comdat leader".
896 // When this function is called for the first symbol entry of a comdat,
897 // it sets comdatDefs and returns std::nullopt, and when it's called for the
898 // second symbol entry it reads comdatDefs and then sets it back to nullptr.
899
900 // Handle comdat leader.
901 if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
902 comdatDefs[sectionNumber] = nullptr;
903 DefinedRegular *leader;
904
905 if (sym.isExternal()) {
906 std::tie(args&: leader, args&: prevailing) =
907 symtab.addComdat(f: this, n: getName(), s: sym.getGeneric());
908 } else {
909 leader = make<DefinedRegular>(args: this, /*Name*/ args: "", /*IsCOMDAT*/ args: false,
910 /*IsExternal*/ args: false, args: sym.getGeneric());
911 prevailing = true;
912 }
913
914 if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
915 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
916 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
917 def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
918 Fatal(ctx) << "unknown comdat type "
919 << std::to_string(val: (int)def->Selection) << " for " << getName()
920 << " in " << toString(file: this);
921 }
922 COMDATType selection = (COMDATType)def->Selection;
923
924 if (leader->isCOMDAT)
925 handleComdatSelection(sym, selection, prevailing, leader, def);
926
927 if (prevailing) {
928 SectionChunk *c = readSection(sectionNumber, def, leaderName: getName());
929 sparseChunks[sectionNumber] = c;
930 if (!c)
931 return nullptr;
932 c->sym = cast<DefinedRegular>(Val: leader);
933 c->selection = selection;
934 cast<DefinedRegular>(Val: leader)->data = &c->repl;
935 } else {
936 sparseChunks[sectionNumber] = nullptr;
937 }
938 return leader;
939 }
940
941 // Prepare to handle the comdat leader symbol by setting the section's
942 // ComdatDefs pointer if we encounter a non-associative comdat.
943 if (sparseChunks[sectionNumber] == pendingComdat) {
944 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
945 if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
946 comdatDefs[sectionNumber] = def;
947 }
948 return std::nullopt;
949 }
950
951 return createRegular(sym);
952}
953
954MachineTypes ObjFile::getMachineType() const {
955 return static_cast<MachineTypes>(coffObj->getMachine());
956}
957
958ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
959 if (SectionChunk *sec = SectionChunk::findByName(sections: debugChunks, name: secName))
960 return sec->consumeDebugMagic();
961 return {};
962}
963
964// OBJ files systematically store critical information in a .debug$S stream,
965// even if the TU was compiled with no debug info. At least two records are
966// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
967// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
968// currently used to initialize the hotPatchable member.
969void ObjFile::initializeFlags() {
970 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$S");
971 if (data.empty())
972 return;
973
974 DebugSubsectionArray subsections;
975
976 BinaryStreamReader reader(data, llvm::endianness::little);
977 ExitOnError exitOnErr;
978 exitOnErr(reader.readArray(Array&: subsections, Size: data.size()));
979
980 for (const DebugSubsectionRecord &ss : subsections) {
981 if (ss.kind() != DebugSubsectionKind::Symbols)
982 continue;
983
984 unsigned offset = 0;
985
986 // Only parse the first two records. We are only looking for S_OBJNAME
987 // and S_COMPILE3, and they usually appear at the beginning of the
988 // stream.
989 for (unsigned i = 0; i < 2; ++i) {
990 Expected<CVSymbol> sym = readSymbolFromStream(Stream: ss.getRecordData(), Offset: offset);
991 if (!sym) {
992 consumeError(Err: sym.takeError());
993 return;
994 }
995 if (sym->kind() == SymbolKind::S_COMPILE3) {
996 auto cs =
997 cantFail(ValOrErr: SymbolDeserializer::deserializeAs<Compile3Sym>(Symbol: sym.get()));
998 hotPatchable =
999 (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
1000 }
1001 if (sym->kind() == SymbolKind::S_OBJNAME) {
1002 auto objName = cantFail(ValOrErr: SymbolDeserializer::deserializeAs<ObjNameSym>(
1003 Symbol: sym.get()));
1004 if (objName.Signature)
1005 pchSignature = objName.Signature;
1006 }
1007 offset += sym->length();
1008 }
1009 }
1010}
1011
1012// Depending on the compilation flags, OBJs can refer to external files,
1013// necessary to merge this OBJ into the final PDB. We currently support two
1014// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
1015// And PDB type servers, when compiling with /Zi. This function extracts these
1016// dependencies and makes them available as a TpiSource interface (see
1017// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
1018// output even with /Yc and /Yu and with /Zi.
1019void ObjFile::initializeDependencies() {
1020 COFFLinkerContext &ctx = symtab.ctx;
1021 if (!ctx.config.debug)
1022 return;
1023
1024 bool isPCH = false;
1025
1026 ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$P");
1027 if (!data.empty())
1028 isPCH = true;
1029 else
1030 data = getDebugSection(secName: ".debug$T");
1031
1032 // symbols but no types, make a plain, empty TpiSource anyway, because it
1033 // simplifies adding the symbols later.
1034 if (data.empty()) {
1035 if (!debugChunks.empty())
1036 debugTypesObj = makeTpiSource(ctx, f: this);
1037 return;
1038 }
1039
1040 // Get the first type record. It will indicate if this object uses a type
1041 // server (/Zi) or a PCH file (/Yu).
1042 CVTypeArray types;
1043 BinaryStreamReader reader(data, llvm::endianness::little);
1044 cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
1045 CVTypeArray::Iterator firstType = types.begin();
1046 if (firstType == types.end())
1047 return;
1048
1049 // Remember the .debug$T or .debug$P section.
1050 debugTypes = data;
1051
1052 // This object file is a PCH file that others will depend on.
1053 if (isPCH) {
1054 debugTypesObj = makePrecompSource(ctx, file: this);
1055 return;
1056 }
1057
1058 // This object file was compiled with /Zi. Enqueue the PDB dependency.
1059 if (firstType->kind() == LF_TYPESERVER2) {
1060 TypeServer2Record ts = cantFail(
1061 ValOrErr: TypeDeserializer::deserializeAs<TypeServer2Record>(Data: firstType->data()));
1062 debugTypesObj = makeUseTypeServerSource(ctx, file: this, ts);
1063 enqueuePdbFile(path: ts.getName(), fromFile: this);
1064 return;
1065 }
1066
1067 // This object was compiled with /Yu. It uses types from another object file
1068 // with a matching signature.
1069 if (firstType->kind() == LF_PRECOMP) {
1070 PrecompRecord precomp = cantFail(
1071 ValOrErr: TypeDeserializer::deserializeAs<PrecompRecord>(Data: firstType->data()));
1072 // We're better off trusting the LF_PRECOMP signature. In some cases the
1073 // S_OBJNAME record doesn't contain a valid PCH signature.
1074 if (precomp.Signature)
1075 pchSignature = precomp.Signature;
1076 debugTypesObj = makeUsePrecompSource(ctx, file: this, ts: precomp);
1077 // Drop the LF_PRECOMP record from the input stream.
1078 debugTypes = debugTypes.drop_front(N: firstType->RecordData.size());
1079 return;
1080 }
1081
1082 // This is a plain old object file.
1083 debugTypesObj = makeTpiSource(ctx, f: this);
1084}
1085
1086// The casing of the PDB path stamped in the OBJ can differ from the actual path
1087// on disk. With this, we ensure to always use lowercase as a key for the
1088// pdbInputFileInstances map, at least on Windows.
1089static std::string normalizePdbPath(StringRef path) {
1090#if defined(_WIN32)
1091 return path.lower();
1092#else // LINUX
1093 return std::string(path);
1094#endif
1095}
1096
1097// If existing, return the actual PDB path on disk.
1098static std::optional<std::string>
1099findPdbPath(StringRef pdbPath, ObjFile *dependentFile, StringRef outputPath) {
1100 // Ensure the file exists before anything else. In some cases, if the path
1101 // points to a removable device, Driver::enqueuePath() would fail with an
1102 // error (EAGAIN, "resource unavailable try again") which we want to skip
1103 // silently.
1104 if (llvm::sys::fs::exists(Path: pdbPath))
1105 return normalizePdbPath(path: pdbPath);
1106
1107 StringRef objPath = !dependentFile->parentName.empty()
1108 ? dependentFile->parentName
1109 : dependentFile->getName();
1110
1111 // Currently, type server PDBs are only created by MSVC cl, which only runs
1112 // on Windows, so we can assume type server paths are Windows style.
1113 StringRef pdbName = sys::path::filename(path: pdbPath, style: sys::path::Style::windows);
1114
1115 // Check if the PDB is in the same folder as the OBJ.
1116 SmallString<128> path;
1117 sys::path::append(path, a: sys::path::parent_path(path: objPath), b: pdbName);
1118 if (llvm::sys::fs::exists(Path: path))
1119 return normalizePdbPath(path);
1120
1121 // Check if the PDB is in the output folder.
1122 path.clear();
1123 sys::path::append(path, a: sys::path::parent_path(path: outputPath), b: pdbName);
1124 if (llvm::sys::fs::exists(Path: path))
1125 return normalizePdbPath(path);
1126
1127 return std::nullopt;
1128}
1129
// Constructs a PDB input file over the buffer `m`. The work is done entirely by
// the InputFile base constructor, which receives the context's symbol table,
// the PDBKind tag and the buffer.
1130 PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1131 : InputFile(ctx.symtab, PDBKind, m) {}
1132
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so members whose types are only
// forward-declared in the header can be destroyed — confirm against
// InputFiles.h.
1133 PDBInputFile::~PDBInputFile() = default;
1134
1135PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
1136 StringRef path,
1137 ObjFile *fromFile) {
1138 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: ctx.config.outputFile);
1139 if (!p)
1140 return nullptr;
1141 auto it = ctx.pdbInputFileInstances.find(x: *p);
1142 if (it != ctx.pdbInputFileInstances.end())
1143 return it->second;
1144 return nullptr;
1145}
1146
1147void PDBInputFile::parse() {
1148 symtab.ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
1149
1150 std::unique_ptr<pdb::IPDBSession> thisSession;
1151 Error E = pdb::NativeSession::createFromPdb(
1152 MB: MemoryBuffer::getMemBuffer(Ref: mb, RequiresNullTerminator: false), Session&: thisSession);
1153 if (E) {
1154 loadErrorStr.emplace(args: toString(E: std::move(E)));
1155 return; // fail silently at this point - the error will be handled later,
1156 // when merging the debug type stream
1157 }
1158
1159 session.reset(p: static_cast<pdb::NativeSession *>(thisSession.release()));
1160
1161 pdb::PDBFile &pdbFile = session->getPDBFile();
1162 auto expectedInfo = pdbFile.getPDBInfoStream();
1163 // All PDB Files should have an Info stream.
1164 if (!expectedInfo) {
1165 loadErrorStr.emplace(args: toString(E: expectedInfo.takeError()));
1166 return;
1167 }
1168 debugTypesObj = makeTypeServerSource(ctx&: symtab.ctx, pdbInputFile: this);
1169}
1170
1171// Used only for DWARF debug info, which is not common (except in MinGW
1172// environments). This returns an optional pair of file name and line
1173// number for where the variable was defined.
1174std::optional<std::pair<StringRef, uint32_t>>
1175ObjFile::getVariableLocation(StringRef var) {
1176 if (!dwarf) {
1177 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1178 if (!dwarf)
1179 return std::nullopt;
1180 }
1181 if (symtab.machine == I386)
1182 var.consume_front(Prefix: "_");
1183 std::optional<std::pair<std::string, unsigned>> ret =
1184 dwarf->getVariableLoc(name: var);
1185 if (!ret)
1186 return std::nullopt;
1187 return std::make_pair(x: saver().save(S: ret->first), y&: ret->second);
1188}
1189
1190// Used only for DWARF debug info, which is not common (except in MinGW
1191// environments).
1192std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
1193 uint32_t sectionIndex) {
1194 if (!dwarf) {
1195 dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj()));
1196 if (!dwarf)
1197 return std::nullopt;
1198 }
1199
1200 return dwarf->getDILineInfo(offset, sectionIndex);
1201}
1202
1203void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
1204 auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile, outputPath: symtab.ctx.config.outputFile);
1205 if (!p)
1206 return;
1207 auto it = symtab.ctx.pdbInputFileInstances.emplace(args&: *p, args: nullptr);
1208 if (!it.second)
1209 return; // already scheduled for load
1210 symtab.ctx.driver.enqueuePDB(Path: *p);
1211}
1212
// Constructs an import file over the buffer `m`. The symbol table handed to
// the InputFile base is chosen by the machine type read from the buffer (see
// getMachineType(m)). `live` starts out true when ctx.config.doGC is off and
// false when it is on.
1213 ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1214 : InputFile(ctx.getSymtab(machine: getMachineType(m)), ImportKind, m),
1215 live(!ctx.config.doGC) {}
1216
1217MachineTypes ImportFile::getMachineType(MemoryBufferRef m) {
1218 uint16_t machine =
1219 reinterpret_cast<const coff_import_header *>(m.getBufferStart())->Machine;
1220 return MachineTypes(machine);
1221}
1222
1223bool ImportFile::isSameImport(const ImportFile *other) const {
1224 if (!externalName.empty())
1225 return other->externalName == externalName;
1226 return hdr->OrdinalHint == other->hdr->OrdinalHint;
1227}
1228
1229ImportThunkChunk *ImportFile::makeImportThunk() {
1230 switch (hdr->Machine) {
1231 case AMD64:
1232 return make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym);
1233 case I386:
1234 return make<ImportThunkChunkX86>(args&: symtab.ctx, args&: impSym);
1235 case ARM64:
1236 return make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impSym, args: ARM64);
1237 case ARMNT:
1238 return make<ImportThunkChunkARM>(args&: symtab.ctx, args&: impSym);
1239 }
1240 llvm_unreachable("unknown machine type");
1241}
1242
1243void ImportFile::parse() {
1244 const auto *hdr =
1245 reinterpret_cast<const coff_import_header *>(mb.getBufferStart());
1246
1247 // Check if the total size is valid.
1248 if (mb.getBufferSize() < sizeof(*hdr) ||
1249 mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
1250 Fatal(ctx&: symtab.ctx) << "broken import library";
1251
1252 // Read names and create an __imp_ symbol.
1253 StringRef buf = mb.getBuffer().substr(Start: sizeof(*hdr));
1254 auto split = buf.split(Separator: '\0');
1255 buf = split.second;
1256 StringRef name;
1257 if (isArm64EC(Machine: hdr->Machine)) {
1258 if (std::optional<std::string> demangledName =
1259 getArm64ECDemangledFunctionName(Name: split.first))
1260 name = saver().save(S: *demangledName);
1261 }
1262 if (name.empty())
1263 name = saver().save(S: split.first);
1264 StringRef impName = saver().save(S: "__imp_" + name);
1265 dllName = buf.split(Separator: '\0').first;
1266 StringRef extName;
1267 switch (hdr->getNameType()) {
1268 case IMPORT_ORDINAL:
1269 extName = "";
1270 break;
1271 case IMPORT_NAME:
1272 extName = name;
1273 break;
1274 case IMPORT_NAME_NOPREFIX:
1275 extName = ltrim1(s: name, chars: "?@_");
1276 break;
1277 case IMPORT_NAME_UNDECORATE:
1278 extName = ltrim1(s: name, chars: "?@_");
1279 extName = extName.substr(Start: 0, N: extName.find(C: '@'));
1280 break;
1281 case IMPORT_NAME_EXPORTAS:
1282 extName = buf.substr(Start: dllName.size() + 1).split(Separator: '\0').first;
1283 break;
1284 }
1285
1286 this->hdr = hdr;
1287 externalName = extName;
1288
1289 bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
1290
1291 if (!symtab.isEC()) {
1292 impSym = symtab.addImportData(n: impName, f: this, location);
1293 } else {
1294 // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
1295 // which holds addresses that are guaranteed to be callable directly from
1296 // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
1297 // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
1298 // data imports, the naming is reversed.
1299 StringRef auxImpName = saver().save(S: "__imp_aux_" + name);
1300 if (isCode) {
1301 impSym = symtab.addImportData(n: auxImpName, f: this, location);
1302 impECSym = symtab.addImportData(n: impName, f: this, location&: auxLocation);
1303 } else {
1304 impSym = symtab.addImportData(n: impName, f: this, location);
1305 impECSym = symtab.addImportData(n: auxImpName, f: this, location&: auxLocation);
1306 }
1307 if (!impECSym)
1308 return;
1309
1310 StringRef auxImpCopyName = saver().save(S: "__auximpcopy_" + name);
1311 auxImpCopySym = symtab.addImportData(n: auxImpCopyName, f: this, location&: auxCopyLocation);
1312 if (!auxImpCopySym)
1313 return;
1314 }
1315 // If this was a duplicate, we logged an error but may continue;
1316 // in this case, impSym is nullptr.
1317 if (!impSym)
1318 return;
1319
1320 if (hdr->getType() == llvm::COFF::IMPORT_CONST)
1321 static_cast<void>(symtab.addImportData(n: name, f: this, location));
1322
1323 // If type is function, we need to create a thunk which jump to an
1324 // address pointed by the __imp_ symbol. (This allows you to call
1325 // DLL functions just like regular non-DLL functions.)
1326 if (isCode) {
1327 if (!symtab.isEC()) {
1328 thunkSym = symtab.addImportThunk(name, s: impSym, chunk: makeImportThunk());
1329 } else {
1330 thunkSym = symtab.addImportThunk(
1331 name, s: impSym, chunk: make<ImportThunkChunkX64>(args&: symtab.ctx, args&: impSym));
1332
1333 if (std::optional<std::string> mangledName =
1334 getArm64ECMangledFunctionName(Name: name)) {
1335 StringRef auxThunkName = saver().save(S: *mangledName);
1336 auxThunkSym = symtab.addImportThunk(
1337 name: auxThunkName, s: impECSym,
1338 chunk: make<ImportThunkChunkARM64>(args&: symtab.ctx, args&: impECSym, args: ARM64EC));
1339 }
1340
1341 StringRef impChkName = saver().save(S: "__impchk_" + name);
1342 impchkThunk = make<ImportThunkChunkARM64EC>(args: this);
1343 impchkThunk->sym = symtab.addImportThunk(name: impChkName, s: impSym, chunk: impchkThunk);
1344 symtab.ctx.driver.pullArm64ECIcallHelper();
1345 }
1346 }
1347}
1348
1349BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
1350 std::unique_ptr<lto::InputFile> &o, bool lazy)
1351 : InputFile(symtab, BitcodeKind, mb, lazy) {
1352 obj.swap(u&: o);
1353}
1354
1355BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
1356 StringRef archiveName,
1357 uint64_t offsetInArchive, bool lazy) {
1358 std::string path = mb.getBufferIdentifier().str();
1359 if (ctx.config.thinLTOIndexOnly)
1360 path = replaceThinLTOSuffix(path: mb.getBufferIdentifier(),
1361 suffix: ctx.config.thinLTOObjectSuffixReplace.first,
1362 repl: ctx.config.thinLTOObjectSuffixReplace.second);
1363
1364 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1365 // name. If two archives define two members with the same name, this
1366 // causes a collision which result in only one of the objects being taken
1367 // into consideration at LTO time (which very likely causes undefined
1368 // symbols later in the link stage). So we append file offset to make
1369 // filename unique.
1370 MemoryBufferRef mbref(mb.getBuffer(),
1371 saver().save(S: archiveName.empty()
1372 ? path
1373 : archiveName +
1374 sys::path::filename(path) +
1375 utostr(X: offsetInArchive)));
1376
1377 std::unique_ptr<lto::InputFile> obj = check(e: lto::InputFile::create(Object: mbref));
1378 return make<BitcodeFile>(args&: ctx.getSymtab(machine: getMachineType(obj: obj.get())), args&: mb, args&: obj,
1379 args&: lazy);
1380}
1381
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so the std::unique_ptr<lto::InputFile>
// member can be destroyed where lto::InputFile is a complete type — confirm
// against InputFiles.h.
1382 BitcodeFile::~BitcodeFile() = default;
1383
1384void BitcodeFile::parse() {
1385 llvm::StringSaver &saver = lld::saver();
1386
1387 std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1388 for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1389 // FIXME: Check nodeduplicate
1390 comdat[i] =
1391 symtab.addComdat(f: this, n: saver.save(S: obj->getComdatTable()[i].first));
1392 for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1393 StringRef symName = saver.save(S: objSym.getName());
1394 int comdatIndex = objSym.getComdatIndex();
1395 Symbol *sym;
1396 SectionChunk *fakeSC = nullptr;
1397 if (objSym.isExecutable())
1398 fakeSC = &symtab.ctx.ltoTextSectionChunk.chunk;
1399 else
1400 fakeSC = &symtab.ctx.ltoDataSectionChunk.chunk;
1401 if (objSym.isUndefined()) {
1402 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1403 if (objSym.isWeak())
1404 sym->deferUndefined = true;
1405 // If one LTO object file references (i.e. has an undefined reference to)
1406 // a symbol with an __imp_ prefix, the LTO compilation itself sees it
1407 // as unprefixed but with a dllimport attribute instead, and doesn't
1408 // understand the relation to a concrete IR symbol with the __imp_ prefix.
1409 //
1410 // For such cases, mark the symbol as used in a regular object (i.e. the
1411 // symbol must be retained) so that the linker can associate the
1412 // references in the end. If the symbol is defined in an import library
1413 // or in a regular object file, this has no effect, but if it is defined
1414 // in another LTO object file, this makes sure it is kept, to fulfill
1415 // the reference when linking the output of the LTO compilation.
1416 if (symName.starts_with(Prefix: "__imp_"))
1417 sym->isUsedInRegularObj = true;
1418 } else if (objSym.isCommon()) {
1419 sym = symtab.addCommon(f: this, n: symName, size: objSym.getCommonSize());
1420 } else if (objSym.isWeak() && objSym.isIndirect()) {
1421 // Weak external.
1422 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: true);
1423 std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1424 Symbol *alias = symtab.addUndefined(name: saver.save(S: fallback));
1425 checkAndSetWeakAlias(symtab, f: this, source: sym, target: alias, isAntiDep: false);
1426 } else if (comdatIndex != -1) {
1427 if (symName == obj->getComdatTable()[comdatIndex].first) {
1428 sym = comdat[comdatIndex].first;
1429 if (cast<DefinedRegular>(Val: sym)->data == nullptr)
1430 cast<DefinedRegular>(Val: sym)->data = &fakeSC->repl;
1431 } else if (comdat[comdatIndex].second) {
1432 sym = symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC);
1433 } else {
1434 sym = symtab.addUndefined(name: symName, f: this, overrideLazy: false);
1435 }
1436 } else {
1437 sym =
1438 symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC, sectionOffset: 0, isWeak: objSym.isWeak());
1439 }
1440 symbols.push_back(x: sym);
1441 if (objSym.isUsed())
1442 symtab.ctx.config.gcroot.push_back(x: sym);
1443 }
1444 directives = saver.save(S: obj->getCOFFLinkerOpts());
1445}
1446
1447void BitcodeFile::parseLazy() {
1448 for (const lto::InputFile::Symbol &sym : obj->symbols())
1449 if (!sym.isUndefined()) {
1450 symtab.addLazyObject(f: this, n: sym.getName());
1451 if (!lazy)
1452 return;
1453 }
1454}
1455
1456MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
1457 Triple t(obj->getTargetTriple());
1458 switch (t.getArch()) {
1459 case Triple::x86_64:
1460 return AMD64;
1461 case Triple::x86:
1462 return I386;
1463 case Triple::arm:
1464 case Triple::thumb:
1465 return ARMNT;
1466 case Triple::aarch64:
1467 return t.isWindowsArm64EC() ? ARM64EC : ARM64;
1468 default:
1469 return IMAGE_FILE_MACHINE_UNKNOWN;
1470 }
1471}
1472
1473std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix,
1474 StringRef repl) {
1475 if (path.consume_back(Suffix: suffix))
1476 return (path + repl).str();
1477 return std::string(path);
1478}
1479
1480static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1481 for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1482 const coff_section *sec = CHECK(coffObj->getSection(i), file);
1483 if (rva >= sec->VirtualAddress &&
1484 rva <= sec->VirtualAddress + sec->VirtualSize) {
1485 return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1486 }
1487 }
1488 return false;
1489}
1490
1491void DLLFile::parse() {
1492 // Parse a memory buffer as a PE-COFF executable.
1493 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1494
1495 if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) {
1496 bin.release();
1497 coffObj.reset(p: obj);
1498 } else {
1499 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a COFF file";
1500 return;
1501 }
1502
1503 if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1504 Err(ctx&: symtab.ctx) << toString(file: this) << " is not a PE-COFF executable";
1505 return;
1506 }
1507
1508 for (const auto &exp : coffObj->export_directories()) {
1509 StringRef dllName, symbolName;
1510 uint32_t exportRVA;
1511 checkError(e: exp.getDllName(Result&: dllName));
1512 checkError(e: exp.getSymbolName(Result&: symbolName));
1513 checkError(e: exp.getExportRVA(Result&: exportRVA));
1514
1515 if (symbolName.empty())
1516 continue;
1517
1518 bool code = isRVACode(coffObj: coffObj.get(), rva: exportRVA, file: this);
1519
1520 Symbol *s = make<Symbol>();
1521 s->dllName = dllName;
1522 s->symbolName = symbolName;
1523 s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1524 s->nameType = ImportNameType::IMPORT_NAME;
1525
1526 if (coffObj->getMachine() == I386) {
1527 s->symbolName = symbolName = saver().save(S: "_" + symbolName);
1528 s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1529 }
1530
1531 StringRef impName = saver().save(S: "__imp_" + symbolName);
1532 symtab.addLazyDLLSymbol(f: this, sym: s, n: impName);
1533 if (code)
1534 symtab.addLazyDLLSymbol(f: this, sym: s, n: symbolName);
1535 if (symtab.isEC()) {
1536 StringRef impAuxName = saver().save(S: "__imp_aux_" + symbolName);
1537 symtab.addLazyDLLSymbol(f: this, sym: s, n: impAuxName);
1538
1539 if (code) {
1540 std::optional<std::string> mangledName =
1541 getArm64ECMangledFunctionName(Name: symbolName);
1542 if (mangledName)
1543 symtab.addLazyDLLSymbol(f: this, sym: s, n: *mangledName);
1544 }
1545 }
1546 }
1547}
1548
1549MachineTypes DLLFile::getMachineType() const {
1550 if (coffObj)
1551 return static_cast<MachineTypes>(coffObj->getMachine());
1552 return IMAGE_FILE_MACHINE_UNKNOWN;
1553}
1554
1555void DLLFile::makeImport(DLLFile::Symbol *s) {
1556 if (!seen.insert(key: s->symbolName).second)
1557 return;
1558
1559 size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1560 size_t size = sizeof(coff_import_header) + impSize;
1561 char *buf = bAlloc().Allocate<char>(Num: size);
1562 memset(s: buf, c: 0, n: size);
1563 char *p = buf;
1564 auto *imp = reinterpret_cast<coff_import_header *>(p);
1565 p += sizeof(*imp);
1566 imp->Sig2 = 0xFFFF;
1567 imp->Machine = coffObj->getMachine();
1568 imp->SizeOfData = impSize;
1569 imp->OrdinalHint = 0; // Only linking by name
1570 imp->TypeInfo = (s->nameType << 2) | s->importType;
1571
1572 // Write symbol name and DLL name.
1573 memcpy(dest: p, src: s->symbolName.data(), n: s->symbolName.size());
1574 p += s->symbolName.size() + 1;
1575 memcpy(dest: p, src: s->dllName.data(), n: s->dllName.size());
1576 MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1577 ImportFile *impFile = make<ImportFile>(args&: symtab.ctx, args&: mbref);
1578 symtab.ctx.driver.addFile(file: impFile);
1579}
1580

source code of lld/COFF/InputFiles.cpp