1 | //===- InputFiles.cpp -----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InputFiles.h" |
10 | #include "COFFLinkerContext.h" |
11 | #include "Chunks.h" |
12 | #include "Config.h" |
13 | #include "DebugTypes.h" |
14 | #include "Driver.h" |
15 | #include "SymbolTable.h" |
16 | #include "Symbols.h" |
17 | #include "lld/Common/DWARF.h" |
18 | #include "llvm-c/lto.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/ADT/Twine.h" |
21 | #include "llvm/BinaryFormat/COFF.h" |
22 | #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" |
23 | #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" |
24 | #include "llvm/DebugInfo/CodeView/SymbolRecord.h" |
25 | #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" |
26 | #include "llvm/DebugInfo/PDB/Native/NativeSession.h" |
27 | #include "llvm/DebugInfo/PDB/Native/PDBFile.h" |
28 | #include "llvm/LTO/LTO.h" |
29 | #include "llvm/Object/Binary.h" |
30 | #include "llvm/Object/COFF.h" |
31 | #include "llvm/Support/Casting.h" |
32 | #include "llvm/Support/Endian.h" |
33 | #include "llvm/Support/Error.h" |
34 | #include "llvm/Support/ErrorOr.h" |
35 | #include "llvm/Support/FileSystem.h" |
36 | #include "llvm/Support/Path.h" |
37 | #include "llvm/Target/TargetOptions.h" |
38 | #include "llvm/TargetParser/Triple.h" |
39 | #include <cstring> |
40 | #include <optional> |
41 | #include <system_error> |
42 | #include <utility> |
43 | |
44 | using namespace llvm; |
45 | using namespace llvm::COFF; |
46 | using namespace llvm::codeview; |
47 | using namespace llvm::object; |
48 | using namespace llvm::support::endian; |
49 | using namespace lld; |
50 | using namespace lld::coff; |
51 | |
52 | using llvm::Triple; |
53 | using llvm::support::ulittle32_t; |
54 | |
55 | // Returns the last element of a path, which is supposed to be a filename. |
56 | static StringRef getBasename(StringRef path) { |
57 | return sys::path::filename(path, style: sys::path::Style::windows); |
58 | } |
59 | |
60 | // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". |
61 | std::string lld::toString(const coff::InputFile *file) { |
62 | if (!file) |
63 | return "<internal>" ; |
64 | if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind) |
65 | return std::string(file->getName()); |
66 | |
67 | return (getBasename(path: file->parentName) + "(" + getBasename(path: file->getName()) + |
68 | ")" ) |
69 | .str(); |
70 | } |
71 | |
72 | /// Checks that Source is compatible with being a weak alias to Target. |
73 | /// If Source is Undefined and has no weak alias set, makes it a weak |
74 | /// alias to Target. |
75 | static void checkAndSetWeakAlias(COFFLinkerContext &ctx, InputFile *f, |
76 | Symbol *source, Symbol *target) { |
77 | if (auto *u = dyn_cast<Undefined>(Val: source)) { |
78 | if (u->weakAlias && u->weakAlias != target) { |
79 | // Weak aliases as produced by GCC are named in the form |
80 | // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name |
81 | // of another symbol emitted near the weak symbol. |
82 | // Just use the definition from the first object file that defined |
83 | // this weak symbol. |
84 | if (ctx.config.allowDuplicateWeak) |
85 | return; |
86 | ctx.symtab.reportDuplicate(existing: source, newFile: f); |
87 | } |
88 | u->weakAlias = target; |
89 | } |
90 | } |
91 | |
92 | static bool ignoredSymbolName(StringRef name) { |
93 | return name == "@feat.00" || name == "@comp.id" ; |
94 | } |
95 | |
96 | ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m) |
97 | : InputFile(ctx, ArchiveKind, m) {} |
98 | |
99 | void ArchiveFile::parse() { |
100 | // Parse a MemoryBufferRef as an archive file. |
101 | file = CHECK(Archive::create(mb), this); |
102 | |
103 | // Read the symbol table to construct Lazy objects. |
104 | for (const Archive::Symbol &sym : file->symbols()) |
105 | ctx.symtab.addLazyArchive(f: this, sym); |
106 | } |
107 | |
108 | // Returns a buffer pointing to a member file containing a given symbol. |
109 | void ArchiveFile::addMember(const Archive::Symbol &sym) { |
110 | const Archive::Child &c = |
111 | CHECK(sym.getMember(), |
112 | "could not get the member for symbol " + toCOFFString(ctx, sym)); |
113 | |
114 | // Return an empty buffer if we have already returned the same buffer. |
115 | if (!seen.insert(V: c.getChildOffset()).second) |
116 | return; |
117 | |
118 | ctx.driver.enqueueArchiveMember(c, sym, parentName: getName()); |
119 | } |
120 | |
121 | std::vector<MemoryBufferRef> lld::coff::getArchiveMembers(Archive *file) { |
122 | std::vector<MemoryBufferRef> v; |
123 | Error err = Error::success(); |
124 | for (const Archive::Child &c : file->children(Err&: err)) { |
125 | MemoryBufferRef mbref = |
126 | CHECK(c.getMemoryBufferRef(), |
127 | file->getFileName() + |
128 | ": could not get the buffer for a child of the archive" ); |
129 | v.push_back(x: mbref); |
130 | } |
131 | if (err) |
132 | fatal(msg: file->getFileName() + |
133 | ": Archive::children failed: " + toString(E: std::move(err))); |
134 | return v; |
135 | } |
136 | |
137 | void ObjFile::parseLazy() { |
138 | // Native object file. |
139 | std::unique_ptr<Binary> coffObjPtr = CHECK(createBinary(mb), this); |
140 | COFFObjectFile *coffObj = cast<COFFObjectFile>(Val: coffObjPtr.get()); |
141 | uint32_t numSymbols = coffObj->getNumberOfSymbols(); |
142 | for (uint32_t i = 0; i < numSymbols; ++i) { |
143 | COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i)); |
144 | if (coffSym.isUndefined() || !coffSym.isExternal() || |
145 | coffSym.isWeakExternal()) |
146 | continue; |
147 | StringRef name = check(e: coffObj->getSymbolName(Symbol: coffSym)); |
148 | if (coffSym.isAbsolute() && ignoredSymbolName(name)) |
149 | continue; |
150 | ctx.symtab.addLazyObject(f: this, n: name); |
151 | i += coffSym.getNumberOfAuxSymbols(); |
152 | } |
153 | } |
154 | |
155 | void ObjFile::parse() { |
156 | // Parse a memory buffer as a COFF file. |
157 | std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this); |
158 | |
159 | if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) { |
160 | bin.release(); |
161 | coffObj.reset(p: obj); |
162 | } else { |
163 | fatal(msg: toString(file: this) + " is not a COFF file" ); |
164 | } |
165 | |
166 | // Read section and symbol tables. |
167 | initializeChunks(); |
168 | initializeSymbols(); |
169 | initializeFlags(); |
170 | initializeDependencies(); |
171 | } |
172 | |
173 | const coff_section *ObjFile::getSection(uint32_t i) { |
174 | auto sec = coffObj->getSection(index: i); |
175 | if (!sec) |
176 | fatal(msg: "getSection failed: #" + Twine(i) + ": " + toString(E: sec.takeError())); |
177 | return *sec; |
178 | } |
179 | |
180 | // We set SectionChunk pointers in the SparseChunks vector to this value |
181 | // temporarily to mark comdat sections as having an unknown resolution. As we |
182 | // walk the object file's symbol table, once we visit either a leader symbol or |
183 | // an associative section definition together with the parent comdat's leader, |
184 | // we set the pointer to either nullptr (to mark the section as discarded) or a |
185 | // valid SectionChunk for that section. |
186 | static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1); |
187 | |
188 | void ObjFile::initializeChunks() { |
189 | uint32_t numSections = coffObj->getNumberOfSections(); |
190 | sparseChunks.resize(new_size: numSections + 1); |
191 | for (uint32_t i = 1; i < numSections + 1; ++i) { |
192 | const coff_section *sec = getSection(i); |
193 | if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT) |
194 | sparseChunks[i] = pendingComdat; |
195 | else |
196 | sparseChunks[i] = readSection(sectionNumber: i, def: nullptr, leaderName: "" ); |
197 | } |
198 | } |
199 | |
200 | SectionChunk *ObjFile::readSection(uint32_t sectionNumber, |
201 | const coff_aux_section_definition *def, |
202 | StringRef leaderName) { |
203 | const coff_section *sec = getSection(i: sectionNumber); |
204 | |
205 | StringRef name; |
206 | if (Expected<StringRef> e = coffObj->getSectionName(Sec: sec)) |
207 | name = *e; |
208 | else |
209 | fatal(msg: "getSectionName failed: #" + Twine(sectionNumber) + ": " + |
210 | toString(E: e.takeError())); |
211 | |
212 | if (name == ".drectve" ) { |
213 | ArrayRef<uint8_t> data; |
214 | cantFail(Err: coffObj->getSectionContents(Sec: sec, Res&: data)); |
215 | directives = StringRef((const char *)data.data(), data.size()); |
216 | return nullptr; |
217 | } |
218 | |
219 | if (name == ".llvm_addrsig" ) { |
220 | addrsigSec = sec; |
221 | return nullptr; |
222 | } |
223 | |
224 | if (name == ".llvm.call-graph-profile" ) { |
225 | callgraphSec = sec; |
226 | return nullptr; |
227 | } |
228 | |
229 | // Object files may have DWARF debug info or MS CodeView debug info |
230 | // (or both). |
231 | // |
232 | // DWARF sections don't need any special handling from the perspective |
233 | // of the linker; they are just a data section containing relocations. |
234 | // We can just link them to complete debug info. |
235 | // |
236 | // CodeView needs linker support. We need to interpret debug info, |
237 | // and then write it to a separate .pdb file. |
238 | |
239 | // Ignore DWARF debug info unless requested to be included. |
240 | if (!ctx.config.includeDwarfChunks && name.starts_with(Prefix: ".debug_" )) |
241 | return nullptr; |
242 | |
243 | if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) |
244 | return nullptr; |
245 | auto *c = make<SectionChunk>(args: this, args&: sec); |
246 | if (def) |
247 | c->checksum = def->CheckSum; |
248 | |
249 | // CodeView sections are stored to a different vector because they are not |
250 | // linked in the regular manner. |
251 | if (c->isCodeView()) |
252 | debugChunks.push_back(x: c); |
253 | else if (name == ".gfids$y" ) |
254 | guardFidChunks.push_back(x: c); |
255 | else if (name == ".giats$y" ) |
256 | guardIATChunks.push_back(x: c); |
257 | else if (name == ".gljmp$y" ) |
258 | guardLJmpChunks.push_back(x: c); |
259 | else if (name == ".gehcont$y" ) |
260 | guardEHContChunks.push_back(x: c); |
261 | else if (name == ".sxdata" ) |
262 | sxDataChunks.push_back(x: c); |
263 | else if (ctx.config.tailMerge && sec->NumberOfRelocations == 0 && |
264 | name == ".rdata" && leaderName.starts_with(Prefix: "??_C@" )) |
265 | // COFF sections that look like string literal sections (i.e. no |
266 | // relocations, in .rdata, leader symbol name matches the MSVC name mangling |
267 | // for string literals) are subject to string tail merging. |
268 | MergeChunk::addSection(ctx, c); |
269 | else if (name == ".rsrc" || name.starts_with(Prefix: ".rsrc$" )) |
270 | resourceChunks.push_back(x: c); |
271 | else |
272 | chunks.push_back(x: c); |
273 | |
274 | return c; |
275 | } |
276 | |
277 | void ObjFile::includeResourceChunks() { |
278 | chunks.insert(position: chunks.end(), first: resourceChunks.begin(), last: resourceChunks.end()); |
279 | } |
280 | |
281 | void ObjFile::readAssociativeDefinition( |
282 | COFFSymbolRef sym, const coff_aux_section_definition *def) { |
283 | readAssociativeDefinition(coffSym: sym, def, parentSection: def->getNumber(IsBigObj: sym.isBigObj())); |
284 | } |
285 | |
286 | void ObjFile::readAssociativeDefinition(COFFSymbolRef sym, |
287 | const coff_aux_section_definition *def, |
288 | uint32_t parentIndex) { |
289 | SectionChunk *parent = sparseChunks[parentIndex]; |
290 | int32_t sectionNumber = sym.getSectionNumber(); |
291 | |
292 | auto diag = [&]() { |
293 | StringRef name = check(e: coffObj->getSymbolName(Symbol: sym)); |
294 | |
295 | StringRef parentName; |
296 | const coff_section *parentSec = getSection(i: parentIndex); |
297 | if (Expected<StringRef> e = coffObj->getSectionName(Sec: parentSec)) |
298 | parentName = *e; |
299 | error(msg: toString(file: this) + ": associative comdat " + name + " (sec " + |
300 | Twine(sectionNumber) + ") has invalid reference to section " + |
301 | parentName + " (sec " + Twine(parentIndex) + ")" ); |
302 | }; |
303 | |
304 | if (parent == pendingComdat) { |
305 | // This can happen if an associative comdat refers to another associative |
306 | // comdat that appears after it (invalid per COFF spec) or to a section |
307 | // without any symbols. |
308 | diag(); |
309 | return; |
310 | } |
311 | |
312 | // Check whether the parent is prevailing. If it is, so are we, and we read |
313 | // the section; otherwise mark it as discarded. |
314 | if (parent) { |
315 | SectionChunk *c = readSection(sectionNumber, def, leaderName: "" ); |
316 | sparseChunks[sectionNumber] = c; |
317 | if (c) { |
318 | c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE; |
319 | parent->addAssociative(child: c); |
320 | } |
321 | } else { |
322 | sparseChunks[sectionNumber] = nullptr; |
323 | } |
324 | } |
325 | |
326 | void ObjFile::recordPrevailingSymbolForMingw( |
327 | COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) { |
328 | // For comdat symbols in executable sections, where this is the copy |
329 | // of the section chunk we actually include instead of discarding it, |
330 | // add the symbol to a map to allow using it for implicitly |
331 | // associating .[px]data$<func> sections to it. |
332 | // Use the suffix from the .text$<func> instead of the leader symbol |
333 | // name, for cases where the names differ (i386 mangling/decorations, |
334 | // cases where the leader is a weak symbol named .weak.func.default*). |
335 | int32_t sectionNumber = sym.getSectionNumber(); |
336 | SectionChunk *sc = sparseChunks[sectionNumber]; |
337 | if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) { |
338 | StringRef name = sc->getSectionName().split(Separator: '$').second; |
339 | prevailingSectionMap[name] = sectionNumber; |
340 | } |
341 | } |
342 | |
343 | void ObjFile::maybeAssociateSEHForMingw( |
344 | COFFSymbolRef sym, const coff_aux_section_definition *def, |
345 | const DenseMap<StringRef, uint32_t> &prevailingSectionMap) { |
346 | StringRef name = check(e: coffObj->getSymbolName(Symbol: sym)); |
347 | if (name.consume_front(Prefix: ".pdata$" ) || name.consume_front(Prefix: ".xdata$" ) || |
348 | name.consume_front(Prefix: ".eh_frame$" )) { |
349 | // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly |
350 | // associative to the symbol <func>. |
351 | auto parentSym = prevailingSectionMap.find(Val: name); |
352 | if (parentSym != prevailingSectionMap.end()) |
353 | readAssociativeDefinition(sym, def, parentIndex: parentSym->second); |
354 | } |
355 | } |
356 | |
357 | Symbol *ObjFile::createRegular(COFFSymbolRef sym) { |
358 | SectionChunk *sc = sparseChunks[sym.getSectionNumber()]; |
359 | if (sym.isExternal()) { |
360 | StringRef name = check(e: coffObj->getSymbolName(Symbol: sym)); |
361 | if (sc) |
362 | return ctx.symtab.addRegular(f: this, n: name, s: sym.getGeneric(), c: sc, |
363 | sectionOffset: sym.getValue()); |
364 | // For MinGW symbols named .weak.* that point to a discarded section, |
365 | // don't create an Undefined symbol. If nothing ever refers to the symbol, |
366 | // everything should be fine. If something actually refers to the symbol |
367 | // (e.g. the undefined weak alias), linking will fail due to undefined |
368 | // references at the end. |
369 | if (ctx.config.mingw && name.starts_with(Prefix: ".weak." )) |
370 | return nullptr; |
371 | return ctx.symtab.addUndefined(name, f: this, isWeakAlias: false); |
372 | } |
373 | if (sc) |
374 | return make<DefinedRegular>(args: this, /*Name*/ args: "" , /*IsCOMDAT*/ args: false, |
375 | /*IsExternal*/ args: false, args: sym.getGeneric(), args&: sc); |
376 | return nullptr; |
377 | } |
378 | |
379 | void ObjFile::initializeSymbols() { |
380 | uint32_t numSymbols = coffObj->getNumberOfSymbols(); |
381 | symbols.resize(new_size: numSymbols); |
382 | |
383 | SmallVector<std::pair<Symbol *, uint32_t>, 8> weakAliases; |
384 | std::vector<uint32_t> pendingIndexes; |
385 | pendingIndexes.reserve(n: numSymbols); |
386 | |
387 | DenseMap<StringRef, uint32_t> prevailingSectionMap; |
388 | std::vector<const coff_aux_section_definition *> comdatDefs( |
389 | coffObj->getNumberOfSections() + 1); |
390 | |
391 | for (uint32_t i = 0; i < numSymbols; ++i) { |
392 | COFFSymbolRef coffSym = check(e: coffObj->getSymbol(index: i)); |
393 | bool prevailingComdat; |
394 | if (coffSym.isUndefined()) { |
395 | symbols[i] = createUndefined(sym: coffSym); |
396 | } else if (coffSym.isWeakExternal()) { |
397 | symbols[i] = createUndefined(sym: coffSym); |
398 | uint32_t tagIndex = coffSym.getAux<coff_aux_weak_external>()->TagIndex; |
399 | weakAliases.emplace_back(Args&: symbols[i], Args&: tagIndex); |
400 | } else if (std::optional<Symbol *> optSym = |
401 | createDefined(sym: coffSym, comdatDefs, prevailingComdat)) { |
402 | symbols[i] = *optSym; |
403 | if (ctx.config.mingw && prevailingComdat) |
404 | recordPrevailingSymbolForMingw(sym: coffSym, prevailingSectionMap); |
405 | } else { |
406 | // createDefined() returns std::nullopt if a symbol belongs to a section |
407 | // that was pending at the point when the symbol was read. This can happen |
408 | // in two cases: |
409 | // 1) section definition symbol for a comdat leader; |
410 | // 2) symbol belongs to a comdat section associated with another section. |
411 | // In both of these cases, we can expect the section to be resolved by |
412 | // the time we finish visiting the remaining symbols in the symbol |
413 | // table. So we postpone the handling of this symbol until that time. |
414 | pendingIndexes.push_back(x: i); |
415 | } |
416 | i += coffSym.getNumberOfAuxSymbols(); |
417 | } |
418 | |
419 | for (uint32_t i : pendingIndexes) { |
420 | COFFSymbolRef sym = check(e: coffObj->getSymbol(index: i)); |
421 | if (const coff_aux_section_definition *def = sym.getSectionDefinition()) { |
422 | if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) |
423 | readAssociativeDefinition(sym, def); |
424 | else if (ctx.config.mingw) |
425 | maybeAssociateSEHForMingw(sym, def, prevailingSectionMap); |
426 | } |
427 | if (sparseChunks[sym.getSectionNumber()] == pendingComdat) { |
428 | StringRef name = check(e: coffObj->getSymbolName(Symbol: sym)); |
429 | log(msg: "comdat section " + name + |
430 | " without leader and unassociated, discarding" ); |
431 | continue; |
432 | } |
433 | symbols[i] = createRegular(sym); |
434 | } |
435 | |
436 | for (auto &kv : weakAliases) { |
437 | Symbol *sym = kv.first; |
438 | uint32_t idx = kv.second; |
439 | checkAndSetWeakAlias(ctx, f: this, source: sym, target: symbols[idx]); |
440 | } |
441 | |
442 | // Free the memory used by sparseChunks now that symbol loading is finished. |
443 | decltype(sparseChunks)().swap(x&: sparseChunks); |
444 | } |
445 | |
446 | Symbol *ObjFile::createUndefined(COFFSymbolRef sym) { |
447 | StringRef name = check(e: coffObj->getSymbolName(Symbol: sym)); |
448 | return ctx.symtab.addUndefined(name, f: this, isWeakAlias: sym.isWeakExternal()); |
449 | } |
450 | |
451 | static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj, |
452 | int32_t section) { |
453 | uint32_t numSymbols = obj->getNumberOfSymbols(); |
454 | for (uint32_t i = 0; i < numSymbols; ++i) { |
455 | COFFSymbolRef sym = check(e: obj->getSymbol(index: i)); |
456 | if (sym.getSectionNumber() != section) |
457 | continue; |
458 | if (const coff_aux_section_definition *def = sym.getSectionDefinition()) |
459 | return def; |
460 | } |
461 | return nullptr; |
462 | } |
463 | |
464 | void ObjFile::handleComdatSelection( |
465 | COFFSymbolRef sym, COMDATType &selection, bool &prevailing, |
466 | DefinedRegular *leader, |
467 | const llvm::object::coff_aux_section_definition *def) { |
468 | if (prevailing) |
469 | return; |
470 | // There's already an existing comdat for this symbol: `Leader`. |
471 | // Use the comdats's selection field to determine if the new |
472 | // symbol in `Sym` should be discarded, produce a duplicate symbol |
473 | // error, etc. |
474 | |
475 | SectionChunk *leaderChunk = leader->getChunk(); |
476 | COMDATType leaderSelection = leaderChunk->selection; |
477 | |
478 | assert(leader->data && "Comdat leader without SectionChunk?" ); |
479 | if (isa<BitcodeFile>(Val: leader->file)) { |
480 | // If the leader is only a LTO symbol, we don't know e.g. its final size |
481 | // yet, so we can't do the full strict comdat selection checking yet. |
482 | selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY; |
483 | } |
484 | |
485 | if ((selection == IMAGE_COMDAT_SELECT_ANY && |
486 | leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) || |
487 | (selection == IMAGE_COMDAT_SELECT_LARGEST && |
488 | leaderSelection == IMAGE_COMDAT_SELECT_ANY)) { |
489 | // cl.exe picks "any" for vftables when building with /GR- and |
490 | // "largest" when building with /GR. To be able to link object files |
491 | // compiled with each flag, "any" and "largest" are merged as "largest". |
492 | leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST; |
493 | } |
494 | |
495 | // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as". |
496 | // Clang on the other hand picks "any". To be able to link two object files |
497 | // with a __declspec(selectany) declaration, one compiled with gcc and the |
498 | // other with clang, we merge them as proper "same size as" |
499 | if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY && |
500 | leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) || |
501 | (selection == IMAGE_COMDAT_SELECT_SAME_SIZE && |
502 | leaderSelection == IMAGE_COMDAT_SELECT_ANY))) { |
503 | leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE; |
504 | } |
505 | |
506 | // Other than that, comdat selections must match. This is a bit more |
507 | // strict than link.exe which allows merging "any" and "largest" if "any" |
508 | // is the first symbol the linker sees, and it allows merging "largest" |
509 | // with everything (!) if "largest" is the first symbol the linker sees. |
510 | // Making this symmetric independent of which selection is seen first |
511 | // seems better though. |
512 | // (This behavior matches ModuleLinker::getComdatResult().) |
513 | if (selection != leaderSelection) { |
514 | log(msg: ("conflicting comdat type for " + toString(ctx, b&: *leader) + ": " + |
515 | Twine((int)leaderSelection) + " in " + toString(file: leader->getFile()) + |
516 | " and " + Twine((int)selection) + " in " + toString(file: this)) |
517 | .str()); |
518 | ctx.symtab.reportDuplicate(existing: leader, newFile: this); |
519 | return; |
520 | } |
521 | |
522 | switch (selection) { |
523 | case IMAGE_COMDAT_SELECT_NODUPLICATES: |
524 | ctx.symtab.reportDuplicate(existing: leader, newFile: this); |
525 | break; |
526 | |
527 | case IMAGE_COMDAT_SELECT_ANY: |
528 | // Nothing to do. |
529 | break; |
530 | |
531 | case IMAGE_COMDAT_SELECT_SAME_SIZE: |
532 | if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) { |
533 | if (!ctx.config.mingw) { |
534 | ctx.symtab.reportDuplicate(existing: leader, newFile: this); |
535 | } else { |
536 | const coff_aux_section_definition *leaderDef = nullptr; |
537 | if (leaderChunk->file) |
538 | leaderDef = findSectionDef(obj: leaderChunk->file->getCOFFObj(), |
539 | section: leaderChunk->getSectionNumber()); |
540 | if (!leaderDef || leaderDef->Length != def->Length) |
541 | ctx.symtab.reportDuplicate(existing: leader, newFile: this); |
542 | } |
543 | } |
544 | break; |
545 | |
546 | case IMAGE_COMDAT_SELECT_EXACT_MATCH: { |
547 | SectionChunk newChunk(this, getSection(sym)); |
548 | // link.exe only compares section contents here and doesn't complain |
549 | // if the two comdat sections have e.g. different alignment. |
550 | // Match that. |
551 | if (leaderChunk->getContents() != newChunk.getContents()) |
552 | ctx.symtab.reportDuplicate(existing: leader, newFile: this, newSc: &newChunk, newSectionOffset: sym.getValue()); |
553 | break; |
554 | } |
555 | |
556 | case IMAGE_COMDAT_SELECT_ASSOCIATIVE: |
557 | // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE. |
558 | // (This means lld-link doesn't produce duplicate symbol errors for |
559 | // associative comdats while link.exe does, but associate comdats |
560 | // are never extern in practice.) |
561 | llvm_unreachable("createDefined not called for associative comdats" ); |
562 | |
563 | case IMAGE_COMDAT_SELECT_LARGEST: |
564 | if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) { |
565 | // Replace the existing comdat symbol with the new one. |
566 | StringRef name = check(e: coffObj->getSymbolName(Symbol: sym)); |
567 | // FIXME: This is incorrect: With /opt:noref, the previous sections |
568 | // make it into the final executable as well. Correct handling would |
569 | // be to undo reading of the whole old section that's being replaced, |
570 | // or doing one pass that determines what the final largest comdat |
571 | // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading |
572 | // only the largest one. |
573 | replaceSymbol<DefinedRegular>(s: leader, arg: this, arg&: name, /*IsCOMDAT*/ arg: true, |
574 | /*IsExternal*/ arg: true, arg: sym.getGeneric(), |
575 | arg: nullptr); |
576 | prevailing = true; |
577 | } |
578 | break; |
579 | |
580 | case IMAGE_COMDAT_SELECT_NEWEST: |
581 | llvm_unreachable("should have been rejected earlier" ); |
582 | } |
583 | } |
584 | |
585 | std::optional<Symbol *> ObjFile::createDefined( |
586 | COFFSymbolRef sym, |
587 | std::vector<const coff_aux_section_definition *> &comdatDefs, |
588 | bool &prevailing) { |
589 | prevailing = false; |
590 | auto getName = [&]() { return check(e: coffObj->getSymbolName(Symbol: sym)); }; |
591 | |
592 | if (sym.isCommon()) { |
593 | auto *c = make<CommonChunk>(args&: sym); |
594 | chunks.push_back(x: c); |
595 | return ctx.symtab.addCommon(f: this, n: getName(), size: sym.getValue(), |
596 | s: sym.getGeneric(), c); |
597 | } |
598 | |
599 | if (sym.isAbsolute()) { |
600 | StringRef name = getName(); |
601 | |
602 | if (name == "@feat.00" ) |
603 | feat00Flags = sym.getValue(); |
604 | // Skip special symbols. |
605 | if (ignoredSymbolName(name)) |
606 | return nullptr; |
607 | |
608 | if (sym.isExternal()) |
609 | return ctx.symtab.addAbsolute(n: name, s: sym); |
610 | return make<DefinedAbsolute>(args&: ctx, args&: name, args&: sym); |
611 | } |
612 | |
613 | int32_t sectionNumber = sym.getSectionNumber(); |
614 | if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) |
615 | return nullptr; |
616 | |
617 | if (llvm::COFF::isReservedSectionNumber(SectionNumber: sectionNumber)) |
618 | fatal(msg: toString(file: this) + ": " + getName() + |
619 | " should not refer to special section " + Twine(sectionNumber)); |
620 | |
621 | if ((uint32_t)sectionNumber >= sparseChunks.size()) |
622 | fatal(msg: toString(file: this) + ": " + getName() + |
623 | " should not refer to non-existent section " + Twine(sectionNumber)); |
624 | |
625 | // Comdat handling. |
626 | // A comdat symbol consists of two symbol table entries. |
627 | // The first symbol entry has the name of the section (e.g. .text), fixed |
628 | // values for the other fields, and one auxiliary record. |
629 | // The second symbol entry has the name of the comdat symbol, called the |
630 | // "comdat leader". |
631 | // When this function is called for the first symbol entry of a comdat, |
632 | // it sets comdatDefs and returns std::nullopt, and when it's called for the |
633 | // second symbol entry it reads comdatDefs and then sets it back to nullptr. |
634 | |
635 | // Handle comdat leader. |
636 | if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) { |
637 | comdatDefs[sectionNumber] = nullptr; |
638 | DefinedRegular *leader; |
639 | |
640 | if (sym.isExternal()) { |
641 | std::tie(args&: leader, args&: prevailing) = |
642 | ctx.symtab.addComdat(f: this, n: getName(), s: sym.getGeneric()); |
643 | } else { |
644 | leader = make<DefinedRegular>(args: this, /*Name*/ args: "" , /*IsCOMDAT*/ args: false, |
645 | /*IsExternal*/ args: false, args: sym.getGeneric()); |
646 | prevailing = true; |
647 | } |
648 | |
649 | if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES || |
650 | // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe |
651 | // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either. |
652 | def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) { |
653 | fatal(msg: "unknown comdat type " + std::to_string(val: (int)def->Selection) + |
654 | " for " + getName() + " in " + toString(file: this)); |
655 | } |
656 | COMDATType selection = (COMDATType)def->Selection; |
657 | |
658 | if (leader->isCOMDAT) |
659 | handleComdatSelection(sym, selection, prevailing, leader, def); |
660 | |
661 | if (prevailing) { |
662 | SectionChunk *c = readSection(sectionNumber, def, leaderName: getName()); |
663 | sparseChunks[sectionNumber] = c; |
664 | if (!c) |
665 | return nullptr; |
666 | c->sym = cast<DefinedRegular>(Val: leader); |
667 | c->selection = selection; |
668 | cast<DefinedRegular>(Val: leader)->data = &c->repl; |
669 | } else { |
670 | sparseChunks[sectionNumber] = nullptr; |
671 | } |
672 | return leader; |
673 | } |
674 | |
675 | // Prepare to handle the comdat leader symbol by setting the section's |
676 | // ComdatDefs pointer if we encounter a non-associative comdat. |
677 | if (sparseChunks[sectionNumber] == pendingComdat) { |
678 | if (const coff_aux_section_definition *def = sym.getSectionDefinition()) { |
679 | if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE) |
680 | comdatDefs[sectionNumber] = def; |
681 | } |
682 | return std::nullopt; |
683 | } |
684 | |
685 | return createRegular(sym); |
686 | } |
687 | |
688 | MachineTypes ObjFile::getMachineType() { |
689 | if (coffObj) |
690 | return static_cast<MachineTypes>(coffObj->getMachine()); |
691 | return IMAGE_FILE_MACHINE_UNKNOWN; |
692 | } |
693 | |
694 | ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) { |
695 | if (SectionChunk *sec = SectionChunk::findByName(sections: debugChunks, name: secName)) |
696 | return sec->consumeDebugMagic(); |
697 | return {}; |
698 | } |
699 | |
700 | // OBJ files systematically store critical information in a .debug$S stream, |
701 | // even if the TU was compiled with no debug info. At least two records are |
702 | // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the |
703 | // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is |
704 | // currently used to initialize the hotPatchable member. |
705 | void ObjFile::initializeFlags() { |
706 | ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$S" ); |
707 | if (data.empty()) |
708 | return; |
709 | |
710 | DebugSubsectionArray subsections; |
711 | |
712 | BinaryStreamReader reader(data, llvm::endianness::little); |
713 | ExitOnError exitOnErr; |
714 | exitOnErr(reader.readArray(Array&: subsections, Size: data.size())); |
715 | |
716 | for (const DebugSubsectionRecord &ss : subsections) { |
717 | if (ss.kind() != DebugSubsectionKind::Symbols) |
718 | continue; |
719 | |
720 | unsigned offset = 0; |
721 | |
722 | // Only parse the first two records. We are only looking for S_OBJNAME |
723 | // and S_COMPILE3, and they usually appear at the beginning of the |
724 | // stream. |
725 | for (unsigned i = 0; i < 2; ++i) { |
726 | Expected<CVSymbol> sym = readSymbolFromStream(Stream: ss.getRecordData(), Offset: offset); |
727 | if (!sym) { |
728 | consumeError(Err: sym.takeError()); |
729 | return; |
730 | } |
731 | if (sym->kind() == SymbolKind::S_COMPILE3) { |
732 | auto cs = |
733 | cantFail(ValOrErr: SymbolDeserializer::deserializeAs<Compile3Sym>(Symbol: sym.get())); |
734 | hotPatchable = |
735 | (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None; |
736 | } |
737 | if (sym->kind() == SymbolKind::S_OBJNAME) { |
738 | auto objName = cantFail(ValOrErr: SymbolDeserializer::deserializeAs<ObjNameSym>( |
739 | Symbol: sym.get())); |
740 | if (objName.Signature) |
741 | pchSignature = objName.Signature; |
742 | } |
743 | offset += sym->length(); |
744 | } |
745 | } |
746 | } |
747 | |
748 | // Depending on the compilation flags, OBJs can refer to external files, |
749 | // necessary to merge this OBJ into the final PDB. We currently support two |
750 | // types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu. |
751 | // And PDB type servers, when compiling with /Zi. This function extracts these |
752 | // dependencies and makes them available as a TpiSource interface (see |
753 | // DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular |
754 | // output even with /Yc and /Yu and with /Zi. |
755 | void ObjFile::initializeDependencies() { |
756 | if (!ctx.config.debug) |
757 | return; |
758 | |
759 | bool isPCH = false; |
760 | |
761 | ArrayRef<uint8_t> data = getDebugSection(secName: ".debug$P" ); |
762 | if (!data.empty()) |
763 | isPCH = true; |
764 | else |
765 | data = getDebugSection(secName: ".debug$T" ); |
766 | |
767 | // symbols but no types, make a plain, empty TpiSource anyway, because it |
768 | // simplifies adding the symbols later. |
769 | if (data.empty()) { |
770 | if (!debugChunks.empty()) |
771 | debugTypesObj = makeTpiSource(ctx, f: this); |
772 | return; |
773 | } |
774 | |
775 | // Get the first type record. It will indicate if this object uses a type |
776 | // server (/Zi) or a PCH file (/Yu). |
777 | CVTypeArray types; |
778 | BinaryStreamReader reader(data, llvm::endianness::little); |
779 | cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength())); |
780 | CVTypeArray::Iterator firstType = types.begin(); |
781 | if (firstType == types.end()) |
782 | return; |
783 | |
784 | // Remember the .debug$T or .debug$P section. |
785 | debugTypes = data; |
786 | |
787 | // This object file is a PCH file that others will depend on. |
788 | if (isPCH) { |
789 | debugTypesObj = makePrecompSource(ctx, file: this); |
790 | return; |
791 | } |
792 | |
793 | // This object file was compiled with /Zi. Enqueue the PDB dependency. |
794 | if (firstType->kind() == LF_TYPESERVER2) { |
795 | TypeServer2Record ts = cantFail( |
796 | ValOrErr: TypeDeserializer::deserializeAs<TypeServer2Record>(Data: firstType->data())); |
797 | debugTypesObj = makeUseTypeServerSource(ctx, file: this, ts); |
798 | enqueuePdbFile(path: ts.getName(), fromFile: this); |
799 | return; |
800 | } |
801 | |
802 | // This object was compiled with /Yu. It uses types from another object file |
803 | // with a matching signature. |
804 | if (firstType->kind() == LF_PRECOMP) { |
805 | PrecompRecord precomp = cantFail( |
806 | ValOrErr: TypeDeserializer::deserializeAs<PrecompRecord>(Data: firstType->data())); |
807 | // We're better off trusting the LF_PRECOMP signature. In some cases the |
808 | // S_OBJNAME record doesn't contain a valid PCH signature. |
809 | if (precomp.Signature) |
810 | pchSignature = precomp.Signature; |
811 | debugTypesObj = makeUsePrecompSource(ctx, file: this, ts: precomp); |
812 | // Drop the LF_PRECOMP record from the input stream. |
813 | debugTypes = debugTypes.drop_front(N: firstType->RecordData.size()); |
814 | return; |
815 | } |
816 | |
817 | // This is a plain old object file. |
818 | debugTypesObj = makeTpiSource(ctx, f: this); |
819 | } |
820 | |
821 | // Make a PDB path assuming the PDB is in the same folder as the OBJ |
822 | static std::string getPdbBaseName(ObjFile *file, StringRef tSPath) { |
823 | StringRef localPath = |
824 | !file->parentName.empty() ? file->parentName : file->getName(); |
825 | SmallString<128> path = sys::path::parent_path(path: localPath); |
826 | |
827 | // Currently, type server PDBs are only created by MSVC cl, which only runs |
828 | // on Windows, so we can assume type server paths are Windows style. |
829 | sys::path::append(path, |
830 | a: sys::path::filename(path: tSPath, style: sys::path::Style::windows)); |
831 | return std::string(path); |
832 | } |
833 | |
834 | // The casing of the PDB path stamped in the OBJ can differ from the actual path |
835 | // on disk. With this, we ensure to always use lowercase as a key for the |
836 | // pdbInputFileInstances map, at least on Windows. |
837 | static std::string normalizePdbPath(StringRef path) { |
838 | #if defined(_WIN32) |
839 | return path.lower(); |
840 | #else // LINUX |
841 | return std::string(path); |
842 | #endif |
843 | } |
844 | |
845 | // If existing, return the actual PDB path on disk. |
846 | static std::optional<std::string> findPdbPath(StringRef pdbPath, |
847 | ObjFile *dependentFile) { |
848 | // Ensure the file exists before anything else. In some cases, if the path |
849 | // points to a removable device, Driver::enqueuePath() would fail with an |
850 | // error (EAGAIN, "resource unavailable try again") which we want to skip |
851 | // silently. |
852 | if (llvm::sys::fs::exists(Path: pdbPath)) |
853 | return normalizePdbPath(path: pdbPath); |
854 | std::string ret = getPdbBaseName(file: dependentFile, tSPath: pdbPath); |
855 | if (llvm::sys::fs::exists(Path: ret)) |
856 | return normalizePdbPath(path: ret); |
857 | return std::nullopt; |
858 | } |
859 | |
860 | PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m) |
861 | : InputFile(ctx, PDBKind, m) {} |
862 | |
863 | PDBInputFile::~PDBInputFile() = default; |
864 | |
865 | PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx, |
866 | StringRef path, |
867 | ObjFile *fromFile) { |
868 | auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile); |
869 | if (!p) |
870 | return nullptr; |
871 | auto it = ctx.pdbInputFileInstances.find(x: *p); |
872 | if (it != ctx.pdbInputFileInstances.end()) |
873 | return it->second; |
874 | return nullptr; |
875 | } |
876 | |
877 | void PDBInputFile::parse() { |
878 | ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this; |
879 | |
880 | std::unique_ptr<pdb::IPDBSession> thisSession; |
881 | Error E = pdb::NativeSession::createFromPdb( |
882 | MB: MemoryBuffer::getMemBuffer(Ref: mb, RequiresNullTerminator: false), Session&: thisSession); |
883 | if (E) { |
884 | loadErrorStr.emplace(args: toString(E: std::move(E))); |
885 | return; // fail silently at this point - the error will be handled later, |
886 | // when merging the debug type stream |
887 | } |
888 | |
889 | session.reset(p: static_cast<pdb::NativeSession *>(thisSession.release())); |
890 | |
891 | pdb::PDBFile &pdbFile = session->getPDBFile(); |
892 | auto expectedInfo = pdbFile.getPDBInfoStream(); |
893 | // All PDB Files should have an Info stream. |
894 | if (!expectedInfo) { |
895 | loadErrorStr.emplace(args: toString(E: expectedInfo.takeError())); |
896 | return; |
897 | } |
898 | debugTypesObj = makeTypeServerSource(ctx, pdbInputFile: this); |
899 | } |
900 | |
901 | // Used only for DWARF debug info, which is not common (except in MinGW |
902 | // environments). This returns an optional pair of file name and line |
903 | // number for where the variable was defined. |
904 | std::optional<std::pair<StringRef, uint32_t>> |
905 | ObjFile::getVariableLocation(StringRef var) { |
906 | if (!dwarf) { |
907 | dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj())); |
908 | if (!dwarf) |
909 | return std::nullopt; |
910 | } |
911 | if (ctx.config.machine == I386) |
912 | var.consume_front(Prefix: "_" ); |
913 | std::optional<std::pair<std::string, unsigned>> ret = |
914 | dwarf->getVariableLoc(name: var); |
915 | if (!ret) |
916 | return std::nullopt; |
917 | return std::make_pair(x: saver().save(S: ret->first), y&: ret->second); |
918 | } |
919 | |
920 | // Used only for DWARF debug info, which is not common (except in MinGW |
921 | // environments). |
922 | std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset, |
923 | uint32_t sectionIndex) { |
924 | if (!dwarf) { |
925 | dwarf = make<DWARFCache>(args: DWARFContext::create(Obj: *getCOFFObj())); |
926 | if (!dwarf) |
927 | return std::nullopt; |
928 | } |
929 | |
930 | return dwarf->getDILineInfo(offset, sectionIndex); |
931 | } |
932 | |
933 | void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) { |
934 | auto p = findPdbPath(pdbPath: path.str(), dependentFile: fromFile); |
935 | if (!p) |
936 | return; |
937 | auto it = ctx.pdbInputFileInstances.emplace(args&: *p, args: nullptr); |
938 | if (!it.second) |
939 | return; // already scheduled for load |
940 | ctx.driver.enqueuePDB(Path: *p); |
941 | } |
942 | |
943 | ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m) |
944 | : InputFile(ctx, ImportKind, m), live(!ctx.config.doGC), thunkLive(live) {} |
945 | |
946 | void ImportFile::parse() { |
947 | const auto *hdr = |
948 | reinterpret_cast<const coff_import_header *>(mb.getBufferStart()); |
949 | |
950 | // Check if the total size is valid. |
951 | if (mb.getBufferSize() < sizeof(*hdr) || |
952 | mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData) |
953 | fatal(msg: "broken import library" ); |
954 | |
955 | // Read names and create an __imp_ symbol. |
956 | StringRef buf = mb.getBuffer().substr(Start: sizeof(*hdr)); |
957 | StringRef name = saver().save(S: buf.split(Separator: '\0').first); |
958 | StringRef impName = saver().save(S: "__imp_" + name); |
959 | buf = buf.substr(Start: name.size() + 1); |
960 | dllName = buf.split(Separator: '\0').first; |
961 | StringRef extName; |
962 | switch (hdr->getNameType()) { |
963 | case IMPORT_ORDINAL: |
964 | extName = "" ; |
965 | break; |
966 | case IMPORT_NAME: |
967 | extName = name; |
968 | break; |
969 | case IMPORT_NAME_NOPREFIX: |
970 | extName = ltrim1(s: name, chars: "?@_" ); |
971 | break; |
972 | case IMPORT_NAME_UNDECORATE: |
973 | extName = ltrim1(s: name, chars: "?@_" ); |
974 | extName = extName.substr(Start: 0, N: extName.find(C: '@')); |
975 | break; |
976 | case IMPORT_NAME_EXPORTAS: |
977 | extName = buf.substr(Start: dllName.size() + 1).split(Separator: '\0').first; |
978 | break; |
979 | } |
980 | |
981 | this->hdr = hdr; |
982 | externalName = extName; |
983 | |
984 | impSym = ctx.symtab.addImportData(n: impName, f: this); |
985 | // If this was a duplicate, we logged an error but may continue; |
986 | // in this case, impSym is nullptr. |
987 | if (!impSym) |
988 | return; |
989 | |
990 | if (hdr->getType() == llvm::COFF::IMPORT_CONST) |
991 | static_cast<void>(ctx.symtab.addImportData(n: name, f: this)); |
992 | |
993 | // If type is function, we need to create a thunk which jump to an |
994 | // address pointed by the __imp_ symbol. (This allows you to call |
995 | // DLL functions just like regular non-DLL functions.) |
996 | if (hdr->getType() == llvm::COFF::IMPORT_CODE) |
997 | thunkSym = ctx.symtab.addImportThunk( |
998 | name, s: cast_or_null<DefinedImportData>(Val: impSym), machine: hdr->Machine); |
999 | } |
1000 | |
1001 | BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, |
1002 | StringRef archiveName, uint64_t offsetInArchive, |
1003 | bool lazy) |
1004 | : InputFile(ctx, BitcodeKind, mb, lazy) { |
1005 | std::string path = mb.getBufferIdentifier().str(); |
1006 | if (ctx.config.thinLTOIndexOnly) |
1007 | path = replaceThinLTOSuffix(path: mb.getBufferIdentifier(), |
1008 | suffix: ctx.config.thinLTOObjectSuffixReplace.first, |
1009 | repl: ctx.config.thinLTOObjectSuffixReplace.second); |
1010 | |
1011 | // ThinLTO assumes that all MemoryBufferRefs given to it have a unique |
1012 | // name. If two archives define two members with the same name, this |
1013 | // causes a collision which result in only one of the objects being taken |
1014 | // into consideration at LTO time (which very likely causes undefined |
1015 | // symbols later in the link stage). So we append file offset to make |
1016 | // filename unique. |
1017 | MemoryBufferRef mbref(mb.getBuffer(), |
1018 | saver().save(S: archiveName.empty() |
1019 | ? path |
1020 | : archiveName + |
1021 | sys::path::filename(path) + |
1022 | utostr(X: offsetInArchive))); |
1023 | |
1024 | obj = check(e: lto::InputFile::create(Object: mbref)); |
1025 | } |
1026 | |
1027 | BitcodeFile::~BitcodeFile() = default; |
1028 | |
1029 | void BitcodeFile::parse() { |
1030 | llvm::StringSaver &saver = lld::saver(); |
1031 | |
1032 | std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size()); |
1033 | for (size_t i = 0; i != obj->getComdatTable().size(); ++i) |
1034 | // FIXME: Check nodeduplicate |
1035 | comdat[i] = |
1036 | ctx.symtab.addComdat(f: this, n: saver.save(S: obj->getComdatTable()[i].first)); |
1037 | for (const lto::InputFile::Symbol &objSym : obj->symbols()) { |
1038 | StringRef symName = saver.save(S: objSym.getName()); |
1039 | int comdatIndex = objSym.getComdatIndex(); |
1040 | Symbol *sym; |
1041 | SectionChunk *fakeSC = nullptr; |
1042 | if (objSym.isExecutable()) |
1043 | fakeSC = &ctx.ltoTextSectionChunk.chunk; |
1044 | else |
1045 | fakeSC = &ctx.ltoDataSectionChunk.chunk; |
1046 | if (objSym.isUndefined()) { |
1047 | sym = ctx.symtab.addUndefined(name: symName, f: this, isWeakAlias: false); |
1048 | if (objSym.isWeak()) |
1049 | sym->deferUndefined = true; |
1050 | // If one LTO object file references (i.e. has an undefined reference to) |
1051 | // a symbol with an __imp_ prefix, the LTO compilation itself sees it |
1052 | // as unprefixed but with a dllimport attribute instead, and doesn't |
1053 | // understand the relation to a concrete IR symbol with the __imp_ prefix. |
1054 | // |
1055 | // For such cases, mark the symbol as used in a regular object (i.e. the |
1056 | // symbol must be retained) so that the linker can associate the |
1057 | // references in the end. If the symbol is defined in an import library |
1058 | // or in a regular object file, this has no effect, but if it is defined |
1059 | // in another LTO object file, this makes sure it is kept, to fulfill |
1060 | // the reference when linking the output of the LTO compilation. |
1061 | if (symName.starts_with(Prefix: "__imp_" )) |
1062 | sym->isUsedInRegularObj = true; |
1063 | } else if (objSym.isCommon()) { |
1064 | sym = ctx.symtab.addCommon(f: this, n: symName, size: objSym.getCommonSize()); |
1065 | } else if (objSym.isWeak() && objSym.isIndirect()) { |
1066 | // Weak external. |
1067 | sym = ctx.symtab.addUndefined(name: symName, f: this, isWeakAlias: true); |
1068 | std::string fallback = std::string(objSym.getCOFFWeakExternalFallback()); |
1069 | Symbol *alias = ctx.symtab.addUndefined(name: saver.save(S: fallback)); |
1070 | checkAndSetWeakAlias(ctx, f: this, source: sym, target: alias); |
1071 | } else if (comdatIndex != -1) { |
1072 | if (symName == obj->getComdatTable()[comdatIndex].first) { |
1073 | sym = comdat[comdatIndex].first; |
1074 | if (cast<DefinedRegular>(Val: sym)->data == nullptr) |
1075 | cast<DefinedRegular>(Val: sym)->data = &fakeSC->repl; |
1076 | } else if (comdat[comdatIndex].second) { |
1077 | sym = ctx.symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC); |
1078 | } else { |
1079 | sym = ctx.symtab.addUndefined(name: symName, f: this, isWeakAlias: false); |
1080 | } |
1081 | } else { |
1082 | sym = ctx.symtab.addRegular(f: this, n: symName, s: nullptr, c: fakeSC, sectionOffset: 0, |
1083 | isWeak: objSym.isWeak()); |
1084 | } |
1085 | symbols.push_back(x: sym); |
1086 | if (objSym.isUsed()) |
1087 | ctx.config.gcroot.push_back(x: sym); |
1088 | } |
1089 | directives = saver.save(S: obj->getCOFFLinkerOpts()); |
1090 | } |
1091 | |
1092 | void BitcodeFile::parseLazy() { |
1093 | for (const lto::InputFile::Symbol &sym : obj->symbols()) |
1094 | if (!sym.isUndefined()) |
1095 | ctx.symtab.addLazyObject(f: this, n: sym.getName()); |
1096 | } |
1097 | |
1098 | MachineTypes BitcodeFile::getMachineType() { |
1099 | switch (Triple(obj->getTargetTriple()).getArch()) { |
1100 | case Triple::x86_64: |
1101 | return AMD64; |
1102 | case Triple::x86: |
1103 | return I386; |
1104 | case Triple::arm: |
1105 | case Triple::thumb: |
1106 | return ARMNT; |
1107 | case Triple::aarch64: |
1108 | return ARM64; |
1109 | default: |
1110 | return IMAGE_FILE_MACHINE_UNKNOWN; |
1111 | } |
1112 | } |
1113 | |
1114 | std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix, |
1115 | StringRef repl) { |
1116 | if (path.consume_back(Suffix: suffix)) |
1117 | return (path + repl).str(); |
1118 | return std::string(path); |
1119 | } |
1120 | |
1121 | static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) { |
1122 | for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) { |
1123 | const coff_section *sec = CHECK(coffObj->getSection(i), file); |
1124 | if (rva >= sec->VirtualAddress && |
1125 | rva <= sec->VirtualAddress + sec->VirtualSize) { |
1126 | return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0; |
1127 | } |
1128 | } |
1129 | return false; |
1130 | } |
1131 | |
1132 | void DLLFile::parse() { |
1133 | // Parse a memory buffer as a PE-COFF executable. |
1134 | std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this); |
1135 | |
1136 | if (auto *obj = dyn_cast<COFFObjectFile>(Val: bin.get())) { |
1137 | bin.release(); |
1138 | coffObj.reset(p: obj); |
1139 | } else { |
1140 | error(msg: toString(file: this) + " is not a COFF file" ); |
1141 | return; |
1142 | } |
1143 | |
1144 | if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) { |
1145 | error(msg: toString(file: this) + " is not a PE-COFF executable" ); |
1146 | return; |
1147 | } |
1148 | |
1149 | for (const auto &exp : coffObj->export_directories()) { |
1150 | StringRef dllName, symbolName; |
1151 | uint32_t exportRVA; |
1152 | checkError(e: exp.getDllName(Result&: dllName)); |
1153 | checkError(e: exp.getSymbolName(Result&: symbolName)); |
1154 | checkError(e: exp.getExportRVA(Result&: exportRVA)); |
1155 | |
1156 | if (symbolName.empty()) |
1157 | continue; |
1158 | |
1159 | bool code = isRVACode(coffObj: coffObj.get(), rva: exportRVA, file: this); |
1160 | |
1161 | Symbol *s = make<Symbol>(); |
1162 | s->dllName = dllName; |
1163 | s->symbolName = symbolName; |
1164 | s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA; |
1165 | s->nameType = ImportNameType::IMPORT_NAME; |
1166 | |
1167 | if (coffObj->getMachine() == I386) { |
1168 | s->symbolName = symbolName = saver().save(S: "_" + symbolName); |
1169 | s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX; |
1170 | } |
1171 | |
1172 | StringRef impName = saver().save(S: "__imp_" + symbolName); |
1173 | ctx.symtab.addLazyDLLSymbol(f: this, sym: s, n: impName); |
1174 | if (code) |
1175 | ctx.symtab.addLazyDLLSymbol(f: this, sym: s, n: symbolName); |
1176 | } |
1177 | } |
1178 | |
1179 | MachineTypes DLLFile::getMachineType() { |
1180 | if (coffObj) |
1181 | return static_cast<MachineTypes>(coffObj->getMachine()); |
1182 | return IMAGE_FILE_MACHINE_UNKNOWN; |
1183 | } |
1184 | |
1185 | void DLLFile::makeImport(DLLFile::Symbol *s) { |
1186 | if (!seen.insert(key: s->symbolName).second) |
1187 | return; |
1188 | |
1189 | size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs |
1190 | size_t size = sizeof(coff_import_header) + impSize; |
1191 | char *buf = bAlloc().Allocate<char>(Num: size); |
1192 | memset(s: buf, c: 0, n: size); |
1193 | char *p = buf; |
1194 | auto *imp = reinterpret_cast<coff_import_header *>(p); |
1195 | p += sizeof(*imp); |
1196 | imp->Sig2 = 0xFFFF; |
1197 | imp->Machine = coffObj->getMachine(); |
1198 | imp->SizeOfData = impSize; |
1199 | imp->OrdinalHint = 0; // Only linking by name |
1200 | imp->TypeInfo = (s->nameType << 2) | s->importType; |
1201 | |
1202 | // Write symbol name and DLL name. |
1203 | memcpy(dest: p, src: s->symbolName.data(), n: s->symbolName.size()); |
1204 | p += s->symbolName.size() + 1; |
1205 | memcpy(dest: p, src: s->dllName.data(), n: s->dllName.size()); |
1206 | MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName); |
1207 | ImportFile *impFile = make<ImportFile>(args&: ctx, args&: mbref); |
1208 | ctx.symtab.addFile(file: impFile); |
1209 | } |
1210 | |