1 | //===- InputSection.cpp ---------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InputSection.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputFiles.h" |
13 | #include "OutputSegment.h" |
14 | #include "Symbols.h" |
15 | #include "SyntheticSections.h" |
16 | #include "Target.h" |
17 | #include "UnwindInfoSection.h" |
18 | #include "Writer.h" |
19 | |
20 | #include "lld/Common/ErrorHandler.h" |
21 | #include "lld/Common/Memory.h" |
22 | #include "llvm/Support/Endian.h" |
23 | #include "llvm/Support/xxhash.h" |
24 | |
25 | using namespace llvm; |
26 | using namespace llvm::MachO; |
27 | using namespace llvm::support; |
28 | using namespace lld; |
29 | using namespace lld::macho; |
30 | |
31 | // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector |
32 | // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL), |
33 | // so account for that. |
34 | static_assert(sizeof(void *) != 8 || |
35 | sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88, |
36 | "Try to minimize ConcatInputSection's size, we create many " |
37 | "instances of it" ); |
38 | |
39 | std::vector<ConcatInputSection *> macho::inputSections; |
40 | int macho::inputSectionsOrder = 0; |
41 | |
42 | // Call this function to add a new InputSection and have it routed to the |
43 | // appropriate container. Depending on its type and current config, it will |
44 | // either be added to 'inputSections' vector or to a synthetic section. |
45 | void lld::macho::addInputSection(InputSection *inputSection) { |
46 | if (auto *isec = dyn_cast<ConcatInputSection>(Val: inputSection)) { |
47 | if (isec->isCoalescedWeak()) |
48 | return; |
49 | if (config->emitRelativeMethodLists && |
50 | ObjCMethListSection::isMethodList(isec)) { |
51 | if (in.objcMethList->inputOrder == UnspecifiedInputOrder) |
52 | in.objcMethList->inputOrder = inputSectionsOrder++; |
53 | in.objcMethList->addInput(isec); |
54 | isec->parent = in.objcMethList; |
55 | return; |
56 | } |
57 | if (config->emitInitOffsets && |
58 | sectionType(flags: isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) { |
59 | in.initOffsets->addInput(isec); |
60 | return; |
61 | } |
62 | isec->outSecOff = inputSectionsOrder++; |
63 | auto *osec = ConcatOutputSection::getOrCreateForInput(isec); |
64 | isec->parent = osec; |
65 | inputSections.push_back(x: isec); |
66 | } else if (auto *isec = dyn_cast<CStringInputSection>(Val: inputSection)) { |
67 | if (isec->getName() == section_names::objcMethname) { |
68 | if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder) |
69 | in.objcMethnameSection->inputOrder = inputSectionsOrder++; |
70 | in.objcMethnameSection->addInput(isec); |
71 | } else { |
72 | if (in.cStringSection->inputOrder == UnspecifiedInputOrder) |
73 | in.cStringSection->inputOrder = inputSectionsOrder++; |
74 | in.cStringSection->addInput(isec); |
75 | } |
76 | } else if (auto *isec = dyn_cast<WordLiteralInputSection>(Val: inputSection)) { |
77 | if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) |
78 | in.wordLiteralSection->inputOrder = inputSectionsOrder++; |
79 | in.wordLiteralSection->addInput(isec); |
80 | } else { |
81 | llvm_unreachable("unexpected input section kind" ); |
82 | } |
83 | |
84 | assert(inputSectionsOrder <= UnspecifiedInputOrder); |
85 | } |
86 | |
87 | uint64_t InputSection::getFileSize() const { |
88 | return isZeroFill(flags: getFlags()) ? 0 : getSize(); |
89 | } |
90 | |
91 | uint64_t InputSection::getVA(uint64_t off) const { |
92 | return parent->addr + getOffset(off); |
93 | } |
94 | |
95 | static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { |
96 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type); |
97 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) |
98 | return sym->resolveBranchVA(); |
99 | if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) |
100 | return sym->resolveGotVA(); |
101 | if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) |
102 | return sym->resolveTlvVA(); |
103 | return sym->getVA(); |
104 | } |
105 | |
106 | const Defined *InputSection::getContainingSymbol(uint64_t off) const { |
107 | auto *nextSym = llvm::upper_bound( |
108 | Range: symbols, Value&: off, C: [](uint64_t a, const Defined *b) { return a < b->value; }); |
109 | if (nextSym == symbols.begin()) |
110 | return nullptr; |
111 | return *std::prev(x: nextSym); |
112 | } |
113 | |
114 | std::string InputSection::getLocation(uint64_t off) const { |
115 | // First, try to find a symbol that's near the offset. Use it as a reference |
116 | // point. |
117 | if (auto *sym = getContainingSymbol(off)) |
118 | return (toString(file: getFile()) + ":(symbol " + toString(*sym) + "+0x" + |
119 | Twine::utohexstr(Val: off - sym->value) + ")" ) |
120 | .str(); |
121 | |
122 | // If that fails, use the section itself as a reference point. |
123 | for (const Subsection &subsec : section.subsections) { |
124 | if (subsec.isec == this) { |
125 | off += subsec.offset; |
126 | break; |
127 | } |
128 | } |
129 | |
130 | return (toString(file: getFile()) + ":(" + getName() + "+0x" + |
131 | Twine::utohexstr(Val: off) + ")" ) |
132 | .str(); |
133 | } |
134 | |
135 | std::string InputSection::getSourceLocation(uint64_t off) const { |
136 | auto *obj = dyn_cast_or_null<ObjFile>(Val: getFile()); |
137 | if (!obj) |
138 | return {}; |
139 | |
140 | DWARFCache *dwarf = obj->getDwarf(); |
141 | if (!dwarf) |
142 | return std::string(); |
143 | |
144 | for (const Subsection &subsec : section.subsections) { |
145 | if (subsec.isec == this) { |
146 | off += subsec.offset; |
147 | break; |
148 | } |
149 | } |
150 | |
151 | auto createMsg = [&](StringRef path, unsigned line) { |
152 | std::string filename = sys::path::filename(path).str(); |
153 | std::string lineStr = (":" + Twine(line)).str(); |
154 | if (filename == path) |
155 | return filename + lineStr; |
156 | return (filename + lineStr + " (" + path + lineStr + ")" ).str(); |
157 | }; |
158 | |
159 | // First, look up a function for a given offset. |
160 | if (std::optional<DILineInfo> li = dwarf->getDILineInfo( |
161 | offset: section.addr + off, sectionIndex: object::SectionedAddress::UndefSection)) |
162 | return createMsg(li->FileName, li->Line); |
163 | |
164 | // If it failed, look up again as a variable. |
165 | if (const Defined *sym = getContainingSymbol(off)) { |
166 | // Symbols are generally prefixed with an underscore, which is not included |
167 | // in the debug information. |
168 | StringRef symName = sym->getName(); |
169 | if (!symName.empty() && symName[0] == '_') |
170 | symName = symName.substr(Start: 1); |
171 | |
172 | if (std::optional<std::pair<std::string, unsigned>> fileLine = |
173 | dwarf->getVariableLoc(name: symName)) |
174 | return createMsg(fileLine->first, fileLine->second); |
175 | } |
176 | |
177 | // Try to get the source file's name from the DWARF information. |
178 | if (obj->compileUnit) |
179 | return obj->sourceFile(); |
180 | |
181 | return {}; |
182 | } |
183 | |
184 | const Reloc *InputSection::getRelocAt(uint32_t off) const { |
185 | auto it = llvm::find_if( |
186 | Range: relocs, P: [=](const macho::Reloc &r) { return r.offset == off; }); |
187 | if (it == relocs.end()) |
188 | return nullptr; |
189 | return &*it; |
190 | } |
191 | |
192 | void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { |
193 | align = std::max(a: align, b: copy->align); |
194 | copy->live = false; |
195 | copy->wasCoalesced = true; |
196 | copy->replacement = this; |
197 | for (auto ©Sym : copy->symbols) |
198 | copySym->wasIdenticalCodeFolded = true; |
199 | |
200 | symbols.insert(I: symbols.end(), From: copy->symbols.begin(), To: copy->symbols.end()); |
201 | copy->symbols.clear(); |
202 | |
203 | // Remove duplicate compact unwind info for symbols at the same address. |
204 | if (symbols.empty()) |
205 | return; |
206 | for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) { |
207 | assert((*it)->value == 0); |
208 | (*it)->originalUnwindEntry = nullptr; |
209 | } |
210 | } |
211 | |
212 | void ConcatInputSection::writeTo(uint8_t *buf) { |
213 | assert(!shouldOmitFromOutput()); |
214 | |
215 | if (getFileSize() == 0) |
216 | return; |
217 | |
218 | memcpy(dest: buf, src: data.data(), n: data.size()); |
219 | |
220 | for (size_t i = 0; i < relocs.size(); i++) { |
221 | const Reloc &r = relocs[i]; |
222 | uint8_t *loc = buf + r.offset; |
223 | uint64_t referentVA = 0; |
224 | |
225 | const bool needsFixup = config->emitChainedFixups && |
226 | target->hasAttr(type: r.type, bit: RelocAttrBits::UNSIGNED); |
227 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
228 | const Symbol *fromSym = r.referent.get<Symbol *>(); |
229 | const Reloc &minuend = relocs[++i]; |
230 | uint64_t minuendVA; |
231 | if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) |
232 | minuendVA = toSym->getVA() + minuend.addend; |
233 | else { |
234 | auto *referentIsec = minuend.referent.get<InputSection *>(); |
235 | assert(!::shouldOmitFromOutput(referentIsec)); |
236 | minuendVA = referentIsec->getVA(off: minuend.addend); |
237 | } |
238 | referentVA = minuendVA - fromSym->getVA(); |
239 | } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { |
240 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::LOAD) && |
241 | !referentSym->isInGot()) |
242 | target->relaxGotLoad(loc, type: r.type); |
243 | // For dtrace symbols, do not handle them as normal undefined symbols |
244 | if (referentSym->getName().starts_with(Prefix: "___dtrace_" )) { |
245 | // Change dtrace call site to pre-defined instructions |
246 | target->handleDtraceReloc(sym: referentSym, r, loc); |
247 | continue; |
248 | } |
249 | referentVA = resolveSymbolVA(sym: referentSym, type: r.type) + r.addend; |
250 | |
251 | if (isThreadLocalVariables(flags: getFlags()) && isa<Defined>(Val: referentSym)) { |
252 | // References from thread-local variable sections are treated as offsets |
253 | // relative to the start of the thread-local data memory area, which |
254 | // is initialized via copying all the TLV data sections (which are all |
255 | // contiguous). |
256 | referentVA -= firstTLVDataSection->addr; |
257 | } else if (needsFixup) { |
258 | writeChainedFixup(buf: loc, sym: referentSym, addend: r.addend); |
259 | continue; |
260 | } |
261 | } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { |
262 | assert(!::shouldOmitFromOutput(referentIsec)); |
263 | referentVA = referentIsec->getVA(off: r.addend); |
264 | |
265 | if (needsFixup) { |
266 | writeChainedRebase(buf: loc, targetVA: referentVA); |
267 | continue; |
268 | } |
269 | } |
270 | target->relocateOne(loc, r, va: referentVA, relocVA: getVA() + r.offset); |
271 | } |
272 | } |
273 | |
274 | ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, |
275 | StringRef sectName, |
276 | uint32_t flags, |
277 | ArrayRef<uint8_t> data, |
278 | uint32_t align) { |
279 | Section §ion = |
280 | *make<Section>(/*file=*/args: nullptr, args&: segName, args&: sectName, args&: flags, /*addr=*/args: 0); |
281 | auto isec = make<ConcatInputSection>(args&: section, args&: data, args&: align); |
282 | // Since this is an explicitly created 'fake' input section, |
283 | // it should not be dead stripped. |
284 | isec->live = true; |
285 | section.subsections.push_back(x: {.offset: 0, .isec: isec}); |
286 | return isec; |
287 | } |
288 | |
289 | void CStringInputSection::splitIntoPieces() { |
290 | size_t off = 0; |
291 | StringRef s = toStringRef(Input: data); |
292 | while (!s.empty()) { |
293 | size_t end = s.find(C: 0); |
294 | if (end == StringRef::npos) |
295 | fatal(msg: getLocation(off) + ": string is not null terminated" ); |
296 | uint32_t hash = deduplicateLiterals ? xxh3_64bits(data: s.take_front(N: end)) : 0; |
297 | pieces.emplace_back(args&: off, args&: hash); |
298 | size_t size = end + 1; // include null terminator |
299 | s = s.substr(Start: size); |
300 | off += size; |
301 | } |
302 | } |
303 | |
304 | StringPiece &CStringInputSection::getStringPiece(uint64_t off) { |
305 | if (off >= data.size()) |
306 | fatal(msg: toString(this) + ": offset is outside the section" ); |
307 | |
308 | auto it = |
309 | partition_point(Range&: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; }); |
310 | return it[-1]; |
311 | } |
312 | |
313 | const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { |
314 | return const_cast<CStringInputSection *>(this)->getStringPiece(off); |
315 | } |
316 | |
317 | size_t CStringInputSection::getStringPieceIndex(uint64_t off) const { |
318 | if (off >= data.size()) |
319 | fatal(msg: toString(this) + ": offset is outside the section" ); |
320 | |
321 | auto it = |
322 | partition_point(Range: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; }); |
323 | return std::distance(first: pieces.begin(), last: it) - 1; |
324 | } |
325 | |
326 | uint64_t CStringInputSection::getOffset(uint64_t off) const { |
327 | const StringPiece &piece = getStringPiece(off); |
328 | uint64_t addend = off - piece.inSecOff; |
329 | return piece.outSecOff + addend; |
330 | } |
331 | |
332 | WordLiteralInputSection::WordLiteralInputSection(const Section §ion, |
333 | ArrayRef<uint8_t> data, |
334 | uint32_t align) |
335 | : InputSection(WordLiteralKind, section, data, align) { |
336 | switch (sectionType(flags: getFlags())) { |
337 | case S_4BYTE_LITERALS: |
338 | power2LiteralSize = 2; |
339 | break; |
340 | case S_8BYTE_LITERALS: |
341 | power2LiteralSize = 3; |
342 | break; |
343 | case S_16BYTE_LITERALS: |
344 | power2LiteralSize = 4; |
345 | break; |
346 | default: |
347 | llvm_unreachable("invalid literal section type" ); |
348 | } |
349 | |
350 | live.resize(N: data.size() >> power2LiteralSize, t: !config->deadStrip); |
351 | } |
352 | |
353 | uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { |
354 | auto *osec = cast<WordLiteralSection>(Val: parent); |
355 | const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); |
356 | switch (sectionType(flags: getFlags())) { |
357 | case S_4BYTE_LITERALS: |
358 | return osec->getLiteral4Offset(buf: buf + (off & ~3LLU)) | (off & 3); |
359 | case S_8BYTE_LITERALS: |
360 | return osec->getLiteral8Offset(buf: buf + (off & ~7LLU)) | (off & 7); |
361 | case S_16BYTE_LITERALS: |
362 | return osec->getLiteral16Offset(buf: buf + (off & ~15LLU)) | (off & 15); |
363 | default: |
364 | llvm_unreachable("invalid literal section type" ); |
365 | } |
366 | } |
367 | |
368 | bool macho::isCodeSection(const InputSection *isec) { |
369 | uint32_t type = sectionType(flags: isec->getFlags()); |
370 | if (type != S_REGULAR && type != S_COALESCED) |
371 | return false; |
372 | |
373 | uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR; |
374 | if (attr == S_ATTR_PURE_INSTRUCTIONS) |
375 | return true; |
376 | |
377 | if (isec->getSegName() == segment_names::text) |
378 | return StringSwitch<bool>(isec->getName()) |
379 | .Cases(S0: section_names::textCoalNt, S1: section_names::staticInit, Value: true) |
380 | .Default(Value: false); |
381 | |
382 | return false; |
383 | } |
384 | |
385 | bool macho::isCfStringSection(const InputSection *isec) { |
386 | return isec->getName() == section_names::cfString && |
387 | isec->getSegName() == segment_names::data; |
388 | } |
389 | |
390 | bool macho::isClassRefsSection(const InputSection *isec) { |
391 | return isec->getName() == section_names::objcClassRefs && |
392 | isec->getSegName() == segment_names::data; |
393 | } |
394 | |
395 | bool macho::isSelRefsSection(const InputSection *isec) { |
396 | return isec->getName() == section_names::objcSelrefs && |
397 | isec->getSegName() == segment_names::data; |
398 | } |
399 | |
400 | bool macho::isEhFrameSection(const InputSection *isec) { |
401 | return isec->getName() == section_names::ehFrame && |
402 | isec->getSegName() == segment_names::text; |
403 | } |
404 | |
405 | bool macho::isGccExceptTabSection(const InputSection *isec) { |
406 | return isec->getName() == section_names::gccExceptTab && |
407 | isec->getSegName() == segment_names::text; |
408 | } |
409 | |
410 | std::string lld::toString(const InputSection *isec) { |
411 | return (toString(file: isec->getFile()) + ":(" + isec->getName() + ")" ).str(); |
412 | } |
413 | |