1 | //===- InputSection.cpp ---------------------------------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InputSection.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputFiles.h" |
13 | #include "OutputSegment.h" |
14 | #include "Sections.h" |
15 | #include "Symbols.h" |
16 | #include "SyntheticSections.h" |
17 | #include "Target.h" |
18 | #include "Writer.h" |
19 | |
20 | #include "lld/Common/ErrorHandler.h" |
21 | #include "lld/Common/Memory.h" |
22 | #include "llvm/Support/xxhash.h" |
23 | |
24 | using namespace llvm; |
25 | using namespace llvm::MachO; |
26 | using namespace llvm::support; |
27 | using namespace lld; |
28 | using namespace lld::macho; |
29 | |
30 | // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector |
31 | // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL), |
32 | // so account for that. |
33 | static_assert(sizeof(void *) != 8 || |
34 | sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88, |
35 | "Try to minimize ConcatInputSection's size, we create many " |
36 | "instances of it"); |
37 | |
38 | std::vector<ConcatInputSection *> macho::inputSections; |
39 | int macho::inputSectionsOrder = 0; |
40 | |
41 | // Call this function to add a new InputSection and have it routed to the |
42 | // appropriate container. Depending on its type and current config, it will |
43 | // either be added to 'inputSections' vector or to a synthetic section. |
44 | void lld::macho::addInputSection(InputSection *inputSection) { |
45 | if (auto *isec = dyn_cast<ConcatInputSection>(Val: inputSection)) { |
46 | if (isec->isCoalescedWeak()) |
47 | return; |
48 | if (config->emitRelativeMethodLists && |
49 | ObjCMethListSection::isMethodList(isec)) { |
50 | if (in.objcMethList->inputOrder == UnspecifiedInputOrder) |
51 | in.objcMethList->inputOrder = inputSectionsOrder++; |
52 | in.objcMethList->addInput(isec); |
53 | isec->parent = in.objcMethList; |
54 | return; |
55 | } |
56 | if (config->emitInitOffsets && |
57 | sectionType(flags: isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) { |
58 | in.initOffsets->addInput(isec); |
59 | return; |
60 | } |
61 | isec->outSecOff = inputSectionsOrder++; |
62 | auto *osec = ConcatOutputSection::getOrCreateForInput(isec); |
63 | isec->parent = osec; |
64 | inputSections.push_back(x: isec); |
65 | } else if (auto *isec = dyn_cast<CStringInputSection>(Val: inputSection)) { |
66 | if (isec->getName() == section_names::objcMethname) { |
67 | if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder) |
68 | in.objcMethnameSection->inputOrder = inputSectionsOrder++; |
69 | in.objcMethnameSection->addInput(isec); |
70 | } else { |
71 | if (in.cStringSection->inputOrder == UnspecifiedInputOrder) |
72 | in.cStringSection->inputOrder = inputSectionsOrder++; |
73 | in.cStringSection->addInput(isec); |
74 | } |
75 | } else if (auto *isec = dyn_cast<WordLiteralInputSection>(Val: inputSection)) { |
76 | if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) |
77 | in.wordLiteralSection->inputOrder = inputSectionsOrder++; |
78 | in.wordLiteralSection->addInput(isec); |
79 | } else { |
80 | llvm_unreachable("unexpected input section kind"); |
81 | } |
82 | |
83 | assert(inputSectionsOrder <= UnspecifiedInputOrder); |
84 | } |
85 | |
86 | uint64_t InputSection::getFileSize() const { |
87 | return isZeroFill(flags: getFlags()) ? 0 : getSize(); |
88 | } |
89 | |
90 | uint64_t InputSection::getVA(uint64_t off) const { |
91 | return parent->addr + getOffset(off); |
92 | } |
93 | |
94 | static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { |
95 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type); |
96 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) |
97 | return sym->resolveBranchVA(); |
98 | if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) |
99 | return sym->resolveGotVA(); |
100 | if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) |
101 | return sym->resolveTlvVA(); |
102 | return sym->getVA(); |
103 | } |
104 | |
105 | const Defined *InputSection::getContainingSymbol(uint64_t off) const { |
106 | auto *nextSym = llvm::upper_bound( |
107 | Range: symbols, Value&: off, C: [](uint64_t a, const Defined *b) { return a < b->value; }); |
108 | if (nextSym == symbols.begin()) |
109 | return nullptr; |
110 | return *std::prev(x: nextSym); |
111 | } |
112 | |
113 | std::string InputSection::getLocation(uint64_t off) const { |
114 | // First, try to find a symbol that's near the offset. Use it as a reference |
115 | // point. |
116 | if (auto *sym = getContainingSymbol(off)) |
117 | return (toString(file: getFile()) + ":(symbol "+ toString(*sym) + "+0x"+ |
118 | Twine::utohexstr(Val: off - sym->value) + ")") |
119 | .str(); |
120 | |
121 | // If that fails, use the section itself as a reference point. |
122 | for (const Subsection &subsec : section.subsections) { |
123 | if (subsec.isec == this) { |
124 | off += subsec.offset; |
125 | break; |
126 | } |
127 | } |
128 | |
129 | return (toString(file: getFile()) + ":("+ getName() + "+0x"+ |
130 | Twine::utohexstr(Val: off) + ")") |
131 | .str(); |
132 | } |
133 | |
134 | std::string InputSection::getSourceLocation(uint64_t off) const { |
135 | auto *obj = dyn_cast_or_null<ObjFile>(Val: getFile()); |
136 | if (!obj) |
137 | return {}; |
138 | |
139 | DWARFCache *dwarf = obj->getDwarf(); |
140 | if (!dwarf) |
141 | return std::string(); |
142 | |
143 | for (const Subsection &subsec : section.subsections) { |
144 | if (subsec.isec == this) { |
145 | off += subsec.offset; |
146 | break; |
147 | } |
148 | } |
149 | |
150 | auto createMsg = [&](StringRef path, unsigned line) { |
151 | std::string filename = sys::path::filename(path).str(); |
152 | std::string lineStr = (":"+ Twine(line)).str(); |
153 | if (filename == path) |
154 | return filename + lineStr; |
155 | return (filename + lineStr + " ("+ path + lineStr + ")").str(); |
156 | }; |
157 | |
158 | // First, look up a function for a given offset. |
159 | if (std::optional<DILineInfo> li = dwarf->getDILineInfo( |
160 | offset: section.addr + off, sectionIndex: object::SectionedAddress::UndefSection)) |
161 | return createMsg(li->FileName, li->Line); |
162 | |
163 | // If it failed, look up again as a variable. |
164 | if (const Defined *sym = getContainingSymbol(off)) { |
165 | // Symbols are generally prefixed with an underscore, which is not included |
166 | // in the debug information. |
167 | StringRef symName = sym->getName(); |
168 | symName.consume_front(Prefix: "_"); |
169 | |
170 | if (std::optional<std::pair<std::string, unsigned>> fileLine = |
171 | dwarf->getVariableLoc(name: symName)) |
172 | return createMsg(fileLine->first, fileLine->second); |
173 | } |
174 | |
175 | // Try to get the source file's name from the DWARF information. |
176 | if (obj->compileUnit) |
177 | return obj->sourceFile(); |
178 | |
179 | return {}; |
180 | } |
181 | |
182 | const Reloc *InputSection::getRelocAt(uint32_t off) const { |
183 | auto it = llvm::find_if( |
184 | Range: relocs, P: [=](const macho::Reloc &r) { return r.offset == off; }); |
185 | if (it == relocs.end()) |
186 | return nullptr; |
187 | return &*it; |
188 | } |
189 | |
190 | void ConcatInputSection::foldIdentical(ConcatInputSection *copy, |
191 | Symbol::ICFFoldKind foldKind) { |
192 | align = std::max(a: align, b: copy->align); |
193 | copy->live = false; |
194 | copy->wasCoalesced = true; |
195 | copy->replacement = this; |
196 | for (auto ©Sym : copy->symbols) |
197 | copySym->identicalCodeFoldingKind = foldKind; |
198 | |
199 | symbols.insert(I: symbols.end(), From: copy->symbols.begin(), To: copy->symbols.end()); |
200 | copy->symbols.clear(); |
201 | |
202 | // Remove duplicate compact unwind info for symbols at the same address. |
203 | if (symbols.empty()) |
204 | return; |
205 | for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) { |
206 | assert((*it)->value == 0); |
207 | (*it)->originalUnwindEntry = nullptr; |
208 | } |
209 | } |
210 | |
211 | void ConcatInputSection::writeTo(uint8_t *buf) { |
212 | assert(!shouldOmitFromOutput()); |
213 | |
214 | if (getFileSize() == 0) |
215 | return; |
216 | |
217 | memcpy(dest: buf, src: data.data(), n: data.size()); |
218 | |
219 | for (size_t i = 0; i < relocs.size(); i++) { |
220 | const Reloc &r = relocs[i]; |
221 | uint8_t *loc = buf + r.offset; |
222 | uint64_t referentVA = 0; |
223 | |
224 | const bool needsFixup = config->emitChainedFixups && |
225 | target->hasAttr(type: r.type, bit: RelocAttrBits::UNSIGNED); |
226 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
227 | const Symbol *fromSym = cast<Symbol *>(Val: r.referent); |
228 | const Reloc &minuend = relocs[++i]; |
229 | uint64_t minuendVA; |
230 | if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) |
231 | minuendVA = toSym->getVA() + minuend.addend; |
232 | else { |
233 | auto *referentIsec = cast<InputSection *>(Val: minuend.referent); |
234 | assert(!::shouldOmitFromOutput(referentIsec)); |
235 | minuendVA = referentIsec->getVA(off: minuend.addend); |
236 | } |
237 | referentVA = minuendVA - fromSym->getVA(); |
238 | } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { |
239 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::LOAD) && |
240 | !referentSym->isInGot()) |
241 | target->relaxGotLoad(loc, type: r.type); |
242 | // For dtrace symbols, do not handle them as normal undefined symbols |
243 | if (referentSym->getName().starts_with(Prefix: "___dtrace_")) { |
244 | // Change dtrace call site to pre-defined instructions |
245 | target->handleDtraceReloc(sym: referentSym, r, loc); |
246 | continue; |
247 | } |
248 | referentVA = resolveSymbolVA(sym: referentSym, type: r.type) + r.addend; |
249 | |
250 | if (isThreadLocalVariables(flags: getFlags()) && isa<Defined>(Val: referentSym)) { |
251 | // References from thread-local variable sections are treated as offsets |
252 | // relative to the start of the thread-local data memory area, which |
253 | // is initialized via copying all the TLV data sections (which are all |
254 | // contiguous). |
255 | referentVA -= firstTLVDataSection->addr; |
256 | } else if (needsFixup) { |
257 | writeChainedFixup(buf: loc, sym: referentSym, addend: r.addend); |
258 | continue; |
259 | } |
260 | } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { |
261 | assert(!::shouldOmitFromOutput(referentIsec)); |
262 | referentVA = referentIsec->getVA(off: r.addend); |
263 | |
264 | if (needsFixup) { |
265 | writeChainedRebase(buf: loc, targetVA: referentVA); |
266 | continue; |
267 | } |
268 | } |
269 | target->relocateOne(loc, r, va: referentVA, relocVA: getVA() + r.offset); |
270 | } |
271 | } |
272 | |
273 | ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, |
274 | StringRef sectName, |
275 | uint32_t flags, |
276 | ArrayRef<uint8_t> data, |
277 | uint32_t align) { |
278 | Section §ion = |
279 | *make<Section>(/*file=*/args: nullptr, args&: segName, args&: sectName, args&: flags, /*addr=*/args: 0); |
280 | auto isec = make<ConcatInputSection>(args&: section, args&: data, args&: align); |
281 | // Since this is an explicitly created 'fake' input section, |
282 | // it should not be dead stripped. |
283 | isec->live = true; |
284 | section.subsections.push_back(x: {.offset: 0, .isec: isec}); |
285 | return isec; |
286 | } |
287 | |
288 | void CStringInputSection::splitIntoPieces() { |
289 | size_t off = 0; |
290 | StringRef s = toStringRef(Input: data); |
291 | while (!s.empty()) { |
292 | size_t end = s.find(C: 0); |
293 | if (end == StringRef::npos) |
294 | fatal(msg: getLocation(off) + ": string is not null terminated"); |
295 | uint32_t hash = deduplicateLiterals ? xxh3_64bits(data: s.take_front(N: end)) : 0; |
296 | pieces.emplace_back(args&: off, args&: hash); |
297 | size_t size = end + 1; // include null terminator |
298 | s = s.substr(Start: size); |
299 | off += size; |
300 | } |
301 | } |
302 | |
303 | StringPiece &CStringInputSection::getStringPiece(uint64_t off) { |
304 | if (off >= data.size()) |
305 | fatal(msg: toString(this) + ": offset is outside the section"); |
306 | |
307 | auto it = |
308 | partition_point(Range&: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; }); |
309 | return it[-1]; |
310 | } |
311 | |
312 | const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { |
313 | return const_cast<CStringInputSection *>(this)->getStringPiece(off); |
314 | } |
315 | |
316 | size_t CStringInputSection::getStringPieceIndex(uint64_t off) const { |
317 | if (off >= data.size()) |
318 | fatal(msg: toString(this) + ": offset is outside the section"); |
319 | |
320 | auto it = |
321 | partition_point(Range: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; }); |
322 | return std::distance(first: pieces.begin(), last: it) - 1; |
323 | } |
324 | |
325 | uint64_t CStringInputSection::getOffset(uint64_t off) const { |
326 | const StringPiece &piece = getStringPiece(off); |
327 | uint64_t addend = off - piece.inSecOff; |
328 | return piece.outSecOff + addend; |
329 | } |
330 | |
331 | WordLiteralInputSection::WordLiteralInputSection(const Section §ion, |
332 | ArrayRef<uint8_t> data, |
333 | uint32_t align) |
334 | : InputSection(WordLiteralKind, section, data, align) { |
335 | switch (sectionType(flags: getFlags())) { |
336 | case S_4BYTE_LITERALS: |
337 | power2LiteralSize = 2; |
338 | break; |
339 | case S_8BYTE_LITERALS: |
340 | power2LiteralSize = 3; |
341 | break; |
342 | case S_16BYTE_LITERALS: |
343 | power2LiteralSize = 4; |
344 | break; |
345 | default: |
346 | llvm_unreachable("invalid literal section type"); |
347 | } |
348 | |
349 | live.resize(N: data.size() >> power2LiteralSize, t: !config->deadStrip); |
350 | } |
351 | |
352 | uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { |
353 | auto *osec = cast<WordLiteralSection>(Val: parent); |
354 | const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); |
355 | switch (sectionType(flags: getFlags())) { |
356 | case S_4BYTE_LITERALS: |
357 | return osec->getLiteral4Offset(buf: buf + (off & ~3LLU)) | (off & 3); |
358 | case S_8BYTE_LITERALS: |
359 | return osec->getLiteral8Offset(buf: buf + (off & ~7LLU)) | (off & 7); |
360 | case S_16BYTE_LITERALS: |
361 | return osec->getLiteral16Offset(buf: buf + (off & ~15LLU)) | (off & 15); |
362 | default: |
363 | llvm_unreachable("invalid literal section type"); |
364 | } |
365 | } |
366 | |
367 | bool macho::isCodeSection(const InputSection *isec) { |
368 | return sections::isCodeSection(name: isec->getName(), segName: isec->getSegName(), |
369 | flags: isec->getFlags()); |
370 | } |
371 | |
372 | bool macho::isCfStringSection(const InputSection *isec) { |
373 | return isec->getName() == section_names::cfString && |
374 | isec->getSegName() == segment_names::data; |
375 | } |
376 | |
377 | bool macho::isClassRefsSection(const InputSection *isec) { |
378 | return isec->getName() == section_names::objcClassRefs && |
379 | isec->getSegName() == segment_names::data; |
380 | } |
381 | |
382 | bool macho::isSelRefsSection(const InputSection *isec) { |
383 | return isec->getName() == section_names::objcSelrefs && |
384 | isec->getSegName() == segment_names::data; |
385 | } |
386 | |
387 | bool macho::isEhFrameSection(const InputSection *isec) { |
388 | return isec->getName() == section_names::ehFrame && |
389 | isec->getSegName() == segment_names::text; |
390 | } |
391 | |
392 | bool macho::isGccExceptTabSection(const InputSection *isec) { |
393 | return isec->getName() == section_names::gccExceptTab && |
394 | isec->getSegName() == segment_names::text; |
395 | } |
396 | |
397 | std::string lld::toString(const InputSection *isec) { |
398 | return (toString(file: isec->getFile()) + ":("+ isec->getName() + ")").str(); |
399 | } |
400 |
Definitions
- inputSections
- inputSectionsOrder
- addInputSection
- getFileSize
- getVA
- resolveSymbolVA
- getContainingSymbol
- getLocation
- getSourceLocation
- getRelocAt
- foldIdentical
- writeTo
- makeSyntheticInputSection
- splitIntoPieces
- getStringPiece
- getStringPiece
- getStringPieceIndex
- getOffset
- WordLiteralInputSection
- getOffset
- isCodeSection
- isCfStringSection
- isClassRefsSection
- isSelRefsSection
- isEhFrameSection
- isGccExceptTabSection
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more