1 | //===- InputSection.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_ELF_INPUT_SECTION_H |
10 | #define LLD_ELF_INPUT_SECTION_H |
11 | |
12 | #include "Config.h" |
13 | #include "Relocations.h" |
14 | #include "lld/Common/CommonLinkerContext.h" |
15 | #include "lld/Common/LLVM.h" |
16 | #include "lld/Common/Memory.h" |
17 | #include "llvm/ADT/CachedHashString.h" |
18 | #include "llvm/ADT/DenseSet.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/ADT/TinyPtrVector.h" |
21 | #include "llvm/Object/ELF.h" |
22 | #include "llvm/Support/Compiler.h" |
23 | |
24 | namespace lld { |
25 | namespace elf { |
26 | |
27 | class InputFile; |
28 | class Symbol; |
29 | |
30 | class Defined; |
31 | struct Partition; |
32 | class SyntheticSection; |
33 | template <class ELFT> class ObjFile; |
34 | class OutputSection; |
35 | |
36 | LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions; |
37 | |
38 | // Returned by InputSectionBase::relsOrRelas. At least one member is empty. |
39 | template <class ELFT> struct RelsOrRelas { |
40 | ArrayRef<typename ELFT::Rel> rels; |
41 | ArrayRef<typename ELFT::Rela> relas; |
42 | bool areRelocsRel() const { return rels.size(); } |
43 | }; |
44 | |
45 | // This is the base class of all sections that lld handles. Some are sections in |
46 | // input files, some are sections in the produced output file and some exist |
47 | // just as a convenience for implementing special ways of combining some |
48 | // sections. |
49 | class SectionBase { |
50 | public: |
51 | enum Kind { Regular, Synthetic, EHFrame, Merge, Output }; |
52 | |
53 | Kind kind() const { return (Kind)sectionKind; } |
54 | |
55 | uint8_t sectionKind : 3; |
56 | |
57 | // The next two bit fields are only used by InputSectionBase, but we |
58 | // put them here so the struct packs better. |
59 | |
60 | uint8_t bss : 1; |
61 | |
62 | // Set for sections that should not be folded by ICF. |
63 | uint8_t keepUnique : 1; |
64 | |
65 | uint8_t partition = 1; |
66 | uint32_t type; |
67 | StringRef name; |
68 | |
69 | // The 1-indexed partition that this section is assigned to by the garbage |
70 | // collector, or 0 if this section is dead. Normally there is only one |
71 | // partition, so this will either be 0 or 1. |
72 | elf::Partition &getPartition() const; |
73 | |
74 | // These corresponds to the fields in Elf_Shdr. |
75 | uint64_t flags; |
76 | uint32_t addralign; |
77 | uint32_t entsize; |
78 | uint32_t link; |
79 | uint32_t info; |
80 | |
81 | OutputSection *getOutputSection(); |
82 | const OutputSection *getOutputSection() const { |
83 | return const_cast<SectionBase *>(this)->getOutputSection(); |
84 | } |
85 | |
86 | // Translate an offset in the input section to an offset in the output |
87 | // section. |
88 | uint64_t getOffset(uint64_t offset) const; |
89 | |
90 | uint64_t getVA(uint64_t offset = 0) const; |
91 | |
92 | bool isLive() const { return partition != 0; } |
93 | void markLive() { partition = 1; } |
94 | void markDead() { partition = 0; } |
95 | |
96 | protected: |
97 | constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, |
98 | uint32_t entsize, uint32_t addralign, uint32_t type, |
99 | uint32_t info, uint32_t link) |
100 | : sectionKind(sectionKind), bss(false), keepUnique(false), type(type), |
101 | name(name), flags(flags), addralign(addralign), entsize(entsize), |
102 | link(link), info(info) {} |
103 | }; |
104 | |
105 | struct SymbolAnchor { |
106 | uint64_t offset; |
107 | Defined *d; |
108 | bool end; // true for the anchor of st_value+st_size |
109 | }; |
110 | |
111 | struct RelaxAux { |
112 | // This records symbol start and end offsets which will be adjusted according |
113 | // to the nearest relocDeltas element. |
114 | SmallVector<SymbolAnchor, 0> anchors; |
115 | // For relocations[i], the actual offset is |
116 | // r_offset - (i ? relocDeltas[i-1] : 0). |
117 | std::unique_ptr<uint32_t[]> relocDeltas; |
118 | // For relocations[i], the actual type is relocTypes[i]. |
119 | std::unique_ptr<RelType[]> relocTypes; |
120 | SmallVector<uint32_t, 0> writes; |
121 | }; |
122 | |
123 | // This corresponds to a section of an input file. |
124 | class InputSectionBase : public SectionBase { |
125 | public: |
126 | template <class ELFT> |
127 | InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &, |
128 | StringRef name, Kind sectionKind); |
129 | |
130 | InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, |
131 | uint64_t entsize, uint32_t link, uint32_t info, |
132 | uint32_t addralign, ArrayRef<uint8_t> data, StringRef name, |
133 | Kind sectionKind); |
134 | |
135 | static bool classof(const SectionBase *s) { return s->kind() != Output; } |
136 | |
137 | // The file which contains this section. Its dynamic type is usually |
138 | // ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic |
139 | // section). |
140 | InputFile *file; |
141 | |
142 | // Input sections are part of an output section. Special sections |
143 | // like .eh_frame and merge sections are first combined into a |
144 | // synthetic section that is then added to an output section. In all |
145 | // cases this points one level up. |
146 | SectionBase *parent = nullptr; |
147 | |
148 | // Section index of the relocation section if exists. |
149 | uint32_t relSecIdx = 0; |
150 | |
151 | // Getter when the dynamic type is ObjFile<ELFT>. |
152 | template <class ELFT> ObjFile<ELFT> *getFile() const { |
153 | return cast<ObjFile<ELFT>>(file); |
154 | } |
155 | |
156 | // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to |
157 | // indicate the number of bytes which is not counted in the size. This should |
158 | // be reset to zero after uses. |
159 | uint32_t bytesDropped = 0; |
160 | |
161 | mutable bool compressed = false; |
162 | |
163 | // Whether the section needs to be padded with a NOP filler due to |
164 | // deleteFallThruJmpInsn. |
165 | bool nopFiller = false; |
166 | |
167 | void drop_back(unsigned num) { |
168 | assert(bytesDropped + num < 256); |
169 | bytesDropped += num; |
170 | } |
171 | |
172 | void push_back(uint64_t num) { |
173 | assert(bytesDropped >= num); |
174 | bytesDropped -= num; |
175 | } |
176 | |
177 | mutable const uint8_t *content_; |
178 | uint64_t size; |
179 | |
180 | void trim() { |
181 | if (bytesDropped) { |
182 | size -= bytesDropped; |
183 | bytesDropped = 0; |
184 | } |
185 | } |
186 | |
187 | ArrayRef<uint8_t> content() const { |
188 | return ArrayRef<uint8_t>(content_, size); |
189 | } |
190 | ArrayRef<uint8_t> contentMaybeDecompress() const { |
191 | if (compressed) |
192 | decompress(); |
193 | return content(); |
194 | } |
195 | |
196 | // The next member in the section group if this section is in a group. This is |
197 | // used by --gc-sections. |
198 | InputSectionBase *nextInSectionGroup = nullptr; |
199 | |
200 | template <class ELFT> RelsOrRelas<ELFT> relsOrRelas() const; |
201 | |
202 | // InputSections that are dependent on us (reverse dependency for GC) |
203 | llvm::TinyPtrVector<InputSection *> dependentSections; |
204 | |
205 | // Returns the size of this section (even if this is a common or BSS.) |
206 | size_t getSize() const; |
207 | |
208 | InputSection *getLinkOrderDep() const; |
209 | |
210 | // Get a symbol that encloses this offset from within the section. If type is |
211 | // not zero, return a symbol with the specified type. |
212 | Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; |
213 | Defined *getEnclosingFunction(uint64_t offset) const { |
214 | return getEnclosingSymbol(offset, type: llvm::ELF::STT_FUNC); |
215 | } |
216 | |
217 | // Returns a source location string. Used to construct an error message. |
218 | std::string getLocation(uint64_t offset) const; |
219 | std::string getSrcMsg(const Symbol &sym, uint64_t offset) const; |
220 | std::string getObjMsg(uint64_t offset) const; |
221 | |
222 | // Each section knows how to relocate itself. These functions apply |
223 | // relocations, assuming that Buf points to this section's copy in |
224 | // the mmap'ed output buffer. |
225 | template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd); |
226 | static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, |
227 | int64_t A, uint64_t P, const Symbol &Sym, |
228 | RelExpr Expr); |
229 | |
230 | // The native ELF reloc data type is not very convenient to handle. |
231 | // So we convert ELF reloc records to our own records in Relocations.cpp. |
232 | // This vector contains such "cooked" relocations. |
233 | SmallVector<Relocation, 0> relocations; |
234 | |
235 | void addReloc(const Relocation &r) { relocations.push_back(Elt: r); } |
236 | MutableArrayRef<Relocation> relocs() { return relocations; } |
237 | ArrayRef<Relocation> relocs() const { return relocations; } |
238 | |
239 | union { |
240 | // These are modifiers to jump instructions that are necessary when basic |
241 | // block sections are enabled. Basic block sections creates opportunities |
242 | // to relax jump instructions at basic block boundaries after reordering the |
243 | // basic blocks. |
244 | JumpInstrMod *jumpInstrMod = nullptr; |
245 | |
246 | // Auxiliary information for RISC-V and LoongArch linker relaxation. |
247 | // They do not use jumpInstrMod. |
248 | RelaxAux *relaxAux; |
249 | |
250 | // The compressed content size when `compressed` is true. |
251 | size_t compressedSize; |
252 | }; |
253 | |
254 | // A function compiled with -fsplit-stack calling a function |
255 | // compiled without -fsplit-stack needs its prologue adjusted. Find |
256 | // such functions and adjust their prologues. This is very similar |
257 | // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more |
258 | // information. |
259 | template <typename ELFT> |
260 | void adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end); |
261 | |
262 | |
263 | template <typename T> llvm::ArrayRef<T> getDataAs() const { |
264 | size_t s = content().size(); |
265 | assert(s % sizeof(T) == 0); |
266 | return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T)); |
267 | } |
268 | |
269 | protected: |
270 | template <typename ELFT> |
271 | void (); |
272 | void decompress() const; |
273 | }; |
274 | |
// One piece of a splittable section's contents. Many of these are allocated
// and binary-searched, so the struct has to stay as compact as possible; the
// piece size is therefore not stored (it can be found by looking at the next
// piece).
struct SectionPiece {
  SectionPiece() = default;

  // The hash field is 31 bits wide, so the lowest bit of the incoming hash is
  // shifted out before it is stored.
  SectionPiece(size_t off, uint32_t hash, bool live)
      : inputOff(off), live(live), hash(hash >> 1) {}

  uint32_t inputOff;      // Offset of this piece in the input section.
  uint32_t live : 1;      // Liveness flag passed to the constructor.
  uint32_t hash : 31;     // Upper 31 bits of the constructor's hash argument.
  uint64_t outputOff = 0; // Zero until assigned.
};

// Guards the compact layout described above.
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
291 | |
292 | // This corresponds to a SHF_MERGE section of an input file. |
293 | class MergeInputSection : public InputSectionBase { |
294 | public: |
295 | template <class ELFT> |
296 | MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
297 | StringRef name); |
298 | MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize, |
299 | ArrayRef<uint8_t> data, StringRef name); |
300 | |
301 | static bool classof(const SectionBase *s) { return s->kind() == Merge; } |
302 | void splitIntoPieces(); |
303 | |
304 | // Translate an offset in the input section to an offset in the parent |
305 | // MergeSyntheticSection. |
306 | uint64_t getParentOffset(uint64_t offset) const; |
307 | |
308 | // Splittable sections are handled as a sequence of data |
309 | // rather than a single large blob of data. |
310 | SmallVector<SectionPiece, 0> pieces; |
311 | |
312 | // Returns I'th piece's data. This function is very hot when |
313 | // string merging is enabled, so we want to inline. |
314 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
315 | llvm::CachedHashStringRef getData(size_t i) const { |
316 | size_t begin = pieces[i].inputOff; |
317 | size_t end = |
318 | (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff; |
319 | return {toStringRef(Input: content().slice(N: begin, M: end - begin)), pieces[i].hash}; |
320 | } |
321 | |
322 | // Returns the SectionPiece at a given input section offset. |
323 | SectionPiece &getSectionPiece(uint64_t offset); |
324 | const SectionPiece &getSectionPiece(uint64_t offset) const { |
325 | return const_cast<MergeInputSection *>(this)->getSectionPiece(offset); |
326 | } |
327 | |
328 | SyntheticSection *getParent() const { |
329 | return cast_or_null<SyntheticSection>(Val: parent); |
330 | } |
331 | |
332 | private: |
333 | void splitStrings(StringRef s, size_t size); |
334 | void splitNonStrings(ArrayRef<uint8_t> a, size_t size); |
335 | }; |
336 | |
337 | struct EhSectionPiece { |
338 | EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size, |
339 | unsigned firstRelocation) |
340 | : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {} |
341 | |
342 | ArrayRef<uint8_t> data() const { |
343 | return {sec->content().data() + this->inputOff, size}; |
344 | } |
345 | |
346 | size_t inputOff; |
347 | ssize_t outputOff = -1; |
348 | InputSectionBase *sec; |
349 | uint32_t size; |
350 | unsigned firstRelocation; |
351 | }; |
352 | |
353 | // This corresponds to a .eh_frame section of an input file. |
354 | class EhInputSection : public InputSectionBase { |
355 | public: |
356 | template <class ELFT> |
357 | EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
358 | StringRef name); |
359 | static bool classof(const SectionBase *s) { return s->kind() == EHFrame; } |
360 | template <class ELFT> void split(); |
361 | template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels); |
362 | |
363 | // Splittable sections are handled as a sequence of data |
364 | // rather than a single large blob of data. |
365 | SmallVector<EhSectionPiece, 0> cies, fdes; |
366 | |
367 | SyntheticSection *getParent() const; |
368 | uint64_t getParentOffset(uint64_t offset) const; |
369 | }; |
370 | |
371 | // This is a section that is added directly to an output section |
372 | // instead of needing special combination via a synthetic section. This |
373 | // includes all input sections with the exceptions of SHF_MERGE and |
374 | // .eh_frame. It also includes the synthetic sections themselves. |
375 | class InputSection : public InputSectionBase { |
376 | public: |
377 | InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign, |
378 | ArrayRef<uint8_t> data, StringRef name, Kind k = Regular); |
379 | template <class ELFT> |
380 | InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
381 | StringRef name); |
382 | |
383 | static bool classof(const SectionBase *s) { |
384 | return s->kind() == SectionBase::Regular || |
385 | s->kind() == SectionBase::Synthetic; |
386 | } |
387 | |
388 | // Write this section to a mmap'ed file, assuming Buf is pointing to |
389 | // beginning of the output section. |
390 | template <class ELFT> void writeTo(uint8_t *buf); |
391 | |
392 | OutputSection *getParent() const { |
393 | return reinterpret_cast<OutputSection *>(parent); |
394 | } |
395 | |
396 | // This variable has two usages. Initially, it represents an index in the |
397 | // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER |
398 | // sections. After assignAddresses is called, it represents the offset from |
399 | // the beginning of the output section this section was assigned to. |
400 | uint64_t outSecOff = 0; |
401 | |
402 | InputSectionBase *getRelocatedSection() const; |
403 | |
404 | template <class ELFT, class RelTy> |
405 | void relocateNonAlloc(uint8_t *buf, llvm::ArrayRef<RelTy> rels); |
406 | |
407 | // Points to the canonical section. If ICF folds two sections, repl pointer of |
408 | // one section points to the other. |
409 | InputSection *repl = this; |
410 | |
411 | // Used by ICF. |
412 | uint32_t eqClass[2] = {0, 0}; |
413 | |
414 | // Called by ICF to merge two input sections. |
415 | void replace(InputSection *other); |
416 | |
417 | static InputSection discarded; |
418 | |
419 | private: |
420 | template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf); |
421 | |
422 | template <class ELFT, class RelTy, class RelIt> |
423 | void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels); |
424 | |
425 | template <class ELFT> void copyShtGroup(uint8_t *buf); |
426 | }; |
427 | |
428 | static_assert(sizeof(InputSection) <= 160, "InputSection is too big" ); |
429 | |
430 | class SyntheticSection : public InputSection { |
431 | public: |
432 | SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign, |
433 | StringRef name) |
434 | : InputSection(ctx.internalFile, flags, type, addralign, {}, name, |
435 | InputSectionBase::Synthetic) {} |
436 | |
437 | virtual ~SyntheticSection() = default; |
438 | virtual size_t getSize() const = 0; |
439 | virtual bool updateAllocSize() { return false; } |
440 | // If the section has the SHF_ALLOC flag and the size may be changed if |
441 | // thunks are added, update the section size. |
442 | virtual bool isNeeded() const { return true; } |
443 | virtual void finalizeContents() {} |
444 | virtual void writeTo(uint8_t *buf) = 0; |
445 | |
446 | static bool classof(const SectionBase *sec) { |
447 | return sec->kind() == InputSectionBase::Synthetic; |
448 | } |
449 | }; |
450 | |
451 | inline bool isStaticRelSecType(uint32_t type) { |
452 | return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_REL; |
453 | } |
454 | |
455 | inline bool isDebugSection(const InputSectionBase &sec) { |
456 | return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 && |
457 | sec.name.starts_with(Prefix: ".debug" ); |
458 | } |
459 | |
460 | // The set of TOC entries (.toc + addend) for which we should not apply |
461 | // toc-indirect to toc-relative relaxation. const Symbol * refers to the |
462 | // STT_SECTION symbol associated to the .toc input section. |
463 | extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax; |
464 | |
465 | } // namespace elf |
466 | |
467 | std::string toString(const elf::InputSectionBase *); |
468 | } // namespace lld |
469 | |
470 | #endif |
471 | |