1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_LINKER_SCRIPT_H
10#define LLD_ELF_LINKER_SCRIPT_H
11
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
26
27namespace lld::elf {
28
// Forward declarations of types that this header only refers to through
// pointers or references.
class Defined;
class InputFile;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class ThunkSection;
struct OutputDesc;
37
38// This represents an r-value in the linker script.
39struct ExprValue {
40 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
41 const Twine &loc)
42 : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
43
44 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
45
46 bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
47 uint64_t getValue() const;
48 uint64_t getSecAddr() const;
49 uint64_t getSectionOffset() const;
50
51 // If a value is relative to a section, it has a non-null Sec.
52 SectionBase *sec;
53
54 uint64_t val;
55 uint64_t alignment = 1;
56
57 // The original st_type if the expression represents a symbol. Any operation
58 // resets type to STT_NOTYPE.
59 uint8_t type = llvm::ELF::STT_NOTYPE;
60
61 // True if this expression is enclosed in ABSOLUTE().
62 // This flag affects the return value of getValue().
63 bool forceAbsolute;
64
65 // Original source location. Used for error messages.
66 std::string loc;
67};
68
69// This represents an expression in the linker script.
70// ScriptParser::readExpr reads an expression and returns an Expr.
71// Later, we evaluate the expression by calling the function.
72using Expr = std::function<ExprValue()>;
73
// Discriminator for the commands that make up a linker script SECTIONS
// command. The value is stored in SectionCommand::kind and tested by the
// classof() helpers of the concrete command types.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  // NOTE(review): no user of this kind is visible in this header; presumably
  // it tags output section descriptions - confirm against OutputDesc.
  OutputSectionKind,
  // Tag used by InputSectionDescription.
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind
};
82
// Common base of the SECTIONS command types. It carries only the kind tag
// that the classof() helpers of the derived types compare against.
struct SectionCommand {
  // `k` is the kind tag (a SectionsCommandKind value in this header).
  // Explicit so that an arbitrary integer never converts silently into a
  // SectionCommand; derived classes construct the base directly.
  explicit SectionCommand(int k) : kind(k) {}

  // Kind tag identifying the concrete command type.
  int kind;
};
87
88// This represents ". = <expr>" or "<symbol> = <expr>".
89struct SymbolAssignment : SectionCommand {
90 SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
91 : SectionCommand(AssignmentKind), name(name), expression(e),
92 symOrder(symOrder), location(loc) {}
93
94 static bool classof(const SectionCommand *c) {
95 return c->kind == AssignmentKind;
96 }
97
98 // The LHS of an expression. Name is either a symbol name or ".".
99 StringRef name;
100 Defined *sym = nullptr;
101
102 // The RHS of an expression.
103 Expr expression;
104
105 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
106 bool provide = false;
107 bool hidden = false;
108
109 // This assignment references DATA_SEGMENT_RELRO_END.
110 bool dataSegmentRelroEnd = false;
111
112 unsigned symOrder;
113
114 // Holds file name and line number for error reporting.
115 std::string location;
116
117 // A string representation of this command. We use this for -Map.
118 std::string commandString;
119
120 // Address of this assignment command.
121 uint64_t addr;
122
123 // Size of this assignment command. This is usually 0, but if
124 // you move '.' this may be greater than 0.
125 uint64_t size;
126};
127
// Optional constraints that a linker script can place on an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; one marked ONLY_IF_RW is created if all of its input
// sections are RW.
enum class ConstraintKind {
  // No constraint.
  NoConstraint,
  // ONLY_IF_RO.
  ReadOnly,
  // ONLY_IF_RW.
  ReadWrite
};
133
134// This struct is used to represent the location and size of regions of
135// target memory. Instances of the struct are created by parsing the
136// MEMORY command.
137struct MemoryRegion {
138 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
139 uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
140 : name(std::string(name)), origin(origin), length(length), flags(flags),
141 invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
142
143 std::string name;
144 Expr origin;
145 Expr length;
146 // A section can be assigned to the region if any of these ELF section flags
147 // are set...
148 uint32_t flags;
149 // ... or any of these flags are not set.
150 // For example, the memory region attribute "r" maps to SHF_WRITE.
151 uint32_t invFlags;
152 // A section cannot be assigned to the region if any of these ELF section
153 // flags are set...
154 uint32_t negFlags;
155 // ... or any of these flags are not set.
156 // For example, the memory region attribute "!r" maps to SHF_WRITE.
157 uint32_t negInvFlags;
158 uint64_t curPos = 0;
159
160 uint64_t getOrigin() const { return origin().getValue(); }
161 uint64_t getLength() const { return length().getValue(); }
162
163 bool compatibleWith(uint32_t secFlags) const {
164 if ((secFlags & negFlags) || (~secFlags & negInvFlags))
165 return false;
166 return (secFlags & flags) || (~secFlags & invFlags);
167 }
168};
169
170// This struct represents one section match pattern in SECTIONS() command.
171// It can optionally have negative match pattern for EXCLUDED_FILE command.
172// Also it may be surrounded with SORT() command, so contains sorting rules.
173class SectionPattern {
174 StringMatcher excludedFilePat;
175
176 // Cache of the most recent input argument and result of excludesFile().
177 mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
178
179public:
180 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
181 : excludedFilePat(pat1), sectionPat(pat2),
182 sortOuter(SortSectionPolicy::Default),
183 sortInner(SortSectionPolicy::Default) {}
184
185 bool excludesFile(const InputFile *file) const;
186
187 StringMatcher sectionPat;
188 SortSectionPolicy sortOuter;
189 SortSectionPolicy sortInner;
190};
191
192class InputSectionDescription : public SectionCommand {
193 SingleStringMatcher filePat;
194
195 // Cache of the most recent input argument and result of matchesFile().
196 mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
197
198public:
199 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
200 uint64_t withoutFlags = 0)
201 : SectionCommand(InputSectionKind), filePat(filePattern),
202 withFlags(withFlags), withoutFlags(withoutFlags) {}
203
204 static bool classof(const SectionCommand *c) {
205 return c->kind == InputSectionKind;
206 }
207
208 bool matchesFile(const InputFile *file) const;
209
210 // Input sections that matches at least one of SectionPatterns
211 // will be associated with this InputSectionDescription.
212 SmallVector<SectionPattern, 0> sectionPatterns;
213
214 // Includes InputSections and MergeInputSections. Used temporarily during
215 // assignment of input sections to output sections.
216 SmallVector<InputSectionBase *, 0> sectionBases;
217
218 // Used after the finalizeInputSections() pass. MergeInputSections have been
219 // merged into MergeSyntheticSections.
220 SmallVector<InputSection *, 0> sections;
221
222 // Temporary record of synthetic ThunkSection instances and the pass that
223 // they were created in. This is used to insert newly created ThunkSections
224 // into Sections at the end of a createThunks() pass.
225 SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
226
227 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
228 uint64_t withFlags;
229 uint64_t withoutFlags;
230};
231
232// Represents BYTE(), SHORT(), LONG(), or QUAD().
233struct ByteCommand : SectionCommand {
234 ByteCommand(Expr e, unsigned size, std::string commandString)
235 : SectionCommand(ByteKind), commandString(commandString), expression(e),
236 size(size) {}
237
238 static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
239
240 // Keeps string representing the command. Used for -Map" is perhaps better.
241 std::string commandString;
242
243 Expr expression;
244
245 // This is just an offset of this assignment command in the output section.
246 unsigned offset;
247
248 // Size of this data command.
249 unsigned size;
250};
251
252struct InsertCommand {
253 SmallVector<StringRef, 0> names;
254 bool isAfter;
255 StringRef where;
256};
257
258struct PhdrsCommand {
259 StringRef name;
260 unsigned type = llvm::ELF::PT_NULL;
261 bool hasFilehdr = false;
262 bool hasPhdrs = false;
263 std::optional<unsigned> flags;
264 Expr lmaExpr = nullptr;
265};
266
267class LinkerScript final {
268 // Temporary state used in processSectionCommands() and assignAddresses()
269 // that must be reinitialized for each call to the above functions, and must
270 // not be used outside of the scope of a call to the above functions.
271 struct AddressState {
272 AddressState();
273 OutputSection *outSec = nullptr;
274 MemoryRegion *memRegion = nullptr;
275 MemoryRegion *lmaRegion = nullptr;
276 uint64_t lmaOffset = 0;
277 uint64_t tbssAddr = 0;
278 };
279
280 llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
281
282 void addSymbol(SymbolAssignment *cmd);
283 void assignSymbol(SymbolAssignment *cmd, bool inSec);
284 void setDot(Expr e, const Twine &loc, bool inSec);
285 void expandOutputSection(uint64_t size);
286 void expandMemoryRegions(uint64_t size);
287
288 SmallVector<InputSectionBase *, 0>
289 computeInputSections(const InputSectionDescription *,
290 ArrayRef<InputSectionBase *>);
291
292 SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
293
294 void discardSynthetic(OutputSection &);
295
296 SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
297
298 std::pair<MemoryRegion *, MemoryRegion *>
299 findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
300
301 void assignOffsets(OutputSection *sec);
302
303 // This captures the local AddressState and makes it accessible
304 // deliberately. This is needed as there are some cases where we cannot just
305 // thread the current state through to a lambda function created by the
306 // script parser.
307 // This should remain a plain pointer as its lifetime is smaller than
308 // LinkerScript.
309 AddressState *state = nullptr;
310
311 OutputSection *aether;
312
313 uint64_t dot;
314
315public:
316 OutputDesc *createOutputSection(StringRef name, StringRef location);
317 OutputDesc *getOrCreateOutputSection(StringRef name);
318
319 bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
320 uint64_t getDot() { return dot; }
321 void discard(InputSectionBase &s);
322
323 ExprValue getSymbolValue(StringRef name, const Twine &loc);
324
325 void addOrphanSections();
326 void diagnoseOrphanHandling() const;
327 void diagnoseMissingSGSectionAddress() const;
328 void adjustOutputSections();
329 void adjustSectionsAfterSorting();
330
331 SmallVector<PhdrEntry *, 0> createPhdrs();
332 bool needsInterpSection();
333
334 bool shouldKeep(InputSectionBase *s);
335 const Defined *assignAddresses();
336 void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
337 void processSectionCommands();
338 void processSymbolAssignments();
339 void declareSymbols();
340
341 bool isDiscarded(const OutputSection *sec) const;
342
343 // Used to handle INSERT AFTER statements.
344 void processInsertCommands();
345
346 // Describe memory region usage.
347 void printMemoryUsage(raw_ostream &os);
348
349 // Check backward location counter assignment and memory region/LMA overflows.
350 void checkFinalScriptConditions() const;
351
352 // Add symbols that are referenced in the linker script to the symbol table.
353 // Symbols referenced in a PROVIDE command are only added to the symbol table
354 // if the PROVIDE command actually provides the symbol.
355 // It also adds the symbols referenced by the used PROVIDE symbols to the
356 // linker script referenced symbols list.
357 void addScriptReferencedSymbolsToSymTable();
358
359 // Returns true if the PROVIDE symbol should be added to the link.
360 // A PROVIDE symbol is added to the link only if it satisfies an
361 // undefined reference.
362 static bool shouldAddProvideSym(StringRef symName);
363
364 // SECTIONS command list.
365 SmallVector<SectionCommand *, 0> sectionCommands;
366
367 // PHDRS command list.
368 SmallVector<PhdrsCommand, 0> phdrsCommands;
369
370 bool hasSectionsCommand = false;
371 bool seenDataAlign = false;
372 bool seenRelroEnd = false;
373 bool errorOnMissingSection = false;
374 std::string backwardDotErr;
375
376 // List of section patterns specified with KEEP commands. They will
377 // be kept even if they are unused and --gc-sections is specified.
378 SmallVector<InputSectionDescription *, 0> keptSections;
379
380 // A map from memory region name to a memory region descriptor.
381 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
382
383 // A list of symbols referenced by the script.
384 SmallVector<llvm::StringRef, 0> referencedSymbols;
385
386 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
387 // to be reordered.
388 SmallVector<InsertCommand, 0> insertCommands;
389
390 // OutputSections specified by OVERWRITE_SECTIONS.
391 SmallVector<OutputDesc *, 0> overwriteSections;
392
393 // Sections that will be warned/errored by --orphan-handling.
394 SmallVector<const InputSectionBase *, 0> orphanSections;
395
396 // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
397 // expression. For example, if the PROVIDE command is:
398 //
399 // PROVIDE(v = a + b + c);
400 //
401 // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
402 llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
403};
404
405LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
406
407} // end namespace lld::elf
408
409#endif // LLD_ELF_LINKER_SCRIPT_H
410

// Source code of lld/ELF/LinkerScript.h