1 | //===- LinkerScript.cpp ---------------------------------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the parser/evaluator of the linker script. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "LinkerScript.h" |
14 | #include "Config.h" |
15 | #include "InputFiles.h" |
16 | #include "InputSection.h" |
17 | #include "OutputSections.h" |
18 | #include "SymbolTable.h" |
19 | #include "Symbols.h" |
20 | #include "SyntheticSections.h" |
21 | #include "Target.h" |
22 | #include "Writer.h" |
23 | #include "lld/Common/CommonLinkerContext.h" |
24 | #include "lld/Common/Strings.h" |
25 | #include "llvm/ADT/STLExtras.h" |
26 | #include "llvm/ADT/StringRef.h" |
27 | #include "llvm/BinaryFormat/ELF.h" |
28 | #include "llvm/Support/Casting.h" |
29 | #include "llvm/Support/ErrorHandling.h" |
30 | #include "llvm/Support/TimeProfiler.h" |
31 | #include <algorithm> |
32 | #include <cassert> |
33 | #include <cstddef> |
34 | #include <cstdint> |
35 | #include <limits> |
36 | #include <string> |
37 | |
38 | using namespace llvm; |
39 | using namespace llvm::ELF; |
40 | using namespace llvm::object; |
41 | using namespace llvm::support::endian; |
42 | using namespace lld; |
43 | using namespace lld::elf; |
44 | |
45 | static bool isSectionPrefix(StringRef prefix, StringRef name) { |
46 | return name.consume_front(Prefix: prefix) && (name.empty() || name[0] == '.'); |
47 | } |
48 | |
49 | StringRef LinkerScript::getOutputSectionName(const InputSectionBase *s) const { |
50 | // This is for --emit-relocs and -r. If .text.foo is emitted as .text.bar, we |
51 | // want to emit .rela.text.foo as .rela.text.bar for consistency (this is not |
52 | // technically required, but not doing it is odd). This code guarantees that. |
53 | if (auto *isec = dyn_cast<InputSection>(Val: s)) { |
54 | if (InputSectionBase *rel = isec->getRelocatedSection()) { |
55 | OutputSection *out = rel->getOutputSection(); |
56 | if (!out) { |
57 | assert(ctx.arg.relocatable && (rel->flags & SHF_LINK_ORDER)); |
58 | return s->name; |
59 | } |
60 | StringSaver &ss = ctx.saver; |
61 | if (s->type == SHT_CREL) |
62 | return ss.save(S: ".crel"+ out->name); |
63 | if (s->type == SHT_RELA) |
64 | return ss.save(S: ".rela"+ out->name); |
65 | return ss.save(S: ".rel"+ out->name); |
66 | } |
67 | } |
68 | |
69 | if (ctx.arg.relocatable) |
70 | return s->name; |
71 | |
72 | // A BssSection created for a common symbol is identified as "COMMON" in |
73 | // linker scripts. It should go to .bss section. |
74 | if (s->name == "COMMON") |
75 | return ".bss"; |
76 | |
77 | if (hasSectionsCommand) |
78 | return s->name; |
79 | |
80 | // When no SECTIONS is specified, emulate GNU ld's internal linker scripts |
81 | // by grouping sections with certain prefixes. |
82 | |
83 | // GNU ld places text sections with prefix ".text.hot.", ".text.unknown.", |
84 | // ".text.unlikely.", ".text.startup." or ".text.exit." before others. |
85 | // We provide an option -z keep-text-section-prefix to group such sections |
86 | // into separate output sections. This is more flexible. See also |
87 | // sortISDBySectionOrder(). |
88 | // ".text.unknown" means the hotness of the section is unknown. When |
89 | // SampleFDO is used, if a function doesn't have sample, it could be very |
90 | // cold or it could be a new function never being sampled. Those functions |
91 | // will be kept in the ".text.unknown" section. |
92 | // ".text.split." holds symbols which are split out from functions in other |
93 | // input sections. For example, with -fsplit-machine-functions, placing the |
94 | // cold parts in .text.split instead of .text.unlikely mitigates against poor |
95 | // profile inaccuracy. Techniques such as hugepage remapping can make |
96 | // conservative decisions at the section granularity. |
97 | if (isSectionPrefix(prefix: ".text", name: s->name)) { |
98 | if (ctx.arg.zKeepTextSectionPrefix) |
99 | for (StringRef v : {".text.hot", ".text.unknown", ".text.unlikely", |
100 | ".text.startup", ".text.exit", ".text.split"}) |
101 | if (isSectionPrefix(prefix: v.substr(Start: 5), name: s->name.substr(Start: 5))) |
102 | return v; |
103 | return ".text"; |
104 | } |
105 | |
106 | for (StringRef v : {".data.rel.ro", ".data", ".rodata", |
107 | ".bss.rel.ro", ".bss", ".ldata", |
108 | ".lrodata", ".lbss", ".gcc_except_table", |
109 | ".init_array", ".fini_array", ".tbss", |
110 | ".tdata", ".ARM.exidx", ".ARM.extab", |
111 | ".ctors", ".dtors", ".sbss", |
112 | ".sdata", ".srodata"}) |
113 | if (isSectionPrefix(prefix: v, name: s->name)) |
114 | return v; |
115 | |
116 | return s->name; |
117 | } |
118 | |
119 | uint64_t ExprValue::getValue() const { |
120 | if (sec) |
121 | return alignToPowerOf2(Value: sec->getOutputSection()->addr + sec->getOffset(offset: val), |
122 | Align: alignment); |
123 | return alignToPowerOf2(Value: val, Align: alignment); |
124 | } |
125 | |
126 | uint64_t ExprValue::getSecAddr() const { |
127 | return sec ? sec->getOutputSection()->addr + sec->getOffset(offset: 0) : 0; |
128 | } |
129 | |
130 | uint64_t ExprValue::getSectionOffset() const { |
131 | return getValue() - getSecAddr(); |
132 | } |
133 | |
134 | // std::unique_ptr<OutputSection> may be incomplete type. |
135 | LinkerScript::LinkerScript(Ctx &ctx) : ctx(ctx) {} |
136 | LinkerScript::~LinkerScript() {} |
137 | |
138 | OutputDesc *LinkerScript::createOutputSection(StringRef name, |
139 | StringRef location) { |
140 | OutputDesc *&secRef = nameToOutputSection[CachedHashStringRef(name)]; |
141 | OutputDesc *sec; |
142 | if (secRef && secRef->osec.location.empty()) { |
143 | // There was a forward reference. |
144 | sec = secRef; |
145 | } else { |
146 | descPool.emplace_back( |
147 | Args: std::make_unique<OutputDesc>(args&: ctx, args&: name, args: SHT_PROGBITS, args: 0)); |
148 | sec = descPool.back().get(); |
149 | if (!secRef) |
150 | secRef = sec; |
151 | } |
152 | sec->osec.location = std::string(location); |
153 | return sec; |
154 | } |
155 | |
156 | OutputDesc *LinkerScript::getOrCreateOutputSection(StringRef name) { |
157 | auto &secRef = nameToOutputSection[CachedHashStringRef(name)]; |
158 | if (!secRef) { |
159 | secRef = descPool |
160 | .emplace_back( |
161 | Args: std::make_unique<OutputDesc>(args&: ctx, args&: name, args: SHT_PROGBITS, args: 0)) |
162 | .get(); |
163 | } |
164 | return secRef; |
165 | } |
166 | |
167 | // Expands the memory region by the specified size. |
168 | static void expandMemoryRegion(MemoryRegion *memRegion, uint64_t size, |
169 | StringRef secName) { |
170 | memRegion->curPos += size; |
171 | } |
172 | |
173 | void LinkerScript::expandMemoryRegions(uint64_t size) { |
174 | if (state->memRegion) |
175 | expandMemoryRegion(memRegion: state->memRegion, size, secName: state->outSec->name); |
176 | // Only expand the LMARegion if it is different from memRegion. |
177 | if (state->lmaRegion && state->memRegion != state->lmaRegion) |
178 | expandMemoryRegion(memRegion: state->lmaRegion, size, secName: state->outSec->name); |
179 | } |
180 | |
181 | void LinkerScript::expandOutputSection(uint64_t size) { |
182 | state->outSec->size += size; |
183 | size_t regionSize = size; |
184 | if (state->outSec->inOverlay) { |
185 | // Expand the overlay if necessary, and expand the region by the |
186 | // corresponding amount. |
187 | if (state->outSec->size > state->overlaySize) { |
188 | regionSize = state->outSec->size - state->overlaySize; |
189 | state->overlaySize = state->outSec->size; |
190 | } else { |
191 | regionSize = 0; |
192 | } |
193 | } |
194 | expandMemoryRegions(size: regionSize); |
195 | } |
196 | |
197 | void LinkerScript::setDot(Expr e, const Twine &loc, bool inSec) { |
198 | uint64_t val = e().getValue(); |
199 | // If val is smaller and we are in an output section, record the error and |
200 | // report it if this is the last assignAddresses iteration. dot may be smaller |
201 | // if there is another assignAddresses iteration. |
202 | if (val < dot && inSec) { |
203 | recordError(msg: loc + ": unable to move location counter (0x"+ |
204 | Twine::utohexstr(Val: dot) + ") backward to 0x"+ |
205 | Twine::utohexstr(Val: val) + " for section '"+ state->outSec->name + |
206 | "'"); |
207 | } |
208 | |
209 | // Update to location counter means update to section size. |
210 | if (inSec) |
211 | expandOutputSection(size: val - dot); |
212 | |
213 | dot = val; |
214 | } |
215 | |
216 | // Used for handling linker symbol assignments, for both finalizing |
217 | // their values and doing early declarations. Returns true if symbol |
218 | // should be defined from linker script. |
219 | static bool shouldDefineSym(Ctx &ctx, SymbolAssignment *cmd) { |
220 | if (cmd->name == ".") |
221 | return false; |
222 | |
223 | return !cmd->provide || ctx.script->shouldAddProvideSym(symName: cmd->name); |
224 | } |
225 | |
226 | // Called by processSymbolAssignments() to assign definitions to |
227 | // linker-script-defined symbols. |
228 | void LinkerScript::addSymbol(SymbolAssignment *cmd) { |
229 | if (!shouldDefineSym(ctx, cmd)) |
230 | return; |
231 | |
232 | // Define a symbol. |
233 | ExprValue value = cmd->expression(); |
234 | SectionBase *sec = value.isAbsolute() ? nullptr : value.sec; |
235 | uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT; |
236 | |
237 | // When this function is called, section addresses have not been |
238 | // fixed yet. So, we may or may not know the value of the RHS |
239 | // expression. |
240 | // |
241 | // For example, if an expression is `x = 42`, we know x is always 42. |
242 | // However, if an expression is `x = .`, there's no way to know its |
243 | // value at the moment. |
244 | // |
245 | // We want to set symbol values early if we can. This allows us to |
246 | // use symbols as variables in linker scripts. Doing so allows us to |
247 | // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`. |
248 | uint64_t symValue = value.sec ? 0 : value.getValue(); |
249 | |
250 | Defined newSym(ctx, createInternalFile(ctx, name: cmd->location), cmd->name, |
251 | STB_GLOBAL, visibility, value.type, symValue, 0, sec); |
252 | |
253 | Symbol *sym = ctx.symtab->insert(name: cmd->name); |
254 | sym->mergeProperties(other: newSym); |
255 | newSym.overwrite(sym&: *sym); |
256 | sym->isUsedInRegularObj = true; |
257 | cmd->sym = cast<Defined>(Val: sym); |
258 | } |
259 | |
260 | // This function is called from LinkerScript::declareSymbols. |
261 | // It creates a placeholder symbol if needed. |
262 | void LinkerScript::declareSymbol(SymbolAssignment *cmd) { |
263 | if (!shouldDefineSym(ctx, cmd)) |
264 | return; |
265 | |
266 | uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT; |
267 | Defined newSym(ctx, ctx.internalFile, cmd->name, STB_GLOBAL, visibility, |
268 | STT_NOTYPE, 0, 0, nullptr); |
269 | |
270 | // If the symbol is already defined, its order is 0 (with absence indicating |
271 | // 0); otherwise it's assigned the order of the SymbolAssignment. |
272 | Symbol *sym = ctx.symtab->insert(name: cmd->name); |
273 | if (!sym->isDefined()) |
274 | ctx.scriptSymOrder.insert(KV: {sym, cmd->symOrder}); |
275 | |
276 | // We can't calculate final value right now. |
277 | sym->mergeProperties(other: newSym); |
278 | newSym.overwrite(sym&: *sym); |
279 | |
280 | cmd->sym = cast<Defined>(Val: sym); |
281 | cmd->provide = false; |
282 | sym->isUsedInRegularObj = true; |
283 | sym->scriptDefined = true; |
284 | } |
285 | |
286 | using SymbolAssignmentMap = |
287 | DenseMap<const Defined *, std::pair<SectionBase *, uint64_t>>; |
288 | |
289 | // Collect section/value pairs of linker-script-defined symbols. This is used to |
290 | // check whether symbol values converge. |
291 | static SymbolAssignmentMap |
292 | getSymbolAssignmentValues(ArrayRef<SectionCommand *> sectionCommands) { |
293 | SymbolAssignmentMap ret; |
294 | for (SectionCommand *cmd : sectionCommands) { |
295 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
296 | if (assign->sym) // sym is nullptr for dot. |
297 | ret.try_emplace(Key: assign->sym, Args: std::make_pair(x&: assign->sym->section, |
298 | y&: assign->sym->value)); |
299 | continue; |
300 | } |
301 | if (isa<SectionClassDesc>(Val: cmd)) |
302 | continue; |
303 | for (SectionCommand *subCmd : cast<OutputDesc>(Val: cmd)->osec.commands) |
304 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: subCmd)) |
305 | if (assign->sym) |
306 | ret.try_emplace(Key: assign->sym, Args: std::make_pair(x&: assign->sym->section, |
307 | y&: assign->sym->value)); |
308 | } |
309 | return ret; |
310 | } |
311 | |
312 | // Returns the lexicographical smallest (for determinism) Defined whose |
313 | // section/value has changed. |
314 | static const Defined * |
315 | getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) { |
316 | const Defined *changed = nullptr; |
317 | for (auto &it : oldValues) { |
318 | const Defined *sym = it.first; |
319 | if (std::make_pair(x: sym->section, y: sym->value) != it.second && |
320 | (!changed || sym->getName() < changed->getName())) |
321 | changed = sym; |
322 | } |
323 | return changed; |
324 | } |
325 | |
326 | // Process INSERT [AFTER|BEFORE] commands. For each command, we move the |
327 | // specified output section to the designated place. |
328 | void LinkerScript::processInsertCommands() { |
329 | SmallVector<OutputDesc *, 0> moves; |
330 | for (const InsertCommand &cmd : insertCommands) { |
331 | if (ctx.arg.enableNonContiguousRegions) |
332 | ErrAlways(ctx) |
333 | << "INSERT cannot be used with --enable-non-contiguous-regions"; |
334 | |
335 | for (StringRef name : cmd.names) { |
336 | // If base is empty, it may have been discarded by |
337 | // adjustOutputSections(). We do not handle such output sections. |
338 | auto from = llvm::find_if(Range&: sectionCommands, P: [&](SectionCommand *subCmd) { |
339 | return isa<OutputDesc>(Val: subCmd) && |
340 | cast<OutputDesc>(Val: subCmd)->osec.name == name; |
341 | }); |
342 | if (from == sectionCommands.end()) |
343 | continue; |
344 | moves.push_back(Elt: cast<OutputDesc>(Val: *from)); |
345 | sectionCommands.erase(CI: from); |
346 | } |
347 | |
348 | auto insertPos = |
349 | llvm::find_if(Range&: sectionCommands, P: [&cmd](SectionCommand *subCmd) { |
350 | auto *to = dyn_cast<OutputDesc>(Val: subCmd); |
351 | return to != nullptr && to->osec.name == cmd.where; |
352 | }); |
353 | if (insertPos == sectionCommands.end()) { |
354 | ErrAlways(ctx) << "unable to insert "<< cmd.names[0] |
355 | << (cmd.isAfter ? " after ": " before ") << cmd.where; |
356 | } else { |
357 | if (cmd.isAfter) |
358 | ++insertPos; |
359 | sectionCommands.insert(I: insertPos, From: moves.begin(), To: moves.end()); |
360 | } |
361 | moves.clear(); |
362 | } |
363 | } |
364 | |
365 | // Symbols defined in script should not be inlined by LTO. At the same time |
366 | // we don't know their final values until late stages of link. Here we scan |
367 | // over symbol assignment commands and create placeholder symbols if needed. |
368 | void LinkerScript::declareSymbols() { |
369 | assert(!state); |
370 | for (SectionCommand *cmd : sectionCommands) { |
371 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
372 | declareSymbol(cmd: assign); |
373 | continue; |
374 | } |
375 | if (isa<SectionClassDesc>(Val: cmd)) |
376 | continue; |
377 | |
378 | // If the output section directive has constraints, |
379 | // we can't say for sure if it is going to be included or not. |
380 | // Skip such sections for now. Improve the checks if we ever |
381 | // need symbols from that sections to be declared early. |
382 | const OutputSection &sec = cast<OutputDesc>(Val: cmd)->osec; |
383 | if (sec.constraint != ConstraintKind::NoConstraint) |
384 | continue; |
385 | for (SectionCommand *cmd : sec.commands) |
386 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) |
387 | declareSymbol(cmd: assign); |
388 | } |
389 | } |
390 | |
391 | // This function is called from assignAddresses, while we are |
392 | // fixing the output section addresses. This function is supposed |
393 | // to set the final value for a given symbol assignment. |
394 | void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) { |
395 | if (cmd->name == ".") { |
396 | setDot(e: cmd->expression, loc: cmd->location, inSec); |
397 | return; |
398 | } |
399 | |
400 | if (!cmd->sym) |
401 | return; |
402 | |
403 | ExprValue v = cmd->expression(); |
404 | if (v.isAbsolute()) { |
405 | cmd->sym->section = nullptr; |
406 | cmd->sym->value = v.getValue(); |
407 | } else { |
408 | cmd->sym->section = v.sec; |
409 | cmd->sym->value = v.getSectionOffset(); |
410 | } |
411 | cmd->sym->type = v.type; |
412 | } |
413 | |
414 | bool InputSectionDescription::matchesFile(const InputFile &file) const { |
415 | if (filePat.isTrivialMatchAll()) |
416 | return true; |
417 | |
418 | if (!matchesFileCache || matchesFileCache->first != &file) { |
419 | if (matchType == MatchType::WholeArchive) { |
420 | matchesFileCache.emplace(args: &file, args: filePat.match(s: file.archiveName)); |
421 | } else { |
422 | if (matchType == MatchType::ArchivesExcluded && !file.archiveName.empty()) |
423 | matchesFileCache.emplace(args: &file, args: false); |
424 | else |
425 | matchesFileCache.emplace(args: &file, args: filePat.match(s: file.getNameForScript())); |
426 | } |
427 | } |
428 | |
429 | return matchesFileCache->second; |
430 | } |
431 | |
432 | bool SectionPattern::excludesFile(const InputFile &file) const { |
433 | if (excludedFilePat.empty()) |
434 | return false; |
435 | |
436 | if (!excludesFileCache || excludesFileCache->first != &file) |
437 | excludesFileCache.emplace(args: &file, |
438 | args: excludedFilePat.match(s: file.getNameForScript())); |
439 | |
440 | return excludesFileCache->second; |
441 | } |
442 | |
443 | bool LinkerScript::shouldKeep(InputSectionBase *s) { |
444 | for (InputSectionDescription *id : keptSections) |
445 | if (id->matchesFile(file: *s->file)) |
446 | for (SectionPattern &p : id->sectionPatterns) |
447 | if (p.sectionPat.match(s: s->name) && |
448 | (s->flags & id->withFlags) == id->withFlags && |
449 | (s->flags & id->withoutFlags) == 0) |
450 | return true; |
451 | return false; |
452 | } |
453 | |
454 | // A helper function for the SORT() command. |
455 | static bool matchConstraints(ArrayRef<InputSectionBase *> sections, |
456 | ConstraintKind kind) { |
457 | if (kind == ConstraintKind::NoConstraint) |
458 | return true; |
459 | |
460 | bool isRW = llvm::any_of( |
461 | Range&: sections, P: [](InputSectionBase *sec) { return sec->flags & SHF_WRITE; }); |
462 | |
463 | return (isRW && kind == ConstraintKind::ReadWrite) || |
464 | (!isRW && kind == ConstraintKind::ReadOnly); |
465 | } |
466 | |
467 | static void sortSections(MutableArrayRef<InputSectionBase *> vec, |
468 | SortSectionPolicy k) { |
469 | auto alignmentComparator = [](InputSectionBase *a, InputSectionBase *b) { |
470 | // ">" is not a mistake. Sections with larger alignments are placed |
471 | // before sections with smaller alignments in order to reduce the |
472 | // amount of padding necessary. This is compatible with GNU. |
473 | return a->addralign > b->addralign; |
474 | }; |
475 | auto nameComparator = [](InputSectionBase *a, InputSectionBase *b) { |
476 | return a->name < b->name; |
477 | }; |
478 | auto priorityComparator = [](InputSectionBase *a, InputSectionBase *b) { |
479 | return getPriority(s: a->name) < getPriority(s: b->name); |
480 | }; |
481 | |
482 | switch (k) { |
483 | case SortSectionPolicy::Default: |
484 | case SortSectionPolicy::None: |
485 | return; |
486 | case SortSectionPolicy::Alignment: |
487 | return llvm::stable_sort(Range&: vec, C: alignmentComparator); |
488 | case SortSectionPolicy::Name: |
489 | return llvm::stable_sort(Range&: vec, C: nameComparator); |
490 | case SortSectionPolicy::Priority: |
491 | return llvm::stable_sort(Range&: vec, C: priorityComparator); |
492 | case SortSectionPolicy::Reverse: |
493 | return std::reverse(first: vec.begin(), last: vec.end()); |
494 | } |
495 | } |
496 | |
497 | // Sort sections as instructed by SORT-family commands and --sort-section |
498 | // option. Because SORT-family commands can be nested at most two depth |
499 | // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command |
500 | // line option is respected even if a SORT command is given, the exact |
501 | // behavior we have here is a bit complicated. Here are the rules. |
502 | // |
503 | // 1. If two SORT commands are given, --sort-section is ignored. |
504 | // 2. If one SORT command is given, and if it is not SORT_NONE, |
505 | // --sort-section is handled as an inner SORT command. |
506 | // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. |
507 | // 4. If no SORT command is given, sort according to --sort-section. |
508 | static void sortInputSections(Ctx &ctx, MutableArrayRef<InputSectionBase *> vec, |
509 | SortSectionPolicy outer, |
510 | SortSectionPolicy inner) { |
511 | if (outer == SortSectionPolicy::None) |
512 | return; |
513 | |
514 | if (inner == SortSectionPolicy::Default) |
515 | sortSections(vec, k: ctx.arg.sortSection); |
516 | else |
517 | sortSections(vec, k: inner); |
518 | sortSections(vec, k: outer); |
519 | } |
520 | |
521 | // Compute and remember which sections the InputSectionDescription matches. |
522 | SmallVector<InputSectionBase *, 0> |
523 | LinkerScript::computeInputSections(const InputSectionDescription *cmd, |
524 | ArrayRef<InputSectionBase *> sections, |
525 | const SectionBase &outCmd) { |
526 | SmallVector<InputSectionBase *, 0> ret; |
527 | DenseSet<InputSectionBase *> spills; |
528 | |
529 | // Returns whether an input section's flags match the input section |
530 | // description's specifiers. |
531 | auto flagsMatch = [cmd](InputSectionBase *sec) { |
532 | return (sec->flags & cmd->withFlags) == cmd->withFlags && |
533 | (sec->flags & cmd->withoutFlags) == 0; |
534 | }; |
535 | |
536 | // Collects all sections that satisfy constraints of Cmd. |
537 | if (cmd->classRef.empty()) { |
538 | DenseSet<size_t> seen; |
539 | size_t sizeAfterPrevSort = 0; |
540 | SmallVector<size_t, 0> indexes; |
541 | auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) { |
542 | llvm::sort(C: MutableArrayRef<size_t>(indexes).slice(N: begin, M: end - begin)); |
543 | for (size_t i = begin; i != end; ++i) |
544 | ret[i] = sections[indexes[i]]; |
545 | sortInputSections( |
546 | ctx, |
547 | vec: MutableArrayRef<InputSectionBase *>(ret).slice(N: begin, M: end - begin), |
548 | outer: ctx.arg.sortSection, inner: SortSectionPolicy::None); |
549 | }; |
550 | |
551 | for (const SectionPattern &pat : cmd->sectionPatterns) { |
552 | size_t sizeBeforeCurrPat = ret.size(); |
553 | |
554 | for (size_t i = 0, e = sections.size(); i != e; ++i) { |
555 | // Skip if the section is dead or has been matched by a previous pattern |
556 | // in this input section description. |
557 | InputSectionBase *sec = sections[i]; |
558 | if (!sec->isLive() || seen.contains(V: i)) |
559 | continue; |
560 | |
561 | // For --emit-relocs we have to ignore entries like |
562 | // .rela.dyn : { *(.rela.data) } |
563 | // which are common because they are in the default bfd script. |
564 | // We do not ignore SHT_REL[A] linker-synthesized sections here because |
565 | // want to support scripts that do custom layout for them. |
566 | if (isa<InputSection>(Val: sec) && |
567 | cast<InputSection>(Val: sec)->getRelocatedSection()) |
568 | continue; |
569 | |
570 | // Check the name early to improve performance in the common case. |
571 | if (!pat.sectionPat.match(s: sec->name)) |
572 | continue; |
573 | |
574 | if (!cmd->matchesFile(file: *sec->file) || pat.excludesFile(file: *sec->file) || |
575 | !flagsMatch(sec)) |
576 | continue; |
577 | |
578 | if (sec->parent) { |
579 | // Skip if not allowing multiple matches. |
580 | if (!ctx.arg.enableNonContiguousRegions) |
581 | continue; |
582 | |
583 | // Disallow spilling into /DISCARD/; special handling would be needed |
584 | // for this in address assignment, and the semantics are nebulous. |
585 | if (outCmd.name == "/DISCARD/") |
586 | continue; |
587 | |
588 | // Class definitions cannot contain spills, nor can a class definition |
589 | // generate a spill in a subsequent match. Those behaviors belong to |
590 | // class references and additional matches. |
591 | if (!isa<SectionClass>(Val: outCmd) && !isa<SectionClass>(Val: sec->parent)) |
592 | spills.insert(V: sec); |
593 | } |
594 | |
595 | ret.push_back(Elt: sec); |
596 | indexes.push_back(Elt: i); |
597 | seen.insert(V: i); |
598 | } |
599 | |
600 | if (pat.sortOuter == SortSectionPolicy::Default) |
601 | continue; |
602 | |
603 | // Matched sections are ordered by radix sort with the keys being (SORT*, |
604 | // --sort-section, input order), where SORT* (if present) is most |
605 | // significant. |
606 | // |
607 | // Matched sections between the previous SORT* and this SORT* are sorted |
608 | // by (--sort-alignment, input order). |
609 | sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat); |
610 | // Matched sections by this SORT* pattern are sorted using all 3 keys. |
611 | // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we |
612 | // just sort by sortOuter and sortInner. |
613 | sortInputSections( |
614 | ctx, |
615 | vec: MutableArrayRef<InputSectionBase *>(ret).slice(N: sizeBeforeCurrPat), |
616 | outer: pat.sortOuter, inner: pat.sortInner); |
617 | sizeAfterPrevSort = ret.size(); |
618 | } |
619 | |
620 | // Matched sections after the last SORT* are sorted by (--sort-alignment, |
621 | // input order). |
622 | sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size()); |
623 | } else { |
624 | SectionClassDesc *scd = |
625 | sectionClasses.lookup(Val: CachedHashStringRef(cmd->classRef)); |
626 | if (!scd) { |
627 | Err(ctx) << "undefined section class '"<< cmd->classRef << "'"; |
628 | return ret; |
629 | } |
630 | if (!scd->sc.assigned) { |
631 | Err(ctx) << "section class '"<< cmd->classRef << "' referenced by '" |
632 | << outCmd.name << "' before class definition"; |
633 | return ret; |
634 | } |
635 | |
636 | for (InputSectionDescription *isd : scd->sc.commands) { |
637 | for (InputSectionBase *sec : isd->sectionBases) { |
638 | if (!flagsMatch(sec)) |
639 | continue; |
640 | bool isSpill = sec->parent && isa<OutputSection>(Val: sec->parent); |
641 | if (!sec->parent || (isSpill && outCmd.name == "/DISCARD/")) { |
642 | Err(ctx) << "section '"<< sec->name |
643 | << "' cannot spill from/to /DISCARD/"; |
644 | continue; |
645 | } |
646 | if (isSpill) |
647 | spills.insert(V: sec); |
648 | ret.push_back(Elt: sec); |
649 | } |
650 | } |
651 | } |
652 | |
653 | // The flag --enable-non-contiguous-regions or the section CLASS syntax may |
654 | // cause sections to match an InputSectionDescription in more than one |
655 | // OutputSection. Matches after the first were collected in the spills set, so |
656 | // replace these with potential spill sections. |
657 | if (!spills.empty()) { |
658 | for (InputSectionBase *&sec : ret) { |
659 | if (!spills.contains(V: sec)) |
660 | continue; |
661 | |
662 | // Append the spill input section to the list for the input section, |
663 | // creating it if necessary. |
664 | PotentialSpillSection *pss = make<PotentialSpillSection>( |
665 | args&: *sec, args&: const_cast<InputSectionDescription &>(*cmd)); |
666 | auto [it, inserted] = |
667 | potentialSpillLists.try_emplace(Key: sec, Args: PotentialSpillList{.head: pss, .tail: pss}); |
668 | if (!inserted) { |
669 | PotentialSpillSection *&tail = it->second.tail; |
670 | tail = tail->next = pss; |
671 | } |
672 | sec = pss; |
673 | } |
674 | } |
675 | |
676 | return ret; |
677 | } |
678 | |
679 | void LinkerScript::discard(InputSectionBase &s) { |
680 | if (&s == ctx.in.shStrTab.get()) |
681 | ErrAlways(ctx) << "discarding "<< s.name << " section is not allowed"; |
682 | |
683 | s.markDead(); |
684 | s.parent = nullptr; |
685 | for (InputSection *sec : s.dependentSections) |
686 | discard(s&: *sec); |
687 | } |
688 | |
689 | void LinkerScript::discardSynthetic(OutputSection &outCmd) { |
690 | for (Partition &part : ctx.partitions) { |
691 | if (!part.armExidx || !part.armExidx->isLive()) |
692 | continue; |
693 | SmallVector<InputSectionBase *, 0> secs( |
694 | part.armExidx->exidxSections.begin(), |
695 | part.armExidx->exidxSections.end()); |
696 | for (SectionCommand *cmd : outCmd.commands) |
697 | if (auto *isd = dyn_cast<InputSectionDescription>(Val: cmd)) |
698 | for (InputSectionBase *s : computeInputSections(cmd: isd, sections: secs, outCmd)) |
699 | discard(s&: *s); |
700 | } |
701 | } |
702 | |
703 | SmallVector<InputSectionBase *, 0> |
704 | LinkerScript::createInputSectionList(OutputSection &outCmd) { |
705 | SmallVector<InputSectionBase *, 0> ret; |
706 | |
707 | for (SectionCommand *cmd : outCmd.commands) { |
708 | if (auto *isd = dyn_cast<InputSectionDescription>(Val: cmd)) { |
709 | isd->sectionBases = computeInputSections(cmd: isd, sections: ctx.inputSections, outCmd); |
710 | for (InputSectionBase *s : isd->sectionBases) |
711 | s->parent = &outCmd; |
712 | ret.insert(I: ret.end(), From: isd->sectionBases.begin(), To: isd->sectionBases.end()); |
713 | } |
714 | } |
715 | return ret; |
716 | } |
717 | |
718 | // Create output sections described by SECTIONS commands. |
719 | void LinkerScript::processSectionCommands() { |
720 | auto process = [this](OutputSection *osec) { |
721 | SmallVector<InputSectionBase *, 0> v = createInputSectionList(outCmd&: *osec); |
722 | |
723 | // The output section name `/DISCARD/' is special. |
724 | // Any input section assigned to it is discarded. |
725 | if (osec->name == "/DISCARD/") { |
726 | for (InputSectionBase *s : v) |
727 | discard(s&: *s); |
728 | discardSynthetic(outCmd&: *osec); |
729 | osec->commands.clear(); |
730 | return false; |
731 | } |
732 | |
733 | // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive |
734 | // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input |
735 | // sections satisfy a given constraint. If not, a directive is handled |
736 | // as if it wasn't present from the beginning. |
737 | // |
738 | // Because we'll iterate over SectionCommands many more times, the easy |
739 | // way to "make it as if it wasn't present" is to make it empty. |
740 | if (!matchConstraints(sections: v, kind: osec->constraint)) { |
741 | for (InputSectionBase *s : v) |
742 | s->parent = nullptr; |
743 | osec->commands.clear(); |
744 | return false; |
745 | } |
746 | |
747 | // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign |
748 | // is given, input sections are aligned to that value, whether the |
749 | // given value is larger or smaller than the original section alignment. |
750 | if (osec->subalignExpr) { |
751 | uint32_t subalign = osec->subalignExpr().getValue(); |
752 | for (InputSectionBase *s : v) |
753 | s->addralign = subalign; |
754 | } |
755 | |
756 | // Set the partition field the same way OutputSection::recordSection() |
757 | // does. Partitions cannot be used with the SECTIONS command, so this is |
758 | // always 1. |
759 | osec->partition = 1; |
760 | return true; |
761 | }; |
762 | |
763 | // Process OVERWRITE_SECTIONS first so that it can overwrite the main script |
764 | // or orphans. |
765 | if (ctx.arg.enableNonContiguousRegions && !overwriteSections.empty()) |
766 | ErrAlways(ctx) << "OVERWRITE_SECTIONS cannot be used with " |
767 | "--enable-non-contiguous-regions"; |
768 | DenseMap<CachedHashStringRef, OutputDesc *> map; |
769 | size_t i = 0; |
770 | for (OutputDesc *osd : overwriteSections) { |
771 | OutputSection *osec = &osd->osec; |
772 | if (process(osec) && |
773 | !map.try_emplace(Key: CachedHashStringRef(osec->name), Args&: osd).second) |
774 | Warn(ctx) << "OVERWRITE_SECTIONS specifies duplicate "<< osec->name; |
775 | } |
776 | for (SectionCommand *&base : sectionCommands) { |
777 | if (auto *osd = dyn_cast<OutputDesc>(Val: base)) { |
778 | OutputSection *osec = &osd->osec; |
779 | if (OutputDesc *overwrite = map.lookup(Val: CachedHashStringRef(osec->name))) { |
780 | Log(ctx) << overwrite->osec.location << " overwrites "<< osec->name; |
781 | overwrite->osec.sectionIndex = i++; |
782 | base = overwrite; |
783 | } else if (process(osec)) { |
784 | osec->sectionIndex = i++; |
785 | } |
786 | } else if (auto *sc = dyn_cast<SectionClassDesc>(Val: base)) { |
787 | for (InputSectionDescription *isd : sc->sc.commands) { |
788 | isd->sectionBases = |
789 | computeInputSections(cmd: isd, sections: ctx.inputSections, outCmd: sc->sc); |
790 | for (InputSectionBase *s : isd->sectionBases) { |
791 | // A section class containing a section with different parent isn't |
792 | // necessarily an error due to --enable-non-contiguous-regions. Such |
793 | // sections all become potential spills when the class is referenced. |
794 | if (!s->parent) |
795 | s->parent = &sc->sc; |
796 | } |
797 | } |
798 | sc->sc.assigned = true; |
799 | } |
800 | } |
801 | |
802 | // Check that input sections cannot spill into or out of INSERT, |
803 | // since the semantics are nebulous. This is also true for OVERWRITE_SECTIONS, |
804 | // but no check is needed, since the order of processing ensures they cannot |
805 | // legally reference classes. |
806 | if (!potentialSpillLists.empty()) { |
807 | DenseSet<StringRef> insertNames; |
808 | for (InsertCommand &ic : insertCommands) |
809 | insertNames.insert_range(R&: ic.names); |
810 | for (SectionCommand *&base : sectionCommands) { |
811 | auto *osd = dyn_cast<OutputDesc>(Val: base); |
812 | if (!osd) |
813 | continue; |
814 | OutputSection *os = &osd->osec; |
815 | if (!insertNames.contains(V: os->name)) |
816 | continue; |
817 | for (SectionCommand *sc : os->commands) { |
818 | auto *isd = dyn_cast<InputSectionDescription>(Val: sc); |
819 | if (!isd) |
820 | continue; |
821 | for (InputSectionBase *isec : isd->sectionBases) |
822 | if (isa<PotentialSpillSection>(Val: isec) || |
823 | potentialSpillLists.contains(Val: isec)) |
824 | Err(ctx) << "section '"<< isec->name |
825 | << "' cannot spill from/to INSERT section '"<< os->name |
826 | << "'"; |
827 | } |
828 | } |
829 | } |
830 | |
831 | // If an OVERWRITE_SECTIONS specified output section is not in |
832 | // sectionCommands, append it to the end. The section will be inserted by |
833 | // orphan placement. |
834 | for (OutputDesc *osd : overwriteSections) |
835 | if (osd->osec.partition == 1 && osd->osec.sectionIndex == UINT32_MAX) |
836 | sectionCommands.push_back(Elt: osd); |
837 | |
838 | // Input sections cannot have a section class parent past this point; they |
839 | // must have been assigned to an output section. |
840 | for (const auto &[_, sc] : sectionClasses) { |
841 | for (InputSectionDescription *isd : sc->sc.commands) { |
842 | for (InputSectionBase *sec : isd->sectionBases) { |
843 | if (sec->parent && isa<SectionClass>(Val: sec->parent)) { |
844 | Err(ctx) << "section class '"<< sec->parent->name |
845 | << "' is unreferenced"; |
846 | goto nextClass; |
847 | } |
848 | } |
849 | } |
850 | nextClass:; |
851 | } |
852 | } |
853 | |
854 | void LinkerScript::processSymbolAssignments() { |
855 | // Dot outside an output section still represents a relative address, whose |
856 | // sh_shndx should not be SHN_UNDEF or SHN_ABS. Create a dummy aether section |
857 | // that fills the void outside a section. It has an index of one, which is |
858 | // indistinguishable from any other regular section index. |
859 | aether = std::make_unique<OutputSection>(args&: ctx, args: "", args: 0, args: SHF_ALLOC); |
860 | aether->sectionIndex = 1; |
861 | |
862 | // `st` captures the local AddressState and makes it accessible deliberately. |
863 | // This is needed as there are some cases where we cannot just thread the |
864 | // current state through to a lambda function created by the script parser. |
865 | AddressState st(*this); |
866 | state = &st; |
867 | st.outSec = aether.get(); |
868 | |
869 | for (SectionCommand *cmd : sectionCommands) { |
870 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) |
871 | addSymbol(cmd: assign); |
872 | else if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) |
873 | for (SectionCommand *subCmd : osd->osec.commands) |
874 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: subCmd)) |
875 | addSymbol(cmd: assign); |
876 | } |
877 | |
878 | state = nullptr; |
879 | } |
880 | |
881 | static OutputSection *findByName(ArrayRef<SectionCommand *> vec, |
882 | StringRef name) { |
883 | for (SectionCommand *cmd : vec) |
884 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) |
885 | if (osd->osec.name == name) |
886 | return &osd->osec; |
887 | return nullptr; |
888 | } |
889 | |
890 | static OutputDesc *createSection(Ctx &ctx, InputSectionBase *isec, |
891 | StringRef outsecName) { |
892 | OutputDesc *osd = ctx.script->createOutputSection(name: outsecName, location: "<internal>"); |
893 | osd->osec.recordSection(isec); |
894 | return osd; |
895 | } |
896 | |
897 | static OutputDesc *addInputSec(Ctx &ctx, |
898 | StringMap<TinyPtrVector<OutputSection *>> &map, |
899 | InputSectionBase *isec, StringRef outsecName) { |
900 | // Sections with SHT_GROUP or SHF_GROUP attributes reach here only when the -r |
901 | // option is given. A section with SHT_GROUP defines a "section group", and |
902 | // its members have SHF_GROUP attribute. Usually these flags have already been |
903 | // stripped by InputFiles.cpp as section groups are processed and uniquified. |
904 | // However, for the -r option, we want to pass through all section groups |
905 | // as-is because adding/removing members or merging them with other groups |
906 | // change their semantics. |
907 | if (isec->type == SHT_GROUP || (isec->flags & SHF_GROUP)) |
908 | return createSection(ctx, isec, outsecName); |
909 | |
910 | // Imagine .zed : { *(.foo) *(.bar) } script. Both foo and bar may have |
911 | // relocation sections .rela.foo and .rela.bar for example. Most tools do |
912 | // not allow multiple REL[A] sections for output section. Hence we |
913 | // should combine these relocation sections into single output. |
914 | // We skip synthetic sections because it can be .rela.dyn/.rela.plt or any |
915 | // other REL[A] sections created by linker itself. |
916 | if (!isa<SyntheticSection>(Val: isec) && isStaticRelSecType(type: isec->type)) { |
917 | auto *sec = cast<InputSection>(Val: isec); |
918 | OutputSection *out = sec->getRelocatedSection()->getOutputSection(); |
919 | |
920 | if (auto *relSec = out->relocationSection) { |
921 | relSec->recordSection(isec: sec); |
922 | return nullptr; |
923 | } |
924 | |
925 | OutputDesc *osd = createSection(ctx, isec, outsecName); |
926 | out->relocationSection = &osd->osec; |
927 | return osd; |
928 | } |
929 | |
930 | // The ELF spec just says |
931 | // ---------------------------------------------------------------- |
932 | // In the first phase, input sections that match in name, type and |
933 | // attribute flags should be concatenated into single sections. |
934 | // ---------------------------------------------------------------- |
935 | // |
936 | // However, it is clear that at least some flags have to be ignored for |
937 | // section merging. At the very least SHF_GROUP and SHF_COMPRESSED have to be |
938 | // ignored. We should not have two output .text sections just because one was |
939 | // in a group and another was not for example. |
940 | // |
941 | // It also seems that wording was a late addition and didn't get the |
942 | // necessary scrutiny. |
943 | // |
944 | // Merging sections with different flags is expected by some users. One |
945 | // reason is that if one file has |
946 | // |
947 | // int *const bar __attribute__((section(".foo"))) = (int *)0; |
948 | // |
949 | // gcc with -fPIC will produce a read only .foo section. But if another |
950 | // file has |
951 | // |
952 | // int zed; |
953 | // int *const bar __attribute__((section(".foo"))) = (int *)&zed; |
954 | // |
955 | // gcc with -fPIC will produce a read write section. |
956 | // |
957 | // Last but not least, when using linker script the merge rules are forced by |
958 | // the script. Unfortunately, linker scripts are name based. This means that |
959 | // expressions like *(.foo*) can refer to multiple input sections with |
960 | // different flags. We cannot put them in different output sections or we |
961 | // would produce wrong results for |
962 | // |
963 | // start = .; *(.foo.*) end = .; *(.bar) |
964 | // |
965 | // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to |
966 | // another. The problem is that there is no way to layout those output |
967 | // sections such that the .foo sections are the only thing between the start |
968 | // and end symbols. |
969 | // |
970 | // Given the above issues, we instead merge sections by name and error on |
971 | // incompatible types and flags. |
972 | TinyPtrVector<OutputSection *> &v = map[outsecName]; |
973 | for (OutputSection *sec : v) { |
974 | if (sec->partition != isec->partition) |
975 | continue; |
976 | |
977 | if (ctx.arg.relocatable && (isec->flags & SHF_LINK_ORDER)) { |
978 | // Merging two SHF_LINK_ORDER sections with different sh_link fields will |
979 | // change their semantics, so we only merge them in -r links if they will |
980 | // end up being linked to the same output section. The casts are fine |
981 | // because everything in the map was created by the orphan placement code. |
982 | auto *firstIsec = cast<InputSectionBase>( |
983 | Val: cast<InputSectionDescription>(Val: sec->commands[0])->sectionBases[0]); |
984 | OutputSection *firstIsecOut = |
985 | (firstIsec->flags & SHF_LINK_ORDER) |
986 | ? firstIsec->getLinkOrderDep()->getOutputSection() |
987 | : nullptr; |
988 | if (firstIsecOut != isec->getLinkOrderDep()->getOutputSection()) |
989 | continue; |
990 | } |
991 | |
992 | sec->recordSection(isec); |
993 | return nullptr; |
994 | } |
995 | |
996 | OutputDesc *osd = createSection(ctx, isec, outsecName); |
997 | v.push_back(NewVal: &osd->osec); |
998 | return osd; |
999 | } |
1000 | |
1001 | // Add sections that didn't match any sections command. |
1002 | void LinkerScript::addOrphanSections() { |
1003 | StringMap<TinyPtrVector<OutputSection *>> map; |
1004 | SmallVector<OutputDesc *, 0> v; |
1005 | |
1006 | auto add = [&](InputSectionBase *s) { |
1007 | if (s->isLive() && !s->parent) { |
1008 | orphanSections.push_back(Elt: s); |
1009 | |
1010 | StringRef name = getOutputSectionName(s); |
1011 | if (ctx.arg.unique) { |
1012 | v.push_back(Elt: createSection(ctx, isec: s, outsecName: name)); |
1013 | } else if (OutputSection *sec = findByName(vec: sectionCommands, name)) { |
1014 | sec->recordSection(isec: s); |
1015 | } else { |
1016 | if (OutputDesc *osd = addInputSec(ctx, map, isec: s, outsecName: name)) |
1017 | v.push_back(Elt: osd); |
1018 | assert(isa<MergeInputSection>(s) || |
1019 | s->getOutputSection()->sectionIndex == UINT32_MAX); |
1020 | } |
1021 | } |
1022 | }; |
1023 | |
1024 | // For further --emit-reloc handling code we need target output section |
1025 | // to be created before we create relocation output section, so we want |
1026 | // to create target sections first. We do not want priority handling |
1027 | // for synthetic sections because them are special. |
1028 | size_t n = 0; |
1029 | for (InputSectionBase *isec : ctx.inputSections) { |
1030 | // Process InputSection and MergeInputSection. |
1031 | if (LLVM_LIKELY(isa<InputSection>(isec))) |
1032 | ctx.inputSections[n++] = isec; |
1033 | |
1034 | // In -r links, SHF_LINK_ORDER sections are added while adding their parent |
1035 | // sections because we need to know the parent's output section before we |
1036 | // can select an output section for the SHF_LINK_ORDER section. |
1037 | if (ctx.arg.relocatable && (isec->flags & SHF_LINK_ORDER)) |
1038 | continue; |
1039 | |
1040 | if (auto *sec = dyn_cast<InputSection>(Val: isec)) |
1041 | if (InputSectionBase *rel = sec->getRelocatedSection()) |
1042 | if (auto *relIS = dyn_cast_or_null<InputSectionBase>(Val: rel->parent)) |
1043 | add(relIS); |
1044 | add(isec); |
1045 | if (ctx.arg.relocatable) |
1046 | for (InputSectionBase *depSec : isec->dependentSections) |
1047 | if (depSec->flags & SHF_LINK_ORDER) |
1048 | add(depSec); |
1049 | } |
1050 | // Keep just InputSection. |
1051 | ctx.inputSections.resize(N: n); |
1052 | |
1053 | // If no SECTIONS command was given, we should insert sections commands |
1054 | // before others, so that we can handle scripts which refers them, |
1055 | // for example: "foo = ABSOLUTE(ADDR(.text)));". |
1056 | // When SECTIONS command is present we just add all orphans to the end. |
1057 | if (hasSectionsCommand) |
1058 | sectionCommands.insert(I: sectionCommands.end(), From: v.begin(), To: v.end()); |
1059 | else |
1060 | sectionCommands.insert(I: sectionCommands.begin(), From: v.begin(), To: v.end()); |
1061 | } |
1062 | |
1063 | void LinkerScript::diagnoseOrphanHandling() const { |
1064 | llvm::TimeTraceScope timeScope("Diagnose orphan sections"); |
1065 | if (ctx.arg.orphanHandling == OrphanHandlingPolicy::Place || |
1066 | !hasSectionsCommand) |
1067 | return; |
1068 | for (const InputSectionBase *sec : orphanSections) { |
1069 | // .relro_padding is inserted before DATA_SEGMENT_RELRO_END, if present, |
1070 | // automatically. The section is not supposed to be specified by scripts. |
1071 | if (sec == ctx.in.relroPadding.get()) |
1072 | continue; |
1073 | // Input SHT_REL[A] retained by --emit-relocs are ignored by |
1074 | // computeInputSections(). Don't warn/error. |
1075 | if (isa<InputSection>(Val: sec) && |
1076 | cast<InputSection>(Val: sec)->getRelocatedSection()) |
1077 | continue; |
1078 | |
1079 | StringRef name = getOutputSectionName(s: sec); |
1080 | if (ctx.arg.orphanHandling == OrphanHandlingPolicy::Error) |
1081 | ErrAlways(ctx) << sec << " is being placed in '"<< name << "'"; |
1082 | else |
1083 | Warn(ctx) << sec << " is being placed in '"<< name << "'"; |
1084 | } |
1085 | } |
1086 | |
1087 | void LinkerScript::diagnoseMissingSGSectionAddress() const { |
1088 | if (!ctx.arg.cmseImplib || !ctx.in.armCmseSGSection->isNeeded()) |
1089 | return; |
1090 | |
1091 | OutputSection *sec = findByName(vec: sectionCommands, name: ".gnu.sgstubs"); |
1092 | if (sec && !sec->addrExpr && !ctx.arg.sectionStartMap.count(Key: ".gnu.sgstubs")) |
1093 | ErrAlways(ctx) << "no address assigned to the veneers output section " |
1094 | << sec->name; |
1095 | } |
1096 | |
1097 | // This function searches for a memory region to place the given output |
1098 | // section in. If found, a pointer to the appropriate memory region is |
1099 | // returned in the first member of the pair. Otherwise, a nullptr is returned. |
1100 | // The second member of the pair is a hint that should be passed to the |
1101 | // subsequent call of this method. |
1102 | std::pair<MemoryRegion *, MemoryRegion *> |
1103 | LinkerScript::findMemoryRegion(OutputSection *sec, MemoryRegion *hint) { |
1104 | // Non-allocatable sections are not part of the process image. |
1105 | if (!(sec->flags & SHF_ALLOC)) { |
1106 | bool hasInputOrByteCommand = |
1107 | sec->hasInputSections || |
1108 | llvm::any_of(Range&: sec->commands, P: [](SectionCommand *comm) { |
1109 | return ByteCommand::classof(c: comm); |
1110 | }); |
1111 | if (!sec->memoryRegionName.empty() && hasInputOrByteCommand) |
1112 | Warn(ctx) |
1113 | << "ignoring memory region assignment for non-allocatable section '" |
1114 | << sec->name << "'"; |
1115 | return {nullptr, nullptr}; |
1116 | } |
1117 | |
1118 | // If a memory region name was specified in the output section command, |
1119 | // then try to find that region first. |
1120 | if (!sec->memoryRegionName.empty()) { |
1121 | if (MemoryRegion *m = memoryRegions.lookup(Key: sec->memoryRegionName)) |
1122 | return {m, m}; |
1123 | ErrAlways(ctx) << "memory region '"<< sec->memoryRegionName |
1124 | << "' not declared"; |
1125 | return {nullptr, nullptr}; |
1126 | } |
1127 | |
1128 | // If at least one memory region is defined, all sections must |
1129 | // belong to some memory region. Otherwise, we don't need to do |
1130 | // anything for memory regions. |
1131 | if (memoryRegions.empty()) |
1132 | return {nullptr, nullptr}; |
1133 | |
1134 | // An orphan section should continue the previous memory region. |
1135 | if (sec->sectionIndex == UINT32_MAX && hint) |
1136 | return {hint, hint}; |
1137 | |
1138 | // See if a region can be found by matching section flags. |
1139 | for (auto &pair : memoryRegions) { |
1140 | MemoryRegion *m = pair.second; |
1141 | if (m->compatibleWith(secFlags: sec->flags)) |
1142 | return {m, nullptr}; |
1143 | } |
1144 | |
1145 | // Otherwise, no suitable region was found. |
1146 | ErrAlways(ctx) << "no memory region specified for section '"<< sec->name |
1147 | << "'"; |
1148 | return {nullptr, nullptr}; |
1149 | } |
1150 | |
1151 | static OutputSection *findFirstSection(Ctx &ctx, PhdrEntry *load) { |
1152 | for (OutputSection *sec : ctx.outputSections) |
1153 | if (sec->ptLoad == load) |
1154 | return sec; |
1155 | return nullptr; |
1156 | } |
1157 | |
1158 | // Assign addresses to an output section and offsets to its input sections and |
1159 | // symbol assignments. Return true if the output section's address has changed. |
1160 | bool LinkerScript::assignOffsets(OutputSection *sec) { |
1161 | const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS; |
1162 | const bool sameMemRegion = state->memRegion == sec->memRegion; |
1163 | const bool prevLMARegionIsDefault = state->lmaRegion == nullptr; |
1164 | const uint64_t savedDot = dot; |
1165 | bool addressChanged = false; |
1166 | state->memRegion = sec->memRegion; |
1167 | state->lmaRegion = sec->lmaRegion; |
1168 | |
1169 | if (!(sec->flags & SHF_ALLOC)) { |
1170 | // Non-SHF_ALLOC sections have zero addresses. |
1171 | dot = 0; |
1172 | } else if (isTbss) { |
1173 | // Allow consecutive SHF_TLS SHT_NOBITS output sections. The address range |
1174 | // starts from the end address of the previous tbss section. |
1175 | if (state->tbssAddr == 0) |
1176 | state->tbssAddr = dot; |
1177 | else |
1178 | dot = state->tbssAddr; |
1179 | } else { |
1180 | if (state->memRegion) |
1181 | dot = state->memRegion->curPos; |
1182 | if (sec->addrExpr) |
1183 | setDot(e: sec->addrExpr, loc: sec->location, inSec: false); |
1184 | |
1185 | // If the address of the section has been moved forward by an explicit |
1186 | // expression so that it now starts past the current curPos of the enclosing |
1187 | // region, we need to expand the current region to account for the space |
1188 | // between the previous section, if any, and the start of this section. |
1189 | if (state->memRegion && state->memRegion->curPos < dot) |
1190 | expandMemoryRegion(memRegion: state->memRegion, size: dot - state->memRegion->curPos, |
1191 | secName: sec->name); |
1192 | } |
1193 | |
1194 | state->outSec = sec; |
1195 | if (!(sec->addrExpr && hasSectionsCommand)) { |
1196 | // ALIGN is respected. sec->alignment is the max of ALIGN and the maximum of |
1197 | // input section alignments. |
1198 | const uint64_t pos = dot; |
1199 | dot = alignToPowerOf2(Value: dot, Align: sec->addralign); |
1200 | expandMemoryRegions(size: dot - pos); |
1201 | } |
1202 | addressChanged = sec->addr != dot; |
1203 | sec->addr = dot; |
1204 | |
1205 | // state->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT() |
1206 | // or AT>, recompute state->lmaOffset; otherwise, if both previous/current LMA |
1207 | // region is the default, and the two sections are in the same memory region, |
1208 | // reuse previous lmaOffset; otherwise, reset lmaOffset to 0. This emulates |
1209 | // heuristics described in |
1210 | // https://sourceware.org/binutils/docs/ld/Output-Section-LMA.html |
1211 | if (sec->lmaExpr) { |
1212 | state->lmaOffset = sec->lmaExpr().getValue() - dot; |
1213 | } else if (MemoryRegion *mr = sec->lmaRegion) { |
1214 | uint64_t lmaStart = alignToPowerOf2(Value: mr->curPos, Align: sec->addralign); |
1215 | if (mr->curPos < lmaStart) |
1216 | expandMemoryRegion(memRegion: mr, size: lmaStart - mr->curPos, secName: sec->name); |
1217 | state->lmaOffset = lmaStart - dot; |
1218 | } else if (!sameMemRegion || !prevLMARegionIsDefault) { |
1219 | state->lmaOffset = 0; |
1220 | } |
1221 | |
1222 | // Propagate state->lmaOffset to the first "non-header" section. |
1223 | if (PhdrEntry *l = sec->ptLoad) |
1224 | if (sec == findFirstSection(ctx, load: l)) |
1225 | l->lmaOffset = state->lmaOffset; |
1226 | |
1227 | // We can call this method multiple times during the creation of |
1228 | // thunks and want to start over calculation each time. |
1229 | sec->size = 0; |
1230 | if (sec->firstInOverlay) |
1231 | state->overlaySize = 0; |
1232 | |
1233 | // We visited SectionsCommands from processSectionCommands to |
1234 | // layout sections. Now, we visit SectionsCommands again to fix |
1235 | // section offsets. |
1236 | for (SectionCommand *cmd : sec->commands) { |
1237 | // This handles the assignments to symbol or to the dot. |
1238 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
1239 | assign->addr = dot; |
1240 | assignSymbol(cmd: assign, inSec: true); |
1241 | assign->size = dot - assign->addr; |
1242 | continue; |
1243 | } |
1244 | |
1245 | // Handle BYTE(), SHORT(), LONG(), or QUAD(). |
1246 | if (auto *data = dyn_cast<ByteCommand>(Val: cmd)) { |
1247 | data->offset = dot - sec->addr; |
1248 | dot += data->size; |
1249 | expandOutputSection(size: data->size); |
1250 | continue; |
1251 | } |
1252 | |
1253 | // Handle a single input section description command. |
1254 | // It calculates and assigns the offsets for each section and also |
1255 | // updates the output section size. |
1256 | |
1257 | auto §ions = cast<InputSectionDescription>(Val: cmd)->sections; |
1258 | for (InputSection *isec : sections) { |
1259 | assert(isec->getParent() == sec); |
1260 | if (isa<PotentialSpillSection>(Val: isec)) |
1261 | continue; |
1262 | const uint64_t pos = dot; |
1263 | dot = alignToPowerOf2(Value: dot, Align: isec->addralign); |
1264 | isec->outSecOff = dot - sec->addr; |
1265 | dot += isec->getSize(); |
1266 | |
1267 | // Update output section size after adding each section. This is so that |
1268 | // SIZEOF works correctly in the case below: |
1269 | // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } |
1270 | expandOutputSection(size: dot - pos); |
1271 | } |
1272 | } |
1273 | |
1274 | // If .relro_padding is present, round up the end to a common-page-size |
1275 | // boundary to protect the last page. |
1276 | if (ctx.in.relroPadding && sec == ctx.in.relroPadding->getParent()) |
1277 | expandOutputSection(size: alignToPowerOf2(Value: dot, Align: ctx.arg.commonPageSize) - dot); |
1278 | |
1279 | // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections |
1280 | // as they are not part of the process image. |
1281 | if (!(sec->flags & SHF_ALLOC)) { |
1282 | dot = savedDot; |
1283 | } else if (isTbss) { |
1284 | // NOBITS TLS sections are similar. Additionally save the end address. |
1285 | state->tbssAddr = dot; |
1286 | dot = savedDot; |
1287 | } |
1288 | return addressChanged; |
1289 | } |
1290 | |
1291 | static bool isDiscardable(const OutputSection &sec) { |
1292 | if (sec.name == "/DISCARD/") |
1293 | return true; |
1294 | |
1295 | // We do not want to remove OutputSections with expressions that reference |
1296 | // symbols even if the OutputSection is empty. We want to ensure that the |
1297 | // expressions can be evaluated and report an error if they cannot. |
1298 | if (sec.expressionsUseSymbols) |
1299 | return false; |
1300 | |
1301 | // OutputSections may be referenced by name in ADDR and LOADADDR expressions, |
1302 | // as an empty Section can has a valid VMA and LMA we keep the OutputSection |
1303 | // to maintain the integrity of the other Expression. |
1304 | if (sec.usedInExpression) |
1305 | return false; |
1306 | |
1307 | for (SectionCommand *cmd : sec.commands) { |
1308 | if (auto assign = dyn_cast<SymbolAssignment>(Val: cmd)) |
1309 | // Don't create empty output sections just for unreferenced PROVIDE |
1310 | // symbols. |
1311 | if (assign->name != "."&& !assign->sym) |
1312 | continue; |
1313 | |
1314 | if (!isa<InputSectionDescription>(Val: *cmd)) |
1315 | return false; |
1316 | } |
1317 | return true; |
1318 | } |
1319 | |
1320 | static void maybePropagatePhdrs(OutputSection &sec, |
1321 | SmallVector<StringRef, 0> &phdrs) { |
1322 | if (sec.phdrs.empty()) { |
1323 | // To match the bfd linker script behaviour, only propagate program |
1324 | // headers to sections that are allocated. |
1325 | if (sec.flags & SHF_ALLOC) |
1326 | sec.phdrs = phdrs; |
1327 | } else { |
1328 | phdrs = sec.phdrs; |
1329 | } |
1330 | } |
1331 | |
1332 | void LinkerScript::adjustOutputSections() { |
1333 | // If the output section contains only symbol assignments, create a |
1334 | // corresponding output section. The issue is what to do with linker script |
1335 | // like ".foo : { symbol = 42; }". One option would be to convert it to |
1336 | // "symbol = 42;". That is, move the symbol out of the empty section |
1337 | // description. That seems to be what bfd does for this simple case. The |
1338 | // problem is that this is not completely general. bfd will give up and |
1339 | // create a dummy section too if there is a ". = . + 1" inside the section |
1340 | // for example. |
1341 | // Given that we want to create the section, we have to worry what impact |
1342 | // it will have on the link. For example, if we just create a section with |
1343 | // 0 for flags, it would change which PT_LOADs are created. |
1344 | // We could remember that particular section is dummy and ignore it in |
1345 | // other parts of the linker, but unfortunately there are quite a few places |
1346 | // that would need to change: |
1347 | // * The program header creation. |
1348 | // * The orphan section placement. |
1349 | // * The address assignment. |
1350 | // The other option is to pick flags that minimize the impact the section |
1351 | // will have on the rest of the linker. That is why we copy the flags from |
1352 | // the previous sections. We copy just SHF_ALLOC and SHF_WRITE to keep the |
1353 | // impact low. We do not propagate SHF_EXECINSTR as in some cases this can |
1354 | // lead to executable writeable section. |
1355 | uint64_t flags = SHF_ALLOC; |
1356 | |
1357 | SmallVector<StringRef, 0> defPhdrs; |
1358 | bool seenRelro = false; |
1359 | for (SectionCommand *&cmd : sectionCommands) { |
1360 | if (!isa<OutputDesc>(Val: cmd)) |
1361 | continue; |
1362 | auto *sec = &cast<OutputDesc>(Val: cmd)->osec; |
1363 | |
1364 | // Handle align (e.g. ".foo : ALIGN(16) { ... }"). |
1365 | if (sec->alignExpr) |
1366 | sec->addralign = |
1367 | std::max<uint32_t>(a: sec->addralign, b: sec->alignExpr().getValue()); |
1368 | |
1369 | bool isEmpty = (getFirstInputSection(os: sec) == nullptr); |
1370 | bool discardable = isEmpty && isDiscardable(sec: *sec); |
1371 | // If sec has at least one input section and not discarded, remember its |
1372 | // flags to be inherited by subsequent output sections. (sec may contain |
1373 | // just one empty synthetic section.) |
1374 | if (sec->hasInputSections && !discardable) |
1375 | flags = sec->flags; |
1376 | |
1377 | // We do not want to keep any special flags for output section |
1378 | // in case it is empty. |
1379 | if (isEmpty) { |
1380 | sec->flags = |
1381 | flags & ((sec->nonAlloc ? 0 : (uint64_t)SHF_ALLOC) | SHF_WRITE); |
1382 | sec->sortRank = getSectionRank(ctx, osec&: *sec); |
1383 | } |
1384 | |
1385 | // The code below may remove empty output sections. We should save the |
1386 | // specified program headers (if exist) and propagate them to subsequent |
1387 | // sections which do not specify program headers. |
1388 | // An example of such a linker script is: |
1389 | // SECTIONS { .empty : { *(.empty) } :rw |
1390 | // .foo : { *(.foo) } } |
1391 | // Note: at this point the order of output sections has not been finalized, |
1392 | // because orphans have not been inserted into their expected positions. We |
1393 | // will handle them in adjustSectionsAfterSorting(). |
1394 | if (sec->sectionIndex != UINT32_MAX) |
1395 | maybePropagatePhdrs(sec&: *sec, phdrs&: defPhdrs); |
1396 | |
1397 | // Discard .relro_padding if we have not seen one RELRO section. Note: when |
1398 | // .tbss is the only RELRO section, there is no associated PT_LOAD segment |
1399 | // (needsPtLoad), so we don't append .relro_padding in the case. |
1400 | if (ctx.in.relroPadding && ctx.in.relroPadding->getParent() == sec && |
1401 | !seenRelro) |
1402 | discardable = true; |
1403 | if (discardable) { |
1404 | sec->markDead(); |
1405 | cmd = nullptr; |
1406 | } else { |
1407 | seenRelro |= |
1408 | sec->relro && !(sec->type == SHT_NOBITS && (sec->flags & SHF_TLS)); |
1409 | } |
1410 | } |
1411 | |
1412 | // It is common practice to use very generic linker scripts. So for any |
1413 | // given run some of the output sections in the script will be empty. |
1414 | // We could create corresponding empty output sections, but that would |
1415 | // clutter the output. |
1416 | // We instead remove trivially empty sections. The bfd linker seems even |
1417 | // more aggressive at removing them. |
1418 | llvm::erase_if(C&: sectionCommands, P: [&](SectionCommand *cmd) { return !cmd; }); |
1419 | } |
1420 | |
1421 | void LinkerScript::adjustSectionsAfterSorting() { |
1422 | // Try and find an appropriate memory region to assign offsets in. |
1423 | MemoryRegion *hint = nullptr; |
1424 | for (SectionCommand *cmd : sectionCommands) { |
1425 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) { |
1426 | OutputSection *sec = &osd->osec; |
1427 | if (!sec->lmaRegionName.empty()) { |
1428 | if (MemoryRegion *m = memoryRegions.lookup(Key: sec->lmaRegionName)) |
1429 | sec->lmaRegion = m; |
1430 | else |
1431 | ErrAlways(ctx) << "memory region '"<< sec->lmaRegionName |
1432 | << "' not declared"; |
1433 | } |
1434 | std::tie(args&: sec->memRegion, args&: hint) = findMemoryRegion(sec, hint); |
1435 | } |
1436 | } |
1437 | |
1438 | // If output section command doesn't specify any segments, |
1439 | // and we haven't previously assigned any section to segment, |
1440 | // then we simply assign section to the very first load segment. |
1441 | // Below is an example of such linker script: |
1442 | // PHDRS { seg PT_LOAD; } |
1443 | // SECTIONS { .aaa : { *(.aaa) } } |
1444 | SmallVector<StringRef, 0> defPhdrs; |
1445 | auto firstPtLoad = llvm::find_if(Range&: phdrsCommands, P: [](const PhdrsCommand &cmd) { |
1446 | return cmd.type == PT_LOAD; |
1447 | }); |
1448 | if (firstPtLoad != phdrsCommands.end()) |
1449 | defPhdrs.push_back(Elt: firstPtLoad->name); |
1450 | |
1451 | // Walk the commands and propagate the program headers to commands that don't |
1452 | // explicitly specify them. |
1453 | for (SectionCommand *cmd : sectionCommands) |
1454 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) |
1455 | maybePropagatePhdrs(sec&: osd->osec, phdrs&: defPhdrs); |
1456 | } |
1457 | |
1458 | // When the SECTIONS command is used, try to find an address for the file and |
1459 | // program headers output sections, which can be added to the first PT_LOAD |
1460 | // segment when program headers are created. |
1461 | // |
1462 | // We check if the headers fit below the first allocated section. If there isn't |
1463 | // enough space for these sections, we'll remove them from the PT_LOAD segment, |
1464 | // and we'll also remove the PT_PHDR segment. |
1465 | void LinkerScript::allocateHeaders( |
1466 | SmallVector<std::unique_ptr<PhdrEntry>, 0> &phdrs) { |
1467 | uint64_t min = std::numeric_limits<uint64_t>::max(); |
1468 | for (OutputSection *sec : ctx.outputSections) |
1469 | if (sec->flags & SHF_ALLOC) |
1470 | min = std::min<uint64_t>(a: min, b: sec->addr); |
1471 | |
1472 | auto it = llvm::find_if(Range&: phdrs, P: [](auto &e) { return e->p_type == PT_LOAD; }); |
1473 | if (it == phdrs.end()) |
1474 | return; |
1475 | PhdrEntry *firstPTLoad = it->get(); |
1476 | |
1477 | bool hasExplicitHeaders = |
1478 | llvm::any_of(Range&: phdrsCommands, P: [](const PhdrsCommand &cmd) { |
1479 | return cmd.hasPhdrs || cmd.hasFilehdr; |
1480 | }); |
1481 | bool paged = !ctx.arg.omagic && !ctx.arg.nmagic; |
1482 | uint64_t headerSize = getHeaderSize(ctx); |
1483 | |
1484 | uint64_t base = 0; |
1485 | // If SECTIONS is present and the linkerscript is not explicit about program |
1486 | // headers, only allocate program headers if that would not add a page. |
1487 | if (hasSectionsCommand && !hasExplicitHeaders) |
1488 | base = alignDown(Value: min, Align: ctx.arg.maxPageSize); |
1489 | if ((paged || hasExplicitHeaders) && headerSize <= min - base) { |
1490 | min = alignDown(Value: min - headerSize, Align: ctx.arg.maxPageSize); |
1491 | ctx.out.elfHeader->addr = min; |
1492 | ctx.out.programHeaders->addr = min + ctx.out.elfHeader->size; |
1493 | return; |
1494 | } |
1495 | |
1496 | // Error if we were explicitly asked to allocate headers. |
1497 | if (hasExplicitHeaders) |
1498 | ErrAlways(ctx) << "could not allocate headers"; |
1499 | |
1500 | ctx.out.elfHeader->ptLoad = nullptr; |
1501 | ctx.out.programHeaders->ptLoad = nullptr; |
1502 | firstPTLoad->firstSec = findFirstSection(ctx, load: firstPTLoad); |
1503 | |
1504 | llvm::erase_if(C&: phdrs, P: [](auto &e) { return e->p_type == PT_PHDR; }); |
1505 | } |
1506 | |
1507 | LinkerScript::AddressState::AddressState(const LinkerScript &script) { |
1508 | for (auto &mri : script.memoryRegions) { |
1509 | MemoryRegion *mr = mri.second; |
1510 | mr->curPos = (mr->origin)().getValue(); |
1511 | } |
1512 | } |
1513 | |
1514 | // Here we assign addresses as instructed by linker script SECTIONS |
1515 | // sub-commands. Doing that allows us to use final VA values, so here |
1516 | // we also handle rest commands like symbol assignments and ASSERTs. |
1517 | // Return an output section that has changed its address or null, and a symbol |
1518 | // that has changed its section or value (or nullptr if no symbol has changed). |
1519 | std::pair<const OutputSection *, const Defined *> |
1520 | LinkerScript::assignAddresses() { |
1521 | if (hasSectionsCommand) { |
1522 | // With a linker script, assignment of addresses to headers is covered by |
1523 | // allocateHeaders(). |
1524 | dot = ctx.arg.imageBase.value_or(u: 0); |
1525 | } else { |
1526 | // Assign addresses to headers right now. |
1527 | dot = ctx.target->getImageBase(); |
1528 | ctx.out.elfHeader->addr = dot; |
1529 | ctx.out.programHeaders->addr = dot + ctx.out.elfHeader->size; |
1530 | dot += getHeaderSize(ctx); |
1531 | } |
1532 | |
1533 | OutputSection *changedOsec = nullptr; |
1534 | AddressState st(*this); |
1535 | state = &st; |
1536 | errorOnMissingSection = true; |
1537 | st.outSec = aether.get(); |
1538 | recordedErrors.clear(); |
1539 | |
1540 | SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands); |
1541 | for (SectionCommand *cmd : sectionCommands) { |
1542 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
1543 | assign->addr = dot; |
1544 | assignSymbol(cmd: assign, inSec: false); |
1545 | assign->size = dot - assign->addr; |
1546 | continue; |
1547 | } |
1548 | if (isa<SectionClassDesc>(Val: cmd)) |
1549 | continue; |
1550 | if (assignOffsets(sec: &cast<OutputDesc>(Val: cmd)->osec) && !changedOsec) |
1551 | changedOsec = &cast<OutputDesc>(Val: cmd)->osec; |
1552 | } |
1553 | |
1554 | state = nullptr; |
1555 | return {changedOsec, getChangedSymbolAssignment(oldValues)}; |
1556 | } |
1557 | |
1558 | static bool hasRegionOverflowed(MemoryRegion *mr) { |
1559 | if (!mr) |
1560 | return false; |
1561 | return mr->curPos - mr->getOrigin() > mr->getLength(); |
1562 | } |
1563 | |
1564 | // Spill input sections in reverse order of address assignment to (potentially) |
1565 | // bring memory regions out of overflow. The size savings of a spill can only be |
1566 | // estimated, since general linker script arithmetic may occur afterwards. |
1567 | // Under-estimates may cause unnecessary spills, but over-estimates can always |
1568 | // be corrected on the next pass. |
1569 | bool LinkerScript::spillSections() { |
1570 | if (potentialSpillLists.empty()) |
1571 | return false; |
1572 | |
1573 | DenseSet<PotentialSpillSection *> skippedSpills; |
1574 | |
1575 | bool spilled = false; |
1576 | for (SectionCommand *cmd : reverse(C&: sectionCommands)) { |
1577 | auto *osd = dyn_cast<OutputDesc>(Val: cmd); |
1578 | if (!osd) |
1579 | continue; |
1580 | OutputSection *osec = &osd->osec; |
1581 | if (!osec->memRegion) |
1582 | continue; |
1583 | |
1584 | // Input sections that have replaced a potential spill and should be removed |
1585 | // from their input section description. |
1586 | DenseSet<InputSection *> spilledInputSections; |
1587 | |
1588 | for (SectionCommand *cmd : reverse(C&: osec->commands)) { |
1589 | if (!hasRegionOverflowed(mr: osec->memRegion) && |
1590 | !hasRegionOverflowed(mr: osec->lmaRegion)) |
1591 | break; |
1592 | |
1593 | auto *isd = dyn_cast<InputSectionDescription>(Val: cmd); |
1594 | if (!isd) |
1595 | continue; |
1596 | for (InputSection *isec : reverse(C&: isd->sections)) { |
1597 | // Potential spill locations cannot be spilled. |
1598 | if (isa<PotentialSpillSection>(Val: isec)) |
1599 | continue; |
1600 | |
1601 | auto it = potentialSpillLists.find(Val: isec); |
1602 | if (it == potentialSpillLists.end()) |
1603 | break; |
1604 | |
1605 | // Consume spills until finding one that might help, then consume it. |
1606 | auto canSpillHelp = [&](PotentialSpillSection *spill) { |
1607 | // Spills to the same region that overflowed cannot help. |
1608 | if (hasRegionOverflowed(mr: osec->memRegion) && |
1609 | spill->getParent()->memRegion == osec->memRegion) |
1610 | return false; |
1611 | if (hasRegionOverflowed(mr: osec->lmaRegion) && |
1612 | spill->getParent()->lmaRegion == osec->lmaRegion) |
1613 | return false; |
1614 | return true; |
1615 | }; |
1616 | PotentialSpillList &list = it->second; |
1617 | PotentialSpillSection *spill; |
1618 | for (spill = list.head; spill; spill = spill->next) { |
1619 | if (list.head->next) |
1620 | list.head = spill->next; |
1621 | else |
1622 | potentialSpillLists.erase(Val: isec); |
1623 | if (canSpillHelp(spill)) |
1624 | break; |
1625 | skippedSpills.insert(V: spill); |
1626 | } |
1627 | if (!spill) |
1628 | continue; |
1629 | |
1630 | // Replace the next spill location with the spilled section and adjust |
1631 | // its properties to match the new location. Note that the alignment of |
1632 | // the spill section may have diverged from the original due to e.g. a |
1633 | // SUBALIGN. Correct assignment requires the spill's alignment to be |
1634 | // used, not the original. |
1635 | spilledInputSections.insert(V: isec); |
1636 | *llvm::find(Range&: spill->isd->sections, Val: spill) = isec; |
1637 | isec->parent = spill->parent; |
1638 | isec->addralign = spill->addralign; |
1639 | |
1640 | // Record the (potential) reduction in the region's end position. |
1641 | osec->memRegion->curPos -= isec->getSize(); |
1642 | if (osec->lmaRegion) |
1643 | osec->lmaRegion->curPos -= isec->getSize(); |
1644 | |
1645 | // Spilling continues until the end position no longer overflows the |
1646 | // region. Then, another round of address assignment will either confirm |
1647 | // the spill's success or lead to yet more spilling. |
1648 | if (!hasRegionOverflowed(mr: osec->memRegion) && |
1649 | !hasRegionOverflowed(mr: osec->lmaRegion)) |
1650 | break; |
1651 | } |
1652 | |
1653 | // Remove any spilled input sections to complete their move. |
1654 | if (!spilledInputSections.empty()) { |
1655 | spilled = true; |
1656 | llvm::erase_if(C&: isd->sections, P: [&](InputSection *isec) { |
1657 | return spilledInputSections.contains(V: isec); |
1658 | }); |
1659 | } |
1660 | } |
1661 | } |
1662 | |
1663 | // Clean up any skipped spills. |
1664 | DenseSet<InputSectionDescription *> isds; |
1665 | for (PotentialSpillSection *s : skippedSpills) |
1666 | isds.insert(V: s->isd); |
1667 | for (InputSectionDescription *isd : isds) |
1668 | llvm::erase_if(C&: isd->sections, P: [&](InputSection *s) { |
1669 | return skippedSpills.contains(V: dyn_cast<PotentialSpillSection>(Val: s)); |
1670 | }); |
1671 | |
1672 | return spilled; |
1673 | } |
1674 | |
1675 | // Erase any potential spill sections that were not used. |
1676 | void LinkerScript::erasePotentialSpillSections() { |
1677 | if (potentialSpillLists.empty()) |
1678 | return; |
1679 | |
1680 | // Collect the set of input section descriptions that contain potential |
1681 | // spills. |
1682 | DenseSet<InputSectionDescription *> isds; |
1683 | for (const auto &[_, list] : potentialSpillLists) |
1684 | for (PotentialSpillSection *s = list.head; s; s = s->next) |
1685 | isds.insert(V: s->isd); |
1686 | |
1687 | for (InputSectionDescription *isd : isds) |
1688 | llvm::erase_if(C&: isd->sections, P: [](InputSection *s) { |
1689 | return isa<PotentialSpillSection>(Val: s); |
1690 | }); |
1691 | |
1692 | potentialSpillLists.clear(); |
1693 | } |
1694 | |
1695 | // Creates program headers as instructed by PHDRS linker script command. |
1696 | SmallVector<std::unique_ptr<PhdrEntry>, 0> LinkerScript::createPhdrs() { |
1697 | SmallVector<std::unique_ptr<PhdrEntry>, 0> ret; |
1698 | |
1699 | // Process PHDRS and FILEHDR keywords because they are not |
1700 | // real output sections and cannot be added in the following loop. |
1701 | for (const PhdrsCommand &cmd : phdrsCommands) { |
1702 | auto phdr = |
1703 | std::make_unique<PhdrEntry>(args&: ctx, args: cmd.type, args: cmd.flags.value_or(u: PF_R)); |
1704 | |
1705 | if (cmd.hasFilehdr) |
1706 | phdr->add(sec: ctx.out.elfHeader.get()); |
1707 | if (cmd.hasPhdrs) |
1708 | phdr->add(sec: ctx.out.programHeaders.get()); |
1709 | |
1710 | if (cmd.lmaExpr) { |
1711 | phdr->p_paddr = cmd.lmaExpr().getValue(); |
1712 | phdr->hasLMA = true; |
1713 | } |
1714 | ret.push_back(Elt: std::move(phdr)); |
1715 | } |
1716 | |
1717 | // Add output sections to program headers. |
1718 | for (OutputSection *sec : ctx.outputSections) { |
1719 | // Assign headers specified by linker script |
1720 | for (size_t id : getPhdrIndices(sec)) { |
1721 | ret[id]->add(sec); |
1722 | if (!phdrsCommands[id].flags) |
1723 | ret[id]->p_flags |= sec->getPhdrFlags(); |
1724 | } |
1725 | } |
1726 | return ret; |
1727 | } |
1728 | |
1729 | // Returns true if we should emit an .interp section. |
1730 | // |
1731 | // We usually do. But if PHDRS commands are given, and |
1732 | // no PT_INTERP is there, there's no place to emit an |
1733 | // .interp, so we don't do that in that case. |
1734 | bool LinkerScript::needsInterpSection() { |
1735 | if (phdrsCommands.empty()) |
1736 | return true; |
1737 | for (PhdrsCommand &cmd : phdrsCommands) |
1738 | if (cmd.type == PT_INTERP) |
1739 | return true; |
1740 | return false; |
1741 | } |
1742 | |
1743 | ExprValue LinkerScript::getSymbolValue(StringRef name, const Twine &loc) { |
1744 | if (name == ".") { |
1745 | if (state) |
1746 | return {state->outSec, false, dot - state->outSec->addr, loc}; |
1747 | ErrAlways(ctx) << loc << ": unable to get location counter value"; |
1748 | return 0; |
1749 | } |
1750 | |
1751 | if (Symbol *sym = ctx.symtab->find(name)) { |
1752 | if (auto *ds = dyn_cast<Defined>(Val: sym)) { |
1753 | ExprValue v{ds->section, false, ds->value, loc}; |
1754 | // Retain the original st_type, so that the alias will get the same |
1755 | // behavior in relocation processing. Any operation will reset st_type to |
1756 | // STT_NOTYPE. |
1757 | v.type = ds->type; |
1758 | return v; |
1759 | } |
1760 | if (isa<SharedSymbol>(Val: sym)) |
1761 | if (!errorOnMissingSection) |
1762 | return {nullptr, false, 0, loc}; |
1763 | } |
1764 | |
1765 | ErrAlways(ctx) << loc << ": symbol not found: "<< name; |
1766 | return 0; |
1767 | } |
1768 | |
1769 | // Returns the index of the segment named Name. |
1770 | static std::optional<size_t> getPhdrIndex(ArrayRef<PhdrsCommand> vec, |
1771 | StringRef name) { |
1772 | for (size_t i = 0; i < vec.size(); ++i) |
1773 | if (vec[i].name == name) |
1774 | return i; |
1775 | return std::nullopt; |
1776 | } |
1777 | |
1778 | // Returns indices of ELF headers containing specific section. Each index is a |
1779 | // zero based number of ELF header listed within PHDRS {} script block. |
1780 | SmallVector<size_t, 0> LinkerScript::getPhdrIndices(OutputSection *cmd) { |
1781 | SmallVector<size_t, 0> ret; |
1782 | |
1783 | for (StringRef s : cmd->phdrs) { |
1784 | if (std::optional<size_t> idx = getPhdrIndex(vec: phdrsCommands, name: s)) |
1785 | ret.push_back(Elt: *idx); |
1786 | else if (s != "NONE") |
1787 | ErrAlways(ctx) << cmd->location << ": program header '"<< s |
1788 | << "' is not listed in PHDRS"; |
1789 | } |
1790 | return ret; |
1791 | } |
1792 | |
1793 | void LinkerScript::printMemoryUsage(raw_ostream& os) { |
1794 | auto printSize = [&](uint64_t size) { |
1795 | if ((size & 0x3fffffff) == 0) |
1796 | os << format_decimal(N: size >> 30, Width: 10) << " GB"; |
1797 | else if ((size & 0xfffff) == 0) |
1798 | os << format_decimal(N: size >> 20, Width: 10) << " MB"; |
1799 | else if ((size & 0x3ff) == 0) |
1800 | os << format_decimal(N: size >> 10, Width: 10) << " KB"; |
1801 | else |
1802 | os << " "<< format_decimal(N: size, Width: 10) << " B"; |
1803 | }; |
1804 | os << "Memory region Used Size Region Size %age Used\n"; |
1805 | for (auto &pair : memoryRegions) { |
1806 | MemoryRegion *m = pair.second; |
1807 | uint64_t usedLength = m->curPos - m->getOrigin(); |
1808 | os << right_justify(Str: m->name, Width: 16) << ": "; |
1809 | printSize(usedLength); |
1810 | uint64_t length = m->getLength(); |
1811 | if (length != 0) { |
1812 | printSize(length); |
1813 | double percent = usedLength * 100.0 / length; |
1814 | os << " "<< format(Fmt: "%6.2f%%", Vals: percent); |
1815 | } |
1816 | os << '\n'; |
1817 | } |
1818 | } |
1819 | |
1820 | void LinkerScript::recordError(const Twine &msg) { |
1821 | auto &str = recordedErrors.emplace_back(); |
1822 | msg.toVector(Out&: str); |
1823 | } |
1824 | |
1825 | static void checkMemoryRegion(Ctx &ctx, const MemoryRegion *region, |
1826 | const OutputSection *osec, uint64_t addr) { |
1827 | uint64_t osecEnd = addr + osec->size; |
1828 | uint64_t regionEnd = region->getOrigin() + region->getLength(); |
1829 | if (osecEnd > regionEnd) { |
1830 | ErrAlways(ctx) << "section '"<< osec->name << "' will not fit in region '" |
1831 | << region->name << "': overflowed by " |
1832 | << (osecEnd - regionEnd) << " bytes"; |
1833 | } |
1834 | } |
1835 | |
1836 | void LinkerScript::checkFinalScriptConditions() const { |
1837 | for (StringRef err : recordedErrors) |
1838 | Err(ctx) << err; |
1839 | for (const OutputSection *sec : ctx.outputSections) { |
1840 | if (const MemoryRegion *memoryRegion = sec->memRegion) |
1841 | checkMemoryRegion(ctx, region: memoryRegion, osec: sec, addr: sec->addr); |
1842 | if (const MemoryRegion *lmaRegion = sec->lmaRegion) |
1843 | checkMemoryRegion(ctx, region: lmaRegion, osec: sec, addr: sec->getLMA()); |
1844 | } |
1845 | } |
1846 | |
1847 | void LinkerScript::addScriptReferencedSymbolsToSymTable() { |
1848 | // Some symbols (such as __ehdr_start) are defined lazily only when there |
1849 | // are undefined symbols for them, so we add these to trigger that logic. |
1850 | auto reference = [&ctx = ctx](StringRef name) { |
1851 | Symbol *sym = ctx.symtab->addUnusedUndefined(name); |
1852 | sym->isUsedInRegularObj = true; |
1853 | sym->referenced = true; |
1854 | }; |
1855 | for (StringRef name : referencedSymbols) |
1856 | reference(name); |
1857 | |
1858 | // Keeps track of references from which PROVIDE symbols have been added to the |
1859 | // symbol table. |
1860 | DenseSet<StringRef> added; |
1861 | SmallVector<const SmallVector<StringRef, 0> *, 0> symRefsVec; |
1862 | for (const auto &[name, symRefs] : provideMap) |
1863 | if (shouldAddProvideSym(symName: name) && added.insert(V: name).second) |
1864 | symRefsVec.push_back(Elt: &symRefs); |
1865 | while (symRefsVec.size()) { |
1866 | for (StringRef name : *symRefsVec.pop_back_val()) { |
1867 | reference(name); |
1868 | // Prevent the symbol from being discarded by --gc-sections. |
1869 | referencedSymbols.push_back(Elt: name); |
1870 | auto it = provideMap.find(Key: name); |
1871 | if (it != provideMap.end() && shouldAddProvideSym(symName: name) && |
1872 | added.insert(V: name).second) { |
1873 | symRefsVec.push_back(Elt: &it->second); |
1874 | } |
1875 | } |
1876 | } |
1877 | } |
1878 | |
1879 | bool LinkerScript::shouldAddProvideSym(StringRef symName) { |
1880 | // This function is called before and after garbage collection. To prevent |
1881 | // undefined references from the RHS, the result of this function for a |
1882 | // symbol must be the same for each call. We use unusedProvideSyms to not |
1883 | // change the return value of a demoted symbol. |
1884 | Symbol *sym = ctx.symtab->find(name: symName); |
1885 | if (!sym) |
1886 | return false; |
1887 | if (sym->isDefined() || sym->isCommon()) { |
1888 | unusedProvideSyms.insert(V: sym); |
1889 | return false; |
1890 | } |
1891 | return !unusedProvideSyms.count(V: sym); |
1892 | } |
1893 |
Definitions
- isSectionPrefix
- getOutputSectionName
- getValue
- getSecAddr
- getSectionOffset
- LinkerScript
- ~LinkerScript
- createOutputSection
- getOrCreateOutputSection
- expandMemoryRegion
- expandMemoryRegions
- expandOutputSection
- setDot
- shouldDefineSym
- addSymbol
- declareSymbol
- getSymbolAssignmentValues
- getChangedSymbolAssignment
- processInsertCommands
- declareSymbols
- assignSymbol
- matchesFile
- excludesFile
- shouldKeep
- matchConstraints
- sortSections
- sortInputSections
- computeInputSections
- discard
- discardSynthetic
- createInputSectionList
- processSectionCommands
- processSymbolAssignments
- findByName
- createSection
- addInputSec
- addOrphanSections
- diagnoseOrphanHandling
- diagnoseMissingSGSectionAddress
- findMemoryRegion
- findFirstSection
- assignOffsets
- isDiscardable
- maybePropagatePhdrs
- adjustOutputSections
- adjustSectionsAfterSorting
- allocateHeaders
- AddressState
- assignAddresses
- hasRegionOverflowed
- spillSections
- erasePotentialSpillSections
- createPhdrs
- needsInterpSection
- getSymbolValue
- getPhdrIndex
- getPhdrIndices
- printMemoryUsage
- recordError
- checkMemoryRegion
- checkFinalScriptConditions
- addScriptReferencedSymbolsToSymTable
Learn to use CMake with our Intro Training
Find out more