1 | //===- MarkLive.cpp -------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements --gc-sections, which is a feature to remove unused |
10 | // sections from output. Unused sections are sections that are not reachable |
11 | // from known GC-root symbols or sections. Naturally the feature is |
12 | // implemented as a mark-sweep garbage collector. |
13 | // |
14 | // Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off |
15 | // by default. Starting with GC-root symbols or sections, markLive function |
16 | // defined in this file visits all reachable sections to set their Live |
17 | // bits. Writer will then ignore sections whose Live bits are off, so that |
18 | // such sections are not included into output. |
19 | // |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "MarkLive.h" |
23 | #include "InputFiles.h" |
24 | #include "InputSection.h" |
25 | #include "LinkerScript.h" |
26 | #include "SymbolTable.h" |
27 | #include "Symbols.h" |
28 | #include "SyntheticSections.h" |
29 | #include "Target.h" |
30 | #include "lld/Common/CommonLinkerContext.h" |
31 | #include "lld/Common/Strings.h" |
32 | #include "llvm/ADT/STLExtras.h" |
33 | #include "llvm/Object/ELF.h" |
34 | #include "llvm/Support/TimeProfiler.h" |
35 | #include <vector> |
36 | |
37 | using namespace llvm; |
38 | using namespace llvm::ELF; |
39 | using namespace llvm::object; |
40 | using namespace llvm::support::endian; |
41 | using namespace lld; |
42 | using namespace lld::elf; |
43 | |
44 | namespace { |
45 | template <class ELFT> class MarkLive { |
46 | public: |
47 | MarkLive(unsigned partition) : partition(partition) {} |
48 | |
49 | void run(); |
50 | void moveToMain(); |
51 | |
52 | private: |
53 | void enqueue(InputSectionBase *sec, uint64_t offset); |
54 | void markSymbol(Symbol *sym); |
55 | void mark(); |
56 | |
57 | template <class RelTy> |
58 | void resolveReloc(InputSectionBase &sec, RelTy &rel, bool fromFDE); |
59 | |
60 | template <class RelTy> |
61 | void scanEhFrameSection(EhInputSection &eh, ArrayRef<RelTy> rels); |
62 | |
63 | // The index of the partition that we are currently processing. |
64 | unsigned partition; |
65 | |
66 | // A list of sections to visit. |
67 | SmallVector<InputSection *, 0> queue; |
68 | |
69 | // There are normally few input sections whose names are valid C |
70 | // identifiers, so we just store a SmallVector instead of a multimap. |
71 | DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections; |
72 | }; |
73 | } // namespace |
74 | |
75 | template <class ELFT> |
76 | static uint64_t getAddend(InputSectionBase &sec, |
77 | const typename ELFT::Rel &rel) { |
78 | return target->getImplicitAddend(buf: sec.content().begin() + rel.r_offset, |
79 | type: rel.getType(config->isMips64EL)); |
80 | } |
81 | |
82 | template <class ELFT> |
83 | static uint64_t getAddend(InputSectionBase &sec, |
84 | const typename ELFT::Rela &rel) { |
85 | return rel.r_addend; |
86 | } |
87 | |
88 | template <class ELFT> |
89 | template <class RelTy> |
90 | void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel, |
91 | bool fromFDE) { |
92 | // If a symbol is referenced in a live section, it is used. |
93 | Symbol &sym = sec.file->getRelocTargetSym(rel); |
94 | sym.used = true; |
95 | |
96 | if (auto *d = dyn_cast<Defined>(Val: &sym)) { |
97 | auto *relSec = dyn_cast_or_null<InputSectionBase>(Val: d->section); |
98 | if (!relSec) |
99 | return; |
100 | |
101 | uint64_t offset = d->value; |
102 | if (d->isSection()) |
103 | offset += getAddend<ELFT>(sec, rel); |
104 | |
105 | // fromFDE being true means this is referenced by a FDE in a .eh_frame |
106 | // piece. The relocation points to the described function or to a LSDA. We |
107 | // only need to keep the LSDA live, so ignore anything that points to |
108 | // executable sections. If the LSDA is in a section group or has the |
109 | // SHF_LINK_ORDER flag, we ignore the relocation as well because (a) if the |
110 | // associated text section is live, the LSDA will be retained due to section |
111 | // group/SHF_LINK_ORDER rules (b) if the associated text section should be |
112 | // discarded, marking the LSDA will unnecessarily retain the text section. |
113 | if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) || |
114 | relSec->nextInSectionGroup))) |
115 | enqueue(sec: relSec, offset); |
116 | return; |
117 | } |
118 | |
119 | if (auto *ss = dyn_cast<SharedSymbol>(Val: &sym)) |
120 | if (!ss->isWeak()) |
121 | cast<SharedFile>(Val: ss->file)->isNeeded = true; |
122 | |
123 | for (InputSectionBase *sec : cNamedSections.lookup(Val: sym.getName())) |
124 | enqueue(sec, offset: 0); |
125 | } |
126 | |
127 | // The .eh_frame section is an unfortunate special case. |
128 | // The section is divided in CIEs and FDEs and the relocations it can have are |
129 | // * CIEs can refer to a personality function. |
130 | // * FDEs can refer to a LSDA |
131 | // * FDEs refer to the function they contain information about |
132 | // The last kind of relocation cannot keep the referred section alive, or they |
133 | // would keep everything alive in a common object file. In fact, each FDE is |
134 | // alive if the section it refers to is alive. |
135 | // To keep things simple, in here we just ignore the last relocation kind. The |
136 | // other two keep the referred section alive. |
137 | // |
138 | // A possible improvement would be to fully process .eh_frame in the middle of |
139 | // the gc pass. With that we would be able to also gc some sections holding |
140 | // LSDAs and personality functions if we found that they were unused. |
141 | template <class ELFT> |
142 | template <class RelTy> |
143 | void MarkLive<ELFT>::scanEhFrameSection(EhInputSection &eh, |
144 | ArrayRef<RelTy> rels) { |
145 | for (const EhSectionPiece &cie : eh.cies) |
146 | if (cie.firstRelocation != unsigned(-1)) |
147 | resolveReloc(eh, rels[cie.firstRelocation], false); |
148 | for (const EhSectionPiece &fde : eh.fdes) { |
149 | size_t firstRelI = fde.firstRelocation; |
150 | if (firstRelI == (unsigned)-1) |
151 | continue; |
152 | uint64_t pieceEnd = fde.inputOff + fde.size; |
153 | for (size_t j = firstRelI, end2 = rels.size(); |
154 | j < end2 && rels[j].r_offset < pieceEnd; ++j) |
155 | resolveReloc(eh, rels[j], true); |
156 | } |
157 | } |
158 | |
159 | // Some sections are used directly by the loader, so they should never be |
160 | // garbage-collected. This function returns true if a given section is such |
161 | // section. |
162 | static bool isReserved(InputSectionBase *sec) { |
163 | switch (sec->type) { |
164 | case SHT_FINI_ARRAY: |
165 | case SHT_INIT_ARRAY: |
166 | case SHT_PREINIT_ARRAY: |
167 | return true; |
168 | case SHT_NOTE: |
169 | // SHT_NOTE sections in a group are subject to garbage collection. |
170 | return !sec->nextInSectionGroup; |
171 | default: |
172 | // Support SHT_PROGBITS .init_array (https://golang.org/issue/50295) and |
173 | // .init_array.N (https://github.com/rust-lang/rust/issues/92181) for a |
174 | // while. |
175 | StringRef s = sec->name; |
176 | return s == ".init" || s == ".fini" || s.starts_with(Prefix: ".init_array" ) || |
177 | s == ".jcr" || s.starts_with(Prefix: ".ctors" ) || s.starts_with(Prefix: ".dtors" ); |
178 | } |
179 | } |
180 | |
181 | template <class ELFT> |
182 | void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) { |
183 | // Usually, a whole section is marked as live or dead, but in mergeable |
184 | // (splittable) sections, each piece of data has independent liveness bit. |
185 | // So we explicitly tell it which offset is in use. |
186 | if (auto *ms = dyn_cast<MergeInputSection>(Val: sec)) |
187 | ms->getSectionPiece(offset).live = true; |
188 | |
189 | // Set Sec->Partition to the meet (i.e. the "minimum") of Partition and |
190 | // Sec->Partition in the following lattice: 1 < other < 0. If Sec->Partition |
191 | // doesn't change, we don't need to do anything. |
192 | if (sec->partition == 1 || sec->partition == partition) |
193 | return; |
194 | sec->partition = sec->partition ? 1 : partition; |
195 | |
196 | // Add input section to the queue. |
197 | if (InputSection *s = dyn_cast<InputSection>(Val: sec)) |
198 | queue.push_back(Elt: s); |
199 | } |
200 | |
201 | template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) { |
202 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
203 | if (auto *isec = dyn_cast_or_null<InputSectionBase>(Val: d->section)) |
204 | enqueue(sec: isec, offset: d->value); |
205 | } |
206 | |
207 | // This is the main function of the garbage collector. |
208 | // Starting from GC-root sections, this function visits all reachable |
209 | // sections to set their "Live" bits. |
210 | template <class ELFT> void MarkLive<ELFT>::run() { |
211 | // Add GC root symbols. |
212 | |
213 | // Preserve externally-visible symbols if the symbols defined by this |
214 | // file can interpose other ELF file's symbols at runtime. |
215 | for (Symbol *sym : symtab.getSymbols()) |
216 | if (sym->includeInDynsym() && sym->partition == partition) |
217 | markSymbol(sym); |
218 | |
219 | // If this isn't the main partition, that's all that we need to preserve. |
220 | if (partition != 1) { |
221 | mark(); |
222 | return; |
223 | } |
224 | |
225 | markSymbol(sym: symtab.find(name: config->entry)); |
226 | markSymbol(sym: symtab.find(name: config->init)); |
227 | markSymbol(sym: symtab.find(name: config->fini)); |
228 | for (StringRef s : config->undefined) |
229 | markSymbol(sym: symtab.find(name: s)); |
230 | for (StringRef s : script->referencedSymbols) |
231 | markSymbol(sym: symtab.find(name: s)); |
232 | for (auto [symName, _] : symtab.cmseSymMap) { |
233 | markSymbol(sym: symtab.cmseSymMap[symName].sym); |
234 | markSymbol(sym: symtab.cmseSymMap[symName].acleSeSym); |
235 | } |
236 | |
237 | // Mark .eh_frame sections as live because there are usually no relocations |
238 | // that point to .eh_frames. Otherwise, the garbage collector would drop |
239 | // all of them. We also want to preserve personality routines and LSDA |
240 | // referenced by .eh_frame sections, so we scan them for that here. |
241 | for (EhInputSection *eh : ctx.ehInputSections) { |
242 | const RelsOrRelas<ELFT> rels = eh->template relsOrRelas<ELFT>(); |
243 | if (rels.areRelocsRel()) |
244 | scanEhFrameSection(*eh, rels.rels); |
245 | else if (rels.relas.size()) |
246 | scanEhFrameSection(*eh, rels.relas); |
247 | } |
248 | for (InputSectionBase *sec : ctx.inputSections) { |
249 | if (sec->flags & SHF_GNU_RETAIN) { |
250 | enqueue(sec, offset: 0); |
251 | continue; |
252 | } |
253 | if (sec->flags & SHF_LINK_ORDER) |
254 | continue; |
255 | |
256 | // Usually, non-SHF_ALLOC sections are not removed even if they are |
257 | // unreachable through relocations because reachability is not a good signal |
258 | // whether they are garbage or not (e.g. there is usually no section |
259 | // referring to a .comment section, but we want to keep it.) When a |
260 | // non-SHF_ALLOC section is retained, we also retain sections dependent on |
261 | // it. |
262 | // |
263 | // Note on SHF_LINK_ORDER: Such sections contain metadata and they |
264 | // have a reverse dependency on the InputSection they are linked with. |
265 | // We are able to garbage collect them. |
266 | // |
267 | // Note on SHF_REL{,A}: Such sections reach here only when -r |
268 | // or --emit-reloc were given. And they are subject of garbage |
269 | // collection because, if we remove a text section, we also |
270 | // remove its relocation section. |
271 | // |
272 | // Note on nextInSectionGroup: The ELF spec says that group sections are |
273 | // included or omitted as a unit. We take the interpretation that: |
274 | // |
275 | // - Group members (nextInSectionGroup != nullptr) are subject to garbage |
276 | // collection. |
277 | // - Groups members are retained or discarded as a unit. |
278 | if (!(sec->flags & SHF_ALLOC)) { |
279 | if (!isStaticRelSecType(type: sec->type) && !sec->nextInSectionGroup) { |
280 | sec->markLive(); |
281 | for (InputSection *isec : sec->dependentSections) |
282 | isec->markLive(); |
283 | } |
284 | } |
285 | |
286 | // Preserve special sections and those which are specified in linker |
287 | // script KEEP command. |
288 | if (isReserved(sec) || script->shouldKeep(s: sec)) { |
289 | enqueue(sec, offset: 0); |
290 | } else if ((!config->zStartStopGC || sec->name.starts_with(Prefix: "__libc_" )) && |
291 | isValidCIdentifier(s: sec->name)) { |
292 | // As a workaround for glibc libc.a before 2.34 |
293 | // (https://sourceware.org/PR27492), retain __libc_atexit and similar |
294 | // sections regardless of zStartStopGC. |
295 | cNamedSections[saver().save(S: "__start_" + sec->name)].push_back(Elt: sec); |
296 | cNamedSections[saver().save(S: "__stop_" + sec->name)].push_back(Elt: sec); |
297 | } |
298 | } |
299 | |
300 | mark(); |
301 | } |
302 | |
303 | template <class ELFT> void MarkLive<ELFT>::mark() { |
304 | // Mark all reachable sections. |
305 | while (!queue.empty()) { |
306 | InputSectionBase &sec = *queue.pop_back_val(); |
307 | |
308 | const RelsOrRelas<ELFT> rels = sec.template relsOrRelas<ELFT>(); |
309 | for (const typename ELFT::Rel &rel : rels.rels) |
310 | resolveReloc(sec, rel, false); |
311 | for (const typename ELFT::Rela &rel : rels.relas) |
312 | resolveReloc(sec, rel, false); |
313 | |
314 | for (InputSectionBase *isec : sec.dependentSections) |
315 | enqueue(sec: isec, offset: 0); |
316 | |
317 | // Mark the next group member. |
318 | if (sec.nextInSectionGroup) |
319 | enqueue(sec: sec.nextInSectionGroup, offset: 0); |
320 | } |
321 | } |
322 | |
323 | // Move the sections for some symbols to the main partition, specifically ifuncs |
324 | // (because they can result in an IRELATIVE being added to the main partition's |
325 | // GOT, which means that the ifunc must be available when the main partition is |
326 | // loaded) and TLS symbols (because we only know how to correctly process TLS |
327 | // relocations for the main partition). |
328 | // |
329 | // We also need to move sections whose names are C identifiers that are referred |
330 | // to from __start_/__stop_ symbols because there will only be one set of |
331 | // symbols for the whole program. |
332 | template <class ELFT> void MarkLive<ELFT>::moveToMain() { |
333 | for (ELFFileBase *file : ctx.objectFiles) |
334 | for (Symbol *s : file->getSymbols()) |
335 | if (auto *d = dyn_cast<Defined>(Val: s)) |
336 | if ((d->type == STT_GNU_IFUNC || d->type == STT_TLS) && d->section && |
337 | d->section->isLive()) |
338 | markSymbol(sym: s); |
339 | |
340 | for (InputSectionBase *sec : ctx.inputSections) { |
341 | if (!sec->isLive() || !isValidCIdentifier(s: sec->name)) |
342 | continue; |
343 | if (symtab.find(name: ("__start_" + sec->name).str()) || |
344 | symtab.find(name: ("__stop_" + sec->name).str())) |
345 | enqueue(sec, offset: 0); |
346 | } |
347 | |
348 | mark(); |
349 | } |
350 | |
351 | // Before calling this function, Live bits are off for all |
352 | // input sections. This function make some or all of them on |
353 | // so that they are emitted to the output file. |
354 | template <class ELFT> void elf::markLive() { |
355 | llvm::TimeTraceScope timeScope("markLive" ); |
356 | // If --gc-sections is not given, retain all input sections. |
357 | if (!config->gcSections) { |
358 | // If a DSO defines a symbol referenced in a regular object, it is needed. |
359 | for (Symbol *sym : symtab.getSymbols()) |
360 | if (auto *s = dyn_cast<SharedSymbol>(Val: sym)) |
361 | if (s->isUsedInRegularObj && !s->isWeak()) |
362 | cast<SharedFile>(Val: s->file)->isNeeded = true; |
363 | return; |
364 | } |
365 | |
366 | for (InputSectionBase *sec : ctx.inputSections) |
367 | sec->markDead(); |
368 | |
369 | // Follow the graph to mark all live sections. |
370 | for (unsigned curPart = 1; curPart <= partitions.size(); ++curPart) |
371 | MarkLive<ELFT>(curPart).run(); |
372 | |
373 | // If we have multiple partitions, some sections need to live in the main |
374 | // partition even if they were allocated to a loadable partition. Move them |
375 | // there now. |
376 | if (partitions.size() != 1) |
377 | MarkLive<ELFT>(1).moveToMain(); |
378 | |
379 | // Report garbage-collected sections. |
380 | if (config->printGcSections) |
381 | for (InputSectionBase *sec : ctx.inputSections) |
382 | if (!sec->isLive()) |
383 | message(msg: "removing unused section " + toString(sec)); |
384 | } |
385 | |
386 | template void elf::markLive<ELF32LE>(); |
387 | template void elf::markLive<ELF32BE>(); |
388 | template void elf::markLive<ELF64LE>(); |
389 | template void elf::markLive<ELF64BE>(); |
390 | |