1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_INPUT_FILES_H
10#define LLD_MACHO_INPUT_FILES_H
11
12#include "MachOStructs.h"
13#include "Target.h"
14
15#include "lld/Common/DWARF.h"
16#include "lld/Common/LLVM.h"
17#include "lld/Common/Memory.h"
18#include "llvm/ADT/CachedHashString.h"
19#include "llvm/ADT/DenseSet.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/BinaryFormat/MachO.h"
22#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23#include "llvm/Object/Archive.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/Threading.h"
26#include "llvm/TextAPI/TextAPIReader.h"
27
28#include <vector>
29
// Forward declarations of LLVM types that this header names only through
// pointers, references, or std::unique_ptr.
namespace llvm {
namespace lto {
class InputFile;
} // namespace lto
namespace MachO {
class InterfaceFile;
} // namespace MachO
class TarWriter;
} // namespace llvm
39
40namespace lld {
41namespace macho {
42
// Forward declarations of linker types whose definitions are not in this
// header.
struct PlatformInfo;
class ConcatInputSection;
class Symbol;
class Defined;
struct Reloc;
// Opaque enum declaration: only the underlying type is fixed here, the
// enumerators are declared elsewhere.
enum class RefState : uint8_t;
49
// If --reproduce option is given, all input files are written
// to this tar archive.
// This is only a declaration; the object itself is defined elsewhere.
extern std::unique_ptr<llvm::TarWriter> tar;
53
54// If .subsections_via_symbols is set, each InputSection will be split along
55// symbol boundaries. The field offset represents the offset of the subsection
56// from the start of the original pre-split InputSection.
57struct Subsection {
58 uint64_t offset = 0;
59 InputSection *isec = nullptr;
60};
61
// The subsections of one Section, stored by value.
using Subsections = std::vector<Subsection>;
// Declared ahead of its definition because Section holds an InputFile pointer.
class InputFile;
64
65class Section {
66public:
67 InputFile *file;
68 StringRef segname;
69 StringRef name;
70 uint32_t flags;
71 uint64_t addr;
72 Subsections subsections;
73
74 Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
75 uint64_t addr)
76 : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
77 // Ensure pointers to Sections are never invalidated.
78 Section(const Section &) = delete;
79 Section &operator=(const Section &) = delete;
80 Section(Section &&) = delete;
81 Section &operator=(Section &&) = delete;
82
83private:
84 // Whether we have already split this section into individual subsections.
85 // For sections that cannot be split (e.g. literal sections), this is always
86 // false.
87 bool doneSplitting = false;
88 friend class ObjFile;
89};
90
// One edge of the call graph profile: `count` calls going from the symbol at
// `fromIndex` to the symbol at `toIndex`.
struct CallGraphEntry {
  // Symbol table index of the caller.
  uint32_t fromIndex;
  // Symbol table index of the callee.
  uint32_t toIndex;
  // Number of calls from the caller to the callee recorded in the profile.
  uint64_t count;

  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex{fromIndex}, toIndex{toIndex}, count{count} {}
};
103
104class InputFile {
105public:
106 enum Kind {
107 ObjKind,
108 OpaqueKind,
109 DylibKind,
110 ArchiveKind,
111 BitcodeKind,
112 };
113
114 virtual ~InputFile() = default;
115 Kind kind() const { return fileKind; }
116 StringRef getName() const { return name; }
117 static void resetIdCount() { idCount = 0; }
118
119 MemoryBufferRef mb;
120
121 std::vector<Symbol *> symbols;
122 std::vector<Section *> sections;
123 ArrayRef<uint8_t> objCImageInfo;
124
125 // If not empty, this stores the name of the archive containing this file.
126 // We use this string for creating error messages.
127 std::string archiveName;
128
129 // Provides an easy way to sort InputFiles deterministically.
130 const int id;
131
132 // True if this is a lazy ObjFile or BitcodeFile.
133 bool lazy = false;
134
135protected:
136 InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
137 : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
138 name(mb.getBufferIdentifier()) {}
139
140 InputFile(Kind, const llvm::MachO::InterfaceFile &);
141
142private:
143 const Kind fileKind;
144 const StringRef name;
145
146 static int idCount;
147};
148
// Per-section record stored in ObjFile::fdes, keyed by ConcatInputSection.
// NOTE(review): presumably the data extracted from an __eh_frame Frame
// Description Entry (function length, personality routine, LSDA) -- confirm
// against ObjFile::registerEhFrames().
struct FDE {
  uint32_t funcLength;
  Symbol *personality;
  InputSection *lsda;
};
154
// .o file
class ObjFile final : public InputFile {
public:
  // `lazy` and `forceHidden` default to false. NOTE(review): `modTime` is
  // presumably the file's modification time -- confirm against the callers.
  ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
          bool lazy = false, bool forceHidden = false);
  ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
  // LP is a policy type; the private helpers below use its `section` and
  // `nlist` member types.
  template <class LP> void parse();

  // True iff `f` carries the ObjKind tag. This is the hook used by LLVM-style
  // isa<>/cast<>/dyn_cast<>.
  static bool classof(const InputFile *f) { return f->kind() == ObjKind; }

  std::string sourceFile() const;
  // Parses line table information for diagnostics. compileUnit should be used
  // for other purposes.
  lld::DWARFCache *getDwarf();

  llvm::DWARFUnit *compileUnit = nullptr;
  std::unique_ptr<lld::DWARFCache> dwarfCache;
  Section *addrSigSection = nullptr;
  const uint32_t modTime;
  // NOTE(review): presumably mirrors ArchiveFile::forceHidden (-load_hidden);
  // confirm in the constructor.
  bool forceHidden;
  std::vector<ConcatInputSection *> debugSections;
  std::vector<CallGraphEntry> callGraph;
  llvm::DenseMap<ConcatInputSection *, FDE> fdes;
  std::vector<OptimizationHint> optimizationHints;

private:
  // NOTE(review): presumably guards the one-time creation of dwarfCache in
  // getDwarf() -- confirm in the implementation.
  llvm::once_flag initDwarf;
  template <class LP> void parseLazy();
  template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
  template <class LP>
  void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
                    ArrayRef<typename LP::nlist> nList, const char *strtab,
                    bool subsectionsViaSymbols);
  template <class NList>
  Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
  template <class SectionHeader>
  void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                        const SectionHeader &, Section &);
  void parseDebugInfo();
  void parseOptimizationHints(ArrayRef<uint8_t> data);
  void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
  void registerCompactUnwind(Section &compactUnwindSection);
  void registerEhFrames(Section &ehFrameSection);
};
199
// command-line -sectcreate file
class OpaqueFile final : public InputFile {
public:
  // Takes the file contents plus a segment name and a section name.
  // NOTE(review): presumably the contents are placed into that
  // segment/section -- confirm in the implementation.
  OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
  // True iff `f` carries the OpaqueKind tag (hook for LLVM-style
  // isa<>/cast<>/dyn_cast<>).
  static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
};
206
// .dylib or .tbd file
class DylibFile final : public InputFile {
public:
  // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
  // symbols in those sub-libraries will be available under the umbrella
  // library's namespace. Those sub-libraries can also have their own
  // re-exports. When loading a re-exported dylib, `umbrella` should be set to
  // the root dylib to ensure symbols in the child library are correctly bound
  // to the root. On the other hand, if a dylib is being directly loaded
  // (through an -lfoo flag), then `umbrella` should be a nullptr.
  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
                     bool isBundleLoader, bool explicitlyLinked);
  // Same as above, but built from a parsed InterfaceFile instead of a buffer.
  // NOTE(review): presumably this is the .tbd path -- confirm.
  explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
                     DylibFile *umbrella, bool isBundleLoader,
                     bool explicitlyLinked);
  // NOTE(review): presumably creates the synthetic dylibs described at
  // `extraDylibs` below -- confirm in the implementation.
  explicit DylibFile(DylibFile *umbrella);

  void parseLoadCommands(MemoryBufferRef mb);
  void parseReexports(const llvm::MachO::InterfaceFile &interface);
  // True while numReferencedSymbols is non-zero.
  bool isReferenced() const { return numReferencedSymbols > 0; }
  // Defined out of line; reads the private `explicitlyLinked` flag (see the
  // note on that member).
  bool isExplicitlyLinked() const;
  void setExplicitlyLinked() { explicitlyLinked = true; }

  // True iff `f` carries the DylibKind tag (hook for LLVM-style
  // isa<>/cast<>/dyn_cast<>).
  static bool classof(const InputFile *f) { return f->kind() == DylibKind; }

  StringRef installName;
  DylibFile *exportingFile = nullptr;
  // Not initialized here; the constructors all receive an `umbrella` argument.
  DylibFile *umbrella;
  SmallVector<StringRef, 2> rpaths;
  uint32_t compatibilityVersion = 0;
  uint32_t currentVersion = 0;
  int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
  unsigned numReferencedSymbols = 0;
  // No in-class initializer: RefState's enumerators are not visible in this
  // header, so the constructors are expected to set it.
  RefState refState;
  bool reexport = false;
  bool forceNeeded = false;
  bool forceWeakImport = false;
  bool deadStrippable = false;

private:
  bool explicitlyLinked = false; // Access via isExplicitlyLinked().

public:
  // An executable can be used as a bundle loader that will load the output
  // file being linked, and that contains symbols referenced, but not
  // implemented in the bundle. When used like this, it is very similar
  // to a dylib, so we've used the same class to represent it.
  bool isBundleLoader;

  // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
  // Usually empty. These synthetic dylibs won't have synthetic dylibs
  // themselves.
  SmallVector<DylibFile *, 2> extraDylibs;

private:
  DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
                               uint32_t compatVersion);

  // Handlers for special `$ld$...` symbol names (see the $ld$previous note at
  // `extraDylibs`). NOTE(review): the meaning of handleLDSymbol's bool result
  // is not visible here -- check the implementation.
  bool handleLDSymbol(StringRef originalName);
  void handleLDPreviousSymbol(StringRef name, StringRef originalName);
  void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
  void handleLDHideSymbol(StringRef name, StringRef originalName);
  void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
  void parseExportedSymbols(uint32_t offset, uint32_t size);

  // NOTE(review): presumably filled by handleLDHideSymbol() -- confirm.
  llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
};
274
275// .a file
276class ArchiveFile final : public InputFile {
277public:
278 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
279 bool forceHidden);
280 void addLazySymbols();
281 void fetch(const llvm::object::Archive::Symbol &);
282 // LLD normally doesn't use Error for error-handling, but the underlying
283 // Archive library does, so this is the cleanest way to wrap it.
284 Error fetch(const llvm::object::Archive::Child &, StringRef reason);
285 const llvm::object::Archive &getArchive() const { return *file; };
286 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
287
288private:
289 std::unique_ptr<llvm::object::Archive> file;
290 // Keep track of children fetched from the archive by tracking
291 // which address offsets have been fetched already.
292 llvm::DenseSet<uint64_t> seen;
293 // Load all symbols with hidden visibility (-load_hidden).
294 bool forceHidden;
295};
296
// An input whose contents are held as an llvm::lto::InputFile (see `obj`).
class BitcodeFile final : public InputFile {
public:
  // `lazy` and `forceHidden` default to false. `archiveName` and
  // `offsetInArchive` describe where the buffer came from when it is an
  // archive member. NOTE(review): how they are used is not visible here.
  explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                       uint64_t offsetInArchive, bool lazy = false,
                       bool forceHidden = false);
  // True iff `f` carries the BitcodeKind tag (hook for LLVM-style
  // isa<>/cast<>/dyn_cast<>).
  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
  void parse();

  std::unique_ptr<llvm::lto::InputFile> obj;
  // NOTE(review): presumably mirrors ArchiveFile::forceHidden (-load_hidden);
  // confirm in the constructor.
  bool forceHidden;

private:
  void parseLazy();
};
311
// Linker-wide state; only declared here, defined elsewhere.
extern llvm::SetVector<InputFile *> inputFiles;
// Buffers keyed by a hashed string. NOTE(review): presumably a cache of
// readFile() results keyed by path -- confirm in the implementation.
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;

// Returns the buffer for `path`. NOTE(review): presumably an empty Optional
// signals that the file could not be read -- confirm.
llvm::Optional<MemoryBufferRef> readFile(StringRef path);

// Defined elsewhere. `reason` is a caller-supplied string; its use is not
// visible in this header.
void extract(InputFile &file, StringRef reason);
318
319namespace detail {
320
321template <class CommandType, class... Types>
322std::vector<const CommandType *>
323findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
324 std::vector<const CommandType *> cmds;
325 std::initializer_list<uint32_t> typesList{types...};
326 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
327 const uint8_t *p =
328 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
329 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
330 auto *cmd = reinterpret_cast<const CommandType *>(p);
331 if (llvm::is_contained(typesList, cmd->cmd)) {
332 cmds.push_back(cmd);
333 if (cmds.size() == maxCommands)
334 return cmds;
335 }
336 p += cmd->cmdsize;
337 }
338 return cmds;
339}
340
341} // namespace detail
342
343// anyHdr should be a pointer to either mach_header or mach_header_64
344template <class CommandType = llvm::MachO::load_command, class... Types>
345const CommandType *findCommand(const void *anyHdr, Types... types) {
346 std::vector<const CommandType *> cmds =
347 detail::findCommands<CommandType>(anyHdr, 1, types...);
348 return cmds.size() ? cmds[0] : nullptr;
349}
350
351template <class CommandType = llvm::MachO::load_command, class... Types>
352std::vector<const CommandType *> findCommands(const void *anyHdr,
353 Types... types) {
354 return detail::findCommands<CommandType>(anyHdr, 0, types...);
355}
356
357} // namespace macho
358
// String conversions for macho types, declared in namespace lld.
// NOTE(review): presumably used when formatting diagnostics -- confirm.
std::string toString(const macho::InputFile *file);
std::string toString(const macho::Section &);
361} // namespace lld
362
363#endif
364

// Source code of lld/MachO/InputFiles.h