//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_COFF_INPUT_FILES_H
#define LLD_COFF_INPUT_FILES_H

#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <cstdint>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Forward declarations of LLVM types that this header only names inside
// declarations (pointer members, std::unique_ptr members and a function return
// type), so their defining headers are not included here.
namespace llvm {
struct DILineInfo;
namespace pdb {
class DbiModuleDescriptorBuilder;
class NativeSession;
} // namespace pdb
namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm

37namespace lld {
38class DWARFCache;
39
40namespace coff {
41class COFFLinkerContext;
42
43std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
44
45using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
46using llvm::COFF::MachineTypes;
47using llvm::object::Archive;
48using llvm::object::COFFObjectFile;
49using llvm::object::COFFSymbolRef;
50using llvm::object::coff_import_header;
51using llvm::object::coff_section;
52
// Linker types that the classes below only handle by pointer or reference.
class Chunk;
class Defined;
class DefinedImportData;
class DefinedImportThunk;
class DefinedRegular;
class SectionChunk;
class Symbol;
class Undefined;
class TpiSource;

63// The root class of input files.
64class InputFile {
65public:
66 enum Kind {
67 ArchiveKind,
68 ObjectKind,
69 LazyObjectKind,
70 PDBKind,
71 ImportKind,
72 BitcodeKind,
73 DLLKind
74 };
75 Kind kind() const { return fileKind; }
76 virtual ~InputFile() {}
77
78 // Returns the filename.
79 StringRef getName() const { return mb.getBufferIdentifier(); }
80
81 // Reads a file (the constructor doesn't do that).
82 virtual void parse() = 0;
83
84 // Returns the CPU type this file was compiled to.
85 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
86
87 MemoryBufferRef mb;
88
89 // An archive file name if this file is created from an archive.
90 StringRef parentName;
91
92 // Returns .drectve section contents if exist.
93 StringRef getDirectives() { return directives; }
94
95 COFFLinkerContext &ctx;
96
97protected:
98 InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m, bool lazy = false)
99 : mb(m), ctx(c), fileKind(k), lazy(lazy) {}
100
101 StringRef directives;
102
103private:
104 const Kind fileKind;
105
106public:
107 // True if this is a lazy ObjFile or BitcodeFile.
108 bool lazy = false;
109};
110
111// .lib or .a file.
112class ArchiveFile : public InputFile {
113public:
114 explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
115 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
116 void parse() override;
117
118 // Enqueues an archive member load for the given symbol. If we've already
119 // enqueued a load for the same archive member, this function does nothing,
120 // which ensures that we don't load the same member more than once.
121 void addMember(const Archive::Symbol &sym);
122
123private:
124 std::unique_ptr<Archive> file;
125 llvm::DenseSet<uint64_t> seen;
126};
127
128// .obj or .o file. This may be a member of an archive file.
129class ObjFile : public InputFile {
130public:
131 explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false)
132 : InputFile(ctx, ObjectKind, m, lazy) {}
133 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
134 void parse() override;
135 void parseLazy();
136 MachineTypes getMachineType() override;
137 ArrayRef<Chunk *> getChunks() { return chunks; }
138 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
139 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
140 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
141 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
142 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
143 ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
144 ArrayRef<Symbol *> getSymbols() { return symbols; }
145
146 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
147
148 ArrayRef<uint8_t> getDebugSection(StringRef secName);
149
150 // Returns a Symbol object for the symbolIndex'th symbol in the
151 // underlying object file.
152 Symbol *getSymbol(uint32_t symbolIndex) {
153 return symbols[symbolIndex];
154 }
155
156 // Returns the underlying COFF file.
157 COFFObjectFile *getCOFFObj() { return coffObj.get(); }
158
159 // Add a symbol for a range extension thunk. Return the new symbol table
160 // index. This index can be used to modify a relocation.
161 uint32_t addRangeThunkSymbol(Symbol *thunk) {
162 symbols.push_back(x: thunk);
163 return symbols.size() - 1;
164 }
165
166 void includeResourceChunks();
167
168 bool isResourceObjFile() const { return !resourceChunks.empty(); }
169
170 // Flags in the absolute @feat.00 symbol if it is present. These usually
171 // indicate if an object was compiled with certain security features enabled
172 // like stack guard, safeseh, /guard:cf, or other things.
173 uint32_t feat00Flags = 0;
174
175 // True if this object file is compatible with SEH. COFF-specific and
176 // x86-only. COFF spec 5.10.1. The .sxdata section.
177 bool hasSafeSEH() { return feat00Flags & 0x1; }
178
179 // True if this file was compiled with /guard:cf.
180 bool hasGuardCF() { return feat00Flags & 0x4800; }
181
182 // Pointer to the PDB module descriptor builder. Various debug info records
183 // will reference object files by "module index", which is here. Things like
184 // source files and section contributions are also recorded here. Will be null
185 // if we are not producing a PDB.
186 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
187
188 const coff_section *addrsigSec = nullptr;
189
190 const coff_section *callgraphSec = nullptr;
191
192 // When using Microsoft precompiled headers, this is the PCH's key.
193 // The same key is used by both the precompiled object, and objects using the
194 // precompiled object. Any difference indicates out-of-date objects.
195 std::optional<uint32_t> pchSignature;
196
197 // Whether this file was compiled with /hotpatch.
198 bool hotPatchable = false;
199
200 // Whether the object was already merged into the final PDB.
201 bool mergedIntoPDB = false;
202
203 // If the OBJ has a .debug$T stream, this tells how it will be handled.
204 TpiSource *debugTypesObj = nullptr;
205
206 // The .debug$P or .debug$T section data if present. Empty otherwise.
207 ArrayRef<uint8_t> debugTypes;
208
209 std::optional<std::pair<StringRef, uint32_t>>
210 getVariableLocation(StringRef var);
211
212 std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
213 uint32_t sectionIndex);
214
215private:
216 const coff_section* getSection(uint32_t i);
217 const coff_section *getSection(COFFSymbolRef sym) {
218 return getSection(i: sym.getSectionNumber());
219 }
220
221 void enqueuePdbFile(StringRef path, ObjFile *fromFile);
222
223 void initializeChunks();
224 void initializeSymbols();
225 void initializeFlags();
226 void initializeDependencies();
227
228 SectionChunk *
229 readSection(uint32_t sectionNumber,
230 const llvm::object::coff_aux_section_definition *def,
231 StringRef leaderName);
232
233 void readAssociativeDefinition(
234 COFFSymbolRef coffSym,
235 const llvm::object::coff_aux_section_definition *def);
236
237 void readAssociativeDefinition(
238 COFFSymbolRef coffSym,
239 const llvm::object::coff_aux_section_definition *def,
240 uint32_t parentSection);
241
242 void recordPrevailingSymbolForMingw(
243 COFFSymbolRef coffSym,
244 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
245
246 void maybeAssociateSEHForMingw(
247 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
248 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
249
250 // Given a new symbol Sym with comdat selection Selection, if the new
251 // symbol is not (yet) Prevailing and the existing comdat leader set to
252 // Leader, emits a diagnostic if the new symbol and its selection doesn't
253 // match the existing symbol and its selection. If either old or new
254 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
255 // the existing leader. In that case, Prevailing is set to true.
256 void
257 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
258 bool &prevailing, DefinedRegular *leader,
259 const llvm::object::coff_aux_section_definition *def);
260
261 std::optional<Symbol *>
262 createDefined(COFFSymbolRef sym,
263 std::vector<const llvm::object::coff_aux_section_definition *>
264 &comdatDefs,
265 bool &prevailingComdat);
266 Symbol *createRegular(COFFSymbolRef sym);
267 Symbol *createUndefined(COFFSymbolRef sym);
268
269 std::unique_ptr<COFFObjectFile> coffObj;
270
271 // List of all chunks defined by this file. This includes both section
272 // chunks and non-section chunks for common symbols.
273 std::vector<Chunk *> chunks;
274
275 std::vector<SectionChunk *> resourceChunks;
276
277 // CodeView debug info sections.
278 std::vector<SectionChunk *> debugChunks;
279
280 // Chunks containing symbol table indices of exception handlers. Only used for
281 // 32-bit x86.
282 std::vector<SectionChunk *> sxDataChunks;
283
284 // Chunks containing symbol table indices of address taken symbols, address
285 // taken IAT entries, longjmp and ehcont targets. These are not linked into
286 // the final binary when /guard:cf is set.
287 std::vector<SectionChunk *> guardFidChunks;
288 std::vector<SectionChunk *> guardIATChunks;
289 std::vector<SectionChunk *> guardLJmpChunks;
290 std::vector<SectionChunk *> guardEHContChunks;
291
292 // This vector contains a list of all symbols defined or referenced by this
293 // file. They are indexed such that you can get a Symbol by symbol
294 // index. Nonexistent indices (which are occupied by auxiliary
295 // symbols in the real symbol table) are filled with null pointers.
296 std::vector<Symbol *> symbols;
297
298 // This vector contains the same chunks as Chunks, but they are
299 // indexed such that you can get a SectionChunk by section index.
300 // Nonexistent section indices are filled with null pointers.
301 // (Because section number is 1-based, the first slot is always a
302 // null pointer.) This vector is only valid during initialization.
303 std::vector<SectionChunk *> sparseChunks;
304
305 DWARFCache *dwarf = nullptr;
306};
307
308// This is a PDB type server dependency, that is not a input file per se, but
309// needs to be treated like one. Such files are discovered from the debug type
310// stream.
311class PDBInputFile : public InputFile {
312public:
313 explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m);
314 ~PDBInputFile();
315 static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
316 void parse() override;
317
318 static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx,
319 StringRef path, ObjFile *fromFile);
320
321 // Record possible errors while opening the PDB file
322 std::optional<std::string> loadErrorStr;
323
324 // This is the actual interface to the PDB (if it was opened successfully)
325 std::unique_ptr<llvm::pdb::NativeSession> session;
326
327 // If the PDB has a .debug$T stream, this tells how it will be handled.
328 TpiSource *debugTypesObj = nullptr;
329};
330
331// This type represents import library members that contain DLL names
332// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
333// for details about the format.
334class ImportFile : public InputFile {
335public:
336 explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m);
337
338 static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
339
340 Symbol *impSym = nullptr;
341 Symbol *thunkSym = nullptr;
342 std::string dllName;
343
344private:
345 void parse() override;
346
347public:
348 StringRef externalName;
349 const coff_import_header *hdr;
350 Chunk *location = nullptr;
351
352 // We want to eliminate dllimported symbols if no one actually refers to them.
353 // These "Live" bits are used to keep track of which import library members
354 // are actually in use.
355 //
356 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
357 // symbols provided by this import library member. We also track whether the
358 // imported symbol is used separately from whether the thunk is used in order
359 // to avoid creating unnecessary thunks.
360 bool live;
361 bool thunkLive;
362};
363
364// Used for LTO.
365class BitcodeFile : public InputFile {
366public:
367 explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
368 StringRef archiveName, uint64_t offsetInArchive,
369 bool lazy);
370 ~BitcodeFile();
371 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
372 ArrayRef<Symbol *> getSymbols() { return symbols; }
373 MachineTypes getMachineType() override;
374 void parseLazy();
375 std::unique_ptr<llvm::lto::InputFile> obj;
376
377private:
378 void parse() override;
379
380 std::vector<Symbol *> symbols;
381};
382
383// .dll file. MinGW only.
384class DLLFile : public InputFile {
385public:
386 explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m)
387 : InputFile(ctx, DLLKind, m) {}
388 static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
389 void parse() override;
390 MachineTypes getMachineType() override;
391
392 struct Symbol {
393 StringRef dllName;
394 StringRef symbolName;
395 llvm::COFF::ImportNameType nameType;
396 llvm::COFF::ImportType importType;
397 };
398
399 void makeImport(Symbol *s);
400
401private:
402 std::unique_ptr<COFFObjectFile> coffObj;
403 llvm::StringSet<> seen;
404};
405
406inline bool isBitcode(MemoryBufferRef mb) {
407 return identify_magic(magic: mb.getBuffer()) == llvm::file_magic::bitcode;
408}
409
410std::string replaceThinLTOSuffix(StringRef path, StringRef suffix,
411 StringRef repl);
412} // namespace coff
413
414std::string toString(const coff::InputFile *file);
415} // namespace lld
416
417#endif
418

// Source: lld/COFF/InputFiles.h