1 | //===- Writer.cpp ---------------------------------------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Writer.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputFiles.h" |
13 | #include "InputSection.h" |
14 | #include "MapFile.h" |
15 | #include "OutputSection.h" |
16 | #include "OutputSegment.h" |
17 | #include "SectionPriorities.h" |
18 | #include "SymbolTable.h" |
19 | #include "Symbols.h" |
20 | #include "SyntheticSections.h" |
21 | #include "Target.h" |
22 | #include "UnwindInfoSection.h" |
23 | |
24 | #include "lld/Common/Arrays.h" |
25 | #include "lld/Common/CommonLinkerContext.h" |
26 | #include "llvm/BinaryFormat/MachO.h" |
27 | #include "llvm/Config/llvm-config.h" |
28 | #include "llvm/Support/Parallel.h" |
29 | #include "llvm/Support/Path.h" |
30 | #include "llvm/Support/TimeProfiler.h" |
31 | #include "llvm/Support/thread.h" |
32 | #include "llvm/Support/xxhash.h" |
33 | |
34 | #include <algorithm> |
35 | |
36 | using namespace llvm; |
37 | using namespace llvm::MachO; |
38 | using namespace llvm::sys; |
39 | using namespace lld; |
40 | using namespace lld::macho; |
41 | |
42 | namespace { |
43 | class LCUuid; |
44 | |
45 | class Writer { |
46 | public: |
47 | Writer() : buffer(errorHandler().outputBuffer) {} |
48 | |
49 | void treatSpecialUndefineds(); |
50 | void scanRelocations(); |
51 | void scanSymbols(); |
52 | template <class LP> void createOutputSections(); |
53 | template <class LP> void createLoadCommands(); |
54 | void finalizeAddresses(); |
55 | void finalizeLinkEditSegment(); |
56 | void assignAddresses(OutputSegment *); |
57 | |
58 | void openFile(); |
59 | void writeSections(); |
60 | void applyOptimizationHints(); |
61 | void buildFixupChains(); |
62 | void writeUuid(); |
63 | void writeCodeSignature(); |
64 | void writeOutputFile(); |
65 | |
66 | template <class LP> void run(); |
67 | |
68 | std::unique_ptr<FileOutputBuffer> &buffer; |
69 | uint64_t addr = 0; |
70 | uint64_t fileOff = 0; |
71 | MachHeaderSection *header = nullptr; |
72 | StringTableSection *stringTableSection = nullptr; |
73 | SymtabSection *symtabSection = nullptr; |
74 | IndirectSymtabSection *indirectSymtabSection = nullptr; |
75 | CodeSignatureSection *codeSignatureSection = nullptr; |
76 | DataInCodeSection *dataInCodeSection = nullptr; |
77 | FunctionStartsSection *functionStartsSection = nullptr; |
78 | |
79 | LCUuid *uuidCommand = nullptr; |
80 | OutputSegment *linkEditSegment = nullptr; |
81 | }; |
82 | |
83 | // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. |
84 | class LCDyldInfo final : public LoadCommand { |
85 | public: |
86 | LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, |
87 | WeakBindingSection *weakBindingSection, |
88 | LazyBindingSection *lazyBindingSection, |
89 | ExportSection *exportSection) |
90 | : rebaseSection(rebaseSection), bindingSection(bindingSection), |
91 | weakBindingSection(weakBindingSection), |
92 | lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} |
93 | |
94 | uint32_t getSize() const override { return sizeof(dyld_info_command); } |
95 | |
96 | void writeTo(uint8_t *buf) const override { |
97 | auto *c = reinterpret_cast<dyld_info_command *>(buf); |
98 | c->cmd = LC_DYLD_INFO_ONLY; |
99 | c->cmdsize = getSize(); |
100 | if (rebaseSection->isNeeded()) { |
101 | c->rebase_off = rebaseSection->fileOff; |
102 | c->rebase_size = rebaseSection->getFileSize(); |
103 | } |
104 | if (bindingSection->isNeeded()) { |
105 | c->bind_off = bindingSection->fileOff; |
106 | c->bind_size = bindingSection->getFileSize(); |
107 | } |
108 | if (weakBindingSection->isNeeded()) { |
109 | c->weak_bind_off = weakBindingSection->fileOff; |
110 | c->weak_bind_size = weakBindingSection->getFileSize(); |
111 | } |
112 | if (lazyBindingSection->isNeeded()) { |
113 | c->lazy_bind_off = lazyBindingSection->fileOff; |
114 | c->lazy_bind_size = lazyBindingSection->getFileSize(); |
115 | } |
116 | if (exportSection->isNeeded()) { |
117 | c->export_off = exportSection->fileOff; |
118 | c->export_size = exportSection->getFileSize(); |
119 | } |
120 | } |
121 | |
122 | RebaseSection *rebaseSection; |
123 | BindingSection *bindingSection; |
124 | WeakBindingSection *weakBindingSection; |
125 | LazyBindingSection *lazyBindingSection; |
126 | ExportSection *exportSection; |
127 | }; |
128 | |
129 | class LCSubFramework final : public LoadCommand { |
130 | public: |
131 | LCSubFramework(StringRef umbrella) : umbrella(umbrella) {} |
132 | |
133 | uint32_t getSize() const override { |
134 | return alignToPowerOf2(Value: sizeof(sub_framework_command) + umbrella.size() + 1, |
135 | Align: target->wordSize); |
136 | } |
137 | |
138 | void writeTo(uint8_t *buf) const override { |
139 | auto *c = reinterpret_cast<sub_framework_command *>(buf); |
140 | buf += sizeof(sub_framework_command); |
141 | |
142 | c->cmd = LC_SUB_FRAMEWORK; |
143 | c->cmdsize = getSize(); |
144 | c->umbrella = sizeof(sub_framework_command); |
145 | |
146 | memcpy(dest: buf, src: umbrella.data(), n: umbrella.size()); |
147 | buf[umbrella.size()] = '\0'; |
148 | } |
149 | |
150 | private: |
151 | const StringRef umbrella; |
152 | }; |
153 | |
154 | class LCFunctionStarts final : public LoadCommand { |
155 | public: |
156 | explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection) |
157 | : functionStartsSection(functionStartsSection) {} |
158 | |
159 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
160 | |
161 | void writeTo(uint8_t *buf) const override { |
162 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
163 | c->cmd = LC_FUNCTION_STARTS; |
164 | c->cmdsize = getSize(); |
165 | c->dataoff = functionStartsSection->fileOff; |
166 | c->datasize = functionStartsSection->getFileSize(); |
167 | } |
168 | |
169 | private: |
170 | FunctionStartsSection *functionStartsSection; |
171 | }; |
172 | |
173 | class LCDataInCode final : public LoadCommand { |
174 | public: |
175 | explicit LCDataInCode(DataInCodeSection *dataInCodeSection) |
176 | : dataInCodeSection(dataInCodeSection) {} |
177 | |
178 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
179 | |
180 | void writeTo(uint8_t *buf) const override { |
181 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
182 | c->cmd = LC_DATA_IN_CODE; |
183 | c->cmdsize = getSize(); |
184 | c->dataoff = dataInCodeSection->fileOff; |
185 | c->datasize = dataInCodeSection->getFileSize(); |
186 | } |
187 | |
188 | private: |
189 | DataInCodeSection *dataInCodeSection; |
190 | }; |
191 | |
192 | class LCDysymtab final : public LoadCommand { |
193 | public: |
194 | LCDysymtab(SymtabSection *symtabSection, |
195 | IndirectSymtabSection *indirectSymtabSection) |
196 | : symtabSection(symtabSection), |
197 | indirectSymtabSection(indirectSymtabSection) {} |
198 | |
199 | uint32_t getSize() const override { return sizeof(dysymtab_command); } |
200 | |
201 | void writeTo(uint8_t *buf) const override { |
202 | auto *c = reinterpret_cast<dysymtab_command *>(buf); |
203 | c->cmd = LC_DYSYMTAB; |
204 | c->cmdsize = getSize(); |
205 | |
206 | c->ilocalsym = 0; |
207 | c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); |
208 | c->nextdefsym = symtabSection->getNumExternalSymbols(); |
209 | c->iundefsym = c->iextdefsym + c->nextdefsym; |
210 | c->nundefsym = symtabSection->getNumUndefinedSymbols(); |
211 | |
212 | c->indirectsymoff = indirectSymtabSection->fileOff; |
213 | c->nindirectsyms = indirectSymtabSection->getNumSymbols(); |
214 | } |
215 | |
216 | SymtabSection *symtabSection; |
217 | IndirectSymtabSection *indirectSymtabSection; |
218 | }; |
219 | |
220 | template <class LP> class LCSegment final : public LoadCommand { |
221 | public: |
222 | LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {} |
223 | |
224 | uint32_t getSize() const override { |
225 | return sizeof(typename LP::segment_command) + |
226 | seg->numNonHiddenSections() * sizeof(typename LP::section); |
227 | } |
228 | |
229 | void writeTo(uint8_t *buf) const override { |
230 | using SegmentCommand = typename LP::segment_command; |
231 | using SectionHeader = typename LP::section; |
232 | |
233 | auto *c = reinterpret_cast<SegmentCommand *>(buf); |
234 | buf += sizeof(SegmentCommand); |
235 | |
236 | c->cmd = LP::segmentLCType; |
237 | c->cmdsize = getSize(); |
238 | memcpy(c->segname, name.data(), name.size()); |
239 | c->fileoff = seg->fileOff; |
240 | c->maxprot = seg->maxProt; |
241 | c->initprot = seg->initProt; |
242 | |
243 | c->vmaddr = seg->addr; |
244 | c->vmsize = seg->vmSize; |
245 | c->filesize = seg->fileSize; |
246 | c->nsects = seg->numNonHiddenSections(); |
247 | c->flags = seg->flags; |
248 | |
249 | for (const OutputSection *osec : seg->getSections()) { |
250 | if (osec->isHidden()) |
251 | continue; |
252 | |
253 | auto *sectHdr = reinterpret_cast<SectionHeader *>(buf); |
254 | buf += sizeof(SectionHeader); |
255 | |
256 | memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); |
257 | memcpy(sectHdr->segname, name.data(), name.size()); |
258 | |
259 | sectHdr->addr = osec->addr; |
260 | sectHdr->offset = osec->fileOff; |
261 | sectHdr->align = Log2_32(Value: osec->align); |
262 | sectHdr->flags = osec->flags; |
263 | sectHdr->size = osec->getSize(); |
264 | sectHdr->reserved1 = osec->reserved1; |
265 | sectHdr->reserved2 = osec->reserved2; |
266 | } |
267 | } |
268 | |
269 | private: |
270 | StringRef name; |
271 | OutputSegment *seg; |
272 | }; |
273 | |
274 | class LCMain final : public LoadCommand { |
275 | uint32_t getSize() const override { |
276 | return sizeof(structs::entry_point_command); |
277 | } |
278 | |
279 | void writeTo(uint8_t *buf) const override { |
280 | auto *c = reinterpret_cast<structs::entry_point_command *>(buf); |
281 | c->cmd = LC_MAIN; |
282 | c->cmdsize = getSize(); |
283 | |
284 | if (config->entry->isInStubs()) |
285 | c->entryoff = |
286 | in.stubs->fileOff + config->entry->stubsIndex * target->stubSize; |
287 | else |
288 | c->entryoff = config->entry->getVA() - in.header->addr; |
289 | |
290 | c->stacksize = 0; |
291 | } |
292 | }; |
293 | |
294 | class LCSymtab final : public LoadCommand { |
295 | public: |
296 | LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) |
297 | : symtabSection(symtabSection), stringTableSection(stringTableSection) {} |
298 | |
299 | uint32_t getSize() const override { return sizeof(symtab_command); } |
300 | |
301 | void writeTo(uint8_t *buf) const override { |
302 | auto *c = reinterpret_cast<symtab_command *>(buf); |
303 | c->cmd = LC_SYMTAB; |
304 | c->cmdsize = getSize(); |
305 | c->symoff = symtabSection->fileOff; |
306 | c->nsyms = symtabSection->getNumSymbols(); |
307 | c->stroff = stringTableSection->fileOff; |
308 | c->strsize = stringTableSection->getFileSize(); |
309 | } |
310 | |
311 | SymtabSection *symtabSection = nullptr; |
312 | StringTableSection *stringTableSection = nullptr; |
313 | }; |
314 | |
315 | // There are several dylib load commands that share the same structure: |
316 | // * LC_LOAD_DYLIB |
317 | // * LC_ID_DYLIB |
318 | // * LC_REEXPORT_DYLIB |
319 | class LCDylib final : public LoadCommand { |
320 | public: |
321 | LCDylib(LoadCommandType type, StringRef path, |
322 | uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0) |
323 | : type(type), path(path), compatibilityVersion(compatibilityVersion), |
324 | currentVersion(currentVersion) { |
325 | instanceCount++; |
326 | } |
327 | |
328 | uint32_t getSize() const override { |
329 | return alignToPowerOf2(Value: sizeof(dylib_command) + path.size() + 1, |
330 | Align: target->wordSize); |
331 | } |
332 | |
333 | void writeTo(uint8_t *buf) const override { |
334 | auto *c = reinterpret_cast<dylib_command *>(buf); |
335 | buf += sizeof(dylib_command); |
336 | |
337 | c->cmd = type; |
338 | c->cmdsize = getSize(); |
339 | c->dylib.name = sizeof(dylib_command); |
340 | c->dylib.timestamp = 0; |
341 | c->dylib.compatibility_version = compatibilityVersion; |
342 | c->dylib.current_version = currentVersion; |
343 | |
344 | memcpy(dest: buf, src: path.data(), n: path.size()); |
345 | buf[path.size()] = '\0'; |
346 | } |
347 | |
348 | static uint32_t getInstanceCount() { return instanceCount; } |
349 | static void resetInstanceCount() { instanceCount = 0; } |
350 | |
351 | private: |
352 | LoadCommandType type; |
353 | StringRef path; |
354 | uint32_t compatibilityVersion; |
355 | uint32_t currentVersion; |
356 | static uint32_t instanceCount; |
357 | }; |
358 | |
359 | uint32_t LCDylib::instanceCount = 0; |
360 | |
361 | class LCLoadDylinker final : public LoadCommand { |
362 | public: |
363 | uint32_t getSize() const override { |
364 | return alignToPowerOf2(Value: sizeof(dylinker_command) + path.size() + 1, |
365 | Align: target->wordSize); |
366 | } |
367 | |
368 | void writeTo(uint8_t *buf) const override { |
369 | auto *c = reinterpret_cast<dylinker_command *>(buf); |
370 | buf += sizeof(dylinker_command); |
371 | |
372 | c->cmd = LC_LOAD_DYLINKER; |
373 | c->cmdsize = getSize(); |
374 | c->name = sizeof(dylinker_command); |
375 | |
376 | memcpy(dest: buf, src: path.data(), n: path.size()); |
377 | buf[path.size()] = '\0'; |
378 | } |
379 | |
380 | private: |
381 | // Recent versions of Darwin won't run any binary that has dyld at a |
382 | // different location. |
383 | const StringRef path = "/usr/lib/dyld"; |
384 | }; |
385 | |
386 | class LCRPath final : public LoadCommand { |
387 | public: |
388 | explicit LCRPath(StringRef path) : path(path) {} |
389 | |
390 | uint32_t getSize() const override { |
391 | return alignToPowerOf2(Value: sizeof(rpath_command) + path.size() + 1, |
392 | Align: target->wordSize); |
393 | } |
394 | |
395 | void writeTo(uint8_t *buf) const override { |
396 | auto *c = reinterpret_cast<rpath_command *>(buf); |
397 | buf += sizeof(rpath_command); |
398 | |
399 | c->cmd = LC_RPATH; |
400 | c->cmdsize = getSize(); |
401 | c->path = sizeof(rpath_command); |
402 | |
403 | memcpy(dest: buf, src: path.data(), n: path.size()); |
404 | buf[path.size()] = '\0'; |
405 | } |
406 | |
407 | private: |
408 | StringRef path; |
409 | }; |
410 | |
411 | class LCSubClient final : public LoadCommand { |
412 | public: |
413 | explicit LCSubClient(StringRef client) : client(client) {} |
414 | |
415 | uint32_t getSize() const override { |
416 | return alignToPowerOf2(Value: sizeof(sub_client_command) + client.size() + 1, |
417 | Align: target->wordSize); |
418 | } |
419 | |
420 | void writeTo(uint8_t *buf) const override { |
421 | auto *c = reinterpret_cast<sub_client_command *>(buf); |
422 | buf += sizeof(sub_client_command); |
423 | |
424 | c->cmd = LC_SUB_CLIENT; |
425 | c->cmdsize = getSize(); |
426 | c->client = sizeof(sub_client_command); |
427 | |
428 | memcpy(dest: buf, src: client.data(), n: client.size()); |
429 | buf[client.size()] = '\0'; |
430 | } |
431 | |
432 | private: |
433 | StringRef client; |
434 | }; |
435 | |
436 | class LCDyldEnv final : public LoadCommand { |
437 | public: |
438 | explicit LCDyldEnv(StringRef name) : name(name) {} |
439 | |
440 | uint32_t getSize() const override { |
441 | return alignToPowerOf2(Value: sizeof(dyld_env_command) + name.size() + 1, |
442 | Align: target->wordSize); |
443 | } |
444 | |
445 | void writeTo(uint8_t *buf) const override { |
446 | auto *c = reinterpret_cast<dyld_env_command *>(buf); |
447 | buf += sizeof(dyld_env_command); |
448 | |
449 | c->cmd = LC_DYLD_ENVIRONMENT; |
450 | c->cmdsize = getSize(); |
451 | c->name = sizeof(dyld_env_command); |
452 | |
453 | memcpy(dest: buf, src: name.data(), n: name.size()); |
454 | buf[name.size()] = '\0'; |
455 | } |
456 | |
457 | private: |
458 | StringRef name; |
459 | }; |
460 | |
461 | class LCMinVersion final : public LoadCommand { |
462 | public: |
463 | explicit LCMinVersion(const PlatformInfo &platformInfo) |
464 | : platformInfo(platformInfo) {} |
465 | |
466 | uint32_t getSize() const override { return sizeof(version_min_command); } |
467 | |
468 | void writeTo(uint8_t *buf) const override { |
469 | auto *c = reinterpret_cast<version_min_command *>(buf); |
470 | switch (platformInfo.target.Platform) { |
471 | case PLATFORM_MACOS: |
472 | c->cmd = LC_VERSION_MIN_MACOSX; |
473 | break; |
474 | case PLATFORM_IOS: |
475 | case PLATFORM_IOSSIMULATOR: |
476 | c->cmd = LC_VERSION_MIN_IPHONEOS; |
477 | break; |
478 | case PLATFORM_TVOS: |
479 | case PLATFORM_TVOSSIMULATOR: |
480 | c->cmd = LC_VERSION_MIN_TVOS; |
481 | break; |
482 | case PLATFORM_WATCHOS: |
483 | case PLATFORM_WATCHOSSIMULATOR: |
484 | c->cmd = LC_VERSION_MIN_WATCHOS; |
485 | break; |
486 | default: |
487 | llvm_unreachable("invalid platform"); |
488 | break; |
489 | } |
490 | c->cmdsize = getSize(); |
491 | c->version = encodeVersion(version: platformInfo.target.MinDeployment); |
492 | c->sdk = encodeVersion(version: platformInfo.sdk); |
493 | } |
494 | |
495 | private: |
496 | const PlatformInfo &platformInfo; |
497 | }; |
498 | |
499 | class LCBuildVersion final : public LoadCommand { |
500 | public: |
501 | explicit LCBuildVersion(const PlatformInfo &platformInfo) |
502 | : platformInfo(platformInfo) {} |
503 | |
504 | const int ntools = 1; |
505 | |
506 | uint32_t getSize() const override { |
507 | return sizeof(build_version_command) + ntools * sizeof(build_tool_version); |
508 | } |
509 | |
510 | void writeTo(uint8_t *buf) const override { |
511 | auto *c = reinterpret_cast<build_version_command *>(buf); |
512 | c->cmd = LC_BUILD_VERSION; |
513 | c->cmdsize = getSize(); |
514 | |
515 | c->platform = static_cast<uint32_t>(platformInfo.target.Platform); |
516 | c->minos = encodeVersion(version: platformInfo.target.MinDeployment); |
517 | c->sdk = encodeVersion(version: platformInfo.sdk); |
518 | |
519 | c->ntools = ntools; |
520 | auto *t = reinterpret_cast<build_tool_version *>(&c[1]); |
521 | t->tool = TOOL_LLD; |
522 | t->version = encodeVersion(version: VersionTuple( |
523 | LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH)); |
524 | } |
525 | |
526 | private: |
527 | const PlatformInfo &platformInfo; |
528 | }; |
529 | |
530 | // Stores a unique identifier for the output file based on an MD5 hash of its |
531 | // contents. In order to hash the contents, we must first write them, but |
532 | // LC_UUID itself must be part of the written contents in order for all the |
533 | // offsets to be calculated correctly. We resolve this circular paradox by |
534 | // first writing an LC_UUID with an all-zero UUID, then updating the UUID with |
535 | // its real value later. |
536 | class LCUuid final : public LoadCommand { |
537 | public: |
538 | uint32_t getSize() const override { return sizeof(uuid_command); } |
539 | |
540 | void writeTo(uint8_t *buf) const override { |
541 | auto *c = reinterpret_cast<uuid_command *>(buf); |
542 | c->cmd = LC_UUID; |
543 | c->cmdsize = getSize(); |
544 | uuidBuf = c->uuid; |
545 | } |
546 | |
547 | void writeUuid(uint64_t digest) const { |
548 | // xxhash only gives us 8 bytes, so put some fixed data in the other half. |
549 | static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size"); |
550 | memcpy(dest: uuidBuf, src: "LLD\xa1UU1D", n: 8); |
551 | memcpy(dest: uuidBuf + 8, src: &digest, n: 8); |
552 | |
553 | // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in |
554 | // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't |
555 | // want to lose bits of the digest in byte 8, so swap that with a byte of |
556 | // fixed data that happens to have the right bits set. |
557 | std::swap(a&: uuidBuf[3], b&: uuidBuf[8]); |
558 | |
559 | // Claim that this is an MD5-based hash. It isn't, but this signals that |
560 | // this is not a time-based and not a random hash. MD5 seems like the least |
561 | // bad lie we can put here. |
562 | assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3"); |
563 | assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2"); |
564 | } |
565 | |
566 | mutable uint8_t *uuidBuf; |
567 | }; |
568 | |
569 | template <class LP> class LCEncryptionInfo final : public LoadCommand { |
570 | public: |
571 | uint32_t getSize() const override { |
572 | return sizeof(typename LP::encryption_info_command); |
573 | } |
574 | |
575 | void writeTo(uint8_t *buf) const override { |
576 | using EncryptionInfo = typename LP::encryption_info_command; |
577 | auto *c = reinterpret_cast<EncryptionInfo *>(buf); |
578 | buf += sizeof(EncryptionInfo); |
579 | c->cmd = LP::encryptionInfoLCType; |
580 | c->cmdsize = getSize(); |
581 | c->cryptoff = in.header->getSize(); |
582 | auto it = find_if(outputSegments, [](const OutputSegment *seg) { |
583 | return seg->name == segment_names::text; |
584 | }); |
585 | assert(it != outputSegments.end()); |
586 | c->cryptsize = (*it)->fileSize - c->cryptoff; |
587 | } |
588 | }; |
589 | |
590 | class LCCodeSignature final : public LoadCommand { |
591 | public: |
592 | LCCodeSignature(CodeSignatureSection *section) : section(section) {} |
593 | |
594 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
595 | |
596 | void writeTo(uint8_t *buf) const override { |
597 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
598 | c->cmd = LC_CODE_SIGNATURE; |
599 | c->cmdsize = getSize(); |
600 | c->dataoff = static_cast<uint32_t>(section->fileOff); |
601 | c->datasize = section->getSize(); |
602 | } |
603 | |
604 | CodeSignatureSection *section; |
605 | }; |
606 | |
607 | class LCExportsTrie final : public LoadCommand { |
608 | public: |
609 | LCExportsTrie(ExportSection *section) : section(section) {} |
610 | |
611 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
612 | |
613 | void writeTo(uint8_t *buf) const override { |
614 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
615 | c->cmd = LC_DYLD_EXPORTS_TRIE; |
616 | c->cmdsize = getSize(); |
617 | c->dataoff = section->fileOff; |
618 | c->datasize = section->getSize(); |
619 | } |
620 | |
621 | ExportSection *section; |
622 | }; |
623 | |
624 | class LCChainedFixups final : public LoadCommand { |
625 | public: |
626 | LCChainedFixups(ChainedFixupsSection *section) : section(section) {} |
627 | |
628 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
629 | |
630 | void writeTo(uint8_t *buf) const override { |
631 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
632 | c->cmd = LC_DYLD_CHAINED_FIXUPS; |
633 | c->cmdsize = getSize(); |
634 | c->dataoff = section->fileOff; |
635 | c->datasize = section->getSize(); |
636 | } |
637 | |
638 | ChainedFixupsSection *section; |
639 | }; |
640 | |
641 | } // namespace |
642 | |
643 | void Writer::treatSpecialUndefineds() { |
644 | if (config->entry) |
645 | if (auto *undefined = dyn_cast<Undefined>(Val: config->entry)) |
646 | treatUndefinedSymbol(*undefined, source: "the entry point"); |
647 | |
648 | // FIXME: This prints symbols that are undefined both in input files and |
649 | // via -u flag twice. |
650 | for (const Symbol *sym : config->explicitUndefineds) { |
651 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
652 | treatUndefinedSymbol(*undefined, source: "-u"); |
653 | } |
654 | // Literal exported-symbol names must be defined, but glob |
655 | // patterns need not match. |
656 | for (const CachedHashStringRef &cachedName : |
657 | config->exportedSymbols.literals) { |
658 | if (const Symbol *sym = symtab->find(name: cachedName)) |
659 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
660 | treatUndefinedSymbol(*undefined, source: "-exported_symbol(s_list)"); |
661 | } |
662 | } |
663 | |
664 | static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, |
665 | const lld::macho::Reloc &r) { |
666 | if (!sym->isLive()) { |
667 | if (Defined *defined = dyn_cast<Defined>(Val: sym)) { |
668 | if (config->emitInitOffsets && |
669 | defined->isec()->getName() == section_names::moduleInitFunc) |
670 | fatal(msg: isec->getLocation(off: r.offset) + ": cannot reference "+ |
671 | sym->getName() + |
672 | " defined in __mod_init_func when -init_offsets is used"); |
673 | } |
674 | assert(false && "referenced symbol must be live"); |
675 | } |
676 | |
677 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type: r.type); |
678 | |
679 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) { |
680 | if (needsBinding(sym)) |
681 | in.stubs->addEntry(sym); |
682 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) { |
683 | if (relocAttrs.hasAttr(b: RelocAttrBits::POINTER) || needsBinding(sym)) |
684 | in.got->addEntry(sym); |
685 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) { |
686 | if (needsBinding(sym)) |
687 | in.tlvPointers->addEntry(sym); |
688 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED)) { |
689 | // References from thread-local variable sections are treated as offsets |
690 | // relative to the start of the referent section, and therefore have no |
691 | // need of rebase opcodes. |
692 | if (!(isThreadLocalVariables(flags: isec->getFlags()) && isa<Defined>(Val: sym))) |
693 | addNonLazyBindingEntries(sym, isec, offset: r.offset, addend: r.addend); |
694 | } |
695 | } |
696 | |
697 | void Writer::scanRelocations() { |
698 | TimeTraceScope timeScope("Scan relocations"); |
699 | |
700 | // This can't use a for-each loop: It calls treatUndefinedSymbol(), which can |
701 | // add to inputSections, which invalidates inputSections's iterators. |
702 | for (size_t i = 0; i < inputSections.size(); ++i) { |
703 | ConcatInputSection *isec = inputSections[i]; |
704 | |
705 | if (isec->shouldOmitFromOutput()) |
706 | continue; |
707 | |
708 | for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { |
709 | lld::macho::Reloc &r = *it; |
710 | |
711 | // Canonicalize the referent so that later accesses in Writer won't |
712 | // have to worry about it. |
713 | if (auto *referentIsec = dyn_cast_if_present<InputSection *>(Val&: r.referent)) |
714 | r.referent = referentIsec->canonical(); |
715 | |
716 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
717 | // Skip over the following UNSIGNED relocation -- it's just there as the |
718 | // minuend, and doesn't have the usual UNSIGNED semantics. We don't want |
719 | // to emit rebase opcodes for it. |
720 | ++it; |
721 | // Canonicalize the referent so that later accesses in Writer won't |
722 | // have to worry about it. |
723 | if (auto *referentIsec = it->referent.dyn_cast<InputSection *>()) |
724 | it->referent = referentIsec->canonical(); |
725 | continue; |
726 | } |
727 | if (auto *sym = dyn_cast_if_present<Symbol *>(Val&: r.referent)) { |
728 | if (auto *undefined = dyn_cast<Undefined>(Val: sym)) |
729 | treatUndefinedSymbol(*undefined, isec, offset: r.offset); |
730 | // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. |
731 | if (!isa<Undefined>(Val: sym) && validateSymbolRelocation(sym, isec, r)) |
732 | prepareSymbolRelocation(sym, isec, r); |
733 | } else { |
734 | if (!r.pcrel) { |
735 | if (config->emitChainedFixups) |
736 | in.chainedFixups->addRebase(isec, offset: r.offset); |
737 | else |
738 | in.rebase->addEntry(isec, offset: r.offset); |
739 | } |
740 | } |
741 | } |
742 | } |
743 | |
744 | in.unwindInfo->prepare(); |
745 | } |
746 | |
747 | static void addNonWeakDefinition(const Defined *defined) { |
748 | if (config->emitChainedFixups) |
749 | in.chainedFixups->setHasNonWeakDefinition(); |
750 | else |
751 | in.weakBinding->addNonWeakDefinition(defined); |
752 | } |
753 | |
754 | void Writer::scanSymbols() { |
755 | TimeTraceScope timeScope("Scan symbols"); |
756 | ObjCSelRefsHelper::initialize(); |
757 | for (Symbol *sym : symtab->getSymbols()) { |
758 | if (auto *defined = dyn_cast<Defined>(Val: sym)) { |
759 | if (!defined->isLive()) |
760 | continue; |
761 | if (defined->overridesWeakDef) |
762 | addNonWeakDefinition(defined); |
763 | if (!defined->isAbsolute() && isCodeSection(defined->isec())) |
764 | in.unwindInfo->addSymbol(defined); |
765 | } else if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) { |
766 | // This branch intentionally doesn't check isLive(). |
767 | if (dysym->isDynamicLookup()) |
768 | continue; |
769 | dysym->getFile()->refState = |
770 | std::max(a: dysym->getFile()->refState, b: dysym->getRefState()); |
771 | } else if (isa<Undefined>(Val: sym)) { |
772 | if (ObjCStubsSection::isObjCStubSymbol(sym)) { |
773 | // When -dead_strip is enabled, we don't want to emit any dead stubs. |
774 | // Although this stub symbol is yet undefined, addSym() was called |
775 | // during MarkLive. |
776 | if (config->deadStrip) { |
777 | if (!sym->isLive()) |
778 | continue; |
779 | } |
780 | in.objcStubs->addEntry(sym); |
781 | } |
782 | } |
783 | } |
784 | |
785 | for (const InputFile *file : inputFiles) { |
786 | if (auto *objFile = dyn_cast<ObjFile>(Val: file)) |
787 | for (Symbol *sym : objFile->symbols) { |
788 | if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) { |
789 | if (!defined->isLive()) |
790 | continue; |
791 | if (!defined->isExternal() && !defined->isAbsolute() && |
792 | isCodeSection(defined->isec())) |
793 | in.unwindInfo->addSymbol(defined); |
794 | } |
795 | } |
796 | } |
797 | } |
798 | |
799 | // TODO: ld64 enforces the old load commands in a few other cases. |
800 | static bool useLCBuildVersion(const PlatformInfo &platformInfo) { |
801 | static const std::array<std::pair<PlatformType, VersionTuple>, 7> minVersion = |
802 | {._M_elems: {{PLATFORM_MACOS, VersionTuple(10, 14)}, |
803 | {PLATFORM_IOS, VersionTuple(12, 0)}, |
804 | {PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)}, |
805 | {PLATFORM_TVOS, VersionTuple(12, 0)}, |
806 | {PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)}, |
807 | {PLATFORM_WATCHOS, VersionTuple(5, 0)}, |
808 | {PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}}}; |
809 | auto it = llvm::find_if(Range: minVersion, P: [&](const auto &p) { |
810 | return p.first == platformInfo.target.Platform; |
811 | }); |
812 | return it == minVersion.end() |
813 | ? true |
814 | : platformInfo.target.MinDeployment >= it->second; |
815 | } |
816 | |
817 | template <class LP> void Writer::createLoadCommands() { |
818 | uint8_t segIndex = 0; |
819 | for (OutputSegment *seg : outputSegments) { |
820 | in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg)); |
821 | seg->index = segIndex++; |
822 | } |
823 | |
824 | if (config->emitChainedFixups) { |
825 | in.header->addLoadCommand(make<LCChainedFixups>(args&: in.chainedFixups)); |
826 | in.header->addLoadCommand(make<LCExportsTrie>(args&: in.exports)); |
827 | } else { |
828 | in.header->addLoadCommand(make<LCDyldInfo>( |
829 | args&: in.rebase, args&: in.binding, args&: in.weakBinding, args&: in.lazyBinding, args&: in.exports)); |
830 | } |
831 | in.header->addLoadCommand(make<LCSymtab>(args&: symtabSection, args&: stringTableSection)); |
832 | in.header->addLoadCommand( |
833 | make<LCDysymtab>(args&: symtabSection, args&: indirectSymtabSection)); |
834 | if (!config->umbrella.empty()) |
835 | in.header->addLoadCommand(make<LCSubFramework>(args&: config->umbrella)); |
836 | if (config->emitEncryptionInfo) |
837 | in.header->addLoadCommand(make<LCEncryptionInfo<LP>>()); |
838 | for (StringRef path : config->runtimePaths) |
839 | in.header->addLoadCommand(make<LCRPath>(args&: path)); |
840 | |
841 | switch (config->outputType) { |
842 | case MH_EXECUTE: |
843 | in.header->addLoadCommand(make<LCLoadDylinker>()); |
844 | break; |
845 | case MH_DYLIB: |
846 | in.header->addLoadCommand(make<LCDylib>(args: LC_ID_DYLIB, args&: config->installName, |
847 | args&: config->dylibCompatibilityVersion, |
848 | args&: config->dylibCurrentVersion)); |
849 | for (StringRef client : config->allowableClients) |
850 | in.header->addLoadCommand(make<LCSubClient>(args&: client)); |
851 | break; |
852 | case MH_BUNDLE: |
853 | break; |
854 | default: |
855 | llvm_unreachable("unhandled output file type"); |
856 | } |
857 | |
858 | if (config->generateUuid) { |
859 | uuidCommand = make<LCUuid>(); |
860 | in.header->addLoadCommand(uuidCommand); |
861 | } |
862 | |
863 | if (useLCBuildVersion(platformInfo: config->platformInfo)) |
864 | in.header->addLoadCommand(make<LCBuildVersion>(args&: config->platformInfo)); |
865 | else |
866 | in.header->addLoadCommand(make<LCMinVersion>(args&: config->platformInfo)); |
867 | |
868 | if (config->secondaryPlatformInfo) { |
869 | in.header->addLoadCommand( |
870 | make<LCBuildVersion>(args&: *config->secondaryPlatformInfo)); |
871 | } |
872 | |
873 | // This is down here to match ld64's load command order. |
874 | if (config->outputType == MH_EXECUTE) |
875 | in.header->addLoadCommand(make<LCMain>()); |
876 | |
877 | // See ld64's OutputFile::buildDylibOrdinalMapping for the corresponding |
878 | // library ordinal computation code in ld64. |
879 | int64_t dylibOrdinal = 1; |
880 | DenseMap<StringRef, int64_t> ordinalForInstallName; |
881 | |
882 | std::vector<DylibFile *> dylibFiles; |
883 | for (InputFile *file : inputFiles) { |
884 | if (auto *dylibFile = dyn_cast<DylibFile>(Val: file)) |
885 | dylibFiles.push_back(x: dylibFile); |
886 | } |
887 | for (size_t i = 0; i < dylibFiles.size(); ++i) |
888 | dylibFiles.insert(position: dylibFiles.end(), first: dylibFiles[i]->extraDylibs.begin(), |
889 | last: dylibFiles[i]->extraDylibs.end()); |
890 | |
891 | for (DylibFile *dylibFile : dylibFiles) { |
892 | if (dylibFile->isBundleLoader) { |
893 | dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE; |
894 | // Shortcut since bundle-loader does not re-export the symbols. |
895 | |
896 | dylibFile->reexport = false; |
897 | continue; |
898 | } |
899 | |
900 | // Don't emit load commands for a dylib that is not referenced if: |
901 | // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER -- |
902 | // if it's on the linker command line, it's explicit) |
903 | // - or it's marked MH_DEAD_STRIPPABLE_DYLIB |
904 | // - or the flag -dead_strip_dylibs is used |
905 | // FIXME: `isReferenced()` is currently computed before dead code |
906 | // stripping, so references from dead code keep a dylib alive. This |
907 | // matches ld64, but it's something we should do better. |
908 | if (!dylibFile->isReferenced() && !dylibFile->forceNeeded && |
909 | (!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable || |
910 | config->deadStripDylibs)) |
911 | continue; |
912 | |
913 | // Several DylibFiles can have the same installName. Only emit a single |
914 | // load command for that installName and give all these DylibFiles the |
915 | // same ordinal. |
916 | // This can happen in several cases: |
917 | // - a new framework could change its installName to an older |
918 | // framework name via an $ld$ symbol depending on platform_version |
919 | // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd; |
920 | // Foo.framework/Foo.tbd is usually a symlink to |
921 | // Foo.framework/Versions/Current/Foo.tbd, where |
922 | // Foo.framework/Versions/Current is usually a symlink to |
923 | // Foo.framework/Versions/A) |
924 | // - a framework can be linked both explicitly on the linker |
925 | // command line and implicitly as a reexport from a different |
926 | // framework. The re-export will usually point to the tbd file |
927 | // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will |
928 | // usually find Foo.framework/Foo.tbd. These are usually symlinks, |
929 | // but in a --reproduce archive they will be identical but distinct |
930 | // files. |
931 | // In the first case, *semantically distinct* DylibFiles will have the |
932 | // same installName. |
933 | int64_t &ordinal = ordinalForInstallName[dylibFile->installName]; |
934 | if (ordinal) { |
935 | dylibFile->ordinal = ordinal; |
936 | continue; |
937 | } |
938 | |
939 | ordinal = dylibFile->ordinal = dylibOrdinal++; |
940 | LoadCommandType lcType = LC_LOAD_DYLIB; |
941 | if (dylibFile->reexport) { |
942 | if (dylibFile->forceWeakImport) |
943 | warn(msg: path::filename(path: dylibFile->getName()) + |
944 | " is re-exported so cannot be weak-linked"); |
945 | |
946 | lcType = LC_REEXPORT_DYLIB; |
947 | } else if (dylibFile->forceWeakImport || |
948 | dylibFile->refState == RefState::Weak) { |
949 | lcType = LC_LOAD_WEAK_DYLIB; |
950 | } |
951 | in.header->addLoadCommand(make<LCDylib>(args&: lcType, args&: dylibFile->installName, |
952 | args&: dylibFile->compatibilityVersion, |
953 | args&: dylibFile->currentVersion)); |
954 | } |
955 | |
956 | for (const auto &dyldEnv : config->dyldEnvs) |
957 | in.header->addLoadCommand(make<LCDyldEnv>(args: dyldEnv)); |
958 | |
959 | if (functionStartsSection) |
960 | in.header->addLoadCommand(make<LCFunctionStarts>(args&: functionStartsSection)); |
961 | if (dataInCodeSection) |
962 | in.header->addLoadCommand(make<LCDataInCode>(args&: dataInCodeSection)); |
963 | if (codeSignatureSection) |
964 | in.header->addLoadCommand(make<LCCodeSignature>(args&: codeSignatureSection)); |
965 | |
966 | const uint32_t MACOS_MAXPATHLEN = 1024; |
967 | config->headerPad = std::max( |
968 | a: config->headerPad, b: (config->headerPadMaxInstallNames |
969 | ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN |
970 | : 0)); |
971 | } |
972 | |
973 | // Sorting only can happen once all outputs have been collected. Here we sort |
974 | // segments, output sections within each segment, and input sections within each |
975 | // output segment. |
976 | static void sortSegmentsAndSections() { |
977 | TimeTraceScope timeScope("Sort segments and sections"); |
978 | sortOutputSegments(); |
979 | |
980 | DenseMap<const InputSection *, int> isecPriorities = |
981 | priorityBuilder.buildInputSectionPriorities(); |
982 | |
983 | uint32_t sectionIndex = 0; |
984 | for (OutputSegment *seg : outputSegments) { |
985 | seg->sortOutputSections(); |
986 | // References from thread-local variable sections are treated as offsets |
987 | // relative to the start of the thread-local data memory area, which |
988 | // is initialized via copying all the TLV data sections (which are all |
989 | // contiguous). If later data sections require a greater alignment than |
990 | // earlier ones, the offsets of data within those sections won't be |
991 | // guaranteed to aligned unless we normalize alignments. We therefore use |
992 | // the largest alignment for all TLV data sections. |
993 | uint32_t tlvAlign = 0; |
994 | for (const OutputSection *osec : seg->getSections()) |
995 | if (isThreadLocalData(flags: osec->flags) && osec->align > tlvAlign) |
996 | tlvAlign = osec->align; |
997 | |
998 | for (OutputSection *osec : seg->getSections()) { |
999 | // Now that the output sections are sorted, assign the final |
1000 | // output section indices. |
1001 | if (!osec->isHidden()) |
1002 | osec->index = ++sectionIndex; |
1003 | if (isThreadLocalData(flags: osec->flags)) { |
1004 | if (!firstTLVDataSection) |
1005 | firstTLVDataSection = osec; |
1006 | osec->align = tlvAlign; |
1007 | } |
1008 | |
1009 | if (!isecPriorities.empty()) { |
1010 | if (auto *merged = dyn_cast<ConcatOutputSection>(Val: osec)) { |
1011 | llvm::stable_sort( |
1012 | Range&: merged->inputs, C: [&](InputSection *a, InputSection *b) { |
1013 | return isecPriorities.lookup(Val: a) < isecPriorities.lookup(Val: b); |
1014 | }); |
1015 | } |
1016 | } |
1017 | } |
1018 | } |
1019 | } |
1020 | |
1021 | template <class LP> void Writer::createOutputSections() { |
1022 | TimeTraceScope timeScope("Create output sections"); |
1023 | // First, create hidden sections |
1024 | stringTableSection = make<StringTableSection>(); |
1025 | symtabSection = makeSymtabSection<LP>(*stringTableSection); |
1026 | indirectSymtabSection = make<IndirectSymtabSection>(); |
1027 | if (config->adhocCodesign) |
1028 | codeSignatureSection = make<CodeSignatureSection>(); |
1029 | if (config->emitDataInCodeInfo) |
1030 | dataInCodeSection = make<DataInCodeSection>(); |
1031 | if (config->emitFunctionStarts) |
1032 | functionStartsSection = make<FunctionStartsSection>(); |
1033 | |
1034 | switch (config->outputType) { |
1035 | case MH_EXECUTE: |
1036 | make<PageZeroSection>(); |
1037 | break; |
1038 | case MH_DYLIB: |
1039 | case MH_BUNDLE: |
1040 | break; |
1041 | default: |
1042 | llvm_unreachable("unhandled output file type"); |
1043 | } |
1044 | |
1045 | // Then add input sections to output sections. |
1046 | for (ConcatInputSection *isec : inputSections) { |
1047 | if (isec->shouldOmitFromOutput()) |
1048 | continue; |
1049 | ConcatOutputSection *osec = cast<ConcatOutputSection>(Val: isec->parent); |
1050 | osec->addInput(input: isec); |
1051 | osec->inputOrder = |
1052 | std::min(a: osec->inputOrder, b: static_cast<int>(isec->outSecOff)); |
1053 | } |
1054 | |
1055 | // Once all the inputs are added, we can finalize the output section |
1056 | // properties and create the corresponding output segments. |
1057 | for (const auto &it : concatOutputSections) { |
1058 | StringRef segname = it.first.first; |
1059 | ConcatOutputSection *osec = it.second; |
1060 | assert(segname != segment_names::ld); |
1061 | if (osec->isNeeded()) { |
1062 | // See comment in ObjFile::splitEhFrames() |
1063 | if (osec->name == section_names::ehFrame && |
1064 | segname == segment_names::text) |
1065 | osec->align = target->wordSize; |
1066 | |
1067 | // MC keeps the default 1-byte alignment for __thread_vars, even though it |
1068 | // contains pointers that are fixed up by dyld, which requires proper |
1069 | // alignment. |
1070 | if (isThreadLocalVariables(flags: osec->flags)) |
1071 | osec->align = std::max<uint32_t>(a: osec->align, b: target->wordSize); |
1072 | |
1073 | getOrCreateOutputSegment(name: segname)->addOutputSection(os: osec); |
1074 | } |
1075 | } |
1076 | |
1077 | for (SyntheticSection *ssec : syntheticSections) { |
1078 | auto it = concatOutputSections.find(Key: {ssec->segname, ssec->name}); |
1079 | // We add all LinkEdit sections here because we don't know if they are |
1080 | // needed until their finalizeContents() methods get called later. While |
1081 | // this means that we add some redundant sections to __LINKEDIT, there is |
1082 | // is no redundancy in the output, as we do not emit section headers for |
1083 | // any LinkEdit sections. |
1084 | if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) { |
1085 | if (it == concatOutputSections.end()) { |
1086 | getOrCreateOutputSegment(name: ssec->segname)->addOutputSection(os: ssec); |
1087 | } else { |
1088 | fatal(msg: "section from "+ |
1089 | toString(file: it->second->firstSection()->getFile()) + |
1090 | " conflicts with synthetic section "+ ssec->segname + ","+ |
1091 | ssec->name); |
1092 | } |
1093 | } |
1094 | } |
1095 | |
1096 | // dyld requires __LINKEDIT segment to always exist (even if empty). |
1097 | linkEditSegment = getOrCreateOutputSegment(name: segment_names::linkEdit); |
1098 | } |
1099 | |
1100 | void Writer::finalizeAddresses() { |
1101 | TimeTraceScope timeScope("Finalize addresses"); |
1102 | uint64_t pageSize = target->getPageSize(); |
1103 | |
1104 | // We could parallelize this loop, but local benchmarking indicates it is |
1105 | // faster to do it all in the main thread. |
1106 | for (OutputSegment *seg : outputSegments) { |
1107 | if (seg == linkEditSegment) |
1108 | continue; |
1109 | for (OutputSection *osec : seg->getSections()) { |
1110 | if (!osec->isNeeded()) |
1111 | continue; |
1112 | // Other kinds of OutputSections have already been finalized. |
1113 | if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec)) |
1114 | concatOsec->finalizeContents(); |
1115 | } |
1116 | } |
1117 | |
1118 | // Ensure that segments (and the sections they contain) are allocated |
1119 | // addresses in ascending order, which dyld requires. |
1120 | // |
1121 | // Note that at this point, __LINKEDIT sections are empty, but we need to |
1122 | // determine addresses of other segments/sections before generating its |
1123 | // contents. |
1124 | for (OutputSegment *seg : outputSegments) { |
1125 | if (seg == linkEditSegment) |
1126 | continue; |
1127 | seg->addr = addr; |
1128 | assignAddresses(seg); |
1129 | // codesign / libstuff checks for segment ordering by verifying that |
1130 | // `fileOff + fileSize == next segment fileOff`. So we call |
1131 | // alignToPowerOf2() before (instead of after) computing fileSize to ensure |
1132 | // that the segments are contiguous. We handle addr / vmSize similarly for |
1133 | // the same reason. |
1134 | fileOff = alignToPowerOf2(Value: fileOff, Align: pageSize); |
1135 | addr = alignToPowerOf2(Value: addr, Align: pageSize); |
1136 | seg->vmSize = addr - seg->addr; |
1137 | seg->fileSize = fileOff - seg->fileOff; |
1138 | seg->assignAddressesToStartEndSymbols(); |
1139 | } |
1140 | } |
1141 | |
1142 | void Writer::finalizeLinkEditSegment() { |
1143 | TimeTraceScope timeScope("Finalize __LINKEDIT segment"); |
1144 | // Fill __LINKEDIT contents. |
1145 | std::array<LinkEditSection *, 10> linkEditSections{ |
1146 | in.rebase, in.binding, |
1147 | in.weakBinding, in.lazyBinding, |
1148 | in.exports, in.chainedFixups, |
1149 | symtabSection, indirectSymtabSection, |
1150 | dataInCodeSection, functionStartsSection, |
1151 | }; |
1152 | |
1153 | parallelForEach(Begin: linkEditSections.begin(), End: linkEditSections.end(), |
1154 | Fn: [](LinkEditSection *osec) { |
1155 | if (osec) |
1156 | osec->finalizeContents(); |
1157 | }); |
1158 | |
1159 | // Now that __LINKEDIT is filled out, do a proper calculation of its |
1160 | // addresses and offsets. |
1161 | linkEditSegment->addr = addr; |
1162 | assignAddresses(linkEditSegment); |
1163 | // No need to page-align fileOff / addr here since this is the last segment. |
1164 | linkEditSegment->vmSize = addr - linkEditSegment->addr; |
1165 | linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff; |
1166 | } |
1167 | |
1168 | void Writer::assignAddresses(OutputSegment *seg) { |
1169 | seg->fileOff = fileOff; |
1170 | |
1171 | for (OutputSection *osec : seg->getSections()) { |
1172 | if (!osec->isNeeded()) |
1173 | continue; |
1174 | addr = alignToPowerOf2(Value: addr, Align: osec->align); |
1175 | fileOff = alignToPowerOf2(Value: fileOff, Align: osec->align); |
1176 | osec->addr = addr; |
1177 | osec->fileOff = isZeroFill(flags: osec->flags) ? 0 : fileOff; |
1178 | osec->finalize(); |
1179 | osec->assignAddressesToStartEndSymbols(); |
1180 | |
1181 | addr += osec->getSize(); |
1182 | fileOff += osec->getFileSize(); |
1183 | } |
1184 | } |
1185 | |
1186 | void Writer::openFile() { |
1187 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
1188 | FileOutputBuffer::create(FilePath: config->outputFile, Size: fileOff, |
1189 | Flags: FileOutputBuffer::F_executable); |
1190 | |
1191 | if (!bufferOrErr) |
1192 | fatal(msg: "failed to open "+ config->outputFile + ": "+ |
1193 | llvm::toString(E: bufferOrErr.takeError())); |
1194 | buffer = std::move(*bufferOrErr); |
1195 | in.bufferStart = buffer->getBufferStart(); |
1196 | } |
1197 | |
1198 | void Writer::writeSections() { |
1199 | TimeTraceScope timeScope("Write output sections"); |
1200 | |
1201 | uint8_t *buf = buffer->getBufferStart(); |
1202 | std::vector<const OutputSection *> osecs; |
1203 | for (const OutputSegment *seg : outputSegments) |
1204 | append_range(C&: osecs, R: seg->getSections()); |
1205 | |
1206 | parallelForEach(Begin: osecs.begin(), End: osecs.end(), Fn: [&](const OutputSection *osec) { |
1207 | osec->writeTo(buf: buf + osec->fileOff); |
1208 | }); |
1209 | } |
1210 | |
1211 | void Writer::applyOptimizationHints() { |
1212 | if (config->arch() != AK_arm64 || config->ignoreOptimizationHints) |
1213 | return; |
1214 | |
1215 | uint8_t *buf = buffer->getBufferStart(); |
1216 | TimeTraceScope timeScope("Apply linker optimization hints"); |
1217 | parallelForEach(R&: inputFiles, Fn: [buf](const InputFile *file) { |
1218 | if (const auto *objFile = dyn_cast<ObjFile>(Val: file)) |
1219 | target->applyOptimizationHints(buf, *objFile); |
1220 | }); |
1221 | } |
1222 | |
1223 | // In order to utilize multiple cores, we first split the buffer into chunks, |
1224 | // compute a hash for each chunk, and then compute a hash value of the hash |
1225 | // values. |
1226 | void Writer::writeUuid() { |
1227 | TimeTraceScope timeScope("Computing UUID"); |
1228 | |
1229 | ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()}; |
1230 | std::vector<ArrayRef<uint8_t>> chunks = split(arr: data, chunkSize: 1024 * 1024); |
1231 | |
1232 | // Leave one slot for filename |
1233 | std::vector<uint64_t> hashes(chunks.size() + 1); |
1234 | parallelFor(Begin: 0, End: chunks.size(), |
1235 | Fn: [&](size_t i) { hashes[i] = xxh3_64bits(data: chunks[i]); }); |
1236 | // Append the output filename so that identical binaries with different names |
1237 | // don't get the same UUID. |
1238 | hashes[chunks.size()] = xxh3_64bits(data: sys::path::filename(path: config->finalOutput)); |
1239 | |
1240 | uint64_t digest = xxh3_64bits(data: {reinterpret_cast<uint8_t *>(hashes.data()), |
1241 | hashes.size() * sizeof(uint64_t)}); |
1242 | uuidCommand->writeUuid(digest); |
1243 | } |
1244 | |
1245 | // This is step 5 of the algorithm described in the class comment of |
1246 | // ChainedFixupsSection. |
1247 | void Writer::buildFixupChains() { |
1248 | if (!config->emitChainedFixups) |
1249 | return; |
1250 | |
1251 | const std::vector<Location> &loc = in.chainedFixups->getLocations(); |
1252 | if (loc.empty()) |
1253 | return; |
1254 | |
1255 | TimeTraceScope timeScope("Build fixup chains"); |
1256 | |
1257 | const uint64_t pageSize = target->getPageSize(); |
1258 | constexpr uint32_t stride = 4; // for DYLD_CHAINED_PTR_64 |
1259 | |
1260 | for (size_t i = 0, count = loc.size(); i < count;) { |
1261 | const OutputSegment *oseg = loc[i].isec->parent->parent; |
1262 | uint8_t *buf = buffer->getBufferStart() + oseg->fileOff; |
1263 | uint64_t pageIdx = loc[i].offset / pageSize; |
1264 | ++i; |
1265 | |
1266 | while (i < count && loc[i].isec->parent->parent == oseg && |
1267 | (loc[i].offset / pageSize) == pageIdx) { |
1268 | uint64_t offset = loc[i].offset - loc[i - 1].offset; |
1269 | |
1270 | auto fail = [&](Twine message) { |
1271 | error(msg: loc[i].isec->getSegName() + ","+ loc[i].isec->getName() + |
1272 | ", offset "+ |
1273 | Twine(loc[i].offset - loc[i].isec->parent->getSegmentOffset()) + |
1274 | ": "+ message); |
1275 | }; |
1276 | |
1277 | if (offset < target->wordSize) |
1278 | return fail("fixups overlap"); |
1279 | if (offset % stride != 0) |
1280 | return fail( |
1281 | "fixups are unaligned (offset "+ Twine(offset) + |
1282 | " is not a multiple of the stride). Re-link with -no_fixup_chains"); |
1283 | |
1284 | // The "next" field is in the same location for bind and rebase entries. |
1285 | reinterpret_cast<dyld_chained_ptr_64_bind *>(buf + loc[i - 1].offset) |
1286 | ->next = offset / stride; |
1287 | ++i; |
1288 | } |
1289 | } |
1290 | } |
1291 | |
1292 | void Writer::writeCodeSignature() { |
1293 | if (codeSignatureSection) { |
1294 | TimeTraceScope timeScope("Write code signature"); |
1295 | codeSignatureSection->writeHashes(buf: buffer->getBufferStart()); |
1296 | } |
1297 | } |
1298 | |
1299 | void Writer::writeOutputFile() { |
1300 | TimeTraceScope timeScope("Write output file"); |
1301 | openFile(); |
1302 | reportPendingUndefinedSymbols(); |
1303 | if (errorCount()) |
1304 | return; |
1305 | writeSections(); |
1306 | applyOptimizationHints(); |
1307 | buildFixupChains(); |
1308 | if (config->generateUuid) |
1309 | writeUuid(); |
1310 | writeCodeSignature(); |
1311 | |
1312 | if (auto e = buffer->commit()) |
1313 | fatal(msg: "failed to write output '"+ buffer->getPath() + |
1314 | "': "+ toString(E: std::move(e))); |
1315 | } |
1316 | |
1317 | template <class LP> void Writer::run() { |
1318 | treatSpecialUndefineds(); |
1319 | if (config->entry && needsBinding(sym: config->entry)) |
1320 | in.stubs->addEntry(config->entry); |
1321 | |
1322 | // Canonicalization of all pointers to InputSections should be handled by |
1323 | // these two scan* methods. I.e. from this point onward, for all live |
1324 | // InputSections, we should have `isec->canonical() == isec`. |
1325 | scanSymbols(); |
1326 | if (in.objcStubs->isNeeded()) |
1327 | in.objcStubs->setUp(); |
1328 | if (in.objcMethList->isNeeded()) |
1329 | in.objcMethList->setUp(); |
1330 | scanRelocations(); |
1331 | if (in.initOffsets->isNeeded()) |
1332 | in.initOffsets->setUp(); |
1333 | |
1334 | // Do not proceed if there were undefined or duplicate symbols. |
1335 | reportPendingUndefinedSymbols(); |
1336 | reportPendingDuplicateSymbols(); |
1337 | if (errorCount()) |
1338 | return; |
1339 | |
1340 | if (in.stubHelper && in.stubHelper->isNeeded()) |
1341 | in.stubHelper->setUp(); |
1342 | |
1343 | if (in.objCImageInfo->isNeeded()) |
1344 | in.objCImageInfo->finalizeContents(); |
1345 | |
1346 | // At this point, we should know exactly which output sections are needed, |
1347 | // courtesy of scanSymbols() and scanRelocations(). |
1348 | createOutputSections<LP>(); |
1349 | |
1350 | // After this point, we create no new segments; HOWEVER, we might |
1351 | // yet create branch-range extension thunks for architectures whose |
1352 | // hardware call instructions have limited range, e.g., ARM(64). |
1353 | // The thunks are created as InputSections interspersed among |
1354 | // the ordinary __TEXT,_text InputSections. |
1355 | sortSegmentsAndSections(); |
1356 | createLoadCommands<LP>(); |
1357 | finalizeAddresses(); |
1358 | |
1359 | llvm::thread mapFileWriter([&] { |
1360 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1361 | timeTraceProfilerInitialize(TimeTraceGranularity: config->timeTraceGranularity, ProcName: "writeMapFile"); |
1362 | writeMapFile(); |
1363 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1364 | timeTraceProfilerFinishThread(); |
1365 | }); |
1366 | |
1367 | finalizeLinkEditSegment(); |
1368 | writeOutputFile(); |
1369 | mapFileWriter.join(); |
1370 | } |
1371 | |
1372 | template <class LP> void macho::writeResult() { Writer().run<LP>(); } |
1373 | |
1374 | void macho::resetWriter() { LCDylib::resetInstanceCount(); } |
1375 | |
1376 | void macho::createSyntheticSections() { |
1377 | in.header = make<MachHeaderSection>(); |
1378 | if (config->dedupStrings) |
1379 | in.cStringSection = |
1380 | make<DeduplicatedCStringSection>(args: section_names::cString); |
1381 | else |
1382 | in.cStringSection = make<CStringSection>(args: section_names::cString); |
1383 | in.objcMethnameSection = |
1384 | make<DeduplicatedCStringSection>(args: section_names::objcMethname); |
1385 | in.wordLiteralSection = make<WordLiteralSection>(); |
1386 | if (config->emitChainedFixups) { |
1387 | in.chainedFixups = make<ChainedFixupsSection>(); |
1388 | } else { |
1389 | in.rebase = make<RebaseSection>(); |
1390 | in.binding = make<BindingSection>(); |
1391 | in.weakBinding = make<WeakBindingSection>(); |
1392 | in.lazyBinding = make<LazyBindingSection>(); |
1393 | in.lazyPointers = make<LazyPointerSection>(); |
1394 | in.stubHelper = make<StubHelperSection>(); |
1395 | } |
1396 | in.exports = make<ExportSection>(); |
1397 | in.got = make<GotSection>(); |
1398 | in.tlvPointers = make<TlvPointerSection>(); |
1399 | in.stubs = make<StubsSection>(); |
1400 | in.objcStubs = make<ObjCStubsSection>(); |
1401 | in.unwindInfo = makeUnwindInfoSection(); |
1402 | in.objCImageInfo = make<ObjCImageInfoSection>(); |
1403 | in.initOffsets = make<InitOffsetsSection>(); |
1404 | in.objcMethList = make<ObjCMethListSection>(); |
1405 | |
1406 | // This section contains space for just a single word, and will be used by |
1407 | // dyld to cache an address to the image loader it uses. |
1408 | uint8_t *arr = bAlloc().Allocate<uint8_t>(Num: target->wordSize); |
1409 | memset(s: arr, c: 0, n: target->wordSize); |
1410 | in.imageLoaderCache = makeSyntheticInputSection( |
1411 | segName: segment_names::data, sectName: section_names::data, flags: S_REGULAR, |
1412 | data: ArrayRef<uint8_t>{arr, target->wordSize}, |
1413 | /*align=*/target->wordSize); |
1414 | assert(in.imageLoaderCache->live); |
1415 | } |
1416 | |
1417 | OutputSection *macho::firstTLVDataSection = nullptr; |
1418 | |
1419 | template void macho::writeResult<LP64>(); |
1420 | template void macho::writeResult<ILP32>(); |
1421 |
Definitions
- Writer
- Writer
- LCDyldInfo
- LCDyldInfo
- getSize
- writeTo
- LCSubFramework
- LCSubFramework
- getSize
- writeTo
- LCFunctionStarts
- LCFunctionStarts
- getSize
- writeTo
- LCDataInCode
- LCDataInCode
- getSize
- writeTo
- LCDysymtab
- LCDysymtab
- getSize
- writeTo
- LCSegment
- LCSegment
- getSize
- writeTo
- LCMain
- getSize
- writeTo
- LCSymtab
- LCSymtab
- getSize
- writeTo
- LCDylib
- LCDylib
- getSize
- writeTo
- getInstanceCount
- resetInstanceCount
- instanceCount
- LCLoadDylinker
- getSize
- writeTo
- LCRPath
- LCRPath
- getSize
- writeTo
- LCSubClient
- LCSubClient
- getSize
- writeTo
- LCDyldEnv
- LCDyldEnv
- getSize
- writeTo
- LCMinVersion
- LCMinVersion
- getSize
- writeTo
- LCBuildVersion
- LCBuildVersion
- getSize
- writeTo
- LCUuid
- getSize
- writeTo
- writeUuid
- LCEncryptionInfo
- getSize
- writeTo
- LCCodeSignature
- LCCodeSignature
- getSize
- writeTo
- LCExportsTrie
- LCExportsTrie
- getSize
- writeTo
- LCChainedFixups
- LCChainedFixups
- getSize
- writeTo
- treatSpecialUndefineds
- prepareSymbolRelocation
- scanRelocations
- addNonWeakDefinition
- scanSymbols
- useLCBuildVersion
- createLoadCommands
- sortSegmentsAndSections
- createOutputSections
- finalizeAddresses
- finalizeLinkEditSegment
- assignAddresses
- openFile
- writeSections
- applyOptimizationHints
- writeUuid
- buildFixupChains
- writeCodeSignature
- writeOutputFile
- run
- writeResult
- resetWriter
- createSyntheticSections
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more