1 | //===- Writer.cpp ---------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Writer.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputFiles.h" |
13 | #include "InputSection.h" |
14 | #include "MapFile.h" |
15 | #include "OutputSection.h" |
16 | #include "OutputSegment.h" |
17 | #include "SectionPriorities.h" |
18 | #include "SymbolTable.h" |
19 | #include "Symbols.h" |
20 | #include "SyntheticSections.h" |
21 | #include "Target.h" |
22 | #include "UnwindInfoSection.h" |
23 | |
24 | #include "lld/Common/Arrays.h" |
25 | #include "lld/Common/CommonLinkerContext.h" |
26 | #include "llvm/BinaryFormat/MachO.h" |
27 | #include "llvm/Config/llvm-config.h" |
28 | #include "llvm/Support/LEB128.h" |
29 | #include "llvm/Support/Parallel.h" |
30 | #include "llvm/Support/Path.h" |
31 | #include "llvm/Support/ThreadPool.h" |
32 | #include "llvm/Support/TimeProfiler.h" |
33 | #include "llvm/Support/xxhash.h" |
34 | |
35 | #include <algorithm> |
36 | |
37 | using namespace llvm; |
38 | using namespace llvm::MachO; |
39 | using namespace llvm::sys; |
40 | using namespace lld; |
41 | using namespace lld::macho; |
42 | |
43 | namespace { |
44 | class LCUuid; |
45 | |
46 | class Writer { |
47 | public: |
48 | Writer() : buffer(errorHandler().outputBuffer) {} |
49 | |
50 | void treatSpecialUndefineds(); |
51 | void scanRelocations(); |
52 | void scanSymbols(); |
53 | template <class LP> void createOutputSections(); |
54 | template <class LP> void createLoadCommands(); |
55 | void finalizeAddresses(); |
56 | void finalizeLinkEditSegment(); |
57 | void assignAddresses(OutputSegment *); |
58 | |
59 | void openFile(); |
60 | void writeSections(); |
61 | void applyOptimizationHints(); |
62 | void buildFixupChains(); |
63 | void writeUuid(); |
64 | void writeCodeSignature(); |
65 | void writeOutputFile(); |
66 | |
67 | template <class LP> void run(); |
68 | |
69 | DefaultThreadPool threadPool; |
70 | std::unique_ptr<FileOutputBuffer> &buffer; |
71 | uint64_t addr = 0; |
72 | uint64_t fileOff = 0; |
73 | MachHeaderSection * = nullptr; |
74 | StringTableSection *stringTableSection = nullptr; |
75 | SymtabSection *symtabSection = nullptr; |
76 | IndirectSymtabSection *indirectSymtabSection = nullptr; |
77 | CodeSignatureSection *codeSignatureSection = nullptr; |
78 | DataInCodeSection *dataInCodeSection = nullptr; |
79 | FunctionStartsSection *functionStartsSection = nullptr; |
80 | |
81 | LCUuid *uuidCommand = nullptr; |
82 | OutputSegment *linkEditSegment = nullptr; |
83 | }; |
84 | |
85 | // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. |
86 | class LCDyldInfo final : public LoadCommand { |
87 | public: |
88 | LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, |
89 | WeakBindingSection *weakBindingSection, |
90 | LazyBindingSection *lazyBindingSection, |
91 | ExportSection *exportSection) |
92 | : rebaseSection(rebaseSection), bindingSection(bindingSection), |
93 | weakBindingSection(weakBindingSection), |
94 | lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} |
95 | |
96 | uint32_t getSize() const override { return sizeof(dyld_info_command); } |
97 | |
98 | void writeTo(uint8_t *buf) const override { |
99 | auto *c = reinterpret_cast<dyld_info_command *>(buf); |
100 | c->cmd = LC_DYLD_INFO_ONLY; |
101 | c->cmdsize = getSize(); |
102 | if (rebaseSection->isNeeded()) { |
103 | c->rebase_off = rebaseSection->fileOff; |
104 | c->rebase_size = rebaseSection->getFileSize(); |
105 | } |
106 | if (bindingSection->isNeeded()) { |
107 | c->bind_off = bindingSection->fileOff; |
108 | c->bind_size = bindingSection->getFileSize(); |
109 | } |
110 | if (weakBindingSection->isNeeded()) { |
111 | c->weak_bind_off = weakBindingSection->fileOff; |
112 | c->weak_bind_size = weakBindingSection->getFileSize(); |
113 | } |
114 | if (lazyBindingSection->isNeeded()) { |
115 | c->lazy_bind_off = lazyBindingSection->fileOff; |
116 | c->lazy_bind_size = lazyBindingSection->getFileSize(); |
117 | } |
118 | if (exportSection->isNeeded()) { |
119 | c->export_off = exportSection->fileOff; |
120 | c->export_size = exportSection->getFileSize(); |
121 | } |
122 | } |
123 | |
124 | RebaseSection *rebaseSection; |
125 | BindingSection *bindingSection; |
126 | WeakBindingSection *weakBindingSection; |
127 | LazyBindingSection *lazyBindingSection; |
128 | ExportSection *exportSection; |
129 | }; |
130 | |
131 | class LCSubFramework final : public LoadCommand { |
132 | public: |
133 | LCSubFramework(StringRef umbrella) : umbrella(umbrella) {} |
134 | |
135 | uint32_t getSize() const override { |
136 | return alignToPowerOf2(Value: sizeof(sub_framework_command) + umbrella.size() + 1, |
137 | Align: target->wordSize); |
138 | } |
139 | |
140 | void writeTo(uint8_t *buf) const override { |
141 | auto *c = reinterpret_cast<sub_framework_command *>(buf); |
142 | buf += sizeof(sub_framework_command); |
143 | |
144 | c->cmd = LC_SUB_FRAMEWORK; |
145 | c->cmdsize = getSize(); |
146 | c->umbrella = sizeof(sub_framework_command); |
147 | |
148 | memcpy(dest: buf, src: umbrella.data(), n: umbrella.size()); |
149 | buf[umbrella.size()] = '\0'; |
150 | } |
151 | |
152 | private: |
153 | const StringRef umbrella; |
154 | }; |
155 | |
156 | class LCFunctionStarts final : public LoadCommand { |
157 | public: |
158 | explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection) |
159 | : functionStartsSection(functionStartsSection) {} |
160 | |
161 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
162 | |
163 | void writeTo(uint8_t *buf) const override { |
164 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
165 | c->cmd = LC_FUNCTION_STARTS; |
166 | c->cmdsize = getSize(); |
167 | c->dataoff = functionStartsSection->fileOff; |
168 | c->datasize = functionStartsSection->getFileSize(); |
169 | } |
170 | |
171 | private: |
172 | FunctionStartsSection *functionStartsSection; |
173 | }; |
174 | |
175 | class LCDataInCode final : public LoadCommand { |
176 | public: |
177 | explicit LCDataInCode(DataInCodeSection *dataInCodeSection) |
178 | : dataInCodeSection(dataInCodeSection) {} |
179 | |
180 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
181 | |
182 | void writeTo(uint8_t *buf) const override { |
183 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
184 | c->cmd = LC_DATA_IN_CODE; |
185 | c->cmdsize = getSize(); |
186 | c->dataoff = dataInCodeSection->fileOff; |
187 | c->datasize = dataInCodeSection->getFileSize(); |
188 | } |
189 | |
190 | private: |
191 | DataInCodeSection *dataInCodeSection; |
192 | }; |
193 | |
194 | class LCDysymtab final : public LoadCommand { |
195 | public: |
196 | LCDysymtab(SymtabSection *symtabSection, |
197 | IndirectSymtabSection *indirectSymtabSection) |
198 | : symtabSection(symtabSection), |
199 | indirectSymtabSection(indirectSymtabSection) {} |
200 | |
201 | uint32_t getSize() const override { return sizeof(dysymtab_command); } |
202 | |
203 | void writeTo(uint8_t *buf) const override { |
204 | auto *c = reinterpret_cast<dysymtab_command *>(buf); |
205 | c->cmd = LC_DYSYMTAB; |
206 | c->cmdsize = getSize(); |
207 | |
208 | c->ilocalsym = 0; |
209 | c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); |
210 | c->nextdefsym = symtabSection->getNumExternalSymbols(); |
211 | c->iundefsym = c->iextdefsym + c->nextdefsym; |
212 | c->nundefsym = symtabSection->getNumUndefinedSymbols(); |
213 | |
214 | c->indirectsymoff = indirectSymtabSection->fileOff; |
215 | c->nindirectsyms = indirectSymtabSection->getNumSymbols(); |
216 | } |
217 | |
218 | SymtabSection *symtabSection; |
219 | IndirectSymtabSection *indirectSymtabSection; |
220 | }; |
221 | |
222 | template <class LP> class LCSegment final : public LoadCommand { |
223 | public: |
224 | LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {} |
225 | |
226 | uint32_t getSize() const override { |
227 | return sizeof(typename LP::segment_command) + |
228 | seg->numNonHiddenSections() * sizeof(typename LP::section); |
229 | } |
230 | |
231 | void writeTo(uint8_t *buf) const override { |
232 | using SegmentCommand = typename LP::segment_command; |
233 | using = typename LP::section; |
234 | |
235 | auto *c = reinterpret_cast<SegmentCommand *>(buf); |
236 | buf += sizeof(SegmentCommand); |
237 | |
238 | c->cmd = LP::segmentLCType; |
239 | c->cmdsize = getSize(); |
240 | memcpy(c->segname, name.data(), name.size()); |
241 | c->fileoff = seg->fileOff; |
242 | c->maxprot = seg->maxProt; |
243 | c->initprot = seg->initProt; |
244 | |
245 | c->vmaddr = seg->addr; |
246 | c->vmsize = seg->vmSize; |
247 | c->filesize = seg->fileSize; |
248 | c->nsects = seg->numNonHiddenSections(); |
249 | c->flags = seg->flags; |
250 | |
251 | for (const OutputSection *osec : seg->getSections()) { |
252 | if (osec->isHidden()) |
253 | continue; |
254 | |
255 | auto *sectHdr = reinterpret_cast<SectionHeader *>(buf); |
256 | buf += sizeof(SectionHeader); |
257 | |
258 | memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); |
259 | memcpy(sectHdr->segname, name.data(), name.size()); |
260 | |
261 | sectHdr->addr = osec->addr; |
262 | sectHdr->offset = osec->fileOff; |
263 | sectHdr->align = Log2_32(Value: osec->align); |
264 | sectHdr->flags = osec->flags; |
265 | sectHdr->size = osec->getSize(); |
266 | sectHdr->reserved1 = osec->reserved1; |
267 | sectHdr->reserved2 = osec->reserved2; |
268 | } |
269 | } |
270 | |
271 | private: |
272 | StringRef name; |
273 | OutputSegment *seg; |
274 | }; |
275 | |
276 | class LCMain final : public LoadCommand { |
277 | uint32_t getSize() const override { |
278 | return sizeof(structs::entry_point_command); |
279 | } |
280 | |
281 | void writeTo(uint8_t *buf) const override { |
282 | auto *c = reinterpret_cast<structs::entry_point_command *>(buf); |
283 | c->cmd = LC_MAIN; |
284 | c->cmdsize = getSize(); |
285 | |
286 | if (config->entry->isInStubs()) |
287 | c->entryoff = |
288 | in.stubs->fileOff + config->entry->stubsIndex * target->stubSize; |
289 | else |
290 | c->entryoff = config->entry->getVA() - in.header->addr; |
291 | |
292 | c->stacksize = 0; |
293 | } |
294 | }; |
295 | |
296 | class LCSymtab final : public LoadCommand { |
297 | public: |
298 | LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) |
299 | : symtabSection(symtabSection), stringTableSection(stringTableSection) {} |
300 | |
301 | uint32_t getSize() const override { return sizeof(symtab_command); } |
302 | |
303 | void writeTo(uint8_t *buf) const override { |
304 | auto *c = reinterpret_cast<symtab_command *>(buf); |
305 | c->cmd = LC_SYMTAB; |
306 | c->cmdsize = getSize(); |
307 | c->symoff = symtabSection->fileOff; |
308 | c->nsyms = symtabSection->getNumSymbols(); |
309 | c->stroff = stringTableSection->fileOff; |
310 | c->strsize = stringTableSection->getFileSize(); |
311 | } |
312 | |
313 | SymtabSection *symtabSection = nullptr; |
314 | StringTableSection *stringTableSection = nullptr; |
315 | }; |
316 | |
317 | // There are several dylib load commands that share the same structure: |
318 | // * LC_LOAD_DYLIB |
319 | // * LC_ID_DYLIB |
320 | // * LC_REEXPORT_DYLIB |
321 | class LCDylib final : public LoadCommand { |
322 | public: |
323 | LCDylib(LoadCommandType type, StringRef path, |
324 | uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0) |
325 | : type(type), path(path), compatibilityVersion(compatibilityVersion), |
326 | currentVersion(currentVersion) { |
327 | instanceCount++; |
328 | } |
329 | |
330 | uint32_t getSize() const override { |
331 | return alignToPowerOf2(Value: sizeof(dylib_command) + path.size() + 1, |
332 | Align: target->wordSize); |
333 | } |
334 | |
335 | void writeTo(uint8_t *buf) const override { |
336 | auto *c = reinterpret_cast<dylib_command *>(buf); |
337 | buf += sizeof(dylib_command); |
338 | |
339 | c->cmd = type; |
340 | c->cmdsize = getSize(); |
341 | c->dylib.name = sizeof(dylib_command); |
342 | c->dylib.timestamp = 0; |
343 | c->dylib.compatibility_version = compatibilityVersion; |
344 | c->dylib.current_version = currentVersion; |
345 | |
346 | memcpy(dest: buf, src: path.data(), n: path.size()); |
347 | buf[path.size()] = '\0'; |
348 | } |
349 | |
350 | static uint32_t getInstanceCount() { return instanceCount; } |
351 | static void resetInstanceCount() { instanceCount = 0; } |
352 | |
353 | private: |
354 | LoadCommandType type; |
355 | StringRef path; |
356 | uint32_t compatibilityVersion; |
357 | uint32_t currentVersion; |
358 | static uint32_t instanceCount; |
359 | }; |
360 | |
361 | uint32_t LCDylib::instanceCount = 0; |
362 | |
363 | class LCLoadDylinker final : public LoadCommand { |
364 | public: |
365 | uint32_t getSize() const override { |
366 | return alignToPowerOf2(Value: sizeof(dylinker_command) + path.size() + 1, |
367 | Align: target->wordSize); |
368 | } |
369 | |
370 | void writeTo(uint8_t *buf) const override { |
371 | auto *c = reinterpret_cast<dylinker_command *>(buf); |
372 | buf += sizeof(dylinker_command); |
373 | |
374 | c->cmd = LC_LOAD_DYLINKER; |
375 | c->cmdsize = getSize(); |
376 | c->name = sizeof(dylinker_command); |
377 | |
378 | memcpy(dest: buf, src: path.data(), n: path.size()); |
379 | buf[path.size()] = '\0'; |
380 | } |
381 | |
382 | private: |
383 | // Recent versions of Darwin won't run any binary that has dyld at a |
384 | // different location. |
385 | const StringRef path = "/usr/lib/dyld" ; |
386 | }; |
387 | |
388 | class LCRPath final : public LoadCommand { |
389 | public: |
390 | explicit LCRPath(StringRef path) : path(path) {} |
391 | |
392 | uint32_t getSize() const override { |
393 | return alignToPowerOf2(Value: sizeof(rpath_command) + path.size() + 1, |
394 | Align: target->wordSize); |
395 | } |
396 | |
397 | void writeTo(uint8_t *buf) const override { |
398 | auto *c = reinterpret_cast<rpath_command *>(buf); |
399 | buf += sizeof(rpath_command); |
400 | |
401 | c->cmd = LC_RPATH; |
402 | c->cmdsize = getSize(); |
403 | c->path = sizeof(rpath_command); |
404 | |
405 | memcpy(dest: buf, src: path.data(), n: path.size()); |
406 | buf[path.size()] = '\0'; |
407 | } |
408 | |
409 | private: |
410 | StringRef path; |
411 | }; |
412 | |
413 | class LCDyldEnv final : public LoadCommand { |
414 | public: |
415 | explicit LCDyldEnv(StringRef name) : name(name) {} |
416 | |
417 | uint32_t getSize() const override { |
418 | return alignToPowerOf2(Value: sizeof(dyld_env_command) + name.size() + 1, |
419 | Align: target->wordSize); |
420 | } |
421 | |
422 | void writeTo(uint8_t *buf) const override { |
423 | auto *c = reinterpret_cast<dyld_env_command *>(buf); |
424 | buf += sizeof(dyld_env_command); |
425 | |
426 | c->cmd = LC_DYLD_ENVIRONMENT; |
427 | c->cmdsize = getSize(); |
428 | c->name = sizeof(dyld_env_command); |
429 | |
430 | memcpy(dest: buf, src: name.data(), n: name.size()); |
431 | buf[name.size()] = '\0'; |
432 | } |
433 | |
434 | private: |
435 | StringRef name; |
436 | }; |
437 | |
438 | class LCMinVersion final : public LoadCommand { |
439 | public: |
440 | explicit LCMinVersion(const PlatformInfo &platformInfo) |
441 | : platformInfo(platformInfo) {} |
442 | |
443 | uint32_t getSize() const override { return sizeof(version_min_command); } |
444 | |
445 | void writeTo(uint8_t *buf) const override { |
446 | auto *c = reinterpret_cast<version_min_command *>(buf); |
447 | switch (platformInfo.target.Platform) { |
448 | case PLATFORM_MACOS: |
449 | c->cmd = LC_VERSION_MIN_MACOSX; |
450 | break; |
451 | case PLATFORM_IOS: |
452 | case PLATFORM_IOSSIMULATOR: |
453 | c->cmd = LC_VERSION_MIN_IPHONEOS; |
454 | break; |
455 | case PLATFORM_TVOS: |
456 | case PLATFORM_TVOSSIMULATOR: |
457 | c->cmd = LC_VERSION_MIN_TVOS; |
458 | break; |
459 | case PLATFORM_WATCHOS: |
460 | case PLATFORM_WATCHOSSIMULATOR: |
461 | c->cmd = LC_VERSION_MIN_WATCHOS; |
462 | break; |
463 | default: |
464 | llvm_unreachable("invalid platform" ); |
465 | break; |
466 | } |
467 | c->cmdsize = getSize(); |
468 | c->version = encodeVersion(version: platformInfo.target.MinDeployment); |
469 | c->sdk = encodeVersion(version: platformInfo.sdk); |
470 | } |
471 | |
472 | private: |
473 | const PlatformInfo &platformInfo; |
474 | }; |
475 | |
476 | class LCBuildVersion final : public LoadCommand { |
477 | public: |
478 | explicit LCBuildVersion(const PlatformInfo &platformInfo) |
479 | : platformInfo(platformInfo) {} |
480 | |
481 | const int ntools = 1; |
482 | |
483 | uint32_t getSize() const override { |
484 | return sizeof(build_version_command) + ntools * sizeof(build_tool_version); |
485 | } |
486 | |
487 | void writeTo(uint8_t *buf) const override { |
488 | auto *c = reinterpret_cast<build_version_command *>(buf); |
489 | c->cmd = LC_BUILD_VERSION; |
490 | c->cmdsize = getSize(); |
491 | |
492 | c->platform = static_cast<uint32_t>(platformInfo.target.Platform); |
493 | c->minos = encodeVersion(version: platformInfo.target.MinDeployment); |
494 | c->sdk = encodeVersion(version: platformInfo.sdk); |
495 | |
496 | c->ntools = ntools; |
497 | auto *t = reinterpret_cast<build_tool_version *>(&c[1]); |
498 | t->tool = TOOL_LLD; |
499 | t->version = encodeVersion(version: VersionTuple( |
500 | LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH)); |
501 | } |
502 | |
503 | private: |
504 | const PlatformInfo &platformInfo; |
505 | }; |
506 | |
507 | // Stores a unique identifier for the output file based on an MD5 hash of its |
508 | // contents. In order to hash the contents, we must first write them, but |
509 | // LC_UUID itself must be part of the written contents in order for all the |
510 | // offsets to be calculated correctly. We resolve this circular paradox by |
511 | // first writing an LC_UUID with an all-zero UUID, then updating the UUID with |
512 | // its real value later. |
513 | class LCUuid final : public LoadCommand { |
514 | public: |
515 | uint32_t getSize() const override { return sizeof(uuid_command); } |
516 | |
517 | void writeTo(uint8_t *buf) const override { |
518 | auto *c = reinterpret_cast<uuid_command *>(buf); |
519 | c->cmd = LC_UUID; |
520 | c->cmdsize = getSize(); |
521 | uuidBuf = c->uuid; |
522 | } |
523 | |
524 | void writeUuid(uint64_t digest) const { |
525 | // xxhash only gives us 8 bytes, so put some fixed data in the other half. |
526 | static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size" ); |
527 | memcpy(dest: uuidBuf, src: "LLD\xa1UU1D" , n: 8); |
528 | memcpy(dest: uuidBuf + 8, src: &digest, n: 8); |
529 | |
530 | // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in |
531 | // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't |
532 | // want to lose bits of the digest in byte 8, so swap that with a byte of |
533 | // fixed data that happens to have the right bits set. |
534 | std::swap(a&: uuidBuf[3], b&: uuidBuf[8]); |
535 | |
536 | // Claim that this is an MD5-based hash. It isn't, but this signals that |
537 | // this is not a time-based and not a random hash. MD5 seems like the least |
538 | // bad lie we can put here. |
539 | assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3" ); |
540 | assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2" ); |
541 | } |
542 | |
543 | mutable uint8_t *uuidBuf; |
544 | }; |
545 | |
546 | template <class LP> class LCEncryptionInfo final : public LoadCommand { |
547 | public: |
548 | uint32_t getSize() const override { |
549 | return sizeof(typename LP::encryption_info_command); |
550 | } |
551 | |
552 | void writeTo(uint8_t *buf) const override { |
553 | using EncryptionInfo = typename LP::encryption_info_command; |
554 | auto *c = reinterpret_cast<EncryptionInfo *>(buf); |
555 | buf += sizeof(EncryptionInfo); |
556 | c->cmd = LP::encryptionInfoLCType; |
557 | c->cmdsize = getSize(); |
558 | c->cryptoff = in.header->getSize(); |
559 | auto it = find_if(outputSegments, [](const OutputSegment *seg) { |
560 | return seg->name == segment_names::text; |
561 | }); |
562 | assert(it != outputSegments.end()); |
563 | c->cryptsize = (*it)->fileSize - c->cryptoff; |
564 | } |
565 | }; |
566 | |
567 | class LCCodeSignature final : public LoadCommand { |
568 | public: |
569 | LCCodeSignature(CodeSignatureSection *section) : section(section) {} |
570 | |
571 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
572 | |
573 | void writeTo(uint8_t *buf) const override { |
574 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
575 | c->cmd = LC_CODE_SIGNATURE; |
576 | c->cmdsize = getSize(); |
577 | c->dataoff = static_cast<uint32_t>(section->fileOff); |
578 | c->datasize = section->getSize(); |
579 | } |
580 | |
581 | CodeSignatureSection *section; |
582 | }; |
583 | |
584 | class LCExportsTrie final : public LoadCommand { |
585 | public: |
586 | LCExportsTrie(ExportSection *section) : section(section) {} |
587 | |
588 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
589 | |
590 | void writeTo(uint8_t *buf) const override { |
591 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
592 | c->cmd = LC_DYLD_EXPORTS_TRIE; |
593 | c->cmdsize = getSize(); |
594 | c->dataoff = section->fileOff; |
595 | c->datasize = section->getSize(); |
596 | } |
597 | |
598 | ExportSection *section; |
599 | }; |
600 | |
601 | class LCChainedFixups final : public LoadCommand { |
602 | public: |
603 | LCChainedFixups(ChainedFixupsSection *section) : section(section) {} |
604 | |
605 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
606 | |
607 | void writeTo(uint8_t *buf) const override { |
608 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
609 | c->cmd = LC_DYLD_CHAINED_FIXUPS; |
610 | c->cmdsize = getSize(); |
611 | c->dataoff = section->fileOff; |
612 | c->datasize = section->getSize(); |
613 | } |
614 | |
615 | ChainedFixupsSection *section; |
616 | }; |
617 | |
618 | } // namespace |
619 | |
620 | void Writer::treatSpecialUndefineds() { |
621 | if (config->entry) |
622 | if (auto *undefined = dyn_cast<Undefined>(Val: config->entry)) |
623 | treatUndefinedSymbol(*undefined, source: "the entry point" ); |
624 | |
625 | // FIXME: This prints symbols that are undefined both in input files and |
626 | // via -u flag twice. |
627 | for (const Symbol *sym : config->explicitUndefineds) { |
628 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
629 | treatUndefinedSymbol(*undefined, source: "-u" ); |
630 | } |
631 | // Literal exported-symbol names must be defined, but glob |
632 | // patterns need not match. |
633 | for (const CachedHashStringRef &cachedName : |
634 | config->exportedSymbols.literals) { |
635 | if (const Symbol *sym = symtab->find(name: cachedName)) |
636 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
637 | treatUndefinedSymbol(*undefined, source: "-exported_symbol(s_list)" ); |
638 | } |
639 | } |
640 | |
641 | static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, |
642 | const lld::macho::Reloc &r) { |
643 | assert(sym->isLive()); |
644 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type: r.type); |
645 | |
646 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) { |
647 | if (needsBinding(sym)) |
648 | in.stubs->addEntry(sym); |
649 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) { |
650 | if (relocAttrs.hasAttr(b: RelocAttrBits::POINTER) || needsBinding(sym)) |
651 | in.got->addEntry(sym); |
652 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) { |
653 | if (needsBinding(sym)) |
654 | in.tlvPointers->addEntry(sym); |
655 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED)) { |
656 | // References from thread-local variable sections are treated as offsets |
657 | // relative to the start of the referent section, and therefore have no |
658 | // need of rebase opcodes. |
659 | if (!(isThreadLocalVariables(flags: isec->getFlags()) && isa<Defined>(Val: sym))) |
660 | addNonLazyBindingEntries(sym, isec, offset: r.offset, addend: r.addend); |
661 | } |
662 | } |
663 | |
664 | void Writer::scanRelocations() { |
665 | TimeTraceScope timeScope("Scan relocations" ); |
666 | |
667 | // This can't use a for-each loop: It calls treatUndefinedSymbol(), which can |
668 | // add to inputSections, which invalidates inputSections's iterators. |
669 | for (size_t i = 0; i < inputSections.size(); ++i) { |
670 | ConcatInputSection *isec = inputSections[i]; |
671 | |
672 | if (isec->shouldOmitFromOutput()) |
673 | continue; |
674 | |
675 | for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { |
676 | lld::macho::Reloc &r = *it; |
677 | |
678 | // Canonicalize the referent so that later accesses in Writer won't |
679 | // have to worry about it. |
680 | if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) |
681 | r.referent = referentIsec->canonical(); |
682 | |
683 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
684 | // Skip over the following UNSIGNED relocation -- it's just there as the |
685 | // minuend, and doesn't have the usual UNSIGNED semantics. We don't want |
686 | // to emit rebase opcodes for it. |
687 | ++it; |
688 | // Canonicalize the referent so that later accesses in Writer won't |
689 | // have to worry about it. |
690 | if (auto *referentIsec = it->referent.dyn_cast<InputSection *>()) |
691 | it->referent = referentIsec->canonical(); |
692 | continue; |
693 | } |
694 | if (auto *sym = r.referent.dyn_cast<Symbol *>()) { |
695 | if (auto *undefined = dyn_cast<Undefined>(Val: sym)) |
696 | treatUndefinedSymbol(*undefined, isec, offset: r.offset); |
697 | // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. |
698 | if (!isa<Undefined>(Val: sym) && validateSymbolRelocation(sym, isec, r)) |
699 | prepareSymbolRelocation(sym, isec, r); |
700 | } else { |
701 | if (!r.pcrel) { |
702 | if (config->emitChainedFixups) |
703 | in.chainedFixups->addRebase(isec, offset: r.offset); |
704 | else |
705 | in.rebase->addEntry(isec, offset: r.offset); |
706 | } |
707 | } |
708 | } |
709 | } |
710 | |
711 | in.unwindInfo->prepare(); |
712 | } |
713 | |
714 | static void addNonWeakDefinition(const Defined *defined) { |
715 | if (config->emitChainedFixups) |
716 | in.chainedFixups->setHasNonWeakDefinition(); |
717 | else |
718 | in.weakBinding->addNonWeakDefinition(defined); |
719 | } |
720 | |
721 | void Writer::scanSymbols() { |
722 | TimeTraceScope timeScope("Scan symbols" ); |
723 | ObjCSelRefsHelper::initialize(); |
724 | for (Symbol *sym : symtab->getSymbols()) { |
725 | if (auto *defined = dyn_cast<Defined>(Val: sym)) { |
726 | if (!defined->isLive()) |
727 | continue; |
728 | if (defined->overridesWeakDef) |
729 | addNonWeakDefinition(defined); |
730 | if (!defined->isAbsolute() && isCodeSection(defined->isec())) |
731 | in.unwindInfo->addSymbol(defined); |
732 | } else if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) { |
733 | // This branch intentionally doesn't check isLive(). |
734 | if (dysym->isDynamicLookup()) |
735 | continue; |
736 | dysym->getFile()->refState = |
737 | std::max(a: dysym->getFile()->refState, b: dysym->getRefState()); |
738 | } else if (isa<Undefined>(Val: sym)) { |
739 | if (ObjCStubsSection::isObjCStubSymbol(sym)) { |
740 | // When -dead_strip is enabled, we don't want to emit any dead stubs. |
741 | // Although this stub symbol is yet undefined, addSym() was called |
742 | // during MarkLive. |
743 | if (config->deadStrip) { |
744 | if (!sym->isLive()) |
745 | continue; |
746 | } |
747 | in.objcStubs->addEntry(sym); |
748 | } |
749 | } |
750 | } |
751 | |
752 | for (const InputFile *file : inputFiles) { |
753 | if (auto *objFile = dyn_cast<ObjFile>(Val: file)) |
754 | for (Symbol *sym : objFile->symbols) { |
755 | if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) { |
756 | if (!defined->isLive()) |
757 | continue; |
758 | if (!defined->isExternal() && !defined->isAbsolute() && |
759 | isCodeSection(defined->isec())) |
760 | in.unwindInfo->addSymbol(defined); |
761 | } |
762 | } |
763 | } |
764 | } |
765 | |
766 | // TODO: ld64 enforces the old load commands in a few other cases. |
767 | static bool useLCBuildVersion(const PlatformInfo &platformInfo) { |
768 | static const std::array<std::pair<PlatformType, VersionTuple>, 7> minVersion = |
769 | {._M_elems: {{PLATFORM_MACOS, VersionTuple(10, 14)}, |
770 | {PLATFORM_IOS, VersionTuple(12, 0)}, |
771 | {PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)}, |
772 | {PLATFORM_TVOS, VersionTuple(12, 0)}, |
773 | {PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)}, |
774 | {PLATFORM_WATCHOS, VersionTuple(5, 0)}, |
775 | {PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}}}; |
776 | auto it = llvm::find_if(Range: minVersion, P: [&](const auto &p) { |
777 | return p.first == platformInfo.target.Platform; |
778 | }); |
779 | return it == minVersion.end() |
780 | ? true |
781 | : platformInfo.target.MinDeployment >= it->second; |
782 | } |
783 | |
784 | template <class LP> void Writer::createLoadCommands() { |
785 | uint8_t segIndex = 0; |
786 | for (OutputSegment *seg : outputSegments) { |
787 | in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg)); |
788 | seg->index = segIndex++; |
789 | } |
790 | |
791 | if (config->emitChainedFixups) { |
792 | in.header->addLoadCommand(make<LCChainedFixups>(args&: in.chainedFixups)); |
793 | in.header->addLoadCommand(make<LCExportsTrie>(args&: in.exports)); |
794 | } else { |
795 | in.header->addLoadCommand(make<LCDyldInfo>( |
796 | args&: in.rebase, args&: in.binding, args&: in.weakBinding, args&: in.lazyBinding, args&: in.exports)); |
797 | } |
798 | in.header->addLoadCommand(make<LCSymtab>(args&: symtabSection, args&: stringTableSection)); |
799 | in.header->addLoadCommand( |
800 | make<LCDysymtab>(args&: symtabSection, args&: indirectSymtabSection)); |
801 | if (!config->umbrella.empty()) |
802 | in.header->addLoadCommand(make<LCSubFramework>(args&: config->umbrella)); |
803 | if (config->emitEncryptionInfo) |
804 | in.header->addLoadCommand(make<LCEncryptionInfo<LP>>()); |
805 | for (StringRef path : config->runtimePaths) |
806 | in.header->addLoadCommand(make<LCRPath>(args&: path)); |
807 | |
808 | switch (config->outputType) { |
809 | case MH_EXECUTE: |
810 | in.header->addLoadCommand(make<LCLoadDylinker>()); |
811 | break; |
812 | case MH_DYLIB: |
813 | in.header->addLoadCommand(make<LCDylib>(args: LC_ID_DYLIB, args&: config->installName, |
814 | args&: config->dylibCompatibilityVersion, |
815 | args&: config->dylibCurrentVersion)); |
816 | break; |
817 | case MH_BUNDLE: |
818 | break; |
819 | default: |
820 | llvm_unreachable("unhandled output file type" ); |
821 | } |
822 | |
823 | if (config->generateUuid) { |
824 | uuidCommand = make<LCUuid>(); |
825 | in.header->addLoadCommand(uuidCommand); |
826 | } |
827 | |
828 | if (useLCBuildVersion(platformInfo: config->platformInfo)) |
829 | in.header->addLoadCommand(make<LCBuildVersion>(args&: config->platformInfo)); |
830 | else |
831 | in.header->addLoadCommand(make<LCMinVersion>(args&: config->platformInfo)); |
832 | |
833 | if (config->secondaryPlatformInfo) { |
834 | in.header->addLoadCommand( |
835 | make<LCBuildVersion>(args&: *config->secondaryPlatformInfo)); |
836 | } |
837 | |
838 | // This is down here to match ld64's load command order. |
839 | if (config->outputType == MH_EXECUTE) |
840 | in.header->addLoadCommand(make<LCMain>()); |
841 | |
842 | // See ld64's OutputFile::buildDylibOrdinalMapping for the corresponding |
843 | // library ordinal computation code in ld64. |
844 | int64_t dylibOrdinal = 1; |
845 | DenseMap<StringRef, int64_t> ordinalForInstallName; |
846 | |
847 | std::vector<DylibFile *> dylibFiles; |
848 | for (InputFile *file : inputFiles) { |
849 | if (auto *dylibFile = dyn_cast<DylibFile>(Val: file)) |
850 | dylibFiles.push_back(x: dylibFile); |
851 | } |
852 | for (size_t i = 0; i < dylibFiles.size(); ++i) |
853 | dylibFiles.insert(position: dylibFiles.end(), first: dylibFiles[i]->extraDylibs.begin(), |
854 | last: dylibFiles[i]->extraDylibs.end()); |
855 | |
856 | for (DylibFile *dylibFile : dylibFiles) { |
857 | if (dylibFile->isBundleLoader) { |
858 | dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE; |
859 | // Shortcut since bundle-loader does not re-export the symbols. |
860 | |
861 | dylibFile->reexport = false; |
862 | continue; |
863 | } |
864 | |
865 | // Don't emit load commands for a dylib that is not referenced if: |
866 | // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER -- |
867 | // if it's on the linker command line, it's explicit) |
868 | // - or it's marked MH_DEAD_STRIPPABLE_DYLIB |
869 | // - or the flag -dead_strip_dylibs is used |
870 | // FIXME: `isReferenced()` is currently computed before dead code |
871 | // stripping, so references from dead code keep a dylib alive. This |
872 | // matches ld64, but it's something we should do better. |
873 | if (!dylibFile->isReferenced() && !dylibFile->forceNeeded && |
874 | (!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable || |
875 | config->deadStripDylibs)) |
876 | continue; |
877 | |
878 | // Several DylibFiles can have the same installName. Only emit a single |
879 | // load command for that installName and give all these DylibFiles the |
880 | // same ordinal. |
881 | // This can happen in several cases: |
882 | // - a new framework could change its installName to an older |
883 | // framework name via an $ld$ symbol depending on platform_version |
884 | // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd; |
885 | // Foo.framework/Foo.tbd is usually a symlink to |
886 | // Foo.framework/Versions/Current/Foo.tbd, where |
887 | // Foo.framework/Versions/Current is usually a symlink to |
888 | // Foo.framework/Versions/A) |
889 | // - a framework can be linked both explicitly on the linker |
890 | // command line and implicitly as a reexport from a different |
891 | // framework. The re-export will usually point to the tbd file |
892 | // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will |
893 | // usually find Foo.framework/Foo.tbd. These are usually symlinks, |
894 | // but in a --reproduce archive they will be identical but distinct |
895 | // files. |
896 | // In the first case, *semantically distinct* DylibFiles will have the |
897 | // same installName. |
898 | int64_t &ordinal = ordinalForInstallName[dylibFile->installName]; |
899 | if (ordinal) { |
900 | dylibFile->ordinal = ordinal; |
901 | continue; |
902 | } |
903 | |
904 | ordinal = dylibFile->ordinal = dylibOrdinal++; |
905 | LoadCommandType lcType = |
906 | dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak |
907 | ? LC_LOAD_WEAK_DYLIB |
908 | : LC_LOAD_DYLIB; |
909 | in.header->addLoadCommand(make<LCDylib>(args&: lcType, args&: dylibFile->installName, |
910 | args&: dylibFile->compatibilityVersion, |
911 | args&: dylibFile->currentVersion)); |
912 | |
913 | if (dylibFile->reexport) |
914 | in.header->addLoadCommand( |
915 | make<LCDylib>(args: LC_REEXPORT_DYLIB, args&: dylibFile->installName)); |
916 | } |
917 | |
918 | for (const auto &dyldEnv : config->dyldEnvs) |
919 | in.header->addLoadCommand(make<LCDyldEnv>(args: dyldEnv)); |
920 | |
921 | if (functionStartsSection) |
922 | in.header->addLoadCommand(make<LCFunctionStarts>(args&: functionStartsSection)); |
923 | if (dataInCodeSection) |
924 | in.header->addLoadCommand(make<LCDataInCode>(args&: dataInCodeSection)); |
925 | if (codeSignatureSection) |
926 | in.header->addLoadCommand(make<LCCodeSignature>(args&: codeSignatureSection)); |
927 | |
928 | const uint32_t MACOS_MAXPATHLEN = 1024; |
929 | config->headerPad = std::max( |
930 | a: config->headerPad, b: (config->headerPadMaxInstallNames |
931 | ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN |
932 | : 0)); |
933 | } |
934 | |
935 | // Sorting only can happen once all outputs have been collected. Here we sort |
936 | // segments, output sections within each segment, and input sections within each |
937 | // output segment. |
938 | static void sortSegmentsAndSections() { |
939 | TimeTraceScope timeScope("Sort segments and sections" ); |
940 | sortOutputSegments(); |
941 | |
942 | DenseMap<const InputSection *, size_t> isecPriorities = |
943 | priorityBuilder.buildInputSectionPriorities(); |
944 | |
945 | uint32_t sectionIndex = 0; |
946 | for (OutputSegment *seg : outputSegments) { |
947 | seg->sortOutputSections(); |
948 | // References from thread-local variable sections are treated as offsets |
949 | // relative to the start of the thread-local data memory area, which |
950 | // is initialized via copying all the TLV data sections (which are all |
951 | // contiguous). If later data sections require a greater alignment than |
952 | // earlier ones, the offsets of data within those sections won't be |
953 | // guaranteed to aligned unless we normalize alignments. We therefore use |
954 | // the largest alignment for all TLV data sections. |
955 | uint32_t tlvAlign = 0; |
956 | for (const OutputSection *osec : seg->getSections()) |
957 | if (isThreadLocalData(flags: osec->flags) && osec->align > tlvAlign) |
958 | tlvAlign = osec->align; |
959 | |
960 | for (OutputSection *osec : seg->getSections()) { |
961 | // Now that the output sections are sorted, assign the final |
962 | // output section indices. |
963 | if (!osec->isHidden()) |
964 | osec->index = ++sectionIndex; |
965 | if (isThreadLocalData(flags: osec->flags)) { |
966 | if (!firstTLVDataSection) |
967 | firstTLVDataSection = osec; |
968 | osec->align = tlvAlign; |
969 | } |
970 | |
971 | if (!isecPriorities.empty()) { |
972 | if (auto *merged = dyn_cast<ConcatOutputSection>(Val: osec)) { |
973 | llvm::stable_sort( |
974 | Range&: merged->inputs, C: [&](InputSection *a, InputSection *b) { |
975 | return isecPriorities.lookup(Val: a) > isecPriorities.lookup(Val: b); |
976 | }); |
977 | } |
978 | } |
979 | } |
980 | } |
981 | } |
982 | |
983 | template <class LP> void Writer::createOutputSections() { |
984 | TimeTraceScope timeScope("Create output sections" ); |
985 | // First, create hidden sections |
986 | stringTableSection = make<StringTableSection>(); |
987 | symtabSection = makeSymtabSection<LP>(*stringTableSection); |
988 | indirectSymtabSection = make<IndirectSymtabSection>(); |
989 | if (config->adhocCodesign) |
990 | codeSignatureSection = make<CodeSignatureSection>(); |
991 | if (config->emitDataInCodeInfo) |
992 | dataInCodeSection = make<DataInCodeSection>(); |
993 | if (config->emitFunctionStarts) |
994 | functionStartsSection = make<FunctionStartsSection>(); |
995 | |
996 | switch (config->outputType) { |
997 | case MH_EXECUTE: |
998 | make<PageZeroSection>(); |
999 | break; |
1000 | case MH_DYLIB: |
1001 | case MH_BUNDLE: |
1002 | break; |
1003 | default: |
1004 | llvm_unreachable("unhandled output file type" ); |
1005 | } |
1006 | |
1007 | // Then add input sections to output sections. |
1008 | for (ConcatInputSection *isec : inputSections) { |
1009 | if (isec->shouldOmitFromOutput()) |
1010 | continue; |
1011 | ConcatOutputSection *osec = cast<ConcatOutputSection>(Val: isec->parent); |
1012 | osec->addInput(input: isec); |
1013 | osec->inputOrder = |
1014 | std::min(a: osec->inputOrder, b: static_cast<int>(isec->outSecOff)); |
1015 | } |
1016 | |
1017 | // Once all the inputs are added, we can finalize the output section |
1018 | // properties and create the corresponding output segments. |
1019 | for (const auto &it : concatOutputSections) { |
1020 | StringRef segname = it.first.first; |
1021 | ConcatOutputSection *osec = it.second; |
1022 | assert(segname != segment_names::ld); |
1023 | if (osec->isNeeded()) { |
1024 | // See comment in ObjFile::splitEhFrames() |
1025 | if (osec->name == section_names::ehFrame && |
1026 | segname == segment_names::text) |
1027 | osec->align = target->wordSize; |
1028 | |
1029 | // MC keeps the default 1-byte alignment for __thread_vars, even though it |
1030 | // contains pointers that are fixed up by dyld, which requires proper |
1031 | // alignment. |
1032 | if (isThreadLocalVariables(flags: osec->flags)) |
1033 | osec->align = std::max<uint32_t>(a: osec->align, b: target->wordSize); |
1034 | |
1035 | getOrCreateOutputSegment(name: segname)->addOutputSection(os: osec); |
1036 | } |
1037 | } |
1038 | |
1039 | for (SyntheticSection *ssec : syntheticSections) { |
1040 | auto it = concatOutputSections.find(Key: {ssec->segname, ssec->name}); |
1041 | // We add all LinkEdit sections here because we don't know if they are |
1042 | // needed until their finalizeContents() methods get called later. While |
1043 | // this means that we add some redundant sections to __LINKEDIT, there is |
1044 | // is no redundancy in the output, as we do not emit section headers for |
1045 | // any LinkEdit sections. |
1046 | if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) { |
1047 | if (it == concatOutputSections.end()) { |
1048 | getOrCreateOutputSegment(name: ssec->segname)->addOutputSection(os: ssec); |
1049 | } else { |
1050 | fatal(msg: "section from " + |
1051 | toString(file: it->second->firstSection()->getFile()) + |
1052 | " conflicts with synthetic section " + ssec->segname + "," + |
1053 | ssec->name); |
1054 | } |
1055 | } |
1056 | } |
1057 | |
1058 | // dyld requires __LINKEDIT segment to always exist (even if empty). |
1059 | linkEditSegment = getOrCreateOutputSegment(name: segment_names::linkEdit); |
1060 | } |
1061 | |
1062 | void Writer::finalizeAddresses() { |
1063 | TimeTraceScope timeScope("Finalize addresses" ); |
1064 | uint64_t pageSize = target->getPageSize(); |
1065 | |
1066 | // We could parallelize this loop, but local benchmarking indicates it is |
1067 | // faster to do it all in the main thread. |
1068 | for (OutputSegment *seg : outputSegments) { |
1069 | if (seg == linkEditSegment) |
1070 | continue; |
1071 | for (OutputSection *osec : seg->getSections()) { |
1072 | if (!osec->isNeeded()) |
1073 | continue; |
1074 | // Other kinds of OutputSections have already been finalized. |
1075 | if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec)) |
1076 | concatOsec->finalizeContents(); |
1077 | } |
1078 | } |
1079 | |
1080 | // Ensure that segments (and the sections they contain) are allocated |
1081 | // addresses in ascending order, which dyld requires. |
1082 | // |
1083 | // Note that at this point, __LINKEDIT sections are empty, but we need to |
1084 | // determine addresses of other segments/sections before generating its |
1085 | // contents. |
1086 | for (OutputSegment *seg : outputSegments) { |
1087 | if (seg == linkEditSegment) |
1088 | continue; |
1089 | seg->addr = addr; |
1090 | assignAddresses(seg); |
1091 | // codesign / libstuff checks for segment ordering by verifying that |
1092 | // `fileOff + fileSize == next segment fileOff`. So we call |
1093 | // alignToPowerOf2() before (instead of after) computing fileSize to ensure |
1094 | // that the segments are contiguous. We handle addr / vmSize similarly for |
1095 | // the same reason. |
1096 | fileOff = alignToPowerOf2(Value: fileOff, Align: pageSize); |
1097 | addr = alignToPowerOf2(Value: addr, Align: pageSize); |
1098 | seg->vmSize = addr - seg->addr; |
1099 | seg->fileSize = fileOff - seg->fileOff; |
1100 | seg->assignAddressesToStartEndSymbols(); |
1101 | } |
1102 | } |
1103 | |
1104 | void Writer::finalizeLinkEditSegment() { |
1105 | TimeTraceScope timeScope("Finalize __LINKEDIT segment" ); |
1106 | // Fill __LINKEDIT contents. |
1107 | std::array<LinkEditSection *, 10> linkEditSections{ |
1108 | in.rebase, in.binding, |
1109 | in.weakBinding, in.lazyBinding, |
1110 | in.exports, in.chainedFixups, |
1111 | symtabSection, indirectSymtabSection, |
1112 | dataInCodeSection, functionStartsSection, |
1113 | }; |
1114 | SmallVector<std::shared_future<void>> threadFutures; |
1115 | threadFutures.reserve(N: linkEditSections.size()); |
1116 | for (LinkEditSection *osec : linkEditSections) |
1117 | if (osec) |
1118 | threadFutures.emplace_back(Args: threadPool.async( |
1119 | F: [](LinkEditSection *osec) { osec->finalizeContents(); }, ArgList&: osec)); |
1120 | for (std::shared_future<void> &future : threadFutures) |
1121 | future.wait(); |
1122 | |
1123 | // Now that __LINKEDIT is filled out, do a proper calculation of its |
1124 | // addresses and offsets. |
1125 | linkEditSegment->addr = addr; |
1126 | assignAddresses(linkEditSegment); |
1127 | // No need to page-align fileOff / addr here since this is the last segment. |
1128 | linkEditSegment->vmSize = addr - linkEditSegment->addr; |
1129 | linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff; |
1130 | } |
1131 | |
1132 | void Writer::assignAddresses(OutputSegment *seg) { |
1133 | seg->fileOff = fileOff; |
1134 | |
1135 | for (OutputSection *osec : seg->getSections()) { |
1136 | if (!osec->isNeeded()) |
1137 | continue; |
1138 | addr = alignToPowerOf2(Value: addr, Align: osec->align); |
1139 | fileOff = alignToPowerOf2(Value: fileOff, Align: osec->align); |
1140 | osec->addr = addr; |
1141 | osec->fileOff = isZeroFill(flags: osec->flags) ? 0 : fileOff; |
1142 | osec->finalize(); |
1143 | osec->assignAddressesToStartEndSymbols(); |
1144 | |
1145 | addr += osec->getSize(); |
1146 | fileOff += osec->getFileSize(); |
1147 | } |
1148 | } |
1149 | |
1150 | void Writer::openFile() { |
1151 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
1152 | FileOutputBuffer::create(FilePath: config->outputFile, Size: fileOff, |
1153 | Flags: FileOutputBuffer::F_executable); |
1154 | |
1155 | if (!bufferOrErr) |
1156 | fatal(msg: "failed to open " + config->outputFile + ": " + |
1157 | llvm::toString(E: bufferOrErr.takeError())); |
1158 | buffer = std::move(*bufferOrErr); |
1159 | in.bufferStart = buffer->getBufferStart(); |
1160 | } |
1161 | |
1162 | void Writer::writeSections() { |
1163 | uint8_t *buf = buffer->getBufferStart(); |
1164 | std::vector<const OutputSection *> osecs; |
1165 | for (const OutputSegment *seg : outputSegments) |
1166 | append_range(C&: osecs, R: seg->getSections()); |
1167 | |
1168 | parallelForEach(Begin: osecs.begin(), End: osecs.end(), Fn: [&](const OutputSection *osec) { |
1169 | osec->writeTo(buf: buf + osec->fileOff); |
1170 | }); |
1171 | } |
1172 | |
1173 | void Writer::applyOptimizationHints() { |
1174 | if (config->arch() != AK_arm64 || config->ignoreOptimizationHints) |
1175 | return; |
1176 | |
1177 | uint8_t *buf = buffer->getBufferStart(); |
1178 | TimeTraceScope timeScope("Apply linker optimization hints" ); |
1179 | parallelForEach(R&: inputFiles, Fn: [buf](const InputFile *file) { |
1180 | if (const auto *objFile = dyn_cast<ObjFile>(Val: file)) |
1181 | target->applyOptimizationHints(buf, *objFile); |
1182 | }); |
1183 | } |
1184 | |
1185 | // In order to utilize multiple cores, we first split the buffer into chunks, |
1186 | // compute a hash for each chunk, and then compute a hash value of the hash |
1187 | // values. |
1188 | void Writer::writeUuid() { |
1189 | TimeTraceScope timeScope("Computing UUID" ); |
1190 | |
1191 | ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()}; |
1192 | std::vector<ArrayRef<uint8_t>> chunks = split(arr: data, chunkSize: 1024 * 1024); |
1193 | // Leave one slot for filename |
1194 | std::vector<uint64_t> hashes(chunks.size() + 1); |
1195 | SmallVector<std::shared_future<void>> threadFutures; |
1196 | threadFutures.reserve(N: chunks.size()); |
1197 | for (size_t i = 0; i < chunks.size(); ++i) |
1198 | threadFutures.emplace_back(Args: threadPool.async( |
1199 | F: [&](size_t j) { hashes[j] = xxh3_64bits(data: chunks[j]); }, ArgList&: i)); |
1200 | for (std::shared_future<void> &future : threadFutures) |
1201 | future.wait(); |
1202 | // Append the output filename so that identical binaries with different names |
1203 | // don't get the same UUID. |
1204 | hashes[chunks.size()] = xxh3_64bits(data: sys::path::filename(path: config->finalOutput)); |
1205 | uint64_t digest = xxh3_64bits(data: {reinterpret_cast<uint8_t *>(hashes.data()), |
1206 | hashes.size() * sizeof(uint64_t)}); |
1207 | uuidCommand->writeUuid(digest); |
1208 | } |
1209 | |
1210 | // This is step 5 of the algorithm described in the class comment of |
1211 | // ChainedFixupsSection. |
1212 | void Writer::buildFixupChains() { |
1213 | if (!config->emitChainedFixups) |
1214 | return; |
1215 | |
1216 | const std::vector<Location> &loc = in.chainedFixups->getLocations(); |
1217 | if (loc.empty()) |
1218 | return; |
1219 | |
1220 | TimeTraceScope timeScope("Build fixup chains" ); |
1221 | |
1222 | const uint64_t pageSize = target->getPageSize(); |
1223 | constexpr uint32_t stride = 4; // for DYLD_CHAINED_PTR_64 |
1224 | |
1225 | for (size_t i = 0, count = loc.size(); i < count;) { |
1226 | const OutputSegment *oseg = loc[i].isec->parent->parent; |
1227 | uint8_t *buf = buffer->getBufferStart() + oseg->fileOff; |
1228 | uint64_t pageIdx = loc[i].offset / pageSize; |
1229 | ++i; |
1230 | |
1231 | while (i < count && loc[i].isec->parent->parent == oseg && |
1232 | (loc[i].offset / pageSize) == pageIdx) { |
1233 | uint64_t offset = loc[i].offset - loc[i - 1].offset; |
1234 | |
1235 | auto fail = [&](Twine message) { |
1236 | error(msg: loc[i].isec->getSegName() + "," + loc[i].isec->getName() + |
1237 | ", offset " + |
1238 | Twine(loc[i].offset - loc[i].isec->parent->getSegmentOffset()) + |
1239 | ": " + message); |
1240 | }; |
1241 | |
1242 | if (offset < target->wordSize) |
1243 | return fail("fixups overlap" ); |
1244 | if (offset % stride != 0) |
1245 | return fail( |
1246 | "fixups are unaligned (offset " + Twine(offset) + |
1247 | " is not a multiple of the stride). Re-link with -no_fixup_chains" ); |
1248 | |
1249 | // The "next" field is in the same location for bind and rebase entries. |
1250 | reinterpret_cast<dyld_chained_ptr_64_bind *>(buf + loc[i - 1].offset) |
1251 | ->next = offset / stride; |
1252 | ++i; |
1253 | } |
1254 | } |
1255 | } |
1256 | |
1257 | void Writer::writeCodeSignature() { |
1258 | if (codeSignatureSection) { |
1259 | TimeTraceScope timeScope("Write code signature" ); |
1260 | codeSignatureSection->writeHashes(buf: buffer->getBufferStart()); |
1261 | } |
1262 | } |
1263 | |
1264 | void Writer::writeOutputFile() { |
1265 | TimeTraceScope timeScope("Write output file" ); |
1266 | openFile(); |
1267 | reportPendingUndefinedSymbols(); |
1268 | if (errorCount()) |
1269 | return; |
1270 | writeSections(); |
1271 | applyOptimizationHints(); |
1272 | buildFixupChains(); |
1273 | if (config->generateUuid) |
1274 | writeUuid(); |
1275 | writeCodeSignature(); |
1276 | |
1277 | if (auto e = buffer->commit()) |
1278 | fatal(msg: "failed to write output '" + buffer->getPath() + |
1279 | "': " + toString(E: std::move(e))); |
1280 | } |
1281 | |
1282 | template <class LP> void Writer::run() { |
1283 | treatSpecialUndefineds(); |
1284 | if (config->entry && needsBinding(sym: config->entry)) |
1285 | in.stubs->addEntry(config->entry); |
1286 | |
1287 | // Canonicalization of all pointers to InputSections should be handled by |
1288 | // these two scan* methods. I.e. from this point onward, for all live |
1289 | // InputSections, we should have `isec->canonical() == isec`. |
1290 | scanSymbols(); |
1291 | if (in.objcStubs->isNeeded()) |
1292 | in.objcStubs->setUp(); |
1293 | if (in.objcMethList->isNeeded()) |
1294 | in.objcMethList->setUp(); |
1295 | scanRelocations(); |
1296 | if (in.initOffsets->isNeeded()) |
1297 | in.initOffsets->setUp(); |
1298 | |
1299 | // Do not proceed if there were undefined or duplicate symbols. |
1300 | reportPendingUndefinedSymbols(); |
1301 | reportPendingDuplicateSymbols(); |
1302 | if (errorCount()) |
1303 | return; |
1304 | |
1305 | if (in.stubHelper && in.stubHelper->isNeeded()) |
1306 | in.stubHelper->setUp(); |
1307 | |
1308 | if (in.objCImageInfo->isNeeded()) |
1309 | in.objCImageInfo->finalizeContents(); |
1310 | |
1311 | // At this point, we should know exactly which output sections are needed, |
1312 | // courtesy of scanSymbols() and scanRelocations(). |
1313 | createOutputSections<LP>(); |
1314 | |
1315 | // After this point, we create no new segments; HOWEVER, we might |
1316 | // yet create branch-range extension thunks for architectures whose |
1317 | // hardware call instructions have limited range, e.g., ARM(64). |
1318 | // The thunks are created as InputSections interspersed among |
1319 | // the ordinary __TEXT,_text InputSections. |
1320 | sortSegmentsAndSections(); |
1321 | createLoadCommands<LP>(); |
1322 | finalizeAddresses(); |
1323 | threadPool.async([&] { |
1324 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1325 | timeTraceProfilerInitialize(TimeTraceGranularity: config->timeTraceGranularity, ProcName: "writeMapFile" ); |
1326 | writeMapFile(); |
1327 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1328 | timeTraceProfilerFinishThread(); |
1329 | }); |
1330 | finalizeLinkEditSegment(); |
1331 | writeOutputFile(); |
1332 | } |
1333 | |
1334 | template <class LP> void macho::writeResult() { Writer().run<LP>(); } |
1335 | |
1336 | void macho::resetWriter() { LCDylib::resetInstanceCount(); } |
1337 | |
1338 | void macho::createSyntheticSections() { |
1339 | in.header = make<MachHeaderSection>(); |
1340 | if (config->dedupStrings) |
1341 | in.cStringSection = |
1342 | make<DeduplicatedCStringSection>(args: section_names::cString); |
1343 | else |
1344 | in.cStringSection = make<CStringSection>(args: section_names::cString); |
1345 | in.objcMethnameSection = |
1346 | make<DeduplicatedCStringSection>(args: section_names::objcMethname); |
1347 | in.wordLiteralSection = make<WordLiteralSection>(); |
1348 | if (config->emitChainedFixups) { |
1349 | in.chainedFixups = make<ChainedFixupsSection>(); |
1350 | } else { |
1351 | in.rebase = make<RebaseSection>(); |
1352 | in.binding = make<BindingSection>(); |
1353 | in.weakBinding = make<WeakBindingSection>(); |
1354 | in.lazyBinding = make<LazyBindingSection>(); |
1355 | in.lazyPointers = make<LazyPointerSection>(); |
1356 | in.stubHelper = make<StubHelperSection>(); |
1357 | } |
1358 | in.exports = make<ExportSection>(); |
1359 | in.got = make<GotSection>(); |
1360 | in.tlvPointers = make<TlvPointerSection>(); |
1361 | in.stubs = make<StubsSection>(); |
1362 | in.objcStubs = make<ObjCStubsSection>(); |
1363 | in.unwindInfo = makeUnwindInfoSection(); |
1364 | in.objCImageInfo = make<ObjCImageInfoSection>(); |
1365 | in.initOffsets = make<InitOffsetsSection>(); |
1366 | in.objcMethList = make<ObjCMethListSection>(); |
1367 | |
1368 | // This section contains space for just a single word, and will be used by |
1369 | // dyld to cache an address to the image loader it uses. |
1370 | uint8_t *arr = bAlloc().Allocate<uint8_t>(Num: target->wordSize); |
1371 | memset(s: arr, c: 0, n: target->wordSize); |
1372 | in.imageLoaderCache = makeSyntheticInputSection( |
1373 | segName: segment_names::data, sectName: section_names::data, flags: S_REGULAR, |
1374 | data: ArrayRef<uint8_t>{arr, target->wordSize}, |
1375 | /*align=*/target->wordSize); |
1376 | assert(in.imageLoaderCache->live); |
1377 | } |
1378 | |
// Definition of the macho::firstTLVDataSection global. It starts out null and
// is set by sortSegmentsAndSections() to the first thread-local data output
// section it encounters.
OutputSection *macho::firstTLVDataSection = nullptr;

// Explicit instantiations of writeResult() for the LP64 and ILP32 policies.
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();
1383 | |