1//===- ObjC.cpp -----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjC.h"
10#include "ConcatOutputSection.h"
11#include "InputFiles.h"
12#include "InputSection.h"
13#include "Layout.h"
14#include "OutputSegment.h"
15#include "SyntheticSections.h"
16#include "Target.h"
17
18#include "lld/Common/ErrorHandler.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/Bitcode/BitcodeReader.h"
21#include "llvm/Support/TimeProfiler.h"
22
23using namespace llvm;
24using namespace llvm::MachO;
25using namespace lld;
26using namespace lld::macho;
27
28template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
29 using SectionHeader = typename LP::section;
30
31 auto *hdr =
32 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
33 if (hdr->magic != LP::magic)
34 return false;
35
36 if (const auto *c =
37 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
38 auto sectionHeaders = ArrayRef<SectionHeader>{
39 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
40 for (const SectionHeader &secHead : sectionHeaders) {
41 StringRef sectname(secHead.sectname,
42 strnlen(secHead.sectname, sizeof(secHead.sectname)));
43 StringRef segname(secHead.segname,
44 strnlen(secHead.segname, sizeof(secHead.segname)));
45 if ((segname == segment_names::data &&
46 sectname == section_names::objcCatList) ||
47 (segname == segment_names::text &&
48 sectname.starts_with(Prefix: section_names::swift))) {
49 return true;
50 }
51 }
52 }
53 return false;
54}
55
56static bool objectHasObjCSection(MemoryBufferRef mb) {
57 if (target->wordSize == 8)
58 return ::objectHasObjCSection<LP64>(mb);
59 else
60 return ::objectHasObjCSection<ILP32>(mb);
61}
62
63bool macho::hasObjCSection(MemoryBufferRef mb) {
64 switch (identify_magic(magic: mb.getBuffer())) {
65 case file_magic::macho_object:
66 return objectHasObjCSection(mb);
67 case file_magic::bitcode:
68 return check(e: isBitcodeContainingObjCCategory(Buffer: mb));
69 default:
70 return false;
71 }
72}
73
74namespace {
75
76#define FOR_EACH_CATEGORY_FIELD(DO) \
77 DO(Ptr, name) \
78 DO(Ptr, klass) \
79 DO(Ptr, instanceMethods) \
80 DO(Ptr, classMethods) \
81 DO(Ptr, protocols) \
82 DO(Ptr, instanceProps) \
83 DO(Ptr, classProps) \
84 DO(uint32_t, size)
85
86CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
87
88#undef FOR_EACH_CATEGORY_FIELD
89
90#define FOR_EACH_CLASS_FIELD(DO) \
91 DO(Ptr, metaClass) \
92 DO(Ptr, superClass) \
93 DO(Ptr, methodCache) \
94 DO(Ptr, vtable) \
95 DO(Ptr, roData)
96
97CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
98
99#undef FOR_EACH_CLASS_FIELD
100
101#define FOR_EACH_RO_CLASS_FIELD(DO) \
102 DO(uint32_t, flags) \
103 DO(uint32_t, instanceStart) \
104 DO(Ptr, instanceSize) \
105 DO(Ptr, ivarLayout) \
106 DO(Ptr, name) \
107 DO(Ptr, baseMethods) \
108 DO(Ptr, baseProtocols) \
109 DO(Ptr, ivars) \
110 DO(Ptr, weakIvarLayout) \
111 DO(Ptr, baseProperties)
112
113CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
114
115#undef FOR_EACH_RO_CLASS_FIELD
116
117#define FOR_EACH_LIST_HEADER(DO) \
118 DO(uint32_t, structSize) \
119 DO(uint32_t, structCount)
120
121CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
122
123#undef FOR_EACH_LIST_HEADER
124
125#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
126
127CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
128
129#undef FOR_EACH_PROTOCOL_LIST_HEADER
130
131#define FOR_EACH_METHOD(DO) \
132 DO(Ptr, name) \
133 DO(Ptr, type) \
134 DO(Ptr, impl)
135
136CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
137
138#undef FOR_EACH_METHOD
139
140enum MethodContainerKind {
141 MCK_Class,
142 MCK_Category,
143};
144
145struct MethodContainer {
146 MethodContainerKind kind;
147 const ConcatInputSection *isec;
148};
149
150enum MethodKind {
151 MK_Instance,
152 MK_Static,
153};
154
155struct ObjcClass {
156 DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
157 DenseMap<CachedHashStringRef, MethodContainer> classMethods;
158};
159
160} // namespace
161
162class ObjcCategoryChecker {
163public:
164 ObjcCategoryChecker();
165 void parseCategory(const ConcatInputSection *catListIsec);
166
167private:
168 void parseClass(const Defined *classSym);
169 void parseMethods(const ConcatInputSection *methodsIsec,
170 const Symbol *methodContainer,
171 const ConcatInputSection *containerIsec,
172 MethodContainerKind, MethodKind);
173
174 CategoryLayout catLayout;
175 ClassLayout classLayout;
176 ROClassLayout roClassLayout;
177 ListHeaderLayout listHeaderLayout;
178 MethodLayout methodLayout;
179
180 DenseMap<const Symbol *, ObjcClass> classMap;
181};
182
183ObjcCategoryChecker::ObjcCategoryChecker()
184 : catLayout(target->wordSize), classLayout(target->wordSize),
185 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
186 methodLayout(target->wordSize) {}
187
188void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
189 const Symbol *methodContainerSym,
190 const ConcatInputSection *containerIsec,
191 MethodContainerKind mcKind,
192 MethodKind mKind) {
193 ObjcClass &klass = classMap[methodContainerSym];
194 for (const Reloc &r : methodsIsec->relocs) {
195 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
196 methodLayout.nameOffset)
197 continue;
198
199 CachedHashStringRef methodName(r.getReferentString());
200 // +load methods are special: all implementations are called by the runtime
201 // even if they are part of the same class. Thus there is no need to check
202 // for duplicates.
203 // NOTE: Instead of specifically checking for this method name, ld64 simply
204 // checks whether a class / category is present in __objc_nlclslist /
205 // __objc_nlcatlist respectively. This will be the case if the class /
206 // category has a +load method. It skips optimizing the categories if there
207 // are multiple +load methods. Since it does dupe checking as part of the
208 // optimization process, this avoids spurious dupe messages around +load,
209 // but it also means that legit dupe issues for other methods are ignored.
210 if (mKind == MK_Static && methodName.val() == "load")
211 continue;
212
213 auto &methodMap =
214 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
215 if (methodMap
216 .try_emplace(Key: methodName, Args: MethodContainer{.kind: mcKind, .isec: containerIsec})
217 .second)
218 continue;
219
220 // We have a duplicate; generate a warning message.
221 const auto &mc = methodMap.lookup(Val: methodName);
222 const Reloc *nameReloc = nullptr;
223 if (mc.kind == MCK_Category) {
224 nameReloc = mc.isec->getRelocAt(off: catLayout.nameOffset);
225 } else {
226 assert(mc.kind == MCK_Class);
227 const auto *roIsec = mc.isec->getRelocAt(off: classLayout.roDataOffset)
228 ->getReferentInputSection();
229 nameReloc = roIsec->getRelocAt(off: roClassLayout.nameOffset);
230 }
231 StringRef containerName = nameReloc->getReferentString();
232 StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
233
234 // We should only ever encounter collisions when parsing category methods
235 // (since the Class struct is parsed before any of its categories).
236 assert(mcKind == MCK_Category);
237 StringRef newCatName =
238 containerIsec->getRelocAt(off: catLayout.nameOffset)->getReferentString();
239
240 auto formatObjAndSrcFileName = [](const InputSection *section) {
241 lld::macho::InputFile *inputFile = section->getFile();
242 std::string result = toString(file: inputFile);
243
244 auto objFile = dyn_cast_or_null<ObjFile>(Val: inputFile);
245 if (objFile && objFile->compileUnit)
246 result += " (" + objFile->sourceFile() + ")";
247
248 return result;
249 };
250
251 StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
252 warn(msg: "method '" + methPrefix + methodName.val() +
253 "' has conflicting definitions:\n>>> defined in category " +
254 newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
255 "\n>>> defined in " + containerType + " " + containerName + " from " +
256 formatObjAndSrcFileName(mc.isec));
257 }
258}
259
260void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
261 auto *classReloc = catIsec->getRelocAt(off: catLayout.klassOffset);
262 if (!classReloc)
263 return;
264
265 auto *classSym = cast<Symbol *>(Val: classReloc->referent);
266 if (auto *d = dyn_cast<Defined>(Val: classSym))
267 if (!classMap.count(Val: d))
268 parseClass(classSym: d);
269
270 if (const auto *r = catIsec->getRelocAt(off: catLayout.classMethodsOffset)) {
271 parseMethods(methodsIsec: cast<ConcatInputSection>(Val: r->getReferentInputSection()),
272 methodContainerSym: classSym, containerIsec: catIsec, mcKind: MCK_Category, mKind: MK_Static);
273 }
274
275 if (const auto *r = catIsec->getRelocAt(off: catLayout.instanceMethodsOffset)) {
276 parseMethods(methodsIsec: cast<ConcatInputSection>(Val: r->getReferentInputSection()),
277 methodContainerSym: classSym, containerIsec: catIsec, mcKind: MCK_Category, mKind: MK_Instance);
278 }
279}
280
281void ObjcCategoryChecker::parseClass(const Defined *classSym) {
282 // Given a Class struct, get its corresponding Methods struct
283 auto getMethodsIsec =
284 [&](const InputSection *classIsec) -> ConcatInputSection * {
285 if (const auto *r = classIsec->getRelocAt(off: classLayout.roDataOffset)) {
286 if (const auto *roIsec =
287 cast_or_null<ConcatInputSection>(Val: r->getReferentInputSection())) {
288 if (const auto *r =
289 roIsec->getRelocAt(off: roClassLayout.baseMethodsOffset)) {
290 if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
291 Val: r->getReferentInputSection()))
292 return methodsIsec;
293 }
294 }
295 }
296 return nullptr;
297 };
298
299 const auto *classIsec = cast<ConcatInputSection>(Val: classSym->isec());
300
301 // Parse instance methods.
302 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
303 parseMethods(methodsIsec: instanceMethodsIsec, methodContainerSym: classSym, containerIsec: classIsec, mcKind: MCK_Class,
304 mKind: MK_Instance);
305
306 // Class methods are contained in the metaclass.
307 if (const auto *r = classSym->isec()->getRelocAt(off: classLayout.metaClassOffset))
308 if (const auto *classMethodsIsec = getMethodsIsec(
309 cast<ConcatInputSection>(Val: r->getReferentInputSection())))
310 parseMethods(methodsIsec: classMethodsIsec, methodContainerSym: classSym, containerIsec: classIsec, mcKind: MCK_Class, mKind: MK_Static);
311}
312
313void objc::checkCategories() {
314 TimeTraceScope timeScope("ObjcCategoryChecker");
315
316 ObjcCategoryChecker checker;
317 for (const InputSection *isec : inputSections) {
318 if (isec->getName() == section_names::objcCatList)
319 for (const Reloc &r : isec->relocs) {
320 auto *catIsec = cast<ConcatInputSection>(Val: r.getReferentInputSection());
321 checker.parseCategory(catIsec);
322 }
323 }
324}
325
326namespace {
327
328class ObjcCategoryMerger {
329 // In which language was a particular construct originally defined
330 enum SourceLanguage { Unknown, ObjC, Swift };
331
332 // Information about an input category
333 struct InfoInputCategory {
334 ConcatInputSection *catListIsec;
335 ConcatInputSection *catBodyIsec;
336 uint32_t offCatListIsec = 0;
337 SourceLanguage sourceLanguage = SourceLanguage::Unknown;
338
339 bool wasMerged = false;
340 };
341
342 // To write new (merged) categories or classes, we will try make limited
343 // assumptions about the alignment and the sections the various class/category
344 // info are stored in and . So we'll just reuse the same sections and
345 // alignment as already used in existing (input) categories. To do this we
346 // have InfoCategoryWriter which contains the various sections that the
347 // generated categories will be written to.
348 struct InfoWriteSection {
349 bool valid = false; // Data has been successfully collected from input
350 uint32_t align = 0;
351 Section *inputSection;
352 Reloc relocTemplate;
353 OutputSection *outputSection;
354 };
355
356 struct InfoCategoryWriter {
357 InfoWriteSection catListInfo;
358 InfoWriteSection catBodyInfo;
359 InfoWriteSection catNameInfo;
360 InfoWriteSection catPtrListInfo;
361 };
362
363 // Information about a pointer list in the original categories or class(method
364 // lists, protocol lists, etc)
365 struct PointerListInfo {
366 PointerListInfo() = default;
367 PointerListInfo(const PointerListInfo &) = default;
368 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
369 : categoryPrefix(_categoryPrefix),
370 pointersPerStruct(_pointersPerStruct) {}
371
372 inline bool operator==(const PointerListInfo &cmp) const {
373 return pointersPerStruct == cmp.pointersPerStruct &&
374 structSize == cmp.structSize && structCount == cmp.structCount &&
375 allPtrs == cmp.allPtrs;
376 }
377
378 const char *categoryPrefix;
379
380 uint32_t pointersPerStruct = 0;
381
382 uint32_t structSize = 0;
383 uint32_t structCount = 0;
384
385 std::vector<Symbol *> allPtrs;
386 };
387
388 // Full information describing an ObjC class . This will include all the
389 // additional methods, protocols, and properties that are contained in the
390 // class and all the categories that extend a particular class.
391 struct ClassExtensionInfo {
392 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
393
394 // Merged names of containers. Ex: base|firstCategory|secondCategory|...
395 std::string mergedContainerName;
396 std::string baseClassName;
397 const Symbol *baseClass = nullptr;
398 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
399
400 CategoryLayout &catLayout;
401
402 // In case we generate new data, mark the new data as belonging to this file
403 ObjFile *objFileForMergeData = nullptr;
404
405 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
406 /*pointersPerStruct=*/3};
407 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
408 /*pointersPerStruct=*/3};
409 PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
410 /*pointersPerStruct=*/0};
411 PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
412 /*pointersPerStruct=*/2};
413 PointerListInfo classProps = {objc::symbol_names::klassPropList,
414 /*pointersPerStruct=*/2};
415 };
416
417public:
418 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
419 void doMerge();
420 static void doCleanup();
421
422private:
423 DenseSet<const Symbol *> collectNlCategories();
424 void collectAndValidateCategoriesData();
425 bool
426 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
427
428 void eraseISec(ConcatInputSection *isec);
429 void eraseMergedCategories();
430
431 void generateCatListForNonErasedCategories(
432 MapVector<ConcatInputSection *, std::set<uint64_t>>
433 catListToErasedOffsets);
434 void collectSectionWriteInfoFromIsec(const InputSection *isec,
435 InfoWriteSection &catWriteInfo);
436 bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
437 bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
438 ClassExtensionInfo &extInfo);
439
440 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
441 PointerListInfo &ptrList,
442 SourceLanguage sourceLang);
443
444 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
445 uint32_t secOffset,
446 SourceLanguage sourceLang);
447
448 bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
449 PointerListInfo &ptrList);
450
451 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
452 const ClassExtensionInfo &extInfo,
453 const PointerListInfo &ptrList);
454
455 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
456 const ClassExtensionInfo &extInfo,
457 const PointerListInfo &ptrList);
458
459 Defined *emitCategory(const ClassExtensionInfo &extInfo);
460 Defined *emitCatListEntrySec(const std::string &forCategoryName,
461 const std::string &forBaseClassName,
462 ObjFile *objFile);
463 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
464 const Symbol *baseClassSym,
465 const std::string &baseClassName, ObjFile *objFile);
466 Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
467 void createSymbolReference(Defined *refFrom, const Symbol *refTo,
468 uint32_t offset, const Reloc &relocTemplate);
469 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
470 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
471 uint32_t offset);
472 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
473 uint32_t offset);
474 Defined *getClassRo(const Defined *classSym, bool getMetaRo);
475 SourceLanguage getClassSymSourceLang(const Defined *classSym);
476 bool mergeCategoriesIntoBaseClass(const Defined *baseClass,
477 std::vector<InfoInputCategory> &categories);
478 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
479 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
480 uint32_t offset);
481
482 // Allocate a null-terminated StringRef backed by generatedSectionData
483 StringRef newStringData(const char *str);
484 // Allocate section data, backed by generatedSectionData
485 SmallVector<uint8_t> &newSectionData(uint32_t size);
486
487 CategoryLayout catLayout;
488 ClassLayout classLayout;
489 ROClassLayout roClassLayout;
490 ListHeaderLayout listHeaderLayout;
491 MethodLayout methodLayout;
492 ProtocolListHeaderLayout protocolListHeaderLayout;
493
494 InfoCategoryWriter infoCategoryWriter;
495 std::vector<ConcatInputSection *> &allInputSections;
496 // Map of base class Symbol to list of InfoInputCategory's for it
497 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
498
499 // Normally, the binary data comes from the input files, but since we're
500 // generating binary data ourselves, we use the below array to store it in.
501 // Need this to be 'static' so the data survives past the ObjcCategoryMerger
502 // object, as the data will be read by the Writer when the final binary is
503 // generated.
504 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
505 generatedSectionData;
506};
507
508SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
509 ObjcCategoryMerger::generatedSectionData;
510
511ObjcCategoryMerger::ObjcCategoryMerger(
512 std::vector<ConcatInputSection *> &_allInputSections)
513 : catLayout(target->wordSize), classLayout(target->wordSize),
514 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
515 methodLayout(target->wordSize),
516 protocolListHeaderLayout(target->wordSize),
517 allInputSections(_allInputSections) {}
518
519void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
520 const InputSection *isec, InfoWriteSection &catWriteInfo) {
521
522 catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
523 catWriteInfo.align = isec->align;
524 catWriteInfo.outputSection = isec->parent;
525
526 assert(catWriteInfo.outputSection &&
527 "outputSection may not be null in collectSectionWriteInfoFromIsec.");
528
529 if (isec->relocs.size())
530 catWriteInfo.relocTemplate = isec->relocs[0];
531
532 catWriteInfo.valid = true;
533}
534
535Symbol *
536ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
537 uint32_t offset) {
538 if (!isec)
539 return nullptr;
540 const Reloc *reloc = isec->getRelocAt(off: offset);
541
542 if (!reloc)
543 return nullptr;
544
545 Symbol *sym = dyn_cast_if_present<Symbol *>(Val: reloc->referent);
546
547 if (reloc->addend && sym) {
548 assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
549 Defined *definedSym = cast<Defined>(Val: sym);
550 sym = tryFindDefinedOnIsec(isec: definedSym->isec(),
551 offset: definedSym->value + reloc->addend);
552 }
553
554 return sym;
555}
556
557Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
558 uint32_t offset) {
559 for (Defined *sym : isec->symbols)
560 if ((sym->value <= offset) && (sym->value + sym->size > offset))
561 return sym;
562
563 return nullptr;
564}
565
566Defined *
567ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
568 uint32_t offset) {
569 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
570 return dyn_cast_or_null<Defined>(Val: sym);
571}
572
573// Get the class's ro_data symbol. If getMetaRo is true, then we will return
574// the meta-class's ro_data symbol. Otherwise, we will return the class
575// (instance) ro_data symbol.
576Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
577 bool getMetaRo) {
578 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(Val: classSym->isec());
579 if (!isec)
580 return nullptr;
581
582 if (!getMetaRo)
583 return tryGetDefinedAtIsecOffset(isec, offset: classLayout.roDataOffset +
584 classSym->value);
585
586 Defined *metaClass = tryGetDefinedAtIsecOffset(
587 isec, offset: classLayout.metaClassOffset + classSym->value);
588 if (!metaClass)
589 return nullptr;
590
591 return tryGetDefinedAtIsecOffset(
592 isec: dyn_cast<ConcatInputSection>(Val: metaClass->isec()),
593 offset: classLayout.roDataOffset);
594}
595
596// Given an ConcatInputSection or CStringInputSection and an offset, if there is
597// a symbol(Defined) at that offset, then erase the symbol (mark it not live)
598void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
599 const ConcatInputSection *isec, uint32_t offset) {
600 const Reloc *reloc = isec->getRelocAt(off: offset);
601
602 if (!reloc)
603 return;
604
605 Defined *sym = dyn_cast_or_null<Defined>(Val: cast<Symbol *>(Val: reloc->referent));
606 if (!sym)
607 return;
608
609 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(Val: sym->isec()))
610 eraseISec(isec: cisec);
611 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(Val: sym->isec())) {
612 uint32_t totalOffset = sym->value + reloc->addend;
613 StringPiece &piece = csisec->getStringPiece(off: totalOffset);
614 piece.live = false;
615 } else {
616 llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
617 }
618}
619
620bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
621 const InfoInputCategory &catInfo) {
622
623 if (!infoCategoryWriter.catListInfo.valid)
624 collectSectionWriteInfoFromIsec(isec: catInfo.catListIsec,
625 catWriteInfo&: infoCategoryWriter.catListInfo);
626 if (!infoCategoryWriter.catBodyInfo.valid)
627 collectSectionWriteInfoFromIsec(isec: catInfo.catBodyIsec,
628 catWriteInfo&: infoCategoryWriter.catBodyInfo);
629
630 if (!infoCategoryWriter.catNameInfo.valid) {
631 lld::macho::Defined *catNameSym =
632 tryGetDefinedAtIsecOffset(isec: catInfo.catBodyIsec, offset: catLayout.nameOffset);
633
634 if (!catNameSym) {
635 // This is an unhandeled case where the category name is not a symbol but
636 // instead points to an CStringInputSection (that doesn't have any symbol)
637 // TODO: Find a small repro and either fix or add a test case for this
638 // scenario
639 return false;
640 }
641
642 collectSectionWriteInfoFromIsec(isec: catNameSym->isec(),
643 catWriteInfo&: infoCategoryWriter.catNameInfo);
644 }
645
646 // Collect writer info from all the category lists (we're assuming they all
647 // would provide the same info)
648 if (!infoCategoryWriter.catPtrListInfo.valid) {
649 for (uint32_t off = catLayout.instanceMethodsOffset;
650 off <= catLayout.classPropsOffset; off += target->wordSize) {
651 if (Defined *ptrList =
652 tryGetDefinedAtIsecOffset(isec: catInfo.catBodyIsec, offset: off)) {
653 collectSectionWriteInfoFromIsec(isec: ptrList->isec(),
654 catWriteInfo&: infoCategoryWriter.catPtrListInfo);
655 // we've successfully collected data, so we can break
656 break;
657 }
658 }
659 }
660
661 return true;
662}
663
664// Parse a protocol list that might be linked to ConcatInputSection at a given
665// offset. The format of the protocol list is different than other lists (prop
666// lists, method lists) so we need to parse it differently
667void ObjcCategoryMerger::parseProtocolListInfo(
668 const ConcatInputSection *isec, uint32_t secOffset,
669 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
670 assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
671 "Tried to read pointer list beyond protocol section end");
672
673 const Reloc *reloc = isec->getRelocAt(off: secOffset);
674 if (!reloc)
675 return;
676
677 auto *ptrListSym = dyn_cast_or_null<Defined>(Val: cast<Symbol *>(Val: reloc->referent));
678 assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
679
680 // Theoretically protocol count can be either 32b or 64b, depending on
681 // platform pointer size, but to simplify implementation we always just read
682 // the lower 32b which should be good enough.
683 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
684 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
685
686 ptrList.structCount += protocolCount;
687 ptrList.structSize = target->wordSize;
688
689 [[maybe_unused]] uint32_t expectedListSize =
690 (protocolCount * target->wordSize) +
691 /*header(count)*/ protocolListHeaderLayout.totalSize +
692 /*extra null value*/ target->wordSize;
693
694 // On Swift, the protocol list does not have the extra (unnecessary) null
695 [[maybe_unused]] uint32_t expectedListSizeSwift =
696 expectedListSize - target->wordSize;
697
698 assert(((expectedListSize == ptrListSym->isec()->data.size() &&
699 sourceLang == SourceLanguage::ObjC) ||
700 (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
701 sourceLang == SourceLanguage::Swift)) &&
702 "Protocol list does not match expected size");
703
704 uint32_t off = protocolListHeaderLayout.totalSize;
705 for (uint32_t inx = 0; inx < protocolCount; ++inx) {
706 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
707 assert(reloc && "No reloc found at protocol list offset");
708
709 auto *listSym = dyn_cast_or_null<Defined>(Val: cast<Symbol *>(Val: reloc->referent));
710 assert(listSym && "Protocol list reloc does not have a valid Defined");
711
712 ptrList.allPtrs.push_back(x: listSym);
713 off += target->wordSize;
714 }
715 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
716 "expected null terminating protocol");
717 assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
718 "Protocol list end offset does not match expected size");
719}
720
721// Parse a protocol list and return the PointerListInfo for it
722ObjcCategoryMerger::PointerListInfo
723ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
724 uint32_t secOffset,
725 SourceLanguage sourceLang) {
726 PointerListInfo ptrList;
727 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
728 return ptrList;
729}
730
731// Parse a pointer list that might be linked to ConcatInputSection at a given
732// offset. This can be used for instance methods, class methods, instance props
733// and class props since they have the same format.
734bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
735 uint32_t secOffset,
736 PointerListInfo &ptrList) {
737 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
738 assert(isec && "Trying to parse pointer list from null isec");
739 assert(secOffset + target->wordSize <= isec->data.size() &&
740 "Trying to read pointer list beyond section end");
741
742 const Reloc *reloc = isec->getRelocAt(off: secOffset);
743 // Empty list is a valid case, return true.
744 if (!reloc)
745 return true;
746
747 auto *ptrListSym = dyn_cast_or_null<Defined>(Val: cast<Symbol *>(Val: reloc->referent));
748 assert(ptrListSym && "Reloc does not have a valid Defined");
749
750 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
751 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
752 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
753 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
754 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
755
756 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
757
758 ptrList.structCount += thisStructCount;
759 ptrList.structSize = thisStructSize;
760
761 uint32_t expectedListSize =
762 listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
763 assert(expectedListSize == ptrListSym->isec()->data.size() &&
764 "Pointer list does not match expected size");
765
766 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
767 off += target->wordSize) {
768 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
769 assert(reloc && "No reloc found at pointer list offset");
770
771 auto *listSym =
772 dyn_cast_or_null<Defined>(Val: reloc->referent.dyn_cast<Symbol *>());
773 // Sometimes, the reloc points to a StringPiece (InputSection + addend)
774 // instead of a symbol.
775 // TODO: Skip these cases for now, but we should fix this.
776 if (!listSym)
777 return false;
778
779 ptrList.allPtrs.push_back(x: listSym);
780 }
781
782 return true;
783}
784
785// Here we parse all the information of an input category (catInfo) and
786// append the parsed info into the structure which will contain all the
787// information about how a class is extended (extInfo)
788bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
789 ClassExtensionInfo &extInfo) {
790 const Reloc *catNameReloc =
791 catInfo.catBodyIsec->getRelocAt(off: catLayout.nameOffset);
792
793 // Parse name
794 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
795
796 // is this the first category we are parsing?
797 if (extInfo.mergedContainerName.empty())
798 extInfo.objFileForMergeData =
799 dyn_cast_or_null<ObjFile>(Val: catInfo.catBodyIsec->getFile());
800 else
801 extInfo.mergedContainerName += "|";
802
803 assert(extInfo.objFileForMergeData &&
804 "Expected to already have valid objextInfo.objFileForMergeData");
805
806 StringRef catName = catNameReloc->getReferentString();
807 extInfo.mergedContainerName += catName.str();
808
809 // Parse base class
810 if (!extInfo.baseClass) {
811 Symbol *classSym =
812 tryGetSymbolAtIsecOffset(isec: catInfo.catBodyIsec, offset: catLayout.klassOffset);
813 assert(extInfo.baseClassName.empty());
814 extInfo.baseClass = classSym;
815 llvm::StringRef classPrefix(objc::symbol_names::klass);
816 assert(classSym->getName().starts_with(classPrefix) &&
817 "Base class symbol does not start with expected prefix");
818 extInfo.baseClassName = classSym->getName().substr(Start: classPrefix.size());
819 } else {
820 assert((extInfo.baseClass ==
821 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
822 catLayout.klassOffset)) &&
823 "Trying to parse category info into container with different base "
824 "class");
825 }
826
827 if (!parsePointerListInfo(isec: catInfo.catBodyIsec,
828 secOffset: catLayout.instanceMethodsOffset,
829 ptrList&: extInfo.instanceMethods))
830 return false;
831
832 if (!parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.classMethodsOffset,
833 ptrList&: extInfo.classMethods))
834 return false;
835
836 parseProtocolListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.protocolsOffset,
837 ptrList&: extInfo.protocols, sourceLang: catInfo.sourceLanguage);
838
839 if (!parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.instancePropsOffset,
840 ptrList&: extInfo.instanceProps))
841 return false;
842
843 if (!parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.classPropsOffset,
844 ptrList&: extInfo.classProps))
845 return false;
846
847 return true;
848}
849
850// Generate a protocol list (including header) and link it into the parent at
851// the specified offset.
852Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
853 Defined *parentSym, uint32_t linkAtOffset,
854 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
855 if (ptrList.allPtrs.empty())
856 return nullptr;
857
858 assert(ptrList.allPtrs.size() == ptrList.structCount);
859
860 uint32_t bodySize = (ptrList.structCount * target->wordSize) +
861 /*header(count)*/ protocolListHeaderLayout.totalSize +
862 /*extra null value*/ target->wordSize;
863 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: bodySize);
864
865 // This theoretically can be either 32b or 64b, but writing just the first 32b
866 // is good enough
867 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
868 bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
869
870 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
871
872 ConcatInputSection *listSec = make<ConcatInputSection>(
873 args&: *infoCategoryWriter.catPtrListInfo.inputSection, args&: bodyData,
874 args&: infoCategoryWriter.catPtrListInfo.align);
875 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
876 listSec->live = true;
877
878 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
879
880 std::string symName = ptrList.categoryPrefix;
881 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
882
883 Defined *ptrListSym = make<Defined>(
884 args: newStringData(str: symName.c_str()), /*file=*/args: parentSym->getObjectFile(),
885 args&: listSec, /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false,
886 /*isExternal=*/args: false, /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
887 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
888 /*isWeakDefCanBeHidden=*/args: false);
889
890 ptrListSym->used = true;
891 parentSym->getObjectFile()->symbols.push_back(x: ptrListSym);
892 addInputSection(inputSection: listSec);
893
894 createSymbolReference(refFrom: parentSym, refTo: ptrListSym, offset: linkAtOffset,
895 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
896
897 uint32_t offset = protocolListHeaderLayout.totalSize;
898 for (Symbol *symbol : ptrList.allPtrs) {
899 createSymbolReference(refFrom: ptrListSym, refTo: symbol, offset,
900 relocTemplate: infoCategoryWriter.catPtrListInfo.relocTemplate);
901 offset += target->wordSize;
902 }
903
904 return ptrListSym;
905}
906
907// Generate a pointer list (including header) and link it into the parent at the
908// specified offset. This is used for instance and class methods and
909// proprieties.
910void ObjcCategoryMerger::emitAndLinkPointerList(
911 Defined *parentSym, uint32_t linkAtOffset,
912 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
913 if (ptrList.allPtrs.empty())
914 return;
915
916 assert(ptrList.allPtrs.size() * target->wordSize ==
917 ptrList.structCount * ptrList.structSize);
918
919 // Generate body
920 uint32_t bodySize =
921 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
922 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: bodySize);
923
924 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
925 bodyData.data() + listHeaderLayout.structSizeOffset);
926 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
927 bodyData.data() + listHeaderLayout.structCountOffset);
928
929 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
930 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
931
932 ConcatInputSection *listSec = make<ConcatInputSection>(
933 args&: *infoCategoryWriter.catPtrListInfo.inputSection, args&: bodyData,
934 args&: infoCategoryWriter.catPtrListInfo.align);
935 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
936 listSec->live = true;
937
938 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
939
940 std::string symName = ptrList.categoryPrefix;
941 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
942
943 Defined *ptrListSym = make<Defined>(
944 args: newStringData(str: symName.c_str()), /*file=*/args: parentSym->getObjectFile(),
945 args&: listSec, /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false,
946 /*isExternal=*/args: false, /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
947 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
948 /*isWeakDefCanBeHidden=*/args: false);
949
950 ptrListSym->used = true;
951 parentSym->getObjectFile()->symbols.push_back(x: ptrListSym);
952 addInputSection(inputSection: listSec);
953
954 createSymbolReference(refFrom: parentSym, refTo: ptrListSym, offset: linkAtOffset,
955 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
956
957 uint32_t offset = listHeaderLayout.totalSize;
958 for (Symbol *symbol : ptrList.allPtrs) {
959 createSymbolReference(refFrom: ptrListSym, refTo: symbol, offset,
960 relocTemplate: infoCategoryWriter.catPtrListInfo.relocTemplate);
961 offset += target->wordSize;
962 }
963}
964
965// This method creates an __objc_catlist ConcatInputSection with a single slot
966Defined *
967ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
968 const std::string &forBaseClassName,
969 ObjFile *objFile) {
970 uint32_t sectionSize = target->wordSize;
971 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: sectionSize);
972
973 ConcatInputSection *newCatList =
974 make<ConcatInputSection>(args&: *infoCategoryWriter.catListInfo.inputSection,
975 args&: bodyData, args&: infoCategoryWriter.catListInfo.align);
976 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
977 newCatList->live = true;
978
979 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
980
981 std::string catSymName = "<__objc_catlist slot for merged category ";
982 catSymName += forBaseClassName + "(" + forCategoryName + ")>";
983
984 Defined *catListSym = make<Defined>(
985 args: newStringData(str: catSymName.c_str()), /*file=*/args&: objFile, args&: newCatList,
986 /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false, /*isExternal=*/args: false,
987 /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: false,
988 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
989 /*isWeakDefCanBeHidden=*/args: false);
990
991 catListSym->used = true;
992 objFile->symbols.push_back(x: catListSym);
993 addInputSection(inputSection: newCatList);
994 return catListSym;
995}
996
997// Here we generate the main category body and link the name and base class into
998// it. We don't link any other info yet like the protocol and class/instance
999// methods/props.
1000Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1001 const Defined *nameSym,
1002 const Symbol *baseClassSym,
1003 const std::string &baseClassName,
1004 ObjFile *objFile) {
1005 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: catLayout.totalSize);
1006
1007 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1008 catLayout.sizeOffset);
1009 *ptrSize = catLayout.totalSize;
1010
1011 ConcatInputSection *newBodySec =
1012 make<ConcatInputSection>(args&: *infoCategoryWriter.catBodyInfo.inputSection,
1013 args&: bodyData, args&: infoCategoryWriter.catBodyInfo.align);
1014 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1015 newBodySec->live = true;
1016
1017 std::string symName =
1018 objc::symbol_names::category + baseClassName + "(" + name + ")";
1019 Defined *catBodySym = make<Defined>(
1020 args: newStringData(str: symName.c_str()), /*file=*/args&: objFile, args&: newBodySec,
1021 /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false, /*isExternal=*/args: false,
1022 /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
1023 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
1024 /*isWeakDefCanBeHidden=*/args: false);
1025
1026 catBodySym->used = true;
1027 objFile->symbols.push_back(x: catBodySym);
1028 addInputSection(inputSection: newBodySec);
1029
1030 createSymbolReference(refFrom: catBodySym, refTo: nameSym, offset: catLayout.nameOffset,
1031 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
1032
1033 // Create a reloc to the base class (either external or internal)
1034 createSymbolReference(refFrom: catBodySym, refTo: baseClassSym, offset: catLayout.klassOffset,
1035 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
1036
1037 return catBodySym;
1038}
1039
1040// This writes the new category name (for the merged category) into the binary
1041// and returns the sybmol for it.
1042Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1043 ObjFile *objFile) {
1044 StringRef nameStrData = newStringData(str: name.c_str());
1045 // We use +1 below to include the null terminator
1046 llvm::ArrayRef<uint8_t> nameData(
1047 reinterpret_cast<const uint8_t *>(nameStrData.data()),
1048 nameStrData.size() + 1);
1049
1050 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1051 CStringInputSection *newStringSec = make<CStringInputSection>(
1052 args&: *infoCategoryWriter.catNameInfo.inputSection, args&: nameData,
1053 args&: infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/args: true);
1054
1055 parentSection->subsections.push_back(x: {.offset: 0, .isec: newStringSec});
1056
1057 newStringSec->splitIntoPieces();
1058 newStringSec->pieces[0].live = true;
1059 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1060 in.cStringSection->addInput(newStringSec);
1061 assert(newStringSec->pieces.size() == 1);
1062
1063 Defined *catNameSym = make<Defined>(
1064 args: "<merged category name>", /*file=*/args&: objFile, args&: newStringSec,
1065 /*value=*/args: 0, args: nameData.size(),
1066 /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: false,
1067 /*includeInSymtab=*/args: false, /*isReferencedDynamically=*/args: false,
1068 /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false);
1069
1070 catNameSym->used = true;
1071 objFile->symbols.push_back(x: catNameSym);
1072 return catNameSym;
1073}
1074
1075// This method fully creates a new category from the given ClassExtensionInfo.
1076// It creates the category name, body and method/protocol/prop lists and links
1077// them all together. Then it creates a new __objc_catlist entry and adds the
1078// category to it. Calling this method will fully generate a category which will
1079// be available in the final binary.
1080Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1081 Defined *catNameSym = emitCategoryName(name: extInfo.mergedContainerName,
1082 objFile: extInfo.objFileForMergeData);
1083
1084 Defined *catBodySym = emitCategoryBody(
1085 name: extInfo.mergedContainerName, nameSym: catNameSym, baseClassSym: extInfo.baseClass,
1086 baseClassName: extInfo.baseClassName, objFile: extInfo.objFileForMergeData);
1087
1088 Defined *catListSym =
1089 emitCatListEntrySec(forCategoryName: extInfo.mergedContainerName, forBaseClassName: extInfo.baseClassName,
1090 objFile: extInfo.objFileForMergeData);
1091
1092 // Add the single category body to the category list at the offset 0.
1093 createSymbolReference(refFrom: catListSym, refTo: catBodySym, /*offset=*/0,
1094 relocTemplate: infoCategoryWriter.catListInfo.relocTemplate);
1095
1096 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.instanceMethodsOffset, extInfo,
1097 ptrList: extInfo.instanceMethods);
1098
1099 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.classMethodsOffset, extInfo,
1100 ptrList: extInfo.classMethods);
1101
1102 emitAndLinkProtocolList(parentSym: catBodySym, linkAtOffset: catLayout.protocolsOffset, extInfo,
1103 ptrList: extInfo.protocols);
1104
1105 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.instancePropsOffset, extInfo,
1106 ptrList: extInfo.instanceProps);
1107
1108 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.classPropsOffset, extInfo,
1109 ptrList: extInfo.classProps);
1110
1111 return catBodySym;
1112}
1113
1114// This method merges all the categories (sharing a base class) into a single
1115// category.
1116bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1117 std::vector<InfoInputCategory> &categories) {
1118 assert(categories.size() > 1 && "Expected at least 2 categories");
1119
1120 ClassExtensionInfo extInfo(catLayout);
1121
1122 for (auto &catInfo : categories)
1123 if (!parseCatInfoToExtInfo(catInfo, extInfo))
1124 return false;
1125
1126 Defined *newCatDef = emitCategory(extInfo);
1127 assert(newCatDef && "Failed to create a new category");
1128
1129 // Suppress unsuded var warning
1130 (void)newCatDef;
1131
1132 for (auto &catInfo : categories)
1133 catInfo.wasMerged = true;
1134
1135 return true;
1136}
1137
1138void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1139 const Symbol *refTo,
1140 uint32_t offset,
1141 const Reloc &relocTemplate) {
1142 Reloc r = relocTemplate;
1143 r.offset = offset;
1144 r.addend = 0;
1145 r.referent = const_cast<Symbol *>(refTo);
1146 refFrom->isec()->relocs.push_back(x: r);
1147}
1148
1149// Get the list of categories in the '__objc_nlcatlist' section. We can't
1150// optimize these as they have a '+load' method that has to be called at
1151// runtime.
1152DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1153 DenseSet<const Symbol *> nlCategories;
1154
1155 for (InputSection *sec : allInputSections) {
1156 if (sec->getName() != section_names::objcNonLazyCatList)
1157 continue;
1158
1159 for (auto &r : sec->relocs) {
1160 const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1161 nlCategories.insert(V: sym);
1162 }
1163 }
1164 return nlCategories;
1165}
1166
1167void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1168 auto nlCategories = collectNlCategories();
1169
1170 for (InputSection *sec : allInputSections) {
1171 if (sec->getName() != section_names::objcCatList)
1172 continue;
1173 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(Val: sec);
1174 assert(catListCisec &&
1175 "__objc_catList InputSection is not a ConcatInputSection");
1176
1177 for (uint32_t off = 0; off < catListCisec->getSize();
1178 off += target->wordSize) {
1179 Defined *categorySym = tryGetDefinedAtIsecOffset(isec: catListCisec, offset: off);
1180 assert(categorySym &&
1181 "Failed to get a valid category at __objc_catlit offset");
1182
1183 if (nlCategories.count(V: categorySym))
1184 continue;
1185
1186 auto *catBodyIsec = dyn_cast<ConcatInputSection>(Val: categorySym->isec());
1187 assert(catBodyIsec &&
1188 "Category data section is not an ConcatInputSection");
1189
1190 SourceLanguage eLang = SourceLanguage::Unknown;
1191 if (categorySym->getName().starts_with(Prefix: objc::symbol_names::category))
1192 eLang = SourceLanguage::ObjC;
1193 else if (categorySym->getName().starts_with(
1194 Prefix: objc::symbol_names::swift_objc_category))
1195 eLang = SourceLanguage::Swift;
1196 else
1197 llvm_unreachable("Unexpected category symbol name");
1198
1199 InfoInputCategory catInputInfo{.catListIsec: catListCisec, .catBodyIsec: catBodyIsec, .offCatListIsec: off, .sourceLanguage: eLang};
1200
1201 // Check that the category has a reloc at 'klassOffset' (which is
1202 // a pointer to the class symbol)
1203
1204 Symbol *classSym =
1205 tryGetSymbolAtIsecOffset(isec: catBodyIsec, offset: catLayout.klassOffset);
1206 assert(classSym && "Category does not have a valid base class");
1207
1208 if (!collectCategoryWriterInfoFromCategory(catInfo: catInputInfo))
1209 continue;
1210
1211 categoryMap[classSym].push_back(x: catInputInfo);
1212 }
1213 }
1214}
1215
1216// In the input we have multiple __objc_catlist InputSection, each of which may
1217// contain links to multiple categories. Of these categories, we will merge (and
1218// erase) only some. There will be some categories that will remain untouched
1219// (not erased). For these not erased categories, we generate new __objc_catlist
1220// entries since the parent __objc_catlist entry will be erased
1221void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1222 const MapVector<ConcatInputSection *, std::set<uint64_t>>
1223 catListToErasedOffsets) {
1224
1225 // Go through all offsets of all __objc_catlist's that we process and if there
1226 // are categories that we didn't process - generate a new __objc_catlist for
1227 // each.
1228 for (auto &mapEntry : catListToErasedOffsets) {
1229 ConcatInputSection *catListIsec = mapEntry.first;
1230 for (uint32_t catListIsecOffset = 0;
1231 catListIsecOffset < catListIsec->data.size();
1232 catListIsecOffset += target->wordSize) {
1233 // This slot was erased, we can just skip it
1234 if (mapEntry.second.count(x: catListIsecOffset))
1235 continue;
1236
1237 Defined *nonErasedCatBody =
1238 tryGetDefinedAtIsecOffset(isec: catListIsec, offset: catListIsecOffset);
1239 assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1240
1241 // Allocate data for the new __objc_catlist slot
1242 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: target->wordSize);
1243
1244 // We mark the __objc_catlist slot as belonging to the same file as the
1245 // category
1246 ObjFile *objFile = dyn_cast<ObjFile>(Val: nonErasedCatBody->getFile());
1247
1248 ConcatInputSection *listSec = make<ConcatInputSection>(
1249 args&: *infoCategoryWriter.catListInfo.inputSection, args&: bodyData,
1250 args&: infoCategoryWriter.catListInfo.align);
1251 listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1252 listSec->live = true;
1253
1254 std::string slotSymName = "<__objc_catlist slot for category ";
1255 slotSymName += nonErasedCatBody->getName();
1256 slotSymName += ">";
1257
1258 Defined *catListSlotSym = make<Defined>(
1259 args: newStringData(str: slotSymName.c_str()), /*file=*/args&: objFile, args&: listSec,
1260 /*value=*/args: 0, args: bodyData.size(),
1261 /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: false,
1262 /*includeInSymtab=*/args: false, /*isReferencedDynamically=*/args: false,
1263 /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false);
1264
1265 catListSlotSym->used = true;
1266 objFile->symbols.push_back(x: catListSlotSym);
1267 addInputSection(inputSection: listSec);
1268
1269 // Now link the category body into the newly created slot
1270 createSymbolReference(refFrom: catListSlotSym, refTo: nonErasedCatBody, offset: 0,
1271 relocTemplate: infoCategoryWriter.catListInfo.relocTemplate);
1272 }
1273 }
1274}
1275
1276void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1277 isec->live = false;
1278 for (auto &sym : isec->symbols)
1279 sym->used = false;
1280}
1281
1282// This fully erases the merged categories, including their body, their names,
1283// their method/protocol/prop lists and the __objc_catlist entries that link to
1284// them.
1285void ObjcCategoryMerger::eraseMergedCategories() {
1286 // Map of InputSection to a set of offsets of the categories that were merged
1287 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1288
1289 for (auto &mapEntry : categoryMap) {
1290 for (InfoInputCategory &catInfo : mapEntry.second) {
1291 if (catInfo.wasMerged) {
1292 eraseISec(isec: catInfo.catListIsec);
1293 catListToErasedOffsets[catInfo.catListIsec].insert(
1294 x: catInfo.offCatListIsec);
1295 }
1296 }
1297 }
1298
1299 // If there were categories that we did not erase, we need to generate a new
1300 // __objc_catList that contains only the un-merged categories, and get rid of
1301 // the references to the ones we merged.
1302 generateCatListForNonErasedCategories(catListToErasedOffsets);
1303
1304 // Erase the old method lists & names of the categories that were merged
1305 for (auto &mapEntry : categoryMap) {
1306 for (InfoInputCategory &catInfo : mapEntry.second) {
1307 if (!catInfo.wasMerged)
1308 continue;
1309
1310 eraseISec(isec: catInfo.catBodyIsec);
1311
1312 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1313 // categories because the name will sometimes also be used for other
1314 // purposes.
1315 // For Swift, see usages of 'l_.str.11.SimpleClass' in
1316 // objc-category-merging-swift.s
1317 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1318 // objc-category-merging-erase-objc-name-test.s
1319 // TODO: handle the above in a smarter way
1320
1321 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1322 offset: catLayout.instanceMethodsOffset);
1323 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1324 offset: catLayout.classMethodsOffset);
1325 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1326 offset: catLayout.protocolsOffset);
1327 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1328 offset: catLayout.classPropsOffset);
1329 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1330 offset: catLayout.instancePropsOffset);
1331 }
1332 }
1333}
1334
1335void ObjcCategoryMerger::doMerge() {
1336 collectAndValidateCategoriesData();
1337
1338 for (auto &[baseClass, catInfos] : categoryMap) {
1339 bool merged = false;
1340 if (auto *baseClassDef = dyn_cast<Defined>(Val: baseClass)) {
1341 // Merge all categories into the base class
1342 merged = mergeCategoriesIntoBaseClass(baseClass: baseClassDef, categories&: catInfos);
1343 } else if (catInfos.size() > 1) {
1344 // Merge all categories into a new, single category
1345 merged = mergeCategoriesIntoSingleCategory(categories&: catInfos);
1346 }
1347 if (!merged)
1348 warn(msg: "ObjC category merging skipped for class symbol' " +
1349 baseClass->getName().str() + "'\n");
1350 }
1351
1352 // Erase all categories that were merged
1353 eraseMergedCategories();
1354}
1355
1356void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1357
1358StringRef ObjcCategoryMerger::newStringData(const char *str) {
1359 uint32_t len = strlen(s: str);
1360 uint32_t bufSize = len + 1;
1361 SmallVector<uint8_t> &data = newSectionData(size: bufSize);
1362 char *strData = reinterpret_cast<char *>(data.data());
1363 // Copy the string chars and null-terminator
1364 memcpy(dest: strData, src: str, n: bufSize);
1365 return StringRef(strData, len);
1366}
1367
1368SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1369 generatedSectionData.push_back(
1370 Elt: std::make_unique<SmallVector<uint8_t>>(args&: size, args: 0));
1371 return *generatedSectionData.back();
1372}
1373
1374} // namespace
1375
1376void objc::mergeCategories() {
1377 TimeTraceScope timeScope("ObjcCategoryMerger");
1378
1379 ObjcCategoryMerger merger(inputSections);
1380 merger.doMerge();
1381}
1382
1383void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1384
1385ObjcCategoryMerger::SourceLanguage
1386ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1387 if (classSym->getName().starts_with(Prefix: objc::symbol_names::swift_objc_klass))
1388 return SourceLanguage::Swift;
1389
1390 // If the symbol name matches the ObjC prefix, we don't necessarely know this
1391 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1392 // classes. Ex:
1393 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1394 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1395 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1396 //
1397 // So we scan for symbols with the same address and check for the Swift class
1398 if (classSym->getName().starts_with(Prefix: objc::symbol_names::klass)) {
1399 for (auto &sym : classSym->originalIsec->symbols)
1400 if (sym->value == classSym->value)
1401 if (sym->getName().starts_with(Prefix: objc::symbol_names::swift_objc_klass))
1402 return SourceLanguage::Swift;
1403 return SourceLanguage::ObjC;
1404 }
1405
1406 llvm_unreachable("Unexpected class symbol name during category merging");
1407}
1408
1409bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1410 const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1411 assert(categories.size() >= 1 && "Expected at least one category to merge");
1412
1413 // Collect all the info from the categories
1414 ClassExtensionInfo extInfo(catLayout);
1415 extInfo.baseClass = baseClass;
1416 extInfo.baseClassSourceLanguage = getClassSymSourceLang(classSym: baseClass);
1417
1418 for (auto &catInfo : categories)
1419 if (!parseCatInfoToExtInfo(catInfo, extInfo))
1420 return false;
1421
1422 // Get metadata for the base class
1423 Defined *metaRo = getClassRo(classSym: baseClass, /*getMetaRo=*/true);
1424 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(Val: metaRo->isec());
1425 Defined *classRo = getClassRo(classSym: baseClass, /*getMetaRo=*/false);
1426 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(Val: classRo->isec());
1427
1428 // Now collect the info from the base class from the various lists in the
1429 // class metadata
1430
1431 // Protocol lists are a special case - the same protocol list is in classRo
1432 // and metaRo, so we only need to parse it once
1433 parseProtocolListInfo(isec: classIsec, secOffset: roClassLayout.baseProtocolsOffset,
1434 ptrList&: extInfo.protocols, sourceLang: extInfo.baseClassSourceLanguage);
1435
1436 // Check that the classRo and metaRo protocol lists are identical
1437 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1438 extInfo.baseClassSourceLanguage) ==
1439 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1440 extInfo.baseClassSourceLanguage) &&
1441 "Category merger expects classRo and metaRo to have the same protocol "
1442 "list");
1443
1444 parsePointerListInfo(isec: metaIsec, secOffset: roClassLayout.baseMethodsOffset,
1445 ptrList&: extInfo.classMethods);
1446 parsePointerListInfo(isec: classIsec, secOffset: roClassLayout.baseMethodsOffset,
1447 ptrList&: extInfo.instanceMethods);
1448
1449 parsePointerListInfo(isec: metaIsec, secOffset: roClassLayout.basePropertiesOffset,
1450 ptrList&: extInfo.classProps);
1451 parsePointerListInfo(isec: classIsec, secOffset: roClassLayout.basePropertiesOffset,
1452 ptrList&: extInfo.instanceProps);
1453
1454 // Erase the old lists - these will be generated and replaced
1455 eraseSymbolAtIsecOffset(isec: metaIsec, offset: roClassLayout.baseMethodsOffset);
1456 eraseSymbolAtIsecOffset(isec: metaIsec, offset: roClassLayout.baseProtocolsOffset);
1457 eraseSymbolAtIsecOffset(isec: metaIsec, offset: roClassLayout.basePropertiesOffset);
1458 eraseSymbolAtIsecOffset(isec: classIsec, offset: roClassLayout.baseMethodsOffset);
1459 eraseSymbolAtIsecOffset(isec: classIsec, offset: roClassLayout.baseProtocolsOffset);
1460 eraseSymbolAtIsecOffset(isec: classIsec, offset: roClassLayout.basePropertiesOffset);
1461
1462 // Emit the newly merged lists - first into the meta RO then into the class RO
1463 // First we emit and link the protocol list into the meta RO. Then we link it
1464 // in the classRo as well (they're supposed to be identical)
1465 if (Defined *protoListSym =
1466 emitAndLinkProtocolList(parentSym: metaRo, linkAtOffset: roClassLayout.baseProtocolsOffset,
1467 extInfo, ptrList: extInfo.protocols)) {
1468 createSymbolReference(refFrom: classRo, refTo: protoListSym,
1469 offset: roClassLayout.baseProtocolsOffset,
1470 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
1471 }
1472
1473 emitAndLinkPointerList(parentSym: metaRo, linkAtOffset: roClassLayout.baseMethodsOffset, extInfo,
1474 ptrList: extInfo.classMethods);
1475 emitAndLinkPointerList(parentSym: classRo, linkAtOffset: roClassLayout.baseMethodsOffset, extInfo,
1476 ptrList: extInfo.instanceMethods);
1477
1478 emitAndLinkPointerList(parentSym: metaRo, linkAtOffset: roClassLayout.basePropertiesOffset, extInfo,
1479 ptrList: extInfo.classProps);
1480
1481 emitAndLinkPointerList(parentSym: classRo, linkAtOffset: roClassLayout.basePropertiesOffset, extInfo,
1482 ptrList: extInfo.instanceProps);
1483
1484 // Mark all the categories as merged - this will be used to erase them later
1485 for (auto &catInfo : categories)
1486 catInfo.wasMerged = true;
1487
1488 return true;
1489}
1490
1491// Erase the symbol at a given offset in an InputSection
1492void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1493 uint32_t offset) {
1494 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1495 if (!sym)
1496 return;
1497
1498 // Remove the symbol from isec->symbols
1499 assert(isa<Defined>(sym) && "Can only erase a Defined");
1500 llvm::erase(C&: isec->symbols, V: sym);
1501
1502 // Remove the relocs that refer to this symbol
1503 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1504 llvm::erase_if(C&: isec->relocs, P: removeAtOff);
1505
1506 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1507 // the whole ConcatInputSection
1508 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(Val: sym->isec()))
1509 if (cisec->data.size() == sym->size)
1510 eraseISec(isec: cisec);
1511}
1512

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of lld/MachO/ObjC.cpp