1//===- ObjC.cpp -----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjC.h"
10#include "ConcatOutputSection.h"
11#include "InputFiles.h"
12#include "InputSection.h"
13#include "Layout.h"
14#include "OutputSegment.h"
15#include "SyntheticSections.h"
16#include "Target.h"
17
18#include "lld/Common/ErrorHandler.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/BinaryFormat/MachO.h"
21#include "llvm/Bitcode/BitcodeReader.h"
22#include "llvm/Support/TimeProfiler.h"
23
24using namespace llvm;
25using namespace llvm::MachO;
26using namespace lld;
27using namespace lld::macho;
28
29template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
30 using SectionHeader = typename LP::section;
31
32 auto *hdr =
33 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
34 if (hdr->magic != LP::magic)
35 return false;
36
37 if (const auto *c =
38 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
39 auto sectionHeaders = ArrayRef<SectionHeader>{
40 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
41 for (const SectionHeader &secHead : sectionHeaders) {
42 StringRef sectname(secHead.sectname,
43 strnlen(secHead.sectname, sizeof(secHead.sectname)));
44 StringRef segname(secHead.segname,
45 strnlen(secHead.segname, sizeof(secHead.segname)));
46 if ((segname == segment_names::data &&
47 sectname == section_names::objcCatList) ||
48 (segname == segment_names::text &&
49 sectname.starts_with(Prefix: section_names::swift))) {
50 return true;
51 }
52 }
53 }
54 return false;
55}
56
57static bool objectHasObjCSection(MemoryBufferRef mb) {
58 if (target->wordSize == 8)
59 return ::objectHasObjCSection<LP64>(mb);
60 else
61 return ::objectHasObjCSection<ILP32>(mb);
62}
63
64bool macho::hasObjCSection(MemoryBufferRef mb) {
65 switch (identify_magic(magic: mb.getBuffer())) {
66 case file_magic::macho_object:
67 return objectHasObjCSection(mb);
68 case file_magic::bitcode:
69 return check(e: isBitcodeContainingObjCCategory(Buffer: mb));
70 default:
71 return false;
72 }
73}
74
75namespace {
76
77#define FOR_EACH_CATEGORY_FIELD(DO) \
78 DO(Ptr, name) \
79 DO(Ptr, klass) \
80 DO(Ptr, instanceMethods) \
81 DO(Ptr, classMethods) \
82 DO(Ptr, protocols) \
83 DO(Ptr, instanceProps) \
84 DO(Ptr, classProps) \
85 DO(uint32_t, size)
86
87CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
88
89#undef FOR_EACH_CATEGORY_FIELD
90
91#define FOR_EACH_CLASS_FIELD(DO) \
92 DO(Ptr, metaClass) \
93 DO(Ptr, superClass) \
94 DO(Ptr, methodCache) \
95 DO(Ptr, vtable) \
96 DO(Ptr, roData)
97
98CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
99
100#undef FOR_EACH_CLASS_FIELD
101
102#define FOR_EACH_RO_CLASS_FIELD(DO) \
103 DO(uint32_t, flags) \
104 DO(uint32_t, instanceStart) \
105 DO(Ptr, instanceSize) \
106 DO(Ptr, ivarLayout) \
107 DO(Ptr, name) \
108 DO(Ptr, baseMethods) \
109 DO(Ptr, baseProtocols) \
110 DO(Ptr, ivars) \
111 DO(Ptr, weakIvarLayout) \
112 DO(Ptr, baseProperties)
113
114CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
115
116#undef FOR_EACH_RO_CLASS_FIELD
117
118#define FOR_EACH_LIST_HEADER(DO) \
119 DO(uint32_t, structSize) \
120 DO(uint32_t, structCount)
121
122CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
123
124#undef FOR_EACH_LIST_HEADER
125
126#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
127
128CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
129
130#undef FOR_EACH_PROTOCOL_LIST_HEADER
131
132#define FOR_EACH_METHOD(DO) \
133 DO(Ptr, name) \
134 DO(Ptr, type) \
135 DO(Ptr, impl)
136
137CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
138
139#undef FOR_EACH_METHOD
140
141enum MethodContainerKind {
142 MCK_Class,
143 MCK_Category,
144};
145
146struct MethodContainer {
147 MethodContainerKind kind;
148 const ConcatInputSection *isec;
149};
150
151enum MethodKind {
152 MK_Instance,
153 MK_Static,
154};
155
156struct ObjcClass {
157 DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
158 DenseMap<CachedHashStringRef, MethodContainer> classMethods;
159};
160
161} // namespace
162
163class ObjcCategoryChecker {
164public:
165 ObjcCategoryChecker();
166 void parseCategory(const ConcatInputSection *catListIsec);
167
168private:
169 void parseClass(const Defined *classSym);
170 void parseMethods(const ConcatInputSection *methodsIsec,
171 const Symbol *methodContainer,
172 const ConcatInputSection *containerIsec,
173 MethodContainerKind, MethodKind);
174
175 CategoryLayout catLayout;
176 ClassLayout classLayout;
177 ROClassLayout roClassLayout;
178 ListHeaderLayout listHeaderLayout;
179 MethodLayout methodLayout;
180
181 DenseMap<const Symbol *, ObjcClass> classMap;
182};
183
184ObjcCategoryChecker::ObjcCategoryChecker()
185 : catLayout(target->wordSize), classLayout(target->wordSize),
186 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
187 methodLayout(target->wordSize) {}
188
189// \p r must point to an offset within a cstring section.
190static StringRef getReferentString(const Reloc &r) {
191 if (auto *isec = r.referent.dyn_cast<InputSection *>())
192 return cast<CStringInputSection>(Val: isec)->getStringRefAtOffset(off: r.addend);
193 auto *sym = cast<Defined>(Val: r.referent.get<Symbol *>());
194 return cast<CStringInputSection>(Val: sym->isec())
195 ->getStringRefAtOffset(off: sym->value + r.addend);
196}
197
198void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
199 const Symbol *methodContainerSym,
200 const ConcatInputSection *containerIsec,
201 MethodContainerKind mcKind,
202 MethodKind mKind) {
203 ObjcClass &klass = classMap[methodContainerSym];
204 for (const Reloc &r : methodsIsec->relocs) {
205 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
206 methodLayout.nameOffset)
207 continue;
208
209 CachedHashStringRef methodName(getReferentString(r));
210 // +load methods are special: all implementations are called by the runtime
211 // even if they are part of the same class. Thus there is no need to check
212 // for duplicates.
213 // NOTE: Instead of specifically checking for this method name, ld64 simply
214 // checks whether a class / category is present in __objc_nlclslist /
215 // __objc_nlcatlist respectively. This will be the case if the class /
216 // category has a +load method. It skips optimizing the categories if there
217 // are multiple +load methods. Since it does dupe checking as part of the
218 // optimization process, this avoids spurious dupe messages around +load,
219 // but it also means that legit dupe issues for other methods are ignored.
220 if (mKind == MK_Static && methodName.val() == "load")
221 continue;
222
223 auto &methodMap =
224 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
225 if (methodMap
226 .try_emplace(Key: methodName, Args: MethodContainer{.kind: mcKind, .isec: containerIsec})
227 .second)
228 continue;
229
230 // We have a duplicate; generate a warning message.
231 const auto &mc = methodMap.lookup(Val: methodName);
232 const Reloc *nameReloc = nullptr;
233 if (mc.kind == MCK_Category) {
234 nameReloc = mc.isec->getRelocAt(off: catLayout.nameOffset);
235 } else {
236 assert(mc.kind == MCK_Class);
237 const auto *roIsec = mc.isec->getRelocAt(off: classLayout.roDataOffset)
238 ->getReferentInputSection();
239 nameReloc = roIsec->getRelocAt(off: roClassLayout.nameOffset);
240 }
241 StringRef containerName = getReferentString(r: *nameReloc);
242 StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
243
244 // We should only ever encounter collisions when parsing category methods
245 // (since the Class struct is parsed before any of its categories).
246 assert(mcKind == MCK_Category);
247 StringRef newCatName =
248 getReferentString(r: *containerIsec->getRelocAt(off: catLayout.nameOffset));
249
250 auto formatObjAndSrcFileName = [](const InputSection *section) {
251 lld::macho::InputFile *inputFile = section->getFile();
252 std::string result = toString(file: inputFile);
253
254 auto objFile = dyn_cast_or_null<ObjFile>(Val: inputFile);
255 if (objFile && objFile->compileUnit)
256 result += " (" + objFile->sourceFile() + ")";
257
258 return result;
259 };
260
261 StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
262 warn(msg: "method '" + methPrefix + methodName.val() +
263 "' has conflicting definitions:\n>>> defined in category " +
264 newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
265 "\n>>> defined in " + containerType + " " + containerName + " from " +
266 formatObjAndSrcFileName(mc.isec));
267 }
268}
269
270void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
271 auto *classReloc = catIsec->getRelocAt(off: catLayout.klassOffset);
272 if (!classReloc)
273 return;
274
275 auto *classSym = classReloc->referent.get<Symbol *>();
276 if (auto *d = dyn_cast<Defined>(Val: classSym))
277 if (!classMap.count(Val: d))
278 parseClass(classSym: d);
279
280 if (const auto *r = catIsec->getRelocAt(off: catLayout.classMethodsOffset)) {
281 parseMethods(methodsIsec: cast<ConcatInputSection>(Val: r->getReferentInputSection()),
282 methodContainerSym: classSym, containerIsec: catIsec, mcKind: MCK_Category, mKind: MK_Static);
283 }
284
285 if (const auto *r = catIsec->getRelocAt(off: catLayout.instanceMethodsOffset)) {
286 parseMethods(methodsIsec: cast<ConcatInputSection>(Val: r->getReferentInputSection()),
287 methodContainerSym: classSym, containerIsec: catIsec, mcKind: MCK_Category, mKind: MK_Instance);
288 }
289}
290
291void ObjcCategoryChecker::parseClass(const Defined *classSym) {
292 // Given a Class struct, get its corresponding Methods struct
293 auto getMethodsIsec =
294 [&](const InputSection *classIsec) -> ConcatInputSection * {
295 if (const auto *r = classIsec->getRelocAt(off: classLayout.roDataOffset)) {
296 if (const auto *roIsec =
297 cast_or_null<ConcatInputSection>(Val: r->getReferentInputSection())) {
298 if (const auto *r =
299 roIsec->getRelocAt(off: roClassLayout.baseMethodsOffset)) {
300 if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
301 Val: r->getReferentInputSection()))
302 return methodsIsec;
303 }
304 }
305 }
306 return nullptr;
307 };
308
309 const auto *classIsec = cast<ConcatInputSection>(Val: classSym->isec());
310
311 // Parse instance methods.
312 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
313 parseMethods(methodsIsec: instanceMethodsIsec, methodContainerSym: classSym, containerIsec: classIsec, mcKind: MCK_Class,
314 mKind: MK_Instance);
315
316 // Class methods are contained in the metaclass.
317 if (const auto *r = classSym->isec()->getRelocAt(off: classLayout.metaClassOffset))
318 if (const auto *classMethodsIsec = getMethodsIsec(
319 cast<ConcatInputSection>(Val: r->getReferentInputSection())))
320 parseMethods(methodsIsec: classMethodsIsec, methodContainerSym: classSym, containerIsec: classIsec, mcKind: MCK_Class, mKind: MK_Static);
321}
322
323void objc::checkCategories() {
324 TimeTraceScope timeScope("ObjcCategoryChecker");
325
326 ObjcCategoryChecker checker;
327 for (const InputSection *isec : inputSections) {
328 if (isec->getName() == section_names::objcCatList)
329 for (const Reloc &r : isec->relocs) {
330 auto *catIsec = cast<ConcatInputSection>(Val: r.getReferentInputSection());
331 checker.parseCategory(catIsec);
332 }
333 }
334}
335
336namespace {
337
338class ObjcCategoryMerger {
339 // Information about an input category
340 struct InfoInputCategory {
341 ConcatInputSection *catListIsec;
342 ConcatInputSection *catBodyIsec;
343 uint32_t offCatListIsec = 0;
344
345 bool wasMerged = false;
346 };
347
348 // To write new (merged) categories or classes, we will try make limited
349 // assumptions about the alignment and the sections the various class/category
350 // info are stored in and . So we'll just reuse the same sections and
351 // alignment as already used in existing (input) categories. To do this we
352 // have InfoCategoryWriter which contains the various sections that the
353 // generated categories will be written to.
354 template <typename T> struct InfoWriteSection {
355 bool valid = false; // Data has been successfully collected from input
356 uint32_t align = 0;
357 Section *inputSection;
358 Reloc relocTemplate;
359 T *outputSection;
360 };
361
362 struct InfoCategoryWriter {
363 InfoWriteSection<ConcatOutputSection> catListInfo;
364 InfoWriteSection<ConcatOutputSection> catBodyInfo;
365 InfoWriteSection<CStringSection> catNameInfo;
366 InfoWriteSection<ConcatOutputSection> catPtrListInfo;
367 };
368
369 // Information about a pointer list in the original categories (method lists,
370 // protocol lists, etc)
371 struct PointerListInfo {
372 PointerListInfo(const char *_categoryPrefix, uint32_t _categoryOffset,
373 uint32_t _pointersPerStruct)
374 : categoryPrefix(_categoryPrefix), categoryOffset(_categoryOffset),
375 pointersPerStruct(_pointersPerStruct) {}
376 const char *categoryPrefix;
377 uint32_t categoryOffset = 0;
378
379 uint32_t pointersPerStruct = 0;
380
381 uint32_t structSize = 0;
382 uint32_t structCount = 0;
383
384 std::vector<Symbol *> allPtrs;
385 };
386
387 // Full information about all the categories that extend a class. This will
388 // include all the additional methods, protocols, and properties that are
389 // contained in all the categories that extend a particular class.
390 struct ClassExtensionInfo {
391 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
392
393 // Merged names of containers. Ex: base|firstCategory|secondCategory|...
394 std::string mergedContainerName;
395 std::string baseClassName;
396 Symbol *baseClass = nullptr;
397 CategoryLayout &catLayout;
398
399 // In case we generate new data, mark the new data as belonging to this file
400 ObjFile *objFileForMergeData = nullptr;
401
402 PointerListInfo instanceMethods = {
403 objc::symbol_names::categoryInstanceMethods,
404 /*_categoryOffset=*/catLayout.instanceMethodsOffset,
405 /*pointersPerStruct=*/3};
406 PointerListInfo classMethods = {
407 objc::symbol_names::categoryClassMethods,
408 /*_categoryOffset=*/catLayout.classMethodsOffset,
409 /*pointersPerStruct=*/3};
410 PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
411 /*_categoryOffset=*/catLayout.protocolsOffset,
412 /*pointersPerStruct=*/0};
413 PointerListInfo instanceProps = {
414 objc::symbol_names::listProprieties,
415 /*_categoryOffset=*/catLayout.instancePropsOffset,
416 /*pointersPerStruct=*/2};
417 PointerListInfo classProps = {
418 objc::symbol_names::klassPropList,
419 /*_categoryOffset=*/catLayout.classPropsOffset,
420 /*pointersPerStruct=*/2};
421 };
422
423public:
424 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
425 void doMerge();
426 static void doCleanup();
427
428private:
429 void collectAndValidateCategoriesData();
430 void
431 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
432
433 void eraseISec(ConcatInputSection *isec);
434 void eraseMergedCategories();
435
436 void generateCatListForNonErasedCategories(
437 std::map<ConcatInputSection *, std::set<uint64_t>>
438 catListToErasedOffsets);
439 template <typename T>
440 void collectSectionWriteInfoFromIsec(const InputSection *isec,
441 InfoWriteSection<T> &catWriteInfo);
442 void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
443 void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
444 ClassExtensionInfo &extInfo);
445
446 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
447 PointerListInfo &ptrList);
448
449 void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
450 PointerListInfo &ptrList);
451
452 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
453 const ClassExtensionInfo &extInfo,
454 const PointerListInfo &ptrList);
455
456 void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
457 const ClassExtensionInfo &extInfo,
458 const PointerListInfo &ptrList);
459
460 Defined *emitCategory(const ClassExtensionInfo &extInfo);
461 Defined *emitCatListEntrySec(const std::string &forCategoryName,
462 const std::string &forBaseClassName,
463 ObjFile *objFile);
464 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
465 const Symbol *baseClassSym,
466 const std::string &baseClassName, ObjFile *objFile);
467 Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
468 void createSymbolReference(Defined *refFrom, const Symbol *refTo,
469 uint32_t offset, const Reloc &relocTemplate);
470 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
471 uint32_t offset);
472 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
473 uint32_t offset);
474 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
475 uint32_t offset);
476
477 // Allocate a null-terminated StringRef backed by generatedSectionData
478 StringRef newStringData(const char *str);
479 // Allocate section data, backed by generatedSectionData
480 SmallVector<uint8_t> &newSectionData(uint32_t size);
481
482 CategoryLayout catLayout;
483 ClassLayout classLayout;
484 ROClassLayout roClassLayout;
485 ListHeaderLayout listHeaderLayout;
486 MethodLayout methodLayout;
487 ProtocolListHeaderLayout protocolListHeaderLayout;
488
489 InfoCategoryWriter infoCategoryWriter;
490 std::vector<ConcatInputSection *> &allInputSections;
491 // Map of base class Symbol to list of InfoInputCategory's for it
492 DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
493
494 // Normally, the binary data comes from the input files, but since we're
495 // generating binary data ourselves, we use the below array to store it in.
496 // Need this to be 'static' so the data survives past the ObjcCategoryMerger
497 // object, as the data will be read by the Writer when the final binary is
498 // generated.
499 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
500 generatedSectionData;
501};
502
503SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
504 ObjcCategoryMerger::generatedSectionData;
505
506ObjcCategoryMerger::ObjcCategoryMerger(
507 std::vector<ConcatInputSection *> &_allInputSections)
508 : catLayout(target->wordSize), classLayout(target->wordSize),
509 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
510 methodLayout(target->wordSize),
511 protocolListHeaderLayout(target->wordSize),
512 allInputSections(_allInputSections) {}
513
514// This is a template so that it can be used both for CStringSection and
515// ConcatOutputSection
516template <typename T>
517void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
518 const InputSection *isec, InfoWriteSection<T> &catWriteInfo) {
519
520 catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
521 catWriteInfo.align = isec->align;
522 catWriteInfo.outputSection = dyn_cast_or_null<T>(isec->parent);
523
524 assert(catWriteInfo.outputSection &&
525 "outputSection may not be null in collectSectionWriteInfoFromIsec.");
526
527 if (isec->relocs.size())
528 catWriteInfo.relocTemplate = isec->relocs[0];
529
530 catWriteInfo.valid = true;
531}
532
533Symbol *
534ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
535 uint32_t offset) {
536 const Reloc *reloc = isec->getRelocAt(off: offset);
537
538 if (!reloc)
539 return nullptr;
540
541 return reloc->referent.get<Symbol *>();
542}
543
544Defined *
545ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
546 uint32_t offset) {
547 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
548 return dyn_cast_or_null<Defined>(Val: sym);
549}
550
551// Given an ConcatInputSection or CStringInputSection and an offset, if there is
552// a symbol(Defined) at that offset, then erase the symbol (mark it not live)
553void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
554 const ConcatInputSection *isec, uint32_t offset) {
555 const Reloc *reloc = isec->getRelocAt(off: offset);
556
557 if (!reloc)
558 return;
559
560 Defined *sym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
561 if (!sym)
562 return;
563
564 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(Val: sym->isec()))
565 eraseISec(isec: cisec);
566 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(Val: sym->isec())) {
567 uint32_t totalOffset = sym->value + reloc->addend;
568 StringPiece &piece = csisec->getStringPiece(off: totalOffset);
569 piece.live = false;
570 } else {
571 llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
572 }
573}
574
575void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
576 const InfoInputCategory &catInfo) {
577
578 if (!infoCategoryWriter.catListInfo.valid)
579 collectSectionWriteInfoFromIsec<ConcatOutputSection>(
580 isec: catInfo.catListIsec, catWriteInfo&: infoCategoryWriter.catListInfo);
581 if (!infoCategoryWriter.catBodyInfo.valid)
582 collectSectionWriteInfoFromIsec<ConcatOutputSection>(
583 isec: catInfo.catBodyIsec, catWriteInfo&: infoCategoryWriter.catBodyInfo);
584
585 if (!infoCategoryWriter.catNameInfo.valid) {
586 lld::macho::Defined *catNameSym =
587 tryGetDefinedAtIsecOffset(isec: catInfo.catBodyIsec, offset: catLayout.nameOffset);
588 assert(catNameSym && "Category does not have a valid name Symbol");
589
590 collectSectionWriteInfoFromIsec<CStringSection>(
591 isec: catNameSym->isec(), catWriteInfo&: infoCategoryWriter.catNameInfo);
592 }
593
594 // Collect writer info from all the category lists (we're assuming they all
595 // would provide the same info)
596 if (!infoCategoryWriter.catPtrListInfo.valid) {
597 for (uint32_t off = catLayout.instanceMethodsOffset;
598 off <= catLayout.classPropsOffset; off += target->wordSize) {
599 if (Defined *ptrList =
600 tryGetDefinedAtIsecOffset(isec: catInfo.catBodyIsec, offset: off)) {
601 collectSectionWriteInfoFromIsec<ConcatOutputSection>(
602 isec: ptrList->isec(), catWriteInfo&: infoCategoryWriter.catPtrListInfo);
603 // we've successfully collected data, so we can break
604 break;
605 }
606 }
607 }
608}
609
610// Parse a protocol list that might be linked to ConcatInputSection at a given
611// offset. The format of the protocol list is different than other lists (prop
612// lists, method lists) so we need to parse it differently
613void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
614 uint32_t secOffset,
615 PointerListInfo &ptrList) {
616 assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
617 "Tried to read pointer list beyond protocol section end");
618
619 const Reloc *reloc = isec->getRelocAt(off: secOffset);
620 if (!reloc)
621 return;
622
623 auto *ptrListSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
624 assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
625
626 // Theoretically protocol count can be either 32b or 64b, depending on
627 // platform pointer size, but to simplify implementation we always just read
628 // the lower 32b which should be good enough.
629 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
630 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
631
632 ptrList.structCount += protocolCount;
633 ptrList.structSize = target->wordSize;
634
635 uint32_t expectedListSize =
636 (protocolCount * target->wordSize) +
637 /*header(count)*/ protocolListHeaderLayout.totalSize +
638 /*extra null value*/ target->wordSize;
639 assert(expectedListSize == ptrListSym->isec()->data.size() &&
640 "Protocol list does not match expected size");
641
642 // Suppress unsuded var warning
643 (void)expectedListSize;
644
645 uint32_t off = protocolListHeaderLayout.totalSize;
646 for (uint32_t inx = 0; inx < protocolCount; ++inx) {
647 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
648 assert(reloc && "No reloc found at protocol list offset");
649
650 auto *listSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
651 assert(listSym && "Protocol list reloc does not have a valid Defined");
652
653 ptrList.allPtrs.push_back(x: listSym);
654 off += target->wordSize;
655 }
656 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
657 "expected null terminating protocol");
658 assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
659 "Protocol list end offset does not match expected size");
660}
661
662// Parse a pointer list that might be linked to ConcatInputSection at a given
663// offset. This can be used for instance methods, class methods, instance props
664// and class props since they have the same format.
665void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
666 uint32_t secOffset,
667 PointerListInfo &ptrList) {
668 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
669 assert(isec && "Trying to parse pointer list from null isec");
670 assert(secOffset + target->wordSize <= isec->data.size() &&
671 "Trying to read pointer list beyond section end");
672
673 const Reloc *reloc = isec->getRelocAt(off: secOffset);
674 if (!reloc)
675 return;
676
677 auto *ptrListSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
678 assert(ptrListSym && "Reloc does not have a valid Defined");
679
680 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
681 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
682 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
683 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
684 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
685
686 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
687
688 ptrList.structCount += thisStructCount;
689 ptrList.structSize = thisStructSize;
690
691 uint32_t expectedListSize =
692 listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
693 assert(expectedListSize == ptrListSym->isec()->data.size() &&
694 "Pointer list does not match expected size");
695
696 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
697 off += target->wordSize) {
698 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
699 assert(reloc && "No reloc found at pointer list offset");
700
701 auto *listSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
702 assert(listSym && "Reloc does not have a valid Defined");
703
704 ptrList.allPtrs.push_back(x: listSym);
705 }
706}
707
708// Here we parse all the information of an input category (catInfo) and
709// append the parsed info into the structure which will contain all the
710// information about how a class is extended (extInfo)
711void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
712 ClassExtensionInfo &extInfo) {
713 const Reloc *catNameReloc =
714 catInfo.catBodyIsec->getRelocAt(off: catLayout.nameOffset);
715
716 // Parse name
717 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
718
719 // is this the first category we are parsing?
720 if (extInfo.mergedContainerName.empty())
721 extInfo.objFileForMergeData =
722 dyn_cast_or_null<ObjFile>(Val: catInfo.catBodyIsec->getFile());
723 else
724 extInfo.mergedContainerName += "|";
725
726 assert(extInfo.objFileForMergeData &&
727 "Expected to already have valid objextInfo.objFileForMergeData");
728
729 StringRef catName = getReferentString(r: *catNameReloc);
730 extInfo.mergedContainerName += catName.str();
731
732 // Parse base class
733 if (!extInfo.baseClass) {
734 Symbol *classSym =
735 tryGetSymbolAtIsecOffset(isec: catInfo.catBodyIsec, offset: catLayout.klassOffset);
736 assert(extInfo.baseClassName.empty());
737 extInfo.baseClass = classSym;
738 llvm::StringRef classPrefix(objc::symbol_names::klass);
739 assert(classSym->getName().starts_with(classPrefix) &&
740 "Base class symbol does not start with expected prefix");
741 extInfo.baseClassName = classSym->getName().substr(Start: classPrefix.size());
742 } else {
743 assert((extInfo.baseClass ==
744 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
745 catLayout.klassOffset)) &&
746 "Trying to parse category info into container with different base "
747 "class");
748 }
749
750 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.instanceMethodsOffset,
751 ptrList&: extInfo.instanceMethods);
752
753 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.classMethodsOffset,
754 ptrList&: extInfo.classMethods);
755
756 parseProtocolListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.protocolsOffset,
757 ptrList&: extInfo.protocols);
758
759 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.instancePropsOffset,
760 ptrList&: extInfo.instanceProps);
761
762 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.classPropsOffset,
763 ptrList&: extInfo.classProps);
764}
765
766// Generate a protocol list (including header) and link it into the parent at
767// the specified offset.
768void ObjcCategoryMerger::emitAndLinkProtocolList(
769 Defined *parentSym, uint32_t linkAtOffset,
770 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
771 if (ptrList.allPtrs.empty())
772 return;
773
774 assert(ptrList.allPtrs.size() == ptrList.structCount);
775
776 uint32_t bodySize = (ptrList.structCount * target->wordSize) +
777 /*header(count)*/ protocolListHeaderLayout.totalSize +
778 /*extra null value*/ target->wordSize;
779 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: bodySize);
780
781 // This theoretically can be either 32b or 64b, but writing just the first 32b
782 // is good enough
783 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
784 bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
785
786 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
787
788 ConcatInputSection *listSec = make<ConcatInputSection>(
789 args&: *infoCategoryWriter.catPtrListInfo.inputSection, args&: bodyData,
790 args&: infoCategoryWriter.catPtrListInfo.align);
791 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
792 listSec->live = true;
793 addInputSection(inputSection: listSec);
794
795 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
796
797 std::string symName = ptrList.categoryPrefix;
798 symName += extInfo.baseClassName + "_$_(" + extInfo.mergedContainerName + ")";
799
800 Defined *ptrListSym = make<Defined>(
801 args: newStringData(str: symName.c_str()), /*file=*/args: parentSym->getObjectFile(),
802 args&: listSec, /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false,
803 /*isExternal=*/args: false, /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
804 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
805 /*isWeakDefCanBeHidden=*/args: false);
806
807 ptrListSym->used = true;
808 parentSym->getObjectFile()->symbols.push_back(x: ptrListSym);
809
810 createSymbolReference(refFrom: parentSym, refTo: ptrListSym, offset: linkAtOffset,
811 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
812
813 uint32_t offset = protocolListHeaderLayout.totalSize;
814 for (Symbol *symbol : ptrList.allPtrs) {
815 createSymbolReference(refFrom: ptrListSym, refTo: symbol, offset,
816 relocTemplate: infoCategoryWriter.catPtrListInfo.relocTemplate);
817 offset += target->wordSize;
818 }
819}
820
// Generate a pointer list (including header) and link it into the parent at the
// specified offset. This is used for instance and class methods and
// properties.
824void ObjcCategoryMerger::emitAndLinkPointerList(
825 Defined *parentSym, uint32_t linkAtOffset,
826 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
827 if (ptrList.allPtrs.empty())
828 return;
829
830 assert(ptrList.allPtrs.size() * target->wordSize ==
831 ptrList.structCount * ptrList.structSize);
832
833 // Generate body
834 uint32_t bodySize =
835 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
836 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: bodySize);
837
838 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
839 bodyData.data() + listHeaderLayout.structSizeOffset);
840 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
841 bodyData.data() + listHeaderLayout.structCountOffset);
842
843 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
844 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
845
846 ConcatInputSection *listSec = make<ConcatInputSection>(
847 args&: *infoCategoryWriter.catPtrListInfo.inputSection, args&: bodyData,
848 args&: infoCategoryWriter.catPtrListInfo.align);
849 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
850 listSec->live = true;
851 addInputSection(inputSection: listSec);
852
853 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
854
855 std::string symName = ptrList.categoryPrefix;
856 symName += extInfo.baseClassName + "_$_" + extInfo.mergedContainerName;
857
858 Defined *ptrListSym = make<Defined>(
859 args: newStringData(str: symName.c_str()), /*file=*/args: parentSym->getObjectFile(),
860 args&: listSec, /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false,
861 /*isExternal=*/args: false, /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
862 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
863 /*isWeakDefCanBeHidden=*/args: false);
864
865 ptrListSym->used = true;
866 parentSym->getObjectFile()->symbols.push_back(x: ptrListSym);
867
868 createSymbolReference(refFrom: parentSym, refTo: ptrListSym, offset: linkAtOffset,
869 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
870
871 uint32_t offset = listHeaderLayout.totalSize;
872 for (Symbol *symbol : ptrList.allPtrs) {
873 createSymbolReference(refFrom: ptrListSym, refTo: symbol, offset,
874 relocTemplate: infoCategoryWriter.catPtrListInfo.relocTemplate);
875 offset += target->wordSize;
876 }
877}
878
879// This method creates an __objc_catlist ConcatInputSection with a single slot
880Defined *
881ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
882 const std::string &forBaseClassName,
883 ObjFile *objFile) {
884 uint32_t sectionSize = target->wordSize;
885 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: sectionSize);
886
887 ConcatInputSection *newCatList =
888 make<ConcatInputSection>(args&: *infoCategoryWriter.catListInfo.inputSection,
889 args&: bodyData, args&: infoCategoryWriter.catListInfo.align);
890 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
891 newCatList->live = true;
892 addInputSection(inputSection: newCatList);
893
894 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
895
896 std::string catSymName = "<__objc_catlist slot for merged category ";
897 catSymName += forBaseClassName + "(" + forCategoryName + ")>";
898
899 Defined *catListSym = make<Defined>(
900 args: newStringData(str: catSymName.c_str()), /*file=*/args&: objFile, args&: newCatList,
901 /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false, /*isExternal=*/args: false,
902 /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: false,
903 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
904 /*isWeakDefCanBeHidden=*/args: false);
905
906 catListSym->used = true;
907 objFile->symbols.push_back(x: catListSym);
908 return catListSym;
909}
910
911// Here we generate the main category body and link the name and base class into
912// it. We don't link any other info yet like the protocol and class/instance
913// methods/props.
914Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
915 const Defined *nameSym,
916 const Symbol *baseClassSym,
917 const std::string &baseClassName,
918 ObjFile *objFile) {
919 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: catLayout.totalSize);
920
921 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
922 catLayout.sizeOffset);
923 *ptrSize = catLayout.totalSize;
924
925 ConcatInputSection *newBodySec =
926 make<ConcatInputSection>(args&: *infoCategoryWriter.catBodyInfo.inputSection,
927 args&: bodyData, args&: infoCategoryWriter.catBodyInfo.align);
928 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
929 newBodySec->live = true;
930 addInputSection(inputSection: newBodySec);
931
932 std::string symName =
933 objc::symbol_names::category + baseClassName + "_$_(" + name + ")";
934 Defined *catBodySym = make<Defined>(
935 args: newStringData(str: symName.c_str()), /*file=*/args&: objFile, args&: newBodySec,
936 /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false, /*isExternal=*/args: false,
937 /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
938 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
939 /*isWeakDefCanBeHidden=*/args: false);
940
941 catBodySym->used = true;
942 objFile->symbols.push_back(x: catBodySym);
943
944 createSymbolReference(refFrom: catBodySym, refTo: nameSym, offset: catLayout.nameOffset,
945 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
946
947 // Create a reloc to the base class (either external or internal)
948 createSymbolReference(refFrom: catBodySym, refTo: baseClassSym, offset: catLayout.klassOffset,
949 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
950
951 return catBodySym;
952}
953
954// This writes the new category name (for the merged category) into the binary
955// and returns the sybmol for it.
956Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
957 ObjFile *objFile) {
958 StringRef nameStrData = newStringData(str: name.c_str());
959 // We use +1 below to include the null terminator
960 llvm::ArrayRef<uint8_t> nameData(
961 reinterpret_cast<const uint8_t *>(nameStrData.data()),
962 nameStrData.size() + 1);
963
964 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
965 CStringInputSection *newStringSec = make<CStringInputSection>(
966 args&: *infoCategoryWriter.catNameInfo.inputSection, args&: nameData,
967 args&: infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/args: true);
968
969 parentSection->subsections.push_back(x: {.offset: 0, .isec: newStringSec});
970
971 newStringSec->splitIntoPieces();
972 newStringSec->pieces[0].live = true;
973 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
974 in.cStringSection->addInput(newStringSec);
975 assert(newStringSec->pieces.size() == 1);
976
977 Defined *catNameSym = make<Defined>(
978 args: "<merged category name>", /*file=*/args&: objFile, args&: newStringSec,
979 /*value=*/args: 0, args: nameData.size(),
980 /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: false,
981 /*includeInSymtab=*/args: false, /*isReferencedDynamically=*/args: false,
982 /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false);
983
984 catNameSym->used = true;
985 objFile->symbols.push_back(x: catNameSym);
986 return catNameSym;
987}
988
989// This method fully creates a new category from the given ClassExtensionInfo.
990// It creates the category name, body and method/protocol/prop lists and links
991// them all together. Then it creates a new __objc_catlist entry and adds the
992// category to it. Calling this method will fully generate a category which will
993// be available in the final binary.
994Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
995 Defined *catNameSym = emitCategoryName(name: extInfo.mergedContainerName,
996 objFile: extInfo.objFileForMergeData);
997
998 Defined *catBodySym = emitCategoryBody(
999 name: extInfo.mergedContainerName, nameSym: catNameSym, baseClassSym: extInfo.baseClass,
1000 baseClassName: extInfo.baseClassName, objFile: extInfo.objFileForMergeData);
1001
1002 Defined *catListSym =
1003 emitCatListEntrySec(forCategoryName: extInfo.mergedContainerName, forBaseClassName: extInfo.baseClassName,
1004 objFile: extInfo.objFileForMergeData);
1005
1006 // Add the single category body to the category list at the offset 0.
1007 createSymbolReference(refFrom: catListSym, refTo: catBodySym, /*offset=*/0,
1008 relocTemplate: infoCategoryWriter.catListInfo.relocTemplate);
1009
1010 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.instanceMethodsOffset, extInfo,
1011 ptrList: extInfo.instanceMethods);
1012
1013 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.classMethodsOffset, extInfo,
1014 ptrList: extInfo.classMethods);
1015
1016 emitAndLinkProtocolList(parentSym: catBodySym, linkAtOffset: catLayout.protocolsOffset, extInfo,
1017 ptrList: extInfo.protocols);
1018
1019 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.instancePropsOffset, extInfo,
1020 ptrList: extInfo.instanceProps);
1021
1022 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.classPropsOffset, extInfo,
1023 ptrList: extInfo.classProps);
1024
1025 return catBodySym;
1026}
1027
1028// This method merges all the categories (sharing a base class) into a single
1029// category.
1030void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1031 std::vector<InfoInputCategory> &categories) {
1032 assert(categories.size() > 1 && "Expected at least 2 categories");
1033
1034 ClassExtensionInfo extInfo(catLayout);
1035
1036 for (auto &catInfo : categories)
1037 parseCatInfoToExtInfo(catInfo, extInfo);
1038
1039 Defined *newCatDef = emitCategory(extInfo);
1040 assert(newCatDef && "Failed to create a new category");
1041
1042 // Suppress unsuded var warning
1043 (void)newCatDef;
1044
1045 for (auto &catInfo : categories)
1046 catInfo.wasMerged = true;
1047}
1048
1049void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1050 const Symbol *refTo,
1051 uint32_t offset,
1052 const Reloc &relocTemplate) {
1053 Reloc r = relocTemplate;
1054 r.offset = offset;
1055 r.addend = 0;
1056 r.referent = const_cast<Symbol *>(refTo);
1057 refFrom->isec()->relocs.push_back(x: r);
1058}
1059
1060void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1061 for (InputSection *sec : allInputSections) {
1062 if (sec->getName() != section_names::objcCatList)
1063 continue;
1064 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(Val: sec);
1065 assert(catListCisec &&
1066 "__objc_catList InputSection is not a ConcatInputSection");
1067
1068 for (uint32_t off = 0; off < catListCisec->getSize();
1069 off += target->wordSize) {
1070 Defined *categorySym = tryGetDefinedAtIsecOffset(isec: catListCisec, offset: off);
1071 assert(categorySym &&
1072 "Failed to get a valid category at __objc_catlit offset");
1073
1074 // We only support ObjC categories (no swift + @objc)
1075 // TODO: Support swift + @objc categories also
1076 if (!categorySym->getName().starts_with(Prefix: objc::symbol_names::category))
1077 continue;
1078
1079 auto *catBodyIsec = dyn_cast<ConcatInputSection>(Val: categorySym->isec());
1080 assert(catBodyIsec &&
1081 "Category data section is not an ConcatInputSection");
1082
1083 // Check that the category has a reloc at 'klassOffset' (which is
1084 // a pointer to the class symbol)
1085
1086 Symbol *classSym =
1087 tryGetSymbolAtIsecOffset(isec: catBodyIsec, offset: catLayout.klassOffset);
1088 assert(classSym && "Category does not have a valid base class");
1089
1090 InfoInputCategory catInputInfo{.catListIsec: catListCisec, .catBodyIsec: catBodyIsec, .offCatListIsec: off};
1091 categoryMap[classSym].push_back(x: catInputInfo);
1092
1093 collectCategoryWriterInfoFromCategory(catInfo: catInputInfo);
1094 }
1095 }
1096}
1097
1098// In the input we have multiple __objc_catlist InputSection, each of which may
1099// contain links to multiple categories. Of these categories, we will merge (and
1100// erase) only some. There will be some categories that will remain untouched
1101// (not erased). For these not erased categories, we generate new __objc_catlist
1102// entries since the parent __objc_catlist entry will be erased
1103void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1104 const std::map<ConcatInputSection *, std::set<uint64_t>>
1105 catListToErasedOffsets) {
1106
1107 // Go through all offsets of all __objc_catlist's that we process and if there
1108 // are categories that we didn't process - generate a new __objc_catlist for
1109 // each.
1110 for (auto &mapEntry : catListToErasedOffsets) {
1111 ConcatInputSection *catListIsec = mapEntry.first;
1112 for (uint32_t catListIsecOffset = 0;
1113 catListIsecOffset < catListIsec->data.size();
1114 catListIsecOffset += target->wordSize) {
1115 // This slot was erased, we can just skip it
1116 if (mapEntry.second.count(x: catListIsecOffset))
1117 continue;
1118
1119 Defined *nonErasedCatBody =
1120 tryGetDefinedAtIsecOffset(isec: catListIsec, offset: catListIsecOffset);
1121 assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1122
1123 // Allocate data for the new __objc_catlist slot
1124 auto bodyData = newSectionData(size: target->wordSize);
1125
1126 // We mark the __objc_catlist slot as belonging to the same file as the
1127 // category
1128 ObjFile *objFile = dyn_cast<ObjFile>(Val: nonErasedCatBody->getFile());
1129
1130 ConcatInputSection *listSec = make<ConcatInputSection>(
1131 args&: *infoCategoryWriter.catListInfo.inputSection, args&: bodyData,
1132 args&: infoCategoryWriter.catListInfo.align);
1133 listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1134 listSec->live = true;
1135 addInputSection(inputSection: listSec);
1136
1137 std::string slotSymName = "<__objc_catlist slot for category ";
1138 slotSymName += nonErasedCatBody->getName();
1139 slotSymName += ">";
1140
1141 Defined *catListSlotSym = make<Defined>(
1142 args: newStringData(str: slotSymName.c_str()), /*file=*/args&: objFile, args&: listSec,
1143 /*value=*/args: 0, args: bodyData.size(),
1144 /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: false,
1145 /*includeInSymtab=*/args: false, /*isReferencedDynamically=*/args: false,
1146 /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false);
1147
1148 catListSlotSym->used = true;
1149 objFile->symbols.push_back(x: catListSlotSym);
1150
1151 // Now link the category body into the newly created slot
1152 createSymbolReference(refFrom: catListSlotSym, refTo: nonErasedCatBody, offset: 0,
1153 relocTemplate: infoCategoryWriter.catListInfo.relocTemplate);
1154 }
1155 }
1156}
1157
1158void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1159 isec->live = false;
1160 for (auto &sym : isec->symbols)
1161 sym->used = false;
1162}
1163
1164// This fully erases the merged categories, including their body, their names,
1165// their method/protocol/prop lists and the __objc_catlist entries that link to
1166// them.
1167void ObjcCategoryMerger::eraseMergedCategories() {
1168 // Map of InputSection to a set of offsets of the categories that were merged
1169 std::map<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1170
1171 for (auto &mapEntry : categoryMap) {
1172 for (InfoInputCategory &catInfo : mapEntry.second) {
1173 if (catInfo.wasMerged) {
1174 eraseISec(isec: catInfo.catListIsec);
1175 catListToErasedOffsets[catInfo.catListIsec].insert(
1176 x: catInfo.offCatListIsec);
1177 }
1178 }
1179 }
1180
1181 // If there were categories that we did not erase, we need to generate a new
1182 // __objc_catList that contains only the un-merged categories, and get rid of
1183 // the references to the ones we merged.
1184 generateCatListForNonErasedCategories(catListToErasedOffsets);
1185
1186 // Erase the old method lists & names of the categories that were merged
1187 for (auto &mapEntry : categoryMap) {
1188 for (InfoInputCategory &catInfo : mapEntry.second) {
1189 if (!catInfo.wasMerged)
1190 continue;
1191
1192 eraseISec(isec: catInfo.catBodyIsec);
1193 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec, offset: catLayout.nameOffset);
1194 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1195 offset: catLayout.instanceMethodsOffset);
1196 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1197 offset: catLayout.classMethodsOffset);
1198 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1199 offset: catLayout.protocolsOffset);
1200 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1201 offset: catLayout.classPropsOffset);
1202 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1203 offset: catLayout.instancePropsOffset);
1204 }
1205 }
1206}
1207
1208void ObjcCategoryMerger::doMerge() {
1209 collectAndValidateCategoriesData();
1210
1211 for (auto &entry : categoryMap)
1212 if (entry.second.size() > 1)
1213 // Merge all categories into a new, single category
1214 mergeCategoriesIntoSingleCategory(categories&: entry.second);
1215
1216 // Erase all categories that were merged
1217 eraseMergedCategories();
1218}
1219
1220void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1221
1222StringRef ObjcCategoryMerger::newStringData(const char *str) {
1223 uint32_t len = strlen(s: str);
1224 uint32_t bufSize = len + 1;
1225 auto &data = newSectionData(size: bufSize);
1226 char *strData = reinterpret_cast<char *>(data.data());
1227 // Copy the string chars and null-terminator
1228 memcpy(dest: strData, src: str, n: bufSize);
1229 return StringRef(strData, len);
1230}
1231
1232SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1233 generatedSectionData.push_back(
1234 Elt: std::make_unique<SmallVector<uint8_t>>(args&: size, args: 0));
1235 return *generatedSectionData.back();
1236}
1237
1238} // namespace
1239
1240void objc::mergeCategories() {
1241 TimeTraceScope timeScope("ObjcCategoryMerger");
1242
1243 ObjcCategoryMerger merger(inputSections);
1244 merger.doMerge();
1245}
1246
1247void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1248

source code of lld/MachO/ObjC.cpp