1 | //===- MapFile.cpp --------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the -map option, which maps address ranges to their |
10 | // respective contents, plus the input file these contents were originally from. |
11 | // The contents (typically symbols) are listed in address order. Dead-stripped |
12 | // contents are included as well. |
13 | // |
14 | // # Path: test |
15 | // # Arch: x86_64 |
16 | // # Object files: |
17 | // [ 0] linker synthesized |
18 | // [ 1] a.o |
19 | // # Sections: |
20 | // # Address Size Segment Section |
21 | // 0x1000005C0 0x0000004C __TEXT __text |
22 | // # Symbols: |
23 | // # Address Size File Name |
24 | // 0x1000005C0 0x00000001 [ 1] _main |
25 | // # Dead Stripped Symbols: |
26 | // # Size File Name |
27 | // <<dead>> 0x00000001 [ 1] _foo |
28 | // |
29 | //===----------------------------------------------------------------------===// |
30 | |
31 | #include "MapFile.h" |
32 | #include "ConcatOutputSection.h" |
33 | #include "Config.h" |
34 | #include "InputFiles.h" |
35 | #include "InputSection.h" |
36 | #include "OutputSegment.h" |
37 | #include "Symbols.h" |
38 | #include "SyntheticSections.h" |
39 | #include "Target.h" |
40 | #include "lld/Common/ErrorHandler.h" |
41 | #include "llvm/ADT/DenseMap.h" |
42 | #include "llvm/Support/Parallel.h" |
43 | #include "llvm/Support/TimeProfiler.h" |
44 | |
45 | using namespace llvm; |
46 | using namespace llvm::sys; |
47 | using namespace lld; |
48 | using namespace lld::macho; |
49 | |
50 | struct CStringInfo { |
51 | uint32_t fileIndex; |
52 | StringRef str; |
53 | }; |
54 | |
55 | struct MapInfo { |
56 | SmallVector<InputFile *> files; |
57 | SmallVector<Defined *> deadSymbols; |
58 | DenseMap<const OutputSection *, |
59 | SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>> |
60 | liveCStringsForSection; |
61 | SmallVector<CStringInfo> deadCStrings; |
62 | }; |
63 | |
64 | static MapInfo gatherMapInfo() { |
65 | MapInfo info; |
66 | for (InputFile *file : inputFiles) { |
67 | bool isReferencedFile = false; |
68 | |
69 | if (isa<ObjFile>(Val: file) || isa<BitcodeFile>(Val: file)) { |
70 | uint32_t fileIndex = info.files.size() + 1; |
71 | |
72 | // Gather the dead symbols. We don't have to bother with the live ones |
73 | // because we will pick them up as we iterate over the OutputSections |
74 | // later. |
75 | for (Symbol *sym : file->symbols) { |
76 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
77 | // Only emit the prevailing definition of a symbol. Also, don't emit |
78 | // the symbol if it is part of a cstring section (we use the literal |
79 | // value instead, similar to ld64) |
80 | if (d->isec() && d->getFile() == file && |
81 | !isa<CStringInputSection>(Val: d->isec())) { |
82 | isReferencedFile = true; |
83 | if (!d->isLive()) |
84 | info.deadSymbols.push_back(Elt: d); |
85 | } |
86 | } |
87 | |
88 | // Gather all the cstrings (both live and dead). A CString(Output)Section |
89 | // doesn't provide us a way of figuring out which InputSections its |
90 | // cstring contents came from, so we need to build up that mapping here. |
91 | for (const Section *sec : file->sections) { |
92 | for (const Subsection &subsec : sec->subsections) { |
93 | if (auto isec = dyn_cast<CStringInputSection>(Val: subsec.isec)) { |
94 | auto &liveCStrings = info.liveCStringsForSection[isec->parent]; |
95 | for (const auto &[i, piece] : llvm::enumerate(First&: isec->pieces)) { |
96 | if (piece.live) |
97 | liveCStrings.push_back(Elt: {isec->parent->addr + piece.outSecOff, |
98 | {.fileIndex: fileIndex, .str: isec->getStringRef(i)}}); |
99 | else |
100 | info.deadCStrings.push_back(Elt: {.fileIndex: fileIndex, .str: isec->getStringRef(i)}); |
101 | isReferencedFile = true; |
102 | } |
103 | } else { |
104 | break; |
105 | } |
106 | } |
107 | } |
108 | } else if (const auto *dylibFile = dyn_cast<DylibFile>(Val: file)) { |
109 | isReferencedFile = dylibFile->isReferenced(); |
110 | } |
111 | |
112 | if (isReferencedFile) |
113 | info.files.push_back(Elt: file); |
114 | } |
115 | |
116 | // cstrings are not stored in sorted order in their OutputSections, so we sort |
117 | // them here. |
118 | for (auto &liveCStrings : info.liveCStringsForSection) |
119 | parallelSort(R&: liveCStrings.second, Comp: [](const auto &p1, const auto &p2) { |
120 | return p1.first < p2.first; |
121 | }); |
122 | return info; |
123 | } |
124 | |
125 | // We use this instead of `toString(const InputFile *)` as we don't want to |
126 | // include the dylib install name in our output. |
127 | static void printFileName(raw_fd_ostream &os, const InputFile *f) { |
128 | if (f->archiveName.empty()) |
129 | os << f->getName(); |
130 | else |
131 | os << f->archiveName << "(" << path::filename(path: f->getName()) + ")" ; |
132 | } |
133 | |
134 | // For printing the contents of the __stubs and __la_symbol_ptr sections. |
135 | static void printStubsEntries( |
136 | raw_fd_ostream &os, |
137 | const DenseMap<lld::macho::InputFile *, uint32_t> &readerToFileOrdinal, |
138 | const OutputSection *osec, size_t entrySize) { |
139 | for (const Symbol *sym : in.stubs->getEntries()) |
140 | os << format(Fmt: "0x%08llX\t0x%08zX\t[%3u] %s\n" , |
141 | Vals: osec->addr + sym->stubsIndex * entrySize, Vals: entrySize, |
142 | Vals: readerToFileOrdinal.lookup(Val: sym->getFile()), |
143 | Vals: sym->getName().str().data()); |
144 | } |
145 | |
146 | static void printNonLazyPointerSection(raw_fd_ostream &os, |
147 | NonLazyPointerSectionBase *osec) { |
148 | // ld64 considers stubs to belong to particular files, but considers GOT |
149 | // entries to be linker-synthesized. Not sure why they made that decision, but |
150 | // I think we can follow suit unless there's demand for better symbol-to-file |
151 | // associations. |
152 | for (const Symbol *sym : osec->getEntries()) |
153 | os << format(Fmt: "0x%08llX\t0x%08zX\t[ 0] non-lazy-pointer-to-local: %s\n" , |
154 | Vals: osec->addr + sym->gotIndex * target->wordSize, |
155 | Vals: target->wordSize, Vals: sym->getName().str().data()); |
156 | } |
157 | |
158 | static uint64_t getSymSizeForMap(Defined *sym) { |
159 | if (sym->wasIdenticalCodeFolded) |
160 | return 0; |
161 | return sym->size; |
162 | } |
163 | |
164 | void macho::writeMapFile() { |
165 | if (config->mapFile.empty()) |
166 | return; |
167 | |
168 | TimeTraceScope timeScope("Write map file" ); |
169 | |
170 | // Open a map file for writing. |
171 | std::error_code ec; |
172 | raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None); |
173 | if (ec) { |
174 | error(msg: "cannot open " + config->mapFile + ": " + ec.message()); |
175 | return; |
176 | } |
177 | |
178 | os << format(Fmt: "# Path: %s\n" , Vals: config->outputFile.str().c_str()); |
179 | os << format(Fmt: "# Arch: %s\n" , |
180 | Vals: getArchitectureName(Arch: config->arch()).str().c_str()); |
181 | |
182 | MapInfo info = gatherMapInfo(); |
183 | |
184 | os << "# Object files:\n" ; |
185 | os << format(Fmt: "[%3u] %s\n" , Vals: 0, Vals: (const char *)"linker synthesized" ); |
186 | uint32_t fileIndex = 1; |
187 | DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal; |
188 | for (InputFile *file : info.files) { |
189 | os << format(Fmt: "[%3u] " , Vals: fileIndex); |
190 | printFileName(os, f: file); |
191 | os << "\n" ; |
192 | readerToFileOrdinal[file] = fileIndex++; |
193 | } |
194 | |
195 | os << "# Sections:\n" ; |
196 | os << "# Address\tSize \tSegment\tSection\n" ; |
197 | for (OutputSegment *seg : outputSegments) |
198 | for (OutputSection *osec : seg->getSections()) { |
199 | if (osec->isHidden()) |
200 | continue; |
201 | |
202 | os << format(Fmt: "0x%08llX\t0x%08llX\t%s\t%s\n" , Vals: osec->addr, Vals: osec->getSize(), |
203 | Vals: seg->name.str().c_str(), Vals: osec->name.str().c_str()); |
204 | } |
205 | |
206 | // Shared function to print an array of symbols. |
207 | auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) { |
208 | for (const ConcatInputSection *isec : arr) { |
209 | for (Defined *sym : isec->symbols) { |
210 | if (!(isPrivateLabel(name: sym->getName()) && getSymSizeForMap(sym) == 0)) |
211 | os << format(Fmt: "0x%08llX\t0x%08llX\t[%3u] %s\n" , Vals: sym->getVA(), |
212 | Vals: getSymSizeForMap(sym), |
213 | Vals: readerToFileOrdinal[sym->getFile()], |
214 | Vals: sym->getName().str().data()); |
215 | } |
216 | } |
217 | }; |
218 | |
219 | os << "# Symbols:\n" ; |
220 | os << "# Address\tSize \tFile Name\n" ; |
221 | for (const OutputSegment *seg : outputSegments) { |
222 | for (const OutputSection *osec : seg->getSections()) { |
223 | if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec)) { |
224 | printIsecArrSyms(concatOsec->inputs); |
225 | } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { |
226 | const auto &liveCStrings = info.liveCStringsForSection.lookup(Val: osec); |
227 | uint64_t lastAddr = 0; // strings will never start at address 0, so this |
228 | // is a sentinel value |
229 | for (const auto &[addr, info] : liveCStrings) { |
230 | uint64_t size = 0; |
231 | if (addr != lastAddr) |
232 | size = info.str.size() + 1; // include null terminator |
233 | lastAddr = addr; |
234 | os << format(Fmt: "0x%08llX\t0x%08llX\t[%3u] literal string: " , Vals: addr, Vals: size, |
235 | Vals: info.fileIndex); |
236 | os.write_escaped(Str: info.str) << "\n" ; |
237 | } |
238 | } else if (osec == (void *)in.unwindInfo) { |
239 | os << format(Fmt: "0x%08llX\t0x%08llX\t[ 0] compact unwind info\n" , |
240 | Vals: osec->addr, Vals: osec->getSize()); |
241 | } else if (osec == in.stubs) { |
242 | printStubsEntries(os, readerToFileOrdinal, osec, entrySize: target->stubSize); |
243 | } else if (osec == in.lazyPointers) { |
244 | printStubsEntries(os, readerToFileOrdinal, osec, entrySize: target->wordSize); |
245 | } else if (osec == in.stubHelper) { |
246 | // yes, ld64 calls it "helper helper"... |
247 | os << format(Fmt: "0x%08llX\t0x%08llX\t[ 0] helper helper\n" , Vals: osec->addr, |
248 | Vals: osec->getSize()); |
249 | } else if (osec == in.got) { |
250 | printNonLazyPointerSection(os, osec: in.got); |
251 | } else if (osec == in.tlvPointers) { |
252 | printNonLazyPointerSection(os, osec: in.tlvPointers); |
253 | } else if (osec == in.objcMethList) { |
254 | printIsecArrSyms(in.objcMethList->getInputs()); |
255 | } |
256 | // TODO print other synthetic sections |
257 | } |
258 | } |
259 | |
260 | if (config->deadStrip) { |
261 | os << "# Dead Stripped Symbols:\n" ; |
262 | os << "# \tSize \tFile Name\n" ; |
263 | for (Defined *sym : info.deadSymbols) { |
264 | assert(!sym->isLive()); |
265 | os << format(Fmt: "<<dead>>\t0x%08llX\t[%3u] %s\n" , Vals: getSymSizeForMap(sym), |
266 | Vals: readerToFileOrdinal[sym->getFile()], |
267 | Vals: sym->getName().str().data()); |
268 | } |
269 | for (CStringInfo &cstrInfo : info.deadCStrings) { |
270 | os << format(Fmt: "<<dead>>\t0x%08zX\t[%3u] literal string: " , |
271 | Vals: cstrInfo.str.size() + 1, Vals: cstrInfo.fileIndex); |
272 | os.write_escaped(Str: cstrInfo.str) << "\n" ; |
273 | } |
274 | } |
275 | } |
276 | |