1 | //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the BinarySection class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/BinarySection.h" |
14 | #include "bolt/Core/BinaryContext.h" |
15 | #include "bolt/Utils/CommandLineOpts.h" |
16 | #include "bolt/Utils/Utils.h" |
17 | #include "llvm/MC/MCStreamer.h" |
18 | #include "llvm/Support/CommandLine.h" |
19 | |
20 | #define DEBUG_TYPE "bolt" |
21 | |
22 | using namespace llvm; |
23 | using namespace bolt; |
24 | |
25 | namespace opts { |
26 | extern cl::opt<bool> HotData; |
27 | extern cl::opt<bool> PrintRelocations; |
28 | } // namespace opts |
29 | |
30 | uint64_t BinarySection::Count = 0; |
31 | |
32 | bool BinarySection::isELF() const { return BC.isELF(); } |
33 | |
34 | bool BinarySection::isMachO() const { return BC.isMachO(); } |
35 | |
36 | uint64_t |
37 | BinarySection::hash(const BinaryData &BD, |
38 | std::map<const BinaryData *, uint64_t> &Cache) const { |
39 | auto Itr = Cache.find(x: &BD); |
40 | if (Itr != Cache.end()) |
41 | return Itr->second; |
42 | |
43 | hash_code Hash = |
44 | hash_combine(args: hash_value(value: BD.getSize()), args: hash_value(S: BD.getSectionName())); |
45 | |
46 | Cache[&BD] = Hash; |
47 | |
48 | if (!containsRange(Address: BD.getAddress(), Size: BD.getSize())) |
49 | return Hash; |
50 | |
51 | uint64_t Offset = BD.getAddress() - getAddress(); |
52 | const uint64_t EndOffset = BD.getEndAddress() - getAddress(); |
53 | auto Begin = Relocations.lower_bound(x: Relocation{Offset, 0, 0, 0, 0}); |
54 | auto End = Relocations.upper_bound(x: Relocation{EndOffset, 0, 0, 0, 0}); |
55 | const StringRef Contents = getContents(); |
56 | |
57 | while (Begin != End) { |
58 | const Relocation &Rel = *Begin++; |
59 | Hash = hash_combine( |
60 | args: Hash, args: hash_value(S: Contents.substr(Start: Offset, N: Begin->Offset - Offset))); |
61 | if (BinaryData *RelBD = BC.getBinaryDataByName(Name: Rel.Symbol->getName())) |
62 | Hash = hash_combine(args: Hash, args: hash(BD: *RelBD, Cache)); |
63 | Offset = Rel.Offset + Rel.getSize(); |
64 | } |
65 | |
66 | Hash = hash_combine(args: Hash, |
67 | args: hash_value(S: Contents.substr(Start: Offset, N: EndOffset - Offset))); |
68 | |
69 | Cache[&BD] = Hash; |
70 | |
71 | return Hash; |
72 | } |
73 | |
74 | void BinarySection::emitAsData(MCStreamer &Streamer, |
75 | const Twine &SectionName) const { |
76 | StringRef SectionContents = |
77 | isFinalized() ? getOutputContents() : getContents(); |
78 | MCSectionELF *ELFSection = |
79 | BC.Ctx->getELFSection(Section: SectionName, Type: getELFType(), Flags: getELFFlags()); |
80 | |
81 | Streamer.switchSection(Section: ELFSection); |
82 | Streamer.emitValueToAlignment(Alignment: getAlign()); |
83 | |
84 | if (BC.HasRelocations && opts::HotData && isReordered()) |
85 | Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_start" )); |
86 | |
87 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting " |
88 | << (isAllocatable() ? "" : "non-" ) |
89 | << "allocatable data section " << SectionName << '\n'); |
90 | |
91 | if (!hasRelocations()) { |
92 | Streamer.emitBytes(Data: SectionContents); |
93 | } else { |
94 | uint64_t SectionOffset = 0; |
95 | for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) { |
96 | auto RelocationOffset = RI->Offset; |
97 | assert(RelocationOffset < SectionContents.size() && "overflow detected" ); |
98 | |
99 | if (SectionOffset < RelocationOffset) { |
100 | Streamer.emitBytes(Data: SectionContents.substr( |
101 | Start: SectionOffset, N: RelocationOffset - SectionOffset)); |
102 | SectionOffset = RelocationOffset; |
103 | } |
104 | |
105 | // Get iterators to all relocations with the same offset. Usually, there |
106 | // is only one such relocation but there can be more for composed |
107 | // relocations. |
108 | auto ROI = RI; |
109 | auto ROE = Relocations.upper_bound(x: RelocationOffset); |
110 | |
111 | // Start from the next offset on the next iteration. |
112 | RI = ROE; |
113 | |
114 | // Skip undefined symbols. |
115 | auto HasUndefSym = [this](const auto &Relocation) { |
116 | return BC.UndefinedSymbols.count(x: Relocation.Symbol); |
117 | }; |
118 | |
119 | if (std::any_of(first: ROI, last: ROE, pred: HasUndefSym)) |
120 | continue; |
121 | |
122 | #ifndef NDEBUG |
123 | for (const auto &Relocation : make_range(x: ROI, y: ROE)) { |
124 | LLVM_DEBUG( |
125 | dbgs() << "BOLT-DEBUG: emitting relocation for symbol " |
126 | << (Relocation.Symbol ? Relocation.Symbol->getName() |
127 | : StringRef("<none>" )) |
128 | << " at offset 0x" << Twine::utohexstr(Relocation.Offset) |
129 | << " with size " |
130 | << Relocation::getSizeForType(Relocation.Type) << '\n'); |
131 | } |
132 | #endif |
133 | |
134 | size_t RelocationSize = Relocation::emit(Begin: ROI, End: ROE, Streamer: &Streamer); |
135 | SectionOffset += RelocationSize; |
136 | } |
137 | assert(SectionOffset <= SectionContents.size() && "overflow error" ); |
138 | if (SectionOffset < SectionContents.size()) |
139 | Streamer.emitBytes(Data: SectionContents.substr(Start: SectionOffset)); |
140 | } |
141 | |
142 | if (BC.HasRelocations && opts::HotData && isReordered()) |
143 | Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_end" )); |
144 | } |
145 | |
146 | uint64_t BinarySection::write(raw_ostream &OS) const { |
147 | const uint64_t NumValidContentBytes = |
148 | std::min<uint64_t>(a: getOutputContents().size(), b: getOutputSize()); |
149 | OS.write(Ptr: getOutputContents().data(), Size: NumValidContentBytes); |
150 | if (getOutputSize() > NumValidContentBytes) |
151 | OS.write_zeros(NumZeros: getOutputSize() - NumValidContentBytes); |
152 | return getOutputSize(); |
153 | } |
154 | |
155 | void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, |
156 | SymbolResolverFuncTy Resolver) { |
157 | if (PendingRelocations.empty() && Patches.empty()) |
158 | return; |
159 | |
160 | const uint64_t SectionAddress = getAddress(); |
161 | |
162 | // We apply relocations to original section contents. For allocatable sections |
163 | // this means using their input file offsets, since the output file offset |
164 | // could change (e.g. for new instance of .text). For non-allocatable |
165 | // sections, the output offset should always be a valid one. |
166 | const uint64_t SectionFileOffset = |
167 | isAllocatable() ? getInputFileOffset() : getOutputFileOffset(); |
168 | LLVM_DEBUG( |
169 | dbgs() << "BOLT-DEBUG: flushing pending relocations for section " |
170 | << getName() << '\n' |
171 | << " address: 0x" << Twine::utohexstr(SectionAddress) << '\n' |
172 | << " offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n'); |
173 | |
174 | for (BinaryPatch &Patch : Patches) |
175 | OS.pwrite(Ptr: Patch.Bytes.data(), Size: Patch.Bytes.size(), |
176 | Offset: SectionFileOffset + Patch.Offset); |
177 | |
178 | uint64_t SkippedPendingRelocations = 0; |
179 | for (Relocation &Reloc : PendingRelocations) { |
180 | uint64_t Value = Reloc.Addend; |
181 | if (Reloc.Symbol) |
182 | Value += Resolver(Reloc.Symbol); |
183 | |
184 | // Safely skip any optional pending relocation that cannot be encoded. |
185 | if (Reloc.isOptional() && |
186 | !Relocation::canEncodeValue(Type: Reloc.Type, Value, |
187 | PC: SectionAddress + Reloc.Offset)) { |
188 | |
189 | ++SkippedPendingRelocations; |
190 | continue; |
191 | } |
192 | Value = Relocation::encodeValue(Type: Reloc.Type, Value, |
193 | PC: SectionAddress + Reloc.Offset); |
194 | |
195 | OS.pwrite(Ptr: reinterpret_cast<const char *>(&Value), |
196 | Size: Relocation::getSizeForType(Type: Reloc.Type), |
197 | Offset: SectionFileOffset + Reloc.Offset); |
198 | |
199 | LLVM_DEBUG( |
200 | dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value) |
201 | << " of size " << Relocation::getSizeForType(Reloc.Type) |
202 | << " at section offset 0x" << Twine::utohexstr(Reloc.Offset) |
203 | << " address 0x" |
204 | << Twine::utohexstr(SectionAddress + Reloc.Offset) |
205 | << " file offset 0x" |
206 | << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';); |
207 | } |
208 | |
209 | clearList(List&: PendingRelocations); |
210 | |
211 | if (SkippedPendingRelocations > 0 && opts::Verbosity >= 1) { |
212 | BC.outs() << "BOLT-INFO: skipped " << SkippedPendingRelocations |
213 | << " out-of-range optional relocations\n" ; |
214 | } |
215 | } |
216 | |
217 | BinarySection::~BinarySection() { updateContents(NewData: nullptr, NewSize: 0); } |
218 | |
219 | void BinarySection::clearRelocations() { clearList(List&: Relocations); } |
220 | |
221 | void BinarySection::print(raw_ostream &OS) const { |
222 | OS << getName() << ", " |
223 | << "0x" << Twine::utohexstr(Val: getAddress()) << ", " << getSize() << " (0x" |
224 | << Twine::utohexstr(Val: getOutputAddress()) << ", " << getOutputSize() << ")" |
225 | << ", data = " << getData() << ", output data = " << getOutputData(); |
226 | |
227 | if (isAllocatable()) |
228 | OS << " (allocatable)" ; |
229 | |
230 | if (isVirtual()) |
231 | OS << " (virtual)" ; |
232 | |
233 | if (isTLS()) |
234 | OS << " (tls)" ; |
235 | |
236 | if (opts::PrintRelocations) |
237 | for (const Relocation &R : relocations()) |
238 | OS << "\n " << R; |
239 | } |
240 | |
241 | BinarySection::RelocationSetType |
242 | BinarySection::reorderRelocations(bool Inplace) const { |
243 | assert(PendingRelocations.empty() && |
244 | "reordering pending relocations not supported" ); |
245 | RelocationSetType NewRelocations; |
246 | for (const Relocation &Rel : relocations()) { |
247 | uint64_t RelAddr = Rel.Offset + getAddress(); |
248 | BinaryData *BD = BC.getBinaryDataContainingAddress(Address: RelAddr); |
249 | BD = BD->getAtomicRoot(); |
250 | assert(BD); |
251 | |
252 | if ((!BD->isMoved() && !Inplace) || BD->isJumpTable()) |
253 | continue; |
254 | |
255 | Relocation NewRel(Rel); |
256 | uint64_t RelOffset = RelAddr - BD->getAddress(); |
257 | NewRel.Offset = BD->getOutputOffset() + RelOffset; |
258 | assert(NewRel.Offset < getSize()); |
259 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel |
260 | << "\n" ); |
261 | NewRelocations.emplace(args: std::move(NewRel)); |
262 | } |
263 | return NewRelocations; |
264 | } |
265 | |
266 | void BinarySection::reorderContents(const std::vector<BinaryData *> &Order, |
267 | bool Inplace) { |
268 | IsReordered = true; |
269 | |
270 | Relocations = reorderRelocations(Inplace); |
271 | |
272 | std::string Str; |
273 | raw_string_ostream OS(Str); |
274 | const char *Src = Contents.data(); |
275 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n" ); |
276 | for (BinaryData *BD : Order) { |
277 | assert((BD->isMoved() || !Inplace) && !BD->isJumpTable()); |
278 | assert(BD->isAtomic() && BD->isMoveable()); |
279 | const uint64_t SrcOffset = BD->getAddress() - getAddress(); |
280 | assert(SrcOffset < Contents.size()); |
281 | assert(SrcOffset == BD->getOffset()); |
282 | while (OS.tell() < BD->getOutputOffset()) |
283 | OS.write(C: (unsigned char)0); |
284 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell() |
285 | << "\n" ); |
286 | OS.write(Ptr: &Src[SrcOffset], Size: BD->getOutputSize()); |
287 | } |
288 | if (Relocations.empty()) { |
289 | // If there are no existing relocations, tack a phony one at the end |
290 | // of the reordered segment to force LLVM to recognize and map this |
291 | // section. |
292 | MCSymbol *ZeroSym = BC.registerNameAtAddress(Name: "Zero" , Address: 0, Size: 0, Alignment: 0); |
293 | addRelocation(Offset: OS.tell(), Symbol: ZeroSym, Type: Relocation::getAbs64(), Addend: 0xdeadbeef); |
294 | |
295 | uint64_t Zero = 0; |
296 | OS.write(Ptr: reinterpret_cast<const char *>(&Zero), Size: sizeof(Zero)); |
297 | } |
298 | auto *NewData = reinterpret_cast<char *>(copyByteArray(Buffer: OS.str())); |
299 | Contents = OutputContents = StringRef(NewData, OS.str().size()); |
300 | OutputSize = Contents.size(); |
301 | } |
302 | |
303 | std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr, |
304 | uint32_t Type) { |
305 | std::string Str; |
306 | raw_string_ostream OS(Str); |
307 | const uint32_t NameSz = NameStr.size() + 1; |
308 | const uint32_t DescSz = DescStr.size(); |
309 | OS.write(Ptr: reinterpret_cast<const char *>(&(NameSz)), Size: 4); |
310 | OS.write(Ptr: reinterpret_cast<const char *>(&(DescSz)), Size: 4); |
311 | OS.write(Ptr: reinterpret_cast<const char *>(&(Type)), Size: 4); |
312 | OS << NameStr << '\0'; |
313 | for (uint64_t I = NameSz; I < alignTo(Value: NameSz, Align: 4); ++I) |
314 | OS << '\0'; |
315 | OS << DescStr; |
316 | for (uint64_t I = DescStr.size(); I < alignTo(Value: DescStr.size(), Align: 4); ++I) |
317 | OS << '\0'; |
318 | return OS.str(); |
319 | } |
320 | |