1 | //===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the BinarySection class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/BinarySection.h" |
14 | #include "bolt/Core/BinaryContext.h" |
15 | #include "bolt/Utils/Utils.h" |
16 | #include "llvm/MC/MCStreamer.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | |
19 | #define DEBUG_TYPE "bolt" |
20 | |
21 | using namespace llvm; |
22 | using namespace bolt; |
23 | |
24 | namespace opts { |
25 | extern cl::opt<bool> PrintRelocations; |
26 | extern cl::opt<bool> HotData; |
27 | } // namespace opts |
28 | |
29 | uint64_t BinarySection::Count = 0; |
30 | |
31 | bool BinarySection::isELF() const { return BC.isELF(); } |
32 | |
33 | bool BinarySection::isMachO() const { return BC.isMachO(); } |
34 | |
35 | uint64_t |
36 | BinarySection::hash(const BinaryData &BD, |
37 | std::map<const BinaryData *, uint64_t> &Cache) const { |
38 | auto Itr = Cache.find(x: &BD); |
39 | if (Itr != Cache.end()) |
40 | return Itr->second; |
41 | |
42 | hash_code Hash = |
43 | hash_combine(args: hash_value(value: BD.getSize()), args: hash_value(S: BD.getSectionName())); |
44 | |
45 | Cache[&BD] = Hash; |
46 | |
47 | if (!containsRange(Address: BD.getAddress(), Size: BD.getSize())) |
48 | return Hash; |
49 | |
50 | uint64_t Offset = BD.getAddress() - getAddress(); |
51 | const uint64_t EndOffset = BD.getEndAddress() - getAddress(); |
52 | auto Begin = Relocations.lower_bound(x: Relocation{.Offset: Offset, .Symbol: 0, .Type: 0, .Addend: 0, .Value: 0}); |
53 | auto End = Relocations.upper_bound(x: Relocation{.Offset: EndOffset, .Symbol: 0, .Type: 0, .Addend: 0, .Value: 0}); |
54 | const StringRef Contents = getContents(); |
55 | |
56 | while (Begin != End) { |
57 | const Relocation &Rel = *Begin++; |
58 | Hash = hash_combine( |
59 | args: Hash, args: hash_value(S: Contents.substr(Start: Offset, N: Begin->Offset - Offset))); |
60 | if (BinaryData *RelBD = BC.getBinaryDataByName(Name: Rel.Symbol->getName())) |
61 | Hash = hash_combine(args: Hash, args: hash(BD: *RelBD, Cache)); |
62 | Offset = Rel.Offset + Rel.getSize(); |
63 | } |
64 | |
65 | Hash = hash_combine(args: Hash, |
66 | args: hash_value(S: Contents.substr(Start: Offset, N: EndOffset - Offset))); |
67 | |
68 | Cache[&BD] = Hash; |
69 | |
70 | return Hash; |
71 | } |
72 | |
73 | void BinarySection::emitAsData(MCStreamer &Streamer, |
74 | const Twine &SectionName) const { |
75 | StringRef SectionContents = |
76 | isFinalized() ? getOutputContents() : getContents(); |
77 | MCSectionELF *ELFSection = |
78 | BC.Ctx->getELFSection(Section: SectionName, Type: getELFType(), Flags: getELFFlags()); |
79 | |
80 | Streamer.switchSection(Section: ELFSection); |
81 | Streamer.emitValueToAlignment(Alignment: getAlign()); |
82 | |
83 | if (BC.HasRelocations && opts::HotData && isReordered()) |
84 | Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_start" )); |
85 | |
86 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting " |
87 | << (isAllocatable() ? "" : "non-" ) |
88 | << "allocatable data section " << SectionName << '\n'); |
89 | |
90 | if (!hasRelocations()) { |
91 | Streamer.emitBytes(Data: SectionContents); |
92 | } else { |
93 | uint64_t SectionOffset = 0; |
94 | for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) { |
95 | auto RelocationOffset = RI->Offset; |
96 | assert(RelocationOffset < SectionContents.size() && "overflow detected" ); |
97 | |
98 | if (SectionOffset < RelocationOffset) { |
99 | Streamer.emitBytes(Data: SectionContents.substr( |
100 | Start: SectionOffset, N: RelocationOffset - SectionOffset)); |
101 | SectionOffset = RelocationOffset; |
102 | } |
103 | |
104 | // Get iterators to all relocations with the same offset. Usually, there |
105 | // is only one such relocation but there can be more for composed |
106 | // relocations. |
107 | auto ROI = RI; |
108 | auto ROE = Relocations.upper_bound(x: RelocationOffset); |
109 | |
110 | // Start from the next offset on the next iteration. |
111 | RI = ROE; |
112 | |
113 | // Skip undefined symbols. |
114 | auto HasUndefSym = [this](const auto &Relocation) { |
115 | return BC.UndefinedSymbols.count(x: Relocation.Symbol); |
116 | }; |
117 | |
118 | if (std::any_of(first: ROI, last: ROE, pred: HasUndefSym)) |
119 | continue; |
120 | |
121 | #ifndef NDEBUG |
122 | for (const auto &Relocation : make_range(x: ROI, y: ROE)) { |
123 | LLVM_DEBUG( |
124 | dbgs() << "BOLT-DEBUG: emitting relocation for symbol " |
125 | << (Relocation.Symbol ? Relocation.Symbol->getName() |
126 | : StringRef("<none>" )) |
127 | << " at offset 0x" << Twine::utohexstr(Relocation.Offset) |
128 | << " with size " |
129 | << Relocation::getSizeForType(Relocation.Type) << '\n'); |
130 | } |
131 | #endif |
132 | |
133 | size_t RelocationSize = Relocation::emit(Begin: ROI, End: ROE, Streamer: &Streamer); |
134 | SectionOffset += RelocationSize; |
135 | } |
136 | assert(SectionOffset <= SectionContents.size() && "overflow error" ); |
137 | if (SectionOffset < SectionContents.size()) |
138 | Streamer.emitBytes(Data: SectionContents.substr(Start: SectionOffset)); |
139 | } |
140 | |
141 | if (BC.HasRelocations && opts::HotData && isReordered()) |
142 | Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_end" )); |
143 | } |
144 | |
145 | void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, |
146 | SymbolResolverFuncTy Resolver) { |
147 | if (PendingRelocations.empty() && Patches.empty()) |
148 | return; |
149 | |
150 | const uint64_t SectionAddress = getAddress(); |
151 | |
152 | // We apply relocations to original section contents. For allocatable sections |
153 | // this means using their input file offsets, since the output file offset |
154 | // could change (e.g. for new instance of .text). For non-allocatable |
155 | // sections, the output offset should always be a valid one. |
156 | const uint64_t SectionFileOffset = |
157 | isAllocatable() ? getInputFileOffset() : getOutputFileOffset(); |
158 | LLVM_DEBUG( |
159 | dbgs() << "BOLT-DEBUG: flushing pending relocations for section " |
160 | << getName() << '\n' |
161 | << " address: 0x" << Twine::utohexstr(SectionAddress) << '\n' |
162 | << " offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n'); |
163 | |
164 | for (BinaryPatch &Patch : Patches) |
165 | OS.pwrite(Ptr: Patch.Bytes.data(), Size: Patch.Bytes.size(), |
166 | Offset: SectionFileOffset + Patch.Offset); |
167 | |
168 | for (Relocation &Reloc : PendingRelocations) { |
169 | uint64_t Value = Reloc.Addend; |
170 | if (Reloc.Symbol) |
171 | Value += Resolver(Reloc.Symbol); |
172 | |
173 | Value = Relocation::encodeValue(Type: Reloc.Type, Value, |
174 | PC: SectionAddress + Reloc.Offset); |
175 | |
176 | OS.pwrite(Ptr: reinterpret_cast<const char *>(&Value), |
177 | Size: Relocation::getSizeForType(Type: Reloc.Type), |
178 | Offset: SectionFileOffset + Reloc.Offset); |
179 | |
180 | LLVM_DEBUG( |
181 | dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value) |
182 | << " of size " << Relocation::getSizeForType(Reloc.Type) |
183 | << " at section offset 0x" << Twine::utohexstr(Reloc.Offset) |
184 | << " address 0x" |
185 | << Twine::utohexstr(SectionAddress + Reloc.Offset) |
186 | << " file offset 0x" |
187 | << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';); |
188 | } |
189 | |
190 | clearList(List&: PendingRelocations); |
191 | } |
192 | |
193 | BinarySection::~BinarySection() { updateContents(NewData: nullptr, NewSize: 0); } |
194 | |
195 | void BinarySection::clearRelocations() { clearList(List&: Relocations); } |
196 | |
197 | void BinarySection::print(raw_ostream &OS) const { |
198 | OS << getName() << ", " |
199 | << "0x" << Twine::utohexstr(Val: getAddress()) << ", " << getSize() << " (0x" |
200 | << Twine::utohexstr(Val: getOutputAddress()) << ", " << getOutputSize() << ")" |
201 | << ", data = " << getData() << ", output data = " << getOutputData(); |
202 | |
203 | if (isAllocatable()) |
204 | OS << " (allocatable)" ; |
205 | |
206 | if (isVirtual()) |
207 | OS << " (virtual)" ; |
208 | |
209 | if (isTLS()) |
210 | OS << " (tls)" ; |
211 | |
212 | if (opts::PrintRelocations) |
213 | for (const Relocation &R : relocations()) |
214 | OS << "\n " << R; |
215 | } |
216 | |
217 | BinarySection::RelocationSetType |
218 | BinarySection::reorderRelocations(bool Inplace) const { |
219 | assert(PendingRelocations.empty() && |
220 | "reordering pending relocations not supported" ); |
221 | RelocationSetType NewRelocations; |
222 | for (const Relocation &Rel : relocations()) { |
223 | uint64_t RelAddr = Rel.Offset + getAddress(); |
224 | BinaryData *BD = BC.getBinaryDataContainingAddress(Address: RelAddr); |
225 | BD = BD->getAtomicRoot(); |
226 | assert(BD); |
227 | |
228 | if ((!BD->isMoved() && !Inplace) || BD->isJumpTable()) |
229 | continue; |
230 | |
231 | Relocation NewRel(Rel); |
232 | uint64_t RelOffset = RelAddr - BD->getAddress(); |
233 | NewRel.Offset = BD->getOutputOffset() + RelOffset; |
234 | assert(NewRel.Offset < getSize()); |
235 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel |
236 | << "\n" ); |
237 | NewRelocations.emplace(args: std::move(NewRel)); |
238 | } |
239 | return NewRelocations; |
240 | } |
241 | |
242 | void BinarySection::reorderContents(const std::vector<BinaryData *> &Order, |
243 | bool Inplace) { |
244 | IsReordered = true; |
245 | |
246 | Relocations = reorderRelocations(Inplace); |
247 | |
248 | std::string Str; |
249 | raw_string_ostream OS(Str); |
250 | const char *Src = Contents.data(); |
251 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n" ); |
252 | for (BinaryData *BD : Order) { |
253 | assert((BD->isMoved() || !Inplace) && !BD->isJumpTable()); |
254 | assert(BD->isAtomic() && BD->isMoveable()); |
255 | const uint64_t SrcOffset = BD->getAddress() - getAddress(); |
256 | assert(SrcOffset < Contents.size()); |
257 | assert(SrcOffset == BD->getOffset()); |
258 | while (OS.tell() < BD->getOutputOffset()) |
259 | OS.write(C: (unsigned char)0); |
260 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell() |
261 | << "\n" ); |
262 | OS.write(Ptr: &Src[SrcOffset], Size: BD->getOutputSize()); |
263 | } |
264 | if (Relocations.empty()) { |
265 | // If there are no existing relocations, tack a phony one at the end |
266 | // of the reordered segment to force LLVM to recognize and map this |
267 | // section. |
268 | MCSymbol *ZeroSym = BC.registerNameAtAddress(Name: "Zero" , Address: 0, Size: 0, Alignment: 0); |
269 | addRelocation(Offset: OS.tell(), Symbol: ZeroSym, Type: Relocation::getAbs64(), Addend: 0xdeadbeef); |
270 | |
271 | uint64_t Zero = 0; |
272 | OS.write(Ptr: reinterpret_cast<const char *>(&Zero), Size: sizeof(Zero)); |
273 | } |
274 | auto *NewData = reinterpret_cast<char *>(copyByteArray(Buffer: OS.str())); |
275 | Contents = OutputContents = StringRef(NewData, OS.str().size()); |
276 | OutputSize = Contents.size(); |
277 | } |
278 | |
279 | std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr, |
280 | uint32_t Type) { |
281 | std::string Str; |
282 | raw_string_ostream OS(Str); |
283 | const uint32_t NameSz = NameStr.size() + 1; |
284 | const uint32_t DescSz = DescStr.size(); |
285 | OS.write(Ptr: reinterpret_cast<const char *>(&(NameSz)), Size: 4); |
286 | OS.write(Ptr: reinterpret_cast<const char *>(&(DescSz)), Size: 4); |
287 | OS.write(Ptr: reinterpret_cast<const char *>(&(Type)), Size: 4); |
288 | OS << NameStr << '\0'; |
289 | for (uint64_t I = NameSz; I < alignTo(Value: NameSz, Align: 4); ++I) |
290 | OS << '\0'; |
291 | OS << DescStr; |
292 | for (uint64_t I = DescStr.size(); I < alignTo(Value: DescStr.size(), Align: 4); ++I) |
293 | OS << '\0'; |
294 | return OS.str(); |
295 | } |
296 | |