1//===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the BinarySection class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/BinarySection.h"
14#include "bolt/Core/BinaryContext.h"
15#include "bolt/Utils/Utils.h"
16#include "llvm/MC/MCStreamer.h"
17#include "llvm/Support/CommandLine.h"
18
19#define DEBUG_TYPE "bolt"
20
21using namespace llvm;
22using namespace bolt;
23
24namespace opts {
25extern cl::opt<bool> PrintRelocations;
26extern cl::opt<bool> HotData;
27} // namespace opts
28
29uint64_t BinarySection::Count = 0;
30
31bool BinarySection::isELF() const { return BC.isELF(); }
32
33bool BinarySection::isMachO() const { return BC.isMachO(); }
34
35uint64_t
36BinarySection::hash(const BinaryData &BD,
37 std::map<const BinaryData *, uint64_t> &Cache) const {
38 auto Itr = Cache.find(x: &BD);
39 if (Itr != Cache.end())
40 return Itr->second;
41
42 hash_code Hash =
43 hash_combine(args: hash_value(value: BD.getSize()), args: hash_value(S: BD.getSectionName()));
44
45 Cache[&BD] = Hash;
46
47 if (!containsRange(Address: BD.getAddress(), Size: BD.getSize()))
48 return Hash;
49
50 uint64_t Offset = BD.getAddress() - getAddress();
51 const uint64_t EndOffset = BD.getEndAddress() - getAddress();
52 auto Begin = Relocations.lower_bound(x: Relocation{.Offset: Offset, .Symbol: 0, .Type: 0, .Addend: 0, .Value: 0});
53 auto End = Relocations.upper_bound(x: Relocation{.Offset: EndOffset, .Symbol: 0, .Type: 0, .Addend: 0, .Value: 0});
54 const StringRef Contents = getContents();
55
56 while (Begin != End) {
57 const Relocation &Rel = *Begin++;
58 Hash = hash_combine(
59 args: Hash, args: hash_value(S: Contents.substr(Start: Offset, N: Begin->Offset - Offset)));
60 if (BinaryData *RelBD = BC.getBinaryDataByName(Name: Rel.Symbol->getName()))
61 Hash = hash_combine(args: Hash, args: hash(BD: *RelBD, Cache));
62 Offset = Rel.Offset + Rel.getSize();
63 }
64
65 Hash = hash_combine(args: Hash,
66 args: hash_value(S: Contents.substr(Start: Offset, N: EndOffset - Offset)));
67
68 Cache[&BD] = Hash;
69
70 return Hash;
71}
72
73void BinarySection::emitAsData(MCStreamer &Streamer,
74 const Twine &SectionName) const {
75 StringRef SectionContents =
76 isFinalized() ? getOutputContents() : getContents();
77 MCSectionELF *ELFSection =
78 BC.Ctx->getELFSection(Section: SectionName, Type: getELFType(), Flags: getELFFlags());
79
80 Streamer.switchSection(Section: ELFSection);
81 Streamer.emitValueToAlignment(Alignment: getAlign());
82
83 if (BC.HasRelocations && opts::HotData && isReordered())
84 Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_start"));
85
86 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting "
87 << (isAllocatable() ? "" : "non-")
88 << "allocatable data section " << SectionName << '\n');
89
90 if (!hasRelocations()) {
91 Streamer.emitBytes(Data: SectionContents);
92 } else {
93 uint64_t SectionOffset = 0;
94 for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) {
95 auto RelocationOffset = RI->Offset;
96 assert(RelocationOffset < SectionContents.size() && "overflow detected");
97
98 if (SectionOffset < RelocationOffset) {
99 Streamer.emitBytes(Data: SectionContents.substr(
100 Start: SectionOffset, N: RelocationOffset - SectionOffset));
101 SectionOffset = RelocationOffset;
102 }
103
104 // Get iterators to all relocations with the same offset. Usually, there
105 // is only one such relocation but there can be more for composed
106 // relocations.
107 auto ROI = RI;
108 auto ROE = Relocations.upper_bound(x: RelocationOffset);
109
110 // Start from the next offset on the next iteration.
111 RI = ROE;
112
113 // Skip undefined symbols.
114 auto HasUndefSym = [this](const auto &Relocation) {
115 return BC.UndefinedSymbols.count(x: Relocation.Symbol);
116 };
117
118 if (std::any_of(first: ROI, last: ROE, pred: HasUndefSym))
119 continue;
120
121#ifndef NDEBUG
122 for (const auto &Relocation : make_range(x: ROI, y: ROE)) {
123 LLVM_DEBUG(
124 dbgs() << "BOLT-DEBUG: emitting relocation for symbol "
125 << (Relocation.Symbol ? Relocation.Symbol->getName()
126 : StringRef("<none>"))
127 << " at offset 0x" << Twine::utohexstr(Relocation.Offset)
128 << " with size "
129 << Relocation::getSizeForType(Relocation.Type) << '\n');
130 }
131#endif
132
133 size_t RelocationSize = Relocation::emit(Begin: ROI, End: ROE, Streamer: &Streamer);
134 SectionOffset += RelocationSize;
135 }
136 assert(SectionOffset <= SectionContents.size() && "overflow error");
137 if (SectionOffset < SectionContents.size())
138 Streamer.emitBytes(Data: SectionContents.substr(Start: SectionOffset));
139 }
140
141 if (BC.HasRelocations && opts::HotData && isReordered())
142 Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_end"));
143}
144
145void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
146 SymbolResolverFuncTy Resolver) {
147 if (PendingRelocations.empty() && Patches.empty())
148 return;
149
150 const uint64_t SectionAddress = getAddress();
151
152 // We apply relocations to original section contents. For allocatable sections
153 // this means using their input file offsets, since the output file offset
154 // could change (e.g. for new instance of .text). For non-allocatable
155 // sections, the output offset should always be a valid one.
156 const uint64_t SectionFileOffset =
157 isAllocatable() ? getInputFileOffset() : getOutputFileOffset();
158 LLVM_DEBUG(
159 dbgs() << "BOLT-DEBUG: flushing pending relocations for section "
160 << getName() << '\n'
161 << " address: 0x" << Twine::utohexstr(SectionAddress) << '\n'
162 << " offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n');
163
164 for (BinaryPatch &Patch : Patches)
165 OS.pwrite(Ptr: Patch.Bytes.data(), Size: Patch.Bytes.size(),
166 Offset: SectionFileOffset + Patch.Offset);
167
168 for (Relocation &Reloc : PendingRelocations) {
169 uint64_t Value = Reloc.Addend;
170 if (Reloc.Symbol)
171 Value += Resolver(Reloc.Symbol);
172
173 Value = Relocation::encodeValue(Type: Reloc.Type, Value,
174 PC: SectionAddress + Reloc.Offset);
175
176 OS.pwrite(Ptr: reinterpret_cast<const char *>(&Value),
177 Size: Relocation::getSizeForType(Type: Reloc.Type),
178 Offset: SectionFileOffset + Reloc.Offset);
179
180 LLVM_DEBUG(
181 dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value)
182 << " of size " << Relocation::getSizeForType(Reloc.Type)
183 << " at section offset 0x" << Twine::utohexstr(Reloc.Offset)
184 << " address 0x"
185 << Twine::utohexstr(SectionAddress + Reloc.Offset)
186 << " file offset 0x"
187 << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';);
188 }
189
190 clearList(List&: PendingRelocations);
191}
192
193BinarySection::~BinarySection() { updateContents(NewData: nullptr, NewSize: 0); }
194
195void BinarySection::clearRelocations() { clearList(List&: Relocations); }
196
197void BinarySection::print(raw_ostream &OS) const {
198 OS << getName() << ", "
199 << "0x" << Twine::utohexstr(Val: getAddress()) << ", " << getSize() << " (0x"
200 << Twine::utohexstr(Val: getOutputAddress()) << ", " << getOutputSize() << ")"
201 << ", data = " << getData() << ", output data = " << getOutputData();
202
203 if (isAllocatable())
204 OS << " (allocatable)";
205
206 if (isVirtual())
207 OS << " (virtual)";
208
209 if (isTLS())
210 OS << " (tls)";
211
212 if (opts::PrintRelocations)
213 for (const Relocation &R : relocations())
214 OS << "\n " << R;
215}
216
217BinarySection::RelocationSetType
218BinarySection::reorderRelocations(bool Inplace) const {
219 assert(PendingRelocations.empty() &&
220 "reordering pending relocations not supported");
221 RelocationSetType NewRelocations;
222 for (const Relocation &Rel : relocations()) {
223 uint64_t RelAddr = Rel.Offset + getAddress();
224 BinaryData *BD = BC.getBinaryDataContainingAddress(Address: RelAddr);
225 BD = BD->getAtomicRoot();
226 assert(BD);
227
228 if ((!BD->isMoved() && !Inplace) || BD->isJumpTable())
229 continue;
230
231 Relocation NewRel(Rel);
232 uint64_t RelOffset = RelAddr - BD->getAddress();
233 NewRel.Offset = BD->getOutputOffset() + RelOffset;
234 assert(NewRel.Offset < getSize());
235 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel
236 << "\n");
237 NewRelocations.emplace(args: std::move(NewRel));
238 }
239 return NewRelocations;
240}
241
242void BinarySection::reorderContents(const std::vector<BinaryData *> &Order,
243 bool Inplace) {
244 IsReordered = true;
245
246 Relocations = reorderRelocations(Inplace);
247
248 std::string Str;
249 raw_string_ostream OS(Str);
250 const char *Src = Contents.data();
251 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n");
252 for (BinaryData *BD : Order) {
253 assert((BD->isMoved() || !Inplace) && !BD->isJumpTable());
254 assert(BD->isAtomic() && BD->isMoveable());
255 const uint64_t SrcOffset = BD->getAddress() - getAddress();
256 assert(SrcOffset < Contents.size());
257 assert(SrcOffset == BD->getOffset());
258 while (OS.tell() < BD->getOutputOffset())
259 OS.write(C: (unsigned char)0);
260 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell()
261 << "\n");
262 OS.write(Ptr: &Src[SrcOffset], Size: BD->getOutputSize());
263 }
264 if (Relocations.empty()) {
265 // If there are no existing relocations, tack a phony one at the end
266 // of the reordered segment to force LLVM to recognize and map this
267 // section.
268 MCSymbol *ZeroSym = BC.registerNameAtAddress(Name: "Zero", Address: 0, Size: 0, Alignment: 0);
269 addRelocation(Offset: OS.tell(), Symbol: ZeroSym, Type: Relocation::getAbs64(), Addend: 0xdeadbeef);
270
271 uint64_t Zero = 0;
272 OS.write(Ptr: reinterpret_cast<const char *>(&Zero), Size: sizeof(Zero));
273 }
274 auto *NewData = reinterpret_cast<char *>(copyByteArray(Buffer: OS.str()));
275 Contents = OutputContents = StringRef(NewData, OS.str().size());
276 OutputSize = Contents.size();
277}
278
279std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr,
280 uint32_t Type) {
281 std::string Str;
282 raw_string_ostream OS(Str);
283 const uint32_t NameSz = NameStr.size() + 1;
284 const uint32_t DescSz = DescStr.size();
285 OS.write(Ptr: reinterpret_cast<const char *>(&(NameSz)), Size: 4);
286 OS.write(Ptr: reinterpret_cast<const char *>(&(DescSz)), Size: 4);
287 OS.write(Ptr: reinterpret_cast<const char *>(&(Type)), Size: 4);
288 OS << NameStr << '\0';
289 for (uint64_t I = NameSz; I < alignTo(Value: NameSz, Align: 4); ++I)
290 OS << '\0';
291 OS << DescStr;
292 for (uint64_t I = DescStr.size(); I < alignTo(Value: DescStr.size(), Align: 4); ++I)
293 OS << '\0';
294 return OS.str();
295}
296

source code of bolt/lib/Core/BinarySection.cpp