//===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BinarySection class.
//
//===----------------------------------------------------------------------===//
| 12 | |
| 13 | #include "bolt/Core/BinarySection.h" |
| 14 | #include "bolt/Core/BinaryContext.h" |
| 15 | #include "bolt/Utils/CommandLineOpts.h" |
| 16 | #include "bolt/Utils/Utils.h" |
| 17 | #include "llvm/MC/MCStreamer.h" |
| 18 | #include "llvm/Support/CommandLine.h" |
| 19 | |
| 20 | #define DEBUG_TYPE "bolt" |
| 21 | |
| 22 | using namespace llvm; |
| 23 | using namespace bolt; |
| 24 | |
| 25 | namespace opts { |
| 26 | extern cl::opt<bool> HotData; |
| 27 | extern cl::opt<bool> PrintRelocations; |
| 28 | } // namespace opts |
| 29 | |
| 30 | uint64_t BinarySection::Count = 0; |
| 31 | |
| 32 | bool BinarySection::isELF() const { return BC.isELF(); } |
| 33 | |
| 34 | bool BinarySection::isMachO() const { return BC.isMachO(); } |
| 35 | |
| 36 | uint64_t |
| 37 | BinarySection::hash(const BinaryData &BD, |
| 38 | std::map<const BinaryData *, uint64_t> &Cache) const { |
| 39 | auto Itr = Cache.find(x: &BD); |
| 40 | if (Itr != Cache.end()) |
| 41 | return Itr->second; |
| 42 | |
| 43 | hash_code Hash = |
| 44 | hash_combine(args: hash_value(value: BD.getSize()), args: hash_value(S: BD.getSectionName())); |
| 45 | |
| 46 | Cache[&BD] = Hash; |
| 47 | |
| 48 | if (!containsRange(Address: BD.getAddress(), Size: BD.getSize())) |
| 49 | return Hash; |
| 50 | |
| 51 | uint64_t Offset = BD.getAddress() - getAddress(); |
| 52 | const uint64_t EndOffset = BD.getEndAddress() - getAddress(); |
| 53 | auto Begin = Relocations.lower_bound(x: Relocation{Offset, 0, 0, 0, 0}); |
| 54 | auto End = Relocations.upper_bound(x: Relocation{EndOffset, 0, 0, 0, 0}); |
| 55 | const StringRef Contents = getContents(); |
| 56 | |
| 57 | while (Begin != End) { |
| 58 | const Relocation &Rel = *Begin++; |
| 59 | Hash = hash_combine( |
| 60 | args: Hash, args: hash_value(S: Contents.substr(Start: Offset, N: Begin->Offset - Offset))); |
| 61 | if (BinaryData *RelBD = BC.getBinaryDataByName(Name: Rel.Symbol->getName())) |
| 62 | Hash = hash_combine(args: Hash, args: hash(BD: *RelBD, Cache)); |
| 63 | Offset = Rel.Offset + Rel.getSize(); |
| 64 | } |
| 65 | |
| 66 | Hash = hash_combine(args: Hash, |
| 67 | args: hash_value(S: Contents.substr(Start: Offset, N: EndOffset - Offset))); |
| 68 | |
| 69 | Cache[&BD] = Hash; |
| 70 | |
| 71 | return Hash; |
| 72 | } |
| 73 | |
| 74 | void BinarySection::emitAsData(MCStreamer &Streamer, |
| 75 | const Twine &SectionName) const { |
| 76 | StringRef SectionContents = |
| 77 | isFinalized() ? getOutputContents() : getContents(); |
| 78 | MCSectionELF *ELFSection = |
| 79 | BC.Ctx->getELFSection(Section: SectionName, Type: getELFType(), Flags: getELFFlags()); |
| 80 | |
| 81 | Streamer.switchSection(Section: ELFSection); |
| 82 | Streamer.emitValueToAlignment(Alignment: getAlign()); |
| 83 | |
| 84 | if (BC.HasRelocations && opts::HotData && isReordered()) |
| 85 | Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_start" )); |
| 86 | |
| 87 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting " |
| 88 | << (isAllocatable() ? "" : "non-" ) |
| 89 | << "allocatable data section " << SectionName << '\n'); |
| 90 | |
| 91 | if (!hasRelocations()) { |
| 92 | Streamer.emitBytes(Data: SectionContents); |
| 93 | } else { |
| 94 | uint64_t SectionOffset = 0; |
| 95 | for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) { |
| 96 | auto RelocationOffset = RI->Offset; |
| 97 | assert(RelocationOffset < SectionContents.size() && "overflow detected" ); |
| 98 | |
| 99 | if (SectionOffset < RelocationOffset) { |
| 100 | Streamer.emitBytes(Data: SectionContents.substr( |
| 101 | Start: SectionOffset, N: RelocationOffset - SectionOffset)); |
| 102 | SectionOffset = RelocationOffset; |
| 103 | } |
| 104 | |
| 105 | // Get iterators to all relocations with the same offset. Usually, there |
| 106 | // is only one such relocation but there can be more for composed |
| 107 | // relocations. |
| 108 | auto ROI = RI; |
| 109 | auto ROE = Relocations.upper_bound(x: RelocationOffset); |
| 110 | |
| 111 | // Start from the next offset on the next iteration. |
| 112 | RI = ROE; |
| 113 | |
| 114 | // Skip undefined symbols. |
| 115 | auto HasUndefSym = [this](const auto &Relocation) { |
| 116 | return BC.UndefinedSymbols.count(x: Relocation.Symbol); |
| 117 | }; |
| 118 | |
| 119 | if (std::any_of(first: ROI, last: ROE, pred: HasUndefSym)) |
| 120 | continue; |
| 121 | |
| 122 | #ifndef NDEBUG |
| 123 | for (const auto &Relocation : make_range(x: ROI, y: ROE)) { |
| 124 | LLVM_DEBUG( |
| 125 | dbgs() << "BOLT-DEBUG: emitting relocation for symbol " |
| 126 | << (Relocation.Symbol ? Relocation.Symbol->getName() |
| 127 | : StringRef("<none>" )) |
| 128 | << " at offset 0x" << Twine::utohexstr(Relocation.Offset) |
| 129 | << " with size " |
| 130 | << Relocation::getSizeForType(Relocation.Type) << '\n'); |
| 131 | } |
| 132 | #endif |
| 133 | |
| 134 | size_t RelocationSize = Relocation::emit(Begin: ROI, End: ROE, Streamer: &Streamer); |
| 135 | SectionOffset += RelocationSize; |
| 136 | } |
| 137 | assert(SectionOffset <= SectionContents.size() && "overflow error" ); |
| 138 | if (SectionOffset < SectionContents.size()) |
| 139 | Streamer.emitBytes(Data: SectionContents.substr(Start: SectionOffset)); |
| 140 | } |
| 141 | |
| 142 | if (BC.HasRelocations && opts::HotData && isReordered()) |
| 143 | Streamer.emitLabel(Symbol: BC.Ctx->getOrCreateSymbol(Name: "__hot_data_end" )); |
| 144 | } |
| 145 | |
| 146 | uint64_t BinarySection::write(raw_ostream &OS) const { |
| 147 | const uint64_t NumValidContentBytes = |
| 148 | std::min<uint64_t>(a: getOutputContents().size(), b: getOutputSize()); |
| 149 | OS.write(Ptr: getOutputContents().data(), Size: NumValidContentBytes); |
| 150 | if (getOutputSize() > NumValidContentBytes) |
| 151 | OS.write_zeros(NumZeros: getOutputSize() - NumValidContentBytes); |
| 152 | return getOutputSize(); |
| 153 | } |
| 154 | |
| 155 | void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, |
| 156 | SymbolResolverFuncTy Resolver) { |
| 157 | if (PendingRelocations.empty() && Patches.empty()) |
| 158 | return; |
| 159 | |
| 160 | const uint64_t SectionAddress = getAddress(); |
| 161 | |
| 162 | // We apply relocations to original section contents. For allocatable sections |
| 163 | // this means using their input file offsets, since the output file offset |
| 164 | // could change (e.g. for new instance of .text). For non-allocatable |
| 165 | // sections, the output offset should always be a valid one. |
| 166 | const uint64_t SectionFileOffset = |
| 167 | isAllocatable() ? getInputFileOffset() : getOutputFileOffset(); |
| 168 | LLVM_DEBUG( |
| 169 | dbgs() << "BOLT-DEBUG: flushing pending relocations for section " |
| 170 | << getName() << '\n' |
| 171 | << " address: 0x" << Twine::utohexstr(SectionAddress) << '\n' |
| 172 | << " offset: 0x" << Twine::utohexstr(SectionFileOffset) << '\n'); |
| 173 | |
| 174 | for (BinaryPatch &Patch : Patches) |
| 175 | OS.pwrite(Ptr: Patch.Bytes.data(), Size: Patch.Bytes.size(), |
| 176 | Offset: SectionFileOffset + Patch.Offset); |
| 177 | |
| 178 | uint64_t SkippedPendingRelocations = 0; |
| 179 | for (Relocation &Reloc : PendingRelocations) { |
| 180 | uint64_t Value = Reloc.Addend; |
| 181 | if (Reloc.Symbol) |
| 182 | Value += Resolver(Reloc.Symbol); |
| 183 | |
| 184 | // Safely skip any optional pending relocation that cannot be encoded. |
| 185 | if (Reloc.isOptional() && |
| 186 | !Relocation::canEncodeValue(Type: Reloc.Type, Value, |
| 187 | PC: SectionAddress + Reloc.Offset)) { |
| 188 | |
| 189 | ++SkippedPendingRelocations; |
| 190 | continue; |
| 191 | } |
| 192 | Value = Relocation::encodeValue(Type: Reloc.Type, Value, |
| 193 | PC: SectionAddress + Reloc.Offset); |
| 194 | |
| 195 | OS.pwrite(Ptr: reinterpret_cast<const char *>(&Value), |
| 196 | Size: Relocation::getSizeForType(Type: Reloc.Type), |
| 197 | Offset: SectionFileOffset + Reloc.Offset); |
| 198 | |
| 199 | LLVM_DEBUG( |
| 200 | dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value) |
| 201 | << " of size " << Relocation::getSizeForType(Reloc.Type) |
| 202 | << " at section offset 0x" << Twine::utohexstr(Reloc.Offset) |
| 203 | << " address 0x" |
| 204 | << Twine::utohexstr(SectionAddress + Reloc.Offset) |
| 205 | << " file offset 0x" |
| 206 | << Twine::utohexstr(SectionFileOffset + Reloc.Offset) << '\n';); |
| 207 | } |
| 208 | |
| 209 | clearList(List&: PendingRelocations); |
| 210 | |
| 211 | if (SkippedPendingRelocations > 0 && opts::Verbosity >= 1) { |
| 212 | BC.outs() << "BOLT-INFO: skipped " << SkippedPendingRelocations |
| 213 | << " out-of-range optional relocations\n" ; |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | BinarySection::~BinarySection() { updateContents(NewData: nullptr, NewSize: 0); } |
| 218 | |
| 219 | void BinarySection::clearRelocations() { clearList(List&: Relocations); } |
| 220 | |
| 221 | void BinarySection::print(raw_ostream &OS) const { |
| 222 | OS << getName() << ", " |
| 223 | << "0x" << Twine::utohexstr(Val: getAddress()) << ", " << getSize() << " (0x" |
| 224 | << Twine::utohexstr(Val: getOutputAddress()) << ", " << getOutputSize() << ")" |
| 225 | << ", data = " << getData() << ", output data = " << getOutputData(); |
| 226 | |
| 227 | if (isAllocatable()) |
| 228 | OS << " (allocatable)" ; |
| 229 | |
| 230 | if (isVirtual()) |
| 231 | OS << " (virtual)" ; |
| 232 | |
| 233 | if (isTLS()) |
| 234 | OS << " (tls)" ; |
| 235 | |
| 236 | if (opts::PrintRelocations) |
| 237 | for (const Relocation &R : relocations()) |
| 238 | OS << "\n " << R; |
| 239 | } |
| 240 | |
| 241 | BinarySection::RelocationSetType |
| 242 | BinarySection::reorderRelocations(bool Inplace) const { |
| 243 | assert(PendingRelocations.empty() && |
| 244 | "reordering pending relocations not supported" ); |
| 245 | RelocationSetType NewRelocations; |
| 246 | for (const Relocation &Rel : relocations()) { |
| 247 | uint64_t RelAddr = Rel.Offset + getAddress(); |
| 248 | BinaryData *BD = BC.getBinaryDataContainingAddress(Address: RelAddr); |
| 249 | BD = BD->getAtomicRoot(); |
| 250 | assert(BD); |
| 251 | |
| 252 | if ((!BD->isMoved() && !Inplace) || BD->isJumpTable()) |
| 253 | continue; |
| 254 | |
| 255 | Relocation NewRel(Rel); |
| 256 | uint64_t RelOffset = RelAddr - BD->getAddress(); |
| 257 | NewRel.Offset = BD->getOutputOffset() + RelOffset; |
| 258 | assert(NewRel.Offset < getSize()); |
| 259 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel |
| 260 | << "\n" ); |
| 261 | NewRelocations.emplace(args: std::move(NewRel)); |
| 262 | } |
| 263 | return NewRelocations; |
| 264 | } |
| 265 | |
| 266 | void BinarySection::reorderContents(const std::vector<BinaryData *> &Order, |
| 267 | bool Inplace) { |
| 268 | IsReordered = true; |
| 269 | |
| 270 | Relocations = reorderRelocations(Inplace); |
| 271 | |
| 272 | std::string Str; |
| 273 | raw_string_ostream OS(Str); |
| 274 | const char *Src = Contents.data(); |
| 275 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n" ); |
| 276 | for (BinaryData *BD : Order) { |
| 277 | assert((BD->isMoved() || !Inplace) && !BD->isJumpTable()); |
| 278 | assert(BD->isAtomic() && BD->isMoveable()); |
| 279 | const uint64_t SrcOffset = BD->getAddress() - getAddress(); |
| 280 | assert(SrcOffset < Contents.size()); |
| 281 | assert(SrcOffset == BD->getOffset()); |
| 282 | while (OS.tell() < BD->getOutputOffset()) |
| 283 | OS.write(C: (unsigned char)0); |
| 284 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ " << OS.tell() |
| 285 | << "\n" ); |
| 286 | OS.write(Ptr: &Src[SrcOffset], Size: BD->getOutputSize()); |
| 287 | } |
| 288 | if (Relocations.empty()) { |
| 289 | // If there are no existing relocations, tack a phony one at the end |
| 290 | // of the reordered segment to force LLVM to recognize and map this |
| 291 | // section. |
| 292 | MCSymbol *ZeroSym = BC.registerNameAtAddress(Name: "Zero" , Address: 0, Size: 0, Alignment: 0); |
| 293 | addRelocation(Offset: OS.tell(), Symbol: ZeroSym, Type: Relocation::getAbs64(), Addend: 0xdeadbeef); |
| 294 | |
| 295 | uint64_t Zero = 0; |
| 296 | OS.write(Ptr: reinterpret_cast<const char *>(&Zero), Size: sizeof(Zero)); |
| 297 | } |
| 298 | auto *NewData = reinterpret_cast<char *>(copyByteArray(Buffer: OS.str())); |
| 299 | Contents = OutputContents = StringRef(NewData, OS.str().size()); |
| 300 | OutputSize = Contents.size(); |
| 301 | } |
| 302 | |
| 303 | std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr, |
| 304 | uint32_t Type) { |
| 305 | std::string Str; |
| 306 | raw_string_ostream OS(Str); |
| 307 | const uint32_t NameSz = NameStr.size() + 1; |
| 308 | const uint32_t DescSz = DescStr.size(); |
| 309 | OS.write(Ptr: reinterpret_cast<const char *>(&(NameSz)), Size: 4); |
| 310 | OS.write(Ptr: reinterpret_cast<const char *>(&(DescSz)), Size: 4); |
| 311 | OS.write(Ptr: reinterpret_cast<const char *>(&(Type)), Size: 4); |
| 312 | OS << NameStr << '\0'; |
| 313 | for (uint64_t I = NameSz; I < alignTo(Value: NameSz, Align: 4); ++I) |
| 314 | OS << '\0'; |
| 315 | OS << DescStr; |
| 316 | for (uint64_t I = DescStr.size(); I < alignTo(Value: DescStr.size(), Align: 4); ++I) |
| 317 | OS << '\0'; |
| 318 | return OS.str(); |
| 319 | } |
| 320 | |