| 1 | //===- bolt/Passes/JTFootprintReduction.cpp -------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements JTFootprintReduction class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Passes/JTFootprintReduction.h" |
| 14 | #include "bolt/Core/BinaryFunctionCallGraph.h" |
| 15 | #include "bolt/Passes/DataflowInfoManager.h" |
| 16 | #include "llvm/Support/CommandLine.h" |
| 17 | |
| 18 | #define DEBUG_TYPE "JT" |
| 19 | |
| 20 | using namespace llvm; |
| 21 | using namespace bolt; |
| 22 | |
| 23 | namespace opts { |
| 24 | |
| 25 | extern cl::OptionCategory BoltOptCategory; |
| 26 | |
| 27 | extern cl::opt<unsigned> Verbosity; |
| 28 | |
| 29 | extern cl::opt<JumpTableSupportLevel> JumpTables; |
| 30 | |
| 31 | static cl::opt<bool> ( |
| 32 | "jt-footprint-optimize-for-icache" , |
| 33 | cl::desc("with jt-footprint-reduction, only process PIC jumptables and turn" |
| 34 | " off other transformations that increase code size" ), |
| 35 | cl::init(Val: false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); |
| 36 | |
| 37 | } // namespace opts |
| 38 | |
| 39 | namespace llvm { |
| 40 | namespace bolt { |
| 41 | |
| 42 | void JTFootprintReduction::(BinaryFunction &Function, |
| 43 | DataflowInfoManager &Info) { |
| 44 | BinaryContext &BC = Function.getBinaryContext(); |
| 45 | std::map<JumpTable *, uint64_t> AllJTs; |
| 46 | |
| 47 | for (BinaryBasicBlock &BB : Function) { |
| 48 | for (MCInst &Inst : BB) { |
| 49 | JumpTable *JumpTable = Function.getJumpTable(Inst); |
| 50 | if (!JumpTable) |
| 51 | continue; |
| 52 | |
| 53 | AllJTs[JumpTable] += BB.getKnownExecutionCount(); |
| 54 | ++IndJmps; |
| 55 | |
| 56 | if (BlacklistedJTs.count(V: JumpTable)) { |
| 57 | ++IndJmpsDenied; |
| 58 | continue; |
| 59 | } |
| 60 | |
| 61 | uint64_t Scale; |
| 62 | // Try a standard indirect jump matcher |
| 63 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher = |
| 64 | BC.MIB->matchIndJmp(Base: BC.MIB->matchAnyOperand(), |
| 65 | Scale: BC.MIB->matchImm(Imm&: Scale), Index: BC.MIB->matchReg(), |
| 66 | Offset: BC.MIB->matchAnyOperand()); |
| 67 | if (!opts::JTFootprintOnlyPIC && |
| 68 | IndJmpMatcher->match(MRI: *BC.MRI, MIA&: *BC.MIB, |
| 69 | InInstrWindow: MutableArrayRef<MCInst>(&*BB.begin(), &Inst + 1), |
| 70 | OpNum: -1) && |
| 71 | Scale == 8) { |
| 72 | if (Info.getLivenessAnalysis().scavengeRegAfter(P: &Inst)) |
| 73 | continue; |
| 74 | BlacklistedJTs.insert(V: JumpTable); |
| 75 | ++IndJmpsDenied; |
| 76 | ++NumJTsNoReg; |
| 77 | continue; |
| 78 | } |
| 79 | |
| 80 | // Try a PIC matcher. The pattern we are looking for is a PIC JT ind jmp: |
| 81 | // addq %rdx, %rsi |
| 82 | // addq %rdx, %rdi |
| 83 | // leaq DATAat0x402450(%rip), %r11 |
| 84 | // movslq (%r11,%rdx,4), %rcx |
| 85 | // addq %r11, %rcx |
| 86 | // jmpq *%rcx # JUMPTABLE @0x402450 |
| 87 | MCPhysReg BaseReg1; |
| 88 | MCPhysReg BaseReg2; |
| 89 | uint64_t Offset; |
| 90 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher = |
| 91 | BC.MIB->matchIndJmp(Target: BC.MIB->matchAdd( |
| 92 | A: BC.MIB->matchReg(Reg&: BaseReg1), |
| 93 | B: BC.MIB->matchLoad(Base: BC.MIB->matchReg(Reg&: BaseReg2), |
| 94 | Scale: BC.MIB->matchImm(Imm&: Scale), Index: BC.MIB->matchReg(), |
| 95 | Offset: BC.MIB->matchImm(Imm&: Offset)))); |
| 96 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher = |
| 97 | BC.MIB->matchIndJmp( |
| 98 | Target: BC.MIB->matchAdd(A: BC.MIB->matchLoadAddr(Target: BC.MIB->matchSymbol()), |
| 99 | B: BC.MIB->matchAnyOperand())); |
| 100 | if (!PICIndJmpMatcher->match( |
| 101 | MRI: *BC.MRI, MIA&: *BC.MIB, |
| 102 | InInstrWindow: MutableArrayRef<MCInst>(&*BB.begin(), &Inst + 1), OpNum: -1) || |
| 103 | Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 || |
| 104 | !PICBaseAddrMatcher->match( |
| 105 | MRI: *BC.MRI, MIA&: *BC.MIB, |
| 106 | InInstrWindow: MutableArrayRef<MCInst>(&*BB.begin(), &Inst + 1), OpNum: -1)) { |
| 107 | BlacklistedJTs.insert(V: JumpTable); |
| 108 | ++IndJmpsDenied; |
| 109 | ++NumJTsBadMatch; |
| 110 | continue; |
| 111 | } |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | // Statistics only |
| 116 | for (const auto &JTFreq : AllJTs) { |
| 117 | JumpTable *JT = JTFreq.first; |
| 118 | uint64_t CurScore = JTFreq.second; |
| 119 | TotalJTScore += CurScore; |
| 120 | if (!BlacklistedJTs.count(V: JT)) { |
| 121 | OptimizedScore += CurScore; |
| 122 | if (JT->EntrySize == 8) |
| 123 | BytesSaved += JT->getSize() >> 1; |
| 124 | } |
| 125 | } |
| 126 | TotalJTs += AllJTs.size(); |
| 127 | TotalJTsDenied += BlacklistedJTs.size(); |
| 128 | } |
| 129 | |
| 130 | bool JTFootprintReduction::( |
| 131 | BinaryContext &BC, BinaryBasicBlock &BB, BinaryBasicBlock::iterator Inst, |
| 132 | uint64_t JTAddr, JumpTable *JumpTable, DataflowInfoManager &Info) { |
| 133 | if (opts::JTFootprintOnlyPIC) |
| 134 | return false; |
| 135 | |
| 136 | MCOperand Base; |
| 137 | uint64_t Scale; |
| 138 | MCPhysReg Index; |
| 139 | MCOperand Offset; |
| 140 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher = |
| 141 | BC.MIB->matchIndJmp(Base: BC.MIB->matchAnyOperand(Op&: Base), |
| 142 | Scale: BC.MIB->matchImm(Imm&: Scale), Index: BC.MIB->matchReg(Reg&: Index), |
| 143 | Offset: BC.MIB->matchAnyOperand(Op&: Offset)); |
| 144 | if (!IndJmpMatcher->match(MRI: *BC.MRI, MIA&: *BC.MIB, |
| 145 | InInstrWindow: MutableArrayRef<MCInst>(&*BB.begin(), &*Inst + 1), |
| 146 | OpNum: -1)) |
| 147 | return false; |
| 148 | |
| 149 | assert(Scale == 8 && "Wrong scale" ); |
| 150 | |
| 151 | Scale = 4; |
| 152 | IndJmpMatcher->annotate(MIA&: *BC.MIB, Annotation: "DeleteMe" ); |
| 153 | |
| 154 | LivenessAnalysis &LA = Info.getLivenessAnalysis(); |
| 155 | MCPhysReg Reg = LA.scavengeRegAfter(P: &*Inst); |
| 156 | assert(Reg != 0 && "Register scavenger failed!" ); |
| 157 | MCOperand RegOp = MCOperand::createReg(Reg); |
| 158 | SmallVector<MCInst, 4> NewFrag; |
| 159 | |
| 160 | BC.MIB->createIJmp32Frag(Insts&: NewFrag, BaseReg: Base, Scale: MCOperand::createImm(Val: Scale), |
| 161 | IndexReg: MCOperand::createReg(Reg: Index), Offset, TmpReg: RegOp); |
| 162 | BC.MIB->setJumpTable(Inst&: NewFrag.back(), Value: JTAddr, IndexReg: Index); |
| 163 | |
| 164 | JumpTable->OutputEntrySize = 4; |
| 165 | |
| 166 | BB.replaceInstruction(II: Inst, Begin: NewFrag.begin(), End: NewFrag.end()); |
| 167 | return true; |
| 168 | } |
| 169 | |
| 170 | bool JTFootprintReduction::(BinaryContext &BC, |
| 171 | BinaryBasicBlock &BB, |
| 172 | BinaryBasicBlock::iterator Inst, |
| 173 | uint64_t JTAddr, JumpTable *JumpTable, |
| 174 | DataflowInfoManager &Info) { |
| 175 | MCPhysReg BaseReg; |
| 176 | uint64_t Scale; |
| 177 | MCPhysReg Index; |
| 178 | MCOperand Offset; |
| 179 | MCOperand JumpTableRef; |
| 180 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher = |
| 181 | BC.MIB->matchIndJmp(Target: BC.MIB->matchAdd( |
| 182 | A: BC.MIB->matchLoadAddr(Target: BC.MIB->matchAnyOperand(Op&: JumpTableRef)), |
| 183 | B: BC.MIB->matchLoad(Base: BC.MIB->matchReg(Reg&: BaseReg), Scale: BC.MIB->matchImm(Imm&: Scale), |
| 184 | Index: BC.MIB->matchReg(Reg&: Index), |
| 185 | Offset: BC.MIB->matchAnyOperand()))); |
| 186 | if (!PICIndJmpMatcher->match( |
| 187 | MRI: *BC.MRI, MIA&: *BC.MIB, InInstrWindow: MutableArrayRef<MCInst>(&*BB.begin(), &*Inst + 1), |
| 188 | OpNum: -1)) |
| 189 | return false; |
| 190 | |
| 191 | assert(Scale == 4 && "Wrong scale" ); |
| 192 | |
| 193 | PICIndJmpMatcher->annotate(MIA&: *BC.MIB, Annotation: "DeleteMe" ); |
| 194 | |
| 195 | MCOperand RegOp = MCOperand::createReg(Reg: BaseReg); |
| 196 | SmallVector<MCInst, 4> NewFrag; |
| 197 | |
| 198 | BC.MIB->createIJmp32Frag(Insts&: NewFrag, BaseReg: MCOperand::createReg(Reg: 0), |
| 199 | Scale: MCOperand::createImm(Val: Scale), |
| 200 | IndexReg: MCOperand::createReg(Reg: Index), Offset: JumpTableRef, TmpReg: RegOp); |
| 201 | BC.MIB->setJumpTable(Inst&: NewFrag.back(), Value: JTAddr, IndexReg: Index); |
| 202 | |
| 203 | JumpTable->OutputEntrySize = 4; |
| 204 | // DePICify |
| 205 | JumpTable->Type = JumpTable::JTT_NORMAL; |
| 206 | |
| 207 | BB.replaceInstruction(II: Inst, Begin: NewFrag.begin(), End: NewFrag.end()); |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | void JTFootprintReduction::(BinaryFunction &Function, |
| 212 | DataflowInfoManager &Info) { |
| 213 | BinaryContext &BC = Function.getBinaryContext(); |
| 214 | for (BinaryBasicBlock &BB : Function) { |
| 215 | if (!BB.getNumNonPseudos()) |
| 216 | continue; |
| 217 | |
| 218 | auto IndJmpRI = BB.getLastNonPseudo(); |
| 219 | auto IndJmp = std::prev(x: IndJmpRI.base()); |
| 220 | const uint64_t JTAddr = BC.MIB->getJumpTable(Inst: *IndJmp); |
| 221 | |
| 222 | if (!JTAddr) |
| 223 | continue; |
| 224 | |
| 225 | JumpTable *JumpTable = Function.getJumpTable(Inst: *IndJmp); |
| 226 | if (BlacklistedJTs.count(V: JumpTable)) |
| 227 | continue; |
| 228 | |
| 229 | if (tryOptimizeNonPIC(BC, BB, Inst: IndJmp, JTAddr, JumpTable, Info) || |
| 230 | tryOptimizePIC(BC, BB, Inst: IndJmp, JTAddr, JumpTable, Info)) { |
| 231 | Modified.insert(V: &Function); |
| 232 | continue; |
| 233 | } |
| 234 | |
| 235 | llvm_unreachable("Should either optimize PIC or NonPIC successfully" ); |
| 236 | } |
| 237 | |
| 238 | if (!Modified.count(V: &Function)) |
| 239 | return; |
| 240 | |
| 241 | for (BinaryBasicBlock &BB : Function) |
| 242 | for (auto I = BB.begin(); I != BB.end();) |
| 243 | if (BC.MIB->hasAnnotation(Inst: *I, Name: "DeleteMe" )) |
| 244 | I = BB.eraseInstruction(II: I); |
| 245 | else |
| 246 | ++I; |
| 247 | } |
| 248 | |
| 249 | Error JTFootprintReduction::(BinaryContext &BC) { |
| 250 | if (opts::JumpTables == JTS_BASIC && BC.HasRelocations) |
| 251 | return Error::success(); |
| 252 | |
| 253 | std::unique_ptr<RegAnalysis> RA; |
| 254 | std::unique_ptr<BinaryFunctionCallGraph> CG; |
| 255 | if (!opts::JTFootprintOnlyPIC) { |
| 256 | CG.reset(p: new BinaryFunctionCallGraph(buildCallGraph(BC))); |
| 257 | RA.reset(p: new RegAnalysis(BC, &BC.getBinaryFunctions(), &*CG)); |
| 258 | } |
| 259 | for (auto &BFIt : BC.getBinaryFunctions()) { |
| 260 | BinaryFunction &Function = BFIt.second; |
| 261 | |
| 262 | if (!Function.isSimple() || Function.isIgnored()) |
| 263 | continue; |
| 264 | |
| 265 | if (Function.getKnownExecutionCount() == 0) |
| 266 | continue; |
| 267 | |
| 268 | DataflowInfoManager Info(Function, RA.get(), nullptr); |
| 269 | BlacklistedJTs.clear(); |
| 270 | checkOpportunities(Function, Info); |
| 271 | optimizeFunction(Function, Info); |
| 272 | } |
| 273 | |
| 274 | if (TotalJTs == TotalJTsDenied) { |
| 275 | BC.outs() << "BOLT-INFO: JT Footprint reduction: no changes were made.\n" ; |
| 276 | return Error::success(); |
| 277 | } |
| 278 | |
| 279 | BC.outs() << "BOLT-INFO: JT Footprint reduction stats (simple funcs only):\n" ; |
| 280 | if (OptimizedScore) |
| 281 | BC.outs() << format(Fmt: "\t %.2lf%%" , Vals: (OptimizedScore * 100.0 / TotalJTScore)) |
| 282 | << " of dynamic JT entries were reduced.\n" ; |
| 283 | BC.outs() << "\t " << TotalJTs - TotalJTsDenied << " of " << TotalJTs |
| 284 | << " jump tables affected.\n" ; |
| 285 | BC.outs() << "\t " << IndJmps - IndJmpsDenied << " of " << IndJmps |
| 286 | << " indirect jumps to JTs affected.\n" ; |
| 287 | BC.outs() << "\t " << NumJTsBadMatch |
| 288 | << " JTs discarded due to unsupported jump pattern.\n" ; |
| 289 | BC.outs() << "\t " << NumJTsNoReg |
| 290 | << " JTs discarded due to register unavailability.\n" ; |
| 291 | BC.outs() << "\t " << BytesSaved << " bytes saved.\n" ; |
| 292 | return Error::success(); |
| 293 | } |
| 294 | |
| 295 | } // namespace bolt |
| 296 | } // namespace llvm |
| 297 | |