| 1 | //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Computation of hash values over BinaryFunction and BinaryBasicBlock. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Core/HashUtilities.h" |
| 14 | #include "bolt/Core/BinaryContext.h" |
| 15 | #include "bolt/Utils/NameResolver.h" |
| 16 | #include "llvm/MC/MCInstPrinter.h" |
| 17 | |
| 18 | namespace llvm { |
| 19 | namespace bolt { |
| 20 | |
| 21 | std::string hashInteger(uint64_t Value) { |
| 22 | std::string HashString; |
| 23 | if (Value == 0) |
| 24 | HashString.push_back(c: 0); |
| 25 | |
| 26 | while (Value) { |
| 27 | uint8_t LSB = Value & 0xff; |
| 28 | HashString.push_back(c: LSB); |
| 29 | Value >>= 8; |
| 30 | } |
| 31 | |
| 32 | return HashString; |
| 33 | } |
| 34 | |
| 35 | std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { |
| 36 | std::string HashString; |
| 37 | |
| 38 | // Ignore function references. |
| 39 | if (BC.getFunctionForSymbol(Symbol: &Symbol)) |
| 40 | return HashString; |
| 41 | |
| 42 | llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); |
| 43 | if (!ErrorOrValue) |
| 44 | return HashString; |
| 45 | |
| 46 | // Ignore jump table references. |
| 47 | if (BC.getJumpTableContainingAddress(Address: *ErrorOrValue)) |
| 48 | return HashString; |
| 49 | |
| 50 | return HashString.append(str: hashInteger(Value: *ErrorOrValue)); |
| 51 | } |
| 52 | |
| 53 | std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { |
| 54 | switch (Expr.getKind()) { |
| 55 | case MCExpr::Constant: |
| 56 | return hashInteger(Value: cast<MCConstantExpr>(Val: Expr).getValue()); |
| 57 | case MCExpr::SymbolRef: |
| 58 | return hashSymbol(BC, Symbol: cast<MCSymbolRefExpr>(Val: Expr).getSymbol()); |
| 59 | case MCExpr::Unary: { |
| 60 | const auto &UnaryExpr = cast<MCUnaryExpr>(Val: Expr); |
| 61 | return hashInteger(Value: UnaryExpr.getOpcode()) |
| 62 | .append(str: hashExpr(BC, Expr: *UnaryExpr.getSubExpr())); |
| 63 | } |
| 64 | case MCExpr::Binary: { |
| 65 | const auto &BinaryExpr = cast<MCBinaryExpr>(Val: Expr); |
| 66 | return hashExpr(BC, Expr: *BinaryExpr.getLHS()) |
| 67 | .append(str: hashInteger(Value: BinaryExpr.getOpcode())) |
| 68 | .append(str: hashExpr(BC, Expr: *BinaryExpr.getRHS())); |
| 69 | } |
| 70 | case MCExpr::Specifier: |
| 71 | case MCExpr::Target: |
| 72 | return std::string(); |
| 73 | } |
| 74 | |
| 75 | llvm_unreachable("invalid expression kind" ); |
| 76 | } |
| 77 | |
| 78 | std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { |
| 79 | if (Operand.isImm()) |
| 80 | return hashInteger(Value: Operand.getImm()); |
| 81 | if (Operand.isReg()) |
| 82 | return hashInteger(Value: Operand.getReg()); |
| 83 | if (Operand.isExpr()) |
| 84 | return hashExpr(BC, Expr: *Operand.getExpr()); |
| 85 | |
| 86 | return std::string(); |
| 87 | } |
| 88 | |
| 89 | std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB, |
| 90 | OperandHashFuncTy OperandHashFunc) { |
| 91 | const bool IsX86 = BC.isX86(); |
| 92 | |
| 93 | // The hash is computed by creating a string of all instruction opcodes and |
| 94 | // possibly their operands and then hashing that string with std::hash. |
| 95 | std::string HashString; |
| 96 | |
| 97 | for (const MCInst &Inst : BB) { |
| 98 | if (BC.MIB->isPseudo(Inst)) |
| 99 | continue; |
| 100 | |
| 101 | unsigned Opcode = Inst.getOpcode(); |
| 102 | |
| 103 | // Ignore unconditional jumps since we check CFG consistency by processing |
| 104 | // basic blocks in order and do not rely on branches to be in-sync with |
| 105 | // CFG. Note that we still use condition code of conditional jumps. |
| 106 | if (BC.MIB->isUnconditionalBranch(Inst)) |
| 107 | continue; |
| 108 | |
| 109 | if (IsX86 && BC.MIB->isConditionalBranch(Inst)) |
| 110 | Opcode = BC.MIB->getShortBranchOpcode(Opcode); |
| 111 | |
| 112 | if (Opcode == 0) { |
| 113 | HashString.push_back(c: 0); |
| 114 | } else { |
| 115 | StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode); |
| 116 | HashString.append(str: OpcodeName.str()); |
| 117 | } |
| 118 | |
| 119 | for (const MCOperand &Op : MCPlus::primeOperands(Inst)) |
| 120 | HashString.append(str: OperandHashFunc(Op)); |
| 121 | } |
| 122 | return HashString; |
| 123 | } |
| 124 | |
| 125 | /// A "loose" hash of a basic block to use with the stale profile matching. The |
| 126 | /// computed value will be the same for blocks with minor changes (such as |
| 127 | /// reordering of instructions or using different operands) but may result in |
| 128 | /// collisions that need to be resolved by a stronger hashing. |
| 129 | std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { |
| 130 | // The hash is computed by creating a string of all lexicographically ordered |
| 131 | // instruction opcodes, which is then hashed with std::hash. |
| 132 | std::set<std::string> Opcodes; |
| 133 | for (const MCInst &Inst : BB) { |
| 134 | // Skip pseudo instructions and nops. |
| 135 | if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst)) |
| 136 | continue; |
| 137 | |
| 138 | // Ignore unconditional jumps, as they can be added / removed as a result |
| 139 | // of basic block reordering. |
| 140 | if (BC.MIB->isUnconditionalBranch(Inst)) |
| 141 | continue; |
| 142 | |
| 143 | // Do not distinguish different types of conditional jumps. |
| 144 | if (BC.MIB->isConditionalBranch(Inst)) { |
| 145 | Opcodes.insert(x: "JMP" ); |
| 146 | continue; |
| 147 | } |
| 148 | |
| 149 | std::string Mnemonic = BC.InstPrinter->getMnemonic(MI: Inst).first; |
| 150 | llvm::erase_if(C&: Mnemonic, P: [](unsigned char ch) { return std::isspace(ch); }); |
| 151 | Opcodes.insert(x: Mnemonic); |
| 152 | } |
| 153 | |
| 154 | std::string HashString; |
| 155 | for (const std::string &Opcode : Opcodes) |
| 156 | HashString.append(str: Opcode); |
| 157 | return HashString; |
| 158 | } |
| 159 | |
| 160 | /// An even looser hash level relative to $ hashBlockLoose to use with stale |
| 161 | /// profile matching, composed of the names of a block's called functions in |
| 162 | /// lexicographic order. |
| 163 | std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) { |
| 164 | // The hash is computed by creating a string of all lexicographically ordered |
| 165 | // called function names. |
| 166 | std::vector<std::string> FunctionNames; |
| 167 | for (const MCInst &Instr : BB) { |
| 168 | // Skip non-call instructions. |
| 169 | if (!BC.MIB->isCall(Inst: Instr)) |
| 170 | continue; |
| 171 | const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Inst: Instr); |
| 172 | if (!CallSymbol) |
| 173 | continue; |
| 174 | FunctionNames.push_back(x: std::string(CallSymbol->getName())); |
| 175 | } |
| 176 | std::sort(first: FunctionNames.begin(), last: FunctionNames.end()); |
| 177 | std::string HashString; |
| 178 | for (const std::string &FunctionName : FunctionNames) |
| 179 | HashString.append(str: FunctionName); |
| 180 | |
| 181 | return HashString; |
| 182 | } |
| 183 | |
| 184 | /// The same as the $hashBlockCalls function, but for profiled functions. |
| 185 | std::string |
| 186 | hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *> |
| 187 | &IdToYamlFunction, |
| 188 | const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { |
| 189 | std::vector<std::string> FunctionNames; |
| 190 | for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) { |
| 191 | auto It = IdToYamlFunction.find(Val: CallSiteInfo.DestId); |
| 192 | if (It == IdToYamlFunction.end()) |
| 193 | continue; |
| 194 | StringRef Name = NameResolver::dropNumNames(Name: It->second->Name); |
| 195 | FunctionNames.push_back(x: std::string(Name)); |
| 196 | } |
| 197 | std::sort(first: FunctionNames.begin(), last: FunctionNames.end()); |
| 198 | std::string HashString; |
| 199 | for (const std::string &FunctionName : FunctionNames) |
| 200 | HashString.append(str: FunctionName); |
| 201 | |
| 202 | return HashString; |
| 203 | } |
| 204 | |
| 205 | } // namespace bolt |
| 206 | } // namespace llvm |
| 207 | |