1 | //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Computation of hash values over BinaryFunction and BinaryBasicBlock. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/HashUtilities.h" |
14 | #include "bolt/Core/BinaryContext.h" |
15 | #include "bolt/Utils/NameResolver.h" |
16 | #include "llvm/MC/MCInstPrinter.h" |
17 | |
18 | namespace llvm { |
19 | namespace bolt { |
20 | |
21 | std::string hashInteger(uint64_t Value) { |
22 | std::string HashString; |
23 | if (Value == 0) |
24 | HashString.push_back(c: 0); |
25 | |
26 | while (Value) { |
27 | uint8_t LSB = Value & 0xff; |
28 | HashString.push_back(c: LSB); |
29 | Value >>= 8; |
30 | } |
31 | |
32 | return HashString; |
33 | } |
34 | |
35 | std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { |
36 | std::string HashString; |
37 | |
38 | // Ignore function references. |
39 | if (BC.getFunctionForSymbol(Symbol: &Symbol)) |
40 | return HashString; |
41 | |
42 | llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); |
43 | if (!ErrorOrValue) |
44 | return HashString; |
45 | |
46 | // Ignore jump table references. |
47 | if (BC.getJumpTableContainingAddress(Address: *ErrorOrValue)) |
48 | return HashString; |
49 | |
50 | return HashString.append(str: hashInteger(Value: *ErrorOrValue)); |
51 | } |
52 | |
53 | std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { |
54 | switch (Expr.getKind()) { |
55 | case MCExpr::Constant: |
56 | return hashInteger(Value: cast<MCConstantExpr>(Val: Expr).getValue()); |
57 | case MCExpr::SymbolRef: |
58 | return hashSymbol(BC, Symbol: cast<MCSymbolRefExpr>(Val: Expr).getSymbol()); |
59 | case MCExpr::Unary: { |
60 | const auto &UnaryExpr = cast<MCUnaryExpr>(Val: Expr); |
61 | return hashInteger(Value: UnaryExpr.getOpcode()) |
62 | .append(str: hashExpr(BC, Expr: *UnaryExpr.getSubExpr())); |
63 | } |
64 | case MCExpr::Binary: { |
65 | const auto &BinaryExpr = cast<MCBinaryExpr>(Val: Expr); |
66 | return hashExpr(BC, Expr: *BinaryExpr.getLHS()) |
67 | .append(str: hashInteger(Value: BinaryExpr.getOpcode())) |
68 | .append(str: hashExpr(BC, Expr: *BinaryExpr.getRHS())); |
69 | } |
70 | case MCExpr::Specifier: |
71 | case MCExpr::Target: |
72 | return std::string(); |
73 | } |
74 | |
75 | llvm_unreachable("invalid expression kind" ); |
76 | } |
77 | |
78 | std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { |
79 | if (Operand.isImm()) |
80 | return hashInteger(Value: Operand.getImm()); |
81 | if (Operand.isReg()) |
82 | return hashInteger(Value: Operand.getReg()); |
83 | if (Operand.isExpr()) |
84 | return hashExpr(BC, Expr: *Operand.getExpr()); |
85 | |
86 | return std::string(); |
87 | } |
88 | |
89 | std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB, |
90 | OperandHashFuncTy OperandHashFunc) { |
91 | const bool IsX86 = BC.isX86(); |
92 | |
93 | // The hash is computed by creating a string of all instruction opcodes and |
94 | // possibly their operands and then hashing that string with std::hash. |
95 | std::string HashString; |
96 | |
97 | for (const MCInst &Inst : BB) { |
98 | if (BC.MIB->isPseudo(Inst)) |
99 | continue; |
100 | |
101 | unsigned Opcode = Inst.getOpcode(); |
102 | |
103 | // Ignore unconditional jumps since we check CFG consistency by processing |
104 | // basic blocks in order and do not rely on branches to be in-sync with |
105 | // CFG. Note that we still use condition code of conditional jumps. |
106 | if (BC.MIB->isUnconditionalBranch(Inst)) |
107 | continue; |
108 | |
109 | if (IsX86 && BC.MIB->isConditionalBranch(Inst)) |
110 | Opcode = BC.MIB->getShortBranchOpcode(Opcode); |
111 | |
112 | if (Opcode == 0) { |
113 | HashString.push_back(c: 0); |
114 | } else { |
115 | StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode); |
116 | HashString.append(str: OpcodeName.str()); |
117 | } |
118 | |
119 | for (const MCOperand &Op : MCPlus::primeOperands(Inst)) |
120 | HashString.append(str: OperandHashFunc(Op)); |
121 | } |
122 | return HashString; |
123 | } |
124 | |
125 | /// A "loose" hash of a basic block to use with the stale profile matching. The |
126 | /// computed value will be the same for blocks with minor changes (such as |
127 | /// reordering of instructions or using different operands) but may result in |
128 | /// collisions that need to be resolved by a stronger hashing. |
129 | std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { |
130 | // The hash is computed by creating a string of all lexicographically ordered |
131 | // instruction opcodes, which is then hashed with std::hash. |
132 | std::set<std::string> Opcodes; |
133 | for (const MCInst &Inst : BB) { |
134 | // Skip pseudo instructions and nops. |
135 | if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst)) |
136 | continue; |
137 | |
138 | // Ignore unconditional jumps, as they can be added / removed as a result |
139 | // of basic block reordering. |
140 | if (BC.MIB->isUnconditionalBranch(Inst)) |
141 | continue; |
142 | |
143 | // Do not distinguish different types of conditional jumps. |
144 | if (BC.MIB->isConditionalBranch(Inst)) { |
145 | Opcodes.insert(x: "JMP" ); |
146 | continue; |
147 | } |
148 | |
149 | std::string Mnemonic = BC.InstPrinter->getMnemonic(MI: Inst).first; |
150 | llvm::erase_if(C&: Mnemonic, P: [](unsigned char ch) { return std::isspace(ch); }); |
151 | Opcodes.insert(x: Mnemonic); |
152 | } |
153 | |
154 | std::string HashString; |
155 | for (const std::string &Opcode : Opcodes) |
156 | HashString.append(str: Opcode); |
157 | return HashString; |
158 | } |
159 | |
160 | /// An even looser hash level relative to $ hashBlockLoose to use with stale |
161 | /// profile matching, composed of the names of a block's called functions in |
162 | /// lexicographic order. |
163 | std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) { |
164 | // The hash is computed by creating a string of all lexicographically ordered |
165 | // called function names. |
166 | std::vector<std::string> FunctionNames; |
167 | for (const MCInst &Instr : BB) { |
168 | // Skip non-call instructions. |
169 | if (!BC.MIB->isCall(Inst: Instr)) |
170 | continue; |
171 | const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Inst: Instr); |
172 | if (!CallSymbol) |
173 | continue; |
174 | FunctionNames.push_back(x: std::string(CallSymbol->getName())); |
175 | } |
176 | std::sort(first: FunctionNames.begin(), last: FunctionNames.end()); |
177 | std::string HashString; |
178 | for (const std::string &FunctionName : FunctionNames) |
179 | HashString.append(str: FunctionName); |
180 | |
181 | return HashString; |
182 | } |
183 | |
184 | /// The same as the $hashBlockCalls function, but for profiled functions. |
185 | std::string |
186 | hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *> |
187 | &IdToYamlFunction, |
188 | const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { |
189 | std::vector<std::string> FunctionNames; |
190 | for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) { |
191 | auto It = IdToYamlFunction.find(Val: CallSiteInfo.DestId); |
192 | if (It == IdToYamlFunction.end()) |
193 | continue; |
194 | StringRef Name = NameResolver::dropNumNames(Name: It->second->Name); |
195 | FunctionNames.push_back(x: std::string(Name)); |
196 | } |
197 | std::sort(first: FunctionNames.begin(), last: FunctionNames.end()); |
198 | std::string HashString; |
199 | for (const std::string &FunctionName : FunctionNames) |
200 | HashString.append(str: FunctionName); |
201 | |
202 | return HashString; |
203 | } |
204 | |
205 | } // namespace bolt |
206 | } // namespace llvm |
207 | |