1//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Computation of hash values over BinaryFunction and BinaryBasicBlock.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/HashUtilities.h"
14#include "bolt/Core/BinaryContext.h"
15#include "bolt/Utils/NameResolver.h"
16#include "llvm/MC/MCInstPrinter.h"
17
18namespace llvm {
19namespace bolt {
20
21std::string hashInteger(uint64_t Value) {
22 std::string HashString;
23 if (Value == 0)
24 HashString.push_back(c: 0);
25
26 while (Value) {
27 uint8_t LSB = Value & 0xff;
28 HashString.push_back(c: LSB);
29 Value >>= 8;
30 }
31
32 return HashString;
33}
34
35std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
36 std::string HashString;
37
38 // Ignore function references.
39 if (BC.getFunctionForSymbol(Symbol: &Symbol))
40 return HashString;
41
42 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
43 if (!ErrorOrValue)
44 return HashString;
45
46 // Ignore jump table references.
47 if (BC.getJumpTableContainingAddress(Address: *ErrorOrValue))
48 return HashString;
49
50 return HashString.append(str: hashInteger(Value: *ErrorOrValue));
51}
52
53std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
54 switch (Expr.getKind()) {
55 case MCExpr::Constant:
56 return hashInteger(Value: cast<MCConstantExpr>(Val: Expr).getValue());
57 case MCExpr::SymbolRef:
58 return hashSymbol(BC, Symbol: cast<MCSymbolRefExpr>(Val: Expr).getSymbol());
59 case MCExpr::Unary: {
60 const auto &UnaryExpr = cast<MCUnaryExpr>(Val: Expr);
61 return hashInteger(Value: UnaryExpr.getOpcode())
62 .append(str: hashExpr(BC, Expr: *UnaryExpr.getSubExpr()));
63 }
64 case MCExpr::Binary: {
65 const auto &BinaryExpr = cast<MCBinaryExpr>(Val: Expr);
66 return hashExpr(BC, Expr: *BinaryExpr.getLHS())
67 .append(str: hashInteger(Value: BinaryExpr.getOpcode()))
68 .append(str: hashExpr(BC, Expr: *BinaryExpr.getRHS()));
69 }
70 case MCExpr::Specifier:
71 case MCExpr::Target:
72 return std::string();
73 }
74
75 llvm_unreachable("invalid expression kind");
76}
77
78std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
79 if (Operand.isImm())
80 return hashInteger(Value: Operand.getImm());
81 if (Operand.isReg())
82 return hashInteger(Value: Operand.getReg());
83 if (Operand.isExpr())
84 return hashExpr(BC, Expr: *Operand.getExpr());
85
86 return std::string();
87}
88
89std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
90 OperandHashFuncTy OperandHashFunc) {
91 const bool IsX86 = BC.isX86();
92
93 // The hash is computed by creating a string of all instruction opcodes and
94 // possibly their operands and then hashing that string with std::hash.
95 std::string HashString;
96
97 for (const MCInst &Inst : BB) {
98 if (BC.MIB->isPseudo(Inst))
99 continue;
100
101 unsigned Opcode = Inst.getOpcode();
102
103 // Ignore unconditional jumps since we check CFG consistency by processing
104 // basic blocks in order and do not rely on branches to be in-sync with
105 // CFG. Note that we still use condition code of conditional jumps.
106 if (BC.MIB->isUnconditionalBranch(Inst))
107 continue;
108
109 if (IsX86 && BC.MIB->isConditionalBranch(Inst))
110 Opcode = BC.MIB->getShortBranchOpcode(Opcode);
111
112 if (Opcode == 0) {
113 HashString.push_back(c: 0);
114 } else {
115 StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
116 HashString.append(str: OpcodeName.str());
117 }
118
119 for (const MCOperand &Op : MCPlus::primeOperands(Inst))
120 HashString.append(str: OperandHashFunc(Op));
121 }
122 return HashString;
123}
124
125/// A "loose" hash of a basic block to use with the stale profile matching. The
126/// computed value will be the same for blocks with minor changes (such as
127/// reordering of instructions or using different operands) but may result in
128/// collisions that need to be resolved by a stronger hashing.
129std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
130 // The hash is computed by creating a string of all lexicographically ordered
131 // instruction opcodes, which is then hashed with std::hash.
132 std::set<std::string> Opcodes;
133 for (const MCInst &Inst : BB) {
134 // Skip pseudo instructions and nops.
135 if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
136 continue;
137
138 // Ignore unconditional jumps, as they can be added / removed as a result
139 // of basic block reordering.
140 if (BC.MIB->isUnconditionalBranch(Inst))
141 continue;
142
143 // Do not distinguish different types of conditional jumps.
144 if (BC.MIB->isConditionalBranch(Inst)) {
145 Opcodes.insert(x: "JMP");
146 continue;
147 }
148
149 std::string Mnemonic = BC.InstPrinter->getMnemonic(MI: Inst).first;
150 llvm::erase_if(C&: Mnemonic, P: [](unsigned char ch) { return std::isspace(ch); });
151 Opcodes.insert(x: Mnemonic);
152 }
153
154 std::string HashString;
155 for (const std::string &Opcode : Opcodes)
156 HashString.append(str: Opcode);
157 return HashString;
158}
159
160/// An even looser hash level relative to $ hashBlockLoose to use with stale
161/// profile matching, composed of the names of a block's called functions in
162/// lexicographic order.
163std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
164 // The hash is computed by creating a string of all lexicographically ordered
165 // called function names.
166 std::vector<std::string> FunctionNames;
167 for (const MCInst &Instr : BB) {
168 // Skip non-call instructions.
169 if (!BC.MIB->isCall(Inst: Instr))
170 continue;
171 const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Inst: Instr);
172 if (!CallSymbol)
173 continue;
174 FunctionNames.push_back(x: std::string(CallSymbol->getName()));
175 }
176 std::sort(first: FunctionNames.begin(), last: FunctionNames.end());
177 std::string HashString;
178 for (const std::string &FunctionName : FunctionNames)
179 HashString.append(str: FunctionName);
180
181 return HashString;
182}
183
184/// The same as the $hashBlockCalls function, but for profiled functions.
185std::string
186hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>
187 &IdToYamlFunction,
188 const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
189 std::vector<std::string> FunctionNames;
190 for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
191 auto It = IdToYamlFunction.find(Val: CallSiteInfo.DestId);
192 if (It == IdToYamlFunction.end())
193 continue;
194 StringRef Name = NameResolver::dropNumNames(Name: It->second->Name);
195 FunctionNames.push_back(x: std::string(Name));
196 }
197 std::sort(first: FunctionNames.begin(), last: FunctionNames.end());
198 std::string HashString;
199 for (const std::string &FunctionName : FunctionNames)
200 HashString.append(str: FunctionName);
201
202 return HashString;
203}
204
205} // namespace bolt
206} // namespace llvm
207

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of bolt/lib/Core/HashUtilities.cpp