1 | //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Computation of hash values over BinaryFunction and BinaryBasicBlock. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/HashUtilities.h" |
14 | #include "bolt/Core/BinaryContext.h" |
15 | #include "llvm/MC/MCInstPrinter.h" |
16 | |
17 | namespace llvm { |
18 | namespace bolt { |
19 | |
20 | std::string hashInteger(uint64_t Value) { |
21 | std::string HashString; |
22 | if (Value == 0) |
23 | HashString.push_back(c: 0); |
24 | |
25 | while (Value) { |
26 | uint8_t LSB = Value & 0xff; |
27 | HashString.push_back(c: LSB); |
28 | Value >>= 8; |
29 | } |
30 | |
31 | return HashString; |
32 | } |
33 | |
34 | std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { |
35 | std::string HashString; |
36 | |
37 | // Ignore function references. |
38 | if (BC.getFunctionForSymbol(Symbol: &Symbol)) |
39 | return HashString; |
40 | |
41 | llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); |
42 | if (!ErrorOrValue) |
43 | return HashString; |
44 | |
45 | // Ignore jump table references. |
46 | if (BC.getJumpTableContainingAddress(Address: *ErrorOrValue)) |
47 | return HashString; |
48 | |
49 | return HashString.append(str: hashInteger(Value: *ErrorOrValue)); |
50 | } |
51 | |
52 | std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { |
53 | switch (Expr.getKind()) { |
54 | case MCExpr::Constant: |
55 | return hashInteger(Value: cast<MCConstantExpr>(Val: Expr).getValue()); |
56 | case MCExpr::SymbolRef: |
57 | return hashSymbol(BC, Symbol: cast<MCSymbolRefExpr>(Val: Expr).getSymbol()); |
58 | case MCExpr::Unary: { |
59 | const auto &UnaryExpr = cast<MCUnaryExpr>(Val: Expr); |
60 | return hashInteger(Value: UnaryExpr.getOpcode()) |
61 | .append(str: hashExpr(BC, Expr: *UnaryExpr.getSubExpr())); |
62 | } |
63 | case MCExpr::Binary: { |
64 | const auto &BinaryExpr = cast<MCBinaryExpr>(Val: Expr); |
65 | return hashExpr(BC, Expr: *BinaryExpr.getLHS()) |
66 | .append(str: hashInteger(Value: BinaryExpr.getOpcode())) |
67 | .append(str: hashExpr(BC, Expr: *BinaryExpr.getRHS())); |
68 | } |
69 | case MCExpr::Target: |
70 | return std::string(); |
71 | } |
72 | |
73 | llvm_unreachable("invalid expression kind" ); |
74 | } |
75 | |
76 | std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { |
77 | if (Operand.isImm()) |
78 | return hashInteger(Value: Operand.getImm()); |
79 | if (Operand.isReg()) |
80 | return hashInteger(Value: Operand.getReg()); |
81 | if (Operand.isExpr()) |
82 | return hashExpr(BC, Expr: *Operand.getExpr()); |
83 | |
84 | return std::string(); |
85 | } |
86 | |
87 | std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB, |
88 | OperandHashFuncTy OperandHashFunc) { |
89 | const bool IsX86 = BC.isX86(); |
90 | |
91 | // The hash is computed by creating a string of all instruction opcodes and |
92 | // possibly their operands and then hashing that string with std::hash. |
93 | std::string HashString; |
94 | |
95 | for (const MCInst &Inst : BB) { |
96 | if (BC.MIB->isPseudo(Inst)) |
97 | continue; |
98 | |
99 | unsigned Opcode = Inst.getOpcode(); |
100 | |
101 | // Ignore unconditional jumps since we check CFG consistency by processing |
102 | // basic blocks in order and do not rely on branches to be in-sync with |
103 | // CFG. Note that we still use condition code of conditional jumps. |
104 | if (BC.MIB->isUnconditionalBranch(Inst)) |
105 | continue; |
106 | |
107 | if (IsX86 && BC.MIB->isConditionalBranch(Inst)) |
108 | Opcode = BC.MIB->getShortBranchOpcode(Opcode); |
109 | |
110 | if (Opcode == 0) { |
111 | HashString.push_back(c: 0); |
112 | } else { |
113 | StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode); |
114 | HashString.append(str: OpcodeName.str()); |
115 | } |
116 | |
117 | for (const MCOperand &Op : MCPlus::primeOperands(Inst)) |
118 | HashString.append(str: OperandHashFunc(Op)); |
119 | } |
120 | return HashString; |
121 | } |
122 | |
123 | /// A "loose" hash of a basic block to use with the stale profile matching. The |
124 | /// computed value will be the same for blocks with minor changes (such as |
125 | /// reordering of instructions or using different operands) but may result in |
126 | /// collisions that need to be resolved by a stronger hashing. |
127 | std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { |
128 | // The hash is computed by creating a string of all lexicographically ordered |
129 | // instruction opcodes, which is then hashed with std::hash. |
130 | std::set<std::string> Opcodes; |
131 | for (const MCInst &Inst : BB) { |
132 | // Skip pseudo instructions and nops. |
133 | if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst)) |
134 | continue; |
135 | |
136 | // Ignore unconditional jumps, as they can be added / removed as a result |
137 | // of basic block reordering. |
138 | if (BC.MIB->isUnconditionalBranch(Inst)) |
139 | continue; |
140 | |
141 | // Do not distinguish different types of conditional jumps. |
142 | if (BC.MIB->isConditionalBranch(Inst)) { |
143 | Opcodes.insert(x: "JMP" ); |
144 | continue; |
145 | } |
146 | |
147 | std::string Mnemonic = BC.InstPrinter->getMnemonic(MI: &Inst).first; |
148 | llvm::erase_if(C&: Mnemonic, P: [](unsigned char ch) { return std::isspace(ch); }); |
149 | Opcodes.insert(x: Mnemonic); |
150 | } |
151 | |
152 | std::string HashString; |
153 | for (const std::string &Opcode : Opcodes) |
154 | HashString.append(str: Opcode); |
155 | return HashString; |
156 | } |
157 | |
158 | } // namespace bolt |
159 | } // namespace llvm |
160 | |