1//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Computation of hash values over BinaryFunction and BinaryBasicBlock.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/HashUtilities.h"
14#include "bolt/Core/BinaryContext.h"
15#include "llvm/MC/MCInstPrinter.h"
16
17namespace llvm {
18namespace bolt {
19
20std::string hashInteger(uint64_t Value) {
21 std::string HashString;
22 if (Value == 0)
23 HashString.push_back(c: 0);
24
25 while (Value) {
26 uint8_t LSB = Value & 0xff;
27 HashString.push_back(c: LSB);
28 Value >>= 8;
29 }
30
31 return HashString;
32}
33
34std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
35 std::string HashString;
36
37 // Ignore function references.
38 if (BC.getFunctionForSymbol(Symbol: &Symbol))
39 return HashString;
40
41 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
42 if (!ErrorOrValue)
43 return HashString;
44
45 // Ignore jump table references.
46 if (BC.getJumpTableContainingAddress(Address: *ErrorOrValue))
47 return HashString;
48
49 return HashString.append(str: hashInteger(Value: *ErrorOrValue));
50}
51
52std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
53 switch (Expr.getKind()) {
54 case MCExpr::Constant:
55 return hashInteger(Value: cast<MCConstantExpr>(Val: Expr).getValue());
56 case MCExpr::SymbolRef:
57 return hashSymbol(BC, Symbol: cast<MCSymbolRefExpr>(Val: Expr).getSymbol());
58 case MCExpr::Unary: {
59 const auto &UnaryExpr = cast<MCUnaryExpr>(Val: Expr);
60 return hashInteger(Value: UnaryExpr.getOpcode())
61 .append(str: hashExpr(BC, Expr: *UnaryExpr.getSubExpr()));
62 }
63 case MCExpr::Binary: {
64 const auto &BinaryExpr = cast<MCBinaryExpr>(Val: Expr);
65 return hashExpr(BC, Expr: *BinaryExpr.getLHS())
66 .append(str: hashInteger(Value: BinaryExpr.getOpcode()))
67 .append(str: hashExpr(BC, Expr: *BinaryExpr.getRHS()));
68 }
69 case MCExpr::Target:
70 return std::string();
71 }
72
73 llvm_unreachable("invalid expression kind");
74}
75
76std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
77 if (Operand.isImm())
78 return hashInteger(Value: Operand.getImm());
79 if (Operand.isReg())
80 return hashInteger(Value: Operand.getReg());
81 if (Operand.isExpr())
82 return hashExpr(BC, Expr: *Operand.getExpr());
83
84 return std::string();
85}
86
87std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
88 OperandHashFuncTy OperandHashFunc) {
89 const bool IsX86 = BC.isX86();
90
91 // The hash is computed by creating a string of all instruction opcodes and
92 // possibly their operands and then hashing that string with std::hash.
93 std::string HashString;
94
95 for (const MCInst &Inst : BB) {
96 if (BC.MIB->isPseudo(Inst))
97 continue;
98
99 unsigned Opcode = Inst.getOpcode();
100
101 // Ignore unconditional jumps since we check CFG consistency by processing
102 // basic blocks in order and do not rely on branches to be in-sync with
103 // CFG. Note that we still use condition code of conditional jumps.
104 if (BC.MIB->isUnconditionalBranch(Inst))
105 continue;
106
107 if (IsX86 && BC.MIB->isConditionalBranch(Inst))
108 Opcode = BC.MIB->getShortBranchOpcode(Opcode);
109
110 if (Opcode == 0) {
111 HashString.push_back(c: 0);
112 } else {
113 StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
114 HashString.append(str: OpcodeName.str());
115 }
116
117 for (const MCOperand &Op : MCPlus::primeOperands(Inst))
118 HashString.append(str: OperandHashFunc(Op));
119 }
120 return HashString;
121}
122
123/// A "loose" hash of a basic block to use with the stale profile matching. The
124/// computed value will be the same for blocks with minor changes (such as
125/// reordering of instructions or using different operands) but may result in
126/// collisions that need to be resolved by a stronger hashing.
127std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
128 // The hash is computed by creating a string of all lexicographically ordered
129 // instruction opcodes, which is then hashed with std::hash.
130 std::set<std::string> Opcodes;
131 for (const MCInst &Inst : BB) {
132 // Skip pseudo instructions and nops.
133 if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
134 continue;
135
136 // Ignore unconditional jumps, as they can be added / removed as a result
137 // of basic block reordering.
138 if (BC.MIB->isUnconditionalBranch(Inst))
139 continue;
140
141 // Do not distinguish different types of conditional jumps.
142 if (BC.MIB->isConditionalBranch(Inst)) {
143 Opcodes.insert(x: "JMP");
144 continue;
145 }
146
147 std::string Mnemonic = BC.InstPrinter->getMnemonic(MI: &Inst).first;
148 llvm::erase_if(C&: Mnemonic, P: [](unsigned char ch) { return std::isspace(ch); });
149 Opcodes.insert(x: Mnemonic);
150 }
151
152 std::string HashString;
153 for (const std::string &Opcode : Opcodes)
154 HashString.append(str: Opcode);
155 return HashString;
156}
157
158} // namespace bolt
159} // namespace llvm
160

source code of bolt/lib/Core/HashUtilities.cpp