HashUtilities.cpp source code [bolt/lib/Core/HashUtilities.cpp]

1	//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Computation of hash values over BinaryFunction and BinaryBasicBlock.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "bolt/Core/HashUtilities.h"
14	#include "bolt/Core/BinaryContext.h"
15	#include "llvm/MC/MCInstPrinter.h"
16
17	namespace llvm {
18	namespace bolt {
19
20	std::string hashInteger(uint64_t Value) {
21	std::string HashString;
22	if (Value == `0`)
23	HashString.push_back(c: `0`);
24
25	while (Value) {
26	uint8_t LSB = Value & `0xff`;
27	HashString.push_back(c: LSB);
28	Value >>= `8`;
29	}
30
31	return HashString;
32	}
33
34	std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
35	std::string HashString;
36
37	// Ignore function references.
38	if (BC.getFunctionForSymbol(Symbol: &Symbol))
39	return HashString;
40
41	llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
42	if (!ErrorOrValue)
43	return HashString;
44
45	// Ignore jump table references.
46	if (BC.getJumpTableContainingAddress(Address: *ErrorOrValue))
47	return HashString;
48
49	return HashString.append(str: hashInteger(Value: *ErrorOrValue));
50	}
51
52	std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
53	switch (Expr.getKind()) {
54	case MCExpr::Constant:
55	return hashInteger(Value: cast<MCConstantExpr>(Val: Expr).getValue());
56	case MCExpr::SymbolRef:
57	return hashSymbol(BC, Symbol: cast<MCSymbolRefExpr>(Val: Expr).getSymbol());
58	case MCExpr::Unary: {
59	const auto &UnaryExpr = cast<MCUnaryExpr>(Val: Expr);
60	return hashInteger(Value: UnaryExpr.getOpcode())
61	.append(str: hashExpr(BC, Expr: *UnaryExpr.getSubExpr()));
62	}
63	case MCExpr::Binary: {
64	const auto &BinaryExpr = cast<MCBinaryExpr>(Val: Expr);
65	return hashExpr(BC, Expr: *BinaryExpr.getLHS())
66	.append(str: hashInteger(Value: BinaryExpr.getOpcode()))
67	.append(str: hashExpr(BC, Expr: *BinaryExpr.getRHS()));
68	}
69	case MCExpr::Target:
70	return std::string ();
71	}
72
73	llvm_unreachable("invalid expression kind");
74	}
75
76	std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
77	if (Operand.isImm())
78	return hashInteger(Value: Operand.getImm());
79	if (Operand.isReg())
80	return hashInteger(Value: Operand.getReg());
81	if (Operand.isExpr())
82	return hashExpr(BC, Expr: *Operand.getExpr());
83
84	return std::string ();
85	}
86
87	std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
88	OperandHashFuncTy OperandHashFunc) {
89	const bool IsX86 = BC.isX86();
90
91	// The hash is computed by creating a string of all instruction opcodes and
92	// possibly their operands and then hashing that string with std::hash.
93	std::string HashString;
94
95	for (const MCInst &Inst : BB) {
96	if (BC.MIB ->isPseudo(Inst))
97	continue;
98
99	unsigned Opcode = Inst.getOpcode();
100
101	// Ignore unconditional jumps since we check CFG consistency by processing
102	// basic blocks in order and do not rely on branches to be in-sync with
103	// CFG. Note that we still use condition code of conditional jumps.
104	if (BC.MIB ->isUnconditionalBranch(Inst))
105	continue;
106
107	if (IsX86 && BC.MIB ->isConditionalBranch(Inst))
108	Opcode = BC.MIB ->getShortBranchOpcode(Opcode);
109
110	if (Opcode == `0`) {
111	HashString.push_back(c: `0`);
112	} else {
113	StringRef OpcodeName = BC.InstPrinter ->getOpcodeName(Opcode);
114	HashString.append(str: OpcodeName.str());
115	}
116
117	for (const MCOperand &Op : MCPlus::primeOperands(Inst))
118	HashString.append(str: OperandHashFunc (Op));
119	}
120	return HashString;
121	}
122
123	/// A "loose" hash of a basic block to use with the stale profile matching. The
124	/// computed value will be the same for blocks with minor changes (such as
125	/// reordering of instructions or using different operands) but may result in
126	/// collisions that need to be resolved by a stronger hashing.
127	std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
128	// The hash is computed by creating a string of all lexicographically ordered
129	// instruction opcodes, which is then hashed with std::hash.
130	std::set<std::string> Opcodes;
131	for (const MCInst &Inst : BB) {
132	// Skip pseudo instructions and nops.
133	if (BC.MIB ->isPseudo(Inst) \|\| BC.MIB ->isNoop(Inst))
134	continue;
135
136	// Ignore unconditional jumps, as they can be added / removed as a result
137	// of basic block reordering.
138	if (BC.MIB ->isUnconditionalBranch(Inst))
139	continue;
140
141	// Do not distinguish different types of conditional jumps.
142	if (BC.MIB ->isConditionalBranch(Inst)) {
143	Opcodes.insert(x: "JMP");
144	continue;
145	}
146
147	std::string Mnemonic = BC.InstPrinter ->getMnemonic(MI: &Inst).first;
148	llvm::erase_if(C&: Mnemonic, P: [](unsigned char ch) { return std::isspace(ch); });
149	Opcodes.insert(x: Mnemonic);
150	}
151
152	std::string HashString;
153	for (const std::string &Opcode : Opcodes)
154	HashString.append(str: Opcode);
155	return HashString;
156	}
157
158	} // namespace bolt
159	} // namespace llvm
160

source code of bolt/lib/Core/HashUtilities.cpp