1//===- bolt/Target/X86/X86MCSymbolizer.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "X86MCSymbolizer.h"
10#include "MCTargetDesc/X86BaseInfo.h"
11#include "bolt/Core/BinaryContext.h"
12#include "bolt/Core/BinaryFunction.h"
13#include "bolt/Core/MCPlusBuilder.h"
14#include "bolt/Core/Relocation.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCRegisterInfo.h"
17
18#define DEBUG_TYPE "bolt-symbolizer"
19
20namespace llvm {
21namespace bolt {
22
23X86MCSymbolizer::~X86MCSymbolizer() {}
24
25bool X86MCSymbolizer::tryAddingSymbolicOperand(
26 MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
27 bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
28 if (IsBranch)
29 return false;
30
31 // Ignore implicit operands.
32 if (ImmSize == 0)
33 return false;
34
35 BinaryContext &BC = Function.getBinaryContext();
36 MCContext *Ctx = BC.Ctx.get();
37
38 if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
39 return false;
40
41 /// Add symbolic operand to the instruction with an optional addend.
42 auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend) {
43 const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Ctx&: *Ctx);
44 if (Addend)
45 Expr = MCBinaryExpr::createAdd(LHS: Expr, RHS: MCConstantExpr::create(Value: Addend, Ctx&: *Ctx),
46 Ctx&: *Ctx);
47 Inst.addOperand(Op: MCOperand::createExpr(Val: Expr));
48 };
49
50 // Check if the operand being added is a displacement part of a compound
51 // memory operand that uses PC-relative addressing. If it is, try to symbolize
52 // it without relocations. Return true on success, false otherwise.
53 auto processPCRelOperandNoRel = [&]() {
54 const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
55 if (MemOp == -1)
56 return false;
57
58 const unsigned DispOp = MemOp + X86::AddrDisp;
59 if (Inst.getNumOperands() != DispOp)
60 return false;
61
62 const MCOperand &Base = Inst.getOperand(i: MemOp + X86::AddrBaseReg);
63 if (Base.getReg() != BC.MRI->getProgramCounter())
64 return false;
65
66 const MCOperand &Scale = Inst.getOperand(i: MemOp + X86::AddrScaleAmt);
67 const MCOperand &Index = Inst.getOperand(i: MemOp + X86::AddrIndexReg);
68 if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
69 return false;
70
71 const MCSymbol *TargetSymbol;
72 uint64_t TargetOffset;
73
74 if (!CreateNewSymbols) {
75 if (BinaryData *BD = BC.getBinaryDataContainingAddress(Address: Value)) {
76 TargetSymbol = BD->getSymbol();
77 TargetOffset = Value - BD->getAddress();
78 } else {
79 return false;
80 }
81 } else {
82 std::tie(args&: TargetSymbol, args&: TargetOffset) =
83 BC.handleAddressRef(Address: Value, BF&: Function, /*IsPCRel=*/true);
84 }
85
86 addOperand(TargetSymbol, TargetOffset);
87
88 return true;
89 };
90
91 // Check for GOTPCRELX relocations first. Because these relocations allow the
92 // linker to modify the instruction, we have to check the offset range
93 // corresponding to the instruction, not the offset of the operand.
94 // Note that if there is GOTPCRELX relocation against the instruction, there
95 // will be no other relocation in this range, since GOTPCRELX applies only to
96 // certain instruction types.
97 const uint64_t InstOffset = InstAddress - Function.getAddress();
98 const Relocation *Relocation =
99 Function.getRelocationInRange(StartOffset: InstOffset, EndOffset: InstOffset + InstSize);
100 if (Relocation && Relocation::isX86GOTPCRELX(Type: Relocation->Type)) {
101 // If the operand is PC-relative, convert it without using the relocation
102 // information. For GOTPCRELX, it is safe to use the absolute address
103 // instead of extracting the addend from the relocation, as non-standard
104 // forms will be rejected by linker conversion process and the operand
105 // will always reference GOT which we don't rewrite.
106 if (processPCRelOperandNoRel())
107 return true;
108
109 // The linker converted the PC-relative address to an absolute one.
110 // Symbolize this address.
111 if (CreateNewSymbols)
112 BC.handleAddressRef(Address: Value, BF&: Function, /*IsPCRel=*/false);
113
114 const BinaryData *Target = BC.getBinaryDataAtAddress(Address: Value);
115 if (!Target) {
116 assert(!CreateNewSymbols &&
117 "BinaryData should exist at converted GOTPCRELX destination");
118 return false;
119 }
120
121 addOperand(Target->getSymbol(), /*Addend=*/0);
122
123 return true;
124 }
125
126 // Check for relocations against the operand.
127 if (!Relocation || Relocation->Offset != InstOffset + ImmOffset)
128 Relocation = Function.getRelocationAt(Offset: InstOffset + ImmOffset);
129
130 if (!Relocation)
131 return processPCRelOperandNoRel();
132
133 // GOTPC64 is special because the X86 Assembler doesn't know how to emit
134 // a PC-relative 8-byte fixup, which is what we need to cover this. The
135 // only way to do this is to use the symbol name _GLOBAL_OFFSET_TABLE_.
136 if (Relocation::isX86GOTPC64(Type: Relocation->Type)) {
137 auto PairOrErr = handleGOTPC64(R: *Relocation, InstrAddr: InstAddress);
138 if (auto E = PairOrErr.takeError()) {
139 Function.setSimple(false);
140 BC.logBOLTErrorsAndQuitOnFatal(E: std::move(E));
141 return false;
142 }
143 auto [Sym, Addend] = *PairOrErr;
144 addOperand(Sym, Addend);
145 return true;
146 }
147
148 uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
149 if (Relocation->isPCRelative())
150 SymbolValue += InstAddress + ImmOffset;
151
152 // Process reference to the symbol.
153 if (CreateNewSymbols)
154 BC.handleAddressRef(Address: SymbolValue, BF&: Function, IsPCRel: Relocation->isPCRelative());
155
156 uint64_t Addend = Relocation->Addend;
157 // Real addend for pc-relative targets is adjusted with a delta from
158 // the relocation placement to the next instruction.
159 if (Relocation->isPCRelative())
160 Addend += InstOffset + InstSize - Relocation->Offset;
161
162 addOperand(Relocation->Symbol, Addend);
163
164 return true;
165}
166
167Expected<std::pair<MCSymbol *, uint64_t>>
168X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
169 BinaryContext &BC = Function.getBinaryContext();
170 const BinaryData *GOTSymBD = BC.getGOTSymbol();
171 if (!GOTSymBD || !GOTSymBD->getAddress()) {
172 // This error is pretty serious but we can't kill the disassembler
173 // because of it, so don't make it fatal. Log it and warn the user.
174 return createNonFatalBOLTError(
175 S: "R_X86_GOTPC64 relocation is present but we did not detect "
176 "a valid _GLOBAL_OFFSET_TABLE_ in symbol table\n");
177 }
178 // R_X86_GOTPC64 are not relative to the Reloc nor end of instruction,
179 // but the start of the MOVABSQ instruction. So the Target Address is
180 // whatever is encoded in the original operand when we disassembled
181 // the binary (here, R.Value) plus MOVABSQ address (InstrAddr).
182 // Here we extract the intended Addend by subtracting the real
183 // GOT addr.
184 const int64_t Addend = R.Value + InstrAddr - GOTSymBD->getAddress();
185 return std::make_pair(x: BC.Ctx->getOrCreateSymbol(Name: "_GLOBAL_OFFSET_TABLE_"),
186 y: Addend);
187}
188
189void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
190 int64_t Value,
191 uint64_t Address) {}
192
193} // namespace bolt
194} // namespace llvm
195

// Source code of bolt/lib/Target/X86/X86MCSymbolizer.cpp