1//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64MCSymbolizer.h"
10#include "bolt/Core/BinaryContext.h"
11#include "bolt/Core/BinaryFunction.h"
12#include "bolt/Core/MCPlusBuilder.h"
13#include "bolt/Core/Relocation.h"
14#include "llvm/MC/MCInst.h"
15#include "llvm/Support/Debug.h"
16
17#define DEBUG_TYPE "bolt-symbolizer"
18
19namespace llvm {
20namespace bolt {
21
22AArch64MCSymbolizer::~AArch64MCSymbolizer() {}
23
24bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
25 MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
26 bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
27 BinaryContext &BC = Function.getBinaryContext();
28 MCContext *Ctx = BC.Ctx.get();
29
30 // NOTE: the callee may incorrectly set IsBranch.
31 if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
32 return false;
33
34 const uint64_t InstOffset = InstAddress - Function.getAddress();
35 const Relocation *Relocation = Function.getRelocationAt(Offset: InstOffset);
36
37 /// Add symbolic operand to the instruction with an optional addend.
38 auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend,
39 uint64_t RelType) {
40 const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Ctx&: *Ctx);
41 if (Addend)
42 Expr = MCBinaryExpr::createAdd(LHS: Expr, RHS: MCConstantExpr::create(Value: Addend, Ctx&: *Ctx),
43 Ctx&: *Ctx);
44 Inst.addOperand(Op: MCOperand::createExpr(
45 Val: BC.MIB->getTargetExprFor(Inst, Expr, Ctx&: *Ctx, RelType)));
46 };
47
48 if (Relocation) {
49 auto AdjustedRel = adjustRelocation(Rel: *Relocation, Inst);
50 if (AdjustedRel) {
51 addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
52 return true;
53 }
54
55 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
56 << Twine::utohexstr(InstAddress) << '\n');
57 }
58
59 if (!BC.MIB->hasPCRelOperand(Inst))
60 return false;
61
62 Value += InstAddress;
63 const MCSymbol *TargetSymbol;
64 uint64_t TargetOffset;
65 if (!CreateNewSymbols) {
66 if (BinaryData *BD = BC.getBinaryDataContainingAddress(Address: Value)) {
67 TargetSymbol = BD->getSymbol();
68 TargetOffset = Value - BD->getAddress();
69 } else {
70 return false;
71 }
72 } else {
73 std::tie(args&: TargetSymbol, args&: TargetOffset) =
74 BC.handleAddressRef(Address: Value, BF&: Function, /*IsPCRel*/ true);
75 }
76
77 addOperand(TargetSymbol, TargetOffset, 0);
78
79 return true;
80}
81
82std::optional<Relocation>
83AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
84 const MCInst &Inst) const {
85 BinaryContext &BC = Function.getBinaryContext();
86
87 // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
88 // NOP+ADR. After the conversion, the linker might keep the relocations and
89 // if we try to symbolize ADR's operand using outdated relocations, we might
90 // get unexpected results. Hence, we check for the conversion/relaxation, and
91 // ignore the relocation. The symbolization is done based on the PC-relative
92 // value of the operand instead.
93 if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
94 Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
95 return std::nullopt;
96
97 // The linker might perform TLS relocations relaxations, such as changed TLS
98 // access model (e.g. changed global dynamic model to initial exec), thus
99 // changing the instructions. The static relocations might be invalid at this
100 // point and we don't have to process these relocations anymore. More
101 // information could be found by searching elfNN_aarch64_tls_relax in bfd.
102 if (BC.MIB->isMOVW(Inst)) {
103 switch (Rel.Type) {
104 default:
105 break;
106 case ELF::R_AARCH64_TLSDESC_LD64_LO12:
107 case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
108 case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
109 case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
110 return std::nullopt;
111 }
112 }
113
114 if (!Relocation::isGOT(Type: Rel.Type))
115 return Rel;
116
117 Relocation AdjustedRel = Rel;
118 if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
119 // The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
120 // second instruction and have to use the relocation type for ADD.
121 AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
122 return AdjustedRel;
123 }
124
125 // ADRP is a special case since the linker can leave the instruction opcode
126 // intact and modify only the operand. We are doing our best to detect when
127 // such conversion has happened without looking at the next instruction.
128 //
129 // If we detect that a page referenced by the ADRP cannot belong to GOT, and
130 // that it matches the symbol from the relocation, then we can be certain
131 // that the linker converted the GOT reference into the local one. Otherwise,
132 // we leave the disambiguation resolution to FixRelaxationPass.
133 //
134 // Note that ADRP relaxation described above cannot happen for TLS relocation.
135 // Since TLS relocations may not even have a valid symbol (not supported by
136 // BOLT), we explicitly exclude them from the check.
137 if (BC.MIB->isADRP(Inst) && Rel.Addend == 0 && !Relocation::isTLS(Type: Rel.Type)) {
138 ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(Symbol: *Rel.Symbol);
139 assert(SymbolValue && "Symbol value should be set");
140 const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
141
142 if (SymbolPageAddr == Rel.Value &&
143 !isPageAddressValidForGOT(PageAddress: SymbolPageAddr)) {
144 AdjustedRel.Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
145 return AdjustedRel;
146 }
147 }
148
149 // For instructions that reference GOT, ignore the referenced symbol and
150 // use value at the relocation site. FixRelaxationPass will look at
151 // instruction pairs and will perform necessary adjustments.
152 AdjustedRel.Symbol = BC.registerNameAtAddress(Name: "__BOLT_got_zero", Address: 0, Size: 0, Alignment: 0);
153 AdjustedRel.Addend = Rel.Value;
154
155 return AdjustedRel;
156}
157
158bool AArch64MCSymbolizer::isPageAddressValidForGOT(uint64_t PageAddress) const {
159 assert(!(PageAddress & 0xfffULL) && "Page address not aligned at 4KB");
160
161 ErrorOr<BinarySection &> GOT =
162 Function.getBinaryContext().getUniqueSectionByName(SectionName: ".got");
163 if (!GOT || !GOT->getSize())
164 return false;
165
166 const uint64_t GOTFirstPageAddress = GOT->getAddress() & ~0xfffULL;
167 const uint64_t GOTLastPageAddress =
168 (GOT->getAddress() + GOT->getSize() - 1) & ~0xfffULL;
169
170 return PageAddress >= GOTFirstPageAddress &&
171 PageAddress <= GOTLastPageAddress;
172}
173
/// No-op: the body is empty, so nothing is written to \p CStream and
/// \p Value and \p Address are ignored.
/// NOTE(review): presumably this implements a symbolizer interface hook of
/// the same name; confirm against the class declaration in the header.
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
                                                          int64_t Value,
                                                          uint64_t Address) {}
177
178} // namespace bolt
179} // namespace llvm
180

// Source: bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp