1 | //===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AArch64MCSymbolizer.h" |
10 | #include "bolt/Core/BinaryContext.h" |
11 | #include "bolt/Core/BinaryFunction.h" |
12 | #include "bolt/Core/MCPlusBuilder.h" |
13 | #include "bolt/Core/Relocation.h" |
14 | #include "llvm/MC/MCInst.h" |
15 | #include "llvm/Support/Debug.h" |
16 | |
17 | #define DEBUG_TYPE "bolt-symbolizer" |
18 | |
19 | namespace llvm { |
20 | namespace bolt { |
21 | |
22 | AArch64MCSymbolizer::~AArch64MCSymbolizer() {} |
23 | |
24 | bool AArch64MCSymbolizer::tryAddingSymbolicOperand( |
25 | MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress, |
26 | bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) { |
27 | BinaryContext &BC = Function.getBinaryContext(); |
28 | MCContext *Ctx = BC.Ctx.get(); |
29 | |
30 | // NOTE: the callee may incorrectly set IsBranch. |
31 | if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) |
32 | return false; |
33 | |
34 | const uint64_t InstOffset = InstAddress - Function.getAddress(); |
35 | const Relocation *Relocation = Function.getRelocationAt(Offset: InstOffset); |
36 | |
37 | /// Add symbolic operand to the instruction with an optional addend. |
38 | auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend, |
39 | uint64_t RelType) { |
40 | const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Ctx&: *Ctx); |
41 | if (Addend) |
42 | Expr = MCBinaryExpr::createAdd(LHS: Expr, RHS: MCConstantExpr::create(Value: Addend, Ctx&: *Ctx), |
43 | Ctx&: *Ctx); |
44 | Inst.addOperand(Op: MCOperand::createExpr( |
45 | Val: BC.MIB->getTargetExprFor(Inst, Expr, Ctx&: *Ctx, RelType))); |
46 | }; |
47 | |
48 | if (Relocation) { |
49 | auto AdjustedRel = adjustRelocation(Rel: *Relocation, Inst); |
50 | if (AdjustedRel) { |
51 | addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type); |
52 | return true; |
53 | } |
54 | |
55 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x" |
56 | << Twine::utohexstr(InstAddress) << '\n'); |
57 | } |
58 | |
59 | if (!BC.MIB->hasPCRelOperand(Inst)) |
60 | return false; |
61 | |
62 | Value += InstAddress; |
63 | const MCSymbol *TargetSymbol; |
64 | uint64_t TargetOffset; |
65 | if (!CreateNewSymbols) { |
66 | if (BinaryData *BD = BC.getBinaryDataContainingAddress(Address: Value)) { |
67 | TargetSymbol = BD->getSymbol(); |
68 | TargetOffset = Value - BD->getAddress(); |
69 | } else { |
70 | return false; |
71 | } |
72 | } else { |
73 | std::tie(args&: TargetSymbol, args&: TargetOffset) = |
74 | BC.handleAddressRef(Address: Value, BF&: Function, /*IsPCRel*/ true); |
75 | } |
76 | |
77 | addOperand(TargetSymbol, TargetOffset, 0); |
78 | |
79 | return true; |
80 | } |
81 | |
82 | std::optional<Relocation> |
83 | AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel, |
84 | const MCInst &Inst) const { |
85 | BinaryContext &BC = Function.getBinaryContext(); |
86 | |
87 | // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into |
88 | // NOP+ADR. After the conversion, the linker might keep the relocations and |
89 | // if we try to symbolize ADR's operand using outdated relocations, we might |
90 | // get unexpected results. Hence, we check for the conversion/relaxation, and |
91 | // ignore the relocation. The symbolization is done based on the PC-relative |
92 | // value of the operand instead. |
93 | if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC || |
94 | Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC)) |
95 | return std::nullopt; |
96 | |
97 | // The linker might perform TLS relocations relaxations, such as changed TLS |
98 | // access model (e.g. changed global dynamic model to initial exec), thus |
99 | // changing the instructions. The static relocations might be invalid at this |
100 | // point and we don't have to process these relocations anymore. More |
101 | // information could be found by searching elfNN_aarch64_tls_relax in bfd. |
102 | if (BC.MIB->isMOVW(Inst)) { |
103 | switch (Rel.Type) { |
104 | default: |
105 | break; |
106 | case ELF::R_AARCH64_TLSDESC_LD64_LO12: |
107 | case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: |
108 | case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: |
109 | case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: |
110 | return std::nullopt; |
111 | } |
112 | } |
113 | |
114 | if (!Relocation::isGOT(Type: Rel.Type)) |
115 | return Rel; |
116 | |
117 | Relocation AdjustedRel = Rel; |
118 | if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) { |
119 | // The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the |
120 | // second instruction and have to use the relocation type for ADD. |
121 | AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; |
122 | return AdjustedRel; |
123 | } |
124 | |
125 | // ADRP is a special case since the linker can leave the instruction opcode |
126 | // intact and modify only the operand. We are doing our best to detect when |
127 | // such conversion has happened without looking at the next instruction. |
128 | // |
129 | // If we detect that a page referenced by the ADRP cannot belong to GOT, and |
130 | // that it matches the symbol from the relocation, then we can be certain |
131 | // that the linker converted the GOT reference into the local one. Otherwise, |
132 | // we leave the disambiguation resolution to FixRelaxationPass. |
133 | // |
134 | // Note that ADRP relaxation described above cannot happen for TLS relocation. |
135 | // Since TLS relocations may not even have a valid symbol (not supported by |
136 | // BOLT), we explicitly exclude them from the check. |
137 | if (BC.MIB->isADRP(Inst) && Rel.Addend == 0 && !Relocation::isTLS(Type: Rel.Type)) { |
138 | ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(Symbol: *Rel.Symbol); |
139 | assert(SymbolValue && "Symbol value should be set" ); |
140 | const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL; |
141 | |
142 | if (SymbolPageAddr == Rel.Value && |
143 | !isPageAddressValidForGOT(PageAddress: SymbolPageAddr)) { |
144 | AdjustedRel.Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; |
145 | return AdjustedRel; |
146 | } |
147 | } |
148 | |
149 | // For instructions that reference GOT, ignore the referenced symbol and |
150 | // use value at the relocation site. FixRelaxationPass will look at |
151 | // instruction pairs and will perform necessary adjustments. |
152 | AdjustedRel.Symbol = BC.registerNameAtAddress(Name: "__BOLT_got_zero" , Address: 0, Size: 0, Alignment: 0); |
153 | AdjustedRel.Addend = Rel.Value; |
154 | |
155 | return AdjustedRel; |
156 | } |
157 | |
158 | bool AArch64MCSymbolizer::isPageAddressValidForGOT(uint64_t PageAddress) const { |
159 | assert(!(PageAddress & 0xfffULL) && "Page address not aligned at 4KB" ); |
160 | |
161 | ErrorOr<BinarySection &> GOT = |
162 | Function.getBinaryContext().getUniqueSectionByName(SectionName: ".got" ); |
163 | if (!GOT || !GOT->getSize()) |
164 | return false; |
165 | |
166 | const uint64_t GOTFirstPageAddress = GOT->getAddress() & ~0xfffULL; |
167 | const uint64_t GOTLastPageAddress = |
168 | (GOT->getAddress() + GOT->getSize() - 1) & ~0xfffULL; |
169 | |
170 | return PageAddress >= GOTFirstPageAddress && |
171 | PageAddress <= GOTLastPageAddress; |
172 | } |
173 | |
174 | void AArch64MCSymbolizer::(raw_ostream &CStream, |
175 | int64_t Value, |
176 | uint64_t Address) {} |
177 | |
178 | } // namespace bolt |
179 | } // namespace llvm |
180 | |