//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/Builtins.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

/// Unevaluated builtins don't get their arguments put on the stack
/// automatically. They instead operate on the AST of their call
/// expression.
/// Similar information is available via ASTContext::BuiltinInfo,
/// but that is not correct for our use cases.
static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
  return BuiltinID == Builtin::BI__builtin_classify_type;
}

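/// Compiles FuncDecl into a Function: sets up the offsets of the implicit
/// RVO and 'this' parameters as well as the declared ones, records lambda
/// capture offsets, and then emits the body if the function is eligible
/// for compilation.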
Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, which (usually) requires an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return value is not a primitive, a pointer to the storage it is
  // initialized in is passed as the first argument. See 'RVO' elsewhere in
  // the code.
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (MD->isImplicitObjectMemberFunction() && !IsLambdaStaticInvoker) {
      HasThisPointer = true;
      ParamTypes.push_back(PT_Ptr);
      ParamOffsets.push_back(ParamOffset);
      ParamOffset += align(primSize(PT_Ptr));
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    bool IsUnevaluatedBuiltin = false;
    if (unsigned BI = FuncDecl->getBuiltinID())
      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined()) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}

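/// Reserves frame space for a local variable: a Block header followed by the
/// (aligned) storage described by D. The returned offset points at the
/// storage, just past the header.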
Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}

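/// Binds Label to the current end of the bytecode and back-patches the
/// 32-bit operands of all jumps that were emitted before the label was known.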
void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

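/// Computes the relative offset a jump to Label has to encode, measured from
/// the end of the (aligned) opcode and operand about to be emitted. If the
/// label is not bound yet, a relocation is recorded and 0 is returned as a
/// placeholder; emitLabel() patches it later.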
int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC offset which the jump is relative to.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If target is known, compute jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record relocation and return dummy offset.
  LabelRelocs[Label].push_back(Position);
  return 0ull;
}

/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}

/// Emits a serializable value. These may contain heap-allocated memory and
/// aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

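// Explicit specializations for value types that carry heap-allocated state
// and therefore have to be serialized into the bytecode stream instead of
// being copied byte-for-byte.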
template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

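/// Emits a single instruction: the opcode, a source-info entry keyed on the
/// address right after the opcode, and then each operand in turn.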
template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  (..., emit(P, Code, Args, Success));
  return Success;
}

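// Jump helpers. Each emits a (conditional) jump opcode whose 32-bit operand
// is resolved via getOffset(); forward references are fixed up once the
// label is bound with emitLabel(). A typical forward-jump sequence in the
// byte-code compiler looks roughly like this (illustrative sketch only,
// assuming the emitter's usual label workflow):
//
//   LabelTy EndLabel = this->getLabel();
//   this->jumpFalse(EndLabel); // operand patched later by emitLabel()
//   ...                        // emit the taken branch
//   this->emitLabel(EndLabel);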
bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

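// A fallthrough emits no jump at all; it only binds the label to the current
// position so that pending jumps to it resolve here.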
bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL