//===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines manifest constants for the wasm object file format.
// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_BINARYFORMAT_WASM_H
15#define LLVM_BINARYFORMAT_WASM_H
16
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <optional>
#include <utility>
22
23namespace llvm {
24namespace wasm {
25
// Object file magic string: the four bytes '\0', 'a', 's', 'm'. The array has
// no trailing NUL terminator, so its size is exactly 4.
const char WasmMagic[] = {'\0', 'a', 's', 'm'};
// Wasm binary format version.
const uint32_t WasmVersion = 0x1;
// Wasm linking metadata version.
const uint32_t WasmMetadataVersion = 0x2;
// Wasm uses a 64k page size by default (but the custom-page-sizes proposal
// allows changing it).
const uint32_t WasmDefaultPageSize = 65536;
35
// Section identifiers. WASM_SEC_LAST_KNOWN aliases the highest id listed here.
enum : unsigned {
  WASM_SEC_CUSTOM = 0,     // Custom / User-defined section
  WASM_SEC_TYPE = 1,       // Function signature declarations
  WASM_SEC_IMPORT = 2,     // Import declarations
  WASM_SEC_FUNCTION = 3,   // Function declarations
  WASM_SEC_TABLE = 4,      // Indirect function table and other tables
  WASM_SEC_MEMORY = 5,     // Memory attributes
  WASM_SEC_GLOBAL = 6,     // Global declarations
  WASM_SEC_EXPORT = 7,     // Exports
  WASM_SEC_START = 8,      // Start function declaration
  WASM_SEC_ELEM = 9,       // Elements section
  WASM_SEC_CODE = 10,      // Function bodies (code)
  WASM_SEC_DATA = 11,      // Data segments
  WASM_SEC_DATACOUNT = 12, // Data segment count
  WASM_SEC_TAG = 13,       // Tag declarations
  WASM_SEC_LAST_KNOWN = WASM_SEC_TAG,
};
53
// Type immediate encodings used in various contexts.
enum : unsigned {
  WASM_TYPE_I32 = 0x7F,
  WASM_TYPE_I64 = 0x7E,
  WASM_TYPE_F32 = 0x7D,
  WASM_TYPE_F64 = 0x7C,
  WASM_TYPE_V128 = 0x7B,
  WASM_TYPE_NULLFUNCREF = 0x73,
  WASM_TYPE_NULLEXTERNREF = 0x72,
  WASM_TYPE_NULLEXNREF = 0x74,
  WASM_TYPE_NULLREF = 0x71,
  WASM_TYPE_FUNCREF = 0x70,
  WASM_TYPE_EXTERNREF = 0x6F,
  WASM_TYPE_EXNREF = 0x69,
  WASM_TYPE_ANYREF = 0x6E,
  WASM_TYPE_EQREF = 0x6D,
  WASM_TYPE_I31REF = 0x6C,
  WASM_TYPE_STRUCTREF = 0x6B,
  WASM_TYPE_ARRAYREF = 0x6A,
  WASM_TYPE_NONNULLABLE = 0x64,
  WASM_TYPE_NULLABLE = 0x63,
  WASM_TYPE_FUNC = 0x60,
  WASM_TYPE_ARRAY = 0x5E,
  WASM_TYPE_STRUCT = 0x5F,
  WASM_TYPE_SUB = 0x50,
  WASM_TYPE_SUB_FINAL = 0x4F,
  WASM_TYPE_REC = 0x4E,
  WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
};
83
// Kinds of externals (for imports and exports).
enum : unsigned {
  WASM_EXTERNAL_FUNCTION = 0x0,
  WASM_EXTERNAL_TABLE = 0x1,
  WASM_EXTERNAL_MEMORY = 0x2,
  WASM_EXTERNAL_GLOBAL = 0x3,
  WASM_EXTERNAL_TAG = 0x4,
};
92
// Opcodes used in initializer expressions.
enum : unsigned {
  WASM_OPCODE_END = 0x0b,
  WASM_OPCODE_CALL = 0x10,
  WASM_OPCODE_LOCAL_GET = 0x20,
  WASM_OPCODE_LOCAL_SET = 0x21,
  WASM_OPCODE_LOCAL_TEE = 0x22,
  WASM_OPCODE_GLOBAL_GET = 0x23,
  WASM_OPCODE_GLOBAL_SET = 0x24,
  WASM_OPCODE_I32_STORE = 0x36,
  WASM_OPCODE_I64_STORE = 0x37,
  WASM_OPCODE_I32_CONST = 0x41,
  WASM_OPCODE_I64_CONST = 0x42,
  WASM_OPCODE_F32_CONST = 0x43,
  WASM_OPCODE_F64_CONST = 0x44,
  WASM_OPCODE_I32_ADD = 0x6a,
  WASM_OPCODE_I32_SUB = 0x6b,
  WASM_OPCODE_I32_MUL = 0x6c,
  WASM_OPCODE_I64_ADD = 0x7c,
  WASM_OPCODE_I64_SUB = 0x7d,
  WASM_OPCODE_I64_MUL = 0x7e,
  WASM_OPCODE_REF_NULL = 0xd0,
  WASM_OPCODE_REF_FUNC = 0xd2,
  // Prefix byte for the GC opcode space (see the GC-prefixed opcodes).
  WASM_OPCODE_GC_PREFIX = 0xfb,
};
118
// Opcodes in the GC-prefixed space (0xfb).
enum : unsigned {
  WASM_OPCODE_STRUCT_NEW = 0x00,
  WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
  WASM_OPCODE_ARRAY_NEW = 0x06,
  WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
  WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
  WASM_OPCODE_REF_I31 = 0x1c,
  // any.convert_extern and extern.convert_any don't seem to be supported by
  // Binaryen.
};
130
// Opcodes used in synthetic functions.
// NOTE(review): the entries listed after each *_PREFIX value reuse small
// numbers (e.g. 0x08, 0x0b), so they look like sub-opcodes that follow that
// prefix byte rather than standalone opcodes -- confirm against the spec.
enum : unsigned {
  WASM_OPCODE_BLOCK = 0x02,
  WASM_OPCODE_BR = 0x0c,
  WASM_OPCODE_BR_TABLE = 0x0e,
  WASM_OPCODE_RETURN = 0x0f,
  WASM_OPCODE_DROP = 0x1a,
  WASM_OPCODE_MISC_PREFIX = 0xfc,
  WASM_OPCODE_MEMORY_INIT = 0x08,
  WASM_OPCODE_MEMORY_FILL = 0x0b,
  WASM_OPCODE_DATA_DROP = 0x09,
  WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
  WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
  WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
  WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
  WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};
148
// Sub-opcodes for catch clauses in a try_table instruction.
enum : unsigned {
  WASM_OPCODE_CATCH = 0x00,
  WASM_OPCODE_CATCH_REF = 0x01,
  WASM_OPCODE_CATCH_ALL = 0x02,
  WASM_OPCODE_CATCH_ALL_REF = 0x03,
};
156
// Bit flags tested against WasmLimits::Flags. Each value is a distinct bit so
// the flags can be OR-ed together.
enum : unsigned {
  WASM_LIMITS_FLAG_NONE = 0x0,
  WASM_LIMITS_FLAG_HAS_MAX = 0x1,
  WASM_LIMITS_FLAG_IS_SHARED = 0x2,
  WASM_LIMITS_FLAG_IS_64 = 0x4,
  WASM_LIMITS_FLAG_HAS_PAGE_SIZE = 0x8,
};
164
// Bit flags tested against WasmDataSegment::InitFlags.
enum : unsigned {
  WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
  WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
};
169
// Bit flags for element segments. Bit 0x02 is overloaded: its meaning depends
// on whether the passive bit (0x01) is set.
enum : unsigned {
  WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
  WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02,   // if passive == 1
  WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
  WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
};
// Mask covering the two low flag bits (0x01 | 0x02).
const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC = 0x3;
177
// Feature policy prefixes used in the custom "target_features" section.
enum : uint8_t {
  WASM_FEATURE_PREFIX_USED = '+',
  WASM_FEATURE_PREFIX_DISALLOWED = '-',
};
183
// Kind codes used in the custom "name" section.
enum : unsigned {
  WASM_NAMES_MODULE = 0,
  WASM_NAMES_FUNCTION = 1,
  WASM_NAMES_LOCAL = 2,
  WASM_NAMES_GLOBAL = 7,
  WASM_NAMES_DATA_SEGMENT = 9,
};
192
// Kind codes used in the custom "linking" section.
enum : unsigned {
  WASM_SEGMENT_INFO = 0x5,
  WASM_INIT_FUNCS = 0x6,
  WASM_COMDAT_INFO = 0x7,
  WASM_SYMBOL_TABLE = 0x8,
};
200
// Kind codes used in the custom "dylink" section.
enum : unsigned {
  WASM_DYLINK_MEM_INFO = 0x1,
  WASM_DYLINK_NEEDED = 0x2,
  WASM_DYLINK_EXPORT_INFO = 0x3,
  WASM_DYLINK_IMPORT_INFO = 0x4,
  WASM_DYLINK_RUNTIME_PATH = 0x5,
};
209
// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO.
enum : unsigned {
  WASM_COMDAT_DATA = 0x0,
  WASM_COMDAT_FUNCTION = 0x1,
  // GLOBAL, TAG, and TABLE are in here but LLVM doesn't use them yet.
  WASM_COMDAT_SECTION = 0x5,
};
217
// Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE.
enum WasmSymbolType : unsigned {
  WASM_SYMBOL_TYPE_FUNCTION = 0x0,
  WASM_SYMBOL_TYPE_DATA = 0x1,
  WASM_SYMBOL_TYPE_GLOBAL = 0x2,
  WASM_SYMBOL_TYPE_SECTION = 0x3,
  WASM_SYMBOL_TYPE_TAG = 0x4,
  WASM_SYMBOL_TYPE_TABLE = 0x5,
};
227
// Bit flags describing a segment; each value is a distinct bit.
// NOTE(review): presumably stored in WasmDataSegment::LinkingFlags -- confirm.
enum WasmSegmentFlag : unsigned {
  WASM_SEG_FLAG_STRINGS = 0x1,
  WASM_SEG_FLAG_TLS = 0x2,
  WASM_SEG_FLAG_RETAIN = 0x4,
};
233
// Kinds of tag attributes.
enum WasmTagAttribute : uint8_t {
  WASM_TAG_ATTRIBUTE_EXCEPTION = 0x0,
};
238
// Symbol flag layout: the low two bits (mask 0x3) hold the binding, the next
// two bits (mask 0xc) hold the visibility, and the remaining constants are
// independent single-bit flags.
const unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;

const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
const unsigned WASM_SYMBOL_EXPORTED = 0x20;
const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
const unsigned WASM_SYMBOL_NO_STRIP = 0x80;
const unsigned WASM_SYMBOL_TLS = 0x100;
const unsigned WASM_SYMBOL_ABSOLUTE = 0x200;
253
254#define WASM_RELOC(name, value) name = value,
255
256enum : unsigned {
257#include "WasmRelocs.def"
258};
259
260#undef WASM_RELOC
261
262struct WasmObjectHeader {
263 StringRef Magic;
264 uint32_t Version;
265};
266
267// Subset of types that a value can have
268enum class ValType {
269 I32 = WASM_TYPE_I32,
270 I64 = WASM_TYPE_I64,
271 F32 = WASM_TYPE_F32,
272 F64 = WASM_TYPE_F64,
273 V128 = WASM_TYPE_V128,
274 FUNCREF = WASM_TYPE_FUNCREF,
275 EXTERNREF = WASM_TYPE_EXTERNREF,
276 EXNREF = WASM_TYPE_EXNREF,
277 // Unmodeled value types include ref types with heap types other than
278 // func, extern or exn, and type-specialized funcrefs
279 OTHERREF = 0xff,
280};
281
282struct WasmDylinkImportInfo {
283 StringRef Module;
284 StringRef Field;
285 uint32_t Flags;
286};
287
288struct WasmDylinkExportInfo {
289 StringRef Name;
290 uint32_t Flags;
291};
292
293struct WasmDylinkInfo {
294 uint32_t MemorySize; // Memory size in bytes
295 uint32_t MemoryAlignment; // P2 alignment of memory
296 uint32_t TableSize; // Table size in elements
297 uint32_t TableAlignment; // P2 alignment of table
298 std::vector<StringRef> Needed; // Shared library dependencies
299 std::vector<WasmDylinkImportInfo> ImportInfo;
300 std::vector<WasmDylinkExportInfo> ExportInfo;
301 std::vector<StringRef> RuntimePath;
302};
303
// Producer metadata, kept as three lists of string pairs.
// NOTE(review): each pair is presumably (name, version) -- confirm against the
// code that fills it in.
struct WasmProducerInfo {
  std::vector<std::pair<std::string, std::string>> Languages;
  std::vector<std::pair<std::string, std::string>> Tools;
  std::vector<std::pair<std::string, std::string>> SDKs;
};
309
// A single feature entry: a one-byte policy prefix (see the
// WASM_FEATURE_PREFIX_* values) followed by the feature name.
struct WasmFeatureEntry {
  uint8_t Prefix;
  std::string Name;
};
314
315struct WasmExport {
316 StringRef Name;
317 uint8_t Kind;
318 uint32_t Index;
319};
320
// Size limits of a table or memory. Flags is a combination of
// WASM_LIMITS_FLAG_* bits; operator== only compares Maximum and PageSize when
// the corresponding HAS_MAX / HAS_PAGE_SIZE flag is set.
struct WasmLimits {
  uint8_t Flags;
  uint64_t Minimum;
  uint64_t Maximum;
  uint32_t PageSize;
};
327
328struct WasmTableType {
329 ValType ElemType;
330 WasmLimits Limits;
331};
332
333struct WasmTable {
334 uint32_t Index;
335 WasmTableType Type;
336 StringRef SymbolName; // from the "linking" section
337};
338
// A single-instruction initializer expression: an opcode plus its immediate.
// Float immediates are kept as raw 32/64-bit integers rather than as
// float/double.
// NOTE(review): which union member is active presumably follows from Opcode
// (e.g. WASM_OPCODE_I32_CONST -> Int32) -- confirm against the reader.
struct WasmInitExprMVP {
  uint8_t Opcode;
  union {
    int32_t Int32;
    int64_t Int64;
    uint32_t Float32;
    uint64_t Float64;
    uint32_t Global;
  } Value;
};
349
350// Extended-const init exprs and exprs with GC types are not explicitly
351// modeled, but the raw body of the expr is attached.
352struct WasmInitExpr {
353 uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than
354 // one instruction)
355 WasmInitExprMVP Inst;
356 ArrayRef<uint8_t> Body;
357};
358
// Type of a global: the value type byte and whether the global is mutable.
struct WasmGlobalType {
  uint8_t Type; // TODO: make this a ValType?
  bool Mutable;
};
363
364struct WasmGlobal {
365 uint32_t Index;
366 WasmGlobalType Type;
367 WasmInitExpr InitExpr;
368 StringRef SymbolName; // from the "linking" section
369 uint32_t Offset; // Offset of the definition in the binary's Global section
370 uint32_t Size; // Size of the definition in the binary's Global section
371};
372
373struct WasmTag {
374 uint32_t Index;
375 uint32_t SigIndex;
376 StringRef SymbolName; // from the "linking" section
377};
378
379struct WasmImport {
380 StringRef Module;
381 StringRef Field;
382 uint8_t Kind;
383 union {
384 uint32_t SigIndex;
385 WasmGlobalType Global;
386 WasmTableType Table;
387 WasmLimits Memory;
388 };
389};
390
// A local declaration: a type byte and a count.
struct WasmLocalDecl {
  uint8_t Type;
  uint32_t Count;
};
395
396struct WasmFunction {
397 uint32_t Index;
398 uint32_t SigIndex;
399 std::vector<WasmLocalDecl> Locals;
400 ArrayRef<uint8_t> Body;
401 uint32_t CodeSectionOffset;
402 uint32_t Size;
403 uint32_t CodeOffset; // start of Locals and Body
404 std::optional<StringRef> ExportName; // from the "export" section
405 StringRef SymbolName; // from the "linking" section
406 StringRef DebugName; // from the "name" section
407 uint32_t Comdat; // from the "comdat info" section
408};
409
410struct WasmDataSegment {
411 uint32_t InitFlags;
412 // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX.
413 uint32_t MemoryIndex;
414 // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0.
415 WasmInitExpr Offset;
416
417 ArrayRef<uint8_t> Content;
418 StringRef Name; // from the "segment info" section
419 uint32_t Alignment;
420 uint32_t LinkingFlags;
421 uint32_t Comdat; // from the "comdat info" section
422};
423
// 3 different element segment modes are encodable. This class is currently
// only used during decoding (see WasmElemSegment below).
enum class ElemSegmentMode { Active, Passive, Declarative };
427
428// Represents a Wasm element segment, with some limitations compared the spec:
429// 1) Does not model passive or declarative segments (Segment will end up with
430// an Offset field of i32.const 0)
431// 2) Does not model init exprs (Segment will get an empty Functions list)
432// 3) Does not model types other than basic funcref/externref/exnref (see
433// ValType)
434struct WasmElemSegment {
435 uint32_t Flags;
436 uint32_t TableNumber;
437 ValType ElemKind;
438 WasmInitExpr Offset;
439 std::vector<uint32_t> Functions;
440};
441
// Represents the location of a Wasm data symbol within a WasmDataSegment, as
// the index of the segment, and the offset and size within the segment.
struct WasmDataReference {
  uint32_t Segment;
  uint64_t Offset;
  uint64_t Size;
};
449
// A single relocation record.
struct WasmRelocation {
  uint8_t Type;    // The type of the relocation.
  uint32_t Index;  // Index into either symbol or type index space.
  uint64_t Offset; // Offset from the start of the section.
  int64_t Addend;  // A value to add to the symbol.
};
456
// An init-function entry (see WASM_INIT_FUNCS): a priority and a symbol.
// NOTE(review): Symbol is presumably an index into the symbol table --
// confirm.
struct WasmInitFunc {
  uint32_t Priority;
  uint32_t Symbol;
};
461
462struct WasmSymbolInfo {
463 StringRef Name;
464 uint8_t Kind;
465 uint32_t Flags;
466 // For undefined symbols the module of the import
467 std::optional<StringRef> ImportModule;
468 // For undefined symbols the name of the import
469 std::optional<StringRef> ImportName;
470 // For symbols to be exported from the final module
471 std::optional<StringRef> ExportName;
472 union {
473 // For function, table, or global symbols, the index in function, table, or
474 // global index space.
475 uint32_t ElementIndex;
476 // For a data symbols, the address of the data relative to segment.
477 WasmDataReference DataRef;
478 };
479};
480
// Kind of entity that a WasmDebugName entry names.
enum class NameType {
  FUNCTION,
  GLOBAL,
  DATA_SEGMENT,
};
486
487struct WasmDebugName {
488 NameType Type;
489 uint32_t Index;
490 StringRef Name;
491};
492
493// Info from the linking metadata section of a wasm object file.
494struct WasmLinkingData {
495 uint32_t Version;
496 std::vector<WasmInitFunc> InitFunctions;
497 std::vector<StringRef> Comdats;
498 // The linking section also contains a symbol table. This info (represented
499 // in a WasmSymbolInfo struct) is stored inside the WasmSymbol object instead
500 // of in this structure; this allows vectors of WasmSymbols and
501 // WasmLinkingDatas to be reallocated.
502};
503
504struct WasmSignature {
505 SmallVector<ValType, 1> Returns;
506 SmallVector<ValType, 4> Params;
507 // LLVM can parse types other than functions encoded in the type section,
508 // but does not actually model them. Instead a placeholder signature is
509 // created in the Object's signature list.
510 enum { Function, Tag, Placeholder } Kind = Function;
511 // Support empty and tombstone instances, needed by DenseMap.
512 enum { Plain, Empty, Tombstone } State = Plain;
513
514 WasmSignature(SmallVector<ValType, 1> &&InReturns,
515 SmallVector<ValType, 4> &&InParams)
516 : Returns(InReturns), Params(InParams) {}
517 WasmSignature() = default;
518};
519
520// Useful comparison operators
521inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) {
522 return LHS.State == RHS.State && LHS.Returns == RHS.Returns &&
523 LHS.Params == RHS.Params;
524}
525
526inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) {
527 return !(LHS == RHS);
528}
529
530inline bool operator==(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
531 return LHS.Type == RHS.Type && LHS.Mutable == RHS.Mutable;
532}
533
534inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
535 return !(LHS == RHS);
536}
537
538inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) {
539 return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum &&
540 (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum
541 : true) &&
542 (LHS.Flags & WASM_LIMITS_FLAG_HAS_PAGE_SIZE
543 ? LHS.PageSize == RHS.PageSize
544 : true);
545}
546
547inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) {
548 return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits;
549}
550
551LLVM_ABI llvm::StringRef toString(WasmSymbolType type);
552LLVM_ABI llvm::StringRef relocTypetoString(uint32_t type);
553LLVM_ABI llvm::StringRef sectionTypeToString(uint32_t type);
554LLVM_ABI bool relocTypeHasAddend(uint32_t type);
555
556} // end namespace wasm
557} // end namespace llvm
558
559#endif
560

// Source: llvm/include/llvm/BinaryFormat/Wasm.h