1 | /* |
2 | * Copyright 2015 WebAssembly Community Group participants |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | // |
18 | // Parses WebAssembly code in S-Expression format, as in .wast files |
19 | // such as are in the spec test suite. |
20 | // |
21 | |
22 | #ifndef wasm_wasm_s_parser_h |
23 | #define wasm_wasm_s_parser_h |
24 | |
25 | #include "mixed_arena.h" |
26 | #include "parsing.h" // for UniqueNameMapper. TODO: move dependency to cpp file? |
27 | #include "wasm-builder.h" |
28 | #include "wasm.h" |
29 | |
30 | namespace wasm { |
31 | |
32 | class SourceLocation { |
33 | public: |
34 | IString filename; |
35 | uint32_t line; |
36 | uint32_t column; |
37 | SourceLocation(IString filename_, uint32_t line_, uint32_t column_ = 0) |
38 | : filename(filename_), line(line_), column(column_) {} |
39 | }; |
40 | |
41 | // |
42 | // An element in an S-Expression: a list or a string |
43 | // |
44 | class Element { |
45 | using List = ArenaVector<Element*>; |
46 | |
47 | bool isList_ = true; |
48 | List list_; |
49 | IString str_; |
50 | bool dollared_; |
51 | bool quoted_; |
52 | |
53 | public: |
54 | Element(MixedArena& allocator) : list_(allocator) {} |
55 | |
56 | bool isList() const { return isList_; } |
57 | bool isStr() const { return !isList_; } |
58 | bool dollared() const { return isStr() && dollared_; } |
59 | bool quoted() const { return isStr() && quoted_; } |
60 | |
61 | size_t line = -1; |
62 | size_t col = -1; |
63 | // original locations at the start/end of the S-Expression list |
64 | SourceLocation* startLoc = nullptr; |
65 | SourceLocation* endLoc = nullptr; |
66 | |
67 | // list methods |
68 | List& list(); |
69 | Element* operator[](unsigned i); |
70 | size_t size() { return list().size(); } |
71 | List::Iterator begin() { return list().begin(); } |
72 | List::Iterator end() { return list().end(); } |
73 | |
74 | // string methods |
75 | IString str() const; |
76 | std::string toString() const; |
77 | Element* setString(IString str__, bool dollared__, bool quoted__); |
78 | Element* setMetadata(size_t line_, size_t col_, SourceLocation* startLoc_); |
79 | |
80 | // comparisons |
81 | bool operator==(Name name) { return isStr() && str() == name; } |
82 | |
83 | template<typename T> bool operator!=(T t) { return !(*this == t); } |
84 | |
85 | // printing |
86 | friend std::ostream& operator<<(std::ostream& o, Element& e); |
87 | void dump(); |
88 | }; |
89 | |
90 | // |
91 | // Generic S-Expression parsing into lists |
92 | // |
93 | class SExpressionParser { |
94 | const char* input; |
95 | size_t line; |
96 | char const* lineStart; |
97 | SourceLocation* loc = nullptr; |
98 | |
99 | MixedArena allocator; |
100 | |
101 | public: |
102 | // Assumes control of and modifies the input. |
103 | SExpressionParser(const char* input); |
104 | Element* root; |
105 | |
106 | private: |
107 | Element* parse(); |
108 | void skipWhitespace(); |
109 | void parseDebugLocation(); |
110 | Element* parseString(); |
111 | }; |
112 | |
113 | // |
114 | // SExpressions => WebAssembly module |
115 | // |
116 | class SExpressionWasmBuilder { |
117 | Module& wasm; |
118 | MixedArena& allocator; |
119 | IRProfile profile; |
120 | |
121 | // The main list of types declared in the module |
122 | std::vector<HeapType> types; |
123 | std::unordered_map<std::string, size_t> typeIndices; |
124 | |
125 | std::vector<Name> functionNames; |
126 | std::vector<Name> tableNames; |
127 | std::vector<Name> elemSegmentNames; |
128 | std::vector<Name> memoryNames; |
129 | std::vector<Name> dataSegmentNames; |
130 | std::vector<Name> globalNames; |
131 | std::vector<Name> tagNames; |
132 | int functionCounter = 0; |
133 | int globalCounter = 0; |
134 | int tagCounter = 0; |
135 | int tableCounter = 0; |
136 | int elemCounter = 0; |
137 | int memoryCounter = 0; |
138 | int dataCounter = 0; |
139 | // we need to know function return types before we parse their contents |
140 | std::map<Name, HeapType> functionTypes; |
141 | std::unordered_map<IString, Index> debugInfoFileIndices; |
142 | |
143 | // Maps type indexes to a mapping of field index => name. This is not the same |
144 | // as the field names stored on the wasm object, as that maps types after |
145 | // their canonicalization. Canonicalization loses information, which means |
146 | // that structurally identical types cannot have different names. However, |
147 | // while parsing the text format we keep this mapping of type indexes to names |
148 | // which does allow reading such content. |
149 | std::unordered_map<size_t, std::unordered_map<Index, Name>> fieldNames; |
150 | |
151 | public: |
152 | // Assumes control of and modifies the input. |
153 | SExpressionWasmBuilder(Module& wasm, Element& module, IRProfile profile); |
154 | |
155 | private: |
156 | void preParseHeapTypes(Element& module); |
157 | // pre-parse types and function definitions, so we know function return types |
158 | // before parsing their contents |
159 | void preParseFunctionType(Element& s); |
160 | bool isImport(Element& curr); |
161 | void preParseImports(Element& curr); |
162 | void preParseMemory(Element& curr); |
163 | void parseModuleElement(Element& curr); |
164 | |
165 | // function parsing state |
166 | std::unique_ptr<Function> currFunction; |
167 | bool brokeToAutoBlock; |
168 | |
169 | UniqueNameMapper nameMapper; |
170 | |
171 | int parseIndex(Element& s); |
172 | |
173 | Name getFunctionName(Element& s); |
174 | Name getTableName(Element& s); |
175 | Name getElemSegmentName(Element& s); |
176 | Name getMemoryName(Element& s); |
177 | Name getDataSegmentName(Element& s); |
178 | Name getGlobalName(Element& s); |
179 | Name getTagName(Element& s); |
180 | void parseStart(Element& s) { wasm.addStart(s: getFunctionName(s&: *s[1])); } |
181 | |
182 | Name getMemoryNameAtIdx(Index i); |
183 | bool isMemory64(Name memoryName); |
184 | bool hasMemoryIdx(Element& s, Index defaultSize, Index i); |
185 | |
186 | // returns the next index in s |
187 | size_t parseFunctionNames(Element& s, Name& name, Name& exportName); |
188 | void parseFunction(Element& s, bool preParseImport = false); |
189 | |
190 | Type stringToType(IString str, bool allowError = false, bool prefix = false) { |
191 | return stringToType(str: str.str, allowError, prefix); |
192 | } |
193 | Type stringToType(std::string_view str, |
194 | bool allowError = false, |
195 | bool prefix = false); |
196 | HeapType stringToHeapType(IString str, bool prefix = false) { |
197 | return stringToHeapType(str: str.str, prefix); |
198 | } |
199 | HeapType stringToHeapType(std::string_view str, bool prefix = false); |
200 | Type elementToType(Element& s); |
201 | // TODO: Use std::string_view for this and similar functions. |
202 | Type stringToLaneType(const char* str); |
203 | bool isType(IString str) { return stringToType(str, allowError: true) != Type::none; } |
204 | HeapType getFunctionType(Name name, Element& s); |
205 | |
206 | public: |
207 | Expression* parseExpression(Element* s) { return parseExpression(s&: *s); } |
208 | Expression* parseExpression(Element& s); |
209 | |
210 | Module& getModule() { return wasm; } |
211 | |
212 | private: |
213 | Expression* makeExpression(Element& s); |
214 | Expression* makeUnreachable(); |
215 | Expression* makeNop(); |
216 | Expression* makeBinary(Element& s, BinaryOp op); |
217 | Expression* makeUnary(Element& s, UnaryOp op); |
218 | Expression* makeSelect(Element& s); |
219 | Expression* makeDrop(Element& s); |
220 | Expression* makeMemorySize(Element& s); |
221 | Expression* makeMemoryGrow(Element& s); |
222 | Index getLocalIndex(Element& s); |
223 | Expression* makeLocalGet(Element& s); |
224 | Expression* makeLocalTee(Element& s); |
225 | Expression* makeLocalSet(Element& s); |
226 | Expression* makeGlobalGet(Element& s); |
227 | Expression* makeGlobalSet(Element& s); |
228 | Expression* makeBlock(Element& s); |
229 | Expression* makeThenOrElse(Element& s); |
230 | Expression* makeConst(Element& s, Type type); |
231 | Expression* |
232 | makeLoad(Element& s, Type type, bool signed_, int bytes, bool isAtomic); |
233 | Expression* makeStore(Element& s, Type type, int bytes, bool isAtomic); |
234 | Expression* |
235 | makeAtomicRMW(Element& s, AtomicRMWOp op, Type type, uint8_t bytes); |
236 | Expression* makeAtomicCmpxchg(Element& s, Type type, uint8_t bytes); |
237 | Expression* makeAtomicWait(Element& s, Type type); |
238 | Expression* makeAtomicNotify(Element& s); |
239 | Expression* makeAtomicFence(Element& s); |
240 | Expression* (Element& s, SIMDExtractOp op, size_t lanes); |
241 | Expression* makeSIMDReplace(Element& s, SIMDReplaceOp op, size_t lanes); |
242 | Expression* makeSIMDShuffle(Element& s); |
243 | Expression* makeSIMDTernary(Element& s, SIMDTernaryOp op); |
244 | Expression* makeSIMDShift(Element& s, SIMDShiftOp op); |
245 | Expression* makeSIMDLoad(Element& s, SIMDLoadOp op, int bytes); |
246 | Expression* |
247 | makeSIMDLoadStoreLane(Element& s, SIMDLoadStoreLaneOp op, int bytes); |
248 | Expression* makeMemoryInit(Element& s); |
249 | Expression* makeDataDrop(Element& s); |
250 | Expression* makeMemoryCopy(Element& s); |
251 | Expression* makeMemoryFill(Element& s); |
252 | Expression* makePop(Element& s); |
253 | Expression* makeIf(Element& s); |
254 | Expression* makeMaybeBlock(Element& s, size_t i, Type type); |
255 | Expression* makeLoop(Element& s); |
256 | Expression* makeCall(Element& s, bool isReturn); |
257 | Expression* makeCallIndirect(Element& s, bool isReturn); |
258 | template<class T> void parseOperands(Element& s, Index i, Index j, T& list) { |
259 | while (i < j) { |
260 | list.push_back(parseExpression(s: s[i])); |
261 | i++; |
262 | } |
263 | } |
264 | template<class T> |
265 | void parseCallOperands(Element& s, Index i, Index j, T* call) { |
266 | parseOperands(s, i, j, call->operands); |
267 | } |
268 | enum class LabelType { Break, Exception }; |
269 | Name getLabel(Element& s, LabelType labelType = LabelType::Break); |
270 | Expression* makeBreak(Element& s); |
271 | Expression* makeBreakTable(Element& s); |
272 | Expression* makeReturn(Element& s); |
273 | Expression* makeRefNull(Element& s); |
274 | Expression* makeRefIsNull(Element& s); |
275 | Expression* makeRefFunc(Element& s); |
276 | Expression* makeRefEq(Element& s); |
277 | Expression* makeTableGet(Element& s); |
278 | Expression* makeTableSet(Element& s); |
279 | Expression* makeTableSize(Element& s); |
280 | Expression* makeTableGrow(Element& s); |
281 | Expression* makeTry(Element& s); |
282 | Expression* makeTryOrCatchBody(Element& s, Type type, bool isTry); |
283 | Expression* makeThrow(Element& s); |
284 | Expression* makeRethrow(Element& s); |
285 | Expression* makeTupleMake(Element& s); |
286 | Expression* (Element& s); |
287 | Expression* makeCallRef(Element& s, bool isReturn); |
288 | Expression* makeI31New(Element& s); |
289 | Expression* makeI31Get(Element& s, bool signed_); |
290 | Expression* makeRefTest(Element& s, |
291 | std::optional<Type> castType = std::nullopt); |
292 | Expression* makeRefCast(Element& s, |
293 | std::optional<Type> castType = std::nullopt); |
294 | Expression* makeRefCastNop(Element& s); |
295 | Expression* makeBrOnNull(Element& s, bool onFail = false); |
296 | Expression* |
297 | makeBrOnCast(Element& s, std::optional<Type> castType, bool onFail = false); |
298 | Expression* makeStructNew(Element& s, bool default_); |
299 | Index getStructIndex(Element& type, Element& field); |
300 | Expression* makeStructGet(Element& s, bool signed_ = false); |
301 | Expression* makeStructSet(Element& s); |
302 | Expression* makeArrayNew(Element& s, bool default_); |
303 | Expression* makeArrayNewData(Element& s); |
304 | Expression* makeArrayNewElem(Element& s); |
305 | Expression* makeArrayNewFixed(Element& s); |
306 | Expression* makeArrayGet(Element& s, bool signed_ = false); |
307 | Expression* makeArraySet(Element& s); |
308 | Expression* makeArrayLen(Element& s); |
309 | Expression* makeArrayCopy(Element& s); |
310 | Expression* makeArrayFill(Element& s); |
311 | Expression* makeArrayInitData(Element& s); |
312 | Expression* makeArrayInitElem(Element& s); |
313 | Expression* makeRefAs(Element& s, RefAsOp op); |
314 | Expression* makeRefAsNonNull(Element& s); |
315 | Expression* makeStringNew(Element& s, StringNewOp op, bool try_); |
316 | Expression* makeStringConst(Element& s); |
317 | Expression* makeStringMeasure(Element& s, StringMeasureOp op); |
318 | Expression* makeStringEncode(Element& s, StringEncodeOp op); |
319 | Expression* makeStringConcat(Element& s); |
320 | Expression* makeStringEq(Element& s, StringEqOp op); |
321 | Expression* makeStringAs(Element& s, StringAsOp op); |
322 | Expression* makeStringWTF8Advance(Element& s); |
323 | Expression* makeStringWTF16Get(Element& s); |
324 | Expression* makeStringIterNext(Element& s); |
325 | Expression* makeStringIterMove(Element& s, StringIterMoveOp op); |
326 | Expression* makeStringSliceWTF(Element& s, StringSliceWTFOp op); |
327 | Expression* makeStringSliceIter(Element& s); |
328 | |
329 | // Helper functions |
330 | Type parseOptionalResultType(Element& s, Index& i); |
331 | Index parseMemoryLimits(Element& s, Index i, std::unique_ptr<Memory>& memory); |
332 | Index parseMemoryIndex(Element& s, Index i, std::unique_ptr<Memory>& memory); |
333 | Index parseMemoryForInstruction(const std::string& instrName, |
334 | Memory& memory, |
335 | Element& s, |
336 | Index i); |
337 | std::vector<Type> parseParamOrLocal(Element& s); |
338 | std::vector<NameType> parseParamOrLocal(Element& s, size_t& localIndex); |
339 | std::vector<Type> parseResults(Element& s); |
340 | HeapType parseTypeRef(Element& s); |
341 | size_t parseTypeUse(Element& s, |
342 | size_t startPos, |
343 | HeapType& functionType, |
344 | std::vector<NameType>& namedParams); |
345 | size_t parseTypeUse(Element& s, size_t startPos, HeapType& functionType); |
346 | |
347 | void |
348 | stringToBinary(Element& s, std::string_view str, std::vector<char>& data); |
349 | void parseMemory(Element& s, bool preParseImport = false); |
350 | void parseData(Element& s); |
351 | void parseInnerData(Element& s, Index i, std::unique_ptr<DataSegment>& seg); |
352 | void parseExport(Element& s); |
353 | void parseImport(Element& s); |
354 | void parseGlobal(Element& s, bool preParseImport = false); |
355 | void parseTable(Element& s, bool preParseImport = false); |
356 | void parseElem(Element& s, Table* table = nullptr); |
357 | ElementSegment* parseElemFinish(Element& s, |
358 | std::unique_ptr<ElementSegment>& segment, |
359 | Index i = 1, |
360 | bool usesExpressions = false); |
361 | |
362 | // Parses something like (func ..), (array ..), (struct) |
363 | HeapType parseHeapType(Element& s); |
364 | |
365 | void parseTag(Element& s, bool preParseImport = false); |
366 | |
367 | Function::DebugLocation getDebugLocation(const SourceLocation& loc); |
368 | |
369 | // Struct/Array instructions have an unnecessary heap type that is just for |
370 | // validation (except for the case of unreachability, but that's not a problem |
371 | // anyhow, we can ignore it there). That is, we also have a reference typed |
372 | // child from which we can infer the type anyhow, and we just need to check |
373 | // that type is the same. |
374 | void |
375 | validateHeapTypeUsingChild(Expression* child, HeapType heapType, Element& s); |
376 | }; |
377 | |
378 | } // namespace wasm |
379 | |
380 | #endif // wasm_wasm_s_parser_h |
381 | |