1 | //! A Rust parser for the [WebAssembly Text format][wat] |
2 | //! |
3 | //! This crate contains a stable interface to the parser for the [WAT][wat] |
4 | //! format of WebAssembly text files. The format parsed by this crate follows |
5 | //! the [online specification][wat]. |
6 | //! |
7 | //! # Examples |
8 | //! |
9 | //! Parse an in-memory string: |
10 | //! |
11 | //! ``` |
12 | //! # fn foo() -> wat::Result<()> { |
13 | //! let wat = r#" |
14 | //! (module |
15 | //! (func $foo) |
16 | //! |
17 | //! (func (export "bar") |
18 | //! call $foo |
19 | //! ) |
20 | //! ) |
21 | //! "# ; |
22 | //! |
23 | //! let binary = wat::parse_str(wat)?; |
24 | //! // ... |
25 | //! # Ok(()) |
26 | //! # } |
27 | //! ``` |
28 | //! |
29 | //! Parse an on-disk file: |
30 | //! |
31 | //! ``` |
32 | //! # fn foo() -> wat::Result<()> { |
33 | //! let binary = wat::parse_file("./foo.wat" )?; |
34 | //! // ... |
35 | //! # Ok(()) |
36 | //! # } |
37 | //! ``` |
38 | //! |
39 | //! ## Evolution of the WAT Format |
40 | //! |
41 | //! WebAssembly, and the WAT format, are an evolving specification. Features are |
42 | //! added to WAT, WAT changes, and sometimes WAT breaks. The policy of this |
43 | //! crate is that it will always follow the [official specification][wat] for |
44 | //! WAT files. |
45 | //! |
46 | //! Future WebAssembly features will be accepted to this parser **and they will |
47 | //! not require a feature gate to opt-in**. All implemented WebAssembly features |
48 | //! will be enabled at all times. Using a future WebAssembly feature in the WAT |
49 | //! format may cause breakage because while specifications are in development |
50 | //! the WAT syntax (and/or binary encoding) will often change. This crate will |
51 | //! do its best to keep up with these proposals, but breaking textual changes |
52 | //! will be published as non-breaking semver changes to this crate. |
53 | //! |
54 | //! ## Stability |
55 | //! |
56 | //! This crate is intended to be a very stable shim over the `wast` crate |
57 | //! which is expected to be much more unstable. The `wast` crate contains |
//! AST data structures for parsing `*.wat` files and they will evolve as the
//! WAT and WebAssembly specifications evolve over time.
60 | //! |
61 | //! This crate is currently at version 1.x.y, and it is intended that it will |
62 | //! remain here for quite some time. Breaking changes to the WAT format will be |
63 | //! landed as a non-semver-breaking version change in this crate. This crate |
64 | //! will always follow the [official specification for WAT][wat]. |
65 | //! |
66 | //! [wat]: http://webassembly.github.io/spec/core/text/index.html |
67 | |
68 | #![deny (missing_docs)] |
69 | #![cfg_attr (docsrs, feature(doc_auto_cfg))] |
70 | |
71 | use std::borrow::Cow; |
72 | use std::fmt; |
73 | use std::path::{Path, PathBuf}; |
74 | use std::str; |
75 | use wast::core::EncodeOptions; |
76 | use wast::lexer::{Lexer, TokenKind}; |
77 | use wast::parser::{self, ParseBuffer}; |
78 | |
79 | #[doc (inline)] |
80 | pub use wast::core::GenerateDwarf; |
81 | |
82 | /// Parses a file on disk as a [WebAssembly Text format][wat] file, or a binary |
83 | /// WebAssembly file |
84 | /// |
85 | /// This function will read the bytes on disk and delegate them to the |
86 | /// [`parse_bytes`] function. For more information on the behavior of parsing |
87 | /// see [`parse_bytes`]. |
88 | /// |
89 | /// # Errors |
90 | /// |
91 | /// For information about errors, see the [`parse_bytes`] documentation. |
92 | /// |
93 | /// # Examples |
94 | /// |
95 | /// ``` |
96 | /// # fn foo() -> wat::Result<()> { |
97 | /// let binary = wat::parse_file("./foo.wat" )?; |
98 | /// // ... |
99 | /// # Ok(()) |
100 | /// # } |
101 | /// ``` |
102 | /// |
103 | /// [wat]: http://webassembly.github.io/spec/core/text/index.html |
104 | pub fn parse_file(file: impl AsRef<Path>) -> Result<Vec<u8>> { |
105 | Parser::new().parse_file(path:file) |
106 | } |
107 | |
108 | /// Parses in-memory bytes as either the [WebAssembly Text format][wat], or a |
109 | /// binary WebAssembly module. |
110 | /// |
111 | /// This function will attempt to interpret the given bytes as one of two |
112 | /// options: |
113 | /// |
114 | /// * A utf-8 string which is a `*.wat` file to be parsed. |
115 | /// * A binary WebAssembly file starting with `b"\0asm"` |
116 | /// |
117 | /// If the input is a string then it will be parsed as `*.wat`, and then after |
118 | /// parsing it will be encoded back into a WebAssembly binary module. If the |
119 | /// input is a binary that starts with `b"\0asm"` it will be returned verbatim. |
120 | /// Everything that doesn't start with `b"\0asm"` will be parsed as a utf-8 |
121 | /// `*.wat` file, returning errors as appropriate. |
122 | /// |
123 | /// For more information about parsing wat files, see [`parse_str`]. |
124 | /// |
125 | /// # Errors |
126 | /// |
127 | /// In addition to all of the errors that can be returned from [`parse_str`], |
128 | /// this function will also return an error if the input does not start with |
129 | /// `b"\0asm"` and is invalid utf-8. (failed to even try to call [`parse_str`]). |
130 | /// |
131 | /// # Examples |
132 | /// |
133 | /// ``` |
134 | /// # fn foo() -> wat::Result<()> { |
135 | /// // Parsing bytes that are actually `*.wat` files |
136 | /// assert_eq!(&*wat::parse_bytes(b"(module)" )?, b" \0asm \x01\0\0\0" ); |
137 | /// assert!(wat::parse_bytes(b"module" ).is_err()); |
138 | /// assert!(wat::parse_bytes(b"binary \0file \0\that \0is \0not \0wat" ).is_err()); |
139 | /// |
140 | /// // Pass through binaries that look like real wasm files |
141 | /// assert_eq!(&*wat::parse_bytes(b" \0asm \x01\0\0\0" )?, b" \0asm \x01\0\0\0" ); |
142 | /// # Ok(()) |
143 | /// # } |
144 | /// ``` |
145 | /// |
146 | /// [wat]: http://webassembly.github.io/spec/core/text/index.html |
147 | pub fn parse_bytes(bytes: &[u8]) -> Result<Cow<'_, [u8]>> { |
148 | Parser::new().parse_bytes(path:None, bytes) |
149 | } |
150 | |
151 | /// Parses an in-memory string as the [WebAssembly Text format][wat], returning |
152 | /// the file as a binary WebAssembly file. |
153 | /// |
154 | /// This function is intended to be a stable convenience function for parsing a |
155 | /// wat file into a WebAssembly binary file. This is a high-level operation |
156 | /// which does not expose any parsing internals, for that you'll want to use the |
157 | /// `wast` crate. |
158 | /// |
159 | /// # Errors |
160 | /// |
161 | /// This function can fail for a number of reasons, including (but not limited |
162 | /// to): |
163 | /// |
164 | /// * The `wat` input may fail to lex, such as having invalid tokens or syntax |
165 | /// * The `wat` input may fail to parse, such as having incorrect syntactical |
166 | /// structure |
167 | /// * The `wat` input may contain names that could not be resolved |
168 | /// |
169 | /// # Examples |
170 | /// |
171 | /// ``` |
172 | /// # fn foo() -> wat::Result<()> { |
173 | /// assert_eq!(wat::parse_str("(module)" )?, b" \0asm \x01\0\0\0" ); |
174 | /// assert!(wat::parse_str("module" ).is_err()); |
175 | /// |
176 | /// let wat = r#" |
177 | /// (module |
178 | /// (func $foo) |
179 | /// |
180 | /// (func (export "bar") |
181 | /// call $foo |
182 | /// ) |
183 | /// ) |
184 | /// "# ; |
185 | /// |
186 | /// let binary = wat::parse_str(wat)?; |
187 | /// // ... |
188 | /// # Ok(()) |
189 | /// # } |
190 | /// ``` |
191 | /// |
192 | /// [wat]: http://webassembly.github.io/spec/core/text/index.html |
193 | pub fn parse_str(wat: impl AsRef<str>) -> Result<Vec<u8>> { |
194 | Parser::default().parse_str(path:None, wat) |
195 | } |
196 | |
/// Parser configuration for transforming bytes into WebAssembly binaries.
///
/// A parser with the default settings is obtained through [`Parser::new`] or
/// [`Default::default`].
#[derive(Default)]
pub struct Parser {
    // DWARF style requested through `Parser::generate_dwarf`; `None` (the
    // default) means no DWARF is generated. The field only exists when the
    // `dwarf` feature is enabled.
    #[cfg(feature = "dwarf")]
    generate_dwarf: Option<GenerateDwarf>,
    // Private zero-sized field: because of it the struct cannot be built with
    // a struct literal outside this module, even when the `dwarf` feature (and
    // with it the only other field) is disabled.
    _private: (),
}
204 | |
205 | impl Parser { |
206 | /// Creates a new parser with th default settings. |
207 | pub fn new() -> Parser { |
208 | Parser::default() |
209 | } |
210 | |
211 | /// Indicates that DWARF debugging information should be generated and |
212 | /// emitted by default. |
213 | /// |
214 | /// Note that DWARF debugging information is only emitted for textual-based |
215 | /// modules. For example if a WebAssembly binary is parsed via |
216 | /// [`Parser::parse_bytes`] this won't insert new DWARF information in such |
217 | /// a binary. Additionally if the text format used the `(module binary ...)` |
218 | /// form then no DWARF information will be emitted. |
219 | #[cfg (feature = "dwarf" )] |
220 | pub fn generate_dwarf(&mut self, generate: GenerateDwarf) -> &mut Self { |
221 | self.generate_dwarf = Some(generate); |
222 | self |
223 | } |
224 | |
225 | /// Equivalent of [`parse_file`] but uses this parser's settings. |
226 | pub fn parse_file(&self, path: impl AsRef<Path>) -> Result<Vec<u8>> { |
227 | self._parse_file(path.as_ref()) |
228 | } |
229 | |
230 | fn _parse_file(&self, file: &Path) -> Result<Vec<u8>> { |
231 | let contents = std::fs::read(file).map_err(|err| Error { |
232 | kind: Box::new(ErrorKind::Io { |
233 | err, |
234 | file: Some(file.to_owned()), |
235 | }), |
236 | })?; |
237 | match self.parse_bytes(Some(file), &contents) { |
238 | // If the result here is borrowed then that means that the input |
239 | // `&contents` was itself already a wasm module. We've already got |
240 | // an owned copy of that so return `contents` directly after |
241 | // double-checking it is indeed the same as the `bytes` return value |
242 | // here. That helps avoid a copy of `bytes` via something like |
243 | // `Cow::to_owned` which would otherwise copy the bytes. |
244 | Ok(Cow::Borrowed(bytes)) => { |
245 | assert_eq!(bytes.len(), contents.len()); |
246 | assert_eq!(bytes.as_ptr(), contents.as_ptr()); |
247 | Ok(contents) |
248 | } |
249 | Ok(Cow::Owned(bytes)) => Ok(bytes), |
250 | Err(mut e) => { |
251 | e.set_path(file); |
252 | Err(e) |
253 | } |
254 | } |
255 | } |
256 | |
257 | /// Equivalent of [`parse_bytes`] but uses this parser's settings. |
258 | /// |
259 | /// The `path` argument is an optional path to use when error messages are |
260 | /// generated. |
261 | pub fn parse_bytes<'a>(&self, path: Option<&Path>, bytes: &'a [u8]) -> Result<Cow<'a, [u8]>> { |
262 | if bytes.starts_with(b" \0asm" ) { |
263 | return Ok(bytes.into()); |
264 | } |
265 | match str::from_utf8(bytes) { |
266 | Ok(s) => self._parse_str(path, s).map(|s| s.into()), |
267 | Err(_) => Err(Error { |
268 | kind: Box::new(ErrorKind::Custom { |
269 | msg: "input bytes aren't valid utf-8" .to_string(), |
270 | file: path.map(|p| p.to_owned()), |
271 | }), |
272 | }), |
273 | } |
274 | } |
275 | |
276 | /// Equivalent of [`parse_str`] but uses this parser's settings. |
277 | /// |
278 | /// The `path` argument is an optional path to use when error messages are |
279 | /// generated. |
280 | pub fn parse_str(&self, path: Option<&Path>, wat: impl AsRef<str>) -> Result<Vec<u8>> { |
281 | self._parse_str(path, wat.as_ref()) |
282 | } |
283 | |
284 | fn _parse_str(&self, path: Option<&Path>, wat: &str) -> Result<Vec<u8>> { |
285 | let mut _buf = ParseBuffer::new(wat).map_err(|e| Error::cvt(e, wat, path))?; |
286 | #[cfg (feature = "dwarf" )] |
287 | _buf.track_instr_spans(self.generate_dwarf.is_some()); |
288 | let mut ast = parser::parse::<wast::Wat>(&_buf).map_err(|e| Error::cvt(e, wat, path))?; |
289 | |
290 | let mut _opts = EncodeOptions::default(); |
291 | #[cfg (feature = "dwarf" )] |
292 | if let Some(style) = self.generate_dwarf { |
293 | _opts.dwarf(path.unwrap_or("<input>.wat" .as_ref()), wat, style); |
294 | } |
295 | _opts |
296 | .encode_wat(&mut ast) |
297 | .map_err(|e| Error::cvt(e, wat, path)) |
298 | } |
299 | } |
300 | |
/// Result of [`Detect::from_bytes`] to indicate what some input bytes look
/// like.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Detect {
    /// The input bytes look like the WebAssembly text format.
    ///
    /// That is, the input is valid utf-8 and its first token after any
    /// whitespace and comments is a left parenthesis.
    WasmText,
    /// The input bytes look like the WebAssembly binary format.
    ///
    /// That is, the input starts with the binary magic prefix.
    WasmBinary,
    /// The input bytes don't look like WebAssembly at all.
    Unknown,
}
312 | |
313 | impl Detect { |
314 | /// Detect quickly if supplied bytes represent a Wasm module, |
315 | /// whether binary encoded or in WAT-encoded. |
316 | /// |
317 | /// This briefly lexes past whitespace and comments as a `*.wat` file to see if |
318 | /// we can find a left-paren. If that fails then it's probably `*.wit` instead. |
319 | /// |
320 | /// |
321 | /// Examples |
322 | /// ``` |
323 | /// use wat::Detect; |
324 | /// |
325 | /// assert_eq!(Detect::from_bytes(r#" |
326 | /// (module |
327 | /// (type (;0;) (func)) |
328 | /// (func (;0;) (type 0) |
329 | /// nop |
330 | /// ) |
331 | /// ) |
332 | /// "# ), Detect::WasmText); |
333 | /// ``` |
334 | pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Detect { |
335 | if bytes.as_ref().starts_with(b" \0asm" ) { |
336 | return Detect::WasmBinary; |
337 | } |
338 | let text = match std::str::from_utf8(bytes.as_ref()) { |
339 | Ok(s) => s, |
340 | Err(_) => return Detect::Unknown, |
341 | }; |
342 | |
343 | let lexer = Lexer::new(text); |
344 | let mut iter = lexer.iter(0); |
345 | |
346 | while let Some(next) = iter.next() { |
347 | match next.map(|t| t.kind) { |
348 | Ok(TokenKind::Whitespace) |
349 | | Ok(TokenKind::BlockComment) |
350 | | Ok(TokenKind::LineComment) => {} |
351 | Ok(TokenKind::LParen) => return Detect::WasmText, |
352 | _ => break, |
353 | } |
354 | } |
355 | |
356 | Detect::Unknown |
357 | } |
358 | |
359 | /// Returns whether this is either binary or textual wasm. |
360 | pub fn is_wasm(&self) -> bool { |
361 | match self { |
362 | Detect::WasmText | Detect::WasmBinary => true, |
363 | Detect::Unknown => false, |
364 | } |
365 | } |
366 | } |
367 | |
/// A convenience type definition for `Result` where the error is [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
370 | |
/// Errors from this crate related to parsing WAT files.
///
/// An error can occur during phases such as:
///
/// * Lexing can fail if the document is syntactically invalid.
/// * A string may not be utf-8
/// * The syntactical structure of the wat file may be invalid
/// * The wat file may be semantically invalid such as having name resolution
///   failures
/// * A file may fail to be read from disk
#[derive(Debug)]
pub struct Error {
    // The actual error payload. It is boxed, so `Error` itself is a single
    // pointer wide -- presumably to keep `Result<T>` values small.
    kind: Box<ErrorKind>,
}
384 | |
// Private payload of `Error`; one variant per source of failure.
#[derive(Debug)]
enum ErrorKind {
    // A lexing, parsing or encoding error reported by the `wast` crate
    // (created through `Error::cvt`).
    Wast(wast::Error),
    // Reading the input file failed; `file` is the path involved, if known.
    Io {
        err: std::io::Error,
        file: Option<PathBuf>,
    },
    // A free-form message produced by this crate itself (e.g. input bytes
    // that aren't valid utf-8); `file` is the path involved, if known.
    Custom {
        msg: String,
        file: Option<PathBuf>,
    },
}
397 | |
398 | impl Error { |
399 | fn cvt<E: Into<wast::Error>>(e: E, contents: &str, path: Option<&Path>) -> Error { |
400 | let mut err = e.into(); |
401 | if let Some(path) = path { |
402 | err.set_path(path); |
403 | } |
404 | err.set_text(contents); |
405 | Error { |
406 | kind: Box::new(ErrorKind::Wast(err)), |
407 | } |
408 | } |
409 | |
410 | /// To provide a more useful error this function can be used to set |
411 | /// the file name that this error is associated with. |
412 | /// |
413 | /// The `file` here will be stored in this error and later rendered in the |
414 | /// `Display` implementation. |
415 | pub fn set_path<P: AsRef<Path>>(&mut self, file: P) { |
416 | let file = file.as_ref(); |
417 | match &mut *self.kind { |
418 | ErrorKind::Wast(e) => e.set_path(file), |
419 | ErrorKind::Custom { file: f, .. } => *f = Some(file.to_owned()), |
420 | ErrorKind::Io { file: f, .. } => *f = Some(file.to_owned()), |
421 | } |
422 | } |
423 | } |
424 | |
425 | impl fmt::Display for Error { |
426 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
427 | match &*self.kind { |
428 | ErrorKind::Wast(err: &Error) => err.fmt(f), |
429 | ErrorKind::Custom { msg: &String, file: &Option, .. } => match file { |
430 | Some(file: &PathBuf) => { |
431 | write!(f, "failed to parse ` {}`: {}" , file.display(), msg) |
432 | } |
433 | None => msg.fmt(f), |
434 | }, |
435 | ErrorKind::Io { err: &Error, file: &Option, .. } => match file { |
436 | Some(file: &PathBuf) => { |
437 | write!(f, "failed to read from ` {}`" , file.display()) |
438 | } |
439 | None => err.fmt(f), |
440 | }, |
441 | } |
442 | } |
443 | } |
444 | |
445 | impl std::error::Error for Error { |
446 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |
447 | match &*self.kind { |
448 | ErrorKind::Wast(_) => None, |
449 | ErrorKind::Custom { .. } => None, |
450 | ErrorKind::Io { err: &Error, .. } => Some(err), |
451 | } |
452 | } |
453 | } |
454 | |
#[cfg(test)]
mod test {
    use super::*;

    // Checks that `Error::set_path` is reflected in the rendered message for
    // every kind of error this crate produces.
    #[test]
    fn test_set_path() {
        // Custom error: a single 0xFF byte is neither a wasm binary nor utf-8.
        let mut e = parse_bytes(&[0xFF]).unwrap_err();
        e.set_path("foo");
        assert_eq!(
            e.to_string(),
            "failed to parse `foo`: input bytes aren't valid utf-8"
        );

        // I/O error: the path is attached by `parse_file` itself.
        let e = parse_file("_does_not_exist_").unwrap_err();
        assert!(e
            .to_string()
            .starts_with("failed to read from `_does_not_exist_`"));

        // `wast` error: the rendering (including its exact whitespace) is
        // produced by `wast::Error`'s `Display` implementation.
        let mut e = parse_bytes("()".as_bytes()).unwrap_err();
        e.set_path("foo");
        assert_eq!(
            e.to_string(),
            "expected valid module field\n     --> foo:1:2\n      |\n    1 | ()\n      |  ^"
        );
    }
}
481 | |