| 1 | //! A Rust parser for the [WebAssembly Text format][wat] |
| 2 | //! |
| 3 | //! This crate contains a stable interface to the parser for the [WAT][wat] |
| 4 | //! format of WebAssembly text files. The format parsed by this crate follows |
| 5 | //! the [online specification][wat]. |
| 6 | //! |
| 7 | //! # Examples |
| 8 | //! |
| 9 | //! Parse an in-memory string: |
| 10 | //! |
| 11 | //! ``` |
| 12 | //! # fn foo() -> wat::Result<()> { |
| 13 | //! let wat = r#" |
| 14 | //! (module |
| 15 | //! (func $foo) |
| 16 | //! |
| 17 | //! (func (export "bar") |
| 18 | //! call $foo |
| 19 | //! ) |
| 20 | //! ) |
| 21 | //! "# ; |
| 22 | //! |
| 23 | //! let binary = wat::parse_str(wat)?; |
| 24 | //! // ... |
| 25 | //! # Ok(()) |
| 26 | //! # } |
| 27 | //! ``` |
| 28 | //! |
| 29 | //! Parse an on-disk file: |
| 30 | //! |
| 31 | //! ``` |
| 32 | //! # fn foo() -> wat::Result<()> { |
| 33 | //! let binary = wat::parse_file("./foo.wat" )?; |
| 34 | //! // ... |
| 35 | //! # Ok(()) |
| 36 | //! # } |
| 37 | //! ``` |
| 38 | //! |
| 39 | //! ## Evolution of the WAT Format |
| 40 | //! |
| 41 | //! WebAssembly, and the WAT format, are an evolving specification. Features are |
| 42 | //! added to WAT, WAT changes, and sometimes WAT breaks. The policy of this |
| 43 | //! crate is that it will always follow the [official specification][wat] for |
| 44 | //! WAT files. |
| 45 | //! |
| 46 | //! Future WebAssembly features will be accepted to this parser **and they will |
| 47 | //! not require a feature gate to opt-in**. All implemented WebAssembly features |
| 48 | //! will be enabled at all times. Using a future WebAssembly feature in the WAT |
| 49 | //! format may cause breakage because while specifications are in development |
| 50 | //! the WAT syntax (and/or binary encoding) will often change. This crate will |
| 51 | //! do its best to keep up with these proposals, but breaking textual changes |
| 52 | //! will be published as non-breaking semver changes to this crate. |
| 53 | //! |
| 54 | //! ## Stability |
| 55 | //! |
//! This crate is intended to be a very stable shim over the `wast` crate
//! which is expected to be much more unstable. The `wast` crate contains
//! AST data structures for parsing `*.wat` files and they will evolve as the
//! WAT and WebAssembly specifications evolve over time.
| 60 | //! |
| 61 | //! This crate is currently at version 1.x.y, and it is intended that it will |
| 62 | //! remain here for quite some time. Breaking changes to the WAT format will be |
| 63 | //! landed as a non-semver-breaking version change in this crate. This crate |
| 64 | //! will always follow the [official specification for WAT][wat]. |
| 65 | //! |
| 66 | //! [wat]: http://webassembly.github.io/spec/core/text/index.html |
| 67 | |
| 68 | #![deny (missing_docs)] |
| 69 | #![cfg_attr (docsrs, feature(doc_auto_cfg))] |
| 70 | |
| 71 | use std::borrow::Cow; |
| 72 | use std::fmt; |
| 73 | use std::path::{Path, PathBuf}; |
| 74 | use std::str; |
| 75 | use wast::core::EncodeOptions; |
| 76 | use wast::lexer::{Lexer, TokenKind}; |
| 77 | use wast::parser::{self, ParseBuffer}; |
| 78 | |
| 79 | #[doc (inline)] |
| 80 | pub use wast::core::GenerateDwarf; |
| 81 | |
| 82 | /// Parses a file on disk as a [WebAssembly Text format][wat] file, or a binary |
| 83 | /// WebAssembly file |
| 84 | /// |
| 85 | /// This function will read the bytes on disk and delegate them to the |
| 86 | /// [`parse_bytes`] function. For more information on the behavior of parsing |
| 87 | /// see [`parse_bytes`]. |
| 88 | /// |
| 89 | /// # Errors |
| 90 | /// |
| 91 | /// For information about errors, see the [`parse_bytes`] documentation. |
| 92 | /// |
| 93 | /// # Examples |
| 94 | /// |
| 95 | /// ``` |
| 96 | /// # fn foo() -> wat::Result<()> { |
| 97 | /// let binary = wat::parse_file("./foo.wat" )?; |
| 98 | /// // ... |
| 99 | /// # Ok(()) |
| 100 | /// # } |
| 101 | /// ``` |
| 102 | /// |
| 103 | /// [wat]: http://webassembly.github.io/spec/core/text/index.html |
| 104 | pub fn parse_file(file: impl AsRef<Path>) -> Result<Vec<u8>> { |
| 105 | Parser::new().parse_file(path:file) |
| 106 | } |
| 107 | |
| 108 | /// Parses in-memory bytes as either the [WebAssembly Text format][wat], or a |
| 109 | /// binary WebAssembly module. |
| 110 | /// |
| 111 | /// This function will attempt to interpret the given bytes as one of two |
| 112 | /// options: |
| 113 | /// |
| 114 | /// * A utf-8 string which is a `*.wat` file to be parsed. |
| 115 | /// * A binary WebAssembly file starting with `b"\0asm"` |
| 116 | /// |
| 117 | /// If the input is a string then it will be parsed as `*.wat`, and then after |
| 118 | /// parsing it will be encoded back into a WebAssembly binary module. If the |
| 119 | /// input is a binary that starts with `b"\0asm"` it will be returned verbatim. |
| 120 | /// Everything that doesn't start with `b"\0asm"` will be parsed as a utf-8 |
| 121 | /// `*.wat` file, returning errors as appropriate. |
| 122 | /// |
| 123 | /// For more information about parsing wat files, see [`parse_str`]. |
| 124 | /// |
| 125 | /// # Errors |
| 126 | /// |
| 127 | /// In addition to all of the errors that can be returned from [`parse_str`], |
| 128 | /// this function will also return an error if the input does not start with |
| 129 | /// `b"\0asm"` and is invalid utf-8. (failed to even try to call [`parse_str`]). |
| 130 | /// |
| 131 | /// # Examples |
| 132 | /// |
| 133 | /// ``` |
| 134 | /// # fn foo() -> wat::Result<()> { |
| 135 | /// // Parsing bytes that are actually `*.wat` files |
| 136 | /// assert_eq!(&*wat::parse_bytes(b"(module)" )?, b" \0asm \x01\0\0\0" ); |
| 137 | /// assert!(wat::parse_bytes(b"module" ).is_err()); |
| 138 | /// assert!(wat::parse_bytes(b"binary \0file \0\that \0is \0not \0wat" ).is_err()); |
| 139 | /// |
| 140 | /// // Pass through binaries that look like real wasm files |
| 141 | /// assert_eq!(&*wat::parse_bytes(b" \0asm \x01\0\0\0" )?, b" \0asm \x01\0\0\0" ); |
| 142 | /// # Ok(()) |
| 143 | /// # } |
| 144 | /// ``` |
| 145 | /// |
| 146 | /// [wat]: http://webassembly.github.io/spec/core/text/index.html |
| 147 | pub fn parse_bytes(bytes: &[u8]) -> Result<Cow<'_, [u8]>> { |
| 148 | Parser::new().parse_bytes(path:None, bytes) |
| 149 | } |
| 150 | |
| 151 | /// Parses an in-memory string as the [WebAssembly Text format][wat], returning |
| 152 | /// the file as a binary WebAssembly file. |
| 153 | /// |
| 154 | /// This function is intended to be a stable convenience function for parsing a |
| 155 | /// wat file into a WebAssembly binary file. This is a high-level operation |
| 156 | /// which does not expose any parsing internals, for that you'll want to use the |
| 157 | /// `wast` crate. |
| 158 | /// |
| 159 | /// # Errors |
| 160 | /// |
| 161 | /// This function can fail for a number of reasons, including (but not limited |
| 162 | /// to): |
| 163 | /// |
| 164 | /// * The `wat` input may fail to lex, such as having invalid tokens or syntax |
| 165 | /// * The `wat` input may fail to parse, such as having incorrect syntactical |
| 166 | /// structure |
| 167 | /// * The `wat` input may contain names that could not be resolved |
| 168 | /// |
| 169 | /// # Examples |
| 170 | /// |
| 171 | /// ``` |
| 172 | /// # fn foo() -> wat::Result<()> { |
| 173 | /// assert_eq!(wat::parse_str("(module)" )?, b" \0asm \x01\0\0\0" ); |
| 174 | /// assert!(wat::parse_str("module" ).is_err()); |
| 175 | /// |
| 176 | /// let wat = r#" |
| 177 | /// (module |
| 178 | /// (func $foo) |
| 179 | /// |
| 180 | /// (func (export "bar") |
| 181 | /// call $foo |
| 182 | /// ) |
| 183 | /// ) |
| 184 | /// "# ; |
| 185 | /// |
| 186 | /// let binary = wat::parse_str(wat)?; |
| 187 | /// // ... |
| 188 | /// # Ok(()) |
| 189 | /// # } |
| 190 | /// ``` |
| 191 | /// |
| 192 | /// [wat]: http://webassembly.github.io/spec/core/text/index.html |
| 193 | pub fn parse_str(wat: impl AsRef<str>) -> Result<Vec<u8>> { |
| 194 | Parser::default().parse_str(path:None, wat) |
| 195 | } |
| 196 | |
/// Configuration for turning input bytes or text into a WebAssembly binary.
///
/// Create one with [`Parser::new`] (or `Parser::default()`), adjust its
/// settings, and then call one of its `parse_*` methods.
#[derive(Default)]
pub struct Parser {
    /// DWARF generation mode requested via `generate_dwarf`; `None` (the
    /// default) means no DWARF options are handed to the encoder.
    #[cfg(feature = "dwarf")]
    generate_dwarf: Option<GenerateDwarf>,
    /// Private zero-sized field; with it present the struct cannot be built
    /// with a struct literal from outside this crate.
    _private: (),
}
| 204 | |
| 205 | impl Parser { |
| 206 | /// Creates a new parser with th default settings. |
| 207 | pub fn new() -> Parser { |
| 208 | Parser::default() |
| 209 | } |
| 210 | |
| 211 | /// Indicates that DWARF debugging information should be generated and |
| 212 | /// emitted by default. |
| 213 | /// |
| 214 | /// Note that DWARF debugging information is only emitted for textual-based |
| 215 | /// modules. For example if a WebAssembly binary is parsed via |
| 216 | /// [`Parser::parse_bytes`] this won't insert new DWARF information in such |
| 217 | /// a binary. Additionally if the text format used the `(module binary ...)` |
| 218 | /// form then no DWARF information will be emitted. |
| 219 | #[cfg (feature = "dwarf" )] |
| 220 | pub fn generate_dwarf(&mut self, generate: GenerateDwarf) -> &mut Self { |
| 221 | self.generate_dwarf = Some(generate); |
| 222 | self |
| 223 | } |
| 224 | |
| 225 | /// Equivalent of [`parse_file`] but uses this parser's settings. |
| 226 | pub fn parse_file(&self, path: impl AsRef<Path>) -> Result<Vec<u8>> { |
| 227 | self._parse_file(path.as_ref()) |
| 228 | } |
| 229 | |
| 230 | fn _parse_file(&self, file: &Path) -> Result<Vec<u8>> { |
| 231 | let contents = std::fs::read(file).map_err(|err| Error { |
| 232 | kind: Box::new(ErrorKind::Io { |
| 233 | err, |
| 234 | file: Some(file.to_owned()), |
| 235 | }), |
| 236 | })?; |
| 237 | match self.parse_bytes(Some(file), &contents) { |
| 238 | // If the result here is borrowed then that means that the input |
| 239 | // `&contents` was itself already a wasm module. We've already got |
| 240 | // an owned copy of that so return `contents` directly after |
| 241 | // double-checking it is indeed the same as the `bytes` return value |
| 242 | // here. That helps avoid a copy of `bytes` via something like |
| 243 | // `Cow::to_owned` which would otherwise copy the bytes. |
| 244 | Ok(Cow::Borrowed(bytes)) => { |
| 245 | assert_eq!(bytes.len(), contents.len()); |
| 246 | assert_eq!(bytes.as_ptr(), contents.as_ptr()); |
| 247 | Ok(contents) |
| 248 | } |
| 249 | Ok(Cow::Owned(bytes)) => Ok(bytes), |
| 250 | Err(mut e) => { |
| 251 | e.set_path(file); |
| 252 | Err(e) |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | /// Equivalent of [`parse_bytes`] but uses this parser's settings. |
| 258 | /// |
| 259 | /// The `path` argument is an optional path to use when error messages are |
| 260 | /// generated. |
| 261 | pub fn parse_bytes<'a>(&self, path: Option<&Path>, bytes: &'a [u8]) -> Result<Cow<'a, [u8]>> { |
| 262 | if bytes.starts_with(b" \0asm" ) { |
| 263 | return Ok(bytes.into()); |
| 264 | } |
| 265 | match str::from_utf8(bytes) { |
| 266 | Ok(s) => self._parse_str(path, s).map(|s| s.into()), |
| 267 | Err(_) => Err(Error { |
| 268 | kind: Box::new(ErrorKind::Custom { |
| 269 | msg: "input bytes aren't valid utf-8" .to_string(), |
| 270 | file: path.map(|p| p.to_owned()), |
| 271 | }), |
| 272 | }), |
| 273 | } |
| 274 | } |
| 275 | |
| 276 | /// Equivalent of [`parse_str`] but uses this parser's settings. |
| 277 | /// |
| 278 | /// The `path` argument is an optional path to use when error messages are |
| 279 | /// generated. |
| 280 | pub fn parse_str(&self, path: Option<&Path>, wat: impl AsRef<str>) -> Result<Vec<u8>> { |
| 281 | self._parse_str(path, wat.as_ref()) |
| 282 | } |
| 283 | |
| 284 | fn _parse_str(&self, path: Option<&Path>, wat: &str) -> Result<Vec<u8>> { |
| 285 | let mut _buf = ParseBuffer::new(wat).map_err(|e| Error::cvt(e, wat, path))?; |
| 286 | #[cfg (feature = "dwarf" )] |
| 287 | _buf.track_instr_spans(self.generate_dwarf.is_some()); |
| 288 | let mut ast = parser::parse::<wast::Wat>(&_buf).map_err(|e| Error::cvt(e, wat, path))?; |
| 289 | |
| 290 | let mut _opts = EncodeOptions::default(); |
| 291 | #[cfg (feature = "dwarf" )] |
| 292 | if let Some(style) = self.generate_dwarf { |
| 293 | _opts.dwarf(path.unwrap_or("<input>.wat" .as_ref()), wat, style); |
| 294 | } |
| 295 | _opts |
| 296 | .encode_wat(&mut ast) |
| 297 | .map_err(|e| Error::cvt(e, wat, path)) |
| 298 | } |
| 299 | } |
| 300 | |
/// Classification produced by [`Detect::from_bytes`] describing what a blob
/// of input bytes appears to contain.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Detect {
    /// The input looks like a module in the WebAssembly text format.
    WasmText,
    /// The input looks like a module in the WebAssembly binary format.
    WasmBinary,
    /// The input does not look like WebAssembly of either flavor.
    Unknown,
}
| 312 | |
| 313 | impl Detect { |
| 314 | /// Detect quickly if supplied bytes represent a Wasm module, |
| 315 | /// whether binary encoded or in WAT-encoded. |
| 316 | /// |
| 317 | /// This briefly lexes past whitespace and comments as a `*.wat` file to see if |
| 318 | /// we can find a left-paren. If that fails then it's probably `*.wit` instead. |
| 319 | /// |
| 320 | /// |
| 321 | /// Examples |
| 322 | /// ``` |
| 323 | /// use wat::Detect; |
| 324 | /// |
| 325 | /// assert_eq!(Detect::from_bytes(r#" |
| 326 | /// (module |
| 327 | /// (type (;0;) (func)) |
| 328 | /// (func (;0;) (type 0) |
| 329 | /// nop |
| 330 | /// ) |
| 331 | /// ) |
| 332 | /// "# ), Detect::WasmText); |
| 333 | /// ``` |
| 334 | pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Detect { |
| 335 | if bytes.as_ref().starts_with(b" \0asm" ) { |
| 336 | return Detect::WasmBinary; |
| 337 | } |
| 338 | let text = match std::str::from_utf8(bytes.as_ref()) { |
| 339 | Ok(s) => s, |
| 340 | Err(_) => return Detect::Unknown, |
| 341 | }; |
| 342 | |
| 343 | let lexer = Lexer::new(text); |
| 344 | let mut iter = lexer.iter(0); |
| 345 | |
| 346 | while let Some(next) = iter.next() { |
| 347 | match next.map(|t| t.kind) { |
| 348 | Ok(TokenKind::Whitespace) |
| 349 | | Ok(TokenKind::BlockComment) |
| 350 | | Ok(TokenKind::LineComment) => {} |
| 351 | Ok(TokenKind::LParen) => return Detect::WasmText, |
| 352 | _ => break, |
| 353 | } |
| 354 | } |
| 355 | |
| 356 | Detect::Unknown |
| 357 | } |
| 358 | |
| 359 | /// Returns whether this is either binary or textual wasm. |
| 360 | pub fn is_wasm(&self) -> bool { |
| 361 | match self { |
| 362 | Detect::WasmText | Detect::WasmBinary => true, |
| 363 | Detect::Unknown => false, |
| 364 | } |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | /// A convenience type definition for `Result` where the error is [`Error`] |
| 369 | pub type Result<T> = std::result::Result<T, Error>; |
| 370 | |
| 371 | /// Errors from this crate related to parsing WAT files |
| 372 | /// |
| 373 | /// An error can during example phases like: |
| 374 | /// |
| 375 | /// * Lexing can fail if the document is syntactically invalid. |
| 376 | /// * A string may not be utf-8 |
| 377 | /// * The syntactical structure of the wat file may be invalid |
| 378 | /// * The wat file may be semantically invalid such as having name resolution |
| 379 | /// failures |
| 380 | #[derive (Debug)] |
| 381 | pub struct Error { |
| 382 | kind: Box<ErrorKind>, |
| 383 | } |
| 384 | |
| 385 | #[derive (Debug)] |
| 386 | enum ErrorKind { |
| 387 | Wast(wast::Error), |
| 388 | Io { |
| 389 | err: std::io::Error, |
| 390 | file: Option<PathBuf>, |
| 391 | }, |
| 392 | Custom { |
| 393 | msg: String, |
| 394 | file: Option<PathBuf>, |
| 395 | }, |
| 396 | } |
| 397 | |
| 398 | impl Error { |
| 399 | fn cvt<E: Into<wast::Error>>(e: E, contents: &str, path: Option<&Path>) -> Error { |
| 400 | let mut err = e.into(); |
| 401 | if let Some(path) = path { |
| 402 | err.set_path(path); |
| 403 | } |
| 404 | err.set_text(contents); |
| 405 | Error { |
| 406 | kind: Box::new(ErrorKind::Wast(err)), |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | /// To provide a more useful error this function can be used to set |
| 411 | /// the file name that this error is associated with. |
| 412 | /// |
| 413 | /// The `file` here will be stored in this error and later rendered in the |
| 414 | /// `Display` implementation. |
| 415 | pub fn set_path<P: AsRef<Path>>(&mut self, file: P) { |
| 416 | let file = file.as_ref(); |
| 417 | match &mut *self.kind { |
| 418 | ErrorKind::Wast(e) => e.set_path(file), |
| 419 | ErrorKind::Custom { file: f, .. } => *f = Some(file.to_owned()), |
| 420 | ErrorKind::Io { file: f, .. } => *f = Some(file.to_owned()), |
| 421 | } |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | impl fmt::Display for Error { |
| 426 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 427 | match &*self.kind { |
| 428 | ErrorKind::Wast(err: &Error) => err.fmt(f), |
| 429 | ErrorKind::Custom { msg: &String, file: &Option, .. } => match file { |
| 430 | Some(file: &PathBuf) => { |
| 431 | write!(f, "failed to parse ` {}`: {}" , file.display(), msg) |
| 432 | } |
| 433 | None => msg.fmt(f), |
| 434 | }, |
| 435 | ErrorKind::Io { err: &Error, file: &Option, .. } => match file { |
| 436 | Some(file: &PathBuf) => { |
| 437 | write!(f, "failed to read from ` {}`" , file.display()) |
| 438 | } |
| 439 | None => err.fmt(f), |
| 440 | }, |
| 441 | } |
| 442 | } |
| 443 | } |
| 444 | |
| 445 | impl std::error::Error for Error { |
| 446 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |
| 447 | match &*self.kind { |
| 448 | ErrorKind::Wast(_) => None, |
| 449 | ErrorKind::Custom { .. } => None, |
| 450 | ErrorKind::Io { err: &Error, .. } => Some(err), |
| 451 | } |
| 452 | } |
| 453 | } |
| 454 | |
#[cfg(test)]
mod test {
    use super::*;

    /// Checks that a path set on an error shows up in its `Display` output
    /// for each kind of error: custom, I/O, and `wast`.
    #[test]
    fn test_set_path() {
        // Custom error: invalid utf-8 that isn't a wasm binary either.
        let mut e = parse_bytes(&[0xFF]).unwrap_err();
        e.set_path("foo");
        assert_eq!(
            e.to_string(),
            "failed to parse `foo`: input bytes aren't valid utf-8"
        );

        // I/O error: the path is recorded by `parse_file` itself.
        let e = parse_file("_does_not_exist_").unwrap_err();
        assert!(e
            .to_string()
            .starts_with("failed to read from `_does_not_exist_`"));

        // `wast` error: the path appears in the rendered source snippet.
        // NOTE(review): the exact column alignment below follows `wast`'s
        // error rendering, which is not visible in this file; confirm it
        // against the `wast` version in use.
        let mut e = parse_bytes("()".as_bytes()).unwrap_err();
        e.set_path("foo");
        assert_eq!(
            e.to_string(),
            "expected valid module field\n     --> foo:1:2\n      |\n    1 | ()\n      |  ^"
        );
    }
}
| 481 | |