lib.rs source code [crates/wat/src/lib.rs]

1	//! A Rust parser for the [WebAssembly Text format][wat]
2	//!
3	//! This crate contains a stable interface to the parser for the [WAT][wat]
4	//! format of WebAssembly text files. The format parsed by this crate follows
5	//! the [online specification][wat].
6	//!
7	//! # Examples
8	//!
9	//! Parse an in-memory string:
10	//!
11	//! ```
12	//! # fn foo() -> wat::Result<()> {
13	//! let wat = r#"
14	//! (module
15	//! (func $foo)
16	//!
17	//! (func (export "bar")
18	//! call $foo
19	//! )
20	//! )
21	//! "#;
22	//!
23	//! let binary = wat::parse_str(wat)?;
24	//! // ...
25	//! # Ok(())
26	//! # }
27	//! ```
28	//!
29	//! Parse an on-disk file:
30	//!
31	//! ```
32	//! # fn foo() -> wat::Result<()> {
33	//! let binary = wat::parse_file("./foo.wat")?;
34	//! // ...
35	//! # Ok(())
36	//! # }
37	//! ```
38	//!
39	//! ## Evolution of the WAT Format
40	//!
41	//! WebAssembly, and the WAT format, are an evolving specification. Features are
42	//! added to WAT, WAT changes, and sometimes WAT breaks. The policy of this
43	//! crate is that it will always follow the [official specification][wat] for
44	//! WAT files.
45	//!
46	//! Future WebAssembly features will be accepted to this parser and they will
47	//! not require a feature gate to opt-in. All implemented WebAssembly features
48	//! will be enabled at all times. Using a future WebAssembly feature in the WAT
49	//! format may cause breakage because while specifications are in development
50	//! the WAT syntax (and/or binary encoding) will often change. This crate will
51	//! do its best to keep up with these proposals, but breaking textual changes
52	//! will be published as non-breaking semver changes to this crate.
53	//!
54	//! ## Stability
55	//!
56	//! This crate is intended to be a very stable shim over the `wast` crate
57	//! which is expected to be much more unstable. The `wast` crate contains
58	//! AST data structures for parsing `.wat` files and they will evolve as the
59	//! WAT and WebAssembly specifications evolve over time.
60	//!
61	//! This crate is currently at version 1.x.y, and it is intended that it will
62	//! remain here for quite some time. Breaking changes to the WAT format will be
63	//! landed as a non-semver-breaking version change in this crate. This crate
64	//! will always follow the [official specification for WAT][wat].
65	//!
66	//! [wat]: http://webassembly.github.io/spec/core/text/index.html
67
68	#![deny(missing_docs)]
69	#![cfg_attr(docsrs, feature(doc_auto_cfg))]
70
71	use std::borrow::Cow;
72	use std::fmt;
73	use std::path::{Path, PathBuf};
74	use std::str;
75	use wast::core::EncodeOptions;
76	use wast::lexer::{Lexer, TokenKind};
77	use wast::parser::{self, ParseBuffer};
78
79	#[doc(inline)]
80	pub use wast::core::GenerateDwarf;
81
82	/// Parses a file on disk as a [WebAssembly Text format][wat] file, or a binary
83	/// WebAssembly file
84	///
85	/// This function will read the bytes on disk and delegate them to the
86	/// [`parse_bytes`] function. For more information on the behavior of parsing
87	/// see [`parse_bytes`].
88	///
89	/// # Errors
90	///
91	/// For information about errors, see the [`parse_bytes`] documentation.
92	///
93	/// # Examples
94	///
95	/// ```
96	/// # fn foo() -> wat::Result<()> {
97	/// let binary = wat::parse_file("./foo.wat")?;
98	/// // ...
99	/// # Ok(())
100	/// # }
101	/// ```
102	///
103	/// [wat]: http://webassembly.github.io/spec/core/text/index.html
104	pub fn parse_file(file: impl AsRef<Path>) -> Result<Vec<u8>> {
105	Parser::new().parse_file(path:file)
106	}
107
108	/// Parses in-memory bytes as either the [WebAssembly Text format][wat], or a
109	/// binary WebAssembly module.
110	///
111	/// This function will attempt to interpret the given bytes as one of two
112	/// options:
113	///
114	/// A utf-8 string which is a `.wat` file to be parsed.
115	/// A binary WebAssembly file starting with `b"\0asm"`*
116	///
117	/// If the input is a string then it will be parsed as `.wat`, and then after*
118	/// parsing it will be encoded back into a WebAssembly binary module. If the
119	/// input is a binary that starts with `b"\0asm"` it will be returned verbatim.
120	/// Everything that doesn't start with `b"\0asm"` will be parsed as a utf-8
121	/// `.wat` file, returning errors as appropriate.*
122	///
123	/// For more information about parsing wat files, see [`parse_str`].
124	///
125	/// # Errors
126	///
127	/// In addition to all of the errors that can be returned from [`parse_str`],
128	/// this function will also return an error if the input does not start with
129	/// `b"\0asm"` and is invalid utf-8. (failed to even try to call [`parse_str`]).
130	///
131	/// # Examples
132	///
133	/// ```
134	/// # fn foo() -> wat::Result<()> {
135	/// // Parsing bytes that are actually `.wat` files*
136	/// assert_eq!(&*wat::parse_bytes(b"(module)")?, b"`\0`asm`\x01\0\0\0`");
137	/// assert!(wat::parse_bytes(b"module").is_err());
138	/// assert!(wat::parse_bytes(b"binary`\0`file`\0\t`hat`\0`is`\0`not`\0`wat").is_err());
139	///
140	/// // Pass through binaries that look like real wasm files
141	/// assert_eq!(&*wat::parse_bytes(b"`\0`asm`\x01\0\0\0`")?, b"`\0`asm`\x01\0\0\0`");
142	/// # Ok(())
143	/// # }
144	/// ```
145	///
146	/// [wat]: http://webassembly.github.io/spec/core/text/index.html
147	pub fn parse_bytes(bytes: &[u8]) -> Result<Cow<'_, [u8]>> {
148	Parser::new().parse_bytes(path:None, bytes)
149	}
150
151	/// Parses an in-memory string as the [WebAssembly Text format][wat], returning
152	/// the file as a binary WebAssembly file.
153	///
154	/// This function is intended to be a stable convenience function for parsing a
155	/// wat file into a WebAssembly binary file. This is a high-level operation
156	/// which does not expose any parsing internals, for that you'll want to use the
157	/// `wast` crate.
158	///
159	/// # Errors
160	///
161	/// This function can fail for a number of reasons, including (but not limited
162	/// to):
163	///
164	/// The `wat` input may fail to lex, such as having invalid tokens or syntax*
165	/// The `wat` input may fail to parse, such as having incorrect syntactical*
166	/// structure
167	/// The `wat` input may contain names that could not be resolved*
168	///
169	/// # Examples
170	///
171	/// ```
172	/// # fn foo() -> wat::Result<()> {
173	/// assert_eq!(wat::parse_str("(module)")?, b"`\0`asm`\x01\0\0\0`");
174	/// assert!(wat::parse_str("module").is_err());
175	///
176	/// let wat = r#"
177	/// (module
178	/// (func $foo)
179	///
180	/// (func (export "bar")
181	/// call $foo
182	/// )
183	/// )
184	/// "#;
185	///
186	/// let binary = wat::parse_str(wat)?;
187	/// // ...
188	/// # Ok(())
189	/// # }
190	/// ```
191	///
192	/// [wat]: http://webassembly.github.io/spec/core/text/index.html
193	pub fn parse_str(wat: impl AsRef<str>) -> Result<Vec<u8>> {
194	Parser::default().parse_str(path:None, wat)
195	}
196
/// Parser configuration for transforming bytes into WebAssembly binaries.
///
/// Create one with [`Parser::new`] (or `Default`), adjust its settings, and
/// then call one of the `parse_*` methods.
#[derive(Default)]
pub struct Parser {
    /// DWARF generation style to pass to the encoder, or `None` when no DWARF
    /// generation has been requested.
    #[cfg(feature = "dwarf")]
    generate_dwarf: Option<GenerateDwarf>,
    /// Private unit field; being private, it prevents construction of this
    /// struct with literal syntax outside of this crate.
    _private: (),
}
204
205	impl Parser {
206	/// Creates a new parser with th default settings.
207	pub fn new() -> Parser {
208	Parser::default()
209	}
210
211	/// Indicates that DWARF debugging information should be generated and
212	/// emitted by default.
213	///
214	/// Note that DWARF debugging information is only emitted for textual-based
215	/// modules. For example if a WebAssembly binary is parsed via
216	/// [`Parser::parse_bytes`] this won't insert new DWARF information in such
217	/// a binary. Additionally if the text format used the `(module binary ...)`
218	/// form then no DWARF information will be emitted.
219	#[cfg(feature = "dwarf")]
220	pub fn generate_dwarf(&mut self, generate: GenerateDwarf) -> &mut Self {
221	self.generate_dwarf = Some(generate);
222	self
223	}
224
225	/// Equivalent of [`parse_file`] but uses this parser's settings.
226	pub fn parse_file(&self, path: impl AsRef<Path>) -> Result<Vec<u8>> {
227	self._parse_file(path.as_ref())
228	}
229
230	fn _parse_file(&self, file: &Path) -> Result<Vec<u8>> {
231	let contents = std::fs::read(file).map_err(\|err\| Error {
232	kind: Box::new(ErrorKind::Io {
233	err,
234	file: Some(file.to_owned()),
235	}),
236	})?;
237	match self.parse_bytes(Some(file), &contents) {
238	// If the result here is borrowed then that means that the input
239	// `&contents` was itself already a wasm module. We've already got
240	// an owned copy of that so return `contents` directly after
241	// double-checking it is indeed the same as the `bytes` return value
242	// here. That helps avoid a copy of `bytes` via something like
243	// `Cow::to_owned` which would otherwise copy the bytes.
244	Ok(Cow::Borrowed(bytes)) => {
245	assert_eq!(bytes.len(), contents.len());
246	assert_eq!(bytes.as_ptr(), contents.as_ptr());
247	Ok(contents)
248	}
249	Ok(Cow::Owned(bytes)) => Ok(bytes),
250	Err(mut e) => {
251	e.set_path(file);
252	Err(e)
253	}
254	}
255	}
256
257	/// Equivalent of [`parse_bytes`] but uses this parser's settings.
258	///
259	/// The `path` argument is an optional path to use when error messages are
260	/// generated.
261	pub fn parse_bytes<'a>(&self, path: Option<&Path>, bytes: &'a [u8]) -> Result<Cow<'a, [u8]>> {
262	if bytes.starts_with(b"`\0`asm") {
263	return Ok(bytes.into());
264	}
265	match str::from_utf8(bytes) {
266	Ok(s) => self._parse_str(path, s).map(\|s\| s.into()),
267	Err(_) => Err(Error {
268	kind: Box::new(ErrorKind::Custom {
269	msg: "input bytes aren't valid utf-8".to_string(),
270	file: path.map(\|p\| p.to_owned()),
271	}),
272	}),
273	}
274	}
275
276	/// Equivalent of [`parse_str`] but uses this parser's settings.
277	///
278	/// The `path` argument is an optional path to use when error messages are
279	/// generated.
280	pub fn parse_str(&self, path: Option<&Path>, wat: impl AsRef<str>) -> Result<Vec<u8>> {
281	self._parse_str(path, wat.as_ref())
282	}
283
284	fn _parse_str(&self, path: Option<&Path>, wat: &str) -> Result<Vec<u8>> {
285	let mut _buf = ParseBuffer::new(wat).map_err(\|e\| Error::cvt(e, wat, path))?;
286	#[cfg(feature = "dwarf")]
287	_buf.track_instr_spans(self.generate_dwarf.is_some());
288	let mut ast = parser::parse::<wast::Wat>(&_buf).map_err(\|e\| Error::cvt(e, wat, path))?;
289
290	let mut _opts = EncodeOptions::default();
291	#[cfg(feature = "dwarf")]
292	if let Some(style) = self.generate_dwarf {
293	_opts.dwarf(path.unwrap_or("<input>.wat".as_ref()), wat, style);
294	}
295	_opts
296	.encode_wat(&mut ast)
297	.map_err(\|e\| Error::cvt(e, wat, path))
298	}
299	}
300
/// Result of [`Detect::from_bytes`] to indicate what some input bytes look
/// like.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Detect {
    /// The input bytes look like the WebAssembly text format.
    WasmText,
    /// The input bytes look like the WebAssembly binary format.
    WasmBinary,
    /// The input bytes don't look like WebAssembly at all.
    Unknown,
}
312
313	impl Detect {
314	/// Detect quickly if supplied bytes represent a Wasm module,
315	/// whether binary encoded or in WAT-encoded.
316	///
317	/// This briefly lexes past whitespace and comments as a `.wat` file to see if*
318	/// we can find a left-paren. If that fails then it's probably `.wit` instead.*
319	///
320	///
321	/// Examples
322	/// ```
323	/// use wat::Detect;
324	///
325	/// assert_eq!(Detect::from_bytes(r#"
326	/// (module
327	/// (type (;0;) (func))
328	/// (func (;0;) (type 0)
329	/// nop
330	/// )
331	/// )
332	/// "#), Detect::WasmText);
333	/// ```
334	pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Detect {
335	if bytes.as_ref().starts_with(b"`\0`asm") {
336	return Detect::WasmBinary;
337	}
338	let text = match std::str::from_utf8(bytes.as_ref()) {
339	Ok(s) => s,
340	Err(_) => return Detect::Unknown,
341	};
342
343	let lexer = Lexer::new(text);
344	let mut iter = lexer.iter(`0`);
345
346	while let Some(next) = iter.next() {
347	match next.map(\|t\| t.kind) {
348	Ok(TokenKind::Whitespace)
349	\| Ok(TokenKind::BlockComment)
350	\| Ok(TokenKind::LineComment) => {}
351	Ok(TokenKind::LParen) => return Detect::WasmText,
352	_ => break,
353	}
354	}
355
356	Detect::Unknown
357	}
358
359	/// Returns whether this is either binary or textual wasm.
360	pub fn is_wasm(&self) -> bool {
361	match self {
362	Detect::WasmText \| Detect::WasmBinary => `true`,
363	Detect::Unknown => `false`,
364	}
365	}
366	}
367
368	/// A convenience type definition for `Result` where the error is [`Error`]
369	pub type Result<T> = std::result::Result<T, Error>;
370
371	/// Errors from this crate related to parsing WAT files
372	///
373	/// An error can during example phases like:
374	///
375	/// Lexing can fail if the document is syntactically invalid.*
376	/// A string may not be utf-8*
377	/// The syntactical structure of the wat file may be invalid*
378	/// The wat file may be semantically invalid such as having name resolution*
379	/// failures
380	#[derive(Debug)]
381	pub struct Error {
382	kind: Box<ErrorKind>,
383	}
384
385	#[derive(Debug)]
386	enum ErrorKind {
387	Wast(wast::Error),
388	Io {
389	err: std::io::Error,
390	file: Option<PathBuf>,
391	},
392	Custom {
393	msg: String,
394	file: Option<PathBuf>,
395	},
396	}
397
398	impl Error {
399	fn cvt<E: Into<wast::Error>>(e: E, contents: &str, path: Option<&Path>) -> Error {
400	let mut err = e.into();
401	if let Some(path) = path {
402	err.set_path(path);
403	}
404	err.set_text(contents);
405	Error {
406	kind: Box::new(ErrorKind::Wast(err)),
407	}
408	}
409
410	/// To provide a more useful error this function can be used to set
411	/// the file name that this error is associated with.
412	///
413	/// The `file` here will be stored in this error and later rendered in the
414	/// `Display` implementation.
415	pub fn set_path<P: AsRef<Path>>(&mut self, file: P) {
416	let file = file.as_ref();
417	match &mut *self.kind {
418	ErrorKind::Wast(e) => e.set_path(file),
419	ErrorKind::Custom { file: f, .. } => *f = Some(file.to_owned()),
420	ErrorKind::Io { file: f, .. } => *f = Some(file.to_owned()),
421	}
422	}
423	}
424
425	impl fmt::Display for Error {
426	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
427	match &*self.kind {
428	ErrorKind::Wast(err: &Error) => err.fmt(f),
429	ErrorKind::Custom { msg: &String, file: &Option, .. } => match file {
430	Some(file: &PathBuf) => {
431	write!(f, "failed to parse `{}`: {}", file.display(), msg)
432	}
433	None => msg.fmt(f),
434	},
435	ErrorKind::Io { err: &Error, file: &Option, .. } => match file {
436	Some(file: &PathBuf) => {
437	write!(f, "failed to read from `{}`", file.display())
438	}
439	None => err.fmt(f),
440	},
441	}
442	}
443	}
444
445	impl std::error::Error for Error {
446	fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
447	match &*self.kind {
448	ErrorKind::Wast(_) => None,
449	ErrorKind::Custom { .. } => None,
450	ErrorKind::Io { err: &Error, .. } => Some(err),
451	}
452	}
453	}
454
#[cfg(test)]
mod test {
    use super::*;

    /// Checks that `Error::set_path` is reflected in `Display` output for
    /// each kind of error this crate can produce.
    #[test]
    fn test_set_path() {
        // Custom error: a lone 0xFF byte is neither wasm nor valid utf-8.
        let mut e = parse_bytes(&[0xFF]).unwrap_err();
        e.set_path("foo");
        assert_eq!(
            e.to_string(),
            "failed to parse `foo`: input bytes aren't valid utf-8"
        );

        // I/O error: the path is recorded automatically by `parse_file`.
        let e = parse_file("_does_not_exist_").unwrap_err();
        assert!(e
            .to_string()
            .starts_with("failed to read from `_does_not_exist_`"));

        // `wast` error: the path is forwarded to the underlying error.
        let mut e = parse_bytes("()".as_bytes()).unwrap_err();
        e.set_path("foo");
        assert_eq!(
            e.to_string(),
            "expected valid module field\n     --> foo:1:2\n      |\n    1 | ()\n      |  ^"
        );
    }
}
481