parser.rs source code [crates/wasmparser-0.223.0/src/parser.rs]

1	use crate::binary_reader::WASM_MAGIC_NUMBER;
2	use crate::prelude::*;
3	#[cfg(feature = "features")]
4	use crate::WasmFeatures;
5	#[cfg(feature = "component-model")]
6	use crate::{
7	limits::MAX_WASM_MODULE_SIZE, ComponentCanonicalSectionReader, ComponentExportSectionReader,
8	ComponentImportSectionReader, ComponentInstanceSectionReader, ComponentStartFunction,
9	ComponentTypeSectionReader, CoreTypeSectionReader, InstanceSectionReader, SectionLimited,
10	};
11	use crate::{
12	BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,
13	ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,
14	ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,
15	TypeSectionReader,
16	};
17	use core::fmt;
18	use core::iter;
19	use core::ops::Range;
20
/// Version number expected in the header of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

/// Version number expected in the header of a WebAssembly component.
///
/// This started at `0xa` and is incremented from there. Once the component
/// model is stabilized this will become `0x1`. History of changes:
///
/// * [????-??-??] 0xa - original version
/// * [2023-01-05] 0xb - `export` introduces an alias
/// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
/// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte
///   which allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

/// Value of the upper 16 bits of the header version word for a core module.
const KIND_MODULE: u16 = 0x00;
/// Value of the upper 16 bits of the header version word for a component.
const KIND_COMPONENT: u16 = 0x01;
35
/// The binary formats that the parser knows how to decode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is a core WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
44
45	/// An incremental parser of a binary WebAssembly module or component.
46	///
47	/// This type is intended to be used to incrementally parse a WebAssembly module
48	/// or component as bytes become available for the module. This can also be used
49	/// to parse modules or components that are already entirely resident within memory.
50	///
51	/// This primary function for a parser is the [`Parser::parse`] function which
52	/// will incrementally consume input. You can also use the [`Parser::parse_all`]
53	/// function to parse a module or component that is entirely resident in memory.
54	#[derive(Debug, Clone)]
55	pub struct Parser {
56	state: State,
57	offset: u64,
58	max_size: u64,
59	encoding: Encoding,
60	#[cfg(feature = "features")]
61	features: WasmFeatures,
62	}
63
/// Internal position of a [`Parser`] within the binary being parsed.
#[derive(Clone, Debug)]
enum State {
    /// Nothing has been parsed yet; the header is expected next.
    Header,
    /// The header has been parsed and the start of a section (or the end of
    /// the input) is expected next.
    SectionStart,
    /// The parser is inside the code section, yielding one function body at
    /// a time.
    FunctionBody {
        /// Number of function bodies that have not been yielded yet.
        remaining: u32,
        /// Number of bytes of the code section that have not been consumed yet.
        len: u32,
    },
}
70
71	/// A successful return payload from [`Parser::parse`].
72	///
73	/// On success one of two possible values can be returned, either that more data
74	/// is needed to continue parsing or a chunk of the input was parsed, indicating
75	/// how much of it was parsed.
76	#[derive(Debug)]
77	pub enum Chunk<'a> {
78	/// This can be returned at any time and indicates that more data is needed
79	/// to proceed with parsing. Zero bytes were consumed from the input to
80	/// [`Parser::parse`]. The `u64` value here is a hint as to how many more
81	/// bytes are needed to continue parsing.
82	NeedMoreData(u64),
83
84	/// A chunk was successfully parsed.
85	Parsed {
86	/// This many bytes of the `data` input to [`Parser::parse`] were
87	/// consumed to produce `payload`.
88	consumed: usize,
89	/// The value that we actually parsed.
90	payload: Payload<'a>,
91	},
92	}
93
94	/// Values that can be parsed from a WebAssembly module or component.
95	///
96	/// This enumeration is all possible chunks of pieces that can be parsed by a
97	/// [`Parser`] from a binary WebAssembly module or component. Note that for many
98	/// sections the entire section is parsed all at once, whereas other functions,
99	/// like the code section, are parsed incrementally. This is a distinction where some
100	/// sections, like the type section, are required to be fully resident in memory
101	/// (fully downloaded) before proceeding. Other sections, like the code section,
102	/// can be processed in a streaming fashion where each function is extracted
103	/// individually so it can possibly be shipped to another thread while you wait
104	/// for more functions to get downloaded.
105	///
106	/// Note that payloads, when returned, do not indicate that the module or component
107	/// is valid. For example when you receive a `Payload::TypeSection` the type
108	/// section itself has not yet actually been parsed. The reader returned will be
109	/// able to parse it, but you'll have to actually iterate the reader to do the
110	/// full parse. Each payload returned is intended to be a window* into the*
111	/// original `data` passed to [`Parser::parse`] which can be further processed
112	/// if necessary.
113	#[non_exhaustive]
114	pub enum Payload<'a> {
115	/// Indicates the header of a WebAssembly module or component.
116	Version {
117	/// The version number found in the header.
118	num: u16,
119	/// The encoding format being parsed.
120	encoding: Encoding,
121	/// The range of bytes that were parsed to consume the header of the
122	/// module or component. Note that this range is relative to the start
123	/// of the byte stream.
124	range: Range<usize>,
125	},
126
127	/// A module type section was received and the provided reader can be
128	/// used to parse the contents of the type section.
129	TypeSection(TypeSectionReader<'a>),
130	/// A module import section was received and the provided reader can be
131	/// used to parse the contents of the import section.
132	ImportSection(ImportSectionReader<'a>),
133	/// A module function section was received and the provided reader can be
134	/// used to parse the contents of the function section.
135	FunctionSection(FunctionSectionReader<'a>),
136	/// A module table section was received and the provided reader can be
137	/// used to parse the contents of the table section.
138	TableSection(TableSectionReader<'a>),
139	/// A module memory section was received and the provided reader can be
140	/// used to parse the contents of the memory section.
141	MemorySection(MemorySectionReader<'a>),
142	/// A module tag section was received, and the provided reader can be
143	/// used to parse the contents of the tag section.
144	TagSection(TagSectionReader<'a>),
145	/// A module global section was received and the provided reader can be
146	/// used to parse the contents of the global section.
147	GlobalSection(GlobalSectionReader<'a>),
148	/// A module export section was received, and the provided reader can be
149	/// used to parse the contents of the export section.
150	ExportSection(ExportSectionReader<'a>),
151	/// A module start section was received.
152	StartSection {
153	/// The start function index
154	func: u32,
155	/// The range of bytes that specify the `func` field, specified in
156	/// offsets relative to the start of the byte stream.
157	range: Range<usize>,
158	},
159	/// A module element section was received and the provided reader can be
160	/// used to parse the contents of the element section.
161	ElementSection(ElementSectionReader<'a>),
162	/// A module data count section was received.
163	DataCountSection {
164	/// The number of data segments.
165	count: u32,
166	/// The range of bytes that specify the `count` field, specified in
167	/// offsets relative to the start of the byte stream.
168	range: Range<usize>,
169	},
170	/// A module data section was received and the provided reader can be
171	/// used to parse the contents of the data section.
172	DataSection(DataSectionReader<'a>),
173	/// Indicator of the start of the code section of a WebAssembly module.
174	///
175	/// This entry is returned whenever the code section starts. The `count`
176	/// field indicates how many entries are in this code section. After
177	/// receiving this start marker you're guaranteed that the next `count`
178	/// items will be either `CodeSectionEntry` or an error will be returned.
179	///
180	/// This, unlike other sections, is intended to be used for streaming the
181	/// contents of the code section. The code section is not required to be
182	/// fully resident in memory when we parse it. Instead a [`Parser`] is
183	/// capable of parsing piece-by-piece of a code section.
184	CodeSectionStart {
185	/// The number of functions in this section.
186	count: u32,
187	/// The range of bytes that represent this section, specified in
188	/// offsets relative to the start of the byte stream.
189	range: Range<usize>,
190	/// The size, in bytes, of the remaining contents of this section.
191	///
192	/// This can be used in combination with [`Parser::skip_section`]
193	/// where the caller will know how many bytes to skip before feeding
194	/// bytes into `Parser` again.
195	size: u32,
196	},
197	/// An entry of the code section, a function, was parsed from a WebAssembly
198	/// module.
199	///
200	/// This entry indicates that a function was successfully received from the
201	/// code section, and the payload here is the window into the original input
202	/// where the function resides. Note that the function itself has not been
203	/// parsed, it's only been outlined. You'll need to process the
204	/// `FunctionBody` provided to test whether it parses and/or is valid.
205	CodeSectionEntry(FunctionBody<'a>),
206
207	/// A core module section was received and the provided parser can be
208	/// used to parse the nested module.
209	///
210	/// This variant is special in that it returns a sub-`Parser`. Upon
211	/// receiving a `ModuleSection` it is expected that the returned
212	/// `Parser` will be used instead of the parent `Parser` until the parse has
213	/// finished. You'll need to feed data into the `Parser` returned until it
214	/// returns `Payload::End`. After that you'll switch back to the parent
215	/// parser to resume parsing the rest of the current component.
216	///
217	/// Note that binaries will not be parsed correctly if you feed the data for
218	/// a nested module into the parent [`Parser`].
219	#[cfg(feature = "component-model")]
220	ModuleSection {
221	/// The parser for the nested module.
222	parser: Parser,
223	/// The range of bytes that represent the nested module in the
224	/// original byte stream.
225	///
226	/// Note that, to better support streaming parsing and validation, the
227	/// validator does not* check that this range is in bounds.*
228	unchecked_range: Range<usize>,
229	},
230	/// A core instance section was received and the provided parser can be
231	/// used to parse the contents of the core instance section.
232	///
233	/// Currently this section is only parsed in a component.
234	#[cfg(feature = "component-model")]
235	InstanceSection(InstanceSectionReader<'a>),
236	/// A core type section was received and the provided parser can be
237	/// used to parse the contents of the core type section.
238	///
239	/// Currently this section is only parsed in a component.
240	#[cfg(feature = "component-model")]
241	CoreTypeSection(CoreTypeSectionReader<'a>),
242	/// A component section from a WebAssembly component was received and the
243	/// provided parser can be used to parse the nested component.
244	///
245	/// This variant is special in that it returns a sub-`Parser`. Upon
246	/// receiving a `ComponentSection` it is expected that the returned
247	/// `Parser` will be used instead of the parent `Parser` until the parse has
248	/// finished. You'll need to feed data into the `Parser` returned until it
249	/// returns `Payload::End`. After that you'll switch back to the parent
250	/// parser to resume parsing the rest of the current component.
251	///
252	/// Note that binaries will not be parsed correctly if you feed the data for
253	/// a nested component into the parent [`Parser`].
254	#[cfg(feature = "component-model")]
255	ComponentSection {
256	/// The parser for the nested component.
257	parser: Parser,
258	/// The range of bytes that represent the nested component in the
259	/// original byte stream.
260	///
261	/// Note that, to better support streaming parsing and validation, the
262	/// validator does not* check that this range is in bounds.*
263	unchecked_range: Range<usize>,
264	},
265	/// A component instance section was received and the provided reader can be
266	/// used to parse the contents of the component instance section.
267	#[cfg(feature = "component-model")]
268	ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
269	/// A component alias section was received and the provided reader can be
270	/// used to parse the contents of the component alias section.
271	#[cfg(feature = "component-model")]
272	ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
273	/// A component type section was received and the provided reader can be
274	/// used to parse the contents of the component type section.
275	#[cfg(feature = "component-model")]
276	ComponentTypeSection(ComponentTypeSectionReader<'a>),
277	/// A component canonical section was received and the provided reader can be
278	/// used to parse the contents of the component canonical section.
279	#[cfg(feature = "component-model")]
280	ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
281	/// A component start section was received.
282	#[cfg(feature = "component-model")]
283	ComponentStartSection {
284	/// The start function description.
285	start: ComponentStartFunction,
286	/// The range of bytes that specify the `start` field.
287	range: Range<usize>,
288	},
289	/// A component import section was received and the provided reader can be
290	/// used to parse the contents of the component import section.
291	#[cfg(feature = "component-model")]
292	ComponentImportSection(ComponentImportSectionReader<'a>),
293	/// A component export section was received, and the provided reader can be
294	/// used to parse the contents of the component export section.
295	#[cfg(feature = "component-model")]
296	ComponentExportSection(ComponentExportSectionReader<'a>),
297
298	/// A module or component custom section was received.
299	CustomSection(CustomSectionReader<'a>),
300
301	/// An unknown section was found.
302	///
303	/// This variant is returned for all unknown sections encountered. This
304	/// likely wants to be interpreted as an error by consumers of the parser,
305	/// but this can also be used to parse sections currently unsupported by
306	/// the parser.
307	UnknownSection {
308	/// The 8-bit identifier for this section.
309	id: u8,
310	/// The contents of this section.
311	contents: &'a [u8],
312	/// The range of bytes, relative to the start of the original data
313	/// stream, that the contents of this section reside in.
314	range: Range<usize>,
315	},
316
317	/// The end of the WebAssembly module or component was reached.
318	///
319	/// The value is the offset in the input byte stream where the end
320	/// was reached.
321	End(usize),
322	}
323
// Section ids used by core WebAssembly modules. Id 0 (the custom section) is
// accepted in both modules and components.

/// Id of a custom section.
const CUSTOM_SECTION: u8 = 0;
/// Id of the module type section.
const TYPE_SECTION: u8 = 1;
/// Id of the module import section.
const IMPORT_SECTION: u8 = 2;
/// Id of the module function section.
const FUNCTION_SECTION: u8 = 3;
/// Id of the module table section.
const TABLE_SECTION: u8 = 4;
/// Id of the module memory section.
const MEMORY_SECTION: u8 = 5;
/// Id of the module global section.
const GLOBAL_SECTION: u8 = 6;
/// Id of the module export section.
const EXPORT_SECTION: u8 = 7;
/// Id of the module start section.
const START_SECTION: u8 = 8;
/// Id of the module element section.
const ELEMENT_SECTION: u8 = 9;
/// Id of the module code section.
const CODE_SECTION: u8 = 10;
/// Id of the module data section.
const DATA_SECTION: u8 = 11;
/// Id of the module data count section.
const DATA_COUNT_SECTION: u8 = 12;
/// Id of the module tag section.
const TAG_SECTION: u8 = 13;

// Section ids used by WebAssembly components. These overlap numerically with
// the module ids above and are only meaningful when the encoding is a
// component.

/// Id of a nested core module section.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
/// Id of the core instance section.
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
/// Id of the core type section.
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
/// Id of a nested component section.
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
/// Id of the component instance section.
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
/// Id of the component alias section.
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
/// Id of the component type section.
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
/// Id of the component canonical section.
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
/// Id of the component start section.
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
/// Id of the component import section.
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
/// Id of the component export section.
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;
361
362	impl Parser {
363	/// Creates a new parser.
364	///
365	/// Reports errors and ranges relative to `offset` provided, where `offset`
366	/// is some logical offset within the input stream that we're parsing.
367	pub fn new(offset: u64) -> Parser {
368	Parser {
369	state: State::Header,
370	offset,
371	max_size: u64::MAX,
372	// Assume the encoding is a module until we know otherwise
373	encoding: Encoding::Module,
374	#[cfg(feature = "features")]
375	features: WasmFeatures::all(),
376	}
377	}
378
379	/// Tests whether `bytes` looks like a core WebAssembly module.
380	///
381	/// This will inspect the first 8 bytes of `bytes` and return `true` if it
382	/// starts with the standard core WebAssembly header.
383	pub fn is_core_wasm(bytes: &[u8]) -> bool {
384	const HEADER: [u8; `8`] = [
385	WASM_MAGIC_NUMBER[`0`],
386	WASM_MAGIC_NUMBER[`1`],
387	WASM_MAGIC_NUMBER[`2`],
388	WASM_MAGIC_NUMBER[`3`],
389	WASM_MODULE_VERSION.to_le_bytes()[`0`],
390	WASM_MODULE_VERSION.to_le_bytes()[`1`],
391	KIND_MODULE.to_le_bytes()[`0`],
392	KIND_MODULE.to_le_bytes()[`1`],
393	];
394	bytes.starts_with(&HEADER)
395	}
396
397	/// Tests whether `bytes` looks like a WebAssembly component.
398	///
399	/// This will inspect the first 8 bytes of `bytes` and return `true` if it
400	/// starts with the standard WebAssembly component header.
401	pub fn is_component(bytes: &[u8]) -> bool {
402	const HEADER: [u8; `8`] = [
403	WASM_MAGIC_NUMBER[`0`],
404	WASM_MAGIC_NUMBER[`1`],
405	WASM_MAGIC_NUMBER[`2`],
406	WASM_MAGIC_NUMBER[`3`],
407	WASM_COMPONENT_VERSION.to_le_bytes()[`0`],
408	WASM_COMPONENT_VERSION.to_le_bytes()[`1`],
409	KIND_COMPONENT.to_le_bytes()[`0`],
410	KIND_COMPONENT.to_le_bytes()[`1`],
411	];
412	bytes.starts_with(&HEADER)
413	}
414
/// Returns the set of wasm features this parser currently applies while
/// parsing.
///
/// New parsers start out with [`WasmFeatures::all()`].
///
/// For more information see [`BinaryReader::new`].
#[cfg(feature = "features")]
pub fn features(&self) -> WasmFeatures {
    let active = self.features;
    active
}
425
/// Replaces the wasm features active while parsing with `features`.
///
/// New parsers start out with [`WasmFeatures::all()`].
///
/// For more information see [`BinaryReader::new`].
#[cfg(feature = "features")]
pub fn set_features(&mut self, features: WasmFeatures) {
    let slot = &mut self.features;
    *slot = features;
}
435
436	/// Returns the original offset that this parser is currently at.
437	pub fn offset(&self) -> u64 {
438	self.offset
439	}
440
441	/// Attempts to parse a chunk of data.
442	///
443	/// This method will attempt to parse the next incremental portion of a
444	/// WebAssembly binary. Data available for the module or component is
445	/// provided as `data`, and the data can be incomplete if more data has yet
446	/// to arrive. The `eof` flag indicates whether more data will ever be received.
447	///
448	/// There are two ways parsing can succeed with this method:
449	///
450	/// `Chunk::NeedMoreData` - this indicates that there is not enough bytes*
451	/// in `data` to parse a payload. The caller needs to wait for more data to
452	/// be available in this situation before calling this method again. It is
453	/// guaranteed that this is only returned if `eof` is `false`.
454	///
455	/// `Chunk::Parsed` - this indicates that a chunk of the input was*
456	/// successfully parsed. The payload is available in this variant of what
457	/// was parsed, and this also indicates how many bytes of `data` was
458	/// consumed. It's expected that the caller will not provide these bytes
459	/// back to the [`Parser`] again.
460	///
461	/// Note that all `Chunk` return values are connected, with a lifetime, to
462	/// the input buffer. Each parsed chunk borrows the input buffer and is a
463	/// view into it for successfully parsed chunks.
464	///
465	/// It is expected that you'll call this method until `Payload::End` is
466	/// reached, at which point you're guaranteed that the parse has completed.
467	/// Note that complete parsing, for the top-level module or component,
468	/// implies that `data` is empty and `eof` is `true`.
469	///
470	/// # Errors
471	///
472	/// Parse errors are returned as an `Err`. Errors can happen when the
473	/// structure of the data is unexpected or if sections are too large for
474	/// example. Note that errors are not returned for malformed contents* of*
475	/// sections here. Sections are generally not individually parsed and each
476	/// returned [`Payload`] needs to be iterated over further to detect all
477	/// errors.
478	///
479	/// # Examples
480	///
481	/// An example of reading a wasm file from a stream (`std::io::Read`) and
482	/// incrementally parsing it.
483	///
484	/// ```
485	/// use std::io::Read;
486	/// use anyhow::Result;
487	/// use wasmparser::{Parser, Chunk, Payload::*};
488	///
489	/// fn parse(mut reader: impl Read) -> Result<()> {
490	/// let mut buf = Vec::new();
491	/// let mut cur = Parser::new(`0`);
492	/// let mut eof = `false`;
493	/// let mut stack = Vec::new();
494	///
495	/// loop {
496	/// let (payload, consumed) = match cur.parse(&buf, eof)? {
497	/// Chunk::NeedMoreData(hint) => {
498	/// assert!(!eof); // otherwise an error would be returned
499	///
500	/// // Use the hint to preallocate more space, then read
501	/// // some more data into our buffer.
502	/// //
503	/// // Note that the buffer management here is not ideal,
504	/// // but it's compact enough to fit in an example!
505	/// let len = buf.len();
506	/// buf.extend((`0`..hint).map(\|_\| `0u8`));
507	/// let n = reader.read(&mut buf[len..])?;
508	/// buf.truncate(len + n);
509	/// eof = n == `0`;
510	/// continue;
511	/// }
512	///
513	/// Chunk::Parsed { consumed, payload } => (payload, consumed),
514	/// };
515	///
516	/// match payload {
517	/// // Sections for WebAssembly modules
518	/// Version { .. } => { / ... / }
519	/// TypeSection(_) => { / ... / }
520	/// ImportSection(_) => { / ... / }
521	/// FunctionSection(_) => { / ... / }
522	/// TableSection(_) => { / ... / }
523	/// MemorySection(_) => { / ... / }
524	/// TagSection(_) => { / ... / }
525	/// GlobalSection(_) => { / ... / }
526	/// ExportSection(_) => { / ... / }
527	/// StartSection { .. } => { / ... / }
528	/// ElementSection(_) => { / ... / }
529	/// DataCountSection { .. } => { / ... / }
530	/// DataSection(_) => { / ... / }
531	///
532	/// // Here we know how many functions we'll be receiving as
533	/// // `CodeSectionEntry`, so we can prepare for that, and
534	/// // afterwards we can parse and handle each function
535	/// // individually.
536	/// CodeSectionStart { .. } => { / ... / }
537	/// CodeSectionEntry(body) => {
538	/// // here we can iterate over `body` to parse the function
539	/// // and its locals
540	/// }
541	///
542	/// // Sections for WebAssembly components
543	/// InstanceSection(_) => { / ... / }
544	/// CoreTypeSection(_) => { / ... / }
545	/// ComponentInstanceSection(_) => { / ... / }
546	/// ComponentAliasSection(_) => { / ... / }
547	/// ComponentTypeSection(_) => { / ... / }
548	/// ComponentCanonicalSection(_) => { / ... / }
549	/// ComponentStartSection { .. } => { / ... / }
550	/// ComponentImportSection(_) => { / ... / }
551	/// ComponentExportSection(_) => { / ... / }
552	///
553	/// ModuleSection { parser, .. }
554	/// \| ComponentSection { parser, .. } => {
555	/// stack.push(cur.clone());
556	/// cur = parser.clone();
557	/// }
558	///
559	/// CustomSection(_) => { / ... / }
560	///
561	/// // Once we've reached the end of a parser we either resume
562	/// // at the parent parser or we break out of the loop because
563	/// // we're done.
564	/// End(_) => {
565	/// if let Some(parent_parser) = stack.pop() {
566	/// cur = parent_parser;
567	/// } else {
568	/// break;
569	/// }
570	/// }
571	///
572	/// // most likely you'd return an error here
573	/// _ => { / ... / }
574	/// }
575	///
576	/// // once we're done processing the payload we can forget the
577	/// // original.
578	/// buf.drain(..consumed);
579	/// }
580	///
581	/// Ok(())
582	/// }
583	///
584	/// # parse(&b"`\0`asm`\x01\0\0\0`"[..]).unwrap();
585	/// ```
586	pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
587	let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
588	(&data[..(self.max_size as usize)], `true`)
589	} else {
590	(data, eof)
591	};
592	// TODO: thread through `offset: u64` to `BinaryReader`, remove
593	// the cast here.
594	let starting_offset = self.offset as usize;
595	let mut reader = BinaryReader::new(data, starting_offset);
596	#[cfg(feature = "features")]
597	{
598	reader.set_features(self.features);
599	}
600	match self.parse_reader(&mut reader, eof) {
601	Ok(payload) => {
602	// Be sure to update our offset with how far we got in the
603	// reader
604	let consumed = reader.original_position() - starting_offset;
605	self.offset += usize_to_u64(consumed);
606	self.max_size -= usize_to_u64(consumed);
607	Ok(Chunk::Parsed {
608	consumed: consumed,
609	payload,
610	})
611	}
612	Err(e) => {
613	// If we're at EOF then there's no way we can recover from any
614	// error, so continue to propagate it.
615	if eof {
616	return Err(e);
617	}
618
619	// If our error doesn't look like it can be resolved with more
620	// data being pulled down, then propagate it, otherwise switch
621	// the error to "feed me please"
622	match e.inner.needed_hint {
623	Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
624	None => Err(e),
625	}
626	}
627	}
628	}
629
630	fn parse_reader<'a>(
631	&mut self,
632	reader: &mut BinaryReader<'a>,
633	eof: bool,
634	) -> Result<Payload<'a>> {
635	use Payload::*;
636
637	match self.state {
638	State::Header => {
639	let start = reader.original_position();
640	let header_version = reader.read_header_version()?;
641	self.encoding = match (header_version >> `16`) as u16 {
642	KIND_MODULE => Encoding::Module,
643	KIND_COMPONENT => Encoding::Component,
644	_ => bail!(start + `4`, "unknown binary version: {header_version:#`10`x}"),
645	};
646	let num = header_version as u16;
647	self.state = State::SectionStart;
648	Ok(Version {
649	num,
650	encoding: self.encoding,
651	range: start..reader.original_position(),
652	})
653	}
654	State::SectionStart => {
655	// If we're at eof and there are no bytes in our buffer, then
656	// that means we reached the end of the data since it's
657	// just a bunch of sections concatenated after the header.
658	if eof && reader.bytes_remaining() == `0` {
659	return Ok(Payload::End(reader.original_position()));
660	}
661
662	let id_pos = reader.original_position();
663	let id = reader.read_u8()?;
664	if id & `0x80` != `0` {
665	return Err(BinaryReaderError::new("malformed section id", id_pos));
666	}
667	let len_pos = reader.original_position();
668	let mut len = reader.read_var_u32()?;
669
670	// Test to make sure that this section actually fits within
671	// `Parser::max_size`. This doesn't matter for top-level modules
672	// but it is required for nested modules/components to correctly ensure
673	// that all sections live entirely within their section of the
674	// file.
675	let consumed = reader.original_position() - id_pos;
676	let section_overflow = self
677	.max_size
678	.checked_sub(usize_to_u64(consumed))
679	.and_then(\|s\| s.checked_sub(len.into()))
680	.is_none();
681	if section_overflow {
682	return Err(BinaryReaderError::new("section too large", len_pos));
683	}
684
685	match (self.encoding, id) {
686	// Sections for both modules and components.
687	(_, `0`) => section(reader, len, CustomSectionReader::new, CustomSection),
688
689	// Module sections
690	(Encoding::Module, TYPE_SECTION) => {
691	section(reader, len, TypeSectionReader::new, TypeSection)
692	}
693	(Encoding::Module, IMPORT_SECTION) => {
694	section(reader, len, ImportSectionReader::new, ImportSection)
695	}
696	(Encoding::Module, FUNCTION_SECTION) => {
697	section(reader, len, FunctionSectionReader::new, FunctionSection)
698	}
699	(Encoding::Module, TABLE_SECTION) => {
700	section(reader, len, TableSectionReader::new, TableSection)
701	}
702	(Encoding::Module, MEMORY_SECTION) => {
703	section(reader, len, MemorySectionReader::new, MemorySection)
704	}
705	(Encoding::Module, GLOBAL_SECTION) => {
706	section(reader, len, GlobalSectionReader::new, GlobalSection)
707	}
708	(Encoding::Module, EXPORT_SECTION) => {
709	section(reader, len, ExportSectionReader::new, ExportSection)
710	}
711	(Encoding::Module, START_SECTION) => {
712	let (func, range) = single_item(reader, len, "start")?;
713	Ok(StartSection { func, range })
714	}
715	(Encoding::Module, ELEMENT_SECTION) => {
716	section(reader, len, ElementSectionReader::new, ElementSection)
717	}
718	(Encoding::Module, CODE_SECTION) => {
719	let start = reader.original_position();
720	let count = delimited(reader, &mut len, \|r\| r.read_var_u32())?;
721	let range = start..reader.original_position() + len as usize;
722	self.state = State::FunctionBody {
723	remaining: count,
724	len,
725	};
726	Ok(CodeSectionStart {
727	count,
728	range,
729	size: len,
730	})
731	}
732	(Encoding::Module, DATA_SECTION) => {
733	section(reader, len, DataSectionReader::new, DataSection)
734	}
735	(Encoding::Module, DATA_COUNT_SECTION) => {
736	let (count, range) = single_item(reader, len, "data count")?;
737	Ok(DataCountSection { count, range })
738	}
739	(Encoding::Module, TAG_SECTION) => {
740	section(reader, len, TagSectionReader::new, TagSection)
741	}
742
743	// Component sections
744	#[cfg(feature = "component-model")]
745	(Encoding::Component, COMPONENT_MODULE_SECTION)
746	\| (Encoding::Component, COMPONENT_SECTION) => {
747	if len as usize > MAX_WASM_MODULE_SIZE {
748	bail!(
749	len_pos,
750	"{} section is too large",
751	if id == `1` { "module" } else { "component " }
752	);
753	}
754
755	let range = reader.original_position()
756	..reader.original_position() + usize::try_from(len).unwrap();
757	self.max_size -= u64::from(len);
758	self.offset += u64::from(len);
759	let mut parser = Parser::new(usize_to_u64(reader.original_position()));
760	#[cfg(feature = "features")]
761	{
762	parser.features = self.features;
763	}
764	parser.max_size = u64::from(len);
765
766	Ok(match id {
767	`1` => ModuleSection {
768	parser,
769	unchecked_range: range,
770	},
771	`4` => ComponentSection {
772	parser,
773	unchecked_range: range,
774	},
775	_ => unreachable!(),
776	})
777	}
778	#[cfg(feature = "component-model")]
779	(Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
780	section(reader, len, InstanceSectionReader::new, InstanceSection)
781	}
782	#[cfg(feature = "component-model")]
783	(Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
784	section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
785	}
786	#[cfg(feature = "component-model")]
787	(Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
788	reader,
789	len,
790	ComponentInstanceSectionReader::new,
791	ComponentInstanceSection,
792	),
793	#[cfg(feature = "component-model")]
794	(Encoding::Component, COMPONENT_ALIAS_SECTION) => {
795	section(reader, len, SectionLimited::new, ComponentAliasSection)
796	}
797	#[cfg(feature = "component-model")]
798	(Encoding::Component, COMPONENT_TYPE_SECTION) => section(
799	reader,
800	len,
801	ComponentTypeSectionReader::new,
802	ComponentTypeSection,
803	),
804	#[cfg(feature = "component-model")]
805	(Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
806	reader,
807	len,
808	ComponentCanonicalSectionReader::new,
809	ComponentCanonicalSection,
810	),
811	#[cfg(feature = "component-model")]
812	(Encoding::Component, COMPONENT_START_SECTION) => {
813	let (start, range) = single_item(reader, len, "component start")?;
814	Ok(ComponentStartSection { start, range })
815	}
816	#[cfg(feature = "component-model")]
817	(Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
818	reader,
819	len,
820	ComponentImportSectionReader::new,
821	ComponentImportSection,
822	),
823	#[cfg(feature = "component-model")]
824	(Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
825	reader,
826	len,
827	ComponentExportSectionReader::new,
828	ComponentExportSection,
829	),
830	(_, id) => {
831	let offset = reader.original_position();
832	let contents = reader.read_bytes(len as usize)?;
833	let range = offset..offset + len as usize;
834	Ok(UnknownSection {
835	id,
836	contents,
837	range,
838	})
839	}
840	}
841	}
842
843	// Once we hit 0 remaining incrementally parsed items, with 0
844	// remaining bytes in each section, we're done and can switch back
845	// to parsing sections.
846	State::FunctionBody {
847	remaining: `0`,
848	len: `0`,
849	} => {
850	self.state = State::SectionStart;
851	self.parse_reader(reader, eof)
852	}
853
854	// ... otherwise trailing bytes with no remaining entries in these
855	// sections indicates an error.
856	State::FunctionBody { remaining: `0`, len } => {
857	debug_assert!(len > `0`);
858	let offset = reader.original_position();
859	Err(BinaryReaderError::new(
860	"trailing bytes at end of section",
861	offset,
862	))
863	}
864
865	// Functions are relatively easy to parse when we know there's at
866	// least one remaining and at least one byte available to read
867	// things.
868	//
869	// We use the remaining length try to read a u32 size of the
870	// function, and using that size we require the entire function be
871	// resident in memory. This means that we're reading whole chunks of
872	// functions at a time.
873	//
874	// Limiting via `Parser::max_size` (nested parsing) happens above in
875	// `fn parse`, and limiting by our section size happens via
876	// `delimited`. Actual parsing of the function body is delegated to
877	// the caller to iterate over the `FunctionBody` structure.
878	State::FunctionBody { remaining, mut len } => {
879	let body = delimited(reader, &mut len, \|r\| {
880	Ok(FunctionBody::new(r.read_reader()?))
881	})?;
882	self.state = State::FunctionBody {
883	remaining: remaining - `1`,
884	len,
885	};
886	Ok(CodeSectionEntry(body))
887	}
888	}
889	}
890
891	/// Convenience function that can be used to parse a module or component
892	/// that is entirely resident in memory.
893	///
894	/// This function will parse the `data` provided as a WebAssembly module
895	/// or component.
896	///
897	/// Note that when this function yields sections that provide parsers,
898	/// no further action is required for those sections as payloads from
899	/// those parsers will be automatically returned.
900	///
901	/// # Examples
902	///
903	/// An example of reading a wasm file from a stream (`std::io::Read`) into
904	/// a buffer and then parsing it.
905	///
906	/// ```
907	/// use std::io::Read;
908	/// use anyhow::Result;
909	/// use wasmparser::{Parser, Chunk, Payload::*};
910	///
911	/// fn parse(mut reader: impl Read) -> Result<()> {
912	/// let mut buf = Vec::new();
913	/// reader.read_to_end(&mut buf)?;
914	/// let parser = Parser::new(`0`);
915	///
916	/// for payload in parser.parse_all(&buf) {
917	/// match payload? {
918	/// // Sections for WebAssembly modules
919	/// Version { .. } => { / ... / }
920	/// TypeSection(_) => { / ... / }
921	/// ImportSection(_) => { / ... / }
922	/// FunctionSection(_) => { / ... / }
923	/// TableSection(_) => { / ... / }
924	/// MemorySection(_) => { / ... / }
925	/// TagSection(_) => { / ... / }
926	/// GlobalSection(_) => { / ... / }
927	/// ExportSection(_) => { / ... / }
928	/// StartSection { .. } => { / ... / }
929	/// ElementSection(_) => { / ... / }
930	/// DataCountSection { .. } => { / ... / }
931	/// DataSection(_) => { / ... / }
932	///
933	/// // Here we know how many functions we'll be receiving as
934	/// // `CodeSectionEntry`, so we can prepare for that, and
935	/// // afterwards we can parse and handle each function
936	/// // individually.
937	/// CodeSectionStart { .. } => { / ... / }
938	/// CodeSectionEntry(body) => {
939	/// // here we can iterate over `body` to parse the function
940	/// // and its locals
941	/// }
942	///
943	/// // Sections for WebAssembly components
944	/// ModuleSection { .. } => { / ... / }
945	/// InstanceSection(_) => { / ... / }
946	/// CoreTypeSection(_) => { / ... / }
947	/// ComponentSection { .. } => { / ... / }
948	/// ComponentInstanceSection(_) => { / ... / }
949	/// ComponentAliasSection(_) => { / ... / }
950	/// ComponentTypeSection(_) => { / ... / }
951	/// ComponentCanonicalSection(_) => { / ... / }
952	/// ComponentStartSection { .. } => { / ... / }
953	/// ComponentImportSection(_) => { / ... / }
954	/// ComponentExportSection(_) => { / ... / }
955	///
956	/// CustomSection(_) => { / ... / }
957	///
958	/// // Once we've reached the end of a parser we either resume
959	/// // at the parent parser or the payload iterator is at its
960	/// // end and we're done.
961	/// End(_) => {}
962	///
963	/// // most likely you'd return an error here, but if you want
964	/// // you can also inspect the raw contents of unknown sections
965	/// other => {
966	/// match other.as_section() {
967	/// Some((id, range)) => { / ... / }
968	/// None => { / ... / }
969	/// }
970	/// }
971	/// }
972	/// }
973	///
974	/// Ok(())
975	/// }
976	///
977	/// # parse(&b"`\0`asm`\x01\0\0\0`"[..]).unwrap();
978	/// ```
979	pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
980	let mut stack = Vec::new();
981	let mut cur = self;
982	let mut done = `false`;
983	iter::from_fn(move \|\| {
984	if done {
985	return None;
986	}
987	let payload = match cur.parse(data, `true`) {
988	// Propagate all errors
989	Err(e) => {
990	done = `true`;
991	return Some(Err(e));
992	}
993
994	// This isn't possible because `eof` is always true.
995	Ok(Chunk::NeedMoreData(_)) => unreachable!(),
996
997	Ok(Chunk::Parsed { payload, consumed }) => {
998	data = &data[consumed..];
999	payload
1000	}
1001	};
1002
1003	match &payload {
1004	#[cfg(feature = "component-model")]
1005	Payload::ModuleSection { parser, .. }
1006	\| Payload::ComponentSection { parser, .. } => {
1007	stack.push(cur.clone());
1008	cur = parser.clone();
1009	}
1010	Payload::End(_) => match stack.pop() {
1011	Some(p) => cur = p,
1012	None => done = `true`,
1013	},
1014
1015	_ => {}
1016	}
1017
1018	Some(Ok(payload))
1019	})
1020	}
1021
1022	/// Skip parsing the code section entirely.
1023	///
1024	/// This function can be used to indicate, after receiving
1025	/// `CodeSectionStart`, that the section will not be parsed.
1026	///
1027	/// The caller will be responsible for skipping `size` bytes (found in the
1028	/// `CodeSectionStart` payload). Bytes should only be fed into `parse`
1029	/// after the `size` bytes have been skipped.
1030	///
1031	/// # Panics
1032	///
1033	/// This function will panic if the parser is not in a state where it's
1034	/// parsing the code section.
1035	///
1036	/// # Examples
1037	///
1038	/// ```
1039	/// use wasmparser::{Result, Parser, Chunk, Payload::*};
1040	/// use core::ops::Range;
1041	///
1042	/// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
1043	/// let mut parser = Parser::new(`0`);
1044	/// loop {
1045	/// let payload = match parser.parse(wasm, `true`)? {
1046	/// Chunk::Parsed { consumed, payload } => {
1047	/// wasm = &wasm[consumed..];
1048	/// payload
1049	/// }
1050	/// // this state isn't possible with `eof = true`
1051	/// Chunk::NeedMoreData(_) => unreachable!(),
1052	/// };
1053	/// match payload {
1054	/// TypeSection(s) => print_range("type section", &s.range()),
1055	/// ImportSection(s) => print_range("import section", &s.range()),
1056	/// // .. other sections
1057	///
1058	/// // Print the range of the code section we see, but don't
1059	/// // actually iterate over each individual function.
1060	/// CodeSectionStart { range, size, .. } => {
1061	/// print_range("code section", &range);
1062	/// parser.skip_section();
1063	/// wasm = &wasm[size as usize..];
1064	/// }
1065	/// End(_) => break,
1066	/// _ => {}
1067	/// }
1068	/// }
1069	/// Ok(())
1070	/// }
1071	///
1072	/// fn print_range(section: &str, range: &Range<usize>) {
1073	/// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1074	/// }
1075	/// ```
1076	pub fn skip_section(&mut self) {
1077	let skip = match self.state {
1078	State::FunctionBody { remaining: _, len } => len,
1079	_ => panic!("wrong state to call `skip_section`"),
1080	};
1081	self.offset += u64::from(skip);
1082	self.max_size -= u64::from(skip);
1083	self.state = State::SectionStart;
1084	}
1085	}
1086
/// Widens a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1090
1091	/// Parses an entire section resident in memory into a `Payload`.
1092	///
1093	/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1094	/// to construct the section to return.
1095	fn section<'a, T>(
1096	reader: &mut BinaryReader<'a>,
1097	len: u32,
1098	ctor: fn(BinaryReader<'a>) -> Result<T>,
1099	variant: fn(T) -> Payload<'a>,
1100	) -> Result<Payload<'a>> {
1101	let reader: BinaryReader<'a> = reader.skip(\|r: &mut BinaryReader<'a>\| {
1102	r.read_bytes(size:len as usize)?;
1103	Ok(())
1104	})?;
1105	// clear the hint for "need this many more bytes" here because we already
1106	// read all the bytes, so it's not possible to read more bytes if this
1107	// fails.
1108	let reader: T = ctor(reader).map_err(op:clear_hint)?;
1109	Ok(variant(reader))
1110	}
1111
1112	/// Reads a section that is represented by a single uleb-encoded `u32`.
1113	fn single_item<'a, T>(
1114	reader: &mut BinaryReader<'a>,
1115	len: u32,
1116	desc: &str,
1117	) -> Result<(T, Range<usize>)>
1118	where
1119	T: FromReader<'a>,
1120	{
1121	let range: Range = reader.original_position()..reader.original_position() + len as usize;
1122	let mut content: BinaryReader<'a> = reader.skip(\|r: &mut BinaryReader<'a>\| {
1123	r.read_bytes(size:len as usize)?;
1124	Ok(())
1125	})?;
1126	// We can't recover from "unexpected eof" here because our entire section is
1127	// already resident in memory, so clear the hint for how many more bytes are
1128	// expected.
1129	let ret: T = content.read().map_err(op:clear_hint)?;
1130	if !content.eof() {
1131	bail!(
1132	content.original_position(),
1133	"unexpected content in the {desc} section",
1134	);
1135	}
1136	Ok((ret, range))
1137	}
1138
1139	/// Attempts to parse using `f`.
1140	///
1141	/// This will update `len` with the number of bytes consumed, and it will cause*
1142	/// a failure to be returned instead of the number of bytes consumed exceeds
1143	/// what `len` currently is.*
1144	fn delimited<'a, T>(
1145	reader: &mut BinaryReader<'a>,
1146	len: &mut u32,
1147	f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1148	) -> Result<T> {
1149	let start: usize = reader.original_position();
1150	let ret: T = f(reader)?;
1151	len = match* (reader.original_position() - start)
1152	.try_into()
1153	.ok()
1154	.and_then(\|i: u32\| len.checked_sub(i))
1155	{
1156	Some(i: u32) => i,
1157	None => return Err(BinaryReaderError::new(message:"unexpected end-of-file", offset:start)),
1158	};
1159	Ok(ret)
1160	}
1161
1162	impl Default for Parser {
1163	fn default() -> Parser {
1164	Parser::new(offset:`0`)
1165	}
1166	}
1167
1168	impl Payload<'_> {
1169	/// If this `Payload` represents a section in the original wasm module then
1170	/// the section's id and range within the original wasm binary are returned.
1171	///
1172	/// Not all payloads refer to entire sections, such as the `Version` and
1173	/// `CodeSectionEntry` variants. These variants will return `None` from this
1174	/// function.
1175	///
1176	/// Otherwise this function will return `Some` where the first element is
1177	/// the byte identifier for the section and the second element is the range
1178	/// of the contents of the section within the original wasm binary.
1179	///
1180	/// The purpose of this method is to enable tools to easily iterate over
1181	/// entire sections if necessary and handle sections uniformly, for example
1182	/// dropping custom sections while preserving all other sections.
1183	pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1184	use Payload::*;
1185
1186	match self {
1187	Version { .. } => None,
1188	TypeSection(s) => Some((TYPE_SECTION, s.range())),
1189	ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1190	FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1191	TableSection(s) => Some((TABLE_SECTION, s.range())),
1192	MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1193	TagSection(s) => Some((TAG_SECTION, s.range())),
1194	GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1195	ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1196	ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1197	DataSection(s) => Some((DATA_SECTION, s.range())),
1198	StartSection { range, .. } => Some((START_SECTION, range.clone())),
1199	DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1200	CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1201	CodeSectionEntry(_) => None,
1202
1203	#[cfg(feature = "component-model")]
1204	ModuleSection {
1205	unchecked_range: range,
1206	..
1207	} => Some((COMPONENT_MODULE_SECTION, range.clone())),
1208	#[cfg(feature = "component-model")]
1209	InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1210	#[cfg(feature = "component-model")]
1211	CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1212	#[cfg(feature = "component-model")]
1213	ComponentSection {
1214	unchecked_range: range,
1215	..
1216	} => Some((COMPONENT_SECTION, range.clone())),
1217	#[cfg(feature = "component-model")]
1218	ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1219	#[cfg(feature = "component-model")]
1220	ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1221	#[cfg(feature = "component-model")]
1222	ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1223	#[cfg(feature = "component-model")]
1224	ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1225	#[cfg(feature = "component-model")]
1226	ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1227	#[cfg(feature = "component-model")]
1228	ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1229	#[cfg(feature = "component-model")]
1230	ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1231
1232	CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1233
1234	UnknownSection { id, range, .. } => Some((*id, range.clone())),
1235
1236	End(_) => None,
1237	}
1238	}
1239	}
1240
1241	impl fmt::Debug for Payload<'_> {
1242	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1243	use Payload::*;
1244	match self {
1245	Version {
1246	num,
1247	encoding,
1248	range,
1249	} => f
1250	.debug_struct("Version")
1251	.field("num", num)
1252	.field("encoding", encoding)
1253	.field("range", range)
1254	.finish(),
1255
1256	// Module sections
1257	TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1258	ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1259	FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1260	TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1261	MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1262	TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1263	GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1264	ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1265	ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1266	DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1267	StartSection { func, range } => f
1268	.debug_struct("StartSection")
1269	.field("func", func)
1270	.field("range", range)
1271	.finish(),
1272	DataCountSection { count, range } => f
1273	.debug_struct("DataCountSection")
1274	.field("count", count)
1275	.field("range", range)
1276	.finish(),
1277	CodeSectionStart { count, range, size } => f
1278	.debug_struct("CodeSectionStart")
1279	.field("count", count)
1280	.field("range", range)
1281	.field("size", size)
1282	.finish(),
1283	CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1284
1285	// Component sections
1286	#[cfg(feature = "component-model")]
1287	ModuleSection {
1288	parser: _,
1289	unchecked_range: range,
1290	} => f
1291	.debug_struct("ModuleSection")
1292	.field("range", range)
1293	.finish(),
1294	#[cfg(feature = "component-model")]
1295	InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1296	#[cfg(feature = "component-model")]
1297	CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1298	#[cfg(feature = "component-model")]
1299	ComponentSection {
1300	parser: _,
1301	unchecked_range: range,
1302	} => f
1303	.debug_struct("ComponentSection")
1304	.field("range", range)
1305	.finish(),
1306	#[cfg(feature = "component-model")]
1307	ComponentInstanceSection(_) => f
1308	.debug_tuple("ComponentInstanceSection")
1309	.field(&"...")
1310	.finish(),
1311	#[cfg(feature = "component-model")]
1312	ComponentAliasSection(_) => f
1313	.debug_tuple("ComponentAliasSection")
1314	.field(&"...")
1315	.finish(),
1316	#[cfg(feature = "component-model")]
1317	ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1318	#[cfg(feature = "component-model")]
1319	ComponentCanonicalSection(_) => f
1320	.debug_tuple("ComponentCanonicalSection")
1321	.field(&"...")
1322	.finish(),
1323	#[cfg(feature = "component-model")]
1324	ComponentStartSection { .. } => f
1325	.debug_tuple("ComponentStartSection")
1326	.field(&"...")
1327	.finish(),
1328	#[cfg(feature = "component-model")]
1329	ComponentImportSection(_) => f
1330	.debug_tuple("ComponentImportSection")
1331	.field(&"...")
1332	.finish(),
1333	#[cfg(feature = "component-model")]
1334	ComponentExportSection(_) => f
1335	.debug_tuple("ComponentExportSection")
1336	.field(&"...")
1337	.finish(),
1338
1339	CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1340
1341	UnknownSection { id, range, .. } => f
1342	.debug_struct("UnknownSection")
1343	.field("id", id)
1344	.field("range", range)
1345	.finish(),
1346
1347	End(offset) => f.debug_tuple("End").field(offset).finish(),
1348	}
1349	}
1350	}
1351
1352	fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1353	err.inner.needed_hint = None;
1354	err
1355	}
1356
#[cfg(test)]
mod tests {
    use super::*;

    /// Header bytes the parser reports as `Encoding::Module` with version
    /// `WASM_MODULE_VERSION`.
    const MODULE_HEADER: &[u8] = b"\0asm\x01\0\0\0";

    /// Header bytes the parser reports as `Encoding::Component` with version
    /// `WASM_COMPONENT_VERSION`.
    const COMPONENT_HEADER: &[u8] = b"\0asm\x0d\0\x01\0";

    /// Asserts that an expression matches a pattern, printing the value and
    /// the pattern on mismatch.
    macro_rules! assert_matches {
        ($a:expr, $b:pat $(,)?) => {
            match $a {
                $b => {}
                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
            }
        };
    }

    /// Returns a parser that has already consumed a module header.
    fn parser_after_header() -> Parser {
        let mut p = Parser::default();
        assert_matches!(
            p.parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_MODULE_VERSION,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );
        p
    }

    /// Returns a parser that has already consumed a component header.
    fn parser_after_component_header() -> Parser {
        let mut p = Parser::default();
        assert_matches!(
            p.parse(COMPONENT_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_COMPONENT_VERSION,
                    encoding: Encoding::Component,
                    ..
                },
            }),
        );
        p
    }

    /// Feeds `bytes` to a fresh parser positioned right after a module header.
    fn feed(bytes: &[u8], eof: bool) -> core::result::Result<Chunk<'_>, BinaryReaderError> {
        parser_after_header().parse(bytes, eof)
    }

    /// Parses a module section header declaring `len` bytes with the
    /// component parser `p` and returns the nested parser for that module.
    fn enter_module(p: &mut Parser, len: u8) -> Parser {
        let section_header = [1, len];
        let sub = match p.parse(&section_header, false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };
        sub
    }

    /// Checks every observable property of a parsed custom section.
    fn assert_custom(
        chunk: Chunk<'_>,
        expected_consumed: usize,
        expected_name: &str,
        expected_data_offset: usize,
        expected_data: &[u8],
        expected_range: Range<usize>,
    ) {
        let (consumed, section) = if let Chunk::Parsed {
            consumed,
            payload: Payload::CustomSection(s),
        } = chunk
        {
            (consumed, s)
        } else {
            panic!("not a custom section payload")
        };
        assert_eq!(consumed, expected_consumed);
        assert_eq!(section.name(), expected_name);
        assert_eq!(section.data_offset(), expected_data_offset);
        assert_eq!(section.data(), expected_data);
        assert_eq!(section.range(), expected_range);
    }

    #[test]
    fn header() {
        // With `eof` set an empty input is an error; otherwise the parser
        // asks for more bytes.
        assert!(Parser::default().parse(&[], true).is_err());
        assert_matches!(
            Parser::default().parse(&[], false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0", false),
            Ok(Chunk::NeedMoreData(3)),
        );
        assert_matches!(
            Parser::default().parse(b"\0asm", false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );
    }

    #[test]
    fn header_iter() {
        // Only checks that iteration terminates without panicking; the
        // yielded items are discarded.
        let inputs: [&[u8]; 4] = [&[], b"\0", b"\0asm", b"\0asm\x01\x01\x01\x01"];
        for input in inputs.iter().copied() {
            Parser::default().parse_all(input).for_each(drop);
        }
    }

    #[test]
    fn start_section() {
        assert_matches!(feed(&[], false), Ok(Chunk::NeedMoreData(1)));

        // Truncated input with `eof` set is an error...
        assert!(feed(&[8], true).is_err());
        assert!(feed(&[8, 1], true).is_err());
        assert!(feed(&[8, 2], true).is_err());

        // ... and a request for more data otherwise.
        assert_matches!(feed(&[8], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(feed(&[8, 1], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(feed(&[8, 2], false), Ok(Chunk::NeedMoreData(2)));

        assert_matches!(
            feed(&[8, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::StartSection { func: 1, .. },
            }),
        );
        assert!(feed(&[8, 2, 1, 1], false).is_err());
        assert!(feed(&[8, 0], false).is_err());
    }

    #[test]
    fn end_works() {
        assert_matches!(
            feed(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(8),
            }),
        );
    }

    #[test]
    fn type_section() {
        assert!(feed(&[1], true).is_err());
        assert!(feed(&[1, 0], false).is_err());
        assert!(feed(&[8, 2], true).is_err());
        assert_matches!(feed(&[1], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(feed(&[1, 1], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            feed(&[1, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
        // Bytes after the section are left unconsumed.
        assert_matches!(
            feed(&[1, 1, 1, 2, 3, 4], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
    }

    #[test]
    fn custom_section() {
        assert!(feed(&[0], true).is_err());
        assert!(feed(&[0, 0], false).is_err());
        assert!(feed(&[0, 1, 1], false).is_err());
        assert_matches!(feed(&[0, 2, 1], false), Ok(Chunk::NeedMoreData(1)));

        // Empty name, no data.
        assert_custom(feed(&[0, 1, 0], false).unwrap(), 3, "", 11, b"", 10..11);
        // One-byte name, no data.
        assert_custom(
            feed(&[0, 2, 1, b'a'], false).unwrap(),
            4,
            "a",
            12,
            b"",
            10..12,
        );
        // Empty name, one byte of data.
        assert_custom(
            feed(&[0, 2, 0, b'a'], false).unwrap(),
            4,
            "",
            11,
            b"a",
            10..12,
        );
    }

    #[test]
    fn function_section() {
        assert!(feed(&[10], true).is_err());
        assert!(feed(&[10, 0], true).is_err());
        assert!(feed(&[10, 1], true).is_err());
        assert_matches!(feed(&[10], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(feed(&[10, 1], false), Ok(Chunk::NeedMoreData(1)));

        // section declaring zero functions
        {
            let mut p = parser_after_header();
            assert_matches!(
                p.parse(&[10, 1, 0], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 0, .. },
                }),
            );
            assert_matches!(
                p.parse(&[], true),
                Ok(Chunk::Parsed {
                    consumed: 0,
                    payload: Payload::End(11),
                }),
            );
        }

        // section declaring a single function
        {
            let mut p = parser_after_header();
            assert_matches!(
                p.parse(&[10, 2, 1, 0], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 1, .. },
                }),
            );
            assert_matches!(
                p.parse(&[0], false),
                Ok(Chunk::Parsed {
                    consumed: 1,
                    payload: Payload::CodeSectionEntry(_),
                }),
            );
            assert_matches!(
                p.parse(&[], true),
                Ok(Chunk::Parsed {
                    consumed: 0,
                    payload: Payload::End(12),
                }),
            );
        }

        // 1 byte section with 1 function can't read the function body because
        // the section is too small
        {
            let mut p = parser_after_header();
            assert_matches!(
                p.parse(&[10, 1, 1], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 1, .. },
                }),
            );
            assert_eq!(
                p.parse(&[0], false).unwrap_err().message(),
                "unexpected end-of-file"
            );
        }

        // section with 2 functions but section is cut off
        {
            let mut p = parser_after_header();
            assert_matches!(
                p.parse(&[10, 2, 2], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 2, .. },
                }),
            );
            assert_matches!(
                p.parse(&[0], false),
                Ok(Chunk::Parsed {
                    consumed: 1,
                    payload: Payload::CodeSectionEntry(_),
                }),
            );
            assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
            assert_eq!(
                p.parse(&[0], false).unwrap_err().message(),
                "unexpected end-of-file",
            );
        }

        // trailing data is bad
        {
            let mut p = parser_after_header();
            assert_matches!(
                p.parse(&[10, 3, 1], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 1, .. },
                }),
            );
            assert_matches!(
                p.parse(&[0], false),
                Ok(Chunk::Parsed {
                    consumed: 1,
                    payload: Payload::CodeSectionEntry(_),
                }),
            );
            assert_eq!(
                p.parse(&[0], false).unwrap_err().message(),
                "trailing bytes at end of section",
            );
        }
    }

    #[test]
    fn single_module() {
        let mut p = parser_after_component_header();
        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

        // A module that's 8 bytes in length
        let mut sub = enter_module(&mut p, 8);

        // Parse the header of the submodule with the sub-parser.
        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(
            sub.parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: 1,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );

        // The sub-parser should be byte-limited so the next byte shouldn't get
        // consumed, it's intended for the parent parser.
        assert_matches!(
            sub.parse(&[10], false),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );

        // The parent parser should now be back to resuming, and we simulate it
        // being done with bytes to ensure that it's safely at the end,
        // completing the module code section.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );
    }

    #[test]
    fn nested_section_too_big() {
        let mut p = parser_after_component_header();

        // A module that's 10 bytes in length
        let mut sub = enter_module(&mut p, 10);

        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
        // module.
        assert_matches!(
            sub.parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );

        // We can't parse a section which declares it's bigger than the outer
        // module. This is a custom section, one byte big, with one content
        // byte. The content byte, however, lives outside of the parent's
        // module code section.
        assert_eq!(
            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
            "section too large",
        );
    }
}
1762