use self::bitvec::BitVec;
use anyhow::{bail, Result};
use indexmap::{IndexMap, IndexSet};
use std::{
    borrow::Cow,
    collections::{HashMap, HashSet},
    convert::Infallible,
    mem,
    ops::Deref,
};
use wasm_encoder::reencode::Reencode;
use wasm_encoder::{Encode, EntityType, Instruction, RawCustomSection};
use wasmparser::*;

const PAGE_SIZE: i32 = 64 * 1024;

/// This function will reduce the input core `wasm` module to only the set of
/// exports `required`.
///
/// This internally performs a "gc" pass after removing exports to ensure that
/// the resulting module imports the minimal set of functions necessary.
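///
/// A sketch of how this might be invoked (the `adapter_wasm` bytes and the
/// export/realloc names here are hypothetical, purely for illustration):
///
/// ```ignore
/// let mut required = IndexSet::new();
/// required.insert("cabi_realloc".to_string());
/// let reduced = run(&adapter_wasm, &required, Some("cabi_realloc"))?;
/// ```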
pub fn run(
    wasm: &[u8],
    required: &IndexSet<String>,
    main_module_realloc: Option<&str>,
) -> Result<Vec<u8>> {
    assert!(!required.is_empty());

    let mut module = Module::default();
    module.parse(wasm)?;

    // Make sure that all required names are present in the module, and then
    // remove all names that are not required.
    for name in required {
        if !module.exports.contains_key(name.as_str()) {
            bail!("adapter module does not have export `{name}`")
        }
    }
    let mut not_required = IndexSet::new();
    for name in module.exports.keys().copied() {
        if !required.contains(name) {
            not_required.insert(name);
        }
    }
    for name in not_required {
        module.exports.swap_remove(name);
    }
    assert!(!module.exports.is_empty());
    module.liveness()?;
    module.encode(main_module_realloc)
}

/// This function generates a Wasm function body which implements
/// `cabi_realloc` in terms of `memory.grow`. It only accepts new, page-sized
/// allocations.
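///
/// The body below assumes the standard `cabi_realloc` signature of
/// `(old_ptr: i32, old_len: i32, align: i32, new_len: i32) -> i32`: locals
/// 0..=3 are those parameters and local 4 is scratch space for the
/// `memory.grow` result. Note that `align` (local 2) is never inspected.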
fn realloc_via_memory_grow() -> wasm_encoder::Function {
    use wasm_encoder::Instruction::*;

    let mut func = wasm_encoder::Function::new([(1, wasm_encoder::ValType::I32)]);

    // Assert `old_ptr` is null.
    func.instruction(&I32Const(0));
    func.instruction(&LocalGet(0));
    func.instruction(&I32Ne);
    func.instruction(&If(wasm_encoder::BlockType::Empty));
    func.instruction(&Unreachable);
    func.instruction(&End);

    // Assert `old_len` is zero.
    func.instruction(&I32Const(0));
    func.instruction(&LocalGet(1));
    func.instruction(&I32Ne);
    func.instruction(&If(wasm_encoder::BlockType::Empty));
    func.instruction(&Unreachable);
    func.instruction(&End);

    // Assert `new_len` is equal to the page size (which is the only value we
    // currently support). Note: we could easily support arbitrary multiples
    // of PAGE_SIZE here if the need arises.
    func.instruction(&I32Const(PAGE_SIZE));
    func.instruction(&LocalGet(3));
    func.instruction(&I32Ne);
    func.instruction(&If(wasm_encoder::BlockType::Empty));
    func.instruction(&Unreachable);
    func.instruction(&End);

    // Grow the memory by 1 page.
    func.instruction(&I32Const(1));
    func.instruction(&MemoryGrow(0));
    func.instruction(&LocalTee(4));

    // Test if the return value of the growth was -1 and, if so, trap due to a
    // failed allocation.
    func.instruction(&I32Const(-1));
    func.instruction(&I32Eq);
    func.instruction(&If(wasm_encoder::BlockType::Empty));
    func.instruction(&Unreachable);
    func.instruction(&End);

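    // Return the byte address of the start of the newly allocated page:
    // `memory.grow` returns the old size in pages, and a page is 2^16 bytes,
    // so a left shift by 16 converts that page count into a byte offset.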
    func.instruction(&LocalGet(4));
    func.instruction(&I32Const(16));
    func.instruction(&I32Shl);
    func.instruction(&End);

    func
}

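/// States stored (as an `i32`) in the adapter's `allocation_state` global,
/// used to track lazy stack allocation and to short-circuit reentrance.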
#[repr(i32)]
#[non_exhaustive]
enum StackAllocationState {
    Unallocated,
    Allocating,
    Allocated,
}

fn allocate_stack_via_realloc(
    realloc_index: u32,
    sp: u32,
    allocation_state: Option<u32>,
) -> wasm_encoder::Function {
    use wasm_encoder::Instruction::*;

    let mut func = wasm_encoder::Function::new([]);

    if let Some(allocation_state) = allocation_state {
        // This means we're lazily allocating the stack, keeping track of state
        // via `$allocation_state`.
        func.instruction(&GlobalGet(allocation_state));
        func.instruction(&I32Const(StackAllocationState::Unallocated as _));
        func.instruction(&I32Eq);
        func.instruction(&If(wasm_encoder::BlockType::Empty));
        func.instruction(&I32Const(StackAllocationState::Allocating as _));
        func.instruction(&GlobalSet(allocation_state));
        // We could also set `sp` to zero here to ensure the yet-to-be-allocated
        // stack is empty. However, we assume it defaults to zero anyway, in
        // which case setting it would be redundant.
    }

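    // Call `realloc(old_ptr = 0, old_len = 0, align = 8, new_len = PAGE_SIZE)`
    // to allocate one page for the stack, then point the stack pointer at the
    // *end* of that allocation since the stack grows downward.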
    func.instruction(&I32Const(0));
    func.instruction(&I32Const(0));
    func.instruction(&I32Const(8));
    func.instruction(&I32Const(PAGE_SIZE));
    func.instruction(&Call(realloc_index));
    func.instruction(&I32Const(PAGE_SIZE));
    func.instruction(&I32Add);
    func.instruction(&GlobalSet(sp));

    if let Some(allocation_state) = allocation_state {
        func.instruction(&I32Const(StackAllocationState::Allocated as _));
        func.instruction(&GlobalSet(allocation_state));
        func.instruction(&End);
    }

    func.instruction(&End);

    func
}

// Represents a function called while processing a module work list.
type WorklistFunc<'a> = fn(&mut Module<'a>, u32) -> Result<()>;

// Representation of a wasm module used to GC a module down to the minimal set
// of items necessary to implement the `exports`.
//
// Note that this is not a complete representation of a wasm module since it
// doesn't represent everything such as data and element segments. This is only
// used for adapter modules, which otherwise have these restrictions, and that
// makes this gc pass a bit easier to write.
#[derive(Default)]
struct Module<'a> {
    // Definitions found when parsing a module.
    types: Vec<FuncType>,
    tables: Vec<Table<'a>>,
    globals: Vec<Global<'a>>,
    memories: Vec<Memory<'a>>,
    funcs: Vec<Func<'a>>,
    exports: IndexMap<&'a str, Export<'a>>,
    func_names: HashMap<u32, &'a str>,
    global_names: HashMap<u32, &'a str>,
    producers: Option<wasm_metadata::Producers>,

    // Known-live sets of indices after the `liveness` pass has run.
    live_types: BitVec,
    live_tables: BitVec,
    live_globals: BitVec,
    live_memories: BitVec,
    live_funcs: BitVec,

    // Helper data structure used during the `liveness` pass to avoid recursion.
    // When calculating the liveness of an item this `worklist` is pushed to and
    // then processed until it's empty. An item pushed onto this list represents
    // a newly discovered live index, and the paired function is what walks the
    // item's definition to find other items that it references.
    worklist: Vec<(u32, WorklistFunc<'a>)>,
}

struct Table<'a> {
    def: Definition<'a, ()>,
    ty: TableType,
}

struct Memory<'a> {
    def: Definition<'a, ()>,
    ty: MemoryType,
}

struct Global<'a> {
    def: Definition<'a, ConstExpr<'a>>,
    ty: GlobalType,
}

#[derive(Clone)]
struct Func<'a> {
    def: Definition<'a, FunctionBody<'a>>,
    ty: u32,
}

#[derive(Clone)]
enum Definition<'a, T> {
    Import(&'a str, &'a str),
    Local(T),
}

impl<'a> Module<'a> {
    fn parse(&mut self, wasm: &'a [u8]) -> Result<()> {
        let mut next_code_index = 0;
        let mut validator = Validator::new();
        for payload in Parser::new(0).parse_all(wasm) {
            let payload = payload?;
            validator.payload(&payload)?;
            match payload {
                Payload::Version { encoding, .. } => {
                    if encoding != Encoding::Module {
                        bail!("adapter must be a core wasm module, not a component");
                    }
                }
                Payload::End(_) => {}
                Payload::TypeSection(s) => {
                    for ty in s.into_iter_err_on_gc_types() {
                        self.types.push(ty?);
                    }
                }
                Payload::ImportSection(s) => {
                    for i in s {
                        let i = i?;
                        match i.ty {
                            TypeRef::Func(ty) => self.funcs.push(Func {
                                def: Definition::Import(i.module, i.name),
                                ty,
                            }),
                            TypeRef::Table(ty) => self.tables.push(Table {
                                def: Definition::Import(i.module, i.name),
                                ty,
                            }),
                            TypeRef::Global(ty) => self.globals.push(Global {
                                def: Definition::Import(i.module, i.name),
                                ty,
                            }),
                            TypeRef::Memory(ty) => self.memories.push(Memory {
                                def: Definition::Import(i.module, i.name),
                                ty,
                            }),
                            TypeRef::Tag(_) => bail!("unsupported `tag` type"),
                        }
                    }
                }
                Payload::TableSection(s) => {
                    for table in s {
                        let table = table?;
                        self.tables.push(Table {
                            def: Definition::Local(()),
                            ty: table.ty,
                        });
                    }
                }
                Payload::MemorySection(s) => {
                    for ty in s {
                        let ty = ty?;
                        self.memories.push(Memory {
                            def: Definition::Local(()),
                            ty,
                        });
                    }
                }
                Payload::GlobalSection(s) => {
                    for g in s {
                        let g = g?;
                        self.globals.push(Global {
                            def: Definition::Local(g.init_expr),
                            ty: g.ty,
                        });
                    }
                }

                Payload::ExportSection(s) => {
                    for e in s {
                        let e = e?;
                        self.exports.insert(e.name, e);
                    }
                }

                Payload::FunctionSection(s) => {
                    next_code_index = self.funcs.len();
                    for ty in s {
                        let ty = ty?;
                        self.funcs.push(Func {
                            // Specify a dummy definition to get filled in later
                            // when parsing the code section.
                            def: Definition::Local(FunctionBody::new(BinaryReader::new(&[], 0))),
                            ty,
                        });
                    }
                }

                Payload::CodeSectionStart { .. } => {}
                Payload::CodeSectionEntry(body) => {
                    self.funcs[next_code_index].def = Definition::Local(body);
                    next_code_index += 1;
                }

                // Ignore all custom sections except for the `name` and
                // `producers` sections, which we parse but ignore errors within.
                Payload::CustomSection(s) => match s.as_known() {
                    KnownCustom::Name(s) => drop(self.parse_name_section(s)),
                    KnownCustom::Producers(_) => drop(self.parse_producers_section(&s)),
                    _ => {}
                },

                // Sections that shouldn't appear in the specially-crafted core
                // wasm adapter module we're processing.
                other => match other.as_section() {
                    Some((id, _)) => bail!("unsupported section `{id}` in adapter"),
                    None => bail!("unsupported payload in adapter"),
                },
            }
        }

        Ok(())
    }

    fn parse_name_section(&mut self, section: NameSectionReader<'a>) -> Result<()> {
        for s in section {
            match s? {
                Name::Function(map) => {
                    for naming in map {
                        let naming = naming?;
                        self.func_names.insert(naming.index, naming.name);
                    }
                }
                Name::Global(map) => {
                    for naming in map {
                        let naming = naming?;
                        self.global_names.insert(naming.index, naming.name);
                    }
                }
                _ => {}
            }
        }
        Ok(())
    }

    fn parse_producers_section(&mut self, section: &CustomSectionReader<'a>) -> Result<()> {
        let producers =
            wasm_metadata::Producers::from_bytes(section.data(), section.data_offset())?;
        self.producers = Some(producers);
        Ok(())
    }

    /// Iteratively calculates the set of live items within this module,
    /// considering all exports as the roots of liveness.
    fn liveness(&mut self) -> Result<()> {
        let exports = mem::take(&mut self.exports);
        for (_, e) in exports.iter() {
            match e.kind {
                ExternalKind::Func => self.func(e.index),
                ExternalKind::Global => self.global(e.index),
                ExternalKind::Table => self.table(e.index),
                ExternalKind::Memory => self.memory(e.index),
                ExternalKind::Tag => bail!("unsupported exported tag"),
            }
        }
        self.exports = exports;

        while let Some((idx, func)) = self.worklist.pop() {
            func(self, idx)?;
        }
        Ok(())
    }

    fn func(&mut self, func: u32) {
        if !self.live_funcs.insert(func) {
            return;
        }
        self.worklist.push((func, |me, func| {
            let func = me.funcs[func as usize].clone();
            me.ty(func.ty);
            let mut body = match &func.def {
                Definition::Import(..) => return Ok(()),
                Definition::Local(e) => e.get_binary_reader(),
            };
            let local_count = body.read_var_u32()?;
            for _ in 0..local_count {
                body.read_var_u32()?;
                body.read::<ValType>()?;
            }
            me.operators(body)
        }));
    }

    fn global(&mut self, global: u32) {
        if !self.live_globals.insert(global) {
            return;
        }
        self.worklist.push((global, |me, global| {
            let init = match &me.globals[global as usize].def {
                Definition::Import(..) => return Ok(()),
                Definition::Local(e) => e,
            };
            me.operators(init.get_binary_reader())
        }));
    }

    fn table(&mut self, table: u32) {
        if !self.live_tables.insert(table) {
            return;
        }
        self.worklist.push((table, |me, table| {
            let ty = me.tables[table as usize].ty.element_type;
            me.valty(ty.into());
            Ok(())
        }));
    }

    fn memory(&mut self, memory: u32) {
        self.live_memories.insert(memory);
    }

    fn blockty(&mut self, ty: BlockType) {
        if let BlockType::FuncType(ty) = ty {
            self.ty(ty);
        }
    }

    fn valty(&mut self, ty: ValType) {
        match ty {
            ValType::Ref(r) => self.refty(r),
            ValType::I32 | ValType::I64 | ValType::F32 | ValType::F64 | ValType::V128 => {}
        }
    }

    fn refty(&mut self, ty: RefType) {
        self.heapty(ty.heap_type())
    }

    fn heapty(&mut self, ty: HeapType) {
        match ty {
            HeapType::Abstract { .. } => {}
            HeapType::Concrete(i) => self.ty(i.as_module_index().unwrap()),
        }
    }

    fn ty(&mut self, ty: u32) {
        if !self.live_types.insert(ty) {
            return;
        }
        self.worklist.push((ty, |me, ty| {
            let ty = me.types[ty as usize].clone();
            for param in ty.params().iter().chain(ty.results()) {
                me.valty(*param);
            }
            Ok(())
        }));
    }

    fn operators(&mut self, mut reader: BinaryReader<'a>) -> Result<()> {
        while !reader.eof() {
            reader.visit_operator(self)?;
        }
        Ok(())
    }

    fn live_types(&self) -> impl Iterator<Item = (u32, &FuncType)> + '_ {
        live_iter(&self.live_types, self.types.iter())
    }

    fn live_funcs(&self) -> impl Iterator<Item = (u32, &Func<'a>)> + '_ {
        live_iter(&self.live_funcs, self.funcs.iter())
    }

    fn live_memories(&self) -> impl Iterator<Item = (u32, &Memory<'a>)> + '_ {
        live_iter(&self.live_memories, self.memories.iter())
    }

    fn live_globals(&self) -> impl Iterator<Item = (u32, &Global<'a>)> + '_ {
        live_iter(&self.live_globals, self.globals.iter())
    }

    fn live_tables(&self) -> impl Iterator<Item = (u32, &Table<'a>)> + '_ {
        live_iter(&self.live_tables, self.tables.iter())
    }

    /// Encodes this `Module` to a new wasm module which is gc'd and only
    /// contains the items that are live as calculated by the `liveness` pass.
    fn encode(&mut self, main_module_realloc: Option<&str>) -> Result<Vec<u8>> {
        // Data structure used to track the mapping of old index to new index
        // for all live items.
        let mut map = Encoder::default();

        // Sections that will be assembled into the final module at the end of
        // this function.
        let mut types = wasm_encoder::TypeSection::new();
        let mut imports = wasm_encoder::ImportSection::new();
        let mut funcs = wasm_encoder::FunctionSection::new();
        let mut tables = wasm_encoder::TableSection::new();
        let mut memories = wasm_encoder::MemorySection::new();
        let mut globals = wasm_encoder::GlobalSection::new();
        let mut code = wasm_encoder::CodeSection::new();

        let mut empty_type = None;
        for (i, ty) in self.live_types() {
            map.types.push(i);

            let ty = map.func_type(ty.clone())?;
            types.ty().func_type(&ty);

            // Keep track of the "empty type" to see if we can reuse an
            // existing one or whether one needs to be injected if a `start`
            // function is calculated at the end.
            if ty.params().is_empty() && ty.results().is_empty() {
                empty_type = Some(map.types.remap(i));
            }
        }

        let mut num_memories = 0;
        for (i, mem) in self.live_memories() {
            map.memories.push(i);
            let ty = map.memory_type(mem.ty);
            match &mem.def {
                Definition::Import(m, n) => {
                    imports.import(m, n, ty);
                }
                Definition::Local(()) => {
                    memories.memory(ty);
                }
            }
            num_memories += 1;
        }

        for (i, table) in self.live_tables() {
            map.tables.push(i);
            let ty = map.table_type(table.ty)?;
            match &table.def {
                Definition::Import(m, n) => {
                    imports.import(m, n, ty);
                }
                Definition::Local(()) => {
                    tables.table(ty);
                }
            }
        }

        for (i, global) in self.live_globals() {
            map.globals.push(i);
            let ty = map.global_type(global.ty)?;
            match &global.def {
                Definition::Import(m, n) => {
                    imports.import(m, n, ty);
                }
                Definition::Local(init) => {
                    let init = map.const_expr(init.clone())?;
                    globals.global(ty, &init);
                }
            }
        }

        let mut realloc_index = None;
        let mut num_func_imports = 0;

        // For functions, first assign a new index to all live functions, and
        // only afterwards map the bodies of those functions, so that the `map`
        // of all index mappings is fully populated before any instructions are
        // rewritten.

        let is_realloc =
            |m, n| m == "__main_module__" && matches!(n, "canonical_abi_realloc" | "cabi_realloc");

        let (imported, local) =
            self.live_funcs()
                .partition::<Vec<_>, _>(|(_, func)| match &func.def {
                    Definition::Import(m, n) => {
                        !is_realloc(*m, *n) || main_module_realloc.is_some()
                    }
                    Definition::Local(_) => false,
                });

        for (i, func) in imported {
            map.funcs.push(i);
            let ty = map.types.remap(func.ty);
            match &func.def {
                Definition::Import(m, n) => {
                    let name = if is_realloc(*m, *n) {
                        // The adapter is importing `cabi_realloc` from the main
                        // module, and the main module exports that function,
                        // but possibly using a different name
                        // (e.g. `canonical_abi_realloc`). Update the name to
                        // match if necessary.
                        realloc_index = Some(num_func_imports);
                        main_module_realloc.unwrap_or(n)
                    } else {
                        n
                    };
                    imports.import(m, name, EntityType::Function(ty));
                    num_func_imports += 1;
                }
                Definition::Local(_) => unreachable!(),
            }
        }

        let add_realloc_type = |types: &mut wasm_encoder::TypeSection| {
            let type_index = types.len();
            types.ty().function(
                [
                    wasm_encoder::ValType::I32,
                    wasm_encoder::ValType::I32,
                    wasm_encoder::ValType::I32,
                    wasm_encoder::ValType::I32,
                ],
                [wasm_encoder::ValType::I32],
            );
            type_index
        };

        let add_empty_type = |types: &mut wasm_encoder::TypeSection| {
            let type_index = types.len();
            types.ty().function([], []);
            type_index
        };

        let sp = self.find_mut_i32_global("__stack_pointer")?;
        let allocation_state = self.find_mut_i32_global("allocation_state")?;

        let mut func_names = Vec::new();

        if let (Some(realloc), Some(_), None) = (main_module_realloc, sp, realloc_index) {
            // The main module exports a realloc function, and although the
            // adapter doesn't import it, we're going to add a function which
            // calls it to allocate some stack space, so let's add an import
            // now.

            // Tell the function remapper we're reserving a slot for our extra
            // import:
            map.funcs.next += 1;

            realloc_index = Some(num_func_imports);
            imports.import(
                "__main_module__",
                realloc,
                EntityType::Function(add_realloc_type(&mut types)),
            );
            func_names.push((num_func_imports, realloc));
            num_func_imports += 1;
        }

        for (i, func) in local {
            map.funcs.push(i);
            let ty = map.types.remap(func.ty);
            match &func.def {
                Definition::Import(_, _) => {
                    // The adapter is importing `cabi_realloc` from the main
                    // module, but the main module isn't exporting it. In this
                    // case, we need to define a local function it can call
                    // instead.
                    realloc_index = Some(num_func_imports + funcs.len());
                    funcs.function(ty);
                    code.function(&realloc_via_memory_grow());
                }
                Definition::Local(_) => {
                    funcs.function(ty);
                }
            }
        }

        let lazy_stack_init_index =
            if sp.is_some() && allocation_state.is_some() && main_module_realloc.is_some() {
                // We have a stack pointer, a `cabi_realloc` function from the
                // main module, and a global variable for keeping track of (and
                // short-circuiting) reentrance. That means we can (and should)
                // do lazy stack allocation.
                let index = num_func_imports + funcs.len();

                // Tell the function remapper we're reserving a slot for our
                // extra function:
                map.funcs.next += 1;

                funcs.function(add_empty_type(&mut types));

                Some(index)
            } else {
                None
            };

        let exported_funcs = self
            .exports
            .values()
            .filter_map(|export| match export.kind {
                ExternalKind::Func => Some(export.index),
                _ => None,
            })
            .collect::<HashSet<_>>();

        for (i, func) in self.live_funcs() {
            let body = match &func.def {
                Definition::Import(..) => continue,
                Definition::Local(body) => body,
            };

            match (lazy_stack_init_index, exported_funcs.contains(&i)) {
                // Prepend an `allocate_stack` call to all exports if we're
                // lazily allocating the stack.
                (Some(lazy_stack_init_index), true) => {
                    let mut func = map.new_function_with_parsed_locals(body)?;
                    func.instruction(&Instruction::Call(lazy_stack_init_index));
                    let mut reader = body.get_operators_reader()?;
                    while !reader.eof() {
                        func.instruction(&map.parse_instruction(&mut reader)?);
                    }
                    code.function(&func);
                }
                _ => {
                    map.parse_function_body(&mut code, body.clone())?;
                }
            }
        }

        if lazy_stack_init_index.is_some() {
            code.function(&allocate_stack_via_realloc(
                realloc_index.unwrap(),
                sp.unwrap(),
                allocation_state,
            ));
        }

        if sp.is_some() && (realloc_index.is_none() || allocation_state.is_none()) {
            // Either the main module does _not_ export a realloc function, or
            // it is not safe to use for stack allocation because we have no
            // way to short-circuit reentrance, so we'll use `memory.grow`
            // instead.
            realloc_index = Some(num_func_imports + funcs.len());
            funcs.function(add_realloc_type(&mut types));
            code.function(&realloc_via_memory_grow());
        }

        // Inject a start function to initialize the stack pointer which will
        // be local to this module. This only happens if a memory is preserved,
        // a stack pointer global is found, and we're not doing lazy stack
        // allocation.
        let mut start = None;
        if let (Some(sp), None) = (sp, lazy_stack_init_index) {
            if num_memories > 0 {
                // If any memories are preserved there must be precisely one,
                // as otherwise we don't know which one the stack allocation
                // belongs to.
                if num_memories != 1 {
                    bail!("adapter modules don't support multi-memory");
                }

                let sp = map.globals.remap(sp);

                let function_index = num_func_imports + funcs.len();

                // Generate a function type for this start function, adding a
                // new function type to the module if necessary.
                let empty_type = empty_type.unwrap_or_else(|| {
                    types.ty().function([], []);
                    types.len() - 1
                });
                funcs.function(empty_type);
                func_names.push((function_index, "allocate_stack"));
                code.function(&allocate_stack_via_realloc(
                    realloc_index.unwrap(),
                    sp,
                    allocation_state,
                ));

                start = Some(wasm_encoder::StartSection { function_index });
            }
        }

        // Sanity-check the shape of the module since some parts won't work if
        // this fails. Note that during parsing we've already validated there
        // are no data segments or element segments.

        // Shouldn't have any tables if there are no element segments since
        // otherwise there's no meaning to a defined or imported table.
        if self.live_tables().count() != 0 {
            bail!("tables should not be present in the final adapter module");
        }

        // Multi-memory is not supported, and if any memory is used it must be
        // imported rather than locally defined.
        if self.live_memories().count() > 1 {
            bail!("the adapter module should not use multi-memory");
        }
        if !memories.is_empty() {
            bail!("locally-defined memories are not allowed in the adapter module");
        }

        let mut ret = wasm_encoder::Module::default();
        if !types.is_empty() {
            ret.section(&types);
        }
        if !imports.is_empty() {
            ret.section(&imports);
        }
        if !funcs.is_empty() {
            ret.section(&funcs);
        }
        if !tables.is_empty() {
            ret.section(&tables);
        }
        if !memories.is_empty() {
            ret.section(&memories);
        }
        if !globals.is_empty() {
            ret.section(&globals);
        }

        if !self.exports.is_empty() {
            let mut exports = wasm_encoder::ExportSection::new();
            for (_, export) in self.exports.iter() {
                let (kind, index) = match export.kind {
                    ExternalKind::Func => (
                        wasm_encoder::ExportKind::Func,
                        map.funcs.remap(export.index),
                    ),
                    ExternalKind::Table => (
                        wasm_encoder::ExportKind::Table,
                        map.tables.remap(export.index),
                    ),
                    ExternalKind::Memory => (
                        wasm_encoder::ExportKind::Memory,
                        map.memories.remap(export.index),
                    ),
                    ExternalKind::Global => (
                        wasm_encoder::ExportKind::Global,
                        map.globals.remap(export.index),
                    ),
                    kind => bail!("unsupported export kind {kind:?}"),
                };
                exports.export(export.name, kind, index);
            }
            ret.section(&exports);
        }

        if let Some(start) = &start {
            ret.section(start);
        }

        if !code.is_empty() {
            ret.section(&code);
        }

        // Append a custom `name` section using the names of the functions that
        // were found prior to the GC pass in the original module.
        let mut global_names = Vec::new();
        for (i, _func) in self.live_funcs() {
            let name = match self.func_names.get(&i) {
                Some(name) => name,
                None => continue,
            };
            func_names.push((map.funcs.remap(i), *name));
        }
        for (i, _global) in self.live_globals() {
            let name = match self.global_names.get(&i) {
                Some(name) => name,
                None => continue,
            };
            global_names.push((map.globals.remap(i), *name));
        }
        let mut section = Vec::new();
        let mut encode_subsection = |code: u8, names: &[(u32, &str)]| {
            if names.is_empty() {
                return;
            }
            let mut subsection = Vec::new();
            names.len().encode(&mut subsection);
            for (i, name) in names {
                i.encode(&mut subsection);
                name.encode(&mut subsection);
            }
            section.push(code);
            subsection.encode(&mut section);
        };
        if let (Some(realloc_index), true) = (
            realloc_index,
            main_module_realloc.is_none() || allocation_state.is_none(),
        ) {
            func_names.push((realloc_index, "realloc_via_memory_grow"));
        }
        if let Some(lazy_stack_init_index) = lazy_stack_init_index {
            func_names.push((lazy_stack_init_index, "allocate_stack"));
        }
        encode_subsection(0x01, &func_names);
        encode_subsection(0x07, &global_names);
        if !section.is_empty() {
            ret.section(&wasm_encoder::CustomSection {
                name: "name".into(),
                data: Cow::Borrowed(&section),
            });
        }
        if let Some(producers) = &self.producers {
            ret.section(&RawCustomSection(&producers.raw_custom_section()));
        }

        Ok(ret.finish())
    }

    fn find_mut_i32_global(&self, name: &str) -> Result<Option<u32>> {
        let matches = &self
            .live_globals()
            .filter_map(|(i, g)| {
                if g.ty.mutable
                    && g.ty.content_type == ValType::I32
                    && *self.global_names.get(&i)? == name
                {
                    Some(i)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        match matches.deref() {
            [] => Ok(None),
            [i] => Ok(Some(*i)),
            _ => bail!(
                "found {} mutable i32 globals with name {name}",
                matches.len()
            ),
        }
    }
}

// This helper macro is used to define a visitor of all instructions with
// special handling for all payloads of instructions to mark any referenced
// items live.
//
// Currently item identification happens through the field name of the payload.
// While not exactly the most robust solution this should work well enough for
// now.
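//
// For example, the `global.get` operator carries a `{ global_index: u32 }`
// payload, so the macro expands to a
// `fn visit_global_get(&mut self, global_index: u32)` whose body is
// `self.global(global_index);`, marking the referenced global as live.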
macro_rules! define_visit {
    ($(@$p:ident $op:ident $({ $($arg:ident: $argty:ty),* })? => $visit:ident ($($ann:tt)*))*) => {
        $(
            fn $visit(&mut self $(, $($arg: $argty),*)?) {
                $(
                    $(
                        define_visit!(mark_live self $arg $arg);
                    )*
                )?
            }
        )*
    };

    (mark_live $self:ident $arg:ident type_index) => {$self.ty($arg);};
    (mark_live $self:ident $arg:ident array_type_index) => {$self.ty($arg);};
    (mark_live $self:ident $arg:ident array_type_index_dst) => {$self.ty($arg);};
    (mark_live $self:ident $arg:ident array_type_index_src) => {$self.ty($arg);};
    (mark_live $self:ident $arg:ident struct_type_index) => {$self.ty($arg);};
    (mark_live $self:ident $arg:ident src_table) => {$self.table($arg);};
    (mark_live $self:ident $arg:ident dst_table) => {$self.table($arg);};
    (mark_live $self:ident $arg:ident table_index) => {$self.table($arg);};
    (mark_live $self:ident $arg:ident table) => {$self.table($arg);};
    (mark_live $self:ident $arg:ident global_index) => {$self.global($arg);};
    (mark_live $self:ident $arg:ident function_index) => {$self.func($arg);};
    (mark_live $self:ident $arg:ident mem) => {$self.memory($arg);};
    (mark_live $self:ident $arg:ident src_mem) => {$self.memory($arg);};
    (mark_live $self:ident $arg:ident dst_mem) => {$self.memory($arg);};
    (mark_live $self:ident $arg:ident memarg) => {$self.memory($arg.memory);};
    (mark_live $self:ident $arg:ident blockty) => {$self.blockty($arg);};
    (mark_live $self:ident $arg:ident ty) => {$self.valty($arg)};
    (mark_live $self:ident $arg:ident hty) => {$self.heapty($arg)};
    (mark_live $self:ident $arg:ident from_ref_type) => {$self.refty($arg);};
    (mark_live $self:ident $arg:ident to_ref_type) => {$self.refty($arg);};
    (mark_live $self:ident $arg:ident lane) => {};
    (mark_live $self:ident $arg:ident lanes) => {};
    (mark_live $self:ident $arg:ident flags) => {};
    (mark_live $self:ident $arg:ident value) => {};
    (mark_live $self:ident $arg:ident local_index) => {};
    (mark_live $self:ident $arg:ident relative_depth) => {};
    (mark_live $self:ident $arg:ident tag_index) => {};
    (mark_live $self:ident $arg:ident targets) => {};
    (mark_live $self:ident $arg:ident data_index) => {};
    (mark_live $self:ident $arg:ident array_data_index) => {};
    (mark_live $self:ident $arg:ident elem_index) => {};
    (mark_live $self:ident $arg:ident array_elem_index) => {};
    (mark_live $self:ident $arg:ident array_size) => {};
    (mark_live $self:ident $arg:ident field_index) => {};
    (mark_live $self:ident $arg:ident from_type_nullable) => {};
    (mark_live $self:ident $arg:ident to_type_nullable) => {};
    (mark_live $self:ident $arg:ident ordering) => {};
    (mark_live $self:ident $arg:ident try_table) => {unimplemented!();};
    (mark_live $self:ident $arg:ident argument_index) => {};
    (mark_live $self:ident $arg:ident result_index) => {};
    (mark_live $self:ident $arg:ident cont_type_index) => {};
    (mark_live $self:ident $arg:ident resume_table) => {unimplemented!();};
}

impl<'a> VisitOperator<'a> for Module<'a> {
    type Output = ();

    fn simd_visitor(&mut self) -> Option<&mut dyn VisitSimdOperator<'a, Output = Self::Output>> {
        Some(self)
    }

    wasmparser::for_each_visit_operator!(define_visit);
}

impl<'a> VisitSimdOperator<'a> for Module<'a> {
    wasmparser::for_each_visit_simd_operator!(define_visit);
}

/// Helper function to filter `iter` based on the `live` set, yielding an
/// iterator over the index of the item that's live as well as the item itself.
fn live_iter<'a, T>(
    live: &'a BitVec,
    iter: impl Iterator<Item = T> + 'a,
) -> impl Iterator<Item = (u32, T)> + 'a {
    iter.enumerate().filter_map(|(i, t)| {
        let i = i as u32;
        if live.contains(i) {
            Some((i, t))
        } else {
            None
        }
    })
}

#[derive(Default)]
struct Encoder {
    types: Remap,
    funcs: Remap,
    memories: Remap,
    globals: Remap,
    tables: Remap,
}

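// Overriding the index hooks of `Reencode` means that helpers such as
// `parse_function_body`, `parse_instruction`, and `const_expr` (used in
// `encode` above) rewrite every index they encounter from the old index space
// into the GC'd one.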
impl Reencode for Encoder {
    type Error = Infallible;

    fn type_index(&mut self, i: u32) -> u32 {
        self.types.remap(i)
    }
    fn function_index(&mut self, i: u32) -> u32 {
        self.funcs.remap(i)
    }
    fn memory_index(&mut self, i: u32) -> u32 {
        self.memories.remap(i)
    }
    fn global_index(&mut self, i: u32) -> u32 {
        self.globals.remap(i)
    }
    fn table_index(&mut self, i: u32) -> u32 {
        self.tables.remap(i)
    }
}

// Minimal definition of a bit vector necessary for the liveness calculations
// above.
mod bitvec {
    use std::mem;

    type T = u64;

    #[derive(Default)]
    pub struct BitVec {
        bits: Vec<T>,
    }

    impl BitVec {
        /// Inserts `idx` into this bit vector, returning whether it was not
        /// previously present.
        pub fn insert(&mut self, idx: u32) -> bool {
            let (idx, bit) = idx_bit(idx);
            match self.bits.get_mut(idx) {
                Some(bits) => {
                    if *bits & bit != 0 {
                        return false;
                    }
                    *bits |= bit;
                }
                None => {
                    self.bits.resize(idx + 1, 0);
                    self.bits[idx] = bit;
                }
            }
            true
        }

        /// Returns whether this bit vector contains the specified `idx`th bit.
        pub fn contains(&self, idx: u32) -> bool {
            let (idx, bit) = idx_bit(idx);
            match self.bits.get(idx) {
                Some(bits) => (*bits & bit) != 0,
                None => false,
            }
        }
    }

    fn idx_bit(idx: u32) -> (usize, T) {
        let idx = idx as usize;
        let size = mem::size_of::<T>() * 8;
        let index = idx / size;
        let bit = 1 << (idx % size);
        (index, bit)
    }
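
    // A small sanity check (added for illustration) of the semantics
    // documented above: `insert` returns `true` only the first time an index
    // is added, and `contains` reflects prior insertions.
    #[cfg(test)]
    mod tests {
        use super::BitVec;

        #[test]
        fn insert_and_contains() {
            let mut v = BitVec::default();
            assert!(!v.contains(100));
            assert!(v.insert(100));
            assert!(!v.insert(100));
            assert!(v.contains(100));
            assert!(!v.contains(99));
        }
    }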
}

/// Small data structure used to track index mappings from an old index space
/// to a new one.
#[derive(Default)]
struct Remap {
    /// Map, indexed by the old index set, to the new index set.
    map: HashMap<u32, u32>,
    /// The next available index in the new index space.
    next: u32,
}

impl Remap {
    /// Appends a new live "old index" into this remapping structure.
    ///
    /// This will assign a new index for the old index provided.
    fn push(&mut self, old: u32) {
        self.map.insert(old, self.next);
        self.next += 1;
    }

    /// Returns the new index corresponding to an old index.
    ///
    /// Panics if the `old` index was not added via `push` above.
    fn remap(&self, old: u32) -> u32 {
        *self
            .map
            .get(&old)
            .unwrap_or_else(|| panic!("can't map {old} to a new index"))
    }
}
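
// A small illustration (added as a test) of how `Remap` assigns dense new
// indices in push order: old indices 4 and 7 become new indices 0 and 1.
#[cfg(test)]
mod remap_tests {
    use super::Remap;

    #[test]
    fn remap_assigns_dense_indices() {
        let mut r = Remap::default();
        r.push(4);
        r.push(7);
        assert_eq!(r.remap(4), 0);
        assert_eq!(r.remap(7), 1);
        assert_eq!(r.next, 2);
    }
}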