1//! Canonicalization of types.
2//!
3//! The unit of canonicalization is a recursion group. Having "unnecessary"
4//! types in a recursion group can "break" canonicalization of other types
5//! within that same recursion group, as can reordering types within a recursion
6//! group.
7//!
8//! It is an invariant that all types defined before the recursion group we are
9//! currently canonicalizing have already been canonicalized themselves.
10//!
11//! Canonicalizing a recursion group then proceeds as follows:
12//!
13//! * First we walk each of its `SubType` elements and put their type references
14//! (i.e. their `PackedIndex`es) into canonical form. Canonicalizing a
15//! `PackedIndex` means switching it from indexing into the Wasm module's
16//! types space into either
17//!
18//! 1. Referencing an already-canonicalized type, for types outside of this
19//! recursion group. Because inter-group type references can only go
20//! towards types defined before this recursion group, we know the type is
21//! already canonicalized and we have a `CoreTypeId` for each of those
22//! types. This updates the `PackedIndex` into a `CoreTypeId`.
23//!
24//! 2. Indexing into the current recursion group, for intra-group type
25//! references.
26//!
27//! Note that (2) has the effect of making the "same" structure of mutual type
28//! recursion look identical across recursion groups:
29//!
30//! ```wat
31//! ;; Before
32//! (rec (struct (field (module-type 1))) (struct (field (module-type 0))))
33//! (rec (struct (field (module-type 3))) (struct (field (module-type 2))))
34//!
35//! ;; After
36//! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0))))
37//! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0))))
38//! ```
39//!
40//! * Now that the recursion group's elements are in canonical form, we can
41//! "simply" hash cons whole rec groups at a time. The `TypesList` morally
42//! maintains a hash map from `Vec<SubType>` to `RecGroupId` and we can do
43//! get-or-create operations on it. I say "morally" because we don't actually
44//! duplicate the `Vec<SubType>` key in that hash map since those elements are
45//! already stored in the `TypeList`'s internal `SnapshotList<CoreType>`. This
46//! means we need to do some low-level hash table fiddling with the
47//! `hashbrown` crate.
48//!
49//! And that's it! That is the whole canonicalization algorithm.
50//!
51//! Some more random things to note:
52//!
53//! * Because we essentially already have to do the check to canonicalize, and
54//! to avoid additional passes over the types, the canonicalization pass also
55//! checks that type references are in bounds. These are the only errors that
56//! can be returned from canonicalization.
57//!
58//! * Canonicalizing requires the `Module` to translate type indices to
59//! actual `CoreTypeId`s.
60//!
61//! * It is important that *after* we have canonicalized all types, we don't
62//! need the `Module` anymore. This makes sure that we can, for example,
63//! intern all types from the same store into the same `TypeList`. Which in
64//! turn lets us type check function imports of a same-store instance's
65//! exported functions and we don't need to translate from one module's
66//! canonical representation to another module's canonical representation or
67//! perform additional expensive checks to see if the types match or not
68//! (since the whole point of canonicalization is to avoid that!).
69
70use super::{RecGroupId, TypeAlloc, TypeList};
71use crate::{
72 types::{CoreTypeId, TypeIdentifier},
73 BinaryReaderError, CompositeInnerType, CompositeType, PackedIndex, RecGroup, Result,
74 StorageType, UnpackedIndex, ValType, WasmFeatures,
75};
76
77pub(crate) trait InternRecGroup {
78 fn add_type_id(&mut self, id: CoreTypeId);
79 fn type_id_at(&self, idx: u32, offset: usize) -> Result<CoreTypeId>;
80 fn types_len(&self) -> u32;
81
82 /// Canonicalize the rec group and return its id and whether it is a new group
83 /// (we added its types to the `TypeAlloc`) or not (we deduplicated it with an
84 /// existing canonical rec group).
85 fn canonicalize_and_intern_rec_group(
86 &mut self,
87 features: &WasmFeatures,
88 types: &mut TypeAlloc,
89 mut rec_group: RecGroup,
90 offset: usize,
91 ) -> Result<()>
92 where
93 Self: Sized,
94 {
95 debug_assert!(rec_group.is_explicit_rec_group() || rec_group.types().len() == 1);
96 if rec_group.is_explicit_rec_group() && !features.gc() {
97 bail!(
98 offset,
99 "rec group usage requires `gc` proposal to be enabled"
100 );
101 }
102 if features.needs_type_canonicalization() {
103 TypeCanonicalizer::new(self, offset)
104 .with_features(features)
105 .canonicalize_rec_group(&mut rec_group)?;
106 }
107 let (is_new, rec_group_id) =
108 types.intern_canonical_rec_group(features.needs_type_canonicalization(), rec_group);
109 let range = &types[rec_group_id];
110 let start = range.start.index();
111 let end = range.end.index();
112
113 for i in start..end {
114 let i = u32::try_from(i).unwrap();
115 let id = CoreTypeId::from_index(i);
116 debug_assert!(types.get(id).is_some());
117 self.add_type_id(id);
118 if is_new {
119 self.check_subtype(rec_group_id, id, features, types, offset)?;
120 }
121 }
122
123 Ok(())
124 }
125
126 fn check_subtype(
127 &mut self,
128 rec_group: RecGroupId,
129 id: CoreTypeId,
130 features: &WasmFeatures,
131 types: &mut TypeAlloc,
132 offset: usize,
133 ) -> Result<()> {
134 let ty = &types[id];
135 if !features.gc() && (!ty.is_final || ty.supertype_idx.is_some()) {
136 bail!(offset, "gc proposal must be enabled to use subtypes");
137 }
138
139 self.check_composite_type(&ty.composite_type, features, &types, offset)?;
140
141 let depth = if let Some(supertype_index) = ty.supertype_idx {
142 debug_assert!(supertype_index.is_canonical());
143 let sup_id = self.at_packed_index(types, rec_group, supertype_index, offset)?;
144 if types[sup_id].is_final {
145 bail!(offset, "sub type cannot have a final super type");
146 }
147 if !types.matches(id, sup_id) {
148 bail!(offset, "sub type must match super type");
149 }
150 let depth = types.get_subtyping_depth(sup_id) + 1;
151 if usize::from(depth) > crate::limits::MAX_WASM_SUBTYPING_DEPTH {
152 bail!(
153 offset,
154 "sub type hierarchy too deep: found depth {}, cannot exceed depth {}",
155 depth,
156 crate::limits::MAX_WASM_SUBTYPING_DEPTH,
157 );
158 }
159 depth
160 } else {
161 0
162 };
163 types.set_subtyping_depth(id, depth);
164
165 Ok(())
166 }
167
168 fn check_composite_type(
169 &mut self,
170 ty: &CompositeType,
171 features: &WasmFeatures,
172 types: &TypeList,
173 offset: usize,
174 ) -> Result<()> {
175 let check = |ty: &ValType, shared: bool| {
176 features
177 .check_value_type(*ty)
178 .map_err(|e| BinaryReaderError::new(e, offset))?;
179 if shared && !types.valtype_is_shared(*ty) {
180 return Err(BinaryReaderError::new(
181 "shared composite type must contain shared types",
182 offset,
183 ));
184 // The other cases are fine:
185 // - both shared or unshared: good to go
186 // - the func type is unshared, `ty` is shared: though
187 // odd, we _can_ in fact use shared values in
188 // unshared composite types (e.g., functions).
189 }
190 Ok(())
191 };
192 if !features.shared_everything_threads() && ty.shared {
193 return Err(BinaryReaderError::new(
194 "shared composite types require the shared-everything-threads proposal",
195 offset,
196 ));
197 }
198 match &ty.inner {
199 CompositeInnerType::Func(t) => {
200 for vt in t.params().iter().chain(t.results()) {
201 check(vt, ty.shared)?;
202 }
203 if t.results().len() > 1 && !features.multi_value() {
204 return Err(BinaryReaderError::new(
205 "func type returns multiple values but the multi-value feature is not enabled",
206 offset,
207 ));
208 }
209 }
210 CompositeInnerType::Array(t) => {
211 if !features.gc() {
212 bail!(
213 offset,
214 "array indexed types not supported without the gc feature",
215 );
216 }
217 if !features.gc_types() {
218 bail!(
219 offset,
220 "cannot define array types when gc types are disabled",
221 );
222 }
223 match &t.0.element_type {
224 StorageType::I8 | StorageType::I16 => {
225 // Note: scalar types are always `shared`.
226 }
227 StorageType::Val(value_type) => check(value_type, ty.shared)?,
228 };
229 }
230 CompositeInnerType::Struct(t) => {
231 if !features.gc() {
232 bail!(
233 offset,
234 "struct indexed types not supported without the gc feature",
235 );
236 }
237 if !features.gc_types() {
238 bail!(
239 offset,
240 "cannot define struct types when gc types are disabled",
241 );
242 }
243 for ft in t.fields.iter() {
244 match &ft.element_type {
245 StorageType::I8 | StorageType::I16 => {
246 // Note: scalar types are always `shared`.
247 }
248 StorageType::Val(value_type) => check(value_type, ty.shared)?,
249 }
250 }
251 }
252 CompositeInnerType::Cont(t) => {
253 if !features.stack_switching() {
254 bail!(
255 offset,
256 "cannot define continuation types when stack switching is disabled",
257 );
258 }
259 if !features.gc_types() {
260 bail!(
261 offset,
262 "cannot define continuation types when gc types are disabled",
263 );
264 }
265 // Check that the type index points to a valid function type.
266 let id = t.0.as_core_type_id().unwrap();
267 match types[id].composite_type.inner {
268 CompositeInnerType::Func(_) => (),
269 _ => bail!(offset, "non-function type {}", id.index()),
270 }
271 }
272 }
273 Ok(())
274 }
275
276 fn at_packed_index(
277 &self,
278 types: &TypeList,
279 rec_group: RecGroupId,
280 index: PackedIndex,
281 offset: usize,
282 ) -> Result<CoreTypeId> {
283 match index.unpack() {
284 UnpackedIndex::Id(id) => Ok(id),
285 UnpackedIndex::Module(idx) => self.type_id_at(idx, offset),
286 UnpackedIndex::RecGroup(idx) => types.rec_group_local_id(rec_group, idx, offset),
287 }
288 }
289}
290
291/// The kind of canonicalization we are doing.
292#[derive(Clone, Copy, Debug, PartialEq, Eq)]
293enum CanonicalizationMode {
294 /// Standard canonicalization: turns module indices into either (1)
295 /// `CoreTypeId`s for inter-group references or (2) rec-group-local indices
296 /// for intra-group references.
297 HashConsing,
298
299 /// Turns all type reference indices into `CoreTypeId`s, even from within
300 /// the same rec group. Not useful for hash consing, but useful when
301 /// exposing types to end users so they don't have to deal with
302 /// rec-group-local indices.
303 OnlyIds,
304}
305
306pub(crate) struct TypeCanonicalizer<'a> {
307 module: &'a dyn InternRecGroup,
308 features: Option<&'a WasmFeatures>,
309 rec_group_start: u32,
310 rec_group_len: u32,
311 offset: usize,
312 mode: CanonicalizationMode,
313 within_rec_group: Option<core::ops::Range<CoreTypeId>>,
314}
315
316impl<'a> TypeCanonicalizer<'a> {
317 pub fn new(module: &'a dyn InternRecGroup, offset: usize) -> Self {
318 // These defaults will work for when we are canonicalizing types from
319 // outside of a rec group definition, forcing all `PackedIndex`es to be
320 // canonicalized to `CoreTypeId`s.
321 let rec_group_start = u32::MAX;
322 let rec_group_len = 0;
323
324 Self {
325 module,
326 features: None,
327 rec_group_start,
328 rec_group_len,
329 offset,
330 mode: CanonicalizationMode::HashConsing,
331 within_rec_group: None,
332 }
333 }
334
335 pub fn with_features(&mut self, features: &'a WasmFeatures) -> &mut Self {
336 debug_assert!(self.features.is_none());
337 self.features = Some(features);
338 self
339 }
340
341 fn allow_gc(&self) -> bool {
342 self.features.map_or(true, |f| f.gc())
343 }
344
345 fn canonicalize_rec_group(&mut self, rec_group: &mut RecGroup) -> Result<()> {
346 // Re-initialize these fields so that we properly canonicalize
347 // intra-rec-group type references into indices into the rec group
348 // rather than as `CoreTypeId`s.
349 self.rec_group_start = self.module.types_len();
350 self.rec_group_len = u32::try_from(rec_group.types().len()).unwrap();
351
352 for (rec_group_local_index, ty) in rec_group.types_mut().enumerate() {
353 let rec_group_local_index = u32::try_from(rec_group_local_index).unwrap();
354 let type_index = self.rec_group_start + rec_group_local_index;
355
356 if let Some(sup) = ty.supertype_idx.as_mut() {
357 if sup.as_module_index().map_or(false, |i| i >= type_index) {
358 bail!(self.offset, "supertypes must be defined before subtypes");
359 }
360 }
361
362 ty.remap_indices(&mut |idx| self.canonicalize_type_index(idx))?;
363 }
364
365 Ok(())
366 }
367
368 fn canonicalize_type_index(&self, ty: &mut PackedIndex) -> Result<()> {
369 match ty.unpack() {
370 UnpackedIndex::Id(_) => Ok(()),
371 UnpackedIndex::Module(index) => {
372 if index < self.rec_group_start || self.mode == CanonicalizationMode::OnlyIds {
373 let id = self.module.type_id_at(index, self.offset)?;
374 if let Some(id) = PackedIndex::from_id(id) {
375 *ty = id;
376 return Ok(());
377 } else {
378 bail!(
379 self.offset,
380 "implementation limit: too many types in `TypeList`"
381 )
382 }
383 }
384
385 // When GC is not enabled the `rec_group_len == 1` so any rec group
386 // local type references will be direct self references. But any kind of
387 // type recursion, including self references, is not allowed in the
388 // typed function references proposal, only the GC proposal.
389 debug_assert!(self.allow_gc() || self.rec_group_len == 1);
390 let local = index - self.rec_group_start;
391 if self.allow_gc() && local < self.rec_group_len {
392 if let Some(id) = PackedIndex::from_rec_group_index(local) {
393 *ty = id;
394 return Ok(());
395 } else {
396 bail!(
397 self.offset,
398 "implementation limit: too many types in a recursion group"
399 )
400 }
401 }
402
403 bail!(
404 self.offset,
405 "unknown type {index}: type index out of bounds"
406 )
407 }
408 UnpackedIndex::RecGroup(local_index) => match self.mode {
409 CanonicalizationMode::HashConsing => Ok(()),
410 CanonicalizationMode::OnlyIds => {
411 let rec_group_elems = self.within_rec_group.as_ref().expect(
412 "configured to canonicalize all type reference indices to `CoreTypeId`s \
413 and found rec-group-local index, but missing `within_rec_group` context",
414 );
415
416 let rec_group_len = rec_group_elems.end.index() - rec_group_elems.start.index();
417 let rec_group_len = u32::try_from(rec_group_len).unwrap();
418 assert!(local_index < rec_group_len);
419
420 let rec_group_start = u32::try_from(rec_group_elems.start.index()).unwrap();
421
422 let id = CoreTypeId::from_index(rec_group_start + local_index);
423 *ty = PackedIndex::from_id(id).expect(
424 "should fit in impl limits since we already have the end of the rec group \
425 constructed successfully",
426 );
427 Ok(())
428 }
429 },
430 }
431 }
432}
433