1 | //! Canonicalization of types. |
2 | //! |
3 | //! The unit of canonicalization is a recursion group. Having "unnecessary" |
4 | //! types in a recursion group can "break" canonicalization of other types |
5 | //! within that same recursion group, as can reordering types within a recursion |
6 | //! group. |
7 | //! |
8 | //! It is an invariant that all types defined before the recursion group we are |
9 | //! currently canonicalizing have already been canonicalized themselves. |
10 | //! |
11 | //! Canonicalizing a recursion group then proceeds as follows: |
12 | //! |
13 | //! * First we walk each of its `SubType` elements and put their type references |
14 | //! (i.e. their `PackedIndex`es) into canonical form. Canonicalizing a |
15 | //! `PackedIndex` means switching it from indexing into the Wasm module's |
16 | //! types space into either |
17 | //! |
18 | //! 1. Referencing an already-canonicalized type, for types outside of this |
19 | //! recursion group. Because inter-group type references can only go |
20 | //! towards types defined before this recursion group, we know the type is |
21 | //! already canonicalized and we have a `CoreTypeId` for each of those |
22 | //! types. This updates the `PackedIndex` into a `CoreTypeId`. |
23 | //! |
24 | //! 2. Indexing into the current recursion group, for intra-group type |
25 | //! references. |
26 | //! |
27 | //! Note that (2) has the effect of making the "same" structure of mutual type |
28 | //! recursion look identical across recursion groups: |
29 | //! |
30 | //! ```wat |
31 | //! ;; Before |
32 | //! (rec (struct (field (module-type 1))) (struct (field (module-type 0)))) |
33 | //! (rec (struct (field (module-type 3))) (struct (field (module-type 2)))) |
34 | //! |
35 | //! ;; After |
36 | //! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0)))) |
37 | //! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0)))) |
38 | //! ``` |
39 | //! |
40 | //! * Now that the recursion group's elements are in canonical form, we can |
41 | //! "simply" hash cons whole rec groups at a time. The `TypesList` morally |
42 | //! maintains a hash map from `Vec<SubType>` to `RecGroupId` and we can do |
43 | //! get-or-create operations on it. I say "morally" because we don't actually |
44 | //! duplicate the `Vec<SubType>` key in that hash map since those elements are |
45 | //! already stored in the `TypeList`'s internal `SnapshotList<CoreType>`. This |
46 | //! means we need to do some low-level hash table fiddling with the |
47 | //! `hashbrown` crate. |
48 | //! |
49 | //! And that's it! That is the whole canonicalization algorithm. |
50 | //! |
51 | //! Some more random things to note: |
52 | //! |
53 | //! * Because we essentially already have to do the check to canonicalize, and |
54 | //! to avoid additional passes over the types, the canonicalization pass also |
55 | //! checks that type references are in bounds. These are the only errors that |
56 | //! can be returned from canonicalization. |
57 | //! |
58 | //! * Canonicalizing requires the `Module` to translate type indices to |
59 | //! actual `CoreTypeId`s. |
60 | //! |
61 | //! * It is important that *after* we have canonicalized all types, we don't |
62 | //! need the `Module` anymore. This makes sure that we can, for example, |
63 | //! intern all types from the same store into the same `TypeList`. Which in |
64 | //! turn lets us type check function imports of a same-store instance's |
65 | //! exported functions and we don't need to translate from one module's |
66 | //! canonical representation to another module's canonical representation or |
67 | //! perform additional expensive checks to see if the types match or not |
68 | //! (since the whole point of canonicalization is to avoid that!). |
69 | |
70 | use super::{RecGroupId, TypeAlloc, TypeList}; |
71 | use crate::{ |
72 | types::{CoreTypeId, TypeIdentifier}, |
73 | BinaryReaderError, CompositeInnerType, CompositeType, PackedIndex, RecGroup, Result, |
74 | StorageType, UnpackedIndex, ValType, WasmFeatures, |
75 | }; |
76 | |
77 | pub(crate) trait InternRecGroup { |
78 | fn add_type_id(&mut self, id: CoreTypeId); |
79 | fn type_id_at(&self, idx: u32, offset: usize) -> Result<CoreTypeId>; |
80 | fn types_len(&self) -> u32; |
81 | |
82 | /// Canonicalize the rec group and return its id and whether it is a new group |
83 | /// (we added its types to the `TypeAlloc`) or not (we deduplicated it with an |
84 | /// existing canonical rec group). |
85 | fn canonicalize_and_intern_rec_group( |
86 | &mut self, |
87 | features: &WasmFeatures, |
88 | types: &mut TypeAlloc, |
89 | mut rec_group: RecGroup, |
90 | offset: usize, |
91 | ) -> Result<()> |
92 | where |
93 | Self: Sized, |
94 | { |
95 | debug_assert!(rec_group.is_explicit_rec_group() || rec_group.types().len() == 1); |
96 | if rec_group.is_explicit_rec_group() && !features.gc() { |
97 | bail!( |
98 | offset, |
99 | "rec group usage requires `gc` proposal to be enabled" |
100 | ); |
101 | } |
102 | if features.needs_type_canonicalization() { |
103 | TypeCanonicalizer::new(self, offset) |
104 | .with_features(features) |
105 | .canonicalize_rec_group(&mut rec_group)?; |
106 | } |
107 | let (is_new, rec_group_id) = |
108 | types.intern_canonical_rec_group(features.needs_type_canonicalization(), rec_group); |
109 | let range = &types[rec_group_id]; |
110 | let start = range.start.index(); |
111 | let end = range.end.index(); |
112 | |
113 | for i in start..end { |
114 | let i = u32::try_from(i).unwrap(); |
115 | let id = CoreTypeId::from_index(i); |
116 | debug_assert!(types.get(id).is_some()); |
117 | self.add_type_id(id); |
118 | if is_new { |
119 | self.check_subtype(rec_group_id, id, features, types, offset)?; |
120 | } |
121 | } |
122 | |
123 | Ok(()) |
124 | } |
125 | |
126 | fn check_subtype( |
127 | &mut self, |
128 | rec_group: RecGroupId, |
129 | id: CoreTypeId, |
130 | features: &WasmFeatures, |
131 | types: &mut TypeAlloc, |
132 | offset: usize, |
133 | ) -> Result<()> { |
134 | let ty = &types[id]; |
135 | if !features.gc() && (!ty.is_final || ty.supertype_idx.is_some()) { |
136 | bail!(offset, "gc proposal must be enabled to use subtypes" ); |
137 | } |
138 | |
139 | self.check_composite_type(&ty.composite_type, features, &types, offset)?; |
140 | |
141 | let depth = if let Some(supertype_index) = ty.supertype_idx { |
142 | debug_assert!(supertype_index.is_canonical()); |
143 | let sup_id = self.at_packed_index(types, rec_group, supertype_index, offset)?; |
144 | if types[sup_id].is_final { |
145 | bail!(offset, "sub type cannot have a final super type" ); |
146 | } |
147 | if !types.matches(id, sup_id) { |
148 | bail!(offset, "sub type must match super type" ); |
149 | } |
150 | let depth = types.get_subtyping_depth(sup_id) + 1; |
151 | if usize::from(depth) > crate::limits::MAX_WASM_SUBTYPING_DEPTH { |
152 | bail!( |
153 | offset, |
154 | "sub type hierarchy too deep: found depth {}, cannot exceed depth {}" , |
155 | depth, |
156 | crate::limits::MAX_WASM_SUBTYPING_DEPTH, |
157 | ); |
158 | } |
159 | depth |
160 | } else { |
161 | 0 |
162 | }; |
163 | types.set_subtyping_depth(id, depth); |
164 | |
165 | Ok(()) |
166 | } |
167 | |
168 | fn check_composite_type( |
169 | &mut self, |
170 | ty: &CompositeType, |
171 | features: &WasmFeatures, |
172 | types: &TypeList, |
173 | offset: usize, |
174 | ) -> Result<()> { |
175 | let check = |ty: &ValType, shared: bool| { |
176 | features |
177 | .check_value_type(*ty) |
178 | .map_err(|e| BinaryReaderError::new(e, offset))?; |
179 | if shared && !types.valtype_is_shared(*ty) { |
180 | return Err(BinaryReaderError::new( |
181 | "shared composite type must contain shared types" , |
182 | offset, |
183 | )); |
184 | // The other cases are fine: |
185 | // - both shared or unshared: good to go |
186 | // - the func type is unshared, `ty` is shared: though |
187 | // odd, we _can_ in fact use shared values in |
188 | // unshared composite types (e.g., functions). |
189 | } |
190 | Ok(()) |
191 | }; |
192 | if !features.shared_everything_threads() && ty.shared { |
193 | return Err(BinaryReaderError::new( |
194 | "shared composite types require the shared-everything-threads proposal" , |
195 | offset, |
196 | )); |
197 | } |
198 | match &ty.inner { |
199 | CompositeInnerType::Func(t) => { |
200 | for vt in t.params().iter().chain(t.results()) { |
201 | check(vt, ty.shared)?; |
202 | } |
203 | if t.results().len() > 1 && !features.multi_value() { |
204 | return Err(BinaryReaderError::new( |
205 | "func type returns multiple values but the multi-value feature is not enabled" , |
206 | offset, |
207 | )); |
208 | } |
209 | } |
210 | CompositeInnerType::Array(t) => { |
211 | if !features.gc() { |
212 | bail!( |
213 | offset, |
214 | "array indexed types not supported without the gc feature" , |
215 | ); |
216 | } |
217 | if !features.gc_types() { |
218 | bail!( |
219 | offset, |
220 | "cannot define array types when gc types are disabled" , |
221 | ); |
222 | } |
223 | match &t.0.element_type { |
224 | StorageType::I8 | StorageType::I16 => { |
225 | // Note: scalar types are always `shared`. |
226 | } |
227 | StorageType::Val(value_type) => check(value_type, ty.shared)?, |
228 | }; |
229 | } |
230 | CompositeInnerType::Struct(t) => { |
231 | if !features.gc() { |
232 | bail!( |
233 | offset, |
234 | "struct indexed types not supported without the gc feature" , |
235 | ); |
236 | } |
237 | if !features.gc_types() { |
238 | bail!( |
239 | offset, |
240 | "cannot define struct types when gc types are disabled" , |
241 | ); |
242 | } |
243 | for ft in t.fields.iter() { |
244 | match &ft.element_type { |
245 | StorageType::I8 | StorageType::I16 => { |
246 | // Note: scalar types are always `shared`. |
247 | } |
248 | StorageType::Val(value_type) => check(value_type, ty.shared)?, |
249 | } |
250 | } |
251 | } |
252 | CompositeInnerType::Cont(t) => { |
253 | if !features.stack_switching() { |
254 | bail!( |
255 | offset, |
256 | "cannot define continuation types when stack switching is disabled" , |
257 | ); |
258 | } |
259 | if !features.gc_types() { |
260 | bail!( |
261 | offset, |
262 | "cannot define continuation types when gc types are disabled" , |
263 | ); |
264 | } |
265 | // Check that the type index points to a valid function type. |
266 | let id = t.0.as_core_type_id().unwrap(); |
267 | match types[id].composite_type.inner { |
268 | CompositeInnerType::Func(_) => (), |
269 | _ => bail!(offset, "non-function type {}" , id.index()), |
270 | } |
271 | } |
272 | } |
273 | Ok(()) |
274 | } |
275 | |
276 | fn at_packed_index( |
277 | &self, |
278 | types: &TypeList, |
279 | rec_group: RecGroupId, |
280 | index: PackedIndex, |
281 | offset: usize, |
282 | ) -> Result<CoreTypeId> { |
283 | match index.unpack() { |
284 | UnpackedIndex::Id(id) => Ok(id), |
285 | UnpackedIndex::Module(idx) => self.type_id_at(idx, offset), |
286 | UnpackedIndex::RecGroup(idx) => types.rec_group_local_id(rec_group, idx, offset), |
287 | } |
288 | } |
289 | } |
290 | |
291 | /// The kind of canonicalization we are doing. |
292 | #[derive (Clone, Copy, Debug, PartialEq, Eq)] |
293 | enum CanonicalizationMode { |
294 | /// Standard canonicalization: turns module indices into either (1) |
295 | /// `CoreTypeId`s for inter-group references or (2) rec-group-local indices |
296 | /// for intra-group references. |
297 | HashConsing, |
298 | |
299 | /// Turns all type reference indices into `CoreTypeId`s, even from within |
300 | /// the same rec group. Not useful for hash consing, but useful when |
301 | /// exposing types to end users so they don't have to deal with |
302 | /// rec-group-local indices. |
303 | OnlyIds, |
304 | } |
305 | |
306 | pub(crate) struct TypeCanonicalizer<'a> { |
307 | module: &'a dyn InternRecGroup, |
308 | features: Option<&'a WasmFeatures>, |
309 | rec_group_start: u32, |
310 | rec_group_len: u32, |
311 | offset: usize, |
312 | mode: CanonicalizationMode, |
313 | within_rec_group: Option<core::ops::Range<CoreTypeId>>, |
314 | } |
315 | |
316 | impl<'a> TypeCanonicalizer<'a> { |
317 | pub fn new(module: &'a dyn InternRecGroup, offset: usize) -> Self { |
318 | // These defaults will work for when we are canonicalizing types from |
319 | // outside of a rec group definition, forcing all `PackedIndex`es to be |
320 | // canonicalized to `CoreTypeId`s. |
321 | let rec_group_start = u32::MAX; |
322 | let rec_group_len = 0; |
323 | |
324 | Self { |
325 | module, |
326 | features: None, |
327 | rec_group_start, |
328 | rec_group_len, |
329 | offset, |
330 | mode: CanonicalizationMode::HashConsing, |
331 | within_rec_group: None, |
332 | } |
333 | } |
334 | |
335 | pub fn with_features(&mut self, features: &'a WasmFeatures) -> &mut Self { |
336 | debug_assert!(self.features.is_none()); |
337 | self.features = Some(features); |
338 | self |
339 | } |
340 | |
341 | fn allow_gc(&self) -> bool { |
342 | self.features.map_or(true, |f| f.gc()) |
343 | } |
344 | |
345 | fn canonicalize_rec_group(&mut self, rec_group: &mut RecGroup) -> Result<()> { |
346 | // Re-initialize these fields so that we properly canonicalize |
347 | // intra-rec-group type references into indices into the rec group |
348 | // rather than as `CoreTypeId`s. |
349 | self.rec_group_start = self.module.types_len(); |
350 | self.rec_group_len = u32::try_from(rec_group.types().len()).unwrap(); |
351 | |
352 | for (rec_group_local_index, ty) in rec_group.types_mut().enumerate() { |
353 | let rec_group_local_index = u32::try_from(rec_group_local_index).unwrap(); |
354 | let type_index = self.rec_group_start + rec_group_local_index; |
355 | |
356 | if let Some(sup) = ty.supertype_idx.as_mut() { |
357 | if sup.as_module_index().map_or(false, |i| i >= type_index) { |
358 | bail!(self.offset, "supertypes must be defined before subtypes" ); |
359 | } |
360 | } |
361 | |
362 | ty.remap_indices(&mut |idx| self.canonicalize_type_index(idx))?; |
363 | } |
364 | |
365 | Ok(()) |
366 | } |
367 | |
368 | fn canonicalize_type_index(&self, ty: &mut PackedIndex) -> Result<()> { |
369 | match ty.unpack() { |
370 | UnpackedIndex::Id(_) => Ok(()), |
371 | UnpackedIndex::Module(index) => { |
372 | if index < self.rec_group_start || self.mode == CanonicalizationMode::OnlyIds { |
373 | let id = self.module.type_id_at(index, self.offset)?; |
374 | if let Some(id) = PackedIndex::from_id(id) { |
375 | *ty = id; |
376 | return Ok(()); |
377 | } else { |
378 | bail!( |
379 | self.offset, |
380 | "implementation limit: too many types in `TypeList`" |
381 | ) |
382 | } |
383 | } |
384 | |
385 | // When GC is not enabled the `rec_group_len == 1` so any rec group |
386 | // local type references will be direct self references. But any kind of |
387 | // type recursion, including self references, is not allowed in the |
388 | // typed function references proposal, only the GC proposal. |
389 | debug_assert!(self.allow_gc() || self.rec_group_len == 1); |
390 | let local = index - self.rec_group_start; |
391 | if self.allow_gc() && local < self.rec_group_len { |
392 | if let Some(id) = PackedIndex::from_rec_group_index(local) { |
393 | *ty = id; |
394 | return Ok(()); |
395 | } else { |
396 | bail!( |
397 | self.offset, |
398 | "implementation limit: too many types in a recursion group" |
399 | ) |
400 | } |
401 | } |
402 | |
403 | bail!( |
404 | self.offset, |
405 | "unknown type {index}: type index out of bounds" |
406 | ) |
407 | } |
408 | UnpackedIndex::RecGroup(local_index) => match self.mode { |
409 | CanonicalizationMode::HashConsing => Ok(()), |
410 | CanonicalizationMode::OnlyIds => { |
411 | let rec_group_elems = self.within_rec_group.as_ref().expect( |
412 | "configured to canonicalize all type reference indices to `CoreTypeId`s \ |
413 | and found rec-group-local index, but missing `within_rec_group` context" , |
414 | ); |
415 | |
416 | let rec_group_len = rec_group_elems.end.index() - rec_group_elems.start.index(); |
417 | let rec_group_len = u32::try_from(rec_group_len).unwrap(); |
418 | assert!(local_index < rec_group_len); |
419 | |
420 | let rec_group_start = u32::try_from(rec_group_elems.start.index()).unwrap(); |
421 | |
422 | let id = CoreTypeId::from_index(rec_group_start + local_index); |
423 | *ty = PackedIndex::from_id(id).expect( |
424 | "should fit in impl limits since we already have the end of the rec group \ |
425 | constructed successfully" , |
426 | ); |
427 | Ok(()) |
428 | } |
429 | }, |
430 | } |
431 | } |
432 | } |
433 | |