| 1 | //! Canonicalization of types. |
| 2 | //! |
| 3 | //! The unit of canonicalization is a recursion group. Having "unnecessary" |
| 4 | //! types in a recursion group can "break" canonicalization of other types |
| 5 | //! within that same recursion group, as can reordering types within a recursion |
| 6 | //! group. |
| 7 | //! |
| 8 | //! It is an invariant that all types defined before the recursion group we are |
| 9 | //! currently canonicalizing have already been canonicalized themselves. |
| 10 | //! |
| 11 | //! Canonicalizing a recursion group then proceeds as follows: |
| 12 | //! |
| 13 | //! * First we walk each of its `SubType` elements and put their type references |
| 14 | //! (i.e. their `PackedIndex`es) into canonical form. Canonicalizing a |
| 15 | //! `PackedIndex` means switching it from indexing into the Wasm module's |
| 16 | //! types space into either |
| 17 | //! |
| 18 | //! 1. Referencing an already-canonicalized type, for types outside of this |
| 19 | //! recursion group. Because inter-group type references can only go |
| 20 | //! towards types defined before this recursion group, we know the type is |
| 21 | //! already canonicalized and we have a `CoreTypeId` for each of those |
| 22 | //! types. This updates the `PackedIndex` into a `CoreTypeId`. |
| 23 | //! |
| 24 | //! 2. Indexing into the current recursion group, for intra-group type |
| 25 | //! references. |
| 26 | //! |
| 27 | //! Note that (2) has the effect of making the "same" structure of mutual type |
| 28 | //! recursion look identical across recursion groups: |
| 29 | //! |
| 30 | //! ```wat |
| 31 | //! ;; Before |
| 32 | //! (rec (struct (field (module-type 1))) (struct (field (module-type 0)))) |
| 33 | //! (rec (struct (field (module-type 3))) (struct (field (module-type 2)))) |
| 34 | //! |
| 35 | //! ;; After |
| 36 | //! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0)))) |
| 37 | //! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0)))) |
| 38 | //! ``` |
| 39 | //! |
| 40 | //! * Now that the recursion group's elements are in canonical form, we can |
| 41 | //! "simply" hash cons whole rec groups at a time. The `TypesList` morally |
| 42 | //! maintains a hash map from `Vec<SubType>` to `RecGroupId` and we can do |
| 43 | //! get-or-create operations on it. I say "morally" because we don't actually |
| 44 | //! duplicate the `Vec<SubType>` key in that hash map since those elements are |
| 45 | //! already stored in the `TypeList`'s internal `SnapshotList<CoreType>`. This |
| 46 | //! means we need to do some low-level hash table fiddling with the |
| 47 | //! `hashbrown` crate. |
| 48 | //! |
| 49 | //! And that's it! That is the whole canonicalization algorithm. |
| 50 | //! |
| 51 | //! Some more random things to note: |
| 52 | //! |
| 53 | //! * Because we essentially already have to do the check to canonicalize, and |
| 54 | //! to avoid additional passes over the types, the canonicalization pass also |
| 55 | //! checks that type references are in bounds. These are the only errors that |
| 56 | //! can be returned from canonicalization. |
| 57 | //! |
| 58 | //! * Canonicalizing requires the `Module` to translate type indices to |
| 59 | //! actual `CoreTypeId`s. |
| 60 | //! |
| 61 | //! * It is important that *after* we have canonicalized all types, we don't |
| 62 | //! need the `Module` anymore. This makes sure that we can, for example, |
| 63 | //! intern all types from the same store into the same `TypeList`. Which in |
| 64 | //! turn lets us type check function imports of a same-store instance's |
| 65 | //! exported functions and we don't need to translate from one module's |
| 66 | //! canonical representation to another module's canonical representation or |
| 67 | //! perform additional expensive checks to see if the types match or not |
| 68 | //! (since the whole point of canonicalization is to avoid that!). |
| 69 | |
| 70 | use super::{RecGroupId, TypeAlloc, TypeList}; |
| 71 | use crate::{ |
| 72 | types::{CoreTypeId, TypeIdentifier}, |
| 73 | BinaryReaderError, CompositeInnerType, CompositeType, PackedIndex, RecGroup, Result, |
| 74 | StorageType, UnpackedIndex, ValType, WasmFeatures, |
| 75 | }; |
| 76 | |
| 77 | pub(crate) trait InternRecGroup { |
| 78 | fn add_type_id(&mut self, id: CoreTypeId); |
| 79 | fn type_id_at(&self, idx: u32, offset: usize) -> Result<CoreTypeId>; |
| 80 | fn types_len(&self) -> u32; |
| 81 | |
| 82 | /// Canonicalize the rec group and return its id and whether it is a new group |
| 83 | /// (we added its types to the `TypeAlloc`) or not (we deduplicated it with an |
| 84 | /// existing canonical rec group). |
| 85 | fn canonicalize_and_intern_rec_group( |
| 86 | &mut self, |
| 87 | features: &WasmFeatures, |
| 88 | types: &mut TypeAlloc, |
| 89 | mut rec_group: RecGroup, |
| 90 | offset: usize, |
| 91 | ) -> Result<()> |
| 92 | where |
| 93 | Self: Sized, |
| 94 | { |
| 95 | debug_assert!(rec_group.is_explicit_rec_group() || rec_group.types().len() == 1); |
| 96 | if rec_group.is_explicit_rec_group() && !features.gc() { |
| 97 | bail!( |
| 98 | offset, |
| 99 | "rec group usage requires `gc` proposal to be enabled" |
| 100 | ); |
| 101 | } |
| 102 | if features.needs_type_canonicalization() { |
| 103 | TypeCanonicalizer::new(self, offset) |
| 104 | .with_features(features) |
| 105 | .canonicalize_rec_group(&mut rec_group)?; |
| 106 | } |
| 107 | let (is_new, rec_group_id) = |
| 108 | types.intern_canonical_rec_group(features.needs_type_canonicalization(), rec_group); |
| 109 | let range = &types[rec_group_id]; |
| 110 | let start = range.start.index(); |
| 111 | let end = range.end.index(); |
| 112 | |
| 113 | for i in start..end { |
| 114 | let i = u32::try_from(i).unwrap(); |
| 115 | let id = CoreTypeId::from_index(i); |
| 116 | debug_assert!(types.get(id).is_some()); |
| 117 | self.add_type_id(id); |
| 118 | if is_new { |
| 119 | self.check_subtype(rec_group_id, id, features, types, offset)?; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | Ok(()) |
| 124 | } |
| 125 | |
| 126 | fn check_subtype( |
| 127 | &mut self, |
| 128 | rec_group: RecGroupId, |
| 129 | id: CoreTypeId, |
| 130 | features: &WasmFeatures, |
| 131 | types: &mut TypeAlloc, |
| 132 | offset: usize, |
| 133 | ) -> Result<()> { |
| 134 | let ty = &types[id]; |
| 135 | if !features.gc() && (!ty.is_final || ty.supertype_idx.is_some()) { |
| 136 | bail!(offset, "gc proposal must be enabled to use subtypes" ); |
| 137 | } |
| 138 | |
| 139 | self.check_composite_type(&ty.composite_type, features, &types, offset)?; |
| 140 | |
| 141 | let depth = if let Some(supertype_index) = ty.supertype_idx { |
| 142 | debug_assert!(supertype_index.is_canonical()); |
| 143 | let sup_id = self.at_packed_index(types, rec_group, supertype_index, offset)?; |
| 144 | if types[sup_id].is_final { |
| 145 | bail!(offset, "sub type cannot have a final super type" ); |
| 146 | } |
| 147 | if !types.matches(id, sup_id) { |
| 148 | bail!(offset, "sub type must match super type" ); |
| 149 | } |
| 150 | let depth = types.get_subtyping_depth(sup_id) + 1; |
| 151 | if usize::from(depth) > crate::limits::MAX_WASM_SUBTYPING_DEPTH { |
| 152 | bail!( |
| 153 | offset, |
| 154 | "sub type hierarchy too deep: found depth {}, cannot exceed depth {}" , |
| 155 | depth, |
| 156 | crate::limits::MAX_WASM_SUBTYPING_DEPTH, |
| 157 | ); |
| 158 | } |
| 159 | depth |
| 160 | } else { |
| 161 | 0 |
| 162 | }; |
| 163 | types.set_subtyping_depth(id, depth); |
| 164 | |
| 165 | Ok(()) |
| 166 | } |
| 167 | |
| 168 | fn check_composite_type( |
| 169 | &mut self, |
| 170 | ty: &CompositeType, |
| 171 | features: &WasmFeatures, |
| 172 | types: &TypeList, |
| 173 | offset: usize, |
| 174 | ) -> Result<()> { |
| 175 | let check = |ty: &ValType, shared: bool| { |
| 176 | features |
| 177 | .check_value_type(*ty) |
| 178 | .map_err(|e| BinaryReaderError::new(e, offset))?; |
| 179 | if shared && !types.valtype_is_shared(*ty) { |
| 180 | return Err(BinaryReaderError::new( |
| 181 | "shared composite type must contain shared types" , |
| 182 | offset, |
| 183 | )); |
| 184 | // The other cases are fine: |
| 185 | // - both shared or unshared: good to go |
| 186 | // - the func type is unshared, `ty` is shared: though |
| 187 | // odd, we _can_ in fact use shared values in |
| 188 | // unshared composite types (e.g., functions). |
| 189 | } |
| 190 | Ok(()) |
| 191 | }; |
| 192 | if !features.shared_everything_threads() && ty.shared { |
| 193 | return Err(BinaryReaderError::new( |
| 194 | "shared composite types require the shared-everything-threads proposal" , |
| 195 | offset, |
| 196 | )); |
| 197 | } |
| 198 | match &ty.inner { |
| 199 | CompositeInnerType::Func(t) => { |
| 200 | for vt in t.params().iter().chain(t.results()) { |
| 201 | check(vt, ty.shared)?; |
| 202 | } |
| 203 | if t.results().len() > 1 && !features.multi_value() { |
| 204 | return Err(BinaryReaderError::new( |
| 205 | "func type returns multiple values but the multi-value feature is not enabled" , |
| 206 | offset, |
| 207 | )); |
| 208 | } |
| 209 | } |
| 210 | CompositeInnerType::Array(t) => { |
| 211 | if !features.gc() { |
| 212 | bail!( |
| 213 | offset, |
| 214 | "array indexed types not supported without the gc feature" , |
| 215 | ); |
| 216 | } |
| 217 | if !features.gc_types() { |
| 218 | bail!( |
| 219 | offset, |
| 220 | "cannot define array types when gc types are disabled" , |
| 221 | ); |
| 222 | } |
| 223 | match &t.0.element_type { |
| 224 | StorageType::I8 | StorageType::I16 => { |
| 225 | // Note: scalar types are always `shared`. |
| 226 | } |
| 227 | StorageType::Val(value_type) => check(value_type, ty.shared)?, |
| 228 | }; |
| 229 | } |
| 230 | CompositeInnerType::Struct(t) => { |
| 231 | if !features.gc() { |
| 232 | bail!( |
| 233 | offset, |
| 234 | "struct indexed types not supported without the gc feature" , |
| 235 | ); |
| 236 | } |
| 237 | if !features.gc_types() { |
| 238 | bail!( |
| 239 | offset, |
| 240 | "cannot define struct types when gc types are disabled" , |
| 241 | ); |
| 242 | } |
| 243 | for ft in t.fields.iter() { |
| 244 | match &ft.element_type { |
| 245 | StorageType::I8 | StorageType::I16 => { |
| 246 | // Note: scalar types are always `shared`. |
| 247 | } |
| 248 | StorageType::Val(value_type) => check(value_type, ty.shared)?, |
| 249 | } |
| 250 | } |
| 251 | } |
| 252 | CompositeInnerType::Cont(t) => { |
| 253 | if !features.stack_switching() { |
| 254 | bail!( |
| 255 | offset, |
| 256 | "cannot define continuation types when stack switching is disabled" , |
| 257 | ); |
| 258 | } |
| 259 | if !features.gc_types() { |
| 260 | bail!( |
| 261 | offset, |
| 262 | "cannot define continuation types when gc types are disabled" , |
| 263 | ); |
| 264 | } |
| 265 | // Check that the type index points to a valid function type. |
| 266 | let id = t.0.as_core_type_id().unwrap(); |
| 267 | match types[id].composite_type.inner { |
| 268 | CompositeInnerType::Func(_) => (), |
| 269 | _ => bail!(offset, "non-function type {}" , id.index()), |
| 270 | } |
| 271 | } |
| 272 | } |
| 273 | Ok(()) |
| 274 | } |
| 275 | |
| 276 | fn at_packed_index( |
| 277 | &self, |
| 278 | types: &TypeList, |
| 279 | rec_group: RecGroupId, |
| 280 | index: PackedIndex, |
| 281 | offset: usize, |
| 282 | ) -> Result<CoreTypeId> { |
| 283 | match index.unpack() { |
| 284 | UnpackedIndex::Id(id) => Ok(id), |
| 285 | UnpackedIndex::Module(idx) => self.type_id_at(idx, offset), |
| 286 | UnpackedIndex::RecGroup(idx) => types.rec_group_local_id(rec_group, idx, offset), |
| 287 | } |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | /// The kind of canonicalization we are doing. |
| 292 | #[derive (Clone, Copy, Debug, PartialEq, Eq)] |
| 293 | enum CanonicalizationMode { |
| 294 | /// Standard canonicalization: turns module indices into either (1) |
| 295 | /// `CoreTypeId`s for inter-group references or (2) rec-group-local indices |
| 296 | /// for intra-group references. |
| 297 | HashConsing, |
| 298 | |
| 299 | /// Turns all type reference indices into `CoreTypeId`s, even from within |
| 300 | /// the same rec group. Not useful for hash consing, but useful when |
| 301 | /// exposing types to end users so they don't have to deal with |
| 302 | /// rec-group-local indices. |
| 303 | OnlyIds, |
| 304 | } |
| 305 | |
| 306 | pub(crate) struct TypeCanonicalizer<'a> { |
| 307 | module: &'a dyn InternRecGroup, |
| 308 | features: Option<&'a WasmFeatures>, |
| 309 | rec_group_start: u32, |
| 310 | rec_group_len: u32, |
| 311 | offset: usize, |
| 312 | mode: CanonicalizationMode, |
| 313 | within_rec_group: Option<core::ops::Range<CoreTypeId>>, |
| 314 | } |
| 315 | |
| 316 | impl<'a> TypeCanonicalizer<'a> { |
| 317 | pub fn new(module: &'a dyn InternRecGroup, offset: usize) -> Self { |
| 318 | // These defaults will work for when we are canonicalizing types from |
| 319 | // outside of a rec group definition, forcing all `PackedIndex`es to be |
| 320 | // canonicalized to `CoreTypeId`s. |
| 321 | let rec_group_start = u32::MAX; |
| 322 | let rec_group_len = 0; |
| 323 | |
| 324 | Self { |
| 325 | module, |
| 326 | features: None, |
| 327 | rec_group_start, |
| 328 | rec_group_len, |
| 329 | offset, |
| 330 | mode: CanonicalizationMode::HashConsing, |
| 331 | within_rec_group: None, |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | pub fn with_features(&mut self, features: &'a WasmFeatures) -> &mut Self { |
| 336 | debug_assert!(self.features.is_none()); |
| 337 | self.features = Some(features); |
| 338 | self |
| 339 | } |
| 340 | |
| 341 | fn allow_gc(&self) -> bool { |
| 342 | self.features.map_or(true, |f| f.gc()) |
| 343 | } |
| 344 | |
| 345 | fn canonicalize_rec_group(&mut self, rec_group: &mut RecGroup) -> Result<()> { |
| 346 | // Re-initialize these fields so that we properly canonicalize |
| 347 | // intra-rec-group type references into indices into the rec group |
| 348 | // rather than as `CoreTypeId`s. |
| 349 | self.rec_group_start = self.module.types_len(); |
| 350 | self.rec_group_len = u32::try_from(rec_group.types().len()).unwrap(); |
| 351 | |
| 352 | for (rec_group_local_index, ty) in rec_group.types_mut().enumerate() { |
| 353 | let rec_group_local_index = u32::try_from(rec_group_local_index).unwrap(); |
| 354 | let type_index = self.rec_group_start + rec_group_local_index; |
| 355 | |
| 356 | if let Some(sup) = ty.supertype_idx.as_mut() { |
| 357 | if sup.as_module_index().map_or(false, |i| i >= type_index) { |
| 358 | bail!(self.offset, "supertypes must be defined before subtypes" ); |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | ty.remap_indices(&mut |idx| self.canonicalize_type_index(idx))?; |
| 363 | } |
| 364 | |
| 365 | Ok(()) |
| 366 | } |
| 367 | |
| 368 | fn canonicalize_type_index(&self, ty: &mut PackedIndex) -> Result<()> { |
| 369 | match ty.unpack() { |
| 370 | UnpackedIndex::Id(_) => Ok(()), |
| 371 | UnpackedIndex::Module(index) => { |
| 372 | if index < self.rec_group_start || self.mode == CanonicalizationMode::OnlyIds { |
| 373 | let id = self.module.type_id_at(index, self.offset)?; |
| 374 | if let Some(id) = PackedIndex::from_id(id) { |
| 375 | *ty = id; |
| 376 | return Ok(()); |
| 377 | } else { |
| 378 | bail!( |
| 379 | self.offset, |
| 380 | "implementation limit: too many types in `TypeList`" |
| 381 | ) |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | // When GC is not enabled the `rec_group_len == 1` so any rec group |
| 386 | // local type references will be direct self references. But any kind of |
| 387 | // type recursion, including self references, is not allowed in the |
| 388 | // typed function references proposal, only the GC proposal. |
| 389 | debug_assert!(self.allow_gc() || self.rec_group_len == 1); |
| 390 | let local = index - self.rec_group_start; |
| 391 | if self.allow_gc() && local < self.rec_group_len { |
| 392 | if let Some(id) = PackedIndex::from_rec_group_index(local) { |
| 393 | *ty = id; |
| 394 | return Ok(()); |
| 395 | } else { |
| 396 | bail!( |
| 397 | self.offset, |
| 398 | "implementation limit: too many types in a recursion group" |
| 399 | ) |
| 400 | } |
| 401 | } |
| 402 | |
| 403 | bail!( |
| 404 | self.offset, |
| 405 | "unknown type {index}: type index out of bounds" |
| 406 | ) |
| 407 | } |
| 408 | UnpackedIndex::RecGroup(local_index) => match self.mode { |
| 409 | CanonicalizationMode::HashConsing => Ok(()), |
| 410 | CanonicalizationMode::OnlyIds => { |
| 411 | let rec_group_elems = self.within_rec_group.as_ref().expect( |
| 412 | "configured to canonicalize all type reference indices to `CoreTypeId`s \ |
| 413 | and found rec-group-local index, but missing `within_rec_group` context" , |
| 414 | ); |
| 415 | |
| 416 | let rec_group_len = rec_group_elems.end.index() - rec_group_elems.start.index(); |
| 417 | let rec_group_len = u32::try_from(rec_group_len).unwrap(); |
| 418 | assert!(local_index < rec_group_len); |
| 419 | |
| 420 | let rec_group_start = u32::try_from(rec_group_elems.start.index()).unwrap(); |
| 421 | |
| 422 | let id = CoreTypeId::from_index(rec_group_start + local_index); |
| 423 | *ty = PackedIndex::from_id(id).expect( |
| 424 | "should fit in impl limits since we already have the end of the rec group \ |
| 425 | constructed successfully" , |
| 426 | ); |
| 427 | Ok(()) |
| 428 | } |
| 429 | }, |
| 430 | } |
| 431 | } |
| 432 | } |
| 433 | |