| 1 | //! Definition for encoding of custom sections within core wasm modules of |
| 2 | //! component-model related data. |
| 3 | //! |
| 4 | //! When creating a component from a source language the high-level process for |
| 5 | //! doing this is that code will be generated into the source language by |
| 6 | //! `wit-bindgen` or a similar tool which will be compiled down to core wasm. |
| 7 | //! The core wasm file is then fed into `wit-component` and a component is |
| 8 | //! created. This means that the componentization process is decoupled from the |
| 9 | //! binding generation process and intentionally affords for linking together |
| 10 | //! libraries into the main core wasm module that import different interfaces. |
| 11 | //! |
| 12 | //! The purpose of this module is to define an intermediate format to reside in |
| 13 | //! a custom section in the core wasm output. This intermediate format is |
| 14 | //! carried through the wasm linker through a custom section whose name starts |
| 15 | //! with `component-type`. This custom section is created |
| 16 | //! per-language-binding-generation and consumed by slurping up all the |
| 17 | //! sections during the component creation process. |
| 18 | //! |
| 19 | //! Currently the encoding of this custom section is itself a component. The |
| 20 | //! component has a single export which is a component type which represents the |
| 21 | //! `world` that was bound during bindings generation. This single export is |
| 22 | //! used to decode back into a `Resolve` with a WIT representation. |
| 23 | //! |
| 24 | //! Currently the component additionally has a custom section named |
| 25 | //! `wit-component-encoding` (see `CUSTOM_SECTION_NAME`). This section is |
| 26 | //! currently defined as 2 bytes: |
| 27 | //! |
| 28 | //! * The first byte is `CURRENT_VERSION` to help protect against future and |
| 29 | //! past changes. |
| 30 | //! * The second byte indicates the string encoding used for imports/exports as |
| 31 | //! part of the bindings process. The mapping is defined by |
| 32 | //! `encode_string_encoding`. |
| 33 | //! |
| 34 | //! This means that the top-level `encode` function takes a `Resolve`, a |
| 35 | //! `WorldId`, and a `StringEncoding`. Note that the top-level `decode` function |
| 36 | //! is slightly different because it's taking all custom sections in a core |
| 37 | //! wasm binary, possibly from multiple invocations of bindgen, and unioning |
| 38 | //! them all together. This means that the output is a `Bindgen` which |
| 39 | //! represents the union of all previous bindings. |
| 40 | //! |
| 41 | //! The dual of `encode` is the `decode_custom_section` function which decodes |
| 42 | //! the three arguments originally passed to `encode`. |
| 43 | |
| 44 | use crate::{DecodedWasm, StringEncoding}; |
| 45 | use anyhow::{bail, Context, Result}; |
| 46 | use indexmap::{IndexMap, IndexSet}; |
| 47 | use std::borrow::Cow; |
| 48 | use wasm_encoder::{ |
| 49 | ComponentBuilder, ComponentExportKind, ComponentType, ComponentTypeRef, CustomSection, |
| 50 | }; |
| 51 | use wasm_metadata::Producers; |
| 52 | use wasmparser::{BinaryReader, Encoding, Parser, Payload}; |
| 53 | use wit_parser::{Package, PackageName, Resolve, World, WorldId, WorldItem, WorldKey}; |
| 54 | |
/// Format version stored as the first byte of the `wit-component-encoding`
/// custom section. Decoding only accepts a section whose first byte equals
/// this value.
const CURRENT_VERSION: u8 = 0x04;

/// Name of the custom section, placed inside the encoded component, that holds
/// the version byte followed by the string-encoding byte.
const CUSTOM_SECTION_NAME: &str = "wit-component-encoding";
| 57 | |
| 58 | /// The result of decoding binding information from a WebAssembly binary. |
| 59 | /// |
| 60 | /// This structure is returned by [`decode`] and represents the interface of a |
| 61 | /// WebAssembly binary. |
| 62 | pub struct Bindgen { |
| 63 | /// Interface and type information for this binary. |
| 64 | pub resolve: Resolve, |
| 65 | /// The world that was bound. |
| 66 | pub world: WorldId, |
| 67 | /// Metadata about this specific module that was bound. |
| 68 | pub metadata: ModuleMetadata, |
| 69 | /// Producer information about tools used to produce this specific module. |
| 70 | pub producers: Option<Producers>, |
| 71 | } |
| 72 | |
| 73 | impl Default for Bindgen { |
| 74 | fn default() -> Bindgen { |
| 75 | let mut resolve = Resolve::default(); |
| 76 | let package = resolve.packages.alloc(Package { |
| 77 | name: PackageName { |
| 78 | namespace: "root" .to_string(), |
| 79 | name: "root" .to_string(), |
| 80 | version: None, |
| 81 | }, |
| 82 | docs: Default::default(), |
| 83 | interfaces: Default::default(), |
| 84 | worlds: Default::default(), |
| 85 | }); |
| 86 | let world = resolve.worlds.alloc(World { |
| 87 | name: "root" .to_string(), |
| 88 | docs: Default::default(), |
| 89 | imports: Default::default(), |
| 90 | exports: Default::default(), |
| 91 | includes: Default::default(), |
| 92 | include_names: Default::default(), |
| 93 | package: Some(package), |
| 94 | stability: Default::default(), |
| 95 | }); |
| 96 | resolve.packages[package] |
| 97 | .worlds |
| 98 | .insert("root" .to_string(), world); |
| 99 | Bindgen { |
| 100 | resolve, |
| 101 | world, |
| 102 | metadata: ModuleMetadata::default(), |
| 103 | producers: None, |
| 104 | } |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | /// Module-level metadata that's specific to one core WebAssembly module. This |
| 109 | /// is extracted with a [`Bindgen`]. |
| 110 | #[derive (Default)] |
| 111 | pub struct ModuleMetadata { |
| 112 | /// Per-function options imported into the core wasm module, currently only |
| 113 | /// related to string encoding. |
| 114 | pub import_encodings: EncodingMap, |
| 115 | |
| 116 | /// Per-function options exported from the core wasm module, currently only |
| 117 | /// related to string encoding. |
| 118 | pub export_encodings: EncodingMap, |
| 119 | } |
| 120 | |
| 121 | /// Internal map that keeps track of encodings for various world imports and |
| 122 | /// exports. |
| 123 | /// |
| 124 | /// Stored in [`ModuleMetadata`]. |
| 125 | #[derive (Default)] |
| 126 | pub struct EncodingMap { |
| 127 | /// A map of an "identifying string" for world items to what string |
| 128 | /// encoding the import or export is using. |
| 129 | /// |
| 130 | /// The keys of this map are created by `EncodingMap::key` and are |
| 131 | /// specifically chosen to be able to be looked up during both insertion and |
| 132 | /// fetching. Note that in particular this map does not use `*Id` types such |
| 133 | /// as `InterfaceId` from `wit_parser`. This is due to the fact that during |
| 134 | /// world merging new interfaces are created for named imports (e.g. `import |
| 135 | /// x: interface { ... }`) as inline interfaces are copied from one world to |
| 136 | /// another. Additionally during world merging different interfaces at the |
| 137 | /// same version may be deduplicated. |
| 138 | /// |
| 139 | /// For these reasons a string-based key is chosen to avoid juggling IDs |
| 140 | /// through the world merging process. Additionally versions are chopped off |
| 141 | /// for now to help with a problem such as: |
| 142 | /// |
| 143 | /// * The main module imports a:b/c@0.1.0 |
| 144 | /// * An adapter imports a:b/c@0.1.1 |
| 145 | /// * The final world uses a:b/c@0.1.1, but the main module has no |
| 146 | /// encoding listed for that exact item. |
| 147 | /// |
| 148 | /// By chopping off versions this is able to get everything registered |
| 149 | /// correctly even in the fact of merging interfaces and worlds. |
| 150 | encodings: IndexMap<String, StringEncoding>, |
| 151 | } |
| 152 | |
| 153 | impl EncodingMap { |
| 154 | fn insert_all( |
| 155 | &mut self, |
| 156 | resolve: &Resolve, |
| 157 | set: &IndexMap<WorldKey, WorldItem>, |
| 158 | encoding: StringEncoding, |
| 159 | ) { |
| 160 | for (name, item) in set { |
| 161 | match item { |
| 162 | WorldItem::Function(func) => { |
| 163 | let key = self.key(resolve, name, &func.name); |
| 164 | self.encodings.insert(key, encoding); |
| 165 | } |
| 166 | WorldItem::Interface { id, .. } => { |
| 167 | for (func, _) in resolve.interfaces[*id].functions.iter() { |
| 168 | let key = self.key(resolve, name, func); |
| 169 | self.encodings.insert(key, encoding); |
| 170 | } |
| 171 | } |
| 172 | WorldItem::Type(_) => {} |
| 173 | } |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | /// Looks up the encoding of the function `func` which is scoped under `key` |
| 178 | /// in the world in question. |
| 179 | pub fn get(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> Option<StringEncoding> { |
| 180 | let key = self.key(resolve, key, func); |
| 181 | self.encodings.get(&key).copied() |
| 182 | } |
| 183 | |
| 184 | fn key(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> String { |
| 185 | format!( |
| 186 | " {}/ {func}" , |
| 187 | match key { |
| 188 | WorldKey::Name(name) => name.to_string(), |
| 189 | WorldKey::Interface(id) => { |
| 190 | let iface = &resolve.interfaces[*id]; |
| 191 | let pkg = &resolve.packages[iface.package.unwrap()]; |
| 192 | format!( |
| 193 | " {}: {}/ {}" , |
| 194 | pkg.name.namespace, |
| 195 | pkg.name.name, |
| 196 | iface.name.as_ref().unwrap() |
| 197 | ) |
| 198 | } |
| 199 | } |
| 200 | ) |
| 201 | } |
| 202 | |
| 203 | fn merge(&mut self, other: EncodingMap) -> Result<()> { |
| 204 | for (key, encoding) in other.encodings { |
| 205 | if let Some(prev) = self.encodings.insert(key.clone(), encoding) { |
| 206 | if prev != encoding { |
| 207 | bail!("conflicting string encodings specified for ` {key}`" ); |
| 208 | } |
| 209 | } |
| 210 | } |
| 211 | Ok(()) |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | /// This function will parse the core `wasm` binary given as input and return a |
| 216 | /// [`Bindgen`] which extracts the custom sections describing component-level |
| 217 | /// types from within the binary itself. |
| 218 | /// |
| 219 | /// This is used to parse the output of `wit-bindgen`-generated modules and is |
| 220 | /// one of the earliest phases in transitioning such a module to a component. |
| 221 | /// The extraction here provides the metadata necessary to continue the process |
| 222 | /// later on. |
| 223 | /// |
| 224 | /// This will return an error if `wasm` is not a valid WebAssembly module. |
| 225 | /// |
| 226 | /// If a `component-type` custom section was found then a new binary is |
| 227 | /// optionally returned with the custom sections stripped out. If no |
| 228 | /// `component-type` custom sections are found then `None` is returned. |
| 229 | pub fn decode(wasm: &[u8]) -> Result<(Option<Vec<u8>>, Bindgen)> { |
| 230 | let mut ret = Bindgen::default(); |
| 231 | let mut new_module = wasm_encoder::Module::new(); |
| 232 | |
| 233 | let mut found_custom = false; |
| 234 | for payload in wasmparser::Parser::new(0).parse_all(wasm) { |
| 235 | let payload = payload.context("decoding item in module" )?; |
| 236 | match payload { |
| 237 | wasmparser::Payload::CustomSection(cs) if cs.name().starts_with("component-type" ) => { |
| 238 | let data = Bindgen::decode_custom_section(cs.data()) |
| 239 | .with_context(|| format!("decoding custom section {}" , cs.name()))?; |
| 240 | ret.merge(data) |
| 241 | .with_context(|| format!("updating metadata for section {}" , cs.name()))?; |
| 242 | found_custom = true; |
| 243 | } |
| 244 | wasmparser::Payload::Version { encoding, .. } if encoding != Encoding::Module => { |
| 245 | bail!("decoding a component is not supported" ) |
| 246 | } |
| 247 | _ => { |
| 248 | if let Some((id, range)) = payload.as_section() { |
| 249 | new_module.section(&wasm_encoder::RawSection { |
| 250 | id, |
| 251 | data: &wasm[range], |
| 252 | }); |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | if found_custom { |
| 259 | Ok((Some(new_module.finish()), ret)) |
| 260 | } else { |
| 261 | Ok((None, ret)) |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | /// Creates a `component-type*` custom section to be decoded by `decode` above. |
| 266 | /// |
| 267 | /// This is primarily created by wit-bindgen-based guest generators to embed |
| 268 | /// into the final core wasm binary. The core wasm binary is later fed |
| 269 | /// through `wit-component` to produce the actual component where this returned |
| 270 | /// section will be decoded. |
| 271 | pub fn encode( |
| 272 | resolve: &Resolve, |
| 273 | world: WorldId, |
| 274 | string_encoding: StringEncoding, |
| 275 | extra_producers: Option<&Producers>, |
| 276 | ) -> Result<Vec<u8>> { |
| 277 | let ty = crate::encoding::encode_world(resolve, world)?; |
| 278 | |
| 279 | let world = &resolve.worlds[world]; |
| 280 | let mut outer_ty = ComponentType::new(); |
| 281 | outer_ty.ty().component(&ty); |
| 282 | outer_ty.export( |
| 283 | &resolve.id_of_name(world.package.unwrap(), &world.name), |
| 284 | ComponentTypeRef::Component(0), |
| 285 | ); |
| 286 | |
| 287 | let mut builder = ComponentBuilder::default(); |
| 288 | |
| 289 | let string_encoding = encode_string_encoding(string_encoding); |
| 290 | builder.custom_section(&CustomSection { |
| 291 | name: CUSTOM_SECTION_NAME.into(), |
| 292 | data: Cow::Borrowed(&[CURRENT_VERSION, string_encoding]), |
| 293 | }); |
| 294 | |
| 295 | let ty = builder.type_component(&outer_ty); |
| 296 | builder.export(&world.name, ComponentExportKind::Type, ty, None); |
| 297 | |
| 298 | let mut producers = crate::base_producers(); |
| 299 | if let Some(p) = extra_producers { |
| 300 | producers.merge(&p); |
| 301 | } |
| 302 | builder.raw_custom_section(&producers.raw_custom_section()); |
| 303 | Ok(builder.finish()) |
| 304 | } |
| 305 | |
| 306 | fn decode_custom_section(wasm: &[u8]) -> Result<(Resolve, WorldId, StringEncoding)> { |
| 307 | let (resolve: Resolve, world: Id) = wit_parser::decoding::decode_world(wasm)?; |
| 308 | let mut custom_section: Option<&[u8]> = None; |
| 309 | |
| 310 | for payload: Result, BinaryReaderError> in Parser::new(0).parse_all(data:wasm) { |
| 311 | match payload? { |
| 312 | Payload::CustomSection(s: CustomSectionReader<'_>) if s.name() == CUSTOM_SECTION_NAME => { |
| 313 | custom_section = Some(s.data()); |
| 314 | } |
| 315 | _ => {} |
| 316 | } |
| 317 | } |
| 318 | let string_encoding: StringEncoding = match custom_section { |
| 319 | None => bail!("missing custom section of name ` {CUSTOM_SECTION_NAME}`" ), |
| 320 | Some([CURRENT_VERSION, byte: &u8]) => decode_string_encoding(*byte)?, |
| 321 | Some([]) => bail!("custom section ` {CUSTOM_SECTION_NAME}` in unknown format" ), |
| 322 | Some([version: &u8, ..]) => bail!( |
| 323 | "custom section ` {CUSTOM_SECTION_NAME}` uses format {version} but only {CURRENT_VERSION} is supported" |
| 324 | ), |
| 325 | }; |
| 326 | Ok((resolve, world, string_encoding)) |
| 327 | } |
| 328 | |
| 329 | fn encode_string_encoding(e: StringEncoding) -> u8 { |
| 330 | match e { |
| 331 | StringEncoding::UTF8 => 0x00, |
| 332 | StringEncoding::UTF16 => 0x01, |
| 333 | StringEncoding::CompactUTF16 => 0x02, |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | fn decode_string_encoding(byte: u8) -> Result<StringEncoding> { |
| 338 | match byte { |
| 339 | 0x00 => Ok(StringEncoding::UTF8), |
| 340 | 0x01 => Ok(StringEncoding::UTF16), |
| 341 | 0x02 => Ok(StringEncoding::CompactUTF16), |
| 342 | byte: u8 => bail!("invalid string encoding {byte:#x}" ), |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | impl Bindgen { |
| 347 | fn decode_custom_section(data: &[u8]) -> Result<Bindgen> { |
| 348 | let wasm; |
| 349 | let world; |
| 350 | let resolve; |
| 351 | let encoding; |
| 352 | |
| 353 | let mut reader = BinaryReader::new(data, 0); |
| 354 | match reader.read_u8()? { |
| 355 | // Historical 0x03 format where the support here will be deleted in |
| 356 | // the future |
| 357 | 0x03 => { |
| 358 | encoding = decode_string_encoding(reader.read_u8()?)?; |
| 359 | let world_name = reader.read_string()?; |
| 360 | wasm = &data[reader.original_position()..]; |
| 361 | |
| 362 | let (r, pkg) = match crate::decode(wasm)? { |
| 363 | DecodedWasm::WitPackage(resolve, pkgs) => (resolve, pkgs), |
| 364 | DecodedWasm::Component(..) => bail!("expected encoded wit package(s)" ), |
| 365 | }; |
| 366 | resolve = r; |
| 367 | world = resolve.select_world(pkg, Some(world_name.into()))?; |
| 368 | } |
| 369 | |
| 370 | // Current format where `data` is a wasm component itself. |
| 371 | _ => { |
| 372 | wasm = data; |
| 373 | (resolve, world, encoding) = decode_custom_section(wasm)?; |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | Ok(Bindgen { |
| 378 | metadata: ModuleMetadata::new(&resolve, world, encoding), |
| 379 | producers: wasm_metadata::Producers::from_wasm(wasm)?, |
| 380 | resolve, |
| 381 | world, |
| 382 | }) |
| 383 | } |
| 384 | |
| 385 | /// Merges another `BindgenMetadata` into this one. |
| 386 | /// |
| 387 | /// This operation is intended to be akin to "merging worlds" when the |
| 388 | /// abstraction level for that is what we're working at here. For now the |
| 389 | /// merge operation only succeeds if the two metadata descriptions are |
| 390 | /// entirely disjoint. |
| 391 | /// |
| 392 | /// Note that at this time there's no support for changing string encodings |
| 393 | /// between metadata. |
| 394 | /// |
| 395 | /// This function returns the set of exports that the main world of |
| 396 | /// `other` added to the world in `self`. |
| 397 | pub fn merge(&mut self, other: Bindgen) -> Result<IndexSet<WorldKey>> { |
| 398 | let Bindgen { |
| 399 | resolve, |
| 400 | world, |
| 401 | metadata: |
| 402 | ModuleMetadata { |
| 403 | import_encodings, |
| 404 | export_encodings, |
| 405 | }, |
| 406 | producers, |
| 407 | } = other; |
| 408 | |
| 409 | let remap = self |
| 410 | .resolve |
| 411 | .merge(resolve) |
| 412 | .context("failed to merge WIT package sets together" )?; |
| 413 | let world = remap.map_world(world, None)?; |
| 414 | let exports = self.resolve.worlds[world].exports.keys().cloned().collect(); |
| 415 | self.resolve |
| 416 | .merge_worlds(world, self.world) |
| 417 | .context("failed to merge worlds from two documents" )?; |
| 418 | |
| 419 | self.metadata.import_encodings.merge(import_encodings)?; |
| 420 | self.metadata.export_encodings.merge(export_encodings)?; |
| 421 | if let Some(producers) = producers { |
| 422 | if let Some(mine) = &mut self.producers { |
| 423 | mine.merge(&producers); |
| 424 | } else { |
| 425 | self.producers = Some(producers); |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | Ok(exports) |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | impl ModuleMetadata { |
| 434 | /// Creates a new `ModuleMetadata` instance holding the given set of |
| 435 | /// interfaces which are expected to all use the `encoding` specified. |
| 436 | pub fn new(resolve: &Resolve, world: WorldId, encoding: StringEncoding) -> ModuleMetadata { |
| 437 | let mut ret: ModuleMetadata = ModuleMetadata::default(); |
| 438 | |
| 439 | let world: &World = &resolve.worlds[world]; |
| 440 | ret.export_encodings |
| 441 | .insert_all(resolve, &world.exports, encoding); |
| 442 | ret.import_encodings |
| 443 | .insert_all(resolve, &world.imports, encoding); |
| 444 | |
| 445 | ret |
| 446 | } |
| 447 | } |
| 448 | |