| 1 | use hashbrown::hash_map::RawEntryMut; |
| 2 | use rustc_hash::FxHasher; |
| 3 | use std::hash::{BuildHasherDefault, Hash, Hasher}; |
| 4 | |
| 5 | use crate::{ |
| 6 | green::GreenElementRef, GreenNode, GreenNodeData, GreenToken, GreenTokenData, NodeOrToken, |
| 7 | SyntaxKind, |
| 8 | }; |
| 9 | |
| 10 | use super::element::GreenElement; |
| 11 | |
| 12 | type HashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>; |
| 13 | |
| 14 | #[derive (Debug)] |
| 15 | struct NoHash<T>(T); |
| 16 | |
| 17 | /// Interner for GreenTokens and GreenNodes |
| 18 | // XXX: the impl is a bit tricky. As usual when writing interners, we want to |
| 19 | // store all values in one HashSet. |
| 20 | // |
| 21 | // However, hashing trees is fun: hash of the tree is recursively defined. We |
| 22 | // maintain an invariant -- if the tree is interned, then all of its children |
| 23 | // are interned as well. |
| 24 | // |
| 25 | // That means that computing the hash naively is wasteful -- we just *know* |
| 26 | // hashes of children, and we can re-use those. |
| 27 | // |
| 28 | // So here we use *raw* API of hashbrown and provide the hashes manually, |
| 29 | // instead of going via a `Hash` impl. Our manual `Hash` and the |
| 30 | // `#[derive(Hash)]` are actually different! At some point we had a fun bug, |
| 31 | // where we accidentally mixed the two hashes, which made the cache much less |
| 32 | // efficient. |
| 33 | // |
| 34 | // To fix that, we additionally wrap the data in `NoHash` wrapper, to make sure |
| 35 | // we don't accidentally use the wrong hash! |
| 36 | #[derive (Default, Debug)] |
| 37 | pub struct NodeCache { |
| 38 | nodes: HashMap<NoHash<GreenNode>, ()>, |
| 39 | tokens: HashMap<NoHash<GreenToken>, ()>, |
| 40 | } |
| 41 | |
| 42 | fn token_hash(token: &GreenTokenData) -> u64 { |
| 43 | let mut h: FxHasher = FxHasher::default(); |
| 44 | token.kind().hash(&mut h); |
| 45 | token.text().hash(&mut h); |
| 46 | h.finish() |
| 47 | } |
| 48 | |
| 49 | fn node_hash(node: &GreenNodeData) -> u64 { |
| 50 | let mut h: FxHasher = FxHasher::default(); |
| 51 | node.kind().hash(&mut h); |
| 52 | for child: NodeOrToken<&GreenNodeData, …> in node.children() { |
| 53 | match child { |
| 54 | NodeOrToken::Node(it) => node_hash(it), |
| 55 | NodeOrToken::Token(it) => token_hash(it), |
| 56 | } |
| 57 | .hash(&mut h) |
| 58 | } |
| 59 | h.finish() |
| 60 | } |
| 61 | |
| 62 | fn element_id(elem: GreenElementRef<'_>) -> *const () { |
| 63 | match elem { |
| 64 | NodeOrToken::Node(it: &GreenNodeData) => it as *const GreenNodeData as *const (), |
| 65 | NodeOrToken::Token(it: &GreenTokenData) => it as *const GreenTokenData as *const (), |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | impl NodeCache { |
| 70 | pub(crate) fn node( |
| 71 | &mut self, |
| 72 | kind: SyntaxKind, |
| 73 | children: &mut Vec<(u64, GreenElement)>, |
| 74 | first_child: usize, |
| 75 | ) -> (u64, GreenNode) { |
| 76 | let build_node = move |children: &mut Vec<(u64, GreenElement)>| { |
| 77 | GreenNode::new(kind, children.drain(first_child..).map(|(_, it)| it)) |
| 78 | }; |
| 79 | |
| 80 | let children_ref = &children[first_child..]; |
| 81 | if children_ref.len() > 3 { |
| 82 | let node = build_node(children); |
| 83 | return (0, node); |
| 84 | } |
| 85 | |
| 86 | let hash = { |
| 87 | let mut h = FxHasher::default(); |
| 88 | kind.hash(&mut h); |
| 89 | for &(hash, _) in children_ref { |
| 90 | if hash == 0 { |
| 91 | let node = build_node(children); |
| 92 | return (0, node); |
| 93 | } |
| 94 | hash.hash(&mut h); |
| 95 | } |
| 96 | h.finish() |
| 97 | }; |
| 98 | |
| 99 | // Green nodes are fully immutable, so it's ok to deduplicate them. |
| 100 | // This is the same optimization that Roslyn does |
| 101 | // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees |
| 102 | // |
| 103 | // For example, all `#[inline]` in this file share the same green node! |
| 104 | // For `libsyntax/parse/parser.rs`, measurements show that deduping saves |
| 105 | // 17% of the memory for green nodes! |
| 106 | let entry = self.nodes.raw_entry_mut().from_hash(hash, |node| { |
| 107 | node.0.kind() == kind && node.0.children().len() == children_ref.len() && { |
| 108 | let lhs = node.0.children(); |
| 109 | let rhs = children_ref.iter().map(|(_, it)| it.as_deref()); |
| 110 | |
| 111 | let lhs = lhs.map(element_id); |
| 112 | let rhs = rhs.map(element_id); |
| 113 | |
| 114 | lhs.eq(rhs) |
| 115 | } |
| 116 | }); |
| 117 | |
| 118 | let node = match entry { |
| 119 | RawEntryMut::Occupied(entry) => { |
| 120 | drop(children.drain(first_child..)); |
| 121 | entry.key().0.clone() |
| 122 | } |
| 123 | RawEntryMut::Vacant(entry) => { |
| 124 | let node = build_node(children); |
| 125 | entry.insert_with_hasher(hash, NoHash(node.clone()), (), |n| node_hash(&n.0)); |
| 126 | node |
| 127 | } |
| 128 | }; |
| 129 | |
| 130 | (hash, node) |
| 131 | } |
| 132 | |
| 133 | pub(crate) fn token(&mut self, kind: SyntaxKind, text: &str) -> (u64, GreenToken) { |
| 134 | let hash = { |
| 135 | let mut h = FxHasher::default(); |
| 136 | kind.hash(&mut h); |
| 137 | text.hash(&mut h); |
| 138 | h.finish() |
| 139 | }; |
| 140 | |
| 141 | let entry = self |
| 142 | .tokens |
| 143 | .raw_entry_mut() |
| 144 | .from_hash(hash, |token| token.0.kind() == kind && token.0.text() == text); |
| 145 | |
| 146 | let token = match entry { |
| 147 | RawEntryMut::Occupied(entry) => entry.key().0.clone(), |
| 148 | RawEntryMut::Vacant(entry) => { |
| 149 | let token = GreenToken::new(kind, text); |
| 150 | entry.insert_with_hasher(hash, NoHash(token.clone()), (), |t| token_hash(&t.0)); |
| 151 | token |
| 152 | } |
| 153 | }; |
| 154 | |
| 155 | (hash, token) |
| 156 | } |
| 157 | } |
| 158 | |