1 | use hashbrown::hash_map::RawEntryMut; |
2 | use rustc_hash::FxHasher; |
3 | use std::hash::{BuildHasherDefault, Hash, Hasher}; |
4 | |
5 | use crate::{ |
6 | green::GreenElementRef, GreenNode, GreenNodeData, GreenToken, GreenTokenData, NodeOrToken, |
7 | SyntaxKind, |
8 | }; |
9 | |
10 | use super::element::GreenElement; |
11 | |
/// `hashbrown` map keyed with the fast, non-cryptographic `FxHasher`
/// (keys here are never attacker-controlled, so DoS resistance is not needed).
type HashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
13 | |
/// Wrapper that deliberately does *not* implement `Hash`, so the interner
/// can never accidentally mix `#[derive(Hash)]`-style hashing with the
/// manually supplied hashes (see the comment on `NodeCache` below).
#[derive(Debug)]
struct NoHash<T>(T);
16 | |
17 | /// Interner for GreenTokens and GreenNodes |
18 | // XXX: the impl is a bit tricky. As usual when writing interners, we want to |
19 | // store all values in one HashSet. |
20 | // |
21 | // However, hashing trees is fun: hash of the tree is recursively defined. We |
22 | // maintain an invariant -- if the tree is interned, then all of its children |
23 | // are interned as well. |
24 | // |
25 | // That means that computing the hash naively is wasteful -- we just *know* |
26 | // hashes of children, and we can re-use those. |
27 | // |
28 | // So here we use *raw* API of hashbrown and provide the hashes manually, |
29 | // instead of going via a `Hash` impl. Our manual `Hash` and the |
30 | // `#[derive(Hash)]` are actually different! At some point we had a fun bug, |
31 | // where we accidentally mixed the two hashes, which made the cache much less |
32 | // efficient. |
33 | // |
34 | // To fix that, we additionally wrap the data in `NoHash` wrapper, to make sure |
35 | // we don't accidentally use the wrong hash! |
36 | #[derive (Default, Debug)] |
37 | pub struct NodeCache { |
38 | nodes: HashMap<NoHash<GreenNode>, ()>, |
39 | tokens: HashMap<NoHash<GreenToken>, ()>, |
40 | } |
41 | |
42 | fn token_hash(token: &GreenTokenData) -> u64 { |
43 | let mut h: FxHasher = FxHasher::default(); |
44 | token.kind().hash(&mut h); |
45 | token.text().hash(&mut h); |
46 | h.finish() |
47 | } |
48 | |
49 | fn node_hash(node: &GreenNodeData) -> u64 { |
50 | let mut h: FxHasher = FxHasher::default(); |
51 | node.kind().hash(&mut h); |
52 | for child: NodeOrToken<&GreenNodeData, …> in node.children() { |
53 | match child { |
54 | NodeOrToken::Node(it) => node_hash(it), |
55 | NodeOrToken::Token(it) => token_hash(it), |
56 | } |
57 | .hash(&mut h) |
58 | } |
59 | h.finish() |
60 | } |
61 | |
62 | fn element_id(elem: GreenElementRef<'_>) -> *const () { |
63 | match elem { |
64 | NodeOrToken::Node(it: &GreenNodeData) => it as *const GreenNodeData as *const (), |
65 | NodeOrToken::Token(it: &GreenTokenData) => it as *const GreenTokenData as *const (), |
66 | } |
67 | } |
68 | |
impl NodeCache {
    /// Returns an interned (deduplicated) node of the given `kind`, built
    /// from `children[first_child..]`; those entries are drained out of the
    /// vector either way.
    ///
    /// Returns `(hash, node)` where `hash` is the node's interner hash, or
    /// `0` when the node was *not* interned (too many children, or some
    /// child was itself not interned). A hash of `0` is thus the
    /// "not cached" sentinel used throughout this function.
    pub(crate) fn node(
        &mut self,
        kind: SyntaxKind,
        children: &mut Vec<(u64, GreenElement)>,
        first_child: usize,
    ) -> (u64, GreenNode) {
        // Builds the node directly, consuming the pending children.
        let build_node = move |children: &mut Vec<(u64, GreenElement)>| {
            GreenNode::new(kind, children.drain(first_child..).map(|(_, it)| it))
        };

        let children_ref = &children[first_child..];
        // Only small nodes are interned; presumably larger ones rarely
        // repeat, so caching them wouldn't pay off -- the cutoff of 3 looks
        // tuned rather than fundamental.
        if children_ref.len() > 3 {
            let node = build_node(children);
            return (0, node);
        }

        // Combine the children's *cached* hashes rather than recursively
        // rehashing the subtree (see the comment on `NodeCache`).
        let hash = {
            let mut h = FxHasher::default();
            kind.hash(&mut h);
            for &(hash, _) in children_ref {
                // A child hash of 0 means that child was not interned, so
                // this node cannot be interned either.
                if hash == 0 {
                    let node = build_node(children);
                    return (0, node);
                }
                hash.hash(&mut h);
            }
            h.finish()
        };

        // Green nodes are fully immutable, so it's ok to deduplicate them.
        // This is the same optimization that Roslyn does
        // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
        //
        // For example, all `#[inline]` in this file share the same green node!
        // For `libsyntax/parse/parser.rs`, measurements show that deduping saves
        // 17% of the memory for green nodes!
        let entry = self.nodes.raw_entry_mut().from_hash(hash, |node| {
            // Children of an interned node are themselves interned, so
            // comparing child *addresses* is enough to establish equality.
            node.0.kind() == kind && node.0.children().len() == children_ref.len() && {
                let lhs = node.0.children();
                let rhs = children_ref.iter().map(|(_, it)| it.as_deref());

                let lhs = lhs.map(element_id);
                let rhs = rhs.map(element_id);

                lhs.eq(rhs)
            }
        });

        let node = match entry {
            RawEntryMut::Occupied(entry) => {
                // Cache hit: drop the pending children and reuse the
                // already-interned node.
                drop(children.drain(first_child..));
                entry.key().0.clone()
            }
            RawEntryMut::Vacant(entry) => {
                let node = build_node(children);
                // On table growth the map rehashes via `node_hash`, not via
                // a `Hash` impl -- `NoHash` guarantees the two can't mix.
                entry.insert_with_hasher(hash, NoHash(node.clone()), (), |n| node_hash(&n.0));
                node
            }
        };

        (hash, node)
    }

    /// Returns an interned token with the given `kind` and `text`, together
    /// with its interner hash (same scheme as `token_hash`). Unlike `node`,
    /// tokens are always interned.
    pub(crate) fn token(&mut self, kind: SyntaxKind, text: &str) -> (u64, GreenToken) {
        let hash = {
            let mut h = FxHasher::default();
            kind.hash(&mut h);
            text.hash(&mut h);
            h.finish()
        };

        let entry = self
            .tokens
            .raw_entry_mut()
            .from_hash(hash, |token| token.0.kind() == kind && token.0.text() == text);

        let token = match entry {
            RawEntryMut::Occupied(entry) => entry.key().0.clone(),
            RawEntryMut::Vacant(entry) => {
                let token = GreenToken::new(kind, text);
                // As above: rehashing goes through `token_hash`.
                entry.insert_with_hasher(hash, NoHash(token.clone()), (), |t| token_hash(&t.0));
                token
            }
        };

        (hash, token)
    }
}
158 | |