node_cache.rs source code [crates/rowan/src/green/node_cache.rs]

1	use hashbrown::hash_map::RawEntryMut;
2	use rustc_hash::FxHasher;
3	use std::hash::{BuildHasherDefault, Hash, Hasher};
4
5	use crate::{
6	green::GreenElementRef, GreenNode, GreenNodeData, GreenToken, GreenTokenData, NodeOrToken,
7	SyntaxKind,
8	};
9
10	use super::element::GreenElement;
11
12	type HashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
13
14	#[derive(Debug)]
15	struct NoHash<T>(T);
16
17	/// Interner for GreenTokens and GreenNodes
18	// XXX: the impl is a bit tricky. As usual when writing interners, we want to
19	// store all values in one HashSet.
20	//
21	// However, hashing trees is fun: hash of the tree is recursively defined. We
22	// maintain an invariant -- if the tree is interned, then all of its children
23	// are interned as well.
24	//
25	// That means that computing the hash naively is wasteful -- we just know
26	// hashes of children, and we can re-use those.
27	//
28	// So here we use raw* API of hashbrown and provide the hashes manually,*
29	// instead of going via a `Hash` impl. Our manual `Hash` and the
30	// `#[derive(Hash)]` are actually different! At some point we had a fun bug,
31	// where we accidentally mixed the two hashes, which made the cache much less
32	// efficient.
33	//
34	// To fix that, we additionally wrap the data in `NoHash` wrapper, to make sure
35	// we don't accidentally use the wrong hash!
36	#[derive(Default, Debug)]
37	pub struct NodeCache {
38	nodes: HashMap<NoHash<GreenNode>, ()>,
39	tokens: HashMap<NoHash<GreenToken>, ()>,
40	}
41
42	fn token_hash(token: &GreenTokenData) -> u64 {
43	let mut h: FxHasher = FxHasher::default();
44	token.kind().hash(&mut h);
45	token.text().hash(&mut h);
46	h.finish()
47	}
48
49	fn node_hash(node: &GreenNodeData) -> u64 {
50	let mut h: FxHasher = FxHasher::default();
51	node.kind().hash(&mut h);
52	for child: NodeOrToken<&GreenNodeData, …> in node.children() {
53	match child {
54	NodeOrToken::Node(it) => node_hash(it),
55	NodeOrToken::Token(it) => token_hash(it),
56	}
57	.hash(&mut h)
58	}
59	h.finish()
60	}
61
62	fn element_id(elem: GreenElementRef<'_>) -> *const () {
63	match elem {
64	NodeOrToken::Node(it: &GreenNodeData) => it as *const GreenNodeData as *const (),
65	NodeOrToken::Token(it: &GreenTokenData) => it as *const GreenTokenData as *const (),
66	}
67	}
68
69	impl NodeCache {
70	pub(crate) fn node(
71	&mut self,
72	kind: SyntaxKind,
73	children: &mut Vec<(u64, GreenElement)>,
74	first_child: usize,
75	) -> (u64, GreenNode) {
76	let build_node = move \|children: &mut Vec<(u64, GreenElement)>\| {
77	GreenNode::new(kind, children.drain(first_child..).map(\|(_, it)\| it))
78	};
79
80	let children_ref = &children[first_child..];
81	if children_ref.len() > `3` {
82	let node = build_node(children);
83	return (`0`, node);
84	}
85
86	let hash = {
87	let mut h = FxHasher::default();
88	kind.hash(&mut h);
89	for &(hash, _) in children_ref {
90	if hash == `0` {
91	let node = build_node(children);
92	return (`0`, node);
93	}
94	hash.hash(&mut h);
95	}
96	h.finish()
97	};
98
99	// Green nodes are fully immutable, so it's ok to deduplicate them.
100	// This is the same optimization that Roslyn does
101	// https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
102	//
103	// For example, all `#[inline]` in this file share the same green node!
104	// For `libsyntax/parse/parser.rs`, measurements show that deduping saves
105	// 17% of the memory for green nodes!
106	let entry = self.nodes.raw_entry_mut().from_hash(hash, \|node\| {
107	node.0.kind() == kind && node.0.children().len() == children_ref.len() && {
108	let lhs = node.0.children();
109	let rhs = children_ref.iter().map(\|(_, it)\| it.as_deref());
110
111	let lhs = lhs.map(element_id);
112	let rhs = rhs.map(element_id);
113
114	lhs.eq(rhs)
115	}
116	});
117
118	let node = match entry {
119	RawEntryMut::Occupied(entry) => {
120	drop(children.drain(first_child..));
121	entry.key().0.clone()
122	}
123	RawEntryMut::Vacant(entry) => {
124	let node = build_node(children);
125	entry.insert_with_hasher(hash, NoHash(node.clone()), (), \|n\| node_hash(&n.0));
126	node
127	}
128	};
129
130	(hash, node)
131	}
132
133	pub(crate) fn token(&mut self, kind: SyntaxKind, text: &str) -> (u64, GreenToken) {
134	let hash = {
135	let mut h = FxHasher::default();
136	kind.hash(&mut h);
137	text.hash(&mut h);
138	h.finish()
139	};
140
141	let entry = self
142	.tokens
143	.raw_entry_mut()
144	.from_hash(hash, \|token\| token.0.kind() == kind && token.0.text() == text);
145
146	let token = match entry {
147	RawEntryMut::Occupied(entry) => entry.key().0.clone(),
148	RawEntryMut::Vacant(entry) => {
149	let token = GreenToken::new(kind, text);
150	entry.insert_with_hasher(hash, NoHash(token.clone()), (), \|t\| token_hash(&t.0));
151	token
152	}
153	};
154
155	(hash, token)
156	}
157	}
158