1use hashbrown::hash_map::RawEntryMut;
2use rustc_hash::FxHasher;
3use std::hash::{BuildHasherDefault, Hash, Hasher};
4
5use crate::{
6 green::GreenElementRef, GreenNode, GreenNodeData, GreenToken, GreenTokenData, NodeOrToken,
7 SyntaxKind,
8};
9
10use super::element::GreenElement;
11
12type HashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
13
14#[derive(Debug)]
15struct NoHash<T>(T);
16
17/// Interner for GreenTokens and GreenNodes
18// XXX: the impl is a bit tricky. As usual when writing interners, we want to
19// store all values in one HashSet.
20//
21// However, hashing trees is fun: hash of the tree is recursively defined. We
22// maintain an invariant -- if the tree is interned, then all of its children
23// are interned as well.
24//
25// That means that computing the hash naively is wasteful -- we just *know*
26// hashes of children, and we can re-use those.
27//
28// So here we use *raw* API of hashbrown and provide the hashes manually,
29// instead of going via a `Hash` impl. Our manual `Hash` and the
30// `#[derive(Hash)]` are actually different! At some point we had a fun bug,
31// where we accidentally mixed the two hashes, which made the cache much less
32// efficient.
33//
34// To fix that, we additionally wrap the data in `NoHash` wrapper, to make sure
35// we don't accidentally use the wrong hash!
36#[derive(Default, Debug)]
37pub struct NodeCache {
38 nodes: HashMap<NoHash<GreenNode>, ()>,
39 tokens: HashMap<NoHash<GreenToken>, ()>,
40}
41
42fn token_hash(token: &GreenTokenData) -> u64 {
43 let mut h: FxHasher = FxHasher::default();
44 token.kind().hash(&mut h);
45 token.text().hash(&mut h);
46 h.finish()
47}
48
49fn node_hash(node: &GreenNodeData) -> u64 {
50 let mut h: FxHasher = FxHasher::default();
51 node.kind().hash(&mut h);
52 for child: NodeOrToken<&GreenNodeData, …> in node.children() {
53 match child {
54 NodeOrToken::Node(it) => node_hash(it),
55 NodeOrToken::Token(it) => token_hash(it),
56 }
57 .hash(&mut h)
58 }
59 h.finish()
60}
61
62fn element_id(elem: GreenElementRef<'_>) -> *const () {
63 match elem {
64 NodeOrToken::Node(it: &GreenNodeData) => it as *const GreenNodeData as *const (),
65 NodeOrToken::Token(it: &GreenTokenData) => it as *const GreenTokenData as *const (),
66 }
67}
68
69impl NodeCache {
70 pub(crate) fn node(
71 &mut self,
72 kind: SyntaxKind,
73 children: &mut Vec<(u64, GreenElement)>,
74 first_child: usize,
75 ) -> (u64, GreenNode) {
76 let build_node = move |children: &mut Vec<(u64, GreenElement)>| {
77 GreenNode::new(kind, children.drain(first_child..).map(|(_, it)| it))
78 };
79
80 let children_ref = &children[first_child..];
81 if children_ref.len() > 3 {
82 let node = build_node(children);
83 return (0, node);
84 }
85
86 let hash = {
87 let mut h = FxHasher::default();
88 kind.hash(&mut h);
89 for &(hash, _) in children_ref {
90 if hash == 0 {
91 let node = build_node(children);
92 return (0, node);
93 }
94 hash.hash(&mut h);
95 }
96 h.finish()
97 };
98
99 // Green nodes are fully immutable, so it's ok to deduplicate them.
100 // This is the same optimization that Roslyn does
101 // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
102 //
103 // For example, all `#[inline]` in this file share the same green node!
104 // For `libsyntax/parse/parser.rs`, measurements show that deduping saves
105 // 17% of the memory for green nodes!
106 let entry = self.nodes.raw_entry_mut().from_hash(hash, |node| {
107 node.0.kind() == kind && node.0.children().len() == children_ref.len() && {
108 let lhs = node.0.children();
109 let rhs = children_ref.iter().map(|(_, it)| it.as_deref());
110
111 let lhs = lhs.map(element_id);
112 let rhs = rhs.map(element_id);
113
114 lhs.eq(rhs)
115 }
116 });
117
118 let node = match entry {
119 RawEntryMut::Occupied(entry) => {
120 drop(children.drain(first_child..));
121 entry.key().0.clone()
122 }
123 RawEntryMut::Vacant(entry) => {
124 let node = build_node(children);
125 entry.insert_with_hasher(hash, NoHash(node.clone()), (), |n| node_hash(&n.0));
126 node
127 }
128 };
129
130 (hash, node)
131 }
132
133 pub(crate) fn token(&mut self, kind: SyntaxKind, text: &str) -> (u64, GreenToken) {
134 let hash = {
135 let mut h = FxHasher::default();
136 kind.hash(&mut h);
137 text.hash(&mut h);
138 h.finish()
139 };
140
141 let entry = self
142 .tokens
143 .raw_entry_mut()
144 .from_hash(hash, |token| token.0.kind() == kind && token.0.text() == text);
145
146 let token = match entry {
147 RawEntryMut::Occupied(entry) => entry.key().0.clone(),
148 RawEntryMut::Vacant(entry) => {
149 let token = GreenToken::new(kind, text);
150 entry.insert_with_hasher(hash, NoHash(token.clone()), (), |t| token_hash(&t.0));
151 token
152 }
153 };
154
155 (hash, token)
156 }
157}
158