1 | //! A stably addressed token buffer supporting efficient traversal based on a |
2 | //! cheaply copyable cursor. |
3 | //! |
4 | //! *This module is available only if Syn is built with the `"parsing"` feature.* |
5 | |
6 | // This module is heavily commented as it contains most of the unsafe code in |
7 | // Syn, and caution should be used when editing it. The public-facing interface |
8 | // is 100% safe but the implementation is fragile internally. |
9 | |
10 | #[cfg (all( |
11 | not(all(target_arch = "wasm32" , any(target_os = "unknown" , target_os = "wasi" ))), |
12 | feature = "proc-macro" |
13 | ))] |
14 | use crate::proc_macro as pm; |
15 | use crate::Lifetime; |
16 | use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree}; |
17 | use std::cmp::Ordering; |
18 | use std::marker::PhantomData; |
19 | |
20 | /// Internal type which is used instead of `TokenTree` to represent a token tree |
21 | /// within a `TokenBuffer`. |
22 | enum Entry { |
23 | // Mimicking types from proc-macro. |
24 | // Group entries contain the offset to the matching End entry. |
25 | Group(Group, usize), |
26 | Ident(Ident), |
27 | Punct(Punct), |
28 | Literal(Literal), |
29 | // End entries contain the offset (negative) to the start of the buffer. |
30 | End(isize), |
31 | } |
32 | |
33 | /// A buffer that can be efficiently traversed multiple times, unlike |
34 | /// `TokenStream` which requires a deep copy in order to traverse more than |
35 | /// once. |
36 | /// |
37 | /// *This type is available only if Syn is built with the `"parsing"` feature.* |
38 | pub struct TokenBuffer { |
39 | // NOTE: Do not implement clone on this - while the current design could be |
40 | // cloned, other designs which could be desirable may not be cloneable. |
41 | entries: Box<[Entry]>, |
42 | } |
43 | |
44 | impl TokenBuffer { |
45 | fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) { |
46 | for tt in stream { |
47 | match tt { |
48 | TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)), |
49 | TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)), |
50 | TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)), |
51 | TokenTree::Group(group) => { |
52 | let group_start_index = entries.len(); |
53 | entries.push(Entry::End(0)); // we replace this below |
54 | Self::recursive_new(entries, group.stream()); |
55 | let group_end_index = entries.len(); |
56 | entries.push(Entry::End(-(group_end_index as isize))); |
57 | let group_end_offset = group_end_index - group_start_index; |
58 | entries[group_start_index] = Entry::Group(group, group_end_offset); |
59 | } |
60 | } |
61 | } |
62 | } |
63 | |
64 | /// Creates a `TokenBuffer` containing all the tokens from the input |
65 | /// `proc_macro::TokenStream`. |
66 | /// |
67 | /// *This method is available only if Syn is built with both the `"parsing"` and |
68 | /// `"proc-macro"` features.* |
69 | #[cfg (all( |
70 | not(all(target_arch = "wasm32" , any(target_os = "unknown" , target_os = "wasi" ))), |
71 | feature = "proc-macro" |
72 | ))] |
73 | pub fn new(stream: pm::TokenStream) -> Self { |
74 | Self::new2(stream.into()) |
75 | } |
76 | |
77 | /// Creates a `TokenBuffer` containing all the tokens from the input |
78 | /// `proc_macro2::TokenStream`. |
79 | pub fn new2(stream: TokenStream) -> Self { |
80 | let mut entries = Vec::new(); |
81 | Self::recursive_new(&mut entries, stream); |
82 | entries.push(Entry::End(-(entries.len() as isize))); |
83 | Self { |
84 | entries: entries.into_boxed_slice(), |
85 | } |
86 | } |
87 | |
88 | /// Creates a cursor referencing the first token in the buffer and able to |
89 | /// traverse until the end of the buffer. |
90 | pub fn begin(&self) -> Cursor { |
91 | let ptr = self.entries.as_ptr(); |
92 | unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) } |
93 | } |
94 | } |
95 | |
96 | /// A cheaply copyable cursor into a `TokenBuffer`. |
97 | /// |
98 | /// This cursor holds a shared reference into the immutable data which is used |
99 | /// internally to represent a `TokenStream`, and can be efficiently manipulated |
100 | /// and copied around. |
101 | /// |
102 | /// An empty `Cursor` can be created directly, or one may create a `TokenBuffer` |
103 | /// object and get a cursor to its first token with `begin()`. |
104 | /// |
105 | /// Two cursors are equal if they have the same location in the same input |
106 | /// stream, and have the same scope. |
107 | /// |
108 | /// *This type is available only if Syn is built with the `"parsing"` feature.* |
109 | pub struct Cursor<'a> { |
110 | // The current entry which the `Cursor` is pointing at. |
111 | ptr: *const Entry, |
112 | // This is the only `Entry::End` object which this cursor is allowed to |
113 | // point at. All other `End` objects are skipped over in `Cursor::create`. |
114 | scope: *const Entry, |
115 | // Cursor is covariant in 'a. This field ensures that our pointers are still |
116 | // valid. |
117 | marker: PhantomData<&'a Entry>, |
118 | } |
119 | |
120 | impl<'a> Cursor<'a> { |
121 | /// Creates a cursor referencing a static empty TokenStream. |
122 | pub fn empty() -> Self { |
123 | // It's safe in this situation for us to put an `Entry` object in global |
124 | // storage, despite it not actually being safe to send across threads |
125 | // (`Ident` is a reference into a thread-local table). This is because |
126 | // this entry never includes a `Ident` object. |
127 | // |
128 | // This wrapper struct allows us to break the rules and put a `Sync` |
129 | // object in global storage. |
130 | struct UnsafeSyncEntry(Entry); |
131 | unsafe impl Sync for UnsafeSyncEntry {} |
132 | static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0)); |
133 | |
134 | Cursor { |
135 | ptr: &EMPTY_ENTRY.0, |
136 | scope: &EMPTY_ENTRY.0, |
137 | marker: PhantomData, |
138 | } |
139 | } |
140 | |
141 | /// This create method intelligently exits non-explicitly-entered |
142 | /// `None`-delimited scopes when the cursor reaches the end of them, |
143 | /// allowing for them to be treated transparently. |
144 | unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self { |
145 | // NOTE: If we're looking at a `End`, we want to advance the cursor |
146 | // past it, unless `ptr == scope`, which means that we're at the edge of |
147 | // our cursor's scope. We should only have `ptr != scope` at the exit |
148 | // from None-delimited groups entered with `ignore_none`. |
149 | while let Entry::End(_) = *ptr { |
150 | if ptr == scope { |
151 | break; |
152 | } |
153 | ptr = ptr.add(1); |
154 | } |
155 | |
156 | Cursor { |
157 | ptr, |
158 | scope, |
159 | marker: PhantomData, |
160 | } |
161 | } |
162 | |
163 | /// Get the current entry. |
164 | fn entry(self) -> &'a Entry { |
165 | unsafe { &*self.ptr } |
166 | } |
167 | |
168 | /// Bump the cursor to point at the next token after the current one. This |
169 | /// is undefined behavior if the cursor is currently looking at an |
170 | /// `Entry::End`. |
171 | /// |
172 | /// If the cursor is looking at an `Entry::Group`, the bumped cursor will |
173 | /// point at the first token in the group (with the same scope end). |
174 | unsafe fn bump_ignore_group(self) -> Cursor<'a> { |
175 | Cursor::create(self.ptr.offset(1), self.scope) |
176 | } |
177 | |
178 | /// While the cursor is looking at a `None`-delimited group, move it to look |
179 | /// at the first token inside instead. If the group is empty, this will move |
180 | /// the cursor past the `None`-delimited group. |
181 | /// |
182 | /// WARNING: This mutates its argument. |
183 | fn ignore_none(&mut self) { |
184 | while let Entry::Group(group, _) = self.entry() { |
185 | if group.delimiter() == Delimiter::None { |
186 | unsafe { *self = self.bump_ignore_group() }; |
187 | } else { |
188 | break; |
189 | } |
190 | } |
191 | } |
192 | |
193 | /// Checks whether the cursor is currently pointing at the end of its valid |
194 | /// scope. |
195 | pub fn eof(self) -> bool { |
196 | // We're at eof if we're at the end of our scope. |
197 | self.ptr == self.scope |
198 | } |
199 | |
200 | /// If the cursor is pointing at a `Group` with the given delimiter, returns |
201 | /// a cursor into that group and one pointing to the next `TokenTree`. |
202 | pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> { |
203 | // If we're not trying to enter a none-delimited group, we want to |
204 | // ignore them. We have to make sure to _not_ ignore them when we want |
205 | // to enter them, of course. For obvious reasons. |
206 | if delim != Delimiter::None { |
207 | self.ignore_none(); |
208 | } |
209 | |
210 | if let Entry::Group(group, end_offset) = self.entry() { |
211 | if group.delimiter() == delim { |
212 | let end_of_group = unsafe { self.ptr.add(*end_offset) }; |
213 | let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) }; |
214 | let after_group = unsafe { Cursor::create(end_of_group, self.scope) }; |
215 | return Some((inside_of_group, group.span(), after_group)); |
216 | } |
217 | } |
218 | |
219 | None |
220 | } |
221 | |
222 | /// If the cursor is pointing at a `Ident`, returns it along with a cursor |
223 | /// pointing at the next `TokenTree`. |
224 | pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> { |
225 | self.ignore_none(); |
226 | match self.entry() { |
227 | Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })), |
228 | _ => None, |
229 | } |
230 | } |
231 | |
232 | /// If the cursor is pointing at a `Punct`, returns it along with a cursor |
233 | /// pointing at the next `TokenTree`. |
234 | pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> { |
235 | self.ignore_none(); |
236 | match self.entry() { |
237 | Entry::Punct(punct) if punct.as_char() != ' \'' => { |
238 | Some((punct.clone(), unsafe { self.bump_ignore_group() })) |
239 | } |
240 | _ => None, |
241 | } |
242 | } |
243 | |
244 | /// If the cursor is pointing at a `Literal`, return it along with a cursor |
245 | /// pointing at the next `TokenTree`. |
246 | pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> { |
247 | self.ignore_none(); |
248 | match self.entry() { |
249 | Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })), |
250 | _ => None, |
251 | } |
252 | } |
253 | |
254 | /// If the cursor is pointing at a `Lifetime`, returns it along with a |
255 | /// cursor pointing at the next `TokenTree`. |
256 | pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> { |
257 | self.ignore_none(); |
258 | match self.entry() { |
259 | Entry::Punct(punct) if punct.as_char() == ' \'' && punct.spacing() == Spacing::Joint => { |
260 | let next = unsafe { self.bump_ignore_group() }; |
261 | let (ident, rest) = next.ident()?; |
262 | let lifetime = Lifetime { |
263 | apostrophe: punct.span(), |
264 | ident, |
265 | }; |
266 | Some((lifetime, rest)) |
267 | } |
268 | _ => None, |
269 | } |
270 | } |
271 | |
272 | /// Copies all remaining tokens visible from this cursor into a |
273 | /// `TokenStream`. |
274 | pub fn token_stream(self) -> TokenStream { |
275 | let mut tts = Vec::new(); |
276 | let mut cursor = self; |
277 | while let Some((tt, rest)) = cursor.token_tree() { |
278 | tts.push(tt); |
279 | cursor = rest; |
280 | } |
281 | tts.into_iter().collect() |
282 | } |
283 | |
284 | /// If the cursor is pointing at a `TokenTree`, returns it along with a |
285 | /// cursor pointing at the next `TokenTree`. |
286 | /// |
287 | /// Returns `None` if the cursor has reached the end of its stream. |
288 | /// |
289 | /// This method does not treat `None`-delimited groups as transparent, and |
290 | /// will return a `Group(None, ..)` if the cursor is looking at one. |
291 | pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> { |
292 | let (tree, len) = match self.entry() { |
293 | Entry::Group(group, end_offset) => (group.clone().into(), *end_offset), |
294 | Entry::Literal(literal) => (literal.clone().into(), 1), |
295 | Entry::Ident(ident) => (ident.clone().into(), 1), |
296 | Entry::Punct(punct) => (punct.clone().into(), 1), |
297 | Entry::End(_) => return None, |
298 | }; |
299 | |
300 | let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) }; |
301 | Some((tree, rest)) |
302 | } |
303 | |
304 | /// Returns the `Span` of the current token, or `Span::call_site()` if this |
305 | /// cursor points to eof. |
306 | pub fn span(self) -> Span { |
307 | match self.entry() { |
308 | Entry::Group(group, _) => group.span(), |
309 | Entry::Literal(literal) => literal.span(), |
310 | Entry::Ident(ident) => ident.span(), |
311 | Entry::Punct(punct) => punct.span(), |
312 | Entry::End(_) => Span::call_site(), |
313 | } |
314 | } |
315 | |
316 | /// Skip over the next token without cloning it. Returns `None` if this |
317 | /// cursor points to eof. |
318 | /// |
319 | /// This method treats `'lifetimes` as a single token. |
320 | pub(crate) fn skip(self) -> Option<Cursor<'a>> { |
321 | let len = match self.entry() { |
322 | Entry::End(_) => return None, |
323 | |
324 | // Treat lifetimes as a single tt for the purposes of 'skip'. |
325 | Entry::Punct(punct) if punct.as_char() == ' \'' && punct.spacing() == Spacing::Joint => { |
326 | match unsafe { &*self.ptr.add(1) } { |
327 | Entry::Ident(_) => 2, |
328 | _ => 1, |
329 | } |
330 | } |
331 | |
332 | Entry::Group(_, end_offset) => *end_offset, |
333 | _ => 1, |
334 | }; |
335 | |
336 | Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) }) |
337 | } |
338 | } |
339 | |
340 | impl<'a> Copy for Cursor<'a> {} |
341 | |
342 | impl<'a> Clone for Cursor<'a> { |
343 | fn clone(&self) -> Self { |
344 | *self |
345 | } |
346 | } |
347 | |
348 | impl<'a> Eq for Cursor<'a> {} |
349 | |
350 | impl<'a> PartialEq for Cursor<'a> { |
351 | fn eq(&self, other: &Self) -> bool { |
352 | self.ptr == other.ptr |
353 | } |
354 | } |
355 | |
356 | impl<'a> PartialOrd for Cursor<'a> { |
357 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
358 | if same_buffer(*self, *other) { |
359 | Some(self.ptr.cmp(&other.ptr)) |
360 | } else { |
361 | None |
362 | } |
363 | } |
364 | } |
365 | |
366 | pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool { |
367 | a.scope == b.scope |
368 | } |
369 | |
370 | pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool { |
371 | unsafe { |
372 | match (&*a.scope, &*b.scope) { |
373 | (Entry::End(a_offset: &isize), Entry::End(b_offset: &isize)) => { |
374 | a.scope.offset(*a_offset) == b.scope.offset(*b_offset) |
375 | } |
376 | _ => unreachable!(), |
377 | } |
378 | } |
379 | } |
380 | |
381 | #[cfg (any(feature = "full" , feature = "derive" ))] |
382 | pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering { |
383 | a.ptr.cmp(&b.ptr) |
384 | } |
385 | |
386 | pub(crate) fn open_span_of_group(cursor: Cursor) -> Span { |
387 | match cursor.entry() { |
388 | Entry::Group(group: &Group, _) => group.span_open(), |
389 | _ => cursor.span(), |
390 | } |
391 | } |
392 | |
393 | pub(crate) fn close_span_of_group(cursor: Cursor) -> Span { |
394 | match cursor.entry() { |
395 | Entry::Group(group: &Group, _) => group.span_close(), |
396 | _ => cursor.span(), |
397 | } |
398 | } |
399 | |