1//! Extensions to the parsing API with niche applicability.
2
3use super::*;
4use proc_macro2::extra::DelimSpan;
5
/// Extensions to the `ParseStream` API to support speculative parsing.
pub trait Speculative {
    /// Advance this parse stream to the position of a forked parse stream.
    ///
    /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
    /// parse stream, perform some speculative parsing, then join the original
    /// stream to the fork to "commit" the parsing from the fork to the main
    /// stream.
    ///
    /// If you can avoid doing this, you should, as it limits the ability to
    /// generate useful errors. That said, it is often the only way to parse
    /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
    /// is that when the fork fails to parse an `A`, it's impossible to tell
    /// whether that was because of a syntax error and the user meant to provide
    /// an `A`, or that the `A`s are finished and it's time to start parsing
    /// `B`s. Use with care.
    ///
    /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
    /// parsing `B*` and removing the leading members of `A` from the
    /// repetition, bypassing the need to involve the downsides associated with
    /// speculative parsing.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about the possibility of making the colons in the
    /// turbofish syntax like `path::to::<T>` no longer required by accepting
    /// `path::to<T>` in expression position. Specifically, according to [RFC
    /// 2544], [`PathSegment`] parsing should always try to consume a following
    /// `<` token as the start of generic arguments, and reset to the `<` if
    /// that fails (e.g. the token is acting as a less-than operator).
    ///
    /// This is the exact kind of parsing behavior which requires the "fork,
    /// try, commit" behavior that [`ParseStream::fork`] discourages. With
    /// `advance_to`, we can avoid having to parse the speculatively parsed
    /// content a second time.
    ///
    /// This change in behavior can be implemented in syn by replacing just the
    /// `Parse` implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main drawback of this style of speculative parsing is in error
    /// presentation. Even if the lookahead is the "correct" parse, the error
    /// that is shown is that of the "fallback" parse. To use the same example
    /// as the turbofish above, take the following unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but if parsed using the above speculative parsing, it falls back to
    /// assuming that the `<` is a less-than when it fails to parse the generic
    /// arguments, and tries to interpret the `&'a` as the start of a labelled
    /// loop, resulting in the much less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// This can be mitigated with various heuristics (two examples: show both
    /// forks' parse errors, or show the one that consumed more tokens), but
    /// when you can control the grammar, sticking to something that can be
    /// parsed LL(3) and without the LL(*) speculative parsing this makes
    /// possible, displaying reasonable errors becomes much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// This method performs a cheap fixed amount of work that does not depend
    /// on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream in the argument of `advance_to` must have been
    /// obtained by forking `self`. Attempting to advance to any other stream
    /// will cause a panic.
    fn advance_to(&self, fork: &Self);
}
160
161impl<'a> Speculative for ParseBuffer<'a> {
162 fn advance_to(&self, fork: &Self) {
163 if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
164 panic!("Fork was not derived from the advancing parse stream");
165 }
166
167 let (self_unexp, self_sp) = inner_unexpected(self);
168 let (fork_unexp, fork_sp) = inner_unexpected(fork);
169 if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
170 match (fork_sp, self_sp) {
171 // Unexpected set on the fork, but not on `self`, copy it over.
172 (Some(span), None) => {
173 self_unexp.set(Unexpected::Some(span));
174 }
175 // Unexpected unset. Use chain to propagate errors from fork.
176 (None, None) => {
177 fork_unexp.set(Unexpected::Chain(self_unexp));
178
179 // Ensure toplevel 'unexpected' tokens from the fork don't
180 // bubble up the chain by replacing the root `unexpected`
181 // pointer, only 'unexpected' tokens from existing group
182 // parsers should bubble.
183 fork.unexpected
184 .set(Some(Rc::new(Cell::new(Unexpected::None))));
185 }
186 // Unexpected has been set on `self`. No changes needed.
187 (_, Some(_)) => {}
188 }
189 }
190
191 // See comment on `cell` in the struct definition.
192 self.cell
193 .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
194 }
195}
196
/// Extensions to the `ParseStream` API to support manipulating invisible
/// delimiters the same as if they were visible.
pub trait AnyDelimiter {
    /// Returns the delimiter, the span of the delimiter token, and the nested
    /// contents for further parsing.
    ///
    /// The three parts are returned together as a
    /// `(Delimiter, DelimSpan, ParseBuffer)` tuple, in that order.
    ///
    /// # Errors
    ///
    /// The implementation for `ParseBuffer` returns an error with the message
    /// `expected any delimiter` when the cursor's `any_group` lookup does not
    /// yield a group at the current position.
    fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>;
}
204
205impl<'a> AnyDelimiter for ParseBuffer<'a> {
206 fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> {
207 self.step(|cursor| {
208 if let Some((content, delimiter, span, rest)) = cursor.any_group() {
209 let scope = crate::buffer::close_span_of_group(*cursor);
210 let nested = crate::parse::advance_step_cursor(cursor, content);
211 let unexpected = crate::parse::get_unexpected(self);
212 let content = crate::parse::new_parse_buffer(scope, nested, unexpected);
213 Ok(((delimiter, span, content), rest))
214 } else {
215 Err(cursor.error("expected any delimiter"))
216 }
217 })
218 }
219}
220