1 | //! Extensions to the parsing API with niche applicability. |
2 | |
3 | use crate::buffer::Cursor; |
4 | use crate::error::Result; |
5 | use crate::parse::{inner_unexpected, ParseBuffer, Unexpected}; |
6 | use proc_macro2::extra::DelimSpan; |
7 | use proc_macro2::Delimiter; |
8 | use std::cell::Cell; |
9 | use std::mem; |
10 | use std::rc::Rc; |
11 | |
/// Speculative-parsing extensions to the `ParseStream` API.
pub trait Speculative {
    /// Move this parse stream forward to the position reached by a forked
    /// parse stream.
    ///
    /// This is the inverse of [`ParseStream::fork`]: fork the stream, parse
    /// speculatively on the fork, and then advance the original stream to the
    /// fork in order to "commit" whatever the fork consumed.
    ///
    /// Avoid this whenever you can, because it limits the ability to generate
    /// useful errors. Even so, it is often the only way to parse syntax of
    /// the form `A* B*` for arbitrary syntax `A` and `B`. When the fork fails
    /// to parse an `A`, it is impossible to tell whether the user intended an
    /// `A` and made a syntax error, or whether the `A`s are simply finished
    /// and it is time to start parsing `B`s. Use with care.
    ///
    /// Note also that when `A` is a subset of `B`, `A* B*` can be parsed by
    /// parsing `B*` and then removing the leading members of `A` from the
    /// repetition, which sidesteps the downsides of speculative parsing
    /// altogether.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about making the colons in the turbofish syntax
    /// `path::to::<T>` optional by also accepting `path::to<T>` in expression
    /// position. Specifically, according to [RFC 2544], [`PathSegment`]
    /// parsing should always try to consume a following `<` token as the
    /// start of generic arguments, and reset to the `<` if that fails (e.g.
    /// because the token is acting as a less-than operator).
    ///
    /// That is exactly the "fork, try, commit" behavior which
    /// [`ParseStream::fork`] discourages. With `advance_to`, at least the
    /// speculatively parsed content does not have to be parsed a second time.
    ///
    /// The change in behavior can be implemented in syn by replacing just the
    /// `Parse` implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main cost of this style of speculative parsing is the quality of
    /// the errors presented. Even when the lookahead is the "correct" parse,
    /// the error that is shown is the one from the "fallback" parse. Sticking
    /// with the turbofish example above, consider this unfinished
    /// "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error
    /// message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but when it is parsed with the speculative approach shown above, the
    /// parser falls back to assuming that the `<` is a less-than once the
    /// generic arguments fail to parse, and then tries to interpret the `&'a`
    /// as the start of a labelled loop, resulting in the much less helpful
    /// error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// Various heuristics can mitigate this (two examples: show the parse
    /// errors of both forks, or show the one that consumed more tokens).
    /// However, when you control the grammar, sticking to something that can
    /// be parsed LL(3), without the LL(*) speculative parsing this method
    /// makes possible, makes displaying reasonable errors much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// This method performs a cheap, fixed amount of work that does not
    /// depend on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream passed to `advance_to` must have been obtained by
    /// forking `self`. Attempting to advance to any other stream will cause a
    /// panic.
    fn advance_to(&self, fork: &Self);
}
166 | |
167 | impl<'a> Speculative for ParseBuffer<'a> { |
168 | fn advance_to(&self, fork: &Self) { |
169 | if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { |
170 | panic!("Fork was not derived from the advancing parse stream" ); |
171 | } |
172 | |
173 | let (self_unexp, self_sp) = inner_unexpected(self); |
174 | let (fork_unexp, fork_sp) = inner_unexpected(fork); |
175 | if !Rc::ptr_eq(&self_unexp, &fork_unexp) { |
176 | match (fork_sp, self_sp) { |
177 | // Unexpected set on the fork, but not on `self`, copy it over. |
178 | (Some(span), None) => { |
179 | self_unexp.set(Unexpected::Some(span)); |
180 | } |
181 | // Unexpected unset. Use chain to propagate errors from fork. |
182 | (None, None) => { |
183 | fork_unexp.set(Unexpected::Chain(self_unexp)); |
184 | |
185 | // Ensure toplevel 'unexpected' tokens from the fork don't |
186 | // bubble up the chain by replacing the root `unexpected` |
187 | // pointer, only 'unexpected' tokens from existing group |
188 | // parsers should bubble. |
189 | fork.unexpected |
190 | .set(Some(Rc::new(Cell::new(Unexpected::None)))); |
191 | } |
192 | // Unexpected has been set on `self`. No changes needed. |
193 | (_, Some(_)) => {} |
194 | } |
195 | } |
196 | |
197 | // See comment on `cell` in the struct definition. |
198 | self.cell |
199 | .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); |
200 | } |
201 | } |
202 | |
203 | /// Extensions to the `ParseStream` API to support manipulating invisible |
204 | /// delimiters the same as if they were visible. |
205 | pub trait AnyDelimiter { |
206 | /// Returns the delimiter, the span of the delimiter token, and the nested |
207 | /// contents for further parsing. |
208 | fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>; |
209 | } |
210 | |
211 | impl<'a> AnyDelimiter for ParseBuffer<'a> { |
212 | fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> { |
213 | self.step(|cursor: StepCursor<'_, '_>| { |
214 | if let Some((content: Cursor<'_>, delimiter: Delimiter, span: DelimSpan, rest: Cursor<'_>)) = cursor.any_group() { |
215 | let scope: Span = crate::buffer::close_span_of_group(*cursor); |
216 | let nested: Cursor<'_> = crate::parse::advance_step_cursor(proof:cursor, to:content); |
217 | let unexpected: Rc> | = crate::parse::get_unexpected(self); |
218 | let content: ParseBuffer<'_> = crate::parse::new_parse_buffer(scope, cursor:nested, unexpected); |
219 | Ok(((delimiter, span, content), rest)) |
220 | } else { |
221 | Err(cursor.error(message:"expected any delimiter" )) |
222 | } |
223 | }) |
224 | } |
225 | } |
226 | |