//! Extensions to the parsing API with niche applicability.
2
3use super::*;
4
/// Speculative-parsing additions to the `ParseStream` API.
pub trait Speculative {
    /// Moves this parse stream forward to the position a forked parse stream
    /// has reached.
    ///
    /// This is the inverse of [`ParseStream::fork`]: fork the stream, parse
    /// speculatively on the fork, and then advance the original stream to the
    /// fork in order to "commit" what the fork parsed back onto the main
    /// stream.
    ///
    /// Avoid this whenever you can, because it restricts how useful your
    /// error messages can be. Even so, it is frequently the only way to parse
    /// syntax shaped like `A* B*` where `A` and `B` are arbitrary syntax. The
    /// difficulty is that once the fork fails to parse an `A`, there is no way
    /// to know whether the user intended an `A` and made a syntax error, or
    /// whether the `A`s have ended and the `B`s should begin. Use with care.
    ///
    /// Note too that when `A` is a subset of `B`, `A* B*` can instead be
    /// handled by parsing `B*` and then taking the leading members of `A` out
    /// of the repetition, which sidesteps the downsides of speculative parsing
    /// altogether.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been discussion of dropping the requirement for the colons in
    /// turbofish syntax such as `path::to::<T>`, so that `path::to<T>` would
    /// be accepted in expression position. In particular, under [RFC 2544],
    /// [`PathSegment`] parsing should always attempt to consume a following
    /// `<` token as the opening of generic arguments, and rewind to the `<`
    /// when that attempt fails (e.g. because the token is really a less-than
    /// operator).
    ///
    /// That is precisely the "fork, try, commit" pattern that
    /// [`ParseStream::fork`] discourages. Thanks to `advance_to`, the
    /// speculatively parsed content does not have to be parsed a second time.
    ///
    /// In syn, the new behavior only requires swapping out the `Parse`
    /// implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The biggest cost of speculative parsing in this style is the quality of
    /// error reporting. Even when the lookahead was the "correct" parse, the
    /// error the user sees comes from the "fallback" parse. Continuing with
    /// the turbofish example, consider this unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// Parsed as generic arguments, this lets us report the error
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// whereas the speculative parser above, upon failing to parse the generic
    /// arguments, falls back to treating the `<` as a less-than, then attempts
    /// to read `&'a` as the beginning of a labelled loop, and ends up with the
    /// far less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// Heuristics can soften this (two examples: report the parse errors of
    /// both forks, or report the one that consumed more tokens). But when the
    /// grammar is under your control, keeping it parseable as LL(3), without
    /// the LL(*) speculative parsing that this method enables, makes it much
    /// simpler to display reasonable errors.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// The work done by this method is cheap and fixed; it does not depend on
    /// the distance between the positions of the two streams.
    ///
    /// # Panics
    ///
    /// The stream passed to `advance_to` must be a fork obtained from `self`.
    /// Trying to advance to any other stream panics.
    fn advance_to(&self, fork: &Self);
}
159
160impl<'a> Speculative for ParseBuffer<'a> {
161 fn advance_to(&self, fork: &Self) {
162 if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
163 panic!("Fork was not derived from the advancing parse stream");
164 }
165
166 let (self_unexp, self_sp) = inner_unexpected(self);
167 let (fork_unexp, fork_sp) = inner_unexpected(fork);
168 if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
169 match (fork_sp, self_sp) {
170 // Unexpected set on the fork, but not on `self`, copy it over.
171 (Some(span), None) => {
172 self_unexp.set(Unexpected::Some(span));
173 }
174 // Unexpected unset. Use chain to propagate errors from fork.
175 (None, None) => {
176 fork_unexp.set(Unexpected::Chain(self_unexp));
177
178 // Ensure toplevel 'unexpected' tokens from the fork don't
179 // bubble up the chain by replacing the root `unexpected`
180 // pointer, only 'unexpected' tokens from existing group
181 // parsers should bubble.
182 fork.unexpected
183 .set(Some(Rc::new(Cell::new(Unexpected::None))));
184 }
185 // Unexpected has been set on `self`. No changes needed.
186 (_, Some(_)) => {}
187 }
188 }
189
190 // See comment on `cell` in the struct definition.
191 self.cell
192 .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
193 }
194}
195