1 | //! [![github]](https://github.com/dtolnay/prettyplease) [![crates-io]](https://crates.io/crates/prettyplease) [![docs-rs]](https://docs.rs/prettyplease) |
2 | //! |
3 | //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github |
4 | //! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust |
5 | //! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs |
6 | //! |
7 | //! <br> |
8 | //! |
9 | //! **prettyplease::unparse** — a minimal `syn` syntax tree pretty-printer |
10 | //! |
11 | //! <br> |
12 | //! |
13 | //! # Overview |
14 | //! |
15 | //! This is a pretty-printer to turn a `syn` syntax tree into a `String` of |
16 | //! well-formatted source code. In contrast to rustfmt, this library is intended |
17 | //! to be suitable for arbitrary generated code. |
18 | //! |
19 | //! Rustfmt prioritizes high-quality output that is impeccable enough that you'd |
20 | //! be comfortable spending your career staring at its output — but that |
21 | //! means some heavyweight algorithms, and it has a tendency to bail out on code |
22 | //! that is hard to format (for example [rustfmt#3697], and there are dozens |
23 | //! more issues like it). That's not necessarily a big deal for human-generated |
24 | //! code because when code gets highly nested, the human will naturally be |
25 | //! inclined to refactor into more easily formattable code. But for generated |
26 | //! code, having the formatter just give up leaves it totally unreadable. |
27 | //! |
28 | //! [rustfmt#3697]: https://github.com/rust-lang/rustfmt/issues/3697 |
29 | //! |
30 | //! This library is designed using the simplest possible algorithm and data |
31 | //! structures that can deliver about 95% of the quality of rustfmt-formatted |
32 | //! output. In my experience testing real-world code, approximately 97-98% of |
33 | //! output lines come out identical between rustfmt's formatting and this |
34 | //! crate's. The rest have slightly different linebreak decisions, but still |
35 | //! clearly follow the dominant modern Rust style. |
36 | //! |
37 | //! The tradeoffs made by this crate are a good fit for generated code that you |
38 | //! will *not* spend your career staring at. For example, the output of |
39 | //! `bindgen`, or the output of `cargo-expand`. In those cases it's more |
40 | //! important that the whole thing be formattable without the formatter giving |
41 | //! up, than that it be flawless. |
42 | //! |
43 | //! <br> |
44 | //! |
45 | //! # Feature matrix |
46 | //! |
47 | //! Here are a few superficial comparisons of this crate against the AST |
48 | //! pretty-printer built into rustc, and rustfmt. The sections below go into |
49 | //! more detail comparing the output of each of these libraries. |
50 | //! |
51 | //! | | prettyplease | rustc | rustfmt | |
52 | //! |:---|:---:|:---:|:---:| |
53 | //! | non-pathological behavior on big or generated code | 💚 | ❌ | ❌ | |
54 | //! | idiomatic modern formatting ("locally indistinguishable from rustfmt") | 💚 | ❌ | 💚 | |
55 | //! | throughput | 60 MB/s | 39 MB/s | 2.8 MB/s | |
56 | //! | number of dependencies | 3 | 72 | 66 | |
57 | //! | compile time including dependencies | 2.4 sec | 23.1 sec | 29.8 sec | |
58 | //! | buildable using a stable Rust compiler | 💚 | ❌ | ❌ | |
59 | //! | published to crates.io | 💚 | ❌ | ❌ | |
60 | //! | extensively configurable output | ❌ | ❌ | 💚 | |
61 | //! | intended to accommodate hand-maintained source code | ❌ | ❌ | 💚 | |
62 | //! |
63 | //! <br> |
64 | //! |
65 | //! # Comparison to rustfmt |
66 | //! |
67 | //! - [input.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/input.rs) |
68 | //! - [output.prettyplease.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.prettyplease.rs) |
69 | //! - [output.rustfmt.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.rustfmt.rs) |
70 | //! |
71 | //! If you weren't told which output file is which, it would be practically |
72 | //! impossible to tell — **except** for line 435 in the rustfmt output, |
73 | //! which is more than 1000 characters long because rustfmt just gave up |
74 | //! formatting that part of the file: |
75 | //! |
76 | //! ``` |
77 | //! # const _: &str = stringify! {{{ |
78 | //! match segments[5] { |
//! 0 => write!(f, "::{}", ipv4),
//! 0xffff => write!(f, "::ffff:{}", ipv4),
81 | //! _ => unreachable!(), |
82 | //! } |
83 | //! } else { # [derive (Copy , Clone , Default)] struct Span { start : usize , len : usize , } let zeroes = { let mut longest = Span :: default () ; let mut current = Span :: default () ; for (i , & segment) in segments . iter () . enumerate () { if segment == 0 { if current . len == 0 { current . start = i ; } current . len += 1 ; if current . len > longest . len { longest = current ; } } else { current = Span :: default () ; } } longest } ; # [doc = " Write a colon-separated part of the address" ] # [inline] fn fmt_subslice (f : & mut fmt :: Formatter < '_ > , chunk : & [u16]) -> fmt :: Result { if let Some ((first , tail)) = chunk . split_first () { write ! (f , "{:x}" , first) ? ; for segment in tail { f . write_char (':' ) ? ; write ! (f , "{:x}" , segment) ? ; } } Ok (()) } if zeroes . len > 1 { fmt_subslice (f , & segments [.. zeroes . start]) ? ; f . write_str ("::" ) ? ; fmt_subslice (f , & segments [zeroes . start + zeroes . len ..]) } else { fmt_subslice (f , & segments) } } |
84 | //! } else { |
85 | //! const IPV6_BUF_LEN: usize = (4 * 8) + 7; |
86 | //! let mut buf = [0u8; IPV6_BUF_LEN]; |
87 | //! let mut buf_slice = &mut buf[..]; |
88 | //! # }}; |
89 | //! ``` |
90 | //! |
91 | //! This is a pretty typical manifestation of rustfmt bailing out in generated |
92 | //! code — a chunk of the input ends up on one line. The other |
93 | //! manifestation is that you're working on some code, running rustfmt on save |
94 | //! like a conscientious developer, but after a while notice it isn't doing |
95 | //! anything. You introduce an intentional formatting issue, like a stray indent |
96 | //! or semicolon, and run rustfmt to check your suspicion. Nope, it doesn't get |
97 | //! cleaned up — rustfmt is just not formatting the part of the file you |
98 | //! are working on. |
99 | //! |
100 | //! The prettyplease library is designed to have no pathological cases that |
101 | //! force a bail out; the entire input you give it will get formatted in some |
102 | //! "good enough" form. |
103 | //! |
104 | //! Separately, rustfmt can be problematic to integrate into projects. It's |
105 | //! written using rustc's internal syntax tree, so it can't be built by a stable |
106 | //! compiler. Its releases are not regularly published to crates.io, so in Cargo |
107 | //! builds you'd need to depend on it as a git dependency, which precludes |
108 | //! publishing your crate to crates.io also. You can shell out to a `rustfmt` |
109 | //! binary, but that'll be whatever rustfmt version is installed on each |
110 | //! developer's system (if any), which can lead to spurious diffs in checked-in |
111 | //! generated code formatted by different versions. In contrast prettyplease is |
112 | //! designed to be easy to pull in as a library, and compiles fast. |
113 | //! |
114 | //! <br> |
115 | //! |
116 | //! # Comparison to rustc_ast_pretty |
117 | //! |
118 | //! - [input.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/input.rs) |
119 | //! - [output.prettyplease.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.prettyplease.rs) |
120 | //! - [output.rustc.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.rustc.rs) |
121 | //! |
122 | //! This is the pretty-printer that gets used when rustc prints source code, |
123 | //! such as `rustc -Zunpretty=expanded`. It's used also by the standard |
124 | //! library's `stringify!` when stringifying an interpolated macro_rules AST |
125 | //! fragment, like an $:expr, and transitively by `dbg!` and many macros in the |
126 | //! ecosystem. |
127 | //! |
128 | //! Rustc's formatting is mostly okay, but does not hew closely to the dominant |
129 | //! contemporary style of Rust formatting. Some things wouldn't ever be written |
130 | //! on one line, like this `match` expression, and certainly not with a comma in |
131 | //! front of the closing brace: |
132 | //! |
133 | //! ``` |
134 | //! # const _: &str = stringify! { |
135 | //! fn eq(&self, other: &IpAddr) -> bool { |
136 | //! match other { IpAddr::V4(v4) => self == v4, IpAddr::V6(_) => false, } |
137 | //! } |
138 | //! # }; |
139 | //! ``` |
140 | //! |
141 | //! Some places use non-multiple-of-4 indentation, which is definitely not the |
142 | //! norm: |
143 | //! |
144 | //! ``` |
145 | //! # const _: &str = stringify! { |
146 | //! pub const fn to_ipv6_mapped(&self) -> Ipv6Addr { |
147 | //! let [a, b, c, d] = self.octets(); |
//!     Ipv6Addr{inner:
//!                  c::in6_addr{s6_addr:
//!                                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF,
//!                                   0xFF, a, b, c, d],},}
152 | //! } |
153 | //! # }; |
154 | //! ``` |
155 | //! |
156 | //! And although there isn't an egregious example of it in the link because the |
157 | //! input code is pretty tame, in general rustc_ast_pretty has pathological |
158 | //! behavior on generated code. It has a tendency to use excessive horizontal |
159 | //! indentation and rapidly run out of width: |
160 | //! |
161 | //! ``` |
162 | //! # const _: &str = stringify! { |
//! ::std::io::_print(::core::fmt::Arguments::new_v1(&[""],
//!                                                  &match (&msg,) {
//!                                                       _args =>
//!                                                       [::core::fmt::ArgumentV1::new(_args.0,
//!                                                                                     ::core::fmt::Display::fmt)],
//!                                                   }));
169 | //! # }; |
170 | //! ``` |
171 | //! |
172 | //! The snippets above are clearly different from modern rustfmt style. In |
173 | //! contrast, prettyplease is designed to have output that is practically |
174 | //! indistinguishable from rustfmt-formatted code. |
175 | //! |
176 | //! <br> |
177 | //! |
178 | //! # Example |
179 | //! |
180 | //! ``` |
181 | //! // [dependencies] |
182 | //! // prettyplease = "0.2" |
183 | //! // syn = { version = "2", default-features = false, features = ["full", "parsing"] } |
184 | //! |
185 | //! const INPUT: &str = stringify! { |
186 | //! use crate::{ |
187 | //! lazy::{Lazy, SyncLazy, SyncOnceCell}, panic, |
188 | //! sync::{ atomic::{AtomicUsize, Ordering::SeqCst}, |
189 | //! mpsc::channel, Mutex, }, |
190 | //! thread, |
191 | //! }; |
192 | //! impl<T, U> Into<U> for T where U: From<T> { |
193 | //! fn into(self) -> U { U::from(self) } |
194 | //! } |
195 | //! }; |
196 | //! |
//! fn main() {
//!     let syntax_tree = syn::parse_file(INPUT).unwrap();
//!     let formatted = prettyplease::unparse(&syntax_tree);
//!     print!("{}", formatted);
//! }
202 | //! ``` |
203 | //! |
204 | //! <br> |
205 | //! |
206 | //! # Algorithm notes |
207 | //! |
208 | //! The approach and terminology used in the implementation are derived from |
209 | //! [*Derek C. Oppen, "Pretty Printing" (1979)*][paper], on which |
210 | //! rustc_ast_pretty is also based, and from rustc_ast_pretty's implementation |
211 | //! written by Graydon Hoare in 2011 (and modernized over the years by dozens of |
212 | //! volunteer maintainers). |
213 | //! |
214 | //! [paper]: http://i.stanford.edu/pub/cstr/reports/cs/tr/79/770/CS-TR-79-770.pdf |
215 | //! |
216 | //! The paper describes two language-agnostic interacting procedures `Scan()` |
217 | //! and `Print()`. Language-specific code decomposes an input data structure |
218 | //! into a stream of `string` and `break` tokens, and `begin` and `end` tokens |
219 | //! for grouping. Each `begin`–`end` range may be identified as either |
220 | //! "consistent breaking" or "inconsistent breaking". If a group is consistently |
221 | //! breaking, then if the whole contents do not fit on the line, *every* `break` |
222 | //! token in the group will receive a linebreak. This is appropriate, for |
223 | //! example, for Rust struct literals, or arguments of a function call. If a |
224 | //! group is inconsistently breaking, then the `string` tokens in the group are |
225 | //! greedily placed on the line until out of space, and linebroken only at those |
226 | //! `break` tokens for which the next string would not fit. For example, this is |
227 | //! appropriate for the contents of a braced `use` statement in Rust. |
228 | //! |
229 | //! Scan's job is to efficiently accumulate sizing information about groups and |
230 | //! breaks. For every `begin` token we compute the distance to the matched `end` |
231 | //! token, and for every `break` we compute the distance to the next `break`. |
232 | //! The algorithm uses a ringbuffer to hold tokens whose size is not yet |
233 | //! ascertained. The maximum size of the ringbuffer is bounded by the target |
234 | //! line length and does not grow indefinitely, regardless of deep nesting in |
235 | //! the input stream. That's because once a group is sufficiently big, the |
236 | //! precise size can no longer make a difference to linebreak decisions and we |
237 | //! can effectively treat it as "infinity". |
238 | //! |
239 | //! Print's job is to use the sizing information to efficiently assign a |
240 | //! "broken" or "not broken" status to every `begin` token. At that point the |
241 | //! output is easily constructed by concatenating `string` tokens and breaking |
242 | //! at `break` tokens contained within a broken group. |
243 | //! |
244 | //! Leveraging these primitives (i.e. cleverly placing the all-or-nothing |
245 | //! consistent breaks and greedy inconsistent breaks) to yield |
246 | //! rustfmt-compatible formatting for all of Rust's syntax tree nodes is a fun |
247 | //! challenge. |
248 | //! |
249 | //! Here is a visualization of some Rust tokens fed into the pretty printing |
250 | //! algorithm. Consistently breaking `begin`—`end` pairs are represented |
251 | //! by `«`⁠`»`, inconsistently breaking by `‹`⁠`›`, `break` by `·`, |
252 | //! and the rest of the non-whitespace are `string`. |
253 | //! |
254 | //! ```text |
//! use crate::«{·
//! ‹    lazy::«{·‹Lazy,· SyncLazy,· SyncOnceCell›·}»,·
//!     panic,·
//!     sync::«{·
//! ‹        atomic::«{·‹AtomicUsize,· Ordering::SeqCst›·}»,·
//!         mpsc::channel,· Mutex›,·
//!     }»,·
//!     thread›,·
//! }»;·
//! «‹«impl<«·T‹›,· U‹›·»>» Into<«·U·»>· for T›·
//! where·
//!     U:‹ From<«·T·»>›,·
//! {·
//! «    fn into(·«·self·») -> U {·
//! ‹        U::from(«·self·»)›·
//! »    }·
//! »}·
272 | //! ``` |
273 | //! |
274 | //! The algorithm described in the paper is not quite sufficient for producing |
275 | //! well-formatted Rust code that is locally indistinguishable from rustfmt's |
276 | //! style. The reason is that in the paper, the complete non-whitespace contents |
277 | //! are assumed to be independent of linebreak decisions, with Scan and Print |
278 | //! being only in control of the whitespace (spaces and line breaks). In Rust as |
//! idiomatically formatted by rustfmt, that is not the case. Trailing commas
280 | //! are one example; the punctuation is only known *after* the broken vs |
281 | //! non-broken status of the surrounding group is known: |
282 | //! |
283 | //! ``` |
284 | //! # struct Struct { x: u64, y: bool } |
285 | //! # let xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx = 0; |
286 | //! # let yyyyyyyyyyyyyyyyyyyyyyyyyyyyyy = true; |
287 | //! # |
288 | //! let _ = Struct { x: 0, y: true }; |
289 | //! |
290 | //! let _ = Struct { |
291 | //! x: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx, |
292 | //! y: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyy, //<- trailing comma if the expression wrapped |
293 | //! }; |
294 | //! ``` |
295 | //! |
296 | //! The formatting of `match` expressions is another case; we want small arms on |
297 | //! the same line as the pattern, and big arms wrapped in a brace. The presence |
298 | //! of the brace punctuation, comma, and semicolon are all dependent on whether |
299 | //! the arm fits on the line: |
300 | //! |
301 | //! ``` |
302 | //! # struct Entry { nanos: u32 } |
303 | //! # let total_nanos = 0u64; |
304 | //! # let mut total_secs = 0u64; |
305 | //! # let tmp; |
306 | //! # let entry = Entry { nanos: 0 }; |
307 | //! # const NANOS_PER_SEC: u32 = 1_000_000_000; |
308 | //! # |
309 | //! match total_nanos.checked_add(entry.nanos as u64) { |
310 | //! Some(n) => tmp = n, //<- small arm, inline with comma |
311 | //! None => { |
312 | //! total_secs = total_secs |
313 | //! .checked_add(total_nanos / NANOS_PER_SEC as u64) |
314 | //! .expect("overflow in iter::sum over durations" ); |
315 | //! } //<- big arm, needs brace added, and also semicolon^ |
316 | //! } |
317 | //! ``` |
318 | //! |
319 | //! The printing algorithm implementation in this crate accommodates all of |
320 | //! these situations with conditional punctuation tokens whose selection can be |
321 | //! deferred and populated after it's known that the group is or is not broken. |
322 | |
323 | #![doc (html_root_url = "https://docs.rs/prettyplease/0.2.16" )] |
324 | #![allow ( |
325 | clippy::cast_possible_wrap, |
326 | clippy::cast_sign_loss, |
327 | clippy::derive_partial_eq_without_eq, |
328 | clippy::doc_markdown, |
329 | clippy::enum_glob_use, |
330 | clippy::items_after_statements, |
331 | clippy::let_underscore_untyped, |
332 | clippy::match_like_matches_macro, |
333 | clippy::match_same_arms, |
334 | clippy::module_name_repetitions, |
335 | clippy::must_use_candidate, |
336 | clippy::needless_pass_by_value, |
337 | clippy::similar_names, |
338 | clippy::too_many_lines, |
339 | clippy::unused_self, |
340 | clippy::vec_init_then_push |
341 | )] |
342 | #![cfg_attr (all(test, exhaustive), feature(non_exhaustive_omitted_patterns_lint))] |
343 | |
344 | mod algorithm; |
345 | mod attr; |
346 | mod convenience; |
347 | mod data; |
348 | mod expr; |
349 | mod file; |
350 | mod generics; |
351 | mod item; |
352 | mod iter; |
353 | mod lifetime; |
354 | mod lit; |
355 | mod mac; |
356 | mod pat; |
357 | mod path; |
358 | mod ring; |
359 | mod stmt; |
360 | mod token; |
361 | mod ty; |
362 | |
363 | use crate::algorithm::Printer; |
364 | use syn::File; |
365 | |
// Target line width.
// NOTE(review): presumably measured in characters/columns and consumed by the
// printer modules declared above; the use sites are not in this file.
const MARGIN: isize = 89;

// Number of spaces added at each level of block indentation.
const INDENT: isize = 4;

// Every line is allowed at least this much space, even if highly indented.
// NOTE(review): presumably this means deeply nested output may extend past
// MARGIN rather than being squeezed below this width; confirm at the use site.
const MIN_SPACE: isize = 60;
374 | |
375 | pub fn unparse(file: &File) -> String { |
376 | let mut p: Printer = Printer::new(); |
377 | p.file(file); |
378 | p.eof() |
379 | } |
380 | |