| 1 | //! [![github]](https://github.com/dtolnay/prettyplease) [![crates-io]](https://crates.io/crates/prettyplease) [![docs-rs]](https://docs.rs/prettyplease) |
| 2 | //! |
| 3 | //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github |
| 4 | //! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust |
| 5 | //! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs |
| 6 | //! |
| 7 | //! <br> |
| 8 | //! |
| 9 | //! **prettyplease::unparse** — a minimal `syn` syntax tree pretty-printer |
| 10 | //! |
| 11 | //! <br> |
| 12 | //! |
| 13 | //! # Overview |
| 14 | //! |
| 15 | //! This is a pretty-printer to turn a `syn` syntax tree into a `String` of |
| 16 | //! well-formatted source code. In contrast to rustfmt, this library is intended |
| 17 | //! to be suitable for arbitrary generated code. |
| 18 | //! |
| 19 | //! Rustfmt prioritizes high-quality output that is impeccable enough that you'd |
| 20 | //! be comfortable spending your career staring at its output — but that |
| 21 | //! means some heavyweight algorithms, and it has a tendency to bail out on code |
| 22 | //! that is hard to format (for example [rustfmt#3697], and there are dozens |
| 23 | //! more issues like it). That's not necessarily a big deal for human-generated |
| 24 | //! code because when code gets highly nested, the human will naturally be |
| 25 | //! inclined to refactor into more easily formattable code. But for generated |
| 26 | //! code, having the formatter just give up leaves it totally unreadable. |
| 27 | //! |
| 28 | //! [rustfmt#3697]: https://github.com/rust-lang/rustfmt/issues/3697 |
| 29 | //! |
| 30 | //! This library is designed using the simplest possible algorithm and data |
| 31 | //! structures that can deliver about 95% of the quality of rustfmt-formatted |
| 32 | //! output. In my experience testing real-world code, approximately 97-98% of |
| 33 | //! output lines come out identical between rustfmt's formatting and this |
| 34 | //! crate's. The rest have slightly different linebreak decisions, but still |
| 35 | //! clearly follow the dominant modern Rust style. |
| 36 | //! |
| 37 | //! The tradeoffs made by this crate are a good fit for generated code that you |
| 38 | //! will *not* spend your career staring at. For example, the output of |
| 39 | //! `bindgen`, or the output of `cargo-expand`. In those cases it's more |
| 40 | //! important that the whole thing be formattable without the formatter giving |
| 41 | //! up, than that it be flawless. |
| 42 | //! |
| 43 | //! <br> |
| 44 | //! |
| 45 | //! # Feature matrix |
| 46 | //! |
| 47 | //! Here are a few superficial comparisons of this crate against the AST |
| 48 | //! pretty-printer built into rustc, and rustfmt. The sections below go into |
| 49 | //! more detail comparing the output of each of these libraries. |
| 50 | //! |
| 51 | //! | | prettyplease | rustc | rustfmt | |
| 52 | //! |:---|:---:|:---:|:---:| |
| 53 | //! | non-pathological behavior on big or generated code | 💚 | ❌ | ❌ | |
| 54 | //! | idiomatic modern formatting ("locally indistinguishable from rustfmt") | 💚 | ❌ | 💚 | |
| 55 | //! | throughput | 60 MB/s | 39 MB/s | 2.8 MB/s | |
| 56 | //! | number of dependencies | 3 | 72 | 66 | |
| 57 | //! | compile time including dependencies | 2.4 sec | 23.1 sec | 29.8 sec | |
| 58 | //! | buildable using a stable Rust compiler | 💚 | ❌ | ❌ | |
| 59 | //! | published to crates.io | 💚 | ❌ | ❌ | |
| 60 | //! | extensively configurable output | ❌ | ❌ | 💚 | |
| 61 | //! | intended to accommodate hand-maintained source code | ❌ | ❌ | 💚 | |
| 62 | //! |
| 63 | //! <br> |
| 64 | //! |
| 65 | //! # Comparison to rustfmt |
| 66 | //! |
| 67 | //! - [input.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/input.rs) |
| 68 | //! - [output.prettyplease.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.prettyplease.rs) |
| 69 | //! - [output.rustfmt.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.rustfmt.rs) |
| 70 | //! |
| 71 | //! If you weren't told which output file is which, it would be practically |
| 72 | //! impossible to tell — **except** for line 435 in the rustfmt output, |
| 73 | //! which is more than 1000 characters long because rustfmt just gave up |
| 74 | //! formatting that part of the file: |
| 75 | //! |
| 76 | //! ``` |
| 77 | //! # const _: &str = stringify! {{{ |
| 78 | //! match segments[5] { |
| 79 | //! 0 => write!(f, "::{}" , ipv4), |
| 80 | //! 0xffff => write!(f, "::ffff:{}" , ipv4), |
| 81 | //! _ => unreachable!(), |
| 82 | //! } |
| 83 | //! } else { # [derive (Copy , Clone , Default)] struct Span { start : usize , len : usize , } let zeroes = { let mut longest = Span :: default () ; let mut current = Span :: default () ; for (i , & segment) in segments . iter () . enumerate () { if segment == 0 { if current . len == 0 { current . start = i ; } current . len += 1 ; if current . len > longest . len { longest = current ; } } else { current = Span :: default () ; } } longest } ; # [doc = " Write a colon-separated part of the address" ] # [inline] fn fmt_subslice (f : & mut fmt :: Formatter < '_ > , chunk : & [u16]) -> fmt :: Result { if let Some ((first , tail)) = chunk . split_first () { write ! (f , "{:x}" , first) ? ; for segment in tail { f . write_char (':' ) ? ; write ! (f , "{:x}" , segment) ? ; } } Ok (()) } if zeroes . len > 1 { fmt_subslice (f , & segments [.. zeroes . start]) ? ; f . write_str ("::" ) ? ; fmt_subslice (f , & segments [zeroes . start + zeroes . len ..]) } else { fmt_subslice (f , & segments) } } |
| 84 | //! } else { |
| 85 | //! const IPV6_BUF_LEN: usize = (4 * 8) + 7; |
| 86 | //! let mut buf = [0u8; IPV6_BUF_LEN]; |
| 87 | //! let mut buf_slice = &mut buf[..]; |
| 88 | //! # }}; |
| 89 | //! ``` |
| 90 | //! |
| 91 | //! This is a pretty typical manifestation of rustfmt bailing out in generated |
| 92 | //! code — a chunk of the input ends up on one line. The other |
| 93 | //! manifestation is that you're working on some code, running rustfmt on save |
| 94 | //! like a conscientious developer, but after a while notice it isn't doing |
| 95 | //! anything. You introduce an intentional formatting issue, like a stray indent |
| 96 | //! or semicolon, and run rustfmt to check your suspicion. Nope, it doesn't get |
| 97 | //! cleaned up — rustfmt is just not formatting the part of the file you |
| 98 | //! are working on. |
| 99 | //! |
| 100 | //! The prettyplease library is designed to have no pathological cases that |
| 101 | //! force a bail out; the entire input you give it will get formatted in some |
| 102 | //! "good enough" form. |
| 103 | //! |
| 104 | //! Separately, rustfmt can be problematic to integrate into projects. It's |
| 105 | //! written using rustc's internal syntax tree, so it can't be built by a stable |
| 106 | //! compiler. Its releases are not regularly published to crates.io, so in Cargo |
| 107 | //! builds you'd need to depend on it as a git dependency, which precludes |
| 108 | //! publishing your crate to crates.io also. You can shell out to a `rustfmt` |
| 109 | //! binary, but that'll be whatever rustfmt version is installed on each |
| 110 | //! developer's system (if any), which can lead to spurious diffs in checked-in |
| 111 | //! generated code formatted by different versions. In contrast prettyplease is |
| 112 | //! designed to be easy to pull in as a library, and compiles fast. |
| 113 | //! |
| 114 | //! <br> |
| 115 | //! |
| 116 | //! # Comparison to rustc_ast_pretty |
| 117 | //! |
| 118 | //! - [input.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/input.rs) |
| 119 | //! - [output.prettyplease.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.prettyplease.rs) |
| 120 | //! - [output.rustc.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.rustc.rs) |
| 121 | //! |
| 122 | //! This is the pretty-printer that gets used when rustc prints source code, |
| 123 | //! such as `rustc -Zunpretty=expanded`. It's used also by the standard |
| 124 | //! library's `stringify!` when stringifying an interpolated macro_rules AST |
| 125 | //! fragment, like an $:expr, and transitively by `dbg!` and many macros in the |
| 126 | //! ecosystem. |
| 127 | //! |
| 128 | //! Rustc's formatting is mostly okay, but does not hew closely to the dominant |
| 129 | //! contemporary style of Rust formatting. Some things wouldn't ever be written |
| 130 | //! on one line, like this `match` expression, and certainly not with a comma in |
| 131 | //! front of the closing brace: |
| 132 | //! |
| 133 | //! ``` |
| 134 | //! # const _: &str = stringify! { |
| 135 | //! fn eq(&self, other: &IpAddr) -> bool { |
| 136 | //! match other { IpAddr::V4(v4) => self == v4, IpAddr::V6(_) => false, } |
| 137 | //! } |
| 138 | //! # }; |
| 139 | //! ``` |
| 140 | //! |
| 141 | //! Some places use non-multiple-of-4 indentation, which is definitely not the |
| 142 | //! norm: |
| 143 | //! |
| 144 | //! ``` |
| 145 | //! # const _: &str = stringify! { |
| 146 | //! pub const fn to_ipv6_mapped(&self) -> Ipv6Addr { |
| 147 | //! let [a, b, c, d] = self.octets(); |
| 148 | //! Ipv6Addr{inner: |
| 149 | //! c::in6_addr{s6_addr: |
| 150 | //! [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, |
| 151 | //! 0xFF, a, b, c, d],},} |
| 152 | //! } |
| 153 | //! # }; |
| 154 | //! ``` |
| 155 | //! |
| 156 | //! And although there isn't an egregious example of it in the link because the |
| 157 | //! input code is pretty tame, in general rustc_ast_pretty has pathological |
| 158 | //! behavior on generated code. It has a tendency to use excessive horizontal |
| 159 | //! indentation and rapidly run out of width: |
| 160 | //! |
| 161 | //! ``` |
| 162 | //! # const _: &str = stringify! { |
| 163 | //! ::std::io::_print(::core::fmt::Arguments::new_v1(&["" ], |
| 164 | //! &match (&msg,) { |
| 165 | //! _args => |
| 166 | //! [::core::fmt::ArgumentV1::new(_args.0, |
| 167 | //! ::core::fmt::Display::fmt)], |
| 168 | //! })); |
| 169 | //! # }; |
| 170 | //! ``` |
| 171 | //! |
| 172 | //! The snippets above are clearly different from modern rustfmt style. In |
| 173 | //! contrast, prettyplease is designed to have output that is practically |
| 174 | //! indistinguishable from rustfmt-formatted code. |
| 175 | //! |
| 176 | //! <br> |
| 177 | //! |
| 178 | //! # Example |
| 179 | //! |
| 180 | //! ``` |
| 181 | //! // [dependencies] |
| 182 | //! // prettyplease = "0.2" |
| 183 | //! // syn = { version = "2", default-features = false, features = ["full", "parsing"] } |
| 184 | //! |
| 185 | //! const INPUT: &str = stringify! { |
| 186 | //! use crate::{ |
| 187 | //! lazy::{Lazy, SyncLazy, SyncOnceCell}, panic, |
| 188 | //! sync::{ atomic::{AtomicUsize, Ordering::SeqCst}, |
| 189 | //! mpsc::channel, Mutex, }, |
| 190 | //! thread, |
| 191 | //! }; |
| 192 | //! impl<T, U> Into<U> for T where U: From<T> { |
| 193 | //! fn into(self) -> U { U::from(self) } |
| 194 | //! } |
| 195 | //! }; |
| 196 | //! |
| 197 | //! fn main() { |
| 198 | //! let syntax_tree = syn::parse_file(INPUT).unwrap(); |
| 199 | //! let formatted = prettyplease::unparse(&syntax_tree); |
| 200 | //! print!("{}" , formatted); |
| 201 | //! } |
| 202 | //! ``` |
| 203 | //! |
| 204 | //! <br> |
| 205 | //! |
| 206 | //! # Algorithm notes |
| 207 | //! |
| 208 | //! The approach and terminology used in the implementation are derived from |
| 209 | //! [*Derek C. Oppen, "Pretty Printing" (1979)*][paper], on which |
| 210 | //! rustc_ast_pretty is also based, and from rustc_ast_pretty's implementation |
| 211 | //! written by Graydon Hoare in 2011 (and modernized over the years by dozens of |
| 212 | //! volunteer maintainers). |
| 213 | //! |
| 214 | //! [paper]: http://i.stanford.edu/pub/cstr/reports/cs/tr/79/770/CS-TR-79-770.pdf |
| 215 | //! |
| 216 | //! The paper describes two language-agnostic interacting procedures `Scan()` |
| 217 | //! and `Print()`. Language-specific code decomposes an input data structure |
| 218 | //! into a stream of `string` and `break` tokens, and `begin` and `end` tokens |
| 219 | //! for grouping. Each `begin`–`end` range may be identified as either |
| 220 | //! "consistent breaking" or "inconsistent breaking". If a group is consistently |
| 221 | //! breaking, then if the whole contents do not fit on the line, *every* `break` |
| 222 | //! token in the group will receive a linebreak. This is appropriate, for |
| 223 | //! example, for Rust struct literals, or arguments of a function call. If a |
| 224 | //! group is inconsistently breaking, then the `string` tokens in the group are |
| 225 | //! greedily placed on the line until out of space, and linebroken only at those |
| 226 | //! `break` tokens for which the next string would not fit. For example, this is |
| 227 | //! appropriate for the contents of a braced `use` statement in Rust. |
| 228 | //! |
| 229 | //! Scan's job is to efficiently accumulate sizing information about groups and |
| 230 | //! breaks. For every `begin` token we compute the distance to the matched `end` |
| 231 | //! token, and for every `break` we compute the distance to the next `break`. |
| 232 | //! The algorithm uses a ringbuffer to hold tokens whose size is not yet |
| 233 | //! ascertained. The maximum size of the ringbuffer is bounded by the target |
| 234 | //! line length and does not grow indefinitely, regardless of deep nesting in |
| 235 | //! the input stream. That's because once a group is sufficiently big, the |
| 236 | //! precise size can no longer make a difference to linebreak decisions and we |
| 237 | //! can effectively treat it as "infinity". |
| 238 | //! |
| 239 | //! Print's job is to use the sizing information to efficiently assign a |
| 240 | //! "broken" or "not broken" status to every `begin` token. At that point the |
| 241 | //! output is easily constructed by concatenating `string` tokens and breaking |
| 242 | //! at `break` tokens contained within a broken group. |
| 243 | //! |
| 244 | //! Leveraging these primitives (i.e. cleverly placing the all-or-nothing |
| 245 | //! consistent breaks and greedy inconsistent breaks) to yield |
| 246 | //! rustfmt-compatible formatting for all of Rust's syntax tree nodes is a fun |
| 247 | //! challenge. |
| 248 | //! |
| 249 | //! Here is a visualization of some Rust tokens fed into the pretty printing |
| 250 | //! algorithm. Consistently breaking `begin`—`end` pairs are represented |
| 251 | //! by `«`⁠`»`, inconsistently breaking by `‹`⁠`›`, `break` by `·`, |
| 252 | //! and the rest of the non-whitespace are `string`. |
| 253 | //! |
| 254 | //! ```text |
| 255 | //! use crate::«{· |
| 256 | //! ‹ lazy::«{·‹Lazy,· SyncLazy,· SyncOnceCell›·}»,· |
| 257 | //! panic,· |
| 258 | //! sync::«{· |
| 259 | //! ‹ atomic::«{·‹AtomicUsize,· Ordering::SeqCst›·}»,· |
| 260 | //! mpsc::channel,· Mutex›,· |
| 261 | //! }»,· |
| 262 | //! thread›,· |
| 263 | //! }»;· |
| 264 | //! «‹«impl<«·T‹›,· U‹›·»>» Into<«·U·»>· for T›· |
| 265 | //! where· |
| 266 | //! U:‹ From<«·T·»>›,· |
| 267 | //! {· |
| 268 | //! « fn into(·«·self·») -> U {· |
| 269 | //! ‹ U::from(«·self·»)›· |
| 270 | //! » }· |
| 271 | //! »}· |
| 272 | //! ``` |
| 273 | //! |
| 274 | //! The algorithm described in the paper is not quite sufficient for producing |
| 275 | //! well-formatted Rust code that is locally indistinguishable from rustfmt's |
| 276 | //! style. The reason is that in the paper, the complete non-whitespace contents |
| 277 | //! are assumed to be independent of linebreak decisions, with Scan and Print |
| 278 | //! being only in control of the whitespace (spaces and line breaks). In Rust as |
| 279 | //! idiomatically formatted by rustfmt, that is not the case. Trailing commas |
| 280 | //! are one example; the punctuation is only known *after* the broken vs |
| 281 | //! non-broken status of the surrounding group is known: |
| 282 | //! |
| 283 | //! ``` |
| 284 | //! # struct Struct { x: u64, y: bool } |
| 285 | //! # let xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx = 0; |
| 286 | //! # let yyyyyyyyyyyyyyyyyyyyyyyyyyyyyy = true; |
| 287 | //! # |
| 288 | //! let _ = Struct { x: 0, y: true }; |
| 289 | //! |
| 290 | //! let _ = Struct { |
| 291 | //! x: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx, |
| 292 | //! y: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyy, //<- trailing comma if the expression wrapped |
| 293 | //! }; |
| 294 | //! ``` |
| 295 | //! |
| 296 | //! The formatting of `match` expressions is another case; we want small arms on |
| 297 | //! the same line as the pattern, and big arms wrapped in a brace. The presence |
| 298 | //! of the brace punctuation, comma, and semicolon are all dependent on whether |
| 299 | //! the arm fits on the line: |
| 300 | //! |
| 301 | //! ``` |
| 302 | //! # struct Entry { nanos: u32 } |
| 303 | //! # let total_nanos = 0u64; |
| 304 | //! # let mut total_secs = 0u64; |
| 305 | //! # let tmp; |
| 306 | //! # let entry = Entry { nanos: 0 }; |
| 307 | //! # const NANOS_PER_SEC: u32 = 1_000_000_000; |
| 308 | //! # |
| 309 | //! match total_nanos.checked_add(entry.nanos as u64) { |
| 310 | //! Some(n) => tmp = n, //<- small arm, inline with comma |
| 311 | //! None => { |
| 312 | //! total_secs = total_secs |
| 313 | //! .checked_add(total_nanos / NANOS_PER_SEC as u64) |
| 314 | //! .expect("overflow in iter::sum over durations" ); |
| 315 | //! } //<- big arm, needs brace added, and also semicolon^ |
| 316 | //! } |
| 317 | //! ``` |
| 318 | //! |
| 319 | //! The printing algorithm implementation in this crate accommodates all of |
| 320 | //! these situations with conditional punctuation tokens whose selection can be |
| 321 | //! deferred and populated after it's known that the group is or is not broken. |
| 322 | |
| 323 | #![doc (html_root_url = "https://docs.rs/prettyplease/0.2.32" )] |
| 324 | #![allow ( |
| 325 | clippy::bool_to_int_with_if, |
| 326 | clippy::cast_possible_wrap, |
| 327 | clippy::cast_sign_loss, |
| 328 | clippy::derive_partial_eq_without_eq, |
| 329 | clippy::doc_markdown, |
| 330 | clippy::enum_glob_use, |
| 331 | clippy::items_after_statements, |
| 332 | clippy::let_underscore_untyped, |
| 333 | clippy::match_like_matches_macro, |
| 334 | clippy::match_same_arms, |
| 335 | clippy::module_name_repetitions, |
| 336 | clippy::must_use_candidate, |
| 337 | clippy::needless_pass_by_value, |
| 338 | clippy::ref_option, |
| 339 | clippy::similar_names, |
| 340 | clippy::struct_excessive_bools, |
| 341 | clippy::too_many_lines, |
| 342 | clippy::unused_self, |
| 343 | clippy::vec_init_then_push |
| 344 | )] |
| 345 | #![cfg_attr (all(test, exhaustive), feature(non_exhaustive_omitted_patterns_lint))] |
| 346 | |
| 347 | mod algorithm; |
| 348 | mod attr; |
| 349 | mod classify; |
| 350 | mod convenience; |
| 351 | mod data; |
| 352 | mod expr; |
| 353 | mod file; |
| 354 | mod fixup; |
| 355 | mod generics; |
| 356 | mod item; |
| 357 | mod iter; |
| 358 | mod lifetime; |
| 359 | mod lit; |
| 360 | mod mac; |
| 361 | mod pat; |
| 362 | mod path; |
| 363 | mod precedence; |
| 364 | mod ring; |
| 365 | mod stmt; |
| 366 | mod token; |
| 367 | mod ty; |
| 368 | |
| 369 | use crate::algorithm::Printer; |
| 370 | use syn::File; |
| 371 | |
// Target line width that the printer aims to keep output lines within.
// NOTE(review): presumably counted in the same units as INDENT (character
// columns) — confirm against the `algorithm` module.
const MARGIN: isize = 89;

// Number of spaces added for each additional level of block indentation.
const INDENT: isize = 4;

// Every line is allowed at least this much space, no matter how deeply it is
// indented, so nesting alone never shrinks the usable width below this value.
const MIN_SPACE: isize = 60;
| 380 | |
| 381 | pub fn unparse(file: &File) -> String { |
| 382 | let mut p: Printer = Printer::new(); |
| 383 | p.file(file); |
| 384 | p.eof() |
| 385 | } |
| 386 | |