lib.rs source code [crates/prettyplease/src/lib.rs]

1	//! [![github]](https://github.com/dtolnay/prettyplease)&ensp;[![crates-io]](https://crates.io/crates/prettyplease)&ensp;[![docs-rs]](https://docs.rs/prettyplease)
2	//!
3	//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
4	//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
5	//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
6	//!
7	//! <br>
8	//!
//! **prettyplease::unparse** — *a minimal `syn` syntax tree pretty-printer*
10	//!
11	//! <br>
12	//!
13	//! # Overview
14	//!
15	//! This is a pretty-printer to turn a `syn` syntax tree into a `String` of
16	//! well-formatted source code. In contrast to rustfmt, this library is intended
17	//! to be suitable for arbitrary generated code.
18	//!
19	//! Rustfmt prioritizes high-quality output that is impeccable enough that you'd
20	//! be comfortable spending your career staring at its output — but that
21	//! means some heavyweight algorithms, and it has a tendency to bail out on code
22	//! that is hard to format (for example [rustfmt#3697], and there are dozens
23	//! more issues like it). That's not necessarily a big deal for human-generated
24	//! code because when code gets highly nested, the human will naturally be
25	//! inclined to refactor into more easily formattable code. But for generated
26	//! code, having the formatter just give up leaves it totally unreadable.
27	//!
28	//! [rustfmt#3697]: https://github.com/rust-lang/rustfmt/issues/3697
29	//!
30	//! This library is designed using the simplest possible algorithm and data
31	//! structures that can deliver about 95% of the quality of rustfmt-formatted
32	//! output. In my experience testing real-world code, approximately 97-98% of
33	//! output lines come out identical between rustfmt's formatting and this
34	//! crate's. The rest have slightly different linebreak decisions, but still
35	//! clearly follow the dominant modern Rust style.
36	//!
37	//! The tradeoffs made by this crate are a good fit for generated code that you
//! will *not* spend your career staring at. For example, the output of
39	//! `bindgen`, or the output of `cargo-expand`. In those cases it's more
40	//! important that the whole thing be formattable without the formatter giving
41	//! up, than that it be flawless.
42	//!
43	//! <br>
44	//!
45	//! # Feature matrix
46	//!
47	//! Here are a few superficial comparisons of this crate against the AST
48	//! pretty-printer built into rustc, and rustfmt. The sections below go into
49	//! more detail comparing the output of each of these libraries.
50	//!
//! | | prettyplease | rustc | rustfmt |
//! |:---|:---:|:---:|:---:|
//! | non-pathological behavior on big or generated code | 💚 | ❌ | ❌ |
//! | idiomatic modern formatting ("locally indistinguishable from rustfmt") | 💚 | ❌ | 💚 |
//! | throughput | 60 MB/s | 39 MB/s | 2.8 MB/s |
//! | number of dependencies | 3 | 72 | 66 |
//! | compile time including dependencies | 2.4 sec | 23.1 sec | 29.8 sec |
//! | buildable using a stable Rust compiler | 💚 | ❌ | ❌ |
//! | published to crates.io | 💚 | ❌ | ❌ |
//! | extensively configurable output | ❌ | ❌ | 💚 |
//! | intended to accommodate hand-maintained source code | ❌ | ❌ | 💚 |
62	//!
63	//! <br>
64	//!
65	//! # Comparison to rustfmt
66	//!
67	//! - [input.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/input.rs)
68	//! - [output.prettyplease.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.prettyplease.rs)
69	//! - [output.rustfmt.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.rustfmt.rs)
70	//!
71	//! If you weren't told which output file is which, it would be practically
//! impossible to tell — *except* for line 435 in the rustfmt output,
73	//! which is more than 1000 characters long because rustfmt just gave up
74	//! formatting that part of the file:
75	//!
76	//! ```
77	//! # const _: &str = stringify! {{{
//!             match segments[5] {
//!                 0 => write!(f, "::{}", ipv4),
//!                 0xffff => write!(f, "::ffff:{}", ipv4),
//!                 _ => unreachable!(),
//!             }
//!         } else { # [derive (Copy , Clone , Default)] struct Span { start : usize , len : usize , } let zeroes = { let mut longest = Span :: default () ; let mut current = Span :: default () ; for (i , & segment) in segments . iter () . enumerate () { if segment == 0 { if current . len == 0 { current . start = i ; } current . len += 1 ; if current . len > longest . len { longest = current ; } } else { current = Span :: default () ; } } longest } ; # [doc = " Write a colon-separated part of the address"] # [inline] fn fmt_subslice (f : & mut fmt :: Formatter < '_ > , chunk : & [u16]) -> fmt :: Result { if let Some ((first , tail)) = chunk . split_first () { write ! (f , "{:x}" , first) ? ; for segment in tail { f . write_char (':') ? ; write ! (f , "{:x}" , segment) ? ; } } Ok (()) } if zeroes . len > 1 { fmt_subslice (f , & segments [.. zeroes . start]) ? ; f . write_str ("::") ? ; fmt_subslice (f , & segments [zeroes . start + zeroes . len ..]) } else { fmt_subslice (f , & segments) } }
//!     } else {
//!         const IPV6_BUF_LEN: usize = (4 * 8) + 7;
//!         let mut buf = [0u8; IPV6_BUF_LEN];
//!         let mut buf_slice = &mut buf[..];
88	//! # }};
89	//! ```
90	//!
91	//! This is a pretty typical manifestation of rustfmt bailing out in generated
92	//! code — a chunk of the input ends up on one line. The other
93	//! manifestation is that you're working on some code, running rustfmt on save
94	//! like a conscientious developer, but after a while notice it isn't doing
95	//! anything. You introduce an intentional formatting issue, like a stray indent
96	//! or semicolon, and run rustfmt to check your suspicion. Nope, it doesn't get
97	//! cleaned up — rustfmt is just not formatting the part of the file you
98	//! are working on.
99	//!
100	//! The prettyplease library is designed to have no pathological cases that
101	//! force a bail out; the entire input you give it will get formatted in some
102	//! "good enough" form.
103	//!
104	//! Separately, rustfmt can be problematic to integrate into projects. It's
105	//! written using rustc's internal syntax tree, so it can't be built by a stable
106	//! compiler. Its releases are not regularly published to crates.io, so in Cargo
107	//! builds you'd need to depend on it as a git dependency, which precludes
108	//! publishing your crate to crates.io also. You can shell out to a `rustfmt`
109	//! binary, but that'll be whatever rustfmt version is installed on each
110	//! developer's system (if any), which can lead to spurious diffs in checked-in
111	//! generated code formatted by different versions. In contrast prettyplease is
112	//! designed to be easy to pull in as a library, and compiles fast.
113	//!
114	//! <br>
115	//!
116	//! # Comparison to rustc_ast_pretty
117	//!
118	//! - [input.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/input.rs)
119	//! - [output.prettyplease.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.prettyplease.rs)
120	//! - [output.rustc.rs](https://github.com/dtolnay/prettyplease/blob/0.1.0/examples/output.rustc.rs)
121	//!
122	//! This is the pretty-printer that gets used when rustc prints source code,
123	//! such as `rustc -Zunpretty=expanded`. It's used also by the standard
124	//! library's `stringify!` when stringifying an interpolated macro_rules AST
125	//! fragment, like an $:expr, and transitively by `dbg!` and many macros in the
126	//! ecosystem.
127	//!
128	//! Rustc's formatting is mostly okay, but does not hew closely to the dominant
129	//! contemporary style of Rust formatting. Some things wouldn't ever be written
130	//! on one line, like this `match` expression, and certainly not with a comma in
131	//! front of the closing brace:
132	//!
133	//! ```
134	//! # const _: &str = stringify! {
135	//! fn eq(&self, other: &IpAddr) -> bool {
//!     match other { IpAddr::V4(v4) => self == v4, IpAddr::V6(_) => false, }
137	//! }
138	//! # };
139	//! ```
140	//!
141	//! Some places use non-multiple-of-4 indentation, which is definitely not the
142	//! norm:
143	//!
144	//! ```
145	//! # const _: &str = stringify! {
146	//! pub const fn to_ipv6_mapped(&self) -> Ipv6Addr {
//!     let [a, b, c, d] = self.octets();
//!     Ipv6Addr{inner:
//!                  c::in6_addr{s6_addr:
//!                                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF,
//!                                   0xFF, a, b, c, d],},}
152	//! }
153	//! # };
154	//! ```
155	//!
156	//! And although there isn't an egregious example of it in the link because the
157	//! input code is pretty tame, in general rustc_ast_pretty has pathological
158	//! behavior on generated code. It has a tendency to use excessive horizontal
159	//! indentation and rapidly run out of width:
160	//!
161	//! ```
162	//! # const _: &str = stringify! {
163	//! ::std::io::_print(::core::fmt::Arguments::new_v1(&[""],
//!                                                  &match (&msg,) {
//!                                                       _args =>
//!                                                       [::core::fmt::ArgumentV1::new(_args.0,
//!                                                                                     ::core::fmt::Display::fmt)],
//!                                                   }));
169	//! # };
170	//! ```
171	//!
172	//! The snippets above are clearly different from modern rustfmt style. In
173	//! contrast, prettyplease is designed to have output that is practically
174	//! indistinguishable from rustfmt-formatted code.
175	//!
176	//! <br>
177	//!
178	//! # Example
179	//!
180	//! ```
181	//! // [dependencies]
182	//! // prettyplease = "0.2"
183	//! // syn = { version = "2", default-features = false, features = ["full", "parsing"] }
184	//!
185	//! const INPUT: &str = stringify! {
186	//! use crate::{
187	//! lazy::{Lazy, SyncLazy, SyncOnceCell}, panic,
188	//! sync::{ atomic::{AtomicUsize, Ordering::SeqCst},
189	//! mpsc::channel, Mutex, },
190	//! thread,
191	//! };
192	//! impl<T, U> Into<U> for T where U: From<T> {
193	//! fn into(self) -> U { U::from(self) }
194	//! }
195	//! };
196	//!
197	//! fn main() {
198	//! let syntax_tree = syn::parse_file(INPUT).unwrap();
199	//! let formatted = prettyplease::unparse(&syntax_tree);
200	//! print!("{}", formatted);
201	//! }
202	//! ```
203	//!
204	//! <br>
205	//!
206	//! # Algorithm notes
207	//!
208	//! The approach and terminology used in the implementation are derived from
209	//! [Derek C. Oppen, "Pretty Printing" (1979)][paper], on which
210	//! rustc_ast_pretty is also based, and from rustc_ast_pretty's implementation
211	//! written by Graydon Hoare in 2011 (and modernized over the years by dozens of
212	//! volunteer maintainers).
213	//!
214	//! [paper]: http://i.stanford.edu/pub/cstr/reports/cs/tr/79/770/CS-TR-79-770.pdf
215	//!
216	//! The paper describes two language-agnostic interacting procedures `Scan()`
217	//! and `Print()`. Language-specific code decomposes an input data structure
218	//! into a stream of `string` and `break` tokens, and `begin` and `end` tokens
219	//! for grouping. Each `begin`–`end` range may be identified as either
220	//! "consistent breaking" or "inconsistent breaking". If a group is consistently
221	//! breaking, then if the whole contents do not fit on the line, every* `break`*
222	//! token in the group will receive a linebreak. This is appropriate, for
223	//! example, for Rust struct literals, or arguments of a function call. If a
224	//! group is inconsistently breaking, then the `string` tokens in the group are
225	//! greedily placed on the line until out of space, and linebroken only at those
226	//! `break` tokens for which the next string would not fit. For example, this is
227	//! appropriate for the contents of a braced `use` statement in Rust.
228	//!
229	//! Scan's job is to efficiently accumulate sizing information about groups and
230	//! breaks. For every `begin` token we compute the distance to the matched `end`
231	//! token, and for every `break` we compute the distance to the next `break`.
232	//! The algorithm uses a ringbuffer to hold tokens whose size is not yet
233	//! ascertained. The maximum size of the ringbuffer is bounded by the target
234	//! line length and does not grow indefinitely, regardless of deep nesting in
235	//! the input stream. That's because once a group is sufficiently big, the
236	//! precise size can no longer make a difference to linebreak decisions and we
237	//! can effectively treat it as "infinity".
238	//!
239	//! Print's job is to use the sizing information to efficiently assign a
240	//! "broken" or "not broken" status to every `begin` token. At that point the
241	//! output is easily constructed by concatenating `string` tokens and breaking
242	//! at `break` tokens contained within a broken group.
243	//!
244	//! Leveraging these primitives (i.e. cleverly placing the all-or-nothing
245	//! consistent breaks and greedy inconsistent breaks) to yield
246	//! rustfmt-compatible formatting for all of Rust's syntax tree nodes is a fun
247	//! challenge.
248	//!
249	//! Here is a visualization of some Rust tokens fed into the pretty printing
250	//! algorithm. Consistently breaking `begin`—`end` pairs are represented
251	//! by `«`⁠`»`, inconsistently breaking by `‹`⁠`›`, `break` by `·`,
252	//! and the rest of the non-whitespace are `string`.
253	//!
254	//! ```text
255	//! use crate::«{·
//! ‹    lazy::«{·‹Lazy,· SyncLazy,· SyncOnceCell›·}»,·
//!     panic,·
//!     sync::«{·
//! ‹        atomic::«{·‹AtomicUsize,· Ordering::SeqCst›·}»,·
//!         mpsc::channel,· Mutex›,·
//!     }»,·
//!     thread›,·
//! }»;·
//! «‹«impl<«·T‹›,· U‹›·»>» Into<«·U·»>· for T›·
//! where·
//!     U:‹ From<«·T·»>›,·
//! {·
//! «    fn into(·«·self·») -> U {·
//! ‹        U::from(«·self·»)›·
//! »    }·
271	//! »}·
272	//! ```
273	//!
274	//! The algorithm described in the paper is not quite sufficient for producing
275	//! well-formatted Rust code that is locally indistinguishable from rustfmt's
276	//! style. The reason is that in the paper, the complete non-whitespace contents
277	//! are assumed to be independent of linebreak decisions, with Scan and Print
278	//! being only in control of the whitespace (spaces and line breaks). In Rust as
279	//! idiomatically formatted by rustfmt, that is not the case. Trailing commas
//! are one example; the punctuation is only known *after* the broken vs
281	//! non-broken status of the surrounding group is known:
282	//!
283	//! ```
284	//! # struct Struct { x: u64, y: bool }
//! # let xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx = 0;
//! # let yyyyyyyyyyyyyyyyyyyyyyyyyyyyyy = true;
//! #
//! let _ = Struct { x: 0, y: true };
//!
//! let _ = Struct {
//!     x: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,
//!     y: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyy, //<- trailing comma if the expression wrapped
293	//! };
294	//! ```
295	//!
296	//! The formatting of `match` expressions is another case; we want small arms on
297	//! the same line as the pattern, and big arms wrapped in a brace. The presence
298	//! of the brace punctuation, comma, and semicolon are all dependent on whether
299	//! the arm fits on the line:
300	//!
301	//! ```
302	//! # struct Entry { nanos: u32 }
//! # let total_nanos = 0u64;
//! # let mut total_secs = 0u64;
//! # let tmp;
//! # let entry = Entry { nanos: 0 };
//! # const NANOS_PER_SEC: u32 = 1_000_000_000;
//! #
//! match total_nanos.checked_add(entry.nanos as u64) {
//!     Some(n) => tmp = n, //<- small arm, inline with comma
//!     None => {
//!         total_secs = total_secs
//!             .checked_add(total_nanos / NANOS_PER_SEC as u64)
//!             .expect("overflow in iter::sum over durations");
//!     } //<- big arm, needs brace added, and also semicolon^
316	//! }
317	//! ```
318	//!
319	//! The printing algorithm implementation in this crate accommodates all of
320	//! these situations with conditional punctuation tokens whose selection can be
321	//! deferred and populated after it's known that the group is or is not broken.
322
323	#![doc(html_root_url = "https://docs.rs/prettyplease/0.2.32")]
324	#![allow(
325	clippy::bool_to_int_with_if,
326	clippy::cast_possible_wrap,
327	clippy::cast_sign_loss,
328	clippy::derive_partial_eq_without_eq,
329	clippy::doc_markdown,
330	clippy::enum_glob_use,
331	clippy::items_after_statements,
332	clippy::let_underscore_untyped,
333	clippy::match_like_matches_macro,
334	clippy::match_same_arms,
335	clippy::module_name_repetitions,
336	clippy::must_use_candidate,
337	clippy::needless_pass_by_value,
338	clippy::ref_option,
339	clippy::similar_names,
340	clippy::struct_excessive_bools,
341	clippy::too_many_lines,
342	clippy::unused_self,
343	clippy::vec_init_then_push
344	)]
345	#![cfg_attr(all(test, exhaustive), feature(non_exhaustive_omitted_patterns_lint))]
346
347	mod algorithm;
348	mod attr;
349	mod classify;
350	mod convenience;
351	mod data;
352	mod expr;
353	mod file;
354	mod fixup;
355	mod generics;
356	mod item;
357	mod iter;
358	mod lifetime;
359	mod lit;
360	mod mac;
361	mod pat;
362	mod path;
363	mod precedence;
364	mod ring;
365	mod stmt;
366	mod token;
367	mod ty;
368
369	use crate::algorithm::Printer;
370	use syn::File;
371
// Target line width, in columns, that the printer tries to keep output within.
const MARGIN: isize = 89;
374
// Number of spaces added at each level of block indentation.
const INDENT: isize = 4;
377
// Every line is allowed at least this much space, even if highly indented.
const MIN_SPACE: isize = 60;
380
381	pub fn unparse(file: &File) -> String {
382	let mut p: Printer = Printer::new();
383	p.file(file);
384	p.eof()
385	}
386