Format.cpp source code [clang-tools-extra/clangd/Format.cpp]

1	//===--- Format.cpp ------------------------------------------ C++-------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	#include "Format.h"
9	#include "support/Logger.h"
10	#include "clang/Basic/SourceManager.h"
11	#include "clang/Format/Format.h"
12	#include "clang/Lex/Lexer.h"
13	#include "clang/Tooling/Core/Replacement.h"
14	#include "llvm/Support/Unicode.h"
15
16	namespace clang {
17	namespace clangd {
18	namespace {
19
20	/// Append closing brackets )]} to \p Code to make it well-formed.
21	/// Clang-format conservatively refuses to format files with unmatched brackets
22	/// as it isn't sure where the errors are and so can't correct.
23	/// When editing, it's reasonable to assume code before the cursor is complete.
24	void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
25	SourceManagerForFile FileSM("mock_file.cpp", Code);
26	auto &SM = FileSM.get();
27	FileID FID = SM.getMainFileID();
28	LangOptions LangOpts = format::getFormattingLangOpts(Style);
29	Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts);
30	Token Tok;
31	std::vector<char> Brackets;
32	while (!Lex.LexFromRawLexer(Result&: Tok)) {
33	switch(Tok.getKind()) {
34	case tok::l_paren:
35	Brackets.push_back(x: `')'`);
36	break;
37	case tok::l_brace:
38	Brackets.push_back(x: `'}'`);
39	break;
40	case tok::l_square:
41	Brackets.push_back(x: `']'`);
42	break;
43	case tok::r_paren:
44	if (!Brackets.empty() && Brackets.back() == `')'`)
45	Brackets.pop_back();
46	break;
47	case tok::r_brace:
48	if (!Brackets.empty() && Brackets.back() == `'}'`)
49	Brackets.pop_back();
50	break;
51	case tok::r_square:
52	if (!Brackets.empty() && Brackets.back() == `']'`)
53	Brackets.pop_back();
54	break;
55	default:
56	continue;
57	}
58	}
59	// Attempt to end any open comments first.
60	Code.append(s: "\n// */\n");
61	Code.append(first: Brackets.rbegin(), last: Brackets.rend());
62	}
63
64	static StringRef commentMarker(llvm::StringRef Line) {
65	for (StringRef Marker : {"///", "//"}){
66	auto I = Line.rfind(Str: Marker);
67	if (I != StringRef::npos)
68	return Line.substr(Start: I, N: Marker.size());
69	}
70	return "";
71	}
72
73	llvm::StringRef firstLine(llvm::StringRef Code) {
74	return Code.take_until(F: [](char C) { return C == `'\n'`; });
75	}
76
77	llvm::StringRef lastLine(llvm::StringRef Code) {
78	llvm::StringRef Rest = Code;
79	while (!Rest.empty() && Rest.back() != `'\n'`)
80	Rest = Rest.drop_back();
81	return Code.substr(Start: Rest.size());
82	}
83
84	// Filename is needed for tooling::Replacement and some overloads of reformat().
85	// Its value should not affect the outcome. We use the default from reformat().
86	llvm::StringRef Filename = "<stdin>";
87
88	// tooling::Replacement from overlapping StringRefs: From must be part of Code.
89	tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
90	llvm::StringRef To) {
91	assert(From.begin() >= Code.begin() && From.end() <= Code.end());
92	// The filename is required but ignored.
93	return tooling::Replacement (Filename, From.data() - Code.data(),
94	From.size(), To);
95	}
96
97	// High-level representation of incremental formatting changes.
98	// The changes are made in two steps.
99	// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100	// comment markers when splitting a line comment with a newline).
101	// 2) a selective clang-format run:
102	// - the "source code" passed to clang format is the code up to the cursor,
103	// a placeholder for the cursor, and some closing brackets
104	// - the formatting is restricted to the cursor and (possibly) other ranges
105	// (e.g. the old line when inserting a newline).
106	// - changes before the cursor are applied, those after are discarded.
107	struct IncrementalChanges {
108	// Changes that should be applied before running clang-format.
109	tooling::Replacements Changes;
110	// Ranges of the original source code that should be clang-formatted.
111	// The CursorProxyText will also be formatted.
112	std::vector<tooling::Range> FormatRanges;
113	// The source code that should stand in for the cursor when clang-formatting.
114	// e.g. after inserting a newline, a line-comment at the cursor is used to
115	// ensure that the newline is preserved.
116	std::string CursorPlaceholder;
117	};
118
119	// The two functions below, columnWidth() and columnWidthWithTabs(), were
120	// adapted from similar functions in clang/lib/Format/Encoding.h.
121	// FIXME: Move those functions to clang/include/clang/Format.h and reuse them?
122
123	// Helper function for columnWidthWithTabs().
124	inline unsigned columnWidth(StringRef Text) {
125	int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
126	if (ContentWidth < `0`)
127	return Text.size(); // fallback for unprintable characters
128	return ContentWidth;
129	}
130
131	// Returns the number of columns required to display the \p Text on a terminal
132	// with the \p TabWidth.
133	inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) {
134	unsigned TotalWidth = `0`;
135	StringRef Tail = Text;
136	for (;;) {
137	StringRef::size_type TabPos = Tail.find(C: `'\t'`);
138	if (TabPos == StringRef::npos)
139	return TotalWidth + columnWidth(Text: Tail);
140	TotalWidth += columnWidth(Text: Tail.substr(Start: `0`, N: TabPos));
141	if (TabWidth)
142	TotalWidth += TabWidth - TotalWidth % TabWidth;
143	Tail = Tail.substr(Start: TabPos + `1`);
144	}
145	}
146
147	// After a newline:
148	// - we continue any line-comment that was split
149	// - we format the old line in addition to the cursor
150	// - we represent the cursor with a line comment to preserve the newline
151	IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
152	unsigned Cursor,
153	unsigned TabWidth) {
154	IncrementalChanges Result;
155	// Before newline, code looked like:
156	// leading^trailing
157	// After newline, code looks like:
158	// leading
159	// indentation^trailing
160	// Where indentation was added by the editor.
161	StringRef Trailing = firstLine(Code: Code.substr(Start: Cursor));
162	StringRef Indentation = lastLine(Code: Code.take_front(N: Cursor));
163	if (Indentation.data() == Code.data()) {
164	vlog(Fmt: "Typed a newline, but we're still on the first line!");
165	return Result;
166	}
167	StringRef Leading =
168	lastLine(Code: Code.take_front(N: Indentation.data() - Code.data() - `1`));
169	StringRef NextLine = firstLine(Code: Code.substr(Start: Cursor + Trailing.size() + `1`));
170
171	// Strip leading whitespace on trailing line.
172	StringRef TrailingTrim = Trailing.ltrim();
173	if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
174	cantFail(Err: Result.Changes.add(
175	R: replacement(Code, From: StringRef(Trailing.begin(), TrailWS), To: "")));
176
177	// If we split a comment, replace indentation with a comment marker.
178	// If the editor made the new line a comment, also respect that.
179	StringRef CommentMarker = commentMarker(Line: Leading);
180	bool NewLineIsComment = !commentMarker(Line: Indentation).empty();
181	if (!CommentMarker.empty() &&
182	(NewLineIsComment \|\| !commentMarker(Line: NextLine).empty() \|\|
183	(!TrailingTrim.empty() && !TrailingTrim.starts_with(Prefix: "//")))) {
184	// We indent the new comment to match the previous one.
185	StringRef PreComment =
186	Leading.take_front(N: CommentMarker.data() - Leading.data());
187	std::string IndentAndComment =
188	(std::string (columnWidthWithTabs(Text: PreComment, TabWidth), `' '`) +
189	CommentMarker + " ")
190	.str();
191	cantFail(
192	Err: Result.Changes.add(R: replacement(Code, From: Indentation, To: IndentAndComment)));
193	} else {
194	// Remove any indentation and let clang-format re-add it.
195	// This prevents the cursor marker dragging e.g. an aligned comment with it.
196	cantFail(Err: Result.Changes.add(R: replacement(Code, From: Indentation, To: "")));
197	}
198
199	// If we put a the newline inside a {} pair, put } on its own line...
200	if (CommentMarker.empty() && Leading.ends_with(Suffix: "{") &&
201	Trailing.starts_with(Prefix: "}")) {
202	cantFail(
203	Err: Result.Changes.add(R: replacement(Code, From: Trailing.take_front(N: `1`), To: "\n}")));
204	// ...and format it.
205	Result.FormatRanges.push_back(
206	x: tooling::Range (Trailing.data() - Code.data() + `1`, `1`));
207	}
208
209	// Format the whole leading line.
210	Result.FormatRanges.push_back(
211	x: tooling::Range (Leading.data() - Code.data(), Leading.size()));
212
213	// We use a comment to represent the cursor, to preserve the newline.
214	// A trailing identifier improves parsing of e.g. for without braces.
215	// Exception: if the previous line has a trailing comment, we can't use one
216	// as the cursor (they will be aligned). But in this case we don't need to.
217	Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
218
219	return Result;
220	}
221
222	IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
223	llvm::StringRef InsertedText,
224	unsigned TabWidth) {
225	IncrementalChanges Result;
226	if (InsertedText == "\n")
227	return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth);
228
229	Result.CursorPlaceholder = " /**/";
230	return Result;
231	}
232
233	// Returns equivalent replacements that preserve the correspondence between
234	// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
235	// replacement will be split.
236	std::vector<tooling::Replacement>
237	split(const tooling::Replacements &Replacements, unsigned OldCursor,
238	unsigned NewCursor) {
239	std::vector<tooling::Replacement> Result;
240	int LengthChange = `0`;
241	for (const tooling::Replacement &R : Replacements) {
242	if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
243	Result.push_back(x: R);
244	LengthChange += R.getReplacementText().size() - R.getLength();
245	} else if (R.getOffset() < OldCursor) { // overlaps cursor
246	int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
247	assert(ReplacementSplit >= `0` &&
248	ReplacementSplit <= int(R.getReplacementText().size()) &&
249	"NewCursor incompatible with OldCursor!");
250	Result.push_back(x: tooling::Replacement (
251	R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
252	R.getReplacementText().take_front(N: ReplacementSplit)));
253	Result.push_back(x: tooling::Replacement (
254	R.getFilePath(), OldCursor,
255	R.getLength() - (OldCursor - R.getOffset()),
256	R.getReplacementText().drop_front(N: ReplacementSplit)));
257	} else if (R.getOffset() >= OldCursor) { // after cursor
258	Result.push_back(x: R);
259	}
260	}
261	return Result;
262	}
263
264	} // namespace
265
266	// We're simulating the following sequence of changes:
267	// - apply the pre-formatting edits (see getIncrementalChanges)
268	// - insert a placeholder for the cursor
269	// - format some of the resulting code
270	// - remove the cursor placeholder again
271	// The replacements we return are produced by composing these.
272	//
273	// The text we actually pass to clang-format is slightly different from this,
274	// e.g. we have to close brackets. We ensure these differences are after
275	// all the regions we want to format, and discard changes in them.
276	std::vector<tooling::Replacement>
277	formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
278	llvm::StringRef InsertedText, format::FormatStyle Style) {
279	IncrementalChanges Incremental = getIncrementalChanges(
280	Code: OriginalCode, Cursor: OriginalCursor, InsertedText, TabWidth: Style.TabWidth);
281	// Never remove* lines in response to pressing enter! This annoys users.*
282	if (InsertedText == "\n") {
283	Style.MaxEmptyLinesToKeep = `1000`;
284	Style.KeepEmptyLines.AtStartOfBlock = true;
285	}
286
287	// Compute the code we want to format:
288	// 1) Start with code after the pre-formatting edits.
289	std::string CodeToFormat = cantFail(
290	ValOrErr: tooling::applyAllReplacements(Code: OriginalCode, Replaces: Incremental.Changes));
291	unsigned Cursor = Incremental.Changes.getShiftedCodePosition(Position: OriginalCursor);
292	// 2) Truncate code after the last interesting range.
293	unsigned FormatLimit = Cursor;
294	for (tooling::Range &R : Incremental.FormatRanges)
295	FormatLimit = std::max(a: FormatLimit, b: R.getOffset() + R.getLength());
296	CodeToFormat.resize(n: FormatLimit);
297	// 3) Insert a placeholder for the cursor.
298	CodeToFormat.insert(pos1: Cursor, str: Incremental.CursorPlaceholder);
299	// 4) Append brackets after FormatLimit so the code is well-formed.
300	closeBrackets(Code&: CodeToFormat, Style);
301
302	// Determine the ranges to format:
303	std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
304	// Ranges after the cursor need to be adjusted for the placeholder.
305	for (auto &R : RangesToFormat) {
306	if (R.getOffset() > Cursor)
307	R = tooling::Range (R.getOffset() + Incremental.CursorPlaceholder.size(),
308	R.getLength());
309	}
310	// We also format the cursor.
311	RangesToFormat.push_back(
312	x: tooling::Range (Cursor, Incremental.CursorPlaceholder.size()));
313	// Also update FormatLimit for the placeholder, we'll use this later.
314	FormatLimit += Incremental.CursorPlaceholder.size();
315
316	// Run clang-format, and truncate changes at FormatLimit.
317	tooling::Replacements FormattingChanges;
318	format::FormattingAttemptStatus Status;
319	for (const tooling::Replacement &R : format::reformat(
320	Style, Code: CodeToFormat, Ranges: RangesToFormat, FileName: Filename, Status: &Status)) {
321	if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
322	cantFail(Err: FormattingChanges.add(R));
323	else if(R.getOffset() < FormatLimit) { // Overlaps limit.
324	if (R.getReplacementText().empty()) // Deletions are easy to handle.
325	cantFail(Err: FormattingChanges.add(R: tooling::Replacement (Filename,
326	R.getOffset(), FormatLimit - R.getOffset(), "")));
327	else
328	// Hopefully won't happen in practice?
329	elog(Fmt: "Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
330	Vals&: Cursor, Vals&: CodeToFormat);
331	}
332	}
333	if (!Status.FormatComplete)
334	vlog(Fmt: "Incremental format incomplete at line {0}", Vals&: Status.Line);
335
336	// Now we are ready to compose the changes relative to OriginalCode.
337	// edits -> insert placeholder -> format -> remove placeholder.
338	// We must express insert/remove as Replacements.
339	tooling::Replacements InsertCursorPlaceholder(
340	tooling::Replacement (Filename, Cursor, `0`, Incremental.CursorPlaceholder));
341	unsigned FormattedCursorStart =
342	FormattingChanges.getShiftedCodePosition(Position: Cursor),
343	FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
344	Position: Cursor + Incremental.CursorPlaceholder.size());
345	tooling::Replacements RemoveCursorPlaceholder(
346	tooling::Replacement (Filename, FormattedCursorStart,
347	FormattedCursorEnd - FormattedCursorStart, ""));
348
349	// We can't simply merge() and return: tooling::Replacements will combine
350	// adjacent edits left and right of the cursor. This gives the right source
351	// code, but loses information about where the cursor is!
352	// Fortunately, none of the individual passes lose information, so:
353	// - we use merge() to compute the final Replacements
354	// - we chain getShiftedCodePosition() to compute final cursor position
355	// - we split the final Replacements at the cursor position, so that
356	// each Replacement lies either before or after the cursor.
357	tooling::Replacements Final;
358	unsigned FinalCursor = OriginalCursor;
359	#ifndef NDEBUG
360	std::string FinalCode = std::string (OriginalCode);
361	dlog("Initial code: {0}", FinalCode);
362	#endif
363	for (auto Pass :
364	std::vector<std::pair<const char , const* tooling::Replacements *>>{
365	{"Pre-formatting changes", &Incremental.Changes},
366	{"Insert placeholder", &InsertCursorPlaceholder},
367	{"clang-format", &FormattingChanges},
368	{"Remove placeholder", &RemoveCursorPlaceholder}}) {
369	Final = Final.merge(Replaces: *Pass.second);
370	FinalCursor = Pass.second->getShiftedCodePosition(Position: FinalCursor);
371	#ifndef NDEBUG
372	FinalCode =
373	cantFail(ValOrErr: tooling::applyAllReplacements(Code: FinalCode, Replaces: *Pass.second));
374	dlog("After {0}:\n{1}^{2}", Pass.first,
375	StringRef(FinalCode).take_front(FinalCursor),
376	StringRef(FinalCode).drop_front(FinalCursor));
377	#endif
378	}
379	return split(Replacements: Final, OldCursor: OriginalCursor, NewCursor: FinalCursor);
380	}
381
382	unsigned
383	transformCursorPosition(unsigned Offset,
384	const std::vector<tooling::Replacement> &Replacements) {
385	unsigned OriginalOffset = Offset;
386	for (const auto &R : Replacements) {
387	if (R.getOffset() + R.getLength() <= OriginalOffset) {
388	// Replacement is before cursor.
389	Offset += R.getReplacementText().size();
390	Offset -= R.getLength();
391	} else if (R.getOffset() < OriginalOffset) {
392	// Replacement overlaps cursor.
393	// Preserve position within replacement text, as far as possible.
394	unsigned PositionWithinReplacement = Offset - R.getOffset();
395	if (PositionWithinReplacement > R.getReplacementText().size()) {
396	Offset += R.getReplacementText().size();
397	Offset -= PositionWithinReplacement;
398	}
399	} else {
400	// Replacement after cursor.
401	break; // Replacements are sorted, the rest are also after the cursor.
402	}
403	}
404	return Offset;
405	}
406
407	} // namespace clangd
408	} // namespace clang
409

source code of clang-tools-extra/clangd/Format.cpp