1 | //===--- Format.cpp -----------------------------------------*- C++-*------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "Format.h" |
9 | #include "support/Logger.h" |
10 | #include "clang/Basic/SourceManager.h" |
11 | #include "clang/Format/Format.h" |
12 | #include "clang/Lex/Lexer.h" |
13 | #include "clang/Tooling/Core/Replacement.h" |
14 | #include "llvm/Support/Unicode.h" |
15 | |
16 | namespace clang { |
17 | namespace clangd { |
18 | namespace { |
19 | |
20 | /// Append closing brackets )]} to \p Code to make it well-formed. |
21 | /// Clang-format conservatively refuses to format files with unmatched brackets |
22 | /// as it isn't sure where the errors are and so can't correct. |
23 | /// When editing, it's reasonable to assume code before the cursor is complete. |
24 | void closeBrackets(std::string &Code, const format::FormatStyle &Style) { |
25 | SourceManagerForFile FileSM("mock_file.cpp" , Code); |
26 | auto &SM = FileSM.get(); |
27 | FileID FID = SM.getMainFileID(); |
28 | LangOptions LangOpts = format::getFormattingLangOpts(Style); |
29 | Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts); |
30 | Token Tok; |
31 | std::vector<char> Brackets; |
32 | while (!Lex.LexFromRawLexer(Result&: Tok)) { |
33 | switch(Tok.getKind()) { |
34 | case tok::l_paren: |
35 | Brackets.push_back(x: ')'); |
36 | break; |
37 | case tok::l_brace: |
38 | Brackets.push_back(x: '}'); |
39 | break; |
40 | case tok::l_square: |
41 | Brackets.push_back(x: ']'); |
42 | break; |
43 | case tok::r_paren: |
44 | if (!Brackets.empty() && Brackets.back() == ')') |
45 | Brackets.pop_back(); |
46 | break; |
47 | case tok::r_brace: |
48 | if (!Brackets.empty() && Brackets.back() == '}') |
49 | Brackets.pop_back(); |
50 | break; |
51 | case tok::r_square: |
52 | if (!Brackets.empty() && Brackets.back() == ']') |
53 | Brackets.pop_back(); |
54 | break; |
55 | default: |
56 | continue; |
57 | } |
58 | } |
59 | // Attempt to end any open comments first. |
60 | Code.append(s: "\n// */\n" ); |
61 | Code.append(first: Brackets.rbegin(), last: Brackets.rend()); |
62 | } |
63 | |
64 | static StringRef (llvm::StringRef Line) { |
65 | for (StringRef Marker : {"///" , "//" }){ |
66 | auto I = Line.rfind(Str: Marker); |
67 | if (I != StringRef::npos) |
68 | return Line.substr(Start: I, N: Marker.size()); |
69 | } |
70 | return "" ; |
71 | } |
72 | |
73 | llvm::StringRef firstLine(llvm::StringRef Code) { |
74 | return Code.take_until(F: [](char C) { return C == '\n'; }); |
75 | } |
76 | |
77 | llvm::StringRef lastLine(llvm::StringRef Code) { |
78 | llvm::StringRef Rest = Code; |
79 | while (!Rest.empty() && Rest.back() != '\n') |
80 | Rest = Rest.drop_back(); |
81 | return Code.substr(Start: Rest.size()); |
82 | } |
83 | |
84 | // Filename is needed for tooling::Replacement and some overloads of reformat(). |
85 | // Its value should not affect the outcome. We use the default from reformat(). |
86 | llvm::StringRef Filename = "<stdin>" ; |
87 | |
88 | // tooling::Replacement from overlapping StringRefs: From must be part of Code. |
89 | tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From, |
90 | llvm::StringRef To) { |
91 | assert(From.begin() >= Code.begin() && From.end() <= Code.end()); |
92 | // The filename is required but ignored. |
93 | return tooling::Replacement(Filename, From.data() - Code.data(), |
94 | From.size(), To); |
95 | } |
96 | |
97 | // High-level representation of incremental formatting changes. |
98 | // The changes are made in two steps. |
99 | // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding |
100 | // comment markers when splitting a line comment with a newline). |
101 | // 2) a selective clang-format run: |
102 | // - the "source code" passed to clang format is the code up to the cursor, |
103 | // a placeholder for the cursor, and some closing brackets |
104 | // - the formatting is restricted to the cursor and (possibly) other ranges |
105 | // (e.g. the old line when inserting a newline). |
106 | // - changes before the cursor are applied, those after are discarded. |
107 | struct IncrementalChanges { |
108 | // Changes that should be applied before running clang-format. |
109 | tooling::Replacements Changes; |
110 | // Ranges of the original source code that should be clang-formatted. |
111 | // The CursorProxyText will also be formatted. |
112 | std::vector<tooling::Range> FormatRanges; |
113 | // The source code that should stand in for the cursor when clang-formatting. |
114 | // e.g. after inserting a newline, a line-comment at the cursor is used to |
115 | // ensure that the newline is preserved. |
116 | std::string CursorPlaceholder; |
117 | }; |
118 | |
119 | // The two functions below, columnWidth() and columnWidthWithTabs(), were |
120 | // adapted from similar functions in clang/lib/Format/Encoding.h. |
121 | // FIXME: Move those functions to clang/include/clang/Format.h and reuse them? |
122 | |
123 | // Helper function for columnWidthWithTabs(). |
124 | inline unsigned columnWidth(StringRef Text) { |
125 | int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); |
126 | if (ContentWidth < 0) |
127 | return Text.size(); // fallback for unprintable characters |
128 | return ContentWidth; |
129 | } |
130 | |
131 | // Returns the number of columns required to display the \p Text on a terminal |
132 | // with the \p TabWidth. |
133 | inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) { |
134 | unsigned TotalWidth = 0; |
135 | StringRef Tail = Text; |
136 | for (;;) { |
137 | StringRef::size_type TabPos = Tail.find(C: '\t'); |
138 | if (TabPos == StringRef::npos) |
139 | return TotalWidth + columnWidth(Text: Tail); |
140 | TotalWidth += columnWidth(Text: Tail.substr(Start: 0, N: TabPos)); |
141 | if (TabWidth) |
142 | TotalWidth += TabWidth - TotalWidth % TabWidth; |
143 | Tail = Tail.substr(Start: TabPos + 1); |
144 | } |
145 | } |
146 | |
147 | // After a newline: |
148 | // - we continue any line-comment that was split |
149 | // - we format the old line in addition to the cursor |
150 | // - we represent the cursor with a line comment to preserve the newline |
151 | IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code, |
152 | unsigned Cursor, |
153 | unsigned TabWidth) { |
154 | IncrementalChanges Result; |
155 | // Before newline, code looked like: |
156 | // leading^trailing |
157 | // After newline, code looks like: |
158 | // leading |
159 | // indentation^trailing |
160 | // Where indentation was added by the editor. |
161 | StringRef Trailing = firstLine(Code: Code.substr(Start: Cursor)); |
162 | StringRef Indentation = lastLine(Code: Code.take_front(N: Cursor)); |
163 | if (Indentation.data() == Code.data()) { |
164 | vlog(Fmt: "Typed a newline, but we're still on the first line!" ); |
165 | return Result; |
166 | } |
167 | StringRef Leading = |
168 | lastLine(Code: Code.take_front(N: Indentation.data() - Code.data() - 1)); |
169 | StringRef NextLine = firstLine(Code: Code.substr(Start: Cursor + Trailing.size() + 1)); |
170 | |
171 | // Strip leading whitespace on trailing line. |
172 | StringRef TrailingTrim = Trailing.ltrim(); |
173 | if (unsigned TrailWS = Trailing.size() - TrailingTrim.size()) |
174 | cantFail(Err: Result.Changes.add( |
175 | R: replacement(Code, From: StringRef(Trailing.begin(), TrailWS), To: "" ))); |
176 | |
177 | // If we split a comment, replace indentation with a comment marker. |
178 | // If the editor made the new line a comment, also respect that. |
179 | StringRef = commentMarker(Line: Leading); |
180 | bool = !commentMarker(Line: Indentation).empty(); |
181 | if (!CommentMarker.empty() && |
182 | (NewLineIsComment || !commentMarker(Line: NextLine).empty() || |
183 | (!TrailingTrim.empty() && !TrailingTrim.starts_with(Prefix: "//" )))) { |
184 | // We indent the new comment to match the previous one. |
185 | StringRef = |
186 | Leading.take_front(N: CommentMarker.data() - Leading.data()); |
187 | std::string IndentAndComment = |
188 | (std::string(columnWidthWithTabs(Text: PreComment, TabWidth), ' ') + |
189 | CommentMarker + " " ) |
190 | .str(); |
191 | cantFail( |
192 | Err: Result.Changes.add(R: replacement(Code, From: Indentation, To: IndentAndComment))); |
193 | } else { |
194 | // Remove any indentation and let clang-format re-add it. |
195 | // This prevents the cursor marker dragging e.g. an aligned comment with it. |
196 | cantFail(Err: Result.Changes.add(R: replacement(Code, From: Indentation, To: "" ))); |
197 | } |
198 | |
199 | // If we put a the newline inside a {} pair, put } on its own line... |
200 | if (CommentMarker.empty() && Leading.ends_with(Suffix: "{" ) && |
201 | Trailing.starts_with(Prefix: "}" )) { |
202 | cantFail( |
203 | Err: Result.Changes.add(R: replacement(Code, From: Trailing.take_front(N: 1), To: "\n}" ))); |
204 | // ...and format it. |
205 | Result.FormatRanges.push_back( |
206 | x: tooling::Range(Trailing.data() - Code.data() + 1, 1)); |
207 | } |
208 | |
209 | // Format the whole leading line. |
210 | Result.FormatRanges.push_back( |
211 | x: tooling::Range(Leading.data() - Code.data(), Leading.size())); |
212 | |
213 | // We use a comment to represent the cursor, to preserve the newline. |
214 | // A trailing identifier improves parsing of e.g. for without braces. |
215 | // Exception: if the previous line has a trailing comment, we can't use one |
216 | // as the cursor (they will be aligned). But in this case we don't need to. |
217 | Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident" ; |
218 | |
219 | return Result; |
220 | } |
221 | |
222 | IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor, |
223 | llvm::StringRef InsertedText, |
224 | unsigned TabWidth) { |
225 | IncrementalChanges Result; |
226 | if (InsertedText == "\n" ) |
227 | return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth); |
228 | |
229 | Result.CursorPlaceholder = " /**/" ; |
230 | return Result; |
231 | } |
232 | |
233 | // Returns equivalent replacements that preserve the correspondence between |
234 | // OldCursor and NewCursor. If OldCursor lies in a replaced region, that |
235 | // replacement will be split. |
236 | std::vector<tooling::Replacement> |
237 | split(const tooling::Replacements &Replacements, unsigned OldCursor, |
238 | unsigned NewCursor) { |
239 | std::vector<tooling::Replacement> Result; |
240 | int LengthChange = 0; |
241 | for (const tooling::Replacement &R : Replacements) { |
242 | if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor |
243 | Result.push_back(x: R); |
244 | LengthChange += R.getReplacementText().size() - R.getLength(); |
245 | } else if (R.getOffset() < OldCursor) { // overlaps cursor |
246 | int ReplacementSplit = NewCursor - LengthChange - R.getOffset(); |
247 | assert(ReplacementSplit >= 0 && |
248 | ReplacementSplit <= int(R.getReplacementText().size()) && |
249 | "NewCursor incompatible with OldCursor!" ); |
250 | Result.push_back(x: tooling::Replacement( |
251 | R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(), |
252 | R.getReplacementText().take_front(N: ReplacementSplit))); |
253 | Result.push_back(x: tooling::Replacement( |
254 | R.getFilePath(), OldCursor, |
255 | R.getLength() - (OldCursor - R.getOffset()), |
256 | R.getReplacementText().drop_front(N: ReplacementSplit))); |
257 | } else if (R.getOffset() >= OldCursor) { // after cursor |
258 | Result.push_back(x: R); |
259 | } |
260 | } |
261 | return Result; |
262 | } |
263 | |
264 | } // namespace |
265 | |
266 | // We're simulating the following sequence of changes: |
267 | // - apply the pre-formatting edits (see getIncrementalChanges) |
268 | // - insert a placeholder for the cursor |
269 | // - format some of the resulting code |
270 | // - remove the cursor placeholder again |
271 | // The replacements we return are produced by composing these. |
272 | // |
273 | // The text we actually pass to clang-format is slightly different from this, |
274 | // e.g. we have to close brackets. We ensure these differences are *after* |
275 | // all the regions we want to format, and discard changes in them. |
276 | std::vector<tooling::Replacement> |
277 | formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor, |
278 | llvm::StringRef InsertedText, format::FormatStyle Style) { |
279 | IncrementalChanges Incremental = getIncrementalChanges( |
280 | Code: OriginalCode, Cursor: OriginalCursor, InsertedText, TabWidth: Style.TabWidth); |
281 | // Never *remove* lines in response to pressing enter! This annoys users. |
282 | if (InsertedText == "\n" ) { |
283 | Style.MaxEmptyLinesToKeep = 1000; |
284 | Style.KeepEmptyLinesAtTheStartOfBlocks = true; |
285 | } |
286 | |
287 | // Compute the code we want to format: |
288 | // 1) Start with code after the pre-formatting edits. |
289 | std::string CodeToFormat = cantFail( |
290 | ValOrErr: tooling::applyAllReplacements(Code: OriginalCode, Replaces: Incremental.Changes)); |
291 | unsigned Cursor = Incremental.Changes.getShiftedCodePosition(Position: OriginalCursor); |
292 | // 2) Truncate code after the last interesting range. |
293 | unsigned FormatLimit = Cursor; |
294 | for (tooling::Range &R : Incremental.FormatRanges) |
295 | FormatLimit = std::max(a: FormatLimit, b: R.getOffset() + R.getLength()); |
296 | CodeToFormat.resize(n: FormatLimit); |
297 | // 3) Insert a placeholder for the cursor. |
298 | CodeToFormat.insert(pos1: Cursor, str: Incremental.CursorPlaceholder); |
299 | // 4) Append brackets after FormatLimit so the code is well-formed. |
300 | closeBrackets(Code&: CodeToFormat, Style); |
301 | |
302 | // Determine the ranges to format: |
303 | std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges; |
304 | // Ranges after the cursor need to be adjusted for the placeholder. |
305 | for (auto &R : RangesToFormat) { |
306 | if (R.getOffset() > Cursor) |
307 | R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(), |
308 | R.getLength()); |
309 | } |
310 | // We also format the cursor. |
311 | RangesToFormat.push_back( |
312 | x: tooling::Range(Cursor, Incremental.CursorPlaceholder.size())); |
313 | // Also update FormatLimit for the placeholder, we'll use this later. |
314 | FormatLimit += Incremental.CursorPlaceholder.size(); |
315 | |
316 | // Run clang-format, and truncate changes at FormatLimit. |
317 | tooling::Replacements FormattingChanges; |
318 | format::FormattingAttemptStatus Status; |
319 | for (const tooling::Replacement &R : format::reformat( |
320 | Style, Code: CodeToFormat, Ranges: RangesToFormat, FileName: Filename, Status: &Status)) { |
321 | if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit. |
322 | cantFail(Err: FormattingChanges.add(R)); |
323 | else if(R.getOffset() < FormatLimit) { // Overlaps limit. |
324 | if (R.getReplacementText().empty()) // Deletions are easy to handle. |
325 | cantFail(Err: FormattingChanges.add(R: tooling::Replacement(Filename, |
326 | R.getOffset(), FormatLimit - R.getOffset(), "" ))); |
327 | else |
328 | // Hopefully won't happen in practice? |
329 | elog(Fmt: "Incremental clang-format edit overlapping cursor @ {0}!\n{1}" , |
330 | Vals&: Cursor, Vals&: CodeToFormat); |
331 | } |
332 | } |
333 | if (!Status.FormatComplete) |
334 | vlog(Fmt: "Incremental format incomplete at line {0}" , Vals&: Status.Line); |
335 | |
336 | // Now we are ready to compose the changes relative to OriginalCode. |
337 | // edits -> insert placeholder -> format -> remove placeholder. |
338 | // We must express insert/remove as Replacements. |
339 | tooling::Replacements InsertCursorPlaceholder( |
340 | tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder)); |
341 | unsigned FormattedCursorStart = |
342 | FormattingChanges.getShiftedCodePosition(Position: Cursor), |
343 | FormattedCursorEnd = FormattingChanges.getShiftedCodePosition( |
344 | Position: Cursor + Incremental.CursorPlaceholder.size()); |
345 | tooling::Replacements RemoveCursorPlaceholder( |
346 | tooling::Replacement(Filename, FormattedCursorStart, |
347 | FormattedCursorEnd - FormattedCursorStart, "" )); |
348 | |
349 | // We can't simply merge() and return: tooling::Replacements will combine |
350 | // adjacent edits left and right of the cursor. This gives the right source |
351 | // code, but loses information about where the cursor is! |
352 | // Fortunately, none of the individual passes lose information, so: |
353 | // - we use merge() to compute the final Replacements |
354 | // - we chain getShiftedCodePosition() to compute final cursor position |
355 | // - we split the final Replacements at the cursor position, so that |
356 | // each Replacement lies either before or after the cursor. |
357 | tooling::Replacements Final; |
358 | unsigned FinalCursor = OriginalCursor; |
359 | #ifndef NDEBUG |
360 | std::string FinalCode = std::string(OriginalCode); |
361 | dlog("Initial code: {0}" , FinalCode); |
362 | #endif |
363 | for (auto Pass : |
364 | std::vector<std::pair<const char *, const tooling::Replacements *>>{ |
365 | {"Pre-formatting changes" , &Incremental.Changes}, |
366 | {"Insert placeholder" , &InsertCursorPlaceholder}, |
367 | {"clang-format" , &FormattingChanges}, |
368 | {"Remove placeholder" , &RemoveCursorPlaceholder}}) { |
369 | Final = Final.merge(Replaces: *Pass.second); |
370 | FinalCursor = Pass.second->getShiftedCodePosition(Position: FinalCursor); |
371 | #ifndef NDEBUG |
372 | FinalCode = |
373 | cantFail(ValOrErr: tooling::applyAllReplacements(Code: FinalCode, Replaces: *Pass.second)); |
374 | dlog("After {0}:\n{1}^{2}" , Pass.first, |
375 | StringRef(FinalCode).take_front(FinalCursor), |
376 | StringRef(FinalCode).drop_front(FinalCursor)); |
377 | #endif |
378 | } |
379 | return split(Replacements: Final, OldCursor: OriginalCursor, NewCursor: FinalCursor); |
380 | } |
381 | |
382 | unsigned |
383 | transformCursorPosition(unsigned Offset, |
384 | const std::vector<tooling::Replacement> &Replacements) { |
385 | unsigned OriginalOffset = Offset; |
386 | for (const auto &R : Replacements) { |
387 | if (R.getOffset() + R.getLength() <= OriginalOffset) { |
388 | // Replacement is before cursor. |
389 | Offset += R.getReplacementText().size(); |
390 | Offset -= R.getLength(); |
391 | } else if (R.getOffset() < OriginalOffset) { |
392 | // Replacement overlaps cursor. |
393 | // Preserve position within replacement text, as far as possible. |
394 | unsigned PositionWithinReplacement = Offset - R.getOffset(); |
395 | if (PositionWithinReplacement > R.getReplacementText().size()) { |
396 | Offset += R.getReplacementText().size(); |
397 | Offset -= PositionWithinReplacement; |
398 | } |
399 | } else { |
400 | // Replacement after cursor. |
401 | break; // Replacements are sorted, the rest are also after the cursor. |
402 | } |
403 | } |
404 | return Offset; |
405 | } |
406 | |
407 | } // namespace clangd |
408 | } // namespace clang |
409 | |