| 1 | //===--- Format.cpp -----------------------------------------*- C++-*------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | #include "Format.h" |
| 9 | #include "support/Logger.h" |
| 10 | #include "clang/Basic/SourceManager.h" |
| 11 | #include "clang/Format/Format.h" |
| 12 | #include "clang/Lex/Lexer.h" |
| 13 | #include "clang/Tooling/Core/Replacement.h" |
| 14 | #include "llvm/Support/Unicode.h" |
| 15 | |
| 16 | namespace clang { |
| 17 | namespace clangd { |
| 18 | namespace { |
| 19 | |
| 20 | /// Append closing brackets )]} to \p Code to make it well-formed. |
| 21 | /// Clang-format conservatively refuses to format files with unmatched brackets |
| 22 | /// as it isn't sure where the errors are and so can't correct. |
| 23 | /// When editing, it's reasonable to assume code before the cursor is complete. |
| 24 | void closeBrackets(std::string &Code, const format::FormatStyle &Style) { |
| 25 | SourceManagerForFile FileSM("mock_file.cpp" , Code); |
| 26 | auto &SM = FileSM.get(); |
| 27 | FileID FID = SM.getMainFileID(); |
| 28 | LangOptions LangOpts = format::getFormattingLangOpts(Style); |
| 29 | Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts); |
| 30 | Token Tok; |
| 31 | std::vector<char> Brackets; |
| 32 | while (!Lex.LexFromRawLexer(Result&: Tok)) { |
| 33 | switch(Tok.getKind()) { |
| 34 | case tok::l_paren: |
| 35 | Brackets.push_back(x: ')'); |
| 36 | break; |
| 37 | case tok::l_brace: |
| 38 | Brackets.push_back(x: '}'); |
| 39 | break; |
| 40 | case tok::l_square: |
| 41 | Brackets.push_back(x: ']'); |
| 42 | break; |
| 43 | case tok::r_paren: |
| 44 | if (!Brackets.empty() && Brackets.back() == ')') |
| 45 | Brackets.pop_back(); |
| 46 | break; |
| 47 | case tok::r_brace: |
| 48 | if (!Brackets.empty() && Brackets.back() == '}') |
| 49 | Brackets.pop_back(); |
| 50 | break; |
| 51 | case tok::r_square: |
| 52 | if (!Brackets.empty() && Brackets.back() == ']') |
| 53 | Brackets.pop_back(); |
| 54 | break; |
| 55 | default: |
| 56 | continue; |
| 57 | } |
| 58 | } |
| 59 | // Attempt to end any open comments first. |
| 60 | Code.append(s: "\n// */\n" ); |
| 61 | Code.append(first: Brackets.rbegin(), last: Brackets.rend()); |
| 62 | } |
| 63 | |
| 64 | static StringRef (llvm::StringRef Line) { |
| 65 | for (StringRef Marker : {"///" , "//" }){ |
| 66 | auto I = Line.rfind(Str: Marker); |
| 67 | if (I != StringRef::npos) |
| 68 | return Line.substr(Start: I, N: Marker.size()); |
| 69 | } |
| 70 | return "" ; |
| 71 | } |
| 72 | |
| 73 | llvm::StringRef firstLine(llvm::StringRef Code) { |
| 74 | return Code.take_until(F: [](char C) { return C == '\n'; }); |
| 75 | } |
| 76 | |
| 77 | llvm::StringRef lastLine(llvm::StringRef Code) { |
| 78 | llvm::StringRef Rest = Code; |
| 79 | while (!Rest.empty() && Rest.back() != '\n') |
| 80 | Rest = Rest.drop_back(); |
| 81 | return Code.substr(Start: Rest.size()); |
| 82 | } |
| 83 | |
| 84 | // Filename is needed for tooling::Replacement and some overloads of reformat(). |
| 85 | // Its value should not affect the outcome. We use the default from reformat(). |
| 86 | llvm::StringRef Filename = "<stdin>" ; |
| 87 | |
| 88 | // tooling::Replacement from overlapping StringRefs: From must be part of Code. |
| 89 | tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From, |
| 90 | llvm::StringRef To) { |
| 91 | assert(From.begin() >= Code.begin() && From.end() <= Code.end()); |
| 92 | // The filename is required but ignored. |
| 93 | return tooling::Replacement(Filename, From.data() - Code.data(), |
| 94 | From.size(), To); |
| 95 | } |
| 96 | |
| 97 | // High-level representation of incremental formatting changes. |
| 98 | // The changes are made in two steps. |
| 99 | // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding |
| 100 | // comment markers when splitting a line comment with a newline). |
| 101 | // 2) a selective clang-format run: |
| 102 | // - the "source code" passed to clang format is the code up to the cursor, |
| 103 | // a placeholder for the cursor, and some closing brackets |
| 104 | // - the formatting is restricted to the cursor and (possibly) other ranges |
| 105 | // (e.g. the old line when inserting a newline). |
| 106 | // - changes before the cursor are applied, those after are discarded. |
| 107 | struct IncrementalChanges { |
| 108 | // Changes that should be applied before running clang-format. |
| 109 | tooling::Replacements Changes; |
| 110 | // Ranges of the original source code that should be clang-formatted. |
| 111 | // The CursorProxyText will also be formatted. |
| 112 | std::vector<tooling::Range> FormatRanges; |
| 113 | // The source code that should stand in for the cursor when clang-formatting. |
| 114 | // e.g. after inserting a newline, a line-comment at the cursor is used to |
| 115 | // ensure that the newline is preserved. |
| 116 | std::string CursorPlaceholder; |
| 117 | }; |
| 118 | |
| 119 | // The two functions below, columnWidth() and columnWidthWithTabs(), were |
| 120 | // adapted from similar functions in clang/lib/Format/Encoding.h. |
| 121 | // FIXME: Move those functions to clang/include/clang/Format.h and reuse them? |
| 122 | |
| 123 | // Helper function for columnWidthWithTabs(). |
| 124 | inline unsigned columnWidth(StringRef Text) { |
| 125 | int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); |
| 126 | if (ContentWidth < 0) |
| 127 | return Text.size(); // fallback for unprintable characters |
| 128 | return ContentWidth; |
| 129 | } |
| 130 | |
| 131 | // Returns the number of columns required to display the \p Text on a terminal |
| 132 | // with the \p TabWidth. |
| 133 | inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) { |
| 134 | unsigned TotalWidth = 0; |
| 135 | StringRef Tail = Text; |
| 136 | for (;;) { |
| 137 | StringRef::size_type TabPos = Tail.find(C: '\t'); |
| 138 | if (TabPos == StringRef::npos) |
| 139 | return TotalWidth + columnWidth(Text: Tail); |
| 140 | TotalWidth += columnWidth(Text: Tail.substr(Start: 0, N: TabPos)); |
| 141 | if (TabWidth) |
| 142 | TotalWidth += TabWidth - TotalWidth % TabWidth; |
| 143 | Tail = Tail.substr(Start: TabPos + 1); |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | // After a newline: |
| 148 | // - we continue any line-comment that was split |
| 149 | // - we format the old line in addition to the cursor |
| 150 | // - we represent the cursor with a line comment to preserve the newline |
| 151 | IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code, |
| 152 | unsigned Cursor, |
| 153 | unsigned TabWidth) { |
| 154 | IncrementalChanges Result; |
| 155 | // Before newline, code looked like: |
| 156 | // leading^trailing |
| 157 | // After newline, code looks like: |
| 158 | // leading |
| 159 | // indentation^trailing |
| 160 | // Where indentation was added by the editor. |
| 161 | StringRef Trailing = firstLine(Code: Code.substr(Start: Cursor)); |
| 162 | StringRef Indentation = lastLine(Code: Code.take_front(N: Cursor)); |
| 163 | if (Indentation.data() == Code.data()) { |
| 164 | vlog(Fmt: "Typed a newline, but we're still on the first line!" ); |
| 165 | return Result; |
| 166 | } |
| 167 | StringRef Leading = |
| 168 | lastLine(Code: Code.take_front(N: Indentation.data() - Code.data() - 1)); |
| 169 | StringRef NextLine = firstLine(Code: Code.substr(Start: Cursor + Trailing.size() + 1)); |
| 170 | |
| 171 | // Strip leading whitespace on trailing line. |
| 172 | StringRef TrailingTrim = Trailing.ltrim(); |
| 173 | if (unsigned TrailWS = Trailing.size() - TrailingTrim.size()) |
| 174 | cantFail(Err: Result.Changes.add( |
| 175 | R: replacement(Code, From: StringRef(Trailing.begin(), TrailWS), To: "" ))); |
| 176 | |
| 177 | // If we split a comment, replace indentation with a comment marker. |
| 178 | // If the editor made the new line a comment, also respect that. |
| 179 | StringRef = commentMarker(Line: Leading); |
| 180 | bool = !commentMarker(Line: Indentation).empty(); |
| 181 | if (!CommentMarker.empty() && |
| 182 | (NewLineIsComment || !commentMarker(Line: NextLine).empty() || |
| 183 | (!TrailingTrim.empty() && !TrailingTrim.starts_with(Prefix: "//" )))) { |
| 184 | // We indent the new comment to match the previous one. |
| 185 | StringRef = |
| 186 | Leading.take_front(N: CommentMarker.data() - Leading.data()); |
| 187 | std::string IndentAndComment = |
| 188 | (std::string(columnWidthWithTabs(Text: PreComment, TabWidth), ' ') + |
| 189 | CommentMarker + " " ) |
| 190 | .str(); |
| 191 | cantFail( |
| 192 | Err: Result.Changes.add(R: replacement(Code, From: Indentation, To: IndentAndComment))); |
| 193 | } else { |
| 194 | // Remove any indentation and let clang-format re-add it. |
| 195 | // This prevents the cursor marker dragging e.g. an aligned comment with it. |
| 196 | cantFail(Err: Result.Changes.add(R: replacement(Code, From: Indentation, To: "" ))); |
| 197 | } |
| 198 | |
| 199 | // If we put a the newline inside a {} pair, put } on its own line... |
| 200 | if (CommentMarker.empty() && Leading.ends_with(Suffix: "{" ) && |
| 201 | Trailing.starts_with(Prefix: "}" )) { |
| 202 | cantFail( |
| 203 | Err: Result.Changes.add(R: replacement(Code, From: Trailing.take_front(N: 1), To: "\n}" ))); |
| 204 | // ...and format it. |
| 205 | Result.FormatRanges.push_back( |
| 206 | x: tooling::Range(Trailing.data() - Code.data() + 1, 1)); |
| 207 | } |
| 208 | |
| 209 | // Format the whole leading line. |
| 210 | Result.FormatRanges.push_back( |
| 211 | x: tooling::Range(Leading.data() - Code.data(), Leading.size())); |
| 212 | |
| 213 | // We use a comment to represent the cursor, to preserve the newline. |
| 214 | // A trailing identifier improves parsing of e.g. for without braces. |
| 215 | // Exception: if the previous line has a trailing comment, we can't use one |
| 216 | // as the cursor (they will be aligned). But in this case we don't need to. |
| 217 | Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident" ; |
| 218 | |
| 219 | return Result; |
| 220 | } |
| 221 | |
| 222 | IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor, |
| 223 | llvm::StringRef InsertedText, |
| 224 | unsigned TabWidth) { |
| 225 | IncrementalChanges Result; |
| 226 | if (InsertedText == "\n" ) |
| 227 | return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth); |
| 228 | |
| 229 | Result.CursorPlaceholder = " /**/" ; |
| 230 | return Result; |
| 231 | } |
| 232 | |
| 233 | // Returns equivalent replacements that preserve the correspondence between |
| 234 | // OldCursor and NewCursor. If OldCursor lies in a replaced region, that |
| 235 | // replacement will be split. |
| 236 | std::vector<tooling::Replacement> |
| 237 | split(const tooling::Replacements &Replacements, unsigned OldCursor, |
| 238 | unsigned NewCursor) { |
| 239 | std::vector<tooling::Replacement> Result; |
| 240 | int LengthChange = 0; |
| 241 | for (const tooling::Replacement &R : Replacements) { |
| 242 | if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor |
| 243 | Result.push_back(x: R); |
| 244 | LengthChange += R.getReplacementText().size() - R.getLength(); |
| 245 | } else if (R.getOffset() < OldCursor) { // overlaps cursor |
| 246 | int ReplacementSplit = NewCursor - LengthChange - R.getOffset(); |
| 247 | assert(ReplacementSplit >= 0 && |
| 248 | ReplacementSplit <= int(R.getReplacementText().size()) && |
| 249 | "NewCursor incompatible with OldCursor!" ); |
| 250 | Result.push_back(x: tooling::Replacement( |
| 251 | R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(), |
| 252 | R.getReplacementText().take_front(N: ReplacementSplit))); |
| 253 | Result.push_back(x: tooling::Replacement( |
| 254 | R.getFilePath(), OldCursor, |
| 255 | R.getLength() - (OldCursor - R.getOffset()), |
| 256 | R.getReplacementText().drop_front(N: ReplacementSplit))); |
| 257 | } else if (R.getOffset() >= OldCursor) { // after cursor |
| 258 | Result.push_back(x: R); |
| 259 | } |
| 260 | } |
| 261 | return Result; |
| 262 | } |
| 263 | |
| 264 | } // namespace |
| 265 | |
| 266 | // We're simulating the following sequence of changes: |
| 267 | // - apply the pre-formatting edits (see getIncrementalChanges) |
| 268 | // - insert a placeholder for the cursor |
| 269 | // - format some of the resulting code |
| 270 | // - remove the cursor placeholder again |
| 271 | // The replacements we return are produced by composing these. |
| 272 | // |
| 273 | // The text we actually pass to clang-format is slightly different from this, |
| 274 | // e.g. we have to close brackets. We ensure these differences are *after* |
| 275 | // all the regions we want to format, and discard changes in them. |
| 276 | std::vector<tooling::Replacement> |
| 277 | formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor, |
| 278 | llvm::StringRef InsertedText, format::FormatStyle Style) { |
| 279 | IncrementalChanges Incremental = getIncrementalChanges( |
| 280 | Code: OriginalCode, Cursor: OriginalCursor, InsertedText, TabWidth: Style.TabWidth); |
| 281 | // Never *remove* lines in response to pressing enter! This annoys users. |
| 282 | if (InsertedText == "\n" ) { |
| 283 | Style.MaxEmptyLinesToKeep = 1000; |
| 284 | Style.KeepEmptyLines.AtStartOfBlock = true; |
| 285 | } |
| 286 | |
| 287 | // Compute the code we want to format: |
| 288 | // 1) Start with code after the pre-formatting edits. |
| 289 | std::string CodeToFormat = cantFail( |
| 290 | ValOrErr: tooling::applyAllReplacements(Code: OriginalCode, Replaces: Incremental.Changes)); |
| 291 | unsigned Cursor = Incremental.Changes.getShiftedCodePosition(Position: OriginalCursor); |
| 292 | // 2) Truncate code after the last interesting range. |
| 293 | unsigned FormatLimit = Cursor; |
| 294 | for (tooling::Range &R : Incremental.FormatRanges) |
| 295 | FormatLimit = std::max(a: FormatLimit, b: R.getOffset() + R.getLength()); |
| 296 | CodeToFormat.resize(n: FormatLimit); |
| 297 | // 3) Insert a placeholder for the cursor. |
| 298 | CodeToFormat.insert(pos1: Cursor, str: Incremental.CursorPlaceholder); |
| 299 | // 4) Append brackets after FormatLimit so the code is well-formed. |
| 300 | closeBrackets(Code&: CodeToFormat, Style); |
| 301 | |
| 302 | // Determine the ranges to format: |
| 303 | std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges; |
| 304 | // Ranges after the cursor need to be adjusted for the placeholder. |
| 305 | for (auto &R : RangesToFormat) { |
| 306 | if (R.getOffset() > Cursor) |
| 307 | R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(), |
| 308 | R.getLength()); |
| 309 | } |
| 310 | // We also format the cursor. |
| 311 | RangesToFormat.push_back( |
| 312 | x: tooling::Range(Cursor, Incremental.CursorPlaceholder.size())); |
| 313 | // Also update FormatLimit for the placeholder, we'll use this later. |
| 314 | FormatLimit += Incremental.CursorPlaceholder.size(); |
| 315 | |
| 316 | // Run clang-format, and truncate changes at FormatLimit. |
| 317 | tooling::Replacements FormattingChanges; |
| 318 | format::FormattingAttemptStatus Status; |
| 319 | for (const tooling::Replacement &R : format::reformat( |
| 320 | Style, Code: CodeToFormat, Ranges: RangesToFormat, FileName: Filename, Status: &Status)) { |
| 321 | if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit. |
| 322 | cantFail(Err: FormattingChanges.add(R)); |
| 323 | else if(R.getOffset() < FormatLimit) { // Overlaps limit. |
| 324 | if (R.getReplacementText().empty()) // Deletions are easy to handle. |
| 325 | cantFail(Err: FormattingChanges.add(R: tooling::Replacement(Filename, |
| 326 | R.getOffset(), FormatLimit - R.getOffset(), "" ))); |
| 327 | else |
| 328 | // Hopefully won't happen in practice? |
| 329 | elog(Fmt: "Incremental clang-format edit overlapping cursor @ {0}!\n{1}" , |
| 330 | Vals&: Cursor, Vals&: CodeToFormat); |
| 331 | } |
| 332 | } |
| 333 | if (!Status.FormatComplete) |
| 334 | vlog(Fmt: "Incremental format incomplete at line {0}" , Vals&: Status.Line); |
| 335 | |
| 336 | // Now we are ready to compose the changes relative to OriginalCode. |
| 337 | // edits -> insert placeholder -> format -> remove placeholder. |
| 338 | // We must express insert/remove as Replacements. |
| 339 | tooling::Replacements InsertCursorPlaceholder( |
| 340 | tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder)); |
| 341 | unsigned FormattedCursorStart = |
| 342 | FormattingChanges.getShiftedCodePosition(Position: Cursor), |
| 343 | FormattedCursorEnd = FormattingChanges.getShiftedCodePosition( |
| 344 | Position: Cursor + Incremental.CursorPlaceholder.size()); |
| 345 | tooling::Replacements RemoveCursorPlaceholder( |
| 346 | tooling::Replacement(Filename, FormattedCursorStart, |
| 347 | FormattedCursorEnd - FormattedCursorStart, "" )); |
| 348 | |
| 349 | // We can't simply merge() and return: tooling::Replacements will combine |
| 350 | // adjacent edits left and right of the cursor. This gives the right source |
| 351 | // code, but loses information about where the cursor is! |
| 352 | // Fortunately, none of the individual passes lose information, so: |
| 353 | // - we use merge() to compute the final Replacements |
| 354 | // - we chain getShiftedCodePosition() to compute final cursor position |
| 355 | // - we split the final Replacements at the cursor position, so that |
| 356 | // each Replacement lies either before or after the cursor. |
| 357 | tooling::Replacements Final; |
| 358 | unsigned FinalCursor = OriginalCursor; |
| 359 | #ifndef NDEBUG |
| 360 | std::string FinalCode = std::string(OriginalCode); |
| 361 | dlog("Initial code: {0}" , FinalCode); |
| 362 | #endif |
| 363 | for (auto Pass : |
| 364 | std::vector<std::pair<const char *, const tooling::Replacements *>>{ |
| 365 | {"Pre-formatting changes" , &Incremental.Changes}, |
| 366 | {"Insert placeholder" , &InsertCursorPlaceholder}, |
| 367 | {"clang-format" , &FormattingChanges}, |
| 368 | {"Remove placeholder" , &RemoveCursorPlaceholder}}) { |
| 369 | Final = Final.merge(Replaces: *Pass.second); |
| 370 | FinalCursor = Pass.second->getShiftedCodePosition(Position: FinalCursor); |
| 371 | #ifndef NDEBUG |
| 372 | FinalCode = |
| 373 | cantFail(ValOrErr: tooling::applyAllReplacements(Code: FinalCode, Replaces: *Pass.second)); |
| 374 | dlog("After {0}:\n{1}^{2}" , Pass.first, |
| 375 | StringRef(FinalCode).take_front(FinalCursor), |
| 376 | StringRef(FinalCode).drop_front(FinalCursor)); |
| 377 | #endif |
| 378 | } |
| 379 | return split(Replacements: Final, OldCursor: OriginalCursor, NewCursor: FinalCursor); |
| 380 | } |
| 381 | |
| 382 | unsigned |
| 383 | transformCursorPosition(unsigned Offset, |
| 384 | const std::vector<tooling::Replacement> &Replacements) { |
| 385 | unsigned OriginalOffset = Offset; |
| 386 | for (const auto &R : Replacements) { |
| 387 | if (R.getOffset() + R.getLength() <= OriginalOffset) { |
| 388 | // Replacement is before cursor. |
| 389 | Offset += R.getReplacementText().size(); |
| 390 | Offset -= R.getLength(); |
| 391 | } else if (R.getOffset() < OriginalOffset) { |
| 392 | // Replacement overlaps cursor. |
| 393 | // Preserve position within replacement text, as far as possible. |
| 394 | unsigned PositionWithinReplacement = Offset - R.getOffset(); |
| 395 | if (PositionWithinReplacement > R.getReplacementText().size()) { |
| 396 | Offset += R.getReplacementText().size(); |
| 397 | Offset -= PositionWithinReplacement; |
| 398 | } |
| 399 | } else { |
| 400 | // Replacement after cursor. |
| 401 | break; // Replacements are sorted, the rest are also after the cursor. |
| 402 | } |
| 403 | } |
| 404 | return Offset; |
| 405 | } |
| 406 | |
| 407 | } // namespace clangd |
| 408 | } // namespace clang |
| 409 | |