1 | //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements an indenter that manages the indentation of |
11 | /// continuations. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H |
16 | #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H |
17 | |
18 | #include "Encoding.h" |
19 | #include "FormatToken.h" |
20 | |
21 | namespace clang { |
22 | class SourceManager; |
23 | |
24 | namespace format { |
25 | |
// Forward declarations for the types named by the declarations below,
// grouped by class-key.
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;

class AnnotatedLine;
class BreakableToken;
class WhitespaceManager;
33 | |
34 | struct RawStringFormatStyleManager { |
35 | llvm::StringMap<FormatStyle> DelimiterStyle; |
36 | llvm::StringMap<FormatStyle> EnclosingFunctionStyle; |
37 | |
38 | RawStringFormatStyleManager(const FormatStyle &CodeStyle); |
39 | |
40 | std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; |
41 | |
42 | std::optional<FormatStyle> |
43 | getEnclosingFunctionStyle(StringRef EnclosingFunction) const; |
44 | }; |
45 | |
46 | class ContinuationIndenter { |
47 | public: |
48 | /// Constructs a \c ContinuationIndenter to format \p Line starting in |
49 | /// column \p FirstIndent. |
50 | ContinuationIndenter(const FormatStyle &Style, |
51 | const AdditionalKeywords &Keywords, |
52 | const SourceManager &SourceMgr, |
53 | WhitespaceManager &Whitespaces, |
54 | encoding::Encoding Encoding, |
55 | bool BinPackInconclusiveFunctions); |
56 | |
57 | /// Get the initial state, i.e. the state after placing \p Line's |
58 | /// first token at \p FirstIndent. When reformatting a fragment of code, as in |
59 | /// the case of formatting inside raw string literals, \p FirstStartColumn is |
60 | /// the column at which the state of the parent formatter is. |
61 | LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, |
62 | const AnnotatedLine *Line, bool DryRun); |
63 | |
64 | // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a |
65 | // better home. |
66 | /// Returns \c true, if a line break after \p State is allowed. |
67 | bool canBreak(const LineState &State); |
68 | |
69 | /// Returns \c true, if a line break after \p State is mandatory. |
70 | bool mustBreak(const LineState &State); |
71 | |
72 | /// Appends the next token to \p State and updates information |
73 | /// necessary for indentation. |
74 | /// |
75 | /// Puts the token on the current line if \p Newline is \c false and adds a |
76 | /// line break and necessary indentation otherwise. |
77 | /// |
78 | /// If \p DryRun is \c false, also creates and stores the required |
79 | /// \c Replacement. |
80 | unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, |
81 | unsigned = 0); |
82 | |
83 | /// Get the column limit for this line. This is the style's column |
84 | /// limit, potentially reduced for preprocessor definitions. |
85 | unsigned getColumnLimit(const LineState &State) const; |
86 | |
87 | private: |
88 | /// Mark the next token as consumed in \p State and modify its stacks |
89 | /// accordingly. |
90 | unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); |
91 | |
92 | /// Update 'State' according to the next token's fake left parentheses. |
93 | void moveStatePastFakeLParens(LineState &State, bool Newline); |
94 | /// Update 'State' according to the next token's fake r_parens. |
95 | void moveStatePastFakeRParens(LineState &State); |
96 | |
97 | /// Update 'State' according to the next token being one of "(<{[". |
98 | void moveStatePastScopeOpener(LineState &State, bool Newline); |
99 | /// Update 'State' according to the next token being one of ")>}]". |
100 | void moveStatePastScopeCloser(LineState &State); |
101 | /// Update 'State' with the next token opening a nested block. |
102 | void moveStateToNewBlock(LineState &State, bool NewLine); |
103 | |
104 | /// Reformats a raw string literal. |
105 | /// |
106 | /// \returns An extra penalty induced by reformatting the token. |
107 | unsigned reformatRawStringLiteral(const FormatToken &Current, |
108 | LineState &State, |
109 | const FormatStyle &RawStringStyle, |
110 | bool DryRun, bool Newline); |
111 | |
112 | /// If the current token is at the end of the current line, handle |
113 | /// the transition to the next line. |
114 | unsigned handleEndOfLine(const FormatToken &Current, LineState &State, |
115 | bool DryRun, bool AllowBreak, bool Newline); |
116 | |
117 | /// If \p Current is a raw string that is configured to be reformatted, |
118 | /// return the style to be used. |
119 | std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current, |
120 | const LineState &State); |
121 | |
122 | /// If the current token sticks out over the end of the line, break |
123 | /// it if possible. |
124 | /// |
125 | /// \returns A pair (penalty, exceeded), where penalty is the extra penalty |
126 | /// when tokens are broken or lines exceed the column limit, and exceeded |
127 | /// indicates whether the algorithm purposefully left lines exceeding the |
128 | /// column limit. |
129 | /// |
130 | /// The returned penalty will cover the cost of the additional line breaks |
131 | /// and column limit violation in all lines except for the last one. The |
132 | /// penalty for the column limit violation in the last line (and in single |
133 | /// line tokens) is handled in \c addNextStateToQueue. |
134 | /// |
135 | /// \p Strict indicates whether reflowing is allowed to leave characters |
136 | /// protruding the column limit; if true, lines will be split strictly within |
137 | /// the column limit where possible; if false, words are allowed to protrude |
138 | /// over the column limit as long as the penalty is less than the penalty |
139 | /// of a break. |
140 | std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, |
141 | LineState &State, |
142 | bool AllowBreak, bool DryRun, |
143 | bool Strict); |
144 | |
145 | /// Returns the \c BreakableToken starting at \p Current, or nullptr |
146 | /// if the current token cannot be broken. |
147 | std::unique_ptr<BreakableToken> |
148 | createBreakableToken(const FormatToken &Current, LineState &State, |
149 | bool AllowBreak); |
150 | |
151 | /// Appends the next token to \p State and updates information |
152 | /// necessary for indentation. |
153 | /// |
154 | /// Puts the token on the current line. |
155 | /// |
156 | /// If \p DryRun is \c false, also creates and stores the required |
157 | /// \c Replacement. |
158 | void addTokenOnCurrentLine(LineState &State, bool DryRun, |
159 | unsigned ); |
160 | |
161 | /// Appends the next token to \p State and updates information |
162 | /// necessary for indentation. |
163 | /// |
164 | /// Adds a line break and necessary indentation. |
165 | /// |
166 | /// If \p DryRun is \c false, also creates and stores the required |
167 | /// \c Replacement. |
168 | unsigned addTokenOnNewLine(LineState &State, bool DryRun); |
169 | |
170 | /// Calculate the new column for a line wrap before the next token. |
171 | unsigned getNewLineColumn(const LineState &State); |
172 | |
173 | /// Adds a multiline token to the \p State. |
174 | /// |
175 | /// \returns Extra penalty for the first line of the literal: last line is |
176 | /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't |
177 | /// matter, as we don't change them. |
178 | unsigned addMultilineToken(const FormatToken &Current, LineState &State); |
179 | |
180 | /// Returns \c true if the next token starts a multiline string |
181 | /// literal. |
182 | /// |
183 | /// This includes implicitly concatenated strings, strings that will be broken |
184 | /// by clang-format and string literals with escaped newlines. |
185 | bool nextIsMultilineString(const LineState &State); |
186 | |
187 | FormatStyle Style; |
188 | const AdditionalKeywords &Keywords; |
189 | const SourceManager &SourceMgr; |
190 | WhitespaceManager &Whitespaces; |
191 | encoding::Encoding Encoding; |
192 | bool BinPackInconclusiveFunctions; |
193 | llvm::Regex ; |
194 | const RawStringFormatStyleManager RawStringFormats; |
195 | }; |
196 | |
197 | struct ParenState { |
198 | ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, |
199 | bool AvoidBinPacking, bool NoLineBreak) |
200 | : Tok(Tok), Indent(Indent), LastSpace(LastSpace), |
201 | NestedBlockIndent(Indent), IsAligned(false), |
202 | BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false), |
203 | AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), |
204 | NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), |
205 | LastOperatorWrapped(true), ContainsLineBreak(false), |
206 | ContainsUnwrappedBuilder(false), AlignColons(true), |
207 | ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), |
208 | NestedBlockInlined(false), IsInsideObjCArrayLiteral(false), |
209 | IsCSharpGenericTypeConstraint(false), IsChainedConditional(false), |
210 | IsWrappedConditional(false), UnindentOperator(false) {} |
211 | |
212 | /// \brief The token opening this parenthesis level, or nullptr if this level |
213 | /// is opened by fake parenthesis. |
214 | /// |
215 | /// Not considered for memoization as it will always have the same value at |
216 | /// the same token. |
217 | const FormatToken *Tok; |
218 | |
219 | /// The position to which a specific parenthesis level needs to be |
220 | /// indented. |
221 | unsigned Indent; |
222 | |
223 | /// The position of the last space on each level. |
224 | /// |
225 | /// Used e.g. to break like: |
226 | /// functionCall(Parameter, otherCall( |
227 | /// OtherParameter)); |
228 | unsigned LastSpace; |
229 | |
230 | /// If a block relative to this parenthesis level gets wrapped, indent |
231 | /// it this much. |
232 | unsigned NestedBlockIndent; |
233 | |
234 | /// The position the first "<<" operator encountered on each level. |
235 | /// |
236 | /// Used to align "<<" operators. 0 if no such operator has been encountered |
237 | /// on a level. |
238 | unsigned FirstLessLess = 0; |
239 | |
240 | /// The column of a \c ? in a conditional expression; |
241 | unsigned QuestionColumn = 0; |
242 | |
243 | /// The position of the colon in an ObjC method declaration/call. |
244 | unsigned ColonPos = 0; |
245 | |
246 | /// The start of the most recent function in a builder-type call. |
247 | unsigned StartOfFunctionCall = 0; |
248 | |
249 | /// Contains the start of array subscript expressions, so that they |
250 | /// can be aligned. |
251 | unsigned StartOfArraySubscripts = 0; |
252 | |
253 | /// If a nested name specifier was broken over multiple lines, this |
254 | /// contains the start column of the second line. Otherwise 0. |
255 | unsigned NestedNameSpecifierContinuation = 0; |
256 | |
257 | /// If a call expression was broken over multiple lines, this |
258 | /// contains the start column of the second line. Otherwise 0. |
259 | unsigned CallContinuation = 0; |
260 | |
261 | /// The column of the first variable name in a variable declaration. |
262 | /// |
263 | /// Used to align further variables if necessary. |
264 | unsigned VariablePos = 0; |
265 | |
266 | /// Whether this block's indentation is used for alignment. |
267 | bool IsAligned : 1; |
268 | |
269 | /// Whether a newline needs to be inserted before the block's closing |
270 | /// brace. |
271 | /// |
272 | /// We only want to insert a newline before the closing brace if there also |
273 | /// was a newline after the beginning left brace. |
274 | bool BreakBeforeClosingBrace : 1; |
275 | |
276 | /// Whether a newline needs to be inserted before the block's closing |
277 | /// paren. |
278 | /// |
279 | /// We only want to insert a newline before the closing paren if there also |
280 | /// was a newline after the beginning left paren. |
281 | bool BreakBeforeClosingParen : 1; |
282 | |
283 | /// Avoid bin packing, i.e. multiple parameters/elements on multiple |
284 | /// lines, in this context. |
285 | bool AvoidBinPacking : 1; |
286 | |
287 | /// Break after the next comma (or all the commas in this context if |
288 | /// \c AvoidBinPacking is \c true). |
289 | bool BreakBeforeParameter : 1; |
290 | |
291 | /// Line breaking in this context would break a formatting rule. |
292 | bool NoLineBreak : 1; |
293 | |
294 | /// Same as \c NoLineBreak, but is restricted until the end of the |
295 | /// operand (including the next ","). |
296 | bool NoLineBreakInOperand : 1; |
297 | |
298 | /// True if the last binary operator on this level was wrapped to the |
299 | /// next line. |
300 | bool LastOperatorWrapped : 1; |
301 | |
302 | /// \c true if this \c ParenState already contains a line-break. |
303 | /// |
304 | /// The first line break in a certain \c ParenState causes extra penalty so |
305 | /// that clang-format prefers similar breaks, i.e. breaks in the same |
306 | /// parenthesis. |
307 | bool ContainsLineBreak : 1; |
308 | |
309 | /// \c true if this \c ParenState contains multiple segments of a |
310 | /// builder-type call on one line. |
311 | bool ContainsUnwrappedBuilder : 1; |
312 | |
313 | /// \c true if the colons of the curren ObjC method expression should |
314 | /// be aligned. |
315 | /// |
316 | /// Not considered for memoization as it will always have the same value at |
317 | /// the same token. |
318 | bool AlignColons : 1; |
319 | |
320 | /// \c true if at least one selector name was found in the current |
321 | /// ObjC method expression. |
322 | /// |
323 | /// Not considered for memoization as it will always have the same value at |
324 | /// the same token. |
325 | bool ObjCSelectorNameFound : 1; |
326 | |
327 | /// \c true if there are multiple nested blocks inside these parens. |
328 | /// |
329 | /// Not considered for memoization as it will always have the same value at |
330 | /// the same token. |
331 | bool HasMultipleNestedBlocks : 1; |
332 | |
333 | /// The start of a nested block (e.g. lambda introducer in C++ or |
334 | /// "function" in JavaScript) is not wrapped to a new line. |
335 | bool NestedBlockInlined : 1; |
336 | |
337 | /// \c true if the current \c ParenState represents an Objective-C |
338 | /// array literal. |
339 | bool IsInsideObjCArrayLiteral : 1; |
340 | |
341 | bool IsCSharpGenericTypeConstraint : 1; |
342 | |
343 | /// \brief true if the current \c ParenState represents the false branch of |
344 | /// a chained conditional expression (e.g. else-if) |
345 | bool IsChainedConditional : 1; |
346 | |
347 | /// \brief true if there conditionnal was wrapped on the first operator (the |
348 | /// question mark) |
349 | bool IsWrappedConditional : 1; |
350 | |
351 | /// \brief Indicates the indent should be reduced by the length of the |
352 | /// operator. |
353 | bool UnindentOperator : 1; |
354 | |
355 | bool operator<(const ParenState &Other) const { |
356 | if (Indent != Other.Indent) |
357 | return Indent < Other.Indent; |
358 | if (LastSpace != Other.LastSpace) |
359 | return LastSpace < Other.LastSpace; |
360 | if (NestedBlockIndent != Other.NestedBlockIndent) |
361 | return NestedBlockIndent < Other.NestedBlockIndent; |
362 | if (FirstLessLess != Other.FirstLessLess) |
363 | return FirstLessLess < Other.FirstLessLess; |
364 | if (IsAligned != Other.IsAligned) |
365 | return IsAligned; |
366 | if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) |
367 | return BreakBeforeClosingBrace; |
368 | if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen) |
369 | return BreakBeforeClosingParen; |
370 | if (QuestionColumn != Other.QuestionColumn) |
371 | return QuestionColumn < Other.QuestionColumn; |
372 | if (AvoidBinPacking != Other.AvoidBinPacking) |
373 | return AvoidBinPacking; |
374 | if (BreakBeforeParameter != Other.BreakBeforeParameter) |
375 | return BreakBeforeParameter; |
376 | if (NoLineBreak != Other.NoLineBreak) |
377 | return NoLineBreak; |
378 | if (LastOperatorWrapped != Other.LastOperatorWrapped) |
379 | return LastOperatorWrapped; |
380 | if (ColonPos != Other.ColonPos) |
381 | return ColonPos < Other.ColonPos; |
382 | if (StartOfFunctionCall != Other.StartOfFunctionCall) |
383 | return StartOfFunctionCall < Other.StartOfFunctionCall; |
384 | if (StartOfArraySubscripts != Other.StartOfArraySubscripts) |
385 | return StartOfArraySubscripts < Other.StartOfArraySubscripts; |
386 | if (CallContinuation != Other.CallContinuation) |
387 | return CallContinuation < Other.CallContinuation; |
388 | if (VariablePos != Other.VariablePos) |
389 | return VariablePos < Other.VariablePos; |
390 | if (ContainsLineBreak != Other.ContainsLineBreak) |
391 | return ContainsLineBreak; |
392 | if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) |
393 | return ContainsUnwrappedBuilder; |
394 | if (NestedBlockInlined != Other.NestedBlockInlined) |
395 | return NestedBlockInlined; |
396 | if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) |
397 | return IsCSharpGenericTypeConstraint; |
398 | if (IsChainedConditional != Other.IsChainedConditional) |
399 | return IsChainedConditional; |
400 | if (IsWrappedConditional != Other.IsWrappedConditional) |
401 | return IsWrappedConditional; |
402 | if (UnindentOperator != Other.UnindentOperator) |
403 | return UnindentOperator; |
404 | return false; |
405 | } |
406 | }; |
407 | |
408 | /// The current state when indenting a unwrapped line. |
409 | /// |
410 | /// As the indenting tries different combinations this is copied by value. |
411 | struct LineState { |
412 | /// The number of used columns in the current line. |
413 | unsigned Column; |
414 | |
415 | /// The token that needs to be next formatted. |
416 | FormatToken *NextToken; |
417 | |
418 | /// \c true if \p NextToken should not continue this line. |
419 | bool NoContinuation; |
420 | |
421 | /// The \c NestingLevel at the start of this line. |
422 | unsigned StartOfLineLevel; |
423 | |
424 | /// The lowest \c NestingLevel on the current line. |
425 | unsigned LowestLevelOnLine; |
426 | |
427 | /// The start column of the string literal, if we're in a string |
428 | /// literal sequence, 0 otherwise. |
429 | unsigned StartOfStringLiteral; |
430 | |
431 | /// Disallow line breaks for this line. |
432 | bool NoLineBreak; |
433 | |
434 | /// A stack keeping track of properties applying to parenthesis |
435 | /// levels. |
436 | SmallVector<ParenState> Stack; |
437 | |
438 | /// Ignore the stack of \c ParenStates for state comparison. |
439 | /// |
440 | /// In long and deeply nested unwrapped lines, the current algorithm can |
441 | /// be insufficient for finding the best formatting with a reasonable amount |
442 | /// of time and memory. Setting this flag will effectively lead to the |
443 | /// algorithm not analyzing some combinations. However, these combinations |
444 | /// rarely contain the optimal solution: In short, accepting a higher |
445 | /// penalty early would need to lead to different values in the \c |
446 | /// ParenState stack (in an otherwise identical state) and these different |
447 | /// values would need to lead to a significant amount of avoided penalty |
448 | /// later. |
449 | /// |
450 | /// FIXME: Come up with a better algorithm instead. |
451 | bool IgnoreStackForComparison; |
452 | |
453 | /// The indent of the first token. |
454 | unsigned FirstIndent; |
455 | |
456 | /// The line that is being formatted. |
457 | /// |
458 | /// Does not need to be considered for memoization because it doesn't change. |
459 | const AnnotatedLine *Line; |
460 | |
461 | /// Comparison operator to be able to used \c LineState in \c map. |
462 | bool operator<(const LineState &Other) const { |
463 | if (NextToken != Other.NextToken) |
464 | return NextToken < Other.NextToken; |
465 | if (Column != Other.Column) |
466 | return Column < Other.Column; |
467 | if (NoContinuation != Other.NoContinuation) |
468 | return NoContinuation; |
469 | if (StartOfLineLevel != Other.StartOfLineLevel) |
470 | return StartOfLineLevel < Other.StartOfLineLevel; |
471 | if (LowestLevelOnLine != Other.LowestLevelOnLine) |
472 | return LowestLevelOnLine < Other.LowestLevelOnLine; |
473 | if (StartOfStringLiteral != Other.StartOfStringLiteral) |
474 | return StartOfStringLiteral < Other.StartOfStringLiteral; |
475 | if (IgnoreStackForComparison || Other.IgnoreStackForComparison) |
476 | return false; |
477 | return Stack < Other.Stack; |
478 | } |
479 | }; |
480 | |
481 | } // end namespace format |
482 | } // end namespace clang |
483 | |
484 | #endif |
485 | |