1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
// Forward declarations for types referenced below. LineState, ParenState and
// RawStringFormatStyleManager are defined later in this header.
class AnnotatedLine;
class BreakableToken;
class WhitespaceManager;
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
33
34struct RawStringFormatStyleManager {
35 llvm::StringMap<FormatStyle> DelimiterStyle;
36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
38 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39
40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
42 std::optional<FormatStyle>
43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44};
45
46class ContinuationIndenter {
47public:
48 /// Constructs a \c ContinuationIndenter to format \p Line starting in
49 /// column \p FirstIndent.
50 ContinuationIndenter(const FormatStyle &Style,
51 const AdditionalKeywords &Keywords,
52 const SourceManager &SourceMgr,
53 WhitespaceManager &Whitespaces,
54 encoding::Encoding Encoding,
55 bool BinPackInconclusiveFunctions);
56
57 /// Get the initial state, i.e. the state after placing \p Line's
58 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
59 /// the case of formatting inside raw string literals, \p FirstStartColumn is
60 /// the column at which the state of the parent formatter is.
61 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
62 const AnnotatedLine *Line, bool DryRun);
63
64 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
65 // better home.
66 /// Returns \c true, if a line break after \p State is allowed.
67 bool canBreak(const LineState &State);
68
69 /// Returns \c true, if a line break after \p State is mandatory.
70 bool mustBreak(const LineState &State);
71
72 /// Appends the next token to \p State and updates information
73 /// necessary for indentation.
74 ///
75 /// Puts the token on the current line if \p Newline is \c false and adds a
76 /// line break and necessary indentation otherwise.
77 ///
78 /// If \p DryRun is \c false, also creates and stores the required
79 /// \c Replacement.
80 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
81 unsigned ExtraSpaces = 0);
82
83 /// Get the column limit for this line. This is the style's column
84 /// limit, potentially reduced for preprocessor definitions.
85 unsigned getColumnLimit(const LineState &State) const;
86
87private:
88 /// Mark the next token as consumed in \p State and modify its stacks
89 /// accordingly.
90 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
91
92 /// Update 'State' according to the next token's fake left parentheses.
93 void moveStatePastFakeLParens(LineState &State, bool Newline);
94 /// Update 'State' according to the next token's fake r_parens.
95 void moveStatePastFakeRParens(LineState &State);
96
97 /// Update 'State' according to the next token being one of "(<{[".
98 void moveStatePastScopeOpener(LineState &State, bool Newline);
99 /// Update 'State' according to the next token being one of ")>}]".
100 void moveStatePastScopeCloser(LineState &State);
101 /// Update 'State' with the next token opening a nested block.
102 void moveStateToNewBlock(LineState &State, bool NewLine);
103
104 /// Reformats a raw string literal.
105 ///
106 /// \returns An extra penalty induced by reformatting the token.
107 unsigned reformatRawStringLiteral(const FormatToken &Current,
108 LineState &State,
109 const FormatStyle &RawStringStyle,
110 bool DryRun, bool Newline);
111
112 /// If the current token is at the end of the current line, handle
113 /// the transition to the next line.
114 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
115 bool DryRun, bool AllowBreak, bool Newline);
116
117 /// If \p Current is a raw string that is configured to be reformatted,
118 /// return the style to be used.
119 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
120 const LineState &State);
121
122 /// If the current token sticks out over the end of the line, break
123 /// it if possible.
124 ///
125 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
126 /// when tokens are broken or lines exceed the column limit, and exceeded
127 /// indicates whether the algorithm purposefully left lines exceeding the
128 /// column limit.
129 ///
130 /// The returned penalty will cover the cost of the additional line breaks
131 /// and column limit violation in all lines except for the last one. The
132 /// penalty for the column limit violation in the last line (and in single
133 /// line tokens) is handled in \c addNextStateToQueue.
134 ///
135 /// \p Strict indicates whether reflowing is allowed to leave characters
136 /// protruding the column limit; if true, lines will be split strictly within
137 /// the column limit where possible; if false, words are allowed to protrude
138 /// over the column limit as long as the penalty is less than the penalty
139 /// of a break.
140 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
141 LineState &State,
142 bool AllowBreak, bool DryRun,
143 bool Strict);
144
145 /// Returns the \c BreakableToken starting at \p Current, or nullptr
146 /// if the current token cannot be broken.
147 std::unique_ptr<BreakableToken>
148 createBreakableToken(const FormatToken &Current, LineState &State,
149 bool AllowBreak);
150
151 /// Appends the next token to \p State and updates information
152 /// necessary for indentation.
153 ///
154 /// Puts the token on the current line.
155 ///
156 /// If \p DryRun is \c false, also creates and stores the required
157 /// \c Replacement.
158 void addTokenOnCurrentLine(LineState &State, bool DryRun,
159 unsigned ExtraSpaces);
160
161 /// Appends the next token to \p State and updates information
162 /// necessary for indentation.
163 ///
164 /// Adds a line break and necessary indentation.
165 ///
166 /// If \p DryRun is \c false, also creates and stores the required
167 /// \c Replacement.
168 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
169
170 /// Calculate the new column for a line wrap before the next token.
171 unsigned getNewLineColumn(const LineState &State);
172
173 /// Adds a multiline token to the \p State.
174 ///
175 /// \returns Extra penalty for the first line of the literal: last line is
176 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
177 /// matter, as we don't change them.
178 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
179
180 /// Returns \c true if the next token starts a multiline string
181 /// literal.
182 ///
183 /// This includes implicitly concatenated strings, strings that will be broken
184 /// by clang-format and string literals with escaped newlines.
185 bool nextIsMultilineString(const LineState &State);
186
187 FormatStyle Style;
188 const AdditionalKeywords &Keywords;
189 const SourceManager &SourceMgr;
190 WhitespaceManager &Whitespaces;
191 encoding::Encoding Encoding;
192 bool BinPackInconclusiveFunctions;
193 llvm::Regex CommentPragmasRegex;
194 const RawStringFormatStyleManager RawStringFormats;
195};
196
197struct ParenState {
198 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
199 bool AvoidBinPacking, bool NoLineBreak)
200 : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
201 NestedBlockIndent(Indent), IsAligned(false),
202 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
203 BreakBeforeClosingAngle(false), AvoidBinPacking(AvoidBinPacking),
204 BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
205 NoLineBreakInOperand(false), LastOperatorWrapped(true),
206 ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
207 AlignColons(true), ObjCSelectorNameFound(false),
208 HasMultipleNestedBlocks(false), NestedBlockInlined(false),
209 IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false),
210 IsChainedConditional(false), IsWrappedConditional(false),
211 UnindentOperator(false) {}
212
213 /// \brief The token opening this parenthesis level, or nullptr if this level
214 /// is opened by fake parenthesis.
215 ///
216 /// Not considered for memoization as it will always have the same value at
217 /// the same token.
218 const FormatToken *Tok;
219
220 /// The position to which a specific parenthesis level needs to be
221 /// indented.
222 unsigned Indent;
223
224 /// The position of the last space on each level.
225 ///
226 /// Used e.g. to break like:
227 /// functionCall(Parameter, otherCall(
228 /// OtherParameter));
229 unsigned LastSpace;
230
231 /// If a block relative to this parenthesis level gets wrapped, indent
232 /// it this much.
233 unsigned NestedBlockIndent;
234
235 /// The position the first "<<" operator encountered on each level.
236 ///
237 /// Used to align "<<" operators. 0 if no such operator has been encountered
238 /// on a level.
239 unsigned FirstLessLess = 0;
240
241 /// The column of a \c ? in a conditional expression;
242 unsigned QuestionColumn = 0;
243
244 /// The position of the colon in an ObjC method declaration/call.
245 unsigned ColonPos = 0;
246
247 /// The start of the most recent function in a builder-type call.
248 unsigned StartOfFunctionCall = 0;
249
250 /// Contains the start of array subscript expressions, so that they
251 /// can be aligned.
252 unsigned StartOfArraySubscripts = 0;
253
254 /// If a nested name specifier was broken over multiple lines, this
255 /// contains the start column of the second line. Otherwise 0.
256 unsigned NestedNameSpecifierContinuation = 0;
257
258 /// If a call expression was broken over multiple lines, this
259 /// contains the start column of the second line. Otherwise 0.
260 unsigned CallContinuation = 0;
261
262 /// The column of the first variable name in a variable declaration.
263 ///
264 /// Used to align further variables if necessary.
265 unsigned VariablePos = 0;
266
267 /// Whether this block's indentation is used for alignment.
268 bool IsAligned : 1;
269
270 /// Whether a newline needs to be inserted before the block's closing
271 /// brace.
272 ///
273 /// We only want to insert a newline before the closing brace if there also
274 /// was a newline after the beginning left brace.
275 bool BreakBeforeClosingBrace : 1;
276
277 /// Whether a newline needs to be inserted before the block's closing
278 /// paren.
279 ///
280 /// We only want to insert a newline before the closing paren if there also
281 /// was a newline after the beginning left paren.
282 bool BreakBeforeClosingParen : 1;
283
284 /// Whether a newline needs to be inserted before a closing angle `>`.
285 bool BreakBeforeClosingAngle : 1;
286
287 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
288 /// lines, in this context.
289 bool AvoidBinPacking : 1;
290
291 /// Break after the next comma (or all the commas in this context if
292 /// \c AvoidBinPacking is \c true).
293 bool BreakBeforeParameter : 1;
294
295 /// Line breaking in this context would break a formatting rule.
296 bool NoLineBreak : 1;
297
298 /// Same as \c NoLineBreak, but is restricted until the end of the
299 /// operand (including the next ",").
300 bool NoLineBreakInOperand : 1;
301
302 /// True if the last binary operator on this level was wrapped to the
303 /// next line.
304 bool LastOperatorWrapped : 1;
305
306 /// \c true if this \c ParenState already contains a line-break.
307 ///
308 /// The first line break in a certain \c ParenState causes extra penalty so
309 /// that clang-format prefers similar breaks, i.e. breaks in the same
310 /// parenthesis.
311 bool ContainsLineBreak : 1;
312
313 /// \c true if this \c ParenState contains multiple segments of a
314 /// builder-type call on one line.
315 bool ContainsUnwrappedBuilder : 1;
316
317 /// \c true if the colons of the curren ObjC method expression should
318 /// be aligned.
319 ///
320 /// Not considered for memoization as it will always have the same value at
321 /// the same token.
322 bool AlignColons : 1;
323
324 /// \c true if at least one selector name was found in the current
325 /// ObjC method expression.
326 ///
327 /// Not considered for memoization as it will always have the same value at
328 /// the same token.
329 bool ObjCSelectorNameFound : 1;
330
331 /// \c true if there are multiple nested blocks inside these parens.
332 ///
333 /// Not considered for memoization as it will always have the same value at
334 /// the same token.
335 bool HasMultipleNestedBlocks : 1;
336
337 /// The start of a nested block (e.g. lambda introducer in C++ or
338 /// "function" in JavaScript) is not wrapped to a new line.
339 bool NestedBlockInlined : 1;
340
341 /// \c true if the current \c ParenState represents an Objective-C
342 /// array literal.
343 bool IsInsideObjCArrayLiteral : 1;
344
345 bool IsCSharpGenericTypeConstraint : 1;
346
347 /// \brief true if the current \c ParenState represents the false branch of
348 /// a chained conditional expression (e.g. else-if)
349 bool IsChainedConditional : 1;
350
351 /// \brief true if there conditionnal was wrapped on the first operator (the
352 /// question mark)
353 bool IsWrappedConditional : 1;
354
355 /// \brief Indicates the indent should be reduced by the length of the
356 /// operator.
357 bool UnindentOperator : 1;
358
359 bool operator<(const ParenState &Other) const {
360 if (Indent != Other.Indent)
361 return Indent < Other.Indent;
362 if (LastSpace != Other.LastSpace)
363 return LastSpace < Other.LastSpace;
364 if (NestedBlockIndent != Other.NestedBlockIndent)
365 return NestedBlockIndent < Other.NestedBlockIndent;
366 if (FirstLessLess != Other.FirstLessLess)
367 return FirstLessLess < Other.FirstLessLess;
368 if (IsAligned != Other.IsAligned)
369 return IsAligned;
370 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
371 return BreakBeforeClosingBrace;
372 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
373 return BreakBeforeClosingParen;
374 if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle)
375 return BreakBeforeClosingAngle;
376 if (QuestionColumn != Other.QuestionColumn)
377 return QuestionColumn < Other.QuestionColumn;
378 if (AvoidBinPacking != Other.AvoidBinPacking)
379 return AvoidBinPacking;
380 if (BreakBeforeParameter != Other.BreakBeforeParameter)
381 return BreakBeforeParameter;
382 if (NoLineBreak != Other.NoLineBreak)
383 return NoLineBreak;
384 if (LastOperatorWrapped != Other.LastOperatorWrapped)
385 return LastOperatorWrapped;
386 if (ColonPos != Other.ColonPos)
387 return ColonPos < Other.ColonPos;
388 if (StartOfFunctionCall != Other.StartOfFunctionCall)
389 return StartOfFunctionCall < Other.StartOfFunctionCall;
390 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
391 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
392 if (CallContinuation != Other.CallContinuation)
393 return CallContinuation < Other.CallContinuation;
394 if (VariablePos != Other.VariablePos)
395 return VariablePos < Other.VariablePos;
396 if (ContainsLineBreak != Other.ContainsLineBreak)
397 return ContainsLineBreak;
398 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
399 return ContainsUnwrappedBuilder;
400 if (NestedBlockInlined != Other.NestedBlockInlined)
401 return NestedBlockInlined;
402 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
403 return IsCSharpGenericTypeConstraint;
404 if (IsChainedConditional != Other.IsChainedConditional)
405 return IsChainedConditional;
406 if (IsWrappedConditional != Other.IsWrappedConditional)
407 return IsWrappedConditional;
408 if (UnindentOperator != Other.UnindentOperator)
409 return UnindentOperator;
410 return false;
411 }
412};
413
414/// The current state when indenting a unwrapped line.
415///
416/// As the indenting tries different combinations this is copied by value.
417struct LineState {
418 /// The number of used columns in the current line.
419 unsigned Column;
420
421 /// The token that needs to be next formatted.
422 FormatToken *NextToken;
423
424 /// \c true if \p NextToken should not continue this line.
425 bool NoContinuation;
426
427 /// The \c NestingLevel at the start of this line.
428 unsigned StartOfLineLevel;
429
430 /// The lowest \c NestingLevel on the current line.
431 unsigned LowestLevelOnLine;
432
433 /// The start column of the string literal, if we're in a string
434 /// literal sequence, 0 otherwise.
435 unsigned StartOfStringLiteral;
436
437 /// Disallow line breaks for this line.
438 bool NoLineBreak;
439
440 /// A stack keeping track of properties applying to parenthesis
441 /// levels.
442 SmallVector<ParenState> Stack;
443
444 /// Ignore the stack of \c ParenStates for state comparison.
445 ///
446 /// In long and deeply nested unwrapped lines, the current algorithm can
447 /// be insufficient for finding the best formatting with a reasonable amount
448 /// of time and memory. Setting this flag will effectively lead to the
449 /// algorithm not analyzing some combinations. However, these combinations
450 /// rarely contain the optimal solution: In short, accepting a higher
451 /// penalty early would need to lead to different values in the \c
452 /// ParenState stack (in an otherwise identical state) and these different
453 /// values would need to lead to a significant amount of avoided penalty
454 /// later.
455 ///
456 /// FIXME: Come up with a better algorithm instead.
457 bool IgnoreStackForComparison;
458
459 /// The indent of the first token.
460 unsigned FirstIndent;
461
462 /// The line that is being formatted.
463 ///
464 /// Does not need to be considered for memoization because it doesn't change.
465 const AnnotatedLine *Line;
466
467 /// Comparison operator to be able to used \c LineState in \c map.
468 bool operator<(const LineState &Other) const {
469 if (NextToken != Other.NextToken)
470 return NextToken < Other.NextToken;
471 if (Column != Other.Column)
472 return Column < Other.Column;
473 if (NoContinuation != Other.NoContinuation)
474 return NoContinuation;
475 if (StartOfLineLevel != Other.StartOfLineLevel)
476 return StartOfLineLevel < Other.StartOfLineLevel;
477 if (LowestLevelOnLine != Other.LowestLevelOnLine)
478 return LowestLevelOnLine < Other.LowestLevelOnLine;
479 if (StartOfStringLiteral != Other.StartOfStringLiteral)
480 return StartOfStringLiteral < Other.StartOfStringLiteral;
481 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
482 return false;
483 return Stack < Other.Stack;
484 }
485};
486
487} // end namespace format
488} // end namespace clang
489
490#endif
491

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of clang/lib/Format/ContinuationIndenter.h