1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenLexer.h"
18#include "FormatTokenSource.h"
19#include "Macros.h"
20#include "TokenAnnotator.h"
21#include "clang/Basic/TokenKinds.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/raw_os_ostream.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <algorithm>
29#include <utility>
30
31#define DEBUG_TYPE "format-parser"
32
33namespace clang {
34namespace format {
35
36namespace {
37
38void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
49 }
50 OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
51 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
52 << "\"] ";
53 for (SmallVectorImpl<UnwrappedLine>::const_iterator
54 CI = I->Children.begin(),
55 CE = I->Children.end();
56 CI != CE; ++CI) {
57 OS << "\n";
58 printLine(OS, Line: *CI, Prefix: (Prefix + " ").str());
59 NewLine = true;
60 }
61 }
62 if (!NewLine)
63 OS << "\n";
64}
65
66LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
67 printLine(OS&: llvm::dbgs(), Line);
68}
69
70class ScopedDeclarationState {
71public:
72 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
73 bool MustBeDeclaration)
74 : Line(Line), Stack(Stack) {
75 Line.MustBeDeclaration = MustBeDeclaration;
76 Stack.push_back(Val: MustBeDeclaration);
77 }
78 ~ScopedDeclarationState() {
79 Stack.pop_back();
80 if (!Stack.empty())
81 Line.MustBeDeclaration = Stack.back();
82 else
83 Line.MustBeDeclaration = true;
84 }
85
86private:
87 UnwrappedLine &Line;
88 llvm::BitVector &Stack;
89};
90
91} // end anonymous namespace
92
93std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
94 llvm::raw_os_ostream OS(Stream);
95 printLine(OS, Line);
96 return Stream;
97}
98
99class ScopedLineState {
100public:
101 ScopedLineState(UnwrappedLineParser &Parser,
102 bool SwitchToPreprocessorLines = false)
103 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
104 if (SwitchToPreprocessorLines)
105 Parser.CurrentLines = &Parser.PreprocessorDirectives;
106 else if (!Parser.Line->Tokens.empty())
107 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
108 PreBlockLine = std::move(Parser.Line);
109 Parser.Line = std::make_unique<UnwrappedLine>();
110 Parser.Line->Level = PreBlockLine->Level;
111 Parser.Line->PPLevel = PreBlockLine->PPLevel;
112 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
113 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
114 }
115
116 ~ScopedLineState() {
117 if (!Parser.Line->Tokens.empty())
118 Parser.addUnwrappedLine();
119 assert(Parser.Line->Tokens.empty());
120 Parser.Line = std::move(PreBlockLine);
121 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
122 Parser.MustBreakBeforeNextToken = true;
123 Parser.CurrentLines = OriginalLines;
124 }
125
126private:
127 UnwrappedLineParser &Parser;
128
129 std::unique_ptr<UnwrappedLine> PreBlockLine;
130 SmallVectorImpl<UnwrappedLine> *OriginalLines;
131};
132
133class CompoundStatementIndenter {
134public:
135 CompoundStatementIndenter(UnwrappedLineParser *Parser,
136 const FormatStyle &Style, unsigned &LineLevel)
137 : CompoundStatementIndenter(Parser, LineLevel,
138 Style.BraceWrapping.AfterControlStatement,
139 Style.BraceWrapping.IndentBraces) {}
140 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
141 bool WrapBrace, bool IndentBrace)
142 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
143 if (WrapBrace)
144 Parser->addUnwrappedLine();
145 if (IndentBrace)
146 ++LineLevel;
147 }
148 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
149
150private:
151 unsigned &LineLevel;
152 unsigned OldLineLevel;
153};
154
155UnwrappedLineParser::UnwrappedLineParser(
156 SourceManager &SourceMgr, const FormatStyle &Style,
157 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
158 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
159 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
160 IdentifierTable &IdentTable)
161 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
162 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
163 Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas),
164 Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
166 ? IG_Rejected
167 : IG_Inited),
168 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
169 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
170
171void UnwrappedLineParser::reset() {
172 PPBranchLevel = -1;
173 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
174 ? IG_Rejected
175 : IG_Inited;
176 IncludeGuardToken = nullptr;
177 Line.reset(p: new UnwrappedLine);
178 CommentsBeforeNextToken.clear();
179 FormatTok = nullptr;
180 MustBreakBeforeNextToken = false;
181 IsDecltypeAutoFunction = false;
182 PreprocessorDirectives.clear();
183 CurrentLines = &Lines;
184 DeclarationScopeStack.clear();
185 NestedTooDeep.clear();
186 NestedLambdas.clear();
187 PPStack.clear();
188 Line->FirstStartColumn = FirstStartColumn;
189
190 if (!Unexpanded.empty())
191 for (FormatToken *Token : AllTokens)
192 Token->MacroCtx.reset();
193 CurrentExpandedLines.clear();
194 ExpandedLines.clear();
195 Unexpanded.clear();
196 InExpansion = false;
197 Reconstruct.reset();
198}
199
200void UnwrappedLineParser::parse() {
201 IndexedTokenSource TokenSource(AllTokens);
202 Line->FirstStartColumn = FirstStartColumn;
203 do {
204 LLVM_DEBUG(llvm::dbgs() << "----\n");
205 reset();
206 Tokens = &TokenSource;
207 TokenSource.reset();
208
209 readToken();
210 parseFile();
211
212 // If we found an include guard then all preprocessor directives (other than
213 // the guard) are over-indented by one.
214 if (IncludeGuard == IG_Found) {
215 for (auto &Line : Lines)
216 if (Line.InPPDirective && Line.Level > 0)
217 --Line.Level;
218 }
219
220 // Create line with eof token.
221 assert(eof());
222 pushToken(Tok: FormatTok);
223 addUnwrappedLine();
224
225 // In a first run, format everything with the lines containing macro calls
226 // replaced by the expansion.
227 if (!ExpandedLines.empty()) {
228 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
229 for (const auto &Line : Lines) {
230 if (!Line.Tokens.empty()) {
231 auto it = ExpandedLines.find(Val: Line.Tokens.begin()->Tok);
232 if (it != ExpandedLines.end()) {
233 for (const auto &Expanded : it->second) {
234 LLVM_DEBUG(printDebugInfo(Expanded));
235 Callback.consumeUnwrappedLine(Line: Expanded);
236 }
237 continue;
238 }
239 }
240 LLVM_DEBUG(printDebugInfo(Line));
241 Callback.consumeUnwrappedLine(Line);
242 }
243 Callback.finishRun();
244 }
245
246 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
247 for (const UnwrappedLine &Line : Lines) {
248 LLVM_DEBUG(printDebugInfo(Line));
249 Callback.consumeUnwrappedLine(Line);
250 }
251 Callback.finishRun();
252 Lines.clear();
253 while (!PPLevelBranchIndex.empty() &&
254 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
255 PPLevelBranchIndex.resize(N: PPLevelBranchIndex.size() - 1);
256 PPLevelBranchCount.resize(N: PPLevelBranchCount.size() - 1);
257 }
258 if (!PPLevelBranchIndex.empty()) {
259 ++PPLevelBranchIndex.back();
260 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
261 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
262 }
263 } while (!PPLevelBranchIndex.empty());
264}
265
266void UnwrappedLineParser::parseFile() {
267 // The top-level context in a file always has declarations, except for pre-
268 // processor directives and JavaScript files.
269 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
270 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
271 MustBeDeclaration);
272 if (Style.Language == FormatStyle::LK_TextProto)
273 parseBracedList();
274 else
275 parseLevel();
276 // Make sure to format the remaining tokens.
277 //
278 // LK_TextProto is special since its top-level is parsed as the body of a
279 // braced list, which does not necessarily have natural line separators such
280 // as a semicolon. Comments after the last entry that have been determined to
281 // not belong to that line, as in:
282 // key: value
283 // // endfile comment
284 // do not have a chance to be put on a line of their own until this point.
285 // Here we add this newline before end-of-file comments.
286 if (Style.Language == FormatStyle::LK_TextProto &&
287 !CommentsBeforeNextToken.empty()) {
288 addUnwrappedLine();
289 }
290 flushComments(NewlineBeforeNext: true);
291 addUnwrappedLine();
292}
293
294void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
295 do {
296 switch (FormatTok->Tok.getKind()) {
297 case tok::l_brace:
298 return;
299 default:
300 if (FormatTok->is(II: Keywords.kw_where)) {
301 addUnwrappedLine();
302 nextToken();
303 parseCSharpGenericTypeConstraint();
304 break;
305 }
306 nextToken();
307 break;
308 }
309 } while (!eof());
310}
311
312void UnwrappedLineParser::parseCSharpAttribute() {
313 int UnpairedSquareBrackets = 1;
314 do {
315 switch (FormatTok->Tok.getKind()) {
316 case tok::r_square:
317 nextToken();
318 --UnpairedSquareBrackets;
319 if (UnpairedSquareBrackets == 0) {
320 addUnwrappedLine();
321 return;
322 }
323 break;
324 case tok::l_square:
325 ++UnpairedSquareBrackets;
326 nextToken();
327 break;
328 default:
329 nextToken();
330 break;
331 }
332 } while (!eof());
333}
334
335bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
336 if (!Lines.empty() && Lines.back().InPPDirective)
337 return true;
338
339 const FormatToken *Previous = Tokens->getPreviousToken();
340 return Previous && Previous->is(Kind: tok::comment) &&
341 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
342}
343
344/// \brief Parses a level, that is ???.
345/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
346/// \param IfKind The \p if statement kind in the level.
347/// \param IfLeftBrace The left brace of the \p if block in the level.
348/// \returns true if a simple block of if/else/for/while, or false otherwise.
349/// (A simple block has a single statement.)
350bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
351 IfStmtKind *IfKind,
352 FormatToken **IfLeftBrace) {
353 const bool InRequiresExpression =
354 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
355 const bool IsPrecededByCommentOrPPDirective =
356 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
357 FormatToken *IfLBrace = nullptr;
358 bool HasDoWhile = false;
359 bool HasLabel = false;
360 unsigned StatementCount = 0;
361 bool SwitchLabelEncountered = false;
362
363 do {
364 if (FormatTok->isAttribute()) {
365 nextToken();
366 continue;
367 }
368 tok::TokenKind Kind = FormatTok->Tok.getKind();
369 if (FormatTok->is(TT: TT_MacroBlockBegin))
370 Kind = tok::l_brace;
371 else if (FormatTok->is(TT: TT_MacroBlockEnd))
372 Kind = tok::r_brace;
373
374 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
375 &HasLabel, &StatementCount] {
376 parseStructuralElement(OpeningBrace, IfKind, IfLeftBrace: &IfLBrace,
377 HasDoWhile: HasDoWhile ? nullptr : &HasDoWhile,
378 HasLabel: HasLabel ? nullptr : &HasLabel);
379 ++StatementCount;
380 assert(StatementCount > 0 && "StatementCount overflow!");
381 };
382
383 switch (Kind) {
384 case tok::comment:
385 nextToken();
386 addUnwrappedLine();
387 break;
388 case tok::l_brace:
389 if (InRequiresExpression) {
390 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
391 } else if (FormatTok->Previous &&
392 FormatTok->Previous->ClosesRequiresClause) {
393 // We need the 'default' case here to correctly parse a function
394 // l_brace.
395 ParseDefault();
396 continue;
397 }
398 if (!InRequiresExpression && FormatTok->isNot(Kind: TT_MacroBlockBegin)) {
399 if (tryToParseBracedList())
400 continue;
401 FormatTok->setFinalizedType(TT_BlockLBrace);
402 }
403 parseBlock();
404 ++StatementCount;
405 assert(StatementCount > 0 && "StatementCount overflow!");
406 addUnwrappedLine();
407 break;
408 case tok::r_brace:
409 if (OpeningBrace) {
410 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
411 !OpeningBrace->isOneOf(K1: TT_ControlStatementLBrace, K2: TT_ElseLBrace)) {
412 return false;
413 }
414 if (FormatTok->isNot(Kind: tok::r_brace) || StatementCount != 1 || HasLabel ||
415 HasDoWhile || IsPrecededByCommentOrPPDirective ||
416 precededByCommentOrPPDirective()) {
417 return false;
418 }
419 const FormatToken *Next = Tokens->peekNextToken();
420 if (Next->is(Kind: tok::comment) && Next->NewlinesBefore == 0)
421 return false;
422 if (IfLeftBrace)
423 *IfLeftBrace = IfLBrace;
424 return true;
425 }
426 nextToken();
427 addUnwrappedLine();
428 break;
429 case tok::kw_default: {
430 unsigned StoredPosition = Tokens->getPosition();
431 auto *Next = Tokens->getNextNonComment();
432 FormatTok = Tokens->setPosition(StoredPosition);
433 if (Next->isNot(Kind: tok::colon)) {
434 // default not followed by ':' is not a case label; treat it like
435 // an identifier.
436 parseStructuralElement();
437 break;
438 }
439 // Else, if it is 'default:', fall through to the case handling.
440 [[fallthrough]];
441 }
442 case tok::kw_case:
443 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
444 (Style.isJavaScript() && Line->MustBeDeclaration)) {
445 // Proto: there are no switch/case statements
446 // Verilog: Case labels don't have this word. We handle case
447 // labels including default in TokenAnnotator.
448 // JavaScript: A 'case: string' style field declaration.
449 ParseDefault();
450 break;
451 }
452 if (!SwitchLabelEncountered &&
453 (Style.IndentCaseLabels ||
454 (Line->InPPDirective && Line->Level == 1))) {
455 ++Line->Level;
456 }
457 SwitchLabelEncountered = true;
458 parseStructuralElement();
459 break;
460 case tok::l_square:
461 if (Style.isCSharp()) {
462 nextToken();
463 parseCSharpAttribute();
464 break;
465 }
466 if (handleCppAttributes())
467 break;
468 [[fallthrough]];
469 default:
470 ParseDefault();
471 break;
472 }
473 } while (!eof());
474
475 return false;
476}
477
478void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
479 // We'll parse forward through the tokens until we hit
480 // a closing brace or eof - note that getNextToken() will
481 // parse macros, so this will magically work inside macro
482 // definitions, too.
483 unsigned StoredPosition = Tokens->getPosition();
484 FormatToken *Tok = FormatTok;
485 const FormatToken *PrevTok = Tok->Previous;
486 // Keep a stack of positions of lbrace tokens. We will
487 // update information about whether an lbrace starts a
488 // braced init list or a different block during the loop.
489 struct StackEntry {
490 FormatToken *Tok;
491 const FormatToken *PrevTok;
492 };
493 SmallVector<StackEntry, 8> LBraceStack;
494 assert(Tok->is(tok::l_brace));
495
496 do {
497 auto *NextTok = Tokens->getNextNonComment();
498
499 if (!Line->InMacroBody && !Style.isTableGen()) {
500 // Skip PPDirective lines and comments.
501 while (NextTok->is(Kind: tok::hash)) {
502 do {
503 NextTok = Tokens->getNextToken();
504 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(Kind: tok::eof));
505
506 while (NextTok->is(Kind: tok::comment))
507 NextTok = Tokens->getNextToken();
508 }
509 }
510
511 switch (Tok->Tok.getKind()) {
512 case tok::l_brace:
513 if (Style.isJavaScript() && PrevTok) {
514 if (PrevTok->isOneOf(K1: tok::colon, K2: tok::less)) {
515 // A ':' indicates this code is in a type, or a braced list
516 // following a label in an object literal ({a: {b: 1}}).
517 // A '<' could be an object used in a comparison, but that is nonsense
518 // code (can never return true), so more likely it is a generic type
519 // argument (`X<{a: string; b: number}>`).
520 // The code below could be confused by semicolons between the
521 // individual members in a type member list, which would normally
522 // trigger BK_Block. In both cases, this must be parsed as an inline
523 // braced init.
524 Tok->setBlockKind(BK_BracedInit);
525 } else if (PrevTok->is(Kind: tok::r_paren)) {
526 // `) { }` can only occur in function or method declarations in JS.
527 Tok->setBlockKind(BK_Block);
528 }
529 } else {
530 Tok->setBlockKind(BK_Unknown);
531 }
532 LBraceStack.push_back(Elt: {.Tok: Tok, .PrevTok: PrevTok});
533 break;
534 case tok::r_brace:
535 if (LBraceStack.empty())
536 break;
537 if (LBraceStack.back().Tok->is(BBK: BK_Unknown)) {
538 bool ProbablyBracedList = false;
539 if (Style.Language == FormatStyle::LK_Proto) {
540 ProbablyBracedList = NextTok->isOneOf(K1: tok::comma, K2: tok::r_square);
541 } else {
542 // Using OriginalColumn to distinguish between ObjC methods and
543 // binary operators is a bit hacky.
544 bool NextIsObjCMethod = NextTok->isOneOf(K1: tok::plus, K2: tok::minus) &&
545 NextTok->OriginalColumn == 0;
546
547 // Try to detect a braced list. Note that regardless how we mark inner
548 // braces here, we will overwrite the BlockKind later if we parse a
549 // braced list (where all blocks inside are by default braced lists),
550 // or when we explicitly detect blocks (for example while parsing
551 // lambdas).
552
553 // If we already marked the opening brace as braced list, the closing
554 // must also be part of it.
555 ProbablyBracedList = LBraceStack.back().Tok->is(TT: TT_BracedListLBrace);
556
557 ProbablyBracedList = ProbablyBracedList ||
558 (Style.isJavaScript() &&
559 NextTok->isOneOf(K1: Keywords.kw_of, K2: Keywords.kw_in,
560 Ks: Keywords.kw_as));
561 ProbablyBracedList =
562 ProbablyBracedList || (IsCpp && NextTok->is(Kind: tok::l_paren));
563
564 // If there is a comma, semicolon or right paren after the closing
565 // brace, we assume this is a braced initializer list.
566 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
567 // braced list in JS.
568 ProbablyBracedList =
569 ProbablyBracedList ||
570 NextTok->isOneOf(K1: tok::comma, K2: tok::period, Ks: tok::colon,
571 Ks: tok::r_paren, Ks: tok::r_square, Ks: tok::ellipsis);
572
573 // Distinguish between braced list in a constructor initializer list
574 // followed by constructor body, or just adjacent blocks.
575 ProbablyBracedList =
576 ProbablyBracedList ||
577 (NextTok->is(Kind: tok::l_brace) && LBraceStack.back().PrevTok &&
578 LBraceStack.back().PrevTok->isOneOf(K1: tok::identifier,
579 K2: tok::greater));
580
581 ProbablyBracedList =
582 ProbablyBracedList ||
583 (NextTok->is(Kind: tok::identifier) &&
584 !PrevTok->isOneOf(K1: tok::semi, K2: tok::r_brace, Ks: tok::l_brace));
585
586 ProbablyBracedList = ProbablyBracedList ||
587 (NextTok->is(Kind: tok::semi) &&
588 (!ExpectClassBody || LBraceStack.size() != 1));
589
590 ProbablyBracedList =
591 ProbablyBracedList ||
592 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
593
594 if (!Style.isCSharp() && NextTok->is(Kind: tok::l_square)) {
595 // We can have an array subscript after a braced init
596 // list, but C++11 attributes are expected after blocks.
597 NextTok = Tokens->getNextToken();
598 ProbablyBracedList = NextTok->isNot(Kind: tok::l_square);
599 }
600
601 // Cpp macro definition body that is a nonempty braced list or block:
602 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
603 !FormatTok->Previous && NextTok->is(Kind: tok::eof) &&
604 // A statement can end with only `;` (simple statement), a block
605 // closing brace (compound statement), or `:` (label statement).
606 // If PrevTok is a block opening brace, Tok ends an empty block.
607 !PrevTok->isOneOf(K1: tok::semi, K2: BK_Block, Ks: tok::colon)) {
608 ProbablyBracedList = true;
609 }
610 }
611 if (ProbablyBracedList) {
612 Tok->setBlockKind(BK_BracedInit);
613 LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
614 } else {
615 Tok->setBlockKind(BK_Block);
616 LBraceStack.back().Tok->setBlockKind(BK_Block);
617 }
618 }
619 LBraceStack.pop_back();
620 break;
621 case tok::identifier:
622 if (Tok->isNot(Kind: TT_StatementMacro))
623 break;
624 [[fallthrough]];
625 case tok::at:
626 case tok::semi:
627 case tok::kw_if:
628 case tok::kw_while:
629 case tok::kw_for:
630 case tok::kw_switch:
631 case tok::kw_try:
632 case tok::kw___try:
633 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BBK: BK_Unknown))
634 LBraceStack.back().Tok->setBlockKind(BK_Block);
635 break;
636 default:
637 break;
638 }
639
640 PrevTok = Tok;
641 Tok = NextTok;
642 } while (Tok->isNot(Kind: tok::eof) && !LBraceStack.empty());
643
644 // Assume other blocks for all unclosed opening braces.
645 for (const auto &Entry : LBraceStack)
646 if (Entry.Tok->is(BBK: BK_Unknown))
647 Entry.Tok->setBlockKind(BK_Block);
648
649 FormatTok = Tokens->setPosition(StoredPosition);
650}
651
652// Sets the token type of the directly previous right brace.
653void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
654 if (auto Prev = FormatTok->getPreviousNonComment();
655 Prev && Prev->is(Kind: tok::r_brace)) {
656 Prev->setFinalizedType(Type);
657 }
658}
659
// Folds the std::hash of `v` into `seed`: the hash, the constant 0x9e3779b9
// and two shifted copies of the old seed are summed and XOR-ed into the seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
665
666size_t UnwrappedLineParser::computePPHash() const {
667 size_t h = 0;
668 for (const auto &i : PPStack) {
669 hash_combine(seed&: h, v: size_t(i.Kind));
670 hash_combine(seed&: h, v: i.Line);
671 }
672 return h;
673}
674
675// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
676// is not null, subtracts its length (plus the preceding space) when computing
677// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
678// running the token annotator on it so that we can restore them afterward.
679bool UnwrappedLineParser::mightFitOnOneLine(
680 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
681 const auto ColumnLimit = Style.ColumnLimit;
682 if (ColumnLimit == 0)
683 return true;
684
685 auto &Tokens = ParsedLine.Tokens;
686 assert(!Tokens.empty());
687
688 const auto *LastToken = Tokens.back().Tok;
689 assert(LastToken);
690
691 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
692
693 int Index = 0;
694 for (const auto &Token : Tokens) {
695 assert(Token.Tok);
696 auto &SavedToken = SavedTokens[Index++];
697 SavedToken.Tok = new FormatToken;
698 SavedToken.Tok->copyFrom(Tok: *Token.Tok);
699 SavedToken.Children = std::move(Token.Children);
700 }
701
702 AnnotatedLine Line(ParsedLine);
703 assert(Line.Last == LastToken);
704
705 TokenAnnotator Annotator(Style, Keywords);
706 Annotator.annotate(Line);
707 Annotator.calculateFormattingInformation(Line);
708
709 auto Length = LastToken->TotalLength;
710 if (OpeningBrace) {
711 assert(OpeningBrace != Tokens.front().Tok);
712 if (auto Prev = OpeningBrace->Previous;
713 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
714 Length -= ColumnLimit;
715 }
716 Length -= OpeningBrace->TokenText.size() + 1;
717 }
718
719 if (const auto *FirstToken = Line.First; FirstToken->is(Kind: tok::r_brace)) {
720 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
721 Length -= FirstToken->TokenText.size() + 1;
722 }
723
724 Index = 0;
725 for (auto &Token : Tokens) {
726 const auto &SavedToken = SavedTokens[Index++];
727 Token.Tok->copyFrom(Tok: *SavedToken.Tok);
728 Token.Children = std::move(SavedToken.Children);
729 delete SavedToken.Tok;
730 }
731
732 // If these change PPLevel needs to be used for get correct indentation.
733 assert(!Line.InMacroBody);
734 assert(!Line.InPPDirective);
735 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
736}
737
738FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
739 unsigned AddLevels, bool MunchSemi,
740 bool KeepBraces,
741 IfStmtKind *IfKind,
742 bool UnindentWhitesmithsBraces) {
743 auto HandleVerilogBlockLabel = [this]() {
744 // ":" name
745 if (Style.isVerilog() && FormatTok->is(Kind: tok::colon)) {
746 nextToken();
747 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
748 nextToken();
749 }
750 };
751
752 // Whether this is a Verilog-specific block that has a special header like a
753 // module.
754 const bool VerilogHierarchy =
755 Style.isVerilog() && Keywords.isVerilogHierarchy(Tok: *FormatTok);
756 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
757 (Style.isVerilog() &&
758 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
759 "'{' or macro block token expected");
760 FormatToken *Tok = FormatTok;
761 const bool FollowedByComment = Tokens->peekNextToken()->is(Kind: tok::comment);
762 auto Index = CurrentLines->size();
763 const bool MacroBlock = FormatTok->is(TT: TT_MacroBlockBegin);
764 FormatTok->setBlockKind(BK_Block);
765
766 // For Whitesmiths mode, jump to the next level prior to skipping over the
767 // braces.
768 if (!VerilogHierarchy && AddLevels > 0 &&
769 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
770 ++Line->Level;
771 }
772
773 size_t PPStartHash = computePPHash();
774
775 const unsigned InitialLevel = Line->Level;
776 if (VerilogHierarchy) {
777 AddLevels += parseVerilogHierarchyHeader();
778 } else {
779 nextToken(/*LevelDifference=*/AddLevels);
780 HandleVerilogBlockLabel();
781 }
782
783 // Bail out if there are too many levels. Otherwise, the stack might overflow.
784 if (Line->Level > 300)
785 return nullptr;
786
787 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
788 parseParens();
789
790 size_t NbPreprocessorDirectives =
791 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
792 addUnwrappedLine();
793 size_t OpeningLineIndex =
794 CurrentLines->empty()
795 ? (UnwrappedLine::kInvalidIndex)
796 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
797
798 // Whitesmiths is weird here. The brace needs to be indented for the namespace
799 // block, but the block itself may not be indented depending on the style
800 // settings. This allows the format to back up one level in those cases.
801 if (UnindentWhitesmithsBraces)
802 --Line->Level;
803
804 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
805 MustBeDeclaration);
806 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
807 Line->Level += AddLevels;
808
809 FormatToken *IfLBrace = nullptr;
810 const bool SimpleBlock = parseLevel(OpeningBrace: Tok, IfKind, IfLeftBrace: &IfLBrace);
811
812 if (eof())
813 return IfLBrace;
814
815 if (MacroBlock ? FormatTok->isNot(Kind: TT_MacroBlockEnd)
816 : FormatTok->isNot(Kind: tok::r_brace)) {
817 Line->Level = InitialLevel;
818 FormatTok->setBlockKind(BK_Block);
819 return IfLBrace;
820 }
821
822 if (FormatTok->is(Kind: tok::r_brace)) {
823 FormatTok->setBlockKind(BK_Block);
824 if (Tok->is(TT: TT_NamespaceLBrace))
825 FormatTok->setFinalizedType(TT_NamespaceRBrace);
826 }
827
828 const bool IsFunctionRBrace =
829 FormatTok->is(Kind: tok::r_brace) && Tok->is(TT: TT_FunctionLBrace);
830
831 auto RemoveBraces = [=]() mutable {
832 if (!SimpleBlock)
833 return false;
834 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
835 assert(FormatTok->is(tok::r_brace));
836 const bool WrappedOpeningBrace = !Tok->Previous;
837 if (WrappedOpeningBrace && FollowedByComment)
838 return false;
839 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
840 if (KeepBraces && !HasRequiredIfBraces)
841 return false;
842 if (Tok->isNot(Kind: TT_ElseLBrace) || !HasRequiredIfBraces) {
843 const FormatToken *Previous = Tokens->getPreviousToken();
844 assert(Previous);
845 if (Previous->is(Kind: tok::r_brace) && !Previous->Optional)
846 return false;
847 }
848 assert(!CurrentLines->empty());
849 auto &LastLine = CurrentLines->back();
850 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(ParsedLine&: LastLine))
851 return false;
852 if (Tok->is(TT: TT_ElseLBrace))
853 return true;
854 if (WrappedOpeningBrace) {
855 assert(Index > 0);
856 --Index; // The line above the wrapped l_brace.
857 Tok = nullptr;
858 }
859 return mightFitOnOneLine(ParsedLine&: (*CurrentLines)[Index], OpeningBrace: Tok);
860 };
861 if (RemoveBraces()) {
862 Tok->MatchingParen = FormatTok;
863 FormatTok->MatchingParen = Tok;
864 }
865
866 size_t PPEndHash = computePPHash();
867
868 // Munch the closing brace.
869 nextToken(/*LevelDifference=*/-AddLevels);
870
871 // When this is a function block and there is an unnecessary semicolon
872 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
873 // it later).
874 if (Style.RemoveSemicolon && IsFunctionRBrace) {
875 while (FormatTok->is(Kind: tok::semi)) {
876 FormatTok->Optional = true;
877 nextToken();
878 }
879 }
880
881 HandleVerilogBlockLabel();
882
883 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
884 parseParens();
885
886 Line->Level = InitialLevel;
887
888 if (FormatTok->is(Kind: tok::kw_noexcept)) {
889 // A noexcept in a requires expression.
890 nextToken();
891 }
892
893 if (FormatTok->is(Kind: tok::arrow)) {
894 // Following the } or noexcept we can find a trailing return type arrow
895 // as part of an implicit conversion constraint.
896 nextToken();
897 parseStructuralElement();
898 }
899
900 if (MunchSemi && FormatTok->is(Kind: tok::semi))
901 nextToken();
902
903 if (PPStartHash == PPEndHash) {
904 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
905 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
906 // Update the opening line to add the forward reference as well
907 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
908 CurrentLines->size() - 1;
909 }
910 }
911
912 return IfLBrace;
913}
914
915static bool isGoogScope(const UnwrappedLine &Line) {
916 // FIXME: Closure-library specific stuff should not be hard-coded but be
917 // configurable.
918 if (Line.Tokens.size() < 4)
919 return false;
920 auto I = Line.Tokens.begin();
921 if (I->Tok->TokenText != "goog")
922 return false;
923 ++I;
924 if (I->Tok->isNot(Kind: tok::period))
925 return false;
926 ++I;
927 if (I->Tok->TokenText != "scope")
928 return false;
929 ++I;
930 return I->Tok->is(Kind: tok::l_paren);
931}
932
933static bool isIIFE(const UnwrappedLine &Line,
934 const AdditionalKeywords &Keywords) {
935 // Look for the start of an immediately invoked anonymous function.
936 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
937 // This is commonly done in JavaScript to create a new, anonymous scope.
938 // Example: (function() { ... })()
939 if (Line.Tokens.size() < 3)
940 return false;
941 auto I = Line.Tokens.begin();
942 if (I->Tok->isNot(Kind: tok::l_paren))
943 return false;
944 ++I;
945 if (I->Tok->isNot(Kind: Keywords.kw_function))
946 return false;
947 ++I;
948 return I->Tok->is(Kind: tok::l_paren);
949}
950
951static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
952 const FormatToken &InitialToken) {
953 tok::TokenKind Kind = InitialToken.Tok.getKind();
954 if (InitialToken.is(TT: TT_NamespaceMacro))
955 Kind = tok::kw_namespace;
956
957 switch (Kind) {
958 case tok::kw_namespace:
959 return Style.BraceWrapping.AfterNamespace;
960 case tok::kw_class:
961 return Style.BraceWrapping.AfterClass;
962 case tok::kw_union:
963 return Style.BraceWrapping.AfterUnion;
964 case tok::kw_struct:
965 return Style.BraceWrapping.AfterStruct;
966 case tok::kw_enum:
967 return Style.BraceWrapping.AfterEnum;
968 default:
969 return false;
970 }
971}
972
973void UnwrappedLineParser::parseChildBlock() {
974 assert(FormatTok->is(tok::l_brace));
975 FormatTok->setBlockKind(BK_Block);
976 const FormatToken *OpeningBrace = FormatTok;
977 nextToken();
978 {
979 bool SkipIndent = (Style.isJavaScript() &&
980 (isGoogScope(Line: *Line) || isIIFE(Line: *Line, Keywords)));
981 ScopedLineState LineState(*this);
982 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
983 /*MustBeDeclaration=*/false);
984 Line->Level += SkipIndent ? 0 : 1;
985 parseLevel(OpeningBrace);
986 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
987 Line->Level -= SkipIndent ? 0 : 1;
988 }
989 nextToken();
990}
991
992void UnwrappedLineParser::parsePPDirective() {
993 assert(FormatTok->is(tok::hash) && "'#' expected");
994 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
995
996 nextToken();
997
998 if (!FormatTok->Tok.getIdentifierInfo()) {
999 parsePPUnknown();
1000 return;
1001 }
1002
1003 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1004 case tok::pp_define:
1005 parsePPDefine();
1006 return;
1007 case tok::pp_if:
1008 parsePPIf(/*IfDef=*/false);
1009 break;
1010 case tok::pp_ifdef:
1011 case tok::pp_ifndef:
1012 parsePPIf(/*IfDef=*/true);
1013 break;
1014 case tok::pp_else:
1015 case tok::pp_elifdef:
1016 case tok::pp_elifndef:
1017 case tok::pp_elif:
1018 parsePPElse();
1019 break;
1020 case tok::pp_endif:
1021 parsePPEndIf();
1022 break;
1023 case tok::pp_pragma:
1024 parsePPPragma();
1025 break;
1026 default:
1027 parsePPUnknown();
1028 break;
1029 }
1030}
1031
1032void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1033 size_t Line = CurrentLines->size();
1034 if (CurrentLines == &PreprocessorDirectives)
1035 Line += Lines.size();
1036
1037 if (Unreachable ||
1038 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1039 PPStack.push_back(Elt: {PP_Unreachable, Line});
1040 } else {
1041 PPStack.push_back(Elt: {PP_Conditional, Line});
1042 }
1043}
1044
1045void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1046 ++PPBranchLevel;
1047 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1048 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1049 PPLevelBranchIndex.push_back(Elt: 0);
1050 PPLevelBranchCount.push_back(Elt: 0);
1051 }
1052 PPChainBranchIndex.push(x: Unreachable ? -1 : 0);
1053 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1054 conditionalCompilationCondition(Unreachable: Unreachable || Skip);
1055}
1056
1057void UnwrappedLineParser::conditionalCompilationAlternative() {
1058 if (!PPStack.empty())
1059 PPStack.pop_back();
1060 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1061 if (!PPChainBranchIndex.empty())
1062 ++PPChainBranchIndex.top();
1063 conditionalCompilationCondition(
1064 Unreachable: PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1065 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1066}
1067
1068void UnwrappedLineParser::conditionalCompilationEnd() {
1069 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1070 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1071 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1072 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1073 }
1074 // Guard against #endif's without #if.
1075 if (PPBranchLevel > -1)
1076 --PPBranchLevel;
1077 if (!PPChainBranchIndex.empty())
1078 PPChainBranchIndex.pop();
1079 if (!PPStack.empty())
1080 PPStack.pop_back();
1081}
1082
1083void UnwrappedLineParser::parsePPIf(bool IfDef) {
1084 bool IfNDef = FormatTok->is(Kind: tok::pp_ifndef);
1085 nextToken();
1086 bool Unreachable = false;
1087 if (!IfDef && (FormatTok->is(Kind: tok::kw_false) || FormatTok->TokenText == "0"))
1088 Unreachable = true;
1089 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1090 Unreachable = true;
1091 conditionalCompilationStart(Unreachable);
1092 FormatToken *IfCondition = FormatTok;
1093 // If there's a #ifndef on the first line, and the only lines before it are
1094 // comments, it could be an include guard.
1095 bool MaybeIncludeGuard = IfNDef;
1096 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1097 for (auto &Line : Lines) {
1098 if (Line.Tokens.front().Tok->isNot(Kind: tok::comment)) {
1099 MaybeIncludeGuard = false;
1100 IncludeGuard = IG_Rejected;
1101 break;
1102 }
1103 }
1104 }
1105 --PPBranchLevel;
1106 parsePPUnknown();
1107 ++PPBranchLevel;
1108 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1109 IncludeGuard = IG_IfNdefed;
1110 IncludeGuardToken = IfCondition;
1111 }
1112}
1113
1114void UnwrappedLineParser::parsePPElse() {
1115 // If a potential include guard has an #else, it's not an include guard.
1116 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1117 IncludeGuard = IG_Rejected;
1118 // Don't crash when there is an #else without an #if.
1119 assert(PPBranchLevel >= -1);
1120 if (PPBranchLevel == -1)
1121 conditionalCompilationStart(/*Unreachable=*/true);
1122 conditionalCompilationAlternative();
1123 --PPBranchLevel;
1124 parsePPUnknown();
1125 ++PPBranchLevel;
1126}
1127
1128void UnwrappedLineParser::parsePPEndIf() {
1129 conditionalCompilationEnd();
1130 parsePPUnknown();
1131 // If the #endif of a potential include guard is the last thing in the file,
1132 // then we found an include guard.
1133 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1134 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1135 IncludeGuard = IG_Found;
1136 }
1137}
1138
1139void UnwrappedLineParser::parsePPDefine() {
1140 nextToken();
1141
1142 if (!FormatTok->Tok.getIdentifierInfo()) {
1143 IncludeGuard = IG_Rejected;
1144 IncludeGuardToken = nullptr;
1145 parsePPUnknown();
1146 return;
1147 }
1148
1149 if (IncludeGuard == IG_IfNdefed &&
1150 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1151 IncludeGuard = IG_Defined;
1152 IncludeGuardToken = nullptr;
1153 for (auto &Line : Lines) {
1154 if (!Line.Tokens.front().Tok->isOneOf(K1: tok::comment, K2: tok::hash)) {
1155 IncludeGuard = IG_Rejected;
1156 break;
1157 }
1158 }
1159 }
1160
1161 // In the context of a define, even keywords should be treated as normal
1162 // identifiers. Setting the kind to identifier is not enough, because we need
1163 // to treat additional keywords like __except as well, which are already
1164 // identifiers. Setting the identifier info to null interferes with include
1165 // guard processing above, and changes preprocessing nesting.
1166 FormatTok->Tok.setKind(tok::identifier);
1167 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1168 nextToken();
1169 if (FormatTok->Tok.getKind() == tok::l_paren &&
1170 !FormatTok->hasWhitespaceBefore()) {
1171 parseParens();
1172 }
1173 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1174 Line->Level += PPBranchLevel + 1;
1175 addUnwrappedLine();
1176 ++Line->Level;
1177
1178 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1179 assert((int)Line->PPLevel >= 0);
1180 Line->InMacroBody = true;
1181
1182 if (Style.SkipMacroDefinitionBody) {
1183 do {
1184 FormatTok->Finalized = true;
1185 nextToken();
1186 } while (!eof());
1187 addUnwrappedLine();
1188 return;
1189 }
1190
1191 if (FormatTok->is(Kind: tok::identifier) &&
1192 Tokens->peekNextToken()->is(Kind: tok::colon)) {
1193 nextToken();
1194 nextToken();
1195 }
1196
1197 // Errors during a preprocessor directive can only affect the layout of the
1198 // preprocessor directive, and thus we ignore them. An alternative approach
1199 // would be to use the same approach we use on the file level (no
1200 // re-indentation if there was a structural error) within the macro
1201 // definition.
1202 parseFile();
1203}
1204
1205void UnwrappedLineParser::parsePPPragma() {
1206 Line->InPragmaDirective = true;
1207 parsePPUnknown();
1208}
1209
1210void UnwrappedLineParser::parsePPUnknown() {
1211 do {
1212 nextToken();
1213 } while (!eof());
1214 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1215 Line->Level += PPBranchLevel + 1;
1216 addUnwrappedLine();
1217}
1218
1219// Here we exclude certain tokens that are not usually the first token in an
1220// unwrapped line. This is used in attempt to distinguish macro calls without
1221// trailing semicolons from other constructs split to several lines.
1222static bool tokenCanStartNewLine(const FormatToken &Tok) {
1223 // Semicolon can be a null-statement, l_square can be a start of a macro or
1224 // a C++11 attribute, but this doesn't seem to be common.
1225 return !Tok.isOneOf(K1: tok::semi, K2: tok::l_brace,
1226 // Tokens that can only be used as binary operators and a
1227 // part of overloaded operator names.
1228 Ks: tok::period, Ks: tok::periodstar, Ks: tok::arrow, Ks: tok::arrowstar,
1229 Ks: tok::less, Ks: tok::greater, Ks: tok::slash, Ks: tok::percent,
1230 Ks: tok::lessless, Ks: tok::greatergreater, Ks: tok::equal,
1231 Ks: tok::plusequal, Ks: tok::minusequal, Ks: tok::starequal,
1232 Ks: tok::slashequal, Ks: tok::percentequal, Ks: tok::ampequal,
1233 Ks: tok::pipeequal, Ks: tok::caretequal, Ks: tok::greatergreaterequal,
1234 Ks: tok::lesslessequal,
1235 // Colon is used in labels, base class lists, initializer
1236 // lists, range-based for loops, ternary operator, but
1237 // should never be the first token in an unwrapped line.
1238 Ks: tok::colon,
1239 // 'noexcept' is a trailing annotation.
1240 Ks: tok::kw_noexcept);
1241}
1242
1243static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1244 const FormatToken *FormatTok) {
1245 // FIXME: This returns true for C/C++ keywords like 'struct'.
1246 return FormatTok->is(Kind: tok::identifier) &&
1247 (!FormatTok->Tok.getIdentifierInfo() ||
1248 !FormatTok->isOneOf(
1249 K1: Keywords.kw_in, K2: Keywords.kw_of, Ks: Keywords.kw_as, Ks: Keywords.kw_async,
1250 Ks: Keywords.kw_await, Ks: Keywords.kw_yield, Ks: Keywords.kw_finally,
1251 Ks: Keywords.kw_function, Ks: Keywords.kw_import, Ks: Keywords.kw_is,
1252 Ks: Keywords.kw_let, Ks: Keywords.kw_var, Ks: tok::kw_const,
1253 Ks: Keywords.kw_abstract, Ks: Keywords.kw_extends, Ks: Keywords.kw_implements,
1254 Ks: Keywords.kw_instanceof, Ks: Keywords.kw_interface,
1255 Ks: Keywords.kw_override, Ks: Keywords.kw_throws, Ks: Keywords.kw_from));
1256}
1257
1258static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1259 const FormatToken *FormatTok) {
1260 return FormatTok->Tok.isLiteral() ||
1261 FormatTok->isOneOf(K1: tok::kw_true, K2: tok::kw_false) ||
1262 mustBeJSIdent(Keywords, FormatTok);
1263}
1264
1265// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1266// when encountered after a value (see mustBeJSIdentOrValue).
1267static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1268 const FormatToken *FormatTok) {
1269 return FormatTok->isOneOf(
1270 K1: tok::kw_return, K2: Keywords.kw_yield,
1271 // conditionals
1272 Ks: tok::kw_if, Ks: tok::kw_else,
1273 // loops
1274 Ks: tok::kw_for, Ks: tok::kw_while, Ks: tok::kw_do, Ks: tok::kw_continue, Ks: tok::kw_break,
1275 // switch/case
1276 Ks: tok::kw_switch, Ks: tok::kw_case,
1277 // exceptions
1278 Ks: tok::kw_throw, Ks: tok::kw_try, Ks: tok::kw_catch, Ks: Keywords.kw_finally,
1279 // declaration
1280 Ks: tok::kw_const, Ks: tok::kw_class, Ks: Keywords.kw_var, Ks: Keywords.kw_let,
1281 Ks: Keywords.kw_async, Ks: Keywords.kw_function,
1282 // import/export
1283 Ks: Keywords.kw_import, Ks: tok::kw_export);
1284}
1285
1286// Checks whether a token is a type in K&R C (aka C78).
1287static bool isC78Type(const FormatToken &Tok) {
1288 return Tok.isOneOf(K1: tok::kw_char, K2: tok::kw_short, Ks: tok::kw_int, Ks: tok::kw_long,
1289 Ks: tok::kw_unsigned, Ks: tok::kw_float, Ks: tok::kw_double,
1290 Ks: tok::identifier);
1291}
1292
1293// This function checks whether a token starts the first parameter declaration
1294// in a K&R C (aka C78) function definition, e.g.:
1295// int f(a, b)
1296// short a, b;
1297// {
1298// return a + b;
1299// }
1300static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1301 const FormatToken *FuncName) {
1302 assert(Tok);
1303 assert(Next);
1304 assert(FuncName);
1305
1306 if (FuncName->isNot(Kind: tok::identifier))
1307 return false;
1308
1309 const FormatToken *Prev = FuncName->Previous;
1310 if (!Prev || (Prev->isNot(Kind: tok::star) && !isC78Type(Tok: *Prev)))
1311 return false;
1312
1313 if (!isC78Type(Tok: *Tok) &&
1314 !Tok->isOneOf(K1: tok::kw_register, K2: tok::kw_struct, Ks: tok::kw_union)) {
1315 return false;
1316 }
1317
1318 if (Next->isNot(Kind: tok::star) && !Next->Tok.getIdentifierInfo())
1319 return false;
1320
1321 Tok = Tok->Previous;
1322 if (!Tok || Tok->isNot(Kind: tok::r_paren))
1323 return false;
1324
1325 Tok = Tok->Previous;
1326 if (!Tok || Tok->isNot(Kind: tok::identifier))
1327 return false;
1328
1329 return Tok->Previous && Tok->Previous->isOneOf(K1: tok::l_paren, K2: tok::comma);
1330}
1331
1332bool UnwrappedLineParser::parseModuleImport() {
1333 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1334
1335 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1336 !Token->Tok.getIdentifierInfo() &&
1337 !Token->isOneOf(K1: tok::colon, K2: tok::less, Ks: tok::string_literal)) {
1338 return false;
1339 }
1340
1341 nextToken();
1342 while (!eof()) {
1343 if (FormatTok->is(Kind: tok::colon)) {
1344 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1345 }
1346 // Handle import <foo/bar.h> as we would an include statement.
1347 else if (FormatTok->is(Kind: tok::less)) {
1348 nextToken();
1349 while (!FormatTok->isOneOf(K1: tok::semi, K2: tok::greater, Ks: tok::eof)) {
1350 // Mark tokens up to the trailing line comments as implicit string
1351 // literals.
1352 if (FormatTok->isNot(Kind: tok::comment) &&
1353 !FormatTok->TokenText.starts_with(Prefix: "//")) {
1354 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1355 }
1356 nextToken();
1357 }
1358 }
1359 if (FormatTok->is(Kind: tok::semi)) {
1360 nextToken();
1361 break;
1362 }
1363 nextToken();
1364 }
1365
1366 addUnwrappedLine();
1367 return true;
1368}
1369
1370// readTokenWithJavaScriptASI reads the next token and terminates the current
1371// line if JavaScript Automatic Semicolon Insertion must
1372// happen between the current token and the next token.
1373//
1374// This method is conservative - it cannot cover all edge cases of JavaScript,
1375// but only aims to correctly handle certain well known cases. It *must not*
1376// return true in speculative cases.
1377void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1378 FormatToken *Previous = FormatTok;
1379 readToken();
1380 FormatToken *Next = FormatTok;
1381
1382 bool IsOnSameLine =
1383 CommentsBeforeNextToken.empty()
1384 ? Next->NewlinesBefore == 0
1385 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1386 if (IsOnSameLine)
1387 return;
1388
1389 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Previous);
1390 bool PreviousStartsTemplateExpr =
1391 Previous->is(TT: TT_TemplateString) && Previous->TokenText.ends_with(Suffix: "${");
1392 if (PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) {
1393 // If the line contains an '@' sign, the previous token might be an
1394 // annotation, which can precede another identifier/value.
1395 bool HasAt = llvm::any_of(Range&: Line->Tokens, P: [](UnwrappedLineNode &LineNode) {
1396 return LineNode.Tok->is(Kind: tok::at);
1397 });
1398 if (HasAt)
1399 return;
1400 }
1401 if (Next->is(Kind: tok::exclaim) && PreviousMustBeValue)
1402 return addUnwrappedLine();
1403 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Next);
1404 bool NextEndsTemplateExpr =
1405 Next->is(TT: TT_TemplateString) && Next->TokenText.starts_with(Prefix: "}");
1406 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1407 (PreviousMustBeValue ||
1408 Previous->isOneOf(K1: tok::r_square, K2: tok::r_paren, Ks: tok::plusplus,
1409 Ks: tok::minusminus))) {
1410 return addUnwrappedLine();
1411 }
1412 if ((PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) &&
1413 isJSDeclOrStmt(Keywords, FormatTok: Next)) {
1414 return addUnwrappedLine();
1415 }
1416}
1417
1418void UnwrappedLineParser::parseStructuralElement(
1419 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1420 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1421 if (Style.Language == FormatStyle::LK_TableGen &&
1422 FormatTok->is(Kind: tok::pp_include)) {
1423 nextToken();
1424 if (FormatTok->is(Kind: tok::string_literal))
1425 nextToken();
1426 addUnwrappedLine();
1427 return;
1428 }
1429
1430 if (IsCpp) {
1431 while (FormatTok->is(Kind: tok::l_square) && handleCppAttributes()) {
1432 }
1433 } else if (Style.isVerilog()) {
1434 if (Keywords.isVerilogStructuredProcedure(Tok: *FormatTok)) {
1435 parseForOrWhileLoop(/*HasParens=*/false);
1436 return;
1437 }
1438 if (FormatTok->isOneOf(K1: Keywords.kw_foreach, K2: Keywords.kw_repeat)) {
1439 parseForOrWhileLoop();
1440 return;
1441 }
1442 if (FormatTok->isOneOf(K1: tok::kw_restrict, K2: Keywords.kw_assert,
1443 Ks: Keywords.kw_assume, Ks: Keywords.kw_cover)) {
1444 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1445 return;
1446 }
1447
1448 // Skip things that can exist before keywords like 'if' and 'case'.
1449 while (true) {
1450 if (FormatTok->isOneOf(K1: Keywords.kw_priority, K2: Keywords.kw_unique,
1451 Ks: Keywords.kw_unique0)) {
1452 nextToken();
1453 } else if (FormatTok->is(Kind: tok::l_paren) &&
1454 Tokens->peekNextToken()->is(Kind: tok::star)) {
1455 parseParens();
1456 } else {
1457 break;
1458 }
1459 }
1460 }
1461
1462 // Tokens that only make sense at the beginning of a line.
1463 switch (FormatTok->Tok.getKind()) {
1464 case tok::kw_asm:
1465 nextToken();
1466 if (FormatTok->is(Kind: tok::l_brace)) {
1467 FormatTok->setFinalizedType(TT_InlineASMBrace);
1468 nextToken();
1469 while (FormatTok && !eof()) {
1470 if (FormatTok->is(Kind: tok::r_brace)) {
1471 FormatTok->setFinalizedType(TT_InlineASMBrace);
1472 nextToken();
1473 addUnwrappedLine();
1474 break;
1475 }
1476 FormatTok->Finalized = true;
1477 nextToken();
1478 }
1479 }
1480 break;
1481 case tok::kw_namespace:
1482 parseNamespace();
1483 return;
1484 case tok::kw_public:
1485 case tok::kw_protected:
1486 case tok::kw_private:
1487 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1488 Style.isCSharp()) {
1489 nextToken();
1490 } else {
1491 parseAccessSpecifier();
1492 }
1493 return;
1494 case tok::kw_if: {
1495 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1496 // field/method declaration.
1497 break;
1498 }
1499 FormatToken *Tok = parseIfThenElse(IfKind);
1500 if (IfLeftBrace)
1501 *IfLeftBrace = Tok;
1502 return;
1503 }
1504 case tok::kw_for:
1505 case tok::kw_while:
1506 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1507 // field/method declaration.
1508 break;
1509 }
1510 parseForOrWhileLoop();
1511 return;
1512 case tok::kw_do:
1513 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1514 // field/method declaration.
1515 break;
1516 }
1517 parseDoWhile();
1518 if (HasDoWhile)
1519 *HasDoWhile = true;
1520 return;
1521 case tok::kw_switch:
1522 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1523 // 'switch: string' field declaration.
1524 break;
1525 }
1526 parseSwitch();
1527 return;
1528 case tok::kw_default:
1529 // In Verilog default along with other labels are handled in the next loop.
1530 if (Style.isVerilog())
1531 break;
1532 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1533 // 'default: string' field declaration.
1534 break;
1535 }
1536 nextToken();
1537 if (FormatTok->is(Kind: tok::colon)) {
1538 FormatTok->setFinalizedType(TT_CaseLabelColon);
1539 parseLabel();
1540 return;
1541 }
1542 // e.g. "default void f() {}" in a Java interface.
1543 break;
1544 case tok::kw_case:
1545 // Proto: there are no switch/case statements.
1546 if (Style.Language == FormatStyle::LK_Proto) {
1547 nextToken();
1548 return;
1549 }
1550 if (Style.isVerilog()) {
1551 parseBlock();
1552 addUnwrappedLine();
1553 return;
1554 }
1555 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1556 // 'case: string' field declaration.
1557 nextToken();
1558 break;
1559 }
1560 parseCaseLabel();
1561 return;
1562 case tok::kw_try:
1563 case tok::kw___try:
1564 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1565 // field/method declaration.
1566 break;
1567 }
1568 parseTryCatch();
1569 return;
1570 case tok::kw_extern:
1571 nextToken();
1572 if (Style.isVerilog()) {
1573 // In Verilog and extern module declaration looks like a start of module.
1574 // But there is no body and endmodule. So we handle it separately.
1575 if (Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
1576 parseVerilogHierarchyHeader();
1577 return;
1578 }
1579 } else if (FormatTok->is(Kind: tok::string_literal)) {
1580 nextToken();
1581 if (FormatTok->is(Kind: tok::l_brace)) {
1582 if (Style.BraceWrapping.AfterExternBlock)
1583 addUnwrappedLine();
1584 // Either we indent or for backwards compatibility we follow the
1585 // AfterExternBlock style.
1586 unsigned AddLevels =
1587 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1588 (Style.BraceWrapping.AfterExternBlock &&
1589 Style.IndentExternBlock ==
1590 FormatStyle::IEBS_AfterExternBlock)
1591 ? 1u
1592 : 0u;
1593 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1594 addUnwrappedLine();
1595 return;
1596 }
1597 }
1598 break;
1599 case tok::kw_export:
1600 if (Style.isJavaScript()) {
1601 parseJavaScriptEs6ImportExport();
1602 return;
1603 }
1604 if (IsCpp) {
1605 nextToken();
1606 if (FormatTok->is(Kind: tok::kw_namespace)) {
1607 parseNamespace();
1608 return;
1609 }
1610 if (FormatTok->is(II: Keywords.kw_import) && parseModuleImport())
1611 return;
1612 }
1613 break;
1614 case tok::kw_inline:
1615 nextToken();
1616 if (FormatTok->is(Kind: tok::kw_namespace)) {
1617 parseNamespace();
1618 return;
1619 }
1620 break;
1621 case tok::identifier:
1622 if (FormatTok->is(TT: TT_ForEachMacro)) {
1623 parseForOrWhileLoop();
1624 return;
1625 }
1626 if (FormatTok->is(TT: TT_MacroBlockBegin)) {
1627 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1628 /*MunchSemi=*/false);
1629 return;
1630 }
1631 if (FormatTok->is(II: Keywords.kw_import)) {
1632 if (Style.isJavaScript()) {
1633 parseJavaScriptEs6ImportExport();
1634 return;
1635 }
1636 if (Style.Language == FormatStyle::LK_Proto) {
1637 nextToken();
1638 if (FormatTok->is(Kind: tok::kw_public))
1639 nextToken();
1640 if (FormatTok->isNot(Kind: tok::string_literal))
1641 return;
1642 nextToken();
1643 if (FormatTok->is(Kind: tok::semi))
1644 nextToken();
1645 addUnwrappedLine();
1646 return;
1647 }
1648 if (IsCpp && parseModuleImport())
1649 return;
1650 }
1651 if (IsCpp && FormatTok->isOneOf(K1: Keywords.kw_signals, K2: Keywords.kw_qsignals,
1652 Ks: Keywords.kw_slots, Ks: Keywords.kw_qslots)) {
1653 nextToken();
1654 if (FormatTok->is(Kind: tok::colon)) {
1655 nextToken();
1656 addUnwrappedLine();
1657 return;
1658 }
1659 }
1660 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
1661 parseStatementMacro();
1662 return;
1663 }
1664 if (IsCpp && FormatTok->is(TT: TT_NamespaceMacro)) {
1665 parseNamespace();
1666 return;
1667 }
1668 // In Verilog labels can be any expression, so we don't do them here.
1669 // JS doesn't have macros, and within classes colons indicate fields, not
1670 // labels.
1671 // TableGen doesn't have labels.
1672 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1673 Tokens->peekNextToken()->is(Kind: tok::colon) && !Line->MustBeDeclaration) {
1674 nextToken();
1675 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1676 FormatTok->setFinalizedType(TT_GotoLabelColon);
1677 parseLabel(LeftAlignLabel: !Style.IndentGotoLabels);
1678 if (HasLabel)
1679 *HasLabel = true;
1680 return;
1681 }
1682 // In all other cases, parse the declaration.
1683 break;
1684 default:
1685 break;
1686 }
1687
1688 const bool InRequiresExpression =
1689 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
1690 do {
1691 const FormatToken *Previous = FormatTok->Previous;
1692 switch (FormatTok->Tok.getKind()) {
1693 case tok::at:
1694 nextToken();
1695 if (FormatTok->is(Kind: tok::l_brace)) {
1696 nextToken();
1697 parseBracedList();
1698 break;
1699 } else if (Style.Language == FormatStyle::LK_Java &&
1700 FormatTok->is(II: Keywords.kw_interface)) {
1701 nextToken();
1702 break;
1703 }
1704 switch (FormatTok->Tok.getObjCKeywordID()) {
1705 case tok::objc_public:
1706 case tok::objc_protected:
1707 case tok::objc_package:
1708 case tok::objc_private:
1709 return parseAccessSpecifier();
1710 case tok::objc_interface:
1711 case tok::objc_implementation:
1712 return parseObjCInterfaceOrImplementation();
1713 case tok::objc_protocol:
1714 if (parseObjCProtocol())
1715 return;
1716 break;
1717 case tok::objc_end:
1718 return; // Handled by the caller.
1719 case tok::objc_optional:
1720 case tok::objc_required:
1721 nextToken();
1722 addUnwrappedLine();
1723 return;
1724 case tok::objc_autoreleasepool:
1725 nextToken();
1726 if (FormatTok->is(Kind: tok::l_brace)) {
1727 if (Style.BraceWrapping.AfterControlStatement ==
1728 FormatStyle::BWACS_Always) {
1729 addUnwrappedLine();
1730 }
1731 parseBlock();
1732 }
1733 addUnwrappedLine();
1734 return;
1735 case tok::objc_synchronized:
1736 nextToken();
1737 if (FormatTok->is(Kind: tok::l_paren)) {
1738 // Skip synchronization object
1739 parseParens();
1740 }
1741 if (FormatTok->is(Kind: tok::l_brace)) {
1742 if (Style.BraceWrapping.AfterControlStatement ==
1743 FormatStyle::BWACS_Always) {
1744 addUnwrappedLine();
1745 }
1746 parseBlock();
1747 }
1748 addUnwrappedLine();
1749 return;
1750 case tok::objc_try:
1751 // This branch isn't strictly necessary (the kw_try case below would
1752 // do this too after the tok::at is parsed above). But be explicit.
1753 parseTryCatch();
1754 return;
1755 default:
1756 break;
1757 }
1758 break;
1759 case tok::kw_requires: {
1760 if (IsCpp) {
1761 bool ParsedClause = parseRequires();
1762 if (ParsedClause)
1763 return;
1764 } else {
1765 nextToken();
1766 }
1767 break;
1768 }
1769 case tok::kw_enum:
1770 // Ignore if this is part of "template <enum ..." or "... -> enum".
1771 if (Previous && Previous->isOneOf(K1: tok::less, K2: tok::arrow)) {
1772 nextToken();
1773 break;
1774 }
1775
1776 // parseEnum falls through and does not yet add an unwrapped line as an
1777 // enum definition can start a structural element.
1778 if (!parseEnum())
1779 break;
1780 // This only applies to C++ and Verilog.
1781 if (!IsCpp && !Style.isVerilog()) {
1782 addUnwrappedLine();
1783 return;
1784 }
1785 break;
1786 case tok::kw_typedef:
1787 nextToken();
1788 if (FormatTok->isOneOf(K1: Keywords.kw_NS_ENUM, K2: Keywords.kw_NS_OPTIONS,
1789 Ks: Keywords.kw_CF_ENUM, Ks: Keywords.kw_CF_OPTIONS,
1790 Ks: Keywords.kw_CF_CLOSED_ENUM,
1791 Ks: Keywords.kw_NS_CLOSED_ENUM)) {
1792 parseEnum();
1793 }
1794 break;
1795 case tok::kw_class:
1796 if (Style.isVerilog()) {
1797 parseBlock();
1798 addUnwrappedLine();
1799 return;
1800 }
1801 if (Style.isTableGen()) {
1802 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1803 // This is same as def and so on.
1804 nextToken();
1805 break;
1806 }
1807 [[fallthrough]];
1808 case tok::kw_struct:
1809 case tok::kw_union:
1810 if (parseStructLike())
1811 return;
1812 break;
1813 case tok::kw_decltype:
1814 nextToken();
1815 if (FormatTok->is(Kind: tok::l_paren)) {
1816 parseParens();
1817 assert(FormatTok->Previous);
1818 if (FormatTok->Previous->endsSequence(K1: tok::r_paren, Tokens: tok::kw_auto,
1819 Tokens: tok::l_paren)) {
1820 Line->SeenDecltypeAuto = true;
1821 }
1822 }
1823 break;
1824 case tok::period:
1825 nextToken();
1826 // In Java, classes have an implicit static member "class".
1827 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1828 FormatTok->is(Kind: tok::kw_class)) {
1829 nextToken();
1830 }
1831 if (Style.isJavaScript() && FormatTok &&
1832 FormatTok->Tok.getIdentifierInfo()) {
1833 // JavaScript only has pseudo keywords, all keywords are allowed to
1834 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1835 nextToken();
1836 }
1837 break;
1838 case tok::semi:
1839 nextToken();
1840 addUnwrappedLine();
1841 return;
1842 case tok::r_brace:
1843 addUnwrappedLine();
1844 return;
1845 case tok::l_paren: {
1846 parseParens();
1847 // Break the unwrapped line if a K&R C function definition has a parameter
1848 // declaration.
1849 if (OpeningBrace || !IsCpp || !Previous || eof())
1850 break;
1851 if (isC78ParameterDecl(Tok: FormatTok,
1852 Next: Tokens->peekNextToken(/*SkipComment=*/true),
1853 FuncName: Previous)) {
1854 addUnwrappedLine();
1855 return;
1856 }
1857 break;
1858 }
1859 case tok::kw_operator:
1860 nextToken();
1861 if (FormatTok->isBinaryOperator())
1862 nextToken();
1863 break;
1864 case tok::caret:
1865 nextToken();
1866 // Block return type.
1867 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(IsCpp)) {
1868 nextToken();
1869 // Return types: pointers are ok too.
1870 while (FormatTok->is(Kind: tok::star))
1871 nextToken();
1872 }
1873 // Block argument list.
1874 if (FormatTok->is(Kind: tok::l_paren))
1875 parseParens();
1876 // Block body.
1877 if (FormatTok->is(Kind: tok::l_brace))
1878 parseChildBlock();
1879 break;
1880 case tok::l_brace:
1881 if (InRequiresExpression)
1882 FormatTok->setFinalizedType(TT_BracedListLBrace);
1883 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1884 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1885 // A block outside of parentheses must be the last part of a
1886 // structural element.
1887 // FIXME: Figure out cases where this is not true, and add projections
1888 // for them (the one we know is missing are lambdas).
1889 if (Style.Language == FormatStyle::LK_Java &&
1890 Line->Tokens.front().Tok->is(II: Keywords.kw_synchronized)) {
1891 // If necessary, we could set the type to something different than
1892 // TT_FunctionLBrace.
1893 if (Style.BraceWrapping.AfterControlStatement ==
1894 FormatStyle::BWACS_Always) {
1895 addUnwrappedLine();
1896 }
1897 } else if (Style.BraceWrapping.AfterFunction) {
1898 addUnwrappedLine();
1899 }
1900 FormatTok->setFinalizedType(TT_FunctionLBrace);
1901 parseBlock();
1902 IsDecltypeAutoFunction = false;
1903 addUnwrappedLine();
1904 return;
1905 }
1906 // Otherwise this was a braced init list, and the structural
1907 // element continues.
1908 break;
1909 case tok::kw_try:
1910 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1911 // field/method declaration.
1912 nextToken();
1913 break;
1914 }
1915 // We arrive here when parsing function-try blocks.
1916 if (Style.BraceWrapping.AfterFunction)
1917 addUnwrappedLine();
1918 parseTryCatch();
1919 return;
1920 case tok::identifier: {
1921 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where) &&
1922 Line->MustBeDeclaration) {
1923 addUnwrappedLine();
1924 parseCSharpGenericTypeConstraint();
1925 break;
1926 }
1927 if (FormatTok->is(TT: TT_MacroBlockEnd)) {
1928 addUnwrappedLine();
1929 return;
1930 }
1931
1932 // Function declarations (as opposed to function expressions) are parsed
1933 // on their own unwrapped line by continuing this loop. Function
1934 // expressions (functions that are not on their own line) must not create
1935 // a new unwrapped line, so they are special cased below.
1936 size_t TokenCount = Line->Tokens.size();
1937 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_function) &&
1938 (TokenCount > 1 ||
1939 (TokenCount == 1 &&
1940 Line->Tokens.front().Tok->isNot(Kind: Keywords.kw_async)))) {
1941 tryToParseJSFunction();
1942 break;
1943 }
1944 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1945 FormatTok->is(II: Keywords.kw_interface)) {
1946 if (Style.isJavaScript()) {
1947 // In JavaScript/TypeScript, "interface" can be used as a standalone
1948 // identifier, e.g. in `var interface = 1;`. If "interface" is
        // followed by another identifier, it is very likely to be an actual
1950 // interface declaration.
1951 unsigned StoredPosition = Tokens->getPosition();
1952 FormatToken *Next = Tokens->getNextToken();
1953 FormatTok = Tokens->setPosition(StoredPosition);
1954 if (!mustBeJSIdent(Keywords, FormatTok: Next)) {
1955 nextToken();
1956 break;
1957 }
1958 }
1959 parseRecord();
1960 addUnwrappedLine();
1961 return;
1962 }
1963
1964 if (Style.isVerilog()) {
1965 if (FormatTok->is(II: Keywords.kw_table)) {
1966 parseVerilogTable();
1967 return;
1968 }
1969 if (Keywords.isVerilogBegin(Tok: *FormatTok) ||
1970 Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
1971 parseBlock();
1972 addUnwrappedLine();
1973 return;
1974 }
1975 }
1976
1977 if (!IsCpp && FormatTok->is(II: Keywords.kw_interface)) {
1978 if (parseStructLike())
1979 return;
1980 break;
1981 }
1982
1983 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
1984 parseStatementMacro();
1985 return;
1986 }
1987
1988 // See if the following token should start a new unwrapped line.
1989 StringRef Text = FormatTok->TokenText;
1990
1991 FormatToken *PreviousToken = FormatTok;
1992 nextToken();
1993
1994 // JS doesn't have macros, and within classes colons indicate fields, not
1995 // labels.
1996 if (Style.isJavaScript())
1997 break;
1998
1999 auto OneTokenSoFar = [&]() {
2000 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2001 while (I != E && I->Tok->is(Kind: tok::comment))
2002 ++I;
2003 if (Style.isVerilog())
2004 while (I != E && I->Tok->is(Kind: tok::hash))
2005 ++I;
2006 return I != E && (++I == E);
2007 };
2008 if (OneTokenSoFar()) {
2009 // Recognize function-like macro usages without trailing semicolon as
2010 // well as free-standing macros like Q_OBJECT.
2011 bool FunctionLike = FormatTok->is(Kind: tok::l_paren);
2012 if (FunctionLike)
2013 parseParens();
2014
2015 bool FollowedByNewline =
2016 CommentsBeforeNextToken.empty()
2017 ? FormatTok->NewlinesBefore > 0
2018 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2019
2020 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2021 tokenCanStartNewLine(Tok: *FormatTok) && Text == Text.upper()) {
2022 if (PreviousToken->isNot(Kind: TT_UntouchableMacroFunc))
2023 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2024 addUnwrappedLine();
2025 return;
2026 }
2027 }
2028 break;
2029 }
2030 case tok::equal:
2031 if ((Style.isJavaScript() || Style.isCSharp()) &&
2032 FormatTok->is(TT: TT_FatArrow)) {
2033 tryToParseChildBlock();
2034 break;
2035 }
2036
2037 nextToken();
2038 if (FormatTok->is(Kind: tok::l_brace)) {
2039 // Block kind should probably be set to BK_BracedInit for any language.
2040 // C# needs this change to ensure that array initialisers and object
2041 // initialisers are indented the same way.
2042 if (Style.isCSharp())
2043 FormatTok->setBlockKind(BK_BracedInit);
2044 // TableGen's defset statement has syntax of the form,
2045 // `defset <type> <name> = { <statement>... }`
2046 if (Style.isTableGen() &&
2047 Line->Tokens.begin()->Tok->is(II: Keywords.kw_defset)) {
2048 FormatTok->setFinalizedType(TT_FunctionLBrace);
2049 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2050 /*MunchSemi=*/false);
2051 addUnwrappedLine();
2052 break;
2053 }
2054 nextToken();
2055 parseBracedList();
2056 } else if (Style.Language == FormatStyle::LK_Proto &&
2057 FormatTok->is(Kind: tok::less)) {
2058 nextToken();
2059 parseBracedList(/*IsAngleBracket=*/true);
2060 }
2061 break;
2062 case tok::l_square:
2063 parseSquare();
2064 break;
2065 case tok::kw_new:
2066 parseNew();
2067 break;
2068 case tok::kw_case:
2069 // Proto: there are no switch/case statements.
2070 if (Style.Language == FormatStyle::LK_Proto) {
2071 nextToken();
2072 return;
2073 }
2074 // In Verilog switch is called case.
2075 if (Style.isVerilog()) {
2076 parseBlock();
2077 addUnwrappedLine();
2078 return;
2079 }
2080 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2081 // 'case: string' field declaration.
2082 nextToken();
2083 break;
2084 }
2085 parseCaseLabel();
2086 break;
2087 case tok::kw_default:
2088 nextToken();
2089 if (Style.isVerilog()) {
2090 if (FormatTok->is(Kind: tok::colon)) {
2091 // The label will be handled in the next iteration.
2092 break;
2093 }
2094 if (FormatTok->is(II: Keywords.kw_clocking)) {
2095 // A default clocking block.
2096 parseBlock();
2097 addUnwrappedLine();
2098 return;
2099 }
2100 parseVerilogCaseLabel();
2101 return;
2102 }
2103 break;
2104 case tok::colon:
2105 nextToken();
2106 if (Style.isVerilog()) {
2107 parseVerilogCaseLabel();
2108 return;
2109 }
2110 break;
2111 default:
2112 nextToken();
2113 break;
2114 }
2115 } while (!eof());
2116}
2117
2118bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2119 assert(FormatTok->is(tok::l_brace));
2120 if (!Style.isCSharp())
2121 return false;
2122 // See if it's a property accessor.
2123 if (FormatTok->Previous->isNot(Kind: tok::identifier))
2124 return false;
2125
2126 // See if we are inside a property accessor.
2127 //
2128 // Record the current tokenPosition so that we can advance and
2129 // reset the current token. `Next` is not set yet so we need
2130 // another way to advance along the token stream.
2131 unsigned int StoredPosition = Tokens->getPosition();
2132 FormatToken *Tok = Tokens->getNextToken();
2133
2134 // A trivial property accessor is of the form:
2135 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2136 // Track these as they do not require line breaks to be introduced.
2137 bool HasSpecialAccessor = false;
2138 bool IsTrivialPropertyAccessor = true;
2139 while (!eof()) {
2140 if (Tok->isOneOf(K1: tok::semi, K2: tok::kw_public, Ks: tok::kw_private,
2141 Ks: tok::kw_protected, Ks: Keywords.kw_internal, Ks: Keywords.kw_get,
2142 Ks: Keywords.kw_init, Ks: Keywords.kw_set)) {
2143 if (Tok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init, Ks: Keywords.kw_set))
2144 HasSpecialAccessor = true;
2145 Tok = Tokens->getNextToken();
2146 continue;
2147 }
2148 if (Tok->isNot(Kind: tok::r_brace))
2149 IsTrivialPropertyAccessor = false;
2150 break;
2151 }
2152
2153 if (!HasSpecialAccessor) {
2154 Tokens->setPosition(StoredPosition);
2155 return false;
2156 }
2157
2158 // Try to parse the property accessor:
2159 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2160 Tokens->setPosition(StoredPosition);
2161 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2162 addUnwrappedLine();
2163 nextToken();
2164 do {
2165 switch (FormatTok->Tok.getKind()) {
2166 case tok::r_brace:
2167 nextToken();
2168 if (FormatTok->is(Kind: tok::equal)) {
2169 while (!eof() && FormatTok->isNot(Kind: tok::semi))
2170 nextToken();
2171 nextToken();
2172 }
2173 addUnwrappedLine();
2174 return true;
2175 case tok::l_brace:
2176 ++Line->Level;
2177 parseBlock(/*MustBeDeclaration=*/true);
2178 addUnwrappedLine();
2179 --Line->Level;
2180 break;
2181 case tok::equal:
2182 if (FormatTok->is(TT: TT_FatArrow)) {
2183 ++Line->Level;
2184 do {
2185 nextToken();
2186 } while (!eof() && FormatTok->isNot(Kind: tok::semi));
2187 nextToken();
2188 addUnwrappedLine();
2189 --Line->Level;
2190 break;
2191 }
2192 nextToken();
2193 break;
2194 default:
2195 if (FormatTok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init,
2196 Ks: Keywords.kw_set) &&
2197 !IsTrivialPropertyAccessor) {
2198 // Non-trivial get/set needs to be on its own line.
2199 addUnwrappedLine();
2200 }
2201 nextToken();
2202 }
2203 } while (!eof());
2204
2205 // Unreachable for well-formed code (paired '{' and '}').
2206 return true;
2207}
2208
2209bool UnwrappedLineParser::tryToParseLambda() {
2210 assert(FormatTok->is(tok::l_square));
2211 if (!IsCpp) {
2212 nextToken();
2213 return false;
2214 }
2215 FormatToken &LSquare = *FormatTok;
2216 if (!tryToParseLambdaIntroducer())
2217 return false;
2218
2219 bool SeenArrow = false;
2220 bool InTemplateParameterList = false;
2221
2222 while (FormatTok->isNot(Kind: tok::l_brace)) {
2223 if (FormatTok->isTypeName(IsCpp)) {
2224 nextToken();
2225 continue;
2226 }
2227 switch (FormatTok->Tok.getKind()) {
2228 case tok::l_brace:
2229 break;
2230 case tok::l_paren:
2231 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2232 break;
2233 case tok::l_square:
2234 parseSquare();
2235 break;
2236 case tok::less:
2237 assert(FormatTok->Previous);
2238 if (FormatTok->Previous->is(Kind: tok::r_square))
2239 InTemplateParameterList = true;
2240 nextToken();
2241 break;
2242 case tok::kw_auto:
2243 case tok::kw_class:
2244 case tok::kw_template:
2245 case tok::kw_typename:
2246 case tok::amp:
2247 case tok::star:
2248 case tok::kw_const:
2249 case tok::kw_constexpr:
2250 case tok::kw_consteval:
2251 case tok::comma:
2252 case tok::greater:
2253 case tok::identifier:
2254 case tok::numeric_constant:
2255 case tok::coloncolon:
2256 case tok::kw_mutable:
2257 case tok::kw_noexcept:
2258 case tok::kw_static:
2259 nextToken();
2260 break;
2261 // Specialization of a template with an integer parameter can contain
2262 // arithmetic, logical, comparison and ternary operators.
2263 //
2264 // FIXME: This also accepts sequences of operators that are not in the scope
2265 // of a template argument list.
2266 //
2267 // In a C++ lambda a template type can only occur after an arrow. We use
2268 // this as an heuristic to distinguish between Objective-C expressions
2269 // followed by an `a->b` expression, such as:
2270 // ([obj func:arg] + a->b)
2271 // Otherwise the code below would parse as a lambda.
2272 case tok::plus:
2273 case tok::minus:
2274 case tok::exclaim:
2275 case tok::tilde:
2276 case tok::slash:
2277 case tok::percent:
2278 case tok::lessless:
2279 case tok::pipe:
2280 case tok::pipepipe:
2281 case tok::ampamp:
2282 case tok::caret:
2283 case tok::equalequal:
2284 case tok::exclaimequal:
2285 case tok::greaterequal:
2286 case tok::lessequal:
2287 case tok::question:
2288 case tok::colon:
2289 case tok::ellipsis:
2290 case tok::kw_true:
2291 case tok::kw_false:
2292 if (SeenArrow || InTemplateParameterList) {
2293 nextToken();
2294 break;
2295 }
2296 return true;
2297 case tok::arrow:
2298 // This might or might not actually be a lambda arrow (this could be an
2299 // ObjC method invocation followed by a dereferencing arrow). We might
2300 // reset this back to TT_Unknown in TokenAnnotator.
2301 FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2302 SeenArrow = true;
2303 nextToken();
2304 break;
2305 case tok::kw_requires: {
2306 auto *RequiresToken = FormatTok;
2307 nextToken();
2308 parseRequiresClause(RequiresToken);
2309 break;
2310 }
2311 case tok::equal:
2312 if (!InTemplateParameterList)
2313 return true;
2314 nextToken();
2315 break;
2316 default:
2317 return true;
2318 }
2319 }
2320
2321 FormatTok->setFinalizedType(TT_LambdaLBrace);
2322 LSquare.setFinalizedType(TT_LambdaLSquare);
2323
2324 NestedLambdas.push_back(Elt: Line->SeenDecltypeAuto);
2325 parseChildBlock();
2326 assert(!NestedLambdas.empty());
2327 NestedLambdas.pop_back();
2328
2329 return true;
2330}
2331
2332bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2333 const FormatToken *Previous = FormatTok->Previous;
2334 const FormatToken *LeftSquare = FormatTok;
2335 nextToken();
2336 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2337 !Previous->isOneOf(K1: tok::kw_return, K2: tok::kw_co_await,
2338 Ks: tok::kw_co_yield, Ks: tok::kw_co_return)) ||
2339 Previous->closesScope())) ||
2340 LeftSquare->isCppStructuredBinding(IsCpp)) {
2341 return false;
2342 }
2343 if (FormatTok->is(Kind: tok::l_square) || tok::isLiteral(K: FormatTok->Tok.getKind()))
2344 return false;
2345 if (FormatTok->is(Kind: tok::r_square)) {
2346 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2347 if (Next->is(Kind: tok::greater))
2348 return false;
2349 }
2350 parseSquare(/*LambdaIntroducer=*/true);
2351 return true;
2352}
2353
2354void UnwrappedLineParser::tryToParseJSFunction() {
2355 assert(FormatTok->is(Keywords.kw_function));
2356 if (FormatTok->is(II: Keywords.kw_async))
2357 nextToken();
2358 // Consume "function".
2359 nextToken();
2360
2361 // Consume * (generator function). Treat it like C++'s overloaded operators.
2362 if (FormatTok->is(Kind: tok::star)) {
2363 FormatTok->setFinalizedType(TT_OverloadedOperator);
2364 nextToken();
2365 }
2366
2367 // Consume function name.
2368 if (FormatTok->is(Kind: tok::identifier))
2369 nextToken();
2370
2371 if (FormatTok->isNot(Kind: tok::l_paren))
2372 return;
2373
2374 // Parse formal parameter list.
2375 parseParens();
2376
2377 if (FormatTok->is(Kind: tok::colon)) {
2378 // Parse a type definition.
2379 nextToken();
2380
2381 // Eat the type declaration. For braced inline object types, balance braces,
2382 // otherwise just parse until finding an l_brace for the function body.
2383 if (FormatTok->is(Kind: tok::l_brace))
2384 tryToParseBracedList();
2385 else
2386 while (!FormatTok->isOneOf(K1: tok::l_brace, K2: tok::semi) && !eof())
2387 nextToken();
2388 }
2389
2390 if (FormatTok->is(Kind: tok::semi))
2391 return;
2392
2393 parseChildBlock();
2394}
2395
2396bool UnwrappedLineParser::tryToParseBracedList() {
2397 if (FormatTok->is(BBK: BK_Unknown))
2398 calculateBraceTypes();
2399 assert(FormatTok->isNot(BK_Unknown));
2400 if (FormatTok->is(BBK: BK_Block))
2401 return false;
2402 nextToken();
2403 parseBracedList();
2404 return true;
2405}
2406
2407bool UnwrappedLineParser::tryToParseChildBlock() {
2408 assert(Style.isJavaScript() || Style.isCSharp());
2409 assert(FormatTok->is(TT_FatArrow));
2410 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2411 // They always start an expression or a child block if followed by a curly
2412 // brace.
2413 nextToken();
2414 if (FormatTok->isNot(Kind: tok::l_brace))
2415 return false;
2416 parseChildBlock();
2417 return true;
2418}
2419
2420bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2421 bool HasError = false;
2422
2423 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2424 // replace this by using parseAssignmentExpression() inside.
2425 do {
2426 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow) &&
2427 tryToParseChildBlock()) {
2428 continue;
2429 }
2430 if (Style.isJavaScript()) {
2431 if (FormatTok->is(II: Keywords.kw_function)) {
2432 tryToParseJSFunction();
2433 continue;
2434 }
2435 if (FormatTok->is(Kind: tok::l_brace)) {
2436 // Could be a method inside of a braced list `{a() { return 1; }}`.
2437 if (tryToParseBracedList())
2438 continue;
2439 parseChildBlock();
2440 }
2441 }
2442 if (FormatTok->is(Kind: IsAngleBracket ? tok::greater : tok::r_brace)) {
2443 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2444 addUnwrappedLine();
2445 nextToken();
2446 return !HasError;
2447 }
2448 switch (FormatTok->Tok.getKind()) {
2449 case tok::l_square:
2450 if (Style.isCSharp())
2451 parseSquare();
2452 else
2453 tryToParseLambda();
2454 break;
2455 case tok::l_paren:
2456 parseParens();
2457 // JavaScript can just have free standing methods and getters/setters in
2458 // object literals. Detect them by a "{" following ")".
2459 if (Style.isJavaScript()) {
2460 if (FormatTok->is(Kind: tok::l_brace))
2461 parseChildBlock();
2462 break;
2463 }
2464 break;
2465 case tok::l_brace:
2466 // Assume there are no blocks inside a braced init list apart
2467 // from the ones we explicitly parse out (like lambdas).
2468 FormatTok->setBlockKind(BK_BracedInit);
2469 nextToken();
2470 parseBracedList();
2471 break;
2472 case tok::less:
2473 nextToken();
2474 if (IsAngleBracket)
2475 parseBracedList(/*IsAngleBracket=*/true);
2476 break;
2477 case tok::semi:
2478 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2479 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2480 // used for error recovery if we have otherwise determined that this is
2481 // a braced list.
2482 if (Style.isJavaScript()) {
2483 nextToken();
2484 break;
2485 }
2486 HasError = true;
2487 if (!IsEnum)
2488 return false;
2489 nextToken();
2490 break;
2491 case tok::comma:
2492 nextToken();
2493 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2494 addUnwrappedLine();
2495 break;
2496 default:
2497 nextToken();
2498 break;
2499 }
2500 } while (!eof());
2501 return false;
2502}
2503
/// \brief Parses a pair of parentheses (and everything between them).
///
/// Must be called with the current token on `(`. Nested parentheses, braced
/// lists, child blocks, lambdas and requires-expressions found inside are
/// parsed recursively. When the RemoveParentheses style option asks for it,
/// the pair may also be marked as optional.
///
/// \param AmpAmpTokenType If different from TT_Unknown, this type is set on
/// all double ampersands. This applies to all nested parentheses as well.
///
/// \returns whether there is a `=` token between the parentheses (including
/// nested ones). Also returns, without consuming it, when an unmatched `}` or
/// the end of the file is reached.
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  auto *LeftParen = FormatTok;
  bool SeenEqual = false;
  // A `{` directly after the `(`: going by the variable name this may be a
  // statement expression. Such parentheses are never marked optional below.
  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(Kind: tok::l_brace);
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      if (parseParens(AmpAmpTokenType))
        SeenEqual = true;
      // In Java, a `{` right after nested parentheses is parsed as a child
      // block.
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(Kind: tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      // Decide whether this pair of parentheses is redundant and may be
      // removed. Never done for macro bodies.
      if (!MightBeStmtExpr && !Line->InMacroBody &&
          Style.RemoveParentheses > FormatStyle::RPS_Leave) {
        const auto *Prev = LeftParen->Previous;
        const auto *Next = Tokens->peekNextToken();
        // `((...))`: this pair is directly wrapped by another pair.
        const bool DoubleParens =
            Prev && Prev->is(Kind: tok::l_paren) && Next && Next->is(Kind: tok::r_paren);
        const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
        // Double parentheses are kept after `__attribute` and `decltype`, and
        // around an assignment used as the condition of `if`, `while` or
        // `if constexpr`.
        const bool Blacklisted =
            PrevPrev &&
            (PrevPrev->isOneOf(K1: tok::kw___attribute, K2: tok::kw_decltype) ||
             (SeenEqual &&
              (PrevPrev->isOneOf(K1: tok::kw_if, K2: tok::kw_while) ||
               PrevPrev->endsSequence(K1: tok::kw_constexpr, Tokens: tok::kw_if))));
        // `return (...);` / `co_return (...);`, unless the enclosing function
        // or innermost lambda was flagged as using `decltype(auto)`.
        const bool ReturnParens =
            Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
            ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
             (!NestedLambdas.empty() && !NestedLambdas.back())) &&
            Prev && Prev->isOneOf(K1: tok::kw_return, K2: tok::kw_co_return) && Next &&
            Next->is(Kind: tok::semi);
        if ((DoubleParens && !Blacklisted) || ReturnParens) {
          LeftParen->Optional = true;
          FormatTok->Optional = true;
        }
      }
      nextToken();
      return SeenEqual;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return SeenEqual;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // `@{ ... }` is parsed as a braced list.
      nextToken();
      if (FormatTok->is(Kind: tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::equal:
      SeenEqual = true;
      // A C# fat arrow has token kind tok::equal; it may introduce a child
      // block.
      if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      // JavaScript class expression.
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      // JavaScript function expression.
      if (Style.isJavaScript() && (FormatTok->is(II: Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setFinalizedType(AmpAmpTokenType);
      [[fallthrough]];
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return SeenEqual;
}
2603
2604void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2605 if (!LambdaIntroducer) {
2606 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2607 if (tryToParseLambda())
2608 return;
2609 }
2610 do {
2611 switch (FormatTok->Tok.getKind()) {
2612 case tok::l_paren:
2613 parseParens();
2614 break;
2615 case tok::r_square:
2616 nextToken();
2617 return;
2618 case tok::r_brace:
2619 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2620 return;
2621 case tok::l_square:
2622 parseSquare();
2623 break;
2624 case tok::l_brace: {
2625 if (!tryToParseBracedList())
2626 parseChildBlock();
2627 break;
2628 }
2629 case tok::at:
2630 nextToken();
2631 if (FormatTok->is(Kind: tok::l_brace)) {
2632 nextToken();
2633 parseBracedList();
2634 }
2635 break;
2636 default:
2637 nextToken();
2638 break;
2639 }
2640 } while (!eof());
2641}
2642
2643void UnwrappedLineParser::keepAncestorBraces() {
2644 if (!Style.RemoveBracesLLVM)
2645 return;
2646
2647 const int MaxNestingLevels = 2;
2648 const int Size = NestedTooDeep.size();
2649 if (Size >= MaxNestingLevels)
2650 NestedTooDeep[Size - MaxNestingLevels] = true;
2651 NestedTooDeep.push_back(Elt: false);
2652}
2653
2654static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2655 for (const auto &Token : llvm::reverse(C: Line.Tokens))
2656 if (Token.Tok->isNot(Kind: tok::comment))
2657 return Token.Tok;
2658
2659 return nullptr;
2660}
2661
2662void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2663 FormatToken *Tok = nullptr;
2664
2665 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2666 PreprocessorDirectives.empty() && FormatTok->isNot(Kind: tok::semi)) {
2667 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2668 ? getLastNonComment(Line: *Line)
2669 : Line->Tokens.back().Tok;
2670 assert(Tok);
2671 if (Tok->BraceCount < 0) {
2672 assert(Tok->BraceCount == -1);
2673 Tok = nullptr;
2674 } else {
2675 Tok->BraceCount = -1;
2676 }
2677 }
2678
2679 addUnwrappedLine();
2680 ++Line->Level;
2681 parseStructuralElement();
2682
2683 if (Tok) {
2684 assert(!Line->InPPDirective);
2685 Tok = nullptr;
2686 for (const auto &L : llvm::reverse(C&: *CurrentLines)) {
2687 if (!L.InPPDirective && getLastNonComment(Line: L)) {
2688 Tok = L.Tokens.back().Tok;
2689 break;
2690 }
2691 }
2692 assert(Tok);
2693 ++Tok->BraceCount;
2694 }
2695
2696 if (CheckEOF && eof())
2697 addUnwrappedLine();
2698
2699 --Line->Level;
2700}
2701
2702static void markOptionalBraces(FormatToken *LeftBrace) {
2703 if (!LeftBrace)
2704 return;
2705
2706 assert(LeftBrace->is(tok::l_brace));
2707
2708 FormatToken *RightBrace = LeftBrace->MatchingParen;
2709 if (!RightBrace) {
2710 assert(!LeftBrace->Optional);
2711 return;
2712 }
2713
2714 assert(RightBrace->is(tok::r_brace));
2715 assert(RightBrace->MatchingParen == LeftBrace);
2716 assert(LeftBrace->Optional == RightBrace->Optional);
2717
2718 LeftBrace->Optional = true;
2719 RightBrace->Optional = true;
2720}
2721
2722void UnwrappedLineParser::handleAttributes() {
2723 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2724 if (FormatTok->isAttribute())
2725 nextToken();
2726 else if (FormatTok->is(Kind: tok::l_square))
2727 handleCppAttributes();
2728}
2729
2730bool UnwrappedLineParser::handleCppAttributes() {
2731 // Handle [[likely]] / [[unlikely]] attributes.
2732 assert(FormatTok->is(tok::l_square));
2733 if (!tryToParseSimpleAttribute())
2734 return false;
2735 parseSquare();
2736 return true;
2737}
2738
2739/// Returns whether \c Tok begins a block.
2740bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2741 // FIXME: rename the function or make
2742 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2743 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2744 : Tok.is(Kind: tok::l_brace);
2745}
2746
/// Parses an `if` statement (or, in Verilog, an assertion-like statement)
/// together with its `else` / `else if` chain.
///
/// When the RemoveBracesLLVM style option is on, this also decides whether
/// the braces of the `if` and `else` bodies may be removed and marks them as
/// optional accordingly.
///
/// \param IfKind If non-null and RemoveBracesLLVM is on, receives the shape of
/// the parsed statement (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces Forces the braces to be kept.
/// \param IsVerilogAssert The statement is a Verilog assertion, for which the
/// `then` action is optional.
/// \returns the left brace of the `if` body (null if the body is unbraced)
/// when RemoveBracesLLVM is on; nullptr when it is off or when a Verilog
/// assertion ends without a `then` action.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(II: Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(Kind: tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(K1: Keywords.kw_final, K2: Keywords.kw_property,
                                  Ks: Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Kind: Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(Kind: tok::exclaim))
    nextToken();

  // Braces of `if consteval` are always kept; otherwise they are kept unless
  // RemoveBracesLLVM is on and the caller did not ask to keep them.
  bool KeepIfBraces = true;
  if (FormatTok->is(Kind: tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier before the condition.
    if (FormatTok->isOneOf(K1: tok::kw_constexpr, K2: tok::identifier))
      nextToken();
    if (FormatTok->is(Kind: tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(Kind: tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  // Pushes an entry on NestedTooDeep when RemoveBracesLLVM is on; it is popped
  // again near the end of this function.
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  // Parse the `then` body: a braced block, nothing (Verilog assert directly
  // followed by `else`), or an unbraced statement.
  if (isBlockBegin(Tok: *FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepBraces: KeepIfBraces, IfKind: &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    // Either break before a possible `else` now, or remember that the line
    // still has to be finished if no `else` follows.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(Kind: tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the `if` braces if they are unmatched, the nesting is too deep, or
    // the block itself consists of an `if` without a final plain `else`.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(Kind: tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(Tok: *FormatTok)) {
      // `else { ... }`
      const bool FollowedByIf = Tokens->peekNextToken()->is(Kind: tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepBraces: KeepElseBraces, IfKind: &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(Kind: tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) { ... } }` whose inner braces are not optional:
        // the `else` braces are marked optional right away.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(LeftBrace: ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(Kind: tok::kw_if)) {
      // `else if`: parse the nested `if` recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(Kind: tok::comment);
      // A comment between `else` and `if` puts the `if` on its own line, one
      // level deeper.
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack for the recursive call and
        // restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepBraces: KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(Elt: TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No `else`: keep the braces if the block is itself an if-else.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is brace-removal bookkeeping.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(LeftBrace: IfLeftBrace);
    markOptionalBraces(LeftBrace: ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the pair so that a missing MatchingParen marks
    // them as not removable.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2919
2920void UnwrappedLineParser::parseTryCatch() {
2921 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2922 nextToken();
2923 bool NeedsUnwrappedLine = false;
2924 if (FormatTok->is(Kind: tok::colon)) {
2925 // We are in a function try block, what comes is an initializer list.
2926 nextToken();
2927
2928 // In case identifiers were removed by clang-tidy, what might follow is
2929 // multiple commas in sequence - before the first identifier.
2930 while (FormatTok->is(Kind: tok::comma))
2931 nextToken();
2932
2933 while (FormatTok->is(Kind: tok::identifier)) {
2934 nextToken();
2935 if (FormatTok->is(Kind: tok::l_paren))
2936 parseParens();
2937 if (FormatTok->Previous && FormatTok->Previous->is(Kind: tok::identifier) &&
2938 FormatTok->is(Kind: tok::l_brace)) {
2939 do {
2940 nextToken();
2941 } while (FormatTok->isNot(Kind: tok::r_brace));
2942 nextToken();
2943 }
2944
2945 // In case identifiers were removed by clang-tidy, what might follow is
2946 // multiple commas in sequence - after the first identifier.
2947 while (FormatTok->is(Kind: tok::comma))
2948 nextToken();
2949 }
2950 }
2951 // Parse try with resource.
2952 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(Kind: tok::l_paren))
2953 parseParens();
2954
2955 keepAncestorBraces();
2956
2957 if (FormatTok->is(Kind: tok::l_brace)) {
2958 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2959 parseBlock();
2960 if (Style.BraceWrapping.BeforeCatch)
2961 addUnwrappedLine();
2962 else
2963 NeedsUnwrappedLine = true;
2964 } else if (FormatTok->isNot(Kind: tok::kw_catch)) {
2965 // The C++ standard requires a compound-statement after a try.
2966 // If there's none, we try to assume there's a structuralElement
2967 // and try to continue.
2968 addUnwrappedLine();
2969 ++Line->Level;
2970 parseStructuralElement();
2971 --Line->Level;
2972 }
2973 while (true) {
2974 if (FormatTok->is(Kind: tok::at))
2975 nextToken();
2976 if (!(FormatTok->isOneOf(K1: tok::kw_catch, K2: Keywords.kw___except,
2977 Ks: tok::kw___finally) ||
2978 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2979 FormatTok->is(II: Keywords.kw_finally)) ||
2980 (FormatTok->isObjCAtKeyword(Kind: tok::objc_catch) ||
2981 FormatTok->isObjCAtKeyword(Kind: tok::objc_finally)))) {
2982 break;
2983 }
2984 nextToken();
2985 while (FormatTok->isNot(Kind: tok::l_brace)) {
2986 if (FormatTok->is(Kind: tok::l_paren)) {
2987 parseParens();
2988 continue;
2989 }
2990 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::r_brace, Ks: tok::eof)) {
2991 if (Style.RemoveBracesLLVM)
2992 NestedTooDeep.pop_back();
2993 return;
2994 }
2995 nextToken();
2996 }
2997 NeedsUnwrappedLine = false;
2998 Line->MustBeDeclaration = false;
2999 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3000 parseBlock();
3001 if (Style.BraceWrapping.BeforeCatch)
3002 addUnwrappedLine();
3003 else
3004 NeedsUnwrappedLine = true;
3005 }
3006
3007 if (Style.RemoveBracesLLVM)
3008 NestedTooDeep.pop_back();
3009
3010 if (NeedsUnwrappedLine)
3011 addUnwrappedLine();
3012}
3013
3014void UnwrappedLineParser::parseNamespace() {
3015 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3016 "'namespace' expected");
3017
3018 const FormatToken &InitialToken = *FormatTok;
3019 nextToken();
3020 if (InitialToken.is(TT: TT_NamespaceMacro)) {
3021 parseParens();
3022 } else {
3023 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::kw_inline,
3024 Ks: tok::l_square, Ks: tok::period, Ks: tok::l_paren) ||
3025 (Style.isCSharp() && FormatTok->is(Kind: tok::kw_union))) {
3026 if (FormatTok->is(Kind: tok::l_square))
3027 parseSquare();
3028 else if (FormatTok->is(Kind: tok::l_paren))
3029 parseParens();
3030 else
3031 nextToken();
3032 }
3033 }
3034 if (FormatTok->is(Kind: tok::l_brace)) {
3035 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3036
3037 if (ShouldBreakBeforeBrace(Style, InitialToken))
3038 addUnwrappedLine();
3039
3040 unsigned AddLevels =
3041 Style.NamespaceIndentation == FormatStyle::NI_All ||
3042 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3043 DeclarationScopeStack.size() > 1)
3044 ? 1u
3045 : 0u;
3046 bool ManageWhitesmithsBraces =
3047 AddLevels == 0u &&
3048 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3049
3050 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3051 // the whole block.
3052 if (ManageWhitesmithsBraces)
3053 ++Line->Level;
3054
3055 // Munch the semicolon after a namespace. This is more common than one would
3056 // think. Putting the semicolon into its own line is very ugly.
3057 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3058 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3059 UnindentWhitesmithsBraces: ManageWhitesmithsBraces);
3060
3061 addUnwrappedLine(AdjustLevel: AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3062
3063 if (ManageWhitesmithsBraces)
3064 --Line->Level;
3065 }
3066 // FIXME: Add error handling.
3067}
3068
3069void UnwrappedLineParser::parseNew() {
3070 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3071 nextToken();
3072
3073 if (Style.isCSharp()) {
3074 do {
3075 // Handle constructor invocation, e.g. `new(field: value)`.
3076 if (FormatTok->is(Kind: tok::l_paren))
3077 parseParens();
3078
3079 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3080 if (FormatTok->is(Kind: tok::l_brace))
3081 parseBracedList();
3082
3083 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::comma))
3084 return;
3085
3086 nextToken();
3087 } while (!eof());
3088 }
3089
3090 if (Style.Language != FormatStyle::LK_Java)
3091 return;
3092
3093 // In Java, we can parse everything up to the parens, which aren't optional.
3094 do {
3095 // There should not be a ;, { or } before the new's open paren.
3096 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::r_brace))
3097 return;
3098
3099 // Consume the parens.
3100 if (FormatTok->is(Kind: tok::l_paren)) {
3101 parseParens();
3102
3103 // If there is a class body of an anonymous class, consume that as child.
3104 if (FormatTok->is(Kind: tok::l_brace))
3105 parseChildBlock();
3106 return;
3107 }
3108 nextToken();
3109 } while (!eof());
3110}
3111
3112void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3113 keepAncestorBraces();
3114
3115 if (isBlockBegin(Tok: *FormatTok)) {
3116 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3117 FormatToken *LeftBrace = FormatTok;
3118 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3119 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3120 /*MunchSemi=*/true, KeepBraces);
3121 setPreviousRBraceType(TT_ControlStatementRBrace);
3122 if (!KeepBraces) {
3123 assert(!NestedTooDeep.empty());
3124 if (!NestedTooDeep.back())
3125 markOptionalBraces(LeftBrace);
3126 }
3127 if (WrapRightBrace)
3128 addUnwrappedLine();
3129 } else {
3130 parseUnbracedBody();
3131 }
3132
3133 if (!KeepBraces)
3134 NestedTooDeep.pop_back();
3135}
3136
3137void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3138 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3139 (Style.isVerilog() &&
3140 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3141 Keywords.kw_always_ff, Keywords.kw_always_latch,
3142 Keywords.kw_final, Keywords.kw_initial,
3143 Keywords.kw_foreach, Keywords.kw_forever,
3144 Keywords.kw_repeat))) &&
3145 "'for', 'while' or foreach macro expected");
3146 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3147 !FormatTok->isOneOf(K1: tok::kw_for, K2: tok::kw_while);
3148
3149 nextToken();
3150 // JS' for await ( ...
3151 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_await))
3152 nextToken();
3153 if (IsCpp && FormatTok->is(Kind: tok::kw_co_await))
3154 nextToken();
3155 if (HasParens && FormatTok->is(Kind: tok::l_paren)) {
3156 // The type is only set for Verilog basically because we were afraid to
3157 // change the existing behavior for loops. See the discussion on D121756 for
3158 // details.
3159 if (Style.isVerilog())
3160 FormatTok->setFinalizedType(TT_ConditionLParen);
3161 parseParens();
3162 }
3163
3164 if (Style.isVerilog()) {
3165 // Event control.
3166 parseVerilogSensitivityList();
3167 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(Kind: tok::semi) &&
3168 Tokens->getPreviousToken()->is(Kind: tok::r_paren)) {
3169 nextToken();
3170 addUnwrappedLine();
3171 return;
3172 }
3173
3174 handleAttributes();
3175 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3176}
3177
3178void UnwrappedLineParser::parseDoWhile() {
3179 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3180 nextToken();
3181
3182 parseLoopBody(/*KeepBraces=*/true, WrapRightBrace: Style.BraceWrapping.BeforeWhile);
3183
3184 // FIXME: Add error handling.
3185 if (FormatTok->isNot(Kind: tok::kw_while)) {
3186 addUnwrappedLine();
3187 return;
3188 }
3189
3190 FormatTok->setFinalizedType(TT_DoWhile);
3191
3192 // If in Whitesmiths mode, the line with the while() needs to be indented
3193 // to the same level as the block.
3194 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3195 ++Line->Level;
3196
3197 nextToken();
3198 parseStructuralElement();
3199}
3200
3201void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3202 nextToken();
3203 unsigned OldLineLevel = Line->Level;
3204 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3205 --Line->Level;
3206 if (LeftAlignLabel)
3207 Line->Level = 0;
3208
3209 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3210 FormatTok->is(Kind: tok::l_brace)) {
3211
3212 CompoundStatementIndenter Indenter(this, Line->Level,
3213 Style.BraceWrapping.AfterCaseLabel,
3214 Style.BraceWrapping.IndentBraces);
3215 parseBlock();
3216 if (FormatTok->is(Kind: tok::kw_break)) {
3217 if (Style.BraceWrapping.AfterControlStatement ==
3218 FormatStyle::BWACS_Always) {
3219 addUnwrappedLine();
3220 if (!Style.IndentCaseBlocks &&
3221 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3222 ++Line->Level;
3223 }
3224 }
3225 parseStructuralElement();
3226 }
3227 addUnwrappedLine();
3228 } else {
3229 if (FormatTok->is(Kind: tok::semi))
3230 nextToken();
3231 addUnwrappedLine();
3232 }
3233 Line->Level = OldLineLevel;
3234 if (FormatTok->isNot(Kind: tok::l_brace)) {
3235 parseStructuralElement();
3236 addUnwrappedLine();
3237 }
3238}
3239
3240void UnwrappedLineParser::parseCaseLabel() {
3241 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3242
3243 // FIXME: fix handling of complex expressions here.
3244 do {
3245 nextToken();
3246 if (FormatTok->is(Kind: tok::colon)) {
3247 FormatTok->setFinalizedType(TT_CaseLabelColon);
3248 break;
3249 }
3250 } while (!eof());
3251 parseLabel();
3252}
3253
3254void UnwrappedLineParser::parseSwitch() {
3255 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3256 nextToken();
3257 if (FormatTok->is(Kind: tok::l_paren))
3258 parseParens();
3259
3260 keepAncestorBraces();
3261
3262 if (FormatTok->is(Kind: tok::l_brace)) {
3263 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3264 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3265 parseBlock();
3266 setPreviousRBraceType(TT_ControlStatementRBrace);
3267 addUnwrappedLine();
3268 } else {
3269 addUnwrappedLine();
3270 ++Line->Level;
3271 parseStructuralElement();
3272 --Line->Level;
3273 }
3274
3275 if (Style.RemoveBracesLLVM)
3276 NestedTooDeep.pop_back();
3277}
3278
3279// Operators that can follow a C variable.
3280static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
3281 switch (Kind) {
3282 case tok::ampamp:
3283 case tok::ampequal:
3284 case tok::arrow:
3285 case tok::caret:
3286 case tok::caretequal:
3287 case tok::comma:
3288 case tok::ellipsis:
3289 case tok::equal:
3290 case tok::equalequal:
3291 case tok::exclaim:
3292 case tok::exclaimequal:
3293 case tok::greater:
3294 case tok::greaterequal:
3295 case tok::greatergreater:
3296 case tok::greatergreaterequal:
3297 case tok::l_paren:
3298 case tok::l_square:
3299 case tok::less:
3300 case tok::lessequal:
3301 case tok::lessless:
3302 case tok::lesslessequal:
3303 case tok::minus:
3304 case tok::minusequal:
3305 case tok::minusminus:
3306 case tok::percent:
3307 case tok::percentequal:
3308 case tok::period:
3309 case tok::pipe:
3310 case tok::pipeequal:
3311 case tok::pipepipe:
3312 case tok::plus:
3313 case tok::plusequal:
3314 case tok::plusplus:
3315 case tok::question:
3316 case tok::r_brace:
3317 case tok::r_paren:
3318 case tok::r_square:
3319 case tok::semi:
3320 case tok::slash:
3321 case tok::slashequal:
3322 case tok::star:
3323 case tok::starequal:
3324 return true;
3325 default:
3326 return false;
3327 }
3328}
3329
3330void UnwrappedLineParser::parseAccessSpecifier() {
3331 FormatToken *AccessSpecifierCandidate = FormatTok;
3332 nextToken();
3333 // Understand Qt's slots.
3334 if (FormatTok->isOneOf(K1: Keywords.kw_slots, K2: Keywords.kw_qslots))
3335 nextToken();
3336 // Otherwise, we don't know what it is, and we'd better keep the next token.
3337 if (FormatTok->is(Kind: tok::colon)) {
3338 nextToken();
3339 addUnwrappedLine();
3340 } else if (FormatTok->isNot(Kind: tok::coloncolon) &&
3341 !isCOperatorFollowingVar(Kind: FormatTok->Tok.getKind())) {
3342 // Not a variable name nor namespace name.
3343 addUnwrappedLine();
3344 } else if (AccessSpecifierCandidate) {
3345 // Consider the access specifier to be a C identifier.
3346 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3347 }
3348}
3349
3350/// \brief Parses a requires, decides if it is a clause or an expression.
3351/// \pre The current token has to be the requires keyword.
3352/// \returns true if it parsed a clause.
3353bool clang::format::UnwrappedLineParser::parseRequires() {
3354 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3355 auto RequiresToken = FormatTok;
3356
3357 // We try to guess if it is a requires clause, or a requires expression. For
3358 // that we first consume the keyword and check the next token.
3359 nextToken();
3360
3361 switch (FormatTok->Tok.getKind()) {
3362 case tok::l_brace:
3363 // This can only be an expression, never a clause.
3364 parseRequiresExpression(RequiresToken);
3365 return false;
3366 case tok::l_paren:
3367 // Clauses and expression can start with a paren, it's unclear what we have.
3368 break;
3369 default:
3370 // All other tokens can only be a clause.
3371 parseRequiresClause(RequiresToken);
3372 return true;
3373 }
3374
3375 // Looking forward we would have to decide if there are function declaration
3376 // like arguments to the requires expression:
3377 // requires (T t) {
3378 // Or there is a constraint expression for the requires clause:
3379 // requires (C<T> && ...
3380
3381 // But first let's look behind.
3382 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3383
3384 if (!PreviousNonComment ||
3385 PreviousNonComment->is(TT: TT_RequiresExpressionLBrace)) {
3386 // If there is no token, or an expression left brace, we are a requires
3387 // clause within a requires expression.
3388 parseRequiresClause(RequiresToken);
3389 return true;
3390 }
3391
3392 switch (PreviousNonComment->Tok.getKind()) {
3393 case tok::greater:
3394 case tok::r_paren:
3395 case tok::kw_noexcept:
3396 case tok::kw_const:
3397 // This is a requires clause.
3398 parseRequiresClause(RequiresToken);
3399 return true;
3400 case tok::amp:
3401 case tok::ampamp: {
3402 // This can be either:
3403 // if (... && requires (T t) ...)
3404 // Or
3405 // void member(...) && requires (C<T> ...
3406 // We check the one token before that for a const:
3407 // void member(...) const && requires (C<T> ...
3408 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3409 if (PrevPrev && PrevPrev->is(Kind: tok::kw_const)) {
3410 parseRequiresClause(RequiresToken);
3411 return true;
3412 }
3413 break;
3414 }
3415 default:
3416 if (PreviousNonComment->isTypeOrIdentifier(IsCpp)) {
3417 // This is a requires clause.
3418 parseRequiresClause(RequiresToken);
3419 return true;
3420 }
3421 // It's an expression.
3422 parseRequiresExpression(RequiresToken);
3423 return false;
3424 }
3425
3426 // Now we look forward and try to check if the paren content is a parameter
3427 // list. The parameters can be cv-qualified and contain references or
3428 // pointers.
3429 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3430 // of stuff: typename, const, *, &, &&, ::, identifiers.
3431
3432 unsigned StoredPosition = Tokens->getPosition();
3433 FormatToken *NextToken = Tokens->getNextToken();
3434 int Lookahead = 0;
3435 auto PeekNext = [&Lookahead, &NextToken, this] {
3436 ++Lookahead;
3437 NextToken = Tokens->getNextToken();
3438 };
3439
3440 bool FoundType = false;
3441 bool LastWasColonColon = false;
3442 int OpenAngles = 0;
3443
3444 for (; Lookahead < 50; PeekNext()) {
3445 switch (NextToken->Tok.getKind()) {
3446 case tok::kw_volatile:
3447 case tok::kw_const:
3448 case tok::comma:
3449 if (OpenAngles == 0) {
3450 FormatTok = Tokens->setPosition(StoredPosition);
3451 parseRequiresExpression(RequiresToken);
3452 return false;
3453 }
3454 break;
3455 case tok::eof:
3456 // Break out of the loop.
3457 Lookahead = 50;
3458 break;
3459 case tok::coloncolon:
3460 LastWasColonColon = true;
3461 break;
3462 case tok::kw_decltype:
3463 case tok::identifier:
3464 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3465 FormatTok = Tokens->setPosition(StoredPosition);
3466 parseRequiresExpression(RequiresToken);
3467 return false;
3468 }
3469 FoundType = true;
3470 LastWasColonColon = false;
3471 break;
3472 case tok::less:
3473 ++OpenAngles;
3474 break;
3475 case tok::greater:
3476 --OpenAngles;
3477 break;
3478 default:
3479 if (NextToken->isTypeName(IsCpp)) {
3480 FormatTok = Tokens->setPosition(StoredPosition);
3481 parseRequiresExpression(RequiresToken);
3482 return false;
3483 }
3484 break;
3485 }
3486 }
3487 // This seems to be a complicated expression, just assume it's a clause.
3488 FormatTok = Tokens->setPosition(StoredPosition);
3489 parseRequiresClause(RequiresToken);
3490 return true;
3491}
3492
3493/// \brief Parses a requires clause.
3494/// \param RequiresToken The requires keyword token, which starts this clause.
3495/// \pre We need to be on the next token after the requires keyword.
3496/// \sa parseRequiresExpression
3497///
3498/// Returns if it either has finished parsing the clause, or it detects, that
3499/// the clause is incorrect.
3500void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3501 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3502 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3503
3504 // If there is no previous token, we are within a requires expression,
3505 // otherwise we will always have the template or function declaration in front
3506 // of it.
3507 bool InRequiresExpression =
3508 !RequiresToken->Previous ||
3509 RequiresToken->Previous->is(TT: TT_RequiresExpressionLBrace);
3510
3511 RequiresToken->setFinalizedType(InRequiresExpression
3512 ? TT_RequiresClauseInARequiresExpression
3513 : TT_RequiresClause);
3514
3515 // NOTE: parseConstraintExpression is only ever called from this function.
3516 // It could be inlined into here.
3517 parseConstraintExpression();
3518
3519 if (!InRequiresExpression)
3520 FormatTok->Previous->ClosesRequiresClause = true;
3521}
3522
3523/// \brief Parses a requires expression.
3524/// \param RequiresToken The requires keyword token, which starts this clause.
3525/// \pre We need to be on the next token after the requires keyword.
3526/// \sa parseRequiresClause
3527///
3528/// Returns if it either has finished parsing the expression, or it detects,
3529/// that the expression is incorrect.
3530void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3531 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3532 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3533
3534 RequiresToken->setFinalizedType(TT_RequiresExpression);
3535
3536 if (FormatTok->is(Kind: tok::l_paren)) {
3537 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3538 parseParens();
3539 }
3540
3541 if (FormatTok->is(Kind: tok::l_brace)) {
3542 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3543 parseChildBlock();
3544 }
3545}
3546
3547/// \brief Parses a constraint expression.
3548///
3549/// This is the body of a requires clause. It returns, when the parsing is
3550/// complete, or the expression is incorrect.
3551void UnwrappedLineParser::parseConstraintExpression() {
3552 // The special handling for lambdas is needed since tryToParseLambda() eats a
3553 // token and if a requires expression is the last part of a requires clause
3554 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3555 // not set on the correct token. Thus we need to be aware if we even expect a
3556 // lambda to be possible.
3557 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3558 bool LambdaNextTimeAllowed = true;
3559
3560 // Within lambda declarations, it is permitted to put a requires clause after
3561 // its template parameter list, which would place the requires clause right
3562 // before the parentheses of the parameters of the lambda declaration. Thus,
3563 // we track if we expect to see grouping parentheses at all.
3564 // Without this check, `requires foo<T> (T t)` in the below example would be
3565 // seen as the whole requires clause, accidentally eating the parameters of
3566 // the lambda.
3567 // [&]<typename T> requires foo<T> (T t) { ... };
3568 bool TopLevelParensAllowed = true;
3569
3570 do {
3571 bool LambdaThisTimeAllowed = std::exchange(obj&: LambdaNextTimeAllowed, new_val: false);
3572
3573 switch (FormatTok->Tok.getKind()) {
3574 case tok::kw_requires: {
3575 auto RequiresToken = FormatTok;
3576 nextToken();
3577 parseRequiresExpression(RequiresToken);
3578 break;
3579 }
3580
3581 case tok::l_paren:
3582 if (!TopLevelParensAllowed)
3583 return;
3584 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3585 TopLevelParensAllowed = false;
3586 break;
3587
3588 case tok::l_square:
3589 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3590 return;
3591 break;
3592
3593 case tok::kw_const:
3594 case tok::semi:
3595 case tok::kw_class:
3596 case tok::kw_struct:
3597 case tok::kw_union:
3598 return;
3599
3600 case tok::l_brace:
3601 // Potential function body.
3602 return;
3603
3604 case tok::ampamp:
3605 case tok::pipepipe:
3606 FormatTok->setFinalizedType(TT_BinaryOperator);
3607 nextToken();
3608 LambdaNextTimeAllowed = true;
3609 TopLevelParensAllowed = true;
3610 break;
3611
3612 case tok::comma:
3613 case tok::comment:
3614 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3615 nextToken();
3616 break;
3617
3618 case tok::kw_sizeof:
3619 case tok::greater:
3620 case tok::greaterequal:
3621 case tok::greatergreater:
3622 case tok::less:
3623 case tok::lessequal:
3624 case tok::lessless:
3625 case tok::equalequal:
3626 case tok::exclaim:
3627 case tok::exclaimequal:
3628 case tok::plus:
3629 case tok::minus:
3630 case tok::star:
3631 case tok::slash:
3632 LambdaNextTimeAllowed = true;
3633 TopLevelParensAllowed = true;
3634 // Just eat them.
3635 nextToken();
3636 break;
3637
3638 case tok::numeric_constant:
3639 case tok::coloncolon:
3640 case tok::kw_true:
3641 case tok::kw_false:
3642 TopLevelParensAllowed = false;
3643 // Just eat them.
3644 nextToken();
3645 break;
3646
3647 case tok::kw_static_cast:
3648 case tok::kw_const_cast:
3649 case tok::kw_reinterpret_cast:
3650 case tok::kw_dynamic_cast:
3651 nextToken();
3652 if (FormatTok->isNot(Kind: tok::less))
3653 return;
3654
3655 nextToken();
3656 parseBracedList(/*IsAngleBracket=*/true);
3657 break;
3658
3659 default:
3660 if (!FormatTok->Tok.getIdentifierInfo()) {
3661 // Identifiers are part of the default case, we check for more then
3662 // tok::identifier to handle builtin type traits.
3663 return;
3664 }
3665
3666 // We need to differentiate identifiers for a template deduction guide,
3667 // variables, or function return types (the constraint expression has
3668 // ended before that), and basically all other cases. But it's easier to
3669 // check the other way around.
3670 assert(FormatTok->Previous);
3671 switch (FormatTok->Previous->Tok.getKind()) {
3672 case tok::coloncolon: // Nested identifier.
3673 case tok::ampamp: // Start of a function or variable for the
3674 case tok::pipepipe: // constraint expression. (binary)
3675 case tok::exclaim: // The same as above, but unary.
3676 case tok::kw_requires: // Initial identifier of a requires clause.
3677 case tok::equal: // Initial identifier of a concept declaration.
3678 break;
3679 default:
3680 return;
3681 }
3682
3683 // Read identifier with optional template declaration.
3684 nextToken();
3685 if (FormatTok->is(Kind: tok::less)) {
3686 nextToken();
3687 parseBracedList(/*IsAngleBracket=*/true);
3688 }
3689 TopLevelParensAllowed = false;
3690 break;
3691 }
3692 } while (!eof());
3693}
3694
3695bool UnwrappedLineParser::parseEnum() {
3696 const FormatToken &InitialToken = *FormatTok;
3697
3698 // Won't be 'enum' for NS_ENUMs.
3699 if (FormatTok->is(Kind: tok::kw_enum))
3700 nextToken();
3701
3702 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3703 // declarations. An "enum" keyword followed by a colon would be a syntax
3704 // error and thus assume it is just an identifier.
3705 if (Style.isJavaScript() && FormatTok->isOneOf(K1: tok::colon, K2: tok::question))
3706 return false;
3707
3708 // In protobuf, "enum" can be used as a field name.
3709 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(Kind: tok::equal))
3710 return false;
3711
3712 if (IsCpp) {
3713 // Eat up enum class ...
3714 if (FormatTok->isOneOf(K1: tok::kw_class, K2: tok::kw_struct))
3715 nextToken();
3716 while (FormatTok->is(Kind: tok::l_square))
3717 if (!handleCppAttributes())
3718 return false;
3719 }
3720
3721 while (FormatTok->Tok.getIdentifierInfo() ||
3722 FormatTok->isOneOf(K1: tok::colon, K2: tok::coloncolon, Ks: tok::less,
3723 Ks: tok::greater, Ks: tok::comma, Ks: tok::question,
3724 Ks: tok::l_square)) {
3725 if (Style.isVerilog()) {
3726 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3727 nextToken();
3728 // In Verilog the base type can have dimensions.
3729 while (FormatTok->is(Kind: tok::l_square))
3730 parseSquare();
3731 } else {
3732 nextToken();
3733 }
3734 // We can have macros or attributes in between 'enum' and the enum name.
3735 if (FormatTok->is(Kind: tok::l_paren))
3736 parseParens();
3737 if (FormatTok->is(Kind: tok::identifier)) {
3738 nextToken();
3739 // If there are two identifiers in a row, this is likely an elaborate
3740 // return type. In Java, this can be "implements", etc.
3741 if (IsCpp && FormatTok->is(Kind: tok::identifier))
3742 return false;
3743 }
3744 }
3745
3746 // Just a declaration or something is wrong.
3747 if (FormatTok->isNot(Kind: tok::l_brace))
3748 return true;
3749 FormatTok->setFinalizedType(TT_EnumLBrace);
3750 FormatTok->setBlockKind(BK_Block);
3751
3752 if (Style.Language == FormatStyle::LK_Java) {
3753 // Java enums are different.
3754 parseJavaEnumBody();
3755 return true;
3756 }
3757 if (Style.Language == FormatStyle::LK_Proto) {
3758 parseBlock(/*MustBeDeclaration=*/true);
3759 return true;
3760 }
3761
3762 if (!Style.AllowShortEnumsOnASingleLine &&
3763 ShouldBreakBeforeBrace(Style, InitialToken)) {
3764 addUnwrappedLine();
3765 }
3766 // Parse enum body.
3767 nextToken();
3768 if (!Style.AllowShortEnumsOnASingleLine) {
3769 addUnwrappedLine();
3770 Line->Level += 1;
3771 }
3772 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3773 if (!Style.AllowShortEnumsOnASingleLine)
3774 Line->Level -= 1;
3775 if (HasError) {
3776 if (FormatTok->is(Kind: tok::semi))
3777 nextToken();
3778 addUnwrappedLine();
3779 }
3780 setPreviousRBraceType(TT_EnumRBrace);
3781 return true;
3782
3783 // There is no addUnwrappedLine() here so that we fall through to parsing a
3784 // structural element afterwards. Thus, in "enum A {} n, m;",
3785 // "} n, m;" will end up in one unwrapped line.
3786}
3787
3788bool UnwrappedLineParser::parseStructLike() {
3789 // parseRecord falls through and does not yet add an unwrapped line as a
3790 // record declaration or definition can start a structural element.
3791 parseRecord();
3792 // This does not apply to Java, JavaScript and C#.
3793 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3794 Style.isCSharp()) {
3795 if (FormatTok->is(Kind: tok::semi))
3796 nextToken();
3797 addUnwrappedLine();
3798 return true;
3799 }
3800 return false;
3801}
3802
3803namespace {
3804// A class used to set and restore the Token position when peeking
3805// ahead in the token source.
3806class ScopedTokenPosition {
3807 unsigned StoredPosition;
3808 FormatTokenSource *Tokens;
3809
3810public:
3811 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3812 assert(Tokens && "Tokens expected to not be null");
3813 StoredPosition = Tokens->getPosition();
3814 }
3815
3816 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3817};
3818} // namespace
3819
3820// Look to see if we have [[ by looking ahead, if
3821// its not then rewind to the original position.
3822bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3823 ScopedTokenPosition AutoPosition(Tokens);
3824 FormatToken *Tok = Tokens->getNextToken();
3825 // We already read the first [ check for the second.
3826 if (Tok->isNot(Kind: tok::l_square))
3827 return false;
3828 // Double check that the attribute is just something
3829 // fairly simple.
3830 while (Tok->isNot(Kind: tok::eof)) {
3831 if (Tok->is(Kind: tok::r_square))
3832 break;
3833 Tok = Tokens->getNextToken();
3834 }
3835 if (Tok->is(Kind: tok::eof))
3836 return false;
3837 Tok = Tokens->getNextToken();
3838 if (Tok->isNot(Kind: tok::r_square))
3839 return false;
3840 Tok = Tokens->getNextToken();
3841 if (Tok->is(Kind: tok::semi))
3842 return false;
3843 return true;
3844}
3845
3846void UnwrappedLineParser::parseJavaEnumBody() {
3847 assert(FormatTok->is(tok::l_brace));
3848 const FormatToken *OpeningBrace = FormatTok;
3849
3850 // Determine whether the enum is simple, i.e. does not have a semicolon or
3851 // constants with class bodies. Simple enums can be formatted like braced
3852 // lists, contracted to a single line, etc.
3853 unsigned StoredPosition = Tokens->getPosition();
3854 bool IsSimple = true;
3855 FormatToken *Tok = Tokens->getNextToken();
3856 while (Tok->isNot(Kind: tok::eof)) {
3857 if (Tok->is(Kind: tok::r_brace))
3858 break;
3859 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::semi)) {
3860 IsSimple = false;
3861 break;
3862 }
3863 // FIXME: This will also mark enums with braces in the arguments to enum
3864 // constants as "not simple". This is probably fine in practice, though.
3865 Tok = Tokens->getNextToken();
3866 }
3867 FormatTok = Tokens->setPosition(StoredPosition);
3868
3869 if (IsSimple) {
3870 nextToken();
3871 parseBracedList();
3872 addUnwrappedLine();
3873 return;
3874 }
3875
3876 // Parse the body of a more complex enum.
3877 // First add a line for everything up to the "{".
3878 nextToken();
3879 addUnwrappedLine();
3880 ++Line->Level;
3881
3882 // Parse the enum constants.
3883 while (!eof()) {
3884 if (FormatTok->is(Kind: tok::l_brace)) {
3885 // Parse the constant's class body.
3886 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3887 /*MunchSemi=*/false);
3888 } else if (FormatTok->is(Kind: tok::l_paren)) {
3889 parseParens();
3890 } else if (FormatTok->is(Kind: tok::comma)) {
3891 nextToken();
3892 addUnwrappedLine();
3893 } else if (FormatTok->is(Kind: tok::semi)) {
3894 nextToken();
3895 addUnwrappedLine();
3896 break;
3897 } else if (FormatTok->is(Kind: tok::r_brace)) {
3898 addUnwrappedLine();
3899 break;
3900 } else {
3901 nextToken();
3902 }
3903 }
3904
3905 // Parse the class body after the enum's ";" if any.
3906 parseLevel(OpeningBrace);
3907 nextToken();
3908 --Line->Level;
3909 addUnwrappedLine();
3910}
3911
/// Parses a record-like declaration (class, struct, union, or another
/// record-introducing keyword) whose keyword is the current token.
///
/// The keyword is consumed first, followed by the name tokens (nested name
/// specifiers, token pasting, attributes and macro calls are skipped over).
/// An optional template argument list or base clause is then walked. If a '{'
/// follows, it is either recognized as a braced-list initialization (in which
/// case the function returns early) or it is typed as the record's opening
/// brace and the body is parsed.
///
/// \param ParseAsExpr if true, the body is parsed as a child block; otherwise
///        it is parsed as a regular declaration block.
///
/// No unwrapped line is added at the end, so trailing declarators stay on the
/// line of the closing brace.
3912void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3913 const FormatToken &InitialToken = *FormatTok;
3914 nextToken();
3915
3916 const FormatToken *ClassName = nullptr;
3917 bool IsDerived = false;
// True for an identifier whose text is not identical to its upper-cased form.
// All-upper-case identifiers are treated as possible macros below.
3918 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
3919 return Tok->is(Kind: tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
3920 };
3921 // The actual identifier can be a nested name specifier, and in macros
3922 // it is often token-pasted.
3923 // An [[attribute]] can be before the identifier.
3924 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::hashhash,
3925 Ks: tok::kw_alignas, Ks: tok::l_square) ||
3926 FormatTok->isAttribute() ||
3927 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3928 FormatTok->isOneOf(K1: tok::period, K2: tok::comma))) {
3929 if (Style.isJavaScript() &&
3930 FormatTok->isOneOf(K1: Keywords.kw_extends, K2: Keywords.kw_implements)) {
3931 // JavaScript/TypeScript supports inline object types in
3932 // extends/implements positions:
3933 // class Foo implements {bar: number} { }
3934 nextToken();
3935 if (FormatTok->is(Kind: tok::l_brace)) {
3936 tryToParseBracedList();
3937 continue;
3938 }
3939 }
3940 if (FormatTok->is(Kind: tok::l_square) && handleCppAttributes())
3941 continue;
// Remember the token being consumed so the token that follows it can be used
// to classify it.
3942 const auto *Previous = FormatTok;
3943 nextToken();
3944 switch (FormatTok->Tok.getKind()) {
3945 case tok::l_paren:
3946 // We can have macros in between 'class' and the class name.
3947 if (!IsNonMacroIdentifier(Previous))
3948 parseParens();
3949 break;
3950 case tok::coloncolon:
3951 break;
3952 default:
// The first identifier that is not followed by '(' or '::' becomes the class
// name.
3953 if (!ClassName && Previous->is(Kind: tok::identifier))
3954 ClassName = Previous;
3955 }
3956 }
3957
// Called with FormatTok at a '{'. Returns true if the brace was parsed as a
// braced list: a class name must have been seen, no ':' base clause may have
// been parsed, and the token before the brace must be an identifier other
// than the class name and 'final'. tryToParseBracedList() is only invoked
// when all of these hold.
3958 auto IsListInitialization = [&] {
3959 if (!ClassName || IsDerived)
3960 return false;
3961 assert(FormatTok->is(tok::l_brace));
3962 const auto *Prev = FormatTok->getPreviousNonComment();
3963 assert(Prev);
3964 return Prev != ClassName && Prev->is(Kind: tok::identifier) &&
3965 Prev->isNot(Kind: Keywords.kw_final) && tryToParseBracedList();
3966 };
3967
// Walk over a base clause (':') or template argument list ('<') until the
// record body, a ';', or the end of input is reached.
3968 if (FormatTok->isOneOf(K1: tok::colon, K2: tok::less)) {
3969 if (FormatTok->is(Kind: tok::colon))
3970 IsDerived = true;
3971 int AngleNestingLevel = 0;
3972 do {
3973 if (FormatTok->is(Kind: tok::less))
3974 ++AngleNestingLevel;
3975 else if (FormatTok->is(Kind: tok::greater))
3976 --AngleNestingLevel;
3977
// A '(' outside of angle brackets that directly follows a non-macro
// identifier ends the scan.
3978 if (AngleNestingLevel == 0 && FormatTok->is(Kind: tok::l_paren) &&
3979 IsNonMacroIdentifier(FormatTok->Previous)) {
3980 break;
3981 }
3982 if (FormatTok->is(Kind: tok::l_brace)) {
3983 if (AngleNestingLevel == 0 && IsListInitialization())
3984 return;
3985 calculateBraceTypes(/*ExpectClassBody=*/true);
// If the brace is not a braced list, leave the loop with FormatTok still at
// the '{'.
3986 if (!tryToParseBracedList())
3987 break;
3988 }
3989 if (FormatTok->is(Kind: tok::l_square)) {
3990 FormatToken *Previous = FormatTok->Previous;
3991 if (!Previous || (Previous->isNot(Kind: tok::r_paren) &&
3992 !Previous->isTypeOrIdentifier(IsCpp))) {
3993 // Don't try parsing a lambda if we had a closing parenthesis before,
3994 // it was probably a pointer to an array: int (*)[].
3995 if (!tryToParseLambda())
3996 continue;
3997 } else {
3998 parseSquare();
3999 continue;
4000 }
4001 }
// A ';' here means there is no record body to parse.
4002 if (FormatTok->is(Kind: tok::semi))
4003 return;
4004 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where)) {
4005 addUnwrappedLine();
4006 nextToken();
4007 parseCSharpGenericTypeConstraint();
4008 break;
4009 }
4010 nextToken();
4011 } while (!eof());
4012 }
4013
// Maps the record-introducing keyword to the pair of token types used for
// the record's opening and closing brace.
4014 auto GetBraceTypes =
4015 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4016 switch (RecordTok.Tok.getKind()) {
4017 case tok::kw_class:
4018 return {TT_ClassLBrace, TT_ClassRBrace};
4019 case tok::kw_struct:
4020 return {TT_StructLBrace, TT_StructRBrace};
4021 case tok::kw_union:
4022 return {TT_UnionLBrace, TT_UnionRBrace};
4023 default:
4024 // Useful for e.g. interface.
4025 return {TT_RecordLBrace, TT_RecordRBrace};
4026 }
4027 };
4028 if (FormatTok->is(Kind: tok::l_brace)) {
4029 if (IsListInitialization())
4030 return;
4031 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4032 FormatTok->setFinalizedType(OpenBraceType);
4033 if (ParseAsExpr) {
4034 parseChildBlock();
4035 } else {
4036 if (ShouldBreakBeforeBrace(Style, InitialToken))
4037 addUnwrappedLine();
4038
// Members get one extra indentation level when access modifiers are indented
// themselves.
4039 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4040 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4041 }
4042 setPreviousRBraceType(ClosingBraceType);
4043 }
4044 // There is no addUnwrappedLine() here so that we fall through to parsing a
4045 // structural element afterwards. Thus, in "class A {} n, m;",
4046 // "} n, m;" will end up in one unwrapped line.
4047}
4048
4049void UnwrappedLineParser::parseObjCMethod() {
4050 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4051 "'(' or identifier expected.");
4052 do {
4053 if (FormatTok->is(Kind: tok::semi)) {
4054 nextToken();
4055 addUnwrappedLine();
4056 return;
4057 } else if (FormatTok->is(Kind: tok::l_brace)) {
4058 if (Style.BraceWrapping.AfterFunction)
4059 addUnwrappedLine();
4060 parseBlock();
4061 addUnwrappedLine();
4062 return;
4063 } else {
4064 nextToken();
4065 }
4066 } while (!eof());
4067}
4068
4069void UnwrappedLineParser::parseObjCProtocolList() {
4070 assert(FormatTok->is(tok::less) && "'<' expected.");
4071 do {
4072 nextToken();
4073 // Early exit in case someone forgot a close angle.
4074 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace) ||
4075 FormatTok->isObjCAtKeyword(Kind: tok::objc_end)) {
4076 return;
4077 }
4078 } while (!eof() && FormatTok->isNot(Kind: tok::greater));
4079 nextToken(); // Skip '>'.
4080}
4081
4082void UnwrappedLineParser::parseObjCUntilAtEnd() {
4083 do {
4084 if (FormatTok->isObjCAtKeyword(Kind: tok::objc_end)) {
4085 nextToken();
4086 addUnwrappedLine();
4087 break;
4088 }
4089 if (FormatTok->is(Kind: tok::l_brace)) {
4090 parseBlock();
4091 // In ObjC interfaces, nothing should be following the "}".
4092 addUnwrappedLine();
4093 } else if (FormatTok->is(Kind: tok::r_brace)) {
4094 // Ignore stray "}". parseStructuralElement doesn't consume them.
4095 nextToken();
4096 addUnwrappedLine();
4097 } else if (FormatTok->isOneOf(K1: tok::minus, K2: tok::plus)) {
4098 nextToken();
4099 parseObjCMethod();
4100 } else {
4101 parseStructuralElement();
4102 }
4103 } while (!eof());
4104}
4105
4106void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4107 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4108 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4109 nextToken();
4110 nextToken(); // interface name
4111
4112 // @interface can be followed by a lightweight generic
4113 // specialization list, then either a base class or a category.
4114 if (FormatTok->is(Kind: tok::less))
4115 parseObjCLightweightGenerics();
4116 if (FormatTok->is(Kind: tok::colon)) {
4117 nextToken();
4118 nextToken(); // base class name
4119 // The base class can also have lightweight generics applied to it.
4120 if (FormatTok->is(Kind: tok::less))
4121 parseObjCLightweightGenerics();
4122 } else if (FormatTok->is(Kind: tok::l_paren)) {
4123 // Skip category, if present.
4124 parseParens();
4125 }
4126
4127 if (FormatTok->is(Kind: tok::less))
4128 parseObjCProtocolList();
4129
4130 if (FormatTok->is(Kind: tok::l_brace)) {
4131 if (Style.BraceWrapping.AfterObjCDeclaration)
4132 addUnwrappedLine();
4133 parseBlock(/*MustBeDeclaration=*/true);
4134 }
4135
4136 // With instance variables, this puts '}' on its own line. Without instance
4137 // variables, this ends the @interface line.
4138 addUnwrappedLine();
4139
4140 parseObjCUntilAtEnd();
4141}
4142
4143void UnwrappedLineParser::parseObjCLightweightGenerics() {
4144 assert(FormatTok->is(tok::less));
4145 // Unlike protocol lists, generic parameterizations support
4146 // nested angles:
4147 //
4148 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4149 // NSObject <NSCopying, NSSecureCoding>
4150 //
4151 // so we need to count how many open angles we have left.
4152 unsigned NumOpenAngles = 1;
4153 do {
4154 nextToken();
4155 // Early exit in case someone forgot a close angle.
4156 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace) ||
4157 FormatTok->isObjCAtKeyword(Kind: tok::objc_end)) {
4158 break;
4159 }
4160 if (FormatTok->is(Kind: tok::less)) {
4161 ++NumOpenAngles;
4162 } else if (FormatTok->is(Kind: tok::greater)) {
4163 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4164 --NumOpenAngles;
4165 }
4166 } while (!eof() && NumOpenAngles != 0);
4167 nextToken(); // Skip '>'.
4168}
4169
4170// Returns true for the declaration/definition form of @protocol,
4171// false for the expression form.
4172bool UnwrappedLineParser::parseObjCProtocol() {
4173 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4174 nextToken();
4175
4176 if (FormatTok->is(Kind: tok::l_paren)) {
4177 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4178 return false;
4179 }
4180
4181 // The definition/declaration form,
4182 // @protocol Foo
4183 // - (int)someMethod;
4184 // @end
4185
4186 nextToken(); // protocol name
4187
4188 if (FormatTok->is(Kind: tok::less))
4189 parseObjCProtocolList();
4190
4191 // Check for protocol declaration.
4192 if (FormatTok->is(Kind: tok::semi)) {
4193 nextToken();
4194 addUnwrappedLine();
4195 return true;
4196 }
4197
4198 addUnwrappedLine();
4199 parseObjCUntilAtEnd();
4200 return true;
4201}
4202
4203void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4204 bool IsImport = FormatTok->is(II: Keywords.kw_import);
4205 assert(IsImport || FormatTok->is(tok::kw_export));
4206 nextToken();
4207
4208 // Consume the "default" in "export default class/function".
4209 if (FormatTok->is(Kind: tok::kw_default))
4210 nextToken();
4211
4212 // Consume "async function", "function" and "default function", so that these
4213 // get parsed as free-standing JS functions, i.e. do not require a trailing
4214 // semicolon.
4215 if (FormatTok->is(II: Keywords.kw_async))
4216 nextToken();
4217 if (FormatTok->is(II: Keywords.kw_function)) {
4218 nextToken();
4219 return;
4220 }
4221
4222 // For imports, `export *`, `export {...}`, consume the rest of the line up
4223 // to the terminating `;`. For everything else, just return and continue
4224 // parsing the structural element, i.e. the declaration or expression for
4225 // `export default`.
4226 if (!IsImport && !FormatTok->isOneOf(K1: tok::l_brace, K2: tok::star) &&
4227 !FormatTok->isStringLiteral() &&
4228 !(FormatTok->is(II: Keywords.kw_type) &&
4229 Tokens->peekNextToken()->isOneOf(K1: tok::l_brace, K2: tok::star))) {
4230 return;
4231 }
4232
4233 while (!eof()) {
4234 if (FormatTok->is(Kind: tok::semi))
4235 return;
4236 if (Line->Tokens.empty()) {
4237 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4238 // import statement should terminate.
4239 return;
4240 }
4241 if (FormatTok->is(Kind: tok::l_brace)) {
4242 FormatTok->setBlockKind(BK_Block);
4243 nextToken();
4244 parseBracedList();
4245 } else {
4246 nextToken();
4247 }
4248 }
4249}
4250
4251void UnwrappedLineParser::parseStatementMacro() {
4252 nextToken();
4253 if (FormatTok->is(Kind: tok::l_paren))
4254 parseParens();
4255 if (FormatTok->is(Kind: tok::semi))
4256 nextToken();
4257 addUnwrappedLine();
4258}
4259
4260void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4261 // consume things like a::`b.c[d:e] or a::*
4262 while (true) {
4263 if (FormatTok->isOneOf(K1: tok::star, K2: tok::period, Ks: tok::periodstar,
4264 Ks: tok::coloncolon, Ks: tok::hash) ||
4265 Keywords.isVerilogIdentifier(Tok: *FormatTok)) {
4266 nextToken();
4267 } else if (FormatTok->is(Kind: tok::l_square)) {
4268 parseSquare();
4269 } else {
4270 break;
4271 }
4272 }
4273}
4274
4275void UnwrappedLineParser::parseVerilogSensitivityList() {
4276 if (FormatTok->isNot(Kind: tok::at))
4277 return;
4278 nextToken();
4279 // A block event expression has 2 at signs.
4280 if (FormatTok->is(Kind: tok::at))
4281 nextToken();
4282 switch (FormatTok->Tok.getKind()) {
4283 case tok::star:
4284 nextToken();
4285 break;
4286 case tok::l_paren:
4287 parseParens();
4288 break;
4289 default:
4290 parseVerilogHierarchyIdentifier();
4291 break;
4292 }
4293}
4294
4295unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4296 unsigned AddLevels = 0;
4297
4298 if (FormatTok->is(II: Keywords.kw_clocking)) {
4299 nextToken();
4300 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4301 nextToken();
4302 parseVerilogSensitivityList();
4303 if (FormatTok->is(Kind: tok::semi))
4304 nextToken();
4305 } else if (FormatTok->isOneOf(K1: tok::kw_case, K2: Keywords.kw_casex,
4306 Ks: Keywords.kw_casez, Ks: Keywords.kw_randcase,
4307 Ks: Keywords.kw_randsequence)) {
4308 if (Style.IndentCaseLabels)
4309 AddLevels++;
4310 nextToken();
4311 if (FormatTok->is(Kind: tok::l_paren)) {
4312 FormatTok->setFinalizedType(TT_ConditionLParen);
4313 parseParens();
4314 }
4315 if (FormatTok->isOneOf(K1: Keywords.kw_inside, K2: Keywords.kw_matches))
4316 nextToken();
4317 // The case header has no semicolon.
4318 } else {
4319 // "module" etc.
4320 nextToken();
4321 // all the words like the name of the module and specifiers like
4322 // "automatic" and the width of function return type
4323 while (true) {
4324 if (FormatTok->is(Kind: tok::l_square)) {
4325 auto Prev = FormatTok->getPreviousNonComment();
4326 if (Prev && Keywords.isVerilogIdentifier(Tok: *Prev))
4327 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4328 parseSquare();
4329 } else if (Keywords.isVerilogIdentifier(Tok: *FormatTok) ||
4330 FormatTok->isOneOf(K1: Keywords.kw_automatic, K2: tok::kw_static)) {
4331 nextToken();
4332 } else {
4333 break;
4334 }
4335 }
4336
4337 auto NewLine = [this]() {
4338 addUnwrappedLine();
4339 Line->IsContinuation = true;
4340 };
4341
4342 // package imports
4343 while (FormatTok->is(II: Keywords.kw_import)) {
4344 NewLine();
4345 nextToken();
4346 parseVerilogHierarchyIdentifier();
4347 if (FormatTok->is(Kind: tok::semi))
4348 nextToken();
4349 }
4350
4351 // parameters and ports
4352 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
4353 NewLine();
4354 nextToken();
4355 if (FormatTok->is(Kind: tok::l_paren)) {
4356 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4357 parseParens();
4358 }
4359 }
4360 if (FormatTok->is(Kind: tok::l_paren)) {
4361 NewLine();
4362 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4363 parseParens();
4364 }
4365
4366 // extends and implements
4367 if (FormatTok->is(II: Keywords.kw_extends)) {
4368 NewLine();
4369 nextToken();
4370 parseVerilogHierarchyIdentifier();
4371 if (FormatTok->is(Kind: tok::l_paren))
4372 parseParens();
4373 }
4374 if (FormatTok->is(II: Keywords.kw_implements)) {
4375 NewLine();
4376 do {
4377 nextToken();
4378 parseVerilogHierarchyIdentifier();
4379 } while (FormatTok->is(Kind: tok::comma));
4380 }
4381
4382 // Coverage event for cover groups.
4383 if (FormatTok->is(Kind: tok::at)) {
4384 NewLine();
4385 parseVerilogSensitivityList();
4386 }
4387
4388 if (FormatTok->is(Kind: tok::semi))
4389 nextToken(/*LevelDifference=*/1);
4390 addUnwrappedLine();
4391 }
4392
4393 return AddLevels;
4394}
4395
4396void UnwrappedLineParser::parseVerilogTable() {
4397 assert(FormatTok->is(Keywords.kw_table));
4398 nextToken(/*LevelDifference=*/1);
4399 addUnwrappedLine();
4400
4401 auto InitialLevel = Line->Level++;
4402 while (!eof() && !Keywords.isVerilogEnd(Tok: *FormatTok)) {
4403 FormatToken *Tok = FormatTok;
4404 nextToken();
4405 if (Tok->is(Kind: tok::semi))
4406 addUnwrappedLine();
4407 else if (Tok->isOneOf(K1: tok::star, K2: tok::colon, Ks: tok::question, Ks: tok::minus))
4408 Tok->setFinalizedType(TT_VerilogTableItem);
4409 }
4410 Line->Level = InitialLevel;
4411 nextToken(/*LevelDifference=*/-1);
4412 addUnwrappedLine();
4413}
4414
4415void UnwrappedLineParser::parseVerilogCaseLabel() {
4416 // The label will get unindented in AnnotatingParser. If there are no leading
4417 // spaces, indent the rest here so that things inside the block will be
4418 // indented relative to things outside. We don't use parseLabel because we
4419 // don't know whether this colon is a label or a ternary expression at this
4420 // point.
4421 auto OrigLevel = Line->Level;
4422 auto FirstLine = CurrentLines->size();
4423 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4424 ++Line->Level;
4425 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(Tok: *FormatTok))
4426 --Line->Level;
4427 parseStructuralElement();
4428 // Restore the indentation in both the new line and the line that has the
4429 // label.
4430 if (CurrentLines->size() > FirstLine)
4431 (*CurrentLines)[FirstLine].Level = OrigLevel;
4432 Line->Level = OrigLevel;
4433}
4434
4435bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4436 for (const auto &N : Line.Tokens) {
4437 if (N.Tok->MacroCtx)
4438 return true;
4439 for (const UnwrappedLine &Child : N.Children)
4440 if (containsExpansion(Line: Child))
4441 return true;
4442 }
4443 return false;
4444}
4445
4446void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4447 if (Line->Tokens.empty())
4448 return;
4449 LLVM_DEBUG({
4450 if (!parsingPPDirective()) {
4451 llvm::dbgs() << "Adding unwrapped line:\n";
4452 printDebugInfo(*Line);
4453 }
4454 });
4455
4456 // If this line closes a block when in Whitesmiths mode, remember that
4457 // information so that the level can be decreased after the line is added.
4458 // This has to happen after the addition of the line since the line itself
4459 // needs to be indented.
4460 bool ClosesWhitesmithsBlock =
4461 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4462 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4463
4464 // If the current line was expanded from a macro call, we use it to
4465 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4466 // line and the unexpanded token stream.
4467 if (!parsingPPDirective() && !InExpansion && containsExpansion(Line: *Line)) {
4468 if (!Reconstruct)
4469 Reconstruct.emplace(args&: Line->Level, args&: Unexpanded);
4470 Reconstruct->addLine(Line: *Line);
4471
4472 // While the reconstructed unexpanded lines are stored in the normal
4473 // flow of lines, the expanded lines are stored on the side to be analyzed
4474 // in an extra step.
4475 CurrentExpandedLines.push_back(Elt: std::move(*Line));
4476
4477 if (Reconstruct->finished()) {
4478 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4479 assert(!Reconstructed.Tokens.empty() &&
4480 "Reconstructed must at least contain the macro identifier.");
4481 assert(!parsingPPDirective());
4482 LLVM_DEBUG({
4483 llvm::dbgs() << "Adding unexpanded line:\n";
4484 printDebugInfo(Reconstructed);
4485 });
4486 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4487 Lines.push_back(Elt: std::move(Reconstructed));
4488 CurrentExpandedLines.clear();
4489 Reconstruct.reset();
4490 }
4491 } else {
4492 // At the top level we only get here when no unexpansion is going on, or
4493 // when conditional formatting led to unfinished macro reconstructions.
4494 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4495 CurrentLines->push_back(Elt: std::move(*Line));
4496 }
4497 Line->Tokens.clear();
4498 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4499 Line->FirstStartColumn = 0;
4500 Line->IsContinuation = false;
4501 Line->SeenDecltypeAuto = false;
4502
4503 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4504 --Line->Level;
4505 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4506 CurrentLines->append(
4507 in_start: std::make_move_iterator(i: PreprocessorDirectives.begin()),
4508 in_end: std::make_move_iterator(i: PreprocessorDirectives.end()));
4509 PreprocessorDirectives.clear();
4510 }
4511 // Disconnect the current token from the last token on the previous line.
4512 FormatTok->Previous = nullptr;
4513}
4514
4515bool UnwrappedLineParser::eof() const { return FormatTok->is(Kind: tok::eof); }
4516
4517bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4518 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4519 FormatTok.NewlinesBefore > 0;
4520}
4521
4522// Checks if \p FormatTok is a line comment that continues the line comment
4523// section on \p Line.
4524static bool
4525continuesLineCommentSection(const FormatToken &FormatTok,
4526 const UnwrappedLine &Line,
4527 const llvm::Regex &CommentPragmasRegex) {
4528 if (Line.Tokens.empty())
4529 return false;
4530
4531 StringRef IndentContent = FormatTok.TokenText;
4532 if (FormatTok.TokenText.starts_with(Prefix: "//") ||
4533 FormatTok.TokenText.starts_with(Prefix: "/*")) {
4534 IndentContent = FormatTok.TokenText.substr(Start: 2);
4535 }
4536 if (CommentPragmasRegex.match(String: IndentContent))
4537 return false;
4538
4539 // If Line starts with a line comment, then FormatTok continues the comment
4540 // section if its original column is greater or equal to the original start
4541 // column of the line.
4542 //
4543 // Define the min column token of a line as follows: if a line ends in '{' or
4544 // contains a '{' followed by a line comment, then the min column token is
4545 // that '{'. Otherwise, the min column token of the line is the first token of
4546 // the line.
4547 //
4548 // If Line starts with a token other than a line comment, then FormatTok
4549 // continues the comment section if its original column is greater than the
4550 // original start column of the min column token of the line.
4551 //
4552 // For example, the second line comment continues the first in these cases:
4553 //
4554 // // first line
4555 // // second line
4556 //
4557 // and:
4558 //
4559 // // first line
4560 // // second line
4561 //
4562 // and:
4563 //
4564 // int i; // first line
4565 // // second line
4566 //
4567 // and:
4568 //
4569 // do { // first line
4570 // // second line
4571 // int i;
4572 // } while (true);
4573 //
4574 // and:
4575 //
4576 // enum {
4577 // a, // first line
4578 // // second line
4579 // b
4580 // };
4581 //
4582 // The second line comment doesn't continue the first in these cases:
4583 //
4584 // // first line
4585 // // second line
4586 //
4587 // and:
4588 //
4589 // int i; // first line
4590 // // second line
4591 //
4592 // and:
4593 //
4594 // do { // first line
4595 // // second line
4596 // int i;
4597 // } while (true);
4598 //
4599 // and:
4600 //
4601 // enum {
4602 // a, // first line
4603 // // second line
4604 // };
4605 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4606
4607 // Scan for '{//'. If found, use the column of '{' as a min column for line
4608 // comment section continuation.
4609 const FormatToken *PreviousToken = nullptr;
4610 for (const UnwrappedLineNode &Node : Line.Tokens) {
4611 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace) &&
4612 isLineComment(FormatTok: *Node.Tok)) {
4613 MinColumnToken = PreviousToken;
4614 break;
4615 }
4616 PreviousToken = Node.Tok;
4617
4618 // Grab the last newline preceding a token in this unwrapped line.
4619 if (Node.Tok->NewlinesBefore > 0)
4620 MinColumnToken = Node.Tok;
4621 }
4622 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace))
4623 MinColumnToken = PreviousToken;
4624
4625 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4626 MinColumnToken);
4627}
4628
4629void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4630 bool JustComments = Line->Tokens.empty();
4631 for (FormatToken *Tok : CommentsBeforeNextToken) {
4632 // Line comments that belong to the same line comment section are put on the
4633 // same line since later we might want to reflow content between them.
4634 // Additional fine-grained breaking of line comment sections is controlled
4635 // by the class BreakableLineCommentSection in case it is desirable to keep
4636 // several line comment sections in the same unwrapped line.
4637 //
4638 // FIXME: Consider putting separate line comment sections as children to the
4639 // unwrapped line instead.
4640 Tok->ContinuesLineCommentSection =
4641 continuesLineCommentSection(FormatTok: *Tok, Line: *Line, CommentPragmasRegex);
4642 if (isOnNewLine(FormatTok: *Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4643 addUnwrappedLine();
4644 pushToken(Tok);
4645 }
4646 if (NewlineBeforeNext && JustComments)
4647 addUnwrappedLine();
4648 CommentsBeforeNextToken.clear();
4649}
4650
4651void UnwrappedLineParser::nextToken(int LevelDifference) {
4652 if (eof())
4653 return;
4654 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4655 pushToken(Tok: FormatTok);
4656 FormatToken *Previous = FormatTok;
4657 if (!Style.isJavaScript())
4658 readToken(LevelDifference);
4659 else
4660 readTokenWithJavaScriptASI();
4661 FormatTok->Previous = Previous;
4662 if (Style.isVerilog()) {
4663 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4664 // keywords like `begin`, we can't treat them the same as left braces
4665 // because some contexts require one of them. For example structs use
4666 // braces and if blocks use keywords, and a left brace can occur in an if
4667 // statement, but it is not a block. For keywords like `end`, we simply
4668 // treat them the same as right braces.
4669 if (Keywords.isVerilogEnd(Tok: *FormatTok))
4670 FormatTok->Tok.setKind(tok::r_brace);
4671 }
4672}
4673
4674void UnwrappedLineParser::distributeComments(
4675 const SmallVectorImpl<FormatToken *> &Comments,
4676 const FormatToken *NextTok) {
4677 // Whether or not a line comment token continues a line is controlled by
4678 // the method continuesLineCommentSection, with the following caveat:
4679 //
4680 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4681 // that each comment line from the trail is aligned with the next token, if
4682 // the next token exists. If a trail exists, the beginning of the maximal
4683 // trail is marked as a start of a new comment section.
4684 //
4685 // For example in this code:
4686 //
4687 // int a; // line about a
4688 // // line 1 about b
4689 // // line 2 about b
4690 // int b;
4691 //
4692 // the two lines about b form a maximal trail, so there are two sections, the
4693 // first one consisting of the single comment "// line about a" and the
4694 // second one consisting of the next two comments.
4695 if (Comments.empty())
4696 return;
4697 bool ShouldPushCommentsInCurrentLine = true;
4698 bool HasTrailAlignedWithNextToken = false;
4699 unsigned StartOfTrailAlignedWithNextToken = 0;
4700 if (NextTok) {
4701 // We are skipping the first element intentionally.
4702 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4703 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4704 HasTrailAlignedWithNextToken = true;
4705 StartOfTrailAlignedWithNextToken = i;
4706 }
4707 }
4708 }
4709 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4710 FormatToken *FormatTok = Comments[i];
4711 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4712 FormatTok->ContinuesLineCommentSection = false;
4713 } else {
4714 FormatTok->ContinuesLineCommentSection =
4715 continuesLineCommentSection(FormatTok: *FormatTok, Line: *Line, CommentPragmasRegex);
4716 }
4717 if (!FormatTok->ContinuesLineCommentSection &&
4718 (isOnNewLine(FormatTok: *FormatTok) || FormatTok->IsFirst)) {
4719 ShouldPushCommentsInCurrentLine = false;
4720 }
4721 if (ShouldPushCommentsInCurrentLine)
4722 pushToken(Tok: FormatTok);
4723 else
4724 CommentsBeforeNextToken.push_back(Elt: FormatTok);
4725 }
4726}
4727
4728void UnwrappedLineParser::readToken(int LevelDifference) {
4729 SmallVector<FormatToken *, 1> Comments;
4730 bool PreviousWasComment = false;
4731 bool FirstNonCommentOnLine = false;
4732 do {
4733 FormatTok = Tokens->getNextToken();
4734 assert(FormatTok);
4735 while (FormatTok->isOneOf(K1: TT_ConflictStart, K2: TT_ConflictEnd,
4736 Ks: TT_ConflictAlternative)) {
4737 if (FormatTok->is(TT: TT_ConflictStart))
4738 conditionalCompilationStart(/*Unreachable=*/false);
4739 else if (FormatTok->is(TT: TT_ConflictAlternative))
4740 conditionalCompilationAlternative();
4741 else if (FormatTok->is(TT: TT_ConflictEnd))
4742 conditionalCompilationEnd();
4743 FormatTok = Tokens->getNextToken();
4744 FormatTok->MustBreakBefore = true;
4745 FormatTok->MustBreakBeforeFinalized = true;
4746 }
4747
4748 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4749 const FormatToken &Tok,
4750 bool PreviousWasComment) {
4751 auto IsFirstOnLine = [](const FormatToken &Tok) {
4752 return Tok.HasUnescapedNewline || Tok.IsFirst;
4753 };
4754
4755 // Consider preprocessor directives preceded by block comments as first
4756 // on line.
4757 if (PreviousWasComment)
4758 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4759 return IsFirstOnLine(Tok);
4760 };
4761
4762 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4763 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4764 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4765
4766 while (!Line->InPPDirective && FormatTok->is(Kind: tok::hash) &&
4767 (!Style.isVerilog() ||
4768 Keywords.isVerilogPPDirective(Tok: *Tokens->peekNextToken())) &&
4769 FirstNonCommentOnLine) {
4770 distributeComments(Comments, NextTok: FormatTok);
4771 Comments.clear();
4772 // If there is an unfinished unwrapped line, we flush the preprocessor
4773 // directives only after that unwrapped line was finished later.
4774 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4775 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4776 assert((LevelDifference >= 0 ||
4777 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4778 "LevelDifference makes Line->Level negative");
4779 Line->Level += LevelDifference;
4780 // Comments stored before the preprocessor directive need to be output
4781 // before the preprocessor directive, at the same level as the
4782 // preprocessor directive, as we consider them to apply to the directive.
4783 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4784 PPBranchLevel > 0) {
4785 Line->Level += PPBranchLevel;
4786 }
4787 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4788 parsePPDirective();
4789 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4790 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4791 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4792 }
4793
4794 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4795 !Line->InPPDirective) {
4796 continue;
4797 }
4798
4799 if (FormatTok->is(Kind: tok::identifier) &&
4800 Macros.defined(Name: FormatTok->TokenText) &&
4801 // FIXME: Allow expanding macros in preprocessor directives.
4802 !Line->InPPDirective) {
4803 FormatToken *ID = FormatTok;
4804 unsigned Position = Tokens->getPosition();
4805
4806 // To correctly parse the code, we need to replace the tokens of the macro
4807 // call with its expansion.
4808 auto PreCall = std::move(Line);
4809 Line.reset(p: new UnwrappedLine);
4810 bool OldInExpansion = InExpansion;
4811 InExpansion = true;
4812 // We parse the macro call into a new line.
4813 auto Args = parseMacroCall();
4814 InExpansion = OldInExpansion;
4815 assert(Line->Tokens.front().Tok == ID);
4816 // And remember the unexpanded macro call tokens.
4817 auto UnexpandedLine = std::move(Line);
4818 // Reset to the old line.
4819 Line = std::move(PreCall);
4820
4821 LLVM_DEBUG({
4822 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4823 if (Args) {
4824 llvm::dbgs() << "(";
4825 for (const auto &Arg : Args.value())
4826 for (const auto &T : Arg)
4827 llvm::dbgs() << T->TokenText << " ";
4828 llvm::dbgs() << ")";
4829 }
4830 llvm::dbgs() << "\n";
4831 });
4832 if (Macros.objectLike(Name: ID->TokenText) && Args &&
4833 !Macros.hasArity(Name: ID->TokenText, Arity: Args->size())) {
4834 // The macro is either
4835 // - object-like, but we got argumnets, or
4836 // - overloaded to be both object-like and function-like, but none of
4837 // the function-like arities match the number of arguments.
4838 // Thus, expand as object-like macro.
4839 LLVM_DEBUG(llvm::dbgs()
4840 << "Macro \"" << ID->TokenText
4841 << "\" not overloaded for arity " << Args->size()
4842 << "or not function-like, using object-like overload.");
4843 Args.reset();
4844 UnexpandedLine->Tokens.resize(new_size: 1);
4845 Tokens->setPosition(Position);
4846 nextToken();
4847 assert(!Args && Macros.objectLike(ID->TokenText));
4848 }
4849 if ((!Args && Macros.objectLike(Name: ID->TokenText)) ||
4850 (Args && Macros.hasArity(Name: ID->TokenText, Arity: Args->size()))) {
4851 // Next, we insert the expanded tokens in the token stream at the
4852 // current position, and continue parsing.
4853 Unexpanded[ID] = std::move(UnexpandedLine);
4854 SmallVector<FormatToken *, 8> Expansion =
4855 Macros.expand(ID, OptionalArgs: std::move(Args));
4856 if (!Expansion.empty())
4857 FormatTok = Tokens->insertTokens(Tokens: Expansion);
4858
4859 LLVM_DEBUG({
4860 llvm::dbgs() << "Expanded: ";
4861 for (const auto &T : Expansion)
4862 llvm::dbgs() << T->TokenText << " ";
4863 llvm::dbgs() << "\n";
4864 });
4865 } else {
4866 LLVM_DEBUG({
4867 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4868 << "\", because it was used ";
4869 if (Args)
4870 llvm::dbgs() << "with " << Args->size();
4871 else
4872 llvm::dbgs() << "without";
4873 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4874 });
4875 Tokens->setPosition(Position);
4876 FormatTok = ID;
4877 }
4878 }
4879
4880 if (FormatTok->isNot(Kind: tok::comment)) {
4881 distributeComments(Comments, NextTok: FormatTok);
4882 Comments.clear();
4883 return;
4884 }
4885
4886 Comments.push_back(Elt: FormatTok);
4887 } while (!eof());
4888
4889 distributeComments(Comments, NextTok: nullptr);
4890 Comments.clear();
4891}
4892
4893namespace {
4894template <typename Iterator>
4895void pushTokens(Iterator Begin, Iterator End,
4896 llvm::SmallVectorImpl<FormatToken *> &Into) {
4897 for (auto I = Begin; I != End; ++I) {
4898 Into.push_back(Elt: I->Tok);
4899 for (const auto &Child : I->Children)
4900 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4901 }
4902}
4903} // namespace
4904
4905std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4906UnwrappedLineParser::parseMacroCall() {
4907 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4908 assert(Line->Tokens.empty());
4909 nextToken();
4910 if (FormatTok->isNot(Kind: tok::l_paren))
4911 return Args;
4912 unsigned Position = Tokens->getPosition();
4913 FormatToken *Tok = FormatTok;
4914 nextToken();
4915 Args.emplace();
4916 auto ArgStart = std::prev(x: Line->Tokens.end());
4917
4918 int Parens = 0;
4919 do {
4920 switch (FormatTok->Tok.getKind()) {
4921 case tok::l_paren:
4922 ++Parens;
4923 nextToken();
4924 break;
4925 case tok::r_paren: {
4926 if (Parens > 0) {
4927 --Parens;
4928 nextToken();
4929 break;
4930 }
4931 Args->push_back(Elt: {});
4932 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
4933 nextToken();
4934 return Args;
4935 }
4936 case tok::comma: {
4937 if (Parens > 0) {
4938 nextToken();
4939 break;
4940 }
4941 Args->push_back(Elt: {});
4942 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
4943 nextToken();
4944 ArgStart = std::prev(x: Line->Tokens.end());
4945 break;
4946 }
4947 default:
4948 nextToken();
4949 break;
4950 }
4951 } while (!eof());
4952 Line->Tokens.resize(new_size: 1);
4953 Tokens->setPosition(Position);
4954 FormatTok = Tok;
4955 return {};
4956}
4957
4958void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4959 Line->Tokens.push_back(x: UnwrappedLineNode(Tok));
4960 if (MustBreakBeforeNextToken) {
4961 Line->Tokens.back().Tok->MustBreakBefore = true;
4962 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
4963 MustBreakBeforeNextToken = false;
4964 }
4965}
4966
4967} // end namespace format
4968} // end namespace clang
4969

source code of clang/lib/Format/UnwrappedLineParser.cpp