1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, Line: *CI, Prefix: (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(OS&: llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(Val: MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
96class ScopedLineState {
97public:
98 ScopedLineState(UnwrappedLineParser &Parser,
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
114 ~ScopedLineState() {
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
125 UnwrappedLineParser &Parser;
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
131class CompoundStatementIndenter {
132public:
133 CompoundStatementIndenter(UnwrappedLineParser *Parser,
134 const FormatStyle &Style, unsigned &LineLevel)
135 : CompoundStatementIndenter(Parser, LineLevel,
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
139 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
154UnwrappedLineParser::UnwrappedLineParser(
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
157 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
166 ? IG_Rejected
167 : IG_Inited),
168 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
169 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
170
171void UnwrappedLineParser::reset() {
172 PPBranchLevel = -1;
173 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
174 ? IG_Rejected
175 : IG_Inited;
176 IncludeGuardToken = nullptr;
177 Line.reset(p: new UnwrappedLine);
178 CommentsBeforeNextToken.clear();
179 FormatTok = nullptr;
180 AtEndOfPPLine = false;
181 IsDecltypeAutoFunction = false;
182 PreprocessorDirectives.clear();
183 CurrentLines = &Lines;
184 DeclarationScopeStack.clear();
185 NestedTooDeep.clear();
186 NestedLambdas.clear();
187 PPStack.clear();
188 Line->FirstStartColumn = FirstStartColumn;
189
190 if (!Unexpanded.empty())
191 for (FormatToken *Token : AllTokens)
192 Token->MacroCtx.reset();
193 CurrentExpandedLines.clear();
194 ExpandedLines.clear();
195 Unexpanded.clear();
196 InExpansion = false;
197 Reconstruct.reset();
198}
199
200void UnwrappedLineParser::parse() {
201 IndexedTokenSource TokenSource(AllTokens);
202 Line->FirstStartColumn = FirstStartColumn;
203 do {
204 LLVM_DEBUG(llvm::dbgs() << "----\n");
205 reset();
206 Tokens = &TokenSource;
207 TokenSource.reset();
208
209 readToken();
210 parseFile();
211
212 // If we found an include guard then all preprocessor directives (other than
213 // the guard) are over-indented by one.
214 if (IncludeGuard == IG_Found) {
215 for (auto &Line : Lines)
216 if (Line.InPPDirective && Line.Level > 0)
217 --Line.Level;
218 }
219
220 // Create line with eof token.
221 assert(eof());
222 pushToken(Tok: FormatTok);
223 addUnwrappedLine();
224
225 // In a first run, format everything with the lines containing macro calls
226 // replaced by the expansion.
227 if (!ExpandedLines.empty()) {
228 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
229 for (const auto &Line : Lines) {
230 if (!Line.Tokens.empty()) {
231 auto it = ExpandedLines.find(Val: Line.Tokens.begin()->Tok);
232 if (it != ExpandedLines.end()) {
233 for (const auto &Expanded : it->second) {
234 LLVM_DEBUG(printDebugInfo(Expanded));
235 Callback.consumeUnwrappedLine(Line: Expanded);
236 }
237 continue;
238 }
239 }
240 LLVM_DEBUG(printDebugInfo(Line));
241 Callback.consumeUnwrappedLine(Line);
242 }
243 Callback.finishRun();
244 }
245
246 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
247 for (const UnwrappedLine &Line : Lines) {
248 LLVM_DEBUG(printDebugInfo(Line));
249 Callback.consumeUnwrappedLine(Line);
250 }
251 Callback.finishRun();
252 Lines.clear();
253 while (!PPLevelBranchIndex.empty() &&
254 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
255 PPLevelBranchIndex.resize(N: PPLevelBranchIndex.size() - 1);
256 PPLevelBranchCount.resize(N: PPLevelBranchCount.size() - 1);
257 }
258 if (!PPLevelBranchIndex.empty()) {
259 ++PPLevelBranchIndex.back();
260 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
261 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
262 }
263 } while (!PPLevelBranchIndex.empty());
264}
265
266void UnwrappedLineParser::parseFile() {
267 // The top-level context in a file always has declarations, except for pre-
268 // processor directives and JavaScript files.
269 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
270 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
271 MustBeDeclaration);
272 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
273 parseBracedList();
274 else
275 parseLevel();
276 // Make sure to format the remaining tokens.
277 //
278 // LK_TextProto is special since its top-level is parsed as the body of a
279 // braced list, which does not necessarily have natural line separators such
280 // as a semicolon. Comments after the last entry that have been determined to
281 // not belong to that line, as in:
282 // key: value
283 // // endfile comment
284 // do not have a chance to be put on a line of their own until this point.
285 // Here we add this newline before end-of-file comments.
286 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
287 addUnwrappedLine();
288 flushComments(NewlineBeforeNext: true);
289 addUnwrappedLine();
290}
291
292void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
293 do {
294 switch (FormatTok->Tok.getKind()) {
295 case tok::l_brace:
296 case tok::semi:
297 return;
298 default:
299 if (FormatTok->is(II: Keywords.kw_where)) {
300 addUnwrappedLine();
301 nextToken();
302 parseCSharpGenericTypeConstraint();
303 break;
304 }
305 nextToken();
306 break;
307 }
308 } while (!eof());
309}
310
311void UnwrappedLineParser::parseCSharpAttribute() {
312 int UnpairedSquareBrackets = 1;
313 do {
314 switch (FormatTok->Tok.getKind()) {
315 case tok::r_square:
316 nextToken();
317 --UnpairedSquareBrackets;
318 if (UnpairedSquareBrackets == 0) {
319 addUnwrappedLine();
320 return;
321 }
322 break;
323 case tok::l_square:
324 ++UnpairedSquareBrackets;
325 nextToken();
326 break;
327 default:
328 nextToken();
329 break;
330 }
331 } while (!eof());
332}
333
334bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
335 if (!Lines.empty() && Lines.back().InPPDirective)
336 return true;
337
338 const FormatToken *Previous = Tokens->getPreviousToken();
339 return Previous && Previous->is(Kind: tok::comment) &&
340 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
341}
342
343/// Parses a level: the run of structural elements starting at the current
/// token and ending at the closing brace (or macro block end) that belongs to
/// \p OpeningBrace, or at the end of the token stream. When \p OpeningBrace
/// is null, closing braces are consumed and parsing continues until eof.
344/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
345/// \param IfKind The \p if statement kind in the level.
346/// \param IfLeftBrace The left brace of the \p if block in the level.
347/// \returns true if a simple block of if/else/for/while, or false otherwise.
348/// (A simple block has a single statement.)
349bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
350 IfStmtKind *IfKind,
351 FormatToken **IfLeftBrace) {
352 const bool InRequiresExpression =
353 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
 // Evaluated once on entry. It is also true whenever RemoveBracesLLVM is off,
 // which makes the "simple block" result below always false in that case.
354 const bool IsPrecededByCommentOrPPDirective =
355 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
356 FormatToken *IfLBrace = nullptr;
357 bool HasDoWhile = false;
358 bool HasLabel = false;
 // Number of statements parsed at this level so far.
359 unsigned StatementCount = 0;
 // Set once the first case/default label of this level has been handled, so
 // the level is raised for labels at most once.
360 bool SwitchLabelEncountered = false;
361
362 do {
 // Skip an attribute token and, if one follows, its parenthesized part.
363 if (FormatTok->isAttribute()) {
364 nextToken();
365 if (FormatTok->is(Kind: tok::l_paren))
366 parseParens();
367 continue;
368 }
 // Macro block begin/end tokens are dispatched as if they were braces.
369 tok::TokenKind Kind = FormatTok->Tok.getKind();
370 if (FormatTok->is(TT: TT_MacroBlockBegin))
371 Kind = tok::l_brace;
372 else if (FormatTok->is(TT: TT_MacroBlockEnd))
373 Kind = tok::r_brace;
374
 // Parses one structural element and counts it as a statement. Once
 // HasDoWhile / HasLabel is set, nullptr is passed instead of its address.
375 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
376 &HasLabel, &StatementCount] {
377 parseStructuralElement(OpeningBrace, IfKind, IfLeftBrace: &IfLBrace,
378 HasDoWhile: HasDoWhile ? nullptr : &HasDoWhile,
379 HasLabel: HasLabel ? nullptr : &HasLabel);
380 ++StatementCount;
381 assert(StatementCount > 0 && "StatementCount overflow!");
382 };
383
384 switch (Kind) {
385 case tok::comment:
 // A comment at this level becomes a line of its own.
386 nextToken();
387 addUnwrappedLine();
388 break;
389 case tok::l_brace:
390 if (InRequiresExpression) {
391 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
392 } else if (FormatTok->Previous &&
393 FormatTok->Previous->ClosesRequiresClause) {
394 // We need the 'default' case here to correctly parse a function
395 // l_brace.
396 ParseDefault();
397 continue;
398 }
 // Outside requires expressions a real `{` may start a braced list; if it
 // does not, it is finalized as a block brace.
399 if (!InRequiresExpression && FormatTok->isNot(Kind: TT_MacroBlockBegin)) {
400 if (tryToParseBracedList())
401 continue;
402 FormatTok->setFinalizedType(TT_BlockLBrace);
403 }
404 parseBlock();
405 ++StatementCount;
406 assert(StatementCount > 0 && "StatementCount overflow!");
407 addUnwrappedLine();
408 break;
409 case tok::r_brace:
 // With an opening brace, this token ends the level. The closing token is
 // not consumed here. The return value says whether the level was a simple
 // block.
410 if (OpeningBrace) {
411 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
412 !OpeningBrace->isOneOf(K1: TT_ControlStatementLBrace, K2: TT_ElseLBrace)) {
413 return false;
414 }
 // Not simple if the closer is a macro block end rather than `}`, if the
 // level does not hold exactly one statement, if it contains a label or
 // do-while, or if a comment / preprocessor directive precedes the level
 // or the closing brace.
415 if (FormatTok->isNot(Kind: tok::r_brace) || StatementCount != 1 || HasLabel ||
416 HasDoWhile || IsPrecededByCommentOrPPDirective ||
417 precededByCommentOrPPDirective()) {
418 return false;
419 }
 // A comment on the same line right after the `}` also rules it out.
420 const FormatToken *Next = Tokens->peekNextToken();
421 if (Next->is(Kind: tok::comment) && Next->NewlinesBefore == 0)
422 return false;
423 if (IfLeftBrace)
424 *IfLeftBrace = IfLBrace;
425 return true;
426 }
 // No opening brace: consume the stray closer and carry on.
427 nextToken();
428 addUnwrappedLine();
429 break;
430 case tok::kw_default: {
 // Look at the next non-comment token, then rewind to the stored position.
431 unsigned StoredPosition = Tokens->getPosition();
432 auto *Next = Tokens->getNextNonComment();
433 FormatTok = Tokens->setPosition(StoredPosition);
434 if (!Next->isOneOf(K1: tok::colon, K2: tok::arrow)) {
435 // default not followed by `:` or `->` is not a case label; treat it
436 // like an identifier.
437 parseStructuralElement();
438 break;
439 }
440 // Else, if it is 'default:', fall through to the case handling.
441 [[fallthrough]];
442 }
443 case tok::kw_case:
444 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
445 (Style.isJavaScript() && Line->MustBeDeclaration)) {
446 // Proto: there are no switch/case statements
447 // Verilog: Case labels don't have this word. We handle case
448 // labels including default in TokenAnnotator.
449 // JavaScript: A 'case: string' style field declaration.
450 ParseDefault();
451 break;
452 }
 // Raise the level once, at the first label, when case labels are
 // indented, when the level is a switch-expression body, or inside a
 // preprocessor directive at level 1.
453 if (!SwitchLabelEncountered &&
454 (Style.IndentCaseLabels ||
455 (OpeningBrace && OpeningBrace->is(TT: TT_SwitchExpressionLBrace)) ||
456 (Line->InPPDirective && Line->Level == 1))) {
457 ++Line->Level;
458 }
459 SwitchLabelEncountered = true;
 // Note: labels parsed here are not added to StatementCount.
460 parseStructuralElement();
461 break;
462 case tok::l_square:
463 if (Style.isCSharp()) {
 // parseCSharpAttribute() is entered with the `[` already consumed.
464 nextToken();
465 parseCSharpAttribute();
466 break;
467 }
468 if (handleCppAttributes())
469 break;
470 [[fallthrough]];
471 default:
472 ParseDefault();
473 break;
474 }
475 } while (!eof());
476
 // Reached the end of the token stream without a closing brace for this
 // level.
477 return false;
478}
479
// Looks ahead from the current `{` and assigns a block kind (BK_Block or
// BK_BracedInit) to the brace pairs found, using heuristics on the tokens
// around each closing brace. The scan stops once the brace the function was
// entered on has been closed, or at eof. The token source position and
// FormatTok are restored before returning.
//
// \param ExpectClassBody If true, a `;` following the closing brace of the
// outermost pair is not taken as evidence for a braced list.
480void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
481 // We'll parse forward through the tokens until we hit
482 // a closing brace or eof - note that getNextToken() will
483 // parse macros, so this will magically work inside macro
484 // definitions, too.
485 unsigned StoredPosition = Tokens->getPosition();
486 FormatToken *Tok = FormatTok;
487 const FormatToken *PrevTok = Tok->Previous;
488 // Keep a stack of positions of lbrace tokens. We will
489 // update information about whether an lbrace starts a
490 // braced init list or a different block during the loop.
491 struct StackEntry {
 // The `{` token itself.
492 FormatToken *Tok;
 // The token examined just before that `{` (may be null).
493 const FormatToken *PrevTok;
494 };
495 SmallVector<StackEntry, 8> LBraceStack;
496 assert(Tok->is(tok::l_brace));
497
 // Each iteration classifies Tok with one token of lookahead (NextTok).
498 do {
499 auto *NextTok = Tokens->getNextNonComment();
500
501 if (!Line->InMacroBody && !Style.isTableGen()) {
502 // Skip PPDirective lines (except macro definitions) and comments.
503 while (NextTok->is(Kind: tok::hash)) {
504 NextTok = Tokens->getNextToken();
505 if (NextTok->isOneOf(K1: tok::pp_not_keyword, K2: tok::pp_define))
506 break;
 // Advance to the first token that has an unescaped newline in front of
 // it, or to eof.
507 do {
508 NextTok = Tokens->getNextToken();
509 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(Kind: tok::eof));
510
511 while (NextTok->is(Kind: tok::comment))
512 NextTok = Tokens->getNextToken();
513 }
514 }
515
516 switch (Tok->Tok.getKind()) {
517 case tok::l_brace:
 // In JavaScript with a known previous token the brace keeps whatever
 // kind it has unless one of the two cases below applies; otherwise the
 // kind is reset to unknown and decided at the matching `}`.
518 if (Style.isJavaScript() && PrevTok) {
519 if (PrevTok->isOneOf(K1: tok::colon, K2: tok::less)) {
520 // A ':' indicates this code is in a type, or a braced list
521 // following a label in an object literal ({a: {b: 1}}).
522 // A '<' could be an object used in a comparison, but that is nonsense
523 // code (can never return true), so more likely it is a generic type
524 // argument (`X<{a: string; b: number}>`).
525 // The code below could be confused by semicolons between the
526 // individual members in a type member list, which would normally
527 // trigger BK_Block. In both cases, this must be parsed as an inline
528 // braced init.
529 Tok->setBlockKind(BK_BracedInit);
530 } else if (PrevTok->is(Kind: tok::r_paren)) {
531 // `) { }` can only occur in function or method declarations in JS.
532 Tok->setBlockKind(BK_Block);
533 }
534 } else {
535 Tok->setBlockKind(BK_Unknown);
536 }
537 LBraceStack.push_back(Elt: {.Tok: Tok, .PrevTok: PrevTok});
538 break;
539 case tok::r_brace:
 // A `}` without an open `{` on the stack is ignored.
540 if (LBraceStack.empty())
541 break;
 // Only decide pairs whose opening brace is still unknown.
542 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BBK: BK_Unknown)) {
543 bool ProbablyBracedList = false;
544 if (Style.Language == FormatStyle::LK_Proto) {
545 ProbablyBracedList = NextTok->isOneOf(K1: tok::comma, K2: tok::r_square);
546 } else if (LBrace->isNot(Kind: TT_EnumLBrace)) {
547 // Using OriginalColumn to distinguish between ObjC methods and
548 // binary operators is a bit hacky.
549 bool NextIsObjCMethod = NextTok->isOneOf(K1: tok::plus, K2: tok::minus) &&
550 NextTok->OriginalColumn == 0;
551
552 // Try to detect a braced list. Note that regardless how we mark inner
553 // braces here, we will overwrite the BlockKind later if we parse a
554 // braced list (where all blocks inside are by default braced lists),
555 // or when we explicitly detect blocks (for example while parsing
556 // lambdas).
557
558 // If we already marked the opening brace as braced list, the closing
559 // must also be part of it.
560 ProbablyBracedList = LBrace->is(TT: TT_BracedListLBrace);
561
 // JavaScript: `}` followed by `of`, `in` or `as`.
562 ProbablyBracedList = ProbablyBracedList ||
563 (Style.isJavaScript() &&
564 NextTok->isOneOf(K1: Keywords.kw_of, K2: Keywords.kw_in,
565 Ks: Keywords.kw_as));
 // C++: the token before `}` is a literal, or `}` is followed by `(` or
 // `->`.
566 ProbablyBracedList =
567 ProbablyBracedList ||
568 (IsCpp && (PrevTok->Tok.isLiteral() ||
569 NextTok->isOneOf(K1: tok::l_paren, K2: tok::arrow)));
570
571 // If there is a comma, period, colon, right paren, right square bracket
572 // or ellipsis after the closing brace, we assume this is a braced
 // initializer list. (A following semicolon is handled further below.)
573 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
574 // braced list in JS.
575 ProbablyBracedList =
576 ProbablyBracedList ||
577 NextTok->isOneOf(K1: tok::comma, K2: tok::period, Ks: tok::colon,
578 Ks: tok::r_paren, Ks: tok::r_square, Ks: tok::ellipsis);
579
580 // Distinguish between braced list in a constructor initializer list
581 // followed by constructor body, or just adjacent blocks.
582 ProbablyBracedList =
583 ProbablyBracedList ||
584 (NextTok->is(Kind: tok::l_brace) && LBraceStack.back().PrevTok &&
585 LBraceStack.back().PrevTok->isOneOf(K1: tok::identifier,
586 K2: tok::greater));
587
 // `}` followed by an identifier, unless the token before `}` is `;`,
 // `}` or `{`.
588 ProbablyBracedList =
589 ProbablyBracedList ||
590 (NextTok->is(Kind: tok::identifier) &&
591 !PrevTok->isOneOf(K1: tok::semi, K2: tok::r_brace, Ks: tok::l_brace));
592
 // `};` counts as a braced list, except for the outermost pair when a
 // class body is expected.
593 ProbablyBracedList = ProbablyBracedList ||
594 (NextTok->is(Kind: tok::semi) &&
595 (!ExpectClassBody || LBraceStack.size() != 1));
596
597 ProbablyBracedList =
598 ProbablyBracedList ||
599 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
600
 // Note: this overrides the verdict of all checks above and consumes one
 // more token of lookahead.
601 if (!Style.isCSharp() && NextTok->is(Kind: tok::l_square)) {
602 // We can have an array subscript after a braced init
603 // list, but C++11 attributes are expected after blocks.
604 NextTok = Tokens->getNextToken();
605 ProbablyBracedList = NextTok->isNot(Kind: tok::l_square);
606 }
607
608 // Cpp macro definition body that is a nonempty braced list or block:
609 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
610 !FormatTok->Previous && NextTok->is(Kind: tok::eof) &&
611 // A statement can end with only `;` (simple statement), a block
612 // closing brace (compound statement), or `:` (label statement).
613 // If PrevTok is a block opening brace, Tok ends an empty block.
614 !PrevTok->isOneOf(K1: tok::semi, K2: BK_Block, Ks: tok::colon)) {
615 ProbablyBracedList = true;
616 }
617 }
 // For enum braces outside Proto no heuristic runs, so the pair becomes
 // BK_Block. Both braces of the pair get the same kind.
618 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
619 Tok->setBlockKind(BlockKind);
620 LBrace->setBlockKind(BlockKind);
621 }
622 LBraceStack.pop_back();
623 break;
624 case tok::identifier:
 // Identifiers only matter here when they are statement macros.
625 if (Tok->isNot(Kind: TT_StatementMacro))
626 break;
627 [[fallthrough]];
628 case tok::at:
629 case tok::semi:
630 case tok::kw_if:
631 case tok::kw_while:
632 case tok::kw_for:
633 case tok::kw_switch:
634 case tok::kw_try:
635 case tok::kw___try:
 // Any of these tokens inside a still-undecided brace marks it as a block.
636 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BBK: BK_Unknown))
637 LBraceStack.back().Tok->setBlockKind(BK_Block);
638 break;
639 default:
640 break;
641 }
642
643 PrevTok = Tok;
644 Tok = NextTok;
645 } while (Tok->isNot(Kind: tok::eof) && !LBraceStack.empty());
646
647 // Assume other blocks for all unclosed opening braces.
648 for (const auto &Entry : LBraceStack)
649 if (Entry.Tok->is(BBK: BK_Unknown))
650 Entry.Tok->setBlockKind(BK_Block);
651
 // Rewind so that regular parsing continues from where the scan started.
652 FormatTok = Tokens->setPosition(StoredPosition);
653}
654
655// Sets the token type of the directly previous right brace.
656void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
657 if (auto Prev = FormatTok->getPreviousNonComment();
658 Prev && Prev->is(Kind: tok::r_brace)) {
659 Prev->setFinalizedType(Type);
660 }
661}
662
/// Folds the std::hash of \p v into \p seed.
///
/// The new seed is `seed ^ (hash(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2))`,
/// so the result depends on the order in which values are combined.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t hashed = std::hash<T>{}(v);
  const std::size_t mixed = hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
668
669size_t UnwrappedLineParser::computePPHash() const {
670 size_t h = 0;
671 for (const auto &i : PPStack) {
672 hash_combine(seed&: h, v: size_t(i.Kind));
673 hash_combine(seed&: h, v: i.Line);
674 }
675 return h;
676}
677
678// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
679// is not null, subtracts its length (plus the preceding space) when computing
680// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
681// running the token annotator on it so that we can restore them afterward.
682bool UnwrappedLineParser::mightFitOnOneLine(
683 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
684 const auto ColumnLimit = Style.ColumnLimit;
685 if (ColumnLimit == 0)
686 return true;
687
688 auto &Tokens = ParsedLine.Tokens;
689 assert(!Tokens.empty());
690
691 const auto *LastToken = Tokens.back().Tok;
692 assert(LastToken);
693
694 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
695
696 int Index = 0;
697 for (const auto &Token : Tokens) {
698 assert(Token.Tok);
699 auto &SavedToken = SavedTokens[Index++];
700 SavedToken.Tok = new FormatToken;
701 SavedToken.Tok->copyFrom(Tok: *Token.Tok);
702 SavedToken.Children = std::move(Token.Children);
703 }
704
705 AnnotatedLine Line(ParsedLine);
706 assert(Line.Last == LastToken);
707
708 TokenAnnotator Annotator(Style, Keywords);
709 Annotator.annotate(Line);
710 Annotator.calculateFormattingInformation(Line);
711
712 auto Length = LastToken->TotalLength;
713 if (OpeningBrace) {
714 assert(OpeningBrace != Tokens.front().Tok);
715 if (auto Prev = OpeningBrace->Previous;
716 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
717 Length -= ColumnLimit;
718 }
719 Length -= OpeningBrace->TokenText.size() + 1;
720 }
721
722 if (const auto *FirstToken = Line.First; FirstToken->is(Kind: tok::r_brace)) {
723 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
724 Length -= FirstToken->TokenText.size() + 1;
725 }
726
727 Index = 0;
728 for (auto &Token : Tokens) {
729 const auto &SavedToken = SavedTokens[Index++];
730 Token.Tok->copyFrom(Tok: *SavedToken.Tok);
731 Token.Children = std::move(SavedToken.Children);
732 delete SavedToken.Tok;
733 }
734
735 // If these change PPLevel needs to be used for get correct indentation.
736 assert(!Line.InMacroBody);
737 assert(!Line.InPPDirective);
738 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
739}
740
// Parses a block that starts at the current token: a `{`, a macro block begin
// token or, for Verilog, a begin keyword or hierarchy header. Everything up
// to and including the matching closing token is consumed, plus selected
// trailing tokens (see below).
//
// \param MustBeDeclaration Declaration state pushed for the block's contents.
// \param AddLevels Number of levels the contents are nested by.
// \param MunchSemi If true, a `;` following the block is consumed as well.
// \param KeepBraces If true, the block's braces are only considered for
// removal when a nested `if` has a non-optional left brace.
// \param IfKind Forwarded to parseLevel().
// \param UnindentWhitesmithsBraces If true, the level is decremented by one
// after the opening line has been added.
// \returns the IfLeftBrace value reported by parseLevel(), or nullptr if the
// nesting level exceeds 300 and parsing of the block is abandoned.
741FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
742 unsigned AddLevels, bool MunchSemi,
743 bool KeepBraces,
744 IfStmtKind *IfKind,
745 bool UnindentWhitesmithsBraces) {
 // Verilog only: consumes a `:` and, if one follows, the identifier after it.
746 auto HandleVerilogBlockLabel = [this]() {
747 // ":" name
748 if (Style.isVerilog() && FormatTok->is(Kind: tok::colon)) {
749 nextToken();
750 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
751 nextToken();
752 }
753 };
754
755 // Whether this is a Verilog-specific block that has a special header like a
756 // module.
757 const bool VerilogHierarchy =
758 Style.isVerilog() && Keywords.isVerilogHierarchy(Tok: *FormatTok);
759 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
760 (Style.isVerilog() &&
761 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
762 "'{' or macro block token expected");
 // Tok stays on the opening token for the rest of the function.
763 FormatToken *Tok = FormatTok;
764 const bool FollowedByComment = Tokens->peekNextToken()->is(Kind: tok::comment);
 // Number of lines collected before the opening line is added; used below as
 // the index of the line that holds the opening brace.
765 auto Index = CurrentLines->size();
766 const bool MacroBlock = FormatTok->is(TT: TT_MacroBlockBegin);
767 FormatTok->setBlockKind(BK_Block);
768
769 // For Whitesmiths mode, jump to the next level prior to skipping over the
770 // braces.
771 if (!VerilogHierarchy && AddLevels > 0 &&
772 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
773 ++Line->Level;
774 }
775
 // Hash of the preprocessor stack at the opening token; compared with the
 // hash at the closing token further down.
776 size_t PPStartHash = computePPHash();
777
 // Level to restore after the block. In Whitesmiths mode this already
 // includes the increment made above.
778 const unsigned InitialLevel = Line->Level;
779 if (VerilogHierarchy) {
780 AddLevels += parseVerilogHierarchyHeader();
781 } else {
782 nextToken(/*LevelDifference=*/AddLevels);
783 HandleVerilogBlockLabel();
784 }
785
786 // Bail out if there are too many levels. Otherwise, the stack might overflow.
787 if (Line->Level > 300)
788 return nullptr;
789
790 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
791 parseParens();
792
 // Outside a preprocessor directive, the number of collected preprocessor
 // directive lines is subtracted when computing the opening line's index.
793 size_t NbPreprocessorDirectives =
794 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
795 addUnwrappedLine();
796 size_t OpeningLineIndex =
797 CurrentLines->empty()
798 ? (UnwrappedLine::kInvalidIndex)
799 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
800
801 // Whitesmiths is weird here. The brace needs to be indented for the namespace
802 // block, but the block itself may not be indented depending on the style
803 // settings. This allows the format to back up one level in those cases.
804 if (UnindentWhitesmithsBraces)
805 --Line->Level;
806
807 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
808 MustBeDeclaration);
 // In Whitesmiths mode the level was already raised before the brace.
809 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
810 Line->Level += AddLevels;
811
812 FormatToken *IfLBrace = nullptr;
813 const bool SimpleBlock = parseLevel(OpeningBrace: Tok, IfKind, IfLeftBrace: &IfLBrace);
814
 // Input ended inside the block.
815 if (eof())
816 return IfLBrace;
817
 // parseLevel() stopped at something other than the expected closing token:
 // restore the level and give up on this block.
818 if (MacroBlock ? FormatTok->isNot(Kind: TT_MacroBlockEnd)
819 : FormatTok->isNot(Kind: tok::r_brace)) {
820 Line->Level = InitialLevel;
821 FormatTok->setBlockKind(BK_Block);
822 return IfLBrace;
823 }
824
825 if (FormatTok->is(Kind: tok::r_brace)) {
826 FormatTok->setBlockKind(BK_Block);
827 if (Tok->is(TT: TT_NamespaceLBrace))
828 FormatTok->setFinalizedType(TT_NamespaceRBrace);
829 }
830
831 const bool IsFunctionRBrace =
832 FormatTok->is(Kind: tok::r_brace) && Tok->is(TT: TT_FunctionLBrace);
833
 // Decides whether the braces of this block are removable. Everything is
 // captured by copy and the lambda is mutable, so the changes it makes to Tok
 // and Index do not affect the variables of the enclosing function.
834 auto RemoveBraces = [=]() mutable {
835 if (!SimpleBlock)
836 return false;
837 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
838 assert(FormatTok->is(tok::r_brace));
 // The opening brace has no previous token, i.e. it starts its line.
839 const bool WrappedOpeningBrace = !Tok->Previous;
840 if (WrappedOpeningBrace && FollowedByComment)
841 return false;
842 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
843 if (KeepBraces && !HasRequiredIfBraces)
844 return false;
 // A non-optional `}` directly before this closing brace keeps the braces,
 // except for an else block whose nested `if` has required braces.
845 if (Tok->isNot(Kind: TT_ElseLBrace) || !HasRequiredIfBraces) {
846 const FormatToken *Previous = Tokens->getPreviousToken();
847 assert(Previous);
848 if (Previous->is(Kind: tok::r_brace) && !Previous->Optional)
849 return false;
850 }
851 assert(!CurrentLines->empty());
 // If the last collected line sits exactly one level inside the block, it
 // has to pass the one-line length check.
852 auto &LastLine = CurrentLines->back();
853 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(ParsedLine&: LastLine))
854 return false;
855 if (Tok->is(TT: TT_ElseLBrace))
856 return true;
857 if (WrappedOpeningBrace) {
858 assert(Index > 0);
859 --Index; // The line above the wrapped l_brace.
860 Tok = nullptr;
861 }
 // Finally the line holding the opening brace (or the line above a wrapped
 // brace) has to pass the length check as well.
862 return mightFitOnOneLine(ParsedLine&: (*CurrentLines)[Index], OpeningBrace: Tok);
863 };
 // Removable braces are linked to each other through MatchingParen.
864 if (RemoveBraces()) {
865 Tok->MatchingParen = FormatTok;
866 FormatTok->MatchingParen = Tok;
867 }
868
869 size_t PPEndHash = computePPHash();
870
871 // Munch the closing brace.
872 nextToken(/*LevelDifference=*/-AddLevels);
873
874 // When this is a function block and there is an unnecessary semicolon
875 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
876 // it later).
877 if (Style.RemoveSemicolon && IsFunctionRBrace) {
878 while (FormatTok->is(Kind: tok::semi)) {
879 FormatTok->Optional = true;
880 nextToken();
881 }
882 }
883
884 HandleVerilogBlockLabel();
885
886 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
887 parseParens();
888
889 Line->Level = InitialLevel;
890
891 if (FormatTok->is(Kind: tok::kw_noexcept)) {
892 // A noexcept in a requires expression.
893 nextToken();
894 }
895
896 if (FormatTok->is(Kind: tok::arrow)) {
897 // Following the } or noexcept we can find a trailing return type arrow
898 // as part of an implicit conversion constraint.
899 nextToken();
900 parseStructuralElement();
901 }
902
903 if (MunchSemi && FormatTok->is(Kind: tok::semi))
904 nextToken();
905
 // Opening and closing lines are only linked when the preprocessor stack
 // hashes to the same value at both braces.
906 if (PPStartHash == PPEndHash) {
907 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
908 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
909 // Update the opening line to add the forward reference as well
910 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
911 CurrentLines->size() - 1;
912 }
913 }
914
915 return IfLBrace;
916}
917
918static bool isGoogScope(const UnwrappedLine &Line) {
919 // FIXME: Closure-library specific stuff should not be hard-coded but be
920 // configurable.
921 if (Line.Tokens.size() < 4)
922 return false;
923 auto I = Line.Tokens.begin();
924 if (I->Tok->TokenText != "goog")
925 return false;
926 ++I;
927 if (I->Tok->isNot(Kind: tok::period))
928 return false;
929 ++I;
930 if (I->Tok->TokenText != "scope")
931 return false;
932 ++I;
933 return I->Tok->is(Kind: tok::l_paren);
934}
935
936static bool isIIFE(const UnwrappedLine &Line,
937 const AdditionalKeywords &Keywords) {
938 // Look for the start of an immediately invoked anonymous function.
939 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
940 // This is commonly done in JavaScript to create a new, anonymous scope.
941 // Example: (function() { ... })()
942 if (Line.Tokens.size() < 3)
943 return false;
944 auto I = Line.Tokens.begin();
945 if (I->Tok->isNot(Kind: tok::l_paren))
946 return false;
947 ++I;
948 if (I->Tok->isNot(Kind: Keywords.kw_function))
949 return false;
950 ++I;
951 return I->Tok->is(Kind: tok::l_paren);
952}
953
954static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
955 const FormatToken &InitialToken) {
956 tok::TokenKind Kind = InitialToken.Tok.getKind();
957 if (InitialToken.is(TT: TT_NamespaceMacro))
958 Kind = tok::kw_namespace;
959
960 switch (Kind) {
961 case tok::kw_namespace:
962 return Style.BraceWrapping.AfterNamespace;
963 case tok::kw_class:
964 return Style.BraceWrapping.AfterClass;
965 case tok::kw_union:
966 return Style.BraceWrapping.AfterUnion;
967 case tok::kw_struct:
968 return Style.BraceWrapping.AfterStruct;
969 case tok::kw_enum:
970 return Style.BraceWrapping.AfterEnum;
971 default:
972 return false;
973 }
974}
975
976void UnwrappedLineParser::parseChildBlock() {
977 assert(FormatTok->is(tok::l_brace));
978 FormatTok->setBlockKind(BK_Block);
979 const FormatToken *OpeningBrace = FormatTok;
980 nextToken();
981 {
982 bool SkipIndent = (Style.isJavaScript() &&
983 (isGoogScope(Line: *Line) || isIIFE(Line: *Line, Keywords)));
984 ScopedLineState LineState(*this);
985 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
986 /*MustBeDeclaration=*/false);
987 Line->Level += SkipIndent ? 0 : 1;
988 parseLevel(OpeningBrace);
989 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
990 Line->Level -= SkipIndent ? 0 : 1;
991 }
992 nextToken();
993}
994
995void UnwrappedLineParser::parsePPDirective() {
996 assert(FormatTok->is(tok::hash) && "'#' expected");
997 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
998
999 nextToken();
1000
1001 if (!FormatTok->Tok.getIdentifierInfo()) {
1002 parsePPUnknown();
1003 return;
1004 }
1005
1006 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1007 case tok::pp_define:
1008 parsePPDefine();
1009 return;
1010 case tok::pp_if:
1011 parsePPIf(/*IfDef=*/false);
1012 break;
1013 case tok::pp_ifdef:
1014 case tok::pp_ifndef:
1015 parsePPIf(/*IfDef=*/true);
1016 break;
1017 case tok::pp_else:
1018 case tok::pp_elifdef:
1019 case tok::pp_elifndef:
1020 case tok::pp_elif:
1021 parsePPElse();
1022 break;
1023 case tok::pp_endif:
1024 parsePPEndIf();
1025 break;
1026 case tok::pp_pragma:
1027 parsePPPragma();
1028 break;
1029 case tok::pp_error:
1030 case tok::pp_warning:
1031 nextToken();
1032 if (!eof() && Style.isCpp())
1033 FormatTok->setFinalizedType(TT_AfterPPDirective);
1034 [[fallthrough]];
1035 default:
1036 parsePPUnknown();
1037 break;
1038 }
1039}
1040
1041void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1042 size_t Line = CurrentLines->size();
1043 if (CurrentLines == &PreprocessorDirectives)
1044 Line += Lines.size();
1045
1046 if (Unreachable ||
1047 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1048 PPStack.push_back(Elt: {PP_Unreachable, Line});
1049 } else {
1050 PPStack.push_back(Elt: {PP_Conditional, Line});
1051 }
1052}
1053
1054void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1055 ++PPBranchLevel;
1056 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1057 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1058 PPLevelBranchIndex.push_back(Elt: 0);
1059 PPLevelBranchCount.push_back(Elt: 0);
1060 }
1061 PPChainBranchIndex.push(x: Unreachable ? -1 : 0);
1062 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1063 conditionalCompilationCondition(Unreachable: Unreachable || Skip);
1064}
1065
1066void UnwrappedLineParser::conditionalCompilationAlternative() {
1067 if (!PPStack.empty())
1068 PPStack.pop_back();
1069 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1070 if (!PPChainBranchIndex.empty())
1071 ++PPChainBranchIndex.top();
1072 conditionalCompilationCondition(
1073 Unreachable: PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1074 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1075}
1076
1077void UnwrappedLineParser::conditionalCompilationEnd() {
1078 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1079 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1080 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1081 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1082 }
1083 // Guard against #endif's without #if.
1084 if (PPBranchLevel > -1)
1085 --PPBranchLevel;
1086 if (!PPChainBranchIndex.empty())
1087 PPChainBranchIndex.pop();
1088 if (!PPStack.empty())
1089 PPStack.pop_back();
1090}
1091
1092void UnwrappedLineParser::parsePPIf(bool IfDef) {
1093 bool IfNDef = FormatTok->is(Kind: tok::pp_ifndef);
1094 nextToken();
1095 bool Unreachable = false;
1096 if (!IfDef && (FormatTok->is(Kind: tok::kw_false) || FormatTok->TokenText == "0"))
1097 Unreachable = true;
1098 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1099 Unreachable = true;
1100 conditionalCompilationStart(Unreachable);
1101 FormatToken *IfCondition = FormatTok;
1102 // If there's a #ifndef on the first line, and the only lines before it are
1103 // comments, it could be an include guard.
1104 bool MaybeIncludeGuard = IfNDef;
1105 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1106 for (auto &Line : Lines) {
1107 if (Line.Tokens.front().Tok->isNot(Kind: tok::comment)) {
1108 MaybeIncludeGuard = false;
1109 IncludeGuard = IG_Rejected;
1110 break;
1111 }
1112 }
1113 }
1114 --PPBranchLevel;
1115 parsePPUnknown();
1116 ++PPBranchLevel;
1117 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1118 IncludeGuard = IG_IfNdefed;
1119 IncludeGuardToken = IfCondition;
1120 }
1121}
1122
1123void UnwrappedLineParser::parsePPElse() {
1124 // If a potential include guard has an #else, it's not an include guard.
1125 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1126 IncludeGuard = IG_Rejected;
1127 // Don't crash when there is an #else without an #if.
1128 assert(PPBranchLevel >= -1);
1129 if (PPBranchLevel == -1)
1130 conditionalCompilationStart(/*Unreachable=*/true);
1131 conditionalCompilationAlternative();
1132 --PPBranchLevel;
1133 parsePPUnknown();
1134 ++PPBranchLevel;
1135}
1136
1137void UnwrappedLineParser::parsePPEndIf() {
1138 conditionalCompilationEnd();
1139 parsePPUnknown();
1140 // If the #endif of a potential include guard is the last thing in the file,
1141 // then we found an include guard.
1142 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1143 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1144 IncludeGuard = IG_Found;
1145 }
1146}
1147
1148void UnwrappedLineParser::parsePPDefine() {
1149 nextToken();
1150
1151 if (!FormatTok->Tok.getIdentifierInfo()) {
1152 IncludeGuard = IG_Rejected;
1153 IncludeGuardToken = nullptr;
1154 parsePPUnknown();
1155 return;
1156 }
1157
1158 bool MaybeIncludeGuard = false;
1159 if (IncludeGuard == IG_IfNdefed &&
1160 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1161 IncludeGuard = IG_Defined;
1162 IncludeGuardToken = nullptr;
1163 for (auto &Line : Lines) {
1164 if (!Line.Tokens.front().Tok->isOneOf(K1: tok::comment, K2: tok::hash)) {
1165 IncludeGuard = IG_Rejected;
1166 break;
1167 }
1168 }
1169 MaybeIncludeGuard = IncludeGuard == IG_Defined;
1170 }
1171
1172 // In the context of a define, even keywords should be treated as normal
1173 // identifiers. Setting the kind to identifier is not enough, because we need
1174 // to treat additional keywords like __except as well, which are already
1175 // identifiers. Setting the identifier info to null interferes with include
1176 // guard processing above, and changes preprocessing nesting.
1177 FormatTok->Tok.setKind(tok::identifier);
1178 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1179 nextToken();
1180
1181 // IncludeGuard can't have a non-empty macro definition.
1182 if (MaybeIncludeGuard && !eof())
1183 IncludeGuard = IG_Rejected;
1184
1185 if (FormatTok->Tok.getKind() == tok::l_paren &&
1186 !FormatTok->hasWhitespaceBefore()) {
1187 parseParens();
1188 }
1189 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1190 Line->Level += PPBranchLevel + 1;
1191 addUnwrappedLine();
1192 ++Line->Level;
1193
1194 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1195 assert((int)Line->PPLevel >= 0);
1196 Line->InMacroBody = true;
1197
1198 if (Style.SkipMacroDefinitionBody) {
1199 while (!eof()) {
1200 FormatTok->Finalized = true;
1201 FormatTok = Tokens->getNextToken();
1202 }
1203 addUnwrappedLine();
1204 return;
1205 }
1206
1207 // Errors during a preprocessor directive can only affect the layout of the
1208 // preprocessor directive, and thus we ignore them. An alternative approach
1209 // would be to use the same approach we use on the file level (no
1210 // re-indentation if there was a structural error) within the macro
1211 // definition.
1212 parseFile();
1213}
1214
1215void UnwrappedLineParser::parsePPPragma() {
1216 Line->InPragmaDirective = true;
1217 parsePPUnknown();
1218}
1219
1220void UnwrappedLineParser::parsePPUnknown() {
1221 while (!eof())
1222 nextToken();
1223 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1224 Line->Level += PPBranchLevel + 1;
1225 addUnwrappedLine();
1226}
1227
1228// Here we exclude certain tokens that are not usually the first token in an
1229// unwrapped line. This is used in attempt to distinguish macro calls without
1230// trailing semicolons from other constructs split to several lines.
1231static bool tokenCanStartNewLine(const FormatToken &Tok) {
1232 // Semicolon can be a null-statement, l_square can be a start of a macro or
1233 // a C++11 attribute, but this doesn't seem to be common.
1234 return !Tok.isOneOf(K1: tok::semi, K2: tok::l_brace,
1235 // Tokens that can only be used as binary operators and a
1236 // part of overloaded operator names.
1237 Ks: tok::period, Ks: tok::periodstar, Ks: tok::arrow, Ks: tok::arrowstar,
1238 Ks: tok::less, Ks: tok::greater, Ks: tok::slash, Ks: tok::percent,
1239 Ks: tok::lessless, Ks: tok::greatergreater, Ks: tok::equal,
1240 Ks: tok::plusequal, Ks: tok::minusequal, Ks: tok::starequal,
1241 Ks: tok::slashequal, Ks: tok::percentequal, Ks: tok::ampequal,
1242 Ks: tok::pipeequal, Ks: tok::caretequal, Ks: tok::greatergreaterequal,
1243 Ks: tok::lesslessequal,
1244 // Colon is used in labels, base class lists, initializer
1245 // lists, range-based for loops, ternary operator, but
1246 // should never be the first token in an unwrapped line.
1247 Ks: tok::colon,
1248 // 'noexcept' is a trailing annotation.
1249 Ks: tok::kw_noexcept);
1250}
1251
1252static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1253 const FormatToken *FormatTok) {
1254 // FIXME: This returns true for C/C++ keywords like 'struct'.
1255 return FormatTok->is(Kind: tok::identifier) &&
1256 (!FormatTok->Tok.getIdentifierInfo() ||
1257 !FormatTok->isOneOf(
1258 K1: Keywords.kw_in, K2: Keywords.kw_of, Ks: Keywords.kw_as, Ks: Keywords.kw_async,
1259 Ks: Keywords.kw_await, Ks: Keywords.kw_yield, Ks: Keywords.kw_finally,
1260 Ks: Keywords.kw_function, Ks: Keywords.kw_import, Ks: Keywords.kw_is,
1261 Ks: Keywords.kw_let, Ks: Keywords.kw_var, Ks: tok::kw_const,
1262 Ks: Keywords.kw_abstract, Ks: Keywords.kw_extends, Ks: Keywords.kw_implements,
1263 Ks: Keywords.kw_instanceof, Ks: Keywords.kw_interface,
1264 Ks: Keywords.kw_override, Ks: Keywords.kw_throws, Ks: Keywords.kw_from));
1265}
1266
1267static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1268 const FormatToken *FormatTok) {
1269 return FormatTok->Tok.isLiteral() ||
1270 FormatTok->isOneOf(K1: tok::kw_true, K2: tok::kw_false) ||
1271 mustBeJSIdent(Keywords, FormatTok);
1272}
1273
1274// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1275// when encountered after a value (see mustBeJSIdentOrValue).
1276static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1277 const FormatToken *FormatTok) {
1278 return FormatTok->isOneOf(
1279 K1: tok::kw_return, K2: Keywords.kw_yield,
1280 // conditionals
1281 Ks: tok::kw_if, Ks: tok::kw_else,
1282 // loops
1283 Ks: tok::kw_for, Ks: tok::kw_while, Ks: tok::kw_do, Ks: tok::kw_continue, Ks: tok::kw_break,
1284 // switch/case
1285 Ks: tok::kw_switch, Ks: tok::kw_case,
1286 // exceptions
1287 Ks: tok::kw_throw, Ks: tok::kw_try, Ks: tok::kw_catch, Ks: Keywords.kw_finally,
1288 // declaration
1289 Ks: tok::kw_const, Ks: tok::kw_class, Ks: Keywords.kw_var, Ks: Keywords.kw_let,
1290 Ks: Keywords.kw_async, Ks: Keywords.kw_function,
1291 // import/export
1292 Ks: Keywords.kw_import, Ks: tok::kw_export);
1293}
1294
1295// Checks whether a token is a type in K&R C (aka C78).
1296static bool isC78Type(const FormatToken &Tok) {
1297 return Tok.isOneOf(K1: tok::kw_char, K2: tok::kw_short, Ks: tok::kw_int, Ks: tok::kw_long,
1298 Ks: tok::kw_unsigned, Ks: tok::kw_float, Ks: tok::kw_double,
1299 Ks: tok::identifier);
1300}
1301
1302// This function checks whether a token starts the first parameter declaration
1303// in a K&R C (aka C78) function definition, e.g.:
1304// int f(a, b)
1305// short a, b;
1306// {
1307// return a + b;
1308// }
1309static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1310 const FormatToken *FuncName) {
1311 assert(Tok);
1312 assert(Next);
1313 assert(FuncName);
1314
1315 if (FuncName->isNot(Kind: tok::identifier))
1316 return false;
1317
1318 const FormatToken *Prev = FuncName->Previous;
1319 if (!Prev || (Prev->isNot(Kind: tok::star) && !isC78Type(Tok: *Prev)))
1320 return false;
1321
1322 if (!isC78Type(Tok: *Tok) &&
1323 !Tok->isOneOf(K1: tok::kw_register, K2: tok::kw_struct, Ks: tok::kw_union)) {
1324 return false;
1325 }
1326
1327 if (Next->isNot(Kind: tok::star) && !Next->Tok.getIdentifierInfo())
1328 return false;
1329
1330 Tok = Tok->Previous;
1331 if (!Tok || Tok->isNot(Kind: tok::r_paren))
1332 return false;
1333
1334 Tok = Tok->Previous;
1335 if (!Tok || Tok->isNot(Kind: tok::identifier))
1336 return false;
1337
1338 return Tok->Previous && Tok->Previous->isOneOf(K1: tok::l_paren, K2: tok::comma);
1339}
1340
1341bool UnwrappedLineParser::parseModuleImport() {
1342 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1343
1344 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1345 !Token->Tok.getIdentifierInfo() &&
1346 !Token->isOneOf(K1: tok::colon, K2: tok::less, Ks: tok::string_literal)) {
1347 return false;
1348 }
1349
1350 nextToken();
1351 while (!eof()) {
1352 if (FormatTok->is(Kind: tok::colon)) {
1353 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1354 }
1355 // Handle import <foo/bar.h> as we would an include statement.
1356 else if (FormatTok->is(Kind: tok::less)) {
1357 nextToken();
1358 while (!FormatTok->isOneOf(K1: tok::semi, K2: tok::greater) && !eof()) {
1359 // Mark tokens up to the trailing line comments as implicit string
1360 // literals.
1361 if (FormatTok->isNot(Kind: tok::comment) &&
1362 !FormatTok->TokenText.starts_with(Prefix: "//")) {
1363 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1364 }
1365 nextToken();
1366 }
1367 }
1368 if (FormatTok->is(Kind: tok::semi)) {
1369 nextToken();
1370 break;
1371 }
1372 nextToken();
1373 }
1374
1375 addUnwrappedLine();
1376 return true;
1377}
1378
1379// readTokenWithJavaScriptASI reads the next token and terminates the current
1380// line if JavaScript Automatic Semicolon Insertion must
1381// happen between the current token and the next token.
1382//
1383// This method is conservative - it cannot cover all edge cases of JavaScript,
1384// but only aims to correctly handle certain well known cases. It *must not*
1385// return true in speculative cases.
1386void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1387 FormatToken *Previous = FormatTok;
1388 readToken();
1389 FormatToken *Next = FormatTok;
1390
1391 bool IsOnSameLine =
1392 CommentsBeforeNextToken.empty()
1393 ? Next->NewlinesBefore == 0
1394 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1395 if (IsOnSameLine)
1396 return;
1397
1398 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Previous);
1399 bool PreviousStartsTemplateExpr =
1400 Previous->is(TT: TT_TemplateString) && Previous->TokenText.ends_with(Suffix: "${");
1401 if (PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) {
1402 // If the line contains an '@' sign, the previous token might be an
1403 // annotation, which can precede another identifier/value.
1404 bool HasAt = llvm::any_of(Range&: Line->Tokens, P: [](UnwrappedLineNode &LineNode) {
1405 return LineNode.Tok->is(Kind: tok::at);
1406 });
1407 if (HasAt)
1408 return;
1409 }
1410 if (Next->is(Kind: tok::exclaim) && PreviousMustBeValue)
1411 return addUnwrappedLine();
1412 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Next);
1413 bool NextEndsTemplateExpr =
1414 Next->is(TT: TT_TemplateString) && Next->TokenText.starts_with(Prefix: "}");
1415 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1416 (PreviousMustBeValue ||
1417 Previous->isOneOf(K1: tok::r_square, K2: tok::r_paren, Ks: tok::plusplus,
1418 Ks: tok::minusminus))) {
1419 return addUnwrappedLine();
1420 }
1421 if ((PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) &&
1422 isJSDeclOrStmt(Keywords, FormatTok: Next)) {
1423 return addUnwrappedLine();
1424 }
1425}
1426
1427void UnwrappedLineParser::parseStructuralElement(
1428 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1429 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1430 if (Style.isTableGen() && FormatTok->is(Kind: tok::pp_include)) {
1431 nextToken();
1432 if (FormatTok->is(Kind: tok::string_literal))
1433 nextToken();
1434 addUnwrappedLine();
1435 return;
1436 }
1437
1438 if (IsCpp) {
1439 while (FormatTok->is(Kind: tok::l_square) && handleCppAttributes()) {
1440 }
1441 } else if (Style.isVerilog()) {
1442 if (Keywords.isVerilogStructuredProcedure(Tok: *FormatTok)) {
1443 parseForOrWhileLoop(/*HasParens=*/false);
1444 return;
1445 }
1446 if (FormatTok->isOneOf(K1: Keywords.kw_foreach, K2: Keywords.kw_repeat)) {
1447 parseForOrWhileLoop();
1448 return;
1449 }
1450 if (FormatTok->isOneOf(K1: tok::kw_restrict, K2: Keywords.kw_assert,
1451 Ks: Keywords.kw_assume, Ks: Keywords.kw_cover)) {
1452 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1453 return;
1454 }
1455
1456 // Skip things that can exist before keywords like 'if' and 'case'.
1457 while (true) {
1458 if (FormatTok->isOneOf(K1: Keywords.kw_priority, K2: Keywords.kw_unique,
1459 Ks: Keywords.kw_unique0)) {
1460 nextToken();
1461 } else if (FormatTok->is(Kind: tok::l_paren) &&
1462 Tokens->peekNextToken()->is(Kind: tok::star)) {
1463 parseParens();
1464 } else {
1465 break;
1466 }
1467 }
1468 }
1469
1470 // Tokens that only make sense at the beginning of a line.
1471 if (FormatTok->isAccessSpecifierKeyword()) {
1472 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1473 nextToken();
1474 else
1475 parseAccessSpecifier();
1476 return;
1477 }
1478 switch (FormatTok->Tok.getKind()) {
1479 case tok::kw_asm:
1480 nextToken();
1481 if (FormatTok->is(Kind: tok::l_brace)) {
1482 FormatTok->setFinalizedType(TT_InlineASMBrace);
1483 nextToken();
1484 while (FormatTok && !eof()) {
1485 if (FormatTok->is(Kind: tok::r_brace)) {
1486 FormatTok->setFinalizedType(TT_InlineASMBrace);
1487 nextToken();
1488 addUnwrappedLine();
1489 break;
1490 }
1491 FormatTok->Finalized = true;
1492 nextToken();
1493 }
1494 }
1495 break;
1496 case tok::kw_namespace:
1497 parseNamespace();
1498 return;
1499 case tok::kw_if: {
1500 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1501 // field/method declaration.
1502 break;
1503 }
1504 FormatToken *Tok = parseIfThenElse(IfKind);
1505 if (IfLeftBrace)
1506 *IfLeftBrace = Tok;
1507 return;
1508 }
1509 case tok::kw_for:
1510 case tok::kw_while:
1511 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1512 // field/method declaration.
1513 break;
1514 }
1515 parseForOrWhileLoop();
1516 return;
1517 case tok::kw_do:
1518 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1519 // field/method declaration.
1520 break;
1521 }
1522 parseDoWhile();
1523 if (HasDoWhile)
1524 *HasDoWhile = true;
1525 return;
1526 case tok::kw_switch:
1527 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1528 // 'switch: string' field declaration.
1529 break;
1530 }
1531 parseSwitch(/*IsExpr=*/false);
1532 return;
1533 case tok::kw_default: {
1534 // In Verilog default along with other labels are handled in the next loop.
1535 if (Style.isVerilog())
1536 break;
1537 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1538 // 'default: string' field declaration.
1539 break;
1540 }
1541 auto *Default = FormatTok;
1542 nextToken();
1543 if (FormatTok->is(Kind: tok::colon)) {
1544 FormatTok->setFinalizedType(TT_CaseLabelColon);
1545 parseLabel();
1546 return;
1547 }
1548 if (FormatTok->is(Kind: tok::arrow)) {
1549 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1550 Default->setFinalizedType(TT_SwitchExpressionLabel);
1551 parseLabel();
1552 return;
1553 }
1554 // e.g. "default void f() {}" in a Java interface.
1555 break;
1556 }
1557 case tok::kw_case:
1558 // Proto: there are no switch/case statements.
1559 if (Style.Language == FormatStyle::LK_Proto) {
1560 nextToken();
1561 return;
1562 }
1563 if (Style.isVerilog()) {
1564 parseBlock();
1565 addUnwrappedLine();
1566 return;
1567 }
1568 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1569 // 'case: string' field declaration.
1570 nextToken();
1571 break;
1572 }
1573 parseCaseLabel();
1574 return;
1575 case tok::kw_goto:
1576 nextToken();
1577 if (FormatTok->is(Kind: tok::kw_case))
1578 nextToken();
1579 break;
1580 case tok::kw_try:
1581 case tok::kw___try:
1582 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1583 // field/method declaration.
1584 break;
1585 }
1586 parseTryCatch();
1587 return;
1588 case tok::kw_extern:
1589 nextToken();
1590 if (Style.isVerilog()) {
1591 // In Verilog and extern module declaration looks like a start of module.
1592 // But there is no body and endmodule. So we handle it separately.
1593 if (Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
1594 parseVerilogHierarchyHeader();
1595 return;
1596 }
1597 } else if (FormatTok->is(Kind: tok::string_literal)) {
1598 nextToken();
1599 if (FormatTok->is(Kind: tok::l_brace)) {
1600 if (Style.BraceWrapping.AfterExternBlock)
1601 addUnwrappedLine();
1602 // Either we indent or for backwards compatibility we follow the
1603 // AfterExternBlock style.
1604 unsigned AddLevels =
1605 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1606 (Style.BraceWrapping.AfterExternBlock &&
1607 Style.IndentExternBlock ==
1608 FormatStyle::IEBS_AfterExternBlock)
1609 ? 1u
1610 : 0u;
1611 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1612 addUnwrappedLine();
1613 return;
1614 }
1615 }
1616 break;
1617 case tok::kw_export:
1618 if (Style.isJavaScript()) {
1619 parseJavaScriptEs6ImportExport();
1620 return;
1621 }
1622 if (IsCpp) {
1623 nextToken();
1624 if (FormatTok->is(Kind: tok::kw_namespace)) {
1625 parseNamespace();
1626 return;
1627 }
1628 if (FormatTok->is(Kind: tok::l_brace)) {
1629 parseCppExportBlock();
1630 return;
1631 }
1632 if (FormatTok->is(II: Keywords.kw_import) && parseModuleImport())
1633 return;
1634 }
1635 break;
1636 case tok::kw_inline:
1637 nextToken();
1638 if (FormatTok->is(Kind: tok::kw_namespace)) {
1639 parseNamespace();
1640 return;
1641 }
1642 break;
1643 case tok::identifier:
1644 if (FormatTok->is(TT: TT_ForEachMacro)) {
1645 parseForOrWhileLoop();
1646 return;
1647 }
1648 if (FormatTok->is(TT: TT_MacroBlockBegin)) {
1649 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1650 /*MunchSemi=*/false);
1651 return;
1652 }
1653 if (FormatTok->is(II: Keywords.kw_import)) {
1654 if (Style.isJavaScript()) {
1655 parseJavaScriptEs6ImportExport();
1656 return;
1657 }
1658 if (Style.Language == FormatStyle::LK_Proto) {
1659 nextToken();
1660 if (FormatTok->is(Kind: tok::kw_public))
1661 nextToken();
1662 if (FormatTok->isNot(Kind: tok::string_literal))
1663 return;
1664 nextToken();
1665 if (FormatTok->is(Kind: tok::semi))
1666 nextToken();
1667 addUnwrappedLine();
1668 return;
1669 }
1670 if (IsCpp && parseModuleImport())
1671 return;
1672 }
1673 if (IsCpp && FormatTok->isOneOf(K1: Keywords.kw_signals, K2: Keywords.kw_qsignals,
1674 Ks: Keywords.kw_slots, Ks: Keywords.kw_qslots)) {
1675 nextToken();
1676 if (FormatTok->is(Kind: tok::colon)) {
1677 nextToken();
1678 addUnwrappedLine();
1679 return;
1680 }
1681 }
1682 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
1683 parseStatementMacro();
1684 return;
1685 }
1686 if (IsCpp && FormatTok->is(TT: TT_NamespaceMacro)) {
1687 parseNamespace();
1688 return;
1689 }
1690 // In Verilog labels can be any expression, so we don't do them here.
1691 // JS doesn't have macros, and within classes colons indicate fields, not
1692 // labels.
1693 // TableGen doesn't have labels.
1694 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1695 Tokens->peekNextToken()->is(Kind: tok::colon) && !Line->MustBeDeclaration) {
1696 nextToken();
1697 if (!Line->InMacroBody || CurrentLines->size() > 1)
1698 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1699 FormatTok->setFinalizedType(TT_GotoLabelColon);
1700 parseLabel(LeftAlignLabel: !Style.IndentGotoLabels);
1701 if (HasLabel)
1702 *HasLabel = true;
1703 return;
1704 }
1705 if (Style.isJava() && FormatTok->is(II: Keywords.kw_record)) {
1706 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1707 addUnwrappedLine();
1708 return;
1709 }
1710 // In all other cases, parse the declaration.
1711 break;
1712 default:
1713 break;
1714 }
1715
1716 bool SeenEqual = false;
1717 for (const bool InRequiresExpression =
1718 OpeningBrace && OpeningBrace->isOneOf(K1: TT_RequiresExpressionLBrace,
1719 K2: TT_CompoundRequirementLBrace);
1720 !eof();) {
1721 const FormatToken *Previous = FormatTok->Previous;
1722 switch (FormatTok->Tok.getKind()) {
1723 case tok::at:
1724 nextToken();
1725 if (FormatTok->is(Kind: tok::l_brace)) {
1726 nextToken();
1727 parseBracedList();
1728 break;
1729 }
1730 if (Style.isJava() && FormatTok->is(II: Keywords.kw_interface)) {
1731 nextToken();
1732 break;
1733 }
1734 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1735 case tok::objc_public:
1736 case tok::objc_protected:
1737 case tok::objc_package:
1738 case tok::objc_private:
1739 return parseAccessSpecifier();
1740 case tok::objc_interface:
1741 case tok::objc_implementation:
1742 return parseObjCInterfaceOrImplementation();
1743 case tok::objc_protocol:
1744 if (parseObjCProtocol())
1745 return;
1746 break;
1747 case tok::objc_end:
1748 return; // Handled by the caller.
1749 case tok::objc_optional:
1750 case tok::objc_required:
1751 nextToken();
1752 addUnwrappedLine();
1753 return;
1754 case tok::objc_autoreleasepool:
1755 IsAutoRelease = true;
1756 [[fallthrough]];
1757 case tok::objc_synchronized:
1758 nextToken();
1759 if (!IsAutoRelease && FormatTok->is(Kind: tok::l_paren)) {
1760 // Skip synchronization object
1761 parseParens();
1762 }
1763 if (FormatTok->is(Kind: tok::l_brace)) {
1764 if (Style.BraceWrapping.AfterControlStatement ==
1765 FormatStyle::BWACS_Always) {
1766 addUnwrappedLine();
1767 }
1768 parseBlock();
1769 }
1770 addUnwrappedLine();
1771 return;
1772 case tok::objc_try:
1773 // This branch isn't strictly necessary (the kw_try case below would
1774 // do this too after the tok::at is parsed above). But be explicit.
1775 parseTryCatch();
1776 return;
1777 default:
1778 break;
1779 }
1780 break;
1781 case tok::kw_requires: {
1782 if (IsCpp) {
1783 bool ParsedClause = parseRequires(SeenEqual);
1784 if (ParsedClause)
1785 return;
1786 } else {
1787 nextToken();
1788 }
1789 break;
1790 }
1791 case tok::kw_enum:
1792 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1793 // "template <..., enum ...>".
1794 if (Previous && Previous->isOneOf(K1: tok::less, K2: tok::arrow, Ks: tok::comma)) {
1795 nextToken();
1796 break;
1797 }
1798
1799 // parseEnum falls through and does not yet add an unwrapped line as an
1800 // enum definition can start a structural element.
1801 if (!parseEnum())
1802 break;
1803 // This only applies to C++ and Verilog.
1804 if (!IsCpp && !Style.isVerilog()) {
1805 addUnwrappedLine();
1806 return;
1807 }
1808 break;
1809 case tok::kw_typedef:
1810 nextToken();
1811 if (FormatTok->isOneOf(K1: Keywords.kw_NS_ENUM, K2: Keywords.kw_NS_OPTIONS,
1812 Ks: Keywords.kw_CF_ENUM, Ks: Keywords.kw_CF_OPTIONS,
1813 Ks: Keywords.kw_CF_CLOSED_ENUM,
1814 Ks: Keywords.kw_NS_CLOSED_ENUM)) {
1815 parseEnum();
1816 }
1817 break;
1818 case tok::kw_class:
1819 if (Style.isVerilog()) {
1820 parseBlock();
1821 addUnwrappedLine();
1822 return;
1823 }
1824 if (Style.isTableGen()) {
1825 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1826 // This is same as def and so on.
1827 nextToken();
1828 break;
1829 }
1830 [[fallthrough]];
1831 case tok::kw_struct:
1832 case tok::kw_union:
1833 if (parseStructLike())
1834 return;
1835 break;
1836 case tok::kw_decltype:
1837 nextToken();
1838 if (FormatTok->is(Kind: tok::l_paren)) {
1839 parseParens();
1840 if (FormatTok->Previous &&
1841 FormatTok->Previous->endsSequence(K1: tok::r_paren, Tokens: tok::kw_auto,
1842 Tokens: tok::l_paren)) {
1843 Line->SeenDecltypeAuto = true;
1844 }
1845 }
1846 break;
1847 case tok::period:
1848 nextToken();
1849 // In Java, classes have an implicit static member "class".
1850 if (Style.isJava() && FormatTok && FormatTok->is(Kind: tok::kw_class))
1851 nextToken();
1852 if (Style.isJavaScript() && FormatTok &&
1853 FormatTok->Tok.getIdentifierInfo()) {
1854 // JavaScript only has pseudo keywords, all keywords are allowed to
1855 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1856 nextToken();
1857 }
1858 break;
1859 case tok::semi:
1860 nextToken();
1861 addUnwrappedLine();
1862 return;
1863 case tok::r_brace:
1864 addUnwrappedLine();
1865 return;
1866 case tok::l_paren: {
1867 parseParens();
1868 // Break the unwrapped line if a K&R C function definition has a parameter
1869 // declaration.
1870 if (OpeningBrace || !IsCpp || !Previous || eof())
1871 break;
1872 if (isC78ParameterDecl(Tok: FormatTok,
1873 Next: Tokens->peekNextToken(/*SkipComment=*/true),
1874 FuncName: Previous)) {
1875 addUnwrappedLine();
1876 return;
1877 }
1878 break;
1879 }
1880 case tok::kw_operator:
1881 nextToken();
1882 if (FormatTok->isBinaryOperator())
1883 nextToken();
1884 break;
1885 case tok::caret: {
1886 const auto *Prev = FormatTok->getPreviousNonComment();
1887 nextToken();
1888 if (Prev && Prev->is(Kind: tok::identifier))
1889 break;
1890 // Block return type.
1891 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1892 nextToken();
1893 // Return types: pointers are ok too.
1894 while (FormatTok->is(Kind: tok::star))
1895 nextToken();
1896 }
1897 // Block argument list.
1898 if (FormatTok->is(Kind: tok::l_paren))
1899 parseParens();
1900 // Block body.
1901 if (FormatTok->is(Kind: tok::l_brace))
1902 parseChildBlock();
1903 break;
1904 }
1905 case tok::l_brace:
1906 if (InRequiresExpression)
1907 FormatTok->setFinalizedType(TT_BracedListLBrace);
1908 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1909 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1910 // A block outside of parentheses must be the last part of a
1911 // structural element.
1912 // FIXME: Figure out cases where this is not true, and add projections
1913 // for them (the one we know is missing are lambdas).
1914 if (Style.isJava() &&
1915 Line->Tokens.front().Tok->is(II: Keywords.kw_synchronized)) {
1916 // If necessary, we could set the type to something different than
1917 // TT_FunctionLBrace.
1918 if (Style.BraceWrapping.AfterControlStatement ==
1919 FormatStyle::BWACS_Always) {
1920 addUnwrappedLine();
1921 }
1922 } else if (Style.BraceWrapping.AfterFunction) {
1923 addUnwrappedLine();
1924 }
1925 if (!Previous || Previous->isNot(Kind: TT_TypeDeclarationParen))
1926 FormatTok->setFinalizedType(TT_FunctionLBrace);
1927 parseBlock();
1928 IsDecltypeAutoFunction = false;
1929 addUnwrappedLine();
1930 return;
1931 }
1932 // Otherwise this was a braced init list, and the structural
1933 // element continues.
1934 break;
1935 case tok::kw_try:
1936 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1937 // field/method declaration.
1938 nextToken();
1939 break;
1940 }
1941 // We arrive here when parsing function-try blocks.
1942 if (Style.BraceWrapping.AfterFunction)
1943 addUnwrappedLine();
1944 parseTryCatch();
1945 return;
1946 case tok::identifier: {
1947 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where) &&
1948 Line->MustBeDeclaration) {
1949 addUnwrappedLine();
1950 parseCSharpGenericTypeConstraint();
1951 break;
1952 }
1953 if (FormatTok->is(TT: TT_MacroBlockEnd)) {
1954 addUnwrappedLine();
1955 return;
1956 }
1957
1958 // Function declarations (as opposed to function expressions) are parsed
1959 // on their own unwrapped line by continuing this loop. Function
1960 // expressions (functions that are not on their own line) must not create
1961 // a new unwrapped line, so they are special cased below.
1962 size_t TokenCount = Line->Tokens.size();
1963 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_function) &&
1964 (TokenCount > 1 ||
1965 (TokenCount == 1 &&
1966 Line->Tokens.front().Tok->isNot(Kind: Keywords.kw_async)))) {
1967 tryToParseJSFunction();
1968 break;
1969 }
1970 if ((Style.isJavaScript() || Style.isJava()) &&
1971 FormatTok->is(II: Keywords.kw_interface)) {
1972 if (Style.isJavaScript()) {
1973 // In JavaScript/TypeScript, "interface" can be used as a standalone
1974 // identifier, e.g. in `var interface = 1;`. If "interface" is
1975 // followed by another identifier, it is very like to be an actual
1976 // interface declaration.
1977 unsigned StoredPosition = Tokens->getPosition();
1978 FormatToken *Next = Tokens->getNextToken();
1979 FormatTok = Tokens->setPosition(StoredPosition);
1980 if (!mustBeJSIdent(Keywords, FormatTok: Next)) {
1981 nextToken();
1982 break;
1983 }
1984 }
1985 parseRecord();
1986 addUnwrappedLine();
1987 return;
1988 }
1989
1990 if (Style.isVerilog()) {
1991 if (FormatTok->is(II: Keywords.kw_table)) {
1992 parseVerilogTable();
1993 return;
1994 }
1995 if (Keywords.isVerilogBegin(Tok: *FormatTok) ||
1996 Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
1997 parseBlock();
1998 addUnwrappedLine();
1999 return;
2000 }
2001 }
2002
2003 if (!IsCpp && FormatTok->is(II: Keywords.kw_interface)) {
2004 if (parseStructLike())
2005 return;
2006 break;
2007 }
2008
2009 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
2010 parseStatementMacro();
2011 return;
2012 }
2013
2014 // See if the following token should start a new unwrapped line.
2015 StringRef Text = FormatTok->TokenText;
2016
2017 FormatToken *PreviousToken = FormatTok;
2018 nextToken();
2019
2020 // JS doesn't have macros, and within classes colons indicate fields, not
2021 // labels.
2022 if (Style.isJavaScript())
2023 break;
2024
2025 auto OneTokenSoFar = [&]() {
2026 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2027 while (I != E && I->Tok->is(Kind: tok::comment))
2028 ++I;
2029 if (Style.isVerilog())
2030 while (I != E && I->Tok->is(Kind: tok::hash))
2031 ++I;
2032 return I != E && (++I == E);
2033 };
2034 if (OneTokenSoFar()) {
2035 // Recognize function-like macro usages without trailing semicolon as
2036 // well as free-standing macros like Q_OBJECT.
2037 bool FunctionLike = FormatTok->is(Kind: tok::l_paren);
2038 if (FunctionLike)
2039 parseParens();
2040
2041 bool FollowedByNewline =
2042 CommentsBeforeNextToken.empty()
2043 ? FormatTok->NewlinesBefore > 0
2044 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2045
2046 if (FollowedByNewline &&
2047 (Text.size() >= 5 ||
2048 (FunctionLike && FormatTok->isNot(Kind: tok::l_paren))) &&
2049 tokenCanStartNewLine(Tok: *FormatTok) && Text == Text.upper()) {
2050 if (PreviousToken->isNot(Kind: TT_UntouchableMacroFunc))
2051 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2052 addUnwrappedLine();
2053 return;
2054 }
2055 }
2056 break;
2057 }
2058 case tok::equal:
2059 if ((Style.isJavaScript() || Style.isCSharp()) &&
2060 FormatTok->is(TT: TT_FatArrow)) {
2061 tryToParseChildBlock();
2062 break;
2063 }
2064
2065 SeenEqual = true;
2066 nextToken();
2067 if (FormatTok->is(Kind: tok::l_brace)) {
2068 // Block kind should probably be set to BK_BracedInit for any language.
2069 // C# needs this change to ensure that array initialisers and object
2070 // initialisers are indented the same way.
2071 if (Style.isCSharp())
2072 FormatTok->setBlockKind(BK_BracedInit);
2073 // TableGen's defset statement has syntax of the form,
2074 // `defset <type> <name> = { <statement>... }`
2075 if (Style.isTableGen() &&
2076 Line->Tokens.begin()->Tok->is(II: Keywords.kw_defset)) {
2077 FormatTok->setFinalizedType(TT_FunctionLBrace);
2078 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2079 /*MunchSemi=*/false);
2080 addUnwrappedLine();
2081 break;
2082 }
2083 nextToken();
2084 parseBracedList();
2085 } else if (Style.Language == FormatStyle::LK_Proto &&
2086 FormatTok->is(Kind: tok::less)) {
2087 nextToken();
2088 parseBracedList(/*IsAngleBracket=*/true);
2089 }
2090 break;
2091 case tok::l_square:
2092 parseSquare();
2093 break;
2094 case tok::kw_new:
2095 if (Style.isCSharp() &&
2096 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2097 (Previous && Previous->isAccessSpecifierKeyword()))) {
2098 nextToken();
2099 } else {
2100 parseNew();
2101 }
2102 break;
2103 case tok::kw_switch:
2104 if (Style.isJava())
2105 parseSwitch(/*IsExpr=*/true);
2106 else
2107 nextToken();
2108 break;
2109 case tok::kw_case:
2110 // Proto: there are no switch/case statements.
2111 if (Style.Language == FormatStyle::LK_Proto) {
2112 nextToken();
2113 return;
2114 }
2115 // In Verilog switch is called case.
2116 if (Style.isVerilog()) {
2117 parseBlock();
2118 addUnwrappedLine();
2119 return;
2120 }
2121 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2122 // 'case: string' field declaration.
2123 nextToken();
2124 break;
2125 }
2126 parseCaseLabel();
2127 break;
2128 case tok::kw_default:
2129 nextToken();
2130 if (Style.isVerilog()) {
2131 if (FormatTok->is(Kind: tok::colon)) {
2132 // The label will be handled in the next iteration.
2133 break;
2134 }
2135 if (FormatTok->is(II: Keywords.kw_clocking)) {
2136 // A default clocking block.
2137 parseBlock();
2138 addUnwrappedLine();
2139 return;
2140 }
2141 parseVerilogCaseLabel();
2142 return;
2143 }
2144 break;
2145 case tok::colon:
2146 nextToken();
2147 if (Style.isVerilog()) {
2148 parseVerilogCaseLabel();
2149 return;
2150 }
2151 break;
2152 case tok::greater:
2153 nextToken();
2154 if (FormatTok->is(Kind: tok::l_brace))
2155 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2156 break;
2157 default:
2158 nextToken();
2159 break;
2160 }
2161 }
2162}
2163
2164bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2165 assert(FormatTok->is(tok::l_brace));
2166 if (!Style.isCSharp())
2167 return false;
2168 // See if it's a property accessor.
2169 if (!FormatTok->Previous || FormatTok->Previous->isNot(Kind: tok::identifier))
2170 return false;
2171
2172 // See if we are inside a property accessor.
2173 //
2174 // Record the current tokenPosition so that we can advance and
2175 // reset the current token. `Next` is not set yet so we need
2176 // another way to advance along the token stream.
2177 unsigned int StoredPosition = Tokens->getPosition();
2178 FormatToken *Tok = Tokens->getNextToken();
2179
2180 // A trivial property accessor is of the form:
2181 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2182 // Track these as they do not require line breaks to be introduced.
2183 bool HasSpecialAccessor = false;
2184 bool IsTrivialPropertyAccessor = true;
2185 bool HasAttribute = false;
2186 while (!eof()) {
2187 if (const bool IsAccessorKeyword =
2188 Tok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init, Ks: Keywords.kw_set);
2189 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2190 Tok->isOneOf(K1: tok::l_square, K2: tok::semi, Ks: Keywords.kw_internal)) {
2191 if (IsAccessorKeyword)
2192 HasSpecialAccessor = true;
2193 else if (Tok->is(Kind: tok::l_square))
2194 HasAttribute = true;
2195 Tok = Tokens->getNextToken();
2196 continue;
2197 }
2198 if (Tok->isNot(Kind: tok::r_brace))
2199 IsTrivialPropertyAccessor = false;
2200 break;
2201 }
2202
2203 if (!HasSpecialAccessor || HasAttribute) {
2204 Tokens->setPosition(StoredPosition);
2205 return false;
2206 }
2207
2208 // Try to parse the property accessor:
2209 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2210 Tokens->setPosition(StoredPosition);
2211 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2212 addUnwrappedLine();
2213 nextToken();
2214 do {
2215 switch (FormatTok->Tok.getKind()) {
2216 case tok::r_brace:
2217 nextToken();
2218 if (FormatTok->is(Kind: tok::equal)) {
2219 while (!eof() && FormatTok->isNot(Kind: tok::semi))
2220 nextToken();
2221 nextToken();
2222 }
2223 addUnwrappedLine();
2224 return true;
2225 case tok::l_brace:
2226 ++Line->Level;
2227 parseBlock(/*MustBeDeclaration=*/true);
2228 addUnwrappedLine();
2229 --Line->Level;
2230 break;
2231 case tok::equal:
2232 if (FormatTok->is(TT: TT_FatArrow)) {
2233 ++Line->Level;
2234 do {
2235 nextToken();
2236 } while (!eof() && FormatTok->isNot(Kind: tok::semi));
2237 nextToken();
2238 addUnwrappedLine();
2239 --Line->Level;
2240 break;
2241 }
2242 nextToken();
2243 break;
2244 default:
2245 if (FormatTok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init,
2246 Ks: Keywords.kw_set) &&
2247 !IsTrivialPropertyAccessor) {
2248 // Non-trivial get/set needs to be on its own line.
2249 addUnwrappedLine();
2250 }
2251 nextToken();
2252 }
2253 } while (!eof());
2254
2255 // Unreachable for well-formed code (paired '{' and '}').
2256 return true;
2257}
2258
2259bool UnwrappedLineParser::tryToParseLambda() {
2260 assert(FormatTok->is(tok::l_square));
2261 if (!IsCpp) {
2262 nextToken();
2263 return false;
2264 }
2265 FormatToken &LSquare = *FormatTok;
2266 if (!tryToParseLambdaIntroducer())
2267 return false;
2268
2269 FormatToken *Arrow = nullptr;
2270 bool InTemplateParameterList = false;
2271
2272 while (FormatTok->isNot(Kind: tok::l_brace)) {
2273 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2274 nextToken();
2275 continue;
2276 }
2277 switch (FormatTok->Tok.getKind()) {
2278 case tok::l_brace:
2279 break;
2280 case tok::l_paren:
2281 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2282 break;
2283 case tok::l_square:
2284 parseSquare();
2285 break;
2286 case tok::less:
2287 assert(FormatTok->Previous);
2288 if (FormatTok->Previous->is(Kind: tok::r_square))
2289 InTemplateParameterList = true;
2290 nextToken();
2291 break;
2292 case tok::kw_auto:
2293 case tok::kw_class:
2294 case tok::kw_struct:
2295 case tok::kw_union:
2296 case tok::kw_template:
2297 case tok::kw_typename:
2298 case tok::amp:
2299 case tok::star:
2300 case tok::kw_const:
2301 case tok::kw_constexpr:
2302 case tok::kw_consteval:
2303 case tok::comma:
2304 case tok::greater:
2305 case tok::identifier:
2306 case tok::numeric_constant:
2307 case tok::coloncolon:
2308 case tok::kw_mutable:
2309 case tok::kw_noexcept:
2310 case tok::kw_static:
2311 nextToken();
2312 break;
2313 // Specialization of a template with an integer parameter can contain
2314 // arithmetic, logical, comparison and ternary operators.
2315 //
2316 // FIXME: This also accepts sequences of operators that are not in the scope
2317 // of a template argument list.
2318 //
2319 // In a C++ lambda a template type can only occur after an arrow. We use
2320 // this as an heuristic to distinguish between Objective-C expressions
2321 // followed by an `a->b` expression, such as:
2322 // ([obj func:arg] + a->b)
2323 // Otherwise the code below would parse as a lambda.
2324 case tok::plus:
2325 case tok::minus:
2326 case tok::exclaim:
2327 case tok::tilde:
2328 case tok::slash:
2329 case tok::percent:
2330 case tok::lessless:
2331 case tok::pipe:
2332 case tok::pipepipe:
2333 case tok::ampamp:
2334 case tok::caret:
2335 case tok::equalequal:
2336 case tok::exclaimequal:
2337 case tok::greaterequal:
2338 case tok::lessequal:
2339 case tok::question:
2340 case tok::colon:
2341 case tok::ellipsis:
2342 case tok::kw_true:
2343 case tok::kw_false:
2344 if (Arrow || InTemplateParameterList) {
2345 nextToken();
2346 break;
2347 }
2348 return true;
2349 case tok::arrow:
2350 Arrow = FormatTok;
2351 nextToken();
2352 break;
2353 case tok::kw_requires: {
2354 auto *RequiresToken = FormatTok;
2355 nextToken();
2356 parseRequiresClause(RequiresToken);
2357 break;
2358 }
2359 case tok::equal:
2360 if (!InTemplateParameterList)
2361 return true;
2362 nextToken();
2363 break;
2364 default:
2365 return true;
2366 }
2367 }
2368
2369 FormatTok->setFinalizedType(TT_LambdaLBrace);
2370 LSquare.setFinalizedType(TT_LambdaLSquare);
2371
2372 if (Arrow)
2373 Arrow->setFinalizedType(TT_LambdaArrow);
2374
2375 NestedLambdas.push_back(Elt: Line->SeenDecltypeAuto);
2376 parseChildBlock();
2377 assert(!NestedLambdas.empty());
2378 NestedLambdas.pop_back();
2379
2380 return true;
2381}
2382
2383bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2384 const FormatToken *Previous = FormatTok->Previous;
2385 const FormatToken *LeftSquare = FormatTok;
2386 nextToken();
2387 if (Previous) {
2388 const auto *PrevPrev = Previous->getPreviousNonComment();
2389 if (Previous->is(Kind: tok::star) && PrevPrev && PrevPrev->isTypeName(LangOpts))
2390 return false;
2391 if (Previous->closesScope()) {
2392 // Not a potential C-style cast.
2393 if (Previous->isNot(Kind: tok::r_paren))
2394 return false;
2395 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2396 // and `int (*)()`.
2397 if (!PrevPrev || !PrevPrev->isOneOf(K1: tok::greater, K2: tok::r_paren))
2398 return false;
2399 }
2400 if (Previous && Previous->Tok.getIdentifierInfo() &&
2401 !Previous->isOneOf(K1: tok::kw_return, K2: tok::kw_co_await, Ks: tok::kw_co_yield,
2402 Ks: tok::kw_co_return)) {
2403 return false;
2404 }
2405 }
2406 if (LeftSquare->isCppStructuredBinding(IsCpp))
2407 return false;
2408 if (FormatTok->is(Kind: tok::l_square) || tok::isLiteral(K: FormatTok->Tok.getKind()))
2409 return false;
2410 if (FormatTok->is(Kind: tok::r_square)) {
2411 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2412 if (Next->is(Kind: tok::greater))
2413 return false;
2414 }
2415 parseSquare(/*LambdaIntroducer=*/true);
2416 return true;
2417}
2418
2419void UnwrappedLineParser::tryToParseJSFunction() {
2420 assert(FormatTok->is(Keywords.kw_function));
2421 if (FormatTok->is(II: Keywords.kw_async))
2422 nextToken();
2423 // Consume "function".
2424 nextToken();
2425
2426 // Consume * (generator function). Treat it like C++'s overloaded operators.
2427 if (FormatTok->is(Kind: tok::star)) {
2428 FormatTok->setFinalizedType(TT_OverloadedOperator);
2429 nextToken();
2430 }
2431
2432 // Consume function name.
2433 if (FormatTok->is(Kind: tok::identifier))
2434 nextToken();
2435
2436 if (FormatTok->isNot(Kind: tok::l_paren))
2437 return;
2438
2439 // Parse formal parameter list.
2440 parseParens();
2441
2442 if (FormatTok->is(Kind: tok::colon)) {
2443 // Parse a type definition.
2444 nextToken();
2445
2446 // Eat the type declaration. For braced inline object types, balance braces,
2447 // otherwise just parse until finding an l_brace for the function body.
2448 if (FormatTok->is(Kind: tok::l_brace))
2449 tryToParseBracedList();
2450 else
2451 while (!FormatTok->isOneOf(K1: tok::l_brace, K2: tok::semi) && !eof())
2452 nextToken();
2453 }
2454
2455 if (FormatTok->is(Kind: tok::semi))
2456 return;
2457
2458 parseChildBlock();
2459}
2460
2461bool UnwrappedLineParser::tryToParseBracedList() {
2462 if (FormatTok->is(BBK: BK_Unknown))
2463 calculateBraceTypes();
2464 assert(FormatTok->isNot(BK_Unknown));
2465 if (FormatTok->is(BBK: BK_Block))
2466 return false;
2467 nextToken();
2468 parseBracedList();
2469 return true;
2470}
2471
2472bool UnwrappedLineParser::tryToParseChildBlock() {
2473 assert(Style.isJavaScript() || Style.isCSharp());
2474 assert(FormatTok->is(TT_FatArrow));
2475 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2476 // They always start an expression or a child block if followed by a curly
2477 // brace.
2478 nextToken();
2479 if (FormatTok->isNot(Kind: tok::l_brace))
2480 return false;
2481 parseChildBlock();
2482 return true;
2483}
2484
2485bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2486 assert(!IsAngleBracket || !IsEnum);
2487 bool HasError = false;
2488
2489 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2490 // replace this by using parseAssignmentExpression() inside.
2491 do {
2492 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow) &&
2493 tryToParseChildBlock()) {
2494 continue;
2495 }
2496 if (Style.isJavaScript()) {
2497 if (FormatTok->is(II: Keywords.kw_function)) {
2498 tryToParseJSFunction();
2499 continue;
2500 }
2501 if (FormatTok->is(Kind: tok::l_brace)) {
2502 // Could be a method inside of a braced list `{a() { return 1; }}`.
2503 if (tryToParseBracedList())
2504 continue;
2505 parseChildBlock();
2506 }
2507 }
2508 if (FormatTok->is(Kind: IsAngleBracket ? tok::greater : tok::r_brace)) {
2509 if (IsEnum) {
2510 FormatTok->setBlockKind(BK_Block);
2511 if (!Style.AllowShortEnumsOnASingleLine)
2512 addUnwrappedLine();
2513 }
2514 nextToken();
2515 return !HasError;
2516 }
2517 switch (FormatTok->Tok.getKind()) {
2518 case tok::l_square:
2519 if (Style.isCSharp())
2520 parseSquare();
2521 else
2522 tryToParseLambda();
2523 break;
2524 case tok::l_paren:
2525 parseParens();
2526 // JavaScript can just have free standing methods and getters/setters in
2527 // object literals. Detect them by a "{" following ")".
2528 if (Style.isJavaScript()) {
2529 if (FormatTok->is(Kind: tok::l_brace))
2530 parseChildBlock();
2531 break;
2532 }
2533 break;
2534 case tok::l_brace:
2535 // Assume there are no blocks inside a braced init list apart
2536 // from the ones we explicitly parse out (like lambdas).
2537 FormatTok->setBlockKind(BK_BracedInit);
2538 if (!IsAngleBracket) {
2539 auto *Prev = FormatTok->Previous;
2540 if (Prev && Prev->is(Kind: tok::greater))
2541 Prev->setFinalizedType(TT_TemplateCloser);
2542 }
2543 nextToken();
2544 parseBracedList();
2545 break;
2546 case tok::less:
2547 nextToken();
2548 if (IsAngleBracket)
2549 parseBracedList(/*IsAngleBracket=*/true);
2550 break;
2551 case tok::semi:
2552 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2553 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2554 // used for error recovery if we have otherwise determined that this is
2555 // a braced list.
2556 if (Style.isJavaScript()) {
2557 nextToken();
2558 break;
2559 }
2560 HasError = true;
2561 if (!IsEnum)
2562 return false;
2563 nextToken();
2564 break;
2565 case tok::comma:
2566 nextToken();
2567 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2568 addUnwrappedLine();
2569 break;
2570 default:
2571 nextToken();
2572 break;
2573 }
2574 } while (!eof());
2575 return false;
2576}
2577
2578/// Parses a pair of parentheses (and everything between them).
2579/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2580/// double ampersands. This applies for all nested scopes as well.
2581///
2582/// Returns whether there is a `=` token between the parentheses.
2583bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType,
2584 bool InMacroCall) {
2585 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2586 auto *LParen = FormatTok;
2587 auto *Prev = FormatTok->Previous;
2588 bool SeenComma = false;
2589 bool SeenEqual = false;
2590 bool MightBeFoldExpr = false;
2591 nextToken();
2592 const bool MightBeStmtExpr = FormatTok->is(Kind: tok::l_brace);
2593 if (!InMacroCall && Prev && Prev->is(TT: TT_FunctionLikeMacro))
2594 InMacroCall = true;
2595 do {
2596 switch (FormatTok->Tok.getKind()) {
2597 case tok::l_paren:
2598 if (parseParens(AmpAmpTokenType, InMacroCall))
2599 SeenEqual = true;
2600 if (Style.isJava() && FormatTok->is(Kind: tok::l_brace))
2601 parseChildBlock();
2602 break;
2603 case tok::r_paren: {
2604 auto *RParen = FormatTok;
2605 nextToken();
2606 if (Prev) {
2607 auto OptionalParens = [&] {
2608 if (MightBeStmtExpr || MightBeFoldExpr || SeenComma || InMacroCall ||
2609 Line->InMacroBody ||
2610 Style.RemoveParentheses == FormatStyle::RPS_Leave ||
2611 RParen->getPreviousNonComment() == LParen) {
2612 return false;
2613 }
2614 const bool DoubleParens =
2615 Prev->is(Kind: tok::l_paren) && FormatTok->is(Kind: tok::r_paren);
2616 if (DoubleParens) {
2617 const auto *PrevPrev = Prev->getPreviousNonComment();
2618 const bool Excluded =
2619 PrevPrev &&
2620 (PrevPrev->isOneOf(K1: tok::kw___attribute, K2: tok::kw_decltype) ||
2621 (SeenEqual &&
2622 (PrevPrev->isOneOf(K1: tok::kw_if, K2: tok::kw_while) ||
2623 PrevPrev->endsSequence(K1: tok::kw_constexpr, Tokens: tok::kw_if))));
2624 if (!Excluded)
2625 return true;
2626 } else {
2627 const bool CommaSeparated =
2628 Prev->isOneOf(K1: tok::l_paren, K2: tok::comma) &&
2629 FormatTok->isOneOf(K1: tok::comma, K2: tok::r_paren);
2630 if (CommaSeparated &&
2631 // LParen is not preceded by ellipsis, comma.
2632 !Prev->endsSequence(K1: tok::comma, Tokens: tok::ellipsis) &&
2633 // RParen is not followed by comma, ellipsis.
2634 !(FormatTok->is(Kind: tok::comma) &&
2635 Tokens->peekNextToken()->is(Kind: tok::ellipsis))) {
2636 return true;
2637 }
2638 const bool ReturnParens =
2639 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2640 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2641 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2642 Prev->isOneOf(K1: tok::kw_return, K2: tok::kw_co_return) &&
2643 FormatTok->is(Kind: tok::semi);
2644 if (ReturnParens)
2645 return true;
2646 }
2647 return false;
2648 };
2649 if (Prev->is(TT: TT_TypenameMacro)) {
2650 LParen->setFinalizedType(TT_TypeDeclarationParen);
2651 RParen->setFinalizedType(TT_TypeDeclarationParen);
2652 } else if (Prev->is(Kind: tok::greater) && RParen->Previous == LParen) {
2653 Prev->setFinalizedType(TT_TemplateCloser);
2654 } else if (OptionalParens()) {
2655 LParen->Optional = true;
2656 RParen->Optional = true;
2657 }
2658 }
2659 return SeenEqual;
2660 }
2661 case tok::r_brace:
2662 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2663 return SeenEqual;
2664 case tok::l_square:
2665 tryToParseLambda();
2666 break;
2667 case tok::l_brace:
2668 if (!tryToParseBracedList())
2669 parseChildBlock();
2670 break;
2671 case tok::at:
2672 nextToken();
2673 if (FormatTok->is(Kind: tok::l_brace)) {
2674 nextToken();
2675 parseBracedList();
2676 }
2677 break;
2678 case tok::comma:
2679 SeenComma = true;
2680 nextToken();
2681 break;
2682 case tok::ellipsis:
2683 MightBeFoldExpr = true;
2684 nextToken();
2685 break;
2686 case tok::equal:
2687 SeenEqual = true;
2688 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow))
2689 tryToParseChildBlock();
2690 else
2691 nextToken();
2692 break;
2693 case tok::kw_class:
2694 if (Style.isJavaScript())
2695 parseRecord(/*ParseAsExpr=*/true);
2696 else
2697 nextToken();
2698 break;
2699 case tok::identifier:
2700 if (Style.isJavaScript() && (FormatTok->is(II: Keywords.kw_function)))
2701 tryToParseJSFunction();
2702 else
2703 nextToken();
2704 break;
2705 case tok::kw_switch:
2706 if (Style.isJava())
2707 parseSwitch(/*IsExpr=*/true);
2708 else
2709 nextToken();
2710 break;
2711 case tok::kw_requires: {
2712 auto RequiresToken = FormatTok;
2713 nextToken();
2714 parseRequiresExpression(RequiresToken);
2715 break;
2716 }
2717 case tok::ampamp:
2718 if (AmpAmpTokenType != TT_Unknown)
2719 FormatTok->setFinalizedType(AmpAmpTokenType);
2720 [[fallthrough]];
2721 default:
2722 nextToken();
2723 break;
2724 }
2725 } while (!eof());
2726 return SeenEqual;
2727}
2728
2729void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2730 if (!LambdaIntroducer) {
2731 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2732 if (tryToParseLambda())
2733 return;
2734 }
2735 do {
2736 switch (FormatTok->Tok.getKind()) {
2737 case tok::l_paren:
2738 parseParens();
2739 break;
2740 case tok::r_square:
2741 nextToken();
2742 return;
2743 case tok::r_brace:
2744 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2745 return;
2746 case tok::l_square:
2747 parseSquare();
2748 break;
2749 case tok::l_brace: {
2750 if (!tryToParseBracedList())
2751 parseChildBlock();
2752 break;
2753 }
2754 case tok::at:
2755 case tok::colon:
2756 nextToken();
2757 if (FormatTok->is(Kind: tok::l_brace)) {
2758 nextToken();
2759 parseBracedList();
2760 }
2761 break;
2762 default:
2763 nextToken();
2764 break;
2765 }
2766 } while (!eof());
2767}
2768
2769void UnwrappedLineParser::keepAncestorBraces() {
2770 if (!Style.RemoveBracesLLVM)
2771 return;
2772
2773 const int MaxNestingLevels = 2;
2774 const int Size = NestedTooDeep.size();
2775 if (Size >= MaxNestingLevels)
2776 NestedTooDeep[Size - MaxNestingLevels] = true;
2777 NestedTooDeep.push_back(Elt: false);
2778}
2779
2780static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2781 for (const auto &Token : llvm::reverse(C: Line.Tokens))
2782 if (Token.Tok->isNot(Kind: tok::comment))
2783 return Token.Tok;
2784
2785 return nullptr;
2786}
2787
2788void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2789 FormatToken *Tok = nullptr;
2790
2791 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2792 PreprocessorDirectives.empty() && FormatTok->isNot(Kind: tok::semi)) {
2793 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2794 ? getLastNonComment(Line: *Line)
2795 : Line->Tokens.back().Tok;
2796 assert(Tok);
2797 if (Tok->BraceCount < 0) {
2798 assert(Tok->BraceCount == -1);
2799 Tok = nullptr;
2800 } else {
2801 Tok->BraceCount = -1;
2802 }
2803 }
2804
2805 addUnwrappedLine();
2806 ++Line->Level;
2807 ++Line->UnbracedBodyLevel;
2808 parseStructuralElement();
2809 --Line->UnbracedBodyLevel;
2810
2811 if (Tok) {
2812 assert(!Line->InPPDirective);
2813 Tok = nullptr;
2814 for (const auto &L : llvm::reverse(C&: *CurrentLines)) {
2815 if (!L.InPPDirective && getLastNonComment(Line: L)) {
2816 Tok = L.Tokens.back().Tok;
2817 break;
2818 }
2819 }
2820 assert(Tok);
2821 ++Tok->BraceCount;
2822 }
2823
2824 if (CheckEOF && eof())
2825 addUnwrappedLine();
2826
2827 --Line->Level;
2828}
2829
2830static void markOptionalBraces(FormatToken *LeftBrace) {
2831 if (!LeftBrace)
2832 return;
2833
2834 assert(LeftBrace->is(tok::l_brace));
2835
2836 FormatToken *RightBrace = LeftBrace->MatchingParen;
2837 if (!RightBrace) {
2838 assert(!LeftBrace->Optional);
2839 return;
2840 }
2841
2842 assert(RightBrace->is(tok::r_brace));
2843 assert(RightBrace->MatchingParen == LeftBrace);
2844 assert(LeftBrace->Optional == RightBrace->Optional);
2845
2846 LeftBrace->Optional = true;
2847 RightBrace->Optional = true;
2848}
2849
2850void UnwrappedLineParser::handleAttributes() {
2851 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2852 if (FormatTok->isAttribute())
2853 nextToken();
2854 else if (FormatTok->is(Kind: tok::l_square))
2855 handleCppAttributes();
2856}
2857
2858bool UnwrappedLineParser::handleCppAttributes() {
2859 // Handle [[likely]] / [[unlikely]] attributes.
2860 assert(FormatTok->is(tok::l_square));
2861 if (!tryToParseSimpleAttribute())
2862 return false;
2863 parseSquare();
2864 return true;
2865}
2866
2867/// Returns whether \c Tok begins a block.
2868bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2869 // FIXME: rename the function or make
2870 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2871 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2872 : Tok.is(Kind: tok::l_brace);
2873}
2874
2875FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2876 bool KeepBraces,
2877 bool IsVerilogAssert) {
2878 assert((FormatTok->is(tok::kw_if) ||
2879 (Style.isVerilog() &&
2880 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2881 Keywords.kw_assume, Keywords.kw_cover))) &&
2882 "'if' expected");
2883 nextToken();
2884
2885 if (IsVerilogAssert) {
2886 // Handle `assert #0` and `assert final`.
2887 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
2888 nextToken();
2889 if (FormatTok->is(Kind: tok::numeric_constant))
2890 nextToken();
2891 } else if (FormatTok->isOneOf(K1: Keywords.kw_final, K2: Keywords.kw_property,
2892 Ks: Keywords.kw_sequence)) {
2893 nextToken();
2894 }
2895 }
2896
2897 // TableGen's if statement has the form of `if <cond> then { ... }`.
2898 if (Style.isTableGen()) {
2899 while (!eof() && FormatTok->isNot(Kind: Keywords.kw_then)) {
2900 // Simply skip until then. This range only contains a value.
2901 nextToken();
2902 }
2903 }
2904
2905 // Handle `if !consteval`.
2906 if (FormatTok->is(Kind: tok::exclaim))
2907 nextToken();
2908
2909 bool KeepIfBraces = true;
2910 if (FormatTok->is(Kind: tok::kw_consteval)) {
2911 nextToken();
2912 } else {
2913 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2914 if (FormatTok->isOneOf(K1: tok::kw_constexpr, K2: tok::identifier))
2915 nextToken();
2916 if (FormatTok->is(Kind: tok::l_paren)) {
2917 FormatTok->setFinalizedType(TT_ConditionLParen);
2918 parseParens();
2919 }
2920 }
2921 handleAttributes();
2922 // The then action is optional in Verilog assert statements.
2923 if (IsVerilogAssert && FormatTok->is(Kind: tok::semi)) {
2924 nextToken();
2925 addUnwrappedLine();
2926 return nullptr;
2927 }
2928
2929 bool NeedsUnwrappedLine = false;
2930 keepAncestorBraces();
2931
2932 FormatToken *IfLeftBrace = nullptr;
2933 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2934
2935 if (isBlockBegin(Tok: *FormatTok)) {
2936 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2937 IfLeftBrace = FormatTok;
2938 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2939 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2940 /*MunchSemi=*/true, KeepBraces: KeepIfBraces, IfKind: &IfBlockKind);
2941 setPreviousRBraceType(TT_ControlStatementRBrace);
2942 if (Style.BraceWrapping.BeforeElse)
2943 addUnwrappedLine();
2944 else
2945 NeedsUnwrappedLine = true;
2946 } else if (IsVerilogAssert && FormatTok->is(Kind: tok::kw_else)) {
2947 addUnwrappedLine();
2948 } else {
2949 parseUnbracedBody();
2950 }
2951
2952 if (Style.RemoveBracesLLVM) {
2953 assert(!NestedTooDeep.empty());
2954 KeepIfBraces = KeepIfBraces ||
2955 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2956 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2957 IfBlockKind == IfStmtKind::IfElseIf;
2958 }
2959
2960 bool KeepElseBraces = KeepIfBraces;
2961 FormatToken *ElseLeftBrace = nullptr;
2962 IfStmtKind Kind = IfStmtKind::IfOnly;
2963
2964 if (FormatTok->is(Kind: tok::kw_else)) {
2965 if (Style.RemoveBracesLLVM) {
2966 NestedTooDeep.back() = false;
2967 Kind = IfStmtKind::IfElse;
2968 }
2969 nextToken();
2970 handleAttributes();
2971 if (isBlockBegin(Tok: *FormatTok)) {
2972 const bool FollowedByIf = Tokens->peekNextToken()->is(Kind: tok::kw_if);
2973 FormatTok->setFinalizedType(TT_ElseLBrace);
2974 ElseLeftBrace = FormatTok;
2975 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2976 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2977 FormatToken *IfLBrace =
2978 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2979 /*MunchSemi=*/true, KeepBraces: KeepElseBraces, IfKind: &ElseBlockKind);
2980 setPreviousRBraceType(TT_ElseRBrace);
2981 if (FormatTok->is(Kind: tok::kw_else)) {
2982 KeepElseBraces = KeepElseBraces ||
2983 ElseBlockKind == IfStmtKind::IfOnly ||
2984 ElseBlockKind == IfStmtKind::IfElseIf;
2985 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2986 KeepElseBraces = true;
2987 assert(ElseLeftBrace->MatchingParen);
2988 markOptionalBraces(LeftBrace: ElseLeftBrace);
2989 }
2990 addUnwrappedLine();
2991 } else if (!IsVerilogAssert && FormatTok->is(Kind: tok::kw_if)) {
2992 const FormatToken *Previous = Tokens->getPreviousToken();
2993 assert(Previous);
2994 const bool IsPrecededByComment = Previous->is(Kind: tok::comment);
2995 if (IsPrecededByComment) {
2996 addUnwrappedLine();
2997 ++Line->Level;
2998 }
2999 bool TooDeep = true;
3000 if (Style.RemoveBracesLLVM) {
3001 Kind = IfStmtKind::IfElseIf;
3002 TooDeep = NestedTooDeep.pop_back_val();
3003 }
3004 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepBraces: KeepIfBraces);
3005 if (Style.RemoveBracesLLVM)
3006 NestedTooDeep.push_back(Elt: TooDeep);
3007 if (IsPrecededByComment)
3008 --Line->Level;
3009 } else {
3010 parseUnbracedBody(/*CheckEOF=*/true);
3011 }
3012 } else {
3013 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3014 if (NeedsUnwrappedLine)
3015 addUnwrappedLine();
3016 }
3017
3018 if (!Style.RemoveBracesLLVM)
3019 return nullptr;
3020
3021 assert(!NestedTooDeep.empty());
3022 KeepElseBraces = KeepElseBraces ||
3023 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3024 NestedTooDeep.back();
3025
3026 NestedTooDeep.pop_back();
3027
3028 if (!KeepIfBraces && !KeepElseBraces) {
3029 markOptionalBraces(LeftBrace: IfLeftBrace);
3030 markOptionalBraces(LeftBrace: ElseLeftBrace);
3031 } else if (IfLeftBrace) {
3032 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3033 if (IfRightBrace) {
3034 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3035 assert(!IfLeftBrace->Optional);
3036 assert(!IfRightBrace->Optional);
3037 IfLeftBrace->MatchingParen = nullptr;
3038 IfRightBrace->MatchingParen = nullptr;
3039 }
3040 }
3041
3042 if (IfKind)
3043 *IfKind = Kind;
3044
3045 return IfLeftBrace;
3046}
3047
3048void UnwrappedLineParser::parseTryCatch() {
3049 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3050 nextToken();
3051 bool NeedsUnwrappedLine = false;
3052 bool HasCtorInitializer = false;
3053 if (FormatTok->is(Kind: tok::colon)) {
3054 auto *Colon = FormatTok;
3055 // We are in a function try block, what comes is an initializer list.
3056 nextToken();
3057 if (FormatTok->is(Kind: tok::identifier)) {
3058 HasCtorInitializer = true;
3059 Colon->setFinalizedType(TT_CtorInitializerColon);
3060 }
3061
3062 // In case identifiers were removed by clang-tidy, what might follow is
3063 // multiple commas in sequence - before the first identifier.
3064 while (FormatTok->is(Kind: tok::comma))
3065 nextToken();
3066
3067 while (FormatTok->is(Kind: tok::identifier)) {
3068 nextToken();
3069 if (FormatTok->is(Kind: tok::l_paren)) {
3070 parseParens();
3071 } else if (FormatTok->is(Kind: tok::l_brace)) {
3072 nextToken();
3073 parseBracedList();
3074 }
3075
3076 // In case identifiers were removed by clang-tidy, what might follow is
3077 // multiple commas in sequence - after the first identifier.
3078 while (FormatTok->is(Kind: tok::comma))
3079 nextToken();
3080 }
3081 }
3082 // Parse try with resource.
3083 if (Style.isJava() && FormatTok->is(Kind: tok::l_paren))
3084 parseParens();
3085
3086 keepAncestorBraces();
3087
3088 if (FormatTok->is(Kind: tok::l_brace)) {
3089 if (HasCtorInitializer)
3090 FormatTok->setFinalizedType(TT_FunctionLBrace);
3091 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3092 parseBlock();
3093 if (Style.BraceWrapping.BeforeCatch)
3094 addUnwrappedLine();
3095 else
3096 NeedsUnwrappedLine = true;
3097 } else if (FormatTok->isNot(Kind: tok::kw_catch)) {
3098 // The C++ standard requires a compound-statement after a try.
3099 // If there's none, we try to assume there's a structuralElement
3100 // and try to continue.
3101 addUnwrappedLine();
3102 ++Line->Level;
3103 parseStructuralElement();
3104 --Line->Level;
3105 }
3106 for (bool SeenCatch = false;;) {
3107 if (FormatTok->is(Kind: tok::at))
3108 nextToken();
3109 if (!(FormatTok->isOneOf(K1: tok::kw_catch, K2: Keywords.kw___except,
3110 Ks: tok::kw___finally, Ks: tok::objc_catch,
3111 Ks: tok::objc_finally) ||
3112 ((Style.isJava() || Style.isJavaScript()) &&
3113 FormatTok->is(II: Keywords.kw_finally)))) {
3114 break;
3115 }
3116 if (FormatTok->is(Kind: tok::kw_catch))
3117 SeenCatch = true;
3118 nextToken();
3119 while (FormatTok->isNot(Kind: tok::l_brace)) {
3120 if (FormatTok->is(Kind: tok::l_paren)) {
3121 parseParens();
3122 continue;
3123 }
3124 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::r_brace) || eof()) {
3125 if (Style.RemoveBracesLLVM)
3126 NestedTooDeep.pop_back();
3127 return;
3128 }
3129 nextToken();
3130 }
3131 if (SeenCatch) {
3132 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3133 SeenCatch = false;
3134 }
3135 NeedsUnwrappedLine = false;
3136 Line->MustBeDeclaration = false;
3137 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3138 parseBlock();
3139 if (Style.BraceWrapping.BeforeCatch)
3140 addUnwrappedLine();
3141 else
3142 NeedsUnwrappedLine = true;
3143 }
3144
3145 if (Style.RemoveBracesLLVM)
3146 NestedTooDeep.pop_back();
3147
3148 if (NeedsUnwrappedLine)
3149 addUnwrappedLine();
3150}
3151
3152void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3153 bool ManageWhitesmithsBraces =
3154 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3155
3156 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3157 // the whole block.
3158 if (ManageWhitesmithsBraces)
3159 ++Line->Level;
3160
3161 // Munch the semicolon after the block. This is more common than one would
3162 // think. Putting the semicolon into its own line is very ugly.
3163 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3164 /*KeepBraces=*/true, /*IfKind=*/nullptr, UnindentWhitesmithsBraces: ManageWhitesmithsBraces);
3165
3166 addUnwrappedLine(AdjustLevel: AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3167
3168 if (ManageWhitesmithsBraces)
3169 --Line->Level;
3170}
3171
3172void UnwrappedLineParser::parseNamespace() {
3173 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3174 "'namespace' expected");
3175
3176 const FormatToken &InitialToken = *FormatTok;
3177 nextToken();
3178 if (InitialToken.is(TT: TT_NamespaceMacro)) {
3179 parseParens();
3180 } else {
3181 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::kw_inline,
3182 Ks: tok::l_square, Ks: tok::period, Ks: tok::l_paren) ||
3183 (Style.isCSharp() && FormatTok->is(Kind: tok::kw_union))) {
3184 if (FormatTok->is(Kind: tok::l_square))
3185 parseSquare();
3186 else if (FormatTok->is(Kind: tok::l_paren))
3187 parseParens();
3188 else
3189 nextToken();
3190 }
3191 }
3192 if (FormatTok->is(Kind: tok::l_brace)) {
3193 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3194
3195 if (ShouldBreakBeforeBrace(Style, InitialToken))
3196 addUnwrappedLine();
3197
3198 unsigned AddLevels =
3199 Style.NamespaceIndentation == FormatStyle::NI_All ||
3200 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3201 DeclarationScopeStack.size() > 1)
3202 ? 1u
3203 : 0u;
3204 parseNamespaceOrExportBlock(AddLevels);
3205 }
3206 // FIXME: Add error handling.
3207}
3208
3209void UnwrappedLineParser::parseCppExportBlock() {
3210 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3211}
3212
3213void UnwrappedLineParser::parseNew() {
3214 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3215 nextToken();
3216
3217 if (Style.isCSharp()) {
3218 do {
3219 // Handle constructor invocation, e.g. `new(field: value)`.
3220 if (FormatTok->is(Kind: tok::l_paren))
3221 parseParens();
3222
3223 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3224 if (FormatTok->is(Kind: tok::l_brace))
3225 parseBracedList();
3226
3227 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::comma))
3228 return;
3229
3230 nextToken();
3231 } while (!eof());
3232 }
3233
3234 if (!Style.isJava())
3235 return;
3236
3237 // In Java, we can parse everything up to the parens, which aren't optional.
3238 do {
3239 // There should not be a ;, { or } before the new's open paren.
3240 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::r_brace))
3241 return;
3242
3243 // Consume the parens.
3244 if (FormatTok->is(Kind: tok::l_paren)) {
3245 parseParens();
3246
3247 // If there is a class body of an anonymous class, consume that as child.
3248 if (FormatTok->is(Kind: tok::l_brace))
3249 parseChildBlock();
3250 return;
3251 }
3252 nextToken();
3253 } while (!eof());
3254}
3255
3256void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3257 keepAncestorBraces();
3258
3259 if (isBlockBegin(Tok: *FormatTok)) {
3260 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3261 FormatToken *LeftBrace = FormatTok;
3262 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3263 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3264 /*MunchSemi=*/true, KeepBraces);
3265 setPreviousRBraceType(TT_ControlStatementRBrace);
3266 if (!KeepBraces) {
3267 assert(!NestedTooDeep.empty());
3268 if (!NestedTooDeep.back())
3269 markOptionalBraces(LeftBrace);
3270 }
3271 if (WrapRightBrace)
3272 addUnwrappedLine();
3273 } else {
3274 parseUnbracedBody();
3275 }
3276
3277 if (!KeepBraces)
3278 NestedTooDeep.pop_back();
3279}
3280
3281void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3282 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3283 (Style.isVerilog() &&
3284 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3285 Keywords.kw_always_ff, Keywords.kw_always_latch,
3286 Keywords.kw_final, Keywords.kw_initial,
3287 Keywords.kw_foreach, Keywords.kw_forever,
3288 Keywords.kw_repeat))) &&
3289 "'for', 'while' or foreach macro expected");
3290 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3291 !FormatTok->isOneOf(K1: tok::kw_for, K2: tok::kw_while);
3292
3293 nextToken();
3294 // JS' for await ( ...
3295 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_await))
3296 nextToken();
3297 if (IsCpp && FormatTok->is(Kind: tok::kw_co_await))
3298 nextToken();
3299 if (HasParens && FormatTok->is(Kind: tok::l_paren)) {
3300 // The type is only set for Verilog basically because we were afraid to
3301 // change the existing behavior for loops. See the discussion on D121756 for
3302 // details.
3303 if (Style.isVerilog())
3304 FormatTok->setFinalizedType(TT_ConditionLParen);
3305 parseParens();
3306 }
3307
3308 if (Style.isVerilog()) {
3309 // Event control.
3310 parseVerilogSensitivityList();
3311 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(Kind: tok::semi) &&
3312 Tokens->getPreviousToken()->is(Kind: tok::r_paren)) {
3313 nextToken();
3314 addUnwrappedLine();
3315 return;
3316 }
3317
3318 handleAttributes();
3319 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3320}
3321
3322void UnwrappedLineParser::parseDoWhile() {
3323 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3324 nextToken();
3325
3326 parseLoopBody(/*KeepBraces=*/true, WrapRightBrace: Style.BraceWrapping.BeforeWhile);
3327
3328 // FIXME: Add error handling.
3329 if (FormatTok->isNot(Kind: tok::kw_while)) {
3330 addUnwrappedLine();
3331 return;
3332 }
3333
3334 FormatTok->setFinalizedType(TT_DoWhile);
3335
3336 // If in Whitesmiths mode, the line with the while() needs to be indented
3337 // to the same level as the block.
3338 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3339 ++Line->Level;
3340
3341 nextToken();
3342 parseStructuralElement();
3343}
3344
3345void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3346 nextToken();
3347 unsigned OldLineLevel = Line->Level;
3348
3349 if (LeftAlignLabel)
3350 Line->Level = 0;
3351 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3352 --Line->Level;
3353
3354 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3355 FormatTok->is(Kind: tok::l_brace)) {
3356
3357 CompoundStatementIndenter Indenter(this, Line->Level,
3358 Style.BraceWrapping.AfterCaseLabel,
3359 Style.BraceWrapping.IndentBraces);
3360 parseBlock();
3361 if (FormatTok->is(Kind: tok::kw_break)) {
3362 if (Style.BraceWrapping.AfterControlStatement ==
3363 FormatStyle::BWACS_Always) {
3364 addUnwrappedLine();
3365 if (!Style.IndentCaseBlocks &&
3366 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3367 ++Line->Level;
3368 }
3369 }
3370 parseStructuralElement();
3371 }
3372 addUnwrappedLine();
3373 } else {
3374 if (FormatTok->is(Kind: tok::semi))
3375 nextToken();
3376 addUnwrappedLine();
3377 }
3378 Line->Level = OldLineLevel;
3379 if (FormatTok->isNot(Kind: tok::l_brace)) {
3380 parseStructuralElement();
3381 addUnwrappedLine();
3382 }
3383}
3384
3385void UnwrappedLineParser::parseCaseLabel() {
3386 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3387 auto *Case = FormatTok;
3388
3389 // FIXME: fix handling of complex expressions here.
3390 do {
3391 nextToken();
3392 if (FormatTok->is(Kind: tok::colon)) {
3393 FormatTok->setFinalizedType(TT_CaseLabelColon);
3394 break;
3395 }
3396 if (Style.isJava() && FormatTok->is(Kind: tok::arrow)) {
3397 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3398 Case->setFinalizedType(TT_SwitchExpressionLabel);
3399 break;
3400 }
3401 } while (!eof());
3402 parseLabel();
3403}
3404
3405void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3406 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3407 nextToken();
3408 if (FormatTok->is(Kind: tok::l_paren))
3409 parseParens();
3410
3411 keepAncestorBraces();
3412
3413 if (FormatTok->is(Kind: tok::l_brace)) {
3414 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3415 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3416 : TT_ControlStatementLBrace);
3417 if (IsExpr)
3418 parseChildBlock();
3419 else
3420 parseBlock();
3421 setPreviousRBraceType(TT_ControlStatementRBrace);
3422 if (!IsExpr)
3423 addUnwrappedLine();
3424 } else {
3425 addUnwrappedLine();
3426 ++Line->Level;
3427 parseStructuralElement();
3428 --Line->Level;
3429 }
3430
3431 if (Style.RemoveBracesLLVM)
3432 NestedTooDeep.pop_back();
3433}
3434
3435void UnwrappedLineParser::parseAccessSpecifier() {
3436 nextToken();
3437 // Understand Qt's slots.
3438 if (FormatTok->isOneOf(K1: Keywords.kw_slots, K2: Keywords.kw_qslots))
3439 nextToken();
3440 // Otherwise, we don't know what it is, and we'd better keep the next token.
3441 if (FormatTok->is(Kind: tok::colon))
3442 nextToken();
3443 addUnwrappedLine();
3444}
3445
3446/// Parses a requires, decides if it is a clause or an expression.
3447/// \pre The current token has to be the requires keyword.
3448/// \returns true if it parsed a clause.
3449bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3450 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3451 auto RequiresToken = FormatTok;
3452
3453 // We try to guess if it is a requires clause, or a requires expression. For
3454 // that we first consume the keyword and check the next token.
3455 nextToken();
3456
3457 switch (FormatTok->Tok.getKind()) {
3458 case tok::l_brace:
3459 // This can only be an expression, never a clause.
3460 parseRequiresExpression(RequiresToken);
3461 return false;
3462 case tok::l_paren:
3463 // Clauses and expression can start with a paren, it's unclear what we have.
3464 break;
3465 default:
3466 // All other tokens can only be a clause.
3467 parseRequiresClause(RequiresToken);
3468 return true;
3469 }
3470
3471 // Looking forward we would have to decide if there are function declaration
3472 // like arguments to the requires expression:
3473 // requires (T t) {
3474 // Or there is a constraint expression for the requires clause:
3475 // requires (C<T> && ...
3476
3477 // But first let's look behind.
3478 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3479
3480 if (!PreviousNonComment ||
3481 PreviousNonComment->is(TT: TT_RequiresExpressionLBrace)) {
3482 // If there is no token, or an expression left brace, we are a requires
3483 // clause within a requires expression.
3484 parseRequiresClause(RequiresToken);
3485 return true;
3486 }
3487
3488 switch (PreviousNonComment->Tok.getKind()) {
3489 case tok::greater:
3490 case tok::r_paren:
3491 case tok::kw_noexcept:
3492 case tok::kw_const:
3493 case tok::star:
3494 case tok::amp:
3495 // This is a requires clause.
3496 parseRequiresClause(RequiresToken);
3497 return true;
3498 case tok::ampamp: {
3499 // This can be either:
3500 // if (... && requires (T t) ...)
3501 // Or
3502 // void member(...) && requires (C<T> ...
3503 // We check the one token before that for a const:
3504 // void member(...) const && requires (C<T> ...
3505 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3506 if ((PrevPrev && PrevPrev->is(Kind: tok::kw_const)) || !SeenEqual) {
3507 parseRequiresClause(RequiresToken);
3508 return true;
3509 }
3510 break;
3511 }
3512 default:
3513 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3514 // This is a requires clause.
3515 parseRequiresClause(RequiresToken);
3516 return true;
3517 }
3518 // It's an expression.
3519 parseRequiresExpression(RequiresToken);
3520 return false;
3521 }
3522
3523 // Now we look forward and try to check if the paren content is a parameter
3524 // list. The parameters can be cv-qualified and contain references or
3525 // pointers.
3526 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3527 // of stuff: typename, const, *, &, &&, ::, identifiers.
3528
3529 unsigned StoredPosition = Tokens->getPosition();
3530 FormatToken *NextToken = Tokens->getNextToken();
3531 int Lookahead = 0;
3532 auto PeekNext = [&Lookahead, &NextToken, this] {
3533 ++Lookahead;
3534 NextToken = Tokens->getNextToken();
3535 };
3536
3537 bool FoundType = false;
3538 bool LastWasColonColon = false;
3539 int OpenAngles = 0;
3540
3541 for (; Lookahead < 50; PeekNext()) {
3542 switch (NextToken->Tok.getKind()) {
3543 case tok::kw_volatile:
3544 case tok::kw_const:
3545 case tok::comma:
3546 if (OpenAngles == 0) {
3547 FormatTok = Tokens->setPosition(StoredPosition);
3548 parseRequiresExpression(RequiresToken);
3549 return false;
3550 }
3551 break;
3552 case tok::eof:
3553 // Break out of the loop.
3554 Lookahead = 50;
3555 break;
3556 case tok::coloncolon:
3557 LastWasColonColon = true;
3558 break;
3559 case tok::kw_decltype:
3560 case tok::identifier:
3561 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3562 FormatTok = Tokens->setPosition(StoredPosition);
3563 parseRequiresExpression(RequiresToken);
3564 return false;
3565 }
3566 FoundType = true;
3567 LastWasColonColon = false;
3568 break;
3569 case tok::less:
3570 ++OpenAngles;
3571 break;
3572 case tok::greater:
3573 --OpenAngles;
3574 break;
3575 default:
3576 if (NextToken->isTypeName(LangOpts)) {
3577 FormatTok = Tokens->setPosition(StoredPosition);
3578 parseRequiresExpression(RequiresToken);
3579 return false;
3580 }
3581 break;
3582 }
3583 }
3584 // This seems to be a complicated expression, just assume it's a clause.
3585 FormatTok = Tokens->setPosition(StoredPosition);
3586 parseRequiresClause(RequiresToken);
3587 return true;
3588}
3589
3590/// Parses a requires clause.
3591/// \param RequiresToken The requires keyword token, which starts this clause.
3592/// \pre We need to be on the next token after the requires keyword.
3593/// \sa parseRequiresExpression
3594///
3595/// Returns if it either has finished parsing the clause, or it detects, that
3596/// the clause is incorrect.
3597void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3598 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3599 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3600
3601 // If there is no previous token, we are within a requires expression,
3602 // otherwise we will always have the template or function declaration in front
3603 // of it.
3604 bool InRequiresExpression =
3605 !RequiresToken->Previous ||
3606 RequiresToken->Previous->is(TT: TT_RequiresExpressionLBrace);
3607
3608 RequiresToken->setFinalizedType(InRequiresExpression
3609 ? TT_RequiresClauseInARequiresExpression
3610 : TT_RequiresClause);
3611
3612 // NOTE: parseConstraintExpression is only ever called from this function.
3613 // It could be inlined into here.
3614 parseConstraintExpression();
3615
3616 if (!InRequiresExpression && FormatTok->Previous)
3617 FormatTok->Previous->ClosesRequiresClause = true;
3618}
3619
3620/// Parses a requires expression.
3621/// \param RequiresToken The requires keyword token, which starts this clause.
3622/// \pre We need to be on the next token after the requires keyword.
3623/// \sa parseRequiresClause
3624///
3625/// Returns if it either has finished parsing the expression, or it detects,
3626/// that the expression is incorrect.
3627void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3628 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3629 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3630
3631 RequiresToken->setFinalizedType(TT_RequiresExpression);
3632
3633 if (FormatTok->is(Kind: tok::l_paren)) {
3634 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3635 parseParens();
3636 }
3637
3638 if (FormatTok->is(Kind: tok::l_brace)) {
3639 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3640 parseChildBlock();
3641 }
3642}
3643
3644/// Parses a constraint expression.
3645///
3646/// This is the body of a requires clause. It returns, when the parsing is
3647/// complete, or the expression is incorrect.
3648void UnwrappedLineParser::parseConstraintExpression() {
3649 // The special handling for lambdas is needed since tryToParseLambda() eats a
3650 // token and if a requires expression is the last part of a requires clause
3651 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3652 // not set on the correct token. Thus we need to be aware if we even expect a
3653 // lambda to be possible.
3654 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3655 bool LambdaNextTimeAllowed = true;
3656
3657 // Within lambda declarations, it is permitted to put a requires clause after
3658 // its template parameter list, which would place the requires clause right
3659 // before the parentheses of the parameters of the lambda declaration. Thus,
3660 // we track if we expect to see grouping parentheses at all.
3661 // Without this check, `requires foo<T> (T t)` in the below example would be
3662 // seen as the whole requires clause, accidentally eating the parameters of
3663 // the lambda.
3664 // [&]<typename T> requires foo<T> (T t) { ... };
3665 bool TopLevelParensAllowed = true;
3666
3667 do {
3668 bool LambdaThisTimeAllowed = std::exchange(obj&: LambdaNextTimeAllowed, new_val: false);
3669
3670 switch (FormatTok->Tok.getKind()) {
3671 case tok::kw_requires: {
3672 auto RequiresToken = FormatTok;
3673 nextToken();
3674 parseRequiresExpression(RequiresToken);
3675 break;
3676 }
3677
3678 case tok::l_paren:
3679 if (!TopLevelParensAllowed)
3680 return;
3681 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3682 TopLevelParensAllowed = false;
3683 break;
3684
3685 case tok::l_square:
3686 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3687 return;
3688 break;
3689
3690 case tok::kw_const:
3691 case tok::semi:
3692 case tok::kw_class:
3693 case tok::kw_struct:
3694 case tok::kw_union:
3695 return;
3696
3697 case tok::l_brace:
3698 // Potential function body.
3699 return;
3700
3701 case tok::ampamp:
3702 case tok::pipepipe:
3703 FormatTok->setFinalizedType(TT_BinaryOperator);
3704 nextToken();
3705 LambdaNextTimeAllowed = true;
3706 TopLevelParensAllowed = true;
3707 break;
3708
3709 case tok::comma:
3710 case tok::comment:
3711 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3712 nextToken();
3713 break;
3714
3715 case tok::kw_sizeof:
3716 case tok::greater:
3717 case tok::greaterequal:
3718 case tok::greatergreater:
3719 case tok::less:
3720 case tok::lessequal:
3721 case tok::lessless:
3722 case tok::equalequal:
3723 case tok::exclaim:
3724 case tok::exclaimequal:
3725 case tok::plus:
3726 case tok::minus:
3727 case tok::star:
3728 case tok::slash:
3729 LambdaNextTimeAllowed = true;
3730 TopLevelParensAllowed = true;
3731 // Just eat them.
3732 nextToken();
3733 break;
3734
3735 case tok::numeric_constant:
3736 case tok::coloncolon:
3737 case tok::kw_true:
3738 case tok::kw_false:
3739 TopLevelParensAllowed = false;
3740 // Just eat them.
3741 nextToken();
3742 break;
3743
3744 case tok::kw_static_cast:
3745 case tok::kw_const_cast:
3746 case tok::kw_reinterpret_cast:
3747 case tok::kw_dynamic_cast:
3748 nextToken();
3749 if (FormatTok->isNot(Kind: tok::less))
3750 return;
3751
3752 nextToken();
3753 parseBracedList(/*IsAngleBracket=*/true);
3754 break;
3755
3756 default:
3757 if (!FormatTok->Tok.getIdentifierInfo()) {
3758 // Identifiers are part of the default case, we check for more then
3759 // tok::identifier to handle builtin type traits.
3760 return;
3761 }
3762
3763 // We need to differentiate identifiers for a template deduction guide,
3764 // variables, or function return types (the constraint expression has
3765 // ended before that), and basically all other cases. But it's easier to
3766 // check the other way around.
3767 assert(FormatTok->Previous);
3768 switch (FormatTok->Previous->Tok.getKind()) {
3769 case tok::coloncolon: // Nested identifier.
3770 case tok::ampamp: // Start of a function or variable for the
3771 case tok::pipepipe: // constraint expression. (binary)
3772 case tok::exclaim: // The same as above, but unary.
3773 case tok::kw_requires: // Initial identifier of a requires clause.
3774 case tok::equal: // Initial identifier of a concept declaration.
3775 break;
3776 default:
3777 return;
3778 }
3779
3780 // Read identifier with optional template declaration.
3781 nextToken();
3782 if (FormatTok->is(Kind: tok::less)) {
3783 nextToken();
3784 parseBracedList(/*IsAngleBracket=*/true);
3785 }
3786 TopLevelParensAllowed = false;
3787 break;
3788 }
3789 } while (!eof());
3790}
3791
3792bool UnwrappedLineParser::parseEnum() {
3793 const FormatToken &InitialToken = *FormatTok;
3794
3795 // Won't be 'enum' for NS_ENUMs.
3796 if (FormatTok->is(Kind: tok::kw_enum))
3797 nextToken();
3798
3799 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3800 // declarations. An "enum" keyword followed by a colon would be a syntax
3801 // error and thus assume it is just an identifier.
3802 if (Style.isJavaScript() && FormatTok->isOneOf(K1: tok::colon, K2: tok::question))
3803 return false;
3804
3805 // In protobuf, "enum" can be used as a field name.
3806 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(Kind: tok::equal))
3807 return false;
3808
3809 if (IsCpp) {
3810 // Eat up enum class ...
3811 if (FormatTok->isOneOf(K1: tok::kw_class, K2: tok::kw_struct))
3812 nextToken();
3813 while (FormatTok->is(Kind: tok::l_square))
3814 if (!handleCppAttributes())
3815 return false;
3816 }
3817
3818 while (FormatTok->Tok.getIdentifierInfo() ||
3819 FormatTok->isOneOf(K1: tok::colon, K2: tok::coloncolon, Ks: tok::less,
3820 Ks: tok::greater, Ks: tok::comma, Ks: tok::question,
3821 Ks: tok::l_square)) {
3822 if (Style.isVerilog()) {
3823 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3824 nextToken();
3825 // In Verilog the base type can have dimensions.
3826 while (FormatTok->is(Kind: tok::l_square))
3827 parseSquare();
3828 } else {
3829 nextToken();
3830 }
3831 // We can have macros or attributes in between 'enum' and the enum name.
3832 if (FormatTok->is(Kind: tok::l_paren))
3833 parseParens();
3834 if (FormatTok->is(Kind: tok::identifier)) {
3835 nextToken();
3836 // If there are two identifiers in a row, this is likely an elaborate
3837 // return type. In Java, this can be "implements", etc.
3838 if (IsCpp && FormatTok->is(Kind: tok::identifier))
3839 return false;
3840 }
3841 }
3842
3843 // Just a declaration or something is wrong.
3844 if (FormatTok->isNot(Kind: tok::l_brace))
3845 return true;
3846 FormatTok->setFinalizedType(TT_EnumLBrace);
3847 FormatTok->setBlockKind(BK_Block);
3848
3849 if (Style.isJava()) {
3850 // Java enums are different.
3851 parseJavaEnumBody();
3852 return true;
3853 }
3854 if (Style.Language == FormatStyle::LK_Proto) {
3855 parseBlock(/*MustBeDeclaration=*/true);
3856 return true;
3857 }
3858
3859 if (!Style.AllowShortEnumsOnASingleLine &&
3860 ShouldBreakBeforeBrace(Style, InitialToken)) {
3861 addUnwrappedLine();
3862 }
3863 // Parse enum body.
3864 nextToken();
3865 if (!Style.AllowShortEnumsOnASingleLine) {
3866 addUnwrappedLine();
3867 Line->Level += 1;
3868 }
3869 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3870 if (!Style.AllowShortEnumsOnASingleLine)
3871 Line->Level -= 1;
3872 if (HasError) {
3873 if (FormatTok->is(Kind: tok::semi))
3874 nextToken();
3875 addUnwrappedLine();
3876 }
3877 setPreviousRBraceType(TT_EnumRBrace);
3878 return true;
3879
3880 // There is no addUnwrappedLine() here so that we fall through to parsing a
3881 // structural element afterwards. Thus, in "enum A {} n, m;",
3882 // "} n, m;" will end up in one unwrapped line.
3883}
3884
3885bool UnwrappedLineParser::parseStructLike() {
3886 // parseRecord falls through and does not yet add an unwrapped line as a
3887 // record declaration or definition can start a structural element.
3888 parseRecord();
3889 // This does not apply to Java, JavaScript and C#.
3890 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3891 if (FormatTok->is(Kind: tok::semi))
3892 nextToken();
3893 addUnwrappedLine();
3894 return true;
3895 }
3896 return false;
3897}
3898
3899namespace {
3900// A class used to set and restore the Token position when peeking
3901// ahead in the token source.
3902class ScopedTokenPosition {
3903 unsigned StoredPosition;
3904 FormatTokenSource *Tokens;
3905
3906public:
3907 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3908 assert(Tokens && "Tokens expected to not be null");
3909 StoredPosition = Tokens->getPosition();
3910 }
3911
3912 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3913};
3914} // namespace
3915
3916// Look to see if we have [[ by looking ahead, if
3917// its not then rewind to the original position.
3918bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3919 ScopedTokenPosition AutoPosition(Tokens);
3920 FormatToken *Tok = Tokens->getNextToken();
3921 // We already read the first [ check for the second.
3922 if (Tok->isNot(Kind: tok::l_square))
3923 return false;
3924 // Double check that the attribute is just something
3925 // fairly simple.
3926 while (Tok->isNot(Kind: tok::eof)) {
3927 if (Tok->is(Kind: tok::r_square))
3928 break;
3929 Tok = Tokens->getNextToken();
3930 }
3931 if (Tok->is(Kind: tok::eof))
3932 return false;
3933 Tok = Tokens->getNextToken();
3934 if (Tok->isNot(Kind: tok::r_square))
3935 return false;
3936 Tok = Tokens->getNextToken();
3937 if (Tok->is(Kind: tok::semi))
3938 return false;
3939 return true;
3940}
3941
3942void UnwrappedLineParser::parseJavaEnumBody() {
3943 assert(FormatTok->is(tok::l_brace));
3944 const FormatToken *OpeningBrace = FormatTok;
3945
3946 // Determine whether the enum is simple, i.e. does not have a semicolon or
3947 // constants with class bodies. Simple enums can be formatted like braced
3948 // lists, contracted to a single line, etc.
3949 unsigned StoredPosition = Tokens->getPosition();
3950 bool IsSimple = true;
3951 FormatToken *Tok = Tokens->getNextToken();
3952 while (Tok->isNot(Kind: tok::eof)) {
3953 if (Tok->is(Kind: tok::r_brace))
3954 break;
3955 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::semi)) {
3956 IsSimple = false;
3957 break;
3958 }
3959 // FIXME: This will also mark enums with braces in the arguments to enum
3960 // constants as "not simple". This is probably fine in practice, though.
3961 Tok = Tokens->getNextToken();
3962 }
3963 FormatTok = Tokens->setPosition(StoredPosition);
3964
3965 if (IsSimple) {
3966 nextToken();
3967 parseBracedList();
3968 addUnwrappedLine();
3969 return;
3970 }
3971
3972 // Parse the body of a more complex enum.
3973 // First add a line for everything up to the "{".
3974 nextToken();
3975 addUnwrappedLine();
3976 ++Line->Level;
3977
3978 // Parse the enum constants.
3979 while (!eof()) {
3980 if (FormatTok->is(Kind: tok::l_brace)) {
3981 // Parse the constant's class body.
3982 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3983 /*MunchSemi=*/false);
3984 } else if (FormatTok->is(Kind: tok::l_paren)) {
3985 parseParens();
3986 } else if (FormatTok->is(Kind: tok::comma)) {
3987 nextToken();
3988 addUnwrappedLine();
3989 } else if (FormatTok->is(Kind: tok::semi)) {
3990 nextToken();
3991 addUnwrappedLine();
3992 break;
3993 } else if (FormatTok->is(Kind: tok::r_brace)) {
3994 addUnwrappedLine();
3995 break;
3996 } else {
3997 nextToken();
3998 }
3999 }
4000
4001 // Parse the class body after the enum's ";" if any.
4002 parseLevel(OpeningBrace);
4003 nextToken();
4004 --Line->Level;
4005 addUnwrappedLine();
4006}
4007
// Parses a record-like construct whose introducing keyword is the current
// token: first the name part (which may contain attributes, macros and nested
// name specifiers), then an optional part introduced by ':' or '<', and
// finally the body if an opening brace follows.
//
// \p ParseAsExpr  if true, the body is parsed with parseChildBlock() instead
//                 of as a declaration block.
// \p IsJavaRecord if true, the current token must be the `record` keyword
//                 (asserted below).
//
// The function may return early without parsing a body: when a ';' is reached
// in the ':'/'<' part, or when the brace turns out to start a braced list
// (see IsListInitialization below).
void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
  assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
  // The introducing keyword; it later selects the brace token types and is
  // handed to ShouldBreakBeforeBrace().
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // For a Java record, an identifier directly after the keyword is the name.
  FormatToken *ClassName =
      IsJavaRecord && FormatTok->is(Kind: tok::identifier) ? FormatTok : nullptr;
  bool IsDerived = false;
  // True for an identifier whose text differs from its upper-cased form, i.e.
  // one that is not written entirely in upper case.
  auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
    return Tok->is(Kind: tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
  };
  // JavaScript/TypeScript supports anonymous classes like:
  // a = class extends foo { }
  bool JSPastExtendsOrImplements = false;
  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::hashhash,
                            Ks: tok::kw_alignas, Ks: tok::l_square) ||
         FormatTok->isAttribute() ||
         ((Style.isJava() || Style.isJavaScript()) &&
          FormatTok->isOneOf(K1: tok::period, K2: tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(K1: Keywords.kw_extends, K2: Keywords.kw_implements)) {
      JSPastExtendsOrImplements = true;
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      // class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(Kind: tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(Kind: tok::l_square) && handleCppAttributes())
      continue;
    // Consume one token of the name part and decide, based on the token that
    // follows it, whether the consumed token was the class name.
    auto *Previous = FormatTok;
    nextToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // We can have macros in between 'class' and the class name.
      if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
          // e.g. `struct macro(a) S { int i; };`
          Previous->Previous == &InitialToken) {
        parseParens();
      }
      break;
    case tok::coloncolon:
    case tok::hashhash:
      // Still inside a qualified or token-pasted name; keep going.
      break;
    default:
      // Do not pick a name if one is already known, if we are past a
      // JavaScript extends/implements, or if the consumed token is not an
      // identifier or is an attribute macro.
      if (JSPastExtendsOrImplements || ClassName ||
          Previous->isNot(Kind: tok::identifier) || Previous->is(TT: TT_AttributeMacro)) {
        break;
      }
      // Accept single-character identifiers and identifiers that are not
      // written entirely in upper case as the class name.
      if (const auto Text = Previous->TokenText;
          Text.size() == 1 || Text != Text.upper()) {
        ClassName = Previous;
      }
    }
  }

  // Decides, with the current token being '{', whether the brace starts a
  // braced list rather than a record body. All state is captured by
  // reference, so the values of ClassName, IsDerived and
  // JSPastExtendsOrImplements at the time of the call are used. The last
  // operand calls tryToParseBracedList(), so a `true` result is only produced
  // if that call succeeded.
  auto IsListInitialization = [&] {
    if (!ClassName || IsDerived || JSPastExtendsOrImplements)
      return false;
    assert(FormatTok->is(tok::l_brace));
    const auto *Prev = FormatTok->getPreviousNonComment();
    assert(Prev);
    return Prev != ClassName && Prev->is(Kind: tok::identifier) &&
           Prev->isNot(Kind: Keywords.kw_final) && tryToParseBracedList();
  };

  // The part introduced by ':' or '<'. Tokens are consumed until the loop is
  // left through one of the `break`/`return` statements or the end of the
  // input is reached.
  if (FormatTok->isOneOf(K1: tok::colon, K2: tok::less)) {
    // Depth of '<' ... '>' nesting; several checks below only apply at
    // depth zero.
    int AngleNestingLevel = 0;
    do {
      if (FormatTok->is(Kind: tok::less))
        ++AngleNestingLevel;
      else if (FormatTok->is(Kind: tok::greater))
        --AngleNestingLevel;

      if (AngleNestingLevel == 0) {
        if (FormatTok->is(Kind: tok::colon)) {
          IsDerived = true;
        } else if (!IsDerived && FormatTok->is(Kind: tok::identifier) &&
                   FormatTok->Previous->is(Kind: tok::coloncolon)) {
          // Before any ':' was seen, an identifier following "::" replaces
          // the class name found so far.
          ClassName = FormatTok;
        } else if (FormatTok->is(Kind: tok::l_paren) &&
                   IsNonMacroIdentifier(FormatTok->Previous)) {
          break;
        }
      }
      if (FormatTok->is(Kind: tok::l_brace)) {
        if (AngleNestingLevel == 0 && IsListInitialization())
          return;
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list ends this part; it is handled as
        // the record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(Kind: tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous || (Previous->isNot(Kind: tok::r_paren) &&
                          !Previous->isTypeOrIdentifier(LangOpts))) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      // A ';' here ends the function without parsing a body.
      if (FormatTok->is(Kind: tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the kind of the introducing keyword to the token types assigned to
  // the opening and closing brace of the body.
  auto GetBraceTypes =
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return {TT_ClassLBrace, TT_ClassRBrace};
    case tok::kw_struct:
      return {TT_StructLBrace, TT_StructRBrace};
    case tok::kw_union:
      return {TT_UnionLBrace, TT_UnionRBrace};
    default:
      // Useful for e.g. interface.
      return {TT_RecordLBrace, TT_RecordRBrace};
    }
  };
  if (FormatTok->is(Kind: tok::l_brace)) {
    if (IsListInitialization())
      return;
    if (ClassName)
      ClassName->setFinalizedType(TT_ClassHeadName);
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
    FormatTok->setFinalizedType(OpenBraceType);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With Style.IndentAccessModifiers the body is indented by two levels
      // instead of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
    setPreviousRBraceType(ClosingBraceType);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
4167
4168void UnwrappedLineParser::parseObjCMethod() {
4169 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4170 "'(' or identifier expected.");
4171 do {
4172 if (FormatTok->is(Kind: tok::semi)) {
4173 nextToken();
4174 addUnwrappedLine();
4175 return;
4176 } else if (FormatTok->is(Kind: tok::l_brace)) {
4177 if (Style.BraceWrapping.AfterFunction)
4178 addUnwrappedLine();
4179 parseBlock();
4180 addUnwrappedLine();
4181 return;
4182 } else {
4183 nextToken();
4184 }
4185 } while (!eof());
4186}
4187
4188void UnwrappedLineParser::parseObjCProtocolList() {
4189 assert(FormatTok->is(tok::less) && "'<' expected.");
4190 do {
4191 nextToken();
4192 // Early exit in case someone forgot a close angle.
4193 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4194 return;
4195 } while (!eof() && FormatTok->isNot(Kind: tok::greater));
4196 nextToken(); // Skip '>'.
4197}
4198
4199void UnwrappedLineParser::parseObjCUntilAtEnd() {
4200 do {
4201 if (FormatTok->is(Kind: tok::objc_end)) {
4202 nextToken();
4203 addUnwrappedLine();
4204 break;
4205 }
4206 if (FormatTok->is(Kind: tok::l_brace)) {
4207 parseBlock();
4208 // In ObjC interfaces, nothing should be following the "}".
4209 addUnwrappedLine();
4210 } else if (FormatTok->is(Kind: tok::r_brace)) {
4211 // Ignore stray "}". parseStructuralElement doesn't consume them.
4212 nextToken();
4213 addUnwrappedLine();
4214 } else if (FormatTok->isOneOf(K1: tok::minus, K2: tok::plus)) {
4215 nextToken();
4216 parseObjCMethod();
4217 } else {
4218 parseStructuralElement();
4219 }
4220 } while (!eof());
4221}
4222
4223void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4224 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4225 nextToken();
4226 nextToken(); // interface name
4227
4228 // @interface can be followed by a lightweight generic
4229 // specialization list, then either a base class or a category.
4230 if (FormatTok->is(Kind: tok::less))
4231 parseObjCLightweightGenerics();
4232 if (FormatTok->is(Kind: tok::colon)) {
4233 nextToken();
4234 nextToken(); // base class name
4235 // The base class can also have lightweight generics applied to it.
4236 if (FormatTok->is(Kind: tok::less))
4237 parseObjCLightweightGenerics();
4238 } else if (FormatTok->is(Kind: tok::l_paren)) {
4239 // Skip category, if present.
4240 parseParens();
4241 }
4242
4243 if (FormatTok->is(Kind: tok::less))
4244 parseObjCProtocolList();
4245
4246 if (FormatTok->is(Kind: tok::l_brace)) {
4247 if (Style.BraceWrapping.AfterObjCDeclaration)
4248 addUnwrappedLine();
4249 parseBlock(/*MustBeDeclaration=*/true);
4250 }
4251
4252 // With instance variables, this puts '}' on its own line. Without instance
4253 // variables, this ends the @interface line.
4254 addUnwrappedLine();
4255
4256 parseObjCUntilAtEnd();
4257}
4258
4259void UnwrappedLineParser::parseObjCLightweightGenerics() {
4260 assert(FormatTok->is(tok::less));
4261 // Unlike protocol lists, generic parameterizations support
4262 // nested angles:
4263 //
4264 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4265 // NSObject <NSCopying, NSSecureCoding>
4266 //
4267 // so we need to count how many open angles we have left.
4268 unsigned NumOpenAngles = 1;
4269 do {
4270 nextToken();
4271 // Early exit in case someone forgot a close angle.
4272 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4273 break;
4274 if (FormatTok->is(Kind: tok::less)) {
4275 ++NumOpenAngles;
4276 } else if (FormatTok->is(Kind: tok::greater)) {
4277 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4278 --NumOpenAngles;
4279 }
4280 } while (!eof() && NumOpenAngles != 0);
4281 nextToken(); // Skip '>'.
4282}
4283
4284// Returns true for the declaration/definition form of @protocol,
4285// false for the expression form.
4286bool UnwrappedLineParser::parseObjCProtocol() {
4287 assert(FormatTok->is(tok::objc_protocol));
4288 nextToken();
4289
4290 if (FormatTok->is(Kind: tok::l_paren)) {
4291 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4292 return false;
4293 }
4294
4295 // The definition/declaration form,
4296 // @protocol Foo
4297 // - (int)someMethod;
4298 // @end
4299
4300 nextToken(); // protocol name
4301
4302 if (FormatTok->is(Kind: tok::less))
4303 parseObjCProtocolList();
4304
4305 // Check for protocol declaration.
4306 if (FormatTok->is(Kind: tok::semi)) {
4307 nextToken();
4308 addUnwrappedLine();
4309 return true;
4310 }
4311
4312 addUnwrappedLine();
4313 parseObjCUntilAtEnd();
4314 return true;
4315}
4316
4317void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4318 bool IsImport = FormatTok->is(II: Keywords.kw_import);
4319 assert(IsImport || FormatTok->is(tok::kw_export));
4320 nextToken();
4321
4322 // Consume the "default" in "export default class/function".
4323 if (FormatTok->is(Kind: tok::kw_default))
4324 nextToken();
4325
4326 // Consume "async function", "function" and "default function", so that these
4327 // get parsed as free-standing JS functions, i.e. do not require a trailing
4328 // semicolon.
4329 if (FormatTok->is(II: Keywords.kw_async))
4330 nextToken();
4331 if (FormatTok->is(II: Keywords.kw_function)) {
4332 nextToken();
4333 return;
4334 }
4335
4336 // For imports, `export *`, `export {...}`, consume the rest of the line up
4337 // to the terminating `;`. For everything else, just return and continue
4338 // parsing the structural element, i.e. the declaration or expression for
4339 // `export default`.
4340 if (!IsImport && !FormatTok->isOneOf(K1: tok::l_brace, K2: tok::star) &&
4341 !FormatTok->isStringLiteral() &&
4342 !(FormatTok->is(II: Keywords.kw_type) &&
4343 Tokens->peekNextToken()->isOneOf(K1: tok::l_brace, K2: tok::star))) {
4344 return;
4345 }
4346
4347 while (!eof()) {
4348 if (FormatTok->is(Kind: tok::semi))
4349 return;
4350 if (Line->Tokens.empty()) {
4351 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4352 // import statement should terminate.
4353 return;
4354 }
4355 if (FormatTok->is(Kind: tok::l_brace)) {
4356 FormatTok->setBlockKind(BK_Block);
4357 nextToken();
4358 parseBracedList();
4359 } else {
4360 nextToken();
4361 }
4362 }
4363}
4364
4365void UnwrappedLineParser::parseStatementMacro() {
4366 nextToken();
4367 if (FormatTok->is(Kind: tok::l_paren))
4368 parseParens();
4369 if (FormatTok->is(Kind: tok::semi))
4370 nextToken();
4371 addUnwrappedLine();
4372}
4373
4374void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4375 // consume things like a::`b.c[d:e] or a::*
4376 while (true) {
4377 if (FormatTok->isOneOf(K1: tok::star, K2: tok::period, Ks: tok::periodstar,
4378 Ks: tok::coloncolon, Ks: tok::hash) ||
4379 Keywords.isVerilogIdentifier(Tok: *FormatTok)) {
4380 nextToken();
4381 } else if (FormatTok->is(Kind: tok::l_square)) {
4382 parseSquare();
4383 } else {
4384 break;
4385 }
4386 }
4387}
4388
4389void UnwrappedLineParser::parseVerilogSensitivityList() {
4390 if (FormatTok->isNot(Kind: tok::at))
4391 return;
4392 nextToken();
4393 // A block event expression has 2 at signs.
4394 if (FormatTok->is(Kind: tok::at))
4395 nextToken();
4396 switch (FormatTok->Tok.getKind()) {
4397 case tok::star:
4398 nextToken();
4399 break;
4400 case tok::l_paren:
4401 parseParens();
4402 break;
4403 default:
4404 parseVerilogHierarchyIdentifier();
4405 break;
4406 }
4407}
4408
// Parses the header of a Verilog hierarchy construct, starting at its
// introducing keyword (the current token). Three kinds of headers are
// distinguished: `clocking`, the case-like keywords, and everything else
// ("module" etc.).
//
// Returns the number of extra indentation levels: 1 for a case-like header
// when Style.IndentCaseLabels is set, 0 in every other situation.
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  unsigned AddLevels = 0;

  if (FormatTok->is(II: Keywords.kw_clocking)) {
    // `clocking`, an optional identifier, an optional sensitivity list and an
    // optional ';'. No unwrapped line is added in this branch.
    nextToken();
    if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
      nextToken();
    parseVerilogSensitivityList();
    if (FormatTok->is(Kind: tok::semi))
      nextToken();
  } else if (FormatTok->isOneOf(K1: tok::kw_case, K2: Keywords.kw_casex,
                                Ks: Keywords.kw_casez, Ks: Keywords.kw_randcase,
                                Ks: Keywords.kw_randsequence)) {
    if (Style.IndentCaseLabels)
      AddLevels++;
    nextToken();
    // A '(' after the keyword is marked as the paren of a condition.
    if (FormatTok->is(Kind: tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
    if (FormatTok->isOneOf(K1: Keywords.kw_inside, K2: Keywords.kw_matches))
      nextToken();
    // The case header has no semicolon.
  } else {
    // "module" etc.
    nextToken();
    // all the words like the name of the module and specifiers like
    // "automatic" and the width of function return type
    while (true) {
      if (FormatTok->is(Kind: tok::l_square)) {
        // A Verilog identifier directly before '[' (ignoring comments) is
        // marked as a dimensioned type name.
        auto Prev = FormatTok->getPreviousNonComment();
        if (Prev && Keywords.isVerilogIdentifier(Tok: *Prev))
          Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
        parseSquare();
      } else if (Keywords.isVerilogIdentifier(Tok: *FormatTok) ||
                 FormatTok->isOneOf(K1: tok::hash, K2: tok::hashhash, Ks: tok::coloncolon,
                                    Ks: Keywords.kw_automatic, Ks: tok::kw_static)) {
        nextToken();
      } else {
        break;
      }
    }

    // Ends the current unwrapped line and flags the line that follows as a
    // continuation. The flag is set after addUnwrappedLine(), so it applies
    // to the line being started, not to the one just added.
    auto NewLine = [this]() {
      addUnwrappedLine();
      Line->IsContinuation = true;
    };

    // package imports
    while (FormatTok->is(II: Keywords.kw_import)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(Kind: tok::semi))
        nextToken();
    }

    // parameters and ports
    if (FormatTok->is(II: Keywords.kw_verilogHash)) {
      NewLine();
      nextToken();
      if (FormatTok->is(Kind: tok::l_paren)) {
        FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
        parseParens();
      }
    }
    if (FormatTok->is(Kind: tok::l_paren)) {
      NewLine();
      FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
      parseParens();
    }

    // extends and implements
    if (FormatTok->is(II: Keywords.kw_extends)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(Kind: tok::l_paren))
        parseParens();
    }
    if (FormatTok->is(II: Keywords.kw_implements)) {
      NewLine();
      // The first nextToken() consumes `implements`, later ones the commas
      // that separate the names.
      do {
        nextToken();
        parseVerilogHierarchyIdentifier();
      } while (FormatTok->is(Kind: tok::comma));
    }

    // Coverage event for cover groups.
    if (FormatTok->is(Kind: tok::at)) {
      NewLine();
      parseVerilogSensitivityList();
    }

    // The header's ';' is consumed with a level difference of 1, which is
    // forwarded to the function that reads the next token.
    if (FormatTok->is(Kind: tok::semi))
      nextToken(/*LevelDifference=*/1);
    addUnwrappedLine();
  }

  return AddLevels;
}
4510
4511void UnwrappedLineParser::parseVerilogTable() {
4512 assert(FormatTok->is(Keywords.kw_table));
4513 nextToken(/*LevelDifference=*/1);
4514 addUnwrappedLine();
4515
4516 auto InitialLevel = Line->Level++;
4517 while (!eof() && !Keywords.isVerilogEnd(Tok: *FormatTok)) {
4518 FormatToken *Tok = FormatTok;
4519 nextToken();
4520 if (Tok->is(Kind: tok::semi))
4521 addUnwrappedLine();
4522 else if (Tok->isOneOf(K1: tok::star, K2: tok::colon, Ks: tok::question, Ks: tok::minus))
4523 Tok->setFinalizedType(TT_VerilogTableItem);
4524 }
4525 Line->Level = InitialLevel;
4526 nextToken(/*LevelDifference=*/-1);
4527 addUnwrappedLine();
4528}
4529
4530void UnwrappedLineParser::parseVerilogCaseLabel() {
4531 // The label will get unindented in AnnotatingParser. If there are no leading
4532 // spaces, indent the rest here so that things inside the block will be
4533 // indented relative to things outside. We don't use parseLabel because we
4534 // don't know whether this colon is a label or a ternary expression at this
4535 // point.
4536 auto OrigLevel = Line->Level;
4537 auto FirstLine = CurrentLines->size();
4538 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4539 ++Line->Level;
4540 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(Tok: *FormatTok))
4541 --Line->Level;
4542 parseStructuralElement();
4543 // Restore the indentation in both the new line and the line that has the
4544 // label.
4545 if (CurrentLines->size() > FirstLine)
4546 (*CurrentLines)[FirstLine].Level = OrigLevel;
4547 Line->Level = OrigLevel;
4548}
4549
4550bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4551 for (const auto &N : Line.Tokens) {
4552 if (N.Tok->MacroCtx)
4553 return true;
4554 for (const UnwrappedLine &Child : N.Children)
4555 if (containsExpansion(Line: Child))
4556 return true;
4557 }
4558 return false;
4559}
4560
4561void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4562 if (Line->Tokens.empty())
4563 return;
4564 LLVM_DEBUG({
4565 if (!parsingPPDirective()) {
4566 llvm::dbgs() << "Adding unwrapped line:\n";
4567 printDebugInfo(*Line);
4568 }
4569 });
4570
4571 // If this line closes a block when in Whitesmiths mode, remember that
4572 // information so that the level can be decreased after the line is added.
4573 // This has to happen after the addition of the line since the line itself
4574 // needs to be indented.
4575 bool ClosesWhitesmithsBlock =
4576 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4577 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4578
4579 // If the current line was expanded from a macro call, we use it to
4580 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4581 // line and the unexpanded token stream.
4582 if (!parsingPPDirective() && !InExpansion && containsExpansion(Line: *Line)) {
4583 if (!Reconstruct)
4584 Reconstruct.emplace(args&: Line->Level, args&: Unexpanded);
4585 Reconstruct->addLine(Line: *Line);
4586
4587 // While the reconstructed unexpanded lines are stored in the normal
4588 // flow of lines, the expanded lines are stored on the side to be analyzed
4589 // in an extra step.
4590 CurrentExpandedLines.push_back(Elt: std::move(*Line));
4591
4592 if (Reconstruct->finished()) {
4593 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4594 assert(!Reconstructed.Tokens.empty() &&
4595 "Reconstructed must at least contain the macro identifier.");
4596 assert(!parsingPPDirective());
4597 LLVM_DEBUG({
4598 llvm::dbgs() << "Adding unexpanded line:\n";
4599 printDebugInfo(Reconstructed);
4600 });
4601 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4602 Lines.push_back(Elt: std::move(Reconstructed));
4603 CurrentExpandedLines.clear();
4604 Reconstruct.reset();
4605 }
4606 } else {
4607 // At the top level we only get here when no unexpansion is going on, or
4608 // when conditional formatting led to unfinished macro reconstructions.
4609 assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
4610 CurrentLines->push_back(Elt: std::move(*Line));
4611 }
4612 Line->Tokens.clear();
4613 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4614 Line->FirstStartColumn = 0;
4615 Line->IsContinuation = false;
4616 Line->SeenDecltypeAuto = false;
4617
4618 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4619 --Line->Level;
4620 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4621 CurrentLines->append(
4622 in_start: std::make_move_iterator(i: PreprocessorDirectives.begin()),
4623 in_end: std::make_move_iterator(i: PreprocessorDirectives.end()));
4624 PreprocessorDirectives.clear();
4625 }
4626 // Disconnect the current token from the last token on the previous line.
4627 FormatTok->Previous = nullptr;
4628}
4629
4630bool UnwrappedLineParser::eof() const { return FormatTok->is(Kind: tok::eof); }
4631
4632bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4633 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4634 FormatTok.NewlinesBefore > 0;
4635}
4636
4637// Checks if \p FormatTok is a line comment that continues the line comment
4638// section on \p Line.
4639static bool
4640continuesLineCommentSection(const FormatToken &FormatTok,
4641 const UnwrappedLine &Line, const FormatStyle &Style,
4642 const llvm::Regex &CommentPragmasRegex) {
4643 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4644 return false;
4645
4646 StringRef IndentContent = FormatTok.TokenText;
4647 if (FormatTok.TokenText.starts_with(Prefix: "//") ||
4648 FormatTok.TokenText.starts_with(Prefix: "/*")) {
4649 IndentContent = FormatTok.TokenText.substr(Start: 2);
4650 }
4651 if (CommentPragmasRegex.match(String: IndentContent))
4652 return false;
4653
4654 // If Line starts with a line comment, then FormatTok continues the comment
4655 // section if its original column is greater or equal to the original start
4656 // column of the line.
4657 //
4658 // Define the min column token of a line as follows: if a line ends in '{' or
4659 // contains a '{' followed by a line comment, then the min column token is
4660 // that '{'. Otherwise, the min column token of the line is the first token of
4661 // the line.
4662 //
4663 // If Line starts with a token other than a line comment, then FormatTok
4664 // continues the comment section if its original column is greater than the
4665 // original start column of the min column token of the line.
4666 //
4667 // For example, the second line comment continues the first in these cases:
4668 //
4669 // // first line
4670 // // second line
4671 //
4672 // and:
4673 //
4674 // // first line
4675 // // second line
4676 //
4677 // and:
4678 //
4679 // int i; // first line
4680 // // second line
4681 //
4682 // and:
4683 //
4684 // do { // first line
4685 // // second line
4686 // int i;
4687 // } while (true);
4688 //
4689 // and:
4690 //
4691 // enum {
4692 // a, // first line
4693 // // second line
4694 // b
4695 // };
4696 //
4697 // The second line comment doesn't continue the first in these cases:
4698 //
4699 // // first line
4700 // // second line
4701 //
4702 // and:
4703 //
4704 // int i; // first line
4705 // // second line
4706 //
4707 // and:
4708 //
4709 // do { // first line
4710 // // second line
4711 // int i;
4712 // } while (true);
4713 //
4714 // and:
4715 //
4716 // enum {
4717 // a, // first line
4718 // // second line
4719 // };
4720 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4721
4722 // Scan for '{//'. If found, use the column of '{' as a min column for line
4723 // comment section continuation.
4724 const FormatToken *PreviousToken = nullptr;
4725 for (const UnwrappedLineNode &Node : Line.Tokens) {
4726 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace) &&
4727 isLineComment(FormatTok: *Node.Tok)) {
4728 MinColumnToken = PreviousToken;
4729 break;
4730 }
4731 PreviousToken = Node.Tok;
4732
4733 // Grab the last newline preceding a token in this unwrapped line.
4734 if (Node.Tok->NewlinesBefore > 0)
4735 MinColumnToken = Node.Tok;
4736 }
4737 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace))
4738 MinColumnToken = PreviousToken;
4739
4740 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4741 MinColumnToken);
4742}
4743
4744void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4745 bool JustComments = Line->Tokens.empty();
4746 for (FormatToken *Tok : CommentsBeforeNextToken) {
4747 // Line comments that belong to the same line comment section are put on the
4748 // same line since later we might want to reflow content between them.
4749 // Additional fine-grained breaking of line comment sections is controlled
4750 // by the class BreakableLineCommentSection in case it is desirable to keep
4751 // several line comment sections in the same unwrapped line.
4752 //
4753 // FIXME: Consider putting separate line comment sections as children to the
4754 // unwrapped line instead.
4755 Tok->ContinuesLineCommentSection =
4756 continuesLineCommentSection(FormatTok: *Tok, Line: *Line, Style, CommentPragmasRegex);
4757 if (isOnNewLine(FormatTok: *Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4758 addUnwrappedLine();
4759 pushToken(Tok);
4760 }
4761 if (NewlineBeforeNext && JustComments)
4762 addUnwrappedLine();
4763 CommentsBeforeNextToken.clear();
4764}
4765
4766void UnwrappedLineParser::nextToken(int LevelDifference) {
4767 if (eof())
4768 return;
4769 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4770 pushToken(Tok: FormatTok);
4771 FormatToken *Previous = FormatTok;
4772 if (!Style.isJavaScript())
4773 readToken(LevelDifference);
4774 else
4775 readTokenWithJavaScriptASI();
4776 FormatTok->Previous = Previous;
4777 if (Style.isVerilog()) {
4778 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4779 // keywords like `begin`, we can't treat them the same as left braces
4780 // because some contexts require one of them. For example structs use
4781 // braces and if blocks use keywords, and a left brace can occur in an if
4782 // statement, but it is not a block. For keywords like `end`, we simply
4783 // treat them the same as right braces.
4784 if (Keywords.isVerilogEnd(Tok: *FormatTok))
4785 FormatTok->Tok.setKind(tok::r_brace);
4786 }
4787}
4788
4789void UnwrappedLineParser::distributeComments(
4790 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4791 // Whether or not a line comment token continues a line is controlled by
4792 // the method continuesLineCommentSection, with the following caveat:
4793 //
4794 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4795 // that each comment line from the trail is aligned with the next token, if
4796 // the next token exists. If a trail exists, the beginning of the maximal
4797 // trail is marked as a start of a new comment section.
4798 //
4799 // For example in this code:
4800 //
4801 // int a; // line about a
4802 // // line 1 about b
4803 // // line 2 about b
4804 // int b;
4805 //
4806 // the two lines about b form a maximal trail, so there are two sections, the
4807 // first one consisting of the single comment "// line about a" and the
4808 // second one consisting of the next two comments.
4809 if (Comments.empty())
4810 return;
4811 bool ShouldPushCommentsInCurrentLine = true;
4812 bool HasTrailAlignedWithNextToken = false;
4813 unsigned StartOfTrailAlignedWithNextToken = 0;
4814 if (NextTok) {
4815 // We are skipping the first element intentionally.
4816 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4817 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4818 HasTrailAlignedWithNextToken = true;
4819 StartOfTrailAlignedWithNextToken = i;
4820 }
4821 }
4822 }
4823 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4824 FormatToken *FormatTok = Comments[i];
4825 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4826 FormatTok->ContinuesLineCommentSection = false;
4827 } else {
4828 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4829 FormatTok: *FormatTok, Line: *Line, Style, CommentPragmasRegex);
4830 }
4831 if (!FormatTok->ContinuesLineCommentSection &&
4832 (isOnNewLine(FormatTok: *FormatTok) || FormatTok->IsFirst)) {
4833 ShouldPushCommentsInCurrentLine = false;
4834 }
4835 if (ShouldPushCommentsInCurrentLine)
4836 pushToken(Tok: FormatTok);
4837 else
4838 CommentsBeforeNextToken.push_back(Elt: FormatTok);
4839 }
4840}
4841
4842void UnwrappedLineParser::readToken(int LevelDifference) {
4843 SmallVector<FormatToken *, 1> Comments;
4844 bool PreviousWasComment = false;
4845 bool FirstNonCommentOnLine = false;
4846 do {
4847 FormatTok = Tokens->getNextToken();
4848 assert(FormatTok);
4849 while (FormatTok->isOneOf(K1: TT_ConflictStart, K2: TT_ConflictEnd,
4850 Ks: TT_ConflictAlternative)) {
4851 if (FormatTok->is(TT: TT_ConflictStart))
4852 conditionalCompilationStart(/*Unreachable=*/false);
4853 else if (FormatTok->is(TT: TT_ConflictAlternative))
4854 conditionalCompilationAlternative();
4855 else if (FormatTok->is(TT: TT_ConflictEnd))
4856 conditionalCompilationEnd();
4857 FormatTok = Tokens->getNextToken();
4858 FormatTok->MustBreakBefore = true;
4859 FormatTok->MustBreakBeforeFinalized = true;
4860 }
4861
4862 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4863 const FormatToken &Tok,
4864 bool PreviousWasComment) {
4865 auto IsFirstOnLine = [](const FormatToken &Tok) {
4866 return Tok.HasUnescapedNewline || Tok.IsFirst;
4867 };
4868
4869 // Consider preprocessor directives preceded by block comments as first
4870 // on line.
4871 if (PreviousWasComment)
4872 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4873 return IsFirstOnLine(Tok);
4874 };
4875
4876 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4877 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4878 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4879
4880 while (!Line->InPPDirective && FormatTok->is(Kind: tok::hash) &&
4881 FirstNonCommentOnLine) {
4882 // In Verilog, the backtick is used for macro invocations. In TableGen,
4883 // the single hash is used for the paste operator.
4884 const auto *Next = Tokens->peekNextToken();
4885 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(Tok: *Next)) ||
4886 (Style.isTableGen() &&
4887 !Next->isOneOf(K1: tok::kw_else, K2: tok::pp_define, Ks: tok::pp_ifdef,
4888 Ks: tok::pp_ifndef, Ks: tok::pp_endif))) {
4889 break;
4890 }
4891 distributeComments(Comments, NextTok: FormatTok);
4892 Comments.clear();
4893 // If there is an unfinished unwrapped line, we flush the preprocessor
4894 // directives only after that unwrapped line was finished later.
4895 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4896 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4897 assert((LevelDifference >= 0 ||
4898 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4899 "LevelDifference makes Line->Level negative");
4900 Line->Level += LevelDifference;
4901 // Comments stored before the preprocessor directive need to be output
4902 // before the preprocessor directive, at the same level as the
4903 // preprocessor directive, as we consider them to apply to the directive.
4904 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4905 PPBranchLevel > 0) {
4906 Line->Level += PPBranchLevel;
4907 }
4908 assert(Line->Level >= Line->UnbracedBodyLevel);
4909 Line->Level -= Line->UnbracedBodyLevel;
4910 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4911 parsePPDirective();
4912 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4913 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4914 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4915 }
4916
4917 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4918 !Line->InPPDirective) {
4919 continue;
4920 }
4921
4922 if (FormatTok->is(Kind: tok::identifier) &&
4923 Macros.defined(Name: FormatTok->TokenText) &&
4924 // FIXME: Allow expanding macros in preprocessor directives.
4925 !Line->InPPDirective) {
4926 FormatToken *ID = FormatTok;
4927 unsigned Position = Tokens->getPosition();
4928
4929 // To correctly parse the code, we need to replace the tokens of the macro
4930 // call with its expansion.
4931 auto PreCall = std::move(Line);
4932 Line.reset(p: new UnwrappedLine);
4933 bool OldInExpansion = InExpansion;
4934 InExpansion = true;
4935 // We parse the macro call into a new line.
4936 auto Args = parseMacroCall();
4937 InExpansion = OldInExpansion;
4938 assert(Line->Tokens.front().Tok == ID);
4939 // And remember the unexpanded macro call tokens.
4940 auto UnexpandedLine = std::move(Line);
4941 // Reset to the old line.
4942 Line = std::move(PreCall);
4943
4944 LLVM_DEBUG({
4945 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4946 if (Args) {
4947 llvm::dbgs() << "(";
4948 for (const auto &Arg : Args.value())
4949 for (const auto &T : Arg)
4950 llvm::dbgs() << T->TokenText << " ";
4951 llvm::dbgs() << ")";
4952 }
4953 llvm::dbgs() << "\n";
4954 });
4955 if (Macros.objectLike(Name: ID->TokenText) && Args &&
4956 !Macros.hasArity(Name: ID->TokenText, Arity: Args->size())) {
4957 // The macro is either
4958 // - object-like, but we got argumnets, or
4959 // - overloaded to be both object-like and function-like, but none of
4960 // the function-like arities match the number of arguments.
4961 // Thus, expand as object-like macro.
4962 LLVM_DEBUG(llvm::dbgs()
4963 << "Macro \"" << ID->TokenText
4964 << "\" not overloaded for arity " << Args->size()
4965 << "or not function-like, using object-like overload.");
4966 Args.reset();
4967 UnexpandedLine->Tokens.resize(new_size: 1);
4968 Tokens->setPosition(Position);
4969 nextToken();
4970 assert(!Args && Macros.objectLike(ID->TokenText));
4971 }
4972 if ((!Args && Macros.objectLike(Name: ID->TokenText)) ||
4973 (Args && Macros.hasArity(Name: ID->TokenText, Arity: Args->size()))) {
4974 // Next, we insert the expanded tokens in the token stream at the
4975 // current position, and continue parsing.
4976 Unexpanded[ID] = std::move(UnexpandedLine);
4977 SmallVector<FormatToken *, 8> Expansion =
4978 Macros.expand(ID, OptionalArgs: std::move(Args));
4979 if (!Expansion.empty())
4980 FormatTok = Tokens->insertTokens(Tokens: Expansion);
4981
4982 LLVM_DEBUG({
4983 llvm::dbgs() << "Expanded: ";
4984 for (const auto &T : Expansion)
4985 llvm::dbgs() << T->TokenText << " ";
4986 llvm::dbgs() << "\n";
4987 });
4988 } else {
4989 LLVM_DEBUG({
4990 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4991 << "\", because it was used ";
4992 if (Args)
4993 llvm::dbgs() << "with " << Args->size();
4994 else
4995 llvm::dbgs() << "without";
4996 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4997 });
4998 Tokens->setPosition(Position);
4999 FormatTok = ID;
5000 }
5001 }
5002
5003 if (FormatTok->isNot(Kind: tok::comment)) {
5004 distributeComments(Comments, NextTok: FormatTok);
5005 Comments.clear();
5006 return;
5007 }
5008
5009 Comments.push_back(Elt: FormatTok);
5010 } while (!eof());
5011
5012 distributeComments(Comments, NextTok: nullptr);
5013 Comments.clear();
5014}
5015
5016namespace {
5017template <typename Iterator>
5018void pushTokens(Iterator Begin, Iterator End,
5019 SmallVectorImpl<FormatToken *> &Into) {
5020 for (auto I = Begin; I != End; ++I) {
5021 Into.push_back(Elt: I->Tok);
5022 for (const auto &Child : I->Children)
5023 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5024 }
5025}
5026} // namespace
5027
5028std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5029UnwrappedLineParser::parseMacroCall() {
5030 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5031 assert(Line->Tokens.empty());
5032 nextToken();
5033 if (FormatTok->isNot(Kind: tok::l_paren))
5034 return Args;
5035 unsigned Position = Tokens->getPosition();
5036 FormatToken *Tok = FormatTok;
5037 nextToken();
5038 Args.emplace();
5039 auto ArgStart = std::prev(x: Line->Tokens.end());
5040
5041 int Parens = 0;
5042 do {
5043 switch (FormatTok->Tok.getKind()) {
5044 case tok::l_paren:
5045 ++Parens;
5046 nextToken();
5047 break;
5048 case tok::r_paren: {
5049 if (Parens > 0) {
5050 --Parens;
5051 nextToken();
5052 break;
5053 }
5054 Args->push_back(Elt: {});
5055 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5056 nextToken();
5057 return Args;
5058 }
5059 case tok::comma: {
5060 if (Parens > 0) {
5061 nextToken();
5062 break;
5063 }
5064 Args->push_back(Elt: {});
5065 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5066 nextToken();
5067 ArgStart = std::prev(x: Line->Tokens.end());
5068 break;
5069 }
5070 default:
5071 nextToken();
5072 break;
5073 }
5074 } while (!eof());
5075 Line->Tokens.resize(new_size: 1);
5076 Tokens->setPosition(Position);
5077 FormatTok = Tok;
5078 return {};
5079}
5080
5081void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5082 Line->Tokens.push_back(x: UnwrappedLineNode(Tok));
5083 if (AtEndOfPPLine) {
5084 auto &Tok = *Line->Tokens.back().Tok;
5085 Tok.MustBreakBefore = true;
5086 Tok.MustBreakBeforeFinalized = true;
5087 Tok.FirstAfterPPLine = true;
5088 AtEndOfPPLine = false;
5089 }
5090}
5091
5092} // end namespace format
5093} // end namespace clang
5094

source code of clang/lib/Format/UnwrappedLineParser.cpp