1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, Line: *CI, Prefix: (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(OS&: llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(Val: MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
96class ScopedLineState {
97public:
98 ScopedLineState(UnwrappedLineParser &Parser,
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
114 ~ScopedLineState() {
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
125 UnwrappedLineParser &Parser;
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
131class CompoundStatementIndenter {
132public:
133 CompoundStatementIndenter(UnwrappedLineParser *Parser,
134 const FormatStyle &Style, unsigned &LineLevel)
135 : CompoundStatementIndenter(Parser, LineLevel,
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
139 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
154UnwrappedLineParser::UnwrappedLineParser(
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
157 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
166 ? IG_Rejected
167 : IG_Inited),
168 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
169 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
170
171void UnwrappedLineParser::reset() {
172 PPBranchLevel = -1;
173 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
174 ? IG_Rejected
175 : IG_Inited;
176 IncludeGuardToken = nullptr;
177 Line.reset(p: new UnwrappedLine);
178 CommentsBeforeNextToken.clear();
179 FormatTok = nullptr;
180 AtEndOfPPLine = false;
181 IsDecltypeAutoFunction = false;
182 PreprocessorDirectives.clear();
183 CurrentLines = &Lines;
184 DeclarationScopeStack.clear();
185 NestedTooDeep.clear();
186 NestedLambdas.clear();
187 PPStack.clear();
188 Line->FirstStartColumn = FirstStartColumn;
189
190 if (!Unexpanded.empty())
191 for (FormatToken *Token : AllTokens)
192 Token->MacroCtx.reset();
193 CurrentExpandedLines.clear();
194 ExpandedLines.clear();
195 Unexpanded.clear();
196 InExpansion = false;
197 Reconstruct.reset();
198}
199
200void UnwrappedLineParser::parse() {
201 IndexedTokenSource TokenSource(AllTokens);
202 Line->FirstStartColumn = FirstStartColumn;
203 do {
204 LLVM_DEBUG(llvm::dbgs() << "----\n");
205 reset();
206 Tokens = &TokenSource;
207 TokenSource.reset();
208
209 readToken();
210 parseFile();
211
212 // If we found an include guard then all preprocessor directives (other than
213 // the guard) are over-indented by one.
214 if (IncludeGuard == IG_Found) {
215 for (auto &Line : Lines)
216 if (Line.InPPDirective && Line.Level > 0)
217 --Line.Level;
218 }
219
220 // Create line with eof token.
221 assert(eof());
222 pushToken(Tok: FormatTok);
223 addUnwrappedLine();
224
225 // In a first run, format everything with the lines containing macro calls
226 // replaced by the expansion.
227 if (!ExpandedLines.empty()) {
228 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
229 for (const auto &Line : Lines) {
230 if (!Line.Tokens.empty()) {
231 auto it = ExpandedLines.find(Val: Line.Tokens.begin()->Tok);
232 if (it != ExpandedLines.end()) {
233 for (const auto &Expanded : it->second) {
234 LLVM_DEBUG(printDebugInfo(Expanded));
235 Callback.consumeUnwrappedLine(Line: Expanded);
236 }
237 continue;
238 }
239 }
240 LLVM_DEBUG(printDebugInfo(Line));
241 Callback.consumeUnwrappedLine(Line);
242 }
243 Callback.finishRun();
244 }
245
246 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
247 for (const UnwrappedLine &Line : Lines) {
248 LLVM_DEBUG(printDebugInfo(Line));
249 Callback.consumeUnwrappedLine(Line);
250 }
251 Callback.finishRun();
252 Lines.clear();
253 while (!PPLevelBranchIndex.empty() &&
254 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
255 PPLevelBranchIndex.resize(N: PPLevelBranchIndex.size() - 1);
256 PPLevelBranchCount.resize(N: PPLevelBranchCount.size() - 1);
257 }
258 if (!PPLevelBranchIndex.empty()) {
259 ++PPLevelBranchIndex.back();
260 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
261 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
262 }
263 } while (!PPLevelBranchIndex.empty());
264}
265
266void UnwrappedLineParser::parseFile() {
267 // The top-level context in a file always has declarations, except for pre-
268 // processor directives and JavaScript files.
269 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
270 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
271 MustBeDeclaration);
272 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
273 parseBracedList();
274 else
275 parseLevel();
276 // Make sure to format the remaining tokens.
277 //
278 // LK_TextProto is special since its top-level is parsed as the body of a
279 // braced list, which does not necessarily have natural line separators such
280 // as a semicolon. Comments after the last entry that have been determined to
281 // not belong to that line, as in:
282 // key: value
283 // // endfile comment
284 // do not have a chance to be put on a line of their own until this point.
285 // Here we add this newline before end-of-file comments.
286 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
287 addUnwrappedLine();
288 flushComments(NewlineBeforeNext: true);
289 addUnwrappedLine();
290}
291
292void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
293 do {
294 switch (FormatTok->Tok.getKind()) {
295 case tok::l_brace:
296 case tok::semi:
297 return;
298 default:
299 if (FormatTok->is(II: Keywords.kw_where)) {
300 addUnwrappedLine();
301 nextToken();
302 parseCSharpGenericTypeConstraint();
303 break;
304 }
305 nextToken();
306 break;
307 }
308 } while (!eof());
309}
310
311void UnwrappedLineParser::parseCSharpAttribute() {
312 int UnpairedSquareBrackets = 1;
313 do {
314 switch (FormatTok->Tok.getKind()) {
315 case tok::r_square:
316 nextToken();
317 --UnpairedSquareBrackets;
318 if (UnpairedSquareBrackets == 0) {
319 addUnwrappedLine();
320 return;
321 }
322 break;
323 case tok::l_square:
324 ++UnpairedSquareBrackets;
325 nextToken();
326 break;
327 default:
328 nextToken();
329 break;
330 }
331 } while (!eof());
332}
333
334bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
335 if (!Lines.empty() && Lines.back().InPPDirective)
336 return true;
337
338 const FormatToken *Previous = Tokens->getPreviousToken();
339 return Previous && Previous->is(Kind: tok::comment) &&
340 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
341}
342
343/// \brief Parses a level, that is ???.
344/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
345/// \param IfKind The \p if statement kind in the level.
346/// \param IfLeftBrace The left brace of the \p if block in the level.
347/// \returns true if a simple block of if/else/for/while, or false otherwise.
348/// (A simple block has a single statement.)
349bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
350 IfStmtKind *IfKind,
351 FormatToken **IfLeftBrace) {
352 const bool InRequiresExpression =
353 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
354 const bool IsPrecededByCommentOrPPDirective =
355 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
356 FormatToken *IfLBrace = nullptr;
357 bool HasDoWhile = false;
358 bool HasLabel = false;
359 unsigned StatementCount = 0;
360 bool SwitchLabelEncountered = false;
361
362 do {
363 if (FormatTok->isAttribute()) {
364 nextToken();
365 if (FormatTok->is(Kind: tok::l_paren))
366 parseParens();
367 continue;
368 }
369 tok::TokenKind Kind = FormatTok->Tok.getKind();
370 if (FormatTok->is(TT: TT_MacroBlockBegin))
371 Kind = tok::l_brace;
372 else if (FormatTok->is(TT: TT_MacroBlockEnd))
373 Kind = tok::r_brace;
374
375 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
376 &HasLabel, &StatementCount] {
377 parseStructuralElement(OpeningBrace, IfKind, IfLeftBrace: &IfLBrace,
378 HasDoWhile: HasDoWhile ? nullptr : &HasDoWhile,
379 HasLabel: HasLabel ? nullptr : &HasLabel);
380 ++StatementCount;
381 assert(StatementCount > 0 && "StatementCount overflow!");
382 };
383
384 switch (Kind) {
385 case tok::comment:
386 nextToken();
387 addUnwrappedLine();
388 break;
389 case tok::l_brace:
390 if (InRequiresExpression) {
391 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
392 } else if (FormatTok->Previous &&
393 FormatTok->Previous->ClosesRequiresClause) {
394 // We need the 'default' case here to correctly parse a function
395 // l_brace.
396 ParseDefault();
397 continue;
398 }
399 if (!InRequiresExpression && FormatTok->isNot(Kind: TT_MacroBlockBegin)) {
400 if (tryToParseBracedList())
401 continue;
402 FormatTok->setFinalizedType(TT_BlockLBrace);
403 }
404 parseBlock();
405 ++StatementCount;
406 assert(StatementCount > 0 && "StatementCount overflow!");
407 addUnwrappedLine();
408 break;
409 case tok::r_brace:
410 if (OpeningBrace) {
411 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
412 !OpeningBrace->isOneOf(K1: TT_ControlStatementLBrace, K2: TT_ElseLBrace)) {
413 return false;
414 }
415 if (FormatTok->isNot(Kind: tok::r_brace) || StatementCount != 1 || HasLabel ||
416 HasDoWhile || IsPrecededByCommentOrPPDirective ||
417 precededByCommentOrPPDirective()) {
418 return false;
419 }
420 const FormatToken *Next = Tokens->peekNextToken();
421 if (Next->is(Kind: tok::comment) && Next->NewlinesBefore == 0)
422 return false;
423 if (IfLeftBrace)
424 *IfLeftBrace = IfLBrace;
425 return true;
426 }
427 nextToken();
428 addUnwrappedLine();
429 break;
430 case tok::kw_default: {
431 unsigned StoredPosition = Tokens->getPosition();
432 auto *Next = Tokens->getNextNonComment();
433 FormatTok = Tokens->setPosition(StoredPosition);
434 if (!Next->isOneOf(K1: tok::colon, K2: tok::arrow)) {
435 // default not followed by `:` or `->` is not a case label; treat it
436 // like an identifier.
437 parseStructuralElement();
438 break;
439 }
440 // Else, if it is 'default:', fall through to the case handling.
441 [[fallthrough]];
442 }
443 case tok::kw_case:
444 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
445 (Style.isJavaScript() && Line->MustBeDeclaration)) {
446 // Proto: there are no switch/case statements
447 // Verilog: Case labels don't have this word. We handle case
448 // labels including default in TokenAnnotator.
449 // JavaScript: A 'case: string' style field declaration.
450 ParseDefault();
451 break;
452 }
453 if (!SwitchLabelEncountered &&
454 (Style.IndentCaseLabels ||
455 (OpeningBrace && OpeningBrace->is(TT: TT_SwitchExpressionLBrace)) ||
456 (Line->InPPDirective && Line->Level == 1))) {
457 ++Line->Level;
458 }
459 SwitchLabelEncountered = true;
460 parseStructuralElement();
461 break;
462 case tok::l_square:
463 if (Style.isCSharp()) {
464 nextToken();
465 parseCSharpAttribute();
466 break;
467 }
468 if (handleCppAttributes())
469 break;
470 [[fallthrough]];
471 default:
472 ParseDefault();
473 break;
474 }
475 } while (!eof());
476
477 return false;
478}
479
480void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
481 // We'll parse forward through the tokens until we hit
482 // a closing brace or eof - note that getNextToken() will
483 // parse macros, so this will magically work inside macro
484 // definitions, too.
485 unsigned StoredPosition = Tokens->getPosition();
486 FormatToken *Tok = FormatTok;
487 const FormatToken *PrevTok = Tok->Previous;
488 // Keep a stack of positions of lbrace tokens. We will
489 // update information about whether an lbrace starts a
490 // braced init list or a different block during the loop.
491 struct StackEntry {
492 FormatToken *Tok;
493 const FormatToken *PrevTok;
494 };
495 SmallVector<StackEntry, 8> LBraceStack;
496 assert(Tok->is(tok::l_brace));
497
498 do {
499 auto *NextTok = Tokens->getNextNonComment();
500
501 if (!Line->InMacroBody && !Style.isTableGen()) {
502 // Skip PPDirective lines (except macro definitions) and comments.
503 while (NextTok->is(Kind: tok::hash)) {
504 NextTok = Tokens->getNextToken();
505 if (NextTok->isOneOf(K1: tok::pp_not_keyword, K2: tok::pp_define))
506 break;
507 do {
508 NextTok = Tokens->getNextToken();
509 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(Kind: tok::eof));
510
511 while (NextTok->is(Kind: tok::comment))
512 NextTok = Tokens->getNextToken();
513 }
514 }
515
516 switch (Tok->Tok.getKind()) {
517 case tok::l_brace:
518 if (Style.isJavaScript() && PrevTok) {
519 if (PrevTok->isOneOf(K1: tok::colon, K2: tok::less)) {
520 // A ':' indicates this code is in a type, or a braced list
521 // following a label in an object literal ({a: {b: 1}}).
522 // A '<' could be an object used in a comparison, but that is nonsense
523 // code (can never return true), so more likely it is a generic type
524 // argument (`X<{a: string; b: number}>`).
525 // The code below could be confused by semicolons between the
526 // individual members in a type member list, which would normally
527 // trigger BK_Block. In both cases, this must be parsed as an inline
528 // braced init.
529 Tok->setBlockKind(BK_BracedInit);
530 } else if (PrevTok->is(Kind: tok::r_paren)) {
531 // `) { }` can only occur in function or method declarations in JS.
532 Tok->setBlockKind(BK_Block);
533 }
534 } else {
535 Tok->setBlockKind(BK_Unknown);
536 }
537 LBraceStack.push_back(Elt: {.Tok: Tok, .PrevTok: PrevTok});
538 break;
539 case tok::r_brace:
540 if (LBraceStack.empty())
541 break;
542 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BBK: BK_Unknown)) {
543 bool ProbablyBracedList = false;
544 if (Style.Language == FormatStyle::LK_Proto) {
545 ProbablyBracedList = NextTok->isOneOf(K1: tok::comma, K2: tok::r_square);
546 } else if (LBrace->isNot(Kind: TT_EnumLBrace)) {
547 // Using OriginalColumn to distinguish between ObjC methods and
548 // binary operators is a bit hacky.
549 bool NextIsObjCMethod = NextTok->isOneOf(K1: tok::plus, K2: tok::minus) &&
550 NextTok->OriginalColumn == 0;
551
552 // Try to detect a braced list. Note that regardless how we mark inner
553 // braces here, we will overwrite the BlockKind later if we parse a
554 // braced list (where all blocks inside are by default braced lists),
555 // or when we explicitly detect blocks (for example while parsing
556 // lambdas).
557
558 // If we already marked the opening brace as braced list, the closing
559 // must also be part of it.
560 ProbablyBracedList = LBrace->is(TT: TT_BracedListLBrace);
561
562 ProbablyBracedList = ProbablyBracedList ||
563 (Style.isJavaScript() &&
564 NextTok->isOneOf(K1: Keywords.kw_of, K2: Keywords.kw_in,
565 Ks: Keywords.kw_as));
566 ProbablyBracedList =
567 ProbablyBracedList ||
568 (IsCpp && (PrevTok->Tok.isLiteral() ||
569 NextTok->isOneOf(K1: tok::l_paren, K2: tok::arrow)));
570
571 // If there is a comma, semicolon or right paren after the closing
572 // brace, we assume this is a braced initializer list.
573 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
574 // braced list in JS.
575 ProbablyBracedList =
576 ProbablyBracedList ||
577 NextTok->isOneOf(K1: tok::comma, K2: tok::period, Ks: tok::colon,
578 Ks: tok::r_paren, Ks: tok::r_square, Ks: tok::ellipsis);
579
580 // Distinguish between braced list in a constructor initializer list
581 // followed by constructor body, or just adjacent blocks.
582 ProbablyBracedList =
583 ProbablyBracedList ||
584 (NextTok->is(Kind: tok::l_brace) && LBraceStack.back().PrevTok &&
585 LBraceStack.back().PrevTok->isOneOf(K1: tok::identifier,
586 K2: tok::greater));
587
588 ProbablyBracedList =
589 ProbablyBracedList ||
590 (NextTok->is(Kind: tok::identifier) &&
591 !PrevTok->isOneOf(K1: tok::semi, K2: tok::r_brace, Ks: tok::l_brace));
592
593 ProbablyBracedList = ProbablyBracedList ||
594 (NextTok->is(Kind: tok::semi) &&
595 (!ExpectClassBody || LBraceStack.size() != 1));
596
597 ProbablyBracedList =
598 ProbablyBracedList ||
599 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
600
601 if (!Style.isCSharp() && NextTok->is(Kind: tok::l_square)) {
602 // We can have an array subscript after a braced init
603 // list, but C++11 attributes are expected after blocks.
604 NextTok = Tokens->getNextToken();
605 ProbablyBracedList = NextTok->isNot(Kind: tok::l_square);
606 }
607
608 // Cpp macro definition body that is a nonempty braced list or block:
609 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
610 !FormatTok->Previous && NextTok->is(Kind: tok::eof) &&
611 // A statement can end with only `;` (simple statement), a block
612 // closing brace (compound statement), or `:` (label statement).
613 // If PrevTok is a block opening brace, Tok ends an empty block.
614 !PrevTok->isOneOf(K1: tok::semi, K2: BK_Block, Ks: tok::colon)) {
615 ProbablyBracedList = true;
616 }
617 }
618 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
619 Tok->setBlockKind(BlockKind);
620 LBrace->setBlockKind(BlockKind);
621 }
622 LBraceStack.pop_back();
623 break;
624 case tok::identifier:
625 if (Tok->isNot(Kind: TT_StatementMacro))
626 break;
627 [[fallthrough]];
628 case tok::at:
629 case tok::semi:
630 case tok::kw_if:
631 case tok::kw_while:
632 case tok::kw_for:
633 case tok::kw_switch:
634 case tok::kw_try:
635 case tok::kw___try:
636 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BBK: BK_Unknown))
637 LBraceStack.back().Tok->setBlockKind(BK_Block);
638 break;
639 default:
640 break;
641 }
642
643 PrevTok = Tok;
644 Tok = NextTok;
645 } while (Tok->isNot(Kind: tok::eof) && !LBraceStack.empty());
646
647 // Assume other blocks for all unclosed opening braces.
648 for (const auto &Entry : LBraceStack)
649 if (Entry.Tok->is(BBK: BK_Unknown))
650 Entry.Tok->setBlockKind(BK_Block);
651
652 FormatTok = Tokens->setPosition(StoredPosition);
653}
654
655// Sets the token type of the directly previous right brace.
656void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
657 if (auto Prev = FormatTok->getPreviousNonComment();
658 Prev && Prev->is(Kind: tok::r_brace)) {
659 Prev->setFinalizedType(Type);
660 }
661}
662
/// Folds the std::hash of \p v into the running hash \p seed.
///
/// \param seed In/out accumulator; updated in place.
/// \param v    Value whose std::hash<T> is mixed into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
668
669size_t UnwrappedLineParser::computePPHash() const {
670 size_t h = 0;
671 for (const auto &i : PPStack) {
672 hash_combine(seed&: h, v: size_t(i.Kind));
673 hash_combine(seed&: h, v: i.Line);
674 }
675 return h;
676}
677
678// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
679// is not null, subtracts its length (plus the preceding space) when computing
680// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
681// running the token annotator on it so that we can restore them afterward.
682bool UnwrappedLineParser::mightFitOnOneLine(
683 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
684 const auto ColumnLimit = Style.ColumnLimit;
685 if (ColumnLimit == 0)
686 return true;
687
688 auto &Tokens = ParsedLine.Tokens;
689 assert(!Tokens.empty());
690
691 const auto *LastToken = Tokens.back().Tok;
692 assert(LastToken);
693
694 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
695
696 int Index = 0;
697 for (const auto &Token : Tokens) {
698 assert(Token.Tok);
699 auto &SavedToken = SavedTokens[Index++];
700 SavedToken.Tok = new FormatToken;
701 SavedToken.Tok->copyFrom(Tok: *Token.Tok);
702 SavedToken.Children = std::move(Token.Children);
703 }
704
705 AnnotatedLine Line(ParsedLine);
706 assert(Line.Last == LastToken);
707
708 TokenAnnotator Annotator(Style, Keywords);
709 Annotator.annotate(Line);
710 Annotator.calculateFormattingInformation(Line);
711
712 auto Length = LastToken->TotalLength;
713 if (OpeningBrace) {
714 assert(OpeningBrace != Tokens.front().Tok);
715 if (auto Prev = OpeningBrace->Previous;
716 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
717 Length -= ColumnLimit;
718 }
719 Length -= OpeningBrace->TokenText.size() + 1;
720 }
721
722 if (const auto *FirstToken = Line.First; FirstToken->is(Kind: tok::r_brace)) {
723 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
724 Length -= FirstToken->TokenText.size() + 1;
725 }
726
727 Index = 0;
728 for (auto &Token : Tokens) {
729 const auto &SavedToken = SavedTokens[Index++];
730 Token.Tok->copyFrom(Tok: *SavedToken.Tok);
731 Token.Children = std::move(SavedToken.Children);
732 delete SavedToken.Tok;
733 }
734
735 // If these change PPLevel needs to be used for get correct indentation.
736 assert(!Line.InMacroBody);
737 assert(!Line.InPPDirective);
738 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
739}
740
741FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
742 unsigned AddLevels, bool MunchSemi,
743 bool KeepBraces,
744 IfStmtKind *IfKind,
745 bool UnindentWhitesmithsBraces) {
746 auto HandleVerilogBlockLabel = [this]() {
747 // ":" name
748 if (Style.isVerilog() && FormatTok->is(Kind: tok::colon)) {
749 nextToken();
750 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
751 nextToken();
752 }
753 };
754
755 // Whether this is a Verilog-specific block that has a special header like a
756 // module.
757 const bool VerilogHierarchy =
758 Style.isVerilog() && Keywords.isVerilogHierarchy(Tok: *FormatTok);
759 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
760 (Style.isVerilog() &&
761 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
762 "'{' or macro block token expected");
763 FormatToken *Tok = FormatTok;
764 const bool FollowedByComment = Tokens->peekNextToken()->is(Kind: tok::comment);
765 auto Index = CurrentLines->size();
766 const bool MacroBlock = FormatTok->is(TT: TT_MacroBlockBegin);
767 FormatTok->setBlockKind(BK_Block);
768
769 // For Whitesmiths mode, jump to the next level prior to skipping over the
770 // braces.
771 if (!VerilogHierarchy && AddLevels > 0 &&
772 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
773 ++Line->Level;
774 }
775
776 size_t PPStartHash = computePPHash();
777
778 const unsigned InitialLevel = Line->Level;
779 if (VerilogHierarchy) {
780 AddLevels += parseVerilogHierarchyHeader();
781 } else {
782 nextToken(/*LevelDifference=*/AddLevels);
783 HandleVerilogBlockLabel();
784 }
785
786 // Bail out if there are too many levels. Otherwise, the stack might overflow.
787 if (Line->Level > 300)
788 return nullptr;
789
790 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
791 parseParens();
792
793 size_t NbPreprocessorDirectives =
794 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
795 addUnwrappedLine();
796 size_t OpeningLineIndex =
797 CurrentLines->empty()
798 ? (UnwrappedLine::kInvalidIndex)
799 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
800
801 // Whitesmiths is weird here. The brace needs to be indented for the namespace
802 // block, but the block itself may not be indented depending on the style
803 // settings. This allows the format to back up one level in those cases.
804 if (UnindentWhitesmithsBraces)
805 --Line->Level;
806
807 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
808 MustBeDeclaration);
809 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
810 Line->Level += AddLevels;
811
812 FormatToken *IfLBrace = nullptr;
813 const bool SimpleBlock = parseLevel(OpeningBrace: Tok, IfKind, IfLeftBrace: &IfLBrace);
814
815 if (eof())
816 return IfLBrace;
817
818 if (MacroBlock ? FormatTok->isNot(Kind: TT_MacroBlockEnd)
819 : FormatTok->isNot(Kind: tok::r_brace)) {
820 Line->Level = InitialLevel;
821 FormatTok->setBlockKind(BK_Block);
822 return IfLBrace;
823 }
824
825 if (FormatTok->is(Kind: tok::r_brace)) {
826 FormatTok->setBlockKind(BK_Block);
827 if (Tok->is(TT: TT_NamespaceLBrace))
828 FormatTok->setFinalizedType(TT_NamespaceRBrace);
829 }
830
831 const bool IsFunctionRBrace =
832 FormatTok->is(Kind: tok::r_brace) && Tok->is(TT: TT_FunctionLBrace);
833
834 auto RemoveBraces = [=]() mutable {
835 if (!SimpleBlock)
836 return false;
837 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
838 assert(FormatTok->is(tok::r_brace));
839 const bool WrappedOpeningBrace = !Tok->Previous;
840 if (WrappedOpeningBrace && FollowedByComment)
841 return false;
842 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
843 if (KeepBraces && !HasRequiredIfBraces)
844 return false;
845 if (Tok->isNot(Kind: TT_ElseLBrace) || !HasRequiredIfBraces) {
846 const FormatToken *Previous = Tokens->getPreviousToken();
847 assert(Previous);
848 if (Previous->is(Kind: tok::r_brace) && !Previous->Optional)
849 return false;
850 }
851 assert(!CurrentLines->empty());
852 auto &LastLine = CurrentLines->back();
853 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(ParsedLine&: LastLine))
854 return false;
855 if (Tok->is(TT: TT_ElseLBrace))
856 return true;
857 if (WrappedOpeningBrace) {
858 assert(Index > 0);
859 --Index; // The line above the wrapped l_brace.
860 Tok = nullptr;
861 }
862 return mightFitOnOneLine(ParsedLine&: (*CurrentLines)[Index], OpeningBrace: Tok);
863 };
864 if (RemoveBraces()) {
865 Tok->MatchingParen = FormatTok;
866 FormatTok->MatchingParen = Tok;
867 }
868
869 size_t PPEndHash = computePPHash();
870
871 // Munch the closing brace.
872 nextToken(/*LevelDifference=*/-AddLevels);
873
874 // When this is a function block and there is an unnecessary semicolon
875 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
876 // it later).
877 if (Style.RemoveSemicolon && IsFunctionRBrace) {
878 while (FormatTok->is(Kind: tok::semi)) {
879 FormatTok->Optional = true;
880 nextToken();
881 }
882 }
883
884 HandleVerilogBlockLabel();
885
886 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
887 parseParens();
888
889 Line->Level = InitialLevel;
890
891 if (FormatTok->is(Kind: tok::kw_noexcept)) {
892 // A noexcept in a requires expression.
893 nextToken();
894 }
895
896 if (FormatTok->is(Kind: tok::arrow)) {
897 // Following the } or noexcept we can find a trailing return type arrow
898 // as part of an implicit conversion constraint.
899 nextToken();
900 parseStructuralElement();
901 }
902
903 if (MunchSemi && FormatTok->is(Kind: tok::semi))
904 nextToken();
905
906 if (PPStartHash == PPEndHash) {
907 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
908 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
909 // Update the opening line to add the forward reference as well
910 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
911 CurrentLines->size() - 1;
912 }
913 }
914
915 return IfLBrace;
916}
917
918static bool isGoogScope(const UnwrappedLine &Line) {
919 // FIXME: Closure-library specific stuff should not be hard-coded but be
920 // configurable.
921 if (Line.Tokens.size() < 4)
922 return false;
923 auto I = Line.Tokens.begin();
924 if (I->Tok->TokenText != "goog")
925 return false;
926 ++I;
927 if (I->Tok->isNot(Kind: tok::period))
928 return false;
929 ++I;
930 if (I->Tok->TokenText != "scope")
931 return false;
932 ++I;
933 return I->Tok->is(Kind: tok::l_paren);
934}
935
936static bool isIIFE(const UnwrappedLine &Line,
937 const AdditionalKeywords &Keywords) {
938 // Look for the start of an immediately invoked anonymous function.
939 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
940 // This is commonly done in JavaScript to create a new, anonymous scope.
941 // Example: (function() { ... })()
942 if (Line.Tokens.size() < 3)
943 return false;
944 auto I = Line.Tokens.begin();
945 if (I->Tok->isNot(Kind: tok::l_paren))
946 return false;
947 ++I;
948 if (I->Tok->isNot(Kind: Keywords.kw_function))
949 return false;
950 ++I;
951 return I->Tok->is(Kind: tok::l_paren);
952}
953
954static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
955 const FormatToken &InitialToken) {
956 tok::TokenKind Kind = InitialToken.Tok.getKind();
957 if (InitialToken.is(TT: TT_NamespaceMacro))
958 Kind = tok::kw_namespace;
959
960 switch (Kind) {
961 case tok::kw_namespace:
962 return Style.BraceWrapping.AfterNamespace;
963 case tok::kw_class:
964 return Style.BraceWrapping.AfterClass;
965 case tok::kw_union:
966 return Style.BraceWrapping.AfterUnion;
967 case tok::kw_struct:
968 return Style.BraceWrapping.AfterStruct;
969 case tok::kw_enum:
970 return Style.BraceWrapping.AfterEnum;
971 default:
972 return false;
973 }
974}
975
976void UnwrappedLineParser::parseChildBlock() {
977 assert(FormatTok->is(tok::l_brace));
978 FormatTok->setBlockKind(BK_Block);
979 const FormatToken *OpeningBrace = FormatTok;
980 nextToken();
981 {
982 bool SkipIndent = (Style.isJavaScript() &&
983 (isGoogScope(Line: *Line) || isIIFE(Line: *Line, Keywords)));
984 ScopedLineState LineState(*this);
985 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
986 /*MustBeDeclaration=*/false);
987 Line->Level += SkipIndent ? 0 : 1;
988 parseLevel(OpeningBrace);
989 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
990 Line->Level -= SkipIndent ? 0 : 1;
991 }
992 nextToken();
993}
994
995void UnwrappedLineParser::parsePPDirective() {
996 assert(FormatTok->is(tok::hash) && "'#' expected");
997 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
998
999 nextToken();
1000
1001 if (!FormatTok->Tok.getIdentifierInfo()) {
1002 parsePPUnknown();
1003 return;
1004 }
1005
1006 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1007 case tok::pp_define:
1008 parsePPDefine();
1009 return;
1010 case tok::pp_if:
1011 parsePPIf(/*IfDef=*/false);
1012 break;
1013 case tok::pp_ifdef:
1014 case tok::pp_ifndef:
1015 parsePPIf(/*IfDef=*/true);
1016 break;
1017 case tok::pp_else:
1018 case tok::pp_elifdef:
1019 case tok::pp_elifndef:
1020 case tok::pp_elif:
1021 parsePPElse();
1022 break;
1023 case tok::pp_endif:
1024 parsePPEndIf();
1025 break;
1026 case tok::pp_pragma:
1027 parsePPPragma();
1028 break;
1029 case tok::pp_error:
1030 case tok::pp_warning:
1031 nextToken();
1032 if (!eof() && Style.isCpp())
1033 FormatTok->setFinalizedType(TT_AfterPPDirective);
1034 [[fallthrough]];
1035 default:
1036 parsePPUnknown();
1037 break;
1038 }
1039}
1040
1041void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1042 size_t Line = CurrentLines->size();
1043 if (CurrentLines == &PreprocessorDirectives)
1044 Line += Lines.size();
1045
1046 if (Unreachable ||
1047 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1048 PPStack.push_back(Elt: {PP_Unreachable, Line});
1049 } else {
1050 PPStack.push_back(Elt: {PP_Conditional, Line});
1051 }
1052}
1053
1054void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1055 ++PPBranchLevel;
1056 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1057 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1058 PPLevelBranchIndex.push_back(Elt: 0);
1059 PPLevelBranchCount.push_back(Elt: 0);
1060 }
1061 PPChainBranchIndex.push(x: Unreachable ? -1 : 0);
1062 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1063 conditionalCompilationCondition(Unreachable: Unreachable || Skip);
1064}
1065
1066void UnwrappedLineParser::conditionalCompilationAlternative() {
1067 if (!PPStack.empty())
1068 PPStack.pop_back();
1069 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1070 if (!PPChainBranchIndex.empty())
1071 ++PPChainBranchIndex.top();
1072 conditionalCompilationCondition(
1073 Unreachable: PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1074 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1075}
1076
1077void UnwrappedLineParser::conditionalCompilationEnd() {
1078 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1079 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1080 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1081 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1082 }
1083 // Guard against #endif's without #if.
1084 if (PPBranchLevel > -1)
1085 --PPBranchLevel;
1086 if (!PPChainBranchIndex.empty())
1087 PPChainBranchIndex.pop();
1088 if (!PPStack.empty())
1089 PPStack.pop_back();
1090}
1091
1092void UnwrappedLineParser::parsePPIf(bool IfDef) {
1093 bool IfNDef = FormatTok->is(Kind: tok::pp_ifndef);
1094 nextToken();
1095 bool Unreachable = false;
1096 if (!IfDef && (FormatTok->is(Kind: tok::kw_false) || FormatTok->TokenText == "0"))
1097 Unreachable = true;
1098 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1099 Unreachable = true;
1100 conditionalCompilationStart(Unreachable);
1101 FormatToken *IfCondition = FormatTok;
1102 // If there's a #ifndef on the first line, and the only lines before it are
1103 // comments, it could be an include guard.
1104 bool MaybeIncludeGuard = IfNDef;
1105 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1106 for (auto &Line : Lines) {
1107 if (Line.Tokens.front().Tok->isNot(Kind: tok::comment)) {
1108 MaybeIncludeGuard = false;
1109 IncludeGuard = IG_Rejected;
1110 break;
1111 }
1112 }
1113 }
1114 --PPBranchLevel;
1115 parsePPUnknown();
1116 ++PPBranchLevel;
1117 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1118 IncludeGuard = IG_IfNdefed;
1119 IncludeGuardToken = IfCondition;
1120 }
1121}
1122
1123void UnwrappedLineParser::parsePPElse() {
1124 // If a potential include guard has an #else, it's not an include guard.
1125 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1126 IncludeGuard = IG_Rejected;
1127 // Don't crash when there is an #else without an #if.
1128 assert(PPBranchLevel >= -1);
1129 if (PPBranchLevel == -1)
1130 conditionalCompilationStart(/*Unreachable=*/true);
1131 conditionalCompilationAlternative();
1132 --PPBranchLevel;
1133 parsePPUnknown();
1134 ++PPBranchLevel;
1135}
1136
1137void UnwrappedLineParser::parsePPEndIf() {
1138 conditionalCompilationEnd();
1139 parsePPUnknown();
1140 // If the #endif of a potential include guard is the last thing in the file,
1141 // then we found an include guard.
1142 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1143 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1144 IncludeGuard = IG_Found;
1145 }
1146}
1147
1148void UnwrappedLineParser::parsePPDefine() {
1149 nextToken();
1150
1151 if (!FormatTok->Tok.getIdentifierInfo()) {
1152 IncludeGuard = IG_Rejected;
1153 IncludeGuardToken = nullptr;
1154 parsePPUnknown();
1155 return;
1156 }
1157
1158 bool MaybeIncludeGuard = false;
1159 if (IncludeGuard == IG_IfNdefed &&
1160 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1161 IncludeGuard = IG_Defined;
1162 IncludeGuardToken = nullptr;
1163 for (auto &Line : Lines) {
1164 if (!Line.Tokens.front().Tok->isOneOf(K1: tok::comment, K2: tok::hash)) {
1165 IncludeGuard = IG_Rejected;
1166 break;
1167 }
1168 }
1169 MaybeIncludeGuard = IncludeGuard == IG_Defined;
1170 }
1171
1172 // In the context of a define, even keywords should be treated as normal
1173 // identifiers. Setting the kind to identifier is not enough, because we need
1174 // to treat additional keywords like __except as well, which are already
1175 // identifiers. Setting the identifier info to null interferes with include
1176 // guard processing above, and changes preprocessing nesting.
1177 FormatTok->Tok.setKind(tok::identifier);
1178 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1179 nextToken();
1180
1181 // IncludeGuard can't have a non-empty macro definition.
1182 if (MaybeIncludeGuard && !eof())
1183 IncludeGuard = IG_Rejected;
1184
1185 if (FormatTok->Tok.getKind() == tok::l_paren &&
1186 !FormatTok->hasWhitespaceBefore()) {
1187 parseParens();
1188 }
1189 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1190 Line->Level += PPBranchLevel + 1;
1191 addUnwrappedLine();
1192 ++Line->Level;
1193
1194 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1195 assert((int)Line->PPLevel >= 0);
1196 Line->InMacroBody = true;
1197
1198 if (Style.SkipMacroDefinitionBody) {
1199 while (!eof()) {
1200 FormatTok->Finalized = true;
1201 FormatTok = Tokens->getNextToken();
1202 }
1203 addUnwrappedLine();
1204 return;
1205 }
1206
1207 // Errors during a preprocessor directive can only affect the layout of the
1208 // preprocessor directive, and thus we ignore them. An alternative approach
1209 // would be to use the same approach we use on the file level (no
1210 // re-indentation if there was a structural error) within the macro
1211 // definition.
1212 parseFile();
1213}
1214
1215void UnwrappedLineParser::parsePPPragma() {
1216 Line->InPragmaDirective = true;
1217 parsePPUnknown();
1218}
1219
1220void UnwrappedLineParser::parsePPUnknown() {
1221 while (!eof())
1222 nextToken();
1223 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1224 Line->Level += PPBranchLevel + 1;
1225 addUnwrappedLine();
1226}
1227
1228// Here we exclude certain tokens that are not usually the first token in an
1229// unwrapped line. This is used in attempt to distinguish macro calls without
1230// trailing semicolons from other constructs split to several lines.
1231static bool tokenCanStartNewLine(const FormatToken &Tok) {
1232 // Semicolon can be a null-statement, l_square can be a start of a macro or
1233 // a C++11 attribute, but this doesn't seem to be common.
1234 return !Tok.isOneOf(K1: tok::semi, K2: tok::l_brace,
1235 // Tokens that can only be used as binary operators and a
1236 // part of overloaded operator names.
1237 Ks: tok::period, Ks: tok::periodstar, Ks: tok::arrow, Ks: tok::arrowstar,
1238 Ks: tok::less, Ks: tok::greater, Ks: tok::slash, Ks: tok::percent,
1239 Ks: tok::lessless, Ks: tok::greatergreater, Ks: tok::equal,
1240 Ks: tok::plusequal, Ks: tok::minusequal, Ks: tok::starequal,
1241 Ks: tok::slashequal, Ks: tok::percentequal, Ks: tok::ampequal,
1242 Ks: tok::pipeequal, Ks: tok::caretequal, Ks: tok::greatergreaterequal,
1243 Ks: tok::lesslessequal,
1244 // Colon is used in labels, base class lists, initializer
1245 // lists, range-based for loops, ternary operator, but
1246 // should never be the first token in an unwrapped line.
1247 Ks: tok::colon,
1248 // 'noexcept' is a trailing annotation.
1249 Ks: tok::kw_noexcept);
1250}
1251
1252static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1253 const FormatToken *FormatTok) {
1254 // FIXME: This returns true for C/C++ keywords like 'struct'.
1255 return FormatTok->is(Kind: tok::identifier) &&
1256 (!FormatTok->Tok.getIdentifierInfo() ||
1257 !FormatTok->isOneOf(
1258 K1: Keywords.kw_in, K2: Keywords.kw_of, Ks: Keywords.kw_as, Ks: Keywords.kw_async,
1259 Ks: Keywords.kw_await, Ks: Keywords.kw_yield, Ks: Keywords.kw_finally,
1260 Ks: Keywords.kw_function, Ks: Keywords.kw_import, Ks: Keywords.kw_is,
1261 Ks: Keywords.kw_let, Ks: Keywords.kw_var, Ks: tok::kw_const,
1262 Ks: Keywords.kw_abstract, Ks: Keywords.kw_extends, Ks: Keywords.kw_implements,
1263 Ks: Keywords.kw_instanceof, Ks: Keywords.kw_interface,
1264 Ks: Keywords.kw_override, Ks: Keywords.kw_throws, Ks: Keywords.kw_from));
1265}
1266
1267static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1268 const FormatToken *FormatTok) {
1269 return FormatTok->Tok.isLiteral() ||
1270 FormatTok->isOneOf(K1: tok::kw_true, K2: tok::kw_false) ||
1271 mustBeJSIdent(Keywords, FormatTok);
1272}
1273
1274// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1275// when encountered after a value (see mustBeJSIdentOrValue).
1276static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1277 const FormatToken *FormatTok) {
1278 return FormatTok->isOneOf(
1279 K1: tok::kw_return, K2: Keywords.kw_yield,
1280 // conditionals
1281 Ks: tok::kw_if, Ks: tok::kw_else,
1282 // loops
1283 Ks: tok::kw_for, Ks: tok::kw_while, Ks: tok::kw_do, Ks: tok::kw_continue, Ks: tok::kw_break,
1284 // switch/case
1285 Ks: tok::kw_switch, Ks: tok::kw_case,
1286 // exceptions
1287 Ks: tok::kw_throw, Ks: tok::kw_try, Ks: tok::kw_catch, Ks: Keywords.kw_finally,
1288 // declaration
1289 Ks: tok::kw_const, Ks: tok::kw_class, Ks: Keywords.kw_var, Ks: Keywords.kw_let,
1290 Ks: Keywords.kw_async, Ks: Keywords.kw_function,
1291 // import/export
1292 Ks: Keywords.kw_import, Ks: tok::kw_export);
1293}
1294
1295// Checks whether a token is a type in K&R C (aka C78).
1296static bool isC78Type(const FormatToken &Tok) {
1297 return Tok.isOneOf(K1: tok::kw_char, K2: tok::kw_short, Ks: tok::kw_int, Ks: tok::kw_long,
1298 Ks: tok::kw_unsigned, Ks: tok::kw_float, Ks: tok::kw_double,
1299 Ks: tok::identifier);
1300}
1301
1302// This function checks whether a token starts the first parameter declaration
1303// in a K&R C (aka C78) function definition, e.g.:
1304// int f(a, b)
1305// short a, b;
1306// {
1307// return a + b;
1308// }
1309static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1310 const FormatToken *FuncName) {
1311 assert(Tok);
1312 assert(Next);
1313 assert(FuncName);
1314
1315 if (FuncName->isNot(Kind: tok::identifier))
1316 return false;
1317
1318 const FormatToken *Prev = FuncName->Previous;
1319 if (!Prev || (Prev->isNot(Kind: tok::star) && !isC78Type(Tok: *Prev)))
1320 return false;
1321
1322 if (!isC78Type(Tok: *Tok) &&
1323 !Tok->isOneOf(K1: tok::kw_register, K2: tok::kw_struct, Ks: tok::kw_union)) {
1324 return false;
1325 }
1326
1327 if (Next->isNot(Kind: tok::star) && !Next->Tok.getIdentifierInfo())
1328 return false;
1329
1330 Tok = Tok->Previous;
1331 if (!Tok || Tok->isNot(Kind: tok::r_paren))
1332 return false;
1333
1334 Tok = Tok->Previous;
1335 if (!Tok || Tok->isNot(Kind: tok::identifier))
1336 return false;
1337
1338 return Tok->Previous && Tok->Previous->isOneOf(K1: tok::l_paren, K2: tok::comma);
1339}
1340
1341bool UnwrappedLineParser::parseModuleImport() {
1342 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1343
1344 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1345 !Token->Tok.getIdentifierInfo() &&
1346 !Token->isOneOf(K1: tok::colon, K2: tok::less, Ks: tok::string_literal)) {
1347 return false;
1348 }
1349
1350 nextToken();
1351 while (!eof()) {
1352 if (FormatTok->is(Kind: tok::colon)) {
1353 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1354 }
1355 // Handle import <foo/bar.h> as we would an include statement.
1356 else if (FormatTok->is(Kind: tok::less)) {
1357 nextToken();
1358 while (!FormatTok->isOneOf(K1: tok::semi, K2: tok::greater) && !eof()) {
1359 // Mark tokens up to the trailing line comments as implicit string
1360 // literals.
1361 if (FormatTok->isNot(Kind: tok::comment) &&
1362 !FormatTok->TokenText.starts_with(Prefix: "//")) {
1363 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1364 }
1365 nextToken();
1366 }
1367 }
1368 if (FormatTok->is(Kind: tok::semi)) {
1369 nextToken();
1370 break;
1371 }
1372 nextToken();
1373 }
1374
1375 addUnwrappedLine();
1376 return true;
1377}
1378
1379// readTokenWithJavaScriptASI reads the next token and terminates the current
1380// line if JavaScript Automatic Semicolon Insertion must
1381// happen between the current token and the next token.
1382//
1383// This method is conservative - it cannot cover all edge cases of JavaScript,
1384// but only aims to correctly handle certain well known cases. It *must not*
1385// return true in speculative cases.
1386void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1387 FormatToken *Previous = FormatTok;
1388 readToken();
1389 FormatToken *Next = FormatTok;
1390
1391 bool IsOnSameLine =
1392 CommentsBeforeNextToken.empty()
1393 ? Next->NewlinesBefore == 0
1394 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1395 if (IsOnSameLine)
1396 return;
1397
1398 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Previous);
1399 bool PreviousStartsTemplateExpr =
1400 Previous->is(TT: TT_TemplateString) && Previous->TokenText.ends_with(Suffix: "${");
1401 if (PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) {
1402 // If the line contains an '@' sign, the previous token might be an
1403 // annotation, which can precede another identifier/value.
1404 bool HasAt = llvm::any_of(Range&: Line->Tokens, P: [](UnwrappedLineNode &LineNode) {
1405 return LineNode.Tok->is(Kind: tok::at);
1406 });
1407 if (HasAt)
1408 return;
1409 }
1410 if (Next->is(Kind: tok::exclaim) && PreviousMustBeValue)
1411 return addUnwrappedLine();
1412 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Next);
1413 bool NextEndsTemplateExpr =
1414 Next->is(TT: TT_TemplateString) && Next->TokenText.starts_with(Prefix: "}");
1415 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1416 (PreviousMustBeValue ||
1417 Previous->isOneOf(K1: tok::r_square, K2: tok::r_paren, Ks: tok::plusplus,
1418 Ks: tok::minusminus))) {
1419 return addUnwrappedLine();
1420 }
1421 if ((PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) &&
1422 isJSDeclOrStmt(Keywords, FormatTok: Next)) {
1423 return addUnwrappedLine();
1424 }
1425}
1426
1427void UnwrappedLineParser::parseStructuralElement(
1428 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1429 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1430 if (Style.isTableGen() && FormatTok->is(Kind: tok::pp_include)) {
1431 nextToken();
1432 if (FormatTok->is(Kind: tok::string_literal))
1433 nextToken();
1434 addUnwrappedLine();
1435 return;
1436 }
1437
1438 if (IsCpp) {
1439 while (FormatTok->is(Kind: tok::l_square) && handleCppAttributes()) {
1440 }
1441 } else if (Style.isVerilog()) {
1442 if (Keywords.isVerilogStructuredProcedure(Tok: *FormatTok)) {
1443 parseForOrWhileLoop(/*HasParens=*/false);
1444 return;
1445 }
1446 if (FormatTok->isOneOf(K1: Keywords.kw_foreach, K2: Keywords.kw_repeat)) {
1447 parseForOrWhileLoop();
1448 return;
1449 }
1450 if (FormatTok->isOneOf(K1: tok::kw_restrict, K2: Keywords.kw_assert,
1451 Ks: Keywords.kw_assume, Ks: Keywords.kw_cover)) {
1452 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1453 return;
1454 }
1455
1456 // Skip things that can exist before keywords like 'if' and 'case'.
1457 while (true) {
1458 if (FormatTok->isOneOf(K1: Keywords.kw_priority, K2: Keywords.kw_unique,
1459 Ks: Keywords.kw_unique0)) {
1460 nextToken();
1461 } else if (FormatTok->is(Kind: tok::l_paren) &&
1462 Tokens->peekNextToken()->is(Kind: tok::star)) {
1463 parseParens();
1464 } else {
1465 break;
1466 }
1467 }
1468 }
1469
1470 // Tokens that only make sense at the beginning of a line.
1471 if (FormatTok->isAccessSpecifierKeyword()) {
1472 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1473 nextToken();
1474 else
1475 parseAccessSpecifier();
1476 return;
1477 }
1478 switch (FormatTok->Tok.getKind()) {
1479 case tok::kw_asm:
1480 nextToken();
1481 if (FormatTok->is(Kind: tok::l_brace)) {
1482 FormatTok->setFinalizedType(TT_InlineASMBrace);
1483 nextToken();
1484 while (FormatTok && !eof()) {
1485 if (FormatTok->is(Kind: tok::r_brace)) {
1486 FormatTok->setFinalizedType(TT_InlineASMBrace);
1487 nextToken();
1488 addUnwrappedLine();
1489 break;
1490 }
1491 FormatTok->Finalized = true;
1492 nextToken();
1493 }
1494 }
1495 break;
1496 case tok::kw_namespace:
1497 parseNamespace();
1498 return;
1499 case tok::kw_if: {
1500 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1501 // field/method declaration.
1502 break;
1503 }
1504 FormatToken *Tok = parseIfThenElse(IfKind);
1505 if (IfLeftBrace)
1506 *IfLeftBrace = Tok;
1507 return;
1508 }
1509 case tok::kw_for:
1510 case tok::kw_while:
1511 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1512 // field/method declaration.
1513 break;
1514 }
1515 parseForOrWhileLoop();
1516 return;
1517 case tok::kw_do:
1518 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1519 // field/method declaration.
1520 break;
1521 }
1522 parseDoWhile();
1523 if (HasDoWhile)
1524 *HasDoWhile = true;
1525 return;
1526 case tok::kw_switch:
1527 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1528 // 'switch: string' field declaration.
1529 break;
1530 }
1531 parseSwitch(/*IsExpr=*/false);
1532 return;
1533 case tok::kw_default: {
1534 // In Verilog default along with other labels are handled in the next loop.
1535 if (Style.isVerilog())
1536 break;
1537 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1538 // 'default: string' field declaration.
1539 break;
1540 }
1541 auto *Default = FormatTok;
1542 nextToken();
1543 if (FormatTok->is(Kind: tok::colon)) {
1544 FormatTok->setFinalizedType(TT_CaseLabelColon);
1545 parseLabel();
1546 return;
1547 }
1548 if (FormatTok->is(Kind: tok::arrow)) {
1549 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1550 Default->setFinalizedType(TT_SwitchExpressionLabel);
1551 parseLabel();
1552 return;
1553 }
1554 // e.g. "default void f() {}" in a Java interface.
1555 break;
1556 }
1557 case tok::kw_case:
1558 // Proto: there are no switch/case statements.
1559 if (Style.Language == FormatStyle::LK_Proto) {
1560 nextToken();
1561 return;
1562 }
1563 if (Style.isVerilog()) {
1564 parseBlock();
1565 addUnwrappedLine();
1566 return;
1567 }
1568 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1569 // 'case: string' field declaration.
1570 nextToken();
1571 break;
1572 }
1573 parseCaseLabel();
1574 return;
1575 case tok::kw_goto:
1576 nextToken();
1577 if (FormatTok->is(Kind: tok::kw_case))
1578 nextToken();
1579 break;
1580 case tok::kw_try:
1581 case tok::kw___try:
1582 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1583 // field/method declaration.
1584 break;
1585 }
1586 parseTryCatch();
1587 return;
1588 case tok::kw_extern:
1589 nextToken();
1590 if (Style.isVerilog()) {
1591 // In Verilog and extern module declaration looks like a start of module.
1592 // But there is no body and endmodule. So we handle it separately.
1593 if (Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
1594 parseVerilogHierarchyHeader();
1595 return;
1596 }
1597 } else if (FormatTok->is(Kind: tok::string_literal)) {
1598 nextToken();
1599 if (FormatTok->is(Kind: tok::l_brace)) {
1600 if (Style.BraceWrapping.AfterExternBlock)
1601 addUnwrappedLine();
1602 // Either we indent or for backwards compatibility we follow the
1603 // AfterExternBlock style.
1604 unsigned AddLevels =
1605 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1606 (Style.BraceWrapping.AfterExternBlock &&
1607 Style.IndentExternBlock ==
1608 FormatStyle::IEBS_AfterExternBlock)
1609 ? 1u
1610 : 0u;
1611 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1612 addUnwrappedLine();
1613 return;
1614 }
1615 }
1616 break;
1617 case tok::kw_export:
1618 if (Style.isJavaScript()) {
1619 parseJavaScriptEs6ImportExport();
1620 return;
1621 }
1622 if (IsCpp) {
1623 nextToken();
1624 if (FormatTok->is(Kind: tok::kw_namespace)) {
1625 parseNamespace();
1626 return;
1627 }
1628 if (FormatTok->is(Kind: tok::l_brace)) {
1629 parseCppExportBlock();
1630 return;
1631 }
1632 if (FormatTok->is(II: Keywords.kw_import) && parseModuleImport())
1633 return;
1634 }
1635 break;
1636 case tok::kw_inline:
1637 nextToken();
1638 if (FormatTok->is(Kind: tok::kw_namespace)) {
1639 parseNamespace();
1640 return;
1641 }
1642 break;
1643 case tok::identifier:
1644 if (FormatTok->is(TT: TT_ForEachMacro)) {
1645 parseForOrWhileLoop();
1646 return;
1647 }
1648 if (FormatTok->is(TT: TT_MacroBlockBegin)) {
1649 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1650 /*MunchSemi=*/false);
1651 return;
1652 }
1653 if (FormatTok->is(II: Keywords.kw_import)) {
1654 if (Style.isJavaScript()) {
1655 parseJavaScriptEs6ImportExport();
1656 return;
1657 }
1658 if (Style.Language == FormatStyle::LK_Proto) {
1659 nextToken();
1660 if (FormatTok->is(Kind: tok::kw_public))
1661 nextToken();
1662 if (FormatTok->isNot(Kind: tok::string_literal))
1663 return;
1664 nextToken();
1665 if (FormatTok->is(Kind: tok::semi))
1666 nextToken();
1667 addUnwrappedLine();
1668 return;
1669 }
1670 if (IsCpp && parseModuleImport())
1671 return;
1672 }
1673 if (IsCpp && FormatTok->isOneOf(K1: Keywords.kw_signals, K2: Keywords.kw_qsignals,
1674 Ks: Keywords.kw_slots, Ks: Keywords.kw_qslots)) {
1675 nextToken();
1676 if (FormatTok->is(Kind: tok::colon)) {
1677 nextToken();
1678 addUnwrappedLine();
1679 return;
1680 }
1681 }
1682 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
1683 parseStatementMacro();
1684 return;
1685 }
1686 if (IsCpp && FormatTok->is(TT: TT_NamespaceMacro)) {
1687 parseNamespace();
1688 return;
1689 }
1690 // In Verilog labels can be any expression, so we don't do them here.
1691 // JS doesn't have macros, and within classes colons indicate fields, not
1692 // labels.
1693 // TableGen doesn't have labels.
1694 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1695 Tokens->peekNextToken()->is(Kind: tok::colon) && !Line->MustBeDeclaration) {
1696 nextToken();
1697 if (!Line->InMacroBody || CurrentLines->size() > 1)
1698 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1699 FormatTok->setFinalizedType(TT_GotoLabelColon);
1700 parseLabel(LeftAlignLabel: !Style.IndentGotoLabels);
1701 if (HasLabel)
1702 *HasLabel = true;
1703 return;
1704 }
1705 if (Style.isJava() && FormatTok->is(II: Keywords.kw_record)) {
1706 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1707 addUnwrappedLine();
1708 return;
1709 }
1710 // In all other cases, parse the declaration.
1711 break;
1712 default:
1713 break;
1714 }
1715
1716 bool SeenEqual = false;
1717 for (const bool InRequiresExpression =
1718 OpeningBrace && OpeningBrace->isOneOf(K1: TT_RequiresExpressionLBrace,
1719 K2: TT_CompoundRequirementLBrace);
1720 !eof();) {
1721 const FormatToken *Previous = FormatTok->Previous;
1722 switch (FormatTok->Tok.getKind()) {
1723 case tok::at:
1724 nextToken();
1725 if (FormatTok->is(Kind: tok::l_brace)) {
1726 nextToken();
1727 parseBracedList();
1728 break;
1729 }
1730 if (Style.isJava() && FormatTok->is(II: Keywords.kw_interface)) {
1731 nextToken();
1732 break;
1733 }
1734 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1735 case tok::objc_public:
1736 case tok::objc_protected:
1737 case tok::objc_package:
1738 case tok::objc_private:
1739 return parseAccessSpecifier();
1740 case tok::objc_interface:
1741 case tok::objc_implementation:
1742 return parseObjCInterfaceOrImplementation();
1743 case tok::objc_protocol:
1744 if (parseObjCProtocol())
1745 return;
1746 break;
1747 case tok::objc_end:
1748 return; // Handled by the caller.
1749 case tok::objc_optional:
1750 case tok::objc_required:
1751 nextToken();
1752 addUnwrappedLine();
1753 return;
1754 case tok::objc_autoreleasepool:
1755 IsAutoRelease = true;
1756 [[fallthrough]];
1757 case tok::objc_synchronized:
1758 nextToken();
1759 if (!IsAutoRelease && FormatTok->is(Kind: tok::l_paren)) {
1760 // Skip synchronization object
1761 parseParens();
1762 }
1763 if (FormatTok->is(Kind: tok::l_brace)) {
1764 if (Style.BraceWrapping.AfterControlStatement ==
1765 FormatStyle::BWACS_Always) {
1766 addUnwrappedLine();
1767 }
1768 parseBlock();
1769 }
1770 addUnwrappedLine();
1771 return;
1772 case tok::objc_try:
1773 // This branch isn't strictly necessary (the kw_try case below would
1774 // do this too after the tok::at is parsed above). But be explicit.
1775 parseTryCatch();
1776 return;
1777 default:
1778 break;
1779 }
1780 break;
1781 case tok::kw_requires: {
1782 if (IsCpp) {
1783 bool ParsedClause = parseRequires(SeenEqual);
1784 if (ParsedClause)
1785 return;
1786 } else {
1787 nextToken();
1788 }
1789 break;
1790 }
1791 case tok::kw_enum:
1792 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1793 // "template <..., enum ...>".
1794 if (Previous && Previous->isOneOf(K1: tok::less, K2: tok::arrow, Ks: tok::comma)) {
1795 nextToken();
1796 break;
1797 }
1798
1799 // parseEnum falls through and does not yet add an unwrapped line as an
1800 // enum definition can start a structural element.
1801 if (!parseEnum())
1802 break;
1803 // This only applies to C++ and Verilog.
1804 if (!IsCpp && !Style.isVerilog()) {
1805 addUnwrappedLine();
1806 return;
1807 }
1808 break;
1809 case tok::kw_typedef:
1810 nextToken();
1811 if (FormatTok->isOneOf(K1: Keywords.kw_NS_ENUM, K2: Keywords.kw_NS_OPTIONS,
1812 Ks: Keywords.kw_CF_ENUM, Ks: Keywords.kw_CF_OPTIONS,
1813 Ks: Keywords.kw_CF_CLOSED_ENUM,
1814 Ks: Keywords.kw_NS_CLOSED_ENUM)) {
1815 parseEnum();
1816 }
1817 break;
1818 case tok::kw_class:
1819 if (Style.isVerilog()) {
1820 parseBlock();
1821 addUnwrappedLine();
1822 return;
1823 }
1824 if (Style.isTableGen()) {
1825 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1826 // This is same as def and so on.
1827 nextToken();
1828 break;
1829 }
1830 [[fallthrough]];
1831 case tok::kw_struct:
1832 case tok::kw_union:
1833 if (parseStructLike())
1834 return;
1835 break;
1836 case tok::kw_decltype:
1837 nextToken();
1838 if (FormatTok->is(Kind: tok::l_paren)) {
1839 parseParens();
1840 if (FormatTok->Previous &&
1841 FormatTok->Previous->endsSequence(K1: tok::r_paren, Tokens: tok::kw_auto,
1842 Tokens: tok::l_paren)) {
1843 Line->SeenDecltypeAuto = true;
1844 }
1845 }
1846 break;
1847 case tok::period:
1848 nextToken();
1849 // In Java, classes have an implicit static member "class".
1850 if (Style.isJava() && FormatTok && FormatTok->is(Kind: tok::kw_class))
1851 nextToken();
1852 if (Style.isJavaScript() && FormatTok &&
1853 FormatTok->Tok.getIdentifierInfo()) {
1854 // JavaScript only has pseudo keywords, all keywords are allowed to
1855 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1856 nextToken();
1857 }
1858 break;
1859 case tok::semi:
1860 nextToken();
1861 addUnwrappedLine();
1862 return;
1863 case tok::r_brace:
1864 addUnwrappedLine();
1865 return;
1866 case tok::l_paren: {
1867 parseParens();
1868 // Break the unwrapped line if a K&R C function definition has a parameter
1869 // declaration.
1870 if (OpeningBrace || !IsCpp || !Previous || eof())
1871 break;
1872 if (isC78ParameterDecl(Tok: FormatTok,
1873 Next: Tokens->peekNextToken(/*SkipComment=*/true),
1874 FuncName: Previous)) {
1875 addUnwrappedLine();
1876 return;
1877 }
1878 break;
1879 }
1880 case tok::kw_operator:
1881 nextToken();
1882 if (FormatTok->isBinaryOperator())
1883 nextToken();
1884 break;
1885 case tok::caret: {
1886 const auto *Prev = FormatTok->getPreviousNonComment();
1887 nextToken();
1888 if (Prev && Prev->is(Kind: tok::identifier))
1889 break;
1890 // Block return type.
1891 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1892 nextToken();
1893 // Return types: pointers are ok too.
1894 while (FormatTok->is(Kind: tok::star))
1895 nextToken();
1896 }
1897 // Block argument list.
1898 if (FormatTok->is(Kind: tok::l_paren))
1899 parseParens();
1900 // Block body.
1901 if (FormatTok->is(Kind: tok::l_brace))
1902 parseChildBlock();
1903 break;
1904 }
1905 case tok::l_brace:
1906 if (InRequiresExpression)
1907 FormatTok->setFinalizedType(TT_BracedListLBrace);
1908 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1909 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1910 // A block outside of parentheses must be the last part of a
1911 // structural element.
1912 // FIXME: Figure out cases where this is not true, and add projections
1913 // for them (the one we know is missing are lambdas).
1914 if (Style.isJava() &&
1915 Line->Tokens.front().Tok->is(II: Keywords.kw_synchronized)) {
1916 // If necessary, we could set the type to something different than
1917 // TT_FunctionLBrace.
1918 if (Style.BraceWrapping.AfterControlStatement ==
1919 FormatStyle::BWACS_Always) {
1920 addUnwrappedLine();
1921 }
1922 } else if (Style.BraceWrapping.AfterFunction) {
1923 addUnwrappedLine();
1924 }
1925 if (!Previous || Previous->isNot(Kind: TT_TypeDeclarationParen))
1926 FormatTok->setFinalizedType(TT_FunctionLBrace);
1927 parseBlock();
1928 IsDecltypeAutoFunction = false;
1929 addUnwrappedLine();
1930 return;
1931 }
1932 // Otherwise this was a braced init list, and the structural
1933 // element continues.
1934 break;
1935 case tok::kw_try:
1936 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1937 // field/method declaration.
1938 nextToken();
1939 break;
1940 }
1941 // We arrive here when parsing function-try blocks.
1942 if (Style.BraceWrapping.AfterFunction)
1943 addUnwrappedLine();
1944 parseTryCatch();
1945 return;
1946 case tok::identifier: {
1947 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where) &&
1948 Line->MustBeDeclaration) {
1949 addUnwrappedLine();
1950 parseCSharpGenericTypeConstraint();
1951 break;
1952 }
1953 if (FormatTok->is(TT: TT_MacroBlockEnd)) {
1954 addUnwrappedLine();
1955 return;
1956 }
1957
1958 // Function declarations (as opposed to function expressions) are parsed
1959 // on their own unwrapped line by continuing this loop. Function
1960 // expressions (functions that are not on their own line) must not create
1961 // a new unwrapped line, so they are special cased below.
1962 size_t TokenCount = Line->Tokens.size();
1963 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_function) &&
1964 (TokenCount > 1 ||
1965 (TokenCount == 1 &&
1966 Line->Tokens.front().Tok->isNot(Kind: Keywords.kw_async)))) {
1967 tryToParseJSFunction();
1968 break;
1969 }
1970 if ((Style.isJavaScript() || Style.isJava()) &&
1971 FormatTok->is(II: Keywords.kw_interface)) {
1972 if (Style.isJavaScript()) {
1973 // In JavaScript/TypeScript, "interface" can be used as a standalone
1974 // identifier, e.g. in `var interface = 1;`. If "interface" is
1975 // followed by another identifier, it is very like to be an actual
1976 // interface declaration.
1977 unsigned StoredPosition = Tokens->getPosition();
1978 FormatToken *Next = Tokens->getNextToken();
1979 FormatTok = Tokens->setPosition(StoredPosition);
1980 if (!mustBeJSIdent(Keywords, FormatTok: Next)) {
1981 nextToken();
1982 break;
1983 }
1984 }
1985 parseRecord();
1986 addUnwrappedLine();
1987 return;
1988 }
1989
1990 if (Style.isVerilog()) {
1991 if (FormatTok->is(II: Keywords.kw_table)) {
1992 parseVerilogTable();
1993 return;
1994 }
1995 if (Keywords.isVerilogBegin(Tok: *FormatTok) ||
1996 Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
1997 parseBlock();
1998 addUnwrappedLine();
1999 return;
2000 }
2001 }
2002
2003 if (!IsCpp && FormatTok->is(II: Keywords.kw_interface)) {
2004 if (parseStructLike())
2005 return;
2006 break;
2007 }
2008
2009 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
2010 parseStatementMacro();
2011 return;
2012 }
2013
2014 // See if the following token should start a new unwrapped line.
2015 StringRef Text = FormatTok->TokenText;
2016
2017 FormatToken *PreviousToken = FormatTok;
2018 nextToken();
2019
2020 // JS doesn't have macros, and within classes colons indicate fields, not
2021 // labels.
2022 if (Style.isJavaScript())
2023 break;
2024
2025 auto OneTokenSoFar = [&]() {
2026 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2027 while (I != E && I->Tok->is(Kind: tok::comment))
2028 ++I;
2029 if (Style.isVerilog())
2030 while (I != E && I->Tok->is(Kind: tok::hash))
2031 ++I;
2032 return I != E && (++I == E);
2033 };
2034 if (OneTokenSoFar()) {
2035 // Recognize function-like macro usages without trailing semicolon as
2036 // well as free-standing macros like Q_OBJECT.
2037 bool FunctionLike = FormatTok->is(Kind: tok::l_paren);
2038 if (FunctionLike)
2039 parseParens();
2040
2041 bool FollowedByNewline =
2042 CommentsBeforeNextToken.empty()
2043 ? FormatTok->NewlinesBefore > 0
2044 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2045
2046 if (FollowedByNewline &&
2047 (Text.size() >= 5 ||
2048 (FunctionLike && FormatTok->isNot(Kind: tok::l_paren))) &&
2049 tokenCanStartNewLine(Tok: *FormatTok) && Text == Text.upper()) {
2050 if (PreviousToken->isNot(Kind: TT_UntouchableMacroFunc))
2051 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2052 addUnwrappedLine();
2053 return;
2054 }
2055 }
2056 break;
2057 }
2058 case tok::equal:
2059 if ((Style.isJavaScript() || Style.isCSharp()) &&
2060 FormatTok->is(TT: TT_FatArrow)) {
2061 tryToParseChildBlock();
2062 break;
2063 }
2064
2065 SeenEqual = true;
2066 nextToken();
2067 if (FormatTok->is(Kind: tok::l_brace)) {
2068 // Block kind should probably be set to BK_BracedInit for any language.
2069 // C# needs this change to ensure that array initialisers and object
2070 // initialisers are indented the same way.
2071 if (Style.isCSharp())
2072 FormatTok->setBlockKind(BK_BracedInit);
2073 // TableGen's defset statement has syntax of the form,
2074 // `defset <type> <name> = { <statement>... }`
2075 if (Style.isTableGen() &&
2076 Line->Tokens.begin()->Tok->is(II: Keywords.kw_defset)) {
2077 FormatTok->setFinalizedType(TT_FunctionLBrace);
2078 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2079 /*MunchSemi=*/false);
2080 addUnwrappedLine();
2081 break;
2082 }
2083 nextToken();
2084 parseBracedList();
2085 } else if (Style.Language == FormatStyle::LK_Proto &&
2086 FormatTok->is(Kind: tok::less)) {
2087 nextToken();
2088 parseBracedList(/*IsAngleBracket=*/true);
2089 }
2090 break;
2091 case tok::l_square:
2092 parseSquare();
2093 break;
2094 case tok::kw_new:
2095 if (Style.isCSharp() &&
2096 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2097 (Previous && Previous->isAccessSpecifierKeyword()))) {
2098 nextToken();
2099 } else {
2100 parseNew();
2101 }
2102 break;
2103 case tok::kw_switch:
2104 if (Style.isJava())
2105 parseSwitch(/*IsExpr=*/true);
2106 else
2107 nextToken();
2108 break;
2109 case tok::kw_case:
2110 // Proto: there are no switch/case statements.
2111 if (Style.Language == FormatStyle::LK_Proto) {
2112 nextToken();
2113 return;
2114 }
2115 // In Verilog switch is called case.
2116 if (Style.isVerilog()) {
2117 parseBlock();
2118 addUnwrappedLine();
2119 return;
2120 }
2121 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2122 // 'case: string' field declaration.
2123 nextToken();
2124 break;
2125 }
2126 parseCaseLabel();
2127 break;
2128 case tok::kw_default:
2129 nextToken();
2130 if (Style.isVerilog()) {
2131 if (FormatTok->is(Kind: tok::colon)) {
2132 // The label will be handled in the next iteration.
2133 break;
2134 }
2135 if (FormatTok->is(II: Keywords.kw_clocking)) {
2136 // A default clocking block.
2137 parseBlock();
2138 addUnwrappedLine();
2139 return;
2140 }
2141 parseVerilogCaseLabel();
2142 return;
2143 }
2144 break;
2145 case tok::colon:
2146 nextToken();
2147 if (Style.isVerilog()) {
2148 parseVerilogCaseLabel();
2149 return;
2150 }
2151 break;
2152 case tok::greater:
2153 nextToken();
2154 if (FormatTok->is(Kind: tok::l_brace))
2155 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2156 break;
2157 default:
2158 nextToken();
2159 break;
2160 }
2161 }
2162}
2163
2164bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2165 assert(FormatTok->is(tok::l_brace));
2166 if (!Style.isCSharp())
2167 return false;
2168 // See if it's a property accessor.
2169 if (!FormatTok->Previous || FormatTok->Previous->isNot(Kind: tok::identifier))
2170 return false;
2171
2172 // See if we are inside a property accessor.
2173 //
2174 // Record the current tokenPosition so that we can advance and
2175 // reset the current token. `Next` is not set yet so we need
2176 // another way to advance along the token stream.
2177 unsigned int StoredPosition = Tokens->getPosition();
2178 FormatToken *Tok = Tokens->getNextToken();
2179
2180 // A trivial property accessor is of the form:
2181 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2182 // Track these as they do not require line breaks to be introduced.
2183 bool HasSpecialAccessor = false;
2184 bool IsTrivialPropertyAccessor = true;
2185 bool HasAttribute = false;
2186 while (!eof()) {
2187 if (const bool IsAccessorKeyword =
2188 Tok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init, Ks: Keywords.kw_set);
2189 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2190 Tok->isOneOf(K1: tok::l_square, K2: tok::semi, Ks: Keywords.kw_internal)) {
2191 if (IsAccessorKeyword)
2192 HasSpecialAccessor = true;
2193 else if (Tok->is(Kind: tok::l_square))
2194 HasAttribute = true;
2195 Tok = Tokens->getNextToken();
2196 continue;
2197 }
2198 if (Tok->isNot(Kind: tok::r_brace))
2199 IsTrivialPropertyAccessor = false;
2200 break;
2201 }
2202
2203 if (!HasSpecialAccessor || HasAttribute) {
2204 Tokens->setPosition(StoredPosition);
2205 return false;
2206 }
2207
2208 // Try to parse the property accessor:
2209 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2210 Tokens->setPosition(StoredPosition);
2211 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2212 addUnwrappedLine();
2213 nextToken();
2214 do {
2215 switch (FormatTok->Tok.getKind()) {
2216 case tok::r_brace:
2217 nextToken();
2218 if (FormatTok->is(Kind: tok::equal)) {
2219 while (!eof() && FormatTok->isNot(Kind: tok::semi))
2220 nextToken();
2221 nextToken();
2222 }
2223 addUnwrappedLine();
2224 return true;
2225 case tok::l_brace:
2226 ++Line->Level;
2227 parseBlock(/*MustBeDeclaration=*/true);
2228 addUnwrappedLine();
2229 --Line->Level;
2230 break;
2231 case tok::equal:
2232 if (FormatTok->is(TT: TT_FatArrow)) {
2233 ++Line->Level;
2234 do {
2235 nextToken();
2236 } while (!eof() && FormatTok->isNot(Kind: tok::semi));
2237 nextToken();
2238 addUnwrappedLine();
2239 --Line->Level;
2240 break;
2241 }
2242 nextToken();
2243 break;
2244 default:
2245 if (FormatTok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init,
2246 Ks: Keywords.kw_set) &&
2247 !IsTrivialPropertyAccessor) {
2248 // Non-trivial get/set needs to be on its own line.
2249 addUnwrappedLine();
2250 }
2251 nextToken();
2252 }
2253 } while (!eof());
2254
2255 // Unreachable for well-formed code (paired '{' and '}').
2256 return true;
2257}
2258
2259bool UnwrappedLineParser::tryToParseLambda() {
2260 assert(FormatTok->is(tok::l_square));
2261 if (!IsCpp) {
2262 nextToken();
2263 return false;
2264 }
2265 FormatToken &LSquare = *FormatTok;
2266 if (!tryToParseLambdaIntroducer())
2267 return false;
2268
2269 bool SeenArrow = false;
2270 bool InTemplateParameterList = false;
2271
2272 while (FormatTok->isNot(Kind: tok::l_brace)) {
2273 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2274 nextToken();
2275 continue;
2276 }
2277 switch (FormatTok->Tok.getKind()) {
2278 case tok::l_brace:
2279 break;
2280 case tok::l_paren:
2281 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2282 break;
2283 case tok::l_square:
2284 parseSquare();
2285 break;
2286 case tok::less:
2287 assert(FormatTok->Previous);
2288 if (FormatTok->Previous->is(Kind: tok::r_square))
2289 InTemplateParameterList = true;
2290 nextToken();
2291 break;
2292 case tok::kw_auto:
2293 case tok::kw_class:
2294 case tok::kw_struct:
2295 case tok::kw_union:
2296 case tok::kw_template:
2297 case tok::kw_typename:
2298 case tok::amp:
2299 case tok::star:
2300 case tok::kw_const:
2301 case tok::kw_constexpr:
2302 case tok::kw_consteval:
2303 case tok::comma:
2304 case tok::greater:
2305 case tok::identifier:
2306 case tok::numeric_constant:
2307 case tok::coloncolon:
2308 case tok::kw_mutable:
2309 case tok::kw_noexcept:
2310 case tok::kw_static:
2311 nextToken();
2312 break;
2313 // Specialization of a template with an integer parameter can contain
2314 // arithmetic, logical, comparison and ternary operators.
2315 //
2316 // FIXME: This also accepts sequences of operators that are not in the scope
2317 // of a template argument list.
2318 //
2319 // In a C++ lambda a template type can only occur after an arrow. We use
2320 // this as an heuristic to distinguish between Objective-C expressions
2321 // followed by an `a->b` expression, such as:
2322 // ([obj func:arg] + a->b)
2323 // Otherwise the code below would parse as a lambda.
2324 case tok::plus:
2325 case tok::minus:
2326 case tok::exclaim:
2327 case tok::tilde:
2328 case tok::slash:
2329 case tok::percent:
2330 case tok::lessless:
2331 case tok::pipe:
2332 case tok::pipepipe:
2333 case tok::ampamp:
2334 case tok::caret:
2335 case tok::equalequal:
2336 case tok::exclaimequal:
2337 case tok::greaterequal:
2338 case tok::lessequal:
2339 case tok::question:
2340 case tok::colon:
2341 case tok::ellipsis:
2342 case tok::kw_true:
2343 case tok::kw_false:
2344 if (SeenArrow || InTemplateParameterList) {
2345 nextToken();
2346 break;
2347 }
2348 return true;
2349 case tok::arrow:
2350 // This might or might not actually be a lambda arrow (this could be an
2351 // ObjC method invocation followed by a dereferencing arrow). We might
2352 // reset this back to TT_Unknown in TokenAnnotator.
2353 FormatTok->setFinalizedType(TT_LambdaArrow);
2354 SeenArrow = true;
2355 nextToken();
2356 break;
2357 case tok::kw_requires: {
2358 auto *RequiresToken = FormatTok;
2359 nextToken();
2360 parseRequiresClause(RequiresToken);
2361 break;
2362 }
2363 case tok::equal:
2364 if (!InTemplateParameterList)
2365 return true;
2366 nextToken();
2367 break;
2368 default:
2369 return true;
2370 }
2371 }
2372
2373 FormatTok->setFinalizedType(TT_LambdaLBrace);
2374 LSquare.setFinalizedType(TT_LambdaLSquare);
2375
2376 NestedLambdas.push_back(Elt: Line->SeenDecltypeAuto);
2377 parseChildBlock();
2378 assert(!NestedLambdas.empty());
2379 NestedLambdas.pop_back();
2380
2381 return true;
2382}
2383
2384bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2385 const FormatToken *Previous = FormatTok->Previous;
2386 const FormatToken *LeftSquare = FormatTok;
2387 nextToken();
2388 if (Previous) {
2389 if (Previous->Tok.getIdentifierInfo() &&
2390 !Previous->isOneOf(K1: tok::kw_return, K2: tok::kw_co_await, Ks: tok::kw_co_yield,
2391 Ks: tok::kw_co_return)) {
2392 return false;
2393 }
2394 if (Previous->closesScope()) {
2395 // Not a potential C-style cast.
2396 if (Previous->isNot(Kind: tok::r_paren))
2397 return false;
2398 const auto *BeforeRParen = Previous->getPreviousNonComment();
2399 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2400 // and `int (*)()`.
2401 if (!BeforeRParen || !BeforeRParen->isOneOf(K1: tok::greater, K2: tok::r_paren))
2402 return false;
2403 }
2404 }
2405 if (LeftSquare->isCppStructuredBinding(IsCpp))
2406 return false;
2407 if (FormatTok->is(Kind: tok::l_square) || tok::isLiteral(K: FormatTok->Tok.getKind()))
2408 return false;
2409 if (FormatTok->is(Kind: tok::r_square)) {
2410 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2411 if (Next->is(Kind: tok::greater))
2412 return false;
2413 }
2414 parseSquare(/*LambdaIntroducer=*/true);
2415 return true;
2416}
2417
2418void UnwrappedLineParser::tryToParseJSFunction() {
2419 assert(FormatTok->is(Keywords.kw_function));
2420 if (FormatTok->is(II: Keywords.kw_async))
2421 nextToken();
2422 // Consume "function".
2423 nextToken();
2424
2425 // Consume * (generator function). Treat it like C++'s overloaded operators.
2426 if (FormatTok->is(Kind: tok::star)) {
2427 FormatTok->setFinalizedType(TT_OverloadedOperator);
2428 nextToken();
2429 }
2430
2431 // Consume function name.
2432 if (FormatTok->is(Kind: tok::identifier))
2433 nextToken();
2434
2435 if (FormatTok->isNot(Kind: tok::l_paren))
2436 return;
2437
2438 // Parse formal parameter list.
2439 parseParens();
2440
2441 if (FormatTok->is(Kind: tok::colon)) {
2442 // Parse a type definition.
2443 nextToken();
2444
2445 // Eat the type declaration. For braced inline object types, balance braces,
2446 // otherwise just parse until finding an l_brace for the function body.
2447 if (FormatTok->is(Kind: tok::l_brace))
2448 tryToParseBracedList();
2449 else
2450 while (!FormatTok->isOneOf(K1: tok::l_brace, K2: tok::semi) && !eof())
2451 nextToken();
2452 }
2453
2454 if (FormatTok->is(Kind: tok::semi))
2455 return;
2456
2457 parseChildBlock();
2458}
2459
2460bool UnwrappedLineParser::tryToParseBracedList() {
2461 if (FormatTok->is(BBK: BK_Unknown))
2462 calculateBraceTypes();
2463 assert(FormatTok->isNot(BK_Unknown));
2464 if (FormatTok->is(BBK: BK_Block))
2465 return false;
2466 nextToken();
2467 parseBracedList();
2468 return true;
2469}
2470
2471bool UnwrappedLineParser::tryToParseChildBlock() {
2472 assert(Style.isJavaScript() || Style.isCSharp());
2473 assert(FormatTok->is(TT_FatArrow));
2474 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2475 // They always start an expression or a child block if followed by a curly
2476 // brace.
2477 nextToken();
2478 if (FormatTok->isNot(Kind: tok::l_brace))
2479 return false;
2480 parseChildBlock();
2481 return true;
2482}
2483
2484bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2485 assert(!IsAngleBracket || !IsEnum);
2486 bool HasError = false;
2487
2488 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2489 // replace this by using parseAssignmentExpression() inside.
2490 do {
2491 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow) &&
2492 tryToParseChildBlock()) {
2493 continue;
2494 }
2495 if (Style.isJavaScript()) {
2496 if (FormatTok->is(II: Keywords.kw_function)) {
2497 tryToParseJSFunction();
2498 continue;
2499 }
2500 if (FormatTok->is(Kind: tok::l_brace)) {
2501 // Could be a method inside of a braced list `{a() { return 1; }}`.
2502 if (tryToParseBracedList())
2503 continue;
2504 parseChildBlock();
2505 }
2506 }
2507 if (FormatTok->is(Kind: IsAngleBracket ? tok::greater : tok::r_brace)) {
2508 if (IsEnum) {
2509 FormatTok->setBlockKind(BK_Block);
2510 if (!Style.AllowShortEnumsOnASingleLine)
2511 addUnwrappedLine();
2512 }
2513 nextToken();
2514 return !HasError;
2515 }
2516 switch (FormatTok->Tok.getKind()) {
2517 case tok::l_square:
2518 if (Style.isCSharp())
2519 parseSquare();
2520 else
2521 tryToParseLambda();
2522 break;
2523 case tok::l_paren:
2524 parseParens();
2525 // JavaScript can just have free standing methods and getters/setters in
2526 // object literals. Detect them by a "{" following ")".
2527 if (Style.isJavaScript()) {
2528 if (FormatTok->is(Kind: tok::l_brace))
2529 parseChildBlock();
2530 break;
2531 }
2532 break;
2533 case tok::l_brace:
2534 // Assume there are no blocks inside a braced init list apart
2535 // from the ones we explicitly parse out (like lambdas).
2536 FormatTok->setBlockKind(BK_BracedInit);
2537 if (!IsAngleBracket) {
2538 auto *Prev = FormatTok->Previous;
2539 if (Prev && Prev->is(Kind: tok::greater))
2540 Prev->setFinalizedType(TT_TemplateCloser);
2541 }
2542 nextToken();
2543 parseBracedList();
2544 break;
2545 case tok::less:
2546 nextToken();
2547 if (IsAngleBracket)
2548 parseBracedList(/*IsAngleBracket=*/true);
2549 break;
2550 case tok::semi:
2551 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2552 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2553 // used for error recovery if we have otherwise determined that this is
2554 // a braced list.
2555 if (Style.isJavaScript()) {
2556 nextToken();
2557 break;
2558 }
2559 HasError = true;
2560 if (!IsEnum)
2561 return false;
2562 nextToken();
2563 break;
2564 case tok::comma:
2565 nextToken();
2566 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2567 addUnwrappedLine();
2568 break;
2569 default:
2570 nextToken();
2571 break;
2572 }
2573 } while (!eof());
2574 return false;
2575}
2576
2577/// \brief Parses a pair of parentheses (and everything between them).
2578/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2579/// double ampersands. This applies for all nested scopes as well.
2580///
2581/// Returns whether there is a `=` token between the parentheses.
2582bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2583 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2584 auto *LParen = FormatTok;
2585 bool SeenComma = false;
2586 bool SeenEqual = false;
2587 bool MightBeFoldExpr = false;
2588 nextToken();
2589 const bool MightBeStmtExpr = FormatTok->is(Kind: tok::l_brace);
2590 do {
2591 switch (FormatTok->Tok.getKind()) {
2592 case tok::l_paren:
2593 if (parseParens(AmpAmpTokenType))
2594 SeenEqual = true;
2595 if (Style.isJava() && FormatTok->is(Kind: tok::l_brace))
2596 parseChildBlock();
2597 break;
2598 case tok::r_paren: {
2599 auto *Prev = LParen->Previous;
2600 auto *RParen = FormatTok;
2601 nextToken();
2602 if (Prev) {
2603 auto OptionalParens = [&] {
2604 if (MightBeStmtExpr || MightBeFoldExpr || Line->InMacroBody ||
2605 SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave ||
2606 RParen->getPreviousNonComment() == LParen) {
2607 return false;
2608 }
2609 const bool DoubleParens =
2610 Prev->is(Kind: tok::l_paren) && FormatTok->is(Kind: tok::r_paren);
2611 if (DoubleParens) {
2612 const auto *PrevPrev = Prev->getPreviousNonComment();
2613 const bool Excluded =
2614 PrevPrev &&
2615 (PrevPrev->isOneOf(K1: tok::kw___attribute, K2: tok::kw_decltype) ||
2616 (SeenEqual &&
2617 (PrevPrev->isOneOf(K1: tok::kw_if, K2: tok::kw_while) ||
2618 PrevPrev->endsSequence(K1: tok::kw_constexpr, Tokens: tok::kw_if))));
2619 if (!Excluded)
2620 return true;
2621 } else {
2622 const bool CommaSeparated =
2623 Prev->isOneOf(K1: tok::l_paren, K2: tok::comma) &&
2624 FormatTok->isOneOf(K1: tok::comma, K2: tok::r_paren);
2625 if (CommaSeparated &&
2626 // LParen is not preceded by ellipsis, comma.
2627 !Prev->endsSequence(K1: tok::comma, Tokens: tok::ellipsis) &&
2628 // RParen is not followed by comma, ellipsis.
2629 !(FormatTok->is(Kind: tok::comma) &&
2630 Tokens->peekNextToken()->is(Kind: tok::ellipsis))) {
2631 return true;
2632 }
2633 const bool ReturnParens =
2634 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2635 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2636 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2637 Prev->isOneOf(K1: tok::kw_return, K2: tok::kw_co_return) &&
2638 FormatTok->is(Kind: tok::semi);
2639 if (ReturnParens)
2640 return true;
2641 }
2642 return false;
2643 };
2644 if (Prev->is(TT: TT_TypenameMacro)) {
2645 LParen->setFinalizedType(TT_TypeDeclarationParen);
2646 RParen->setFinalizedType(TT_TypeDeclarationParen);
2647 } else if (Prev->is(Kind: tok::greater) && RParen->Previous == LParen) {
2648 Prev->setFinalizedType(TT_TemplateCloser);
2649 } else if (OptionalParens()) {
2650 LParen->Optional = true;
2651 RParen->Optional = true;
2652 }
2653 }
2654 return SeenEqual;
2655 }
2656 case tok::r_brace:
2657 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2658 return SeenEqual;
2659 case tok::l_square:
2660 tryToParseLambda();
2661 break;
2662 case tok::l_brace:
2663 if (!tryToParseBracedList())
2664 parseChildBlock();
2665 break;
2666 case tok::at:
2667 nextToken();
2668 if (FormatTok->is(Kind: tok::l_brace)) {
2669 nextToken();
2670 parseBracedList();
2671 }
2672 break;
2673 case tok::comma:
2674 SeenComma = true;
2675 nextToken();
2676 break;
2677 case tok::ellipsis:
2678 MightBeFoldExpr = true;
2679 nextToken();
2680 break;
2681 case tok::equal:
2682 SeenEqual = true;
2683 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow))
2684 tryToParseChildBlock();
2685 else
2686 nextToken();
2687 break;
2688 case tok::kw_class:
2689 if (Style.isJavaScript())
2690 parseRecord(/*ParseAsExpr=*/true);
2691 else
2692 nextToken();
2693 break;
2694 case tok::identifier:
2695 if (Style.isJavaScript() && (FormatTok->is(II: Keywords.kw_function)))
2696 tryToParseJSFunction();
2697 else
2698 nextToken();
2699 break;
2700 case tok::kw_switch:
2701 if (Style.isJava())
2702 parseSwitch(/*IsExpr=*/true);
2703 else
2704 nextToken();
2705 break;
2706 case tok::kw_requires: {
2707 auto RequiresToken = FormatTok;
2708 nextToken();
2709 parseRequiresExpression(RequiresToken);
2710 break;
2711 }
2712 case tok::ampamp:
2713 if (AmpAmpTokenType != TT_Unknown)
2714 FormatTok->setFinalizedType(AmpAmpTokenType);
2715 [[fallthrough]];
2716 default:
2717 nextToken();
2718 break;
2719 }
2720 } while (!eof());
2721 return SeenEqual;
2722}
2723
2724void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2725 if (!LambdaIntroducer) {
2726 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2727 if (tryToParseLambda())
2728 return;
2729 }
2730 do {
2731 switch (FormatTok->Tok.getKind()) {
2732 case tok::l_paren:
2733 parseParens();
2734 break;
2735 case tok::r_square:
2736 nextToken();
2737 return;
2738 case tok::r_brace:
2739 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2740 return;
2741 case tok::l_square:
2742 parseSquare();
2743 break;
2744 case tok::l_brace: {
2745 if (!tryToParseBracedList())
2746 parseChildBlock();
2747 break;
2748 }
2749 case tok::at:
2750 case tok::colon:
2751 nextToken();
2752 if (FormatTok->is(Kind: tok::l_brace)) {
2753 nextToken();
2754 parseBracedList();
2755 }
2756 break;
2757 default:
2758 nextToken();
2759 break;
2760 }
2761 } while (!eof());
2762}
2763
2764void UnwrappedLineParser::keepAncestorBraces() {
2765 if (!Style.RemoveBracesLLVM)
2766 return;
2767
2768 const int MaxNestingLevels = 2;
2769 const int Size = NestedTooDeep.size();
2770 if (Size >= MaxNestingLevels)
2771 NestedTooDeep[Size - MaxNestingLevels] = true;
2772 NestedTooDeep.push_back(Elt: false);
2773}
2774
2775static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2776 for (const auto &Token : llvm::reverse(C: Line.Tokens))
2777 if (Token.Tok->isNot(Kind: tok::comment))
2778 return Token.Tok;
2779
2780 return nullptr;
2781}
2782
2783void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2784 FormatToken *Tok = nullptr;
2785
2786 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2787 PreprocessorDirectives.empty() && FormatTok->isNot(Kind: tok::semi)) {
2788 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2789 ? getLastNonComment(Line: *Line)
2790 : Line->Tokens.back().Tok;
2791 assert(Tok);
2792 if (Tok->BraceCount < 0) {
2793 assert(Tok->BraceCount == -1);
2794 Tok = nullptr;
2795 } else {
2796 Tok->BraceCount = -1;
2797 }
2798 }
2799
2800 addUnwrappedLine();
2801 ++Line->Level;
2802 ++Line->UnbracedBodyLevel;
2803 parseStructuralElement();
2804 --Line->UnbracedBodyLevel;
2805
2806 if (Tok) {
2807 assert(!Line->InPPDirective);
2808 Tok = nullptr;
2809 for (const auto &L : llvm::reverse(C&: *CurrentLines)) {
2810 if (!L.InPPDirective && getLastNonComment(Line: L)) {
2811 Tok = L.Tokens.back().Tok;
2812 break;
2813 }
2814 }
2815 assert(Tok);
2816 ++Tok->BraceCount;
2817 }
2818
2819 if (CheckEOF && eof())
2820 addUnwrappedLine();
2821
2822 --Line->Level;
2823}
2824
2825static void markOptionalBraces(FormatToken *LeftBrace) {
2826 if (!LeftBrace)
2827 return;
2828
2829 assert(LeftBrace->is(tok::l_brace));
2830
2831 FormatToken *RightBrace = LeftBrace->MatchingParen;
2832 if (!RightBrace) {
2833 assert(!LeftBrace->Optional);
2834 return;
2835 }
2836
2837 assert(RightBrace->is(tok::r_brace));
2838 assert(RightBrace->MatchingParen == LeftBrace);
2839 assert(LeftBrace->Optional == RightBrace->Optional);
2840
2841 LeftBrace->Optional = true;
2842 RightBrace->Optional = true;
2843}
2844
2845void UnwrappedLineParser::handleAttributes() {
2846 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2847 if (FormatTok->isAttribute())
2848 nextToken();
2849 else if (FormatTok->is(Kind: tok::l_square))
2850 handleCppAttributes();
2851}
2852
2853bool UnwrappedLineParser::handleCppAttributes() {
2854 // Handle [[likely]] / [[unlikely]] attributes.
2855 assert(FormatTok->is(tok::l_square));
2856 if (!tryToParseSimpleAttribute())
2857 return false;
2858 parseSquare();
2859 return true;
2860}
2861
2862/// Returns whether \c Tok begins a block.
2863bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2864 // FIXME: rename the function or make
2865 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2866 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2867 : Tok.is(Kind: tok::l_brace);
2868}
2869
2870FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2871 bool KeepBraces,
2872 bool IsVerilogAssert) {
2873 assert((FormatTok->is(tok::kw_if) ||
2874 (Style.isVerilog() &&
2875 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2876 Keywords.kw_assume, Keywords.kw_cover))) &&
2877 "'if' expected");
2878 nextToken();
2879
2880 if (IsVerilogAssert) {
2881 // Handle `assert #0` and `assert final`.
2882 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
2883 nextToken();
2884 if (FormatTok->is(Kind: tok::numeric_constant))
2885 nextToken();
2886 } else if (FormatTok->isOneOf(K1: Keywords.kw_final, K2: Keywords.kw_property,
2887 Ks: Keywords.kw_sequence)) {
2888 nextToken();
2889 }
2890 }
2891
2892 // TableGen's if statement has the form of `if <cond> then { ... }`.
2893 if (Style.isTableGen()) {
2894 while (!eof() && FormatTok->isNot(Kind: Keywords.kw_then)) {
2895 // Simply skip until then. This range only contains a value.
2896 nextToken();
2897 }
2898 }
2899
2900 // Handle `if !consteval`.
2901 if (FormatTok->is(Kind: tok::exclaim))
2902 nextToken();
2903
2904 bool KeepIfBraces = true;
2905 if (FormatTok->is(Kind: tok::kw_consteval)) {
2906 nextToken();
2907 } else {
2908 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2909 if (FormatTok->isOneOf(K1: tok::kw_constexpr, K2: tok::identifier))
2910 nextToken();
2911 if (FormatTok->is(Kind: tok::l_paren)) {
2912 FormatTok->setFinalizedType(TT_ConditionLParen);
2913 parseParens();
2914 }
2915 }
2916 handleAttributes();
2917 // The then action is optional in Verilog assert statements.
2918 if (IsVerilogAssert && FormatTok->is(Kind: tok::semi)) {
2919 nextToken();
2920 addUnwrappedLine();
2921 return nullptr;
2922 }
2923
2924 bool NeedsUnwrappedLine = false;
2925 keepAncestorBraces();
2926
2927 FormatToken *IfLeftBrace = nullptr;
2928 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2929
2930 if (isBlockBegin(Tok: *FormatTok)) {
2931 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2932 IfLeftBrace = FormatTok;
2933 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2934 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2935 /*MunchSemi=*/true, KeepBraces: KeepIfBraces, IfKind: &IfBlockKind);
2936 setPreviousRBraceType(TT_ControlStatementRBrace);
2937 if (Style.BraceWrapping.BeforeElse)
2938 addUnwrappedLine();
2939 else
2940 NeedsUnwrappedLine = true;
2941 } else if (IsVerilogAssert && FormatTok->is(Kind: tok::kw_else)) {
2942 addUnwrappedLine();
2943 } else {
2944 parseUnbracedBody();
2945 }
2946
2947 if (Style.RemoveBracesLLVM) {
2948 assert(!NestedTooDeep.empty());
2949 KeepIfBraces = KeepIfBraces ||
2950 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2951 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2952 IfBlockKind == IfStmtKind::IfElseIf;
2953 }
2954
2955 bool KeepElseBraces = KeepIfBraces;
2956 FormatToken *ElseLeftBrace = nullptr;
2957 IfStmtKind Kind = IfStmtKind::IfOnly;
2958
2959 if (FormatTok->is(Kind: tok::kw_else)) {
2960 if (Style.RemoveBracesLLVM) {
2961 NestedTooDeep.back() = false;
2962 Kind = IfStmtKind::IfElse;
2963 }
2964 nextToken();
2965 handleAttributes();
2966 if (isBlockBegin(Tok: *FormatTok)) {
2967 const bool FollowedByIf = Tokens->peekNextToken()->is(Kind: tok::kw_if);
2968 FormatTok->setFinalizedType(TT_ElseLBrace);
2969 ElseLeftBrace = FormatTok;
2970 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2971 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2972 FormatToken *IfLBrace =
2973 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2974 /*MunchSemi=*/true, KeepBraces: KeepElseBraces, IfKind: &ElseBlockKind);
2975 setPreviousRBraceType(TT_ElseRBrace);
2976 if (FormatTok->is(Kind: tok::kw_else)) {
2977 KeepElseBraces = KeepElseBraces ||
2978 ElseBlockKind == IfStmtKind::IfOnly ||
2979 ElseBlockKind == IfStmtKind::IfElseIf;
2980 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2981 KeepElseBraces = true;
2982 assert(ElseLeftBrace->MatchingParen);
2983 markOptionalBraces(LeftBrace: ElseLeftBrace);
2984 }
2985 addUnwrappedLine();
2986 } else if (!IsVerilogAssert && FormatTok->is(Kind: tok::kw_if)) {
2987 const FormatToken *Previous = Tokens->getPreviousToken();
2988 assert(Previous);
2989 const bool IsPrecededByComment = Previous->is(Kind: tok::comment);
2990 if (IsPrecededByComment) {
2991 addUnwrappedLine();
2992 ++Line->Level;
2993 }
2994 bool TooDeep = true;
2995 if (Style.RemoveBracesLLVM) {
2996 Kind = IfStmtKind::IfElseIf;
2997 TooDeep = NestedTooDeep.pop_back_val();
2998 }
2999 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepBraces: KeepIfBraces);
3000 if (Style.RemoveBracesLLVM)
3001 NestedTooDeep.push_back(Elt: TooDeep);
3002 if (IsPrecededByComment)
3003 --Line->Level;
3004 } else {
3005 parseUnbracedBody(/*CheckEOF=*/true);
3006 }
3007 } else {
3008 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3009 if (NeedsUnwrappedLine)
3010 addUnwrappedLine();
3011 }
3012
3013 if (!Style.RemoveBracesLLVM)
3014 return nullptr;
3015
3016 assert(!NestedTooDeep.empty());
3017 KeepElseBraces = KeepElseBraces ||
3018 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3019 NestedTooDeep.back();
3020
3021 NestedTooDeep.pop_back();
3022
3023 if (!KeepIfBraces && !KeepElseBraces) {
3024 markOptionalBraces(LeftBrace: IfLeftBrace);
3025 markOptionalBraces(LeftBrace: ElseLeftBrace);
3026 } else if (IfLeftBrace) {
3027 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3028 if (IfRightBrace) {
3029 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3030 assert(!IfLeftBrace->Optional);
3031 assert(!IfRightBrace->Optional);
3032 IfLeftBrace->MatchingParen = nullptr;
3033 IfRightBrace->MatchingParen = nullptr;
3034 }
3035 }
3036
3037 if (IfKind)
3038 *IfKind = Kind;
3039
3040 return IfLeftBrace;
3041}
3042
3043void UnwrappedLineParser::parseTryCatch() {
3044 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3045 nextToken();
3046 bool NeedsUnwrappedLine = false;
3047 bool HasCtorInitializer = false;
3048 if (FormatTok->is(Kind: tok::colon)) {
3049 auto *Colon = FormatTok;
3050 // We are in a function try block, what comes is an initializer list.
3051 nextToken();
3052 if (FormatTok->is(Kind: tok::identifier)) {
3053 HasCtorInitializer = true;
3054 Colon->setFinalizedType(TT_CtorInitializerColon);
3055 }
3056
3057 // In case identifiers were removed by clang-tidy, what might follow is
3058 // multiple commas in sequence - before the first identifier.
3059 while (FormatTok->is(Kind: tok::comma))
3060 nextToken();
3061
3062 while (FormatTok->is(Kind: tok::identifier)) {
3063 nextToken();
3064 if (FormatTok->is(Kind: tok::l_paren)) {
3065 parseParens();
3066 } else if (FormatTok->is(Kind: tok::l_brace)) {
3067 nextToken();
3068 parseBracedList();
3069 }
3070
3071 // In case identifiers were removed by clang-tidy, what might follow is
3072 // multiple commas in sequence - after the first identifier.
3073 while (FormatTok->is(Kind: tok::comma))
3074 nextToken();
3075 }
3076 }
3077 // Parse try with resource.
3078 if (Style.isJava() && FormatTok->is(Kind: tok::l_paren))
3079 parseParens();
3080
3081 keepAncestorBraces();
3082
3083 if (FormatTok->is(Kind: tok::l_brace)) {
3084 if (HasCtorInitializer)
3085 FormatTok->setFinalizedType(TT_FunctionLBrace);
3086 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3087 parseBlock();
3088 if (Style.BraceWrapping.BeforeCatch)
3089 addUnwrappedLine();
3090 else
3091 NeedsUnwrappedLine = true;
3092 } else if (FormatTok->isNot(Kind: tok::kw_catch)) {
3093 // The C++ standard requires a compound-statement after a try.
3094 // If there's none, we try to assume there's a structuralElement
3095 // and try to continue.
3096 addUnwrappedLine();
3097 ++Line->Level;
3098 parseStructuralElement();
3099 --Line->Level;
3100 }
3101 for (bool SeenCatch = false;;) {
3102 if (FormatTok->is(Kind: tok::at))
3103 nextToken();
3104 if (!(FormatTok->isOneOf(K1: tok::kw_catch, K2: Keywords.kw___except,
3105 Ks: tok::kw___finally, Ks: tok::objc_catch,
3106 Ks: tok::objc_finally) ||
3107 ((Style.isJava() || Style.isJavaScript()) &&
3108 FormatTok->is(II: Keywords.kw_finally)))) {
3109 break;
3110 }
3111 if (FormatTok->is(Kind: tok::kw_catch))
3112 SeenCatch = true;
3113 nextToken();
3114 while (FormatTok->isNot(Kind: tok::l_brace)) {
3115 if (FormatTok->is(Kind: tok::l_paren)) {
3116 parseParens();
3117 continue;
3118 }
3119 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::r_brace) || eof()) {
3120 if (Style.RemoveBracesLLVM)
3121 NestedTooDeep.pop_back();
3122 return;
3123 }
3124 nextToken();
3125 }
3126 if (SeenCatch) {
3127 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3128 SeenCatch = false;
3129 }
3130 NeedsUnwrappedLine = false;
3131 Line->MustBeDeclaration = false;
3132 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3133 parseBlock();
3134 if (Style.BraceWrapping.BeforeCatch)
3135 addUnwrappedLine();
3136 else
3137 NeedsUnwrappedLine = true;
3138 }
3139
3140 if (Style.RemoveBracesLLVM)
3141 NestedTooDeep.pop_back();
3142
3143 if (NeedsUnwrappedLine)
3144 addUnwrappedLine();
3145}
3146
3147void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3148 bool ManageWhitesmithsBraces =
3149 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3150
3151 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3152 // the whole block.
3153 if (ManageWhitesmithsBraces)
3154 ++Line->Level;
3155
3156 // Munch the semicolon after the block. This is more common than one would
3157 // think. Putting the semicolon into its own line is very ugly.
3158 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3159 /*KeepBraces=*/true, /*IfKind=*/nullptr, UnindentWhitesmithsBraces: ManageWhitesmithsBraces);
3160
3161 addUnwrappedLine(AdjustLevel: AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3162
3163 if (ManageWhitesmithsBraces)
3164 --Line->Level;
3165}
3166
3167void UnwrappedLineParser::parseNamespace() {
3168 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3169 "'namespace' expected");
3170
3171 const FormatToken &InitialToken = *FormatTok;
3172 nextToken();
3173 if (InitialToken.is(TT: TT_NamespaceMacro)) {
3174 parseParens();
3175 } else {
3176 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::kw_inline,
3177 Ks: tok::l_square, Ks: tok::period, Ks: tok::l_paren) ||
3178 (Style.isCSharp() && FormatTok->is(Kind: tok::kw_union))) {
3179 if (FormatTok->is(Kind: tok::l_square))
3180 parseSquare();
3181 else if (FormatTok->is(Kind: tok::l_paren))
3182 parseParens();
3183 else
3184 nextToken();
3185 }
3186 }
3187 if (FormatTok->is(Kind: tok::l_brace)) {
3188 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3189
3190 if (ShouldBreakBeforeBrace(Style, InitialToken))
3191 addUnwrappedLine();
3192
3193 unsigned AddLevels =
3194 Style.NamespaceIndentation == FormatStyle::NI_All ||
3195 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3196 DeclarationScopeStack.size() > 1)
3197 ? 1u
3198 : 0u;
3199 parseNamespaceOrExportBlock(AddLevels);
3200 }
3201 // FIXME: Add error handling.
3202}
3203
3204void UnwrappedLineParser::parseCppExportBlock() {
3205 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3206}
3207
3208void UnwrappedLineParser::parseNew() {
3209 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3210 nextToken();
3211
3212 if (Style.isCSharp()) {
3213 do {
3214 // Handle constructor invocation, e.g. `new(field: value)`.
3215 if (FormatTok->is(Kind: tok::l_paren))
3216 parseParens();
3217
3218 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3219 if (FormatTok->is(Kind: tok::l_brace))
3220 parseBracedList();
3221
3222 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::comma))
3223 return;
3224
3225 nextToken();
3226 } while (!eof());
3227 }
3228
3229 if (!Style.isJava())
3230 return;
3231
3232 // In Java, we can parse everything up to the parens, which aren't optional.
3233 do {
3234 // There should not be a ;, { or } before the new's open paren.
3235 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::r_brace))
3236 return;
3237
3238 // Consume the parens.
3239 if (FormatTok->is(Kind: tok::l_paren)) {
3240 parseParens();
3241
3242 // If there is a class body of an anonymous class, consume that as child.
3243 if (FormatTok->is(Kind: tok::l_brace))
3244 parseChildBlock();
3245 return;
3246 }
3247 nextToken();
3248 } while (!eof());
3249}
3250
3251void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3252 keepAncestorBraces();
3253
3254 if (isBlockBegin(Tok: *FormatTok)) {
3255 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3256 FormatToken *LeftBrace = FormatTok;
3257 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3258 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3259 /*MunchSemi=*/true, KeepBraces);
3260 setPreviousRBraceType(TT_ControlStatementRBrace);
3261 if (!KeepBraces) {
3262 assert(!NestedTooDeep.empty());
3263 if (!NestedTooDeep.back())
3264 markOptionalBraces(LeftBrace);
3265 }
3266 if (WrapRightBrace)
3267 addUnwrappedLine();
3268 } else {
3269 parseUnbracedBody();
3270 }
3271
3272 if (!KeepBraces)
3273 NestedTooDeep.pop_back();
3274}
3275
3276void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3277 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3278 (Style.isVerilog() &&
3279 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3280 Keywords.kw_always_ff, Keywords.kw_always_latch,
3281 Keywords.kw_final, Keywords.kw_initial,
3282 Keywords.kw_foreach, Keywords.kw_forever,
3283 Keywords.kw_repeat))) &&
3284 "'for', 'while' or foreach macro expected");
3285 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3286 !FormatTok->isOneOf(K1: tok::kw_for, K2: tok::kw_while);
3287
3288 nextToken();
3289 // JS' for await ( ...
3290 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_await))
3291 nextToken();
3292 if (IsCpp && FormatTok->is(Kind: tok::kw_co_await))
3293 nextToken();
3294 if (HasParens && FormatTok->is(Kind: tok::l_paren)) {
3295 // The type is only set for Verilog basically because we were afraid to
3296 // change the existing behavior for loops. See the discussion on D121756 for
3297 // details.
3298 if (Style.isVerilog())
3299 FormatTok->setFinalizedType(TT_ConditionLParen);
3300 parseParens();
3301 }
3302
3303 if (Style.isVerilog()) {
3304 // Event control.
3305 parseVerilogSensitivityList();
3306 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(Kind: tok::semi) &&
3307 Tokens->getPreviousToken()->is(Kind: tok::r_paren)) {
3308 nextToken();
3309 addUnwrappedLine();
3310 return;
3311 }
3312
3313 handleAttributes();
3314 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3315}
3316
3317void UnwrappedLineParser::parseDoWhile() {
3318 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3319 nextToken();
3320
3321 parseLoopBody(/*KeepBraces=*/true, WrapRightBrace: Style.BraceWrapping.BeforeWhile);
3322
3323 // FIXME: Add error handling.
3324 if (FormatTok->isNot(Kind: tok::kw_while)) {
3325 addUnwrappedLine();
3326 return;
3327 }
3328
3329 FormatTok->setFinalizedType(TT_DoWhile);
3330
3331 // If in Whitesmiths mode, the line with the while() needs to be indented
3332 // to the same level as the block.
3333 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3334 ++Line->Level;
3335
3336 nextToken();
3337 parseStructuralElement();
3338}
3339
3340void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3341 nextToken();
3342 unsigned OldLineLevel = Line->Level;
3343
3344 if (LeftAlignLabel)
3345 Line->Level = 0;
3346 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3347 --Line->Level;
3348
3349 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3350 FormatTok->is(Kind: tok::l_brace)) {
3351
3352 CompoundStatementIndenter Indenter(this, Line->Level,
3353 Style.BraceWrapping.AfterCaseLabel,
3354 Style.BraceWrapping.IndentBraces);
3355 parseBlock();
3356 if (FormatTok->is(Kind: tok::kw_break)) {
3357 if (Style.BraceWrapping.AfterControlStatement ==
3358 FormatStyle::BWACS_Always) {
3359 addUnwrappedLine();
3360 if (!Style.IndentCaseBlocks &&
3361 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3362 ++Line->Level;
3363 }
3364 }
3365 parseStructuralElement();
3366 }
3367 addUnwrappedLine();
3368 } else {
3369 if (FormatTok->is(Kind: tok::semi))
3370 nextToken();
3371 addUnwrappedLine();
3372 }
3373 Line->Level = OldLineLevel;
3374 if (FormatTok->isNot(Kind: tok::l_brace)) {
3375 parseStructuralElement();
3376 addUnwrappedLine();
3377 }
3378}
3379
3380void UnwrappedLineParser::parseCaseLabel() {
3381 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3382 auto *Case = FormatTok;
3383
3384 // FIXME: fix handling of complex expressions here.
3385 do {
3386 nextToken();
3387 if (FormatTok->is(Kind: tok::colon)) {
3388 FormatTok->setFinalizedType(TT_CaseLabelColon);
3389 break;
3390 }
3391 if (Style.isJava() && FormatTok->is(Kind: tok::arrow)) {
3392 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3393 Case->setFinalizedType(TT_SwitchExpressionLabel);
3394 break;
3395 }
3396 } while (!eof());
3397 parseLabel();
3398}
3399
3400void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3401 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3402 nextToken();
3403 if (FormatTok->is(Kind: tok::l_paren))
3404 parseParens();
3405
3406 keepAncestorBraces();
3407
3408 if (FormatTok->is(Kind: tok::l_brace)) {
3409 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3410 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3411 : TT_ControlStatementLBrace);
3412 if (IsExpr)
3413 parseChildBlock();
3414 else
3415 parseBlock();
3416 setPreviousRBraceType(TT_ControlStatementRBrace);
3417 if (!IsExpr)
3418 addUnwrappedLine();
3419 } else {
3420 addUnwrappedLine();
3421 ++Line->Level;
3422 parseStructuralElement();
3423 --Line->Level;
3424 }
3425
3426 if (Style.RemoveBracesLLVM)
3427 NestedTooDeep.pop_back();
3428}
3429
3430void UnwrappedLineParser::parseAccessSpecifier() {
3431 nextToken();
3432 // Understand Qt's slots.
3433 if (FormatTok->isOneOf(K1: Keywords.kw_slots, K2: Keywords.kw_qslots))
3434 nextToken();
3435 // Otherwise, we don't know what it is, and we'd better keep the next token.
3436 if (FormatTok->is(Kind: tok::colon))
3437 nextToken();
3438 addUnwrappedLine();
3439}
3440
3441/// \brief Parses a requires, decides if it is a clause or an expression.
3442/// \pre The current token has to be the requires keyword.
3443/// \returns true if it parsed a clause.
3444bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3445 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3446 auto RequiresToken = FormatTok;
3447
3448 // We try to guess if it is a requires clause, or a requires expression. For
3449 // that we first consume the keyword and check the next token.
3450 nextToken();
3451
3452 switch (FormatTok->Tok.getKind()) {
3453 case tok::l_brace:
3454 // This can only be an expression, never a clause.
3455 parseRequiresExpression(RequiresToken);
3456 return false;
3457 case tok::l_paren:
3458 // Clauses and expression can start with a paren, it's unclear what we have.
3459 break;
3460 default:
3461 // All other tokens can only be a clause.
3462 parseRequiresClause(RequiresToken);
3463 return true;
3464 }
3465
3466 // Looking forward we would have to decide if there are function declaration
3467 // like arguments to the requires expression:
3468 // requires (T t) {
3469 // Or there is a constraint expression for the requires clause:
3470 // requires (C<T> && ...
3471
3472 // But first let's look behind.
3473 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3474
3475 if (!PreviousNonComment ||
3476 PreviousNonComment->is(TT: TT_RequiresExpressionLBrace)) {
3477 // If there is no token, or an expression left brace, we are a requires
3478 // clause within a requires expression.
3479 parseRequiresClause(RequiresToken);
3480 return true;
3481 }
3482
3483 switch (PreviousNonComment->Tok.getKind()) {
3484 case tok::greater:
3485 case tok::r_paren:
3486 case tok::kw_noexcept:
3487 case tok::kw_const:
3488 case tok::star:
3489 case tok::amp:
3490 // This is a requires clause.
3491 parseRequiresClause(RequiresToken);
3492 return true;
3493 case tok::ampamp: {
3494 // This can be either:
3495 // if (... && requires (T t) ...)
3496 // Or
3497 // void member(...) && requires (C<T> ...
3498 // We check the one token before that for a const:
3499 // void member(...) const && requires (C<T> ...
3500 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3501 if ((PrevPrev && PrevPrev->is(Kind: tok::kw_const)) || !SeenEqual) {
3502 parseRequiresClause(RequiresToken);
3503 return true;
3504 }
3505 break;
3506 }
3507 default:
3508 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3509 // This is a requires clause.
3510 parseRequiresClause(RequiresToken);
3511 return true;
3512 }
3513 // It's an expression.
3514 parseRequiresExpression(RequiresToken);
3515 return false;
3516 }
3517
3518 // Now we look forward and try to check if the paren content is a parameter
3519 // list. The parameters can be cv-qualified and contain references or
3520 // pointers.
3521 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3522 // of stuff: typename, const, *, &, &&, ::, identifiers.
3523
3524 unsigned StoredPosition = Tokens->getPosition();
3525 FormatToken *NextToken = Tokens->getNextToken();
3526 int Lookahead = 0;
3527 auto PeekNext = [&Lookahead, &NextToken, this] {
3528 ++Lookahead;
3529 NextToken = Tokens->getNextToken();
3530 };
3531
3532 bool FoundType = false;
3533 bool LastWasColonColon = false;
3534 int OpenAngles = 0;
3535
3536 for (; Lookahead < 50; PeekNext()) {
3537 switch (NextToken->Tok.getKind()) {
3538 case tok::kw_volatile:
3539 case tok::kw_const:
3540 case tok::comma:
3541 if (OpenAngles == 0) {
3542 FormatTok = Tokens->setPosition(StoredPosition);
3543 parseRequiresExpression(RequiresToken);
3544 return false;
3545 }
3546 break;
3547 case tok::eof:
3548 // Break out of the loop.
3549 Lookahead = 50;
3550 break;
3551 case tok::coloncolon:
3552 LastWasColonColon = true;
3553 break;
3554 case tok::kw_decltype:
3555 case tok::identifier:
3556 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3557 FormatTok = Tokens->setPosition(StoredPosition);
3558 parseRequiresExpression(RequiresToken);
3559 return false;
3560 }
3561 FoundType = true;
3562 LastWasColonColon = false;
3563 break;
3564 case tok::less:
3565 ++OpenAngles;
3566 break;
3567 case tok::greater:
3568 --OpenAngles;
3569 break;
3570 default:
3571 if (NextToken->isTypeName(LangOpts)) {
3572 FormatTok = Tokens->setPosition(StoredPosition);
3573 parseRequiresExpression(RequiresToken);
3574 return false;
3575 }
3576 break;
3577 }
3578 }
3579 // This seems to be a complicated expression, just assume it's a clause.
3580 FormatTok = Tokens->setPosition(StoredPosition);
3581 parseRequiresClause(RequiresToken);
3582 return true;
3583}
3584
3585/// \brief Parses a requires clause.
3586/// \param RequiresToken The requires keyword token, which starts this clause.
3587/// \pre We need to be on the next token after the requires keyword.
3588/// \sa parseRequiresExpression
3589///
3590/// Returns if it either has finished parsing the clause, or it detects, that
3591/// the clause is incorrect.
3592void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3593 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3594 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3595
3596 // If there is no previous token, we are within a requires expression,
3597 // otherwise we will always have the template or function declaration in front
3598 // of it.
3599 bool InRequiresExpression =
3600 !RequiresToken->Previous ||
3601 RequiresToken->Previous->is(TT: TT_RequiresExpressionLBrace);
3602
3603 RequiresToken->setFinalizedType(InRequiresExpression
3604 ? TT_RequiresClauseInARequiresExpression
3605 : TT_RequiresClause);
3606
3607 // NOTE: parseConstraintExpression is only ever called from this function.
3608 // It could be inlined into here.
3609 parseConstraintExpression();
3610
3611 if (!InRequiresExpression && FormatTok->Previous)
3612 FormatTok->Previous->ClosesRequiresClause = true;
3613}
3614
3615/// \brief Parses a requires expression.
3616/// \param RequiresToken The requires keyword token, which starts this clause.
3617/// \pre We need to be on the next token after the requires keyword.
3618/// \sa parseRequiresClause
3619///
3620/// Returns if it either has finished parsing the expression, or it detects,
3621/// that the expression is incorrect.
3622void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3623 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3624 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3625
3626 RequiresToken->setFinalizedType(TT_RequiresExpression);
3627
3628 if (FormatTok->is(Kind: tok::l_paren)) {
3629 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3630 parseParens();
3631 }
3632
3633 if (FormatTok->is(Kind: tok::l_brace)) {
3634 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3635 parseChildBlock();
3636 }
3637}
3638
3639/// \brief Parses a constraint expression.
3640///
3641/// This is the body of a requires clause. It returns, when the parsing is
3642/// complete, or the expression is incorrect.
3643void UnwrappedLineParser::parseConstraintExpression() {
3644 // The special handling for lambdas is needed since tryToParseLambda() eats a
3645 // token and if a requires expression is the last part of a requires clause
3646 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3647 // not set on the correct token. Thus we need to be aware if we even expect a
3648 // lambda to be possible.
3649 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3650 bool LambdaNextTimeAllowed = true;
3651
3652 // Within lambda declarations, it is permitted to put a requires clause after
3653 // its template parameter list, which would place the requires clause right
3654 // before the parentheses of the parameters of the lambda declaration. Thus,
3655 // we track if we expect to see grouping parentheses at all.
3656 // Without this check, `requires foo<T> (T t)` in the below example would be
3657 // seen as the whole requires clause, accidentally eating the parameters of
3658 // the lambda.
3659 // [&]<typename T> requires foo<T> (T t) { ... };
3660 bool TopLevelParensAllowed = true;
3661
3662 do {
3663 bool LambdaThisTimeAllowed = std::exchange(obj&: LambdaNextTimeAllowed, new_val: false);
3664
3665 switch (FormatTok->Tok.getKind()) {
3666 case tok::kw_requires: {
3667 auto RequiresToken = FormatTok;
3668 nextToken();
3669 parseRequiresExpression(RequiresToken);
3670 break;
3671 }
3672
3673 case tok::l_paren:
3674 if (!TopLevelParensAllowed)
3675 return;
3676 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3677 TopLevelParensAllowed = false;
3678 break;
3679
3680 case tok::l_square:
3681 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3682 return;
3683 break;
3684
3685 case tok::kw_const:
3686 case tok::semi:
3687 case tok::kw_class:
3688 case tok::kw_struct:
3689 case tok::kw_union:
3690 return;
3691
3692 case tok::l_brace:
3693 // Potential function body.
3694 return;
3695
3696 case tok::ampamp:
3697 case tok::pipepipe:
3698 FormatTok->setFinalizedType(TT_BinaryOperator);
3699 nextToken();
3700 LambdaNextTimeAllowed = true;
3701 TopLevelParensAllowed = true;
3702 break;
3703
3704 case tok::comma:
3705 case tok::comment:
3706 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3707 nextToken();
3708 break;
3709
3710 case tok::kw_sizeof:
3711 case tok::greater:
3712 case tok::greaterequal:
3713 case tok::greatergreater:
3714 case tok::less:
3715 case tok::lessequal:
3716 case tok::lessless:
3717 case tok::equalequal:
3718 case tok::exclaim:
3719 case tok::exclaimequal:
3720 case tok::plus:
3721 case tok::minus:
3722 case tok::star:
3723 case tok::slash:
3724 LambdaNextTimeAllowed = true;
3725 TopLevelParensAllowed = true;
3726 // Just eat them.
3727 nextToken();
3728 break;
3729
3730 case tok::numeric_constant:
3731 case tok::coloncolon:
3732 case tok::kw_true:
3733 case tok::kw_false:
3734 TopLevelParensAllowed = false;
3735 // Just eat them.
3736 nextToken();
3737 break;
3738
3739 case tok::kw_static_cast:
3740 case tok::kw_const_cast:
3741 case tok::kw_reinterpret_cast:
3742 case tok::kw_dynamic_cast:
3743 nextToken();
3744 if (FormatTok->isNot(Kind: tok::less))
3745 return;
3746
3747 nextToken();
3748 parseBracedList(/*IsAngleBracket=*/true);
3749 break;
3750
3751 default:
3752 if (!FormatTok->Tok.getIdentifierInfo()) {
3753 // Identifiers are part of the default case, we check for more then
3754 // tok::identifier to handle builtin type traits.
3755 return;
3756 }
3757
3758 // We need to differentiate identifiers for a template deduction guide,
3759 // variables, or function return types (the constraint expression has
3760 // ended before that), and basically all other cases. But it's easier to
3761 // check the other way around.
3762 assert(FormatTok->Previous);
3763 switch (FormatTok->Previous->Tok.getKind()) {
3764 case tok::coloncolon: // Nested identifier.
3765 case tok::ampamp: // Start of a function or variable for the
3766 case tok::pipepipe: // constraint expression. (binary)
3767 case tok::exclaim: // The same as above, but unary.
3768 case tok::kw_requires: // Initial identifier of a requires clause.
3769 case tok::equal: // Initial identifier of a concept declaration.
3770 break;
3771 default:
3772 return;
3773 }
3774
3775 // Read identifier with optional template declaration.
3776 nextToken();
3777 if (FormatTok->is(Kind: tok::less)) {
3778 nextToken();
3779 parseBracedList(/*IsAngleBracket=*/true);
3780 }
3781 TopLevelParensAllowed = false;
3782 break;
3783 }
3784 } while (!eof());
3785}
3786
3787bool UnwrappedLineParser::parseEnum() {
3788 const FormatToken &InitialToken = *FormatTok;
3789
3790 // Won't be 'enum' for NS_ENUMs.
3791 if (FormatTok->is(Kind: tok::kw_enum))
3792 nextToken();
3793
3794 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3795 // declarations. An "enum" keyword followed by a colon would be a syntax
3796 // error and thus assume it is just an identifier.
3797 if (Style.isJavaScript() && FormatTok->isOneOf(K1: tok::colon, K2: tok::question))
3798 return false;
3799
3800 // In protobuf, "enum" can be used as a field name.
3801 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(Kind: tok::equal))
3802 return false;
3803
3804 if (IsCpp) {
3805 // Eat up enum class ...
3806 if (FormatTok->isOneOf(K1: tok::kw_class, K2: tok::kw_struct))
3807 nextToken();
3808 while (FormatTok->is(Kind: tok::l_square))
3809 if (!handleCppAttributes())
3810 return false;
3811 }
3812
3813 while (FormatTok->Tok.getIdentifierInfo() ||
3814 FormatTok->isOneOf(K1: tok::colon, K2: tok::coloncolon, Ks: tok::less,
3815 Ks: tok::greater, Ks: tok::comma, Ks: tok::question,
3816 Ks: tok::l_square)) {
3817 if (Style.isVerilog()) {
3818 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3819 nextToken();
3820 // In Verilog the base type can have dimensions.
3821 while (FormatTok->is(Kind: tok::l_square))
3822 parseSquare();
3823 } else {
3824 nextToken();
3825 }
3826 // We can have macros or attributes in between 'enum' and the enum name.
3827 if (FormatTok->is(Kind: tok::l_paren))
3828 parseParens();
3829 if (FormatTok->is(Kind: tok::identifier)) {
3830 nextToken();
3831 // If there are two identifiers in a row, this is likely an elaborate
3832 // return type. In Java, this can be "implements", etc.
3833 if (IsCpp && FormatTok->is(Kind: tok::identifier))
3834 return false;
3835 }
3836 }
3837
3838 // Just a declaration or something is wrong.
3839 if (FormatTok->isNot(Kind: tok::l_brace))
3840 return true;
3841 FormatTok->setFinalizedType(TT_EnumLBrace);
3842 FormatTok->setBlockKind(BK_Block);
3843
3844 if (Style.isJava()) {
3845 // Java enums are different.
3846 parseJavaEnumBody();
3847 return true;
3848 }
3849 if (Style.Language == FormatStyle::LK_Proto) {
3850 parseBlock(/*MustBeDeclaration=*/true);
3851 return true;
3852 }
3853
3854 if (!Style.AllowShortEnumsOnASingleLine &&
3855 ShouldBreakBeforeBrace(Style, InitialToken)) {
3856 addUnwrappedLine();
3857 }
3858 // Parse enum body.
3859 nextToken();
3860 if (!Style.AllowShortEnumsOnASingleLine) {
3861 addUnwrappedLine();
3862 Line->Level += 1;
3863 }
3864 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3865 if (!Style.AllowShortEnumsOnASingleLine)
3866 Line->Level -= 1;
3867 if (HasError) {
3868 if (FormatTok->is(Kind: tok::semi))
3869 nextToken();
3870 addUnwrappedLine();
3871 }
3872 setPreviousRBraceType(TT_EnumRBrace);
3873 return true;
3874
3875 // There is no addUnwrappedLine() here so that we fall through to parsing a
3876 // structural element afterwards. Thus, in "enum A {} n, m;",
3877 // "} n, m;" will end up in one unwrapped line.
3878}
3879
3880bool UnwrappedLineParser::parseStructLike() {
3881 // parseRecord falls through and does not yet add an unwrapped line as a
3882 // record declaration or definition can start a structural element.
3883 parseRecord();
3884 // This does not apply to Java, JavaScript and C#.
3885 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3886 if (FormatTok->is(Kind: tok::semi))
3887 nextToken();
3888 addUnwrappedLine();
3889 return true;
3890 }
3891 return false;
3892}
3893
3894namespace {
3895// A class used to set and restore the Token position when peeking
3896// ahead in the token source.
3897class ScopedTokenPosition {
3898 unsigned StoredPosition;
3899 FormatTokenSource *Tokens;
3900
3901public:
3902 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3903 assert(Tokens && "Tokens expected to not be null");
3904 StoredPosition = Tokens->getPosition();
3905 }
3906
3907 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3908};
3909} // namespace
3910
3911// Look to see if we have [[ by looking ahead, if
3912// its not then rewind to the original position.
3913bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3914 ScopedTokenPosition AutoPosition(Tokens);
3915 FormatToken *Tok = Tokens->getNextToken();
3916 // We already read the first [ check for the second.
3917 if (Tok->isNot(Kind: tok::l_square))
3918 return false;
3919 // Double check that the attribute is just something
3920 // fairly simple.
3921 while (Tok->isNot(Kind: tok::eof)) {
3922 if (Tok->is(Kind: tok::r_square))
3923 break;
3924 Tok = Tokens->getNextToken();
3925 }
3926 if (Tok->is(Kind: tok::eof))
3927 return false;
3928 Tok = Tokens->getNextToken();
3929 if (Tok->isNot(Kind: tok::r_square))
3930 return false;
3931 Tok = Tokens->getNextToken();
3932 if (Tok->is(Kind: tok::semi))
3933 return false;
3934 return true;
3935}
3936
3937void UnwrappedLineParser::parseJavaEnumBody() {
3938 assert(FormatTok->is(tok::l_brace));
3939 const FormatToken *OpeningBrace = FormatTok;
3940
3941 // Determine whether the enum is simple, i.e. does not have a semicolon or
3942 // constants with class bodies. Simple enums can be formatted like braced
3943 // lists, contracted to a single line, etc.
3944 unsigned StoredPosition = Tokens->getPosition();
3945 bool IsSimple = true;
3946 FormatToken *Tok = Tokens->getNextToken();
3947 while (Tok->isNot(Kind: tok::eof)) {
3948 if (Tok->is(Kind: tok::r_brace))
3949 break;
3950 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::semi)) {
3951 IsSimple = false;
3952 break;
3953 }
3954 // FIXME: This will also mark enums with braces in the arguments to enum
3955 // constants as "not simple". This is probably fine in practice, though.
3956 Tok = Tokens->getNextToken();
3957 }
3958 FormatTok = Tokens->setPosition(StoredPosition);
3959
3960 if (IsSimple) {
3961 nextToken();
3962 parseBracedList();
3963 addUnwrappedLine();
3964 return;
3965 }
3966
3967 // Parse the body of a more complex enum.
3968 // First add a line for everything up to the "{".
3969 nextToken();
3970 addUnwrappedLine();
3971 ++Line->Level;
3972
3973 // Parse the enum constants.
3974 while (!eof()) {
3975 if (FormatTok->is(Kind: tok::l_brace)) {
3976 // Parse the constant's class body.
3977 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3978 /*MunchSemi=*/false);
3979 } else if (FormatTok->is(Kind: tok::l_paren)) {
3980 parseParens();
3981 } else if (FormatTok->is(Kind: tok::comma)) {
3982 nextToken();
3983 addUnwrappedLine();
3984 } else if (FormatTok->is(Kind: tok::semi)) {
3985 nextToken();
3986 addUnwrappedLine();
3987 break;
3988 } else if (FormatTok->is(Kind: tok::r_brace)) {
3989 addUnwrappedLine();
3990 break;
3991 } else {
3992 nextToken();
3993 }
3994 }
3995
3996 // Parse the class body after the enum's ";" if any.
3997 parseLevel(OpeningBrace);
3998 nextToken();
3999 --Line->Level;
4000 addUnwrappedLine();
4001}
4002
4003void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
4004 assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
4005 const FormatToken &InitialToken = *FormatTok;
4006 nextToken();
4007
4008 FormatToken *ClassName =
4009 IsJavaRecord && FormatTok->is(Kind: tok::identifier) ? FormatTok : nullptr;
4010 bool IsDerived = false;
4011 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4012 return Tok->is(Kind: tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4013 };
4014 // JavaScript/TypeScript supports anonymous classes like:
4015 // a = class extends foo { }
4016 bool JSPastExtendsOrImplements = false;
4017 // The actual identifier can be a nested name specifier, and in macros
4018 // it is often token-pasted.
4019 // An [[attribute]] can be before the identifier.
4020 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::hashhash,
4021 Ks: tok::kw_alignas, Ks: tok::l_square) ||
4022 FormatTok->isAttribute() ||
4023 ((Style.isJava() || Style.isJavaScript()) &&
4024 FormatTok->isOneOf(K1: tok::period, K2: tok::comma))) {
4025 if (Style.isJavaScript() &&
4026 FormatTok->isOneOf(K1: Keywords.kw_extends, K2: Keywords.kw_implements)) {
4027 JSPastExtendsOrImplements = true;
4028 // JavaScript/TypeScript supports inline object types in
4029 // extends/implements positions:
4030 // class Foo implements {bar: number} { }
4031 nextToken();
4032 if (FormatTok->is(Kind: tok::l_brace)) {
4033 tryToParseBracedList();
4034 continue;
4035 }
4036 }
4037 if (FormatTok->is(Kind: tok::l_square) && handleCppAttributes())
4038 continue;
4039 auto *Previous = FormatTok;
4040 nextToken();
4041 switch (FormatTok->Tok.getKind()) {
4042 case tok::l_paren:
4043 // We can have macros in between 'class' and the class name.
4044 if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
4045 // e.g. `struct macro(a) S { int i; };`
4046 Previous->Previous == &InitialToken) {
4047 parseParens();
4048 }
4049 break;
4050 case tok::coloncolon:
4051 case tok::hashhash:
4052 break;
4053 default:
4054 if (JSPastExtendsOrImplements || ClassName ||
4055 Previous->isNot(Kind: tok::identifier) || Previous->is(TT: TT_AttributeMacro)) {
4056 break;
4057 }
4058 if (const auto Text = Previous->TokenText;
4059 Text.size() == 1 || Text != Text.upper()) {
4060 ClassName = Previous;
4061 }
4062 }
4063 }
4064
4065 auto IsListInitialization = [&] {
4066 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4067 return false;
4068 assert(FormatTok->is(tok::l_brace));
4069 const auto *Prev = FormatTok->getPreviousNonComment();
4070 assert(Prev);
4071 return Prev != ClassName && Prev->is(Kind: tok::identifier) &&
4072 Prev->isNot(Kind: Keywords.kw_final) && tryToParseBracedList();
4073 };
4074
4075 if (FormatTok->isOneOf(K1: tok::colon, K2: tok::less)) {
4076 int AngleNestingLevel = 0;
4077 do {
4078 if (FormatTok->is(Kind: tok::less))
4079 ++AngleNestingLevel;
4080 else if (FormatTok->is(Kind: tok::greater))
4081 --AngleNestingLevel;
4082
4083 if (AngleNestingLevel == 0) {
4084 if (FormatTok->is(Kind: tok::colon)) {
4085 IsDerived = true;
4086 } else if (!IsDerived && FormatTok->is(Kind: tok::identifier) &&
4087 FormatTok->Previous->is(Kind: tok::coloncolon)) {
4088 ClassName = FormatTok;
4089 } else if (FormatTok->is(Kind: tok::l_paren) &&
4090 IsNonMacroIdentifier(FormatTok->Previous)) {
4091 break;
4092 }
4093 }
4094 if (FormatTok->is(Kind: tok::l_brace)) {
4095 if (AngleNestingLevel == 0 && IsListInitialization())
4096 return;
4097 calculateBraceTypes(/*ExpectClassBody=*/true);
4098 if (!tryToParseBracedList())
4099 break;
4100 }
4101 if (FormatTok->is(Kind: tok::l_square)) {
4102 FormatToken *Previous = FormatTok->Previous;
4103 if (!Previous || (Previous->isNot(Kind: tok::r_paren) &&
4104 !Previous->isTypeOrIdentifier(LangOpts))) {
4105 // Don't try parsing a lambda if we had a closing parenthesis before,
4106 // it was probably a pointer to an array: int (*)[].
4107 if (!tryToParseLambda())
4108 continue;
4109 } else {
4110 parseSquare();
4111 continue;
4112 }
4113 }
4114 if (FormatTok->is(Kind: tok::semi))
4115 return;
4116 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where)) {
4117 addUnwrappedLine();
4118 nextToken();
4119 parseCSharpGenericTypeConstraint();
4120 break;
4121 }
4122 nextToken();
4123 } while (!eof());
4124 }
4125
4126 auto GetBraceTypes =
4127 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4128 switch (RecordTok.Tok.getKind()) {
4129 case tok::kw_class:
4130 return {TT_ClassLBrace, TT_ClassRBrace};
4131 case tok::kw_struct:
4132 return {TT_StructLBrace, TT_StructRBrace};
4133 case tok::kw_union:
4134 return {TT_UnionLBrace, TT_UnionRBrace};
4135 default:
4136 // Useful for e.g. interface.
4137 return {TT_RecordLBrace, TT_RecordRBrace};
4138 }
4139 };
4140 if (FormatTok->is(Kind: tok::l_brace)) {
4141 if (IsListInitialization())
4142 return;
4143 if (ClassName)
4144 ClassName->setFinalizedType(TT_ClassHeadName);
4145 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4146 FormatTok->setFinalizedType(OpenBraceType);
4147 if (ParseAsExpr) {
4148 parseChildBlock();
4149 } else {
4150 if (ShouldBreakBeforeBrace(Style, InitialToken))
4151 addUnwrappedLine();
4152
4153 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4154 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4155 }
4156 setPreviousRBraceType(ClosingBraceType);
4157 }
4158 // There is no addUnwrappedLine() here so that we fall through to parsing a
4159 // structural element afterwards. Thus, in "class A {} n, m;",
4160 // "} n, m;" will end up in one unwrapped line.
4161}
4162
4163void UnwrappedLineParser::parseObjCMethod() {
4164 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4165 "'(' or identifier expected.");
4166 do {
4167 if (FormatTok->is(Kind: tok::semi)) {
4168 nextToken();
4169 addUnwrappedLine();
4170 return;
4171 } else if (FormatTok->is(Kind: tok::l_brace)) {
4172 if (Style.BraceWrapping.AfterFunction)
4173 addUnwrappedLine();
4174 parseBlock();
4175 addUnwrappedLine();
4176 return;
4177 } else {
4178 nextToken();
4179 }
4180 } while (!eof());
4181}
4182
4183void UnwrappedLineParser::parseObjCProtocolList() {
4184 assert(FormatTok->is(tok::less) && "'<' expected.");
4185 do {
4186 nextToken();
4187 // Early exit in case someone forgot a close angle.
4188 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4189 return;
4190 } while (!eof() && FormatTok->isNot(Kind: tok::greater));
4191 nextToken(); // Skip '>'.
4192}
4193
4194void UnwrappedLineParser::parseObjCUntilAtEnd() {
4195 do {
4196 if (FormatTok->is(Kind: tok::objc_end)) {
4197 nextToken();
4198 addUnwrappedLine();
4199 break;
4200 }
4201 if (FormatTok->is(Kind: tok::l_brace)) {
4202 parseBlock();
4203 // In ObjC interfaces, nothing should be following the "}".
4204 addUnwrappedLine();
4205 } else if (FormatTok->is(Kind: tok::r_brace)) {
4206 // Ignore stray "}". parseStructuralElement doesn't consume them.
4207 nextToken();
4208 addUnwrappedLine();
4209 } else if (FormatTok->isOneOf(K1: tok::minus, K2: tok::plus)) {
4210 nextToken();
4211 parseObjCMethod();
4212 } else {
4213 parseStructuralElement();
4214 }
4215 } while (!eof());
4216}
4217
4218void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4219 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4220 nextToken();
4221 nextToken(); // interface name
4222
4223 // @interface can be followed by a lightweight generic
4224 // specialization list, then either a base class or a category.
4225 if (FormatTok->is(Kind: tok::less))
4226 parseObjCLightweightGenerics();
4227 if (FormatTok->is(Kind: tok::colon)) {
4228 nextToken();
4229 nextToken(); // base class name
4230 // The base class can also have lightweight generics applied to it.
4231 if (FormatTok->is(Kind: tok::less))
4232 parseObjCLightweightGenerics();
4233 } else if (FormatTok->is(Kind: tok::l_paren)) {
4234 // Skip category, if present.
4235 parseParens();
4236 }
4237
4238 if (FormatTok->is(Kind: tok::less))
4239 parseObjCProtocolList();
4240
4241 if (FormatTok->is(Kind: tok::l_brace)) {
4242 if (Style.BraceWrapping.AfterObjCDeclaration)
4243 addUnwrappedLine();
4244 parseBlock(/*MustBeDeclaration=*/true);
4245 }
4246
4247 // With instance variables, this puts '}' on its own line. Without instance
4248 // variables, this ends the @interface line.
4249 addUnwrappedLine();
4250
4251 parseObjCUntilAtEnd();
4252}
4253
4254void UnwrappedLineParser::parseObjCLightweightGenerics() {
4255 assert(FormatTok->is(tok::less));
4256 // Unlike protocol lists, generic parameterizations support
4257 // nested angles:
4258 //
4259 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4260 // NSObject <NSCopying, NSSecureCoding>
4261 //
4262 // so we need to count how many open angles we have left.
4263 unsigned NumOpenAngles = 1;
4264 do {
4265 nextToken();
4266 // Early exit in case someone forgot a close angle.
4267 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4268 break;
4269 if (FormatTok->is(Kind: tok::less)) {
4270 ++NumOpenAngles;
4271 } else if (FormatTok->is(Kind: tok::greater)) {
4272 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4273 --NumOpenAngles;
4274 }
4275 } while (!eof() && NumOpenAngles != 0);
4276 nextToken(); // Skip '>'.
4277}
4278
4279// Returns true for the declaration/definition form of @protocol,
4280// false for the expression form.
4281bool UnwrappedLineParser::parseObjCProtocol() {
4282 assert(FormatTok->is(tok::objc_protocol));
4283 nextToken();
4284
4285 if (FormatTok->is(Kind: tok::l_paren)) {
4286 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4287 return false;
4288 }
4289
4290 // The definition/declaration form,
4291 // @protocol Foo
4292 // - (int)someMethod;
4293 // @end
4294
4295 nextToken(); // protocol name
4296
4297 if (FormatTok->is(Kind: tok::less))
4298 parseObjCProtocolList();
4299
4300 // Check for protocol declaration.
4301 if (FormatTok->is(Kind: tok::semi)) {
4302 nextToken();
4303 addUnwrappedLine();
4304 return true;
4305 }
4306
4307 addUnwrappedLine();
4308 parseObjCUntilAtEnd();
4309 return true;
4310}
4311
4312void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4313 bool IsImport = FormatTok->is(II: Keywords.kw_import);
4314 assert(IsImport || FormatTok->is(tok::kw_export));
4315 nextToken();
4316
4317 // Consume the "default" in "export default class/function".
4318 if (FormatTok->is(Kind: tok::kw_default))
4319 nextToken();
4320
4321 // Consume "async function", "function" and "default function", so that these
4322 // get parsed as free-standing JS functions, i.e. do not require a trailing
4323 // semicolon.
4324 if (FormatTok->is(II: Keywords.kw_async))
4325 nextToken();
4326 if (FormatTok->is(II: Keywords.kw_function)) {
4327 nextToken();
4328 return;
4329 }
4330
4331 // For imports, `export *`, `export {...}`, consume the rest of the line up
4332 // to the terminating `;`. For everything else, just return and continue
4333 // parsing the structural element, i.e. the declaration or expression for
4334 // `export default`.
4335 if (!IsImport && !FormatTok->isOneOf(K1: tok::l_brace, K2: tok::star) &&
4336 !FormatTok->isStringLiteral() &&
4337 !(FormatTok->is(II: Keywords.kw_type) &&
4338 Tokens->peekNextToken()->isOneOf(K1: tok::l_brace, K2: tok::star))) {
4339 return;
4340 }
4341
4342 while (!eof()) {
4343 if (FormatTok->is(Kind: tok::semi))
4344 return;
4345 if (Line->Tokens.empty()) {
4346 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4347 // import statement should terminate.
4348 return;
4349 }
4350 if (FormatTok->is(Kind: tok::l_brace)) {
4351 FormatTok->setBlockKind(BK_Block);
4352 nextToken();
4353 parseBracedList();
4354 } else {
4355 nextToken();
4356 }
4357 }
4358}
4359
4360void UnwrappedLineParser::parseStatementMacro() {
4361 nextToken();
4362 if (FormatTok->is(Kind: tok::l_paren))
4363 parseParens();
4364 if (FormatTok->is(Kind: tok::semi))
4365 nextToken();
4366 addUnwrappedLine();
4367}
4368
4369void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4370 // consume things like a::`b.c[d:e] or a::*
4371 while (true) {
4372 if (FormatTok->isOneOf(K1: tok::star, K2: tok::period, Ks: tok::periodstar,
4373 Ks: tok::coloncolon, Ks: tok::hash) ||
4374 Keywords.isVerilogIdentifier(Tok: *FormatTok)) {
4375 nextToken();
4376 } else if (FormatTok->is(Kind: tok::l_square)) {
4377 parseSquare();
4378 } else {
4379 break;
4380 }
4381 }
4382}
4383
4384void UnwrappedLineParser::parseVerilogSensitivityList() {
4385 if (FormatTok->isNot(Kind: tok::at))
4386 return;
4387 nextToken();
4388 // A block event expression has 2 at signs.
4389 if (FormatTok->is(Kind: tok::at))
4390 nextToken();
4391 switch (FormatTok->Tok.getKind()) {
4392 case tok::star:
4393 nextToken();
4394 break;
4395 case tok::l_paren:
4396 parseParens();
4397 break;
4398 default:
4399 parseVerilogHierarchyIdentifier();
4400 break;
4401 }
4402}
4403
4404unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4405 unsigned AddLevels = 0;
4406
4407 if (FormatTok->is(II: Keywords.kw_clocking)) {
4408 nextToken();
4409 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4410 nextToken();
4411 parseVerilogSensitivityList();
4412 if (FormatTok->is(Kind: tok::semi))
4413 nextToken();
4414 } else if (FormatTok->isOneOf(K1: tok::kw_case, K2: Keywords.kw_casex,
4415 Ks: Keywords.kw_casez, Ks: Keywords.kw_randcase,
4416 Ks: Keywords.kw_randsequence)) {
4417 if (Style.IndentCaseLabels)
4418 AddLevels++;
4419 nextToken();
4420 if (FormatTok->is(Kind: tok::l_paren)) {
4421 FormatTok->setFinalizedType(TT_ConditionLParen);
4422 parseParens();
4423 }
4424 if (FormatTok->isOneOf(K1: Keywords.kw_inside, K2: Keywords.kw_matches))
4425 nextToken();
4426 // The case header has no semicolon.
4427 } else {
4428 // "module" etc.
4429 nextToken();
4430 // all the words like the name of the module and specifiers like
4431 // "automatic" and the width of function return type
4432 while (true) {
4433 if (FormatTok->is(Kind: tok::l_square)) {
4434 auto Prev = FormatTok->getPreviousNonComment();
4435 if (Prev && Keywords.isVerilogIdentifier(Tok: *Prev))
4436 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4437 parseSquare();
4438 } else if (Keywords.isVerilogIdentifier(Tok: *FormatTok) ||
4439 FormatTok->isOneOf(K1: tok::hash, K2: tok::hashhash, Ks: tok::coloncolon,
4440 Ks: Keywords.kw_automatic, Ks: tok::kw_static)) {
4441 nextToken();
4442 } else {
4443 break;
4444 }
4445 }
4446
4447 auto NewLine = [this]() {
4448 addUnwrappedLine();
4449 Line->IsContinuation = true;
4450 };
4451
4452 // package imports
4453 while (FormatTok->is(II: Keywords.kw_import)) {
4454 NewLine();
4455 nextToken();
4456 parseVerilogHierarchyIdentifier();
4457 if (FormatTok->is(Kind: tok::semi))
4458 nextToken();
4459 }
4460
4461 // parameters and ports
4462 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
4463 NewLine();
4464 nextToken();
4465 if (FormatTok->is(Kind: tok::l_paren)) {
4466 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4467 parseParens();
4468 }
4469 }
4470 if (FormatTok->is(Kind: tok::l_paren)) {
4471 NewLine();
4472 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4473 parseParens();
4474 }
4475
4476 // extends and implements
4477 if (FormatTok->is(II: Keywords.kw_extends)) {
4478 NewLine();
4479 nextToken();
4480 parseVerilogHierarchyIdentifier();
4481 if (FormatTok->is(Kind: tok::l_paren))
4482 parseParens();
4483 }
4484 if (FormatTok->is(II: Keywords.kw_implements)) {
4485 NewLine();
4486 do {
4487 nextToken();
4488 parseVerilogHierarchyIdentifier();
4489 } while (FormatTok->is(Kind: tok::comma));
4490 }
4491
4492 // Coverage event for cover groups.
4493 if (FormatTok->is(Kind: tok::at)) {
4494 NewLine();
4495 parseVerilogSensitivityList();
4496 }
4497
4498 if (FormatTok->is(Kind: tok::semi))
4499 nextToken(/*LevelDifference=*/1);
4500 addUnwrappedLine();
4501 }
4502
4503 return AddLevels;
4504}
4505
4506void UnwrappedLineParser::parseVerilogTable() {
4507 assert(FormatTok->is(Keywords.kw_table));
4508 nextToken(/*LevelDifference=*/1);
4509 addUnwrappedLine();
4510
4511 auto InitialLevel = Line->Level++;
4512 while (!eof() && !Keywords.isVerilogEnd(Tok: *FormatTok)) {
4513 FormatToken *Tok = FormatTok;
4514 nextToken();
4515 if (Tok->is(Kind: tok::semi))
4516 addUnwrappedLine();
4517 else if (Tok->isOneOf(K1: tok::star, K2: tok::colon, Ks: tok::question, Ks: tok::minus))
4518 Tok->setFinalizedType(TT_VerilogTableItem);
4519 }
4520 Line->Level = InitialLevel;
4521 nextToken(/*LevelDifference=*/-1);
4522 addUnwrappedLine();
4523}
4524
4525void UnwrappedLineParser::parseVerilogCaseLabel() {
4526 // The label will get unindented in AnnotatingParser. If there are no leading
4527 // spaces, indent the rest here so that things inside the block will be
4528 // indented relative to things outside. We don't use parseLabel because we
4529 // don't know whether this colon is a label or a ternary expression at this
4530 // point.
4531 auto OrigLevel = Line->Level;
4532 auto FirstLine = CurrentLines->size();
4533 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4534 ++Line->Level;
4535 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(Tok: *FormatTok))
4536 --Line->Level;
4537 parseStructuralElement();
4538 // Restore the indentation in both the new line and the line that has the
4539 // label.
4540 if (CurrentLines->size() > FirstLine)
4541 (*CurrentLines)[FirstLine].Level = OrigLevel;
4542 Line->Level = OrigLevel;
4543}
4544
4545bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4546 for (const auto &N : Line.Tokens) {
4547 if (N.Tok->MacroCtx)
4548 return true;
4549 for (const UnwrappedLine &Child : N.Children)
4550 if (containsExpansion(Line: Child))
4551 return true;
4552 }
4553 return false;
4554}
4555
4556void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4557 if (Line->Tokens.empty())
4558 return;
4559 LLVM_DEBUG({
4560 if (!parsingPPDirective()) {
4561 llvm::dbgs() << "Adding unwrapped line:\n";
4562 printDebugInfo(*Line);
4563 }
4564 });
4565
4566 // If this line closes a block when in Whitesmiths mode, remember that
4567 // information so that the level can be decreased after the line is added.
4568 // This has to happen after the addition of the line since the line itself
4569 // needs to be indented.
4570 bool ClosesWhitesmithsBlock =
4571 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4572 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4573
4574 // If the current line was expanded from a macro call, we use it to
4575 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4576 // line and the unexpanded token stream.
4577 if (!parsingPPDirective() && !InExpansion && containsExpansion(Line: *Line)) {
4578 if (!Reconstruct)
4579 Reconstruct.emplace(args&: Line->Level, args&: Unexpanded);
4580 Reconstruct->addLine(Line: *Line);
4581
4582 // While the reconstructed unexpanded lines are stored in the normal
4583 // flow of lines, the expanded lines are stored on the side to be analyzed
4584 // in an extra step.
4585 CurrentExpandedLines.push_back(Elt: std::move(*Line));
4586
4587 if (Reconstruct->finished()) {
4588 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4589 assert(!Reconstructed.Tokens.empty() &&
4590 "Reconstructed must at least contain the macro identifier.");
4591 assert(!parsingPPDirective());
4592 LLVM_DEBUG({
4593 llvm::dbgs() << "Adding unexpanded line:\n";
4594 printDebugInfo(Reconstructed);
4595 });
4596 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4597 Lines.push_back(Elt: std::move(Reconstructed));
4598 CurrentExpandedLines.clear();
4599 Reconstruct.reset();
4600 }
4601 } else {
4602 // At the top level we only get here when no unexpansion is going on, or
4603 // when conditional formatting led to unfinished macro reconstructions.
4604 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4605 CurrentLines->push_back(Elt: std::move(*Line));
4606 }
4607 Line->Tokens.clear();
4608 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4609 Line->FirstStartColumn = 0;
4610 Line->IsContinuation = false;
4611 Line->SeenDecltypeAuto = false;
4612
4613 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4614 --Line->Level;
4615 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4616 CurrentLines->append(
4617 in_start: std::make_move_iterator(i: PreprocessorDirectives.begin()),
4618 in_end: std::make_move_iterator(i: PreprocessorDirectives.end()));
4619 PreprocessorDirectives.clear();
4620 }
4621 // Disconnect the current token from the last token on the previous line.
4622 FormatTok->Previous = nullptr;
4623}
4624
4625bool UnwrappedLineParser::eof() const { return FormatTok->is(Kind: tok::eof); }
4626
4627bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4628 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4629 FormatTok.NewlinesBefore > 0;
4630}
4631
4632// Checks if \p FormatTok is a line comment that continues the line comment
4633// section on \p Line.
4634static bool
4635continuesLineCommentSection(const FormatToken &FormatTok,
4636 const UnwrappedLine &Line, const FormatStyle &Style,
4637 const llvm::Regex &CommentPragmasRegex) {
4638 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4639 return false;
4640
4641 StringRef IndentContent = FormatTok.TokenText;
4642 if (FormatTok.TokenText.starts_with(Prefix: "//") ||
4643 FormatTok.TokenText.starts_with(Prefix: "/*")) {
4644 IndentContent = FormatTok.TokenText.substr(Start: 2);
4645 }
4646 if (CommentPragmasRegex.match(String: IndentContent))
4647 return false;
4648
4649 // If Line starts with a line comment, then FormatTok continues the comment
4650 // section if its original column is greater or equal to the original start
4651 // column of the line.
4652 //
4653 // Define the min column token of a line as follows: if a line ends in '{' or
4654 // contains a '{' followed by a line comment, then the min column token is
4655 // that '{'. Otherwise, the min column token of the line is the first token of
4656 // the line.
4657 //
4658 // If Line starts with a token other than a line comment, then FormatTok
4659 // continues the comment section if its original column is greater than the
4660 // original start column of the min column token of the line.
4661 //
4662 // For example, the second line comment continues the first in these cases:
4663 //
4664 // // first line
4665 // // second line
4666 //
4667 // and:
4668 //
4669 // // first line
4670 // // second line
4671 //
4672 // and:
4673 //
4674 // int i; // first line
4675 // // second line
4676 //
4677 // and:
4678 //
4679 // do { // first line
4680 // // second line
4681 // int i;
4682 // } while (true);
4683 //
4684 // and:
4685 //
4686 // enum {
4687 // a, // first line
4688 // // second line
4689 // b
4690 // };
4691 //
4692 // The second line comment doesn't continue the first in these cases:
4693 //
4694 // // first line
4695 // // second line
4696 //
4697 // and:
4698 //
4699 // int i; // first line
4700 // // second line
4701 //
4702 // and:
4703 //
4704 // do { // first line
4705 // // second line
4706 // int i;
4707 // } while (true);
4708 //
4709 // and:
4710 //
4711 // enum {
4712 // a, // first line
4713 // // second line
4714 // };
4715 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4716
4717 // Scan for '{//'. If found, use the column of '{' as a min column for line
4718 // comment section continuation.
4719 const FormatToken *PreviousToken = nullptr;
4720 for (const UnwrappedLineNode &Node : Line.Tokens) {
4721 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace) &&
4722 isLineComment(FormatTok: *Node.Tok)) {
4723 MinColumnToken = PreviousToken;
4724 break;
4725 }
4726 PreviousToken = Node.Tok;
4727
4728 // Grab the last newline preceding a token in this unwrapped line.
4729 if (Node.Tok->NewlinesBefore > 0)
4730 MinColumnToken = Node.Tok;
4731 }
4732 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace))
4733 MinColumnToken = PreviousToken;
4734
4735 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4736 MinColumnToken);
4737}
4738
4739void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4740 bool JustComments = Line->Tokens.empty();
4741 for (FormatToken *Tok : CommentsBeforeNextToken) {
4742 // Line comments that belong to the same line comment section are put on the
4743 // same line since later we might want to reflow content between them.
4744 // Additional fine-grained breaking of line comment sections is controlled
4745 // by the class BreakableLineCommentSection in case it is desirable to keep
4746 // several line comment sections in the same unwrapped line.
4747 //
4748 // FIXME: Consider putting separate line comment sections as children to the
4749 // unwrapped line instead.
4750 Tok->ContinuesLineCommentSection =
4751 continuesLineCommentSection(FormatTok: *Tok, Line: *Line, Style, CommentPragmasRegex);
4752 if (isOnNewLine(FormatTok: *Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4753 addUnwrappedLine();
4754 pushToken(Tok);
4755 }
4756 if (NewlineBeforeNext && JustComments)
4757 addUnwrappedLine();
4758 CommentsBeforeNextToken.clear();
4759}
4760
4761void UnwrappedLineParser::nextToken(int LevelDifference) {
4762 if (eof())
4763 return;
4764 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4765 pushToken(Tok: FormatTok);
4766 FormatToken *Previous = FormatTok;
4767 if (!Style.isJavaScript())
4768 readToken(LevelDifference);
4769 else
4770 readTokenWithJavaScriptASI();
4771 FormatTok->Previous = Previous;
4772 if (Style.isVerilog()) {
4773 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4774 // keywords like `begin`, we can't treat them the same as left braces
4775 // because some contexts require one of them. For example structs use
4776 // braces and if blocks use keywords, and a left brace can occur in an if
4777 // statement, but it is not a block. For keywords like `end`, we simply
4778 // treat them the same as right braces.
4779 if (Keywords.isVerilogEnd(Tok: *FormatTok))
4780 FormatTok->Tok.setKind(tok::r_brace);
4781 }
4782}
4783
4784void UnwrappedLineParser::distributeComments(
4785 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4786 // Whether or not a line comment token continues a line is controlled by
4787 // the method continuesLineCommentSection, with the following caveat:
4788 //
4789 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4790 // that each comment line from the trail is aligned with the next token, if
4791 // the next token exists. If a trail exists, the beginning of the maximal
4792 // trail is marked as a start of a new comment section.
4793 //
4794 // For example in this code:
4795 //
4796 // int a; // line about a
4797 // // line 1 about b
4798 // // line 2 about b
4799 // int b;
4800 //
4801 // the two lines about b form a maximal trail, so there are two sections, the
4802 // first one consisting of the single comment "// line about a" and the
4803 // second one consisting of the next two comments.
4804 if (Comments.empty())
4805 return;
4806 bool ShouldPushCommentsInCurrentLine = true;
4807 bool HasTrailAlignedWithNextToken = false;
4808 unsigned StartOfTrailAlignedWithNextToken = 0;
4809 if (NextTok) {
4810 // We are skipping the first element intentionally.
4811 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4812 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4813 HasTrailAlignedWithNextToken = true;
4814 StartOfTrailAlignedWithNextToken = i;
4815 }
4816 }
4817 }
4818 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4819 FormatToken *FormatTok = Comments[i];
4820 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4821 FormatTok->ContinuesLineCommentSection = false;
4822 } else {
4823 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4824 FormatTok: *FormatTok, Line: *Line, Style, CommentPragmasRegex);
4825 }
4826 if (!FormatTok->ContinuesLineCommentSection &&
4827 (isOnNewLine(FormatTok: *FormatTok) || FormatTok->IsFirst)) {
4828 ShouldPushCommentsInCurrentLine = false;
4829 }
4830 if (ShouldPushCommentsInCurrentLine)
4831 pushToken(Tok: FormatTok);
4832 else
4833 CommentsBeforeNextToken.push_back(Elt: FormatTok);
4834 }
4835}
4836
4837void UnwrappedLineParser::readToken(int LevelDifference) {
4838 SmallVector<FormatToken *, 1> Comments;
4839 bool PreviousWasComment = false;
4840 bool FirstNonCommentOnLine = false;
4841 do {
4842 FormatTok = Tokens->getNextToken();
4843 assert(FormatTok);
4844 while (FormatTok->isOneOf(K1: TT_ConflictStart, K2: TT_ConflictEnd,
4845 Ks: TT_ConflictAlternative)) {
4846 if (FormatTok->is(TT: TT_ConflictStart))
4847 conditionalCompilationStart(/*Unreachable=*/false);
4848 else if (FormatTok->is(TT: TT_ConflictAlternative))
4849 conditionalCompilationAlternative();
4850 else if (FormatTok->is(TT: TT_ConflictEnd))
4851 conditionalCompilationEnd();
4852 FormatTok = Tokens->getNextToken();
4853 FormatTok->MustBreakBefore = true;
4854 FormatTok->MustBreakBeforeFinalized = true;
4855 }
4856
4857 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4858 const FormatToken &Tok,
4859 bool PreviousWasComment) {
4860 auto IsFirstOnLine = [](const FormatToken &Tok) {
4861 return Tok.HasUnescapedNewline || Tok.IsFirst;
4862 };
4863
4864 // Consider preprocessor directives preceded by block comments as first
4865 // on line.
4866 if (PreviousWasComment)
4867 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4868 return IsFirstOnLine(Tok);
4869 };
4870
4871 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4872 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4873 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4874
4875 while (!Line->InPPDirective && FormatTok->is(Kind: tok::hash) &&
4876 FirstNonCommentOnLine) {
4877 // In Verilog, the backtick is used for macro invocations. In TableGen,
4878 // the single hash is used for the paste operator.
4879 const auto *Next = Tokens->peekNextToken();
4880 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(Tok: *Next)) ||
4881 (Style.isTableGen() &&
4882 !Next->isOneOf(K1: tok::kw_else, K2: tok::pp_define, Ks: tok::pp_ifdef,
4883 Ks: tok::pp_ifndef, Ks: tok::pp_endif))) {
4884 break;
4885 }
4886 distributeComments(Comments, NextTok: FormatTok);
4887 Comments.clear();
4888 // If there is an unfinished unwrapped line, we flush the preprocessor
4889 // directives only after that unwrapped line was finished later.
4890 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4891 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4892 assert((LevelDifference >= 0 ||
4893 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4894 "LevelDifference makes Line->Level negative");
4895 Line->Level += LevelDifference;
4896 // Comments stored before the preprocessor directive need to be output
4897 // before the preprocessor directive, at the same level as the
4898 // preprocessor directive, as we consider them to apply to the directive.
4899 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4900 PPBranchLevel > 0) {
4901 Line->Level += PPBranchLevel;
4902 }
4903 assert(Line->Level >= Line->UnbracedBodyLevel);
4904 Line->Level -= Line->UnbracedBodyLevel;
4905 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4906 parsePPDirective();
4907 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4908 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4909 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4910 }
4911
4912 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4913 !Line->InPPDirective) {
4914 continue;
4915 }
4916
4917 if (FormatTok->is(Kind: tok::identifier) &&
4918 Macros.defined(Name: FormatTok->TokenText) &&
4919 // FIXME: Allow expanding macros in preprocessor directives.
4920 !Line->InPPDirective) {
4921 FormatToken *ID = FormatTok;
4922 unsigned Position = Tokens->getPosition();
4923
4924 // To correctly parse the code, we need to replace the tokens of the macro
4925 // call with its expansion.
4926 auto PreCall = std::move(Line);
4927 Line.reset(p: new UnwrappedLine);
4928 bool OldInExpansion = InExpansion;
4929 InExpansion = true;
4930 // We parse the macro call into a new line.
4931 auto Args = parseMacroCall();
4932 InExpansion = OldInExpansion;
4933 assert(Line->Tokens.front().Tok == ID);
4934 // And remember the unexpanded macro call tokens.
4935 auto UnexpandedLine = std::move(Line);
4936 // Reset to the old line.
4937 Line = std::move(PreCall);
4938
4939 LLVM_DEBUG({
4940 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4941 if (Args) {
4942 llvm::dbgs() << "(";
4943 for (const auto &Arg : Args.value())
4944 for (const auto &T : Arg)
4945 llvm::dbgs() << T->TokenText << " ";
4946 llvm::dbgs() << ")";
4947 }
4948 llvm::dbgs() << "\n";
4949 });
4950 if (Macros.objectLike(Name: ID->TokenText) && Args &&
4951 !Macros.hasArity(Name: ID->TokenText, Arity: Args->size())) {
4952 // The macro is either
4953 // - object-like, but we got arguments, or
4954 // - overloaded to be both object-like and function-like, but none of
4955 // the function-like arities match the number of arguments.
4956 // Thus, expand as object-like macro.
4957 LLVM_DEBUG(llvm::dbgs()
4958 << "Macro \"" << ID->TokenText
4959 << "\" not overloaded for arity " << Args->size()
4960 << "or not function-like, using object-like overload.");
4961 Args.reset();
4962 UnexpandedLine->Tokens.resize(new_size: 1);
4963 Tokens->setPosition(Position);
4964 nextToken();
4965 assert(!Args && Macros.objectLike(ID->TokenText));
4966 }
4967 if ((!Args && Macros.objectLike(Name: ID->TokenText)) ||
4968 (Args && Macros.hasArity(Name: ID->TokenText, Arity: Args->size()))) {
4969 // Next, we insert the expanded tokens in the token stream at the
4970 // current position, and continue parsing.
4971 Unexpanded[ID] = std::move(UnexpandedLine);
4972 SmallVector<FormatToken *, 8> Expansion =
4973 Macros.expand(ID, OptionalArgs: std::move(Args));
4974 if (!Expansion.empty())
4975 FormatTok = Tokens->insertTokens(Tokens: Expansion);
4976
4977 LLVM_DEBUG({
4978 llvm::dbgs() << "Expanded: ";
4979 for (const auto &T : Expansion)
4980 llvm::dbgs() << T->TokenText << " ";
4981 llvm::dbgs() << "\n";
4982 });
4983 } else {
4984 LLVM_DEBUG({
4985 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4986 << "\", because it was used ";
4987 if (Args)
4988 llvm::dbgs() << "with " << Args->size();
4989 else
4990 llvm::dbgs() << "without";
4991 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4992 });
4993 Tokens->setPosition(Position);
4994 FormatTok = ID;
4995 }
4996 }
4997
4998 if (FormatTok->isNot(Kind: tok::comment)) {
4999 distributeComments(Comments, NextTok: FormatTok);
5000 Comments.clear();
5001 return;
5002 }
5003
5004 Comments.push_back(Elt: FormatTok);
5005 } while (!eof());
5006
5007 distributeComments(Comments, NextTok: nullptr);
5008 Comments.clear();
5009}
5010
5011namespace {
5012template <typename Iterator>
5013void pushTokens(Iterator Begin, Iterator End,
5014 SmallVectorImpl<FormatToken *> &Into) {
5015 for (auto I = Begin; I != End; ++I) {
5016 Into.push_back(Elt: I->Tok);
5017 for (const auto &Child : I->Children)
5018 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5019 }
5020}
5021} // namespace
5022
5023std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5024UnwrappedLineParser::parseMacroCall() {
5025 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5026 assert(Line->Tokens.empty());
5027 nextToken();
5028 if (FormatTok->isNot(Kind: tok::l_paren))
5029 return Args;
5030 unsigned Position = Tokens->getPosition();
5031 FormatToken *Tok = FormatTok;
5032 nextToken();
5033 Args.emplace();
5034 auto ArgStart = std::prev(x: Line->Tokens.end());
5035
5036 int Parens = 0;
5037 do {
5038 switch (FormatTok->Tok.getKind()) {
5039 case tok::l_paren:
5040 ++Parens;
5041 nextToken();
5042 break;
5043 case tok::r_paren: {
5044 if (Parens > 0) {
5045 --Parens;
5046 nextToken();
5047 break;
5048 }
5049 Args->push_back(Elt: {});
5050 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5051 nextToken();
5052 return Args;
5053 }
5054 case tok::comma: {
5055 if (Parens > 0) {
5056 nextToken();
5057 break;
5058 }
5059 Args->push_back(Elt: {});
5060 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5061 nextToken();
5062 ArgStart = std::prev(x: Line->Tokens.end());
5063 break;
5064 }
5065 default:
5066 nextToken();
5067 break;
5068 }
5069 } while (!eof());
5070 Line->Tokens.resize(new_size: 1);
5071 Tokens->setPosition(Position);
5072 FormatTok = Tok;
5073 return {};
5074}
5075
5076void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5077 Line->Tokens.push_back(x: UnwrappedLineNode(Tok));
5078 if (AtEndOfPPLine) {
5079 auto &Tok = *Line->Tokens.back().Tok;
5080 Tok.MustBreakBefore = true;
5081 Tok.MustBreakBeforeFinalized = true;
5082 Tok.FirstAfterPPLine = true;
5083 AtEndOfPPLine = false;
5084 }
5085}
5086
5087} // end namespace format
5088} // end namespace clang
5089

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of clang/lib/Format/UnwrappedLineParser.cpp