1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
/// Classification stored in \c AnnotatedLine::Type.
///
/// The enumerators use the implicit values 0..9 in declaration order; keep the
/// order stable, since other code may rely on the numeric values.
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty,
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
35
/// Kind of enclosing scope; \c TokenAnnotator keeps a stack of these in its
/// \c Scopes member.
enum ScopeType {
  /// Contained in a class declaration/definition.
  ST_Class,
  /// Contained within a function definition.
  ST_Function,
  /// Contained within any other scope block (loop, if/else, etc).
  ST_Other,
};
44
45class AnnotatedLine {
46public:
47 AnnotatedLine(const UnwrappedLine &Line)
48 : First(Line.Tokens.front().Tok), Level(Line.Level),
49 PPLevel(Line.PPLevel),
50 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
51 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
52 InPPDirective(Line.InPPDirective),
53 InPragmaDirective(Line.InPragmaDirective),
54 InMacroBody(Line.InMacroBody),
55 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
56 IsMultiVariableDeclStmt(false), Affected(false),
57 LeadingEmptyLinesAffected(false), ChildrenAffected(false),
58 ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
59 FirstStartColumn(Line.FirstStartColumn) {
60 assert(!Line.Tokens.empty());
61
62 // Calculate Next and Previous for all tokens. Note that we must overwrite
63 // Next and Previous for every token, as previous formatting runs might have
64 // left them in a different state.
65 First->Previous = nullptr;
66 FormatToken *Current = First;
67 addChildren(Node: Line.Tokens.front(), Current);
68 for (const UnwrappedLineNode &Node : llvm::drop_begin(RangeOrContainer: Line.Tokens)) {
69 if (Node.Tok->MacroParent)
70 ContainsMacroCall = true;
71 Current->Next = Node.Tok;
72 Node.Tok->Previous = Current;
73 Current = Current->Next;
74 addChildren(Node, Current);
75 // FIXME: if we add children, previous will point to the token before
76 // the children; changing this requires significant changes across
77 // clang-format.
78 }
79 Last = Current;
80 Last->Next = nullptr;
81 }
82
83 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
84 Current->Children.clear();
85 for (const auto &Child : Node.Children) {
86 Children.push_back(Elt: new AnnotatedLine(Child));
87 if (Children.back()->ContainsMacroCall)
88 ContainsMacroCall = true;
89 Current->Children.push_back(Elt: Children.back());
90 }
91 }
92
93 size_t size() const {
94 size_t Size = 1;
95 for (const auto *Child : Children)
96 Size += Child->size();
97 return Size;
98 }
99
100 ~AnnotatedLine() {
101 for (AnnotatedLine *Child : Children)
102 delete Child;
103 FormatToken *Current = First;
104 while (Current) {
105 Current->Children.clear();
106 Current->Role.reset();
107 Current = Current->Next;
108 }
109 }
110
111 bool isComment() const {
112 return First && First->is(Kind: tok::comment) && !First->getNextNonComment();
113 }
114
115 /// \c true if this line starts with the given tokens in order, ignoring
116 /// comments.
117 template <typename... Ts> bool startsWith(Ts... Tokens) const {
118 return First && First->startsSequence(Tokens...);
119 }
120
121 /// \c true if this line ends with the given tokens in reversed order,
122 /// ignoring comments.
123 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
124 /// this line is like "... T3 T2 T1".
125 template <typename... Ts> bool endsWith(Ts... Tokens) const {
126 return Last && Last->endsSequence(Tokens...);
127 }
128
129 /// \c true if this line looks like a function definition instead of a
130 /// function declaration. Asserts MightBeFunctionDecl.
131 bool mightBeFunctionDefinition() const {
132 assert(MightBeFunctionDecl);
133 // Try to determine if the end of a stream of tokens is either the
134 // Definition or the Declaration for a function. It does this by looking for
135 // the ';' in foo(); and using that it ends with a ; to know this is the
136 // Definition, however the line could end with
137 // foo(); /* comment */
138 // or
139 // foo(); // comment
140 // or
141 // foo() // comment
142 // endsWith() ignores the comment.
143 return !endsWith(Tokens: tok::semi);
144 }
145
146 /// \c true if this line starts a namespace definition.
147 bool startsWithNamespace() const {
148 return startsWith(Tokens: tok::kw_namespace) || startsWith(Tokens: TT_NamespaceMacro) ||
149 startsWith(Tokens: tok::kw_inline, Tokens: tok::kw_namespace) ||
150 startsWith(Tokens: tok::kw_export, Tokens: tok::kw_namespace);
151 }
152
153 FormatToken *getFirstNonComment() const {
154 assert(First);
155 return First->is(Kind: tok::comment) ? First->getNextNonComment() : First;
156 }
157
158 FormatToken *getLastNonComment() const {
159 assert(Last);
160 return Last->is(Kind: tok::comment) ? Last->getPreviousNonComment() : Last;
161 }
162
163 FormatToken *First;
164 FormatToken *Last;
165
166 SmallVector<AnnotatedLine *, 0> Children;
167
168 LineType Type;
169 unsigned Level;
170 unsigned PPLevel;
171 size_t MatchingOpeningBlockLineIndex;
172 size_t MatchingClosingBlockLineIndex;
173 bool InPPDirective;
174 bool InPragmaDirective;
175 bool InMacroBody;
176 bool MustBeDeclaration;
177 bool MightBeFunctionDecl;
178 bool IsMultiVariableDeclStmt;
179
180 /// \c True if this line contains a macro call for which an expansion exists.
181 bool ContainsMacroCall = false;
182
183 /// \c True if this line should be formatted, i.e. intersects directly or
184 /// indirectly with one of the input ranges.
185 bool Affected;
186
187 /// \c True if the leading empty lines of this line intersect with one of the
188 /// input ranges.
189 bool LeadingEmptyLinesAffected;
190
191 /// \c True if one of this line's children intersects with an input range.
192 bool ChildrenAffected;
193
194 /// \c True if breaking after last attribute group in function return type.
195 bool ReturnTypeWrapped;
196
197 /// \c True if this line should be indented by ContinuationIndent in addition
198 /// to the normal indention level.
199 bool IsContinuation;
200
201 unsigned FirstStartColumn;
202
203private:
204 // Disallow copying.
205 AnnotatedLine(const AnnotatedLine &) = delete;
206 void operator=(const AnnotatedLine &) = delete;
207};
208
209/// Determines extra information about the tokens comprising an
210/// \c UnwrappedLine.
211class TokenAnnotator {
212public:
213 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
214 : Style(Style), IsCpp(Style.isCpp()), Keywords(Keywords) {}
215
216 /// Adapts the indent levels of comment lines to the indent of the
217 /// subsequent line.
218 // FIXME: Can/should this be done in the UnwrappedLineParser?
219 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
220
221 void annotate(AnnotatedLine &Line);
222 void calculateFormattingInformation(AnnotatedLine &Line) const;
223
224private:
225 /// Calculate the penalty for splitting before \c Tok.
226 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
227 bool InFunctionDecl) const;
228
229 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
230
231 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
232 const FormatToken &Right) const;
233
234 bool spaceRequiredBefore(const AnnotatedLine &Line,
235 const FormatToken &Right) const;
236
237 bool mustBreakBefore(const AnnotatedLine &Line,
238 const FormatToken &Right) const;
239
240 bool canBreakBefore(const AnnotatedLine &Line,
241 const FormatToken &Right) const;
242
243 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
244
245 void printDebugInfo(const AnnotatedLine &Line) const;
246
247 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
248
249 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
250
251 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
252 FormatToken *CurrentToken,
253 unsigned Depth) const;
254 FormatStyle::PointerAlignmentStyle
255 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
256
257 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
258 const FormatToken &PointerOrReference) const;
259
260 const FormatStyle &Style;
261
262 bool IsCpp;
263
264 const AdditionalKeywords &Keywords;
265
266 SmallVector<ScopeType> Scopes;
267};
268
269} // end namespace format
270} // end namespace clang
271
272#endif
273

// Source: clang/lib/Format/TokenAnnotator.h