1 | //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements a token annotator, i.e. creates |
11 | /// \c AnnotatedTokens out of \c FormatTokens with required extra information. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
16 | #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
17 | |
18 | #include "UnwrappedLineParser.h" |
19 | |
20 | namespace clang { |
21 | namespace format { |
22 | |
/// Classification of an annotated line, stored in \c AnnotatedLine::Type.
///
/// The enumerators are unscoped and rely on implicit numbering, so their
/// order is part of the interface: new values must not be inserted in the
/// middle without auditing users.
enum LineType {
  LT_Invalid,
  // Contains public/private/protected followed by TT_InheritanceColon.
  LT_AccessModifier,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  // NOTE(review): this enumerator's name was missing from the damaged source
  // (only a bare ',' remained, which does not compile). Restored so that the
  // following enumerators keep their values; presumably it marks a comment
  // line directly above a preprocessor directive - confirm against users.
  LT_CommentAbovePPDirective,
  LT_RequiresExpression,
  LT_SimpleRequirement,
};
39 | |
/// Kind of enclosing block, as tracked on the \c TokenAnnotator scope stacks.
enum ScopeType {
  ST_Class,               ///< Inside a class declaration/definition.
  ST_CompoundRequirement, ///< Inside a compound requirement.
  /// Inside any other block (function, lambda, loop, if/else, child, etc).
  ST_Other,
};
48 | |
49 | class AnnotatedLine { |
50 | public: |
51 | AnnotatedLine(const UnwrappedLine &Line) |
52 | : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level), |
53 | PPLevel(Line.PPLevel), |
54 | MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), |
55 | MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), |
56 | InPPDirective(Line.InPPDirective), |
57 | InPragmaDirective(Line.InPragmaDirective), |
58 | InMacroBody(Line.InMacroBody), |
59 | MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), |
60 | IsMultiVariableDeclStmt(false), Affected(false), |
61 | LeadingEmptyLinesAffected(false), ChildrenAffected(false), |
62 | ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), |
63 | FirstStartColumn(Line.FirstStartColumn) { |
64 | assert(!Line.Tokens.empty()); |
65 | |
66 | // Calculate Next and Previous for all tokens. Note that we must overwrite |
67 | // Next and Previous for every token, as previous formatting runs might have |
68 | // left them in a different state. |
69 | First->Previous = nullptr; |
70 | FormatToken *Current = First; |
71 | addChildren(Node: Line.Tokens.front(), Current); |
72 | for (const UnwrappedLineNode &Node : llvm::drop_begin(RangeOrContainer: Line.Tokens)) { |
73 | if (Node.Tok->MacroParent) |
74 | ContainsMacroCall = true; |
75 | Current->Next = Node.Tok; |
76 | Node.Tok->Previous = Current; |
77 | Current = Current->Next; |
78 | addChildren(Node, Current); |
79 | // FIXME: if we add children, previous will point to the token before |
80 | // the children; changing this requires significant changes across |
81 | // clang-format. |
82 | } |
83 | Last = Current; |
84 | Last->Next = nullptr; |
85 | } |
86 | |
87 | void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { |
88 | Current->Children.clear(); |
89 | for (const auto &Child : Node.Children) { |
90 | Children.push_back(Elt: new AnnotatedLine(Child)); |
91 | if (Children.back()->ContainsMacroCall) |
92 | ContainsMacroCall = true; |
93 | Current->Children.push_back(Elt: Children.back()); |
94 | } |
95 | } |
96 | |
97 | size_t size() const { |
98 | size_t Size = 1; |
99 | for (const auto *Child : Children) |
100 | Size += Child->size(); |
101 | return Size; |
102 | } |
103 | |
104 | ~AnnotatedLine() { |
105 | for (AnnotatedLine *Child : Children) |
106 | delete Child; |
107 | FormatToken *Current = First; |
108 | while (Current) { |
109 | Current->Children.clear(); |
110 | Current->Role.reset(); |
111 | Current = Current->Next; |
112 | } |
113 | } |
114 | |
115 | bool () const { |
116 | return First && First->is(Kind: tok::comment) && !First->getNextNonComment(); |
117 | } |
118 | |
119 | /// \c true if this line starts with the given tokens in order, ignoring |
120 | /// comments. |
121 | template <typename... Ts> bool startsWith(Ts... Tokens) const { |
122 | return First && First->startsSequence(Tokens...); |
123 | } |
124 | |
125 | /// \c true if this line ends with the given tokens in reversed order, |
126 | /// ignoring comments. |
127 | /// For example, given tokens [T1, T2, T3, ...], the function returns true if |
128 | /// this line is like "... T3 T2 T1". |
129 | template <typename... Ts> bool endsWith(Ts... Tokens) const { |
130 | return Last && Last->endsSequence(Tokens...); |
131 | } |
132 | |
133 | /// \c true if this line looks like a function definition instead of a |
134 | /// function declaration. Asserts MightBeFunctionDecl. |
135 | bool mightBeFunctionDefinition() const { |
136 | assert(MightBeFunctionDecl); |
137 | // Try to determine if the end of a stream of tokens is either the |
138 | // Definition or the Declaration for a function. It does this by looking for |
139 | // the ';' in foo(); and using that it ends with a ; to know this is the |
140 | // Definition, however the line could end with |
141 | // foo(); /* comment */ |
142 | // or |
143 | // foo(); // comment |
144 | // or |
145 | // foo() // comment |
146 | // endsWith() ignores the comment. |
147 | return !endsWith(Tokens: tok::semi); |
148 | } |
149 | |
150 | /// \c true if this line starts a namespace definition. |
151 | bool startsWithNamespace() const { |
152 | return startsWith(Tokens: tok::kw_namespace) || startsWith(Tokens: TT_NamespaceMacro) || |
153 | startsWith(Tokens: tok::kw_inline, Tokens: tok::kw_namespace) || |
154 | startsWith(Tokens: tok::kw_export, Tokens: tok::kw_namespace); |
155 | } |
156 | |
157 | /// \c true if this line starts a C++ export block. |
158 | bool startsWithExportBlock() const { |
159 | return startsWith(Tokens: tok::kw_export, Tokens: tok::l_brace); |
160 | } |
161 | |
162 | FormatToken *() const { |
163 | assert(First); |
164 | return First->is(Kind: tok::comment) ? First->getNextNonComment() : First; |
165 | } |
166 | |
167 | FormatToken *() const { |
168 | assert(Last); |
169 | return Last->is(Kind: tok::comment) ? Last->getPreviousNonComment() : Last; |
170 | } |
171 | |
172 | FormatToken *First; |
173 | FormatToken *Last; |
174 | |
175 | SmallVector<AnnotatedLine *, 0> Children; |
176 | |
177 | LineType Type; |
178 | unsigned Level; |
179 | unsigned PPLevel; |
180 | size_t MatchingOpeningBlockLineIndex; |
181 | size_t MatchingClosingBlockLineIndex; |
182 | bool InPPDirective; |
183 | bool InPragmaDirective; |
184 | bool InMacroBody; |
185 | bool MustBeDeclaration; |
186 | bool MightBeFunctionDecl; |
187 | bool IsMultiVariableDeclStmt; |
188 | |
189 | /// \c True if this line contains a macro call for which an expansion exists. |
190 | bool ContainsMacroCall = false; |
191 | |
192 | /// \c True if calculateFormattingInformation() has been called on this line. |
193 | bool Computed = false; |
194 | |
195 | /// \c True if this line should be formatted, i.e. intersects directly or |
196 | /// indirectly with one of the input ranges. |
197 | bool Affected; |
198 | |
199 | /// \c True if the leading empty lines of this line intersect with one of the |
200 | /// input ranges. |
201 | bool LeadingEmptyLinesAffected; |
202 | |
203 | /// \c True if one of this line's children intersects with an input range. |
204 | bool ChildrenAffected; |
205 | |
206 | /// \c True if breaking after last attribute group in function return type. |
207 | bool ReturnTypeWrapped; |
208 | |
209 | /// \c True if this line should be indented by ContinuationIndent in addition |
210 | /// to the normal indention level. |
211 | bool IsContinuation; |
212 | |
213 | unsigned FirstStartColumn; |
214 | |
215 | private: |
216 | // Disallow copying. |
217 | AnnotatedLine(const AnnotatedLine &) = delete; |
218 | void operator=(const AnnotatedLine &) = delete; |
219 | }; |
220 | |
221 | /// Determines extra information about the tokens comprising an |
222 | /// \c UnwrappedLine. |
223 | class TokenAnnotator { |
224 | public: |
225 | TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) |
226 | : Style(Style), IsCpp(Style.isCpp()), |
227 | LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {} |
228 | |
229 | /// Adapts the indent levels of comment lines to the indent of the |
230 | /// subsequent line. |
231 | // FIXME: Can/should this be done in the UnwrappedLineParser? |
232 | void (SmallVectorImpl<AnnotatedLine *> &Lines) const; |
233 | |
234 | void annotate(AnnotatedLine &Line); |
235 | void calculateFormattingInformation(AnnotatedLine &Line) const; |
236 | |
237 | private: |
238 | /// Calculate the penalty for splitting before \c Tok. |
239 | unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, |
240 | bool InFunctionDecl) const; |
241 | |
242 | bool spaceRequiredBeforeParens(const FormatToken &Right) const; |
243 | |
244 | bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, |
245 | const FormatToken &Right) const; |
246 | |
247 | bool spaceRequiredBefore(const AnnotatedLine &Line, |
248 | const FormatToken &Right) const; |
249 | |
250 | bool mustBreakBefore(const AnnotatedLine &Line, |
251 | const FormatToken &Right) const; |
252 | |
253 | bool canBreakBefore(const AnnotatedLine &Line, |
254 | const FormatToken &Right) const; |
255 | |
256 | bool mustBreakForReturnType(const AnnotatedLine &Line) const; |
257 | |
258 | void printDebugInfo(const AnnotatedLine &Line) const; |
259 | |
260 | void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; |
261 | |
262 | void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; |
263 | |
264 | FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, |
265 | FormatToken *CurrentToken, |
266 | unsigned Depth) const; |
267 | FormatStyle::PointerAlignmentStyle |
268 | getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; |
269 | |
270 | FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( |
271 | const FormatToken &PointerOrReference) const; |
272 | |
273 | const FormatStyle &Style; |
274 | |
275 | bool IsCpp; |
276 | LangOptions LangOpts; |
277 | |
278 | const AdditionalKeywords &Keywords; |
279 | |
280 | SmallVector<ScopeType> Scopes, MacroBodyScopes; |
281 | }; |
282 | |
283 | } // end namespace format |
284 | } // end namespace clang |
285 | |
286 | #endif |
287 | |