1 | //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements a token annotator, i.e. creates |
11 | /// \c AnnotatedTokens out of \c FormatTokens with required extra information. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
16 | #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
17 | |
18 | #include "UnwrappedLineParser.h" |
19 | |
20 | namespace clang { |
21 | namespace format { |
22 | |
/// Classification assigned to an \c AnnotatedLine (stored in
/// \c AnnotatedLine::Type).
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  // NOTE(review): this enumerator's name was missing in the text under review
  // (only a stray ',' remained, which does not compile). The name is restored
  // from upstream clang-format; confirm against the users of LineType.
  LT_CommentAbovePPDirective,
};
35 | |
/// Kind of scope a token is nested in; \c TokenAnnotator keeps a stack of
/// these in its \c Scopes member.
enum ScopeType {
  /// Inside a class declaration/definition.
  ST_Class,
  /// Inside a function definition.
  ST_Function,
  /// Inside any other scope block (loop, if/else, etc).
  ST_Other,
};
44 | |
45 | class AnnotatedLine { |
46 | public: |
47 | AnnotatedLine(const UnwrappedLine &Line) |
48 | : First(Line.Tokens.front().Tok), Level(Line.Level), |
49 | PPLevel(Line.PPLevel), |
50 | MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), |
51 | MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), |
52 | InPPDirective(Line.InPPDirective), |
53 | InPragmaDirective(Line.InPragmaDirective), |
54 | InMacroBody(Line.InMacroBody), |
55 | MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), |
56 | IsMultiVariableDeclStmt(false), Affected(false), |
57 | LeadingEmptyLinesAffected(false), ChildrenAffected(false), |
58 | ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), |
59 | FirstStartColumn(Line.FirstStartColumn) { |
60 | assert(!Line.Tokens.empty()); |
61 | |
62 | // Calculate Next and Previous for all tokens. Note that we must overwrite |
63 | // Next and Previous for every token, as previous formatting runs might have |
64 | // left them in a different state. |
65 | First->Previous = nullptr; |
66 | FormatToken *Current = First; |
67 | addChildren(Node: Line.Tokens.front(), Current); |
68 | for (const UnwrappedLineNode &Node : llvm::drop_begin(RangeOrContainer: Line.Tokens)) { |
69 | if (Node.Tok->MacroParent) |
70 | ContainsMacroCall = true; |
71 | Current->Next = Node.Tok; |
72 | Node.Tok->Previous = Current; |
73 | Current = Current->Next; |
74 | addChildren(Node, Current); |
75 | // FIXME: if we add children, previous will point to the token before |
76 | // the children; changing this requires significant changes across |
77 | // clang-format. |
78 | } |
79 | Last = Current; |
80 | Last->Next = nullptr; |
81 | } |
82 | |
83 | void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { |
84 | Current->Children.clear(); |
85 | for (const auto &Child : Node.Children) { |
86 | Children.push_back(Elt: new AnnotatedLine(Child)); |
87 | if (Children.back()->ContainsMacroCall) |
88 | ContainsMacroCall = true; |
89 | Current->Children.push_back(Elt: Children.back()); |
90 | } |
91 | } |
92 | |
93 | size_t size() const { |
94 | size_t Size = 1; |
95 | for (const auto *Child : Children) |
96 | Size += Child->size(); |
97 | return Size; |
98 | } |
99 | |
100 | ~AnnotatedLine() { |
101 | for (AnnotatedLine *Child : Children) |
102 | delete Child; |
103 | FormatToken *Current = First; |
104 | while (Current) { |
105 | Current->Children.clear(); |
106 | Current->Role.reset(); |
107 | Current = Current->Next; |
108 | } |
109 | } |
110 | |
111 | bool () const { |
112 | return First && First->is(Kind: tok::comment) && !First->getNextNonComment(); |
113 | } |
114 | |
115 | /// \c true if this line starts with the given tokens in order, ignoring |
116 | /// comments. |
117 | template <typename... Ts> bool startsWith(Ts... Tokens) const { |
118 | return First && First->startsSequence(Tokens...); |
119 | } |
120 | |
121 | /// \c true if this line ends with the given tokens in reversed order, |
122 | /// ignoring comments. |
123 | /// For example, given tokens [T1, T2, T3, ...], the function returns true if |
124 | /// this line is like "... T3 T2 T1". |
125 | template <typename... Ts> bool endsWith(Ts... Tokens) const { |
126 | return Last && Last->endsSequence(Tokens...); |
127 | } |
128 | |
129 | /// \c true if this line looks like a function definition instead of a |
130 | /// function declaration. Asserts MightBeFunctionDecl. |
131 | bool mightBeFunctionDefinition() const { |
132 | assert(MightBeFunctionDecl); |
133 | // Try to determine if the end of a stream of tokens is either the |
134 | // Definition or the Declaration for a function. It does this by looking for |
135 | // the ';' in foo(); and using that it ends with a ; to know this is the |
136 | // Definition, however the line could end with |
137 | // foo(); /* comment */ |
138 | // or |
139 | // foo(); // comment |
140 | // or |
141 | // foo() // comment |
142 | // endsWith() ignores the comment. |
143 | return !endsWith(Tokens: tok::semi); |
144 | } |
145 | |
146 | /// \c true if this line starts a namespace definition. |
147 | bool startsWithNamespace() const { |
148 | return startsWith(Tokens: tok::kw_namespace) || startsWith(Tokens: TT_NamespaceMacro) || |
149 | startsWith(Tokens: tok::kw_inline, Tokens: tok::kw_namespace) || |
150 | startsWith(Tokens: tok::kw_export, Tokens: tok::kw_namespace); |
151 | } |
152 | |
153 | FormatToken *() const { |
154 | assert(First); |
155 | return First->is(Kind: tok::comment) ? First->getNextNonComment() : First; |
156 | } |
157 | |
158 | FormatToken *() const { |
159 | assert(Last); |
160 | return Last->is(Kind: tok::comment) ? Last->getPreviousNonComment() : Last; |
161 | } |
162 | |
163 | FormatToken *First; |
164 | FormatToken *Last; |
165 | |
166 | SmallVector<AnnotatedLine *, 0> Children; |
167 | |
168 | LineType Type; |
169 | unsigned Level; |
170 | unsigned PPLevel; |
171 | size_t MatchingOpeningBlockLineIndex; |
172 | size_t MatchingClosingBlockLineIndex; |
173 | bool InPPDirective; |
174 | bool InPragmaDirective; |
175 | bool InMacroBody; |
176 | bool MustBeDeclaration; |
177 | bool MightBeFunctionDecl; |
178 | bool IsMultiVariableDeclStmt; |
179 | |
180 | /// \c True if this line contains a macro call for which an expansion exists. |
181 | bool ContainsMacroCall = false; |
182 | |
183 | /// \c True if this line should be formatted, i.e. intersects directly or |
184 | /// indirectly with one of the input ranges. |
185 | bool Affected; |
186 | |
187 | /// \c True if the leading empty lines of this line intersect with one of the |
188 | /// input ranges. |
189 | bool LeadingEmptyLinesAffected; |
190 | |
191 | /// \c True if one of this line's children intersects with an input range. |
192 | bool ChildrenAffected; |
193 | |
194 | /// \c True if breaking after last attribute group in function return type. |
195 | bool ReturnTypeWrapped; |
196 | |
197 | /// \c True if this line should be indented by ContinuationIndent in addition |
198 | /// to the normal indention level. |
199 | bool IsContinuation; |
200 | |
201 | unsigned FirstStartColumn; |
202 | |
203 | private: |
204 | // Disallow copying. |
205 | AnnotatedLine(const AnnotatedLine &) = delete; |
206 | void operator=(const AnnotatedLine &) = delete; |
207 | }; |
208 | |
209 | /// Determines extra information about the tokens comprising an |
210 | /// \c UnwrappedLine. |
211 | class TokenAnnotator { |
212 | public: |
213 | TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) |
214 | : Style(Style), IsCpp(Style.isCpp()), Keywords(Keywords) {} |
215 | |
216 | /// Adapts the indent levels of comment lines to the indent of the |
217 | /// subsequent line. |
218 | // FIXME: Can/should this be done in the UnwrappedLineParser? |
219 | void (SmallVectorImpl<AnnotatedLine *> &Lines) const; |
220 | |
221 | void annotate(AnnotatedLine &Line); |
222 | void calculateFormattingInformation(AnnotatedLine &Line) const; |
223 | |
224 | private: |
225 | /// Calculate the penalty for splitting before \c Tok. |
226 | unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, |
227 | bool InFunctionDecl) const; |
228 | |
229 | bool spaceRequiredBeforeParens(const FormatToken &Right) const; |
230 | |
231 | bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, |
232 | const FormatToken &Right) const; |
233 | |
234 | bool spaceRequiredBefore(const AnnotatedLine &Line, |
235 | const FormatToken &Right) const; |
236 | |
237 | bool mustBreakBefore(const AnnotatedLine &Line, |
238 | const FormatToken &Right) const; |
239 | |
240 | bool canBreakBefore(const AnnotatedLine &Line, |
241 | const FormatToken &Right) const; |
242 | |
243 | bool mustBreakForReturnType(const AnnotatedLine &Line) const; |
244 | |
245 | void printDebugInfo(const AnnotatedLine &Line) const; |
246 | |
247 | void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; |
248 | |
249 | void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; |
250 | |
251 | FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, |
252 | FormatToken *CurrentToken, |
253 | unsigned Depth) const; |
254 | FormatStyle::PointerAlignmentStyle |
255 | getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; |
256 | |
257 | FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( |
258 | const FormatToken &PointerOrReference) const; |
259 | |
260 | const FormatStyle &Style; |
261 | |
262 | bool IsCpp; |
263 | |
264 | const AdditionalKeywords &Keywords; |
265 | |
266 | SmallVector<ScopeType> Scopes; |
267 | }; |
268 | |
269 | } // end namespace format |
270 | } // end namespace clang |
271 | |
272 | #endif |
273 | |