1 | //===--- FormatToken.cpp - Format C++ code --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements specific functions of \c FormatTokens and their |
11 | /// roles. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "FormatToken.h" |
16 | #include "ContinuationIndenter.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Support/Debug.h" |
19 | #include <climits> |
20 | |
21 | namespace clang { |
22 | namespace format { |
23 | |
24 | const char *getTokenTypeName(TokenType Type) { |
25 | static const char *const TokNames[] = { |
26 | #define TYPE(X) #X, |
27 | LIST_TOKEN_TYPES |
28 | #undef TYPE |
29 | nullptr}; |
30 | |
31 | if (Type < NUM_TOKEN_TYPES) |
32 | return TokNames[Type]; |
33 | llvm_unreachable("unknown TokenType" ); |
34 | return nullptr; |
35 | } |
36 | |
37 | // FIXME: This is copy&pasted from Sema. Put it in a common place and remove |
38 | // duplication. |
39 | bool FormatToken::isSimpleTypeSpecifier() const { |
40 | switch (Tok.getKind()) { |
41 | case tok::kw_short: |
42 | case tok::kw_long: |
43 | case tok::kw___int64: |
44 | case tok::kw___int128: |
45 | case tok::kw_signed: |
46 | case tok::kw_unsigned: |
47 | case tok::kw_void: |
48 | case tok::kw_char: |
49 | case tok::kw_int: |
50 | case tok::kw_half: |
51 | case tok::kw_float: |
52 | case tok::kw_double: |
53 | case tok::kw___bf16: |
54 | case tok::kw__Float16: |
55 | case tok::kw___float128: |
56 | case tok::kw___ibm128: |
57 | case tok::kw_wchar_t: |
58 | case tok::kw_bool: |
59 | #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: |
60 | #include "clang/Basic/TransformTypeTraits.def" |
61 | case tok::annot_typename: |
62 | case tok::kw_char8_t: |
63 | case tok::kw_char16_t: |
64 | case tok::kw_char32_t: |
65 | case tok::kw_typeof: |
66 | case tok::kw_decltype: |
67 | case tok::kw__Atomic: |
68 | return true; |
69 | default: |
70 | return false; |
71 | } |
72 | } |
73 | |
74 | // Sorted common C++ non-keyword types. |
75 | static SmallVector<StringRef> CppNonKeywordTypes = { |
76 | "clock_t" , "int16_t" , "int32_t" , "int64_t" , "int8_t" , |
77 | "intptr_t" , "ptrdiff_t" , "size_t" , "time_t" , "uint16_t" , |
78 | "uint32_t" , "uint64_t" , "uint8_t" , "uintptr_t" , |
79 | }; |
80 | |
81 | bool FormatToken::isTypeName(bool IsCpp) const { |
82 | return is(TT: TT_TypeName) || isSimpleTypeSpecifier() || |
83 | (IsCpp && is(Kind: tok::identifier) && |
84 | std::binary_search(first: CppNonKeywordTypes.begin(), |
85 | last: CppNonKeywordTypes.end(), val: TokenText)); |
86 | } |
87 | |
88 | bool FormatToken::isTypeOrIdentifier(bool IsCpp) const { |
89 | return isTypeName(IsCpp) || isOneOf(K1: tok::kw_auto, K2: tok::identifier); |
90 | } |
91 | |
92 | bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { |
93 | assert(is(tok::r_brace)); |
94 | if (!Style.Cpp11BracedListStyle || |
95 | Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { |
96 | return false; |
97 | } |
98 | const auto *LBrace = MatchingParen; |
99 | assert(LBrace && LBrace->is(tok::l_brace)); |
100 | if (LBrace->is(BBK: BK_BracedInit)) |
101 | return true; |
102 | if (LBrace->Previous && LBrace->Previous->is(Kind: tok::equal)) |
103 | return true; |
104 | return false; |
105 | } |
106 | |
107 | bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const { |
108 | // C# Does not indent object initialisers as continuations. |
109 | if (is(Kind: tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) |
110 | return true; |
111 | if (is(TT: TT_TemplateString) && opensScope()) |
112 | return true; |
113 | return is(TT: TT_ArrayInitializerLSquare) || is(TT: TT_ProtoExtensionLSquare) || |
114 | (is(Kind: tok::l_brace) && |
115 | (getBlockKind() == BK_Block || is(TT: TT_DictLiteral) || |
116 | (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || |
117 | (is(Kind: tok::less) && Style.isProto()); |
118 | } |
119 | |
120 | TokenRole::~TokenRole() {} |
121 | |
122 | void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} |
123 | |
124 | unsigned CommaSeparatedList::formatAfterToken(LineState &State, |
125 | ContinuationIndenter *Indenter, |
126 | bool DryRun) { |
127 | if (!State.NextToken || !State.NextToken->Previous) |
128 | return 0; |
129 | |
130 | if (Formats.size() <= 1) |
131 | return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). |
132 | |
133 | // Ensure that we start on the opening brace. |
134 | const FormatToken *LBrace = |
135 | State.NextToken->Previous->getPreviousNonComment(); |
136 | if (!LBrace || !LBrace->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare) || |
137 | LBrace->is(BBK: BK_Block) || LBrace->is(TT: TT_DictLiteral) || |
138 | LBrace->Next->is(TT: TT_DesignatedInitializerPeriod)) { |
139 | return 0; |
140 | } |
141 | |
142 | // Calculate the number of code points we have to format this list. As the |
143 | // first token is already placed, we have to subtract it. |
144 | unsigned RemainingCodePoints = |
145 | Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; |
146 | |
147 | // Find the best ColumnFormat, i.e. the best number of columns to use. |
148 | const ColumnFormat *Format = getColumnFormat(RemainingCharacters: RemainingCodePoints); |
149 | |
150 | // If no ColumnFormat can be used, the braced list would generally be |
151 | // bin-packed. Add a severe penalty to this so that column layouts are |
152 | // preferred if possible. |
153 | if (!Format) |
154 | return 10'000; |
155 | |
156 | // Format the entire list. |
157 | unsigned Penalty = 0; |
158 | unsigned Column = 0; |
159 | unsigned Item = 0; |
160 | while (State.NextToken != LBrace->MatchingParen) { |
161 | bool NewLine = false; |
162 | unsigned = 0; |
163 | |
164 | // If the previous token was one of our commas, we are now on the next item. |
165 | if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { |
166 | if (!State.NextToken->isTrailingComment()) { |
167 | ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; |
168 | ++Column; |
169 | } |
170 | ++Item; |
171 | } |
172 | |
173 | if (Column == Format->Columns || State.NextToken->MustBreakBefore) { |
174 | Column = 0; |
175 | NewLine = true; |
176 | } |
177 | |
178 | // Place token using the continuation indenter and store the penalty. |
179 | Penalty += Indenter->addTokenToState(State, Newline: NewLine, DryRun, ExtraSpaces); |
180 | } |
181 | return Penalty; |
182 | } |
183 | |
184 | unsigned CommaSeparatedList::formatFromToken(LineState &State, |
185 | ContinuationIndenter *Indenter, |
186 | bool DryRun) { |
187 | // Formatting with 1 Column isn't really a column layout, so we don't need the |
188 | // special logic here. We can just avoid bin packing any of the parameters. |
189 | if (Formats.size() == 1 || HasNestedBracedList) |
190 | State.Stack.back().AvoidBinPacking = true; |
191 | return 0; |
192 | } |
193 | |
194 | // Returns the lengths in code points between Begin and End (both included), |
195 | // assuming that the entire sequence is put on a single line. |
196 | static unsigned CodePointsBetween(const FormatToken *Begin, |
197 | const FormatToken *End) { |
198 | assert(End->TotalLength >= Begin->TotalLength); |
199 | return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; |
200 | } |
201 | |
202 | void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { |
203 | // FIXME: At some point we might want to do this for other lists, too. |
204 | if (!Token->MatchingParen || |
205 | !Token->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare)) { |
206 | return; |
207 | } |
208 | |
209 | // In C++11 braced list style, we should not format in columns unless they |
210 | // have many items (20 or more) or we allow bin-packing of function call |
211 | // arguments. |
212 | if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && |
213 | Commas.size() < 19) { |
214 | return; |
215 | } |
216 | |
217 | // Limit column layout for JavaScript array initializers to 20 or more items |
218 | // for now to introduce it carefully. We can become more aggressive if this |
219 | // necessary. |
220 | if (Token->is(TT: TT_ArrayInitializerLSquare) && Commas.size() < 19) |
221 | return; |
222 | |
223 | // Column format doesn't really make sense if we don't align after brackets. |
224 | if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) |
225 | return; |
226 | |
227 | FormatToken *ItemBegin = Token->Next; |
228 | while (ItemBegin->isTrailingComment()) |
229 | ItemBegin = ItemBegin->Next; |
230 | SmallVector<bool, 8> MustBreakBeforeItem; |
231 | |
232 | // The lengths of an item if it is put at the end of the line. This includes |
233 | // trailing comments which are otherwise ignored for column alignment. |
234 | SmallVector<unsigned, 8> EndOfLineItemLength; |
235 | MustBreakBeforeItem.reserve(N: Commas.size() + 1); |
236 | EndOfLineItemLength.reserve(N: Commas.size() + 1); |
237 | ItemLengths.reserve(N: Commas.size() + 1); |
238 | |
239 | bool = false; |
240 | for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { |
241 | assert(ItemBegin); |
242 | // Skip comments on their own line. |
243 | while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { |
244 | ItemBegin = ItemBegin->Next; |
245 | HasSeparatingComment = i > 0; |
246 | } |
247 | |
248 | MustBreakBeforeItem.push_back(Elt: ItemBegin->MustBreakBefore); |
249 | if (ItemBegin->is(Kind: tok::l_brace)) |
250 | HasNestedBracedList = true; |
251 | const FormatToken *ItemEnd = nullptr; |
252 | if (i == Commas.size()) { |
253 | ItemEnd = Token->MatchingParen; |
254 | const FormatToken * = ItemEnd->getPreviousNonComment(); |
255 | ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: NonCommentEnd)); |
256 | if (Style.Cpp11BracedListStyle && |
257 | !ItemEnd->Previous->isTrailingComment()) { |
258 | // In Cpp11 braced list style, the } and possibly other subsequent |
259 | // tokens will need to stay on a line with the last element. |
260 | while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) |
261 | ItemEnd = ItemEnd->Next; |
262 | } else { |
263 | // In other braced lists styles, the "}" can be wrapped to the new line. |
264 | ItemEnd = Token->MatchingParen->Previous; |
265 | } |
266 | } else { |
267 | ItemEnd = Commas[i]; |
268 | // The comma is counted as part of the item when calculating the length. |
269 | ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd)); |
270 | |
271 | // Consume trailing comments so the are included in EndOfLineItemLength. |
272 | if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && |
273 | ItemEnd->Next->isTrailingComment()) { |
274 | ItemEnd = ItemEnd->Next; |
275 | } |
276 | } |
277 | EndOfLineItemLength.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd)); |
278 | // If there is a trailing comma in the list, the next item will start at the |
279 | // closing brace. Don't create an extra item for this. |
280 | if (ItemEnd->getNextNonComment() == Token->MatchingParen) |
281 | break; |
282 | ItemBegin = ItemEnd->Next; |
283 | } |
284 | |
285 | // Don't use column layout for lists with few elements and in presence of |
286 | // separating comments. |
287 | if (Commas.size() < 5 || HasSeparatingComment) |
288 | return; |
289 | |
290 | if (Token->NestingLevel != 0 && Token->is(Kind: tok::l_brace) && Commas.size() < 19) |
291 | return; |
292 | |
293 | // We can never place more than ColumnLimit / 3 items in a row (because of the |
294 | // spaces and the comma). |
295 | unsigned MaxItems = Style.ColumnLimit / 3; |
296 | SmallVector<unsigned> MinSizeInColumn; |
297 | MinSizeInColumn.reserve(N: MaxItems); |
298 | for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { |
299 | ColumnFormat Format; |
300 | Format.Columns = Columns; |
301 | Format.ColumnSizes.resize(N: Columns); |
302 | MinSizeInColumn.assign(NumElts: Columns, UINT_MAX); |
303 | Format.LineCount = 1; |
304 | bool HasRowWithSufficientColumns = false; |
305 | unsigned Column = 0; |
306 | for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { |
307 | assert(i < MustBreakBeforeItem.size()); |
308 | if (MustBreakBeforeItem[i] || Column == Columns) { |
309 | ++Format.LineCount; |
310 | Column = 0; |
311 | } |
312 | if (Column == Columns - 1) |
313 | HasRowWithSufficientColumns = true; |
314 | unsigned Length = |
315 | (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; |
316 | Format.ColumnSizes[Column] = std::max(a: Format.ColumnSizes[Column], b: Length); |
317 | MinSizeInColumn[Column] = std::min(a: MinSizeInColumn[Column], b: Length); |
318 | ++Column; |
319 | } |
320 | // If all rows are terminated early (e.g. by trailing comments), we don't |
321 | // need to look further. |
322 | if (!HasRowWithSufficientColumns) |
323 | break; |
324 | Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. |
325 | |
326 | for (unsigned i = 0; i < Columns; ++i) |
327 | Format.TotalWidth += Format.ColumnSizes[i]; |
328 | |
329 | // Don't use this Format, if the difference between the longest and shortest |
330 | // element in a column exceeds a threshold to avoid excessive spaces. |
331 | if ([&] { |
332 | for (unsigned i = 0; i < Columns - 1; ++i) |
333 | if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) |
334 | return true; |
335 | return false; |
336 | }()) { |
337 | continue; |
338 | } |
339 | |
340 | // Ignore layouts that are bound to violate the column limit. |
341 | if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) |
342 | continue; |
343 | |
344 | Formats.push_back(Elt: Format); |
345 | } |
346 | } |
347 | |
348 | const CommaSeparatedList::ColumnFormat * |
349 | CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { |
350 | const ColumnFormat *BestFormat = nullptr; |
351 | for (const ColumnFormat &Format : llvm::reverse(C: Formats)) { |
352 | if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { |
353 | if (BestFormat && Format.LineCount > BestFormat->LineCount) |
354 | break; |
355 | BestFormat = &Format; |
356 | } |
357 | } |
358 | return BestFormat; |
359 | } |
360 | |
361 | } // namespace format |
362 | } // namespace clang |
363 | |