1 | //===--- FormatToken.cpp - Format C++ code --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements specific functions of \c FormatTokens and their |
11 | /// roles. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "FormatToken.h" |
16 | #include "ContinuationIndenter.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include <climits> |
19 | |
20 | namespace clang { |
21 | namespace format { |
22 | |
23 | const char *getTokenTypeName(TokenType Type) { |
24 | static const char *const TokNames[] = { |
25 | #define TYPE(X) #X, |
26 | LIST_TOKEN_TYPES |
27 | #undef TYPE |
28 | nullptr}; |
29 | |
30 | if (Type < NUM_TOKEN_TYPES) |
31 | return TokNames[Type]; |
32 | llvm_unreachable("unknown TokenType" ); |
33 | return nullptr; |
34 | } |
35 | |
36 | // Sorted common C++ non-keyword types. |
37 | static SmallVector<StringRef> CppNonKeywordTypes = { |
38 | "clock_t" , "int16_t" , "int32_t" , "int64_t" , "int8_t" , |
39 | "intptr_t" , "ptrdiff_t" , "size_t" , "time_t" , "uint16_t" , |
40 | "uint32_t" , "uint64_t" , "uint8_t" , "uintptr_t" , |
41 | }; |
42 | |
43 | bool FormatToken::isTypeName(const LangOptions &LangOpts) const { |
44 | if (is(TT: TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts)) |
45 | return true; |
46 | return (LangOpts.CXXOperatorNames || LangOpts.C11) && is(Kind: tok::identifier) && |
47 | llvm::binary_search(Range&: CppNonKeywordTypes, Value: TokenText); |
48 | } |
49 | |
50 | bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const { |
51 | return isTypeName(LangOpts) || isOneOf(K1: tok::kw_auto, K2: tok::identifier); |
52 | } |
53 | |
54 | bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { |
55 | assert(is(tok::r_brace)); |
56 | if (!Style.Cpp11BracedListStyle || |
57 | Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { |
58 | return false; |
59 | } |
60 | const auto *LBrace = MatchingParen; |
61 | assert(LBrace && LBrace->is(tok::l_brace)); |
62 | if (LBrace->is(BBK: BK_BracedInit)) |
63 | return true; |
64 | if (LBrace->Previous && LBrace->Previous->is(Kind: tok::equal)) |
65 | return true; |
66 | return false; |
67 | } |
68 | |
69 | bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const { |
70 | // C# Does not indent object initialisers as continuations. |
71 | if (is(Kind: tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) |
72 | return true; |
73 | if (is(TT: TT_TemplateString) && opensScope()) |
74 | return true; |
75 | return is(TT: TT_ArrayInitializerLSquare) || is(TT: TT_ProtoExtensionLSquare) || |
76 | (is(Kind: tok::l_brace) && |
77 | (getBlockKind() == BK_Block || is(TT: TT_DictLiteral) || |
78 | (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || |
79 | (is(Kind: tok::less) && Style.isProto()); |
80 | } |
81 | |
82 | TokenRole::~TokenRole() {} |
83 | |
84 | void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} |
85 | |
86 | unsigned CommaSeparatedList::formatAfterToken(LineState &State, |
87 | ContinuationIndenter *Indenter, |
88 | bool DryRun) { |
89 | if (!State.NextToken || !State.NextToken->Previous) |
90 | return 0; |
91 | |
92 | if (Formats.size() <= 1) |
93 | return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). |
94 | |
95 | // Ensure that we start on the opening brace. |
96 | const FormatToken *LBrace = |
97 | State.NextToken->Previous->getPreviousNonComment(); |
98 | if (!LBrace || !LBrace->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare) || |
99 | LBrace->is(BBK: BK_Block) || LBrace->is(TT: TT_DictLiteral) || |
100 | LBrace->Next->is(TT: TT_DesignatedInitializerPeriod)) { |
101 | return 0; |
102 | } |
103 | |
104 | // Calculate the number of code points we have to format this list. As the |
105 | // first token is already placed, we have to subtract it. |
106 | unsigned RemainingCodePoints = |
107 | Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; |
108 | |
109 | // Find the best ColumnFormat, i.e. the best number of columns to use. |
110 | const ColumnFormat *Format = getColumnFormat(RemainingCharacters: RemainingCodePoints); |
111 | |
112 | // If no ColumnFormat can be used, the braced list would generally be |
113 | // bin-packed. Add a severe penalty to this so that column layouts are |
114 | // preferred if possible. |
115 | if (!Format) |
116 | return 10'000; |
117 | |
118 | // Format the entire list. |
119 | unsigned Penalty = 0; |
120 | unsigned Column = 0; |
121 | unsigned Item = 0; |
122 | while (State.NextToken != LBrace->MatchingParen) { |
123 | bool NewLine = false; |
124 | unsigned = 0; |
125 | |
126 | // If the previous token was one of our commas, we are now on the next item. |
127 | if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { |
128 | if (!State.NextToken->isTrailingComment()) { |
129 | ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; |
130 | ++Column; |
131 | } |
132 | ++Item; |
133 | } |
134 | |
135 | if (Column == Format->Columns || State.NextToken->MustBreakBefore) { |
136 | Column = 0; |
137 | NewLine = true; |
138 | } |
139 | |
140 | // Place token using the continuation indenter and store the penalty. |
141 | Penalty += Indenter->addTokenToState(State, Newline: NewLine, DryRun, ExtraSpaces); |
142 | } |
143 | return Penalty; |
144 | } |
145 | |
146 | unsigned CommaSeparatedList::formatFromToken(LineState &State, |
147 | ContinuationIndenter *Indenter, |
148 | bool DryRun) { |
149 | // Formatting with 1 Column isn't really a column layout, so we don't need the |
150 | // special logic here. We can just avoid bin packing any of the parameters. |
151 | if (Formats.size() == 1 || HasNestedBracedList) |
152 | State.Stack.back().AvoidBinPacking = true; |
153 | return 0; |
154 | } |
155 | |
156 | // Returns the lengths in code points between Begin and End (both included), |
157 | // assuming that the entire sequence is put on a single line. |
158 | static unsigned CodePointsBetween(const FormatToken *Begin, |
159 | const FormatToken *End) { |
160 | assert(End->TotalLength >= Begin->TotalLength); |
161 | return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; |
162 | } |
163 | |
164 | void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { |
165 | // FIXME: At some point we might want to do this for other lists, too. |
166 | if (!Token->MatchingParen || |
167 | !Token->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare)) { |
168 | return; |
169 | } |
170 | |
171 | // In C++11 braced list style, we should not format in columns unless they |
172 | // have many items (20 or more) or we allow bin-packing of function call |
173 | // arguments. |
174 | if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && |
175 | (Commas.size() < 19 || !Style.BinPackLongBracedList)) { |
176 | return; |
177 | } |
178 | |
179 | // Limit column layout for JavaScript array initializers to 20 or more items |
180 | // for now to introduce it carefully. We can become more aggressive if this |
181 | // necessary. |
182 | if (Token->is(TT: TT_ArrayInitializerLSquare) && Commas.size() < 19) |
183 | return; |
184 | |
185 | // Column format doesn't really make sense if we don't align after brackets. |
186 | if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) |
187 | return; |
188 | |
189 | FormatToken *ItemBegin = Token->Next; |
190 | while (ItemBegin->isTrailingComment()) |
191 | ItemBegin = ItemBegin->Next; |
192 | SmallVector<bool, 8> MustBreakBeforeItem; |
193 | |
194 | // The lengths of an item if it is put at the end of the line. This includes |
195 | // trailing comments which are otherwise ignored for column alignment. |
196 | SmallVector<unsigned, 8> EndOfLineItemLength; |
197 | MustBreakBeforeItem.reserve(N: Commas.size() + 1); |
198 | EndOfLineItemLength.reserve(N: Commas.size() + 1); |
199 | ItemLengths.reserve(N: Commas.size() + 1); |
200 | |
201 | bool = false; |
202 | for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { |
203 | assert(ItemBegin); |
204 | // Skip comments on their own line. |
205 | while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { |
206 | ItemBegin = ItemBegin->Next; |
207 | HasSeparatingComment = i > 0; |
208 | } |
209 | |
210 | MustBreakBeforeItem.push_back(Elt: ItemBegin->MustBreakBefore); |
211 | if (ItemBegin->is(Kind: tok::l_brace)) |
212 | HasNestedBracedList = true; |
213 | const FormatToken *ItemEnd = nullptr; |
214 | if (i == Commas.size()) { |
215 | ItemEnd = Token->MatchingParen; |
216 | const FormatToken * = ItemEnd->getPreviousNonComment(); |
217 | ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: NonCommentEnd)); |
218 | if (Style.Cpp11BracedListStyle && |
219 | !ItemEnd->Previous->isTrailingComment()) { |
220 | // In Cpp11 braced list style, the } and possibly other subsequent |
221 | // tokens will need to stay on a line with the last element. |
222 | while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) |
223 | ItemEnd = ItemEnd->Next; |
224 | } else { |
225 | // In other braced lists styles, the "}" can be wrapped to the new line. |
226 | ItemEnd = Token->MatchingParen->Previous; |
227 | } |
228 | } else { |
229 | ItemEnd = Commas[i]; |
230 | // The comma is counted as part of the item when calculating the length. |
231 | ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd)); |
232 | |
233 | // Consume trailing comments so the are included in EndOfLineItemLength. |
234 | if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && |
235 | ItemEnd->Next->isTrailingComment()) { |
236 | ItemEnd = ItemEnd->Next; |
237 | } |
238 | } |
239 | EndOfLineItemLength.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd)); |
240 | // If there is a trailing comma in the list, the next item will start at the |
241 | // closing brace. Don't create an extra item for this. |
242 | if (ItemEnd->getNextNonComment() == Token->MatchingParen) |
243 | break; |
244 | ItemBegin = ItemEnd->Next; |
245 | } |
246 | |
247 | // Don't use column layout for lists with few elements and in presence of |
248 | // separating comments. |
249 | if (Commas.size() < 5 || HasSeparatingComment) |
250 | return; |
251 | |
252 | if (Token->NestingLevel != 0 && Token->is(Kind: tok::l_brace) && Commas.size() < 19) |
253 | return; |
254 | |
255 | // We can never place more than ColumnLimit / 3 items in a row (because of the |
256 | // spaces and the comma). |
257 | unsigned MaxItems = Style.ColumnLimit / 3; |
258 | SmallVector<unsigned> MinSizeInColumn; |
259 | MinSizeInColumn.reserve(N: MaxItems); |
260 | for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { |
261 | ColumnFormat Format; |
262 | Format.Columns = Columns; |
263 | Format.ColumnSizes.resize(N: Columns); |
264 | MinSizeInColumn.assign(NumElts: Columns, UINT_MAX); |
265 | Format.LineCount = 1; |
266 | bool HasRowWithSufficientColumns = false; |
267 | unsigned Column = 0; |
268 | for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { |
269 | assert(i < MustBreakBeforeItem.size()); |
270 | if (MustBreakBeforeItem[i] || Column == Columns) { |
271 | ++Format.LineCount; |
272 | Column = 0; |
273 | } |
274 | if (Column == Columns - 1) |
275 | HasRowWithSufficientColumns = true; |
276 | unsigned Length = |
277 | (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; |
278 | Format.ColumnSizes[Column] = std::max(a: Format.ColumnSizes[Column], b: Length); |
279 | MinSizeInColumn[Column] = std::min(a: MinSizeInColumn[Column], b: Length); |
280 | ++Column; |
281 | } |
282 | // If all rows are terminated early (e.g. by trailing comments), we don't |
283 | // need to look further. |
284 | if (!HasRowWithSufficientColumns) |
285 | break; |
286 | Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. |
287 | |
288 | for (unsigned i = 0; i < Columns; ++i) |
289 | Format.TotalWidth += Format.ColumnSizes[i]; |
290 | |
291 | // Don't use this Format, if the difference between the longest and shortest |
292 | // element in a column exceeds a threshold to avoid excessive spaces. |
293 | if ([&] { |
294 | for (unsigned i = 0; i < Columns - 1; ++i) |
295 | if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) |
296 | return true; |
297 | return false; |
298 | }()) { |
299 | continue; |
300 | } |
301 | |
302 | // Ignore layouts that are bound to violate the column limit. |
303 | if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) |
304 | continue; |
305 | |
306 | Formats.push_back(Elt: Format); |
307 | } |
308 | } |
309 | |
310 | const CommaSeparatedList::ColumnFormat * |
311 | CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { |
312 | const ColumnFormat *BestFormat = nullptr; |
313 | for (const ColumnFormat &Format : llvm::reverse(C: Formats)) { |
314 | if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { |
315 | if (BestFormat && Format.LineCount > BestFormat->LineCount) |
316 | break; |
317 | BestFormat = &Format; |
318 | } |
319 | } |
320 | return BestFormat; |
321 | } |
322 | |
323 | bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style) { |
324 | assert(Current.Previous); |
325 | const auto &Previous = *Current.Previous; |
326 | if (Current.is(TT: TT_CtorInitializerComma) && |
327 | Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) { |
328 | return true; |
329 | } |
330 | if (Style.Language == FormatStyle::LK_Proto && Current.is(TT: TT_SelectorName)) |
331 | return true; |
332 | return Previous.is(Kind: tok::comma) && !Current.isTrailingComment() && |
333 | ((Previous.isNot(Kind: TT_CtorInitializerComma) || |
334 | Style.BreakConstructorInitializers != |
335 | FormatStyle::BCIS_BeforeComma) && |
336 | (Previous.isNot(Kind: TT_InheritanceComma) || |
337 | Style.BreakInheritanceList != FormatStyle::BILS_BeforeComma)); |
338 | } |
339 | |
340 | } // namespace format |
341 | } // namespace clang |
342 | |